Thursday, April 12, 2007

Bram and I were discussing how to merge sorted lists...


import time
import heapq
from random import randint

def report(*arg): print "%-20s: sorted %d lists with a total of %d elements in %d milliseconds" % arg

domain, listsize, nlists = 100000, 5000, 500

lists = [ sorted([ randint(0, domain) for j in range(randint(1,listsize)) ]) for i in range(nlists) ]
indices = dict([ (i, 0) for i in range(nlists) ])

start_time = time.time()
result = sorted(sum(lists, []))
end_time = time.time()
report("primitive method", nlists, len(result), int((end_time - start_time) * 1000))

heap = [ (lists[list_index][0], list_index, 0) for list_index in range(nlists) ]
heapq.heapify(heap)

start_time = time.time()

result = []
while len(heap):
min_value, min_list_index, min_value_index = heapq.heappop(heap)
result.append(min_value)
min_value_index += 1
if min_value_index < len(lists[min_list_index]):
heapq.heappush(heap, (lists[min_list_index][min_value_index], min_list_index, min_value_index))

end_time = time.time()
report("better method", nlists, len(result), int((end_time - start_time) * 1000))


No comments: