#!/usr/bin/python
"""
Benchmark Key Lookup Methods
Usage: ./keyspeed.py DIRECTORY DICTIONARY_FILE
"""

import os
import sys
import time

join = os.path.join

# time.clock() no longer exists on Python 3.8+; use perf_counter where the
# interpreter provides it and fall back to the original time.clock() otherwise.
_clock = getattr(time, 'perf_counter', None) or time.clock

# Number of passes made over `lookups` in each timed run.
_ROUNDS = 50

# Keys probed by every benchmark, mapped to whether the dictionary file is
# expected to contain them.
lookups = {'love': True, 'something': True, 'excellent': True,
           'whatever': True, 'probably': True, 'ablator': True,
           'actinally': True, 'adagietto': True, 'alerted': True,
           'amicableness': True, 'anthidium': True, 'backwaters': True,
           'beautifullest': True, 'beauty': True, 'belove': True,
           'biennium': True, 'biskup': True, 'cosmological': True,
           'froberg': True, 'groleau': True, 'haplontic': True,
           'snoreless': True, 'tryne': True, 'quassk': False,
           'promblebobble': False, 'osrijgoarj': False,
           'oisjregoirag': False, 'iursgarugh': False,
           'quembobble': False, 'smodfoddle': False,
           'oairjgoarig': False, 'chibblesmink': False,
           'garonflep': False, 'chaccklefloig': False,
           'ambosmoip': False}


class ArgError(ValueError):
    """Raised by benchmark() when one of its path arguments is unusable."""


def read(fn):
    """Load a word list into a dict.

    Args:
        fn: path of a text file holding one key per line.

    Returns:
        A dict mapping every line of the file (with trailing CR/LF
        characters removed) to True. A blank line yields the key ''.
    """
    result = {}
    # The with-block closes the file even if reading fails part-way.
    with open(fn) as f:
        for line in f:
            result[line.rstrip('\r\n')] = True
    return result


def example():
    """Example output"""
    return """
Dictionary length: 400936
Lookups length: 35
Benchmarking 'shelve'...
shelve - Doing the lookup test...
Time taken for shelve: 0.0769999999902

memdict - Doing the lookup test...
Time taken for memdict: 0.00300000002608

cdb - Doing the lookup test
Time taken for cdb: 0.0100000000093

dbhash - Doing the lookup test...
Time taken for dbhash: 0.0819999999949

Sizes:

$ ls -al .
[...]
-rw-r--r--  1 sbp sbp 13222320 Oct 22 16:27 cdb.test
-rw-r--r--  1 sbp sbp 10484736 Oct 22 16:36 dbhash.test
-rw-r--r--  1 sbp sbp 19950592 Oct 22 16:37 shelve.test
"""


def _membership_test(db):
    """Return db's key-lookup callable: has_key if it has one, else __contains__."""
    return getattr(db, 'has_key', None) or db.__contains__


def _time_lookups(contains, rounds=_ROUNDS):
    """Run `rounds` passes over `lookups` through `contains`; return elapsed seconds.

    Raises:
        AssertionError: if a lookup result disagrees with `lookups`.
    """
    # Materialise the pairs first so only the lookups themselves are timed.
    pairs = list(lookups.items())
    start = _clock()
    for _ in range(rounds):
        for key, expected in pairs:
            # Call outside of an assert statement: under `python -O` asserts
            # are stripped, which would remove the very lookup being timed.
            found = contains(key)
            if found != expected:
                raise AssertionError(
                    "lookup of %r returned %r, expected %r"
                    % (key, found, expected))
    return _clock() - start


def benchmark(path, udict):
    """Time key lookups against shelve, an in-memory dict, cdb and dbhash.

    For each on-disk backend the database file (shelve.test, cdb.test,
    dbhash.test) is built inside `path` from the dictionary's keys unless a
    file of that name already exists there, then every key of `lookups` is
    probed `_ROUNDS` times and the elapsed time is printed.

    Args:
        path: directory in which the test databases are created or reused.
        udict: path of the dictionary file (one key per line, see read()).

    Raises:
        ArgError: if `path` is not a directory or `udict` is not a file.
        ImportError: if the `cdb` or `dbhash` module cannot be imported.
        AssertionError: if a backend's answer disagrees with `lookups`.
    """
    if not os.path.isdir(path):
        raise ArgError("First argument must be a directory.")
    if not os.path.isfile(udict):
        raise ArgError("Second argument must be path to dictionary.")

    memdict = read(udict)
    print("Dictionary length: %s" % len(memdict))
    print("Lookups length: %s" % len(lookups))

    # --- shelve -----------------------------------------------------------
    print("Benchmarking 'shelve'...")
    import shelve
    shelve_file = join(path, 'shelve.test')
    if not os.path.exists(shelve_file):
        d = shelve.open(shelve_file)
        try:
            print("Storing dictonary data in shelve.test...")
            for key in memdict:
                d[key] = True
        finally:
            d.close()
        print("done!")

    print("shelve - Doing the lookup test...")
    d = shelve.open(shelve_file)
    try:
        elapsed = _time_lookups(_membership_test(d))
    finally:
        d.close()
    print("Time taken for shelve: %s" % elapsed)
    print("")

    # --- plain in-memory dict ---------------------------------------------
    print("memdict - Doing the lookup test...")
    elapsed = _time_lookups(_membership_test(memdict))
    print("Time taken for memdict: %s" % elapsed)
    print("")

    # --- cdb --------------------------------------------------------------
    import cdb  # not part of the standard library
    cdb_file = join(path, 'cdb.test')
    if not os.path.exists(cdb_file):
        # No newline here: "done!" is appended to this line once finished.
        sys.stdout.write("Making cdb database... ")
        sys.stdout.flush()
        maker = cdb.cdbmake(cdb_file, join(path, 'cdb.tmp'))
        for key in memdict:
            maker.add(key, '')
        maker.finish()
        del maker
        print("done!")

    print("cdb - Doing the lookup test")
    c = cdb.init(cdb_file)
    elapsed = _time_lookups(_membership_test(c))
    print("Time taken for cdb: %s" % elapsed)
    del c
    print("")

    # --- dbhash -----------------------------------------------------------
    import dbhash  # BSD DataBase; only shipped with Python 2
    dbhash_file = join(path, 'dbhash.test')
    if not os.path.exists(dbhash_file):
        d = dbhash.open(dbhash_file, 'n')
        try:
            print("Storing dictonary data in dbhash.test...")
            for key in memdict:
                d[key] = None
        finally:
            d.close()
        print("done!")

    print("dbhash - Doing the lookup test...")
    d = dbhash.open(dbhash_file, 'r')
    try:
        elapsed = _time_lookups(_membership_test(d))
    finally:
        d.close()
    print("Time taken for dbhash: %s" % elapsed)
    print("")


def main():
    """Command-line entry point: print the usage text unless given two arguments."""
    if len(sys.argv) != 3:
        print(__doc__)
        sys.exit()
    path, udict = sys.argv[1], sys.argv[2]
    benchmark(path, udict)


if __name__ == "__main__":
    main()