#!/usr/bin/env python2.4
"""Sets vs. Regexp! Who will win?

Micro-benchmark that counts the "safe" characters in a random string in
three different ways and prints how long each one took:

* ``sets``       -- one frozenset membership test per character,
* ``regexp``     -- a single ``findall`` over runs of safe characters,
* ``regexpiter`` -- one regexp ``match`` call per character.

A character is "safe" when its ordinal is 0x20, 0x21, 0x23-0x5B or
0x5D-0x7E (both range ends inclusive); the double quote (0x22) and the
backslash (0x5C) are therefore not safe.
"""

import random
import re
import time

# Ordinals of the safe characters.  range() excludes its stop value, so the
# stops are 0x5C and 0x7F in order to include 0x5B and 0x7E -- exactly the
# characters matched by the r_safe character class below.
ints = (0x20, 0x21) + tuple(range(0x23, 0x5C)) + tuple(range(0x5D, 0x7F))

# NOTE: the name shadows the ``bytes`` builtin of later Python versions; it
# is kept because it is the name this script has always exposed.
bytes = frozenset([chr(i) for i in ints])

# One or more consecutive safe characters (used by the ``regexp`` variant).
r_safe = re.compile(r'[\x20\x21\x23-\x5B\x5D-\x7E]+')
# Exactly one safe character (used by the ``regexpiter`` variant).
r_safe_char = re.compile(r'[\x20\x21\x23-\x5B\x5D-\x7E]')

# Characters mixed into the test string that none of the counters accept.
unsafe_chars = '\x02\t\n\r"\\\x80\x8f\xff'

# time.clock() no longer exists on Python >= 3.8; use perf_counter() where
# it is available and fall back to clock() on old interpreters.
_clock = getattr(time, 'perf_counter', None) or time.clock


def make_teststring(length=50000, safe_ratio=0.95):
    """Return a random string of *length* characters.

    Each character is drawn from the safe set with probability
    *safe_ratio* and from ``unsafe_chars`` otherwise.
    """
    safe_pool = tuple(bytes)  # built once instead of once per character
    pieces = []
    for _ in range(length):
        if random.random() < safe_ratio:
            pieces.append(random.choice(safe_pool))
        else:
            pieces.append(random.choice(unsafe_chars))
    # A single join avoids the quadratic cost of repeated ``+=``.
    return ''.join(pieces)


def count_with_set(text):
    """Count the safe characters of *text* via frozenset membership."""
    safe = 0
    # Deliberately an explicit per-character loop: that is what is measured.
    for c in text:
        if c in bytes:
            safe += 1
    return safe


def count_with_findall(text):
    """Count the safe characters of *text* with one regexp ``findall``."""
    return len(''.join(r_safe.findall(text)))


def count_with_match(text):
    """Count the safe characters of *text* with one ``match`` per character."""
    safe = 0
    for c in text:
        if r_safe_char.match(c):
            safe += 1
    return safe


def _timed(label, counter, text):
    """Run ``counter(text)``, print its result and duration, return the count."""
    start = _clock()
    safe = counter(text)
    elapsed = _clock() - start
    print("%s: Found %s safe in %s seconds" % (label, safe, elapsed))
    return safe


def main():
    """Generate the test string and time the three counting strategies."""
    teststring = make_teststring()
    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3).
    print('teststring[:50] %s' % teststring[:50])
    print('teststring[50:100] %s' % teststring[50:100])
    _timed('sets', count_with_set, teststring)
    _timed('regexp', count_with_findall, teststring)
    _timed('regexpiter', count_with_match, teststring)


if __name__ == '__main__':
    main()