#!/usr/bin/python
"""Print a byte-frequency signature for each file named on the command line.

For every argument that is a regular file, one line is printed holding a
fixed-width signature followed by the file name.  The signature lists the
file's distinct bytes from most to least frequent.
"""
import os
import re
import sys
from collections import Counter

# Matches the four-character ``\xNN`` escape that repr() produces for a byte
# with no printable form.
r_nonprint = re.compile(r'\\x..')


def _display(value):
    """Return the text used to show one byte value (an int in 0-255).

    Printable bytes appear as themselves, a backslash and the tab/newline/
    carriage-return bytes appear as their two-character escapes, and every
    byte that repr() renders as ``\\xNN`` becomes a single '.'.
    """
    literal = repr(bytes(bytearray([value])))
    # Python 3 renders bytes as b'...'; Python 2 renders them as '...'.
    # Strip the optional prefix and the surrounding quotes either way.
    if literal.startswith('b'):
        literal = literal[2:-1]
    else:
        literal = literal[1:-1]
    # The substitution is applied per byte on purpose: applied to the joined
    # signature, the escaped backslash of a literal '\' byte followed by an
    # 'x' byte would be mistaken for a ``\xNN`` escape and swallow the
    # printable characters after it.
    return r_nonprint.sub('.', literal)


def categorize(fn, width=35):
    """Return the byte-frequency signature of the file at path *fn*.

    The distinct bytes of the file are ordered by descending occurrence
    count, ties broken by ascending byte value.  Their display forms (see
    ``_display``) are concatenated, and the result is truncated to *width*
    characters or right-padded with '.' up to *width*.

    Args:
        fn: path of the file to read; it is opened in binary mode.
        width: length of the returned signature (default 35).

    Returns:
        A string of exactly *width* characters.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # Binary mode keeps the raw bytes intact on every platform, and the
    # context manager closes the handle even if read() fails.
    with open(fn, 'rb') as handle:
        data = handle.read()

    # Iterating a bytearray yields ints on both Python 2 and Python 3.
    counts = Counter(bytearray(data))
    ordered = sorted(counts, key=lambda value: (-counts[value], value))

    signature = ''.join(_display(value) for value in ordered)
    return signature[:width].ljust(width, '.')


def main():
    """Print ``<signature> <path>`` for each argument that is a regular file."""
    for fn in sys.argv[1:]:
        if os.path.isfile(fn):
            # A single pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (categorize(fn), fn))


if __name__ == "__main__":
    main()