#!/usr/bin/env python
"""Count HTML tag usage in ``.html`` files last modified in 1990 or 1991.

Walks the current directory tree, and for every matching file tallies the
opening tags it contains, grouped by the file's modification date.  Prints
one ``{date: ..., tag: ..., count: ...},`` line per (date, tag) pair, then
a bracketed list of all tags ordered from most to least frequent.

The code runs unchanged on Python 2 and Python 3.
"""
import datetime
import io
import os
import re

# Captures the name of every opening tag; closing tags ("</p>") do not
# match because "/" is not part of the character class.
r_tag = re.compile(r'<([A-Za-z0-9]+)')

# NOTE(review): this pattern previously had no delimiters and matched the
# empty string at every position, so the substitution inserted '...' after
# every '<' and r_tag could never match anything.  The <xmp>...</xmp>
# delimiters are inferred from the variable name -- confirm.
r_xmp = re.compile(r'(?is)<xmp>(.*?)</xmp>')

# Prefixes of str(datetime) values that select the years of interest.
YEARS = ('1990', '1991')


def extract_tags(html):
    """Return the lower-cased opening tag names in *html*, in order.

    Every ``<xmp>...</xmp>`` section (including its own tags) is replaced
    by ``'...'`` first, so markup shown inside such a section is not
    counted.
    """
    visible = r_xmp.sub('...', html)
    return [tag.lower() for tag in r_tag.findall(visible)]


def js_date(stamp):
    """Turn a ``'YYYY-MM-DD...'`` string into ``'new Date(Y, M, D)'``.

    The month is reduced by one, giving a zero-based month number.
    """
    year, month, day = (int(part) for part in stamp[:10].split('-'))
    return 'new Date' + str((year, month - 1, day))


def collect(top='.'):
    """Scan *top* recursively and tally tags of the selected ``.html`` files.

    Returns a ``(dates, total)`` pair: ``dates`` maps a ``js_date`` string
    to a dict of quoted tag name -> count for files modified on that day,
    and ``total`` maps quoted tag name -> count over all selected files.
    """
    dates = {}
    total = {}
    for root, _dirs, files in os.walk(top):
        for name in files:
            filename = os.path.join(root, name)
            if not filename.endswith('.html'):
                continue
            mtime = os.path.getmtime(filename)
            stamp = str(datetime.datetime.fromtimestamp(mtime))
            if not stamp.startswith(YEARS):
                continue
            # Latin-1 maps every byte to a character, so decoding cannot
            # fail; the context manager closes the file promptly.
            with io.open(filename, encoding='latin-1') as handle:
                tags = extract_tags(handle.read())
            # A date entry is created even when the file has no tags.
            per_day = dates.setdefault(js_date(stamp), {})
            for tag in tags:
                key = '"' + tag + '"'
                per_day[key] = per_day.get(key, 0) + 1
                total[key] = total.get(key, 0) + 1
    return dates, total


def main():
    """Scan the current directory and print the per-date and ranked output."""
    dates, total = collect('.')
    for date, tags in dates.items():
        for tag, count in tags.items():
            print(' {date: %s, tag: %s, count: %s},' % (date, tag, count))
    # Sort on (count, tag) descending so the most frequent tags come first.
    ranked = sorted(((count, tag) for tag, count in total.items()),
                    reverse=True)
    print('[' + ', '.join(tag for _count, tag in ranked) + ']')


if __name__ == '__main__':
    main()