#!/usr/bin/env python
"""
changes.py - Show Site Changes
Author: Sean B. Palmer, inamidst.com
"""
import cgitb; cgitb.enable()
import sys, os, re, bz2
# Document root of the site; falls back to the current directory when the
# DOCUMENT_ROOT variable is not set (e.g. when run outside the web server).
docroot = os.environ.get('DOCUMENT_ROOT', '.')

# The inventory module lives under dev/code in the document root, so that
# directory has to be on sys.path before the import below.
sys.path.append(os.path.join(docroot, 'dev/code'))
import inventory

# odds/.htlogdir holds the location of the log directory. A literal
# "$DOCUMENT_ROOT" inside it is replaced by the document root, and
# surrounding whitespace is dropped.
htlogdir = os.path.join(docroot, 'odds/.htlogdir')
with open(htlogdir) as htlogfile:
    # Read under "with" so the handle is closed deterministically.
    logdir = htlogfile.read()
logdir = logdir.replace('$DOCUMENT_ROOT', docroot)
logdir = logdir.strip()

# Matches a bare "yyyy-mm" month name.
r_month = re.compile(r'^\d{4}-\d{2}$')
def cgipath(default='/changes/'):
    """Return the request URI with a trailing PATH_INFO removed.

    REQUEST_URI is read from the environment, falling back to *default*
    when it is not set. If the URI ends with the (non-empty) PATH_INFO,
    that suffix is cut off; otherwise the URI is returned unchanged.
    """
    pathinfo = os.environ.get('PATH_INFO', '')
    requri = os.environ.get('REQUEST_URI', default)
    # An empty PATH_INFO has to be excluded explicitly: every string ends
    # with '', and requri[:-0] is requri[:0], i.e. the empty string.
    if pathinfo and requri.endswith(pathinfo):
        return requri[:-len(pathinfo)]
    return requri
def dateToFilename(date):
    """Map a "yyyy-mm-dd hh:mm:ss" date to its bz2 diff path under logdir.

    The file sits in a directory named after the first seven characters of
    the day, and is called "diff.<day>.<time without colons>.bz2".
    """
    day, clock = date.split(' ', 1)
    stamp = day + '.' + clock.replace(':', '')
    month = day[:7]
    return os.path.join(logdir, month, 'diff.%s.bz2' % stamp)
def diffname(date, path):
    """Return the bz2 diff filename for *date* if its log mentions *path*.

    The ".log" file that sits next to the bz2 diff is scanned for a line
    ending in " www" + path. Returns None when there is no such log file
    or no such line.
    """
    # Parsing from bzip2'd files was too slow, so now this uses logs.
    # (The log files are compiled in bin/changes, i.e. made automatically.)
    #
    # Before: 700 8.580 0.012 13.000 0.019 changes.py:33(diffname)
    # After: 700 0.050 0.000 0.130 0.000 changes.py:33(diffname)
    # - profile.run('changes.latestAsHTML(15)')
    archive = dateToFilename(date)
    logname = archive[:-4] + '.log'  # swap the ".bz2" suffix for ".log"
    if not os.path.isfile(logname):
        return None
    wanted = ' www' + path
    result = None
    log = open(logname)
    for entry in log:
        if entry.strip('\r\n').endswith(wanted):
            result = archive
            break
    log.close()
    return result
def pathToFilename(path):
    """Convert a path to a diff escaped path.

    Leading slashes are dropped, literal underscores are doubled, and the
    remaining slashes become single underscores.

    >>> pathToFilename('/phenny/modules/codepoint.py')
    'phenny_modules_codepoint.py'
    """
    segments = path.lstrip('/').split('/')
    return '_'.join(segment.replace('_', '__') for segment in segments)
def infoToFilename(date, path):
    """Convert a date and path to a diff filename.

    Spaces in the date become hyphens; the path is escaped with
    pathToFilename() and ".diff" is appended.

    >>> infoToFilename('2006-03-05 00:30:56', '/phenny/modules/codepoint.py')
    '2006-03-05-00:30:56-phenny_modules_codepoint.py.diff'
    """
    stamp = '-'.join(date.split(' '))
    escaped = pathToFilename(path)
    return stamp + '-' + escaped + '.diff'
def getdiff(filename):
    """Get a diff using an escaped diff filename, returning lines.

    The diff is looked up in diffs/<yyyy-mm>.tar.bz2, where <yyyy-mm> is
    the first seven characters of *filename*. Returns an iterator over the
    diff's lines, or False when no line could be read (no matching regular
    member, or an empty one).

    >>> diff = '2006-03-05-00:30:56-phenny_modules_codepoint.py.diff'
    >>> lines = getdiff(diff)
    >>> len(list(lines))
    28
    """
    import tarfile, itertools

    def member_lines():
        archive = tarfile.open('diffs/' + filename[:7] + '.tar.bz2', 'r:bz2')
        for member in archive:
            # Member names carry a leading "yyyy-mm/" that is not part of
            # the diff filename.
            if not member.isreg() or member.name[8:] != filename:
                continue
            stream = archive.extractfile(member)
            line = stream.readline()
            while line:
                yield line
                line = stream.readline()
            stream.close()
            break
        archive.close()

    # In case you haven't noticed, this is absurd: the generator is teed so
    # that one copy can be probed for a first line without consuming the
    # iterator that is handed back to the caller.
    lines, probe = itertools.tee(member_lines())
    try:
        next(probe)
    except StopIteration:
        return False
    return lines
def diffsFromArchive(month, epath, start=None, finish=None, files=True):
    """Get diffs from a month and escaped path, with optional bounds.

    Generates (name, fileobj) pairs, newest archive member first, for the
    members of diffs/<month>.tar.bz2 whose escaped path matches *epath*
    (used as a regular expression anchored at both ends) and whose date
    lies within [start, finish]. *start* defaults to 1970-01-01 and
    *finish* to today. With files=False the second item is None and no
    member is extracted. Nothing is generated when the archive is missing.

    Each yielded file object is closed as soon as the caller asks for the
    next item, so it must be read before advancing.

    >>> diffs = diffsFromArchive('2006-03', 'phenny_modules_codepoint.py')
    >>> for name, f in diffs:
    ...    print name
    ...
    2006-03-27-09:50:45-phenny_modules_codepoint.py.diff
    2006-03-05-00:30:56-phenny_modules_codepoint.py.diff
    """
    import re, datetime, tarfile # @@ , fnmatch
    start = start or datetime.date(1970, 1, 1)
    finish = finish or datetime.date.today()
    r_epath = re.compile(r'^%s$' % epath)
    tarfn = 'diffs/' + month + '.tar.bz2'
    if not os.path.exists(tarfn):
        return
    tar = tarfile.open(tarfn, 'r:bz2')
    try:
        for tarinfo in reversed(list(tar)):
            # Directory entries and other non-files have no date in their
            # name; skip them as getdiff() does.
            if not tarinfo.isreg():
                continue
            name = tarinfo.name[8:] # minus leading "yyyy-mm/"
            args = tuple(int(arg) for arg in name[:10].split('-'))
            tardate = datetime.date(*args)
            # Entries newer than the upper bound are skipped, not treated
            # as the end of the scan: older, in-range entries may follow.
            if tardate > finish:
                continue
            # Scanning newest-first, an entry older than the lower bound
            # ends the scan (presumes the archive is in date order).
            if tardate < start:
                break
            # if fnmatch.fnmatch(name[20:-5], epath):
            # Strip the date prefix and the ".diff" extension.
            if not r_epath.match(name[20:-5]):
                continue
            if files:
                f = tar.extractfile(tarinfo)
                try:
                    yield name, f
                finally:
                    f.close()
            else:
                yield name, None
    finally:
        # Runs on exhaustion, on the early break, and when the generator
        # is closed before it finishes.
        tar.close()
def diffExists(filename):
    """Report whether *filename* is a key in its month's diffs.db.

    The database is <logdir>/<first seven characters of filename>/diffs.db.
    Returns False when that file does not exist.
    """
    import anydbm
    dbpath = os.path.join(logdir, filename[:7] + '/diffs.db')
    if not os.path.exists(dbpath):
        return False
    # @@ How will "with" work with the following?
    db = anydbm.open(dbpath, 'r')
    if db.has_key(filename):
        found = True
    else:
        found = False
    db.close()
    return found
def public(exclusions, path):
    """Return True unless *path* starts with one of the excluded prefixes.

    *exclusions* is a space-separated string of path prefixes. Empty items
    (an empty string, or the gaps left by repeated spaces) are ignored:
    every path starts with '', so treating '' as a prefix would mark all
    paths as non-public.
    """
    for exclude in exclusions.split(' '):
        if exclude and path.startswith(exclude):
            return False
    return True
class Changeset(object):
    """A single change log entry: header attributes plus changed paths.

    Header lines of the form "# Name: value" become lower-cased attributes
    on the instance. format() reads self.date and self.message, and parse()
    reads self.robots, so the input is expected to provide those headers.
    The remaining non-empty lines are "<status> <path>" entries, sorted
    into self.modified, self.added and self.deleted as tuples of
    (rawpath, path, public).
    """

    def __init__(self, fn, lines):
        self.filename = fn
        self.lines = lines
        self.parse()

    def __str__(self):
        return self.format('html')

    def format(self, media):
        """Render the changeset as an HTML fragment.

        *media* is accepted but not consulted. The result is the message
        followed by the time in parentheses, then a list of the public
        modified, added ("New!") and deleted ("Removed") paths.
        """
        # NOTE(review): the markup in the literals below is a
        # reconstruction. The previous literals were unterminated and had
        # fewer %s placeholders than arguments, so the tags had evidently
        # been lost. The anchors follow from the (path, path) argument
        # pairs; the list tags are a best guess. Confirm against the
        # deployed pages.
        day, t = self.date.split(' ', 1)
        lines = [self.message + ' (%s)' % t]
        lines.append('<ul>')

        # Diff links are switched off: the diffExists() lookup on
        # infoToFilename(self.date, rawpath) was disabled, so the suffix
        # after a modified path is always empty.
        diff = ''
        template = '<li><a href="%s">%s</a>%s</li>'
        for (rawpath, path, is_public) in self.modified:
            if is_public:
                lines.append(template % (path, path, diff))

        template = '<li><a href="%s">%s</a> New!</li>'
        for (rawpath, path, is_public) in self.added:
            if is_public:
                lines.append(template % (path, path))

        template = '<li>%s Removed</li>'
        for (rawpath, path, is_public) in self.deleted:
            if is_public:
                lines.append(template % path)

        lines.append('</ul>')
        return '\n'.join(lines)

    def parse(self):
        """Fill in header attributes and the modified/added/deleted lists."""
        self.modified = []
        self.added = []
        self.deleted = []
        seenpaths = set()
        for line in self.lines:
            line = line.strip(' \t\r\n')
            if line.startswith('#'):
                # Header line: "# Name: value" -> self.name = value
                attr, value = line.lstrip('# ').split(': ', 1)
                setattr(self, attr.lower(), value)
                continue
            if not line:
                continue

            # Schema: (rawpath, path, public, diff)
            rawpath = line.split(' ', 1)[1].strip(' \t\r\n')
            if not rawpath.startswith('/'):
                rawpath = '/' + rawpath

            # Normalise the path, and filter out .htacces, .pyc, etc.
            path = inventory.pathstrip(rawpath)
            if inventory.ignorable(path):
                continue

            # Remove duplicates, e.g. /newdir{/} and /newdir/{index.html}
            if path in seenpaths:
                continue
            seenpaths.add(path)

            if line.startswith('M'):
                target = self.modified
            elif line.startswith('A'):
                target = self.added
            elif line.startswith('D'):
                target = self.deleted
            else:
                continue
            # "public" here is the module-level function, not the method
            # of the same name defined below.
            target.append((rawpath, path, public(self.robots, path)))

        self.modified.sort()
        self.added.sort()
        self.deleted.sort()

    def public(self):
        """Return True if at least one listed path is flagged public."""
        entries = self.modified + self.added + self.deleted
        for (rawpath, path, is_public) in entries:
            if is_public:
                return True
        return False
class Changelog(object):
    """chlog = Changelog(filename, [reverse,] [maximum]) -> new change log.

    chlog.changesets -> the changesets
    chlog.days -> days to changesets mapping, order as reverse karg

    len(chlog) counts only the changesets that have a public path, and
    str(chlog) renders only those.
    """

    def __init__(self, filename, reverse=False, maximum=None):
        self.filename = filename
        self.reverse = reverse
        self.maximum = maximum
        self.changesets = []
        self.parse()

    def __len__(self):
        return len([c for c in self.changesets if c.public()])

    def __str__(self):
        """Render the public changesets as HTML, grouped by day.

        Days are in ascending order, or descending when self.reverse is
        set. Days without any public changeset are left out entirely.
        """
        # NOTE(review): the markup in the literals below is a
        # reconstruction. The previous literals were unterminated and the
        # day heading had one %s placeholder for two arguments, so the
        # tags had evidently been lost. The heading level and list tags
        # are a best guess; confirm against the deployed pages.
        lines = []
        days = sorted(self.days)
        if self.reverse:
            days.reverse()
        for day in days:
            visible = [c for c in self.days[day] if c.public()]
            if not visible:
                continue
            lines.append('<h2 id="%s">%s</h2>' % (day, day))
            lines.append('<ul>')
            for changeset in visible:
                lines.append('<li>%s</li>' % changeset)
            lines.append('</ul>')
        return '\n'.join(lines)

    def parse(self):
        """Read the log file and build self.changesets and self.days."""
        # Every line starting with "# Date" opens a new changeset; lines
        # before the first such line are dropped.
        changeLines = []
        with open(self.filename) as changelog:
            for line in changelog:
                if line.startswith('# Date'):
                    changeLines.append([])
                if changeLines:
                    changeLines[-1].append(line)

        for changesetLines in changeLines:
            changeset = Changeset(self.filename, changesetLines)
            self.changesets.append(changeset)

        if self.reverse:
            self.changesets.reverse()
        if self.maximum is not None:
            self.changesets = self.changesets[:self.maximum]

        # Group by the "yyyy-mm-dd" part of each changeset's date.
        self.days = {}
        for changeset in self.changesets:
            day = changeset.date[:10]
            self.days.setdefault(day, []).append(changeset)
def changesByMonth(month):
    """Return the rendered change log for *month* ("yyyy-mm")."""
    logfile = os.path.join(logdir, month + '.log')
    return str(Changelog(logfile))
def monthsBackwards():
    """Generate "yyyy-mm" strings from the current UTC month back to 1900."""
    import datetime
    # Shame that datetime.date can't do UTC
    # Cf. http://swhack.com/logs/2006-05-01#T03-37-14
    step = datetime.timedelta(days=2)
    cursor = datetime.datetime.utcnow().replace(day=1)
    yield cursor.strftime('%Y-%m')
    while True:
        # Two days before the first of a month always falls in the
        # previous month; snap back to that month's first day.
        cursor = (cursor - step).replace(day=1)
        if cursor.year < 1900:
            return
        yield cursor.strftime('%Y-%m')
def monthLogsBackwards():
    """Generate the months that have a "yyyy-mm.log" in logdir, newest first.

    Months are taken from monthsBackwards(), so only log names that it
    produces are ever generated.
    """
    pending = set(
        name for name in os.listdir(logdir)
        if name.endswith('.log') and r_month.match(name[:-4])
    )
    if not pending:
        return
    for month in monthsBackwards():
        name = month + '.log'
        if name not in pending:
            continue
        pending.remove(name)
        yield month
        # Stop as soon as every log found has been accounted for.
        if not pending:
            return
def monthExists(month):
    """Return True if *month* has a log file with a public changeset."""
    changelog = os.path.join(logdir, month + '.log')
    return os.path.isfile(changelog) and len(Changelog(changelog)) > 0
def latest(n):
    """Return newest-first Changelog objects holding up to *n* public changes.

    Monthly logs are read newest first, each capped at the number of
    changes still wanted, until the public changesets collected reach *n*.
    """
    changelogs = []
    collected = 0  # public changesets gathered so far
    for month in monthLogsBackwards(): # was monthsBackwards()
        changelog = os.path.join(logdir, month + '.log')
        if os.path.isfile(changelog):
            chlog = Changelog(changelog, reverse=True, maximum=n - collected)
            changelogs.append(chlog)
            collected += len(chlog)
        elif changelogs:
            # A missing month after at least one hit ends the search.
            break
        if collected >= n:
            break
    return changelogs
def latestAsChangesets(n):
    """Generate the changesets from latest(n), most recent day first."""
    for chlog in latest(n):
        for day in sorted(chlog.days, reverse=True):
            for changeset in chlog.days[day]:
                yield changeset
def latestAsHTML(n):
    """Return the change logs from latest(n), rendered and newline-joined."""
    rendered = [str(chlog) for chlog in latest(n)]
    return '\n'.join(rendered)
def genchanges(n=15):
    """Generator of (day, time, message).

    Covers the changesets from latestAsChangesets(n), leaving out those
    with no public path.
    """
    for entry in latestAsChangesets(n):
        if entry.public():
            day, clock = entry.date.split(' ', 1)
            yield day, clock, entry.message
if __name__ == "__main__":
    # Run directly, the script only prints its module docstring.
    print(__doc__)