#!/usr/bin/env python
"""
siteupdates.py - Prints out the most recently modified files in a site
License: GPL 2; share and enjoy!
Author: Sean B. Palmer, inamidst.com

Help and usage notes:
   ./siteupdates.py --help
"""

import os
import sys
import time
from optparse import OptionParser

try:
    import robotparser  # Python 2 module name
except ImportError:
    from urllib import robotparser  # the same parser under its Python 3 name

try:
    _string_types = basestring  # Python 2: matches both str and unicode
except NameError:
    _string_types = str

# Suffixes removed from listed paths when ``strip`` is requested. The first
# match wins, so 'index.html' has to be tried before the bare '.html'.
_STRIP_SUFFIXES = ('index.html', '.html', 'index.cgi', '.cgi',
                   '.txt', '.jpg', '.png', '.php', '.rss')


def _string_buffer(initial):
    """Return an in-memory text buffer pre-loaded with *initial*."""
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    return StringIO(initial)


def _load_robots(rootpath):
    """Return a parser for rootpath/robots.txt, or None if there is no such file."""
    robotstxt = os.path.join(rootpath, 'robots.txt')
    if not os.path.exists(robotstxt):
        return None
    # Read the file directly instead of RobotFileParser.read(): read() goes
    # through urllib, which rejects a scheme-less filesystem path on Python 3.
    with open(robotstxt, 'rb') as handle:
        raw = handle.read()
    if not isinstance(raw, str):
        # Python 3 returns bytes here; Python 2 already has a native str.
        raw = raw.decode('utf-8', 'replace')
    rp = robotparser.RobotFileParser()
    rp.parse(raw.splitlines())  # @@ this reads borked files too
    return rp


def siteupdates(rootpath, t=None, n=None, strip=None, output=None):
    """Write an HTML listing of recently modified files under *rootpath*.

    Files are grouped under one heading per UTC day, newest first.

    Arguments:
       rootpath - directory to walk; paths are listed relative to it, with a
                  leading '/'
       t        - maximum age in seconds for a file to be listed
                  (default: one week)
       n        - keep only the first n entries after sorting (default: all)
       strip    - if true, remove the first matching suffix in
                  _STRIP_SUFFIXES from each listed path
       output   - object with a write() method (default: sys.stdout); a
                  string is wrapped in an in-memory buffer initialised with
                  that string

    Files ending in '.pyc' are never listed. If rootpath contains a
    robots.txt, paths it disallows for user agent '*' are left out as well.

    Returns the object that was written to.
    """
    if t is None:
        t = 3600 * 24 * 7  # a week
    if output is None:
        output = sys.stdout
    elif isinstance(output, _string_types):
        output = _string_buffer(output)

    then = time.time() - t
    # os.path.join() does not double the separator, so when rootpath already
    # ends in '/' the slice below loses the leading slash; put it back.
    prefix = '/' if rootpath.endswith('/') else ''
    rp = _load_robots(rootpath)

    result = []
    for dirpath, _dirnames, filenames in os.walk(rootpath):
        for name in filenames:
            if name.endswith('.pyc'):
                continue
            fn = os.path.join(dirpath, name)
            try:
                mtime = os.stat(fn).st_mtime
            except OSError:
                # Broken symlink, or the file went away during the walk.
                continue
            if mtime <= then:
                continue
            filename = prefix + fn[len(rootpath):]
            if rp is not None and not rp.can_fetch('*', filename):
                continue
            stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(mtime))
            result.append((stamp, filename))

    # The timestamp format sorts lexicographically in chronological order.
    result.sort(reverse=True)
    if n is not None:
        result = result[:n]

    # NOTE(review): the markup in this file's string literals had been
    # stripped, leaving only '\n' fragments and a "%s" fed with (day, day).
    # The h2/ul structure below is reconstructed from those fragments; the
    # exact wording of the <li> line is an assumption - confirm against the
    # upstream script before relying on it.
    day = None
    for stamp, filename in result:
        if strip:
            for ext in _STRIP_SUFFIXES:
                if filename.endswith(ext):
                    filename = filename[:-len(ext)]
                    break

        if stamp[:10] != day:
            if day is not None:
                output.write('</ul>\n\n')  # close the previous day's list
            day = stamp[:10]
            output.write('<h2 id="%s">%s</h2>\n' % (day, day))
            output.write('<ul>\n')
        output.write('<li>%s <a href="%s">%s</a></li>\n'
                     % (stamp[11:], filename, filename))

    if day is not None:
        # Only close a list that was actually opened.
        output.write('</ul>\n')
    return output


def main(argv=None):
    """Command-line entry point: list updates for the single path argument.

    argv is the argument list without the program name; None lets optparse
    fall back to sys.argv[1:]. Exits through parser.error() unless exactly
    one positional argument is given.
    """
    # NOTE(review): the positional placeholder had been stripped from the
    # usage string; '<rootpath>' is a reconstruction.
    parser = OptionParser(usage='%prog [options] <rootpath>')
    parser.add_option("-t", "--time", dest="time", type="int", default=None,
                      help="cut off last modification date for inclusion")
    parser.add_option("-n", "--number", dest="number", type="int",
                      default=None,
                      help="maximum number of results to be output")
    options, args = parser.parse_args(argv)

    if len(args) != 1:
        parser.error("Incorrect number of arguments.")
    siteupdates(args[0], options.time, options.number)


if __name__ == "__main__":
    main()