#!/usr/bin/env python
"""
sarc - Archive Safari Browser History
Author: Sean B. Palmer, inamidst.com

Usage:
   $ sarc yyyy-mm-dd
   $ sarc today

Output is: date, uri, title, written in plain utf-8
encoded text to a file named history-yyyy-mm-dd.txt
"""

from __future__ import with_statement

import codecs
import datetime
import os.path
import subprocess
import sys
import time
import xml.parsers.expat

# Offset in seconds added to a "lastVisitedDate" value to obtain a Unix
# timestamp (978307200 is 2001-01-01T00:00:00 UTC).
apple_epoch = 978307200


def history():
    """Return a readable pipe carrying Safari's history as XML.

    Runs ``PlistBuddy -x -c Print`` on ``~/Library/Safari/History.plist``
    and returns the child's stdout (a binary file object).  The bare
    command name means PlistBuddy has to be reachable through PATH.
    """
    fn = os.path.expanduser('~/Library/Safari/History.plist')
    command = ['PlistBuddy', '-x', '-c', 'Print', fn]
    # Default buffering: line buffering is meaningless for a binary read
    # pipe and makes newer Python versions emit a RuntimeWarning.
    p = subprocess.Popen(command, stdout=subprocess.PIPE, bufsize=-1)
    return p.stdout


def parse(f, extents):
    """Collect the history entries visited inside a time window.

    Parameters:
       f        iterable yielding consecutive chunks of the plist XML
                (for example the lines of the pipe from history()).
       extents  pair (start, end) of offsets in seconds relative to
                apple_epoch; an entry is kept when
                start <= int(lastVisitedDate) < end.

    Returns the parser state object.  Its ``history`` attribute maps a
    timestamp string to a set of (uri, title) text pairs.  Only entries
    carrying all of the keys '' (the uri), 'lastVisitedDate' and 'title'
    are recorded.
    """
    class State(object):
        """Mutable namespace shared by the expat callbacks below."""

    state = State()
    state.mode = None      # 'key' or 'string' while inside such an element
    state.item = {}        # key -> string pairs of the entry being read
    state.key = None       # text of the most recent <key>
    state.string = None    # text of the most recent <string>
    state.data = ''        # character data gathered for the open element
    state.history = {}

    def register(uri, visited, title):
        uri = uri.replace(' ', '%20')
        apple_date = int(float(visited))
        if extents[0] <= apple_date < extents[1]:
            # NOTE(review): fromtimestamp() yields local time although the
            # format appends a literal 'Z'.  Left as is because the day
            # window computed by date() is local too - confirm intent.
            pydate = datetime.datetime.fromtimestamp(apple_epoch + apple_date)
            isodate = pydate.strftime('%Y-%m-%dT%H:%M:%SZ')
            state.history.setdefault(isodate, set()).add((uri, title))

    def start_element(name, attrs):
        if name == 'dict':
            # Every entry starts from a clean slate.  Without this, keys of
            # an entry that was never registered (e.g. one lacking a title)
            # leaked into the next entry and could give it the wrong date.
            # Assumes an entry holds no nested <dict> - TODO confirm.
            state.item = {}
        elif name in ('key', 'string'):
            state.mode = name

    def end_element(name):
        if name != state.mode:
            return
        setattr(state, name, state.data.strip())
        state.mode = None
        state.data = ''
        if name != 'string':
            return

        item = state.item
        item[state.key] = state.string
        if ('' in item) and ('lastVisitedDate' in item) and ('title' in item):
            register(item[''], item['lastVisitedDate'], item['title'])
            state.item = {}

    def character_data(data):
        if state.mode:
            state.data += data

    p = xml.parsers.expat.ParserCreate()
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = character_data

    for line in f:
        p.Parse(line, False)
    p.Parse('', True)

    return state


def date(arg):
    """Turn a command line date into a day name and its time window.

    ``arg`` is either 'today' or a date written as yyyy-mm-dd.  Returns
    ``(arg, (a, b))`` where ``arg`` is the date string ('today' replaced
    by the current date) and a/b are the local midnights opening and
    closing that day, expressed in seconds relative to apple_epoch.

    Prints an error and exits with status 1 when the argument is not
    three dash-separated integers or does not name a real calendar day.
    """
    if arg == 'today':
        today = datetime.date.today()
        arg = today.strftime('%Y-%m-%d')

    def date_error():
        sys.stdout.write('Error: Date must be in format yyyy-mm-dd\n')
        sys.exit(1)

    if arg.count('-') != 2:
        date_error()
    y, m, d = arg.split('-')
    try:
        # datetime.date() raises ValueError as well (month 13, Feb 30, ...)
        # so it belongs inside the same guard as the int() conversions.
        day = datetime.date(int(y), int(m), int(d))
    except ValueError:
        date_error()

    next_day = day + datetime.timedelta(1)
    # time.mktime() is the portable spelling of strftime('%s'): both give
    # the epoch seconds of local midnight.
    a = int(time.mktime(day.timetuple())) - apple_epoch
    b = int(time.mktime(next_day.timetuple())) - apple_epoch
    return arg, (a, b)


def main():
    """Command line entry point: archive one day of history to a file."""
    try:
        arg = sys.argv[1]
    except IndexError:
        sys.stdout.write(__doc__.strip() + '\n')
        sys.exit()

    day, extents = date(arg)
    fn = 'history-%s.txt' % day
    if os.path.exists(fn):
        sys.stdout.write(
            'Error: %s already exists, please move or delete it\n' % fn)
        sys.exit(1)

    with history() as f:
        state = parse(f, extents)

    lines = 0
    # Encode once, at the file boundary; the block guarantees the file is
    # closed even when a write fails.
    with codecs.open(fn, 'w', 'utf-8') as o:
        for isodate in sorted(state.history):
            # Sorted so that entries sharing a timestamp come out in a
            # stable order instead of arbitrary set order.
            for uri, title in sorted(state.history[isodate]):
                o.write('%s %s %s\n' % (isodate, uri, title))
                lines += 1

    size = os.path.getsize(fn)
    sys.stdout.write('Wrote %s lines in %s bytes to %s\n' % (lines, size, fn))


if __name__ == '__main__':
    main()