#!/usr/bin/env python
"""
gooshparse.py - Google Search History RSS Parser
Author: Sean B. Palmer, inamidst.com

Usage: gooshparse.py FILE

Prints one line per RSS <item> found in FILE:

    DATE R LINK TITLE    when the item's <category> contains 'result'
    DATE Q TITLE         otherwise

DATE is the item's <pubDate> rewritten as YYYY-MM-DDTHH:MM:SSZ. Output is
always encoded as UTF-8. Runs under Python 2.6+ and Python 3.
"""

import sys
import time
import xml.dom.minidom

# Layout of the <pubDate> values this script accepts, and of the dates it emits.
_PUBDATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
_OUTPUT_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def text(element):
    """Return the leading text content of a DOM element.

    An element with no children (e.g. ``<title/>``) yields an empty string
    instead of raising AttributeError on the missing first child.
    """
    node = element.firstChild
    if node is None:
        return u''
    # wholeText also gathers any text nodes logically adjacent to the first.
    return node.wholeText


def _field(item, tag):
    """Return the text of the first *tag* element inside *item*.

    Raises IndexError when *item* contains no such element.
    """
    return text(item.getElementsByTagName(tag)[0])


def _format_item(item):
    """Build the (unicode) output line for one RSS <item>, without newline."""
    parsed = time.strptime(_field(item, 'pubDate'), _PUBDATE_FORMAT)
    date = time.strftime(_OUTPUT_DATE_FORMAT, parsed)

    title = _field(item, 'title').strip(' \t')

    # 'R' marks items whose category mentions 'result', 'Q' everything else
    # (presumably clicked results vs. plain queries).
    category = 'R' if 'result' in _field(item, 'category') else 'Q'

    if category == 'R':
        # Escape embedded spaces so the link stays a single output column.
        link = _field(item, 'link').strip(' \t').replace(' ', '%20')
        fields = (date, category, link, title)
    else:
        fields = (date, category, title)
    return u' '.join(fields)


def main():
    """Parse the RSS file named by sys.argv[1] and print one line per item."""
    if len(sys.argv) < 2:
        sys.exit('Usage: gooshparse.py FILE')

    # Read raw bytes so the XML parser applies the document's own encoding
    # declaration; the context manager closes the file even if read() fails.
    with open(sys.argv[1], 'rb') as f:
        data = f.read()

    doc = xml.dom.minidom.parseString(data)

    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout accepts byte strings directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for item in doc.getElementsByTagName('item'):
        # Encode the whole line once so link and title are treated alike.
        out.write(_format_item(item).encode('utf-8') + b'\n')


if __name__ == '__main__':
    main()