#!/usr/bin/env python
"""
Wiki Weblog RSS 1.0 Feed Manipulator
Author: Sean B. Palmer, inamidst.com
Now with a certain-percent extra DOM Munging!
"""

import os, re, time, xml.dom.minidom
from urlparse import urljoin as urijoin

# Whole-second Unix timestamp, read once when this module is loaded.
timenow = int(time.time())

# Fixed release date followed by the load timestamp above.
__version__ = '2005-02-24 (loaded: %i)' % (timenow,)

def first(seq): 
   """Return the first item produced by iterating seq, or None if it is empty.

   Only one item is pulled from seq, so iterators are advanced by one step.
   """
   head = None
   for head in seq: 
      # Stop as soon as the first item has been bound
      break
   return head

def the(seq): 
   """Return the sole item of seq, insisting that there is exactly one.

   seq may be any iterable; at most two items are consumed from it.

   Raises ValueError if seq is empty or if it yields more than one item.
   """
   found = False
   only = None
   for item in seq: 
      if found: 
         raise ValueError("Sequence has more than one item")
      only = item
      found = True
   if not found: 
      # An empty sequence used to escape as a bare StopIteration, which
      # says nothing about the cause and can silently end a generator.
      raise ValueError("Sequence has no items")
   return only

def last(seq): 
   """Return the final item of seq, using negative indexing.

   seq must support indexing with -1; an empty list raises IndexError.
   """
   final = seq[-1]
   return final

def text(data): 
   """Return a new, unattached DOM Text node whose character data is data.

   The node has no parent or owner document; callers attach it with
   appendChild(). An empty string is accepted and gives an empty text node.
   """
   node = xml.dom.minidom.Text()
   # Set the data directly. replaceWholeText() on a parentless node asks the
   # (missing) parent to remove the node when the new content is empty, so
   # text('') used to fail with AttributeError on None.
   node.data = data
   return node

def blog(wikiname, description, basedir=None): 
   """Post the wiki page wikiname to the feed kept in basedir.

   The feed ('rss1.0.rss'), the page ('<wikiname>.html') and the archive log
   ('rawarchive.txt') are all looked up inside basedir, which defaults to
   './'. The actual work is delegated to blogFeed().
   """
   root = basedir
   if root is None: 
      root = './'
   join = os.path.join
   blogFeed(
      join(root, 'rss1.0.rss'),
      join(root, '%s.html' % wikiname),
      wikiname,
      description,
      join(root, 'rawarchive.txt')
   )

def blogFeed(rssfeed, wikifile, wikiname, description, rawarchive=None): 
   """Add the wiki page wikiname to an RSS 1.0 feed file as its newest item.

   rssfeed: path of the RSS 1.0 file; it is parsed, updated and rewritten.
   wikifile: path of the page's HTML file, the source of title and content.
   wikiname: page name, resolved against the channel's rdf:about URI to
      form the item link.
   description: text for the item's description element.
   rawarchive: optional path of a log file. If it already exists as a file,
      one tab-separated line (UTC time, wikiname, title, description) is
      appended to it; otherwise nothing is logged.
   """
   dom = xml.dom.minidom.parse(rssfeed)
   try: 
      channel = the(dom.getElementsByTagName('channel'))
      baseURI = channel.getAttribute('rdf:about')
      link = urijoin(baseURI, wikiname)
      title, content = getTitleAndContent(wikifile)
      dom = addItem(dom, link, title, description, content)
      # Serialise before opening the feed for writing: mode 'w' truncates
      # the file, so a failure while serialising must not cost us the feed.
      output = dom.toxml()
   finally: 
      # Break the DOM's reference cycles whether or not the update worked
      dom.unlink()

   rss = open(rssfeed, 'w')
   try: 
      rss.write(output)
   finally: 
      rss.close()

   if rawarchive and os.path.isfile(rawarchive): 
      now = time.strftime('%Y-%m-%d %H:%M', time.gmtime())
      f = open(rawarchive, 'a')
      try: 
         f.write('\t'.join([now, wikiname, title, description]) + '\n')
      finally: 
         f.close()

# All patterns below carry inline flags: (?i) ignore case, (?m) multiline,
# (?s) let '.' match newlines.

# Shortest run of text between a literal <title> and the next </title>.
r_title = re.compile(r'(?ims)<title>(.*?)</title>')
# Greedy: everything from the first attribute-less <body> tag up to the
# last </body> in the document.
r_body = re.compile(r'(?ims)<body>(.*)</body>')
# One or more spaces, tabs, carriage returns or newlines.
r_whitespace = re.compile(r'[ \t\r\n]+')

def getTitleAndContent(wikifile): 
   """Read the HTML file wikifile and return a (title, content) pair.

   title is the text of the <title> element with every run of whitespace
   collapsed to one space and the ends stripped. content is the raw markup
   found between <body> and </body>, unmodified.

   Raises ValueError if the file has no <title>...</title> or no
   <body>...</body> section that the module's patterns can match.
   """
   f = open(wikifile)
   try: 
      html = f.read()
   finally: 
      # Close the handle even when the read itself fails
      f.close()

   titleMatch = r_title.search(html)
   if titleMatch is None: 
      raise ValueError("No <title>...</title> found in %r" % (wikifile,))
   bodyMatch = r_body.search(html)
   if bodyMatch is None: 
      raise ValueError("No <body>...</body> found in %r" % (wikifile,))

   title = r_whitespace.sub(' ', titleMatch.group(1)).strip()
   content = bodyMatch.group(1)
   return title, content

def _textElement(tagName, data): 
   """Return a new, unattached element named tagName holding data as text."""
   element = xml.dom.minidom.Element(tagName)
   element.appendChild(text(data))
   return element

def addItem(dom, link, title, description, content): 
   """Insert a new item at the head of the RSS 1.0 feed held in dom.

   dom: parsed feed document; it must contain exactly one rdf:RDF element,
      with exactly one channel inside it and exactly one rdf:Seq inside
      that channel.
   link: URI of the page; the item's own URI is link + '#' + Unix time.
   title, description, content: text for the item's title, description and
      content:encoded elements.

   The channel's dc:date (when there is exactly one) is set to the current
   UTC time, and the channel's dc:creator (when there is exactly one) is
   copied into the item. When 10 or more rdf:li or item elements already
   exist, the last one of each is removed before the new one is inserted
   at the front.

   Returns dom, which is modified in place.
   """
   # Read the clock once so the item URI and its dc:date always agree
   now = time.time()
   uri = link + '#' + str(int(now))
   date = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(now))

   rdf = the(dom.getElementsByTagName('rdf:RDF'))
   channel = the(rdf.getElementsByTagName('channel'))
   seq = the(channel.getElementsByTagName('rdf:Seq'))

   # Refresh the channel-level date, if the channel has exactly one
   dateElements = channel.getElementsByTagName('dc:date')
   if len(dateElements) == 1: 
      dateElement = the(dateElements)
      if dateElement.firstChild: 
         dateElement.firstChild.replaceWholeText(date)
      else: dateElement.appendChild(text(date))

   # Table of contents: drop the oldest rdf:li when full, then prepend
   seqChildElements = seq.getElementsByTagName('rdf:li')
   seqLength = len(seqChildElements)
   if seqLength >= 10: 
      seqLast = last(seqChildElements)
      seq.removeChild(seqLast)
   li = xml.dom.minidom.Element('rdf:li')
   li.setAttribute('rdf:resource', uri)
   if seqLength: 
      seqFirst = first(seqChildElements)
      seq.insertBefore(li, seqFirst)
   else: seq.appendChild(li)

   # Items: drop the oldest item when full
   itemElements = rdf.getElementsByTagName('item')
   itemElementsLength = len(itemElements)
   if itemElementsLength >= 10: 
      itemLast = last(itemElements)
      # This used to pass the undefined name 'lastItem', raising NameError
      # as soon as the feed reached 10 items.
      rdf.removeChild(itemLast)

   item = xml.dom.minidom.Element('item')
   item.setAttribute('rdf:about', uri)
   item.appendChild(_textElement('title', title))
   item.appendChild(_textElement('description', description))
   item.appendChild(_textElement('link', link))

   creatorElements = channel.getElementsByTagName('dc:creator')
   if len(creatorElements) == 1: 
      creatorElement = the(creatorElements).cloneNode(True)
      item.appendChild(creatorElement)

   item.appendChild(_textElement('dc:date', date))
   item.appendChild(_textElement('content:encoded', content))

   if itemElementsLength: 
      itemFirst = first(itemElements)
      rdf.insertBefore(item, itemFirst)
   else: rdf.appendChild(item)
   return dom

def test(args=None): 
   """Post a fixed test entry to the feed in the directory named by args[0].

   args defaults to the command-line arguments after the script name. With
   no arguments at all, the module docstring is printed instead.
   """
   import sys
   argv = args
   if argv is None: 
      argv = sys.argv[1:]
   if not len(argv): 
      print(__doc__)
      return
   blog('test', 'This is a test file.', basedir=argv[0])

if __name__ == '__main__': 
   # Run as a script: just show the module docstring
   print(__doc__)