#!/usr/bin/env python
"""
Wiki Weblog RSS 1.0 Feed Manipulator
Author: Sean B. Palmer, inamidst.com
Now with a certain-percent extra DOM Munging!
"""
import os, re, time, xml.dom.minidom
from urlparse import urljoin as urijoin
# Unix timestamp, in whole seconds, captured once when the module is loaded.
timenow = int(time.time())
# Fixed release date followed by the load timestamp captured above.
__version__ = '2005-02-24 (loaded: %i)' % timenow
def first(seq):
    """Return the first item produced by iterating seq, or None if it is empty."""
    for head in seq:
        break
    else:
        # The loop body never ran, so there is no first item.
        return None
    return head
def the(seq):
    """Return the one and only item of seq.

    seq may be any iterable; iteration stops as soon as a second item is
    seen, so the remainder of a long or lazy input is never consumed.

    Raises:
        ValueError: if seq yields no items, or more than one item.
    """
    found = False
    only = None
    for item in seq:
        if found:
            raise ValueError("Sequence has more than one item")
        only = item
        found = True
    if not found:
        # Report an empty input explicitly instead of letting a bare
        # StopIteration escape into the caller.
        raise ValueError("Sequence has no items")
    return only
def last(seq):
    """Return the final element of an indexable sequence.

    Uses negative indexing, so an empty list, tuple or string raises
    IndexError.
    """
    tail = seq[-1]
    return tail
def text(data):
    """Return a new, parentless minidom Text node whose content is data.

    The node is not attached to any document; callers append it to an
    element themselves. Empty content is allowed.
    """
    node = xml.dom.minidom.Text()
    # Set the content directly. replaceWholeText() on a node without a
    # parent fails for empty content, because it tries to remove the node
    # from its (missing) parent.
    node.data = data
    return node
def blog(wikiname, description, basedir=None):
    """Add the wiki page wikiname to the feed kept in basedir.

    Derives three paths inside basedir -- 'rss1.0.rss', '<wikiname>.html'
    and 'rawarchive.txt' -- and passes them to blogFeed together with
    wikiname and description. basedir defaults to './' when it is None.
    """
    root = basedir
    if root is None:
        root = './'

    def within(name):
        # Path of a file that lives directly inside the base directory.
        return os.path.join(root, name)

    blogFeed(
        within('rss1.0.rss'),
        within('%s.html' % wikiname),
        wikiname,
        description,
        within('rawarchive.txt'),
    )
def blogFeed(rssfeed, wikifile, wikiname, description, rawarchive=None):
    """Add an entry for a wiki page to an RSS feed file, in place.

    Steps:
      1. Parse rssfeed and read the 'rdf:about' attribute of its single
         'channel' element as the base URI.
      2. Resolve wikiname against that base to get the entry link.
      3. Take the entry title and content from wikifile.
      4. Add the entry with addItem and write the document back to rssfeed.
      5. If rawarchive is given and is an existing file, append one
         tab-separated line: UTC timestamp, wikiname, title, description.

    Parameters:
        rssfeed: path of the feed file; it is read and then overwritten.
        wikifile: path of the HTML file supplying title and content.
        wikiname: page name, joined onto the channel's base URI.
        description: description text for the new entry.
        rawarchive: optional path of a log file; it is only appended to
            when it already exists.
    """
    dom = xml.dom.minidom.parse(rssfeed)
    try:
        channel = the(dom.getElementsByTagName('channel'))
        baseURI = channel.getAttribute('rdf:about')
        link = urijoin(baseURI, wikiname)

        title, content = getTitleAndContent(wikifile)
        dom = addItem(dom, link, title, description, content)

        # Serialize before opening the feed for writing: mode 'w' truncates
        # the file, so a serialization error must surface first.
        serialized = dom.toxml()
        rss = open(rssfeed, 'w')
        try:
            rss.write(serialized)
        finally:
            rss.close()
    finally:
        # Release the DOM's internal references on error paths as well.
        dom.unlink()

    if rawarchive and os.path.isfile(rawarchive):
        now = time.strftime('%Y-%m-%d %H:%M', time.gmtime())
        f = open(rawarchive, 'a')
        try:
            f.write('\t'.join([now, wikiname, title, description]) + '\n')
        finally:
            f.close()
# Patterns used by getTitleAndContent() to pull the title and body out of a
# wiki page's HTML. (?ims) = case-insensitive, multi-line, dot matches newline.
# NOTE(review): the element names in r_title and r_body are reconstructed
# from how getTitleAndContent() uses the captures (page title, page body);
# confirm them against the HTML the wiki actually emits.
r_title = re.compile(r'(?ims)<title[^>]*>(.*?)</title>')
r_body = re.compile(r'(?ims)<body[^>]*>(.*)</body>')
# One or more spaces, tabs, carriage returns or newlines.
r_whitespace = re.compile(r'[ \t\r\n]+')
def getTitleAndContent(wikifile):
    """Read the file at wikifile and return a (title, content) tuple.

    title is group 1 of the first r_title match, with every run of
    whitespace collapsed to a single space and the ends stripped.
    content is group 1 of the first r_body match, returned unmodified.

    Raises:
        ValueError: if r_title or r_body does not match the file's text.
    """
    f = open(wikifile)
    try:
        html = f.read()
    finally:
        # Close the handle even when reading fails.
        f.close()

    titleMatch = r_title.search(html)
    if titleMatch is None:
        raise ValueError("No title found in %r" % (wikifile,))
    bodyMatch = r_body.search(html)
    if bodyMatch is None:
        raise ValueError("No body found in %r" % (wikifile,))

    title = r_whitespace.sub(' ', titleMatch.group(1)).strip()
    content = bodyMatch.group(1)
    return title, content
def addItem(dom, link, title, description, content):
    """Insert a new entry at the top of a parsed RSS feed document.

    The entry URI is link + '#' + the current Unix time in whole seconds.
    The function then:
      * sets the text of the channel's 'dc:date' element to the current
        UTC time, when the channel has exactly one such element;
      * puts a new 'rdf:li' pointing at the entry URI first in the
        channel's 'rdf:Seq', removing the last existing 'rdf:li' when
        there are already 10 or more;
      * puts a new 'item' element first among the 'item' elements under
        'rdf:RDF', removing the last existing one when there are already
        10 or more.

    The new item holds title, description, link, a copy of the channel's
    'dc:creator' (only when the channel has exactly one), dc:date and
    content:encoded children, in that order.

    Parameters:
        dom: parsed minidom Document; modified in place.
        link: URI of the page the entry points at.
        title, description, content: text for the matching child elements.

    Returns:
        The same dom object that was passed in.

    Raises:
        ValueError: from the() when the document does not contain exactly
            one 'rdf:RDF', 'channel' or 'rdf:Seq' element.
    """
    def textElement(name, value):
        # Build <name>value</name> through the document's factory methods,
        # which accept empty text and set the owner document.
        element = dom.createElement(name)
        element.appendChild(dom.createTextNode(value))
        return element

    # Read the clock once so the URI fragment and the date agree.
    now = time.time()
    uri = link + '#' + str(int(now))
    date = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(now))

    rdf = the(dom.getElementsByTagName('rdf:RDF'))
    channel = the(rdf.getElementsByTagName('channel'))
    seq = the(channel.getElementsByTagName('rdf:Seq'))

    # Refresh the channel-level date.
    dateElements = channel.getElementsByTagName('dc:date')
    if len(dateElements) == 1:
        dateElement = the(dateElements)
        if dateElement.firstChild:
            dateElement.firstChild.replaceWholeText(date)
        else:
            dateElement.appendChild(dom.createTextNode(date))

    # Update the channel's list of entries: drop the last, add the new first.
    seqChildElements = seq.getElementsByTagName('rdf:li')
    seqLength = len(seqChildElements)
    if seqLength >= 10:
        seq.removeChild(last(seqChildElements))

    li = dom.createElement('rdf:li')
    li.setAttribute('rdf:resource', uri)
    if seqLength:
        seq.insertBefore(li, first(seqChildElements))
    else:
        seq.appendChild(li)

    # Same rotation for the item elements themselves.
    itemElements = rdf.getElementsByTagName('item')
    itemElementsLength = len(itemElements)
    if itemElementsLength >= 10:
        rdf.removeChild(last(itemElements))

    item = dom.createElement('item')
    item.setAttribute('rdf:about', uri)
    item.appendChild(textElement('title', title))
    item.appendChild(textElement('description', description))
    item.appendChild(textElement('link', link))

    creatorElements = channel.getElementsByTagName('dc:creator')
    if len(creatorElements) == 1:
        item.appendChild(the(creatorElements).cloneNode(True))

    item.appendChild(textElement('dc:date', date))
    item.appendChild(textElement('content:encoded', content))

    if itemElementsLength:
        rdf.insertBefore(item, first(itemElements))
    else:
        rdf.appendChild(item)

    return dom
def test(args=None):
    """Run blog() for a page named 'test' in the directory args[0].

    args defaults to the command-line arguments after the script name.
    With no arguments, the module docstring is printed instead.
    """
    import sys

    argv = args
    if argv is None:
        argv = sys.argv[1:]

    if not len(argv):
        print(__doc__)
        return

    blog('test', 'This is a test file.', basedir=argv[0])
# Running the file as a script only prints the module docstring.
if __name__ == "__main__":
    print(__doc__)