#!/usr/bin/python
"""
wiki.py - A Simple Python Wiki
Author: Sean B. Palmer, inamidst.com
"""

import cgitb; cgitb.enable()
import sys, os, re, cgi, glob, time
from HTMLParser import HTMLParser

try: from cStringIO import StringIO
except ImportError: 
   from StringIO import StringIO

__version__ = str(time.time())

# Parser error types
class WikiParseError(Exception): 
   pass

class ParseError(Exception): 
   pass

class Parser(object): 
   EOF = 0

   def __init__(self, write=None, error=None): 
      self.text = None
      self.pos = 0

      if write is None: 
         write = sys.stdout.write
      self.write = write

      if error is None: 
         # example: sys.stderr.write("%s: %s" % (e, msg))
         error = lambda e, msg: None
      self.error = error

   def find(self, tokens): 
      """For each token in tokens, if the current position matches one of
      those tokens, return True. Return False otherwise."""
      for token in tokens: 
         if token == self.EOF: 
            if self.pos == len(self.text): 
               return True
         elif self.text[self.pos:].startswith(token): 
            return True
      return False

   def eat(self, token): 
      """Eat the length of token if token's an int, or the token itself."""
      if type(token) is int: 
         if (self.pos + token) > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         s = self.text[self.pos:self.pos+token]
         self.pos += token
         return s
      else: 
         assert self.find([token])
         self.pos += len(token)
         if self.pos > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         return token

   def get(self, tokens, start=None, finish=None): 
      if start is not None: 
         self.eat(start)
      content = ''
      while not self.find(tokens): 
         s = self.eat(1)
         if s is not None: 
            content += s
         else: return content # reached EOF
      if finish is not None: 
         self.eat(finish)
      return content

# Inline {...} tags; the {{ escape is handled separately in wikiParse
r_tag = re.compile(r'(?!{{){[^{}]+}')
# Wiki page names (assumed pattern: word characters, dots and dashes only)
r_name = re.compile(r'^[A-Za-z0-9_.-]+$')
# Citation URIs (assumed pattern: no spaces, angle brackets or quotes)
r_uri = re.compile(r'^[^ <>"]+$')
r_emdash = re.compile(r'[A-Za-z0-9"]--(?=[A-Za-z0-9"{])')
r_alpha = re.compile(r'[A-Za-z]+')

def makeID(s, current): 
   s = (''.join(r_alpha.findall(s)) or 'id') + str(len(s))
   while s in current: 
      s += 'n'
   return s

class TextParser(Parser): 
   LIST = 0
   HEADING = 1
   PRE = 2
   QUOT = 3
   PARAGRAPH = 4

   LI_START = '* '
   LI_OPEN = '\n* '
   PRE_START = '{{{\n'
   PRE_END = '\n}}}'
   QUOT_START = '[[[\n'
   QUOT_END = '\n]]]'
   H_START = '@ '
   SEPERATOR = '\n\n'

   def __init__(self, write=None, error=None): 
      Parser.__init__(self, write=write, error=error)
      self.rawlinks = []
      self.ids = []

   def __call__(self, s): 
      self.text = s
      self.normalize()
      self.parse()

   def normalize(self): 
      self.text = self.text.strip() # ('\t\r\n ')
      self.text = self.text.replace('\r\n', '\n')
      self.text = self.text.replace('\r', '\n')
      self.text = re.sub(r'(?sm)\n[ \t]*\n', '\n\n', self.text)

   def parse(self): 
      blocks = []
      while 1: 
         blocks.append(self.blockElement())
         if self.find([Parser.EOF]): break

      for block in blocks: 
         blocktype, values = block[0], block[1:]
         {self.LIST: self.listElement, 
          self.HEADING: self.headingElement, 
          self.PRE: self.preElement, 
          self.QUOT: self.quotElement, 
          self.PARAGRAPH: self.paragraphElement
         }[blocktype](*values)

   def blockElement(self): 
      self.whitespace()
      if self.find([self.LI_START]): 
         content = self.get([self.SEPERATOR, Parser.EOF], self.LI_START)
         content = tuple(content.split('\n* '))
         return (self.LIST,) + content
      elif self.find([self.H_START]): 
         content = self.get(['\n', Parser.EOF], self.H_START)
         return (self.HEADING, content)
      elif self.find([self.PRE_START]): 
         content = self.get([self.PRE_END], self.PRE_START, self.PRE_END)
         return (self.PRE, content)
      elif self.find([self.QUOT_START]): 
         content = self.get([self.QUOT_END], self.QUOT_START, self.QUOT_END)
         if self.find([' - ']): 
            citation = self.get(['\n', Parser.EOF], ' - ')
            if not (r_uri.match(citation) and citation): 
               self.error('CitationURIError', # @@ allow other stuff?
                          'Citation (%s) must be a URI.' % citation)
         else: citation = None
         return (self.QUOT, content, citation)
      else: return (self.PARAGRAPH, self.get([self.SEPERATOR, Parser.EOF]))

   def whitespace(self): 
      while self.find(' \t\n'): 
         self.eat(1)

   def listElement(self, *items): 
      self.write('<ul>')
      self.write('\n')
      for item in items: 
         self.write('<li>%s</li>' % self.wikiParse(item))
         self.write('\n')
      self.write('</ul>')
      self.write('\n')

   def headingElement(self, content): 
      content = self.wikiParse(content)
      newid = makeID(content, self.ids)
      self.ids.append(newid)
      self.write('<h2 id="%s">' % newid)
      self.write(content)
      self.write('</h2>')
      self.write('\n')

   def preElement(self, content): 
      self.write('<pre>')
      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')
      self.write('</pre>')
      self.write('\n')

   def quotElement(self, content, cite): 
      if cite is not None: 
         self.write('<blockquote cite="%s">' % cite)
      else: self.write('<blockquote>')
      self.write('\n')
      self.write('<pre>') # @@
      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')
      self.write('</pre>')
      self.write('\n')
      self.write('</blockquote>')
      self.write('\n')

   def paragraphElement(self, content): 
      self.write('<p>')
      self.write(self.wikiParse(content))
      self.write('</p>')
      self.write('\n')

   def wikiParse(self, s, level=None): 
      if level is None: 
         level = 1 # @@ use a proper parser, or catch the matches
      pos, result = 0, ''
      while pos < len(s): 
         m = r_tag.match(s[pos:])
         if m: 
            span = m.span()
            result += self.tag(s[pos:pos+span[1]], level=level)
            pos += span[1] - span[0]
         else: 
            m = r_emdash.match(s[pos:])
            if m and (level > 0): # unicode must be explicit in <pre>
               result += s[pos] + '&#8212;' # u'\u2014'.encode('utf-8')
               pos += 3
            elif (s[pos] == '{') and (s[pos+1:pos+2] != '{') and (level > 0): 
               if (pos < 10): area = s[0:pos+10]
               else: area = s[pos-10:pos+10]
               msg = "The '{' must be escaped as '{{' in %r" % area
               raise "WikiParseError", msg
            elif (s[pos:pos+2] == '{{'): # d8uv bug "and (level > 0): "
               result += '{'
               pos += 2
            elif s[pos] == '&': 
               result += '&amp;'
               pos += 1
            elif s[pos] == '<': 
               result += '&lt;'
               pos += 1
            else: 
               result += s[pos]
               pos += 1
      return result
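
   # wikiParse handles the inline escapes itself and defers {...} tags to
   # self.tag below; for example, with the entity strings used above:
   #    self.wikiParse('AT--T & {{x}')  ->  'AT&#8212;T &amp; {x}'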

   def iriParse(self, uri): 
      r_unicode = re.compile(r'\{U\+([1-9A-F][0-9A-F]{1,5})\}')
      def escape(m): 
         bytes = unichr(int(m.group(1), 16)).encode('utf-8')
         return ''.join(['%%%02X' % ord(s) for s in bytes])
      return r_unicode.sub(escape, uri)
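
   # For example, {U+E9} denotes U+00E9, whose UTF-8 bytes are C3 A9:
   #    self.iriParse('Caf{U+E9}')  ->  'Caf%C3%A9'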

   def unicodeify(self, s): 
      if len(s) not in (2, 4, 6): 
         raise ValueError, 'Must be of length 2, 4, or 6'
      for letter in 'abcdef': 
         if letter in s: 
            raise ValueError, 'Unicode escapes must be upper-case'
      i = int(s.lstrip('0'), 16)
      raw = [0x9, 0xA, 0xD] + list(xrange(0x20, 0x7E))
      del raw[raw.index(0x2D)], raw[raw.index(0x5D)], raw[raw.index(0x7D)]
      if i in raw: return chr(i) # printable - '-]}'
      elif i > 0x10FFFF: 
         raise ValueError, 'Codepoint is out of range'
      return '&#x%s;' % s
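
   # Printable ASCII comes back literally, anything else as a hex
   # character reference; for example:
   #    self.unicodeify('41')    ->  'A'
   #    self.unicodeify('2014')  ->  '&#x2014;'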

   def tag(self, s, level=None): 
      if level is None: 
         level = 1 # @@ { {U+..}?
      s = s[1:-1] # @@ or s.strip('{}')
      if s.startswith('U+'): 
         try: result = self.unicodeify(s[2:])
         except ValueError: result = cgi.escape('{%s}' % s)
      elif s == '$timenow': 
         result = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
      elif s == '$datenow': 
         result = time.strftime('%Y-%m-%d', time.gmtime())
      elif level < 1: 
         result = '{' + self.wikiParse('%s}' % s)
      elif s.startswith('* '): 
         result = '<strong>%s</strong>' % s[2:]
      elif s.startswith('#'): 
         i = s.find(' ')
         href, title = s[:i], s[i+1:]
         result = '<a href="%s">%s</a>' % (href, title)
      elif not re.compile(r'[A-Za-z0-9/_.-]').match(s): 
         result = cgi.escape('{%s}' % s)
      else: 
         self.rawlinks.append(s)
         words = s.split(' ')
         words = [word.strip() for word in words if word.strip()]
         if ('/' not in words[0]) and (':' not in words[0]): # @@!
            wikiname = ''.join(words)
            uri = './%s' % wikiname
         else: uri, s = words[0], ' '.join(words[1:])
         uri, s = cgi.escape(uri, quote=1), cgi.escape(s)
         result = '<a href="%s">%s</a>' % (uri, s)
      return result
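
   # Some examples of the tag forms handled above:
   #    {WikiWord}                  ->  <a href="./WikiWord">WikiWord</a>
   #    {* important}               ->  <strong>important</strong>
   #    {http://example.org/ home}  ->  <a href="http://example.org/">home</a>
   #    {U+2014}                    ->  &#x2014;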

def wikiParse(s): 
   output = StringIO()
   parse = TextParser(write=output.write)
   parse(s)
   output.flush()
   output.seek(0)
   return output.read()
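
# For instance, wikiParse('@ Title\n\n* one\n* two') produces markup along
# the lines of:
#    <h2 id="Title5">Title</h2>
#    <ul>
#    <li>one</li>
#    <li>two</li>
#    </ul>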

class Wikify(HTMLParser): 
   def __init__(self, write=None): 
      HTMLParser.__init__(self)
      if write is None: 
         self.write = sys.stdout.write
      else: self.write = write
      self.content = False
      self.block = False
      self.blockquote = False
      self.anchor = False
      self.current = None
      self.heading = False

   def handle_starttag(self, tag, attrs): 
      self.current = tag
      attrs = dict(attrs)

      xhtmlxmlns = 'http://www.w3.org/1999/xhtml'
      if (tag == 'html') and (attrs.get('xmlns') != xhtmlxmlns): 
         raise "ParseError", "document is not XHTML"
      elif (tag == 'div') and (attrs.get('class') == 'content'): 
         self.content = True

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = True

         if tag == 'li': 
            self.write('* ')
         elif tag in ('h1', 'h2'): 
            self.write('@ ')
         elif tag == 'pre' and not self.blockquote: 
            self.write('{{{')
         elif tag == 'blockquote': 
            self.blockquote = attrs or True
            self.write('[[[')
         elif tag == 'strong': 
            self.write('{* ')
         elif tag == 'a': 
            self.anchor = attrs
            self.anchor['_'] = ''
      elif tag == 'h1': 
         self.write('@ ')
         self.heading = True

   def handle_endtag(self, tag): 
      self.current = None

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = False

         if tag in ('p', 'h1', 'h2'): 
            self.write('\n\n')
         elif tag in ('ul', 'li'): 
            self.write('\n')
         elif tag == 'pre' and not self.blockquote: 
            self.write('}}}\n\n')
         elif tag == 'blockquote': 
            self.write(']]]')
            if hasattr(self.blockquote, 'get'): 
               cite = self.blockquote.get('cite', None)
               if cite is not None: self.write(' - %s' % cite)
            self.write('\n\n')
            self.blockquote = False
         elif tag == 'a': 
            attrs, dual = self.anchor, True
            uri, title = attrs.get('href', ''), attrs.get('_', '')
            stuff = [w.strip() for w in title.split(' ') if w.strip()]
            stitle = ''.join(stuff)
            if uri.startswith('./'): 
               wikiname = uri[2:]
               if r_name.match(wikiname): 
                  if wikiname == stitle: 
                     dual = False
            if not dual: self.write('{%s}' % title)
            else: self.write('{%s %s}' % (uri, title))
            self.anchor = False
         elif tag == 'strong': 
            self.write('}')
         elif tag == 'div': 
            self.content = False
      elif tag == 'h1': 
         self.write('\n\n')
         self.heading = False

   def handle_data(self, data): 
      if self.current in ('p', 'li', 'h1', 'h2', 'pre'): # d8uv, pre added
         data = data.replace('{', '{{')

      if (self.content and self.block) or self.heading: 
         if not self.anchor: 
            self.write(data)
         else: self.anchor['_'] += data

   def handle_charref(self, name): 
      if (self.content and self.block) or self.heading: 
         if name.startswith('x'): 
            result = '{U+%s}' % name.lstrip('x')
         elif name == '8212': 
            result = '--'
         else: raise "ParseError", "Unknown character reference: %s" % name

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

   def handle_entityref(self, name): 
      if (self.content and self.block) or self.heading: 
         entities = {'lt':'<', 'gt':'>', 'amp':'&', 'quot':'"'}
         result = entities.get(name, '?')

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

def wikify(s): 
   output = StringIO()
   parser = Wikify(write=output.write)
   parser.feed(s) # @@ except?
   output.flush()
   output.seek(0)
   return output.read()
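
# wikify() is the rough inverse of wikiParse(): it turns the stored XHTML
# back into editable wiki text. For example:
#    wikify('<html xmlns="http://www.w3.org/1999/xhtml">'
#           '<div class="content"><p>Hi {x}</p></div></html>')
# returns 'Hi {{x}\n\n'.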

def html(title, body): 
   return (
      # prolog: the exact doctype is assumed; the xmlns on <html> is what
      # Wikify.handle_starttag checks for
      '<?xml version="1.0" encoding="utf-8"?>', 
      '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">', 
      '<html xmlns="http://www.w3.org/1999/xhtml">', 
      '<head>', 
      '<title>%s</title>' % title, 
      '</head>', 
      '<body>', 
      '\n%s\n' % body, 
      '</body>', '</html>'
   )

def compile(wikiname, text): 
   content = wikiParse(text)

   s = ''
   heading = None
   if content.startswith('<h2 id="'): 
      i = content.find('</h2>') + len('</h2>')
      j = content.find('>')
      heading = content[:i][j+1:-len('</h2>')]
      s += '<h1>%s</h1>\n\n' % heading
      content = content[i:].lstrip('\r\n')
   else: s += '<h1>%s</h1>\n' % (wikiname)
   s += '<div class="content">\n%s\n</div>\n\n' % content
   s += '''<address>
Sean B. Palmer
</address>'''

   if (heading is not None) and (heading != wikiname): 
      title = '%s - %s' % (heading, wikiname)
   else: title = wikiname
   return html(title, s)

def noop(): 
   # @@ Should we display some sort of user warning?
   pass

def get(wikiname): 
   filename = '%s.html' % wikiname
   if os.path.isfile(filename): 
      f = open(filename)
      for line in f: 
         yield line
      f.close()
   else: 
      # the @edit/ path is the one routed to edit() by main() below
      lines = ('<h1>%s</h1>\n<p>This page does not yet exist. ' % wikiname, 
               '<a href="@edit/%s">Create it!</a></p>' % wikiname)
      for line in html('Create %s' % wikiname, '\n'.join(lines)): 
         yield line

def edit(wikiname): 
   filename = '%s.html' % wikiname
   if os.path.isfile(filename): 
      f = open(filename)
      data = f.read()
      try: text = wikify(data)
      except ParseError, e: 
         text = "Error: couldn't wikify source! (%s)\n" % e
      f.close()
   else: text = ''

   if wikiname == 'index': 
      wikiname = ''
   # the form posts back to the page itself, one level up from @edit/
   lines = ('<form action="../%s" method="post">' % wikiname, 
            '<textarea name="text">%s</textarea>' % cgi.escape(text), 
            '<input type="submit" value="Save" />', 
            '</form>')
   return html('Editing %s' % (wikiname or 'index'), '\n'.join(lines))

def doPost(wikiname, text, append=False): 
   if append: 
      filename = '%s.html' % wikiname
      if os.path.isfile(filename): 
         f = open(filename)
         data = f.read()
         # try: text = wikify(data) + text
         # except ParseError, e: 
         #    msg = "Error: couldn't wikify existing source! (%s)\n" % e
         #    text = msg + text
         f.close()

   if text: # If there's any content to the post...
      html = [(line + '\n') for line in compile(wikiname, text)]
      filename = '%s.html' % wikiname
      f = open(filename, 'w')
      f.writelines(html)
      f.close()
      try: os.chmod(filename, 0646)
      except OSError: pass
   elif (not append) and os.path.isfile('%s.html' % wikiname): 
      os.remove('%s.html' % wikiname)
   else: noop()

def post(wikiname): 
   form = cgi.FieldStorage()
   form.__call__ = lambda s: form[s].value
   text = form('text')
   if form.has_key('append'): 
      append = True
   else: append = False
   doPost(wikiname, text, append=append)

def main(env=None, out=None): 
   if env is None: 
      env = os.environ
   if out is None: 
      out = sys.stdout

   method = env.get('REQUEST_METHOD')
   if method not in frozenset(['GET', 'POST']): 
      raise ValueError('Unsupported method: %s' % method)

   out.write('Content-Type: text/html; charset=utf-8\r\n')
   out.write('\r\n')

   uri = env.get('REQUEST_URI')
   path, directory, filename = uri.rsplit('/', 2)
   wikiname = filename or 'index'
   if not r_name.match(wikiname): 
      raise ValueError('Invalid wikiname: %s' % wikiname)

   if directory != '@edit': 
      if method == 'POST': 
         post(wikiname)
      out.writelines(get(wikiname))
   else: out.writelines(edit(wikiname))

if __name__=='__main__': main()
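
# Request routing in main(), assuming the script is served under /wiki/:
#    GET  /wiki/SomePage        -> get('SomePage')       render the stored page
#    GET  /wiki/@edit/SomePage  -> edit('SomePage')      show the edit form
#    POST /wiki/SomePage        -> post('SomePage'), then get('SomePage')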