#!/usr/bin/python
"""
wiki.py - A Simple Python Wiki
Author: Sean B. Palmer, inamidst.com
"""

import cgitb; cgitb.enable()
import sys, os, re, cgi, glob, time
from HTMLParser import HTMLParser
try: from cStringIO import StringIO
except ImportError: 
   from StringIO import StringIO

__version__ = str(time.time())

class Parser(object): 
   EOF = 0

   def __init__(self, write=None, error=None): 
      self.text = None
      self.pos = 0

      if write is None: 
         write = sys.stdout.write
      self.write = write

      if error is None: 
         # example: sys.stderr.write("%s: %s" % (e, msg))
         error = lambda e, msg: None
      self.error = error

   def find(self, tokens): 
      """For each token in tokens, if the current position matches one of 
         those tokens, return True. Return False otherwise."""
      for token in tokens: 
         if token == self.EOF: 
            if self.pos == len(self.text): 
               return True
         elif self.text[self.pos:].startswith(token): 
            return True
      return False

   def eat(self, token): 
      """Eat the length of token if token's an int, or the token itself."""
      if type(token) is int: 
         if (self.pos + token) > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         s = self.text[self.pos:self.pos+token]
         self.pos += token
         return s
      else: 
         assert self.find([token])
         self.pos += len(token)
         if self.pos > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         return token

   def get(self, tokens, start=None, finish=None): 
      if start is not None: 
         self.eat(start)
      content = ''
      while not self.find(tokens): 
         s = self.eat(1)
         if s is not None: 
            content += s
         else: return content # reached EOF
      if finish is not None: 
         self.eat(finish)
      return content

r_tag = re.compile(r'(?<!\{)\{(?!\{)([^}]+)\}')
r_name = re.compile(r'^[A-Za-z0-9-]+$')
r_uri = re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*:[^<>"]+$')
r_emdash = re.compile(r'[A-Za-z0-9"]--(?=[A-Za-z0-9"{])')
r_alpha = re.compile(r'[A-Za-z]+')

def makeID(s, current): 
   s = (''.join(r_alpha.findall(s)) or 'id') + str(len(s))
   while s in current: 
      s += 'n'
   return s

class TextParser(Parser): 
   LIST = 0
   HEADING = 1
   PRE = 2
   QUOT = 3
   PARAGRAPH = 4

   LI_START = '* '
   LI_OPEN = '\n* '
   PRE_START = '{{{\n'
   PRE_END = '\n}}}'
   QUOT_START = '[[[\n'
   QUOT_END = '\n]]]'
   H_START = '@ '
   SEPERATOR = '\n\n'

   def __init__(self, write=None, error=None): 
      Parser.__init__(self, write=write, error=error)

      self.rawlinks = []
      self.ids = []

   def __call__(self, s): 
      self.text = s
      self.normalize()
      self.parse()

   def normalize(self): 
      self.text = self.text.strip() # ('\t\r\n ')
      self.text = self.text.replace('\r\n', '\n')
      self.text = self.text.replace('\r', '\n')
      self.text = re.sub(r'(?sm)\n[ \t]*\n', '\n\n', self.text)

   def parse(self): 
      blocks = []

      while 1: 
         blocks.append(self.blockElement())
         if self.find([Parser.EOF]): break

      for block in blocks: 
         blocktype, values = block[0], block[1:]
         {self.LIST: self.listElement, 
          self.HEADING: self.headingElement, 
          self.PRE: self.preElement, 
          self.QUOT: self.quotElement, 
          self.PARAGRAPH: self.paragraphElement
         }[blocktype](*values)

   def blockElement(self): 
      self.whitespace()

      if self.find([self.LI_START]): 
         content = self.get([self.SEPERATOR, Parser.EOF], self.LI_START)
         content = tuple(content.split('\n* '))
         return (self.LIST,) + content
      elif self.find([self.H_START]): 
         content = self.get(['\n', Parser.EOF], self.H_START)
         return (self.HEADING, content)
      elif self.find([self.PRE_START]): 
         content = self.get([self.PRE_END], self.PRE_START, self.PRE_END)
         return (self.PRE, content)
      elif self.find([self.QUOT_START]): 
         content = self.get([self.QUOT_END], self.QUOT_START, self.QUOT_END)
         if self.find([' - ']): 
            citation = self.get(['\n', Parser.EOF], ' - ')
            if not (r_uri.match(citation) and citation): 
               self.error('CitationURIError', # @@ allow other stuff?
                          'Citation (%s) must be a URI.' % citation)
         else: citation = None
         return (self.QUOT, content, citation)
      else: return (self.PARAGRAPH, self.get([self.SEPERATOR, Parser.EOF]))

   def whitespace(self): 
      while self.find(' \t\n'): 
         self.eat(1)

   def listElement(self, *items): 
      self.write('<ul>')
      self.write('\n')

      for item in items: 
         self.write('<li>')
         self.write(self.wikiParse(item))
         self.write('</li>')
         self.write('\n')

      self.write('</ul>')
      self.write('\n')

   def headingElement(self, content): 
      content = self.wikiParse(content)

      newid = makeID(content, self.ids)
      self.ids.append(newid)

      self.write('<h2 id="%s">' % newid)
      self.write(content)
      self.write('</h2>')
      self.write('\n')

   def preElement(self, content): 
      self.write('<pre>')

      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')

      self.write('</pre>')
      self.write('\n')

   def quotElement(self, content, cite): 
      self.write('<blockquote')
      if cite: 
         cite = self.iriParse(cite)
         cite = cgi.escape(cite, quote=1) # @@
         self.write(' cite="%s"' % cite)
      self.write('>')
      self.write('\n')

      self.write('<pre class="quote">') # @@
      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')
      self.write('</pre>')
      self.write('\n')

      self.write('</blockquote>')
      self.write('\n')

   def paragraphElement(self, content): 
      self.write('<p>')
      self.write(self.wikiParse(content))
      self.write('</p>')
      self.write('\n')

   def wikiParse(self, s, level=None): 
      if level is None: 
         level = 1
      # @@ use a proper parser, or catch the matches
      pos, result = 0, ''
      while pos < len(s): 
         m = r_tag.match(s[pos:])
         if m: 
            span = m.span()
            result += self.tag(s[pos:pos+span[1]], level=level)
            pos += span[1] - span[0]
         else: 
            m = r_emdash.match(s[pos:])
            if m and (level > 0): # unicode must be explicit in <pre>
               result += s[pos] + '&#8212;' # u'\u2014'.encode('utf-8')
               pos += 3
            elif (s[pos] == '{') and (s[pos+1:pos+2] != '{') and (level > 0): 
               if (s < 10): area = s[0:pos+10]
               else: area = s[pos-10:pos+10]
               msg = "The '{' must be escaped as '{{' in %r" % area
               raise "WikiParseError", msg
            elif (s[pos:pos+2] == '{{'): # d8uv bug "and (level > 0): "
               result += '{'
               pos += 2
            elif s[pos] == '&': 
               result += '&amp;'
               pos += 1
            elif s[pos] == '<': 
               result += '&lt;'
               pos += 1
            else: 
               result += s[pos]
               pos += 1
      return result

   def iriParse(self, uri): 
      r_unicode = re.compile(r'\{U\+([1-9A-F][0-9A-F]{1,5})\}')
      def escape(m): 
         bytes = unichr(int(m.group(1), 16)).encode('utf-8')
         return ''.join(['%%%02X' % ord(s) for s in bytes])
      return r_unicode.sub(escape, uri)

   def unicodeify(self, s): 
      if len(s) not in (2, 4, 6): 
         raise ValueError, 'Must be of length 2, 4, or 6'
      for letter in 'abcdef': 
         if letter in s: 
            raise ValueError, 'Unicode escapes must be lower-case'
      i = int(s.lstrip('0'), 16)
      raw = [0x9, 0xA, 0xD] + list(xrange(0x20, 0x7E))
      del raw[raw.index(0x2D)], raw[raw.index(0x5D)], raw[raw.index(0x7D)]
      if i in raw: return chr(i) # printable - '-]}'
      elif i > 0x10FFFF: 
         raise ValueError, 'Codepoint is out of range'
      return '&#x%s;' % s

   def tag(self, s, level=None): 
      if level is None: 
         level = 1 # @@ { {U+..}?
      s = s[1:-1] # @@ or s.strip('{}')
      if s.startswith('U+'): 
         try: result = self.unicodeify(s[2:])
         except ValueError: result = cgi.escape('{%s}' % s)
      elif s == '$timenow': 
         result = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
      elif s == '$datenow': 
         result = time.strftime('%Y-%m-%d', time.gmtime())
      elif level < 1: 
         result = '{' + self.wikiParse('%s}' % s)
      elif s.startswith('* '): 
         result = '<strong>%s</strong>' % s[2:]
      elif s.startswith('#'): 
         i = s.find(' ')
         href, title = s[:i], s[i+1:]
         result = '<a href="%s">%s</a>' % (href, title)
      elif not re.compile(r'[A-Za-z0-9/_.-]').match(s): 
         result = cgi.escape('{%s}' % s)
      else: 
         self.rawlinks.append(s)
         words = s.split(' ')
         words = [word.strip() for word in words if word.strip()]
         if ('/' not in words[0]) and (':' not in words[0]): # @@!
            wikiname = ''.join(words)
            uri = './%s' % wikiname
         else: uri, s = words[0], ' '.join(words[1:])
         uri, s = cgi.escape(uri, quote=1), cgi.escape(s)
         result = '<a href="%s">%s</a>' % (uri, s)
      return result

def wikiParse(s): 
   output = StringIO()
   parse = TextParser(write=output.write)
   parse(s)
   output.flush()
   output.seek(0)
   return output.read()

class Wikify(HTMLParser): 
   def __init__(self, write=None): 
      HTMLParser.__init__(self)
      if write is None: 
         self.write = sys.stdout.write
      else: self.write = write
      self.content = False
      self.block = False
      self.blockquote = False
      self.anchor = False
      self.current = None
      self.heading = False

   def handle_starttag(self, tag, attrs): 
      self.current = tag
      attrs = dict(attrs)

      xhtmlxmlns = 'http://www.w3.org/1999/xhtml'
      if (tag == 'html') and (attrs.get('xmlns') != xhtmlxmlns): 
         raise "ParseError", "document is not XHTML"
      elif (tag == 'div') and (attrs.get('class') == 'content'): 
         self.content = True

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = True

         if tag == 'li': 
            self.write('* ')
         elif tag in ('h1', 'h2'): 
            self.write('@ ')
         elif tag == 'pre' and not self.blockquote: 
            self.write('{{{')
         elif tag == 'blockquote': 
            self.blockquote = attrs or True
            self.write('[[[')
         elif tag == 'strong': 
            self.write('{* ')
         elif tag == 'a': 
            self.anchor = attrs
            self.anchor['_'] = ''
      elif tag == 'h1': 
         self.write('@ ')
         self.heading = True

   def handle_endtag(self, tag): 
      self.current = None

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = False

         if tag in ('p', 'h1', 'h2'): 
            self.write('\n\n')
         elif tag in ('ul', 'li'): 
            self.write('\n')
         elif tag == 'pre' and not self.blockquote: 
            self.write('}}}\n\n')
         elif tag == 'blockquote': 
            self.write(']]]')
            if hasattr(self.blockquote, 'get'): 
               cite = self.blockquote.get('cite', None)
               if cite is not None: self.write(' - %s' % cite)
            self.write('\n\n')
            self.blockquote = False
         elif tag == 'a': 
            attrs, dual = self.anchor, True
            uri, title = attrs.get('href', ''), attrs.get('_', '')
            stuff = [w.strip() for w in title.split(' ') if w.strip()]
            stitle = ''.join(stuff)
            if uri.startswith('./'): 
               wikiname = uri[2:]
               if r_name.match(wikiname): 
                  if wikiname == stitle: 
                     dual = False
            if not dual: self.write('{%s}' % title)
            else: self.write('{%s %s}' % (uri, title))
            self.anchor = False
         elif tag == 'strong': 
            self.write('}')
         elif tag == 'div': 
            self.content = False
      elif tag == 'h1': 
         self.write('\n\n')
         self.heading = False

   def handle_data(self, data): 
      if self.current in ('p', 'li', 'h1', 'h2', 'pre'): # d8uv, pre added
         data = data.replace('{', '{{')

      if (self.content and self.block) or self.heading: 
         if not self.anchor: 
            self.write(data)
         else: self.anchor['_'] += data

   def handle_charref(self, name): 
      if (self.content and self.block) or self.heading: 
         if name.startswith('x'): 
            result = '{U+%s}' % name.lstrip('x')
         elif name == '8212': 
            result = '--'
         else: raise "ParseError", "Unknown character reference: %s" % name

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

   def handle_entityref(self, name): 
      if (self.content and self.block) or self.heading: 
         entities = {'lt':'<', 'gt':'>', 'amp':'&', 'quot':'"'}
         result = entities.get(name, '?')

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

def wikify(s): 
   output = StringIO()
   parser = Wikify(write=output.write)
   parser.feed(s) # @@ except?
   output.flush()
   output.seek(0)
   return output.read()

def html(title, body): 
   return (
      '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" >', '', 
      '<html xmlns="http://www.w3.org/1999/xhtml">', 
      '<head>', 
      '<title>%s</title>' % title, 
      '<link rel="stylesheet" type="text/css" href="style.css" />', 
      '</head>', 
      '<body>\n%s\n</body>' % body, 
      '</html>', ''
   )

def compile(wikiname, text): 
   content = wikiParse(text)

   s = ''
   heading = None
   if content.startswith('<h2'): 
      i = content.find('\n')
      if i < 0: i = len(content)
      j = content.find('>')
      heading = content[:i][len('<h2'):-len('</h2>')]
      s += '<h1%s</h1>\n\n' % heading
      content = content[i:].lstrip('\r\n')
      heading = s[:i][j+1:-len('</h1>')]
   else: s += '<h1>%s</h1>\n' % (wikiname)

   s += '<div class="content">\n%s\n</div>\n\n' % content
   s += '''
<address>
<a href="http://inamidst.com/sbp/">Sean B. Palmer</a>
</address>
'''
   if (heading is not None) and (heading != wikiname): 
      title = '%s - %s' % (heading, wikiname)
   else: title = wikiname
   return html(title, s)

def noop(): 
   # @@ Should we display some sort of user warning?
   pass

def get(wikiname): 
   filename = '%s.html' % wikiname
   if os.path.isfile(filename): 
      f = open(filename)
      for line in f: yield line
      f.close()
   else: 
      lines = ('<h1>%s</h1>\n<p>This page does not yet exist. ' % wikiname, 
               '<a href="./@edit/%s">Create it!</a></p>' % wikiname)
      for line in html('Create %s' % wikiname, '\n'.join(lines)): 
         yield line

def edit(wikiname): 
   filename = '%s.html' % wikiname
   if os.path.isfile(filename): 
      f = open(filename)
      data = f.read()
      try: text = wikify(data)
      except "ParseError", e: 
         text = "Error: couldn't wikify source! (%s)\n" % e
      f.close()
   else: text = ''

   if wikiname == 'index': 
      wikiname = ''

   lines = ('<form action="../%s" method="POST">' % wikiname, 
            '   <div><textarea rows="20" cols="80" ', 
            '     name="text">%s</textarea></div>' % cgi.escape(text), 
            '   <div><input type="submit" /></div>', 
            '</form>')
   return html('Editing %s' % (wikiname or 'index'), '\n'.join(lines))

def doPost(wikiname, text, append=False): 
   if append: 
      filename = '%s.html' % wikiname
      if os.path.isfile(filename): 
         f = open(filename)
         data = f.read()
         # try: 
         text = wikify(data) + text
         # except "ParseError", e:
         #    msg = "Error: couldn't wikify existing source! (%s)\n" % e
         #    text = msg + text
         f.close()

   if text: # If there's any content to the post...
      html = [(line + '\n') for line in compile(wikiname, text)]
      filename = '%s.html' % wikiname
      f = open(filename, 'w')
      f.writelines(html)
      f.close()
      try: os.chmod(filename, 0646)
      except OSError: pass
   elif (not append) and os.path.isfile('%s.html' % wikiname): 
      os.remove('%s.html' % wikiname)
   else: noop()

def post(wikiname): 
   form = cgi.FieldStorage()
   form.__call__ = lambda s: form[s].value

   text = form('text')
   if form.has_key('append'): 
      append = True
   else: append = False
   doPost(wikiname, text, append=append)

def main(env=None, out=None): 
   if env is None: env = os.environ
   if out is None: out = sys.stdout

   method = env.get('REQUEST_METHOD')
   if method not in frozenset(['GET', 'POST']): 
      raise ValueError('Unsupported method: %s' % method)

   out.write('Content-Type: text/html; charset=utf-8\r\n')
   out.write('\r\n')

   uri = env.get('REQUEST_URI')
   path, directory, filename = uri.rsplit('/', 2)
   wikiname = filename or 'index'
   if not r_name.match(wikiname): 
      raise ValueError('Invalid wikiname: %s' % wikiname)

   if directory != '@edit': 
      if method == 'POST': 
         post(wikiname)
      out.writelines(get(wikiname))
   else: out.writelines(edit(wikiname))

if __name__=='__main__': 
   main()