#!/usr/bin/python
"""
pwyky - A Simple Wiki in Python.
Documentation: http://infomesh.net/pwyky/
Author: Sean B. Palmer, inamidst.com
License: GPL 2; share and enjoy!
"""

import sys, os, re, cgi, glob, time
from cStringIO import StringIO
from HTMLParser import HTMLParser

config = {}
configvars = ('default', 'sname', 'lname', 'pedit', 'owner', 'logdir')
r_meta = re.compile('(?s)([^\n:]+): (.*?)(?=\n[^ \t]|\Z)')

if os.path.exists('config.txt'): 
   meta = r_meta.findall(open('config.txt').read())
   for key, value in filter(lambda (k, v): k in configvars, meta): 
      value = value.strip()
      if value in ('0', 'None', 'False'): value = False
      config[key] = value

default = config.get('default', 'index')
shortname = config.get('sname', 'pwyky')
longname = config.get('lname', 'my pwyky site')
pedit = config.get('pedit', True)
owner = config.get('owner', '%s owner' % os.environ.get('SERVER_NAME', 'Site'))
logdir = config.get('logdir', False)

nedit = 'edit'
profile = 'http://infomesh.net/pwyky/profile#'

s_name = os.environ.get('SCRIPT_NAME', '/notes/index.cgi')
base, script = s_name[:s_name.rfind('/')], s_name[s_name.rfind('/')+1:]

if not os.path.exists('.htaccess'): 
   f = open('.htaccess', 'w')
   print >> f, 'DirectoryIndex %s' % script
   print >> f, 'Options -MultiViews'
   print >> f, 'RewriteEngine on'
   print >> f, 'RewriteBase %s' % base
   print >> f, 'RewriteRule ^@[a-z]+/([A-Za-z0-9-]+)$ %s [L]' % script
   print >> f, 'RewriteRule ^([A-Za-z0-9-]+)$ %s [L]' % script
   print >> f, 'RewriteRule ^([A-Za-z0-9-]+)\.html$ - [L]'
   f.close()

if not os.path.exists('style.css'): 
   from base64 import decodestring
   from zlib import decompress
   f = open('style.css', 'w')
   print >> f, decompress(decodestring("""
eJyNUs1OwzAMvu8prHIBiVTdJpDaHTjuAeAFvNZdI9K4JN7oQLw72dLujx04pIrc78+xV1zt4Bsm
AC26tbYFTNMnamEWTrwdvosDpGYrqsZWm10BS+JAwEfwaL3y5HQdUD+TSTN9hGY2qF5Q3rDhNjCS
pTYGXgMvuaKPjE/S60YKsOxaNFEXq8qR9+fCXn9RAT5ATCR3AaTtWgl3BWTp/Bh9xa4iF+uzrgfP
Rldwlz3nZ65edoZum2JKlZZz7y06jVYGe1Vi5yOl0tsUV7w5og1jgLl9R4vr7Fmax4x7r9SypV57
ITuSSzbsihA0y26ACqyF3ADdkhNdolFo9DoMMvT6x65XZ49Vhvpe
BJKXJFaEelEVlexQNNv9U1iKvp2j4BOXRBmqpRg3I/xcGS7fPzYsNGQZ5xCBWXo9h7E+Dws2jCLP
y+h0Erto7/bATz0olQCKuPtSCz1cR0HM/+8t/sWPME"""))
   f.close()

class Parser(object): 
   EOF = 0

   def __init__(self, write=None, error=None): 
      self.text = None
      self.pos = 0
      if write is None: 
         write = sys.stdout.write
      self.write = write
      if error is None: 
         # example: sys.stderr.write("%s: %s" % (e, msg))
         error = lambda e, msg: None
      self.error = error

   def find(self, tokens): 
      """Return True if the text at the current position starts with one 
      of tokens (EOF matches the end of the text), False otherwise."""
      for token in tokens: 
         if token == self.EOF: 
            if self.pos == len(self.text): 
               return True
         elif self.text[self.pos:].startswith(token): 
            return True
      return False

   def eat(self, token): 
      """Eat the length of token if token's an int, or the token itself."""
      if type(token) is int: 
         if (self.pos + token) > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         s = self.text[self.pos:self.pos+token]
         self.pos += token
         return s
      else: 
         assert self.find([token])
         self.pos += len(token)
         if self.pos > len(self.text): 
            self.error("UnexpectedEOF", "Reached end of file")
            return None
         return token

   def get(self, tokens, start=None, finish=None): 
      if start is not None: 
         self.eat(start)
      content = ''
      while not self.find(tokens): 
         s = self.eat(1)
         if s is not None: 
            content += s
         else: return content # reached EOF
      if finish is not None: 
         self.eat(finish)
      return content

r_tag = re.compile(r'(?s)\{[^{}]+\}')
r_name = re.compile(r'^[A-Za-z0-9-]+$')
r_uri = re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*:[^ <>"]+$')
r_emdash = re.compile(r'[A-Za-z0-9"]--(?=[A-Za-z0-9"{])')
r_alpha = re.compile(r'[A-Za-z]+')

def makeID(s, current): 
   s = (''.join(r_alpha.findall(s)) or 'id') + str(len(s))
   while s in current: 
      s += 'n'
   return s

class TextParser(Parser): 
   LIST = 0
   OLIST = 1
   HEADING = 2
   PRE = 3
   QUOT = 4
   PARAGRAPH = 5
   HR = 6

   LI_START = '- '
   LI_OPEN = '\n- '
   OL_START = '# '
   OL_OPEN = '\n# '
   PRE_START = '{{{\n'
   PRE_END = '\n}}}'
   QUOT_START = '[[[\n'
   QUOT_END = '\n]]]'
   H_START = '@ '
   HR_START = '--'
   SEPERATOR = '\n\n'

   def __init__(self, write=None, error=None, exists=None): 
      Parser.__init__(self, write=write, error=error)
      if exists is None: 
         exists = lambda wn: True
      self.exists = exists
      self.rawlinks = []
      self.ids = []

   def __call__(self, s): 
      self.text = s
      self.normalize()
      self.parse()

   def normalize(self): 
      self.text = self.text.strip() # ('\t\r\n ')
      self.text = self.text.replace('\r\n', '\n')
      self.text = self.text.replace('\r', '\n')
      self.text = re.sub(r'(?sm)\n[ \t]*\n', '\n\n', self.text)

   def parse(self): 
      blocks = []
      while 1: 
         blocks.append(self.blockElement())
         if self.find([Parser.EOF]): break
      for block in blocks: 
         blocktype, values = block[0], block[1:]
         {self.LIST: self.listElement, 
          self.OLIST: self.olistElement, 
          self.HEADING: self.headingElement, 
          self.HR: self.hrElement, 
          self.PRE: self.preElement, 
          self.QUOT: self.quotElement, 
          self.PARAGRAPH: self.paragraphElement
         }[blocktype](*values)

   def blockElement(self): 
      self.whitespace()
      if self.find([self.LI_START]): 
         content = self.get([self.SEPERATOR, Parser.EOF], self.LI_START)
         content = tuple(content.split('\n- '))
         return (self.LIST,) + content
      elif self.find([self.OL_START]): 
         content = self.get([self.SEPERATOR, Parser.EOF], self.OL_START)
         content = tuple(content.split('\n# '))
         return (self.OLIST,) + content
      elif self.find([self.H_START]): 
         content = self.get(['\n', Parser.EOF], self.H_START)
         return (self.HEADING, content)
      elif self.find([self.HR_START]): 
         content = self.get(['\n', Parser.EOF], self.HR_START)
         return (self.HR, content)
      elif self.find([self.PRE_START]): 
         content = self.get([self.PRE_END], self.PRE_START, self.PRE_END)
         return (self.PRE, content)
      elif self.find([self.QUOT_START]): 
         content = self.get([self.QUOT_END], self.QUOT_START, self.QUOT_END)
         if self.find([' - ']): 
            citation = self.get(['\n', Parser.EOF], ' - ')
            if not (r_uri.match(citation) and citation): 
               self.error('CitationURIError', # @@ allow other stuff?
                  'Citation (%s) must be a URI.' % citation)
         else: citation = None
         return (self.QUOT, content, citation)
      else: return (self.PARAGRAPH, self.get([self.SEPERATOR, Parser.EOF]))

   def whitespace(self): 
      while self.find(' \t\n'): 
         self.eat(1)

   def listElement(self, *items): 
      self.write('<ul>')
      self.write('\n')
      for item in items: 
         self.write('<li>')
         self.write(self.wikiParse(item))
         self.write('</li>')
         self.write('\n')
      self.write('</ul>')
      self.write('\n')

   def olistElement(self, *items): 
      self.write('<ol>')
      self.write('\n')
      for item in items: 
         self.write('<li>')
         self.write(self.wikiParse(item))
         self.write('</li>')
         self.write('\n')
      self.write('</ol>')
      self.write('\n')

   def headingElement(self, content): 
      content = self.wikiParse(content)
      newid = makeID(content, self.ids)
      self.ids.append(newid)
      self.write('<h1 id="%s">' % newid)
      self.write(content)
      self.write('</h1>')
      self.write('\n')

   def hrElement(self, content): 
      if (not content) or (not content.strip('-')): 
         self.write('<hr />')
         self.write('\n')
      else: self.paragraphElement(self.HR_START + content)

   def preElement(self, content): 
      self.write('<pre>')
      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')
      self.write('</pre>')
      self.write('\n')

   def quotElement(self, content, cite): 
      if cite is not None: 
         self.write('<blockquote cite="%s">' % self.iriParse(cite))
      else: self.write('<blockquote>')
      self.write('\n')
      self.write('<pre>') # @@
      self.write('\n')
      self.write(self.wikiParse(content, level=0))
      self.write('\n')
      self.write('</pre>')
      self.write('\n')
      self.write('</blockquote>')
      self.write('\n')

   def paragraphElement(self, content): 
      self.write('<p>')
      self.write(self.wikiParse(content))
      self.write('</p>')
      self.write('\n')

   def wikiParse(self, s, level=None): 
      if level is None: 
         level = 1
      # @@ use a proper parser, or catch the matches
      pos, result = 0, ''
      while pos < len(s): 
         m = r_tag.match(s[pos:])
         if m: 
            span = m.span()
            result += self.tag(s[pos:pos+span[1]], level=level)
            pos += span[1] - span[0]
         else: 
            m = r_emdash.match(s[pos:])
            if m and (level > 0): # unicode must be explicit in the markup
               result += s[pos] + '&#8212;' # u'\u2014'.encode('utf-8')
               pos += 3
            elif (s[pos] == '{') and (s[pos+1:pos+2] != '{') and (level > 0): 
               if (pos < 10): area = s[0:pos+10]
               else: area = s[pos-10:pos+10]
               msg = "The '{' must be escaped as '{{' in %r" % area
               raise "WikiParseError", msg
            elif (s[pos:pos+2] == '{{'): # d8uv bug "and (level > 0): "
               result += '{'
               pos += 2
            elif s[pos] == '&': 
               result += '&amp;'
               pos += 1
            elif s[pos] == '<': 
               result += '&lt;'
               pos += 1
            else: 
               result += s[pos]
               pos += 1
      return result
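   # Illustrative inline-markup results from wikiParse (at the default 
   # level=1; hypothetical inputs): 
   #   'a {* strong} point' -> 'a <strong>strong</strong> point'
   #   'word--word'         -> 'word&#8212;word'
   #   'AT&T'               -> 'AT&amp;T'
   #   '{{escaped brace'    -> '{escaped brace'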

   def iriParse(self, uri): 
      r_unicode = re.compile(r'\{U\+([1-9A-F][0-9A-F]{1,5})\}')
      def escape(m): 
         bytes = unichr(int(m.group(1), 16)).encode('utf-8')
         return ''.join(['%%%02X' % ord(s) for s in bytes])
      return r_unicode.sub(escape, uri)
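   # Example (illustrative): iriParse('http://example.org/{U+2014}') 
   # returns 'http://example.org/%E2%80%94', since U+2014 encodes to 
   # three bytes in UTF-8.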

   def unicodeify(self, s): 
      if len(s) not in (2, 4, 6): 
         raise ValueError, 'Must be of length 2, 4, or 6'
      for letter in 'abcdef': 
         if letter in s: 
            raise ValueError, 'Unicode escapes must be upper-case'
      i = int(s.lstrip('0'), 16)
      raw = [0x9, 0xA, 0xD] + list(xrange(0x20, 0x7E))
      del raw[raw.index(0x2D)], raw[raw.index(0x5D)], raw[raw.index(0x7D)]
      if i in raw: return chr(i) # printable - '-]}'
      elif i > 0x10FFFF: 
         raise ValueError, 'Codepoint is out of range'
      return '&#x%s;' % s
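   # Examples (illustrative): 
   #   unicodeify('41')   -> 'A' (printable ASCII is emitted directly)
   #   unicodeify('2D')   -> '&#x2D;' ('-', ']' and '}' stay escaped)
   #   unicodeify('2014') -> '&#x2014;'
   #   unicodeify('201c') -> ValueError (hex digits must be upper-case)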

   def tag(self, s, level=None): 
      if level is None: 
         level = 1 # @@ { {U+..}?
      s = s[1:-1] # @@ or s.strip('{}')
      if s.startswith('U+'): 
         try: result = self.unicodeify(s[2:])
         except ValueError: result = cgi.escape('{%s}' % s)
      elif s == '$timenow': 
         result = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
      elif s == '$datenow': 
         result = time.strftime('%Y-%m-%d', time.gmtime())
      elif level < 1: 
         result = '{' + self.wikiParse('%s}' % s)
      elif s.startswith('* '): 
         result = '<strong>%s</strong>' % s[2:]
      elif s.startswith('#'): 
         i = s.find(' ')
         href, title = s[:i], s[i+1:]
         result = '<a href="%s">%s</a>' % (href, title)
      elif not re.compile(r'[A-Za-z0-9_.-]').match(s): 
         result = cgi.escape('{%s}' % s)
      else: 
         self.rawlinks.append(s)
         words = s.split(' ')
         words = [word.strip() for word in words if word.strip()]
         if ('/' not in words[0]) and (':' not in words[0]): # @@!
            wn = ''.join(words)
            uri = './%s' % wn
            if not self.exists(wn): 
               cls = ' class="nonexistent"'
            else: cls = ''
         else: uri, s, cls = words[0], ' '.join(words[1:]), ''
         uri, s = cgi.escape(uri, quote=1), cgi.escape(s)
         result = '<a href="%s"%s>%s</a>' % (uri, cls, s)
      return result
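   # Illustrative results of tag(), braces included in the input: 
   #   '{* very}'       -> '<strong>very</strong>'
   #   '{#intro Intro}' -> '<a href="#intro">Intro</a>'
   #   '{U+2014}'       -> '&#x2014;'
   #   '{SomePage}'     -> '<a href="./SomePage">SomePage</a>', with 
   #                       class="nonexistent" if SomePage.html is absent
   #   '{$datenow}'     -> the current UTC date, e.g. '2004-09-01'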

def wikiParse(s, getlinks=False): 
   output = StringIO()
   parse = TextParser(write=output.write, 
                      exists=lambda wn: os.path.exists(wn + '.html'))
   parse(s)
   output.flush()
   output.seek(0)
   if getlinks: 
      return output.read(), parse.rawlinks
   return output.read()
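# Example (illustrative): 
#   wikiParse('@ Hello\n\nSome {* bold} text.')
# returns something like: 
#   '<h1 id="Hello5">Hello</h1>\n<p>Some <strong>bold</strong> text.</p>\n'
# where the id comes from makeID above.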

class Wikify(HTMLParser): 
   def __init__(self, write=None): 
      HTMLParser.__init__(self)
      if write is None: 
         self.write = sys.stdout.write
      else: self.write = write
      self.content = False
      self.block = False
      self.list = False
      self.blockquote = False
      self.anchor = False
      self.current = None

   def handle_starttag(self, tag, attrs): 
      self.current = tag
      attrs = dict(attrs)

      xhtmlxmlns = 'http://www.w3.org/1999/xhtml'
      if (tag == 'html') and (attrs.get('xmlns') != xhtmlxmlns): 
         raise "ParseError", "document is not XHTML"
      elif (tag == 'head') and (attrs.get('profile') != profile): 
         raise "ParseError", "document has incorrect profile"
      elif (tag == 'div') and (attrs.get('class') == 'content'): 
         self.content = True

      if tag in ('ul', 'ol'): 
         self.list = tag

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = True

         if tag == 'li': 
            if self.list == 'ul': 
               self.write('- ')
            else: self.write('# ')
         elif tag in ('h1', 'h2'): 
            self.write('@ ')
         elif tag == 'hr': 
            self.write('--\n\n')
         elif tag == 'pre' and not self.blockquote: 
            self.write('{{{')
         elif tag == 'blockquote': 
            self.blockquote = attrs
            self.write('[[[')
         elif tag == 'strong': 
            self.write('{* ')
         elif tag == 'a': 
            self.anchor = attrs
            self.anchor['_'] = ''

   def handle_endtag(self, tag): 
      self.current = None

      if tag in ('ul', 'ol'): 
         self.list = False

      if self.content: 
         if tag in ('p', 'li', 'h1', 'h2', 'pre'): 
            self.block = False

         if tag in ('p', 'h1', 'h2'): 
            self.write('\n\n')
         elif tag in ('ul', 'ol', 'li'): 
            self.write('\n')
         elif tag == 'pre' and not self.blockquote: 
            self.write('}}}\n\n')
         elif tag == 'blockquote': 
            self.write(']]]')
            cite = self.blockquote.get('cite', None)
            if cite is not None: 
               self.write(' - %s' % cite)
            self.write('\n\n')
            self.blockquote = False
         elif tag == 'a': 
            attrs, dual = self.anchor, True
            uri, title = attrs.get('href', ''), attrs.get('_', '')
            stuff = [w.strip() for w in title.split(' ') if w.strip()]
            stitle = ''.join(stuff)
            if uri.startswith('./'): 
               wn = uri[2:]
               if r_name.match(wn): 
                  if wn == stitle: 
                     dual = False
            if not dual: self.write('{%s}' % title)
            else: self.write('{%s %s}' % (uri, title))
            self.anchor = False
         elif tag == 'strong': 
            self.write('}')
         elif tag == 'div': 
            self.content = False

   def handle_data(self, data): 
      if self.current in ('p', 'li', 'h1', 'h2', 'pre'): # d8uv, pre added
         data = data.replace('{', '{{')

      if self.content and self.block: 
         if not self.anchor: 
            self.write(data)
         else: self.anchor['_'] += data

   def handle_charref(self, name): 
      if self.content and self.block: 
         if name.startswith('x'): 
            result = '{U+%s}' % name.lstrip('x')
         elif name == '8212': 
            result = '--'
         else: raise "ParseError", "Unknown character reference: %s" % name

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

   def handle_entityref(self, name): 
      if self.content and self.block: 
         entities = {'lt':'<', 'gt':'>', 'amp':'&', 'quot':'"'}
         result = entities.get(name, '?')

         if not self.anchor: 
            self.write(result)
         else: self.anchor['_'] += result

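# Wikify is the inverse of TextParser: fed the XHTML of a saved page, it 
# writes back the wiki source, e.g. turning <li> items into '- ' lines, 
# <strong> into {* ...}, and &#8212; back into '--'.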
def wikify(s): 
   output = StringIO()
   parser = Wikify(write=output.write)
   parser.feed(s) # @@ except?
   output.flush()
   output.seek(0)
   return output.read()
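# Example (illustrative): for a page saved by this script, 
#   wikify(open('SomePage.html').read())
# recovers the wiki text of the div class="content" section, ready for 
# re-editing.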

def inboundLinks(wn): 
   if not os.path.exists('@links'): # @@ isDir?
      os.mkdir('@links')
   return [fn[9:-(len(wn)+1)] for fn in glob.glob('./@links/*%%%s' % wn)]

def outboundLinks(wn): 
   if not os.path.exists('@links'): # @@ isDir?
      os.mkdir('@links')
   return [fn[len(wn)+10:] for fn in glob.glob('./@links/%s%%*' % wn)]
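# The @links directory holds one empty marker file per link: a link from 
# page foo to page bar is the file '@links/foo%bar'. So, illustratively, 
# once foo links to bar: 
#   outboundLinks('foo') -> ['bar']
#   inboundLinks('bar')  -> ['foo']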

def html(title, body): 
   s = '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
   s += '<html xmlns="http://www.w3.org/1999/xhtml">\n'
   s += '<head profile="%s">\n' % profile
   s += '<title>%s</title>\n' % title
   if '/' in os.environ.get('REQUEST_URI', '')[len(base)+1:]: # heh
      s += '<link rel="stylesheet" type="text/css" href="../style.css"/>\n'
   else: s += '<link rel="stylesheet" type="text/css" href="style.css"/>\n'
   s += '</head>\n'
   s += '<body>\n'
   s += body + '\n'
   s += '</body>\n'
   s += '</html>\n'
   return s
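# Example (illustrative): html('Hi', '<p>x</p>') wraps the body in the 
# XHTML shell above; the head's profile attribute is what Wikify later 
# checks before trusting a document.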

def compile(wn, text, getlinks=False): 
   if getlinks: # @@ horky, but oh well
      content, rawlinks = wikiParse(text, getlinks=getlinks)
   else: content = wikiParse(text, getlinks=getlinks)

   s = '<div class="navigation">\n(<a href="./">home</a> | '
   s += '<a href="@info/%s">information</a>)\n</div>\n' % wn

   heading = None
   if content.startswith('<h1'): 
      i = content.find('</h1>') + len('</h1>')
      j = content[:i].find('>')
      heading = content[:i][j+1:-len('</h1>')]
      content = '<h1>%s</h1>%s' % (heading, content[i:])
   else: s += '<h1>%s: %s</h1>\n' % (shortname, wn)
   s += '<div class="content">\n%s\n</div>\n\n' % content
   s += '<div class="footer">\n'
   s += '%s. This is a pwyky site.' % owner
   if pedit: 
      s += ' <a href="@%s/%s">' % (nedit, wn)
      s += 'Edit this document.</a>'
   s += '\n</div>'

   if (heading is not None) and (heading != wn): 
      title = '%s - %s' % (heading, wn)
   else: title = wn
   if getlinks: 
      return html(title, s), rawlinks
   return html(title, s)

def rebuild(fn): 
   stat = os.stat(fn)
   atime, mtime = stat.st_atime, stat.st_mtime
   s = open(fn).read()
   try: s = wikify(s)
   except "ParseError", e: 
      s = '<!-- ParseError: %s -->\n\n' % e
      s += open(fn).read()
   else: s = compile(fn[:-len('.html')], s)
   open(fn, 'w').write(s)
   try: os.utime(fn, (atime, mtime))
   except OSError: pass

def get(wn): 
   if os.path.exists(wn + '.html'): 
      return open(wn + '.html').read()
   else: 
      msg = '<h1>%s</h1>\n' % wn
      msg += '<p>This page does not yet exist.'
      if pedit: 
         msg += ' <a href="@%s/%s">Create it!</a>' % (nedit, wn)
      msg += '</p>\n'
      return html('Create %s' % wn, msg)

def edit(wn): 
   if os.path.exists(wn + '.html'): 
      try: s = wikify(open(wn + '.html').read())
      except "ParseError", e: 
         s = "Error: couldn't wikify source! (%s)\n" % e
   else: s = ''
   if wn == default: wn = ''
   return html('Editing %s' % (wn or default), '''
<form action="../%s" method="post">
<p><textarea name="text" rows="20" cols="80">%s</textarea></p>
<p><input type="submit" value="Save"/></p>
</form>''' % (wn, cgi.escape(s)))

def info(wn): 
   if not os.path.exists(wn + '.html'): 
      return "Page doesn't exist: %s" % wn
   results = []
   for fn in glob.glob('*.html'): 
      fn = fn[:-len('.html')]
      for title in outboundLinks(fn): 
         words = title.split(' ')
         words = [word.strip() for word in words if word.strip()]
         if ('/' not in words[0]) and (':' not in words[0]): 
            if ''.join(words) == wn: 
               results.append(fn) # @@ break
   r = ['- {../%s %s} ({../@info/%s @info})' % (f, f, f) for f in results]
   try: content = wikify(open(wn + '.html').read())
   except "ParseError": content = open(wn + '.html').read()
   t = os.stat(wn + '.html').st_mtime
   lastmod = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(t))

   s = '<h1>About <a href="../%s">%s</a></h1>\n' % (wn, wn)
   if os.path.exists(wn + '.prev'): 
      pt = os.stat(wn + '.prev').st_mtime
      plastmod = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(pt))
      s += '<p><a href="../%s.prev">Previous ' % wn
      s += 'version</a> (%s).</p>\n' % plastmod
      s += '<form action="../@meta/diff" method="post">\n<p>'
      s += '<input type="hidden" name="name" value="%s"/>' % wn
      s += '\n'
      s += '<input type="submit" value="diff"/></p>\n</form>\n'
   else: s += '<p>(This is the first version).</p>\n'
   s += '<h2>Statistics</h2>\n'
   s += '<ul>\n<li>Characters: %s</li>\n' % len(content)
   s += '<li>Word count: %s</li>\n' % len(content.split(' '))
   s += '<li>Last-modified: %s</li>\n</ul>\n' % lastmod
   if r: 
      s += '<h2>Inbound Links</h2>\n'
      s += wikiParse('\n'.join(r)) + '\n'
   else: s += '<p>This page has no inbound links.</p>\n'
   s += '<p class="footer">%s. Nearby: <a href="../">home</a>.</p>\n' % owner
   return html('Information About %s' % wn, s)

def meta(wn): 
   if wn == 'about': 
      pages = {
         'about': 'This page.', 
         'archive': 'Create a .tar.gz of the whole site.', 
         'diff': 'Find differences between a page and its previous version.', 
         'grep': 'Grep (search) the text in the site.', 
         'names': 'Provide a list of all pages.', 
         'needed': 'List of pages that are linked to but not yet made.', 
         'todo': 'List of todo items in the site.', 
         'unlinked': 'Pages that are not linked to elsewhere.', 
         'updates': 'Shows the most recent changes.'
      }.items()
      pages.sort()
      pages = ['- {./%s %s} - %s' % (k, k, v) for k, v in pages]
      pagelist = wikiParse('\n'.join(pages))
      s = '<h1>@meta: About This Site</h1>\n'
      s += '<p>This site is a pwyky installation '
      s += '(<a href="http://infomesh.net/pwyky/">pwyky</a> is a simple '
      s += 'wiki in Python). The following @meta pages are available:</p>\n'
      s += pagelist
   elif wn == 'rebuild': 
      for fn in glob.glob('*.html'): 
         rebuild(fn)
      s = '<p>All pages have been rebuilt.</p>\n'
   elif wn == 'todo': 
      todo = '@@'
      r = '(?sm)%s[ \n].+?(?:\.(?=[ \n<])|\?(?=[ \n<])|.(?=<)|\n\n)' % todo
      r_regexp = re.compile(r)
      results = {}
      for fn in glob.glob('*.html'): 
         for line in open(fn).read().splitlines(): 
            find = r_regexp.findall(line)
            if find: 
               if results.has_key(fn): 
                  results[fn] += find
               else: results[fn] = find
      results = results.items()
      results.sort()
      s = '<h1>Todo Items</h1>\n'
      s += '<dl>\n'
      for (fn, found) in results: 
         pn = fn[:-len('.html')]
         s += '<dt><a href="../%s">%s</a></dt>\n' % (pn, pn)
         for find in found: 
            s += '<dd>%s</dd>\n' % find
      s += '</dl>\n'
   else: s = '<p>Unknown or unimplemented @meta action: %s</p>\n' % wn
   s += '<p class="footer">%s. Nearby: <a href="../">home</a>.</p>\n' % owner
   return html('@meta: %s' % wn, s)

def post(wn): 
   form = cgi.FieldStorage()
   if not form.has_key('text'): return
   text = form['text'].value
   text = text.replace('\r\n', '\n').replace('\r', '\n')
   content, rawlinks = compile(wn, text, getlinks=True)
   if os.path.exists(wn + '.html'): 
      if os.path.exists(wn + '.prev'): 
         os.remove(wn + '.prev')
      os.rename(wn + '.html', wn + '.prev')
   open(wn + '.html', 'w').write(content)
   if not os.path.exists('@links'): # @@ isDir?
      os.mkdir('@links')
   for fn in glob.glob('./@links/%s%%*' % wn): 
      os.remove(fn)
   for link in rawlinks: 
      words = [w.strip() for w in link.split(' ') if w.strip()]
      if words and ('/' not in words[0]) and (':' not in words[0]): 
         open('./@links/%s%%%s' % (wn, ''.join(words)), 'w').close()

def main(): 
   print 'Content-Type: text/html; charset=utf-8'
   print
   method = os.environ.get('REQUEST_METHOD', 'GET')
   path = os.environ.get('REQUEST_URI', s_name)[len(base):]
   addr = os.environ.get('REMOTE_ADDR', '-')
   referer = os.environ.get('HTTP_REFERER', '-')
   if logdir: 
      t = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
      f = open(os.path.join(logdir, 'pwyky.log'), 'a')
      print >> f, t, method, path, addr, referer
      f.close()
   if path == '/': path = '/%s' % default
   if not path.startswith('/@'): 
      action = 'get'
      wn = path[len('/'):]
   else: 
      i = path.rfind('/')
      action = path[2:i]
      wn = path[(i+1):]
   if (not r_name.match(wn)) and (wn != script): 
      raise 'ScriptError', 'Invalid filename: %s' % wn
   if wn == script: 
      print '<p>Welcome to this pwyky site. '
      print 'Try: <a href="./">home page</a>.</p>'
   elif action == 'get': 
      if method == 'POST': post(wn)
      print get(wn)
   elif action == nedit: print edit(wn)
   elif action == 'info': print info(wn)
   elif action == 'meta': print meta(wn)
   else: print '<p>Unknown action: %s</p>' % action

def run(argv=None): 
   if argv is None: 
      argv = sys.argv[1:]
   if argv: 
      if argv[0] in ('-h', '--help'): 
         print __doc__.lstrip()
      else: 
         if argv[0] in ('-w', '--wikify'): 
            func = wikify
         elif argv[0] in ('-p', '--parse'): 
            func = wikiParse
         if len(argv) > 1: 
            import urllib
            s = urllib.urlopen(argv[1]).read()
         else: s = sys.stdin.read()
         sys.stdout.write(func(s))

if __name__ == '__main__': 
   if os.environ.has_key('SCRIPT_NAME'): 
      try: main()
      except: cgi.print_exception()
   else: run()
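# Command-line usage (illustrative): 
#   python pwyky.py --parse  < page.txt  > page.html
#   python pwyky.py --wikify < page.html > page.txt
# With a second argument the input is fetched via urllib instead of 
# being read from stdin.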