#!/usr/bin/env python
"""
process - Juno HTML 5 Post-Processor
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

* Turns undelimited blocks into paragraphs
* Converts {... http://...} into links
* Converts & into &amp;, < into &lt;
* Converts |HTML| into quoted <code>HTML</code>
* Converts *...* into em, **...** into strong

http://inamidst.com/whits/code/juno/
"""

from __future__ import with_statement
import sys, re, shutil

# Matches {link text URL}: group 1 is the link text, group 2 the URL. The two
# are separated by spaces and/or newlines, and the URL may use either the
# http or the https scheme.
r_link = re.compile(r'\{([^}]+?)[ \n]+(https?://[^\s<>\'"}]+?)\}')

def convert_links(text):
   """Convert {text URL} shorthand in *text* into HTML anchors.

   '{Example https://example.org/}' becomes
   '<a href="https://example.org/">Example</a>'. Braced text that does not
   end in an http:// or https:// URL is left untouched.

   Returns the converted string.
   """
   return r_link.sub(r'<a href="\g<2>">\g<1></a>', text)

# Matches an ampersand that does NOT start a character reference. The
# lookahead recognises decimal (&#38;), hexadecimal (&#x26; or &#X26;) and
# named references; names may contain digits after the first letter
# (&frac12;, &sup2;, &there4;), so those are left alone as well.
r_ampersand = re.compile(
   r'&(?!(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);)'
)

def convert_ampersands(text):
   """Escape bare ampersands in *text* as &amp;.

   Ampersands that already begin a numeric or named character reference are
   preserved, so running the function twice does not double-escape.

   Returns the converted string.
   """
   return r_ampersand.sub('&amp;', text)

# Matches a '<' that cannot begin markup. Markup starts with '<' followed by
# a letter (start tag), '/' (end tag), '!' (comment/doctype) or '?'
# (processing instruction). A digit cannot start a tag name, so '<3' and
# 'x <5' are treated as literal text and escaped.
r_lessthan = re.compile(r'<(?![A-Za-z!?/])')

def convert_lessthans(text):
   """Escape literal less-than signs in *text* as &lt;.

   A '<' that looks like the start of a tag, comment, doctype or processing
   instruction is left alone; every other '<' (including one at the very end
   of the string) is replaced.

   Returns the converted string.
   """
   return r_lessthan.sub('&lt;', text)

# |<...>| : pipe-delimited markup whose first character is '<' and whose last
# character is '>'; group 1 is everything between the pipes.
r_code = re.compile(r'\|(<[^|]+>)\|')

def convert_code(text):
   """Turn |<markup>| spans in *text* into quoted <code> elements.

   The pipes are dropped, every '<' inside the span is rewritten as &lt; so
   the markup is displayed rather than interpreted, and the result is wrapped
   in <code>...</code>. Text without such spans is returned unchanged.
   """
   def quote(match):
      markup = match.group(1)
      return '<code>%s</code>' % markup.replace('<', '&lt;')

   return re.sub(r_code, quote, text)

# *...* : the opening star must be followed by, and the closing star preceded
# by, one of a small set of word-like characters, so a free-standing
# asterisk (as in "2 * 3") is not taken for markup. Group 1 is the content.
r_emphasis = re.compile(r'\*(?=[\w\'<_.-])([^*]+)(?<=[\w\'>?!_.-])\*')

def convert_emphasis(text):
   """Rewrite *...* spans in *text* as <em>...</em> and return the result."""
   replacement = r'<em>\g<1></em>'
   return re.sub(r_emphasis, replacement, text)

# **...** : same shape as the single-star emphasis pattern, but delimited by
# doubled asterisks. Group 1 is the content between the delimiters.
r_strong = re.compile(r'\*\*(?=[\w\'<_.-])([^*]+)(?<=[\w\'>?!_.-])\*\*')

def convert_strong(text):
   """Rewrite **...** spans in *text* as <strong>...</strong>."""
   replacement = r'<strong>\g<1></strong>'
   converted = re.sub(r_strong, replacement, text)
   return converted

def convert_phrasing(text):
   """Apply every inline conversion to *text* and return the result.

   The steps run in a fixed order: links, ampersands, less-than signs,
   |code| spans, strong, emphasis. Strong has to run before emphasis because
   the single-star pattern would otherwise match the inner pair of stars of
   a **...** span.
   """
   pipeline = (
      convert_links,
      convert_ampersands,
      convert_lessthans,
      convert_code,
      convert_strong, # must come before emphasis
      convert_emphasis,
   )
   for step in pipeline:
      text = step(text)
   return text

def _write_paragraph(lines, output, trailer=''):
   """Write *lines* to *output* as a single <p> element.

   The lines are joined, run through convert_phrasing, stripped of one
   trailing newline and wrapped in <p>...</p>. *trailer* (for example the
   blank line that ended the paragraph) is written right after it.
   """
   content = convert_phrasing(''.join(lines))
   if content.endswith('\n'):
      content = content[:-1]
   output.write('<p>' + content + '</p>\n' + trailer)

def process_blocks(blocks, output):
   """Write the collected contents of a section to *output*.

   *blocks* is a sequence whose items are either

   * a list of lines: an element block, written line by line after
     convert_phrasing, without paragraph wrapping; or
   * a single line (string): consecutive non-blank lines are gathered into
     a paragraph, and a whitespace-only line ends the paragraph and is
     itself written through unchanged.

   Items are emitted in input order: paragraph text that is directly
   followed by an element block (no blank line in between) is written
   before that element rather than after it.
   """
   block = []  # lines of the paragraph currently being collected

   for obj in blocks:
      if isinstance(obj, list):
         # Emit pending paragraph text first so output keeps input order.
         if block:
            _write_paragraph(block, output)
            block = []
         for line in obj:
            output.write(convert_phrasing(line))
         continue

      line = obj
      if line.strip(' \t\r\n'):
         block.append(line)
         continue

      # Whitespace-only line: ends the current paragraph, if there is one.
      if block:
         _write_paragraph(block, output, line)
         block = []
      else:
         output.write(line)

   # Paragraph that runs up to the end of the section.
   if block:
      _write_paragraph(block, output)

# Matches a tag at the very start of a line; group 1 is the element name.
r_tag = re.compile(r'<([A-Za-z0-9]+)')

def process(input, output):
   """Copy *input* to *output*, post-processing the contents of sections.

   *input* is an iterable of lines and *output* an object with a write()
   method. Lines outside a section are written through unchanged. A section
   starts at a line beginning with '<section' and ends at a line beginning
   with '</section>'; both tag lines are written through unchanged.

   Inside a section, a line that begins with a tag opens an element block,
   which collects lines up to and including the first line containing the
   matching closing tag (compared case-insensitively; this may be the opening
   line itself). All other lines are collected individually. The collected
   items are handed to process_blocks at every section boundary and at end
   of input, and the collection is then reset so that nothing is written
   twice.
   """
   blocks = []       # items collected for process_blocks
   section = False   # True while inside <section ...> ... </section>
   element = None    # name of the element block being collected, if any

   for line in input:
      if line.startswith('<section'):
         # A section tag inside a section: emit what precedes it first so
         # that the output keeps the input order.
         if blocks:
            process_blocks(blocks, output)
            blocks = []
         element = None
         section = True
         output.write(line)
         continue
      elif line.startswith('</section>'):
         section = False
         process_blocks(blocks, output)
         # Start the next section with a clean slate; otherwise its flush
         # would write this section's content again.
         blocks = []
         element = None
         output.write(line)
         continue
      elif not section:
         output.write(line)
         continue

      if element is None:
         m = r_tag.match(line)
         if not m:
            blocks.append(line)
            continue
         element = m.group(1)
         blocks.append([line])
      else:
         blocks[-1].append(line)

      # Checked for the opening line too, so that a one-line element such as
      # <h2>Title</h2> does not swallow the lines that follow it.
      if ('</' + element.lower() + '>') in line.lower():
         element = None

   # Input ended inside an unterminated section: do not drop its content.
   if blocks:
      process_blocks(blocks, output)

def main():
   """Post-process the file named on the command line, in place.

   The processed text is first written to a private temporary file. The
   original is then moved to /tmp/juno.backup and the processed file is
   moved into its place, keeping the original's permission bits. If
   processing fails, the temporary file is removed and the original is left
   untouched.

   Exits with a usage message when no filename is given.
   """
   import os
   import tempfile

   if len(sys.argv) < 2:
      sys.exit('Usage: %s FILENAME' % sys.argv[0])
   filename = sys.argv[1]

   # mkstemp creates a new, uniquely named file that only the current user
   # can access, unlike a fixed and predictable name under /tmp.
   fd, processed = tempfile.mkstemp(prefix='juno.', suffix='.processed')
   try:
      with os.fdopen(fd, 'w') as output:
         with open(filename) as input:
            process(input, output)
      # mkstemp files are created with mode 0600; carry over the original
      # file's permission bits so the replacement stays readable.
      shutil.copymode(filename, processed)
   except Exception:
      os.remove(processed)
      raise

   shutil.move(filename, '/tmp/juno.backup')
   shutil.move(processed, filename)

if __name__ == '__main__':
   main()
