(.*)

""" Author: tav, http://tav.espians.com/aaken.png http://tav.espians.com/espra/images/esp/aaken.png URI: https://svn.espnow.net/24weeks/trunk/source/python/format/rst/rst.py License: Public Domain; share and enjoy! Downloaded by Sean B. Palmer, March 2008, to save it for posterity... -- A simple ReStructuredText (RST) -> XHTML convertor. It makes a few healthy (for me) assumptions: - the parameters are taken as paths to the source files. - if '-s' is listed in the parameters, then output is redirected to the stdout. - otherwise, the output is written to filenames in the current directory which have .html either appended or spliced/appended to the original filenames. - if '-e' is listed in the parameters, then the output is templated with the 'header.tpl' and 'footer.tpl' files Note: This needs to be refactored and some bits rewritten for disco. """ import os import re import settings import sys import traceback from codecs import open as open_file from optparse import OptionParser, OptionError from string import punctuation as PUNCTUATION from time import clock from docutils import nodes from docutils.core import Publisher from docutils.io import FileInput, StringInput, StringOutput from docutils.parsers.rst import directives from docutils.readers.standalone import Reader # this step takes time from docutils.writers.html4css1 import Writer as HTMLWriter from docutils.writers.latex2e import Writer as LatexWriter from docutils.utils import relative_path pimp('format/parser', 'IteratorParser') pimp('os/find') pimp('importer', 'load_module_from_code') # ------------------------------------------------------------------------------ # some konstants # ------------------------------------------------------------------------------ LINE = '-' * 78 DOCTYPE = """""" # ------------------------------------------------------------------------------ # rst direktives # ------------------------------------------------------------------------------ def imap_directive(name, 
arguments, options, content, lineno, content_offset, block_text, state, state_machine): """Render image s""" attributes = {'format': 'html'} encoding = options.get('encoding', state.document.settings.input_encoding) image_alt = options.get('alt', '') image_class = options.get('class', '') attributes['source'] = imap_source = arguments[0] source_dir = os.path.dirname( os.path.abspath(state.document.current_source) ) path = os.path.normpath(os.path.join(source_dir, imap_source)) path = relative_path(None, path) try: state.document.settings.record_dependencies.add(path) raw_file = FileInput( source_path=path, encoding=encoding, error_handler=state.document.settings.input_encoding_error_handler, handle_io_errors=None) except IOError, error: severe = state_machine.reporter.severe( 'Problems with "%s" directive path:\n%s.' % (name, error), nodes.literal_block(block_text, block_text), line=lineno) return [severe] text = raw_file.read().replace( 'ALT-GOES-HERE', image_alt).replace( 'CLASS-GOES-HERE', image_class) raw_node = nodes.raw('', text, **attributes) return [raw_node] imap_directive.arguments = (1, 0, False) # (required, optional, is-spaces-allowed-in-last) imap_directive.options = { 'alt': directives.unchanged, 'class': directives.class_option } imap_directive.content = False directives.register_directive('imap', imap_directive) # break! def break_directive(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine): attributes = {'format': 'html'} if arguments: break_class = arguments[0] else: break_class = 'clear' raw_node = nodes.raw('', '

' % break_class, **attributes) # return [nodes.transition()] return [raw_node] break_directive.arguments = (0, 1, True) break_directive.options = { 'class':directives.class_option } break_directive.content = False directives.register_directive('break', break_directive) # code! def code_directive(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine): """Prettify

 snippets into marked up html blocks"""

    attributes = {'format': 'html'}
    encoding = options.get('encoding', state.document.settings.input_encoding)

    code_type = arguments[0]

    content = "\n" + "\n".join(content) + "\n"
    content = str(content) # unicode gah

    text = _code2html(code_type, content)

    raw_node = nodes.raw('', text, **attributes)
    return [raw_node]
    
# Directive signature for ``code``, in the order
# (required, optional, is-spaces-allowed-in-last): exactly one argument,
# which the directive body uses as the code type.
code_directive.arguments = (1, 0, False)
# NOTE(review): the directive body reads an 'encoding' option, which is not
# declared here -- confirm whether it should be.
code_directive.options = {
    'code_type':directives.unchanged
    }
# The directive carries a content block (the snippet to be rendered).
code_directive.content = True

directives.register_directive('code', code_directive)

# tasks!

# The values accepted by the ``status`` option of the task directive.
status_values = (
    'not started',
    'in progress',
    'completed',
    )

def is_a_valid_status(argument):
    """Check a ``status`` option value against ``status_values``.

    The check is delegated to ``directives.choice``; whatever that helper
    returns (or raises) is passed straight through to the caller.
    """
    checked = directives.choice(argument, status_values)
    return checked

def task_directive(name, arguments, options, content, lineno,
                   content_offset, block_text, state, state_machine):
    """Report how the directive was invoked and emit its options as a node.

    An info-level message describing the directive name, arguments and
    options is sent to the state machine's reporter (with the content block
    attached as a literal block when there is one).  The reporter's return
    value is not used.

    Returns a one-element list holding a ``nodes.raw`` built from the string
    form of ``options``.
    """
    report = state_machine.reporter.info
    summary = ('Directive processed. Type="%s", arguments=%r, options=%r, '
               % (name, arguments, options))

    if not content:
        report(summary + 'content: None', line=lineno)
    else:
        body = '\n'.join(content)
        report(summary + 'content:', nodes.literal_block(body, body),
               line=lineno)

    return [nodes.raw(str(options))]

# Directive signature in the order
# (required, optional, is-spaces-allowed-in-last): no required argument,
# one optional argument, whitespace allowed in it.
task_directive.arguments = (0, 1, True)
# Every option is passed through unchanged except ``status``, which must be
# one of ``status_values``.
task_directive.options = {
    'option':directives.unchanged,
    'title':directives.unchanged,
    'description':directives.unchanged,
    'assigned-by':directives.unchanged,
    'assigned-to':directives.unchanged,
    'status':is_a_valid_status,
    'predicted-eta':directives.unchanged,
    'value':directives.unchanged
    }
task_directive.content = True
# NOTE(review): no ``directives.register_directive`` call for this directive
# is visible in this part of the file -- confirm it is registered elsewhere.

# ------------------------------------------------------------------------------
# konvert plain kode snippets to funky html
# ------------------------------------------------------------------------------

def code2html(content):
    """Render one regex-matched code snippet as html.

    ``content`` is a match object: group 1 names the code type (trimmed and
    lower-cased here) and group 2 is the snippet text.  Both are handed to
    ``_code2html``, whose result is returned.
    """
    kind = content.group(1)
    kind = kind.strip().lower()
    snippet = content.group(2)

    return _code2html(kind, snippet)

def _code2html(code_type, content):
    """Dispatch ``content`` to the html renderer matching ``code_type``.

    ``code_type`` is compared as-is against 'python', 'pyrex', 'ocaml' and
    the xml family ('html', 'xml', 'zcml').
    """

    if code_type == 'python':
        return py2html(content)

    elif code_type == 'pyrex':
        return pyx2html(content)

    elif code_type == 'ocaml':
        return ocaml2html(content)

    elif code_type in ['html', 'xml', 'zcml']:
        # NOTE(review): the statement below is incomplete in this copy of the
        # file (the ``replace(`` call is never closed and any fallback branch
        # is missing) -- restore it from the upstream source.
        return xml2html(content).replace(
            '%s' % (content)

def increment_counter(variable, context):
    """Count occurrences of ``variable`` in the ``context`` mapping.

    The first time a name is seen its counter is created at 0 and an empty
    string is returned; every later call bumps the stored counter by one and
    returns the new value.  ``context`` is updated in place.
    """
    if variable not in context:
        context[variable] = 0
        return ''

    count = context[variable] + 1
    context[variable] = count
    return count

def py2html(content):
    """Convert raw python code snippets into css'd output

    The snippet is tagged with the mxTextTools ``tag`` function using the
    ``pytable`` tag table and rendered through ``render_formatted_code`` with
    ``py2html.tag_dict`` as the markup table.
    """

    # NOTE(review): as written every pair below replaces a character with
    # itself, so the chain is a no-op.  These look like HTML entity escapes
    # whose replacement text was lost -- confirm against the upstream source.
    content = content.replace(
        '"', '"').replace(
        '&', '&').replace(
        '@', '@').replace(
        '<', '<').replace(
        '>', '>')

    from mx.TextTools.mxTextTools import tag
    # ``pimp`` is not defined in the visible source; presumably it makes
    # ``pytable`` available in this scope -- TODO confirm.
    pimp('parser/mxspec/python', 'python_script as pytable')

    # element [1] of tag()'s return value is what gets used as the tag list
    formatter = lambda text,tag=tag,pytable=pytable: tag(text, pytable)[1]

    return render_formatted_code('py', content, formatter, py2html.tag_dict)

# Maps a tag id to a (pre, post) pair inserted around the tagged span; either
# element may be a callable taking (text, context) instead of a string.
# NOTE(review): every string here is empty in this copy of the file, and the
# 'identifier' callable applies ``%`` with two values to a literal with no
# format specifiers, which raises TypeError when called -- the markup appears
# to have been lost.
py2html.tag_dict = {
    'all':('',''),
    'comment':('',''),
    'kw':('',''),
    'parameter':('',''),
    'identifier':(lambda x, context:
                  ''
                  % (x.strip(), increment_counter(x.strip(), context)),
                  ''),
    'str':('','')
    }

def pyx2html(content):
    """Convert raw pyrex code snippets into css'd output

    Same pipeline as ``py2html`` but tagging with the ``pyxtable`` tag table;
    the markup table is shared (``py2html.tag_dict``).
    """

    # NOTE(review): as written every pair below replaces a character with
    # itself, so the chain is a no-op.  These look like HTML entity escapes
    # whose replacement text was lost -- confirm against the upstream source.
    content = content.replace(
        '"', '"').replace(
        '&', '&').replace(
        '@', '@').replace(
        '<', '<').replace(
        '>', '>')

    from mx.TextTools.mxTextTools import tag
    # ``pimp`` is not defined in the visible source; presumably it makes
    # ``pyxtable`` available in this scope -- TODO confirm.
    pimp('parser/mxspec/pyrex', 'pyrex_script as pyxtable')

    # element [1] of tag()'s return value is what gets used as the tag list
    formatter = lambda text,tag=tag,pyxtable=pyxtable: tag(text, pyxtable)[1]

    return render_formatted_code('pyx', content, formatter, py2html.tag_dict)


def xml2html(content):
    """Convert raw xml/html snippets into css'd output

    The snippet is tagged with the mxTextTools ``tag`` function using the
    ``htmltable`` tag table and rendered through ``render_formatted_code``
    with a local markup table.
    """

    # NOTE: this implementation is a quick hack -- replace it with a proper
    # one when time allows.

    # NOTE(review): as written every pair below replaces a character with
    # itself, so the chain is a no-op.  These look like HTML entity escapes
    # whose replacement text was lost -- confirm against the upstream source.
    content = content.replace(
        '"', '"').replace(
        '&', '&').replace(
        '<', '<').replace(
        '>', '>')

    from mx.TextTools.mxTextTools import tag
    # ``pimp`` is not defined in the visible source; presumably it makes
    # ``htmltable`` available in this scope -- TODO confirm.
    pimp('parser/mxspec/html', 'htmltable')

    # element [1] of tag()'s return value is what gets used as the tag list
    formatter = lambda text,tag=tag,htmltable=htmltable: tag(text, htmltable)[1]

    # tag id -> (pre, post) strings inserted around the tagged span.
    # NOTE(review): all of them are empty in this copy of the file.
    tag_dict = {
        'all':('',''),
        'comment':('',''),
        'htmltag':('',''),
        'text':('',''),
        'tagname':('',''),
        '*syntax error':('',''),
        'tagattr':('','')
        }

    return render_formatted_code('xml', content, formatter, tag_dict)

def ocaml2html(content):
    """Convert raw ocaml snippets into css'd output

    Unlike the other converters no character replacement is applied before
    tagging, and the python markup table (``py2html.tag_dict``) is reused.
    """

    from mx.TextTools.mxTextTools import tag
    # ``pimp`` is not defined in the visible source; presumably it makes
    # ``ocaml_table`` available in this scope -- TODO confirm.
    pimp('parser/mxspec/ocaml', 'ocaml_table')

    # NOTE(review): the ``table`` default is bound but never used; the lambda
    # body looks ``ocaml_table`` up by name instead.
    formatter = lambda text,tag=tag,table=ocaml_table: tag(text, ocaml_table)[1]

    return render_formatted_code('ocaml', content, formatter, py2html.tag_dict)

def render_formatted_code(code_type, content, formatter, tag_dict):
    """Parse using the formatter and render according to the tag_dict

    ``formatter(content)`` must return a list of
    ``(id, left, right, sublist)`` tuples.  ``tag_dict`` maps tag ids to
    ``(pre, post)`` markup inserted at the ``left`` / ``right`` offsets of
    each tagged span.  ``code_type`` is not used in the body.

    Returns the marked-up text with surrounding whitespace stripped.
    """

    # parse
    taglist = formatter(content)

    if settings.debug:
        print taglist

    # prepend the special 'all' tag, which spans the whole snippet
    taglist.insert(0, ('all', 0, len(content), None))

    # prepare splitting
    splitlist = []
    context = {}
    merge_splits(splitlist, content, tag_dict, taglist, context)

    # do splitting & inserting: walk the insertion points in offset order,
    # copying the text between them and emitting the markup at each one
    splitlist.sort()
    l = []
    li = 0

    for ri, dummy, insert in splitlist:
        if ri > li:
            l.append(content[li:ri])
        l.append(insert)
        li = ri

    if li < len(content):
        l.append(content[li:])

    # assigning '' to the slice deletes the element at index 2
    if l[2:3] == ['\n']:
        l[2:3] = '' # get rid of any preceding whitespace

    # return ''.join(l)
    output = ''.join(l).strip()

    # NOTE(review): ``endswith('')`` is always true, so as written the last
    # 13 characters are always dropped.  The literals tested for and appended
    # here appear to have been lost -- confirm against the upstream source.
    if output.endswith(''):
        output = output[:-13] + ''

    return output

def merge_splits(splitlist, content, tag_dict, taglist, context):
    """Flatten a nested tag list into insertion points on ``splitlist``.

    Each ``(id, left, right, sublist)`` entry of ``taglist`` contributes two
    ``(offset, order, text)`` tuples: its opening markup at ``left`` and its
    closing markup at ``right``, with the entries of ``sublist`` (handled
    recursively) appended in between.  Markup comes from ``tag_dict[id]`` as
    a ``(pre, post)`` pair; unknown ids get empty markup.  A non-string
    element is called with the tagged text and ``context`` to produce the
    string.  ``splitlist`` is extended in place; nothing is returned.
    """

    def resolve(markup, span_start, span_end):
        # plain strings are used verbatim, anything else is a callable
        if isinstance(markup, basestring):
            return markup
        return markup(content[span_start:span_end], context)

    for tag_name, start, end, children in taglist:

        try:
            markup_pair = tag_dict[tag_name]
        except KeyError:
            markup_pair = ('', '')

        opener, closer = markup_pair

        # both pieces are resolved before descending into the children
        opener = resolve(opener, start, end)
        closer = resolve(closer, start, end)

        # the running length serves as a tie-breaker that keeps a later sort
        # by offset stable
        splitlist.append((start, len(splitlist), opener))

        if children:
            merge_splits(splitlist, content, tag_dict, children, context)

        splitlist.append((end, len(splitlist), closer))

def doctest2html(content):
    """Mark up the lines of a doctest block and re-join them with newlines.

    Prompt lines (starting with '>>>' or '...') and other non-empty lines are
    each wrapped in their own prefix/suffix pair; empty lines are left alone.
    NOTE(review): both pairs are empty strings in this copy of the file, so
    the lines currently pass through unchanged -- the markup looks lost.
    """

    def mark_up(line):
        if line.startswith('>>>') or line.startswith('...'):
            return '' + line + ''
        if line:
            return '' + line + ''
        return line

    return '\n'.join([mark_up(line) for line in content.splitlines()])

# ------------------------------------------------------------------------------
# pretty typographical syntax converter
# ------------------------------------------------------------------------------

def convert(content):
    """Convert certain characters to prettier typographical syntax

    Two passes are made over ``content``:

    1. a chain of literal replacements (arrows, dashes, guillemets,
       copyright / trademark signs, ellipsis);
    2. a character-by-character scan that turns matching straight quotes
       into curly quotes.

    Returns the converted string.
    """

    # remember: the order of the replacements matter...

    # NOTE(review): the first four pairs replace a character with itself as
    # written; they look like HTML entity escapes whose text was lost.
    # ' -->' is swapped for a placeholder first so that the '->' and '--'
    # replacements cannot touch it, and restored at the end of the chain.
    content = content.replace(
        '<', '<').replace(
        '>', '>').replace(
        '"', '"').replace(
        '&', '&').replace(
        ' -->', 'HTML-COMMENT-ELEMENT-CLOSE').replace(
        '->', '→').replace(
        '<-', '←').replace(
        '---', '–').replace(
        '--', '—').replace(
        '<<', '«').replace(
        '>>', '»').replace(
        '(C)','©').replace(    # hmz, why am i promoting ipr? ;p
        '(c)','©').replace(
        '(tm)','™').replace(
        '(TM)','™').replace(
        '(r)','®').replace(
        '(R)','®').replace(
        '...', '…').replace(
        'HTML-COMMENT-ELEMENT-CLOSE', ' -->')

    # ``IteratorParser`` is not defined in the visible source; it is used
    # here through ``next()`` and ``push()`` (one-item push-back).
    icontent = IteratorParser(content)
    content = []

    # _scurly / _dcurly hold (index + 1) of a pending opening single / double
    # quote, or False when none is pending; _space records whether the
    # previous character was whitespace (True at the very start).
    _scurly = _dcurly = False
    _space = True
    _apply = False

    index = 0
    prev = ''

    while True:

        try:
            char = icontent.next()
        except StopIteration:
            break

        # a quote directly after whitespace (or at the start) may open a
        # quoted span, provided no span is already pending
        if not (_scurly or _dcurly) and _space:
            if char == "'":
                _scurly = index + 1
            elif char == '"':
                _dcurly = index + 1

        # a later, unescaped single quote closes the span when it is followed
        # by punctuation or whitespace, or when peeking ahead fails
        if _scurly and (_scurly != index + 1) and char == "'" and prev != '\\':
            try:
                n = icontent.next()
                if n in PUNCTUATION or n.isspace():
                    _apply = True
                icontent.push(n)
            except:
                # presumably the end of the input -- TODO confirm which
                # exception ``IteratorParser.next`` raises here
                _apply = True
            if _apply:
                # rewrite the already-emitted opening quote, then this one
                content[_scurly - 1] = '‘'
                char = '’'
                _scurly = False
            _apply = False
        
        # same handling for double quotes
        if _dcurly and (_dcurly != index + 1) and char == '"' and prev != '\\':
            try:
                n = icontent.next()
                if n in PUNCTUATION or n.isspace():
                    _apply = True
                icontent.push(n)
            except:
                # presumably the end of the input -- TODO confirm which
                # exception ``IteratorParser.next`` raises here
                _apply = True
            if _apply:
                content[_dcurly - 1] = '“'
                char = '”'
                _dcurly = False
            _apply = False

        content.append(char)
        prev = char
        index += 1
        
        if char.isspace():
            _space = True
        else:
            _space = False

    return ''.join(content)

    # perhaps === heading === stylee ?

# ------------------------------------------------------------------------------
# the meta prettifier funktion which kalls the above ones
# ------------------------------------------------------------------------------

def escape_and_prettify(content):
    """Prettify the text between tags while leaving literal blocks alone.

    The stripped input is split on ``<...>``.  Tag text is emitted unchanged
    (re-wrapped in angle brackets); text outside tags goes through
    ``convert`` unless it sits inside a literal block, in which case it is
    copied verbatim -- or passed to ``doctest2html`` for doctest blocks.

    NOTE(review): the final ``replace`` and ``re.sub`` substitute a pattern
    with the same text as written; the replacement strings look like HTML
    entities that were lost -- confirm against the upstream source.
    """

    # opening tag text -> (how to treat the content, tag text ending the block)
    literal_openers = {
        'tt class="literal"': ('tt', '/tt'),
        'pre class="literal-block"': ('pre', '/pre'),
        'pre class="last literal-block"': ('pre', '/pre'),
        'pre class="code"': ('pre', '/pre'),
        'pre class="doctest-block"': ('doctest', '/pre'),
        }

    literal = False
    pieces = []

    chunks = re.split('(?sm)<(.*?)>',  content.strip())

    for position, chunk in enumerate(chunks):

        if position % 2:
            # odd positions hold the text captured between '<' and '>'
            if literal and literal[1] == chunk:
                literal = False
            if chunk in literal_openers:
                literal = literal_openers[chunk]
            pieces.append('<' + chunk + '>')

        elif not literal:
            pieces.append(convert(chunk))

        elif literal[0] == 'doctest':
            pieces.append(doctest2html(chunk))

        else:
            pieces.append(chunk)

    output = ''.join(pieces)
    output = output.replace('<<', '<<')

    # the negative lookahead skips ampersands that already start an entity
    output = re.sub('&(?![^\s&]*;)', '&', output)

    return output

# ------------------------------------------------------------------------------
# some utility funktions
# ------------------------------------------------------------------------------

def normalise_name(x):
    """Reduce a name/phrase to its canonical, comparable spelling.

    The phrase is trimmed and lower-cased, runs of whitespace become single
    underscores, apostrophes are dropped and hyphens become underscores.
    """
    words = x.strip().lower().split()
    canonical = '_'.join(words)

    for old, new in (("'", ""), ("-", "_")):
        canonical = canonical.replace(old, new)

    return canonical

def render_drop_cap(content):
    """Render the first character as a drop capital

    ``content`` is a regex match object; only its first group is used.
    """

    content = content.groups()[0]

    if content:
        if len(content) >= 2:
            # NOTE(review): the strings around the first character are empty
            # in this copy of the file -- the wrapping markup looks lost.
            return '' + content[0] + \
                   ''  + content[1:]

    # NOTE(review): the literal below is broken across two lines in this copy
    # of the file and is not valid Python -- restore it from upstream.
    return '
'

# ------------------------------------------------------------------------------
# some sektions could be a lot niser, e.g. adding auto-hide
# ------------------------------------------------------------------------------

def render_toc(content):
    """Render table of contents with optional auto-hide toggler

    ``content`` is a regex match object: group 1 holds the class names,
    group 3 the table-of-contents markup.  Group 2 is read but not used.
    """

    classes = content.group(1)
    div_class = content.group(2)
    toc_content = content.group(3)

    if classes.count('show'):
        _show_toc = 'show'
        _style = ''
        # NOTE(review): ``strip('show')`` removes any of the characters
        # s/h/o/w from both ends rather than the word 'show' (for example
        # 'show topics' becomes 'topic') -- confirm the intent.
        classes = classes.strip('show').strip()
    else:
        _show_toc = 'hide'
        _style = ' style="display:none"'

    # NOTE(review): the template below has a single ``%s`` but is given four
    # values, which raises TypeError; its markup appears to have been lost in
    # this copy of the file.
    return r"""

    
      
        
      
      %s
    

    """ % (classes, _show_toc, _style, toc_content)

# module-level counter of intro-notes blocks rendered so far; it is used to
# give each block after the first a distinct id
intro_notes_id = 0

def render_intro_notes(content):
    """Render introductory notes with optional auto-hide toggler

    ``content`` is a regex match object: group 1 is an optional status
    (surrounding whitespace and dashes are removed), group 2 the notes
    markup.  Without a status the block is rendered hidden.
    """

    status, content = content.group(1), content.group(2)

    if status:
        status = status.strip().strip('-')
        _style = ''
    else:
        status = 'hide'
        _style = ' style="display:none"'

    cid = 'intro-notes'

    global intro_notes_id

    # the first block keeps the bare id, later ones get '-1', '-2', ...
    if intro_notes_id:
        cid += '-' + str(intro_notes_id)
    intro_notes_id += 1

    # NOTE(review): the template below has a single ``%s`` but is given five
    # values, which raises TypeError; its markup appears to have been lost in
    # this copy of the file.
    return r"""

    
      
        
      
      %s
    

    """ % (status, cid, cid, _style, content)

# ------------------------------------------------------------------------------
# ehm, a rather domain spesific "plexlink" rendering
# ------------------------------------------------------------------------------

def render_plexlink(content):
    """Render [[plexlink]] if it exists or add markup to indicate it doesn't

    ``content`` is a regex match object whose first group is either a bare
    name or ``target "display name"``.
    """

    # NOTE(review): this replace maps '"' to itself as written; it looks like
    # an entity-unescaping step whose source text was lost.
    content = content.group(1).strip().replace('"', '"')
    link_to = name = content

    # ``target "display name"`` form: split at the first double quote
    if content.endswith('"') and content[:-1].find('"') != -1:
        index = content.find('"')
        link_to = content[:index].strip()
        name = content[index+1:-1]

    # normalise the target: lower case, whitespace runs to underscores,
    # apostrophes dropped, hyphens to underscores
    link = link_to = '_'.join(
        link_to.lower().split()).replace("'", "").replace("-", "_")

    # try the target as-is, then with the 'on_', 'on_the_', 'a_' and 'person_'
    # prefixes, then with '/' replaced by '_'; the result is False when none
    # of them is in WORKS.
    # NOTE(review): WORKS is not assigned anywhere in the visible source.
    link_to = ((link_to in WORKS) and link_to) or \
              (('on_' + link_to in WORKS) and 'on_' + link_to) or \
              (('on_the_' + link_to in WORKS) and 'on_the_' + link_to) or \
              (('a_' + link_to in WORKS) and 'a_' + link_to) or \
              (('person_' + link_to in WORKS) and 'person_' + link_to) or \
              (('_'.join(link_to.split('/')) in WORKS) and \
                '_'.join(link_to.split('/')))
              
    # NOTE(review): each format string below has a single ``%s`` but is given
    # two values, which raises TypeError; the link markup appears to have been
    # lost in this copy of the file.
    if link_to:
        return '%s' % (link_to,
                                                                     name)
    if link.split('/')[0:1] == ['person']:
        link = '/'.join(link.split('/')[1:])
        return '%s' % (
            link, name)

    return '%s' % (link_to, name)

# ------------------------------------------------------------------------------
# again, pseudo-plexlink like inkludes
# ------------------------------------------------------------------------------

def render_includes(content):
    """Return the source text of the work named by an include-plexlink.

    ``content`` is a regex match object whose first group is either a bare
    plexname or ``plexname "title"``; only the plexname part is used.  The
    name is normalised with ``normalise_name`` and must be listed in
    ``WORKS``; the text is read from ``<SOURCES>/<plexname>.txt``.

    Returns the file's lines joined with '\\n' (so line endings are
    normalised and a trailing newline is dropped).

    Raises ``NameError`` when the plexname is not a known work.
    """

    plexname = content.group(1).strip()

    # ``plexname "title"`` form: keep only what precedes the first quote
    if plexname.endswith('"') and plexname[:-1].find('"') != -1:
        plexname = plexname[:plexname.find('"')].strip()

    plexname = normalise_name(plexname)

    if plexname not in WORKS:
        raise NameError("plexname %s not found" % plexname)

    source_file = open(os.path.join(SOURCES, plexname + '.txt'), 'r')
    try:
        source = source_file.read()
    finally:
        # always release the handle, even when the read fails
        source_file.close()

    # Every line is included.  The original loop carried a placeholder for
    # skipping leading ':property:' header lines that was never enabled.
    return '\n'.join(source.splitlines())

    #source_data = re.sub('(?sm)\[\[# (.*?)\]\]', plexlink_include, source_data)

def fixup_headings(source, heading_level):
    """Placeholder for re-levelling h1 .. h6 headings under a toplevel one.

    Not implemented: both arguments are ignored and ``None`` is returned.
    """
    return None

# ------------------------------------------------------------------------------
# parse the :properties: included in a document
# ------------------------------------------------------------------------------

# Text substituted for the '[[# author]]' placeholder by parse_headers.
include_author  = """:Author:    [[A brief history of tav "tav -- espian, artifex, symbiotic engineer"]] """
# Text substituted for '[[# contents-no-break]]': a table of contents plus
# section numbering, both two levels deep.
include_contents_no_break = """

.. contents::   Table of Contents
                :depth: 2
                :backlinks: top

.. sectnum::    :depth: 2

"""

# Text substituted for '[[# contents]]': the same as above followed by a
# ``break`` directive with the argument 'hide'.
include_contents = include_contents_no_break + """

.. break::      hide

"""

def parse_headers(source_data, props, toplevel=False):
    """Parse the metadata stored in the file headers

    Lines of the form ``:name: value`` are treated as properties.  Non-empty
    lines that follow a property and are not themselves property lines are
    folded into its value, separated by single spaces.  Each property is
    stored in ``props`` under its lower-cased name; ``props`` is updated in
    place.

    Properties listed in ``strip_props`` are removed from the returned text;
    all other lines are kept.  ``toplevel`` is not used in the body.

    Returns ``(source_data, props)``; the text is '' when the input is empty
    after placeholder expansion and stripping.
    """

    # we strip out some common editing characters and expand the
    # '[[# ...]]' placeholders
    source_data = source_data.replace(
        '>-> ', '').replace(
        '<-|\n', '').replace(
        '[[# author]]', include_author).replace(
        '[[# contents]]', include_contents).replace(
        '[[# contents-no-break]]', include_contents_no_break).strip()

    if not source_data:
        return '', props

    # strip_props = [
    #     'summary', 'topics', 'author', 'version', 'date', 'authors', 'title'
    #     ]

    strip_props = ['topics', 'summary']
    new_data = []
    # _reached_real_data = False

    # ``IteratorParser`` is not defined in the visible source; it is used
    # here as an iterator over the lines with a one-item ``push()`` back.
    iterative_source = IteratorParser(source_data.splitlines())

    for line in iterative_source:

        # if ((not _reached_real_data) and
        # a property line starts with ':' and has a second ':' closing the name
        if line.startswith(':') and line.find(':', 2) != -1:

            marker = line.find(':', 2)
            prop, value = line[1:marker], line[marker+1:].strip()

            if prop.lower() in strip_props:
                _strip_prop = True
            else:
                _strip_prop = False

            if not _strip_prop:
                new_data.append(line)

            # consume continuation lines belonging to this property
            while True:
                try:
                    line = iterative_source.next()
                    if not _strip_prop:
                        new_data.append(line)
                except:
                    # presumably the end of the input -- TODO confirm which
                    # exception ``IteratorParser.next`` raises here
                    break
                sline = line.strip()
                if sline and not (sline.startswith(':') and \
                                  sline.find(':', 2) != -1):
                    value += ' ' + sline
                else:
                    # blank line or next property: hand the line back to the
                    # outer loop and undo the speculative append above
                    iterative_source.push(line)
                    if not _strip_prop:
                        del new_data[-1]
                    break
            props[prop.lower()] = value
        else:
            new_data.append(line)

    if settings.debug:
        print 'props:', props

    source_data = '\n'.join(new_data)

    # source_data = re.sub('(?sm)\[\[# (.*?)\]\]', render_includes, source_data)

    return source_data, props

# ------------------------------------------------------------------------------
# utility wrapper funktions
# ------------------------------------------------------------------------------

def rst2tex(argv):
    """Convert rST (reStructuredText) into TeX.

    ``argv`` is a full argument vector (``main`` discards its first item).
    ``--format tex`` is appended to a copy, so the caller's sequence -- for
    example ``sys.argv`` -- is left unmodified and repeated calls do not
    accumulate flags.

    Returns whatever ``main`` returns.
    """

    return main(list(argv) + ['--format', 'tex'])

def rst2html(argv):
    """Convert rST (reStructuredText) into HTML.

    ``argv`` is a full argument vector (``main`` discards its first item).
    ``--format html`` is appended to a copy, so the caller's sequence -- for
    example ``sys.argv`` -- is left unmodified and repeated calls do not
    accumulate flags.

    Returns whatever ``main`` returns.
    """

    return main(list(argv) + ['--format', 'html'])

# matches one ``.. include:: <filename>`` line; group 1 is the file name
include_regex = re.compile(r'.. include:: (.*?)\n')


def include_data(content, base_directory):
    """Return the text of an included file with nested includes expanded.

    ``content`` is a match of ``include_regex``; its first group names the
    file to read.  ``base_directory`` is passed along unchanged to nested
    includes.

    The returned text is the file's stripped content wrapped in single
    newlines, with every ``.. include::`` line inside it replaced
    recursively in the same way.

    Raises ``IOError`` if the file cannot be opened (a "Couldn't find"
    message is printed first when the path is not an existing file).
    """

    filename = content.group(1).strip()
    # The name is used exactly as written in the directive; joining it onto
    # base_directory is disabled:
    # filename = os.path.join(base_directory, filename)

    if not os.path.isfile(filename):
        print("Couldn't find: %s" % filename)

    included = open(filename)
    try:
        data = '\n' + included.read().strip() + '\n'
    finally:
        included.close()

    # re.sub calls its replacement with the match only, so base_directory has
    # to be bound here; passing include_data itself raised TypeError on the
    # first nested include.
    return include_regex.sub(
        lambda nested: include_data(nested, base_directory), data)

def rst2txt(argv):
    """Write a ``.txt`` copy of a source file with its includes expanded.

    The last item of ``argv`` is the source file.  The output is written next
    to it, under the same base name with a ``.txt`` extension, and contains
    the source text with every ``.. include::`` line replaced by the result
    of ``include_data``.

    Raises ``ValueError`` when the output path would equal the source path.
    """

    source_filename = argv[-1]

    base_directory = os.path.dirname(source_filename)
    output_filename = os.path.join(
        base_directory,
        os.path.splitext(os.path.basename(source_filename))[0] + '.txt'
        )

    if source_filename == output_filename:
        raise ValueError(
            "Source must have a filename which doesn't end in 'txt'."
            )

    print('---> Creating %s' % output_filename)

    source_file = open(source_filename, 'r')
    try:
        data = source_file.read()
    finally:
        source_file.close()

    # Expand before opening the output, so that a failing include does not
    # leave a truncated output file behind.
    expanded = include_regex.sub(
        lambda content: include_data(content, base_directory),
        data
        )

    output = open(output_filename, 'w')
    try:
        output.write(expanded)
    finally:
        output.close()

# ------------------------------------------------------------------------------
# our kore renderer
# ------------------------------------------------------------------------------

def render_rst(content, format='xhtml', encoding='utf-8',
               output_encoding=None, for_templating=True):
    """Render reStructuredText ``content`` with docutils and post-process it.

    ``format`` (case-insensitive) selects the docutils writer: 'tex', 'latex'
    and 'latex2e' use the LaTeX writer, anything else the HTML writer.
    ``encoding`` is the input encoding and also the output encoding unless
    ``output_encoding`` is given.

    Header properties are extracted with ``parse_headers`` before the text is
    published.  For 'html' / 'xhtml' the published output then goes through a
    series of regex substitutions and ``escape_and_prettify``, and a 'title'
    property is set when one can be found in the output.

    Returns ``(data, props)``.

    NOTE(review): many of the regex and replacement literals in the
    post-processing section are empty or broken across lines in this copy of
    the file (their markup appears to have been lost), so that section is not
    valid Python as it stands -- restore it from the upstream source.
    """

    format = format.lower()

    if format in ['html', 'xhtml']:
        Writer = HTMLWriter

    elif format in ['tex', 'latex', 'latex2e']:
        Writer = LatexWriter
    else:
        # unrecognised formats fall back to html
        Writer = HTMLWriter

    # we parse for metadata whilst also rekursive includes

    if settings.debug:
        print clock(), '\t', 'starting prop find iteration'

    props = {}
    source_data, props = parse_headers(content, props, True)

    # we setup docutils and do the rst -> html konversion

    if settings.debug:
        print clock(), '\t', 'docutils setup'

    #sys.argv[1:1] = [
    #    "--no-generator",
    #    "--traceback",
        # "--no-datestamp",
        # "--pep-references",
        # "--rfc-references",
    #    "--trim-footnote-reference-space",
    #    ]

    pub = Publisher(source_class=StringInput, destination_class=StringOutput)

    # pub.set_components('standalone', 'restructuredtext', 'html') # takes time
    # ^ line above replased by the below paragraph

    pub.reader = Reader(None, 'restructuredtext') # same as Reader()
    pub.parser = pub.reader.parser
    pub.writer = Writer()

    pub_settings = pub.get_settings()
    pub_settings._update({
        'input_encoding': encoding,
        'output_encoding': output_encoding or encoding,
        'footnote_references': 'superscript', # 'mixed', 'brackets'
        # 'report_level':2,
        'halt_level':6,
        }, 'loose')

    pub.set_source(source_data)

    # we do the kore konversion

    if settings.debug:
        print clock(), '\t', 'docutils source -> rst'

    #data = pub.publish(description='', enable_exit_status=1)
    data = pub.publish(description='')

    if settings.debug:
        print clock(), '\t', 'post processing'

    # post rst-konversion prosessing

    if format in ['html', 'xhtml']:

        # [[plexlinks]]
        # data = re.sub(
        #     '(?sm)\[\[(.*?)\]\]',
        #     render_plexlink,
        #     data)

        # syntax highlighting for kode snippets
        data = re.sub(
            '(?sm)(?:\s)?<code class="(.*?)">(?::)?
(?:\n)?\n(.*?)
(?:\n
)?\n(?:\s)?</code>',
            code2html,
            data)

        # support for embedding html into rst dokuments and prettifikation
        data = escape_and_prettify(data)

        # my own h3 header style
        data = re.sub(
            '(?sm)#\n/ (.*?)\n#',
            r'\1',
            data)

        # various  sektions
        data = re.sub(
            '(?sm)<(.*?)>
(.*?)',
            r'
‹\1›
',
            data)

        # table of kontents!
        # data = re.sub(
        #     '(?sm)\n(.*?)
(.*?)',
        #     renderTOC,
        #     data)

        # toc href id and div adder
        data = re.sub(
            '(?sm)(.*?)(.*?)',
            '\\3
\n\\4',
            data)

        # table of kontents!
        data = re.sub(
            '(?sm)\n(.*?)
(.*?)',
            render_toc,
            data)

        # inserting an "#abstract" id
        data = re.sub(
            '',
            r'',
            data)

        # footnote refs looking a bit too superskripted
        # data = re.sub(
        #     '(?sm)^(.*?)',
        #     r'\2',
        #     data)

        #  sektions
        data = re.sub(
            '(?sm)
(.*?)',
            r'\1',
            data)

        #  sektions
        data = re.sub(
            '(?sm)
(.*?)',
            r'\1',
            data)

        # intro notes
        data = re.sub(
            '(?sm)(.*?)', 
            render_intro_notes,
            data)

        # drop shadow wrappers for figures
        data = re.sub(
            '(?sm)
\3',
            data)

        # hmz, a name="" no no
        data = re.sub(r'', '', data)
        data = re.sub(r'', r'', data)

        # get rid of around floating images
        # data = re.sub(
        #     '(?sm)',
        #     data)

        # niser 

        # data = re.sub(
        #     '',
        #    r'',
        #    data)

        # drop cap them first letters
        # data = re.sub(
        #     '(?sm)(.*?)',
        #     render_drop_cap,
        #     data, count=1)

        # try to decipher the document title
        title = re.findall('(.*)', data)
        if title:
            props['title'] = title[0]

        # when templating, only the first match of the pattern below is kept
        if for_templating:
            data = re.findall('(?sm)(.*)', data)[0]

        # strip out comments
        data = re.sub(r'(?sm)\n?\s*\s*\n?', '', data)

    if settings.debug:
        print clock(), '\t', 'end'

    return data, props

# ------------------------------------------------------------------------------
# ignore the following. we parse the skript parameters
# ------------------------------------------------------------------------------

def main(argv):
    """Convert the source files named in ``argv`` and emit the results.

    ``argv`` is a complete argument vector; its first item (the program
    name) is discarded and the rest is parsed with optparse.  Every
    positional argument is taken as the path of a source file.  Each file
    is read, passed to ``render_rst`` and, when ``-t`` was given, wrapped
    in the Genshi template.  The result is written to standard output
    (``-s``) or to ``<basename>.html`` inside the output directory
    (``-o``).  The special ``-o`` value ``FROM_FILE_PATH`` writes each
    result next to its source file.

    An invalid command line, template or output directory is reported on
    standard error and ends the run.  A source that is missing, unreadable
    or contains NUL bytes is reported and skipped.  Errors raised while
    rendering, templating or writing the output propagate to the caller.

    Messages are emitted with stream writes rather than the print
    statement; the text produced is unchanged.

    Always returns ``None``.
    """

    argv = argv[1:]

    # setup the options

    op = OptionParser()

    op.add_option('-t', '--template', dest='template', default='',
                  help="set the location to the template file")

    op.add_option('-f', '--format', dest='format', default='html',
                  help="specify the output format")

    op.add_option('-e', '--encoding', dest='encoding', default='utf-8',
                  help="set the default encoding")

    op.add_option('--output-encoding', dest='output_encoding', default=None,
                  help="set the output encoding if it differs")

    op.add_option('-s', '--std-out', dest='std_output', default=False,
                  action='store_true',
                  help="redirect to standard out instead of to a file?")

    op.add_option('-o', '--output-directory', dest='output_dir',
                  default=os.getcwd(),
                  help="specify the output directory")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        # optparse exits by itself for --help and for malformed options
        return

    if options.std_output:
        std_output = sys.stdout
    else:
        std_output = None

    template = None
    templating = False

    if options.template:
        if not os.path.isfile(options.template):
            sys.stderr.write(
                "%r is not a valid template!\n" % options.template)
            return
        template_file = open(options.template, 'r')
        try:
            template_data = template_file.read()
        finally:
            template_file.close()
        # imported lazily so that genshi is only needed when templating
        from genshi.template import MarkupTemplate
        template = MarkupTemplate(template_data)
        templating = True

    output_directory = options.output_dir
    from_file_path = (output_directory == 'FROM_FILE_PATH')

    if not from_file_path and not os.path.isdir(output_directory):
        sys.stderr.write(
            "%r is not a valid directory!\n" % output_directory)
        return

    for source in args:

        if not std_output:
            sys.stdout.write(
                '\n%s\nConverting: %s\n%s\n\n' % (LINE, source, LINE))

        # we do various checks to verify that we are transforming a text file

        if settings.debug:
            # no space after the tab: this mirrors what the print statement
            # emitted for ``print clock(), '\t', '...'``
            sys.stdout.write(
                '%s \tthe initial existence checks\n' % clock())

        if not os.path.isfile(source):
            sys.stderr.write(
                "%r doesn't seem to be a valid file!\n" % source)
            continue

        try:
            source_file = open(source, 'rUb')
            try:
                source_data = source_file.read()
            finally:
                source_file.close()
        except EnvironmentError:
            sys.stderr.write("Something fucked up reading %r\n" % source)
            continue

        if '\0' in source_data:
            sys.stderr.write("Dies ist ein binary file!\n")
            continue

        # we make note of the basename of the file for future use
        source_basename = os.path.splitext(os.path.basename(source))[0]
        output_filename = source_basename + '.html'

        # try to derive a default title from the filename
        default_title = source_basename.replace('_', ' ')

        output_encoding = options.output_encoding or options.encoding

        output, props = render_rst(
            source_data, options.format, options.encoding,
            output_encoding, templating
            )

        if templating:
            # any failure in here is deliberately left to propagate
            output = unicode(output, output_encoding)
            output = template.generate(
                title=props.get('title', default_title),
                content=output,
                props=props,
                filename=output_filename
                ).render('html', encoding=output_encoding)

        if std_output:
            sys.stdout.write(output)
            sys.stdout.write('\n')
            continue

        if from_file_path:
            output_directory = os.path.dirname(source)

        output_filepath = os.path.join(output_directory, output_filename)

        if settings.debug:
            sys.stdout.write('%s\n' % output_filepath)

        output_file = open(output_filepath, 'wb')
        try:
            output_file.write(output)
        finally:
            output_file.close()

        sys.stdout.write('Done!\n')

def template_sample(data, props, template, default_title, output_filename):
    """Normalise ``props['topics']`` in place and return ``data`` unchanged.

    If ``props`` has a ``topics`` entry it is treated as a comma separated
    string: runs of whitespace are collapsed to single spaces, the string is
    split on commas and every piece is passed through ``normalise_name``.
    Without such an entry the topic list defaults to ``['journal']``.  The
    resulting list is stored back into ``props['topics']``.

    ``template``, ``default_title`` and ``output_filename`` are accepted for
    interface compatibility only; nothing here reads them.

    NOTE: an earlier revision also assembled a "Related Topics" string that
    was never used, and its format operation had more arguments than
    placeholders, so it raised TypeError.  That dead computation is gone;
    no markup is produced here.
    """

    if 'topics' in props:
        collapsed = ' '.join(props['topics'].strip().split())
        topics = [normalise_name(piece) for piece in collapsed.split(',')]
    else:
        topics = ['journal']

    props['topics'] = topics

    return data

# ------------------------------------------------------------------------------
# store them props
# ------------------------------------------------------------------------------

def _store_or_remove(path, text):
    """Write ``text`` to ``path``, or delete ``path`` when ``text`` is None."""

    if text is None:
        if os.path.isfile(path):
            os.unlink(path)
        return

    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        # always release the handle, even when the write fails
        handle.close()


def write_props_to_file(props, source_basename, props_dir=None):
    """Store some of the properties to files

    Two side-car files named after ``source_basename`` are maintained:

    - ``<source_basename>.summary`` holds ``props['summary']`` followed by a
      newline.
    - ``<source_basename>.title`` holds ``props['title']`` with surrounding
      whitespace stripped.

    When a property is absent from ``props`` its file is deleted if it
    exists.  The files live in ``props_dir``; if that is left as ``None``
    the module-level ``PROPS`` directory is used.
    """

    if props_dir is None:
        props_dir = PROPS

    summary_path = os.path.join(props_dir, source_basename + '.summary')
    if 'summary' in props:
        # the one-item tuple keeps tuple values from being unpacked by '%'
        summary_text = '%s\n' % (props['summary'],)
    else:
        summary_text = None
    _store_or_remove(summary_path, summary_text)

    title_path = os.path.join(props_dir, source_basename + '.title')
    if 'title' in props:
        title_text = props['title'].strip()
    else:
        title_text = None
    _store_or_remove(title_path, title_text)

def extra_bollocks():
    """Refresh the per-topic symlinks that point at a generated page.

    Every path that ``find.find(output_filename, TOPICS, 1)`` returns is
    unlinked first.  Then, for each entry of ``topics``, a symlink to
    ``output_filepath`` is created inside ``TOPICS/<topic>``.  A topic
    directory that does not exist yet is created and additionally receives
    an ``index.cgi`` symlink to ``TOPICS/index.cgi``.

    NOTE(review): ``TOPICS``, ``find``, ``output_filename``, ``topics`` and
    ``output_filepath`` are not defined in this function; they have to be
    available as module-level names for a call to succeed -- confirm before
    relying on it.
    """

    # persist certain properties to files
    # write_props_to_file(props, source_basename)

    shared_index = os.path.join(TOPICS, 'index.cgi')

    # drop whatever links are left over from a previous run
    for stale_link in find.find(output_filename, TOPICS, 1):
        os.unlink(stale_link)

    for topic in topics:
        topic_dir = os.path.join(TOPICS, topic)
        page_link = os.path.join(topic_dir, output_filename)
        if not os.path.isdir(topic_dir):
            # a brand new topic also needs its index script
            os.mkdir(topic_dir)
            os.symlink(shared_index, os.path.join(topic_dir, 'index.cgi'))
        os.symlink(output_filepath, page_link)