#!/usr/bin/env python
"""
n3mp - An N3 Metaparser using n3.n3
Author: Sean B. Palmer, inamidst.com
Licence: GPL 2; share and enjoy!
Licence: http://www.w3.org/Consortium/Legal/copyright-software
Documentation: http://inamidst.com/n3p/

Derived from:
   http://www.w3.org/2000/10/swap/grammar/predictiveParser.py
   - predictiveParser.py, Tim Berners-Lee, 2004
"""

import sys
import os
import re
import pprint
import cPickle as pickle

# From http://infomesh.net/.../rdf.py
# Based on rdflib
from rdf import Namespace, Graph, URI, bNode, Literal

N3G = Namespace('http://www.w3.org/2000/10/swap/grammar/n3#')
BNF = Namespace('http://www.w3.org/2000/10/swap/grammar/bnf#')

# The grammar graph consulted by N3Metaparser. It is (re)bound by
# metaparse() before any production is processed.
G = None


class N3Metaparser(object):
    """Walk the grammar graph G and collect parser tables.

    After calling an instance with a start production:

    * ``branches`` maps each sequence production to a dict of
      ``{condition: [option, ...]}``.
    * ``regexps`` maps each token production to a compiled regexp.
    """

    def __init__(self, verbose=False):
        self.branches = {}
        self.regexps = {}
        self.verbose = verbose
        self.todo = []  # productions discovered but not yet processed
        self.done = []  # productions already taken off the todo queue

    def __call__(self, start):
        """Process ``start`` and then every production reachable from it."""
        self.doProduction(start)
        while self.todo:
            # doProduction may append to self.todo, so re-test each pass.
            first = self.todo.pop(0)
            self.done.append(first)
            self.doProduction(first)

    def progress(self, msg, err=False):
        """Print ``msg`` to stderr if ``err`` is set or verbose mode is on."""
        if err or self.verbose:
            print >> sys.stderr, msg

    def doProduction(self, prod):
        """Dispatch a single production to match() or sequence().

        Literals are skipped. Raises Exception when ``prod`` has neither a
        ``matches`` nor a ``mustBeOneSequence`` value and is not BNF eof.
        """
        self.progress("Doing production: %s" % prod)
        if isinstance(prod, Literal):
            return

        rhs = G.theObject(prod, BNF['matches'])
        if rhs:
            self.match(prod, rhs)
            return

        rhs = G.theObject(prod, BNF['mustBeOneSequence'])
        if rhs:
            self.sequence(prod, rhs)
        elif prod == BNF['eof']:
            pass  # eof needs no definition; nothing to record
        else:
            raise Exception("No definition of %s" % prod)

    def match(self, prod, rhs):
        """Record token ``prod`` as matching the regular expression ``rhs``."""
        self.progress("Token %s matches %s" % (prod, rhs))
        # @@ check that the prod hasn't already been added
        self.regexps[prod] = re.compile(rhs)
        if not G.objects(prod, BNF['canStartWith']):
            msg = "Warning: No record of what %s can start with" % prod
            self.progress(msg, err=True)

    def sequence(self, prod, rhs):
        """Record every branch of sequence production ``prod``.

        Each item of each branch is queued for later processing unless it
        is already queued or done. ``rhs`` is only used for the progress
        message.
        """
        prodBranch = {}
        self.progress("Found mustBeOneSequence: %s" % rhs)
        for branch in G.objects(prod, BNF['branch']):
            self.progress("Branch: %s" % branch)
            optionName = G.theObject(branch, BNF['sequence'])
            options = tuple(G.items(optionName))
            self.progress("Branch option: %s" % optionName)

            for item in options:
                if item not in self.todo and item not in self.done:
                    self.todo.append(item)

            conditions = G.objects(branch, BNF['condition'])
            self.checkSelector(conditions, prod, optionName, options)

            # Pythonise the options
            pyoptions = []
            for option in options:
                if isinstance(option, Literal):
                    pyoptions.append(unicode(option))
                else:
                    pyoptions.append(str(option))

            # All conditions of one branch share the same options list.
            for condition in conditions:
                self.checkCondition(condition, prodBranch)
                prodBranch[condition] = pyoptions

        self.checkProdBranch(prod, prodBranch)
        self.branches[prod] = prodBranch

    def checkSelector(self, conditions, prod, optionName, options):
        """Report a branch that has no selecting condition.

        When the branch is also empty, the values of ``canPrecede`` for
        ``prod`` are listed as well.
        """
        if not conditions:
            msg = "Error! No selector for %s:%s" % (prod, optionName)
            self.progress(msg, err=True)
            if not options:
                # Uses G.objects, as every other multi-valued lookup in
                # this module does; the original called G.object here.
                for successor in G.objects(prod, BNF['canPrecede']):
                    self.progress(" Successor: %s" % successor, err=True)

    def checkCondition(self, condition, prodBranch):
        """Warn when ``condition`` already selects a branch in ``prodBranch``."""
        if condition in prodBranch:
            self.progress("Warning: %s is the condition for %s" %
                          (condition, prodBranch[condition]), err=True)

    def checkProdBranch(self, prod, prodBranch):
        """Warn about distinct conditions where one is a prefix of another."""
        for p in prodBranch:
            for q in prodBranch:
                if (p.startswith(q) or q.startswith(p)) and p != q:
                    self.progress(
                        "Warning: for %s, %s -> %s, but %s -> %s" %
                        (prod, p, prodBranch[p], q, prodBranch[q]),
                        err=True)


def mkmodule(result, out):
    """Write ``result`` to the file-like ``out`` as Python module source.

    ``result`` must have the keys 'branches' (pretty-printed as-is) and
    'regexps' (a dict of compiled patterns, emitted as re.compile calls).
    """
    branches = result['branches']
    regexps = result['regexps']

    pp = pprint.PrettyPrinter()
    print >> out, '#!/usr/bin/env python'
    print >> out, '"""n3meta - For use with n3p.py."""'
    print >> out, '# Automatically generated by n3mp.py'
    print >> out
    print >> out, 'import re'
    print >> out
    print >> out, 'branches =', pp.pformat(branches)
    print >> out, 'regexps = {'
    for (key, regexp) in regexps.iteritems():
        print >> out, ' %r: re.compile(%r), ' % (key, regexp.pattern)
    print >> out, '}'
    print >> out
    print >> out, 'if __name__=="__main__": '
    print >> out, ' print __doc__'


def metaparse(grammar, start, output, pickle=False, verbose=False):
    """Metaparse ``grammar`` from ``start`` and write the tables to ``output``.

    grammar -- passed to Graph() to load the grammar; rebinds the global G
    start   -- the start production
    output  -- writable file-like object
    pickle  -- if true, dump the result dict as a pickle; otherwise write
               Python module source via mkmodule()
    verbose -- passed through to N3Metaparser
    """
    global G
    G = Graph(grammar)

    metaparser = N3Metaparser(verbose=verbose)
    metaparser(start)
    result = {'branches': metaparser.branches,
              'regexps': metaparser.regexps}

    if pickle:
        # The ``pickle`` parameter shadows the module-level
        # ``cPickle as pickle`` import, so the original
        # ``pickle.dump(...)`` was called on a bool and failed.
        import cPickle
        cPickle.dump(result, output)
    else:
        mkmodule(result, output)


def barf(msg):
    """Print ``msg`` to stderr and exit with status 1."""
    print >> sys.stderr, msg
    sys.exit(1)


def main(argv=None):
    """Command-line entry point.

    ``argv`` is handed to OptionParser.parse_args; None means sys.argv[1:].
    Refuses to overwrite an existing output file.
    """
    from optparse import OptionParser

    parser = OptionParser(usage='%prog [options] ')
    parser.add_option("-g", "--grammar", dest="grammar",
                      default='n3-selectors.rdf', metavar="URI",
                      help="RDF/XML RDF BNF grammar file URI")
    parser.add_option("-s", "--start", dest="start", default=False,
                      help="start production URI", metavar="URI")
    parser.add_option("-p", "--pickle", dest="pickle",
                      action="store_true", default=False,
                      help="output a pickle file, not python module")
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true", default=False,
                      help="toggle verbosity mode")
    options, args = parser.parse_args(argv)

    grammar = options.grammar
    verbose = options.verbose

    if options.start:
        start = URI(options.start)
    else:
        start = N3G['document']

    # Output filename: explicit argument, else a default per output format.
    if len(args) > 1:
        barf("Error: you may only specify one output filename")
    elif args:
        fn = args[0]
    elif options.pickle:
        fn = 'n3meta.pkl'
    else:
        fn = 'n3meta.py'

    if os.path.exists(fn):
        barf("Error: File <%s> already exists, won't overwrite" % fn)

    output = open(fn, 'wb')
    try:
        metaparse(grammar, start, output,
                  pickle=options.pickle, verbose=verbose)
    finally:
        # Close the file even if metaparsing fails part-way.
        output.close()


if __name__ == "__main__":
    main()