#!/usr/bin/env python
"""
garner.py - A GRDDL client implementation using Pyrple
Author: Sean B. Palmer, inamidst.com
Date: the 5th of June, 2004

Background:
   http://www.w3.org/2003/g/data-view
   http://www.w3.org/2004/01/rdxh/spec

License and Derivation:
   Being much deriv'd from that glean.py of Connolly-fame:
      http://www.w3.org/2003/g/glean.py
   And conceived in likeness of its visage, only wholly using the
   gallant author's "Pyrple" module in subversion of that contemptible
   "Redland"; A work which whomsoever should use it shall take it 'pon
   themselves to understand that it be so licensed in the resplendent
   shine of yon W3C Software License:
      http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
   And that, inasmuch as one would quite expect, this very work be
   produced under the aegis of same said license, for the sharement
   and enjoyousness of all those for whom its use be of merit.

Whatcha need:
   /usr/bin/xsltproc
   http://infomesh.net/pyrple/
"""

import sys
import os
import cgi
import tempfile
import urllib
import StringIO
from optparse import OptionParser

import pyrple

HTML_Dialect = 'http://www.w3.org/1999/xhtml'
GRDDL_Dialect = 'http://www.w3.org/2003/g/data-view'
RDF_Dialect = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

DataView = pyrple.Namespace(GRDDL_Dialect + '#')

# When true, progress() writes trace messages to stderr.
DEBUG = False


def grddl(uri, already=None):
    """Garner formal meaning of doc at uri, returning an RDF Graph.

    uri     -- address of the document to process.
    already -- list of profile URIs that must not be followed again;
               defaults to the GRDDL and XHTML dialect URIs.

    A document served as application/rdf+xml is loaded directly.  Any
    other document is run through each transform reported by
    getTransforms() and the RDF/XML results are merged into one graph.
    """
    if already is None:
        already = [GRDDL_Dialect, HTML_Dialect]
    progress("grddl(", (uri, already), ")")

    # Only the headers are needed here; close the connection once read.
    response = urllib.urlopen(uri)
    try:
        content_type = response.info().get('Content-Type') or '?'
    finally:
        response.close()

    if content_type.startswith('application/rdf+xml'):
        return pyrple.Graph(uri=uri)

    G = pyrple.Graph()
    for xform in getTransforms(uri, already):
        rdfxml = doXSLT(xform, uri, {"xmlfile": uri})
        G.feedRDFXML(rdfxml)
        progress("data size:", len(G))
    return G


def getTransforms(uri, already):
    """Return the list of transform addresses that apply to uri.

    getTransforms.xsl is run over the document; each line of its output
    is a key followed by an address.  Addresses keyed 'P' or 'R' are
    treated as profiles, every other address as a transform.
    (NOTE(review): the meaning of the individual keys is defined by
    getTransforms.xsl, which is not part of this file.)

    Each profile not listed in `already` is itself fetched with grddl()
    and queried for profileTransformation and namespaceTransformation
    values, which are added to the result.  Profiles that fail with
    IOError are skipped.  `already` is extended in place with every
    profile that was processed.
    """
    ret, profiles = [], []
    listing = doXSLT('getTransforms.xsl', uri, {"xmlfile": uri})
    for line in listing.splitlines():
        progress("getTransforms line:", line)
        fields = line.split(None, 1)
        if len(fields) != 2:
            # Blank or malformed line: nothing to record.
            continue
        k, u = fields
        if k in ('P', 'R'):
            profiles.append(u)
        else:
            ret.append(u)

    lookups = (
        ("PROFILE XFORM: ", DataView.profileTransformation),
        ("NAMESPACE XFORM: ", DataView.namespaceTransformation),
    )
    for profile in profiles:
        if profile in already:
            continue
        progress("PROFILE: ", profile)
        try:
            # A fresh list is passed so the nested call sees this profile
            # as visited without it being recorded here prematurely.
            profileGraph = grddl(profile, already + [profile])
        except IOError:
            continue

        for label, predicate in lookups:
            query = (pyrple.URI(profile), predicate, None)
            for (subj, pred, obj) in profileGraph.get(*query):
                progress(label, obj.value)
                ret.append(obj.value)
        already.append(profile)
    return ret


def mktemp(text=None):
    """Create a temporary file, optionally holding text; return its name.

    The caller is responsible for deleting the file.
    """
    fd, fn = tempfile.mkstemp()
    # Always wrap and close the descriptor, even when there is nothing
    # to write, so that it is not leaked.
    f = os.fdopen(fd, 'w')
    try:
        if text:
            f.write(text)
    finally:
        f.close()
    return fn


def shell(script, scriptname):
    """Return a function that runs the program at path `script`.

    The returned function takes a list of argument strings, runs the
    program with `scriptname` as argv[0], waits for it to finish, and
    raises Exception if the exit status is non-zero.
    """
    def command(args):
        status = os.spawnv(os.P_WAIT, script, [scriptname] + args)
        if status != 0:
            msg = "%s %s returned %s status"
            # spawnv reports a process killed by a signal as a negative
            # number, hence abs().
            raise Exception(msg % (scriptname, args, abs(status)))
    return command


xsltproc = shell('/usr/bin/xsltproc', 'xsltproc')


def xslt(xsltfn, xmlfn, outfn, params=None):
    """Apply stylesheet xsltfn to xmlfn with xsltproc, writing to outfn.

    params -- optional mapping of names to string values, each passed
              to xsltproc as a --stringparam.
    """
    args = ['--novalid', '-o', outfn]
    for name, value in (params or {}).items():
        args.extend(('--stringparam', name, value))
    xsltproc(args + [xsltfn, xmlfn])


def doXSLT(xsltfn, xmlfn, params=None):
    """Apply stylesheet xsltfn to xmlfn and return the output as a string.

    The output passes through a temporary file which is removed
    afterwards, whether or not the transformation succeeded.
    """
    outfn = mktemp()
    try:
        xslt(xsltfn, xmlfn, outfn, params=params)
        f = open(outfn, 'r')
        try:
            return f.read()
        finally:
            f.close()
    finally:
        os.unlink(outfn)


def progress(*args):
    """Write args to stderr, unseparated, as one line; no-op unless DEBUG."""
    if not DEBUG:
        return
    sys.stderr.write(''.join(map(str, args)))
    sys.stderr.write('\n')


def run(uri, output=None, format=None, debug=None):
    """Garner the document at uri and write the serialized graph.

    output -- an object with a write() method, a filename to be opened
              for writing, or a false value for sys.stdout.
    format -- serialization format handed to the graph; 'rdfxml' when
              not given.
    debug  -- when true, switch on progress messages for the module.
    """
    global DEBUG
    if debug:
        DEBUG = True

    opened = None
    if hasattr(output, 'write'):
        out = output
    elif output:
        out = opened = open(output, 'w')
    else:
        out = sys.stdout

    try:
        G = grddl(uri)
        progress("GRDDL size:", len(G))
        out.write(G.serialize(format or 'rdfxml'))
    finally:
        # Close only what was opened here; never the caller's stream.
        if opened is not None:
            opened.close()


def garner(uri, format=None):
    """Return the serialized RDF garnered from uri as a string."""
    buf = StringIO.StringIO()
    try:
        run(uri, output=buf, format=format)
        return buf.getvalue()
    finally:
        buf.close()


def main(argv=None):
    """Command line entry point.

    argv -- list of arguments to parse; optparse falls back to
            sys.argv[1:] when it is None.
    """
    parser = OptionParser(usage='%prog [options] URI')
    parser.add_option("-o", "--output", dest="output", default=None,
                      help="filename of output; will use stdout otherwise")
    parser.add_option("-f", "--format", dest="format", default=None,
                      help="output format (either rdfxml or ntriples)")
    parser.add_option("-d", "--debug", dest="debug", default=False,
                      action="store_true", help="print info to stderr")
    options, args = parser.parse_args(argv)

    if len(args) == 1:
        run(args[0], options.output, options.format, options.debug)
    else:
        parser.error("Incorrect number of arguments.")


if __name__ == "__main__":
    main()