#!/usr/bin/env python
"""
Copyright 2007, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
"""

# Serialisers for duck-typed RDF graphs.
#
# A graph is any iterable of triples exposing ``subject``, ``predicate`` and
# ``object`` attributes.  Terms are recognised purely by attribute:
#   ``value``   -> URI reference
#   ``label``   -> blank node
#   ``lexical`` -> literal (optionally with ``language`` / ``datatype``)
#
# The code is kept source-compatible with both Python 2 and Python 3.

import re
import sys
from xml.sax.saxutils import escape as _xml_escape

# @@ duck-typing for the win!

# Kinds of serialisation:
# GRDDL with various profiles
# N-Triples
# RDF/XML (done)
# RDF/XML Pretty
# Turtle Pretty
# N3 Pretty
#
# +bz2

RDF_NS = u'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
XMLLiteral = RDF_NS + u'XMLLiteral'

# Characters that must be written as \uXXXX.  Tab, LF and CR are absent
# because the escape functions give them short escapes first.  The "33"
# variant additionally escapes '>' (0x3E) so it cannot close a <...> URI.
r_hex4_32 = re.compile(
    u'([\\x00-\\x08\\x0b\\x0C\\x0E-\\x1F\\x7F-\uFFFF]+)')
r_hex4_33 = re.compile(
    u'([\\x00-\\x08\\x0b\\x0C\\x0E-\\x1F\\x3E\\x7F-\uFFFF]+)')

# Cf. http://bugs.python.org/issue1477
# The astral range may fail to compile (see the issue above); this is a
# deliberate best-effort, so any ordinary failure just disables \UXXXXXXXX
# escaping rather than breaking the import.
try:
    r_hex6 = re.compile(u'([\U00010000-\U0010FFFF]+)')
except Exception:
    r_hex6 = None

# Longest trailing run of XML-name characters that starts with a letter or
# underscore; used to split a predicate URI into namespace + element name.
_r_localname = re.compile(u'[^\\W\\d][\\w.\\-]*\\Z', re.UNICODE)

_NARROW_BUILD_WARNING = (
    "Warning: You're using a Narrow Python build",
    "This means that N-Triples output might not be fully compliant",
    "Use a python compiled with --enable-unicode=ucs4 to fix this",
)


def hex4(m):
    """Return group 1 of match *m* with every character as ``\\uXXXX``."""
    return u''.join('\\u%04X' % ord(c) for c in m.group(1))


def hex6(m):
    """Return group 1 of match *m* with every character as ``\\UXXXXXXXX``."""
    return u''.join('\\U%08X' % ord(c) for c in m.group(1))


def _escape(s, r_hex4):
    """Apply N-Triples string escaping to *s*, using *r_hex4* for \\u escapes.

    Raises ValueError when ``r_hex6`` is unavailable although the interpreter
    is not a narrow build.
    """
    # The backslash must be doubled first so that the escapes added below
    # are not themselves re-escaped.
    s = s.replace('\\', '\\\\')
    s = s.replace('\t', '\\t')
    s = s.replace('\n', '\\n')
    s = s.replace('\r', '\\r')
    s = s.replace('"', '\\"')
    s = r_hex4.sub(hex4, s)
    if r_hex6 is not None:
        return r_hex6.sub(hex6, s)
    if sys.maxunicode <= 0xffff:
        for line in _NARROW_BUILD_WARNING:
            sys.stderr.write(line + '\n')
        return s
    raise ValueError('r_hex6 did not compile properly')


def escaped32(s):
    """Escape *s* for use inside an N-Triples quoted string."""
    # http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    return _escape(s, r_hex4_32)


def escaped33(s):
    """Escape *s* like ``escaped32`` and additionally escape ``>``."""
    # http://lists.w3.org/Archives/Public/www-rdf-comments/2007OctDec/0008
    return _escape(s, r_hex4_33)


def _native(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is UTF-8 encoded; on Python 3 a ``bytes``
    value is UTF-8 decoded.  Native strings pass through untouched.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        return text.decode('utf-8')
    return text.encode('utf-8')


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return _xml_escape(text, {'"': '&quot;'})


def _nt_uri(uri):
    """Return *uri* as an N-Triples ``<...>`` URI reference."""
    # Escape the text itself, not its UTF-8 bytes: escaping bytes would turn
    # one non-ASCII character into several bogus \u00XX escapes.
    return '<' + escaped33(uri) + '>'


def _nt_literal(term):
    """Return literal *term* in N-Triples form, with language or datatype."""
    text = '"' + escaped32(term.lexical) + '"'
    language = getattr(term, 'language', None)
    datatype = getattr(term, 'datatype', None)
    if language:
        return text + '@' + language
    if datatype:
        return text + '^^' + _nt_uri(datatype)
    return text


def ntriples(graph):
    """Yield one N-Triples line (without newline) per triple in *graph*.

    Raises ValueError for a subject that is neither a URI nor a blank node,
    a predicate that is not a URI, or an object that is none of URI, blank
    node or literal.
    """
    for triple in graph:
        if hasattr(triple.subject, 'value'):
            subject = _nt_uri(triple.subject.value)
        elif hasattr(triple.subject, 'label'):
            subject = '_:' + triple.subject.label
        else:
            # Without this branch the previous triple's subject would be
            # silently reused (or the name would be unbound).
            raise ValueError('S: %s' % triple.subject)

        if hasattr(triple.predicate, 'value'):
            predicate = _nt_uri(triple.predicate.value)
        else:
            raise ValueError('P: %s' % triple.predicate)

        if hasattr(triple.object, 'value'):
            obj = _nt_uri(triple.object.value)
        elif hasattr(triple.object, 'label'):
            obj = '_:' + triple.object.label
        elif hasattr(triple.object, 'lexical'):
            obj = _nt_literal(triple.object)
        else:
            raise ValueError('O: %s' % triple.object)

        yield _native(subject + ' ' + predicate + ' ' + obj + ' .')


def rdfxml(graph):
    """Yield the lines of a flat RDF/XML document describing *graph*.

    Each triple becomes its own ``rdf:Description`` element holding a single
    property element.  A start tag is spread over several yielded items, so
    consumers are expected to join the items with whitespace (e.g. newlines).

    Raises ValueError for a term that cannot be serialised, including a
    predicate URI that does not end in a usable XML element name.

    NOTE(review): the element markup (``rdf:RDF``, ``rdf:Description`` and
    the property end tag) is the minimal markup implied by the ``rdf:``
    attributes this function emits -- confirm against the intended output.
    """
    yield _native('<rdf:RDF xmlns:rdf="%s">' % RDF_NS)

    def split(uri):
        # Split at the longest trailing XML-name suffix so that namespace +
        # element name concatenate back to the predicate URI.
        match = _r_localname.search(uri)
        if match is None:
            raise ValueError('Unable to serialise %s' % uri)
        return uri[:match.start()], match.group(0)

    for triple in graph:
        if hasattr(triple.subject, 'value'):
            uri = triple.subject.value
            yield _native('<rdf:Description rdf:about="%s">' % _attr(uri))
        elif hasattr(triple.subject, 'label'):
            label = triple.subject.label
            yield _native('<rdf:Description rdf:nodeID="%s">' % _attr(label))
        else:
            raise ValueError('Unable to serialise %s' % triple.subject)

        if hasattr(triple.predicate, 'value'):
            nsname, tagname = split(triple.predicate.value)
            yield _native('<%s xmlns="%s"' % (tagname, _attr(nsname)))
        else:
            raise ValueError('Unable to serialise %s' % triple.predicate)

        # A literal may carry both attributes with ``language`` set to None,
        # so the datatype is consulted whenever no language tag is present.
        language = getattr(triple.object, 'language', None)
        datatype = getattr(triple.object, 'datatype', None)
        is_xml_literal = (not language) and datatype == XMLLiteral
        if language:
            yield _native('xml:lang="%s"' % _attr(language))
        elif is_xml_literal:
            yield 'rdf:parseType="Literal"'
        elif datatype:
            yield _native('rdf:datatype="%s"' % _attr(datatype))

        if hasattr(triple.object, 'value'):
            uri = triple.object.value
            yield _native('rdf:resource="%s"/>' % _attr(uri))
        elif hasattr(triple.object, 'label'):
            label = triple.object.label
            yield _native('rdf:nodeID="%s"/>' % _attr(label))
        elif hasattr(triple.object, 'lexical'):
            lexical = triple.object.lexical
            if not is_xml_literal:
                # Plain and typed literals are character data; only XML
                # literals are embedded as markup.
                lexical = _xml_escape(lexical)
            end = '</%s>' % tagname
            for line in ('>' + lexical + end).split('\n'):
                yield _native(line)
        else:
            raise ValueError('Unable to serialise %s' % triple.object)

        yield '</rdf:Description>'
    yield '</rdf:RDF>'

# EOF