#!/usr/bin/python """fromIcal.py -- interpret iCalendar data as RDF USAGE: python fromIcal.py [options] foo.ics > foo.rdf options: --base uri --noprotocol Supress SEQUENCE and DTSTAMP --noalarm Supress VALARMs --x include X- properties REFERENCES Internet Calendaring and Scheduling Core Object Specification (iCalendar) November 1998 http://www.ietf.org/rfc/rfc2445.txt http://www.w3.org/2002/12/cal/rfc2445 http://www.w3.org/2002/12/cal/rfc2445.html NOTE: We don't claim to implement the whole spec, nor to even have read all of it. We're taking a data-driven, test-driven approach to RFC2445 coverage/conformance. We start with a .ics file that we understand (because it came from a tool that acts as we expect in response to it or some such) and we implement the parts of the spec necessary to grok the data in that file. As we work on more test files, we cover (and carefully read) more parts of the spec. Building an RDF model: A quick look at iCalendar http://www.w3.org/2000/01/foo TimBL 2000/10/02 Python Style Guide Author: Guido van Rossum http://www.python.org/doc/essays/styleguide.html TODO - RDF API in place of SAX? - rename fromIcal.py? LICENSE RDF Calendar Workspace: http://www.w3.org/2002/12/cal/ Copyright 2002-2003 World Wide Web Consortium, (Massachusetts Institute of Technology, European Research Consortium for Informatics and Mathematics, Keio University). All Rights Reserved. This work is distributed under the W3C(R) Software License http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. """ __version__ = "$Id: fromIcal.py,v 2.29 2005/11/09 23:10:48 connolly Exp $" from warnings import warn import codecs, quopri import XMLWriter def main(): import sys sx = XMLWriter.T(codecs.getwriter('utf-8')(sys.stdout)) base = None suppressed = ['X-'] while len(sys.argv) > 1: if sys.argv[1] == '--base': base = sys.argv[2] del sys.argv[1:3] elif sys.argv[1] == '--noprotocol': suppressed = suppressed + [ 'SEQUENCE', 'DTSTAMP'] del sys.argv[1:2] elif sys.argv[1] == '--x': del suppressed[0] elif sys.argv[1] == '--noalarm': suppressed = suppressed + [ 'Valarm'] del sys.argv[1:2] elif sys.argv[1] == '--notimezone': suppressed = suppressed + [ 'Vtimezone'] del sys.argv[1:2] elif sys.argv[1] == '--help': print __doc__ return else: break interpret(sx, codecs.open(sys.argv[1], 'r', 'utf-8'), base, suppressed) class Namespace: def __init__(self, nsURI, names=()): self._n = nsURI self._names = names def bindAttr(self, pfx, attrs): if pfx: attrs['xmlns:%s' % pfx] = self._n else: attrs['xmlns'] = self._n def __getattr__(self, lname): if lname in self._names: return self._n+lname else: raise AttributeError, lname def sym(self, lname): return self._n + lname RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') iCalendar = Namespace('http://www.w3.org/2002/12/cal/icaltzd#', ('dateTime', )) XMLSchema = Namespace('http://www.w3.org/2001/XMLSchema#', ('dateTime', 'date', 'double', 'integer', 'duration')) def interpret(sx, fp, base=None, suppressed =[]): lines = contentLines(fp) calendars, dummy = findComponents(lines) attrs = {} RDF.bindAttr('rdf', attrs) iCalendar.bindAttr('', attrs) if base: attrs['xml:base'] = base sx.startElement('rdf:RDF', attrs) doComponents(sx, calendars, iCalendarDefs, suppressed = suppressed) sx.endElement('rdf:RDF') ######### # # Property Declarations: # ICALTOKEN -> ('rdfname', 'DEFAULT-TYPE', minCardinality, maxCardinality) # # In theory, these could be derived from the rfc2445-formal schema, but # they're copied by hand, so far. Reading from rfc2445-formal rather # than from rfc2445.txt ensures that we have the relevant information # formalized in machine-readable form, but also provides careful # review where a mechanized translation might mask some bugs. # # The default types are keyed in on-demand as we encounter # the properties in test data. Those that we haven't tested # produce a RuntimeError, telling us to add the default type # (and a test case for it!). # # Mappings to rdfname and DEFAULT-TYPE could be flattened, # but mappings to cardinalities depend on the containing component. # Cardinalities aren't used yet. (oops; forgot YouArentGonnaNeedIt) # # # We represent symbolic values as strings, i.e. # :transp "OPAQUE"; # where the calendar test suite currently uses URIs, i.e. # :transp :opaque; # Relevant code is marked with @@symbol. # # See also "How should I implement controlled vocabularies?" # in http://esw.w3.org/topic/PropertiesForNaming # as of 2003-04-18 16:10:57 # tzprop = { "DTSTART": ('dtstart', 'DATE-TIME', 1, 1), "TZOFFSETTO": ('tzoffsetto', 'TEXT', 1, 1), "TZOFFSETFROM": ('tzoffsetfrom', 'TEXT', 1, 1), "COMMENT": ('comment', 'TEXT', 0, None), "RDATE": ('rdate', 'DATE-TIME', 0, None), "RRULE": ('rrule', 'RECUR', 0, None), 'EXDATE': (u'exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': (u'recurrenceId', 'DATE-TIME', None, None), "TZNAME": ('tzname', 'TEXT', 0, None), } ValarmDefs = ('Valarm', {"ACTION": ('action', 'TEXT', 0, None), #@@symbol "ATTACH": ('attach', 'URI', 0, None), "ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None), "DESCRIPTION": ('description', 'TEXT', 0, 1), "DURATION": ('duration', 'DURATION', 0, None), "REPEAT": ('repeat', 0, None), "SUMMARY": ('summary', "TEXT", 0, None), "TRIGGER": ('trigger', "DURATION", 0, None), }, {}) iCalendarDefs = {'VCALENDAR': ('Vcalendar', {'CALSCALE': ('calscale', 'TEXT', 0, 1), 'METHOD': ('method', 'TEXT', 0, 1), 'VERSION': ('version', 'TEXT', 1, 1), 'PRODID': ('prodid', 'TEXT', 1, 1) }, {'VTIMEZONE': ('Vtimezone', {"TZID": ('tzid', 'TEXT', 1, 1), #hmm... fragid? "LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1), "TZURL": ('tzurl', 'URI', 0, 1), }, {"STANDARD": ('standard', tzprop, {}), "DAYLIGHT": ('daylight', tzprop, {}) } ), 'VEVENT': ('Vevent', {"ATTACH": ('attach', 'URI', 0, None), "CATEGORIES": ('categories', "TEXT", 0, None), #@@list "SUMMARY": ('summary', "TEXT", 0, None), "DTEND": ('dtend', 'DATE-TIME', 0, None), "DTSTART": ('dtstart', 'DATE-TIME', 0, None), "DURATION": ('duration', 'DURATION', 0, None), "TRANSP": ('transp', 'TEXT', 0, None), #@@symbol "ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None), "CONTACT": ('contact', 0, None), "ORGANIZER": ('organizer', "CAL-ADDRESS", 0, None), "RELATED-TO": ('relatedTo', 0, None), # notes on rfc2445#sec4.8.4.6 Uniform Resource Locator # # This is very muddled modelling; url makes sense as # a value type, but not as a property name. It's a grab-bag # for concepts like foaf:homePage, dc:related (which # is another grab bag) etc. "URL": ('url', 'URI', 0, None), "UID": ('uid', "TEXT", 0, None), "EXRULE": ('exrule', 0, None), "CLASS": ('class', 'TEXT', 0, None), #@@symbol "RDATE": ('rdate', 'DATE-TIME', 0, None), "RRULE": ('rrule', 'RECUR', 0, None), 'EXDATE': (u'exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': (u'recurrenceId', 'DATE-TIME', None, None), "TRIGGER": ('trigger', "DURATION", 0, None), "CREATED": ('created', "DATE-TIME", 0, None), "DTSTAMP": ('dtstamp', 'DATE-TIME', 1, 1), "LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1), "SEQUENCE": ('sequence', "INTEGER", 0, None), "REQUEST-STATUS": ('requestStatus', 0, None), "COMMENT": ('comment', 'TEXT', 0, None), "DESCRIPTION": ('description', "TEXT", 0, 1), 'GEO': ('geo', ('FLOAT',), None, None), "LOCATION": ('location', 'TEXT', 0, None), 'PRIORITY': ('priority', 'INTEGER', None, None), "RESOURCES": ('resources', 0, None), "STATUS": ('status', 'TEXT', 0, 1), }, {"VALARM": ValarmDefs } ), 'VTODO': (u'Vtodo', {'ATTACH': (u'attach', 'URI', None, None), 'ATTENDEE': (u'attendee', 'CAL-ADDRESS', None, None), 'CATEGORIES' : (u'categories', 'TEXT', None, None), 'CLASS': (u'class', 'TEXT', None, None), 'COMMENT': (u'comment', 'TEXT', None, None), 'COMPLETED': (u'completed', 'DATE-TIME', None, None), 'CONTACT': (u'contact', 'TEXT', None, None), 'CREATED': (u'created', 'DATE-TIME', None, None), 'DESCRIPTION': (u'description', 'TEXT', None, None), 'DTSTAMP': (u'dtstamp', 'DATE-TIME', None, None), 'DTSTART': (u'dtstart', 'DATE-TIME', None, None), 'DUE': (u'due', 'DATE-TIME', None, None), 'DURATION': (u'duration', 'DURATION', None, None), 'EXRULE': (u'exrule', 'RECUR', None, None), 'GEO': (u'geo', '@@', None, None), 'LAST-MODIFIED': (u'lastModified', 'DATE-TIME', None, None), 'LOCATION': (u'location', 'TEXT', None, None), 'ORGANIZER': (u'organizer', 'CAL-ADDRESS', None, None), 'PERCENT-COMPLETE': (u'percentComplete', 'INTEGER', None, None), 'PRIORITY': (u'priority', 'INTEGER', None, None), 'RDATE': (u'rdate', 'DATE-TIME', None, None), 'RELATED-TO': (u'relatedTo', 'TEXT', None, None), 'REQUEST-STATUS': (u'requestStatus', 'TEXT', None, None), 'RESOURCES': (u'resources', 'TEXT', None, None), 'RRULE': (u'rrule', 'RECUR', None, None), 'EXDATE': (u'exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': (u'recurrenceId', 'DATE-TIME', None, None), 'SEQUENCE': (u'sequence', 'INTEGER', None, None), 'STATUS': (u'status', 'TEXT', None, None), 'SUMMARY': (u'summary', 'TEXT', None, None), 'TRIGGER': (u'trigger', 'DURATION', None, None), 'UID': (u'uid', 'TEXT', None, None), 'URL': (u'url', 'URI', None, None)}, {"VALARM": ValarmDefs }, ) #@@others } ) } def doComponents(sx, components, compDecls, stripe=None, suppressed =[]): """interpret components stripe says whether we need a element or a parseType="Resource" attribute to fix up the striping. or None, in which case we need to declare X- namespaces raises KeyError for unknown component name. @@test this """ for name, props, subs in components: elt, propDecls, subDecls = compDecls[name] if elt in suppressed: continue attrs = {} if stripe == 'component': sx.startElement('component', {}) elif stripe == 'Resource': attrs['rdf:parseType'] = stripe else: bindX(attrs, props, components) try: i = lookup(props, 'UID') if not('#' in i): i = "#" + i attrs['rdf:about'] = i except KeyError: try: tzid = lookup(props, 'TZID') attrs['rdf:about'] = timeZoneNameSpace(tzid) + 'tz' except KeyError: pass sx.startElement(elt, attrs) doProperties(sx, '', props, propDecls, suppressed = suppressed) if elt == 'Vtimezone': doComponents(sx, subs, subDecls, 'Resource', suppressed = suppressed) else: doComponents(sx, subs, subDecls, 'component', suppressed = suppressed) sx.endElement(elt) if stripe == 'component': sx.endElement('component') def bindX(attrs, props, components): """ bind x: namespace per prodid hmm... use vendorid as prefix? """ try: prodid = lookup(props, 'PRODID') #@@ call unesc to parse \, except KeyError: warn("RFC2445 requires a prodid. none found") return def findVendorids(results, components): for name, props, subs in components: for n, p, v in props: if n[:2] == "X-": vendorid, lname = n[2:].split('-', 1) results.append(vendorid.lower()) findVendorids(results, subs) vendorids = [] findVendorids(vendorids, components) puri = prodURI(prodid) for vid in vendorids: attrs['xmlns:x-' + vid] = puri def lookup(props, k): for n, p, v in props: if n == k: return v raise KeyError, k def prodURI(prodid): r"""turn prodid into a URI cf discussion starting with x-properties and namespaces posted by DanC at 2003-02-26 17:17 (+) http://rdfig.xmlhack.com/2003/02/26/2003-02-26.html#1046279854.884486 and continuing thru RDF calendar agenda item C: prodid support to ical2rdf.pl posted by libby at 2003-07-09 14:59 (+) http://rdfig.xmlhack.com/2003/07/09/2003-07-09.html#1057762764.179078 >>> prodURI("-//Apple Computer\, Inc//iCal 1.0//EN") 'http://www.w3.org/2002/12/cal/prod/Apple_Comp_628d9d8459c556fa#' """ import sha if prodid[:3] == "-//": prodid = prodid[3:] prodid = prodid.replace(' ', '_').replace("//", "_") digest = sha.new(prodid.lower()).hexdigest() return 'http://www.w3.org/2002/12/cal/prod/' + \ prodid[:10] + '_' + digest[:16] + '#' def doProperties(sx, pfx, props, schema, suppressed =[]): """write each property as XML raises KeyError for properties that are neither X- properties nor in the schema """ for n, params, val in props: if n in suppressed: continue if n[:2] == "X-": if 'X-' in suppressed: continue vendorid, lname = n[2:].split('-', 1) id = 'x-' + vendorid.lower() + ':' + camelCase(lname) vtype, minc, maxc = 'TEXT', 0, None else: try: id, vtype, minc, maxc = schema[n] except ValueError: raise RuntimeError, "missing default type: %s -> %s" %\ (n, schema[n]) elt = '%s%s' % (pfx, id) for pn, pv in params: if pn == 'VALUE': vtype = pv.upper() if vtype == 'TEXT': doText(sx, elt, params, val) elif vtype == 'INTEGER': doInteger(sx, elt, params, val) elif vtype == 'DURATION': doDuration(sx, elt, params, val) elif vtype == 'DATE-TIME': doDateTime(sx, elt, params, val) elif vtype == 'DATE': doDate(sx, elt, params, val) elif vtype == 'RECUR': doRecur(sx, elt, params, val) elif vtype == 'CAL-ADDRESS': doCalAddress(sx, elt, params, val) elif vtype == 'URI': doURI(sx, elt, params, val) elif vtype == ('FLOAT',): doListOfFLOAT(sx, elt, params, val) else: warn("@@value type %s not implemented (%s: %s)" % (vtype, n, val)) def doInteger(sx, elt, params, val): sx.startElement(elt, {'rdf:datatype': XMLSchema.integer}) sx.characters(val, 0, len(val)) for pn, pv in params: if pn=='VALUE': pass else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) def doURI(sx, elt, params, val): sx.startElement(elt, {'rdf:resource': val}) for pn, pv in params: if pn=='VALUE': pass else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) def doText(sx, elt, params, val): attrs = {} val = unesc(val) # @@ or only if not QUOTED-PRINTABLE ? for pn, pv in params: if pn=='VALUE': pass elif (pn, pv) == ('ENCODING', 'QUOTED-PRINTABLE'): val = quopri.decodestring(val) elif pn == "LANGUAGE": attrs[ 'xml:lang'] = pv else: raise ValueError, "unexpected param %s=%s on value '%s'" % (pn, pv, val) sx.startElement(elt, attrs) sx.characters(val, 0, len(val)) sx.endElement(elt) def unesc(val): r""" undo escaping ala rfc2445#sec4.3.11 >>> unesc("abc\\ndef") 'abc\ndef' >>> unesc("abc\\\\def") 'abc\\def' """ i = 0 ret = '' while i < len(val): j = val.find('\\', i) if j < 0: ret += val[i:] break ret += val[i:j] c = val[j+1] if c == 'n' or c == 'N': c = LF ret += c i = j + 2 return ret # Hmm... we can't use dt:dateTime nor dt:duration as the property # here, because datatype properties are inverse functional, but # iCalendar DATE-TIME values have other properties, i.e. tzid. # # In a way, it's a good thing anyway, since using datatype properties # would take us out of OWL DL. def doDateTime(sx, elt, params, val): val = datePunc(val) tzid = None for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? elif pn == 'TZID': tzid = pv else: raise ValueError, "unexpected param %s=%s" % (pn, pv) if val.endswith('Z'): sx.startElement(elt, {'rdf:datatype': XMLSchema.dateTime}) sx.characters(val, 0, len(val)) sx.endElement(elt) elif tzid: sx.startElement(elt, {'rdf:datatype': timeZoneNameSpace(tzid)+'tz'}) sx.characters(val, 0, len(val)) sx.endElement(elt) else: sx.startElement(elt, {'rdf:datatype': iCalendar.dateTime}) sx.characters(val, 0, len(val)) sx.endElement(elt) OlsonPfxs=('/softwarestudio.org/Olson_20011030_5/', '/softwarestudio.org/Olson_20010831_3/', '/softwarestudio.org/Olson_20011030_4/', '/softwarestudio.org/Olson_20020614_6/', '/softwarestudio.org/Olson_20011030_2/') TzdPfx='http://www.w3.org/2002/12/cal/tzd/' def timeZoneNameSpace(tzid): """map tzid into URI space rooted at TzdPfx rfc2445#sec4.8.3.1 says: "The presence of the SOLIDUS character (US-ASCII decimal 47) as a prefix, indicates that this TZID represents an unique ID in a globally defined time zone registry (when such registry is defined)." >>> timeZoneNameSpace('/softwarestudio.org/Olson_20011030_5/Europe/London') 'http://www.w3.org/2002/12/cal/tzd/Europe/London#' If an unknown registry is uses... >>> timeZoneNameSpace('/foo') Traceback (most recent call last): raise ValueError, "unknown global tzid:" + tzid ValueError: unknown global tzid:/foo We do some namespace squatting: in theory, the name 'Europe/London' could be used as a local reference for Chicago time. But in practice, this works: >>> timeZoneNameSpace('Europe/London') 'http://www.w3.org/2002/12/cal/tzd/Europe/London#' A bit more squatting: >>> timeZoneNameSpace('US/Eastern') 'http://www.w3.org/2002/12/cal/tzd/America/New_York#' @@raises RuntimeError for unrecognized local refs. """ if tzid.startswith('/'): for pfx in OlsonPfxs: if tzid.startswith(pfx): tzns = TzdPfx + tzid[len(pfx):] + '#' break else: raise ValueError, "unknown global tzid:" + tzid else: tzid = {'US/Eastern': 'America/New_York', 'US/Central': 'America/Chicago', # mountain? denver? 'US/Pacific': 'America/Los_Angeles', }.get(tzid, tzid) for area in ('Africa', 'Antarctica', 'Asia', 'Australia', 'Europe', 'Pacific', 'America', 'Arctic', 'Atlantic', 'Indian'): if tzid.startswith(area + '/'): tzns = TzdPfx + tzid + '#' break else: raise RuntimeError, "unsupported local timezone: " + tzid return tzns def doDate(sx, elt, params, val): sx.startElement(elt, {'rdf:datatype': XMLSchema.date}) val = "%s-%s-%s" % (val[:4], val[4:6], val[6:8]) sx.characters(val, 0, len(val)) for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) def doCalAddress(sx, elt, params, val): sx.startElement(elt, {'rdf:parseType': "Resource"}) val = val.replace("MAILTO:", 'mailto:') # rfc2445#sec4.3.3 weirdness # hmm... use or mention of the address? sx.startElement('calAddress', {'rdf:resource': val}) sx.endElement('calAddress') for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? elif pn == 'CN': sx.startElement('cn', {}) #@@ add to ical schema sx.characters(pv, 0, len(pv)) sx.endElement('cn') elif pn == 'DIR': sx.startElement('dir', {'rdf:resource': pv}) sx.endElement('cn') elif pn in ('CUTYPE', 'ROLE', 'RSVP', 'PARTSTAT', 'LANGUAGE'): pn = pn.lower() # lower is sufficient to camelCase pv = pv.upper() # @@symbol sx.startElement(pn, {}) sx.characters(pv, 0, len(pv)) sx.endElement(pn) elif pn == 'X-UID': pn = pn.lower() # @@@@ should be namespaced from the client software mfr sx.startElement(pn, {}) sx.characters(pv, 0, len(pv)) sx.endElement(pn) else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) def doRecur(sx, elt, params, val): sx.startElement(elt, {'rdf:parseType': "Resource"}) for n_v in val.split(';'): n, v = n_v.split('=') if n in ('FREQ', 'BYDAY', 'BYMONTH', 'WKST'): n = n.lower() # lower is sufficient to camelCase v = v.upper() # @@symbol sx.startElement(n, {}) sx.characters(v, 0, len(v)) sx.endElement(n) elif n == 'UNTIL': val = "%s-%s-%s" % (val[:4], val[4:6], val[6:8]) sx.startElement('until', {}) sx.characters(v, 0, len(v)) sx.endElement('until') # hmm... rfc2445 isn't explicit about datatype for count, interval elif n in ('INTERVAL', 'COUNT'): n = n.lower() sx.startElement(n, {'rdf:datatype': XMLSchema.integer}) sx.characters(v, 0, len(v)) sx.endElement(n) else: raise ValueError, "unexpected rrule arg %s=%s" % (n, v) for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) def doDuration(sx, elt, params, val): """duration is an odd beast in iCalendar. There is a duration property as well as a duration value type. We'll use cal:duration for the property. The DURATION value type is actually more than just a XMLSchema.duration; it also has a RELATED parameter. So for TRIGGER;VALUE=DURATION;RELATED=START:-PT15M we'll write { ?E cal:trigger [ rdf:value "-PT15M"^^xsdt:duration; cal:related "START"] } """ sx.startElement(elt, {'rdf:parseType': "Resource"}) sx.startElement('rdf:value', {'rdf:datatype': XMLSchema.duration}) sx.characters(val, 0, len(val)) sx.endElement('duration') for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? elif pn == 'RELATED': pv = pv.upper() # rfc2445#sec2 names sx.startElement('related', {}) sx.characters(pv, 0, len(pv)) sx.endElement('related') else: raise ValueError, "unexpected param %s=%s" % (pn, pv) sx.endElement(elt) # We considered generalizing this to lists of # other sorts of values but declined on the # YouArentGonnaNeedIt principle. def doListOfFLOAT(sx, elt, params, val): x, y = val.split(';') for pn, pv in params: if pn=='VALUE': pass else: raise ValueError, "unexpected param %s=%s" % (pn, pv) #ugh... can't use parseType="Collection" with literal values #http://www.w3.org/2000/03/rdf-tracking/#rdfxml-literals-in-collections sx.startElement(elt, {'rdf:parseType': "Resource"}) sx.startElement('rdf:first', {'rdf:datatype': XMLSchema.double}) sx.characters(x, 0, len(x)) sx.endElement('rdf:first') sx.startElement('rdf:rest', {'rdf:parseType': "Resource"}) sx.startElement('rdf:first', {'rdf:datatype': XMLSchema.double}) sx.characters(y, 0, len(y)) sx.endElement('rdf:first') sx.startElement('rdf:rest', {'rdf:resource': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'}) sx.endElement('rdf:rest') sx.endElement('rdf:rest') sx.endElement(elt) def datePunc(val): """convert RFC2445 date to ISO/W3C/XML date form >>> datePunc('19701025T020000') '1970-10-25T02:00:00' >>> datePunc('20020630T230353Z') '2002-06-30T23:03:53Z' """ return "%s-%s-%sT%s:%s:%s%s" % (val[:4], val[4:6], val[6:8], val[9:11], val[11:13], val[13:15], val[15:]) def camelCase(n, initialCap=0): """ >>> camelCase("VEVENT", 1) 'Vevent' >>> camelCase("LAST-MODIFIED") 'lastModified' >>> camelCase("LOCATION") 'location' """ words = map(lambda w: w.lower(), n.split('-')) def ucfirst(w): return w[0].upper() + w[1:] if initialCap: return ''.join(map(ucfirst, words)) else: return words[0] + ''.join(map(ucfirst, words[1:])) def findComponents(lines, container = None): """return a list of (name, props, subcomponents) and remaining lines # @@TODO: make this a generator """ components = [] while lines: n, p, v = lines[0] #print "finding...", n, p, v if n == 'END': if v.upper() == container: return components, lines[1:] else: raise ValueError, 'expected %s but found %s' % (container, v) elif n == 'BEGIN': name = v.upper() props = [] subs = [] i = 1 while i < len(lines): n, p, v = lines[i] #print "began", name, n, p, v if n == 'END': # @@hmm... found an extra space after END:DAYLIGHT # in test/20030410querymtg.ics # where did that come from? allow it, or fix test data? v = v.rstrip().upper() if v != name: raise ValueError, 'expected "%s" but found "%s"' % \ (name, v) lines = lines[i+1:] break if n == 'BEGIN': subs, lines = findComponents(lines[i:], container = name) i = 0 #print "end recur:", `lines[:3]` break else: props.append((n, p, v)) i += 1 components.append((name, props, subs)) else: raise ValueError, 'expected BEGIN but found %s: %s' % (n, v) return components, lines ############# # rfc2445#sec4.1 Content Lines # CR = chr(13) LF = chr(10) CRLF = CR + LF SPACE = chr(32) TAB = chr(9) def contentLines(fp): # @@TODO: make this a generator lines = [] while 1: ln = fp.readline() if not ln: break ln = ln.rstrip(CRLF) if not ln: # hmm... RFC2445 seems to prohibit blank lines, but we skip them. continue if ln[0] == SPACE or ln[0] == TAB: lines[-1] += ln[1:] else: lines.append(ln) return map(parseLine, lines) def parseLine(ln): """break a content line into name, params, value rfc2445#sec2 property names, parameter names are case-insensitive @@evolution seemed to not implement this. @@if none of our test data mixes case, should we not implement it either? >>> parseLine("RDATE;VALUE=DATE:19970304,19970504,19970704,19970904") ('RDATE', [('VALUE', 'DATE')], '19970304,19970504,19970704,19970904') >>> parseLine('DESCRIPTION;ALTREP="http://www.wiz.org":The Fall 98 Wild Wizards Conference - - Las Vegas, NV, USA') ('DESCRIPTION', [('ALTREP', 'http://www.wiz.org')], 'The Fall 98 Wild Wizards Conference - - Las Vegas, NV, USA') >>> parseLine('URL;VALUE=URI:https://www.virtual.example/new/reservations.html;jsessionid=A2j3D8E6b112FjARUGSFSHBqBe7OafARYkuR0F7lbCV0HNKa5kRh!-346797512!-1748917503!7001!8001?JSESSIONID=A2j3D8E6b112FjARUGSFSHBqBe7OafARYkuR0F7lbCV0HNKa5kRh!-346797512!-1748917503!7001!8001&pnr=Clksdjf') ('URL', [('VALUE', 'URI')], 'https://www.virtual.example/new/reservations.html;jsessionid=A2j3D8E6b112FjARUGSFSHBqBe7OafARYkuR0F7lbCV0HNKa5kRh!-346797512!-1748917503!7001!8001?JSESSIONID=A2j3D8E6b112FjARUGSFSHBqBe7OafARYkuR0F7lbCV0HNKa5kRh!-346797512!-1748917503!7001!8001&pnr=Clksdjf') """ params = [] semi = ln.find(';') colon = ln.find(':') if semi >= 0 and semi < colon: name = ln[:semi] while semi < colon: eq = ln.find('=', semi) pname = ln[semi+1:eq] if ln[eq+1] == '"': strend = ln.find('"', eq+2) pval = ln[eq+2:strend] semi = ln.find(';', strend+1) colon = ln.find(':', strend+1) else: semi = ln.find(';', eq+1) colon = ln.find(':', eq+1) if semi < 0 or semi > colon: semi = colon pval = ln[eq+1:semi] params.append((pname.upper(), pval)) if semi < 0: break else: name = ln[:colon] value = ln[colon+1:] return name.upper(), params, value def _test(): import sys from pprint import pprint import doctest, fromIcal doctest.testmod(fromIcal) lines = contentLines(open(sys.argv[1])) #print lines c, lines = findComponents(lines) assert lines == [] pprint(c) #unittest.main() if __name__ == '__main__': import sys if sys.argv[1] == '--test': del sys.argv[1] _test() else: main() # $Log: fromIcal.py,v $ # Revision 2.29 2005/11/09 23:10:48 connolly # - changed the way duration values are modelled # The iCalendar DURATION value type is actually more than just a # XMLSchema.duration; it also has a RELATED parameter. # So for # TRIGGER;VALUE=DURATION;RELATED=START:-PT15M # we'll write # { ?E cal:trigger [ rdf:value "-PT15M"^^xsdt:duration; # cal:related "START"] } # # - fixed test data to have rdf:datatype on integer # values, to match the schema (which matches the RFC) # # - fixed schema to show DATE-TIME properties (dtstart, ...) # as DatatypeProperties # (there are little/no tests for PERIOD; beware) # # - scraped more details about property parameters (e.g. partstat, cn, # cutype, ...) and rrule parts (freq, interval, ...) from the RFC so # that they show up as links in the hypertext version and as RDF # properties in the schema. likewise timezone components (standard, # daylight) # - side effect: added some whitespace in rfc2445.html # # - demoted x- properties # - removed x- properties from .rdf versions of test data # this allows the round-trip tests to pass # - fromIcal.py doesn't output them unless you give the --x option # # - added Makefile support for consistency checking with pellet # # - demoted blank line diagnostic in fromIcal.py to a comment # # - silenced some left-over debug diagnostics in slurpIcalSpec.py # # - fixed test/test-created.rdf; added it to fromIcalTest.py list # # Revision 2.28 2005/09/08 00:43:49 connolly # avoid double hashes in ID # # Revision 2.27 2005/04/22 14:16:56 connolly # fix problems found when converting all the timezone files # in evolution-data-server_1.0.4-1_i386.deb: # - handle RDATE # - handle multiple OlsonPfxs # # Revision 2.26 2005/04/04 21:17:14 connolly # fix initialization of iCalendar namespace # # Revision 2.25 2005/03/30 15:35:21 connolly # new namespace for timezones-as-datatypes design: icaltzd # # Revision 2.24 2005/02/26 03:20:47 connolly # fromIcal.py # - revert the uid: trick; back to uids as fragids # - timezones as datatypes in dates, dateTimes # - Valarm supported in Vtodo as well as Vevent # (@@need test smaller than MozMulipleVcalendars.ics) # - re-indented Vtodo decls while I was at it # - case-fold END:xyz # # fromIcalTest.py # - base in http space # - new tag-bug case # # test/*.rdf # - base in http space # - timezones as datatypes # # test/cal-regression.n3 # - moved tests that don't use X- first # - got rid of initRDF # # test/cal-retest.py # - replace ical2rdf.pl with fromIcal.py # - base in http space # # test/cal-spec-examples.n3 new # # test/graphCompare.n3 oops; extra debug crud # # Revision 2.23 2005/02/10 21:39:00 timbl # COUNT, LANGUAGE, X-UID, QUOTED-PRINTABLE under DanC's supervision # # Revision 2.22 2005/02/02 21:54:45 timbl # added --noalarm option - kindofa hack - take 2 # # Revision 2.21 2005/02/02 21:51:46 timbl # added --noalarm option - kindofa hack # # Revision 2.20 2005/02/02 21:39:20 timbl # sync # # Revision 2.19 2005/02/01 15:29:43 timbl # hack to CREATED to add default type DATE-TIME. # # Revision 2.18 2005/02/01 15:26:53 timbl # pre hack to CREATED default type. # # Revision 2.17 2005/01/28 04:07:49 timbl # Event URIs now absolute. Added --noprotocol and --help options # # Revision 2.16 2004/09/30 14:16:01 connolly # parseLine was buggy in the case of ; in values # # Revision 2.15 2004/04/14 21:31:26 connolly # added --base support so we can test with fragids # # Revision 2.14 2004/04/14 21:12:13 connolly # # - revamped doDateTime: use datatypes for dateTime values # - added __getattr__ to Namespace class # - make well-known tzids into URIs in 2002/12/cal space # - make UID into fragid # - make local tzid into fragid # # Revision 2.13 2004/04/08 14:09:11 connolly # priority on VEVENT fixed # # Revision 2.12 2004/04/07 18:27:17 connolly # use real datatypes for list of floats, i.e. geo # # Revision 2.11 2004/04/07 18:10:22 connolly # convert list of float, as in GEO # # Revision 2.10 2004/03/25 04:00:59 connolly # allow recurrenceId wherever rrule can go # handle WKST in recur values # # Revision 2.9 2004/03/25 03:45:09 connolly # handle UNTIL in rrule # added EXDATE to compDecls wherever RRULE occurs # # Revision 2.7 2004/03/23 14:59:28 connolly # allow missing PRODID # # Revision 2.6 2004/03/10 21:59:31 connolly # calendar schema is now generated from the RFC # # Revision 2.5 2004/02/29 14:52:11 connolly # todo support in fromIcal; value type label in schema # # Revision 2.4 2004/02/12 07:17:05 connolly # - handle URI value type # - a few more default value type declarations # # Revision 2.3 2004/02/12 06:30:48 connolly # - doText unescapes text values per rfc2445#sec4.3.11 # - LAST-MODIFIED applies to VEVENT (fixed typo in RFC) # - default type added for COMMENT # - disabled UID->fragid conversion cuz it interferes with graph comparison # - handle DIR parameter on CAL-ADDRESS value type # # Revision 2.2 2004/02/11 22:04:10 connolly # slightly nicer XML writer # # Revision 2.1 2004/02/11 16:40:23 connolly # finish renaming icalWebize.py to fromIcal.py # # Revision 2.0 2004/02/11 16:37:48 connolly # copied from icalWebize.py # # Revision 1.11 2004/02/09 23:09:11 connolly # - more refined X- property handling # - generate fragids for uid, TZID # - fixed utf-8 reading (thx MK for move test data!) # - noted RDF API, name change TODOs per discussion with timbl # - allow blank lines, space after names, (warn about possible confict with RFC) # - pass over VALUE parameter in doText # # Revision 1.10 2004/02/08 19:14:39 connolly # handles all of cal01.ics, though not quite the same way # that ical2rdf.pl does # - elaborated comment about declarations and rfc2445-formal # - elaborated @@symbol comment # - fixed tzoffsetfrom decl bug # - handle X- properties # - handle DATE value type # - changed calAddress to use, rather than mention, ala ical2rdf.pl # - handle (and test) Z on dateTimes # - more uniform handling of case insensitivity (still not tested) # # Revision 1.9 2004/02/08 05:17:56 connolly # working except X- props # symbols are still text; not (yet?) interpreted as URIs # treating Valarm as a class; leading toward changing it this way # # Revision 1.8 2004/02/08 04:28:43 connolly # removed i: prefix # enough property declarations with value types for cal01.ics # added elements and parseType to fix striping # factored out text property code # # Revision 1.7 2004/02/08 00:10:00 connolly # data-driven processing of components # # Revision 1.6 2004/02/07 06:28:35 connolly # note a bug with unescaping TEXT values # # Revision 1.5 2004/02/07 03:40:17 connolly # note intention to use generators # rearrange module docstring # # Revision 1.4 2004/02/06 07:41:41 connolly # syntax bug in one of the "not implemented" warnings # # Revision 1.3 2004/02/06 07:13:24 connolly # started support for VEVENT # - TODO: reformat dates # - non-text properties; e.g. organizer # moved main() before Namespace class # # Revision 1.2 2004/02/06 06:56:38 connolly # lines are arranged into components # calendar object and its properties are written as RDF # # Revision 1.1 2004/02/06 05:22:11 connolly # parses lines into name, params, value #