#!/usr/bin/python
# Helpers for a notes program: date normalization and input-line tokenizing.
#
# Notes programs will have to handle dates.
# What we want to do is accept the following forms (case-insensitively):
#
# "yesterday|today|tomorrow"
# "[+-][1-9][0-9]*"
# "(next|last)-(week|month|year)"
# "[1-9][0-9]*-days(-ago)?"
# "(0?[1-9]|[12][0-9]|30|31)(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
#      ([0-9]*)?[0-9]{2}"
# "1st|2nd|3rd|[4-9]th|1[0-9]th|21st|22nd|23rd|2[04-9]th|30th|31st"
# "(this-)?(mon|tue|wed|thu|fri|sat|sun)"
# "(this-)?(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"
# "next-(mon|tue|wed|thu|fri|sat|sun)"
# "next-(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"
#
# and normalize them into yyyy-mm-dd format
# "[0-9]{4}-[0-9]{2}-[0-9]{2}"

import calendar
import datetime
import re

days = "monday|tuesday|wednesday|thursday|friday|saturday|sunday"
months = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec"

# One compiled pattern per accepted date form, in the order listed above.
# Each source pattern is wrapped in a non-capturing group so that "^" and "$"
# anchor the whole pattern; without it, a top-level alternation such as
# "1st|2nd|...|31st" is only anchored on its first and last alternatives.
# The wrapper does not change capturing-group numbers.
r_dates = [re.compile("^(?:%s)$" % p, re.IGNORECASE) for p in (
    "(yesterday|today|tomorrow)",
    "[+-][1-9][0-9]*",
    "(next|last)-(week|month|year)",
    "[1-9][0-9]*-days(-ago)?",
    "(0?[1-9]|[12][0-9]|30|31)(%s)([0-9]*)?[0-9]{2}" % months,
    "1st|2nd|3rd|[4-9]th|1[0-9]th|21st|22nd|23rd|2[04-9]th|30th|31st",
    "(this-)?(mon|tue|wed|thu|fri|sat|sun)",
    "(this-)?(%s)" % days,
    "next-(mon|tue|wed|thu|fri|sat|sun)",
    "next-(%s)" % days,
)]

# Three-letter weekday prefix -> date.weekday() number (Monday is 0).
_WEEKDAY_INDEX = dict(
    (name[:3], number) for number, name in enumerate(days.split("|")))

# Three-letter month name -> calendar month number (January is 1).
_MONTH_INDEX = dict(
    (name, number) for number, name in enumerate(months.split("|"), 1))


def _shift_months(d, count):
    """Return date d moved by count months, clamping the day to month end."""
    months_since_year_zero = d.year * 12 + (d.month - 1) + count
    year, month_index = divmod(months_since_year_zero, 12)
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return d.replace(year=year, month=month, day=min(d.day, last_day))


def _relative_day(match, base):
    """Resolve "yesterday", "today" or "tomorrow"."""
    offsets = {"yesterday": -1, "today": 0, "tomorrow": 1}
    return base + datetime.timedelta(days=offsets[match.group(1).lower()])


def _signed_offset(match, base):
    """Resolve "+N" / "-N" as N days after / before base."""
    return base + datetime.timedelta(days=int(match.group(0)))


def _adjacent_period(match, base):
    """Resolve "(next|last)-(week|month|year)" as base moved by one unit."""
    step = 1 if match.group(1).lower() == "next" else -1
    unit = match.group(2).lower()
    if unit == "week":
        return base + datetime.timedelta(weeks=step)
    # A year is handled as twelve months so 29 February clamps to the 28th.
    return _shift_months(base, step * (12 if unit == "year" else 1))


def _days_from_base(match, base):
    """Resolve "N-days" (after base) and "N-days-ago" (before base)."""
    count = int(match.group(0).split("-")[0])
    if match.group(1):  # the optional "-ago" suffix was present
        count = -count
    return base + datetime.timedelta(days=count)


def _absolute_date(match, base):
    """Resolve "<day><month-name><year>", e.g. "12Jan2010" or "1feb99"."""
    day = int(match.group(1))
    month = _MONTH_INDEX[match.group(2).lower()]
    # Everything after the month name is the year's digits.
    year_text = match.string[match.end(2):match.end(0)]
    year = int(year_text)
    if len(year_text) == 2:
        # NOTE(review): two-digit years follow the POSIX %y convention
        # (00-68 -> 20xx, 69-99 -> 19xx); confirm this is the wanted pivot.
        year += 2000 if year < 69 else 1900
    return datetime.date(year, month, day)


def _day_of_month(match, base):
    """Resolve an ordinal such as "21st" as that day of base's month."""
    # NOTE(review): the ordinal is taken within base's own month, even if
    # that day has already passed; confirm against the intended usage.
    return base.replace(day=int(match.group(0)[:-2]))


def _this_weekday(match, base):
    """Resolve a weekday name as its first occurrence on or after base."""
    # In every weekday pattern the name is the last capturing group; an
    # unmatched optional "this-" group never becomes lastindex.
    name = match.group(match.lastindex).lower()
    target = _WEEKDAY_INDEX[name[:3]]
    return base + datetime.timedelta(days=(target - base.weekday()) % 7)


def _next_weekday(match, base):
    """Resolve "next-<weekday>" as one week after the plain weekday form."""
    # NOTE(review): "next-" is read as "the occurrence after this one";
    # confirm against the intended usage.
    return _this_weekday(match, base) + datetime.timedelta(weeks=1)


# Resolver for each entry of r_dates, in the same order.
_DATE_HANDLERS = (
    _relative_day,
    _signed_offset,
    _adjacent_period,
    _days_from_base,
    _absolute_date,
    _day_of_month,
    _this_weekday,
    _this_weekday,
    _next_weekday,
    _next_weekday,
)


def normalizeDate(s, base=None):
    """Normalize a date expression into a "yyyy-mm-dd" string.

    s is matched against each pattern in r_dates in turn, and the first
    pattern that matches decides how the date is computed.

    Parameters:
        s: the date expression, in one of the forms listed at the top of
            this file (matching is case-insensitive).
        base: the datetime.date (or datetime.datetime) that relative forms
            are resolved against; defaults to today's date.

    Returns:
        The resolved date as "yyyy-mm-dd", or None when s matches no known
        form or names a date that does not exist (e.g. "31Feb2010", or
        "31st" in a 30-day month).
    """
    if base is None:
        base = datetime.date.today()
    elif isinstance(base, datetime.datetime):
        base = base.date()

    for pattern, resolve in zip(r_dates, _DATE_HANDLERS):
        match = pattern.match(s)
        if match is None:
            continue
        try:
            # isoformat() always zero-pads, unlike strftime("%Y") on some
            # platforms for years below 1000.
            return resolve(match, base).isoformat()
        except (ValueError, OverflowError):
            # Impossible calendar date or an offset outside date's range.
            return None
    return None


# We want to be able to parse things simply from text input.
# tokenize() hands back multi-line input untokenized (see the note inside);
# otherwise it returns a tokenized tuple.
#
# A line is "<word> <text>[@: <extra words>]". Inside <text>, "@:@:" and any
# "@" not followed by ":" are consumed as ordinary text, so only a lone "@:"
# starts the trailing section.
r_line = re.compile(r"([A-Za-z]+) ((?:[^@]|@:@:|@(?!:))*)(?:@: +(.+))?")


def tokenize(s):
    """Split one line of input into (word, text, extras).

    Parameters:
        s: the input string; "\\r\\n" and "\\r" are treated as "\\n".

    Returns:
        - for a single line matching r_line: a 3-tuple of the leading
          alphabetic word, the text after it (with one trailing space
          removed) and a tuple of the space-separated words after "@: "
          (empty when there is no such section);
        - for a single line that does not match: an empty tuple;
        - for input containing a newline: tuple(s).
    """
    s = s.replace('\r\n', '\n').replace('\r', '\n')
    if '\n' in s:
        # NOTE(review): tuple(s) is a tuple of single characters, which may
        # not be what was intended; kept as-is so callers see no change.
        return tuple(s)

    m = r_line.match(s)
    if not m:
        return tuple()

    word, text, tail = m.groups()
    # Runs of spaces produce empty pieces; drop them.
    extras = tuple(piece for piece in (tail or '').split(' ') if piece.strip())
    if text.endswith(' '):
        # Drop the single space that separated the text from "@:".
        text = text[:-1]
    return (word, text, extras)


def test():
    """Run the module's self-checks and report success."""
    assert (tokenize("note blargh @: something") ==
            ('note', 'blargh', ('something',)))
    assert (tokenize("note blargh @: something etc. ") ==
            ('note', 'blargh', ('something', 'etc.')))

    wednesday = datetime.date(2010, 3, 17)
    assert normalizeDate("tomorrow", wednesday) == "2010-03-18"
    assert normalizeDate("3-days-ago", wednesday) == "2010-03-14"
    assert normalizeDate("12Jan2010", wednesday) == "2010-01-12"
    assert normalizeDate("no-such-form", wednesday) is None
    print("Tests passed.")


if __name__ == "__main__":
    test()