#!/usr/bin/python
import calendar
import os
import re
import sys
import time
import urllib

try:
    import cgi
except ImportError:  # the cgi module no longer exists on recent Python 3
    cgi = None

try:  # Python 2 module names, as originally written
    import BaseHTTPServer
    from SimpleHTTPServer import SimpleHTTPRequestHandler
except ImportError:  # Python 3 merged both modules into http.server
    import http.server as BaseHTTPServer
    from http.server import SimpleHTTPRequestHandler


# Date parsing stuff

def datetime():
    """Return the current UTC time as standard date format.

    The result looks like ``2004-01-02T03:04:05Z`` (always 20 characters).
    """
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())


# Matches each run of digits with leading zeros dropped ("05" -> "5",
# "00" -> "0"), so every field can be handed straight to int().
r_datescrape = re.compile(r'[1-9]\d*|0(?=\D|\Z)')


def dateToInt(s):
    """Convert a date string to integer seconds since the epoch.

    The numeric fields are scraped out of ``s`` in order (year, month, day,
    hour, minute, second) and passed to ``calendar.timegm``, i.e. they are
    interpreted as UTC.
    """
    fields = [int(i) for i in r_datescrape.findall(s)]
    # Pad with three zeros so a six-field date becomes a nine-item time tuple.
    return calendar.timegm(tuple(fields + [0, 0, 0]))


def sanitizeDate(s):
    """Turn ``YYYY-MM-DDTHH:MM:SSZ`` into ``YYYY-MM-DD HH:MM:SS``.

    Fields are taken by fixed position; the input must be exactly 20
    characters long (AssertionError otherwise).
    """
    assert len(s) == 20
    year, month, day = s[0:4], s[5:7], s[8:10]
    hour, minute, second = s[11:13], s[14:16], s[17:19]
    return "%s-%s-%s %s:%s:%s" % (year, month, day, hour, minute, second)


# Other utilities

def giveMeMyDataBack(fn):
    """Convert the serialization format we use into datestamped files.

    Currently a stub: the file is read and its line endings normalised, but
    nothing is done with the lines yet.
    """
    with open(fn) as f:
        s = f.read()
    s = s.replace('\r\n', '\n')
    s = s.replace('\r', '\n')
    for line in s.splitlines():
        pass  # @@


r_name = re.compile(r'^[A-Za-z]+$')
r_datetime = re.compile(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$')
r_word = re.compile(r"[A-Za-z0-9'_-]+")
# The two escape sequences written by NotesHandler.serializeNote.
r_escape = re.compile(r'\\([\\n])')

# Compiled note: (datetime, {metadata}, data)

defaulturi = 'notes.text'


class NotesHandler(object):
    """Does general note stuff, and so forth."""

    def __init__(self):
        self.uids = []   # a list of datetimes, sorted
        self.notes = {}  # the note mappings: {datetime: (metadata, data)}
        self.index = {}  # word index: {lowercased word: {datetime: 0}}
        self.uri = None  # location of the notes file
        self.loadNotes()

    def loadNotes(self, uri=None):
        """Load in notes from an external file.

        ``uri`` defaults to ``defaulturi``.  Anything containing a colon is
        fetched with ``urllib.urlopen``; otherwise it is treated as a local
        path, which is created empty if it does not exist yet.
        """
        # Note: just adds them all to the end of the file...
        # @@ integrity checking
        self.uri = uri or defaulturi
        if ':' in self.uri:
            handle = urllib.urlopen(self.uri)
            try:
                s = handle.read()
            finally:
                handle.close()
        elif not os.path.exists(self.uri):
            with open(self.uri, 'w') as f:
                f.write('')
            s = ''
        else:
            with open(self.uri) as f:
                s = f.read()
        for line in s.splitlines():
            if line:
                note = self.parseFromStorage(line)
                self.addNote(note)

    def makeNote(self, metadata, data):
        """Take in data from a CGI form, and add an n-tuple and string."""
        note = self.parseFromInput(metadata, data)
        self.addNote(note)
        # appendNote is not defined in the visible part of this class.
        self.appendNote(note)

    def parseFromStorage(self, s):
        """Parse a string from a storage file and return an n-tuple.

        ``s`` is one line as written by ``serializeNote`` without its
        trailing newline: ``timestamp keyword ...<TAB>escaped data``.
        Returns ``(timestamp, metadata, data)`` where metadata maps each
        lowercased keyword to 0.  Raises AssertionError if the line has no
        timestamp or the timestamp is malformed.
        """
        # parse into metadata and data; a line without a tab is treated as
        # metadata only, with empty data
        metadata, _, data = s.partition('\t')

        # unescape data in a single left-to-right pass, so an escaped
        # backslash followed by "n" is not mistaken for an escaped newline
        data = r_escape.sub(
            lambda m: '\n' if m.group(1) == 'n' else '\\', data)

        fields = [field for field in metadata.split(' ') if field.strip()]
        assert len(fields) > 0
        timestamp = fields[0]
        metadata = dict((key.lower(), 0) for key in fields[1:])

        # @@ verify more stuff here
        assert r_datetime.match(timestamp)
        return (timestamp, metadata, data)

    def parseFromInput(self, metadata, data):
        """Parse input from a CGI form, create a note, return an n-tuple.

        ``metadata`` is a space-separated string of keywords and must not
        contain line breaks; ``data`` is the note text.  The note is stamped
        with the current UTC time.
        """
        # Normalize input data
        assert ('\n' not in metadata) and ('\r' not in metadata)
        data = data.replace('\r\n', '\n')
        data = data.replace('\r', '\n')

        # generate timestamp
        timestamp = datetime()

        # Munge and check metadata
        metadatums = [d for d in metadata.split(' ') if d.strip()]
        for datum in metadatums:
            if not r_name.match(datum):
                # error() is not defined in the visible part of this class.
                self.error("Datum must be a name: %s" % datum)
        metadata = dict((key.lower(), 0) for key in metadatums)
        return (timestamp, metadata, data)

    def addNote(self, note):
        """Add an n-tuple note to the internal notes store, compiled.

        Notes must arrive in strictly increasing timestamp order and each
        timestamp may be used only once (AssertionError otherwise).  Every
        keyword and every word of the text is added to the word index.
        """
        timestamp, metadata, data = note
        assert timestamp not in self.notes
        if self.uids:
            assert dateToInt(timestamp) > dateToInt(self.uids[-1])
        self.uids.append(timestamp)
        self.notes[timestamp] = (metadata, data)
        for word in list(metadata) + r_word.findall(data):
            self.index.setdefault(word.lower(), {})[timestamp] = 0

    def serializeNote(self, note):
        """Converts an n-tuple into a string for dumping to a file.

        Backslashes and newlines in the data are escaped so that the note
        occupies exactly one line.
        """
        timestamp, metadata, data = note
        # Escape data (backslashes first, so escaped newlines stay distinct)
        data = data.replace('\\', '\\\\')
        data = data.replace('\n', '\\n')
        return "%s %s\t%s\n" % (timestamp, ' '.join(metadata), data)

    def getNote(self, query):
        """Look a note up by position, by timestamp, or by note tuple.

        ``query`` may be an int index into the ordered list of notes, a
        timestamp string, or a note tuple whose first item is the timestamp.
        Returns ``(index, (timestamp, metadata, data))``, or False when the
        note does not exist or the query type is not supported.
        """
        # @@ cache results? assumes stuff won't go out of order...
        if isinstance(query, int):
            try:
                timestamp = self.uids[query]
            except IndexError:
                return False
            metadata, data = self.notes[timestamp]
            return query, (timestamp, metadata, data)

        if isinstance(query, str):
            timestamp = query
        elif isinstance(query, tuple) and query:
            timestamp = query[0]
        else:
            return False

        if timestamp not in self.notes:
            return False
        i = self.uids.index(timestamp)
        metadata, data = self.notes[timestamp]
        return i, (timestamp, metadata, data)

    def query(self, constraints):
        """Return the notes matching ``constraints``, oldest first.

        ``constraints`` is a dict with optional entries:
          'keywords' -- the note's metadata must contain every one of these
          'contains' -- the note's text must contain every one of these
                        (compared case-insensitively)
        Returns a list of ``(timestamp, metadata, data)`` tuples.
        """
        results = []
        for timestamp in self.uids:
            metadata, data = self.notes[timestamp]
            results.append((timestamp, metadata, data))

        if 'keywords' in constraints:
            keywords = constraints['keywords']
            results = [n for n in results
                       if all(keyword in n[1] for keyword in keywords)]

        if 'contains' in constraints:
            for word in constraints['contains']:
                word = word.lower()
                results = [n for n in results if word in n[2].lower()]
        return results

    # NOTE(review): from here on this copy of the file is damaged -- the
    # markup inside the string literals has been stripped.  The surviving
    # head of noteToHTML is kept verbatim below rather than guessed at; its
    # body continues on the lines that follow.
    #
    # def noteToHTML(self, q):
    #     """Convert an n-tuple into HTML suitable for output."""
    #     i, (timestamp, metadata, data) = self.getNote(q)
    #     data = data.replace('&', '&')
    #     data = data.replace('<', '<')
    #     body = '
\n' if '\n' in data.strip(): data = '%s\n' % data else: data = '
%s
\n' % data t = sanitizeDate(timestamp) meta = '%s
\n' % t nav = ('\n') % (i - 1, i + 1) return body + data + meta + nav + '' def noteToSummaryHTML(self, q): """Convert an n-tuple into an HTML summary suitable for output.""" i, (timestamp, metadata, data) = self.getNote(q) data = data[:70] data = data.replace('&', '&') data = data.replace('<', '<') data = data.replace('\n', '\\n') def replaceWithLink(m): word = m.group(0) if len(self.index.get(word.lower()) or []) > 1: return '%s' % (word.lower(), word) else: return word data = r_word.sub(replaceWithLink, data) metadata = metadata.keys() metadata.sort() metadata = ', '.join( ['%s' % (m, m) for m in metadata] ) or '' t = sanitizeDate(timestamp) t = '(%s)' % t link = '#%02i)' % (i, i) return '%s %s %sNo keywords found
' all = self.query({'contains': keywords}) if all: s += 'No entries found containing the search terms.
' return self.makeHTML("Search Result", s) def makeHTML(self, title, body): html = '\n' html += '