#!/usr/bin/env python
"""
persian.py - Phenny Persian Module
Copyright 2012, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re

import web

# Search endpoint and query-string template; %s receives the URL-quoted term.
link = "http://dsal.uchicago.edu/cgi-bin/philologic/search3dsal"
query = "dbname=steingass&query=%s&matchtype=exact&display=utf8"

# NOTE(review): this pattern is reproduced exactly as found (two newlines
# followed by an empty lookahead). It looks as if markup inside the literal
# was stripped before this file was stored -- presumably an opening tag where
# the blank lines are and a delimiter inside the lookahead. Restore it from a
# known-good copy; as written it can only ever capture empty strings.
r_paragraph = re.compile("(?is)\n\n(.*?)(?=)")
r_tag = re.compile("(?i)<[^>]+>")
r_whitespace = re.compile(r"(?s)[ \t\r\n]+")


def parse(definition):
    """Reduce one raw result paragraph to a single line of plain text.

    Keeps the text after the last occurrence of each of two separators,
    trims surrounding whitespace, rejoins words hyphenated across a line
    break, removes everything matching ``r_tag`` and collapses whitespace
    runs to a single space.
    """
    # NOTE(review): both separators are empty as found, and str.rsplit raises
    # ValueError("empty separator") for an empty separator. The original
    # separator strings appear to have been lost with the rest of the markup
    # in this file -- restore them before relying on this function.
    definition = definition.rsplit("", 1).pop()
    definition = definition.rsplit("", 1).pop()
    definition = definition.strip()
    # A hyphen directly before a newline is a word broken across lines.
    definition = definition.replace("-\n", "")
    definition = r_tag.sub("", definition)
    definition = r_whitespace.sub(" ", definition)
    return definition


def lookup(search, limit=3):
    """Yield up to ``limit`` distinct definitions for ``search``.

    ``search`` must be a ``unicode`` object; it is UTF-8 encoded and
    URL-quoted before being substituted into ``query``. Paragraphs whose
    parsed text is empty or ends with "q.v." are skipped, and each distinct
    definition is yielded only once.
    """
    assert isinstance(search, unicode)
    search = web.urllib.quote(search.encode("utf-8"))
    page = web.get(link + "?" + (query % search))

    definitions = set()
    for paragraph in r_paragraph.findall(page):
        # NOTE(review): the marker literal is empty as found, so this filter
        # never skips anything; the original marker text appears to be lost.
        if "" not in paragraph:
            continue

        definition = parse(paragraph)
        if not definition:
            continue
        # Entries ending in "q.v." are skipped.
        if definition.endswith("q.v."):
            continue

        if definition not in definitions:
            definitions.add(definition)
            yield definition
            # The set only grows here, so this is the only place the limit
            # can be reached.
            if len(definitions) >= limit:
                break


def persian(phenny, input):
    """Reply with the definitions found for the command argument.

    Results are joined with " / ". A joined reply longer than 256 characters
    is cut to 251 characters plus "[...]".
    """
    search = input.group(2)  # unicode text
    results = list(lookup(search))
    if not results:
        return phenny.reply("Sorry, no results found.")

    result = " / ".join(results)
    if len(result) > 256:
        result = result[:251] + "[...]"
    phenny.reply(result)
persian.commands = ["persian"]


if __name__ == "__main__":
    # Parenthesised form prints the same text on Python 2 and also parses on
    # Python 3.
    print(__doc__)