#!/usr/bin/python
"""
cgiutil.py - Utilities for CGI Scripts
License: GPL 2; share and enjoy!
Author: Sean B. Palmer, inamidst.com
"""

import itertools
import os
import re
import sys
import urllib

try:
    import cgi
except ImportError:  # the cgi module is not available on newer Pythons
    cgi = None

try:
    import robotparser
except ImportError:  # Python 3 ships the parser as urllib.robotparser
    from urllib import robotparser

# Captures the text content of the first <title> element. The pattern has
# to be anchored on the tags: a bare ``[^<]+`` would match whatever text
# happens to precede the first tag in the document.
r_title = re.compile(r'<title>([^<]+)</title>')

# HTTP method of the current CGI request, or None outside a CGI context.
method = os.environ.get('REQUEST_METHOD')

# Parsed request parameters. ``form('name')`` is a shorthand for
# ``form['name'].value`` on interpreters that honour an instance-level
# ``__call__`` (Python 2 old-style classes). ``form`` is None when the cgi
# module cannot be imported.
if cgi is not None:
    form = cgi.FieldStorage()
    form.__call__ = lambda s: form[s].value
else:
    form = None

# Chunk size, in characters, used by getTitle when scanning a file.
bufsize = 2048


class ArgumentError(ValueError):
    """Raised when a callable is given an invalid combination of arguments."""


# @@ doctypes

def serve(status, body, mime=None, charset=None, raw=None):
    """Write a complete CGI response to stdout and exit the process.

    status  -- value for the Status header, e.g. 200 or "404 Not Found"
    body    -- response body; converted with str() unless raw is true
    mime    -- media type for Content-Type, defaults to text/html
    charset -- charset parameter for Content-Type, defaults to utf-8
    raw     -- if true, write body exactly as given; otherwise make sure
               the body ends with a newline

    This function never returns: it always finishes with sys.exit().
    """
    # @@ doc, and import the status codes if they'll work
    mime = mime or 'text/html'
    charset = charset or 'utf-8'

    sys.stdout.write("Status: %s\r\n" % status)
    sys.stdout.write("Content-Type: %s; charset=%s\r\n\r\n" % (mime, charset))

    if raw:
        sys.stdout.write(body)
    else:
        body = str(body)
        # The original test was ``not endswith('\n') or endswith('\r')``;
        # because ``not`` binds tighter than ``or`` the second clause could
        # never change the outcome, so only the effective condition is kept.
        if not body.endswith('\n'):
            body += '\n'
        sys.stdout.write(body)
    sys.exit()


def escape(html):
    """Return html with &, < and > replaced by their HTML entities.

    Implemented directly (rather than through cgi.escape, which no longer
    exists on modern Pythons). The ampersand is replaced first so that the
    entities produced for < and > are not escaped a second time.
    """
    html = html.replace('&', '&amp;')
    html = html.replace('<', '&lt;')
    html = html.replace('>', '&gt;')
    return html


def unescape(html):
    """Return html with the &lt;, &gt; and &amp; entities decoded.

    This is the inverse of escape(). The ampersand is decoded last so that
    text such as "&amp;lt;" becomes "&lt;" rather than "<".
    """
    html = html.replace('&lt;', '<')
    html = html.replace('&gt;', '>')
    html = html.replace('&amp;', '&')
    return html


def all(seq, pred=bool):
    """Return True unless pred(x) equals False for some element of seq.

    Evaluation is lazy and stops at the first failing element. Note that
    this intentionally shadows the builtin to offer the pred argument.
    """
    return False not in (pred(x) for x in seq)


def any(seq, pred=bool):
    """Return True if pred(x) equals True for at least one element of seq.

    Evaluation is lazy and stops at the first matching element. Note that
    this intentionally shadows the builtin to offer the pred argument.
    """
    return True in (pred(x) for x in seq)


class RobotsTxt(object):
    """A better robots.txt parser, using robotparser.

    Example:
       rp = RobotsTxt(fn='/web/mysite.tld/robots.txt')
       if rp.fetchable('/somepath'):
          print("/somepath is fetchable")
    """

    def __init__(self, uri=None, fn=None):
        """Load robots.txt rules from a URI or from a local file.

        uri -- location to download the robots.txt from
        fn  -- path of a robots.txt file on the local filesystem

        Exactly one of uri and fn must be given; otherwise ArgumentError
        (a ValueError subclass) is raised. IOError is raised when fn does
        not name an existing file.
        """
        if uri and fn:
            raise ArgumentError("Can only pass uri or fn, not both.")
        elif not (uri or fn):
            raise ArgumentError("Must pass one of uri or fn.")

        self.rp = robotparser.RobotFileParser()
        if uri:
            self.rp.set_url(uri)
            self.rp.read()  # @@ this reads borked files too
        else:
            if not os.path.isfile(fn):
                raise IOError("Not found: %s" % fn)
            self.rp.set_url(fn)
            # Feed the file to the parser directly instead of going through
            # RobotFileParser.read(), which fetches self.url over urllib and
            # cannot be relied on to open a scheme-less local path.
            with open(fn) as f:
                self.rp.parse(f.read().splitlines())

    def fetchable(self, path, ua=None):
        """Return whether user agent ua (default '*') may fetch path."""
        if ua is None:
            ua = '*'
        return self.rp.can_fetch(ua, path)


def getTitle(fn):
    """Return the unescaped <title> text of the HTML file fn, or None.

    None is returned when fn does not end in '.html', when the file cannot
    be opened, or when no title is found. The file is read in bufsize
    chunks and scanning stops as soon as a title has been seen.
    """
    # Modified from the version in bookmarks.cgi
    # @@ support URIs too
    if not fn.endswith('.html'):
        return None

    try:
        f = open(fn)
    except (IOError, OSError):
        return None

    # The with-block guarantees the file is closed on every exit path,
    # including an exception raised while reading.
    with f:
        html = ''
        while True:
            s = f.read(bufsize)
            if not s:
                return None
            # Search the accumulated text, not just the new chunk, so that
            # a title straddling a chunk boundary is still found.
            html += s
            m = r_title.search(html)
            if m:
                return unescape(m.group(1))


def trim(s):
    """Strip the common indentation from a block of text.

    Leading newlines and carriage returns and one trailing newline are
    dropped. The number of leading spaces on the first remaining line is
    then removed from every line that starts with at least that many
    spaces; other lines are kept as they are. Every line of the result is
    terminated with a newline.
    """
    s = s.lstrip('\r\n')
    if s.endswith('\n'):
        s = s[:-1]

    indent = len(s) - len(s.lstrip(' '))
    prefix = ' ' * indent

    lines = []
    for line in s.splitlines():
        if line.startswith(prefix):
            line = line[indent:]
        lines.append(line + '\n')
    return ''.join(lines)


def test():
    """Placeholder for the module self-test; currently does nothing."""
    pass


def main():
    """Script entry point; currently does nothing."""
    pass
    # if running as a CGI: test()
    # else: print __doc__


if __name__ == "__main__":
    main()