#!/usr/bin/python
"""
cgiutil.py - Utilities for CGI Scripts
License: GPL 2; share and enjoy!
Author: Sean B. Palmer, inamidst.com
"""
import sys, os, re, cgi, urllib, itertools, robotparser
# Matches an HTML <title> element and captures its text content (group 1).
# The match is case-sensitive and expects a bare <title> tag with no
# attributes; the captured text stops at the first '<'.
r_title = re.compile(r'<title>([^<]+)</title>')
# Value of the REQUEST_METHOD environment variable; None when it is not set.
method = os.environ.get('REQUEST_METHOD')
# Form fields of the current request, parsed once at import time.
form = cgi.FieldStorage()
# Shorthand accessor so that form(name) yields form[name].value.
# NOTE(review): an instance-level __call__ is only honoured when FieldStorage
# is an old-style class (Python 2) -- confirm before porting.
form.__call__ = lambda s: form[s].value
# Size of each chunk passed to f.read() in getTitle.
bufsize = 2048
# @@ doctypes
def serve(status, body, mime=None, charset=None, raw=None):
    """Write a complete CGI response to stdout, then exit the process.

    status  -- value interpolated into the Status header
    body    -- response body; passed through str() unless raw is true
    mime    -- media type for the Content-Type header (default 'text/html')
    charset -- charset parameter of Content-Type (default 'utf-8')
    raw     -- if true, body is written exactly as given: no str()
               conversion and no trailing newline is added

    This function never returns: it always ends with sys.exit(), which
    raises SystemExit.
    """
    # @@ import the status codes if they'll work
    mime = mime or 'text/html'
    charset = charset or 'utf-8'

    out = sys.stdout
    out.write("Status: %s\r\n" % status)
    # The blank line (\r\n\r\n) terminates the header section.
    out.write("Content-Type: %s; charset=%s\r\n\r\n" % (mime, charset))

    if raw:
        out.write(body)
    else:
        body = str(body)
        # Guarantee a newline-terminated body. A body ending in a bare
        # '\r' also gets '\n' appended, exactly as before; the old extra
        # "or body.endswith('\r')" test could never change the outcome.
        if not body.endswith('\n'):
            body += '\n'
        out.write(body)
    sys.exit()
def escape(html):
    """Return html with '&', '<' and '>' replaced by HTML entities.

    Produces the same output as cgi.escape(html) with its default
    arguments (quotes are left untouched), but without depending on
    that function, which is deprecated and absent from newer Pythons.
    """
    # '&' must be handled first so the entities produced below are not
    # themselves re-escaped.
    html = html.replace('&', '&amp;')
    html = html.replace('<', '&lt;')
    html = html.replace('>', '&gt;')
    return html
def unescape(html):
    """Return html with the entities &lt;, &gt; and &amp; decoded.

    This reverses what escape() produces. Other entities and numeric
    character references are left untouched.
    """
    html = html.replace('&lt;', '<')
    html = html.replace('&gt;', '>')
    # '&amp;' is decoded last so that text such as '&amp;lt;' becomes
    # '&lt;' rather than being decoded twice into '<'.
    html = html.replace('&amp;', '&')
    return html
def all(seq, pred=bool):
    """Returns True if pred(x) is true for every element in the iterable.

    seq  -- any iterable
    pred -- one-argument callable; its result is judged by truthiness,
            so it does not have to return a real bool

    Returns True for an empty iterable. Stops at the first element for
    which pred(x) is false.
    """
    for item in seq:
        if not pred(item):
            return False
    return True
def any(seq, pred=bool):
    """Returns True if pred(x) is true for at least one element in the iterable.

    seq  -- any iterable
    pred -- one-argument callable; its result is judged by truthiness,
            so it does not have to return a real bool

    Returns False for an empty iterable. Stops at the first element for
    which pred(x) is true.
    """
    for item in seq:
        if pred(item):
            return True
    return False
class RobotsTxt(object):
    """A better robots.txt parser, using robotparser.

    Example:
       rp = RobotsTxt(fn='/web/mysite.tld/robots.txt')
       if rp.fetchable('/somepath'):
          print("/somepath is fetchable")
    """

    def __init__(self, uri=None, fn=None):
        """Load robots.txt rules from exactly one of uri or fn.

        uri -- location of a robots.txt, handed to the parser as-is
        fn  -- path of a local robots.txt file; must exist

        Raises ValueError when both or neither argument is given, and
        IOError when fn does not name an existing file.
        """
        # Real exception classes are raised here: string exceptions are
        # rejected by Python 2.6 and later.
        if uri and fn:
            raise ValueError("Can only pass uri or fn, not both.")
        elif not (uri or fn):
            raise ValueError("Must pass one of uri or fn.")

        self.rp = robotparser.RobotFileParser()
        if uri:
            self.rp.set_url(uri)
        elif os.path.isfile(fn):
            self.rp.set_url(fn)
        else:
            raise IOError("Not found: %s" % fn)
        self.rp.read() # @@ this reads borked files too

    def fetchable(self, path, ua=None):
        """Return whether user agent ua may fetch path.

        ua defaults to '*' when omitted. The decision is delegated to
        the parser's can_fetch().
        """
        if ua is None:
            ua = '*'
        return self.rp.can_fetch(ua, path)
def getTitle(fn):
    """Return the unescaped <title> text of the HTML file fn, or None.

    fn -- path of a local file; only names ending in '.html' are read

    None is returned when fn has a different extension, when the file
    cannot be opened, or when no title is found. The file is read in
    chunks of bufsize and reading stops as soon as r_title matches.
    """
    # Modified from the version in bookmarks.cgi
    # @@ support URIs too
    if not fn.endswith('.html'):
        return None

    try:
        f = open(fn)
    except Exception:
        # Best effort: an unopenable file simply has no title. Unlike a
        # bare except, this lets KeyboardInterrupt/SystemExit through.
        return None

    try:
        html = ''
        while 1:
            s = f.read(bufsize)
            if not s:
                break
            html += s
            # Search everything read so far, since a title may straddle
            # a chunk boundary.
            m = r_title.search(html)
            if m:
                return unescape(m.group(1))
    finally:
        # Close the file on every exit path, including read errors.
        f.close()
    return None
def trim(s):
while s.startswith('\n') or s.startswith('\r'):
s = s[1:]
if s.endswith('\n'): s = s[:-1]
i = 0
for char in s:
if char == ' ': i += 1
else: break
result = ''
for line in s.splitlines():
if line.startswith(' ' * i):
result += line[i:] + '\n'
else: result += line + '\n'
return result
def test():
    """Placeholder for the module's self-test; does nothing yet."""
    return None
def main():
    """Script entry point; does nothing yet."""
    # if running as a CGI: test()
    # else: print __doc__
    return None


if __name__ == "__main__":
    main()