#!/usr/bin/env python
"""
Inside - Shows inside a site.
License: GPL 2. Share and enjoy!
Author: Sean B. Palmer, http://purl.org/net/sbp/
"""

try:
    import cgitb
except ImportError:  # cgitb was removed from the standard library in 3.13
    cgitb = None
else:
    cgitb.enable()

import sys
import os
import itertools

try:
    import robotparser  # Python 2
except ImportError:
    from urllib import robotparser  # Python 3

# # # # # # # # # #

indexen = ('index.cgi', 'index.py')
suffixen = ('.cgi', '.py', '.php')
overview = 'overview.html'

# # # # # # # # # #

scriptname = os.environ.get('SCRIPT_NAME') or '/inside/index.cgi'
base, script = os.path.split(scriptname)
# Relative path from the script's directory up to the site root: one '../'
# for every '/' in the script's base path.
site = '../' * base.count('/')

# On first run, write a .htaccess that routes every request under `base`
# to this script.
if not os.path.exists('.htaccess'):
    with open('.htaccess', 'w') as f:
        f.write('DirectoryIndex %s\n' % script)
        f.write('Options -MultiViews\n')
        f.write('RewriteEngine on\n')
        f.write('RewriteBase %s\n' % base)
        f.write('RewriteRule ^(.+)$ %s [L]\n' % script)


def all(seq, pred=bool):
    """Returns True if pred(x) is true for every element in the iterable.

    Unlike the builtin, an optional predicate may be supplied. The result of
    the predicate is judged by truthiness, so predicates returning non-bool
    values (e.g. '' or 2) are classified correctly.
    """
    for item in seq:
        if not pred(item):
            return False
    return True


def any(seq, pred=bool):
    """Returns True if pred(x) is true for at least one element in the iterable.

    Unlike the builtin, an optional predicate may be supplied. The result of
    the predicate is judged by truthiness.
    """
    for item in seq:
        if pred(item):
            return True
    return False


def serve(status, body, mime=None):
    """Write a CGI response to stdout and terminate the process.

    status -- HTTP status code, emitted in the CGI ``Status`` header
    body   -- response body, written verbatim
    mime   -- content type; defaults to 'text/html'

    Never returns: always raises SystemExit via sys.exit().
    """
    mime = mime or 'text/html'
    sys.stdout.write("Status: %s\r\n" % status)
    sys.stdout.write("Content-Type: %s; charset=utf-8\r\n\r\n" % mime)
    sys.stdout.write(body)
    sys.exit()


def _read_file(filename):
    """Return the full contents of filename, closing the handle afterwards."""
    with open(filename) as handle:
        return handle.read()


def _escape_html(text):
    """Escape the characters that are significant in HTML text content."""
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;'))


def _serve_source(filename):
    """Serve the contents of filename as plain text (never returns)."""
    serve(200, _read_file(filename), 'text/plain')


# NOTE(review): the message bodies below carry no markup in this copy of the
# file, only the surrounding blank lines; they are reproduced verbatim.
# Confirm against the upstream source whether HTML tags were lost.

def homepage():
    """Serve the overview page, or a 500 error if it is missing."""
    if not os.path.exists(overview):
        serve(500, '\n\nError: no %s found!\n\n\n' % overview)
    serve(200, _read_file(overview))


def _check_robots(path):
    """Refuse the request (403) if the site's robots.txt disallows path.

    Does nothing when the site has no robots.txt. Serves a 500 if the file
    exists but cannot be read.
    """
    robotstxt = os.path.join(site, 'robots.txt')
    if not os.path.exists(robotstxt):
        return

    # Read the file ourselves and hand the lines to parse(): on Python 3,
    # RobotFileParser.read() goes through urlopen(), which rejects a bare
    # filesystem path.
    try:
        lines = _read_file(robotstxt).splitlines()
    except IOError:
        serve(500, "\n\nCouldn't parse robots.txt\n\n\n")

    rp = robotparser.RobotFileParser()
    rp.parse(lines)  # @@ this reads borked files too
    if not rp.can_fetch('*', path):
        serve(403, "\n\nForbidden by robots.txt\n\n\n")


def main(args=None):
    """Handle one CGI request: show the source of the requested script.

    The request path is taken from the REQUEST_URI environment variable,
    made relative to the script's base path, checked against robots.txt and
    then resolved against the site root. Every outcome ends in serve(), so
    this function does not return normally.

    args -- accepted for backward compatibility; not used.
    """
    path = os.environ.get('REQUEST_URI') or (base + '/')
    # Make the path relative to the script's base and drop any query string,
    # which is not part of the filesystem path.
    path = path[len(base):].split('?', 1)[0]

    # Refuse parent-directory segments: they would escape the site root and
    # sidestep the robots.txt rules checked below.
    if '..' in path.split('/'):
        serve(403, "\n\nForbidden\n\n\n")

    # Directories to ignore--parse robots.txt
    _check_robots(path)

    if path == '/':
        homepage()
    elif path == '/misc/feedback':
        serve(404, "\n\nNot found!\n\n")

    path = os.path.join(site, path[1:])

    # If it's a directory, try to serve the directory index
    if os.path.isdir(path):
        for fn in indexen:
            filename = os.path.join(path, fn)
            if os.path.isfile(filename):
                _serve_source(filename)
        serve(404, "\n\nNot Found (no CGI in directory)\n\n\n")

    # Check that it's a .cgi or .py file
    if path.endswith(suffixen):
        if os.path.isfile(path):
            _serve_source(path)
        # The path originates from the request, so escape it before echoing
        # it into an HTML response.
        serve(404, "\n\nNot Found (%s: not a file)\n\n\n" % _escape_html(path))

    # See if we can get a valid CGI by adding an extension
    for ext in suffixen:
        if os.path.isfile(path + ext):
            _serve_source(path + ext)

    # See if we can get a valid CGI by trimming a trailing slash
    # @@ could hook this up with metagen to deliver a canonical URI
    if path.endswith('/'):
        trimmed = path.rstrip('/')
        for ext in suffixen:
            if os.path.isfile(trimmed + ext):
                _serve_source(trimmed + ext)

    # # Check to see if it's a .htaccess file
    # # @@ Better to allow them to be served normally?
    # if path.endswith('/htaccess'):
    #     htaccess = path[:-8] + '.htaccess'
    #     if os.path.isfile(htaccess):
    #         serve(200, open(htaccess).read(), 'text/plain')

    # Otherwise, we can't find anything we want to serve
    serve(404, "\n\nNot Found\n\n\n")


if __name__ == "__main__":
    main()