#!/usr/bin/env python
"""
metagen.py - Generate Metadata Information
Author: Sean B. Palmer, inamidst.com

Reads per-page metadata files from the ``meta`` directory and rewrites the
template regions of the file named on the command line with generated
keyword lists and a sitemap.

NOTE(review): the copy of this file that was reviewed had been collapsed
onto a few physical lines and had HTML markup stripped out of its string
literals. Places where markup had to be restored, or could not be, are
marked NOTE(review) below and should be checked against the original.
"""

try:
    # Enabled before the other imports so that their failures are reported
    # too. cgitb no longer ships with newer interpreters (PEP 594), so its
    # absence must not stop the script from running.
    import cgitb
    cgitb.enable()
except ImportError:
    pass

import os
import re
import sys

try:
    import urlparse
except ImportError:  # Python 3 renamed the module
    import urllib.parse as urlparse

# @@ Specify this on the command line
meta = './meta'
if 'DOCUMENT_ROOT' in os.environ:
    metadir = os.path.join(os.environ['DOCUMENT_ROOT'], './meta')
    if os.path.isdir(metadir):
        meta = metadir

# NOTE(review): template() unpacks THREE groups (opener, name, closer) from
# each match, but this pattern only has two, both empty. The begin/end
# markers (and the name group inside the opener) were presumably lost from
# this copy; they cannot be recovered from the code, so the pattern is left
# exactly as found. Restore it before running main().
t_template = '(?ms)().+?()'
r_template = re.compile(t_template)


def escape(html):
    """Return *html* with ``&`` and ``<`` replaced by character entities."""
    # '&' must be handled first, or the '&' of '&lt;' would be re-escaped.
    html = html.replace('&', '&amp;')
    return html.replace('<', '&lt;')


def lstrip(start, text):
    """Return *text* without the prefix *start*, if it has that prefix."""
    if text.startswith(start):
        return text[len(start):]
    return text


def urijoin(site, *parts):
    """Join *parts* into a normalised path and resolve it against *site*.

    A trailing slash on the joined path survives normalisation. At least
    one part is required (os.path.join raises TypeError otherwise).
    """
    path = os.path.join(*parts)
    normpath = os.path.normpath(path)
    # normpath() drops a trailing slash; put it back, but never double it
    # (normpath('/') is already '/').
    if path.endswith('/') and not normpath.endswith('/'):
        normpath += '/'
    # The normalised path is the one to resolve; the original code computed
    # it and then joined the raw path instead.
    return urlparse.urljoin(site, normpath)


def urisplit(uri):
    """Split *uri* into ``(site, path)``.

    ``site`` is ``scheme://host/``, or ``'/'`` when *uri* has neither a
    scheme nor a host. Query and fragment are discarded.
    """
    scheme, host, path, query, fragid = urlparse.urlsplit(uri)
    if not scheme and not host:
        return '/', path
    return '%s://%s/' % (scheme, host), path


def fn(path):
    """Map a site path to the filename of its metadata file under ``meta``.

    A path ending in ``/`` maps to that directory's ``index`` file.
    """
    if path.endswith('/'):
        path += 'index'
    # Make the path relative so os.path.join() keeps it under ``meta``.
    if path.startswith('/'):
        path = '.' + path
    elif not path.startswith('.'):
        path = './' + path
    return os.path.join(meta, path)


def exists(path):
    """Return True if a metadata file exists for the site path *path*."""
    return os.path.isfile(fn(path))


def _readpairs(filename):
    """Yield ``(key, value)`` for each ``key: value`` line of *filename*.

    Blank lines and lines starting with ``#`` are skipped. A remaining line
    without a ``': '`` separator raises ValueError.
    """
    with open(filename) as f:
        for line in f:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            key, value = line.split(': ', 1)
            yield key, value


def _keywordset(metadata):
    """Return the keywords of *metadata* as a frozenset.

    Accepts both the plain dicts built by fileinfo(), where ``keywords`` is
    a space-separated string, and Metadata instances, where it has already
    been split into a frozenset.
    """
    if 'keywords' not in metadata:
        return frozenset()
    value = metadata['keywords']
    if isinstance(value, (set, frozenset)):
        return frozenset(value)
    return frozenset(value.split(' '))


def _pruneroot(dirs, files):
    """Remove the generator's own entries from a top-level walk listing.

    Both lists are edited in place; removing ``meta`` from *dirs* is what
    stops os.walk() from descending into it. Entries that are absent are
    ignored rather than treated as an error.
    """
    if 'meta' in dirs:
        dirs.remove('meta')
    for name in ('.htaccess', 'index.cgi'):
        if name in files:
            files.remove(name)


class Metadata(dict):
    """Dictionary of the ``key: value`` pairs in one metadata file.

    Construct with either *filename* or a site *path* (resolved via fn());
    the file is parsed immediately. The ``keywords`` value is stored as a
    frozenset of the space-separated words; other values stay strings.
    """

    def __init__(self, filename=None, path=None):
        dict.__init__(self)
        if path is not None:
            filename = fn(path)
        self.filename = filename
        self.parse()

    def parse(self):
        """Load ``self.filename`` into this dictionary."""
        for key, value in _readpairs(self.filename):
            if key == 'keywords':
                value = frozenset(value.split(' '))
            self[key] = value


def fileinfo(path):
    """Return the ``key: value`` pairs of the file *path* as a plain dict.

    Unlike Metadata, every value (including ``keywords``) stays a string.
    """
    return dict(_readpairs(path))


def makelink(uri, metadata, highlight=True):
    """Return an HTML link to *uri* followed by `` - `` and its description.

    The link text is the escaped ``title``. Entries carrying the
    ``highlight`` keyword get a trailing ``*`` unless *highlight* is false;
    entries carrying ``cgi`` or ``svc`` also get a ``(src)`` link to the
    same path under ``inside``. A missing title or description renders as
    an empty string.
    """
    keywords = _keywordset(metadata)
    # NOTE(review): the markup in these three format strings was missing
    # from the reviewed copy (only '%s*', '%s' and ' (src)' survived, which
    # cannot be formatted with the arguments supplied). Plain anchors are
    # the minimal markup that fits those arguments; confirm against the
    # original.
    if highlight and ('highlight' in keywords):
        link = '<a href="%s">%s</a>*'
    else:
        link = '<a href="%s">%s</a>'
    result = link % (uri, escape(metadata.get('title') or ''))
    if ('cgi' in keywords) or ('svc' in keywords):
        site, path = urisplit(uri)
        srcuri = urlparse.urljoin(site, 'inside' + path)
        result += ' (<a href="%s">src</a>)' % srcuri
    return result + ' - ' + escape(metadata.get('description') or '')


def getclass(metadata):
    """Return a `` class="..."`` attribute for the entry's kind, or ``''``.

    The first of act, code, doc, pub, svc found among the keywords wins.
    """
    keywords = _keywordset(metadata)
    for kind in ('act', 'code', 'doc', 'pub', 'svc'):
        if kind in keywords:
            return ' class="%s"' % kind
    return ''


def bykeyword(keyword=None):
    """Scan every metadata file under ``meta``.

    With *keyword* given, return a sorted list of
    ``(path, metafn, metadata)`` tuples for the files carrying that
    keyword. With no keyword, return the sorted list of all keywords in
    use.
    """
    if keyword is None:
        result = set()
    else:
        result = []
    for root, dirs, files in os.walk(meta):
        if root == meta:
            _pruneroot(dirs, files)
        dirname = '/' + root[len(meta):].lstrip('/')
        for filename in files:
            metafn = os.path.join(root, filename)
            metadata = fileinfo(metafn)
            path = os.path.join(dirname, filename)
            # A directory's index file stands for the directory itself.
            if path.endswith('/index'):
                path = path[:-5]
            keywords = _keywordset(metadata)
            if keyword is None:
                result |= keywords
            elif keyword in keywords:
                result.append((path, metafn, metadata))
    return sorted(result)


def sitemap():
    """Return the sitemap text for the files under ``meta``.

    NOTE(review): in the reviewed copy this function groups the files by
    top-level directory into ``smap`` but nothing ever turns ``smap`` into
    output, so the rendering statements appear to have been lost along
    with their markup. Only the surviving logic is kept here; as it stands
    the function returns a string containing just a newline.
    """
    smap = {}
    result = []
    for root, dirs, files in os.walk(meta):
        if root == meta:
            _pruneroot(dirs, files)
        root = lstrip(meta, root).lstrip('/')
        if '/' in root:
            base, rest = root.split('/', 1)
        else:
            base, rest = root, ''
        for filename in files:  # @@ was sorted
            if filename == 'index':
                filename = ''
            if not rest:
                parts = (filename,)
            else:
                parts = (rest, filename)
            smap.setdefault(base, []).append(parts)
    # NOTE(review): rendering of ``smap`` into ``result`` is missing here.
    result.append('')
    return '\n'.join(result) + '\n'


def keywordmap(files):
    """Return an HTML list with one linked item per entry of *files*.

    *files* is a list of ``(path, metafn, metadata)`` tuples as returned
    by bykeyword().

    NOTE(review): the body of this function did not survive in the
    reviewed copy (only the first and last statements remained, and they
    did not parse). This is a reconstruction from getclass() and
    makelink(), which nothing else in the file calls; verify the markup
    against the original.
    """
    result = ['<ul>']
    for path, metafn, metadata in files:
        item = '<li%s>%s</li>' % (getclass(metadata), makelink(path, metadata))
        result.append(item)
    result.append('</ul>')
    return '\n'.join(result)


def template(m):
    """Return the replacement text for one template region match *m*.

    The region named ``sitemap`` receives the sitemap; any other name is
    treated as a keyword and receives that keyword's list of files.
    """
    opener, name, closer = tuple(m.groups())
    if name != 'sitemap':
        files = bykeyword(name)
        return opener + '\n' + keywordmap(files) + closer
    else:
        return opener + '\n' + sitemap() + closer


def main():
    """Regenerate, in place, the template regions of the file in argv[1]."""
    with open(sys.argv[1]) as f:
        data = f.read()
    print("Read %s bytes..." % len(data))

    # Substitute before reopening for writing, so that a failure while
    # generating leaves the file untouched.
    data = r_template.sub(template, data)

    with open(sys.argv[1], 'w') as f:
        f.write(data)
    print("Wrote %s bytes back to %s" % (len(data), sys.argv[1]))


if __name__ == "__main__":
    main()