#!/usr/bin/env python
"""
randtext.py - Get some random text from the Web
Author: Sean B. Palmer, inamidst.com

Share and enjoy! Originally intended as pheneliza code; references:
   Use: http://swhack.com/logs/2004-12-31#T17-07-12
   Origin: http://swhack.com/logs/2004-10-04#T20-36-07

Runs on both Python 2 and Python 3.
"""

import re
import sys
import urllib
from contextlib import closing

try:  # Python 3 keeps these in submodules.
    from urllib.request import urlopen as _urlopen
    from urllib.parse import urlencode as _urlencode
except ImportError:  # Python 2 exposes them on the top-level module.
    _urlopen = urllib.urlopen
    _urlencode = urllib.urlencode

# NOTE(review): the pattern here was empty in the version under review, which
# made the comment-stripping step a no-op; it looks like the original
# expression was lost. A non-greedy, dot-matches-newline HTML comment pattern
# is used instead - confirm against the upstream script if available.
r_comments = re.compile(r'(?s)<!--.*?-->')
# Greedy on purpose: drops everything up to and including the *last*
# </head...> or <body...> tag.
r_head = re.compile(r'(?ims).*<(?:/head|body)[^>]*>')
r_tags = re.compile(r'<[^>]+>')
r_ws = re.compile(r'[ \t\r\n]+')
r_text = re.compile(r'[A-Za-z0-9\'\";:,.?! ]+')

uri_mangle = 'http://mangle.ca/mangle.php'

# Form fields POSTed to uri_mangle; rebound below to their urlencoded form.
data = {'frame': 'off', 'popup': 'on', 'gkey': 'key: none',
        'country': '', 'lang': '', 'numword': '3'}
data = _urlencode(data)


def _to_text(raw):
    """Return raw as a native str, decoding bytes read on Python 3."""
    if isinstance(raw, str):
        # Python 2 responses are already str; leave them untouched.
        return raw
    # A fixed-size read may cut a multi-byte sequence, so never fail here.
    return raw.decode('utf-8', 'replace')


def randomURI():
    """POST the form fields to uri_mangle and extract a URI from the reply.

    Returns the text between the first pair of single quotes on the first
    response line that contains 'newwin=', or None if no line qualifies.
    """
    # The urlencoded payload is pure ASCII; Python 3 requires bytes for POST.
    with closing(_urlopen(uri_mangle, data.encode('ascii'))) as u:
        lines = u.readlines()

    for line in lines:
        line = _to_text(line)
        if 'newwin=' not in line:
            continue
        parts = line.split("'", 2)
        # Skip a matching line that carries no quoted value instead of
        # raising IndexError.
        if len(parts) > 1:
            return parts[1]
    return None


def get(uri):
    """Fetch uri and return at most its first 2048 bytes as text."""
    with closing(_urlopen(uri)) as u:
        s = u.read(2048)
    return _to_text(s)


def stripmarkup(s):
    """Return s with comments, the head section and all tags removed.

    Comments go first so that a '>' inside a comment cannot leave debris
    behind when the generic tag pattern is applied.
    """
    s = r_comments.sub('', s)
    s = r_head.sub('', s)
    s = r_tags.sub('', s)
    return s


def gettext(s):
    """Reduce s to plain prose characters with single spaces.

    Whitespace runs are collapsed, every character outside the r_text class
    is dropped, and whitespace is collapsed again because dropping characters
    can leave adjacent spaces.
    """
    s = r_ws.sub(' ', s)
    s = ''.join(r_text.findall(s))
    s = r_ws.sub(' ', s)
    return s


def parse(uri):
    """Fetch uri and return its leading content as markup-free text."""
    s = get(uri)
    s = stripmarkup(s)
    s = gettext(s)
    return s


def main():
    """Print the text of a random page; exit with status 1 if none is found."""
    uri = randomURI()
    if uri is None:
        # Fail clearly rather than handing None to urlopen.
        sys.stderr.write('randtext: could not obtain a random URI\n')
        sys.exit(1)
    # Parenthesised single argument prints identically on Python 2 and 3.
    print(parse(uri))


if __name__ == "__main__":
    main()