#!/usr/bin/env python
"""
validate.py - Validate a File or URI
Author: Sean B. Palmer, inamidst.com
"""

import sys
import urllib
import xml.dom.minidom
from optparse import OptionParser

try:
    # Python 3 keeps these helpers in urllib submodules.
    from urllib.parse import quote as _quote
    from urllib.request import urlopen as _urlopen
except ImportError:
    # Python 2 exposes both directly on the urllib module.
    _quote = urllib.quote
    _urlopen = urllib.urlopen

# Wrapper the script strips from doctype identifiers so that only the
# informative middle part (e.g. "XHTML 1.0 Strict") is displayed.
_DOCTYPE_PREFIX = '-//W3C//DTD '
_DOCTYPE_SUFFIX = '//EN'


def validator(uri):
    """Return the validator.w3.org check URL for *uri*.

    The URI is percent-quoted before being placed in the ``uri`` query
    parameter; the remaining parameters request charset auto-detection,
    an inline doctype, verbose mode and XML output.
    """
    template = ("http://validator.w3.org/check?uri=%s"
                "&charset=%%28detect+automatically%%29"
                "&doctype=Inline&verbose=1&output=xml")
    return template % _quote(uri)


def single(seq):
    """Return the only member of *seq*.

    *seq* may be any iterable or iterator.

    Raises:
        ValueError: if *seq* is empty or has more than one member.
    """
    iterator = iter(seq)
    try:
        first = next(iterator)
    except StopIteration:
        # Report emptiness the same way as "too many" instead of leaking a
        # bare StopIteration, which could silently end a calling generator.
        raise ValueError("Sequence had no members")
    try:
        next(iterator)
    except StopIteration:
        return first
    raise ValueError("Sequence had more than one member")


def text(element):
    """Return the concatenated data of *element*'s direct text-node children.

    Text nested inside child elements is not included.
    """
    return ''.join(node.data for node in element.childNodes
                   if node.nodeType == node.TEXT_NODE)


def _parse_report(body):
    """Extract ``(doctype, error_count)`` from the validator's XML response."""
    doc = xml.dom.minidom.parseString(body)

    doctype = text(single(doc.getElementsByTagName("doctype")))
    if (doctype.startswith(_DOCTYPE_PREFIX)
            and doctype.endswith(_DOCTYPE_SUFFIX)):
        doctype = doctype[len(_DOCTYPE_PREFIX):-len(_DOCTYPE_SUFFIX)]

    errors = int(text(single(doc.getElementsByTagName("errors"))))
    return doctype, errors


def validate(uri, verbosity=1):
    """Validate *uri* with the W3C validator, report, and exit the process.

    Args:
        uri: the address handed to the validator service.
        verbosity: 0 prints nothing, 1 prints a one-line verdict, and 2
            additionally prints the raw validator response when the
            document is invalid.

    This function does not return: it calls ``sys.exit(0)`` when the
    validator reports no errors and ``sys.exit(1)`` otherwise.
    """
    response = _urlopen(validator(uri))
    try:
        body = response.read()
    finally:
        # Release the connection even if reading the response fails.
        response.close()

    doctype, errors = _parse_report(body)

    if not errors:
        if verbosity > 0:
            print("Valid %s" % doctype)
        sys.exit(0)

    if verbosity > 0:
        print("Invalid %s (%s errors)" % (doctype or 'HTML', errors))
    if verbosity > 1:
        # On Python 3 the response is a bytes object; decode it so the raw
        # report is printed as text rather than as a b'...' literal.
        if not isinstance(body, str):
            body = body.decode('utf-8', 'replace')
        print(body)  # @@
    sys.exit(1)


def main():
    """Parse the command line and validate the single URI argument.

    ``-q`` suppresses all output and ``-v`` also prints the raw validator
    response for invalid documents; the two options are mutually exclusive.
    If the number of positional arguments is not exactly one, the help text
    is printed instead of validating anything.
    """
    parser = OptionParser(usage="%prog [options] <uri>")
    parser.add_option("-q", "--quiet", dest="quiet",
                      action="store_true", default=False,
                      help="produce no output")
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true", default=False,
                      help="produce much output")
    options, args = parser.parse_args()

    if options.quiet and options.verbose:
        parser.error("-q and -v are mutually exclusive")

    if options.quiet:
        verbosity = 0
    elif options.verbose:
        verbosity = 2
    else:
        verbosity = 1

    if len(args) != 1:
        parser.print_help()
    else:
        validate(args[0], verbosity=verbosity)


if __name__ == "__main__":
    main()