#!/usr/bin/env python """ xhtmlmodels.py - XHTML Content Models Author: Sean B. Palmer, inamidst.com """ import os, re r_element = re.compile('(?<=element )([A-Za-z0-9]+)') def elements(text): return frozenset(text.split(' | ')) def format(elems): return '(' + ' & '.join(elem + '*' for elem in sorted(elems)) + ')' # Transcribed from the XHTML 1.0 Strict DTD: # http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd # Cf. http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_XHTML-1.0-Strict special_pre = elements('br | span | bdo | map') special = special_pre | elements('object | img') fontstyle = elements('tt | i | b | big | small') phrase = elements('em | strong | dfn | code | q | samp | kbd ' + '| var | cite | abbr | acronym | sub | sup') inline_forms = elements('input | select | textarea | label | button') misc_inline = elements('ins | del | script') misc = elements('noscript') | misc_inline inline = elements('a') | special | fontstyle | phrase | inline_forms Inline = elements('text') | inline | misc_inline heading = elements('h1 | h2 | h3 | h4 | h5 | h6') lists = elements('ul | ol | dl') blocktext = elements('pre | hr | blockquote | address') block = elements('p') | heading | elements('div') | lists | \ blocktext | elements('fieldset | table') Block = block | elements('form') | misc Flow = elements('text') | block | elements('form') | inline | misc a_content = elements('text') | special | fontstyle | phrase | \ inline_forms | misc_inline pre_content = elements('text | a') | fontstyle | phrase | \ special_pre | misc_inline | inline_forms form_content = block | misc button_content = elements('text | p') | heading | elements('div') | \ lists | blocktext | elements('table') | special | \ fontstyle | phrase | misc if os.path.exists('xhtml.rnc'): schema = open('xhtml.rnc') subset = frozenset(r_element.findall(schema.read()) + ['text']) schema.close() else: subset = frozenset() def main(): print "Subset: " print subset print print "Flow - Block: " print (Flow - Block) & subset print print "Block - Flow: " print (Block - Flow) & subset print print "Flow - Inline: " print (Flow - Inline) & subset print print "Inline - Flow: " print (Inline - Flow) & subset print print "Flow - Block - Inline: " print (Flow - Block - Inline) & subset print print "Block - form_content: " print (Block - form_content) & subset print print '* * *' print "Inline - pre_content: " print (Inline - pre_content) & subset print print "pre_content - Inline: " print (pre_content - Inline) & subset print print '* * *' object_content = elements('text | param | form') | block | inline | misc print "Flow - object_content: " print (Flow - object_content) & subset print print "object_content - Flow: " print (object_content - Flow) & subset print print '* * *' print 'Block:', format(Block & subset) print 'Inline:', format(Inline & subset) print 'Block - form_content:', (Block - form_content) & subset print 'Inline - pre_content:', (Inline - pre_content) & subset if __name__=="__main__": main()