#!/usr/bin/env python
# iso-8859-1 to utf-8 Conversion Demonstration
# by Sean B. Palmer. Creative Commons Zero
#
# Runs unchanged on Python 2.6+ and Python 3.


def utf8(latin1):
    """Convert iso-8859-1 encoded data to utf-8 by hand.

    Every iso-8859-1 octet is also its Unicode code point, so the
    conversion only needs the one- and two-byte utf-8 forms:

    * 0x00-0x7F are emitted unchanged;
    * 0x80-0xBF become 0xC2 followed by the same octet;
    * 0xC0-0xFF become 0xC3 followed by the octet minus 0x40.

    Args:
        latin1: the iso-8859-1 data.  Either a byte string (``str`` on
            Python 2, ``bytes`` on Python 3) or, on Python 3, a ``str``
            whose characters each stand for one octet.

    Returns:
        The utf-8 encoding, in the same type as the input: a byte string
        for byte-string input, or a ``str`` with one character per utf-8
        octet for Python 3 ``str`` input.

    Raises:
        ValueError: if ``latin1`` is neither ``str`` nor ``bytes``, or if
            it contains a code point above 0xFF (not iso-8859-1 data).
    """
    if not isinstance(latin1, (str, bytes)):
        raise ValueError('Expected iso-8859-1 encoded string')

    # On Python 2 ``bytes`` is ``str``, so this is always true there.
    is_bytes = isinstance(latin1, bytes)
    if is_bytes:
        # bytearray yields ints on both Python 2 and Python 3.
        octets = bytearray(latin1)
    else:
        octets = [ord(character) for character in latin1]

    encoded = bytearray()
    for octet in octets:
        if octet < 0x80:
            encoded.append(octet)
        elif octet < 0xC0:
            encoded.append(0xC2)
            encoded.append(octet)
        elif octet <= 0xFF:
            encoded.append(0xC3)
            encoded.append(octet - 0x40)
        else:
            # Only reachable for Python 3 text input; the two-byte
            # arithmetic above would silently produce garbage here.
            raise ValueError(
                'Expected iso-8859-1 encoded string, '
                'got code point U+%04X' % octet
            )

    if is_bytes:
        return bytes(encoded)
    # Mirror the input representation: one character per utf-8 octet.
    return encoded.decode('iso-8859-1')


def test():
    """Check utf8() against the built-in codecs for all 256 octets.

    Raises:
        AssertionError: if the hand-rolled conversion disagrees with
            the codec machinery.
    """
    latin1 = bytes(bytearray(range(256)))
    text = latin1.decode('iso-8859-1')
    # Explicit raise rather than ``assert`` so the check survives -O.
    if text.encode('utf-8') != utf8(latin1):
        raise AssertionError('Test Failed')
    print('Test Succeeded')


if __name__ == '__main__':
    test()