def normalise(raw):
    r"""Normalise utf-8, double-utf-8, or iso-8859-1 bytes to utf-8 bytes.

    Cf. http://mzz.mine.nu/m/unithing.py.txt
    and http://paste.lisp.org/display/21904,4/raw

    The input is classified as follows:

    * not valid UTF-8: treated as ISO-8859-1 and re-encoded as UTF-8;
    * valid UTF-8 whose decoded text, re-encoded as ISO-8859-1, is itself
      valid UTF-8: treated as double-encoded, and the inner (singly
      encoded) bytes are returned;
    * any other valid UTF-8: returned unchanged.

    This is a heuristic: genuine UTF-8 text that happens to look like a
    double-encoded sequence is collapsed as well.

    Args:
        raw: A byte string (``bytes``; ``str`` on Python 2).

    Returns:
        The UTF-8 encoded byte string.

    >>> normalise(b'\x97') == b'\xc2\x97'
    True
    >>> normalise(b'\xe2\x80\xbd') == b'\xe2\x80\xbd'
    True
    >>> normalise(b'\xc3\xa2\xc2\x80\xc2\xbd') == b'\xe2\x80\xbd'
    True
    >>> normalise(b'\xc3\xa9') == b'\xc3\xa9'
    True
    """
    try:
        text = raw.decode('utf-8')
    except UnicodeDecodeError:
        # Not UTF-8 at all; ISO-8859-1 maps every byte, so this cannot fail.
        return raw.decode('iso-8859-1').encode('utf-8')

    try:
        # Undo one layer of encoding and check that what is left is still
        # UTF-8.  The check must run on the re-encoded *bytes*: calling
        # .decode() on the text object raises AttributeError on Python 3
        # and, on Python 2, goes through an implicit ASCII encode that
        # made the function return ISO-8859-1 bytes for ordinary UTF-8.
        unwrapped = text.encode('iso-8859-1')
        unwrapped.decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Either the text has characters outside ISO-8859-1, or the
        # unwrapped bytes are not UTF-8: the input was singly encoded.
        return raw
    return unwrapped