#!/usr/bin/python
"""
ircsoundex.py - A quick IRC Soundex implementation
Usage: ./ircsoundex.py
Author: Sean B. Palmer, inamidst.com
License: "I really couldn't care less what anyone does with this,
but claiming that they wrote it would be just sad and in asserting
my right of intellectual property I assert my right to publicly
ridicule anyone who does." - http://www.hackcraft.net/rssvalid.xsl
"""

import sys
import re

# People often use |status modifiers (e.g. "nick|away"), so only the first
# run of [A-Za-z0-9_-] characters is used: any leading characters outside
# that set are skipped, and everything after the first run is ignored.
r_soundex = re.compile(r'^(?:[^A-Za-z0-9_-]+)?([A-Za-z0-9_-]+)')

# Cf. http://www.archives.gov/research_room/genealogy/census/soundex.html
# This implementation is too greedy on the side-by-side rules, and doesn't
# apply it for the first character. Consonantal separators aren't checked
# either.
soundexMap = {
    'b': 1, 'f': 1, 'p': 1, 'v': 1,
    'c': 2, 'g': 2, 'j': 2, 'k': 2, 'q': 2, 's': 2, 'x': 2, 'z': 2,
    'd': 3, 't': 3,
    'l': 4,
    'm': 5, 'n': 5,
    'r': 6,
}


def ircSoundex(nick):
    """Return a five-character Soundex-style code for an IRC nick.

    The code has the form ``X-ddd``: the upper-cased first character of the
    nick's leading [A-Za-z0-9_-] run, a dash, and up to three digits taken
    from ``soundexMap``, right-padded with zeros. A nick containing no such
    run yields ``'0-000'``.

    Known simplifications (kept for compatibility with existing codes):
    a digit is dropped whenever it equals the previously emitted digit, even
    if unmapped characters separate the two letters, and the first
    character's own code is never compared against the following letter.
    """
    m = r_soundex.match(nick.lower())
    if not m:
        return '0-000'
    shortnick = m.group(1)

    result = shortnick[0].upper() + '-'
    for char in shortnick[1:]:
        # Characters without a Soundex code (vowels, digits, '_', '-', ...)
        # contribute nothing.
        if char in soundexMap:
            code = str(soundexMap[char])
            # Collapse repeats of the most recently emitted digit.
            if not result.endswith(code):
                result += code
        # One initial + dash + three digits: the code is complete.
        if len(result) > 4:
            break

    # Short codes are padded with zeros up to the fixed width of five.
    return result.ljust(5, '0')


def main(args=None):
    """Print "<nick> <code>" for each argument, or the usage text if none.

    ``args`` defaults to ``sys.argv[1:]`` when not supplied.
    """
    if args is None:
        args = sys.argv[1:]

    if args:
        for arg in args:
            # A single pre-formatted argument keeps the output identical
            # under both the Python 2 print statement and Python 3 print().
            print('%s %s' % (arg, ircSoundex(arg)))
    else:
        print(__doc__)


if __name__ == "__main__":
    main()