#!/usr/bin/env python
"""
codepoints.py - Phenny Codepoints Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re, unicodedata
from itertools import islice

def about(u, cp=None, name=None): 
   if cp is None: 
      cp = ord(u)
   if name is None: 
      try: name = unicodedata.name(u)
      except ValueError: 
         return 'U+%04X (No name found)' % cp

   if not unicodedata.combining(u): 
      template = 'U+%04X %s (%s)'
   else: template = 'U+%04X %s (\xe2\x97\x8c%s)'
   return template % (cp, name, u.encode('utf-8'))

def codepoint_simple(arg): 
   arg = arg.upper()

   r_label = re.compile('\\b' + arg.replace(' ', '.*\\b') + '\\b')

   results = []
   for cp in xrange(0xFFFF): 
      u = unichr(cp)
      try: name = unicodedata.name(u)
      except ValueError: continue

      if r_label.search(name): 
         results.append((len(name), u, cp, name))
   if not results: 
      r_label = re.compile('\\b' + arg.replace(' ', '.*\\b'))
      for cp in xrange(0xFFFF): 
         u = unichr(cp)
         try: name = unicodedata.name(u)
         except ValueError: continue

         if r_label.search(name): 
            results.append((len(name), u, cp, name))

   if not results: 
      return None

   length, u, cp, name = sorted(results)[0]
   return about(u, cp, name)

def codepoint_extended(arg): 
   arg = arg.upper()
   try: r_search = re.compile(arg)
   except: raise ValueError('Broken regexp: %r' % arg)

   for cp in xrange(1, 0x10FFFF): 
      u = unichr(cp)
      name = unicodedata.name(u, '-')

      if r_search.search(name): 
         yield about(u, cp, name)

def u(phenny, input): 
   """Look up unicode information."""
   arg = input.bytes[3:]
   # phenny.msg('#inamidst', '%r' % arg)
   if not arg: 
      return phenny.reply('You gave me zero length input.')
   elif not arg.strip(' '): 
      if len(arg) > 1: return phenny.reply('%s SPACEs (U+0020)' % len(arg))
      return phenny.reply('1 SPACE (U+0020)')

   # @@ space
   if set(arg.upper()) - set(
      'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$'): 
      printable = False
   elif len(arg) > 1: 
      printable = True
   else: printable = False

   if printable: 
      extended = False
      for c in '.?+*{}[]\\/^$': 
         if c in arg: 
            extended = True
            break

      if len(arg) == 4: 
         try: u = unichr(int(arg, 16))
         except ValueError: pass
         else: return phenny.say(about(u))

      if extended: 
         # look up a codepoint with regexp
         results = list(islice(codepoint_extended(arg), 4))
         for i, result in enumerate(results): 
            if (i < 2) or ((i == 2) and (len(results) < 4)): 
               phenny.say(result)
            elif (i == 2) and (len(results) > 3): 
               phenny.say(result + ' [...]')
         if not results: 
            phenny.reply('Sorry, no results')
      else: 
         # look up a codepoint freely
         result = codepoint_simple(arg)
         if result is not None: 
            phenny.say(result)
         else: phenny.reply("Sorry, no results for %r." % arg)
   else: 
      text = arg.decode('utf-8')
      # look up less than three podecoints
      if len(text) <= 3: 
         for u in text: 
            phenny.say(about(u))
      # look up more than three podecoints
      elif len(text) <= 10: 
         phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
      else: phenny.reply('Sorry, your input is too long!')
u.commands = ['u']
u.example = '.u 203D'

def bytes(phenny, input): 
   """Show the input as pretty printed bytes."""
   b = input.bytes
   phenny.reply('%r' % b[b.find(' ') + 1:])
bytes.commands = ['bytes']
bytes.example = '.bytes \xe3\x8b\xa1'

if __name__ == '__main__': 
   print __doc__.strip()