#!/usr/bin/env python
"""
arithzip.py - Arithmetic Zip
Author: Sean B. Palmer, inamidst.com

@@ May pick up an extra trailing space
"""

import sys

# This works on the principle of tabled zip.
# Every character is mapped to a 6-bit code ("minibyte") and the codes are
# packed back to back, most significant bit first, into 8-bit bytes.
#
#   * codes 0..62 index straight into `table` (' ', A-Z, a-z, 0-9)
#   * code 63 is an escape marker: the code that follows it indexes into
#     `escapes`, the 64 remaining characters in the range 1..127
#
# Known limitation: the final byte is padded with zero bits.  When the number
# of 6-bit codes is congruent to 3 modulo 4 the padding is exactly six bits
# long, which decodes as code 0 -- an extra trailing space.  The format has no
# length field, so this cannot be detected when decoding.
#
# The code runs unchanged on Python 2.7 and Python 3.

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# NOTE: 't' comes before 's' here.  This is harmless because the encoder and
# the decoder share this table, and it is kept as-is because reordering it
# would change the encoded form of every 's' and 't'.
lower = 'abcdefghijklmnopqrtsuvwxyz'
table = ' ' + upper + lower + '0123456789'

# Every character in 1..127 that has no slot of its own in `table`, in
# ascending order.  There are exactly 64 of them, so an index fits in 6 bits.
escapes = ''.join(chr(i) for i in range(1, 128) if chr(i) not in table)

# The one 6-bit value not used by `table`; it announces an escaped character.
_ESCAPE = 63

# Reverse lookups used when encoding.
_TABLE_INDEX = {char: index for index, char in enumerate(table)}
_ESCAPE_INDEX = {char: index for index, char in enumerate(escapes)}


def _binary(n):
    """Return the binary digits of n without padding ('' when n <= 0)."""
    return bin(n)[2:] if n > 0 else ''


def binary(n):
    """Return n as a binary string, left-padded with zeros to 8 digits."""
    return _binary(n).rjust(8, '0')


class BinaryData(object):
    """A growable buffer of 6-bit codes packed into 8-bit bytes.

    Attributes:
        length: number of bits written so far through pack().
        data: list of byte values (ints) holding the packed bits.
    """

    def __init__(self, bytes=None):
        """Create an empty buffer, optionally preloaded with packed bytes.

        Args:
            bytes: previously encoded data to load via raw(), or None.
        """
        self.length = 0
        self.data = []
        if bytes:
            self.raw(bytes)

    def __str__(self):
        return self.encode()

    def encode(self):
        """Return the packed bytes as a string, one character per byte."""
        return ''.join(chr(byte) for byte in self.data)

    def _sextets(self):
        """Return the stored bits as a list of complete 6-character groups.

        Trailing bits that do not fill a whole group are dropped.
        """
        # @@ Make this more efficient
        bits = ''.join(binary(byte) for byte in self.data)
        usable = len(bits) - (len(bits) % 6)
        return [bits[i:i + 6] for i in range(0, usable, 6)]

    def decode(self):
        """Return the text represented by the packed 6-bit codes.

        The result may end with one space that was not in the original text
        (see the module-level note on padding).
        """
        result = []
        # An explicit flag rather than "was the previous code 63?": the
        # escaped code itself may be 63 (DEL), and that must not cause the
        # code after it to be treated as escaped as well.
        escaped = False
        for group in self._sextets():
            index = int(group, 2)
            if escaped:
                result.append(escapes[index])
                escaped = False
            elif index == _ESCAPE:
                escaped = True
            else:
                result.append(table[index])
        return ''.join(result)

    def minibytes(self, spacer=' '):
        """Return the complete 6-bit groups as bit strings joined by spacer."""
        return spacer.join(self._sextets())

    def binaryString(self, spacer=' '):
        """Return the stored bytes as 8-bit strings joined by spacer."""
        return spacer.join(binary(byte) for byte in self.data)

    def pack(self, minibyte):
        """Append one 6-bit code to the buffer.

        Args:
            minibyte: integer in the range 0..63.

        Raises:
            ValueError: if minibyte does not fit in 6 bits.
        """
        if not 0 <= minibyte <= 0x3F:
            raise ValueError("%r does not fit in 6 bits" % (minibyte,))
        # Number of bits already occupied in the last byte: 0, 2, 4 or 6.
        overhang = self.length % 8
        if overhang > 2:
            # Fewer than six bits are free: the high bits of the code fill
            # the rest of the last byte, the low bits start a new byte.
            tail = self.data[-1]
            self.data[-1] = tail | (minibyte >> (overhang - 2))
            self.data.append((minibyte << (10 - overhang)) % 256)
        elif overhang == 2:
            # Exactly six bits are free: the code completes the last byte.
            tail = self.data[-1]
            self.data[-1] = tail | minibyte
        else:
            # Byte boundary: the code takes the top six bits of a new byte.
            self.data.append(minibyte << 2)
        self.length += 6

    def append(self, char):
        """Encode a single character and pack it.

        Args:
            char: a one-character string.

        Raises:
            ValueError: if char is neither in `table` nor in `escapes`.
        """
        if char in _TABLE_INDEX:
            self.pack(_TABLE_INDEX[char])
        elif char in _ESCAPE_INDEX:
            self.pack(_ESCAPE)
            self.pack(_ESCAPE_INDEX[char])
        else:
            raise ValueError("%s is not zippable" % char)

    def feed(self, text):
        """Encode and pack every character of text in order."""
        for char in text:
            self.append(char)

    def raw(self, bytes):
        """Load already packed bytes without re-encoding them.

        Args:
            bytes: an iterable of one-character strings or of integer byte
                values (for example a bytearray or a Python 3 bytes object).
        """
        self.data.extend(
            byte if isinstance(byte, int) else ord(byte) for byte in bytes)


def compress(s):
    """Return s packed into a string of bytes.

    Raises:
        ValueError: if s contains a character outside `table` and `escapes`.
    """
    b = BinaryData()
    b.feed(s)
    return str(b)


def decompress(s):
    """Return the text packed in s; it may carry one extra trailing space."""
    b = BinaryData(s)
    return b.decode()


def test():
    """Print a short packing demo, then round-trip lines read from stdin.

    The interactive loop ends on an empty line, on end of input, on Ctrl-C,
    or when stdin cannot be read.

    Raises:
        Exception: if a line does not survive a compress/decompress cycle.
    """
    b = BinaryData()
    print(b.binaryString())
    for bits in ('111111', '101101', '011011'):
        b.pack(int(bits, 2))
        print(b.binaryString())
    print(b)
    print('')

    b = BinaryData()
    b.feed('Testing!')
    print(b.minibytes())
    print(b)
    print(b.decode())

    while True:
        try:
            data = _read_line('> ')
        except (KeyboardInterrupt, EOFError, IOError):
            # IOError: stdin is closed or otherwise not readable.
            break
        if not data:
            break

        compressed = compress(data)
        decompressed = decompress(compressed)
        # Only the single padding space may be added.  Comparing against
        # rstrip(' ') would reject input that itself ends in spaces.
        if decompressed not in (data, data + ' '):
            raise Exception('%r' % decompressed)
        print(str((float(len(compressed)) / len(data)) * 100) + '%')


def main():
    """Run test() if any command-line argument is given, else print usage."""
    if sys.argv[1:]:
        test()
    else:
        # __doc__ is None when docstrings are stripped (python -OO).
        print((__doc__ or '').strip())


if __name__ == "__main__":
    main()