#!/usr/bin/env python3
# http://inamidst.com/voynich/related
# Created by Sean B. Palmer

import math
from collections import Counter

def chomp(octets):
    # Strip a single trailing LF (0x0A) from a line of bytes
    if octets:
        if octets[-1] == 10:
            return octets[:-1]
    return octets

def voynich_words():
    # Yield each line of the transcription file as a bytes "word"
    with open("voynich101_comma.txt", "rb") as f:
        for line in f:
            yield chomp(line)

frequency = Counter()

def create_model():
    # Build a unigram model mapping each word to its relative frequency,
    # and tally per-character frequencies into the global Counter as a side effect
    counts = Counter()
    total_count = 0
    model = {}
    for word in voynich_words():
        for char in word:
            frequency[char] += 1
        counts[word] += 1
        total_count += 1
    unique_words = len(counts)
    for (word, count) in counts.most_common(unique_words):
        probability = float(count) / total_count
        model[word] = probability
    return model

model = create_model()

def entropy(words):
    # Per-word cross-entropy (bits) under the unigram model, with linear
    # interpolation smoothing: a word's probability is L1 * model[word]
    # plus a uniform share of Lunk over a nominal vocabulary of V items
    L1 = 0.95
    Lunk = 1 - L1
    unk = 0
    V = 1000000
    W = 0
    H = 0
    for word in words:
        W += 1
        P = Lunk / V
        if word in model:
            P += L1 * model[word]
        else:
            unk += 1
        H += -math.log(P, 2)
    # H / W is entropy
    # coverage is (W - unk) / W
    return H / W

# First 48 bytes of the frequent-character list, one transcription character per byte
with open("voynich101_frequent.txt", "rb") as f:
    top48 = f.read(48)

print("original\treplacement\tdifference")
normal = entropy(voynich_words())
for a in top48:
    a = bytes([a])
    for b in top48:
        b = bytes([b])
        # Replace every occurrence of character a with character b,
        # then measure how far the per-word entropy drops relative to the baseline
        words = (word.replace(a, b) for word in voynich_words())
        e = entropy(words)
        difference = normal - e
        a2 = ord(a)
        b2 = ord(b)
        # f = frequency[a2]
        print("%s\t%s\t%s" % (a2, b2, difference))
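
# Usage sketch (assumptions, not from the original source: the script is
# saved as related.py, and "voynich101_comma.txt" and
# "voynich101_frequent.txt" are present in the working directory):
#
#     python3 related.py > related.tsv
#
# Each output row holds the byte values of the original and replacement
# characters followed by the entropy difference; a larger difference means
# merging the two characters compresses the word distribution more, i.e.
# the characters behave more like variants of one another.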