#!/usr/bin/env python3
# http://inamidst.com/voynich/stacks
# Created by Sean B. Palmer
import voynich
# Input file holding the glyphs of interest; only its first GLYPH_COUNT bytes
# are read.  NOTE(review): the file name suggests the glyphs are ordered by
# frequency -- confirm against whatever generates it.
FREQUENT_GLYPHS_PATH = "voynich101_frequent.txt"
GLYPH_COUNT = 48

# Interior word lengths (word minus its first and last character) reported.
LENGTHS = tuple(range(1, 10))


def load_top_glyphs(path=FREQUENT_GLYPHS_PATH, count=GLYPH_COUNT):
    """Return the distinct glyphs found in the first *count* bytes of *path*.

    The bytes are decoded as ISO-8859-1, so every byte maps to exactly one
    character.  Duplicates are dropped and first-seen order is kept, which
    makes the order of the report rows reproducible from run to run (a plain
    ``set`` of strings iterates in a hash-randomised order).
    """
    # Binary mode on purpose: no newline translation, exactly `count` bytes.
    with open(path, "rb") as handle:
        text = handle.read(count).decode("iso-8859-1")
    return list(dict.fromkeys(text))


def count_positions(words, glyphs, lengths=LENGTHS):
    """Count how often each glyph occurs at each position inside words.

    Args:
        words: iterable of words; each must support ``len``, slicing and
            iteration (presumably ``str`` -- verify against
            ``voynich.words()``).
        glyphs: iterable of the glyphs to tally.
        lengths: interior lengths to tally; other lengths are skipped.

    Returns:
        ``{glyph: {length: [count_at_pos_0, ..., count_at_pos_length-1]}}``
        with an entry for every glyph and every length, zero-filled.
    """
    tallies = {
        glyph: {size: [0] * size for size in lengths} for glyph in glyphs
    }
    wanted_sizes = set(lengths)
    for word in words:
        if len(word) < 3:
            continue
        # Only the interior is analysed: the first and last character of
        # every word are discarded before measuring length and position.
        interior = word[1:-1]
        size = len(interior)
        if size not in wanted_sizes:
            continue
        for index, char in enumerate(interior):
            rows = tallies.get(char)
            if rows is not None:
                rows[size][index] += 1
    return tallies


def to_percentages(tallies):
    """Convert raw counts into percentages of each glyph's own total.

    For one glyph, all cells across all lengths sum to 100 (up to float
    rounding).  A glyph that was never seen keeps integer ``0`` in every
    cell instead of triggering a division by zero.  The input is not
    modified; a new nested dict of the same shape is returned.
    """
    percentages = {}
    for glyph, rows in tallies.items():
        total = sum(sum(row) for row in rows.values())
        if total == 0:
            percentages[glyph] = {
                size: [0] * len(row) for size, row in rows.items()
            }
        else:
            percentages[glyph] = {
                size: [count / total * 100 for count in row]
                for size, row in rows.items()
            }
    return percentages


def main():
    """Print a tab-separated glyph / length / position / frequency table."""
    glyphs = load_top_glyphs()
    stacks = to_percentages(count_positions(voynich.words(), glyphs))

    print("glyph\tlength\tposition\tfrequency")
    for glyph in glyphs:
        for length in LENGTHS:
            # Positions are reported 1-based; glyphs as their code point.
            for position, frequency in enumerate(stacks[glyph][length], 1):
                print("%s\t%s\t%s\t%s" % (ord(glyph), length, position, frequency))


if __name__ == "__main__":
    main()