Below is the file 'dictionary.py' from this revision. You can also download the file.

#!/usr/bin/env python

import dbhash
import config
import sys

class Word(str):
    """A word: a ``str`` that also carries optional per-word metadata.

    Attributes:
        rhymes: Rhyme data attached to the word; None until something sets it.
            This class stores the value but never interprets it.
        source: Label describing where the word came from; None until set.
        syllables: Property over the stored syllable count; None until set.
    """

    def __init__(self, w):
        # The text is consumed by str.__new__; `w` is accepted here only so
        # that Word(text) is a valid call.
        str.__init__(self)
        self._syllables = None
        self.rhymes = None
        self.source = None

    def update_from(self, from_word):
        """Fill in metadata this word is missing from ``from_word``.

        A value already present (truthy) on this word wins; otherwise the
        value from ``from_word`` is taken.  ``source`` is not copied.
        """
        self._syllables = self._syllables or from_word._syllables
        self.rhymes = self.rhymes or from_word.rhymes

    # The accessors keep their double-underscore (name-mangled) names so the
    # attributes _Word__get_syllables / _Word__set_syllables stay available.
    def __get_syllables(self):
        return self._syllables

    def __set_syllables(self, s):
        self._syllables = s

    syllables = property(__get_syllables, __set_syllables, None,
                         "The number of syllables in this word.")

import shelve

if __name__ == '__main__':

    def estimation_report(word_list=None):
        """Compare estimated syllable counts against known ones and print a summary.

        Each mismatch is reported on stderr; a one-line tally goes to stdout.
        Words with no known count (``info['syllables']`` missing or None) are
        left out of the tally.

        Args:
            word_list: Iterable of word objects to check.  Each needs an
                ``info`` mapping and a ``_Word__syllable_estimate()`` method.
                Defaults to the module-level ``words`` collection.
        """
        # NOTE(review): the Word class visible in this file defines neither
        # `info` nor `__syllable_estimate`; presumably they are supplied by
        # another revision or by the word-source modules -- confirm.
        if word_list is None:
            word_list = words
        ok, misc = 0, 0
        for word in word_list:
            real = word.info.get('syllables')
            if 'syllable_estimate' not in word.info:
                # The estimator is invoked only for its side effect; the
                # result is read back from word.info just below.
                word._Word__syllable_estimate()
            estimate = word.info.get('syllable_estimate')
            if real is None:
                continue
            if real != estimate:
                sys.stderr.write("Miscalculated word %s as %d but should have been %d.\n"
                                 % (word, estimate, real))
                misc += 1
            else:
                ok += 1
        total = ok + misc
        # Guard the percentage: with nothing to compare, the original
        # expression divided by zero.
        if total:
            rate = (float(ok) / total) * 100
        else:
            rate = 0
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("%d/%d correct (estimation had a %d%% success rate)" % (ok, total, rate))

    def generate_db():
        """Store each collected word's syllable count in the word database.

        Opens the database named by ``config.word_db`` and, for every entry of
        the module-level ``words`` collection, writes ``str(word.syllables)``
        under the word itself as key.  The handle is closed even if a write
        fails part-way through.
        """
        # NOTE(review): flag 'w' presumably requires the database file to
        # exist already; 'c' would create it on first run -- confirm intent.
        d = dbhash.open(config.word_db, 'w')
        try:
            for word in words:
                # A word whose count is still None is stored as the text "None".
                d[word] = str(word.syllables)
        finally:
            d.close()

    # Command-line verbs mapped to the functions that implement them.
    commands = {
        'syllable_estimate': estimation_report,
        'generate': generate_db,
    }

    # A missing argument is handled like an unknown command instead of
    # crashing with IndexError on sys.argv[1].
    command = None
    if len(sys.argv) > 1:
        command = commands.get(sys.argv[1])
    if not command:
        sys.stderr.write('%s: command not understood.\n' % (sys.argv[0]))
        sys.exit(1)

    # Word-source modules are imported only once a valid command is known.
    import gcide
    import wiktionary
    modules = [gcide]

    # Maps each distinct word to the first Word object seen for it; later
    # duplicates only contribute metadata the first one lacks.
    words = {}
    for module in modules:
        # Make the source module use this file's Word class.
        module.Word = Word
        for word in module.words():
            to_update = words.setdefault(word, word)
            to_update.source = "dict"
            if to_update is not word:
                to_update.update_from(word)

    command()