Below is the file 'syllables.py' from this revision. You can also download the file.
import dbhash
import config


class Syllables:
    """Syllable counter backed by a word database with a heuristic fallback.

    Counts are read from the database opened from ``config.word_db``.
    Tokens missing from the database are estimated by counting vowel
    groups, and each estimate is memoised in ``self.cache`` for the
    lifetime of the instance.
    """

    # Characters treated as vowels by the fallback estimator.  The
    # apostrophe is included deliberately (kept from the original
    # heuristic).
    _VOWELS = frozenset("aeiouy'")

    def __init__(self):
        """Create an empty estimate cache and open the word database."""
        # token -> estimated syllable count, for tokens not in the database
        self.cache = {}
        self.db = dbhash.open(config.word_db)

    def __lookup(self, token):
        """Return the syllable count of a single word.

        The database value (converted with ``int``) wins when the token
        is present; otherwise the cached or freshly computed estimate is
        returned.
        """
        # ``in`` replaces the deprecated ``has_key()`` on both mappings.
        if token in self.db:
            return int(self.db[token])
        if token not in self.cache:
            self.cache[token] = self.__syllable_estimate(token)
        return self.cache[token]

    def lookup(self, token):
        """Return the total syllable count of ``token``.

        A token may (due to upwriting) consist of one or more words, so
        it is split on single spaces and the per-word counts are summed.
        """
        return sum(self.__lookup(word) for word in token.split(' '))

    def __syllable_estimate(self, token):
        """Last resort syllable counter.

        Reasonably accurate in English.  Allegedly works for French.

        Returns 0 for an empty token, 1 for tokens of up to three
        characters, and otherwise the number of vowel groups (minus one
        for a trailing silent 'e'), never less than 1.
        """
        if not token:
            return 0
        if len(token) <= 3:
            return 1

        # Compare case-insensitively so that capitalised or upper-case
        # tokens are counted the same way as their lower-case form.
        word = token.lower()

        # Each run of consecutive vowels counts as one syllable.
        count = 0
        in_vowel_run = False
        for char in word:
            is_vowel = char in self._VOWELS
            if is_vowel and not in_vowel_run:
                count += 1
            in_vowel_run = is_vowel

        # A trailing 'e' (except after 'l', as in "table") or a trailing
        # "es" is usually silent.  Indexing is safe: len(token) > 3 here.
        silent_e = (word[-1] == 'e' and word[-2] != 'l') or word.endswith('es')
        if count > 1 and silent_e:
            count -= 1

        # Every non-empty word has at least one syllable.
        return max(count, 1)