# Below is the file 'syllables.py' from this revision. You can also download the file.


import dbhash
import config

class Syllables:
    """Count syllables in tokens, using a word database with a heuristic fallback.

    Counts are read from the database opened from ``config.word_db``.
    Words missing from the database are estimated by counting vowel groups,
    and each estimate is memoised in ``self.cache``.
    """

    # Characters the estimator treats as vowels. The apostrophe is included,
    # so it starts (or extends) a vowel group exactly like a vowel letter.
    _VOWELS = frozenset("aeiouy'")

    def __init__(self):
        # word -> estimated syllable count, for words absent from the database
        self.cache = {}
        self.db = dbhash.open(config.word_db)

    def __lookup(self, token):
        """Return the syllable count for a single word.

        The database is consulted first and its stored value is converted
        with int(). Otherwise the heuristic estimate is computed once and
        then served from the cache.
        """
        # Membership is tested with `in` rather than has_key(): has_key() is
        # deprecated and does not exist on Python 3 mappings.
        if token in self.db:
            return int(self.db[token])
        if token not in self.cache:
            self.cache[token] = self.__syllable_estimate(token)
        return self.cache[token]

    def lookup(self, token):
        """Return the total syllable count for *token*.

        A token may (due to upwriting) be one or more words, so it is split
        on ' ' and the per-word counts are summed. An empty piece that is
        not in the database is estimated as 0 syllables.
        """
        return sum(self.__lookup(word) for word in token.split(' '))

    def __syllable_estimate(self, token):
        """Last resort syllable counter.

        Reasonably accurate in English. Allegedly works for French.

        Returns 0 for an empty token and 1 for tokens of up to three
        characters. Longer tokens are counted as the number of runs of
        consecutive vowel characters, minus one for a trailing silent 'e'
        (a final 'e' not preceded by 'l', or a final 'es'), and never less
        than 1. Matching is case-insensitive.
        """
        if not token:
            return 0
        if len(token) <= 3:
            return 1

        # Lowercase so that uppercase vowels are recognised as well.
        word = token.lower()

        count = 0
        prev_is_vowel = False
        for char in word:
            is_vowel = char in self._VOWELS
            # Only the first character of a run of vowels starts a syllable.
            if is_vowel and not prev_is_vowel:
                count += 1
            prev_is_vowel = is_vowel

        silent_e = ((word[-1] == 'e' and word[-2] != 'l')
                    or word.endswith('es'))
        if count > 1 and silent_e:
            count -= 1

        # Every non-empty word has at least one syllable.
        return max(count, 1)