The unified diff between revisions [343614b1..] and [9813855c..] is displayed below. It can also be downloaded as a raw diff.

#
#
# patch "poet.py"
#  from [8fc0409455e4684e4399bb60610aa856e5284a2c]
#    to [8c69f59a5aade5a86a641bcd91151490975f36d7]
#
============================================================
--- poet.py	8fc0409455e4684e4399bb60610aa856e5284a2c
+++ poet.py	8c69f59a5aade5a86a641bcd91151490975f36d7
@@ -14,6 +14,7 @@ import sha
 import string
 import math
 import sha
+import sys
 import os

 class SymbolLibrary:
@@ -236,6 +237,8 @@ def haiku(doc, form=[5,7,5]):
     markov = doc.symbol_state.forward_markov

     def generate_line(target, state):
+	logger.log("starting line: %s" % target)
+
 	state = tuple(state)
 	line = []
 	count = 0
@@ -250,29 +253,57 @@ def haiku(doc, form=[5,7,5]):
 	    raise Exception("Failed to pick a number - 'total' miscalcuation?")

 	if len(state) < markov.size:
+	    logger.log("start calc sp")
 	    # we'll have to pick a starting point
-	    possible = filter(lambda seq: sum(map(syl, seq)) < form[0], markov.scores)
-	    total_possible = sum(map(lambda seq: markov.scores[seq].total, possible))
-	    seq = pickfrom(possible, total_possible, lambda seq: markov.scores[seq].total)
+
+	    # pre-filtering start sequences by syllable count (below) is the most accurate, but way too slow
+	    #
+	    #possible = filter(lambda seq: sum(map(syl, seq)) < form[0], markov.scores)
+	    #total_possible = sum(map(lambda seq: markov.scores[seq].total, possible))
+	    #seq = pickfrom(possible, total_possible, lambda seq: markov.scores[seq].total)
+	    #
+
+	    seq = pickfrom(markov.scores, markov.total, lambda seq: markov.scores[seq].total)
 	    count += sum(map(syl, seq))
 	    line += list(seq)
 	    state = seq
+	    logger.log("end calc sp")
 	elif len(state) > markov.size:
 	    state = state[-1*markov.size:]

 	while count < target:
 	    maxsize = target - count
 	    score = markov.scores[state]
-	    possible = filter(lambda id: syl(id) <= maxsize, score.scores)
+
+	    # A candidate token is a dead end when it leaves the line short of
+	    # `target` yet the state it would lead to has no entry in
+	    # markov.scores, i.e. nothing is known to follow it. Filtering those
+	    # out up front means we don't fall down such holes so often.
+	    def is_not_deadend(id):
+		next_count = count + syl(id)
+		if next_count == target: return True  # completes the line exactly
+		next_state = (state + (id,))[1:]  # append id, drop the oldest entry
+		next_score = markov.scores.get(next_state)
+		return next_score is not None  # identity test; never ==/!= against None
+
+	    possible = set(filter(lambda id: syl(id) <= maxsize and is_not_deadend(id), score.scores))
+	    print target, rv, state, score.scores.keys(), possible
 	    total_possible = sum(map(lambda tok: score.scores[tok], possible))
 	    if not possible:
 		break
+
 	    token = pickfrom(possible, total_possible, lambda seq: score.scores[seq])
 	    count += syl(token)
 	    state = (state + (token,))[1:]
 	    line.append(token)
-	return line

+	if count != target:
+	    logger.log ("count=%d, target=%d :-(" % (count, target))
+	    return None
+	else:
+	    return line
+
+    logger.log("starting haiku generation")
     rv = []
     last_line= []
     for length in form:
@@ -283,12 +314,26 @@ def haiku(doc, form=[5,7,5]):
 	last_line = line
     return rv

+class Log:  # minimal elapsed-time logger that prints to stdout
+    def __init__(self):  # both reference times start at construction (naive UTC)
+	self.start_time = self.last_time = datetime.datetime.utcnow()
+    def log(self, s):  # print s with time since construction and since the last log() call
+	n = datetime.datetime.utcnow()
+	print "%s : +%s : %s" % (n - self.start_time, n - self.last_time, s)
+	self.last_time = n  # the next call's "+delta" is measured from here
+
+logger = Log()  # shared module-level instance; NOTE(review): relies on `import datetime`, not visible in this diff - confirm
+
 if __name__ == '__main__':
     uri = 'http://glamdring.local/~grahame/rss'
+    logger.log("start")  # first entry; its +delta is time since logger was created
     doc = document_for_uri(uri)
+    logger.log("doc loaded")  # +delta here covers document_for_uri()
     h = haiku(doc)
+    logger.log("haiku done")  # +delta here covers haiku()
     if not h:
 	sys.exit(1)
-    print "Woo, haiku is:"
+    logger.log("print haiku")  # replaces the old banner print
     for seq in h:
 	print doc.write_ids(seq)
+    logger.log("complete")  # +delta here covers writing out the lines