The unified diff between revisions [0eaf66b5..] and [0e5cf8ce..] is displayed below. It can also be downloaded as a raw diff.
#
#
# add_file "poetweb.py"
# content [eee7ff20b7c61a2fa7d7d1eba4ec741a343cf535]
#
# add_file "web.py"
# content [5a70283ce26a8d90373c31b3c2eb35cd39e4d7a5]
#
# patch "config.py"
# from [1028ab910fddc15e627d9e16fcd47d75bd0954ec]
# to [695a5fb88edd1190cefe0d3958e8358d46e1376b]
#
# patch "dictionary.py"
# from [ded8d84b5aaeecac34aa38ad943654147268e55e]
# to [0aed82d6ac98f925ab0c727331ab434eb9ec6cc9]
#
# patch "forms.py"
# from [b631b2a653d8c57334c492d8e87cd9296ac07fa4]
# to [617182cc4818a6d7954d3e90cb2eaaf160967643]
#
# patch "generators.py"
# from [550b478b5c306ae773cd42e8e5aff775cb0b8aed]
# to [1ec26b4d1209e302e47cfb0b1b6f2bd3e857e592]
#
# patch "poet3.py"
# from [3c5adc98809bbd5a6df624911190c02c311f2057]
# to [d4f25eefe3759a6223471ed27257554aa594a1da]
#
# patch "storage.py"
# from [bff5648f025b275e05135246f0c56236953eaef4]
# to [15df0aaa6f37d41868a4e7b75beb834d7caea7e2]
#
# patch "syllables.py"
# from [1ddbe0eb3425f3b5cc513e8af525d583ac771aff]
# to [77a91fa93c05a98c5b8efb24a081e921b42d59a6]
#
# patch "symbolstate.py"
# from [83408e4998ed66268e39b916225c46e89b976cdb]
# to [a144283fbc3b059a8e34856c43012fe514eb7305]
#
# set "poetweb.py"
# attr "mtn:execute"
# value "true"
#
============================================================
--- poetweb.py eee7ff20b7c61a2fa7d7d1eba4ec741a343cf535
+++ poetweb.py eee7ff20b7c61a2fa7d7d1eba4ec741a343cf535
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+import datetime
+import urlparse
+import web
+from poet3 import Poet3
+from forms import haiku
+import cPickle
+import config
+
+site_id_re = r'[A-Za-z\.]+'
+# specific to livejournal-based sites
+username_re = r'[A-Za-z0-9\_\-]+'
+
+def newstyle_lj_address(site, username):
+    """
+    Returns the RSS feed URI for a LiveJournal `username`.
+
+    Usernames that begin or end with '-' or '_' are addressed under
+    users.livejournal.com/<username>/; all others get their own
+    <username>.livejournal.com host. `site` is accepted so the function
+    matches the other `rss_lookup` entries, but it is not used here.
+    """
+    edge_chars = (username[:1], username[-1:])
+    if '-' in edge_chars or '_' in edge_chars:
+        journal_root = 'http://users.livejournal.com/%s/' % username
+    else:
+        journal_root = 'http://%s.livejournal.com/' % username
+    return journal_root + 'data/rss'
+
+def test_address(site, username):
+ return "http://glamdring.local/~grahame/rss"
+
+rss_lookup = {
+ 'livejournal.com' : newstyle_lj_address,
+ 'test' : test_address,
+}
+
+urls = (
+ '/(' + site_id_re + ')/(' + username_re + ')' + '/haiku', 'view',
+)
+
+def gen_haiku(rss_uri):
+ poet = Poet3(rss_uri)
+ last_update = poet.storage.mtime('last_update')
+ if datetime.datetime.now() - last_update > config.update_interval:
+ web.debug("calling poet.update()")
+ poet.update()
+ # FIXME this doesn't work..
+ poet.storage.open('last_update', 'w').write('')
+ poet.build_state()
+ web.debug("debug: poet.upwritten_seqs=%s" % (poet.upwritten_seqs))
+
+ # if there has been no upwriting yet, then let's do some now.
+ # we save the resulting combined tokens, so the code uses those
+ # when adding new tokens to the corpus from now on
+ if not poet.upwritten_seqs:
+ to_upwrite = poet.symbol_state.chunkable()
+ cPickle.dump(to_upwrite, poet.storage.open('upwritten.txt', 'w'))
+ poet.upwrite(to_upwrite)
+
+ for attempt in xrange(config.haiku_attempts):
+ poem = haiku(poet)
+ if poem != None:
+ break
+
+ return attempt, poem
+
+class Renderer:
+ def __init__(self):
+ # any templates that can be inherited from, should be added to the list here
+ self.templates = [ ('base.html', 'base') ]
+ self._templates_loaded = False
+
+ # these variables will be available to any template
+ self.terms = {
+ 'context' : web.context, # fugly
+ 'dynamic_uri_path' : config.dynamic_uri_path,
+ 'dynamic_join' : lambda path: urlparse.urljoin(config.dynamic_uri_path, path),
+ 'static_uri_path' : config.static_uri_path,
+ 'static_join' : lambda path: urlparse.urljoin(config.static_uri_path, path),
+ }
+
+ def load_templates(self):
+ if self._templates_loaded: return
+ for template, mod_name in self.templates:
+ web.render(template, None, True, mod_name)
+ self._templates_loaded = True
+
+ def render(self, template, **kwargs):
+ self.load_templates()
+ terms = self.terms.copy()
+ terms.update(kwargs)
+ web.render(template, terms)
+
+renderer = Renderer()
+
+class view:
+ def GET(self, site, username):
+ if not rss_lookup.has_key(site):
+ ## fixme; this is a site we don't know about; let's return a friendly
+ ## page suggesting the user might want to ask about adding it
+ return web.notfound()
+ uri = rss_lookup[site](site, username)
+ attempts, haiku = gen_haiku(uri)
+ renderer.render("haiku.html",
+ attempts=attempts,
+ haiku=(' '.join(t) for t in haiku),
+ page_title="your haiku",
+ username=username)
+
+if __name__ == '__main__':
+ web.internalerror = web.debugerror
+ web.run(urls, web.reloader)
+
============================================================
--- web.py 5a70283ce26a8d90373c31b3c2eb35cd39e4d7a5
+++ web.py 5a70283ce26a8d90373c31b3c2eb35cd39e4d7a5
@@ -0,0 +1,1701 @@
+#!/usr/bin/env python
+"""web.py: makes web apps (http://webpy.org)"""
+__version__ = "0.133"
+__license__ = "Affero General Public License, Version 1"
+__author__ = "Aaron Swartz <me@aaronsw.com>"
+
+from __future__ import generators
+
+# long term todo:
+# - new form system
+# - new templating system
+# - unit tests?
+
+# todo:
+# - get rid of upvars
+# - move documentation into docstrings
+# - provide an option to use .write()
+# - add ip:port support
+# - allow people to do $self.id from inside a reparam
+# - add sqlite support
+# - make storage a subclass of dictionary
+# - convert datetimes, floats in WebSafe
+# - locks around memoize
+# - fix memoize to use cacheify style techniques
+# - merge curval query with the insert
+# - figure out how to handle squid, etc. for web.ctx.ip
+
+import os, os.path, sys, time, types, traceback
+import cgi, re, urllib, urlparse, Cookie, pprint
+from threading import currentThread
+from tokenize import tokenprog
+iters = (list, tuple)
+if hasattr(__builtins__, 'set'): iters += (set,)
+try: from sets import Set; iters += (Set,)
+except ImportError: pass
+try: import datetime, itertools
+except ImportError: pass
+try:
+ from Cheetah.Compiler import Compiler
+ from Cheetah.Filters import Filter
+ _hasTemplating = True
+except ImportError:
+ _hasTemplating = False
+
+try:
+ from DBUtils.PooledDB import PooledDB
+ _hasPooling = True
+except ImportError:
+ _hasPooling = False
+
+# hack for compatibility with Python 2.3:
+if not hasattr(traceback, 'format_exc'):
+ from cStringIO import StringIO
+ def format_exc(limit=None):
+ s = StringIO()
+ traceback.print_exc(limit, s)
+ return s.getvalue()
+ traceback.format_exc = format_exc
+
+## general utils
+
+def _strips(direction, text, remove):
+    """
+    Strips one occurrence of `remove` from one end of `text`.
+
+    `direction` is 'l' (strip a leading `remove`) or 'r' (strip a
+    trailing `remove`); any other value raises `ValueError`. If `text`
+    doesn't start/end with `remove`, or `remove` is empty, `text` is
+    returned unchanged.
+    """
+    if direction == 'l':
+        if text.startswith(remove): return text[len(remove):]
+    elif direction == 'r':
+        # guard against an empty `remove`: text[:-0] would be text[:0],
+        # i.e. the empty string, instead of the untouched text
+        if remove and text.endswith(remove): return text[:-len(remove)]
+    else:
+        # string exceptions are rejected by newer interpreters, so raise
+        # a real exception class instead
+        raise ValueError("WrongDirection: Needs to be r or l.")
+    return text
+
+def rstrips(text, remove):
+ """removes the string `remove` from the right of `text`"""
+ return _strips('r', text, remove)
+def lstrips(text, remove):
+    """removes the string `remove` from the left of `text`"""
+    # the parameters used to be named (a, b) while the body referred to
+    # `text` and `remove`, so every call failed with a NameError
+    return _strips('l', text, remove)
+def strips(a, b):
+ """removes the string `remove` from the both sides of `text`"""
+ return rstrips(lstrips(a,b),b)
+
+def autoassign(self, locals):
+ """
+ Automatically assigns local variables to `self`.
+ Generally used in `__init__` methods, as in:
+
+ def __init__(self, foo, bar, baz=1): autoassign(self, locals())
+ """
+ #locals = sys._getframe(1).f_locals
+ #self = locals['self']
+ for (k, v) in locals.iteritems():
+ if k == 'self': continue
+ setattr(self, k, v)
+
+class Storage(dict):
+ """
+ A Storage object is like a dictionary except `obj.foo` can be used
+ instead of `obj['foo']`. Create one by doing `storage({'a':1})`.
+ """
+ def __getattr__(self, k):
+ if self.has_key(k): return self[k]
+ raise AttributeError, repr(k)
+ def __setattr__(self, k, v): self[k] = v
+ def __repr__(self): return '<Storage '+dict.__repr__(self)+'>'
+
+storage = Storage
+
+def storify(f, *requireds, **defaults):
+    """
+    Creates a `storage` object from the mapping `f`, failing on the
+    `f[k]` lookup (a `KeyError` when `f` is a plain dict) if `f` doesn't
+    have all of the keys in `requireds`, and using the default values for
+    keys found in `defaults`.
+
+    For example, `storify({'a':1, 'c':3}, b=2, c=0)` will return the equivalent of
+    `storage({'a':1, 'b':2, 'c':3})`.
+    """
+    stor = Storage()
+
+    # copy every key of `f`; listing `requireds` first makes a missing
+    # required key fail on the lookup below
+    for k in requireds + tuple(f.keys()):
+        v = f[k]
+        # a list value collapses to its last item
+        if isinstance(v, list): v = v[-1]
+        # unwrap objects carrying a `.value` attribute
+        # (presumably cgi form fields -- confirm against callers)
+        if hasattr(v, 'value'): v = v.value
+        setattr(stor, k, v)
+
+    for (k,v) in defaults.iteritems():
+        result = v
+        # a value already taken from `f` wins over the default
+        if hasattr(stor, k): result = stor[k]
+        # a default of `()` asks for a tuple: wrap a lone value in one
+        if v == () and not isinstance(result, tuple): result = (result,)
+        setattr(stor, k, result)
+
+    return stor
+
+class memoize:
+ """
+ "Memoizes" a function, caching its return values for each input.
+ """
+ def __init__(self, func): self.func = func; self.cache = {}
+ def __call__(self, *a, **k):
+ key = (a, tuple(k.items()))
+ if key not in self.cache: self.cache[key] = self.func(*a, **k)
+ return self.cache[key]
+
+re_compile = memoize(re.compile) #@@ threadsafe?
+re_compile.__doc__ = """
+A memoized version of re.compile.
+"""
+
+class _re_subm_proxy:
+ def __init__(self): self.match = None
+ def __call__(self, match): self.match = match; return ''
+
+def re_subm(pat, repl, string):
+ """Like re.sub, but returns the replacement _and_ the match object."""
+ r = re_compile(pat)
+ proxy = _re_subm_proxy()
+ r.sub(proxy.__call__, string)
+ return r.sub(repl, string), proxy.match
+
+def group(seq, size):
+ """
+ Returns an iterator over a series of lists of length size from iterable.
+
+ For example, `list(group([1,2,3,4], 2))` returns `[[1,2],[3,4]]`.
+ """
+ if not hasattr(seq, 'next'): seq = iter(seq)
+ while True: yield [seq.next() for i in xrange(size)]
+
+class iterbetter:
+    """
+    Returns an object that can be used as an iterator
+    but can also be used via __getitem__ (although it
+    cannot go backwards -- that is, you cannot request
+    `iterbetter[0]` after requesting `iterbetter[1]`).
+
+    `self.i` is the wrapped iterator and `self.c` counts the items
+    consumed so far, i.e. it is the index of the next item.
+    Requesting an index that was already passed, or one beyond the
+    end of the iterator, raises `KeyError`.
+    """
+    def __init__(self, iterator): self.i, self.c = iterator, 0
+    def __iter__(self):
+        while 1: yield self.i.next(); self.c += 1
+    def __getitem__(self, i):
+        #todo: slices
+        # indexes below the cursor have been consumed and are gone
+        if i < self.c: raise KeyError("already passed "+str(i))
+        try:
+            # skip forward until the cursor sits on the requested index
+            while i > self.c: self.i.next(); self.c += 1
+            # now self.c == i
+            item = self.i.next()
+            self.c += 1
+            return item
+        except StopIteration: raise KeyError(repr(i))
+
+def dictreverse(d):
+ """Takes a dictionary like `{1:2, 3:4}` and returns `{2:1, 4:3}`."""
+ return dict([(v,k) for k,v in d.iteritems()])
+
+def dictfind(dictionary, element):
+ """
+ Returns a key whose value in `dictionary` is `element`
+ or, if none exists, None.
+ """
+ for (k,v) in dictionary.iteritems():
+ if element is v: return k
+
+def dictincr(dictionary, element):
+ """
+ Increments `element` in `dictionary`,
+ setting it to one if it doesn't exist.
+ """
+ dictionary.setdefault(element, 0)
+ dictionary[element] += 1
+ return dictionary[element]
+
+def dictadd(a, b):
+ """Returns a dictionary consisting of the keys in `a` and `b`."""
+ result = {}
+ result.update(a)
+ result.update(b)
+ return result
+
+sumdicts = dictadd # deprecated
+
+def listget(l, n, default=None):
+ """Returns `l[n]` if it exists, `default` otherwise."""
+ if len(l)-1 < n: return default
+ return l[n]
+
+def upvars(n=2):
+ """Guido van Rossum doesn't want you to use this function."""
+ return dictadd(
+ sys._getframe(n).f_globals,
+ sys._getframe(n).f_locals)
+
+class capturestdout:
+ """
+ Captures everything func prints to stdout and returns it instead.
+
+ **WARNING:** Not threadsafe!
+ """
+ def __init__(self, func): self.func = func
+ def __call__(self, *args, **kw):
+ from cStringIO import StringIO
+ # Not threadsafe!
+ out = StringIO()
+ oldstdout = sys.stdout
+ sys.stdout = out
+ try: self.func(*args, **kw)
+ finally: sys.stdout = oldstdout
+ return out.getvalue()
+
+class profile:
+    """
+    Profiles `func` and returns a tuple containing its output
+    and a string with human-readable profiling information.
+
+    The wrapped function is called with whatever positional and
+    keyword arguments the `profile` object is called with.
+    """
+    def __init__(self, func): self.func = func
+    def __call__(self, *args, **kw):
+        import hotshot, hotshot.stats, tempfile, time
+        # hotshot writes its log to a file; a named temp file gives it a path
+        temp = tempfile.NamedTemporaryFile()
+        prof = hotshot.Profile(temp.name)
+
+        stime = time.time()
+        # forward keyword arguments too (they used to be dropped)
+        result = prof.runcall(self.func, *args, **kw)
+        stime = time.time() - stime
+
+        prof.close()
+        stats = hotshot.stats.load(temp.name)
+        stats.strip_dirs()
+        stats.sort_stats('time', 'calls')
+        x = '\n\ntook '+ str(stime) + ' seconds\n'
+        # the stats object prints to stdout, so capture it into the report
+        x += capturestdout(stats.print_stats)(40)
+        x += capturestdout(stats.print_callers)()
+        return result, x
+
+def tryall(context, prefix=None):
+ """
+ Tries a series of functions and prints their results.
+ `context` is a dictionary mapping names to values;
+ the value will only be tried if it's callable.
+
+ For example, you might have a file `test/stuff.py`
+ with a series of functions testing various things in it.
+ At the bottom, have a line:
+
+ if __name__ == "__main__": tryall(globals())
+
+ Then you can run `python test/stuff.py` and get the results of
+ all the tests.
+ """
+ context = context.copy() # vars() would update
+ results = {}
+ for (k, v) in context.iteritems():
+ if not hasattr(v, '__call__'): continue
+ if prefix and not k.startswith(prefix): continue
+ print k+':',
+ try:
+ r = v()
+ dictincr(results, r)
+ print r
+ except:
+ print 'ERROR'
+ dictincr(results, 'ERROR')
+ print ' '+'\n '.join(traceback.format_exc().split('\n'))
+
+ print '-'*40
+ print 'results:'
+ for (k, v) in results.iteritems():
+ print ' '*2, str(k)+':', v
+
+class threadeddict:
+ """
+ Takes a dictionary that maps threads to objects.
+ When a thread tries to get or set an attribute or item
+ of the threadeddict, it passes it on to the object
+ for that thread in dictionary.
+ """
+ def __init__(self, d): self.__dict__['_threadeddict__d'] = d
+ def __getattr__(self, a): return getattr(self.__d[currentThread()], a)
+ def __getitem__(self, i): return self.__d[currentThread()][i]
+ def __setattr__(self, a, v): return setattr(self.__d[currentThread()], a, v)
+ def __setitem__(self, i, v): self.__d[currentThread()][i] = v
+ def __hash__(self): return hash(self.__d[currentThread()])
+
+## url utils
+
+def prefixurl(base=''):
+ """
+ Sorry, this function is really difficult to explain.
+ Maybe some other time.
+ """
+ url = context.path.lstrip('/')
+ for i in xrange(url.count('/')): base += '../'
+ if not base: base = './'
+ return base
+
+urlquote = urllib.quote
+
+## formatting
+
+try:
+ from markdown import markdown # http://webpy.org/markdown.py
+except ImportError: pass
+
+r_url = re_compile('(?<!\()(http://(\S+))')
+def safemarkdown(text):
+    """
+    Converts text to HTML following the rules of Markdown, but blocking any
+    outside HTML input, so that only the things supported by Markdown
+    can be used. Also converts raw URLs to links.
+
+    Empty or `None` text is returned unchanged.
+
+    (requires [markdown.py](http://webpy.org/markdown.py))
+    """
+    if text:
+        # escape every '<' so user-supplied tags reach Markdown as text
+        text = text.replace('<', '&lt;')
+        # TODO: automatically get page title?
+        # wrap bare URLs in <...>, which Markdown renders as links
+        text = r_url.sub(r'<\1>', text)
+        text = markdown(text)
+    return text
+
+## db api
+
+def _interpolate(format):
+ """
+ Takes a format string and returns a list of 2-tuples of the form
+ (boolean, string) where boolean says whether string should be evaled
+ or not.
+
+ from http://lfw.org/python/Itpl.py (public domain, Ka-Ping Yee)
+ """
+ def matchorfail(text, pos):
+ match = tokenprog.match(text, pos)
+ if match is None:
+ raise ItplError(text, pos)
+ return match, match.end()
+
+ namechars = "abcdefghijklmnopqrstuvwxyz" \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
+ chunks = []
+ pos = 0
+
+ while 1:
+ dollar = format.find("$", pos)
+ if dollar < 0: break
+ nextchar = format[dollar+1]
+
+ if nextchar == "{":
+ chunks.append((0, format[pos:dollar]))
+ pos, level = dollar+2, 1
+ while level:
+ match, pos = matchorfail(format, pos)
+ tstart, tend = match.regs[3]
+ token = format[tstart:tend]
+ if token == "{": level = level+1
+ elif token == "}": level = level-1
+ chunks.append((1, format[dollar+2:pos-1]))
+
+ elif nextchar in namechars:
+ chunks.append((0, format[pos:dollar]))
+ match, pos = matchorfail(format, dollar+1)
+ while pos < len(format):
+ if format[pos] == "." and \
+ pos+1 < len(format) and format[pos+1] in namechars:
+ match, pos = matchorfail(format, pos+1)
+ elif format[pos] in "([":
+ pos, level = pos+1, 1
+ while level:
+ match, pos = matchorfail(format, pos)
+ tstart, tend = match.regs[3]
+ token = format[tstart:tend]
+ if token[0] in "([": level = level+1
+ elif token[0] in ")]": level = level-1
+ else: break
+ chunks.append((1, format[dollar+1:pos]))
+
+ else:
+ chunks.append((0, format[pos:dollar+1]))
+ pos = dollar + 1 + (nextchar == "$")
+
+ if pos < len(format): chunks.append((0, format[pos:]))
+ return chunks
+
+def sqlors(left, lst):
+    """
+    `left` is a SQL clause like `tablename.arg = `
+    and `lst` is a list of values. Returns a reparam-style
+    pair featuring the SQL that ORs together the clause
+    for each item in the lst.
+
+    For example:
+
+        web.sqlors('foo =', [1,2,3])
+
+    would result in:
+
+        foo = 1 OR foo = 2 OR foo = 3
+
+    An empty collection returns `("", [])`; a single value (or a
+    one-item collection) returns just the clause for that value.
+
+    contributed by Steven Huffman <http://spez.name>
+    """
+    if isinstance(lst, iters):
+        # nothing to OR together; the old `not list` test looked at the
+        # builtin `list` and therefore never fired
+        if not lst: return "", []
+        # unwrap a one-item collection (via list() so sets work too)
+        if len(lst) == 1: lst = list(lst)[0]
+    if isinstance(lst, iters):
+        return '(' + left + (' OR ' + left).join([aparam() for x in lst]) + ")", lst
+    else:
+        return left + aparam(), [lst,]
+
+class UnknownParamstyle(Exception): pass
+def aparam():
+ """Use in a SQL string to make a spot for a db value."""
+ p = ctx.db_module.paramstyle
+ if p == 'qmark': return '?'
+ elif p == 'numeric': return ':1'
+ elif p in ['format', 'pyformat']: return '%s'
+ raise UnknownParamstyle, p
+
+def reparam(s, d):
+ """
+ Takes a string and a dictionary and interpolates the string
+ using values from the dictionary. Returns a 2-tuple containing
+ the a string with `aparam()`s in it and a list of the matching values.
+
+ You can pass this sort of thing as a clause in any db function.
+ Otherwise, you can pass a dictionary to the keyword argument `vars`
+ and the function will call reparam for you.
+ """
+ vals = []
+ result = []
+ for live, chunk in _interpolate(s):
+ if live:
+ result.append(aparam())
+ vals.append(eval(chunk, d))
+ else: result.append(chunk)
+ return ''.join(result), vals
+
+class UnknownDB(Exception): pass
+def connect(dbn, **kw):
+ """
+ Connects to the specified database.
+ db currently must be "postgres" or "mysql".
+ If DBUtils is installed, connection pooling will be used.
+ """
+ if dbn == "postgres":
+ try: import psycopg2 as db
+ except ImportError:
+ try: import psycopg as db
+ except ImportError: import pgdb as db
+ kw['password'] = kw['pw']
+ del kw['pw']
+ kw['database'] = kw['db']
+ del kw['db']
+ elif dbn == "mysql":
+ import MySQLdb as db
+ kw['passwd'] = kw['pw']
+ del kw['pw']
+ db.paramstyle = 'pyformat' # it's both, like psycopg
+ else: raise UnknownDB, dbn
+ ctx.db_name = dbn
+ ctx.db_module = db
+ ctx.db_transaction = False
+ if _hasPooling:
+ if 'db' not in globals(): globals()['db'] = PooledDB(dbapi=db, **kw)
+ ctx.db = globals()['db'].connection()
+ else:
+ ctx.db = db.connect(**kw)
+ ctx.dbq_count = 0
+ if globals().get('db_printing'):
+ def db_execute(cur, q, d=None):
+ ctx.dbq_count += 1
+ try: outq = q % tuple(d)
+ except: outq = q
+ print>>debug, str(ctx.dbq_count)+':', outq
+ a = time.time()
+ out = cur.execute(q, d)
+ b = time.time()
+ print>>debug, '(%s)' % round(b-a, 2)
+ return out
+ ctx.db_execute = db_execute
+ else:
+ ctx.db_execute = lambda cur, q, d=None: cur.execute(q, d)
+ return ctx.db
+
+def transact():
+ """Start a transaction."""
+ # commit everything up to now, so we don't rollback it later
+ ctx.db.commit()
+ ctx.db_transaction = True
+
+def commit():
+ """Commits a transaction."""
+ ctx.db.commit()
+ ctx.db_transaction = False
+
+def rollback():
+ """Rolls back a transaction."""
+ ctx.db.rollback()
+ ctx.db_transaction = False
+
+def query(q, vars=None, processed=False):
+ """
+ Execute SQL query `q` using dictionary `vars` to interpolate it.
+ If `processed=True`, `vars` is a `reparam`-style list to use
+ instead of interpolating.
+ """
+ if vars is None: vars = {}
+ d = ctx.db.cursor()
+
+ if not processed: q, vars = reparam(q, vars)
+ ctx.db_execute(d, q, vars)
+ if d.description:
+ names = [x[0] for x in d.description]
+ def iterwrapper():
+ x = d.fetchone()
+ while x:
+ yield Storage(dict(zip(names, x)))
+ x = d.fetchone()
+ out = iterbetter(iterwrapper())
+ out.__len__ = lambda: int(d.rowcount)
+ out.list = lambda: [Storage(dict(zip(names, x))) for x in d.fetchall()]
+ else:
+ out = d.rowcount
+
+ if not ctx.db_transaction: ctx.db.commit()
+ return out
+
+def sqllist(l):
+ """
+ If a list, converts it to a comma-separated string.
+ Otherwise, returns the string.
+ """
+ if isinstance(l, str): return l
+ else: return ', '.join(l)
+
+def select(tables, vars=None, what='*', where=None, order=None, group=None,
+ limit=None, offset=None):
+ """
+ Selects `what` from `tables` with clauses `where`, `order`,
+ `group`, `limit`, and `offset. Uses vars to interpolate.
+ Otherwise, each clause can take a reparam-style list.
+ """
+ if vars is None: vars = {}
+ values = []
+ qout = "SELECT "+what+" FROM "+sqllist(tables)
+
+ for (sql, val) in (
+ ('WHERE', where),
+ ('GROUP BY', group),
+ ('ORDER BY', order),
+ ('LIMIT', limit),
+ ('OFFSET', offset)):
+ if isinstance(val, (int, long)):
+ if sql == 'WHERE':
+ nquery, nvalue = 'id = '+aparam(), [val]
+ else:
+ nquery, nvalue = str(val), ()
+ elif isinstance(val, (list, tuple)) and len(val) == 2:
+ nquery, nvalue = val
+ elif val:
+ nquery, nvalue = reparam(val, vars)
+ else: continue
+ qout += " "+sql+" " + nquery
+ values.extend(nvalue)
+ return query(qout, values, processed=True)
+
+def insert(tablename, seqname=None, **values):
+ """
+ Inserts `values` into `tablename`. Returns current sequence ID.
+ Set `seqname` to the ID if it's not the default, or to `False`
+ if there isn't one.
+ """
+ d = ctx.db.cursor()
+
+ if values:
+ q, v = "INSERT INTO %s (%s) VALUES (%s)" % (
+ tablename,
+ ", ".join(values.keys()),
+ ', '.join([aparam() for x in values])
+ ), values.values()
+ else:
+ q, v = "INSERT INTO %s DEFAULT VALUES" % tablename, None
+
+ if seqname is False: pass
+ elif ctx.db_name == "postgres":
+ if seqname is None: seqname = tablename + "_id_seq"
+ q += "; SELECT currval('%s')" % seqname
+ elif ctx.db_name == "mysql":
+ q += "; SELECT last_insert_id()"
+ elif ctx.db_name == "sqlite":
+ # not really the same...
+ q += "; SELECT last_insert_rowid()"
+
+ ctx.db_execute(d, q, v)
+ try: out = d.fetchone()[0]
+ except: out = None
+
+ if not ctx.db_transaction: ctx.db.commit()
+ return out
+
+def update(tables, where, vars=None, **values):
+    """
+    Update `tables` with clause `where` (interpolated using `vars`)
+    and setting `values`.
+
+    `where` may be an integer (matched against the `id` column), a
+    reparam-style `(sql, values)` pair, or a string that is interpolated
+    with `vars`. Returns the cursor's `rowcount`.
+    """
+    if vars is None: vars = {}
+    if isinstance(where, (int, long)):
+        vars = [where]
+        where = "id = "+aparam()
+    elif isinstance(where, (list, tuple)) and len(where) == 2:
+        where, vars = where
+    else:
+        where, vars = reparam(where, vars)
+
+    d = ctx.db.cursor()
+    # SET parameters come first in the statement, then the WHERE ones;
+    # list(vars) lets a reparam-style pair carry its values as a tuple
+    ctx.db_execute(d, "UPDATE %s SET %s WHERE %s" % (
+        sqllist(tables),
+        ', '.join([k+'='+aparam() for k in values.keys()]),
+        where),
+        values.values()+list(vars))
+
+    if not ctx.db_transaction: ctx.db.commit()
+    return d.rowcount
+
+def delete(table, where, using=None, vars=None):
+    """
+    Deletes from `table` with clauses `where` and `using`.
+
+    `where` may be an integer (matched against the `id` column), a
+    reparam-style `(sql, values)` pair, or a string that is interpolated
+    with `vars`. `using` is a table name or list of table names.
+    Returns the cursor's `rowcount`.
+    """
+    if vars is None: vars = {}
+    d = ctx.db.cursor()
+
+    if isinstance(where, (int, long)):
+        vars = [where]
+        where = "id = "+aparam()
+    elif isinstance(where, (list, tuple)) and len(where) == 2:
+        # unpack the pair itself (this used to read an undefined `val`)
+        where, vars = where
+    else:
+        where, vars = reparam(where, vars)
+    # SQL wants the USING list before the WHERE condition
+    q = 'DELETE FROM %s' % table
+    if using: q += ' USING '+sqllist(using)
+    q += ' WHERE '+where
+    ctx.db_execute(d, q, vars)
+
+    if not ctx.db_transaction: ctx.db.commit()
+    return d.rowcount
+
+## request handlers
+
+def handle(mapping, fvars=None):
+ """
+ Call the appropriate function based on the url to function mapping in `mapping`.
+ If no module for the function is specified, look up the function in `fvars`. If
+ `fvars` is empty, using the caller's context.
+
+ `mapping` should be a tuple of paired regular expressions with function name
+ substitutions. `handle` will import modules as necessary.
+ """
+ for url, ofno in group(mapping, 2):
+ if isinstance(ofno, tuple): ofn, fna = ofno[0], list(ofno[1:])
+ else: ofn, fna = ofno, []
+ fn, result = re_subm('^'+url+'$', ofn, context.path)
+ if result: # it's a match
+ if fn.split(' ', 1)[0] == "redirect":
+ url = fn.split(' ', 1)[1]
+ if context.method == "GET":
+ x = context.environ.get('QUERY_STRING', '')
+ if x: url += '?'+x
+ return redirect(url)
+ elif '.' in fn:
+ x = fn.split('.')
+ mod, cls = '.'.join(x[:-1]), x[-1]
+ mod = __import__(mod, globals(), locals(), [""])
+ cls = getattr(mod, cls)
+ else:
+ cls = fn
+ mod = fvars or upvars()
+ if isinstance(mod, types.ModuleType): mod = vars(mod)
+ try: cls = mod[cls]
+ except KeyError: return notfound()
+
+ meth = context.method
+ if meth == "HEAD":
+ if not hasattr(cls, meth): meth = "GET"
+ if not hasattr(cls, meth): return nomethod(cls)
+ tocall = getattr(cls(), meth)
+ args = list(result.groups())
+ for d in re.findall(r'\\(\d+)', ofn):
+ args.pop(int(d)-1)
+ return tocall(*([urllib.unquote(x) for x in args]+fna))
+
+ return notfound()
+
+def autodelegate(prefix=''):
+    """
+    Returns a method that takes one argument and calls the method named prefix+arg,
+    calling `notfound()` if there isn't one. Example:
+
+        urls = ('/prefs/(.*)', 'prefs')
+
+        class prefs:
+            GET = autodelegate('GET_')
+            def GET_password(self): pass
+            def GET_privacy(self): pass
+
+    `GET_password` would get called for `/prefs/password` while
+    `GET_privacy` gets called for `/prefs/privacy`.
+    """
+    def internal(self, arg):
+        # the target method is looked up on the instance at call time
+        func = prefix+arg
+        if hasattr(self, func): return getattr(self, func)()
+        else: return notfound()
+    return internal
+
+## http defaults
+
+def expires(delta):
+    """
+    Outputs an `Expires` header for `delta` from now.
+    `delta` is a `timedelta` object or a number of seconds
+    (int, long or float).
+
+    Raises `Exception` if the `datetime` module could not be imported.
+    """
+    try: datetime
+    except NameError: raise Exception("requires Python 2.3 or later")
+    if isinstance(delta, (int, long, float)):
+        delta = datetime.timedelta(seconds=delta)
+    o = datetime.datetime.utcnow() + delta
+    # %T is a platform-specific strftime extension; spell out the
+    # equivalent %H:%M:%S so the header is produced everywhere
+    header('Expires', o.strftime("%a, %d %b %Y %H:%M:%S GMT"))
+
+def lastmodified(d):
+    """
+    Outputs a `Last-Modified` header for the `datetime` object `d`.
+
+    NOTE(review): `d` is formatted as-is and labelled GMT, so it is
+    presumably expected to be in UTC already -- confirm against callers.
+    """
+    # %T is a platform-specific strftime extension; %H:%M:%S is portable
+    header('Last-Modified', d.strftime("%a, %d %b %Y %H:%M:%S GMT"))
+
+"""
+By default, these all return simple error messages that send very short messages
+(like "bad request") to the user. They can and should be overridden
+to return nicer ones.
+"""
+
+def redirect(url, status='301 Moved Permanently'):
+ """
+ Returns a `status` redirect to the new URL.
+ `url` is joined with the base URL so that things like
+ `redirect("about") will work properly.
+ """
+ newloc = urlparse.urljoin(context.home + context.path, url)
+ context.status = status
+ header('Content-Type', 'text/html')
+ header('Location', newloc)
+ # seems to add a three-second delay for some reason:
+ # output('<a href="'+ newloc + '">moved permanently</a>')
+
+def found(url):
+ """A `302 Found` redirect."""
+ return redirect(url, '302 Found')
+
+def seeother(url):
+ """A `303 See Other` redirect."""
+ return redirect(url, '303 See Other')
+
+def tempredirect(url):
+ """A `307 Temporary Redirect` redirect."""
+ return redirect(url, '307 Temporary Redirect')
+
+def badrequest():
+ """Return a `400 Bad Request` error."""
+ context.status = '400 Bad Request'
+ header('Content-Type', 'text/html')
+ return output('bad request')
+
+def notfound():
+ """Returns a `404 Not Found` error."""
+ context.status = '404 Not Found'
+ header('Content-Type', 'text/html')
+ return output('not found')
+
+def nomethod(cls):
+ """Returns a `405 Method Not Allowed` error for `cls`."""
+ context.status = '405 Method Not Allowed'
+ header('Content-Type', 'text/html')
+ header("Allow", ', '.join([x for x in ['GET', 'HEAD', 'POST', 'PUT', 'DELETE'] if hasattr(cls, x)]))
+ return output('method not allowed')
+
+def gone():
+ """Returns a `410 Gone` error."""
+ context.status = '410 Gone'
+ header('Content-Type', 'text/html')
+ return output("gone")
+
+def internalerror():
+ """Returns a `500 Internal Server` error."""
+ context.status = "500 Internal Server Error"
+ context.headers = [('Content-Type', 'text/html')]
+ context.output = "internal server error"
+
+
+# adapted from Django <djangoproject.com>
+# Copyright (c) 2005, the Lawrence Journal-World
+# Used under the modified BSD license:
+# http://www.xfree86.org/3.3.6/COPYRIGHT2.html#5
+
+DJANGO_500_PAGE = """#import inspect
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+ <meta name="robots" content="NONE,NOARCHIVE" />
+ <title>$exception_type at $context.path</title>
+ <style type="text/css">
+ html * { padding:0; margin:0; }
+ body * { padding:10px 20px; }
+ body * * { padding:0; }
+ body { font:small sans-serif; }
+ body>div { border-bottom:1px solid #ddd; }
+ h1 { font-weight:normal; }
+ h2 { margin-bottom:.8em; }
+ h2 span { font-size:80%; color:#666; font-weight:normal; }
+ h3 { margin:1em 0 .5em 0; }
+ h4 { margin:0 0 .5em 0; font-weight: normal; }
+ table { border:1px solid #ccc; border-collapse: collapse; background:white; }
+ tbody td, tbody th { vertical-align:top; padding:2px 3px; }
+ thead th { padding:1px 6px 1px 3px; background:#fefefe; text-align:left; font-weight:normal; font-size:11px; border:1px solid #ddd; }
+ tbody th { text-align:right; color:#666; padding-right:.5em; }
+ table.vars { margin:5px 0 2px 40px; }
+ table.vars td, table.req td { font-family:monospace; }
+ table td.code { width:100%;}
+ table td.code div { overflow:hidden; }
+ table.source th { color:#666; }
+ table.source td { font-family:monospace; white-space:pre; border-bottom:1px solid #eee; }
+ ul.traceback { list-style-type:none; }
+ ul.traceback li.frame { margin-bottom:1em; }
+ div.context { margin: 10px 0; }
+ div.context ol { padding-left:30px; margin:0 10px; list-style-position: inside; }
+ div.context ol li { font-family:monospace; white-space:pre; color:#666; cursor:pointer; }
+ div.context ol.context-line li { color:black; background-color:#ccc; }
+ div.context ol.context-line li span { float: right; }
+ div.commands { margin-left: 40px; }
+ div.commands a { color:black; text-decoration:none; }
+ #summary { background: #ffc; }
+ #summary h2 { font-weight: normal; color: #666; }
+ #explanation { background:#eee; }
+ #template, #template-not-exist { background:#f6f6f6; }
+ #template-not-exist ul { margin: 0 0 0 20px; }
+ #traceback { background:#eee; }
+ #requestinfo { background:#f6f6f6; padding-left:120px; }
+ #summary table { border:none; background:transparent; }
+ #requestinfo h2, #requestinfo h3 { position:relative; margin-left:-100px; }
+ #requestinfo h3 { margin-bottom:-1em; }
+ .error { background: #ffc; }
+ .specific { color:#cc3300; font-weight:bold; }
+ </style>
+ <script type="text/javascript">
+ //<!--
+ function getElementsByClassName(oElm, strTagName, strClassName){
+ // Written by Jonathan Snook, http://www.snook.ca/jon; Add-ons by Robert Nyman, http://www.robertnyman.com
+ var arrElements = (strTagName == "*" && document.all)? document.all :
+ oElm.getElementsByTagName(strTagName);
+ var arrReturnElements = new Array();
+ strClassName = strClassName.replace(/\-/g, "\\-");
+ var oRegExp = new RegExp("(^|\\s)" + strClassName + "(\\s|$)");
+ var oElement;
+ for(var i=0; i<arrElements.length; i++){
+ oElement = arrElements[i];
+ if(oRegExp.test(oElement.className)){
+ arrReturnElements.push(oElement);
+ }
+ }
+ return (arrReturnElements)
+ }
+ function hideAll(elems) {
+ for (var e = 0; e < elems.length; e++) {
+ elems[e].style.display = 'none';
+ }
+ }
+ window.onload = function() {
+ hideAll(getElementsByClassName(document, 'table', 'vars'));
+ hideAll(getElementsByClassName(document, 'ol', 'pre-context'));
+ hideAll(getElementsByClassName(document, 'ol', 'post-context'));
+ }
+ function toggle() {
+ for (var i = 0; i < arguments.length; i++) {
+ var e = document.getElementById(arguments[i]);
+ if (e) {
+ e.style.display = e.style.display == 'none' ? 'block' : 'none';
+ }
+ }
+ return false;
+ }
+ function varToggle(link, id) {
+ toggle('v' + id);
+ var s = link.getElementsByTagName('span')[0];
+ var uarr = String.fromCharCode(0x25b6);
+ var darr = String.fromCharCode(0x25bc);
+ s.innerHTML = s.innerHTML == uarr ? darr : uarr;
+ return false;
+ }
+ //-->
+ </script>
+</head>
+<body>
+
+<div id="summary">
+ <h1>$exception_type at $context.path</h1>
+ <h2>$exception_value</h2>
+ <table><tr>
+ <th>Python</th>
+ <td>$lastframe.filename in $lastframe.function, line $lastframe.lineno</td>
+ </tr><tr>
+ <th>Web</th>
+ <td>$context.method $context.home$context.path</td>
+ </tr></table>
+</div>
+<div id="traceback">
+ <h2>Traceback <span>(innermost first)</span></h2>
+ <ul class="traceback">
+ #for frame in $frames
+ <li class="frame">
+ <code>$frame.filename</code> in <code>$frame.function</code>
+
+ #if $frame.context_line
+ <div class="context" id="c$frame.id">
+ #if $frame.pre_context
+ <ol start="$frame.pre_context_lineno" class="pre-context" id="pre$frame.id">#for line in $frame.pre_context#<li onclick="toggle('pre$frame.id', 'post$frame.id')">$line</li>#end for#</ol>
+ #end if
+ <ol start="$frame.lineno" class="context-line"><li onclick="toggle('pre$frame.id', 'post$frame.id')">$frame.context_line <span>...</span></li></ol>
+ #if $frame.post_context
+ <ol start='$(frame.lineno+1)' class="post-context" id="post$frame.id">#for line in $frame.post_context#<li onclick="toggle('pre$frame.id', 'post$frame.id')">$line</li>#end for#</ol>
+ #end if
+ </div>
+ #end if
+
+ #if $frame.vars
+ <div class="commands">
+ <a href='#' onclick="return varToggle(this, '$frame.id')"><span>▶</span> Local vars</a>## $inspect.formatargvalues(*inspect.getargvalues(frame['tb'].tb_frame))
+ </div>
+ <table class="vars" id="v$frame.id">
+ <thead>
+ <tr>
+ <th>Variable</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ #set frameitems = $frame.vars
+ #silent frameitems.sort(lambda x,y: cmp(x[0], y[0]))
+ #for (key, val) in frameitems
+ <tr>
+ <td>$key</td>
+ <td class="code"><div>$prettify(val)</div></td>
+ </tr>
+ #end for
+ </tbody>
+ </table>
+ #end if
+ </li>
+ #end for
+ </ul>
+</div>
+
+<div id="requestinfo">
+ #if $context_.output or $context_.headers
+ <h2>Response so far</h2>
+ <h3>HEADERS</h3>
+ #if $context.headers
+ <p class="req"><code>
+ #for (k, v) in $context_.headers
+ $k: $v<br />
+ #end for
+
+ </code></p>
+ #else
+ <p>No headers.</p>
+ #end if
+ <h3>BODY</h3>
+ <p class="req" style="padding-bottom: 2em"><code>
+ $context_.output
+ </code></p>
+ #end if
+
+ <h2>Request information</h2>
+
+ <h3>INPUT</h3>
+ #if $input_
+ <table class="req">
+ <thead>
+ <tr>
+ <th>Variable</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ #set myitems = $input_.items()
+ #silent myitems.sort(lambda x,y: cmp(x[0], y[0]))
+ #for (key, val) in myitems
+ <tr>
+ <td>$key</td>
+ <td class="code"><div>$val</div></td>
+ </tr>
+ #end for
+ </tbody>
+ </table>
+ #else
+ <p>No input data.</p>
+ #end if
+
+ <h3 id="cookie-info">COOKIES</h3>
+ #if $cookies_
+ <table class="req">
+ <thead>
+ <tr>
+ <th>Variable</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ #for (key, val) in $cookies_.items()
+ <tr>
+ <td>$key</td>
+ <td class="code"><div>$val</div></td>
+ </tr>
+ #end for
+ </tbody>
+ </table>
+ #else
+ <p>No cookie data</p>
+ #end if
+
+ <h3 id="meta-info">META</h3>
+ <table class="req">
+ <thead>
+ <tr>
+ <th>Variable</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ #set myitems = $context_.items()
+ #silent myitems.sort(lambda x,y: cmp(x[0], y[0]))
+ #for (key, val) in $myitems
+ #if not $key.startswith('_') and $key not in ['env', 'output', 'headers', 'environ', 'status', 'db_execute']
+ <tr>
+ <td>$key</td>
+ <td class="code"><div>$prettify($val)</div></td>
+ </tr>
+ #end if
+ #end for
+ </tbody>
+ </table>
+
+ <h3 id="meta-info">ENVIRONMENT</h3>
+ <table class="req">
+ <thead>
+ <tr>
+ <th>Variable</th>
+ <th>Value</th>
+ </tr>
+ </thead>
+ <tbody>
+ #set myitems = $context_.environ.items()
+ #silent myitems.sort(lambda x,y: cmp(x[0], y[0]))
+ #for (key, val) in $myitems
+ <tr>
+ <td>$key</td>
+ <td class="code"><div>$prettify($val)</div></td>
+ </tr>
+ #end for
+ </tbody>
+ </table>
+
+</div>
+
+<div id="explanation">
+ <p>
+ You're seeing this error because you have <code>web.internalerror</code>
+ set to <code>web.debugerror</code>. Change that if you want a different one.
+ </p>
+</div>
+
+</body>
+</html>"""
+
+def djangoerror():
+    """
+    Builds the Django-style debug page for the exception being handled.
+
+    Walks the traceback from `sys.exc_info()`, records one dictionary per
+    frame (file, function, line number, local variables and the surrounding
+    source lines) and renders `DJANGO_500_PAGE` with them.
+
+    `render` is called without `terms`, so it fills the template from this
+    function's local variables.  The local names below (`frames`,
+    `lastframe`, `exception_type`, `exception_value`, `input_`, `cookies_`,
+    `context_`, `prettify`, ...) are therefore the template's variables and
+    must not be renamed.
+
+    Returns the template object produced by `render(..., asTemplate=True)`.
+    """
+    def _get_lines_from_file(filename, lineno, context_lines):
+        """
+        Returns context_lines before and after lineno from file.
+        `lineno` is the zero-based index of the line of interest.
+        Returns (pre_context_lineno, pre_context, context_line, post_context),
+        or (None, [], None, []) if the file cannot be read or is too short
+        to contain that line.
+        """
+        try:
+            fileobj = open(filename)
+            # close the handle explicitly instead of leaving it to the GC
+            try: source = fileobj.readlines()
+            finally: fileobj.close()
+        except (OSError, IOError):
+            return None, [], None, []
+        if lineno >= len(source):
+            # the file on disk no longer matches the running code
+            return None, [], None, []
+
+        lower_bound = max(0, lineno - context_lines)
+        upper_bound = lineno + context_lines
+
+        pre_context = [line.strip('\n') for line in source[lower_bound:lineno]]
+        context_line = source[lineno].strip('\n')
+        post_context = [line.strip('\n') for line in source[lineno+1:upper_bound]]
+
+        return lower_bound, pre_context, context_line, post_context
+
+    exception_type, exception_value, tb = sys.exc_info()
+    frames = []
+    while tb is not None:
+        filename = tb.tb_frame.f_code.co_filename
+        function = tb.tb_frame.f_code.co_name
+        # tb_lineno counts from 1; the source list is indexed from 0
+        lineno = tb.tb_lineno - 1
+        pre_context_lineno, pre_context, context_line, post_context = _get_lines_from_file(filename, lineno, 7)
+        if pre_context_lineno is not None:
+            # convert the list index back to a line number for display
+            pre_context_lineno += 1
+        frames.append({
+            'tb': tb,
+            'filename': filename,
+            'function': function,
+            # the template prints this and uses it as <ol start=...>,
+            # so it has to be the real (one-based) line number
+            'lineno': lineno + 1,
+            'vars': tb.tb_frame.f_locals.items(),
+            'id': id(tb),
+            'pre_context': pre_context,
+            'context_line': context_line,
+            'post_context': post_context,
+            'pre_context_lineno': pre_context_lineno,
+        })
+        tb = tb.tb_next
+    # the deepest frame is where the exception was raised
+    lastframe = frames[-1]
+    frames.reverse()
+    urljoin = urlparse.urljoin
+    input_ = input()
+    cookies_ = cookies()
+    context_ = context
+    def prettify(x):
+        # pformat calls repr() on arbitrary objects, which may itself raise
+        try: out = pprint.pformat(x)
+        except Exception, e: out = '[could not display: <'+e.__class__.__name__+': '+str(e)+'>]'
+        return out
+    return render(DJANGO_500_PAGE, asTemplate=True, isString=True)
+
+def debugerror():
+    """
+    Drop-in replacement for `internalerror` that answers with a detailed
+    debugging page aimed at the programmer.
+
+    (Based on the beautiful 500 page from [Django](http://djangoproject.com/),
+    designed by [Wilson Miner](http://wilsonminer.com/).)
+
+    Requires [Cheetah](http://cheetahtemplate.org/); without it a plain
+    traceback with an explanatory note is sent instead.
+    """
+    if not _hasTemplating:
+        # Cheetah isn't installed
+        out = """<p>You've set web.py to use the fancier debugerror error messages,
+but these messages require you install the Cheetah template
+system. For more information, see
+<a href="http://webpy.org/">the web.py website</a>.</p>
+
+<p>In the meantime, here's a plain old error message:</p>
+
+<pre>%s</pre>
+
+<p>(If it says something about 'Compiler', then it's probably
+because you're trying to use templates and you haven't
+installed Cheetah. See above.)</p>
+""" % htmlquote(traceback.format_exc())
+    else:
+        # the page has to be built before the response fields are
+        # overwritten below, so it can still show the old ones
+        out = str(djangoerror())
+    context.status = "500 Internal Server Error"
+    context.headers = [('Content-Type', 'text/html')]
+    context.output = out
+
+
+## rendering
+
+# Matches an `#include "file"` directive.  The leading assertion must be a
+# lookbehind: the previous `(?!\\)` lookahead only tested the `#` itself and
+# so never rejected a backslash-escaped `\#include`.
+r_include = re_compile(r'(?<!\\)#include \"(.*?)\"($|#)', re.M)
+def __compiletemplate(template, base=None, isString=False):
+    """
+    Compiles a Cheetah template into a template class and returns it.
+
+    `template` is a file name inside `templates/`, or the template text
+    itself when `isString` is true.  `#include "name"` directives are
+    expanded textually (repeatedly, so includes may nest) before compiling.
+
+    If `base` is given, the compiled class is also stored under that name in
+    `_compiletemplate.bases`, whose entries are made available to every
+    template compiled afterwards.
+    """
+    def read_template(name):
+        # read a file from templates/ and close it again straight away
+        fileobj = open('templates/'+name)
+        try: return fileobj.read()
+        finally: fileobj.close()
+
+    if isString: text = template
+    else: text = read_template(template)
+    # implement #include at compile-time
+    def do_include(match):
+        return read_template(match.groups()[0])
+    while r_include.findall(text): text = r_include.sub(do_include, text)
+
+    # start from the known base templates so the new one can refer to them
+    execspace = _compiletemplate.bases.copy()
+    c = Compiler(source=text, mainClassName='GenTemplate')
+    c.addImportedVarNames(execspace.keys())
+    exec str(c) in execspace
+    if base: _compiletemplate.bases[base] = execspace['GenTemplate']
+
+    return execspace['GenTemplate']
+
+_compiletemplate = memoize(__compiletemplate)
+_compiletemplate.bases = {}
+
+def htmlquote(s):
+    """
+    Encodes `s` for raw use in HTML.
+
+    Replaces `&`, `<`, `>`, `'` and `"` with their HTML entities and
+    returns the new string.
+    """
+    # Must be done first, or the ampersands of the entities produced
+    # below would be escaped a second time.
+    s = s.replace("&", "&amp;")
+    s = s.replace("<", "&lt;")
+    s = s.replace(">", "&gt;")
+    s = s.replace("'", "&#39;")
+    s = s.replace('"', "&quot;")
+    return s
+
+if _hasTemplating:
+    class WebSafe(Filter):
+        """Template output filter that HTML-quotes every substituted value."""
+        def filter(selv, val, **kw):
+            # None renders as nothing rather than as the text "None"
+            if val is not None:
+                return htmlquote(str(val))
+            return ''
+
+def render(template, terms=None, asTemplate=False, base=None, isString=False):
+ """
+ Renders a template, caching where it can.
+
+ `template` is the name of a file containing a template in
+ the `templates/` folder, unless `isString`, in which case it's the template
+ itself.
+
+ `terms` is a dictionary used to fill the template. If it's None, then
+ the caller's local variables are used instead, plus context, if it's not already
+ set, is set to `context`. If it's a list of names, those variables are
+ looked up via `upvars()`. If it's a tuple, it is used as the search list as is.
+
+ If asTemplate is False, it `output`s the template directly. Otherwise,
+ it returns the template object.
+
+ If the template is a potential base template (that is, something other templates
+ can extend), then base should be a string with the name of the template. The
+ template will be cached and made available for future calls to `render`.
+
+ Requires [Cheetah](http://cheetahtemplate.org/).
+ """
+ # terms=['var1', 'var2'] means grab those variables
+ if isinstance(terms, list):
+ new = {}; old = upvars()
+ for k in terms: new[k] = old[k]
+ terms = new
+ # default: grab all locals
+ elif terms is None:
+ terms = {'context': context}
+ # frame 1 is whoever called render(); this only works while the
+ # lookup stays directly inside this function
+ terms.update(sys._getframe(1).f_locals)
+ # terms=d means use d as the searchList
+ if not isinstance(terms, tuple):
+ terms = (terms,)
+
+ # template files ending in .html also set the response content type
+ if not isString and template.endswith('.html'): header('Content-Type','text/html; charset=utf-8')
+
+ t = _compiletemplate(template, base=base, isString=isString)
+ # WebSafe HTML-quotes every value substituted into the template
+ t = t(searchList=terms, filter=WebSafe)
+ if asTemplate: return t
+ else: return output(str(t))
+
+## input forms
+
+def input(*requireds, **defaults):
+    """
+    Returns a `storage` object holding the request's GET and POST arguments.
+    `requireds` and `defaults` are handed to `storify`; see there for
+    their meaning.
+    """
+    if not hasattr(context, '_inputfs'):
+        # parse the request once and keep the result on the context
+        environ = context.environ
+        context._inputfs = cgi.FieldStorage(
+            fp=environ['wsgi.input'],
+            environ=environ,
+            keep_blank_values=1)
+    return storify(context._inputfs, *requireds, **defaults)
+
+## cookies
+
+def setcookie(name, value, expires="", domain=None):
+    """
+    Sets a cookie by adding a `Set-Cookie` header to the response.
+
+    A negative `expires` is replaced by a large negative value.  The cookie
+    path is always `/`; `domain` is only set when given.
+    """
+    # NOTE(review): with the default "" this compares a str to an int --
+    # confirm the intended behaviour for non-numeric values.
+    if expires < 0:
+        expires = -1000000000
+    attrs = {'expires': expires, 'path': '/'}
+    if domain:
+        attrs['domain'] = domain
+    # @@ should we limit cookies to a different path?
+    jar = Cookie.SimpleCookie()
+    jar[name] = value
+    morsel = jar[name]
+    for attr, attr_value in attrs.iteritems():
+        morsel[attr] = attr_value
+    header('Set-Cookie', morsel.OutputString())
+
+def cookies(*requireds, **defaults):
+    """
+    Returns a `storage` object with every cookie sent with the request.
+    `requireds` and `defaults` are handed to `storify`; see there for
+    their meaning.
+    """
+    jar = Cookie.SimpleCookie()
+    raw_header = context.environ.get('HTTP_COOKIE', '')
+    jar.load(raw_header)
+    return storify(jar, *requireds, **defaults)
+
+## WSGI Sugar
+
+def header(h, v):
+    """Queues the response header `h: v` on the current context."""
+    pair = (h, v)
+    context.headers.append(pair)
+def output(s):
+    """Adds `str(s)` to the end of the response body collected so far."""
+    text = str(s)
+    context.output += text
+
+def write(t):
+    """
+    Converts a standard CGI-style string response into `header` and `output` calls.
+
+    `t` is converted with `str()` and split at the first blank line into a
+    header section and a body.  Each `Name: value` header line is passed to
+    `header`, except `Status`, which sets `context.status`.  The body is
+    passed to `output` unchanged.
+
+    Raises ValueError if there is no blank line separating headers from
+    body, or if a header line has no colon.
+    """
+    t = str(t)
+    # Accept both LF and CRLF line endings.  (The earlier
+    # `t.replace('\r\n', '\n')` threw its result away, so CRLF input was
+    # never split.)  Only the header section is normalised; the body may
+    # be binary and is left alone.
+    head, body = re.split(r'\r?\n\r?\n', t, 1)
+
+    for line in head.splitlines():
+        # ''.isspace() is False, so test the stripped line to skip empty
+        # lines as well as whitespace-only ones
+        if not line.strip(): continue
+        h, v = line.split(":", 1)
+        v = v.strip()
+        if h.lower() == "status": context.status = v
+        else: header(h, v)
+
+    output(body)
+
+def webpyfunc(inp, fvars=None, autoreload=False):
+    """
+    Returns a no-argument function for `inp`: `inp` itself when it is
+    callable, otherwise (a url mapping) a function that calls `handle`.
+    """
+    if not fvars: fvars = upvars()
+    if hasattr(inp, '__call__'):
+        return inp
+    if not autoreload:
+        return lambda: handle(inp, fvars)
+    # black magic to make autoreload work:
+    module_name = fvars['__file__'].split(os.path.sep).pop().split('.')[0]
+    mod = __import__(module_name)
+    #@@probably should replace this with some inspect magic
+    name = dictfind(fvars, inp)
+    # fetch the mapping from the module on every call
+    return lambda: handle(getattr(mod, name), mod)
+
+def wsgifunc(func, *middleware):
+ """
+ Returns a WSGI-compatible function from a webpy-function.
+
+ `func` is called with no arguments for every request. Each entry of
+ `middleware` is applied to the resulting WSGI function, except `reloader`,
+ which is taken out of the list and run as a per-request check instead.
+ """
+ # a list, so that reloader can be removed below
+ middleware = list(middleware)
+ if reloader in middleware:
+ relr = reloader(None)
+ relrcheck = relr.check
+ middleware.remove(reloader)
+ else:
+ relr = None
+ relrcheck = lambda: None
+
+ def wsgifunc(e, r):
+ # e is the WSGI environ, r the start_response callable
+ _load(e)
+ relrcheck()
+ result = func()
+ is_generator = result and hasattr(result, 'next')
+ if is_generator:
+ # we need to give wsgi back the headers first,
+ # so we need to do at iteration
+ try: firstchunk = result.next()
+ except StopIteration: firstchunk = ''
+ # copy the response out of the context before _unload() discards it
+ status, headers, output = ctx.status, ctx.headers, ctx.output
+ _unload()
+ r(status, headers)
+ if is_generator: return itertools.chain([firstchunk], result)
+ elif isinstance(output, str): return [output] #@@ other stringlikes?
+ elif hasattr(output, 'next'): return output
+ else: raise Exception, "Invalid web.context.output"
+
+ for x in middleware: wsgifunc = x(wsgifunc)
+
+ # NOTE(review): both returns below hand back the same function, so one
+ # of them looks redundant -- confirm against the original indentation
+ if relr:
+ relr.func = wsgifunc
+ return wsgifunc
+ return wsgifunc
+
+def run(inp, *middleware):
+ """
+ Starts handling requests. If called in a CGI or FastCGI context, it will follow
+ that protocol. If called from the command line, it will start an HTTP
+ server on the port named in the first command line argument, or, if there
+ is no argument, on port 8080.
+
+ If `inp` is a callable, then it's called with no arguments.
+ Otherwise, it's a `mapping` object to be passed to `handle(...)`.
+
+ **Caveat:** So that `reloader` will work correctly, `inp` has to be a variable,
+ it can't be a tuple passed in directly.
+
+ `middleware` is a list of WSGI middleware which is applied to the resulting WSGI
+ function.
+ """
+ autoreload = reloader in middleware
+ # presumably the caller's variables; must be collected here, in run()
+ # itself, before calling further down -- confirm against upvars()
+ fvars = upvars()
+ return runwsgi(wsgifunc(webpyfunc(inp, fvars, autoreload), *middleware))
+
+def runwsgi(func):
+    """
+    Picks a server for the WSGI function `func` -- FCGI, SCGI or the
+    built-in simple web server -- and runs it.
+    """
+    #@@ improve detection
+    env = os.environ
+    has_server_software = 'SERVER_SOFTWARE' in env
+    if has_server_software: # cgi
+        env['FCGI_FORCE_CGI'] = 'Y'
+
+    # PHP_FCGI_CHILDREN is set by lighttpd fastcgi
+    if 'PHP_FCGI_CHILDREN' in env or has_server_software:
+        import flup.server.fcgi
+        return runfcgi(func)
+
+    if 'scgi' in sys.argv:
+        import flup.server.scgi
+        return runscgi(func)
+
+    # command line:
+    port = listget(sys.argv, 1, 8080)
+    return runsimple(func, port)
+
+def runsimple(func, port=8080):
+ """
+ Runs a simple HTTP server hosting WSGI app `func`. The directory `static/` is
+ hosted statically.
+
+ Listens on all interfaces (0.0.0.0) on `port` and serves until interrupted.
+
+ Based on [WsgiServer](http://www.owlfish.com/software/wsgiutils/documentation/wsgi-server-api.html)
+ from [Colin Stewart](http://www.owlfish.com/).
+ """
+ # Copyright (c) 2004 Colin Stewart (http://www.owlfish.com/)
+ # Modified somewhat for simplicity
+ # Used under the modified BSD license:
+ # http://www.xfree86.org/3.3.6/COPYRIGHT2.html#5
+
+ import SimpleHTTPServer, SocketServer, BaseHTTPServer, urlparse
+ import sys, socket, errno
+ import traceback
+
+ class WSGIHandler (SimpleHTTPServer.SimpleHTTPRequestHandler):
+ def runWSGIApp(self):
+ # Builds a WSGI environ from the HTTP request, calls the app and
+ # streams its result back to the client.
+ protocol, host, path, parameters, query, fragment = urlparse.urlparse ('http://dummyhost%s' % self.path)
+ # we only use path, query
+ env = {'wsgi.version': (1,0)
+ ,'wsgi.url_scheme': 'http'
+ ,'wsgi.input': self.rfile
+ ,'wsgi.errors': sys.stderr
+ ,'wsgi.multithread': 1
+ ,'wsgi.multiprocess': 0
+ ,'wsgi.run_once': 0
+ ,'REQUEST_METHOD': self.command
+ ,'REQUEST_URI': self.path
+ ,'PATH_INFO': path
+ ,'QUERY_STRING': query
+ ,'CONTENT_TYPE': self.headers.get ('Content-Type', '')
+ ,'CONTENT_LENGTH': self.headers.get ('Content-Length', '')
+ ,'REMOTE_ADDR': self.client_address[0]
+ ,'SERVER_NAME': self.server.server_address [0]
+ ,'SERVER_PORT': str (self.server.server_address [1])
+ ,'SERVER_PROTOCOL': self.request_version
+ }
+
+ # every request header becomes an HTTP_* variable
+ for httpHeader, httpValue in self.headers.items():
+ env ['HTTP_%s' % httpHeader.replace ('-', '_').upper()] = httpValue
+
+ # Setup the state
+ self.wsgiSentHeaders = 0
+ self.wsgiHeaders = []
+
+ try:
+ # We have there environment, now invoke the application
+ result = self.server.app(env, self.wsgiStartResponse)
+ try:
+ try:
+ for data in result:
+ if data: self.wsgiWriteData (data)
+ finally:
+ if hasattr(result, 'close'): result.close()
+ except socket.error, socketErr:
+ # Catch common network errors and suppress them
+ if (socketErr.args[0] in (errno.ECONNABORTED, errno.EPIPE)): return
+ except socket.timeout, socketTimeout: return
+ except:
+ # anything else: log the traceback, let internalerror() fill the
+ # context, then send that as the response
+ print >> debug, traceback.format_exc(),
+ internalerror()
+ if not self.wsgiSentHeaders:
+ self.wsgiStartResponse(ctx.status, ctx.headers)
+ self.wsgiWriteData(ctx.output)
+
+ if (not self.wsgiSentHeaders):
+ # We must write out something!
+ self.wsgiWriteData(" ")
+ return
+
+ do_POST = runWSGIApp
+
+ def do_GET(self):
+ # paths under /static/ go to the stock file handler, the rest to the app
+ if self.path.startswith('/static/'):
+ SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
+ else:
+ self.runWSGIApp()
+
+ def wsgiStartResponse (self, response_status, response_headers, exc_info=None):
+ # Only records status and headers; they are sent with the first data.
+ if (self.wsgiSentHeaders):
+ raise Exception ("Headers already sent and start_response called again!")
+ # Should really take a copy to avoid changes in the application....
+ self.wsgiHeaders = (response_status, response_headers)
+ return self.wsgiWriteData
+
+ def wsgiWriteData (self, data):
+ if (not self.wsgiSentHeaders):
+ status, headers = self.wsgiHeaders
+ # Need to send header prior to data
+ # status looks like "200 OK": code before the first space, message after
+ statusCode = status [:status.find (' ')]
+ statusMsg = status [status.find (' ') + 1:]
+ self.send_response (int (statusCode), statusMsg)
+ for header, value in headers:
+ self.send_header (header, value)
+ self.end_headers()
+ self.wsgiSentHeaders = 1
+ # Send the data
+ self.wfile.write (data)
+
+ class WSGIServer (SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
+ def __init__(self, func):
+ # `port` comes from the enclosing runsimple() call
+ BaseHTTPServer.HTTPServer.__init__(self, ("0.0.0.0", int(port)), WSGIHandler)
+ self.app = func
+ self.serverShuttingDown = 0
+
+ print "Launching server: http://0.0.0.0:"+str(port)+"/"
+ WSGIServer(func).serve_forever()
+
+def makeserver(WSGIServer):
+    """
+    Returns a subclass of the flup-style `WSGIServer` class whose `error`
+    hook answers with web.py's own internal-error response.
+    """
+    class MyServer(WSGIServer):
+        def error(self, req):
+            emit = req.stdout.write
+            # fills context.status, context.headers and context.output
+            internalerror()
+            emit('Status: '+context.status+'\r\n')
+            for (name, value) in context.headers:
+                emit(name+': '+value+'\r\n')
+            emit('\r\n'+context.output)
+
+    return MyServer
+
+def runfcgi(func):
+    """Serves the WSGI function `func` through a flup FastCGI server."""
+    from flup.server.fcgi import WSGIServer
+    server_class = makeserver(WSGIServer)
+    server = server_class(func, multiplexed=True)
+    return server.run()
+
+def runscgi(func):
+    """
+    Serves the WSGI function `func` through a flup SCGI server.
+
+    The bind address is taken from the command line argument following the
+    program name (ignoring the `scgi` keyword), written as `host:port` or
+    just `port`; without one it is localhost:4000.
+    """
+    from flup.server.scgi import WSGIServer
+    MyServer = makeserver(WSGIServer)
+    hostport = ('localhost',4000)
+    if len(sys.argv) > 2: # progname, scgi
+        args = sys.argv[:]
+        args.remove('scgi')
+        pieces = args[1].split(':',1)
+        if len(pieces) == 2:
+            hostport = (pieces[0], int(pieces[1]))
+        else:
+            hostport = ('localhost',int(pieces[0]))
+    return MyServer(func, bindAddress=hostport).run()
+
+## debug
+
+def debug(*args):
+    """
+    Prints a prettyprinted version of `args` to stderr.
+
+    Inside a request the WSGI error stream (`wsgi.errors`) is used;
+    otherwise `sys.stderr`.  Always returns the empty string.
+    """
+    # Only fall back when the context lookup itself fails; a bare `except:`
+    # would also swallow KeyboardInterrupt and SystemExit.
+    try: out = context.environ['wsgi.errors']
+    except Exception: out = sys.stderr
+    for x in args:
+        print >> out, pprint.pformat(x)
+    return ''
+
+def debugwrite(x):
+    """
+    Writes the string `x` unchanged to the WSGI error stream, or to
+    `sys.stderr` when no request context is available.
+    """
+    # see debug(): a bare `except:` would also hide KeyboardInterrupt
+    try: out = context.environ['wsgi.errors']
+    except Exception: out = sys.stderr
+    out.write(x)
+# lets `debug` be used as a file-like target (`print >> debug, ...`)
+debug.write = debugwrite
+
+class reloader:
+    """
+    Before every request, checks to see if any loaded modules have changed on disk
+    and, if so, reloads them.
+
+    Can be used as WSGI middleware: calling an instance runs `check()` and
+    then the wrapped function.
+    """
+    def __init__(self, func, tocheck=None):
+        """
+        `func` is the WSGI function to wrap.  `tocheck` is accepted but not used.
+
+        Creating a reloader also replaces the memoized `_compiletemplate`
+        with the plain function, so templates are recompiled on every use.
+        """
+        self.func = func
+        # module -> modification time seen at the last check
+        self.mtimes = {}
+        global _compiletemplate
+        b = _compiletemplate.bases
+        # looked up through globals() because a bare `__compiletemplate`
+        # inside a class body would be name-mangled
+        _compiletemplate = globals()['__compiletemplate']
+        _compiletemplate.bases = b
+
+    def check(self):
+        """Reloads every loaded module whose file changed since the last check.  Returns True."""
+        for mod in list(sys.modules.values()):
+            # entries without a usable __file__ (None entries, built-ins)
+            # cannot be checked
+            try: mtime = os.stat(mod.__file__).st_mtime
+            except (AttributeError, TypeError, OSError, IOError): continue
+            # for compiled files the source next to them counts as well
+            if mod.__file__.endswith('.pyc') and os.path.exists(mod.__file__[:-1]):
+                mtime = max(os.stat(mod.__file__[:-1]).st_mtime, mtime)
+            if mod not in self.mtimes:
+                self.mtimes[mod] = mtime
+            elif self.mtimes[mod] < mtime:
+                try:
+                    reload(mod)
+                    # remember the new time; otherwise the module would be
+                    # reloaded again on every later request
+                    self.mtimes[mod] = mtime
+                except ImportError: pass
+        return True
+
+    def __call__(self, e, o):
+        """WSGI entry point: check for changes, then delegate to the wrapped function."""
+        self.check()
+        return self.func(e, o)
+
+def profiler(app):
+    """WSGI middleware that appends basic profiling output to each response."""
+    def profile_internal(e, o):
+        profiled_app = profile(app)
+        chunks, report = profiled_app(e, o)
+        footer = '<pre>'+report+'</pre>' #@@encode
+        return chunks + [footer]
+    return profile_internal
+
+## setting up the context
+
+class _outputter:
+    """Stand-in for `sys.stdout` that sends printed text into the response."""
+    def write(self, x):
+        # without a response being built, fall through to the real stdout
+        if not hasattr(ctx, 'output'):
+            _oldstdout.write(x)
+        else:
+            return output(x)
+    def flush(self):
+        return _oldstdout.flush()
+    def close(self):
+        return _oldstdout.close()
+
+# One Storage per thread, keyed by thread object; `_load` installs a fresh
+# one for each request and `_unload` removes it again.
+_context = {currentThread():Storage()}
+# `ctx` and `context` are two names for the same object.
+# NOTE(review): presumably threadeddict forwards attribute access to the
+# current thread's entry in `_context` -- confirm against its definition.
+ctx = context = threadeddict(_context)
+
+ctx.__doc__ = """
+A `storage` object containing various information about the request:
+
+ `environ` (aka `env`)
+ : A dictionary containing the standard WSGI environment variables.
+
+ `host`
+ : The domain (`Host` header) requested by the user.
+
+ `home`
+ : The base path for the application.
+
+ `ip`
+ : The IP address of the requester.
+
+ `method`
+ : The HTTP method used.
+
+ `path`
+ : The path request.
+
+ `fullpath`
+ : The full path requested, including query arguments.
+
+ ### Response Data
+
+ `status` (default: "200 OK")
+ : The status code to be used in the response.
+
+ `headers`
+ : A list of 2-tuples to be used in the response.
+
+ `output`
+ : A string to be used as the response.
+"""
+
+if '_oldstdout' not in globals():
+    # first import only: keep the real stdout and route `print` through
+    # _outputter from now on
+    _oldstdout, sys.stdout = sys.stdout, _outputter()
+
+def _load(env):
+    """
+    Sets up a fresh request context for the current thread from the WSGI
+    environment `env`, and connects to the database if `db_parameters`
+    is defined at module level.
+    """
+    _context[currentThread()] = Storage()
+    ctx.environ = env
+    ctx.env = env
+    ctx.host = env.get('HTTP_HOST')
+    ctx.home = 'http://' + env.get('HTTP_HOST', '[unknown]') + env.get('SCRIPT_NAME', '')
+    ctx.ip = env.get('REMOTE_ADDR')
+    ctx.method = env.get('REQUEST_METHOD')
+    ctx.path = env.get('PATH_INFO')
+    # http://trac.lighttpd.net/trac/ticket/406 requires:
+    if env.get('SERVER_SOFTWARE', '').startswith('lighttpd/'):
+        uri_without_query = env.get('REQUEST_URI').split('?')[0]
+        ctx.path = lstrips(uri_without_query, env.get('SCRIPT_NAME'))
+
+    ctx.fullpath = ctx.path
+    query = dict(input())
+    if query:
+        ctx.fullpath += '?' + urllib.urlencode(query)
+    # response defaults
+    ctx.status = '200 OK'
+    ctx.headers = []
+    ctx.output = ''
+    if 'db_parameters' in globals():
+        connect(**db_parameters)
+
+def _unload():
+    """Discards the current thread's request context."""
+    # ensures db cursors and such are GCed promptly
+    _context.pop(currentThread())
+
+if __name__ == "__main__":
+    # Run as a script: serve this file's own source code at /web.py.
+    urls = ('/web.py', 'source')
+    class source:
+        def GET(self):
+            header('Content-Type', 'text/python')
+            # close the file explicitly instead of leaking one handle
+            # per request
+            fileobj = open(sys.argv[0])
+            try: print fileobj.read()
+            finally: fileobj.close()
+    run(urls)
============================================================
--- config.py 1028ab910fddc15e627d9e16fcd47d75bd0954ec
+++ config.py 695a5fb88edd1190cefe0d3958e8358d46e1376b
@@ -1,12 +1,18 @@
+import datetime
import os
install_path = '/Users/grahame/monotone/memes/'
storage_path = os.path.join(install_path, 'storage')
-word_shelf = os.path.join(install_path, 'word.shelf')
word_db = os.path.join(install_path, 'word.db')
user_schema = os.path.join(install_path, 'sql', 'user.sql')
haiku_line_attempts = 3
+haiku_attempts = 10
+
+update_interval = datetime.timedelta(days=7)
+
+dynamic_uri_path = 'http://localhost:8080/'
+static_uri_path = 'http://glamdring.local/~grahame/memes/static/'
+
-haiku_attempts = 3
============================================================
--- dictionary.py ded8d84b5aaeecac34aa38ad943654147268e55e
+++ dictionary.py 0aed82d6ac98f925ab0c727331ab434eb9ec6cc9
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+import dbhash
import config
import sys
@@ -7,43 +8,15 @@ class Word(str):
def __init__(self, w):
str.__init__(self)
self._syllables = None
- self._syllables_est = None
self.rhymes = None
self.source = None
def update_from(self, from_word):
self._syllables = self._syllables or from_word._syllables
self.rhymes = self.rhymes or from_word.rhymes
def __get_syllables(self):
- if self._syllables != None:
- return self._syllables
- else:
- return self.__syllable_estimate()
+ return self._syllables
def __set_syllables(self, s):
self._syllables = s
- def __syllable_estimate(self):
- "Last resort syllable counter. Reasonably accurate in English." \
- "Allegedly works for French."
- def est():
- vowels = ['a', 'e', 'i', 'o', 'u', 'y', "'"]
- l = None
- count = 0
- if len(self) == 0:
- return 0
- if len(self) <= 3:
- return 1
- for c in self:
- if c in vowels and l not in vowels:
- count = count + 1
- l = c
- if count > 1 and ((self[-1] == 'e' and self[-2] != 'l') or
- (self[-2] == 'e' and self[-1] == 's')):
- # silent 'e'
- count = count - 1
- if count == 0: count = 1
- return count
- if self._syllables_est == None:
- self._syllables_est = est()
- return self._syllables_est
syllables = property(__get_syllables, __set_syllables, None, "The number of syllables in this word.")
import shelve
@@ -67,27 +40,14 @@ if __name__ == '__main__':
print "%d/%d correct (estimation had a %d%% success rate)" % (ok, misc+ok, (float(ok)/(ok+misc)) * 100)
def generate_db():
- # these must have a words() method, which returns
- # an iterator yielding Word instances.
- d = shelve.open(config.word_shelf)
+ d = dbhash.open(config.word_db, 'w')
for word in words:
- d[word] = word
+ d[word] = str(word.syllables)
d.close()
- def to_sqlite():
- from pysqlite2 import dbapi2 as sqlite
- con = sqlite.connect(config.word_db)
- cur = con.cursor()
- cur.execute("delete from words")
- for word in words:
-# print "inserting:", word, word.source, word.syllables, word.rhymes
- cur.execute("""insert into words (word, source, syllables, rhymes) VALUES (?,?,?,?)""",
- (word, word.source, word.syllables, word.rhymes))
- con.commit()
-
commands = { 'syllable_estimate' : estimation_report,
- 'generate' : generate_db,
- 'sqlite' : to_sqlite }
+ 'generate' : generate_db
+ }
command = commands.get(sys.argv[1])
if not command:
sys.stderr.write('%s: command not understood.\n' % (sys.argv[0]))
============================================================
--- forms.py b631b2a653d8c57334c492d8e87cd9296ac07fa4
+++ forms.py 617182cc4818a6d7954d3e90cb2eaaf160967643
@@ -3,82 +3,79 @@ def haiku(doc, form=[5,7,5]):
import config
def haiku(doc, form=[5,7,5]):
+ random.seed()
markov = doc.symbol_state.forward_markov
def pickfrom(possible, total, get_total):
- k = random.randint(0, total - 1)
- for seq in possible:
- k -= get_total(seq)
- if k < 0:
- return seq
- raise Exception("Failed to pick a number - 'total' miscalcuation? (%d, %d)" % (k, total))
+ k = random.randint(0, total - 1)
+ for seq in possible:
+ k -= get_total(seq)
+ if k < 0:
+ return seq
+ raise Exception("Failed to pick a number - 'total' miscalcuation? (%d, %d)" % (k, total))
def generate_line(target, state):
- state = tuple(state)
- line = []
- count = 0
- syl = doc.syllables.lookup
+ state = tuple(state)
+ line = []
+ count = 0
+ syl = doc.syllables.lookup
- if len(state) < markov.size:
- # we'll have to pick a starting point
+ if len(state) < markov.size:
+ # we'll have to pick a starting point
- # this is the most accurate, but way too slow
- #
- #possible = filter(lambda seq: sum(map(syl, seq)) < form[0], markov.scores)
- #total_possible = sum(map(lambda seq: markov.scores[seq].total, possible))
- #seq = pickfrom(possible, total_possible, lambda seq: markov.scores[seq].total)
- #
- seq = pickfrom(markov.scores, markov.total, lambda seq: markov.scores[seq].total)
- count += sum(map(syl, seq))
- line += list(seq)
- state = seq
- elif len(state) > markov.size:
- state = state[-1*markov.size:]
+ # this is the most accurate, but way too slow
+ #
+ #possible = filter(lambda seq: sum(map(syl, seq)) < form[0], markov.scores)
+ #total_possible = sum(map(lambda seq: markov.scores[seq].total, possible))
+ #seq = pickfrom(possible, total_possible, lambda seq: markov.scores[seq].total)
+ #
+ seq = pickfrom(markov.scores, markov.total, lambda seq: markov.scores[seq].total)
+ count += sum(map(syl, seq))
+ line += list(seq)
+ state = seq
+ elif len(state) > markov.size:
+ state = state[-1*markov.size:]
- while count < target:
- maxsize = target - count
- score = markov.scores[state]
+ while count < target:
+ maxsize = target - count
+ score = markov.scores[state]
- # okay, if count + syl(next_token) != target then we need there
- # to be an entry in the symbolstate for that next potential
- # symbol. this lets us restrict further and not fall down holes
- # so often
- def is_not_deadend(id):
- next_count = count + syl(id)
- if next_count == target: return True
- next_state = (state + (id,))[1:]
- next_score = markov.scores.get(next_state)
- return next_score != None
+ # okay, if count + syl(next_token) != target then we need there
+ # to be an entry in the symbolstate for that next potential
+ # symbol. this lets us restrict further and not fall down holes
+ # so often
+ def is_not_deadend(id):
+ next_count = count + syl(id)
+ if next_count == target: return True
+ next_state = (state + (id,))[1:]
+ next_score = markov.scores.get(next_state)
+ return next_score != None
- possible = set(filter(lambda id: syl(id) <= maxsize and is_not_deadend(id), score.scores))
- print "status:", target, state, score.scores.keys(), possible
- total_possible = sum(map(lambda tok: score.scores[tok], possible))
- if not possible:
- break
+ possible = set(filter(lambda id: syl(id) <= maxsize and is_not_deadend(id), score.scores))
+# print "status:", target, state, score.scores.keys(), possible
+ total_possible = sum(map(lambda tok: score.scores[tok], possible))
+ if not possible:
+ break
- token = pickfrom(possible, total_possible, lambda seq: score.scores[seq])
- count += syl(token)
- state = (state + (token,))[1:]
- line.append(token)
+ token = pickfrom(possible, total_possible, lambda seq: score.scores[seq])
+ count += syl(token)
+ state = (state + (token,))[1:]
+ line.append(token)
- if count != target:
- return None
- else:
- return line
+ if count != target:
+ return None
+ else:
+ return line
rv = []
last_line= []
for length in form:
- print "** target length is:", length
-
- for i in xrange(config.haiku_line_attempts):
- line = generate_line(length, last_line)
- if line != None:
- break
-
- print "** resulting line is:", line
- if not line:
- return None
- rv.append(line)
- last_line = line
+ for i in xrange(config.haiku_line_attempts):
+ line = generate_line(length, last_line)
+ if line != None:
+ break
+ if not line:
+ return None
+ rv.append(line)
+ last_line = line
return rv
============================================================
--- generators.py 550b478b5c306ae773cd42e8e5aff775cb0b8aed
+++ generators.py 1ec26b4d1209e302e47cfb0b1b6f2bd3e857e592
@@ -61,7 +61,8 @@ class Callback:
def characters(self, data):
if self.gather_chars: self.chars += data
elif self.in_guid: self.guid += data
- def endDocument(self): pass
+ def endDocument(self):
+ pass
def startElement(self, tag, attrs):
if tag == "item":
self.in_item = True
============================================================
--- poet3.py 3c5adc98809bbd5a6df624911190c02c311f2057
+++ poet3.py d4f25eefe3759a6223471ed27257554aa594a1da
@@ -5,106 +5,96 @@
# ready for deployment on angrygoats.net
#
-from forms import haiku
from generators import RSSGenerator, HTTPGenerator
from storage import Storage
from syllables import Syllables
from symbolstate import SymbolState
import os
import config
+import cPickle
+import web
+def upwrite(generator, to_upwrite):
+ """upwrite a sequence of tokens.
+ to_upwrite is a list of sequences to be upwritten.
+ assumption: all sequences to be upwritten are equal length.
+ """
+ seq_size = len(to_upwrite[0])
+ matches = [0] * len(to_upwrite)
+ buffer = []
+ for token in (t.strip() for t in generator):
+ buffer.append(token)
+ for i, seq in enumerate(to_upwrite):
+ if token == seq[matches[i]]:
+ matches[i] += 1
+ else:
+ matches[i] = 0
+
+ if matches[i] == seq_size:
+ yield [' '.join(to_upwrite[i])]
+ matches = [0] * len(to_upwrite)
+ buffer = []
+ break
+ longest_match = max(matches)
+ if len(buffer) > longest_match:
+ output_idx = len(buffer)-longest_match
+ output, buffer = buffer[:output_idx], buffer[output_idx:]
+ yield output
+
+def remove_token_sequences(generator):
+ for token_seq in generator:
+ for token in token_seq:
+ yield token
+
class Poet3:
def __init__(self, uri):
- self.uri = uri
- self.storage = Storage(self.uri)
- self.syllables = Syllables()
- self.upwritten = self.storage.has_file('has_been_upwritten.txt')
+ self.uri = uri
+ self.storage = Storage(self.uri)
+ self.syllables = Syllables()
+ if self.storage.has_file('upwritten.txt'):
+ self.upwritten_seqs = cPickle.load(self.storage.open('upwritten.txt'))
+ else:
+ self.upwritten_seqs = None
def update(self):
- self.storage.require_files(('articles.txt', 'corpus.txt'))
+ self.storage.require_files(('articles.txt', 'corpus.txt'))
- articles, new_articles = set(), set()
- for line in (t.strip() for t in self.storage.open('articles.txt')):
- articles.add(line)
+ articles, new_articles = set(), set()
+ for line in (t.strip() for t in self.storage.open('articles.txt')):
+ articles.add(line)
- corpus_fd = self.storage.open('corpus.txt', 'a')
- for token_seq in RSSGenerator(HTTPGenerator(self.uri), articles, new_articles):
- for token in token_seq:
- corpus_fd.write(token+"\n")
- corpus_fd.close()
+ corpus_fd = self.storage.open('corpus.txt', 'a')
+ generator = RSSGenerator(HTTPGenerator(self.uri), articles, new_articles)
+ # except that we don't actually *want* token sequences any more; we don't care
+ # and it breaks the upwriting generator.
+ generator = remove_token_sequences(generator)
+ if self.upwritten_seqs:
+ generator = remove_token_sequences(upwrite(generator, self.upwritten_seqs))
- articles_fd = self.storage.open('articles.txt', 'a')
- for article in new_articles:
- articles_fd.write(article+"\n")
- articles_fd.close()
+ for token in generator:
+ corpus_fd.write(token+"\n")
+ corpus_fd.close()
+ articles_fd = self.storage.open('articles.txt', 'a')
+ for article in new_articles:
+ articles_fd.write(article+"\n")
+ articles_fd.close()
+
def build_state(self):
- self.symbol_state = SymbolState((t.strip() for t in self.storage.open('corpus.txt')))
- self.symbol_state.update()
+ self.symbol_state = SymbolState((t.strip() for t in self.storage.open('corpus.txt')))
+ self.symbol_state.update()
def upwrite(self, to_upwrite):
- if len(to_upwrite) == 0:
- return
+ if len(to_upwrite) == 0:
+ return
- corpus_fd = self.storage.open('corpus.txt')
- upwrite_fd = self.storage.open('upwrite.txt', 'w')
+ corpus_fd = self.storage.open('corpus.txt')
+ upwrite_fd = self.storage.open('upwrite.txt', 'w')
- def get_token_lists():
- matches = [0] * len(to_upwrite)
- buffer = []
- for token in (t.strip() for t in corpus_fd):
- buffer.append(token)
- for i, seq in enumerate(to_upwrite):
- if token == seq[matches[i]]:
- matches[i] += 1
- else:
- matches[i] = 0
+ for token_list in upwrite((t.strip() for t in corpus_fd), to_upwrite):
+ for token in token_list:
+ upwrite_fd.write(token+'\n')
+ upwrite_fd.close()
+ os.rename(self.storage.file('upwrite.txt'),
+ self.storage.file('corpus.txt'))
- if matches[i] == self.symbol_state.forward_markov.size:
- yield [' '.join(to_upwrite[i])]
- matches = [0] * len(to_upwrite)
- buffer = []
- break
-
- longest_match = max(matches)
- if len(buffer) > longest_match:
- output_idx = len(buffer)-longest_match
- output, buffer = buffer[:output_idx], buffer[output_idx:]
- yield output
-
- for token_list in get_token_lists():
- for token in token_list:
- upwrite_fd.write(token+'\n')
- upwrite_fd.close()
- os.rename(self.storage.file('upwrite.txt'),
- self.storage.file('corpus.txt'))
-
- self.storage.open('has_been_upwritten.txt', 'w')
-
-def gen_haiku(uri):
- poet = Poet3(uri)
- poet.update()
- poet.build_state()
- if not poet.upwritten:
- to_upwrite = poet.symbol_state.chunkable()
- poet.upwrite(to_upwrite)
-
- for attempt in xrange(config.haiku_attempts):
- poem = haiku(poet)
- if poem != None:
- break
-
- if poem == None:
- raise PoetException("Could not generate you a poem!")
-
- return attempt, poem
-
-if __name__ == '__main__':
- uri = 'http://glamdring.local/~grahame/rss'
- attempts, poem = gen_haiku(uri)
- print "%d attempts" % attempts
- for line in poem:
- print ' '.join(line)
-
-
-
============================================================
--- storage.py bff5648f025b275e05135246f0c56236953eaef4
+++ storage.py 15df0aaa6f37d41868a4e7b75beb834d7caea7e2
@@ -1,6 +1,8 @@
+import datetime
import urlparse
import string
+import stat
import sha
import os
@@ -11,42 +13,50 @@ class Storage:
class Storage:
def __init__(self, uri):
- self.uri = uri
- self.dir = self.__storage_dir()
+ self.uri = uri
+ self.dir = self.__storage_dir()
def __storage_dir(self):
- # list of directories in the storage path to get to this URI
- dirs = []
- site = filter(lambda x: x in string.letters or x in string.digits or x == '.', urlparse.urlparse(self.uri)[1])
- # no .. entries to climb the filesystem :-)
- site = site.lstrip('.')
- dirs.append(site)
+ # list of directories in the storage path to get to this URI
+ dirs = []
+ site = filter(lambda x: x in string.letters or x in string.digits or x == '.', urlparse.urlparse(self.uri)[1])
+ # no .. entries to climb the filesystem :-)
+ site = site.lstrip('.')
+ dirs.append(site)
- # but this should be safe
- hash = sha.new(self.uri).hexdigest()
- dirs += [hash[:8], hash[8:16], hash[16:24], hash[24:32], hash[32:40]]
+ # but this should be safe
+ hash = sha.new(self.uri).hexdigest()
+ dirs += [hash[:8], hash[8:16], hash[16:24], hash[24:32], hash[32:40]]
- c_dir = config.storage_path
- for dir in dirs:
- c_dir = os.path.join(c_dir, dir)
- if not os.access(c_dir, os.R_OK):
- os.mkdir(c_dir)
- return c_dir
+ c_dir = config.storage_path
+ for dir in dirs:
+ c_dir = os.path.join(c_dir, dir)
+ if not os.access(c_dir, os.R_OK):
+ os.mkdir(c_dir)
+ return c_dir
def file(self, fname):
- if fname.startswith('/'):
- raise StorageException("fname may not start with a slash.")
- return os.path.join(self.dir, fname)
+ if fname.startswith('/'):
+ raise StorageException("fname may not start with a slash.")
+ return os.path.join(self.dir, fname)
+ def mtime(self, fname):
+ fname = self.file(fname)
+ if not os.access(fname, os.R_OK):
+ timestamp = 0
+ else:
+ timestamp = os.stat(fname)[stat.ST_MTIME]
+ return datetime.datetime.fromtimestamp(timestamp)
+
def has_file(self, fname):
- return os.access(self.file(fname), os.R_OK)
+ return os.access(self.file(fname), os.R_OK)
def require_files(self, files):
- for file in files:
- file = self.file(file)
- if not os.access(file, os.R_OK):
- open(file, 'w')
+ for file in files:
+ file = self.file(file)
+ if not os.access(file, os.R_OK):
+ open(file, 'w')
def open(self, *args):
+ fname, other_args = args[0], args[1:]
+ return open(*[self.file(fname)] + list(other_args))
- fname, other_args = args[0], args[1:]
- return open(*[self.file(fname)] + list(other_args))
============================================================
--- syllables.py 1ddbe0eb3425f3b5cc513e8af525d583ac771aff
+++ syllables.py 77a91fa93c05a98c5b8efb24a081e921b42d59a6
@@ -1,31 +1,45 @@
+import dbhash
+import config
+
class Syllables:
def __init__(self):
- self.cache = {}
+ self.cache = {}
+ self.db = dbhash.open(config.word_db)
+ def __lookup(self, token):
+ if self.db.has_key(token):
+ rv = int(self.db[token])
+ elif not self.cache.has_key(token):
+ rv = self.cache[token] = self.__syllable_estimate(token)
+ else:
+ rv = self.cache[token]
+# web.debug("token=%s, syllables=%d" % (token, rv))
+ return rv
+
def lookup(self, token):
- if not self.cache.has_key(token):
- self.cache[token] = self.__syllable_estimate(token)
- return self.cache[token]
+ # note: a token may (due to upwriting) be one or more words.
+ # so split on ' ' before doing the lookup, then just do a sum
+ return sum(map(self.__lookup, token.split(' ')))
def __syllable_estimate(self, token):
- "Last resort syllable counter. Reasonably accurate in English." \
- "Allegedly works for French."
- vowels = ['a', 'e', 'i', 'o', 'u', 'y', "'"]
- l = None
- count = 0
- if len(token) == 0:
- return 0
- if len(token) <= 3:
- return 1
- for c in token:
- if c in vowels and l not in vowels:
- count = count + 1
- l = c
- if count > 1 and ((token[-1] == 'e' and token[-2] != 'l') or
- (token[-2] == 'e' and token[-1] == 's')):
- # silent 'e'
- count = count - 1
- if count == 0: count = 1
- return count
+ "Last resort syllable counter. Reasonably accurate in English." \
+ "Allegedly works for French."
+ vowels = ['a', 'e', 'i', 'o', 'u', 'y', "'"]
+ l = None
+ count = 0
+ if len(token) == 0:
+ return 0
+ if len(token) <= 3:
+ return 1
+ for c in token:
+ if c in vowels and l not in vowels:
+ count = count + 1
+ l = c
+ if count > 1 and ((token[-1] == 'e' and token[-2] != 'l') or
+ (token[-2] == 'e' and token[-1] == 's')):
+ # silent 'e'
+ count = count - 1
+ if count == 0: count = 1
+ return count
============================================================
--- symbolstate.py 83408e4998ed66268e39b916225c46e89b976cdb
+++ symbolstate.py a144283fbc3b059a8e34856c43012fe514eb7305
@@ -6,55 +6,55 @@ class MarkovScore:
class MarkovScore:
def __init__(self):
- self.scores = {}
- self.total = 0
+ self.scores = {}
+ self.total = 0
def add_score(self, token):
- self.scores.setdefault(token, 0)
- self.scores[token] += 1
- self.total += 1
+ self.scores.setdefault(token, 0)
+ self.scores[token] += 1
+ self.total += 1
def entropy(self):
- if not hasattr(self, 'h'):
- self.h = -1 * sum(map(lambda p: p * math.log(p, 2),
- map(lambda x: (self.scores[x] / float(self.total)), self.scores)))
- return self.h
+ if not hasattr(self, 'h'):
+ self.h = -1 * sum(map(lambda p: p * math.log(p, 2),
+ map(lambda x: (self.scores[x] / float(self.total)), self.scores)))
+ return self.h
class MarkovModel:
def __init__(self, size):
- self.total = 0
- self.size = size
- self.scores = {}
+ self.total = 0
+ self.size = size
+ self.scores = {}
def add(self, tokens, token):
- tokens = tuple(tokens)
- if len(tokens) != self.size:
- raise MarkovException("Token list is of incorrect size.")
- if not self.scores.has_key(tokens):
- self.scores[tokens] = MarkovScore()
- self.scores[tokens].add_score(token)
- self.total += 1
+ tokens = tuple(tokens)
+ if len(tokens) != self.size:
+ raise MarkovException("Token list is of incorrect size.")
+ if not self.scores.has_key(tokens):
+ self.scores[tokens] = MarkovScore()
+ self.scores[tokens].add_score(token)
+ self.total += 1
class SymbolState:
def __init__(self, corpus, chain_size=2):
- self.forward_markov = MarkovModel(chain_size)
- self.corpus = corpus
+ self.forward_markov = MarkovModel(chain_size)
+ self.corpus = corpus
def update(self):
- buffer = []
- for token in self.corpus:
- if token == None:
- continue
- if len(buffer) == self.forward_markov.size:
- self.forward_markov.add(buffer, token)
- buffer = buffer[1:]
- buffer.append(token)
+ buffer = []
+ for token in self.corpus:
+ if token == None:
+ continue
+ if len(buffer) == self.forward_markov.size:
+ self.forward_markov.add(buffer, token)
+ buffer = buffer[1:]
+ buffer.append(token)
def chunkable(self):
+ entropies = [self.forward_markov.scores[t].entropy() for t in self.forward_markov.scores]
+ mean_h = sum(entropies) / len(entropies)
+ sd_h = math.sqrt(sum([ pow(t - mean_h, 2) for t in entropies ]) / len(entropies))
+ cutoff = mean_h + 6 * sd_h # should really justify in some way other than 'it works'
+ rv = filter(lambda tokens: self.forward_markov.scores[tokens].entropy() > cutoff, self.forward_markov.scores)
+ return rv
- entropies = [self.forward_markov.scores[t].entropy() for t in self.forward_markov.scores]
- mean_h = sum(entropies) / len(entropies)
- sd_h = math.sqrt(sum([ pow(t - mean_h, 2) for t in entropies ]) / len(entropies))
- cutoff = mean_h + 8 * sd_h # should really justify in some way other than 'it works'
- return filter(lambda tokens: self.forward_markov.scores[tokens].entropy() > cutoff,
- self.forward_markov.scores)