The unified diff between revisions [fddcb91e..] and [9b09f962..] is displayed below. It can also be downloaded as a raw diff.

#
#
# add_file "README"
#  content [57b961a0bca3c442cad3389fa39e2130f4377169]
#
# add_file "basic_io.py"
#  content [db6995dfe6a24326d30e960bf425a30cb256d022]
#
# patch "on_the_fly.py"
#  from [f15c421520b003282b811db3c3d00bc5d99d2965]
#    to [92f24cea06bdafa8ca0793431548d6bbee0fee36]
#
============================================================
--- README	57b961a0bca3c442cad3389fa39e2130f4377169
+++ README	57b961a0bca3c442cad3389fa39e2130f4377169
@@ -0,0 +1,21 @@
+These scripts are designed for the Monotone Delta (or other) Storage
+Strategy Shootout (see
+http://venge.net/monotone/wiki/DeltaStorageStrategies/ShootOut )
+
+mkpristine.py
+    Is designed to create a set of PRISTINE-N.db databases as
+    described on the wiki. The set of databases will use multiple
+    gigabytes of space with a realistic n.v.m scenario, so is a bit
+    unwieldy.
+
+on_the_fly.py
+    Is a newer approach: it simply adds certs starting at the top of
+    the tree and working downwards, netsyncing every N revs. Each
+    existing branch cert is duplicated with a "shootout-" prefix, e.g.
+    "shootout-net.venge.monotone". This allows for timing checkouts
+    etc. with just those branches.
+
+    Usage:
+    ./on_the_fly.py <db> <revs-per-sync> <serve binary> <pull binary> [seed]
+
+    Seed is optional, designed for repeatability.
============================================================
--- basic_io.py	db6995dfe6a24326d30e960bf425a30cb256d022
+++ basic_io.py	db6995dfe6a24326d30e960bf425a30cb256d022
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+# taken from Grahame Bowland's viewmtn, rev
+# a4c0903d8e67b9cfb14d116024c8eaefd058de18 2006-01-06T01:17:45
+
+import re
+
+basic_io_hex_re = re.compile(r'^ *(\S+) (\[[0-9A-Fa-f]*\])$')
+basic_io_string_re = re.compile(r'^ *(\S+) (\".*)$')
+
+
+def basic_io_parser(data):
+    """returns a list of lists of (key, value) tuples.  hashes are returned with []s around
+    them; strings are returned raw."""
+    def unescape_string_value(str):
+        rv = ""
+        is_terminated = False
+        in_escape = False
+        if str[0] != '"':
+            raise Exception("basic_io parse error; not a string.")
+        for c in str[1:]:
+            if in_escape:
+                if c != '\\' and c != '\"':
+                    raise Exception(r'basic_io parse error; expected \" or \\')
+                rv += c
+                in_escape = False
+            else:
+                if c == '\\':
+                    in_escape = True
+                elif c == '"':
+                    if is_terminated:
+                        raise Exception("basic_io parse error; string ends twice!")
+                    is_terminated = True
+                else:
+                    rv += c
+        return is_terminated, rv
+
+    # 14:46 < tbrownaw> list<multimap<string, array<string>>>, with the outer list divided according to
+    #                   what item starts a stanza?
+
+    rv = {}
+
+    stanza = []
+    ongoing_string = None
+
+    for line in data.split('\n'):
+        if ongoing_string != None:
+            ongoing_string += '\n' + line
+            is_terminated, e_value = unescape_string_value(ongoing_string)
+            if is_terminated:
+                stanza += [key, e_value]
+                ongoing_string = None
+            continue
+
+        if line == '' and len(stanza) != 0:
+            rv.setdefault(stanza[0], []).append(stanza)
+            stanza = []
+            continue
+
+        m = basic_io_hex_re.match(line)
+        if m:
+            key, value = m.groups()
+            stanza += [key, value[1:-1]]
+            continue
+
+        m = basic_io_string_re.match(line)
+        if m:
+            key, value = m.groups()
+            is_terminated, e_value = unescape_string_value(value)
+            if not is_terminated: ongoing_string = value
+            else: stanza += [key, e_value]
+            continue
+    return rv
============================================================
--- on_the_fly.py	f15c421520b003282b811db3c3d00bc5d99d2965
+++ on_the_fly.py	92f24cea06bdafa8ca0793431548d6bbee0fee36
@@ -11,7 +11,9 @@ from time import sleep
 from signal import SIGTERM, SIGKILL
 from time import sleep

-TEST_BRANCH = 'benchmark-test-branch'
+from basic_io import basic_io_parser
+
+BRANCH_PREFIX = 'shootout-'
 SERVE_DB = 'serve.db'
 PULL_DB = 'pull.db'
 PID_FILE = 'serve.pid'
@@ -19,7 +21,7 @@ def usage():
 KEY="shootout@example.com"

 def usage():
-    print>>stderr, "Usage: %s <db> <revs per netsync> <mto-head binary> <testing binary> [optional seed]" % argv[0]
+    print>>stderr, "Usage: %s <db> <revs per netsync> <serve binary> <pull binary> [optional seed]" % argv[0]
     print>>stderr

 def pairwise(l):
@@ -79,10 +81,6 @@ def get_num_revs(db):
     m = monotone(db, ['auto', 'select', ''])
     return len( m )

-def remove_all_branch_certs(db):
-
-    monotone(db, ['db', 'execute', "delete from revision_certs where name = 'branch'"])
-
 def get_rev_graph(db):

     """
@@ -95,6 +93,7 @@ def get_rev_graph(db):

     graph = {}
     roots = []
+    parent_deps = {} # number of parents of each child

     lines = [ l.strip().split() for l in m ]
     for t in lines:
@@ -103,10 +102,12 @@ def get_rev_graph(db):
         if not len(pars):
             roots.append(rev)

+        parent_deps[rev] = len(pars)
+
         for p in pars:
             graph.setdefault(p, []).append(rev)

-    return (graph, roots)
+    return graph, roots, parent_deps

 def remove_one_rev(db):

@@ -120,19 +121,52 @@ def remove_one_rev(db):
     to_kill = choice(leaves)
     monotone(db, ['db', 'kill_rev_locally', to_kill])

-def cert_leaf_rev(db, graph, frontier):
+def parents_done(child, deps):  # True when the count stored for child in deps is zero
+    return deps[child] == 0

-    rev = frontier.pop(randint(0,len(frontier)-1))
+def get_branch_certs(db, rev):
+
+    m = monotone(db, "automate certs %s" % rev)
+    b = basic_io_parser( ' '.join(m) )
+
+    # this is an awful hack.
+    tokens = b['key'][0]
+    assert(len(tokens) % 10 == 0)
+
+    branches = []
+
+    while len(tokens):
+        assert tokens[4] == 'name'
+        assert tokens[6] == 'value'
+        if tokens[5] == 'branch':
+            branches.append(tokens[7])
+        tokens[:10] = []
+
+    return branches
+
+def cert_leaf_rev(db, graph, frontier, parent_deps):
+
+    """
+    Picks a rev from the frontier, and for any branch certs
+    adds another branch cert prefixed with BRANCH_PREFIX
+    """
+
     assert len(frontier)
-    monotone(db, "cert %s branch %s" % (rev, TEST_BRANCH))
+    rev = frontier.pop(randint(0,len(frontier)-1))
+    branches = get_branch_certs(db, rev)
+    for b in branches:
+        monotone(db, "cert %s branch %s%s" % (rev, BRANCH_PREFIX, b))
     if rev in graph:
-        frontier += graph[rev]
+        for child in graph[rev]:
+            parent_deps[child] -= 1
+            if parents_done(child, parent_deps):
+                frontier.append(child)
         del graph[rev]

-def cert_leaf_revs(db, graph, frontier, revs_per_ns):
+def cert_leaf_revs(db, graph, frontier, parent_deps, revs_per_ns):

     for i in range(revs_per_ns):
-        cert_leaf_rev(db, graph, frontier)
+        cert_leaf_rev(db, graph, frontier, parent_deps)

 def serve(db, branch):

@@ -150,7 +184,7 @@ def serve(db, branch):
             pass

         try:
-            monotone(db, 'serve --pid-file=%s %s' % (PID_FILE, TEST_BRANCH))
+            monotone(db, 'serve --pid-file=%s %s*' % (PID_FILE, BRANCH_PREFIX))
         except SystemExit, e:
             # try and cleanup
             try:
@@ -163,13 +197,13 @@ def pull(db, branch):

 def pull(db, branch):

-    monotone(db, 'pull localhost %s' % TEST_BRANCH)
+    monotone(db, 'pull localhost %s*' % BRANCH_PREFIX)

 def serve_pull(serve_db, pull_db):

-    pid = serve(serve_db, TEST_BRANCH)
+    pid = serve(serve_db, BRANCH_PREFIX)

-    pull(pull_db, TEST_BRANCH)
+    pull(pull_db, BRANCH_PREFIX)

     os.kill(pid, SIGKILL)

@@ -192,18 +226,14 @@ def main():

     fresh_db(PULL_DB)

-    remove_all_branch_certs(SERVE_DB)
-
     num_to_go = get_num_revs(SERVE_DB)

-    graph, roots = get_rev_graph(SERVE_DB)
+    graph, roots, parent_deps = get_rev_graph(SERVE_DB)

     while num_to_go > 0:

-        print '\r%d' % num_to_go
+        cert_leaf_revs(SERVE_DB, graph, roots, parent_deps, revs_per_ns)

-        cert_leaf_revs(SERVE_DB, graph, roots, revs_per_ns)
-
         serve_pull(SERVE_DB, PULL_DB)

         num_to_go -= revs_per_ns