The unified diff between revisions [fddcb91e..] and [9b09f962..] is displayed below. It can also be downloaded as a raw diff.
#
#
# add_file "README"
# content [57b961a0bca3c442cad3389fa39e2130f4377169]
#
# add_file "basic_io.py"
# content [db6995dfe6a24326d30e960bf425a30cb256d022]
#
# patch "on_the_fly.py"
# from [f15c421520b003282b811db3c3d00bc5d99d2965]
# to [92f24cea06bdafa8ca0793431548d6bbee0fee36]
#
============================================================
--- README 57b961a0bca3c442cad3389fa39e2130f4377169
+++ README 57b961a0bca3c442cad3389fa39e2130f4377169
@@ -0,0 +1,21 @@
+These scripts are designed for the Monotone Delta (or other) Storage
+Strategy Shootout (see
+http://venge.net/monotone/wiki/DeltaStorageStrategies/ShootOut )
+
+mkpristine.py
+ Is designed to create a set of PRISTINE-N.db databases as
+ described on the wiki. With a realistic n.v.m scenario the set of
+ databases will use multiple gigabytes of space, so it is a bit
+ unwieldy.
+
+on_the_fly.py
+ Is a newer approach: it simply adds certs starting at the top of the
+ tree and works downwards, netsyncing every N revs. For each existing
+ branch cert, another one prefixed with "shootout-" is added, e.g.
+ "shootout-net.venge.monotone". This allows for timing checkouts
+ etc. with just those branches.
+
+ Usage:
+ ./on_the_fly.py <db> <revs-per-sync> <serve binary> <pull binary> [seed]
+
+ Seed is optional, designed for repeatability.
============================================================
--- basic_io.py db6995dfe6a24326d30e960bf425a30cb256d022
+++ basic_io.py db6995dfe6a24326d30e960bf425a30cb256d022
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+# taken from Grahame Bowland's viewmtn, rev
+# a4c0903d8e67b9cfb14d116024c8eaefd058de18 2006-01-06T01:17:45
+
+import re
+
+basic_io_hex_re = re.compile(r'^ *(\S+) (\[[0-9A-Fa-f]*\])$')
+basic_io_string_re = re.compile(r'^ *(\S+) (\".*)$')
+
+
+def basic_io_parser(data):
+ """returns a dict mapping the first key of each stanza to a list of stanzas; each stanza is a
+ flat list [key, value, key, value, ...]. hashes lose their []s; strings are returned unescaped."""
+ def unescape_string_value(str):
+ rv = ""
+ is_terminated = False
+ in_escape = False
+ if str[0] != '"':
+ raise Exception("basic_io parse error; not a string.")
+ for c in str[1:]:
+ if in_escape:
+ if c != '\\' and c != '\"':
+ raise Exception(r'basic_io parse error; expected \" or \\')
+ rv += c
+ in_escape = False
+ else:
+ if c == '\\':
+ in_escape = True
+ elif c == '"':
+ if is_terminated:
+ raise Exception("basic_io parse error; string ends twice!")
+ is_terminated = True
+ else:
+ rv += c
+ return is_terminated, rv
+
+ # 14:46 < tbrownaw> list<multimap<string, array<string>>>, with the outer list divided according to
+ # what item starts a stanza?
+
+ rv = {}
+
+ stanza = []
+ ongoing_string = None
+
+ for line in data.split('\n'):
+ if ongoing_string != None:
+ ongoing_string += '\n' + line
+ is_terminated, e_value = unescape_string_value(ongoing_string)
+ if is_terminated:
+ stanza += [key, e_value] # key is still the one from the line that opened this string
+ ongoing_string = None
+ continue
+
+ if line == '' and len(stanza) != 0: # an empty line closes the current stanza
+ rv.setdefault(stanza[0], []).append(stanza)
+ stanza = []
+ continue
+
+ m = basic_io_hex_re.match(line)
+ if m:
+ key, value = m.groups()
+ stanza += [key, value[1:-1]] # drop the surrounding []
+ continue
+
+ m = basic_io_string_re.match(line)
+ if m:
+ key, value = m.groups()
+ is_terminated, e_value = unescape_string_value(value)
+ if not is_terminated: ongoing_string = value
+ else: stanza += [key, e_value]
+ continue
+ return rv # a stanza that was never closed by an empty line is not included
============================================================
--- on_the_fly.py f15c421520b003282b811db3c3d00bc5d99d2965
+++ on_the_fly.py 92f24cea06bdafa8ca0793431548d6bbee0fee36
@@ -11,7 +11,9 @@ from time import sleep
from signal import SIGTERM, SIGKILL
from time import sleep
-TEST_BRANCH = 'benchmark-test-branch'
+from basic_io import basic_io_parser
+
+BRANCH_PREFIX = 'shootout-'
SERVE_DB = 'serve.db'
PULL_DB = 'pull.db'
PID_FILE = 'serve.pid'
@@ -19,7 +21,7 @@ def usage():
KEY="shootout@example.com"
def usage():
- print>>stderr, "Usage: %s <db> <revs per netsync> <mto-head binary> <testing binary> [optional seed]" % argv[0]
+ print>>stderr, "Usage: %s <db> <revs per netsync> <serve binary> <pull binary> [optional seed]" % argv[0]
print>>stderr
def pairwise(l):
@@ -79,10 +81,6 @@ def get_num_revs(db):
m = monotone(db, ['auto', 'select', ''])
return len( m )
-def remove_all_branch_certs(db):
-
- monotone(db, ['db', 'execute', "delete from revision_certs where name = 'branch'"])
-
def get_rev_graph(db):
"""
@@ -95,6 +93,7 @@ def get_rev_graph(db):
graph = {}
roots = []
+ parent_deps = {} # number of parents of each child
lines = [ l.strip().split() for l in m ]
for t in lines:
@@ -103,10 +102,12 @@ def get_rev_graph(db):
if not len(pars):
roots.append(rev)
+ parent_deps[rev] = len(pars)
+
for p in pars:
graph.setdefault(p, []).append(rev)
- return (graph, roots)
+ return graph, roots, parent_deps
def remove_one_rev(db):
@@ -120,19 +121,52 @@ def remove_one_rev(db):
to_kill = choice(leaves)
monotone(db, ['db', 'kill_rev_locally', to_kill])
-def cert_leaf_rev(db, graph, frontier):
+def parents_done(child, deps):
+ return deps[child] == 0
- rev = frontier.pop(randint(0,len(frontier)-1))
+def get_branch_certs(db, rev):
+
+ m = monotone(db, "automate certs %s" % rev)
+ b = basic_io_parser( ' '.join(m) )
+
+ # this is an awful hack: the first 'key' stanza is taken as one flat token list and consumed in groups of 10, relying on 'name' at [4] and 'value' at [6].
+ tokens = b['key'][0]
+ assert(len(tokens) % 10 == 0)
+
+ branches = []
+
+ while len(tokens):
+ assert tokens[4] == 'name'
+ assert tokens[6] == 'value'
+ if tokens[5] == 'branch':
+ branches.append(tokens[7])
+ tokens[:10] = [] # drop the group of 10 tokens just examined
+
+ return branches
+
+def cert_leaf_rev(db, graph, frontier, parent_deps):
+
+ """
+ Picks a rev from the frontier, and for any branch certs
+ adds another branch cert prefixed with BRANCH_PREFIX
+ """
+
assert len(frontier)
- monotone(db, "cert %s branch %s" % (rev, TEST_BRANCH))
+ rev = frontier.pop(randint(0,len(frontier)-1))
+ branches = get_branch_certs(db, rev)
+ for b in branches:
+ monotone(db, "cert %s branch %s%s" % (rev, BRANCH_PREFIX, b))
if rev in graph:
- frontier += graph[rev]
+ for child in graph[rev]:
+ parent_deps[child] -= 1
+ if parents_done(child, parent_deps):
+ frontier.append(child)
del graph[rev]
-def cert_leaf_revs(db, graph, frontier, revs_per_ns):
+def cert_leaf_revs(db, graph, frontier, parent_deps, revs_per_ns):
for i in range(revs_per_ns):
- cert_leaf_rev(db, graph, frontier)
+ cert_leaf_rev(db, graph, frontier, parent_deps)
def serve(db, branch):
@@ -150,7 +184,7 @@ def serve(db, branch):
pass
try:
- monotone(db, 'serve --pid-file=%s %s' % (PID_FILE, TEST_BRANCH))
+ monotone(db, 'serve --pid-file=%s %s*' % (PID_FILE, BRANCH_PREFIX))
except SystemExit, e:
# try and cleanup
try:
@@ -163,13 +197,13 @@ def pull(db, branch):
def pull(db, branch):
- monotone(db, 'pull localhost %s' % TEST_BRANCH)
+ monotone(db, 'pull localhost %s*' % BRANCH_PREFIX)
def serve_pull(serve_db, pull_db):
- pid = serve(serve_db, TEST_BRANCH)
+ pid = serve(serve_db, BRANCH_PREFIX)
- pull(pull_db, TEST_BRANCH)
+ pull(pull_db, BRANCH_PREFIX)
os.kill(pid, SIGKILL)
@@ -192,18 +226,14 @@ def main():
fresh_db(PULL_DB)
- remove_all_branch_certs(SERVE_DB)
-
num_to_go = get_num_revs(SERVE_DB)
- graph, roots = get_rev_graph(SERVE_DB)
+ graph, roots, parent_deps = get_rev_graph(SERVE_DB)
while num_to_go > 0:
- print '\r%d' % num_to_go
+ cert_leaf_revs(SERVE_DB, graph, roots, parent_deps, revs_per_ns)
- cert_leaf_revs(SERVE_DB, graph, roots, revs_per_ns)
-
serve_pull(SERVE_DB, PULL_DB)
num_to_go -= revs_per_ns