The unified diff between revisions [1b076c97..] and [2235bcdf..] is displayed below. It can also be downloaded as a raw diff.

#
#
# patch "automate.cc"
#  from [68d579fc833b4a0ec52a0c8e5b130af7517ab7ed]
#    to [d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb]
#
# patch "commands.cc"
#  from [fcdf6b54a899e0273098115c0cdf496f39edaaa1]
#    to [b3a8d32282111f7f405d1c250e9a22dddad24aab]
#
# patch "constants.cc"
#  from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e]
#    to [f6f6f11ced5f076aa9c362b2c11148af3a2d9556]
#
# patch "database.cc"
#  from [a4b5de4f4123d5b7729f5017e6a26d14d3df7b70]
#    to [a7f6d1a9f789180312488e66637df2832f11a4fb]
#
# patch "database.hh"
#  from [fa72d07116fc7a0b5744291d97ef3b1eff93ec8c]
#    to [c33e9d5d1f292ec396c695121530347a065aceaf]
#
# patch "packet.cc"
#  from [06b93709b49bc581a5274afc7762f503abf89dc2]
#    to [c7fa83e44be5244218995087ba98814c0548bbf4]
#
# patch "revision.cc"
#  from [8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7]
#    to [77ddcf19bb049ae3ca74b008441bcaa7079232d5]
#
# patch "revision.hh"
#  from [5e3733994a6c92c4f15f0d78aa26669458443d46]
#    to [c6f7e1c8ac336bdec754f0fd1b822124e58eeb5f]
#
============================================================
--- automate.cc	68d579fc833b4a0ec52a0c8e5b130af7517ab7ed
+++ automate.cc	d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb
@@ -283,7 +283,7 @@ automate_toposort(std::vector<utf8> args
       revs.insert(rid);
     }
   std::vector<revision_id> sorted;
-  toposort(revs, sorted, app);
+  toposort(revs, sorted, app.db);
   for (std::vector<revision_id>::const_iterator i = sorted.begin();
        i != sorted.end(); ++i)
     output << (*i).inner()() << std::endl;
@@ -329,7 +329,7 @@ automate_ancestry_difference(std::vector
   ancestry_difference(a, bs, ancestors, app);

   std::vector<revision_id> sorted;
-  toposort(ancestors, sorted, app);
+  toposort(ancestors, sorted, app.db);
   for (std::vector<revision_id>::const_iterator i = sorted.begin();
        i != sorted.end(); ++i)
     output << (*i).inner()() << std::endl;
============================================================
--- commands.cc	fcdf6b54a899e0273098115c0cdf496f39edaaa1
+++ commands.cc	b3a8d32282111f7f405d1c250e9a22dddad24aab
@@ -2191,6 +2191,10 @@ CMD(db, N_("database"),
         build_changesets_from_manifest_ancestry(app);
       else if (idx(args, 0)() == "rosterify")
         build_roster_style_revs_from_manifest_style_revs(app);
+      else if (idx(args, 0)() == "against_base")
+        {
+          app.db.make_all_fwd();
+        }
       else
         throw usage(name);
     }
============================================================
--- constants.cc	c7bc2142cf0e9861c9fd744f458da6e8c7a0323e
+++ constants.cc	f6f6f11ced5f076aa9c362b2c11148af3a2d9556
@@ -35,7 +35,7 @@ namespace constants
   // the value of 7 MB was determined as the optimal point after timing
   // various values with a pull of the monotone repository - it could
   // be tweaked further.
-  size_t const db_version_cache_sz = 7 * (1 << 20);
+  size_t const db_version_cache_sz = 200 * (1 << 20);

   // size of a line of text in the log buffer, beyond which log lines will be
   // truncated.
============================================================
--- database.cc	a4b5de4f4123d5b7729f5017e6a26d14d3df7b70
+++ database.cc	a7f6d1a9f789180312488e66637df2832f11a4fb
@@ -37,6 +37,7 @@
 #include "vocab.hh"
 #include "xdelta.hh"
 #include "epoch.hh"
+#include "revision.hh"

 // defined in schema.sql, converted to header:
 #include "schema.h"
@@ -777,6 +778,14 @@ database::exists(hexenc<id> const & iden
   results res;
   query q("SELECT id FROM " + table + " WHERE id = ?");
   fetch(res, one_col, any_rows, q % text(ident()));
+  if (res.size() > 1)
+    {
+      for (results::const_iterator i = res.begin();
+           i != res.end(); i++)
+        {
+          L(FL("%s") % (*i)[0]);
+        }
+    }
   I((res.size() == 1) || (res.size() == 0));
   return res.size() == 1;
 }
@@ -933,7 +942,7 @@ struct version_cache
           }
         I(i != cache.end());
         I(use >= i->second().size());
-        L(FL("version cache expiring %s\n") % i->first);
+        //L(FL("version cache expiring %s\n") % i->first);
         use -= i->second().size();
         cache.erase(i->first);
       }
@@ -955,7 +964,7 @@ struct version_cache
     if (i == cache.end())
       return false;
     // ++cache_hits;
-    L(FL("version cache hit on %s\n") % ident);
+    //L(FL("version cache hit on %s\n") % ident);
     dat = i->second;
     return true;
   }
@@ -1161,30 +1170,61 @@ database::drop(hexenc<id> const & ident,
   execute(query(drop) % text(ident()));
 }

+// insert the given new data using old_id as a hint
+// as to ancestry
 void
 database::put_version(hexenc<id> const & old_id,
                       hexenc<id> const & new_id,
-                      delta const & del,
+                      data const & new_dat,
                       string const & data_table,
                       string const & delta_table)
 {
-
-  data old_data, new_data;
-  delta reverse_delta;
-
-  get_version(old_id, old_data, data_table, delta_table);
-  patch(old_data, del, new_data);
-  diff(new_data, old_data, reverse_delta);
-
   transaction_guard guard(*this);
+
+  if (exists(new_id, data_table) || exists(new_id, delta_table))
+      return;
+
+  hexenc<id> base_id;
+  MM(base_id);
   if (exists(old_id, data_table))
     {
-      // descendent of a head version replaces the head, therefore old head
-      // must be disposed of
-      drop(old_id, data_table);
+      base_id = old_id;
     }
-  put(new_id, new_data, data_table);
-  put_delta(old_id, new_id, reverse_delta, delta_table);
+  else
+    {
+      // XXX: this relies on single step deltas, should probably be something
+      // more like get_version()'s path following.
+      string delta_query = "SELECT base FROM " + delta_table + " WHERE id = ?";
+      results res;
+      fetch(res, one_col, any_rows, query(delta_query) % text(old_id()));
+      I(res.size() != 0);
+
+      base_id = hexenc<id>(res[0][0]);
+      I(exists(base_id, data_table));
+    }
+
+  data base_dat;
+  get(base_id, base_dat, data_table);
+  delta del;
+  diff(base_dat, new_dat, del);
+
+  static ticker full("full", "f", 1);
+  static ticker against("ag", "g", 1);
+
+  // TODO: size comparison stuff.
+
+  if (del().size() < 0.15 * new_dat().size())
+    {
+      ++against;
+      L(FL("put_version del %s -> %s (%s)") % base_id % new_id % delta_table);
+      put_delta(new_id, base_id, del, delta_table);
+    }
+  else
+    {
+      ++full;
+      L(FL("put_version dat %s (%s)") % new_id % data_table);
+      put(new_id, new_dat, data_table);
+    }
   guard.commit();
 }

@@ -1193,6 +1233,7 @@ database::remove_version(hexenc<id> cons
                          string const & data_table,
                          string const & delta_table)
 {
+  E(false, F("needs updating for against-base"));
   // We have a one of two cases (for multiple 'older' nodes):
   //
   //    1.  pre:        older <- target <- newer
@@ -1384,9 +1425,9 @@ database::put_file_version(file_id const
 void
 database::put_file_version(file_id const & old_id,
                            file_id const & new_id,
-                           file_delta const & del)
+                           file_data const & dat)
 {
-  put_version(old_id.inner(), new_id.inner(), del.inner(),
+  put_version(old_id.inner(), new_id.inner(), dat.inner(),
               "files", "file_deltas");
 }

@@ -1473,44 +1514,126 @@ void
 }

 void
-database::deltify_revision(revision_id const & rid)
+database::make_all_fwd()
 {
   transaction_guard guard(*this);
-  revision_set rev;
-  MM(rev);
-  MM(rid);
-  get_revision(rid, rev);
-  // Make sure that all parent revs have their files replaced with deltas
-  // from this rev's files.
-  {
-    for (edge_map::const_iterator i = rev.edges.begin();
-         i != rev.edges.end(); ++i)
-      {
-        for (std::map<split_path, std::pair<file_id, file_id> >::const_iterator
-               j = edge_changes(i).deltas_applied.begin();
-             j != edge_changes(i).deltas_applied.end(); ++j)
-          {
-            if (exists(delta_entry_src(j).inner(), "files") &&
-                file_version_exists(delta_entry_dst(j)))
-              {
-                file_data old_data;
-                file_data new_data;
-                get_file_version(delta_entry_src(j), old_data);
-                get_file_version(delta_entry_dst(j), new_data);
-                delta delt;
-                diff(old_data.inner(), new_data.inner(), delt);
-                file_delta del(delt);
-                drop(delta_entry_dst(j).inner(), "files");
-                drop(delta_entry_dst(j).inner(), "file_deltas");
-                put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
-              }
-          }
-      }
-  }
+
+  // create some empty temporary tables
+  string tmp_file_data("tmp_files");
+  string tmp_file_deltas("tmp_file_deltas");
+  string tmp_roster_data("tmp_rosters");
+  string tmp_roster_deltas("tmp_roster_deltas");
+  execute(query("CREATE TABLE tmp_files AS SELECT * FROM files WHERE 1=0"));
+  execute(query("CREATE TABLE tmp_file_deltas AS SELECT * FROM file_deltas WHERE 1=0"));
+  execute(query("CREATE TABLE tmp_rosters AS SELECT * FROM rosters WHERE 1=0"));
+  execute(query("CREATE TABLE tmp_roster_deltas AS SELECT * FROM roster_deltas WHERE 1=0"));
+
+  vector<revision_id> sorted;
+  toposort(sorted, *this);
+
+  ticker revs("rev", "r", 1);
+  for (vector<revision_id>::const_iterator i = sorted.begin();
+       i != sorted.end(); i++)
+    {
+      revision_id r(*i);
+      MM(r);
+      if (null_id(r))
+        continue;
+      ++revs;
+
+      revision_set rs;
+      get_revision(r, rs);
+      hexenc<id> new_rost_id;
+      MM(new_rost_id);
+      get_roster_id_for_revision(r, new_rost_id);
+
+      for (edge_map::const_iterator i = rs.edges.begin();
+           i != rs.edges.end(); ++i)
+        {
+
+          // rosters
+          if (!(exists(new_rost_id, tmp_roster_data)
+              || exists(new_rost_id, tmp_roster_deltas)))
+            {
+              data new_rost_dat;
+              get_roster(new_rost_id, new_rost_dat);
+
+              revision_id old_rev = edge_old_revision(i);
+              if (null_id(old_rev))
+                {
+                  L(FL("put roster %s") % new_rost_id);
+                  put(new_rost_id, new_rost_dat, tmp_roster_data);
+                }
+              else
+                {
+                  hexenc<id> old_rost_id;
+                  MM(old_rost_id);
+                  get_roster_id_for_revision(old_rev, old_rost_id);
+                  put_version(old_rost_id, new_rost_id, new_rost_dat,
+                              tmp_roster_data, tmp_roster_deltas);
+                }
+            }
+
+
+          // the file data.
+          // we'll be slack about the same add on multiple sides,
+          // put() and put_version() can NOP those out.
+          cset const & cs = edge_changes(i);
+          MM(cs);
+          // new additions
+          for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
+               fa != cs.files_added.end(); ++fa)
+            {
+              if (exists(fa->second.inner(), tmp_file_data)
+                  || exists(fa->second.inner(), tmp_file_deltas))
+                continue;
+
+              file_data fdat;
+              get_file_version(fa->second, fdat);
+              L(FL("put file %s") % fa->second.inner());
+              put(fa->second.inner(), fdat.inner(), tmp_file_data);
+            }
+
+          // deltas
+          for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
+                 = cs.deltas_applied.begin();
+               fd != cs.deltas_applied.end(); ++fd)
+            {
+              file_id src(fd->second.first);
+              file_id dst(fd->second.second);
+              MM(src);
+              MM(dst);
+              file_data src_dat, dst_dat;
+              get_file_version(src, src_dat);
+              get_file_version(dst, dst_dat);
+
+              delta del;
+              diff(src_dat.inner(), dst_dat.inner(), del);
+
+              put_version(src.inner(), dst.inner(), dst_dat.inner(),
+                          tmp_file_data, tmp_file_deltas);
+            }
+        }
+    }
+
+  execute(query("DELETE FROM files"));
+  execute(query("DELETE FROM file_deltas"));
+  execute(query("DELETE FROM rosters"));
+  execute(query("DELETE FROM roster_deltas"));
+
+  execute(query("INSERT INTO files SELECT * FROM " + tmp_file_data));
+  execute(query("INSERT INTO file_deltas SELECT * FROM " + tmp_file_deltas));
+  execute(query("INSERT INTO rosters SELECT * FROM " + tmp_roster_data));
+  execute(query("INSERT INTO roster_deltas SELECT * FROM " + tmp_roster_deltas));
+
+  execute(query("DROP TABLE " + tmp_file_data));
+  execute(query("DROP TABLE " + tmp_file_deltas));
+  execute(query("DROP TABLE " + tmp_roster_data));
+  execute(query("DROP TABLE " + tmp_roster_deltas));
+
   guard.commit();
 }

-
 void
 database::put_revision(revision_id const & new_id,
                        revision_set const & rev)
@@ -1565,8 +1688,6 @@ database::put_revision(revision_id const
               % text(new_id.inner()()));
     }

-  deltify_revision(new_id);
-
   // Phase 4: write the roster data and commit
   put_roster(new_id, ros, mm);

@@ -2652,8 +2773,7 @@ database::put_roster(revision_id const &
                      marking_map & marks)
 {
   MM(rev_id);
-  data old_data, new_data;
-  delta reverse_delta;
+  data new_data;
   hexenc<id> old_id, new_id;

   write_roster_and_marking(roster, marks, new_data);
@@ -2680,30 +2800,29 @@ database::put_roster(revision_id const &
     }

   // Else we have a new roster the database hasn't seen yet; our task is to
-  // add it, and deltify all the incoming edges (if they aren't already).
+  // add it.

-  put(new_id, new_data, data_table);
-
   std::set<revision_id> parents;
   get_revision_parents(rev_id, parents);

-  // Now do what deltify would do if we bothered (we have the
-  // roster written now, so might as well do it here).
+  // we need to make a delta, any parent will do
+  bool written = false;
   for (std::set<revision_id>::const_iterator i = parents.begin();
        i != parents.end(); ++i)
     {
       if (null_id(*i))
         continue;
-      revision_id old_rev = *i;
-      get_roster_id_for_revision(old_rev, old_id);
-      if (exists(new_id, data_table))
-        {
-          get_version(old_id, old_data, data_table, delta_table);
-          diff(new_data, old_data, reverse_delta);
-          drop(old_id, data_table);
-          put_delta(old_id, new_id, reverse_delta, delta_table);
-        }
+      get_roster_id_for_revision(*i, old_id);
+      put_version(old_id, new_id, new_data, data_table, delta_table);
+      written = true;
+      break;
     }
+
+  if (!written)
+    {
+      put(new_id, new_data, data_table);
+    }
+
   guard.commit();
 }

@@ -2897,11 +3016,20 @@ transaction_guard::maybe_checkpoint(size
 void
 transaction_guard::maybe_checkpoint(size_t nbytes)
 {
+  static ticker size("siz", "s", 1);
+  static ticker bytes("byt", "b", 1);
   checkpointed_calls += 1;
   checkpointed_bytes += nbytes;
   if (checkpointed_calls >= checkpoint_batch_size
       || checkpointed_bytes >= checkpoint_batch_bytes)
-    do_checkpoint();
+    {
+      if (checkpointed_calls >= checkpoint_batch_size)
+        ++size;
+      if (checkpointed_bytes >= checkpoint_batch_bytes)
+        ++bytes;
+
+      do_checkpoint();
+    }
 }

 void
============================================================
--- database.hh	fa72d07116fc7a0b5744291d97ef3b1eff93ec8c
+++ database.hh	c33e9d5d1f292ec396c695121530347a065aceaf
@@ -140,7 +140,7 @@ class database
                  std::string const & table);
   void put_version(hexenc<id> const & old_id,
                    hexenc<id> const & new_id,
-                   delta const & del,
+                   data const & new_dat,
                    std::string const & data_table,
                    std::string const & delta_table);
   void remove_version(hexenc<id> const & target_id,
@@ -238,10 +238,10 @@ public:
   void put_file(file_id const & new_id,
                 file_data const & dat);

-  // store new version and update old version to be a delta
+  // store delta to new data. can be called
   void put_file_version(file_id const & old_id,
                         file_id const & new_id,
-                        file_delta const & del);
+                        file_data const & dat);

   // get plain version if it exists, or reconstruct version
   // from deltas (if they exist).
@@ -259,7 +259,7 @@ public:
   void get_revision_manifest(revision_id const & cid,
                              manifest_id & mid);

-  void deltify_revision(revision_id const & rid);
+  void make_all_fwd();

   void get_revision(revision_id const & id,
                    revision_set & cs);
============================================================
--- packet.cc	06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc	c7fa83e44be5244218995087ba98814c0548bbf4
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
   patch(old_dat.inner(), del.inner(), new_dat);
   calculate_ident(file_data(new_dat), confirm);
   if (confirm == new_id)
-    app.db.put_file_version(old_id, new_id, del);
+    app.db.put_file_version(old_id, new_id, new_dat);
   else
     {
       W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- revision.cc	8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7
+++ revision.cc	77ddcf19bb049ae3ca74b008441bcaa7079232d5
@@ -361,17 +361,15 @@ void
 // passed in set.  if anyone ever needs to toposort the whole graph, then,
 // this function would be a good thing to generalize...
 void
-toposort(std::set<revision_id> const & revisions,
-         std::vector<revision_id> & sorted,
-         app_state & app)
+toposort(std::vector<revision_id> & sorted, database & db)
 {
   sorted.clear();
   typedef std::multimap<revision_id, revision_id>::iterator gi;
   typedef std::map<revision_id, int>::iterator pi;
   std::multimap<revision_id, revision_id> graph;
-  app.db.get_revision_ancestry(graph);
+  db.get_revision_ancestry(graph);
   std::set<revision_id> leaves;
-  app.db.get_revision_ids(leaves);
+  db.get_revision_ids(leaves);
   std::map<revision_id, int> pcount;
   for (gi i = graph.begin(); i != graph.end(); ++i)
     pcount.insert(std::make_pair(i->first, 0));
@@ -387,8 +385,7 @@ toposort(std::set<revision_id> const & r
       // now stick them in our ordering (if wanted) and remove them from the
       // graph, calculating the new roots as we go
       L(FL("new root: %s\n") % (roots.front()));
-      if (revisions.find(roots.front()) != revisions.end())
-        sorted.push_back(roots.front());
+      sorted.push_back(roots.front());
       for(gi i = graph.lower_bound(roots.front());
           i != graph.upper_bound(roots.front()); i++)
         if(--(pcount[i->second]) == 0)
@@ -402,6 +399,21 @@ toposort(std::set<revision_id> const & r
        i != leaves.end(); ++i)
     {
       L(FL("new leaf: %s\n") % (*i));
+      sorted.push_back(*i);
+    }
+}
+
+void
+toposort(std::set<revision_id> const & revisions,
+         std::vector<revision_id> & sorted,
+         database & db)
+{
+  std::vector<revision_id> all;
+  toposort(all, db);
+  sorted.clear();
+  for (std::vector<revision_id>::const_iterator i = all.begin();
+       i != all.end(); i++)
+    {
       if (revisions.find(*i) != revisions.end())
         sorted.push_back(*i);
     }
============================================================
--- revision.hh	5e3733994a6c92c4f15f0d78aa26669458443d46
+++ revision.hh	c6f7e1c8ac336bdec754f0fd1b822124e58eeb5f
@@ -125,9 +125,13 @@ toposort(std::set<revision_id> const & r
 void
 toposort(std::set<revision_id> const & revisions,
          std::vector<revision_id> & sorted,
-         app_state & app);
+         database & db);

 void
+toposort(std::vector<revision_id> & sorted,
+         database & db);
+
+void
 erase_ancestors(std::set<revision_id> & revisions, app_state & app);

 void