The unified diff between revisions [b395e92f..] and [dd2f27b8..] is displayed below. It can also be downloaded as a raw diff.

#
#
# patch "automate.cc"
#  from [68d579fc833b4a0ec52a0c8e5b130af7517ab7ed]
#    to [d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb]
#
# patch "commands.cc"
#  from [981960f6974af94a4f65c7a07441ee6cea47a677]
#    to [8d15447176db8827209c2feeaaf0b0eaa7e2e1fd]
#
# patch "constants.cc"
#  from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e]
#    to [c6acb6030061b1152370509c2775de8e93cc2ba7]
#
# patch "constants.hh"
#  from [528788cf84a56e076de55706b0fb989f570a6e9f]
#    to [7f0cbbead461f925ac421d3303932a9fd326815e]
#
# patch "database.cc"
#  from [4e1b8cd93f1705eb61577322de3b0cf540bc6940]
#    to [70d4d8f0c43de5e18bb023fc2c87805c0cdfa659]
#
# patch "database.hh"
#  from [ad0aa7e49ff8c91790d1134a7b91664123a58085]
#    to [b46b1ec4697ecd13dfe46d42078f7b514be8a022]
#
# patch "enumerator.cc"
#  from [9cd3afa7ef779cb7b0c53a053a9567b57b59f660]
#    to [05baab83e8b397403e54ed13a6c514c81feb7b4f]
#
# patch "enumerator.hh"
#  from [c0e45bd4447e10c0cbf3ca1e3ed54ef91f23c6e5]
#    to [5cc476581aed97dd306843e60c057ab1053cf0da]
#
# patch "netsync.cc"
#  from [ff048e5f5c432920b0ddbffbe0fb7133b8eb9f46]
#    to [0edebb049f6f44132b6c97be4663efc113efeee8]
#
# patch "packet.cc"
#  from [06b93709b49bc581a5274afc7762f503abf89dc2]
#    to [8d4dbbc27b15701f8963858e708b37d85e5bb1b9]
#
# patch "rcs_import.cc"
#  from [0de2b62e2c973f4c94592cff4990f0a43eb70294]
#    to [7a1852b10d39b49f220b7746b8cbef6d6c76ca23]
#
# patch "revision.cc"
#  from [6640272c7839f8489f1fe23813d7c520afc87d02]
#    to [ca74a6db1747f8c8cf213167ec226df0e30c0b84]
#
# patch "revision.hh"
#  from [1031cee8580d55528f5fb80e754fbf6e43dedb25]
#    to [ee41c7c61df986d551aebd2445a6232150a9c8b0]
#
# patch "schema.sql"
#  from [f44144278a4158695818d8f7e1901ac6f89e39bb]
#    to [5e315b54e2c1df1f315c3aff2ad9a551332debb7]
#
# patch "schema_migration.cc"
#  from [a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b]
#    to [9f3a9b8a4a509edf8af01c796cbc6588e902e888]
#
============================================================
--- automate.cc	68d579fc833b4a0ec52a0c8e5b130af7517ab7ed
+++ automate.cc	d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb
@@ -283,7 +283,7 @@ automate_toposort(std::vector<utf8> args
       revs.insert(rid);
     }
   std::vector<revision_id> sorted;
-  toposort(revs, sorted, app);
+  toposort(revs, sorted, app.db);
   for (std::vector<revision_id>::const_iterator i = sorted.begin();
        i != sorted.end(); ++i)
     output << (*i).inner()() << std::endl;
@@ -329,7 +329,7 @@ automate_ancestry_difference(std::vector
   ancestry_difference(a, bs, ancestors, app);

   std::vector<revision_id> sorted;
-  toposort(ancestors, sorted, app);
+  toposort(ancestors, sorted, app.db);
   for (std::vector<revision_id>::const_iterator i = sorted.begin();
        i != sorted.end(); ++i)
     output << (*i).inner()() << std::endl;
============================================================
--- commands.cc	981960f6974af94a4f65c7a07441ee6cea47a677
+++ commands.cc	8d15447176db8827209c2feeaaf0b0eaa7e2e1fd
@@ -2191,6 +2191,11 @@ CMD(db, N_("database"),
         build_changesets_from_manifest_ancestry(app);
       else if (idx(args, 0)() == "rosterify")
         build_roster_style_revs_from_manifest_style_revs(app);
+      else if (idx(args, 0)() == "flip")
+        {
+          app.db.make_fwd_deltas("files", "file_deltas");
+          app.db.make_fwd_deltas("rosters", "roster_deltas");
+        }
       else
         throw usage(name);
     }
============================================================
--- constants.cc	c7bc2142cf0e9861c9fd744f458da6e8c7a0323e
+++ constants.cc	c6acb6030061b1152370509c2775de8e93cc2ba7
@@ -150,4 +150,7 @@ namespace constants

   std::string const & netsync_key_initializer = std::string(netsync_session_key_length_in_bytes, 0);

+  size_t const max_delta_chain_length = 20;
+  float const max_delta_chain_size = 1.0;
+
 }
============================================================
--- constants.hh	528788cf84a56e076de55706b0fb989f570a6e9f
+++ constants.hh	7f0cbbead461f925ac421d3303932a9fd326815e
@@ -148,6 +148,12 @@ namespace constants

   // netsync session key default initializer
   extern std::string const & netsync_key_initializer;
+
+  // maximum length of a delta chain
+  extern size_t const max_delta_chain_length;
+
+  // maximum ratio of delta chain size vs original size
+  extern float const max_delta_chain_size;
 }

 #endif // __CONSTANTS_HH__
============================================================
--- database.cc	4e1b8cd93f1705eb61577322de3b0cf540bc6940
+++ database.cc	70d4d8f0c43de5e18bb023fc2c87805c0cdfa659
@@ -62,9 +62,10 @@ namespace
 {
   struct query_param
   {
-    enum arg_type { text, blob };
+    enum arg_type { text, blob, integer };
     arg_type type;
     std::string data;
+    uint32_t number;
   };

   query_param
@@ -73,6 +74,7 @@ namespace
     query_param q = {
       query_param::text,
       txt,
+      0,
     };
     return q;
   }
@@ -83,10 +85,22 @@ namespace
     query_param q = {
       query_param::blob,
       blb,
+      0,
     };
     return q;
   }

+  query_param
+  integer(uint32_t const & intg)
+  {
+    query_param q = {
+      query_param::integer,
+      "",
+      intg,
+    };
+    return q;
+  }
+
   // track all open databases for close_all_databases() handler
   set<sqlite3*> sql_contexts;
 }
@@ -120,7 +134,7 @@ database::database(system_path const & f
   // non-alphabetic ordering of tables in sql source files. we could create
   // a temporary db, write our intended schema into it, and read it back,
   // but this seems like it would be too rude. possibly revisit this issue.
-  schema("1db80c7cee8fa966913db1a463ed50bf1b0e5b0e"),
+  schema("b0987b874c2d348e9720ec11cf9618509b002618"),
   __sql(NULL),
   transaction_level(0)
 {}
@@ -696,6 +710,12 @@ database::fetch(results & res,
                               SQLITE_STATIC);
           }
           break;
+        case query_param::integer:
+          {
+            uint32_t number = idx(query.args, param - 1).number;
+            sqlite3_bind_int(i->second.stmt(), param, number);
+          }
+          break;
         default:
           I(false);
         }
@@ -802,6 +822,17 @@ database::delta_exists(hexenc<id> const
   return res.size() > 0;
 }

+bool
+database::delta_exists(hexenc<id> const & ident,
+                       hexenc<id> const & base,
+                       string const & table)
+{
+  results res;
+  query q("SELECT id FROM " + table + " WHERE id = ? AND base = ?");
+  fetch(res, one_col, any_rows, q % text(ident()) % text(base()));
+  return res.size() > 0;
+}
+
 unsigned long
 database::count(string const & table)
 {
@@ -887,30 +918,39 @@ database::put(hexenc<id> const & ident,

   base64<gzip<data> > dat_packed;
   pack(dat, dat_packed);
+
+  uint32_t dat_size = dat_packed().size();

-  string insert = "INSERT INTO " + table + " VALUES(?, ?)";
+  string insert = "INSERT INTO " + table + " VALUES(?, ?, ?)";
   execute(query(insert)
           % text(ident())
-          % text(dat_packed()));
+          % text(dat_packed())
+          % integer(dat_size));
 }
 void
 database::put_delta(hexenc<id> const & ident,
                     hexenc<id> const & base,
                     delta const & del,
+                    size_t parent_distance,
+                    size_t parent_size,
                     string const & table)
 {
-  // nb: delta schema is (id, base, delta)
-  I(ident() != "");
-  I(base() != "");
+  I(!null_id(ident));
+  I(!null_id(base));

   base64<gzip<delta> > del_packed;
   pack(del, del_packed);
+
+  uint32_t del_size = del_packed().size();

-  string insert = "INSERT INTO "+table+" VALUES(?, ?, ?)";
+  string insert = "INSERT INTO "+table+" VALUES(?, ?, ?, ?, ?, ?)";
   execute(query(insert)
           % text(ident())
           % text(base())
-          % text(del_packed()));
+          % text(del_packed())
+          % integer(parent_distance + 1)
+          % integer(parent_size + del_size)
+          % integer(del_size));
 }

 // static ticker cache_hits("vcache hits", "h", 1);
@@ -1135,12 +1175,14 @@ database::get_version(hexenc<id> const &
         {
           hexenc<id> const nxt = *i;

+          /*
           if (!vcache.exists(curr))
             {
               string tmp;
               app->finish(tmp);
               vcache.put(curr, tmp);
             }
+            */

           L(FL("following delta %s -> %s\n") % curr % nxt);
           delta del;
@@ -1171,30 +1213,68 @@ database::drop(hexenc<id> const & ident,
   execute(query(drop) % text(ident()));
 }

+void
+database::get_distance(hexenc<id> const & id,
+                       uint32_t & distance,
+                       uint32_t & size,
+                       string const & data_table,
+                       string const & delta_table)
+{
+  MM(id);
+  MM(data_table);
+  MM(delta_table);
+  {
+    results res;
+    query q("SELECT size FROM " + data_table + " WHERE id = ?");
+    fetch(res, one_col, any_rows, q % text(id()));
+    if (res.size() > 0)
+      {
+        distance = 0;
+        size = lexical_cast<uint32_t>(res[0][0]);
+        return;
+      }
+  }
+
+  {
+    results res;
+    query q("SELECT path_dist, path_size FROM " + delta_table + " WHERE id = ?");
+    fetch(res, 2, any_rows, q % text(id()));
+    if (res.size() > 0)
+      {
+        distance = lexical_cast<uint32_t>(res[0][0]);
+        size = lexical_cast<uint32_t>(res[0][1]);
+        return;
+      }
+  }
+  I(false);
+}
+
 void
 database::put_version(hexenc<id> const & old_id,
                       hexenc<id> const & new_id,
                       delta const & del,
+                      data const & dat,
                       string const & data_table,
                       string const & delta_table)
 {
+  // TODO: add an invariant or something perhaps
+  static ticker put_full("full", "f", 1);
+  static ticker put_del("del", "d", 1);

-  data old_data, new_data;
-  delta reverse_delta;
-
-  get_version(old_id, old_data, data_table, delta_table);
-  patch(old_data, del, new_data);
-  diff(new_data, old_data, reverse_delta);
-
   transaction_guard guard(*this);
-  if (exists(old_id, data_table))
+  uint32_t parent_distance, parent_size;
+  get_distance(old_id, parent_distance, parent_size, data_table, delta_table);
+
+  if (parent_distance >= constants::max_delta_chain_length)
     {
-      // descendent of a head version replaces the head, therefore old head
-      // must be disposed of
-      drop(old_id, data_table);
+      ++put_full;
+      put(new_id, dat, data_table);
     }
-  put(new_id, new_data, data_table);
-  put_delta(old_id, new_id, reverse_delta, delta_table);
+  else
+    {
+      ++put_del;
+      put_delta(new_id, old_id, del, parent_distance, parent_size, delta_table);
+    }
   guard.commit();
 }

@@ -1203,6 +1283,8 @@ database::remove_version(hexenc<id> cons
                          string const & data_table,
                          string const & delta_table)
 {
+  N(false, F("please don't remove versions for now."));
+#if 0
   // We have a one of two cases (for multiple 'older' nodes):
   //
   //    1.  pre:        older <- target <- newer
@@ -1282,6 +1364,7 @@ database::remove_version(hexenc<id> cons
     }

   guard.commit();
+#endif
 }


@@ -1384,6 +1467,29 @@ database::get_file_version(file_id const
   dat = tmp;
 }

+void
+database::get_file_delta(file_id const & id,
+                         file_id const & base,
+                         file_delta & del)
+{
+  if (delta_exists(id.inner(), base.inner(), "file_deltas"))
+    {
+      delta d;
+      get_delta(id.inner(), base.inner(), d, "file_deltas");
+      del = file_delta(d);
+    }
+  else
+    {
+      L(FL("get_file_delta failed for '%s'->'%s'") % base % id);
+      file_data dat, base_dat;
+      get_file_version(base, base_dat);
+      get_file_version(id, dat);
+      delta d;
+      diff(base_dat.inner(), dat.inner(), d);
+      del = file_delta(d);
+    }
+}
+
 void
 database::get_manifest_version(manifest_id const & id,
                                manifest_data & dat)
@@ -1403,9 +1509,11 @@ database::put_file_version(file_id const
 void
 database::put_file_version(file_id const & old_id,
                            file_id const & new_id,
-                           file_delta const & del)
+                           file_delta const & del,
+                           file_data const & dat)
 {
-  put_version(old_id.inner(), new_id.inner(), del.inner(),
+  put_version(old_id.inner(), new_id.inner(),
+              del.inner(), dat.inner(),
               "files", "file_deltas");
 }

@@ -1491,6 +1599,84 @@ database::get_revision(revision_id const
   dat = rdat;
 }

+void
+database::make_fwd_deltas(string const & data_table, string const & delta_table)
+{
+  N(false, F("it's not documented, don't run it"));
+#if 0
+  transaction_guard guard(*this);
+
+  set< hexenc<id> > del_bases, all_ids;
+  set< pair< hexenc<id>, hexenc<id> > > dels;
+
+  // id, base
+  get_ids(data_table, all_ids);
+    {
+      results res;
+      fetch(res, 2, any_rows, query("SELECT id, base FROM " + delta_table));
+
+      for (size_t i = 0; i < res.size(); ++i)
+        {
+          all_ids.insert(hexenc<id>(res[i][0]));
+          del_bases.insert(hexenc<id>(res[i][1]));
+          dels.insert( make_pair(res[i][0], res[i][1]) );
+        }
+    }
+
+  set< hexenc<id> > new_bases;
+
+  set_difference(all_ids.begin(), all_ids.end(), del_bases.begin(), del_bases.end(),
+                 inserter(new_bases, new_bases.begin()));
+
+  // create some empty temporary tables
+  string tmp_data_table("tmp_" + data_table);
+  string tmp_delta_table("tmp_" + delta_table);
+
+  execute(query("CREATE TABLE " + tmp_data_table + " AS SELECT * FROM " + data_table + " WHERE 1=0"));
+  execute(query("CREATE TABLE " + tmp_delta_table + " AS SELECT * FROM " + delta_table + " WHERE 1=0"));
+
+  ticker full("full", "g", 1);
+  for ( set< hexenc<id> >::const_iterator i = new_bases.begin(); i != new_bases.end(); i++)
+    {
+      MM(*i);
+      data dat;
+      get_version(*i, dat, data_table, delta_table);
+      put(*i, dat, tmp_data_table);
+      ++full;
+    }
+
+  ticker flips("flips", "f", 1);
+
+  for ( set< pair< hexenc<id>, hexenc<id> > >::const_iterator i = dels.begin(); i != dels.end(); i++)
+    {
+      ++flips;
+      hexenc<id> new_id = i->second;
+      hexenc<id> new_base = i->first;
+      MM(new_id);
+      MM(new_base);
+
+      data base, dat;
+      get_version(new_id, dat, data_table, delta_table);
+      get_version(new_base, base, data_table, delta_table);
+      delta del;
+      diff(base, dat, del);
+
+      put_delta(new_id, new_base, del, tmp_delta_table);
+    }
+
+  execute(query("DELETE FROM " + data_table));
+  execute(query("DELETE FROM " + delta_table));
+
+  execute(query("INSERT INTO " + data_table + " SELECT * FROM " + tmp_data_table));
+  execute(query("INSERT INTO " + delta_table + " SELECT * FROM " + tmp_delta_table));
+
+  execute(query("DROP TABLE " + tmp_data_table));
+  execute(query("DROP TABLE " + tmp_delta_table));
+
+  guard.commit();
+#endif
+}
+
 void
 database::deltify_revision(revision_id const & rid)
 {
@@ -1521,7 +1707,7 @@ database::deltify_revision(revision_id c
                 file_delta del(delt);
                 drop(delta_entry_dst(j).inner(), "files");
                 drop(delta_entry_dst(j).inner(), "file_deltas");
-                put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
+                put_file_version(delta_entry_src(j), delta_entry_dst(j), del, new_data);
               }
           }
       }
@@ -1572,9 +1758,12 @@ database::put_revision(revision_id const
   base64<gzip<data> > d_packed;
   pack(d.inner(), d_packed);

-  execute(query("INSERT INTO revisions VALUES(?, ?)")
+  uint32_t d_size = d_packed().size();
+
+  execute(query("INSERT INTO revisions VALUES(?, ?, ?)")
           % text(new_id.inner()())
-          % text(d_packed()));
+          % text(d_packed())
+          % integer(d_size));

   for (edge_map::const_iterator e = rev.edges.begin();
        e != rev.edges.end(); ++e)
@@ -1584,7 +1773,8 @@ database::put_revision(revision_id const
               % text(new_id.inner()()));
     }

-  deltify_revision(new_id);
+  // TODO: some different version?
+  // deltify_revision(new_id);

   // Phase 4: write the roster data and commit
   put_roster(new_id, ros, mm);
@@ -2664,7 +2854,6 @@ database::get_roster(revision_id const &
   read_roster_and_marking(dat, roster, marks);
 }

-
 void
 database::put_roster(revision_id const & rev_id,
                      roster_t & roster,
@@ -2672,7 +2861,6 @@ database::put_roster(revision_id const &
 {
   MM(rev_id);
   data old_data, new_data;
-  delta reverse_delta;
   hexenc<id> old_id, new_id;

   write_roster_and_marking(roster, marks, new_data);
@@ -2701,13 +2889,13 @@ database::put_roster(revision_id const &
   // Else we have a new roster the database hasn't seen yet; our task is to
   // add it, and deltify all the incoming edges (if they aren't already).

-  put(new_id, new_data, data_table);

   std::set<revision_id> parents;
   get_revision_parents(rev_id, parents);

   // Now do what deltify would do if we bothered (we have the
   // roster written now, so might as well do it here).
+  bool delta_written = false;
   for (std::set<revision_id>::const_iterator i = parents.begin();
        i != parents.end(); ++i)
     {
@@ -2715,14 +2903,18 @@ database::put_roster(revision_id const &
         continue;
       revision_id old_rev = *i;
       get_roster_id_for_revision(old_rev, old_id);
-      if (exists(new_id, data_table))
-        {
-          get_version(old_id, old_data, data_table, delta_table);
-          diff(new_data, old_data, reverse_delta);
-          drop(old_id, data_table);
-          put_delta(old_id, new_id, reverse_delta, delta_table);
-        }
+      get_version(old_id, old_data, data_table, delta_table);
+      delta del;
+      diff(old_data, new_data, del);
+      put_version(old_id, new_id, del, new_data, data_table, delta_table);
+      delta_written = true;
+      break;
     }
+
+  // in case the parents were all null/didn't exist
+  if (!delta_written)
+    put(new_id, new_data, data_table);
+
   guard.commit();
 }

============================================================
--- database.hh	ad0aa7e49ff8c91790d1134a7b91664123a58085
+++ database.hh	b46b1ec4697ecd13dfe46d42078f7b514be8a022
@@ -110,6 +110,9 @@ class database
               std::string const & table);
   bool delta_exists(hexenc<id> const & ident,
                     std::string const & table);
+  bool database::delta_exists(hexenc<id> const & ident,
+                              hexenc<id> const & base,
+                              std::string const & table);

   unsigned long count(std::string const & table);
   unsigned long space_usage(std::string const & table,
@@ -134,13 +137,21 @@ class database
            std::string const & table);
   void drop(hexenc<id> const & base,
             std::string const & table);
-  void put_delta(hexenc<id> const & id,
+  void get_distance(hexenc<id> const & id,
+                    uint32_t & distance,
+                    uint32_t & size,
+                    std::string const & data_table,
+                    std::string const & delta_table);
+  void put_delta(hexenc<id> const & ident,
                  hexenc<id> const & base,
                  delta const & del,
+                 size_t parent_distance,
+                 size_t parent_size,
                  std::string const & table);
   void put_version(hexenc<id> const & old_id,
                    hexenc<id> const & new_id,
                    delta const & del,
+                   data const & dat,
                    std::string const & data_table,
                    std::string const & delta_table);
   void remove_version(hexenc<id> const & target_id,
@@ -242,8 +253,14 @@ public:
   // store new version and update old version to be a delta
   void put_file_version(file_id const & old_id,
                         file_id const & new_id,
-                        file_delta const & del);
+                        file_delta const & del,
+                        file_data const & dat);

+
+  void get_file_delta(file_id const & id,
+                      file_id const & base,
+                      file_delta & del);
+
   // get plain version if it exists, or reconstruct version
   // from deltas (if they exist).
   void get_manifest_version(manifest_id const & id,
@@ -260,6 +277,8 @@ public:
   void get_revision_manifest(revision_id const & cid,
                              manifest_id & mid);

+  void make_fwd_deltas(std::string const & data_table, std::string const & delta_table);
+
   void deltify_revision(revision_id const & rid);

   void get_revision(revision_id const & id,
============================================================
--- enumerator.cc	9cd3afa7ef779cb7b0c53a053a9567b57b59f660
+++ enumerator.cc	05baab83e8b397403e54ed13a6c514c81feb7b4f
@@ -27,10 +27,14 @@ revision_enumerator::revision_enumerator
                                          set<revision_id> const & terminal)
   : cb(cb), app(app), terminal_nodes(terminal)
 {
+  // TODO: needs sorting out with the toposort stuff.
+  I(false);
+  /*
   for (set<revision_id>::const_iterator i = initial.begin();
        i != initial.end(); ++i)
     revs.push_back(*i);
-  load_graphs();
+    */
+  load_revs();
 }

 revision_enumerator::revision_enumerator(enumerator_callbacks & cb,
@@ -38,41 +42,29 @@ revision_enumerator::revision_enumerator
   : cb(cb), app(app)
 {
   revision_id root;
-  revs.push_back(root);
-  load_graphs();
+  load_revs();
 }

 void
-revision_enumerator::load_graphs()
+revision_enumerator::load_revs()
 {
-  app.db.get_revision_ancestry(graph);
-  for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
-       i != graph.end(); ++i)
+  // TODO: this totally disrespects the initial and terminal stuff.
+  // It should probably be integrated into the toposort code itself.
+  vector<revision_id> topo_vec;
+  toposort(topo_vec, app.db);
+  for (vector<revision_id>::const_iterator r = topo_vec.begin();
+       r != topo_vec.end(); r++)
     {
-      inverse_graph.insert(make_pair(i->second, i->first));
+      if (null_id(*r))
+        continue;
+      topo_revs.push_back(*r);
     }
 }

-bool
-revision_enumerator::all_parents_enumerated(revision_id const & child)
-{
-  typedef multimap<revision_id, revision_id>::const_iterator ci;
-  pair<ci,ci> range = inverse_graph.equal_range(child);
-  for (ci i = range.first; i != range.second; ++i)
-    {
-      if (i->first == child)
-        {
-          if (enumerated_nodes.find(i->second) == enumerated_nodes.end())
-            return false;
-        }
-    }
-  return true;
-}
-
 bool
 revision_enumerator::done()
 {
-  return revs.empty() && items.empty();
+  return topo_revs.empty() && items.empty();
 }

 void
@@ -158,106 +150,152 @@ revision_enumerator::files_for_revision(
     }
 }

-void
-revision_enumerator::step()
+void
+revision_enumerator::process_bunch()
 {
-  while (!done())
+  // we build up a set of files and revs to send
+  vector<revision_id> bunch_revs;
+  set<file_id> bunch_files;
+  multimap<file_id, file_id> bunch_file_deltas;
+
+  // files that should be sent first
+  set<file_id> top_files;
+  // files that will be sent either as full files are as the dst of deltas
+  set<file_id> dst_files;
+
+  while (bunch_revs.size() < 100 && !topo_revs.empty())
     {
-      if (items.empty() && !revs.empty())
+      revision_id r = topo_revs.front();
+      topo_revs.pop_front();
+      if (!cb.process_this_rev(r))
+        continue;
+
+      bunch_revs.push_back(r);
+
+      set<file_id> full_files;
+      set<pair<file_id, file_id> > del_files;
+
+      files_for_revision(r, full_files, del_files);
+
+      set<file_id> top_files;
+      set<file_id> dst_files;
+
+      for (set<file_id>::const_iterator f = full_files.begin();
+           f != full_files.end(); f++)
         {
-          revision_id r = revs.front();
-          revs.pop_front();
+          bunch_files.insert(*f);
+          top_files.insert(*f);
+          dst_files.insert(*f)
+        }

-          // It's possible we've enumerated this node elsewhere since last
-          // time around. Cull rather than reprocess.
-          if (enumerated_nodes.find(r) != enumerated_nodes.end())
-            continue;
-
-          if (!all_parents_enumerated(r))
-            {
-              revs.push_back(r);
-              continue;
-            }
-
-          if (terminal_nodes.find(r) == terminal_nodes.end())
-            {
-              typedef multimap<revision_id, revision_id>::const_iterator ci;
-              pair<ci,ci> range = graph.equal_range(r);
-              for (ci i = range.first; i != range.second; ++i)
-                {
-                  if (i->first == r)
-                    if (enumerated_nodes.find(i->first) == enumerated_nodes.end())
-                      revs.push_back(i->second);
-                }
-            }
+      for (set<pair<file_id,file_id> >::const_iterator fd = del_files.begin();
+           fd != del_files.end(); fd++)
+        {
+          file_id src(fd->second);
+          file_id dst(fd->first);
+          bunch_file_deltas.insert(make_pair(fd->second, fd->first));
+          if (dst_files.find(src) == dst_files.end())
+            top_files.insert(src);
+          dst_files.insert(dst);
+        }
+    }

-          enumerated_nodes.insert(r);
+  // XXX required?
+  set<file_id> sent_files;

-          if (null_id(r))
-            continue;
+  // now we can queue up the file items in order.
+  for (set<file_id>::const_iterator t = top_files.begin();
+       t != top_files.end(); t++)
+    {
+      if (sent_files.find(*t) != sent_files.end())
+        {
+          L(FL("already sent top_file %s") % t->inner()());
+          continue;
+        }

-          if (cb.process_this_rev(r))
-            {
-              L(FL("revision_enumerator::step expanding "
-                  "contents of rev '%d'\n") % r);
+      L(FL("top_file %s") % t->inner()());

-              // The rev's files and fdeltas
-              {
-                set<file_id> full_files;
-                set<pair<file_id, file_id> > del_files;
-                files_for_revision(r, full_files, del_files);
+      if (bunch_files.find(*t) != bunch_files.end())
+        {
+          // a full file to send.
+          enumerator_item item;
+          item.tag = enumerator_item::fdata;
+          item.ident_a = t->inner();
+          items.push_back(item);
+          sent_files.insert(*t);
+          L(FL("send full_file %s") % t->inner()());
+        }

-                for (set<file_id>::const_iterator f = full_files.begin();
-                     f != full_files.end(); f++)
-                  {
-                    if (cb.queue_this_file(f->inner()))
-                      {
-                        enumerator_item item;
-                        item.tag = enumerator_item::fdata;
-                        item.ident_a = f->inner();
-                        items.push_back(item);
-                      }
-                  }
+      // XXX: doing BFS now, should try DFS too.
+      deque<file_id> frontier;
+      frontier.push_back(*t);

-                for (set<pair<file_id, file_id> >::const_iterator fd = del_files.begin();
-                     fd != del_files.end(); fd++)
-                  {
-                    if (cb.queue_this_file(fd->second.inner()))
-                      {
-                        enumerator_item item;
-                        item.tag = enumerator_item::fdelta;
-                        item.ident_a = fd->first.inner();
-                        item.ident_b = fd->second.inner();
-                        items.push_back(item);
-                      }
-                  }
-              }
+      while (!frontier.empty())
+        {
+          file_id f = frontier.front();
+          frontier.pop_front();

-              // Queue up the rev itself
-              {
-                enumerator_item item;
-                item.tag = enumerator_item::rev;
-                item.ident_a = r.inner();
-                items.push_back(item);
-              }
-            }
-
-          // Queue up some or all of the rev's certs
-          vector<hexenc<id> > hashes;
-          app.db.get_revision_certs(r, hashes);
-          for (vector<hexenc<id> >::const_iterator i = hashes.begin();
-               i != hashes.end(); ++i)
+          L(FL("frontier %s") % f.inner()());
+
+          for (multimap<file_id,file_id>::const_iterator
+               d = bunch_file_deltas.lower_bound(f);
+               d != bunch_file_deltas.upper_bound(f);
+               d++)
             {
-              if (cb.queue_this_cert(*i))
+              if (sent_files.find(d->second) != sent_files.end())
                 {
-                  enumerator_item item;
-                  item.tag = enumerator_item::cert;
-                  item.ident_a = *i;
-                  items.push_back(item);
+                  L(FL("already sent delta %s") % d->second.inner()());
+                  continue;
                 }
+              sent_files.insert(d->second);
+              frontier.push_back(d->second);
+
+              enumerator_item item;
+              item.tag = enumerator_item::fdelta;
+              item.ident_a = d->first.inner();
+              item.ident_b = d->second.inner();
+              items.push_back(item);
+              L(FL("file_delta %s->%s") % d->first.inner()() % d->second.inner()());
             }
         }
+    }

+  // and the revs
+  for (vector<revision_id>::const_iterator r = bunch_revs.begin();
+       r != bunch_revs.end(); r++)
+    {
+      enumerator_item item;
+      item.tag = enumerator_item::rev;
+      item.ident_a = r->inner();
+      items.push_back(item);
+
+      // Queue up some or all of the rev's certs
+      vector<hexenc<id> > hashes;
+      app.db.get_revision_certs(*r, hashes);
+      for (vector<hexenc<id> >::const_iterator i = hashes.begin();
+           i != hashes.end(); ++i)
+        {
+          if (cb.queue_this_cert(*i))
+            {
+              enumerator_item item;
+              item.tag = enumerator_item::cert;
+              item.ident_a = *i;
+              items.push_back(item);
+            }
+        }
+    }
+}
+
+void
+revision_enumerator::step()
+{
+  while (!done())
+    {
+      if (items.empty() && !topo_revs.empty())
+        {
+          process_bunch();
+        }
+
       if (!items.empty())
         {
           L(FL("revision_enumerator::step extracting item\n"));
============================================================
--- enumerator.hh	c0e45bd4447e10c0cbf3ca1e3ed54ef91f23c6e5
+++ enumerator.hh	5cc476581aed97dd306843e60c057ab1053cf0da
@@ -50,10 +50,8 @@ revision_enumerator
   app_state & app;
   std::set<revision_id> terminal_nodes;
   std::set<revision_id> enumerated_nodes;
-  std::deque<revision_id> revs;
   std::deque<enumerator_item> items;
-  std::multimap<revision_id, revision_id> graph;
-  std::multimap<revision_id, revision_id> inverse_graph;
+  std::deque<revision_id> topo_revs;

   revision_enumerator(enumerator_callbacks & cb,
                       app_state & app,
@@ -61,11 +59,11 @@ revision_enumerator
                       std::set<revision_id> const & terminal);
   revision_enumerator(enumerator_callbacks & cb,
                       app_state & app);
-  void load_graphs();
-  bool all_parents_enumerated(revision_id const & child);
+  void load_revs();
   void revision_enumerator::files_for_revision(revision_id const & r,
                                                std::set<file_id> & full_files,
                                                std::set<std::pair<file_id,file_id> > & del_files);
+  void process_bunch();
   void step();
   bool done();
 };
============================================================
--- netsync.cc	ff048e5f5c432920b0ddbffbe0fb7133b8eb9f46
+++ netsync.cc	0edebb049f6f44132b6c97be4663efc113efeee8
@@ -591,15 +591,12 @@ session::note_file_delta(file_id const &
 {
   if (role == sink_role)
     return;
-  file_data fd1, fd2;
-  delta del;
-  id fid1, fid2;
-  decode_hexenc(src.inner(), fid1);
-  decode_hexenc(dst.inner(), fid2);
-  app.db.get_file_version(src, fd1);
-  app.db.get_file_version(dst, fd2);
-  diff(fd1.inner(), fd2.inner(), del);
-  queue_delta_cmd(file_item, fid1, fid2, del);
+  file_delta del;
+  app.db.get_file_delta(dst, src, del);
+  id src_id, dst_id;
+  decode_hexenc(src.inner(), src_id);
+  decode_hexenc(dst.inner(), dst_id);
+  queue_delta_cmd(file_item, src_id, dst_id, del.inner());
   file_items_sent.insert(dst);
 }

============================================================
--- packet.cc	06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc	8d4dbbc27b15701f8963858e708b37d85e5bb1b9
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
   patch(old_dat.inner(), del.inner(), new_dat);
   calculate_ident(file_data(new_dat), confirm);
   if (confirm == new_id)
-    app.db.put_file_version(old_id, new_id, del);
+    app.db.put_file_version(old_id, new_id, del, new_dat);
   else
     {
       W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- rcs_import.cc	0de2b62e2c973f4c94592cff4990f0a43eb70294
+++ rcs_import.cc	7a1852b10d39b49f220b7746b8cbef6d6c76ca23
@@ -456,7 +456,8 @@ rcs_put_raw_file_edge(hexenc<id> const &
     {
       I(db.exists(new_id, "files")
         || db.delta_exists(new_id, "file_deltas"));
-      db.put_delta(old_id, new_id, del, "file_deltas");
+      N(false, F("cvs_import is broken here."));
+      //db.put_delta(old_id, new_id, del, "file_deltas");
     }
 }

============================================================
--- revision.cc	6640272c7839f8489f1fe23813d7c520afc87d02
+++ revision.cc	ca74a6db1747f8c8cf213167ec226df0e30c0b84
@@ -361,17 +361,15 @@ void
 // passed in set.  if anyone ever needs to toposort the whole graph, then,
 // this function would be a good thing to generalize...
 void
-toposort(std::set<revision_id> const & revisions,
-         std::vector<revision_id> & sorted,
-         app_state & app)
+toposort(std::vector<revision_id> & sorted, database & db)
 {
   sorted.clear();
   typedef std::multimap<revision_id, revision_id>::iterator gi;
   typedef std::map<revision_id, int>::iterator pi;
   std::multimap<revision_id, revision_id> graph;
-  app.db.get_revision_ancestry(graph);
+  db.get_revision_ancestry(graph);
   std::set<revision_id> leaves;
-  app.db.get_revision_ids(leaves);
+  db.get_revision_ids(leaves);
   std::map<revision_id, int> pcount;
   for (gi i = graph.begin(); i != graph.end(); ++i)
     pcount.insert(std::make_pair(i->first, 0));
@@ -387,8 +385,7 @@ toposort(std::set<revision_id> const & r
       // now stick them in our ordering (if wanted) and remove them from the
       // graph, calculating the new roots as we go
       L(FL("new root: %s\n") % (roots.front()));
-      if (revisions.find(roots.front()) != revisions.end())
-        sorted.push_back(roots.front());
+      sorted.push_back(roots.front());
       for(gi i = graph.lower_bound(roots.front());
           i != graph.upper_bound(roots.front()); i++)
         if(--(pcount[i->second]) == 0)
@@ -402,6 +399,21 @@ toposort(std::set<revision_id> const & r
        i != leaves.end(); ++i)
     {
       L(FL("new leaf: %s\n") % (*i));
+      sorted.push_back(*i);
+    }
+}
+
+void
+toposort(std::set<revision_id> const & revisions,
+         std::vector<revision_id> & sorted,
+         database & db)
+{
+  std::vector<revision_id> all;
+  toposort(all, db);
+  sorted.clear();
+  for (std::vector<revision_id>::const_iterator i = all.begin();
+       i != all.end(); i++)
+    {
       if (revisions.find(*i) != revisions.end())
         sorted.push_back(*i);
     }
============================================================
--- revision.hh	1031cee8580d55528f5fb80e754fbf6e43dedb25
+++ revision.hh	ee41c7c61df986d551aebd2445a6232150a9c8b0
@@ -125,9 +125,13 @@ toposort(std::set<revision_id> const & r
 void
 toposort(std::set<revision_id> const & revisions,
          std::vector<revision_id> & sorted,
-         app_state & app);
+         database & db);

 void
+toposort(std::vector<revision_id> & sorted,
+         database & db);
+
+void
 erase_ancestors(std::set<revision_id> & revisions, app_state & app);

 void
============================================================
--- schema.sql	f44144278a4158695818d8f7e1901ac6f89e39bb
+++ schema.sql	5e315b54e2c1df1f315c3aff2ad9a551332debb7
@@ -22,21 +22,26 @@ CREATE TABLE files
 CREATE TABLE files
 	(
 	id primary key,   -- strong hash of file contents
-	data not null     -- compressed, encoded contents of a file
+	data not null,    -- compressed, encoded contents of a file
+	size integer not null     -- length(data)
 	);

 CREATE TABLE file_deltas
 	(
-	id not null,      -- strong hash of file contents
-	base not null,    -- joins with files.id or file_deltas.id
-	delta not null,   -- rdiff to construct current from base
+	id not null,          -- strong hash of file contents
+	base not null,        -- joins with files.id or file_deltas.id
+	delta not null,       -- rdiff to construct current from base
+	path_dist integer not null,   -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,   -- size + size of base
+	size integer not null,        -- length(delta)
 	unique(id, base)
 	);

 CREATE TABLE manifests
 	(
 	id primary key,      -- strong hash of all the entries in a manifest
-	data not null        -- compressed, encoded contents of a manifest
+	data not null,       -- compressed, encoded contents of a manifest
+	size integer not null        -- length(data)
 	);

 CREATE TABLE manifest_deltas
@@ -44,13 +49,17 @@ CREATE TABLE manifest_deltas
 	id not null,         -- strong hash of all the entries in a manifest
 	base not null,       -- joins with either manifest.id or manifest_deltas.id
 	delta not null,      -- rdiff to construct current from base
+	path_dist integer not null,  -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,  -- size + size of base
+	size integer not null,       -- length(delta)
 	unique(id, base)
 	);

 CREATE TABLE revisions
 	(
-	id primary key,      -- SHA1(text of revision)
-	data not null        -- compressed, encoded contents of a revision
+	id primary key,        -- SHA1(text of revision)
+	data not null,         -- compressed, encoded contents of a revision
+	size integer not null  -- length(data)
 	);

 CREATE TABLE revision_ancestry
@@ -63,7 +72,8 @@ CREATE TABLE rosters
 CREATE TABLE rosters
 	(
 	id primary key,         -- strong hash of the roster
-	data not null           -- compressed, encoded contents of the roster
+	data not null,          -- compressed, encoded contents of the roster
+	size integer not null   -- length(data)
 	);

 CREATE TABLE roster_deltas
@@ -71,6 +81,9 @@ CREATE TABLE roster_deltas
 	id not null,            -- strong hash of the roster
 	base not null,          -- joins with either rosters.id or roster_deltas.id
 	delta not null,         -- rdiff to construct current from base
+	path_dist integer not null,     -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,     -- size + size of base
+	size integer not null,          -- length(delta)
 	unique(id, base)
 	);

============================================================
--- schema_migration.cc	a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b
+++ schema_migration.cc	9f3a9b8a4a509edf8af01c796cbc6588e902e888
@@ -943,5 +943,7 @@ migrate_monotone_schema(sqlite3 *sql, ap
   // also add a new migration test for the new schema version.  See
   // tests/t_migrate_schema.at for details.

-  m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+  //m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+  //m.migrate(sql, "8cd14946f11ae66218240f332ce416801db1e453");
+  m.migrate(sql, "b0987b874c2d348e9720ec11cf9618509b002618");
 }