The unified diff between revisions [9b19e788..] and [b954a1fc..] is displayed below. It can also be downloaded as a raw diff.

#
#
# patch "constants.cc"
#  from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e]
#    to [c6acb6030061b1152370509c2775de8e93cc2ba7]
#
# patch "constants.hh"
#  from [528788cf84a56e076de55706b0fb989f570a6e9f]
#    to [7f0cbbead461f925ac421d3303932a9fd326815e]
#
# patch "database.cc"
#  from [377449cef66018ee4619b66e5c08dae01129df89]
#    to [2fc44fac19123c10272d86d0295b91f58a8ad6b1]
#
# patch "database.hh"
#  from [4ea836110ca59b61655724a3eb2dcd1c9a5a34cb]
#    to [b46b1ec4697ecd13dfe46d42078f7b514be8a022]
#
# patch "packet.cc"
#  from [06b93709b49bc581a5274afc7762f503abf89dc2]
#    to [8d4dbbc27b15701f8963858e708b37d85e5bb1b9]
#
# patch "rcs_import.cc"
#  from [0de2b62e2c973f4c94592cff4990f0a43eb70294]
#    to [7a1852b10d39b49f220b7746b8cbef6d6c76ca23]
#
# patch "schema.sql"
#  from [f44144278a4158695818d8f7e1901ac6f89e39bb]
#    to [5e315b54e2c1df1f315c3aff2ad9a551332debb7]
#
# patch "schema_migration.cc"
#  from [a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b]
#    to [9f3a9b8a4a509edf8af01c796cbc6588e902e888]
#
============================================================
--- constants.cc	c7bc2142cf0e9861c9fd744f458da6e8c7a0323e
+++ constants.cc	c6acb6030061b1152370509c2775de8e93cc2ba7
@@ -150,4 +150,7 @@ namespace constants

   std::string const & netsync_key_initializer = std::string(netsync_session_key_length_in_bytes, 0);

+  size_t const max_delta_chain_length = 20;
+  float const max_delta_chain_size = 1.0;
+
 }
============================================================
--- constants.hh	528788cf84a56e076de55706b0fb989f570a6e9f
+++ constants.hh	7f0cbbead461f925ac421d3303932a9fd326815e
@@ -148,6 +148,12 @@ namespace constants

   // netsync session key default initializer
   extern std::string const & netsync_key_initializer;
+
+  // maximum length of a delta chain
+  extern size_t const max_delta_chain_length;
+
+  // maximum ratio of delta chain size vs original size
+  extern float const max_delta_chain_size;
 }

 #endif // __CONSTANTS_HH__
============================================================
--- database.cc	377449cef66018ee4619b66e5c08dae01129df89
+++ database.cc	2fc44fac19123c10272d86d0295b91f58a8ad6b1
@@ -62,9 +62,10 @@ namespace
 {
   struct query_param
   {
-    enum arg_type { text, blob };
+    enum arg_type { text, blob, integer };  // integer: the bound value comes from `number`, not `data`
     arg_type type;
     std::string data;
+    uint32_t number;  // value bound when type == integer; the text/blob builders set it to 0
   };

   query_param
@@ -73,6 +74,7 @@ namespace
     query_param q = {
       query_param::text,
       txt,
+      0,
     };
     return q;
   }
@@ -83,10 +85,22 @@ namespace
     query_param q = {
       query_param::blob,
       blb,
+      0,
     };
     return q;
   }

+  query_param
+  integer(uint32_t const & intg)
+  {  // wrap intg as an integer-typed query parameter
+    query_param q = {
+      query_param::integer,
+      "",      // data: left empty for integer parameters
+      intg,    // number: the value that gets bound
+    };
+    return q;
+  }
+
   // track all open databases for close_all_databases() handler
   set<sqlite3*> sql_contexts;
 }
@@ -120,7 +134,7 @@ database::database(system_path const & f
   // non-alphabetic ordering of tables in sql source files. we could create
   // a temporary db, write our intended schema into it, and read it back,
   // but this seems like it would be too rude. possibly revisit this issue.
-  schema("1db80c7cee8fa966913db1a463ed50bf1b0e5b0e"),
+  schema("b0987b874c2d348e9720ec11cf9618509b002618"),
   __sql(NULL),
   transaction_level(0)
 {}
@@ -696,6 +710,12 @@ database::fetch(results & res,
                               SQLITE_STATIC);
           }
           break;
+        case query_param::integer:
+          {  // bind as 64-bit: sqlite3_bind_int takes a signed int, so a uint32_t above INT_MAX would be stored negative
+            uint32_t number = idx(query.args, param - 1).number;
+            sqlite3_bind_int64(i->second.stmt(), param, number);  // uint32_t widens to the 64-bit argument without loss
+          }
+          break;
         default:
           I(false);
         }
@@ -898,30 +918,39 @@ database::put(hexenc<id> const & ident,

   base64<gzip<data> > dat_packed;
   pack(dat, dat_packed);
+
+  uint32_t dat_size = dat_packed().size();

-  string insert = "INSERT INTO " + table + " VALUES(?, ?)";
+  string insert = "INSERT INTO " + table + " VALUES(?, ?, ?)";
   execute(query(insert)
           % text(ident())
-          % text(dat_packed()));
+          % text(dat_packed())
+          % integer(dat_size));
 }
 void
 database::put_delta(hexenc<id> const & ident,
                     hexenc<id> const & base,
                     delta const & del,
+                    size_t parent_distance,  // path distance recorded for base (0 when base is a full version)
+                    size_t parent_size,      // size (full base) or path_size (delta base) recorded for base
                     string const & table)
 {
-  // nb: delta schema is (id, base, delta)
-  I(ident() != "");
-  I(base() != "");
+  I(!null_id(ident));
+  I(!null_id(base));

   base64<gzip<delta> > del_packed;
   pack(del, del_packed);
+
+  uint32_t del_size = del_packed().size();  // length of the packed delta text that is stored

-  string insert = "INSERT INTO "+table+" VALUES(?, ?, ?)";
+  string insert = "INSERT INTO "+table+" VALUES(?, ?, ?, ?, ?, ?)";  // (id, base, delta, path_dist, path_size, size)
   execute(query(insert)
           % text(ident())
           % text(base())
-          % text(del_packed()));
+          % text(del_packed())
+          % integer(parent_distance + 1)     // path_dist; NOTE(review): size_t is narrowed to uint32_t by integer()
+          % integer(parent_size + del_size)  // path_size: cumulative size along the chain, same narrowing applies
+          % integer(del_size));              // size: this delta alone
 }

 // static ticker cache_hits("vcache hits", "h", 1);
@@ -1146,12 +1175,14 @@ database::get_version(hexenc<id> const &
         {
           hexenc<id> const nxt = *i;

+          /*
           if (!vcache.exists(curr))
             {
               string tmp;
               app->finish(tmp);
               vcache.put(curr, tmp);
             }
+            */

           L(FL("following delta %s -> %s\n") % curr % nxt);
           delta del;
@@ -1182,17 +1213,68 @@ database::drop(hexenc<id> const & ident,
   execute(query(drop) % text(ident()));
 }

+void
+database::get_distance(hexenc<id> const & id,
+                       uint32_t & distance,   // out: 0 if id is stored in full, else its stored path_dist
+                       uint32_t & size,       // out: stored size (full version) or path_size (delta)
+                       string const & data_table,
+                       string const & delta_table)
+{
+  MM(id);
+  MM(data_table);
+  MM(delta_table);
+  {  // first look for a full version of id in data_table
+    results res;
+    query q("SELECT size FROM " + data_table + " WHERE id = ?");
+    fetch(res, one_col, any_rows, q % text(id()));
+    if (res.size() > 0)
+      {
+        distance = 0;
+        size = lexical_cast<uint32_t>(res[0][0]);
+        return;
+      }
+  }
+
+  {  // otherwise look in delta_table; only the first matching row is used
+    results res;
+    query q("SELECT path_dist, path_size FROM " + delta_table + " WHERE id = ?");
+    fetch(res, 2, any_rows, q % text(id()));
+    if (res.size() > 0)
+      {
+        distance = lexical_cast<uint32_t>(res[0][0]);
+        size = lexical_cast<uint32_t>(res[0][1]);
+        return;
+      }
+  }
+  I(false);  // id was found in neither table
+}
+
 void
 database::put_version(hexenc<id> const & old_id,
                       hexenc<id> const & new_id,
                       delta const & del,
+                      data const & dat,  // contents handed to put() for new_id when no delta is written
                       string const & data_table,
                       string const & delta_table)
 {
   // TODO: add an invariant or something perhaps
+  static ticker put_full("full", "f", 1);  // incremented for each full version written below
+  static ticker put_del("del", "d", 1);    // incremented for each delta written below

   transaction_guard guard(*this);
-  put_delta(new_id, old_id, del, delta_table);
+  uint32_t parent_distance, parent_size;  // both are filled in by get_distance
+  get_distance(old_id, parent_distance, parent_size, data_table, delta_table);
+
+  if (parent_distance >= constants::max_delta_chain_length)  // old_id already sits at the chain-length limit
+    {
+      ++put_full;
+      put(new_id, dat, data_table);  // store new_id in full rather than as a delta
+    }
+  else
+    {
+      ++put_del;
+      put_delta(new_id, old_id, del, parent_distance, parent_size, delta_table);  // store new_id as a delta against old_id
+    }
   guard.commit();
 }

@@ -1201,6 +1283,8 @@ database::remove_version(hexenc<id> cons
                          string const & data_table,
                          string const & delta_table)
 {
+  N(false, F("please don't remove versions for now."));
+#if 0
   // We have a one of two cases (for multiple 'older' nodes):
   //
   //    1.  pre:        older <- target <- newer
@@ -1271,6 +1355,7 @@ database::remove_version(hexenc<id> cons
     }

   guard.commit();
+#endif
 }


@@ -1415,9 +1500,11 @@ database::put_file_version(file_id const
 void
 database::put_file_version(file_id const & old_id,
                            file_id const & new_id,
-                           file_delta const & del)
+                           file_delta const & del,
+                           file_data const & dat)  // data for new_id, forwarded to put_version together with del
 {
-  put_version(old_id.inner(), new_id.inner(), del.inner(),
+  put_version(old_id.inner(), new_id.inner(),
+              del.inner(), dat.inner(),
               "files", "file_deltas");
 }

@@ -1506,6 +1593,8 @@ database::make_fwd_deltas(string const &
 void
 database::make_fwd_deltas(string const & data_table, string const & delta_table)
 {
+  N(false, F("it's not documented, don't run it"));
+#if 0
   transaction_guard guard(*this);

   set< hexenc<id> > del_bases, all_ids;
@@ -1576,6 +1665,7 @@ database::make_fwd_deltas(string const &
   execute(query("DROP TABLE " + tmp_delta_table));

   guard.commit();
+#endif
 }

 void
@@ -1608,7 +1698,7 @@ database::deltify_revision(revision_id c
                 file_delta del(delt);
                 drop(delta_entry_dst(j).inner(), "files");
                 drop(delta_entry_dst(j).inner(), "file_deltas");
-                put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
+                put_file_version(delta_entry_src(j), delta_entry_dst(j), del, new_data);
               }
           }
       }
@@ -1659,9 +1749,12 @@ database::put_revision(revision_id const
   base64<gzip<data> > d_packed;
   pack(d.inner(), d_packed);

-  execute(query("INSERT INTO revisions VALUES(?, ?)")
+  uint32_t d_size = d_packed().size();
+
+  execute(query("INSERT INTO revisions VALUES(?, ?, ?)")
           % text(new_id.inner()())
-          % text(d_packed()));
+          % text(d_packed())
+          % integer(d_size));

   for (edge_map::const_iterator e = rev.edges.begin();
        e != rev.edges.end(); ++e)
@@ -2804,7 +2897,7 @@ database::put_roster(revision_id const &
       get_version(old_id, old_data, data_table, delta_table);
       delta del;
       diff(old_data, new_data, del);
-      put_delta(new_id, old_id, del, delta_table);
+      put_version(old_id, new_id, del, new_data, data_table, delta_table);
       delta_written = true;
       break;
     }
============================================================
--- database.hh	4ea836110ca59b61655724a3eb2dcd1c9a5a34cb
+++ database.hh	b46b1ec4697ecd13dfe46d42078f7b514be8a022
@@ -137,13 +137,21 @@ class database
            std::string const & table);
   void drop(hexenc<id> const & base,
             std::string const & table);
-  void put_delta(hexenc<id> const & id,
+  void get_distance(hexenc<id> const & id,
+                    uint32_t & distance,
+                    uint32_t & size,
+                    std::string const & data_table,
+                    std::string const & delta_table);
+  void put_delta(hexenc<id> const & ident,
                  hexenc<id> const & base,
                  delta const & del,
+                 size_t parent_distance,
+                 size_t parent_size,
                  std::string const & table);
   void put_version(hexenc<id> const & old_id,
                    hexenc<id> const & new_id,
                    delta const & del,
+                   data const & dat,
                    std::string const & data_table,
                    std::string const & delta_table);
   void remove_version(hexenc<id> const & target_id,
@@ -245,7 +253,8 @@ public:
   // store new version and update old version to be a delta
   void put_file_version(file_id const & old_id,
                         file_id const & new_id,
-                        file_delta const & del);
+                        file_delta const & del,
+                        file_data const & dat);


   void get_file_delta(file_id const & id,
============================================================
--- packet.cc	06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc	8d4dbbc27b15701f8963858e708b37d85e5bb1b9
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
   patch(old_dat.inner(), del.inner(), new_dat);
   calculate_ident(file_data(new_dat), confirm);
   if (confirm == new_id)
-    app.db.put_file_version(old_id, new_id, del);
+    app.db.put_file_version(old_id, new_id, del, new_dat);
   else
     {
       W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- rcs_import.cc	0de2b62e2c973f4c94592cff4990f0a43eb70294
+++ rcs_import.cc	7a1852b10d39b49f220b7746b8cbef6d6c76ca23
@@ -456,7 +456,8 @@ rcs_put_raw_file_edge(hexenc<id> const &
     {
       I(db.exists(new_id, "files")
         || db.delta_exists(new_id, "file_deltas"));
-      db.put_delta(old_id, new_id, del, "file_deltas");
+      N(false, F("cvs_import is broken here."));
+      //db.put_delta(old_id, new_id, del, "file_deltas");
     }
 }

============================================================
--- schema.sql	f44144278a4158695818d8f7e1901ac6f89e39bb
+++ schema.sql	5e315b54e2c1df1f315c3aff2ad9a551332debb7
@@ -22,21 +22,26 @@ CREATE TABLE files
 CREATE TABLE files
 	(
 	id primary key,   -- strong hash of file contents
-	data not null     -- compressed, encoded contents of a file
+	data not null,    -- compressed, encoded contents of a file
+	size integer not null     -- length(data)
 	);

 CREATE TABLE file_deltas
 	(
-	id not null,      -- strong hash of file contents
-	base not null,    -- joins with files.id or file_deltas.id
-	delta not null,   -- rdiff to construct current from base
+	id not null,          -- strong hash of file contents
+	base not null,        -- joins with files.id or file_deltas.id
+	delta not null,       -- rdiff to construct current from base
+	path_dist integer not null,   -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,   -- size + size(base) if base is full, otherwise size + path_size(base)
+	size integer not null,        -- length(delta)
 	unique(id, base)
 	);

 CREATE TABLE manifests
 	(
 	id primary key,      -- strong hash of all the entries in a manifest
-	data not null        -- compressed, encoded contents of a manifest
+	data not null,       -- compressed, encoded contents of a manifest
+	size integer not null        -- length(data)
 	);

 CREATE TABLE manifest_deltas
@@ -44,13 +49,17 @@ CREATE TABLE manifest_deltas
 	id not null,         -- strong hash of all the entries in a manifest
 	base not null,       -- joins with either manifest.id or manifest_deltas.id
 	delta not null,      -- rdiff to construct current from base
+	path_dist integer not null,  -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,  -- size + size(base) if base is full, otherwise size + path_size(base)
+	size integer not null,       -- length(delta)
 	unique(id, base)
 	);

 CREATE TABLE revisions
 	(
-	id primary key,      -- SHA1(text of revision)
-	data not null        -- compressed, encoded contents of a revision
+	id primary key,        -- SHA1(text of revision)
+	data not null,         -- compressed, encoded contents of a revision
+	size integer not null  -- length(data)
 	);

 CREATE TABLE revision_ancestry
@@ -63,7 +72,8 @@ CREATE TABLE rosters
 CREATE TABLE rosters
 	(
 	id primary key,         -- strong hash of the roster
-	data not null           -- compressed, encoded contents of the roster
+	data not null,          -- compressed, encoded contents of the roster
+	size integer not null   -- length(data)
 	);

 CREATE TABLE roster_deltas
@@ -71,6 +81,9 @@ CREATE TABLE roster_deltas
 	id not null,            -- strong hash of the roster
 	base not null,          -- joins with either rosters.id or roster_deltas.id
 	delta not null,         -- rdiff to construct current from base
+	path_dist integer not null,     -- 1 if base is full, otherwise path_dist(base)+1
+	path_size integer not null,     -- size + size(base) if base is full, otherwise size + path_size(base)
+	size integer not null,          -- length(delta)
 	unique(id, base)
 	);

============================================================
--- schema_migration.cc	a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b
+++ schema_migration.cc	9f3a9b8a4a509edf8af01c796cbc6588e902e888
@@ -943,5 +943,7 @@ migrate_monotone_schema(sqlite3 *sql, ap
   // also add a new migration test for the new schema version.  See
   // tests/t_migrate_schema.at for details.

-  m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+  //m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+  //m.migrate(sql, "8cd14946f11ae66218240f332ce416801db1e453");
+  m.migrate(sql, "b0987b874c2d348e9720ec11cf9618509b002618");
 }