The unified diff between revisions [2937c475..] and [a8a22bf0..] is displayed below. It can also be downloaded as a raw diff.
#
#
# patch "automate.cc"
# from [8fa450b0e98f21120ab9d205c99e6f34247b7f88]
# to [413208df40261e59a12ed89232f779d2cefe61cb]
#
# patch "constants.cc"
# from [f2baab50e8fa565411d429fe4a5099f0266c3384]
# to [3c0ed351b2325b3c724fb07228219bc41eb9bed0]
#
# patch "constants.hh"
# from [b8396f4cd024f5c1fd0d1cbd1975d2359e3b7771]
# to [40dbe6df2f01da85fc107490f81c57b817e81ebb]
#
# patch "database.cc"
# from [3bc4262aa2cf230fa35529ceb8d0987492b1e332]
# to [801ea0af8be21f5a8d9ea7a5188fb23f18d71a18]
#
# patch "database.hh"
# from [8bf06c7857ef49664a75be7ff3eb029d2f0cd96e]
# to [5335968e56abf76c14ddcde168c495279a7dfa35]
#
# patch "enumerator.cc"
# from [9cd3afa7ef779cb7b0c53a053a9567b57b59f660]
# to [ac1b4a2627f91cb4ea304a5a6a7f5de363447d02]
#
# patch "enumerator.hh"
# from [6e6ecbf59d43a8e4bf4de07008a8795bd18cd960]
# to [5cc476581aed97dd306843e60c057ab1053cf0da]
#
# patch "netsync.cc"
# from [976872ee6fdd486c92367f0f9328abf442cb747d]
# to [5a944dff0472a57d79591781855ba4be24ffcf47]
#
# patch "packet.cc"
# from [06b93709b49bc581a5274afc7762f503abf89dc2]
# to [8d4dbbc27b15701f8963858e708b37d85e5bb1b9]
#
# patch "rcs_import.cc"
# from [0de2b62e2c973f4c94592cff4990f0a43eb70294]
# to [7a1852b10d39b49f220b7746b8cbef6d6c76ca23]
#
# patch "revision.cc"
# from [1c42e4092e1024cc29777a04dda9404262954df7]
# to [e7eab94a25eccd4b23ad441c43962e4fd2119a95]
#
# patch "revision.hh"
# from [1031cee8580d55528f5fb80e754fbf6e43dedb25]
# to [ee41c7c61df986d551aebd2445a6232150a9c8b0]
#
# patch "schema.sql"
# from [2ce34023fdde9799f435ad2f016f509aed3fa25e]
# to [7720a4e3d4a3eedfa5b55533f82964850d671d59]
#
# patch "schema_migration.cc"
# from [f820f10c5c4992df47354a9653d89a6de21c3a87]
# to [82c6cc2b8bed83ee6f7e8f7ae9907df7883661d7]
#
============================================================
--- automate.cc 8fa450b0e98f21120ab9d205c99e6f34247b7f88
+++ automate.cc 413208df40261e59a12ed89232f779d2cefe61cb
@@ -284,7 +284,7 @@ automate_toposort(std::vector<utf8> args
revs.insert(rid);
}
std::vector<revision_id> sorted;
- toposort(revs, sorted, app);
+ toposort(revs, sorted, app.db);
for (std::vector<revision_id>::const_iterator i = sorted.begin();
i != sorted.end(); ++i)
output << (*i).inner()() << std::endl;
@@ -330,7 +330,7 @@ automate_ancestry_difference(std::vector
ancestry_difference(a, bs, ancestors, app);
std::vector<revision_id> sorted;
- toposort(ancestors, sorted, app);
+ toposort(ancestors, sorted, app.db);
for (std::vector<revision_id>::const_iterator i = sorted.begin();
i != sorted.end(); ++i)
output << (*i).inner()() << std::endl;
============================================================
--- constants.cc f2baab50e8fa565411d429fe4a5099f0266c3384
+++ constants.cc 3c0ed351b2325b3c724fb07228219bc41eb9bed0
@@ -152,4 +152,7 @@ namespace constants
std::string const & netsync_key_initializer = std::string(netsync_session_key_length_in_bytes, 0);
+ size_t const max_delta_chain_length = 20;
+ float const max_delta_chain_size = 1.0;
+
}
============================================================
--- constants.hh b8396f4cd024f5c1fd0d1cbd1975d2359e3b7771
+++ constants.hh 40dbe6df2f01da85fc107490f81c57b817e81ebb
@@ -151,6 +151,12 @@ namespace constants
// netsync session key default initializer
extern std::string const & netsync_key_initializer;
+
+ // maximum length of a delta chain
+ extern size_t const max_delta_chain_length;
+
+ // maximum ratio of delta chain size vs original size
+ extern float const max_delta_chain_size;
}
#endif // __CONSTANTS_HH__
============================================================
--- database.cc 3bc4262aa2cf230fa35529ceb8d0987492b1e332
+++ database.cc 801ea0af8be21f5a8d9ea7a5188fb23f18d71a18
@@ -65,9 +65,10 @@ namespace
{
struct query_param
{
- enum arg_type { text, blob };
+ enum arg_type { text, blob, integer };
arg_type type;
std::string data;
+ uint32_t number;
};
query_param
@@ -76,6 +77,7 @@ namespace
query_param q = {
query_param::text,
txt,
+ 0,
};
return q;
}
@@ -86,10 +88,22 @@ namespace
query_param q = {
query_param::blob,
blb,
+ 0,
};
return q;
}
+ query_param
+ integer(uint32_t const & intg)
+ {
+ query_param q = {
+ query_param::integer,
+ "",
+ intg,
+ };
+ return q;
+ }
+
// track all open databases for close_all_databases() handler
set<sqlite3*> sql_contexts;
}
@@ -117,7 +131,7 @@ database::database(system_path const & f
// non-alphabetic ordering of tables in sql source files. we could create
// a temporary db, write our intended schema into it, and read it back,
// but this seems like it would be too rude. possibly revisit this issue.
- schema("9d2b5d7b86df00c30ac34fe87a3c20f1195bb2df"),
+ schema("e11e717741276b6969a186ac153a5f6e2012c000"),
__sql(NULL),
transaction_level(0)
{}
@@ -718,6 +732,12 @@ database::fetch(results & res,
SQLITE_STATIC);
}
break;
+ case query_param::integer:
+ {
+ uint32_t number = idx(query.args, param - 1).number;
+ sqlite3_bind_int(i->second.stmt(), param, number);
+ }
+ break;
default:
I(false);
}
@@ -825,6 +845,17 @@ database::delta_exists(hexenc<id> const
return res.size() > 0;
}
+bool
+database::delta_exists(hexenc<id> const & ident,
+ hexenc<id> const & base,
+ string const & table)
+{
+ results res;
+ query q("SELECT id FROM " + table + " WHERE id = ? AND base = ?");
+ fetch(res, one_col, any_rows, q % text(ident()) % text(base()));
+ return res.size() > 0;
+}
+
unsigned long
database::count(string const & table)
{
@@ -931,30 +962,38 @@ database::put(hexenc<id> const & ident,
gzip<data> dat_packed;
encode_gzip(dat, dat_packed);
+ uint32_t dat_size = dat_packed().size();
- string insert = "INSERT INTO " + table + " VALUES(?, ?)";
+ string insert = "INSERT INTO " + table + " VALUES(?, ?, ?)";
execute(query(insert)
% text(ident())
- % blob(dat_packed()));
+ % blob(dat_packed())
+ % integer(dat_size));
}
void
database::put_delta(hexenc<id> const & ident,
hexenc<id> const & base,
delta const & del,
+ size_t parent_distance,
+ size_t parent_size,
string const & table)
{
- // nb: delta schema is (id, base, delta)
- I(ident() != "");
- I(base() != "");
+ I(!null_id(ident));
+ I(!null_id(base));
gzip<delta> del_packed;
encode_gzip(del, del_packed);
- string insert = "INSERT INTO "+table+" VALUES(?, ?, ?)";
+ uint32_t del_size = del_packed().size();
+
+ string insert = "INSERT INTO "+table+" VALUES(?, ?, ?, ?, ?, ?)";
execute(query(insert)
% text(ident())
% text(base())
- % blob(del_packed()));
+ % blob(del_packed())
+ % integer(parent_distance + 1)
+ % integer(parent_size + del_size)
+ % integer(del_size));
}
// static ticker cache_hits("vcache hits", "h", 1);
@@ -1128,12 +1167,14 @@ database::get_version(hexenc<id> const &
{
hexenc<id> const nxt = *i;
+ /*
if (!vcache.exists(curr))
{
string tmp;
app->finish(tmp);
vcache.insert(curr, tmp);
}
+ */
L(FL("following delta %s -> %s\n") % curr % nxt);
delta del;
@@ -1164,30 +1205,68 @@ database::drop(hexenc<id> const & ident,
execute(query(drop) % text(ident()));
}
+void
+database::get_distance(hexenc<id> const & id,
+ uint32_t & distance,
+ uint32_t & size,
+ string const & data_table,
+ string const & delta_table)
+{
+ MM(id);
+ MM(data_table);
+ MM(delta_table);
+ {
+ results res;
+ query q("SELECT size FROM " + data_table + " WHERE id = ?");
+ fetch(res, one_col, any_rows, q % text(id()));
+ if (res.size() > 0)
+ {
+ distance = 0;
+ size = lexical_cast<uint32_t>(res[0][0]);
+ return;
+ }
+ }
+
+ {
+ results res;
+ query q("SELECT path_dist, path_size FROM " + delta_table + " WHERE id = ?");
+ fetch(res, 2, any_rows, q % text(id()));
+ if (res.size() > 0)
+ {
+ distance = lexical_cast<uint32_t>(res[0][0]);
+ size = lexical_cast<uint32_t>(res[0][1]);
+ return;
+ }
+ }
+ I(false);
+}
+
void
database::put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
delta const & del,
+ data const & dat,
string const & data_table,
string const & delta_table)
{
+ // TODO: add an invariant or something perhaps
+ static ticker put_full("full", "f", 1);
+ static ticker put_del("del", "d", 1);
- data old_data, new_data;
- delta reverse_delta;
-
- get_version(old_id, old_data, data_table, delta_table);
- patch(old_data, del, new_data);
- diff(new_data, old_data, reverse_delta);
-
transaction_guard guard(*this);
- if (exists(old_id, data_table))
+ uint32_t parent_distance, parent_size;
+ get_distance(old_id, parent_distance, parent_size, data_table, delta_table);
+
+ if (parent_distance >= constants::max_delta_chain_length)
{
- // descendent of a head version replaces the head, therefore old head
- // must be disposed of
- drop(old_id, data_table);
+ ++put_full;
+ put(new_id, dat, data_table);
}
- put(new_id, new_data, data_table);
- put_delta(old_id, new_id, reverse_delta, delta_table);
+ else
+ {
+ ++put_del;
+ put_delta(new_id, old_id, del, parent_distance, parent_size, delta_table);
+ }
guard.commit();
}
@@ -1196,6 +1275,8 @@ database::remove_version(hexenc<id> cons
string const & data_table,
string const & delta_table)
{
+ N(false, F("please don't remove versions for now."));
+#if 0
// We have a one of two cases (for multiple 'older' nodes):
//
// 1. pre: older <- target <- newer
@@ -1275,6 +1356,7 @@ database::remove_version(hexenc<id> cons
}
guard.commit();
+#endif
}
@@ -1377,6 +1459,29 @@ database::get_file_version(file_id const
dat = tmp;
}
+void
+database::get_file_delta(file_id const & id,
+ file_id const & base,
+ file_delta & del)
+{
+ if (delta_exists(id.inner(), base.inner(), "file_deltas"))
+ {
+ delta d;
+ get_delta(id.inner(), base.inner(), d, "file_deltas");
+ del = file_delta(d);
+ }
+ else
+ {
+ L(FL("get_file_delta failed for '%s'->'%s'") % base % id);
+ file_data dat, base_dat;
+ get_file_version(base, base_dat);
+ get_file_version(id, dat);
+ delta d;
+ diff(base_dat.inner(), dat.inner(), d);
+ del = file_delta(d);
+ }
+}
+
void
database::get_manifest_version(manifest_id const & id,
manifest_data & dat)
@@ -1396,9 +1501,11 @@ database::put_file_version(file_id const
void
database::put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del)
+ file_delta const & del,
+ file_data const & dat)
{
- put_version(old_id.inner(), new_id.inner(), del.inner(),
+ put_version(old_id.inner(), new_id.inner(),
+ del.inner(), dat.inner(),
"files", "file_deltas");
}
@@ -1483,6 +1590,84 @@ database::get_revision(revision_id const
dat = rdat;
}
+void
+database::make_fwd_deltas(string const & data_table, string const & delta_table)
+{
+ N(false, F("it's not documented, don't run it"));
+#if 0
+ transaction_guard guard(*this);
+
+ set< hexenc<id> > del_bases, all_ids;
+ set< pair< hexenc<id>, hexenc<id> > > dels;
+
+ // id, base
+ get_ids(data_table, all_ids);
+ {
+ results res;
+ fetch(res, 2, any_rows, query("SELECT id, base FROM " + delta_table));
+
+ for (size_t i = 0; i < res.size(); ++i)
+ {
+ all_ids.insert(hexenc<id>(res[i][0]));
+ del_bases.insert(hexenc<id>(res[i][1]));
+ dels.insert( make_pair(res[i][0], res[i][1]) );
+ }
+ }
+
+ set< hexenc<id> > new_bases;
+
+ set_difference(all_ids.begin(), all_ids.end(), del_bases.begin(), del_bases.end(),
+ inserter(new_bases, new_bases.begin()));
+
+ // create some empty temporary tables
+ string tmp_data_table("tmp_" + data_table);
+ string tmp_delta_table("tmp_" + delta_table);
+
+ execute(query("CREATE TABLE " + tmp_data_table + " AS SELECT * FROM " + data_table + " WHERE 1=0"));
+ execute(query("CREATE TABLE " + tmp_delta_table + " AS SELECT * FROM " + delta_table + " WHERE 1=0"));
+
+ ticker full("full", "g", 1);
+ for ( set< hexenc<id> >::const_iterator i = new_bases.begin(); i != new_bases.end(); i++)
+ {
+ MM(*i);
+ data dat;
+ get_version(*i, dat, data_table, delta_table);
+ put(*i, dat, tmp_data_table);
+ ++full;
+ }
+
+ ticker flips("flips", "f", 1);
+
+ for ( set< pair< hexenc<id>, hexenc<id> > >::const_iterator i = dels.begin(); i != dels.end(); i++)
+ {
+ ++flips;
+ hexenc<id> new_id = i->second;
+ hexenc<id> new_base = i->first;
+ MM(new_id);
+ MM(new_base);
+
+ data base, dat;
+ get_version(new_id, dat, data_table, delta_table);
+ get_version(new_base, base, data_table, delta_table);
+ delta del;
+ diff(base, dat, del);
+
+ put_delta(new_id, new_base, del, tmp_delta_table);
+ }
+
+ execute(query("DELETE FROM " + data_table));
+ execute(query("DELETE FROM " + delta_table));
+
+ execute(query("INSERT INTO " + data_table + " SELECT * FROM " + tmp_data_table));
+ execute(query("INSERT INTO " + delta_table + " SELECT * FROM " + tmp_delta_table));
+
+ execute(query("DROP TABLE " + tmp_data_table));
+ execute(query("DROP TABLE " + tmp_delta_table));
+
+ guard.commit();
+#endif
+}
+
void
database::deltify_revision(revision_id const & rid)
{
@@ -1513,7 +1698,7 @@ database::deltify_revision(revision_id c
file_delta del(delt);
drop(delta_entry_dst(j).inner(), "files");
drop(delta_entry_dst(j).inner(), "file_deltas");
- put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
+ put_file_version(delta_entry_src(j), delta_entry_dst(j), del, new_data);
}
}
}
@@ -1563,9 +1748,13 @@ database::put_revision(revision_id const
gzip<data> d_packed;
encode_gzip(d.inner(), d_packed);
- execute(query("INSERT INTO revisions VALUES(?, ?)")
+
+ uint32_t d_size = d_packed().size();
+
+ execute(query("INSERT INTO revisions VALUES(?, ?, ?)")
% text(new_id.inner()())
- % blob(d_packed()));
+ % blob(d_packed())
+ % integer(d_size));
for (edge_map::const_iterator e = rev.edges.begin();
e != rev.edges.end(); ++e)
@@ -1575,7 +1764,8 @@ database::put_revision(revision_id const
% text(new_id.inner()()));
}
- deltify_revision(new_id);
+ // TODO: some different version?
+ // deltify_revision(new_id);
// Phase 4: write the roster data and commit
put_roster(new_id, ros, mm);
@@ -2656,7 +2846,6 @@ database::get_roster(revision_id const &
rcache.insert(rev_id, sp);
}
-
void
database::put_roster(revision_id const & rev_id,
roster_t & roster,
@@ -2664,7 +2853,6 @@ database::put_roster(revision_id const &
{
MM(rev_id);
data old_data, new_data;
- delta reverse_delta;
hexenc<id> old_id, new_id;
if (!rcache.exists(rev_id))
@@ -2700,13 +2888,13 @@ database::put_roster(revision_id const &
// Else we have a new roster the database hasn't seen yet; our task is to
// add it, and deltify all the incoming edges (if they aren't already).
- put(new_id, new_data, data_table);
std::set<revision_id> parents;
get_revision_parents(rev_id, parents);
// Now do what deltify would do if we bothered (we have the
// roster written now, so might as well do it here).
+ bool delta_written = false;
for (std::set<revision_id>::const_iterator i = parents.begin();
i != parents.end(); ++i)
{
@@ -2714,14 +2902,18 @@ database::put_roster(revision_id const &
continue;
revision_id old_rev = *i;
get_roster_id_for_revision(old_rev, old_id);
- if (exists(new_id, data_table))
- {
- get_version(old_id, old_data, data_table, delta_table);
- diff(new_data, old_data, reverse_delta);
- drop(old_id, data_table);
- put_delta(old_id, new_id, reverse_delta, delta_table);
- }
+ get_version(old_id, old_data, data_table, delta_table);
+ delta del;
+ diff(old_data, new_data, del);
+ put_version(old_id, new_id, del, new_data, data_table, delta_table);
+ delta_written = true;
+ break;
}
+
+ // in case the parents were all null/didn't exist
+ if (!delta_written)
+ put(new_id, new_data, data_table);
+
guard.commit();
}
============================================================
--- database.hh 8bf06c7857ef49664a75be7ff3eb029d2f0cd96e
+++ database.hh 5335968e56abf76c14ddcde168c495279a7dfa35
@@ -110,6 +110,9 @@ class database
std::string const & table);
bool delta_exists(hexenc<id> const & ident,
std::string const & table);
+ bool database::delta_exists(hexenc<id> const & ident,
+ hexenc<id> const & base,
+ std::string const & table);
unsigned long count(std::string const & table);
unsigned long space_usage(std::string const & table,
@@ -136,13 +139,21 @@ class database
std::string const & table);
void drop(hexenc<id> const & base,
std::string const & table);
- void put_delta(hexenc<id> const & id,
+ void get_distance(hexenc<id> const & id,
+ uint32_t & distance,
+ uint32_t & size,
+ std::string const & data_table,
+ std::string const & delta_table);
+ void put_delta(hexenc<id> const & ident,
hexenc<id> const & base,
delta const & del,
+ size_t parent_distance,
+ size_t parent_size,
std::string const & table);
void put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
delta const & del,
+ data const & dat,
std::string const & data_table,
std::string const & delta_table);
void remove_version(hexenc<id> const & target_id,
@@ -245,8 +256,14 @@ public:
// store new version and update old version to be a delta
void put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del);
+ file_delta const & del,
+ file_data const & dat);
+
+ void get_file_delta(file_id const & id,
+ file_id const & base,
+ file_delta & del);
+
// get plain version if it exists, or reconstruct version
// from deltas (if they exist).
void get_manifest_version(manifest_id const & id,
@@ -263,6 +280,8 @@ public:
void get_revision_manifest(revision_id const & cid,
manifest_id & mid);
+ void make_fwd_deltas(std::string const & data_table, std::string const & delta_table);
+
void deltify_revision(revision_id const & rid);
void get_revision(revision_id const & id,
============================================================
--- enumerator.cc 9cd3afa7ef779cb7b0c53a053a9567b57b59f660
+++ enumerator.cc ac1b4a2627f91cb4ea304a5a6a7f5de363447d02
@@ -27,10 +27,14 @@ revision_enumerator::revision_enumerator
set<revision_id> const & terminal)
: cb(cb), app(app), terminal_nodes(terminal)
{
+ // TODO: needs sorting out with the toposort stuff.
+ I(false);
+ /*
for (set<revision_id>::const_iterator i = initial.begin();
i != initial.end(); ++i)
revs.push_back(*i);
- load_graphs();
+ */
+ load_revs();
}
revision_enumerator::revision_enumerator(enumerator_callbacks & cb,
@@ -38,41 +42,29 @@ revision_enumerator::revision_enumerator
: cb(cb), app(app)
{
revision_id root;
- revs.push_back(root);
- load_graphs();
+ load_revs();
}
void
-revision_enumerator::load_graphs()
+revision_enumerator::load_revs()
{
- app.db.get_revision_ancestry(graph);
- for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
- i != graph.end(); ++i)
+ // TODO: this totally disrespects the initial and terminal stuff.
+ // It should probably be integrated into the toposort code itself.
+ vector<revision_id> topo_vec;
+ toposort(topo_vec, app.db);
+ for (vector<revision_id>::const_iterator r = topo_vec.begin();
+ r != topo_vec.end(); r++)
{
- inverse_graph.insert(make_pair(i->second, i->first));
+ if (null_id(*r))
+ continue;
+ topo_revs.push_back(*r);
}
}
-bool
-revision_enumerator::all_parents_enumerated(revision_id const & child)
-{
- typedef multimap<revision_id, revision_id>::const_iterator ci;
- pair<ci,ci> range = inverse_graph.equal_range(child);
- for (ci i = range.first; i != range.second; ++i)
- {
- if (i->first == child)
- {
- if (enumerated_nodes.find(i->second) == enumerated_nodes.end())
- return false;
- }
- }
- return true;
-}
-
bool
revision_enumerator::done()
{
- return revs.empty() && items.empty();
+ return topo_revs.empty() && items.empty();
}
void
@@ -90,7 +82,7 @@ revision_enumerator::files_for_revision(
// do a delta. If both sides say "add", do a data."
set<file_id> file_adds;
- // map<dst, src>. src is arbitrary.
+ // map<dst, src>. src is arbitrary if there are multiples.
map<file_id, file_id> file_deltas;
map<file_id, size_t> file_edge_counts;
@@ -135,6 +127,7 @@ revision_enumerator::files_for_revision(
i != file_edge_counts.end(); i++)
{
MM(i->first);
+ L(FL("edge %s count %d") % i->first.inner()() % i->second);
if (i->second < num_edges)
continue;
@@ -158,106 +151,154 @@ revision_enumerator::files_for_revision(
}
}
-void
-revision_enumerator::step()
+void
+revision_enumerator::process_bunch()
{
- while (!done())
+ // we build up a set of files and revs to send
+ vector<revision_id> bunch_revs;
+ set<file_id> bunch_files;
+ multimap<file_id, file_id> bunch_file_deltas;
+
+ // files that should be sent first
+ set<file_id> top_files;
+ // files that will be sent either as full files are as the dst of deltas
+ set<file_id> dst_files;
+
+ while (bunch_revs.size() < 100 && !topo_revs.empty())
{
- if (items.empty() && !revs.empty())
+ revision_id r = topo_revs.front();
+ topo_revs.pop_front();
+ if (!cb.process_this_rev(r))
+ continue;
+
+ bunch_revs.push_back(r);
+
+ set<file_id> full_files;
+ set<pair<file_id, file_id> > del_files;
+
+ files_for_revision(r, full_files, del_files);
+
+ for (set<file_id>::const_iterator f = full_files.begin();
+ f != full_files.end(); f++)
{
- revision_id r = revs.front();
- revs.pop_front();
+ L(FL("full_file %s") % f->inner()());
+ bunch_files.insert(*f);
+ top_files.insert(*f);
+ dst_files.insert(*f);
+ }
- // It's possible we've enumerated this node elsewhere since last
- // time around. Cull rather than reprocess.
- if (enumerated_nodes.find(r) != enumerated_nodes.end())
- continue;
-
- if (!all_parents_enumerated(r))
+ for (set<pair<file_id,file_id> >::const_iterator fd = del_files.begin();
+ fd != del_files.end(); fd++)
+ {
+ L(FL("del_file %s->%s") % fd->first.inner()() % fd->second.inner()());
+ file_id src(fd->first);
+ file_id dst(fd->second);
+ bunch_file_deltas.insert(make_pair(fd->first, fd->second));
+ if (dst_files.find(src) == dst_files.end())
{
- revs.push_back(r);
- continue;
+ L(FL("added del_file to top_files"));
+ top_files.insert(src);
}
-
- if (terminal_nodes.find(r) == terminal_nodes.end())
- {
- typedef multimap<revision_id, revision_id>::const_iterator ci;
- pair<ci,ci> range = graph.equal_range(r);
- for (ci i = range.first; i != range.second; ++i)
- {
- if (i->first == r)
- if (enumerated_nodes.find(i->first) == enumerated_nodes.end())
- revs.push_back(i->second);
- }
- }
+ dst_files.insert(dst);
+ }
+ }
- enumerated_nodes.insert(r);
+ // XXX required?
+ set<file_id> sent_files;
- if (null_id(r))
- continue;
+ // now we can queue up the file items in order.
+ for (set<file_id>::const_iterator t = top_files.begin();
+ t != top_files.end(); t++)
+ {
+ if (sent_files.find(*t) != sent_files.end())
+ {
+ L(FL("already sent top_file %s") % t->inner()());
+ continue;
+ }
- if (cb.process_this_rev(r))
- {
- L(FL("revision_enumerator::step expanding "
- "contents of rev '%d'\n") % r);
+ L(FL("top_file %s") % t->inner()());
- // The rev's files and fdeltas
- {
- set<file_id> full_files;
- set<pair<file_id, file_id> > del_files;
- files_for_revision(r, full_files, del_files);
+ if (bunch_files.find(*t) != bunch_files.end())
+ {
+ // a full file to send.
+ enumerator_item item;
+ item.tag = enumerator_item::fdata;
+ item.ident_a = t->inner();
+ items.push_back(item);
+ sent_files.insert(*t);
+ L(FL("send full_file %s") % t->inner()());
+ }
- for (set<file_id>::const_iterator f = full_files.begin();
- f != full_files.end(); f++)
- {
- if (cb.queue_this_file(f->inner()))
- {
- enumerator_item item;
- item.tag = enumerator_item::fdata;
- item.ident_a = f->inner();
- items.push_back(item);
- }
- }
+ // XXX: doing BFS now, should try DFS too.
+ deque<file_id> frontier;
+ frontier.push_back(*t);
- for (set<pair<file_id, file_id> >::const_iterator fd = del_files.begin();
- fd != del_files.end(); fd++)
- {
- if (cb.queue_this_file(fd->second.inner()))
- {
- enumerator_item item;
- item.tag = enumerator_item::fdelta;
- item.ident_a = fd->first.inner();
- item.ident_b = fd->second.inner();
- items.push_back(item);
- }
- }
- }
+ while (!frontier.empty())
+ {
+ file_id f = frontier.front();
+ frontier.pop_front();
- // Queue up the rev itself
- {
- enumerator_item item;
- item.tag = enumerator_item::rev;
- item.ident_a = r.inner();
- items.push_back(item);
- }
- }
-
- // Queue up some or all of the rev's certs
- vector<hexenc<id> > hashes;
- app.db.get_revision_certs(r, hashes);
- for (vector<hexenc<id> >::const_iterator i = hashes.begin();
- i != hashes.end(); ++i)
+ L(FL("frontier %s") % f.inner()());
+
+ for (multimap<file_id,file_id>::const_iterator
+ d = bunch_file_deltas.lower_bound(f);
+ d != bunch_file_deltas.upper_bound(f);
+ d++)
{
- if (cb.queue_this_cert(*i))
+ if (sent_files.find(d->second) != sent_files.end())
{
- enumerator_item item;
- item.tag = enumerator_item::cert;
- item.ident_a = *i;
- items.push_back(item);
+ L(FL("already sent delta %s") % d->second.inner()());
+ continue;
}
+ sent_files.insert(d->second);
+ frontier.push_back(d->second);
+
+ enumerator_item item;
+ item.tag = enumerator_item::fdelta;
+ item.ident_a = d->first.inner();
+ item.ident_b = d->second.inner();
+ items.push_back(item);
+ L(FL("file_delta %s->%s") % d->first.inner()() % d->second.inner()());
}
}
+ }
+ // and the revs
+ for (vector<revision_id>::const_iterator r = bunch_revs.begin();
+ r != bunch_revs.end(); r++)
+ {
+ enumerator_item item;
+ item.tag = enumerator_item::rev;
+ item.ident_a = r->inner();
+ items.push_back(item);
+
+ // Queue up some or all of the rev's certs
+ vector<hexenc<id> > hashes;
+ app.db.get_revision_certs(*r, hashes);
+ for (vector<hexenc<id> >::const_iterator i = hashes.begin();
+ i != hashes.end(); ++i)
+ {
+ if (cb.queue_this_cert(*i))
+ {
+ enumerator_item item;
+ item.tag = enumerator_item::cert;
+ item.ident_a = *i;
+ items.push_back(item);
+ }
+ }
+ }
+}
+
+void
+revision_enumerator::step()
+{
+ while (!done())
+ {
+ if (items.empty() && !topo_revs.empty())
+ {
+ process_bunch();
+ }
+
if (!items.empty())
{
L(FL("revision_enumerator::step extracting item\n"));
============================================================
--- enumerator.hh 6e6ecbf59d43a8e4bf4de07008a8795bd18cd960
+++ enumerator.hh 5cc476581aed97dd306843e60c057ab1053cf0da
@@ -50,10 +50,8 @@ revision_enumerator
app_state & app;
std::set<revision_id> terminal_nodes;
std::set<revision_id> enumerated_nodes;
- std::deque<revision_id> revs;
std::deque<enumerator_item> items;
- std::multimap<revision_id, revision_id> graph;
- std::multimap<revision_id, revision_id> inverse_graph;
+ std::deque<revision_id> topo_revs;
revision_enumerator(enumerator_callbacks & cb,
app_state & app,
@@ -61,11 +59,11 @@ revision_enumerator
std::set<revision_id> const & terminal);
revision_enumerator(enumerator_callbacks & cb,
app_state & app);
- void load_graphs();
- bool all_parents_enumerated(revision_id const & child);
- void files_for_revision(revision_id const & r,
- std::set<file_id> & full_files,
- std::set<std::pair<file_id,file_id> > & del_files);
+ void load_revs();
+ void revision_enumerator::files_for_revision(revision_id const & r,
+ std::set<file_id> & full_files,
+ std::set<std::pair<file_id,file_id> > & del_files);
+ void process_bunch();
void step();
bool done();
};
============================================================
--- netsync.cc 976872ee6fdd486c92367f0f9328abf442cb747d
+++ netsync.cc 5a944dff0472a57d79591781855ba4be24ffcf47
@@ -608,15 +608,12 @@ session::note_file_delta(file_id const &
{
if (role == sink_role)
return;
- file_data fd1, fd2;
- delta del;
- id fid1, fid2;
- decode_hexenc(src.inner(), fid1);
- decode_hexenc(dst.inner(), fid2);
- app.db.get_file_version(src, fd1);
- app.db.get_file_version(dst, fd2);
- diff(fd1.inner(), fd2.inner(), del);
- queue_delta_cmd(file_item, fid1, fid2, del);
+ file_delta del;
+ app.db.get_file_delta(dst, src, del);
+ id src_id, dst_id;
+ decode_hexenc(src.inner(), src_id);
+ decode_hexenc(dst.inner(), dst_id);
+ queue_delta_cmd(file_item, src_id, dst_id, del.inner());
file_items_sent.insert(dst);
}
============================================================
--- packet.cc 06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc 8d4dbbc27b15701f8963858e708b37d85e5bb1b9
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
patch(old_dat.inner(), del.inner(), new_dat);
calculate_ident(file_data(new_dat), confirm);
if (confirm == new_id)
- app.db.put_file_version(old_id, new_id, del);
+ app.db.put_file_version(old_id, new_id, del, new_dat);
else
{
W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- rcs_import.cc 0de2b62e2c973f4c94592cff4990f0a43eb70294
+++ rcs_import.cc 7a1852b10d39b49f220b7746b8cbef6d6c76ca23
@@ -456,7 +456,8 @@ rcs_put_raw_file_edge(hexenc<id> const &
{
I(db.exists(new_id, "files")
|| db.delta_exists(new_id, "file_deltas"));
- db.put_delta(old_id, new_id, del, "file_deltas");
+ N(false, F("cvs_import is broken here."));
+ //db.put_delta(old_id, new_id, del, "file_deltas");
}
}
============================================================
--- revision.cc 1c42e4092e1024cc29777a04dda9404262954df7
+++ revision.cc e7eab94a25eccd4b23ad441c43962e4fd2119a95
@@ -361,17 +361,15 @@ void
// passed in set. if anyone ever needs to toposort the whole graph, then,
// this function would be a good thing to generalize...
void
-toposort(std::set<revision_id> const & revisions,
- std::vector<revision_id> & sorted,
- app_state & app)
+toposort(std::vector<revision_id> & sorted, database & db)
{
sorted.clear();
typedef std::multimap<revision_id, revision_id>::iterator gi;
typedef std::map<revision_id, int>::iterator pi;
std::multimap<revision_id, revision_id> graph;
- app.db.get_revision_ancestry(graph);
+ db.get_revision_ancestry(graph);
std::set<revision_id> leaves;
- app.db.get_revision_ids(leaves);
+ db.get_revision_ids(leaves);
std::map<revision_id, int> pcount;
for (gi i = graph.begin(); i != graph.end(); ++i)
pcount.insert(std::make_pair(i->first, 0));
@@ -387,8 +385,7 @@ toposort(std::set<revision_id> const & r
// now stick them in our ordering (if wanted) and remove them from the
// graph, calculating the new roots as we go
L(FL("new root: %s\n") % (roots.front()));
- if (revisions.find(roots.front()) != revisions.end())
- sorted.push_back(roots.front());
+ sorted.push_back(roots.front());
for(gi i = graph.lower_bound(roots.front());
i != graph.upper_bound(roots.front()); i++)
if(--(pcount[i->second]) == 0)
@@ -402,6 +399,21 @@ toposort(std::set<revision_id> const & r
i != leaves.end(); ++i)
{
L(FL("new leaf: %s\n") % (*i));
+ sorted.push_back(*i);
+ }
+}
+
+void
+toposort(std::set<revision_id> const & revisions,
+ std::vector<revision_id> & sorted,
+ database & db)
+{
+ std::vector<revision_id> all;
+ toposort(all, db);
+ sorted.clear();
+ for (std::vector<revision_id>::const_iterator i = all.begin();
+ i != all.end(); i++)
+ {
if (revisions.find(*i) != revisions.end())
sorted.push_back(*i);
}
============================================================
--- revision.hh 1031cee8580d55528f5fb80e754fbf6e43dedb25
+++ revision.hh ee41c7c61df986d551aebd2445a6232150a9c8b0
@@ -125,9 +125,13 @@ toposort(std::set<revision_id> const & r
void
toposort(std::set<revision_id> const & revisions,
std::vector<revision_id> & sorted,
- app_state & app);
+ database & db);
void
+toposort(std::vector<revision_id> & sorted,
+ database & db);
+
+void
erase_ancestors(std::set<revision_id> & revisions, app_state & app);
void
============================================================
--- schema.sql 2ce34023fdde9799f435ad2f016f509aed3fa25e
+++ schema.sql 7720a4e3d4a3eedfa5b55533f82964850d671d59
@@ -23,36 +23,45 @@ CREATE TABLE files
CREATE TABLE files
(
- id primary key, -- strong hash of file contents
- data not null -- compressed contents of a file
- );
+ id primary key, -- strong hash of file contents
+ data not null, -- compressed contents of a file
+ size integer not null -- length(data)
+ );
CREATE TABLE file_deltas
(
- id not null, -- strong hash of file contents
- base not null, -- joins with files.id or file_deltas.id
- delta not null, -- compressed rdiff to construct current from base
+ id not null, -- strong hash of file contents
+ base not null, -- joins with files.id or file_deltas.id
+ delta not null, -- compressed rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + size of base
+ size integer not null, -- length(delta)
unique(id, base)
);
CREATE TABLE manifests
(
- id primary key, -- strong hash of all the entries in a manifest
- data not null -- compressed, encoded contents of a manifest
+ id primary key, -- strong hash of all the entries in a manifest
+ data not null, -- compressed, encoded contents of a manifest
+ size integer not null -- length(data)
);
CREATE TABLE manifest_deltas
(
- id not null, -- strong hash of all the entries in a manifest
- base not null, -- joins with either manifest.id or manifest_deltas.id
- delta not null, -- rdiff to construct current from base
+ id not null, -- strong hash of all the entries in a manifest
+ base not null, -- joins with either manifest.id or manifest_deltas.id
+ delta not null, -- rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + size of base
+ size integer not null, -- length(delta)
unique(id, base)
);
CREATE TABLE revisions
(
- id primary key, -- SHA1(text of revision)
- data not null -- compressed, encoded contents of a revision
+ id primary key, -- SHA1(text of revision)
+ data not null, -- compressed, encoded contents of a revision
+ size integer not null -- length(data)
);
CREATE TABLE revision_ancestry
@@ -65,14 +74,18 @@ CREATE TABLE rosters
CREATE TABLE rosters
(
id primary key, -- strong hash of the roster
- data not null -- compressed, encoded contents of the roster
+ data not null, -- compressed, encoded contents of the roster
+ size integer not null -- length(data)
);
CREATE TABLE roster_deltas
(
- id not null, -- strong hash of the roster
- base not null, -- joins with either rosters.id or roster_deltas.id
- delta not null, -- rdiff to construct current from base
+ id not null, -- strong hash of the roster
+ base not null, -- joins with either rosters.id or roster_deltas.id
+ delta not null, -- rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + size of base
+ size integer not null, -- length(delta)
unique(id, base)
);
============================================================
--- schema_migration.cc f820f10c5c4992df47354a9653d89a6de21c3a87
+++ schema_migration.cc 82c6cc2b8bed83ee6f7e8f7ae9907df7883661d7
@@ -1083,5 +1083,6 @@ migrate_monotone_schema(sqlite3 *sql, ap
// also add a new migration test for the new schema version. See
// tests/t_migrate_schema.at for details.
- m.migrate(sql, "9d2b5d7b86df00c30ac34fe87a3c20f1195bb2df");
+ // XXX: there's no migration code to get to this.
+ m.migrate(sql, "e11e717741276b6969a186ac153a5f6e2012c000");
}