The unified diff between revisions [1b076c97..] and [2235bcdf..] is displayed below. It can also be downloaded as a raw diff.
#
#
# patch "automate.cc"
# from [68d579fc833b4a0ec52a0c8e5b130af7517ab7ed]
# to [d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb]
#
# patch "commands.cc"
# from [fcdf6b54a899e0273098115c0cdf496f39edaaa1]
# to [b3a8d32282111f7f405d1c250e9a22dddad24aab]
#
# patch "constants.cc"
# from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e]
# to [f6f6f11ced5f076aa9c362b2c11148af3a2d9556]
#
# patch "database.cc"
# from [a4b5de4f4123d5b7729f5017e6a26d14d3df7b70]
# to [a7f6d1a9f789180312488e66637df2832f11a4fb]
#
# patch "database.hh"
# from [fa72d07116fc7a0b5744291d97ef3b1eff93ec8c]
# to [c33e9d5d1f292ec396c695121530347a065aceaf]
#
# patch "packet.cc"
# from [06b93709b49bc581a5274afc7762f503abf89dc2]
# to [c7fa83e44be5244218995087ba98814c0548bbf4]
#
# patch "revision.cc"
# from [8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7]
# to [77ddcf19bb049ae3ca74b008441bcaa7079232d5]
#
# patch "revision.hh"
# from [5e3733994a6c92c4f15f0d78aa26669458443d46]
# to [c6f7e1c8ac336bdec754f0fd1b822124e58eeb5f]
#
============================================================
--- automate.cc 68d579fc833b4a0ec52a0c8e5b130af7517ab7ed
+++ automate.cc d86c347f85ee7ce7a59f699c8e51feaa1fa57bfb
@@ -283,7 +283,7 @@ automate_toposort(std::vector<utf8> args
revs.insert(rid);
}
std::vector<revision_id> sorted;
- toposort(revs, sorted, app);
+ toposort(revs, sorted, app.db);
for (std::vector<revision_id>::const_iterator i = sorted.begin();
i != sorted.end(); ++i)
output << (*i).inner()() << std::endl;
@@ -329,7 +329,7 @@ automate_ancestry_difference(std::vector
ancestry_difference(a, bs, ancestors, app);
std::vector<revision_id> sorted;
- toposort(ancestors, sorted, app);
+ toposort(ancestors, sorted, app.db);
for (std::vector<revision_id>::const_iterator i = sorted.begin();
i != sorted.end(); ++i)
output << (*i).inner()() << std::endl;
============================================================
--- commands.cc fcdf6b54a899e0273098115c0cdf496f39edaaa1
+++ commands.cc b3a8d32282111f7f405d1c250e9a22dddad24aab
@@ -2191,6 +2191,10 @@ CMD(db, N_("database"),
build_changesets_from_manifest_ancestry(app);
else if (idx(args, 0)() == "rosterify")
build_roster_style_revs_from_manifest_style_revs(app);
+ else if (idx(args, 0)() == "against_base")
+ {
+ app.db.make_all_fwd();
+ }
else
throw usage(name);
}
============================================================
--- constants.cc c7bc2142cf0e9861c9fd744f458da6e8c7a0323e
+++ constants.cc f6f6f11ced5f076aa9c362b2c11148af3a2d9556
@@ -35,7 +35,7 @@ namespace constants
// the value of 7 MB was determined as the optimal point after timing
// various values with a pull of the monotone repository - it could
// be tweaked further.
- size_t const db_version_cache_sz = 7 * (1 << 20);
+ size_t const db_version_cache_sz = 200 * (1 << 20);
// size of a line of text in the log buffer, beyond which log lines will be
// truncated.
============================================================
--- database.cc a4b5de4f4123d5b7729f5017e6a26d14d3df7b70
+++ database.cc a7f6d1a9f789180312488e66637df2832f11a4fb
@@ -37,6 +37,7 @@
#include "vocab.hh"
#include "xdelta.hh"
#include "epoch.hh"
+#include "revision.hh"
// defined in schema.sql, converted to header:
#include "schema.h"
@@ -777,6 +778,14 @@ database::exists(hexenc<id> const & iden
results res;
query q("SELECT id FROM " + table + " WHERE id = ?");
fetch(res, one_col, any_rows, q % text(ident()));
+ if (res.size() > 1)
+ {
+ for (results::const_iterator i = res.begin();
+ i != res.end(); i++)
+ {
+ L(FL("%s") % (*i)[0]);
+ }
+ }
I((res.size() == 1) || (res.size() == 0));
return res.size() == 1;
}
@@ -933,7 +942,7 @@ struct version_cache
}
I(i != cache.end());
I(use >= i->second().size());
- L(FL("version cache expiring %s\n") % i->first);
+ //L(FL("version cache expiring %s\n") % i->first);
use -= i->second().size();
cache.erase(i->first);
}
@@ -955,7 +964,7 @@ struct version_cache
if (i == cache.end())
return false;
// ++cache_hits;
- L(FL("version cache hit on %s\n") % ident);
+ //L(FL("version cache hit on %s\n") % ident);
dat = i->second;
return true;
}
@@ -1161,30 +1170,61 @@ database::drop(hexenc<id> const & ident,
execute(query(drop) % text(ident()));
}
+// insert the given new data using old_id as a hint
+// as to ancestry
void
database::put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
- delta const & del,
+ data const & new_dat,
string const & data_table,
string const & delta_table)
{
-
- data old_data, new_data;
- delta reverse_delta;
-
- get_version(old_id, old_data, data_table, delta_table);
- patch(old_data, del, new_data);
- diff(new_data, old_data, reverse_delta);
-
transaction_guard guard(*this);
+
+ if (exists(new_id, data_table) || exists(new_id, delta_table))
+ return;
+
+ hexenc<id> base_id;
+ MM(base_id);
if (exists(old_id, data_table))
{
- // descendent of a head version replaces the head, therefore old head
- // must be disposed of
- drop(old_id, data_table);
+ base_id = old_id;
}
- put(new_id, new_data, data_table);
- put_delta(old_id, new_id, reverse_delta, delta_table);
+ else
+ {
+ // XXX: this relies on single step deltas, should probably be something
+ // more like get_version()'s path following.
+ string delta_query = "SELECT base FROM " + delta_table + " WHERE id = ?";
+ results res;
+ fetch(res, one_col, any_rows, query(delta_query) % text(old_id()));
+ I(res.size() != 0);
+
+ base_id = hexenc<id>(res[0][0]);
+ I(exists(base_id, data_table));
+ }
+
+ data base_dat;
+ get(base_id, base_dat, data_table);
+ delta del;
+ diff(base_dat, new_dat, del);
+
+ static ticker full("full", "f", 1);
+ static ticker against("ag", "g", 1);
+
+ // TODO: size comparison stuff.
+
+ if (del().size() < 0.15 * new_dat().size())
+ {
+ ++against;
+ L(FL("put_version del %s -> %s (%s)") % base_id % new_id % delta_table);
+ put_delta(new_id, base_id, del, delta_table);
+ }
+ else
+ {
+ ++full;
+ L(FL("put_version dat %s (%s)") % new_id % data_table);
+ put(new_id, new_dat, data_table);
+ }
guard.commit();
}
@@ -1193,6 +1233,7 @@ database::remove_version(hexenc<id> cons
string const & data_table,
string const & delta_table)
{
+ E(false, F("needs updating for against-base"));
// We have a one of two cases (for multiple 'older' nodes):
//
// 1. pre: older <- target <- newer
@@ -1384,9 +1425,9 @@ database::put_file_version(file_id const
void
database::put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del)
+ file_data const & dat)
{
- put_version(old_id.inner(), new_id.inner(), del.inner(),
+ put_version(old_id.inner(), new_id.inner(), dat.inner(),
"files", "file_deltas");
}
@@ -1473,44 +1514,126 @@ void
}
void
-database::deltify_revision(revision_id const & rid)
+database::make_all_fwd()
{
transaction_guard guard(*this);
- revision_set rev;
- MM(rev);
- MM(rid);
- get_revision(rid, rev);
- // Make sure that all parent revs have their files replaced with deltas
- // from this rev's files.
- {
- for (edge_map::const_iterator i = rev.edges.begin();
- i != rev.edges.end(); ++i)
- {
- for (std::map<split_path, std::pair<file_id, file_id> >::const_iterator
- j = edge_changes(i).deltas_applied.begin();
- j != edge_changes(i).deltas_applied.end(); ++j)
- {
- if (exists(delta_entry_src(j).inner(), "files") &&
- file_version_exists(delta_entry_dst(j)))
- {
- file_data old_data;
- file_data new_data;
- get_file_version(delta_entry_src(j), old_data);
- get_file_version(delta_entry_dst(j), new_data);
- delta delt;
- diff(old_data.inner(), new_data.inner(), delt);
- file_delta del(delt);
- drop(delta_entry_dst(j).inner(), "files");
- drop(delta_entry_dst(j).inner(), "file_deltas");
- put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
- }
- }
- }
- }
+
+ // create some empty temporary tables
+ string tmp_file_data("tmp_files");
+ string tmp_file_deltas("tmp_file_deltas");
+ string tmp_roster_data("tmp_rosters");
+ string tmp_roster_deltas("tmp_roster_deltas");
+ execute(query("CREATE TABLE tmp_files AS SELECT * FROM files WHERE 1=0"));
+ execute(query("CREATE TABLE tmp_file_deltas AS SELECT * FROM file_deltas WHERE 1=0"));
+ execute(query("CREATE TABLE tmp_rosters AS SELECT * FROM rosters WHERE 1=0"));
+ execute(query("CREATE TABLE tmp_roster_deltas AS SELECT * FROM roster_deltas WHERE 1=0"));
+
+ vector<revision_id> sorted;
+ toposort(sorted, *this);
+
+ ticker revs("rev", "r", 1);
+ for (vector<revision_id>::const_iterator i = sorted.begin();
+ i != sorted.end(); i++)
+ {
+ revision_id r(*i);
+ MM(r);
+ if (null_id(r))
+ continue;
+ ++revs;
+
+ revision_set rs;
+ get_revision(r, rs);
+ hexenc<id> new_rost_id;
+ MM(new_rost_id);
+ get_roster_id_for_revision(r, new_rost_id);
+
+ for (edge_map::const_iterator i = rs.edges.begin();
+ i != rs.edges.end(); ++i)
+ {
+
+ // rosters
+ if (!(exists(new_rost_id, tmp_roster_data)
+ || exists(new_rost_id, tmp_roster_deltas)))
+ {
+ data new_rost_dat;
+ get_roster(new_rost_id, new_rost_dat);
+
+ revision_id old_rev = edge_old_revision(i);
+ if (null_id(old_rev))
+ {
+ L(FL("put roster %s") % new_rost_id);
+ put(new_rost_id, new_rost_dat, tmp_roster_data);
+ }
+ else
+ {
+ hexenc<id> old_rost_id;
+ MM(old_rost_id);
+ get_roster_id_for_revision(old_rev, old_rost_id);
+ put_version(old_rost_id, new_rost_id, new_rost_dat,
+ tmp_roster_data, tmp_roster_deltas);
+ }
+ }
+
+
+ // the file data.
+ // we'll be slack about the same add on multiple sides,
+ // put() and put_version() can NOP those out.
+ cset const & cs = edge_changes(i);
+ MM(cs);
+ // new additions
+ for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
+ fa != cs.files_added.end(); ++fa)
+ {
+ if (exists(fa->second.inner(), tmp_file_data)
+ || exists(fa->second.inner(), tmp_file_deltas))
+ continue;
+
+ file_data fdat;
+ get_file_version(fa->second, fdat);
+ L(FL("put file %s") % fa->second.inner());
+ put(fa->second.inner(), fdat.inner(), tmp_file_data);
+ }
+
+ // deltas
+ for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
+ = cs.deltas_applied.begin();
+ fd != cs.deltas_applied.end(); ++fd)
+ {
+ file_id src(fd->second.first);
+ file_id dst(fd->second.second);
+ MM(src);
+ MM(dst);
+ file_data src_dat, dst_dat;
+ get_file_version(src, src_dat);
+ get_file_version(dst, dst_dat);
+
+ delta del;
+ diff(src_dat.inner(), dst_dat.inner(), del);
+
+ put_version(src.inner(), dst.inner(), dst_dat.inner(),
+ tmp_file_data, tmp_file_deltas);
+ }
+ }
+ }
+
+ execute(query("DELETE FROM files"));
+ execute(query("DELETE FROM file_deltas"));
+ execute(query("DELETE FROM rosters"));
+ execute(query("DELETE FROM roster_deltas"));
+
+ execute(query("INSERT INTO files SELECT * FROM " + tmp_file_data));
+ execute(query("INSERT INTO file_deltas SELECT * FROM " + tmp_file_deltas));
+ execute(query("INSERT INTO rosters SELECT * FROM " + tmp_roster_data));
+ execute(query("INSERT INTO roster_deltas SELECT * FROM " + tmp_roster_deltas));
+
+ execute(query("DROP TABLE " + tmp_file_data));
+ execute(query("DROP TABLE " + tmp_file_deltas));
+ execute(query("DROP TABLE " + tmp_roster_data));
+ execute(query("DROP TABLE " + tmp_roster_deltas));
+
guard.commit();
}
-
void
database::put_revision(revision_id const & new_id,
revision_set const & rev)
@@ -1565,8 +1688,6 @@ database::put_revision(revision_id const
% text(new_id.inner()()));
}
- deltify_revision(new_id);
-
// Phase 4: write the roster data and commit
put_roster(new_id, ros, mm);
@@ -2652,8 +2773,7 @@ database::put_roster(revision_id const &
marking_map & marks)
{
MM(rev_id);
- data old_data, new_data;
- delta reverse_delta;
+ data new_data;
hexenc<id> old_id, new_id;
write_roster_and_marking(roster, marks, new_data);
@@ -2680,30 +2800,29 @@ database::put_roster(revision_id const &
}
// Else we have a new roster the database hasn't seen yet; our task is to
- // add it, and deltify all the incoming edges (if they aren't already).
+ // add it.
- put(new_id, new_data, data_table);
-
std::set<revision_id> parents;
get_revision_parents(rev_id, parents);
- // Now do what deltify would do if we bothered (we have the
- // roster written now, so might as well do it here).
+ // we need to make a delta, any parent will do
+ bool written = false;
for (std::set<revision_id>::const_iterator i = parents.begin();
i != parents.end(); ++i)
{
if (null_id(*i))
continue;
- revision_id old_rev = *i;
- get_roster_id_for_revision(old_rev, old_id);
- if (exists(new_id, data_table))
- {
- get_version(old_id, old_data, data_table, delta_table);
- diff(new_data, old_data, reverse_delta);
- drop(old_id, data_table);
- put_delta(old_id, new_id, reverse_delta, delta_table);
- }
+ get_roster_id_for_revision(*i, old_id);
+ put_version(old_id, new_id, new_data, data_table, delta_table);
+ written = true;
+ break;
}
+
+ if (!written)
+ {
+ put(new_id, new_data, data_table);
+ }
+
guard.commit();
}
@@ -2897,11 +3016,20 @@ transaction_guard::maybe_checkpoint(size
void
transaction_guard::maybe_checkpoint(size_t nbytes)
{
+ static ticker size("siz", "s", 1);
+ static ticker bytes("byt", "b", 1);
checkpointed_calls += 1;
checkpointed_bytes += nbytes;
if (checkpointed_calls >= checkpoint_batch_size
|| checkpointed_bytes >= checkpoint_batch_bytes)
- do_checkpoint();
+ {
+ if (checkpointed_calls >= checkpoint_batch_size)
+ ++size;
+ if (checkpointed_bytes >= checkpoint_batch_bytes)
+ ++bytes;
+
+ do_checkpoint();
+ }
}
void
============================================================
--- database.hh fa72d07116fc7a0b5744291d97ef3b1eff93ec8c
+++ database.hh c33e9d5d1f292ec396c695121530347a065aceaf
@@ -140,7 +140,7 @@ class database
std::string const & table);
void put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
- delta const & del,
+ data const & new_dat,
std::string const & data_table,
std::string const & delta_table);
void remove_version(hexenc<id> const & target_id,
@@ -238,10 +238,10 @@ public:
void put_file(file_id const & new_id,
file_data const & dat);
- // store new version and update old version to be a delta
+ // store new data, as a delta when that is small; no-op if new_id already exists
void put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del);
+ file_data const & dat);
// get plain version if it exists, or reconstruct version
// from deltas (if they exist).
@@ -259,7 +259,7 @@ public:
void get_revision_manifest(revision_id const & cid,
manifest_id & mid);
- void deltify_revision(revision_id const & rid);
+ void make_all_fwd();
void get_revision(revision_id const & id,
revision_set & cs);
============================================================
--- packet.cc 06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc c7fa83e44be5244218995087ba98814c0548bbf4
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
patch(old_dat.inner(), del.inner(), new_dat);
calculate_ident(file_data(new_dat), confirm);
if (confirm == new_id)
- app.db.put_file_version(old_id, new_id, del);
+ app.db.put_file_version(old_id, new_id, new_dat);
else
{
W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- revision.cc 8cda143f1b14ea5d67e40c9dc325b5ec6f2fd1a7
+++ revision.cc 77ddcf19bb049ae3ca74b008441bcaa7079232d5
@@ -361,17 +361,15 @@ void
// passed in set. if anyone ever needs to toposort the whole graph, then,
// this function would be a good thing to generalize...
void
-toposort(std::set<revision_id> const & revisions,
- std::vector<revision_id> & sorted,
- app_state & app)
+toposort(std::vector<revision_id> & sorted, database & db)
{
sorted.clear();
typedef std::multimap<revision_id, revision_id>::iterator gi;
typedef std::map<revision_id, int>::iterator pi;
std::multimap<revision_id, revision_id> graph;
- app.db.get_revision_ancestry(graph);
+ db.get_revision_ancestry(graph);
std::set<revision_id> leaves;
- app.db.get_revision_ids(leaves);
+ db.get_revision_ids(leaves);
std::map<revision_id, int> pcount;
for (gi i = graph.begin(); i != graph.end(); ++i)
pcount.insert(std::make_pair(i->first, 0));
@@ -387,8 +385,7 @@ toposort(std::set<revision_id> const & r
// now stick them in our ordering (if wanted) and remove them from the
// graph, calculating the new roots as we go
L(FL("new root: %s\n") % (roots.front()));
- if (revisions.find(roots.front()) != revisions.end())
- sorted.push_back(roots.front());
+ sorted.push_back(roots.front());
for(gi i = graph.lower_bound(roots.front());
i != graph.upper_bound(roots.front()); i++)
if(--(pcount[i->second]) == 0)
@@ -402,6 +399,21 @@ toposort(std::set<revision_id> const & r
i != leaves.end(); ++i)
{
L(FL("new leaf: %s\n") % (*i));
+ sorted.push_back(*i);
+ }
+}
+
+void
+toposort(std::set<revision_id> const & revisions,
+ std::vector<revision_id> & sorted,
+ database & db)
+{
+ std::vector<revision_id> all;
+ toposort(all, db);
+ sorted.clear();
+ for (std::vector<revision_id>::const_iterator i = all.begin();
+ i != all.end(); i++)
+ {
if (revisions.find(*i) != revisions.end())
sorted.push_back(*i);
}
============================================================
--- revision.hh 5e3733994a6c92c4f15f0d78aa26669458443d46
+++ revision.hh c6f7e1c8ac336bdec754f0fd1b822124e58eeb5f
@@ -125,9 +125,13 @@ toposort(std::set<revision_id> const & r
void
toposort(std::set<revision_id> const & revisions,
std::vector<revision_id> & sorted,
- app_state & app);
+ database & db);
void
+toposort(std::vector<revision_id> & sorted,
+ database & db);
+
+void
erase_ancestors(std::set<revision_id> & revisions, app_state & app);
void