The unified diff between revisions [9b19e788..] and [b954a1fc..] is displayed below. It can also be downloaded as a raw diff.
#
#
# patch "constants.cc"
# from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e]
# to [c6acb6030061b1152370509c2775de8e93cc2ba7]
#
# patch "constants.hh"
# from [528788cf84a56e076de55706b0fb989f570a6e9f]
# to [7f0cbbead461f925ac421d3303932a9fd326815e]
#
# patch "database.cc"
# from [377449cef66018ee4619b66e5c08dae01129df89]
# to [2fc44fac19123c10272d86d0295b91f58a8ad6b1]
#
# patch "database.hh"
# from [4ea836110ca59b61655724a3eb2dcd1c9a5a34cb]
# to [b46b1ec4697ecd13dfe46d42078f7b514be8a022]
#
# patch "packet.cc"
# from [06b93709b49bc581a5274afc7762f503abf89dc2]
# to [8d4dbbc27b15701f8963858e708b37d85e5bb1b9]
#
# patch "rcs_import.cc"
# from [0de2b62e2c973f4c94592cff4990f0a43eb70294]
# to [7a1852b10d39b49f220b7746b8cbef6d6c76ca23]
#
# patch "schema.sql"
# from [f44144278a4158695818d8f7e1901ac6f89e39bb]
# to [5e315b54e2c1df1f315c3aff2ad9a551332debb7]
#
# patch "schema_migration.cc"
# from [a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b]
# to [9f3a9b8a4a509edf8af01c796cbc6588e902e888]
#
============================================================
--- constants.cc c7bc2142cf0e9861c9fd744f458da6e8c7a0323e
+++ constants.cc c6acb6030061b1152370509c2775de8e93cc2ba7
@@ -150,4 +150,7 @@ namespace constants
std::string const & netsync_key_initializer = std::string(netsync_session_key_length_in_bytes, 0);
+ size_t const max_delta_chain_length = 20;
+ float const max_delta_chain_size = 1.0;
+
}
============================================================
--- constants.hh 528788cf84a56e076de55706b0fb989f570a6e9f
+++ constants.hh 7f0cbbead461f925ac421d3303932a9fd326815e
@@ -148,6 +148,12 @@ namespace constants
// netsync session key default initializer
extern std::string const & netsync_key_initializer;
+
+ // maximum length of a delta chain
+ extern size_t const max_delta_chain_length;
+
+ // maximum ratio of delta chain size vs original size
+ extern float const max_delta_chain_size;
}
#endif // __CONSTANTS_HH__
============================================================
--- database.cc 377449cef66018ee4619b66e5c08dae01129df89
+++ database.cc 2fc44fac19123c10272d86d0295b91f58a8ad6b1
@@ -62,9 +62,10 @@ namespace
{
struct query_param
{
- enum arg_type { text, blob };
+ enum arg_type { text, blob, integer };
arg_type type;
std::string data;
+ uint32_t number;
};
query_param
@@ -73,6 +74,7 @@ namespace
query_param q = {
query_param::text,
txt,
+ 0,
};
return q;
}
@@ -83,10 +85,22 @@ namespace
query_param q = {
query_param::blob,
blb,
+ 0,
};
return q;
}
+ query_param
+ integer(uint32_t const & intg)
+ {
+ query_param q = {
+ query_param::integer,
+ "",
+ intg,
+ };
+ return q;
+ }
+
// track all open databases for close_all_databases() handler
set<sqlite3*> sql_contexts;
}
@@ -120,7 +134,7 @@ database::database(system_path const & f
// non-alphabetic ordering of tables in sql source files. we could create
// a temporary db, write our intended schema into it, and read it back,
// but this seems like it would be too rude. possibly revisit this issue.
- schema("1db80c7cee8fa966913db1a463ed50bf1b0e5b0e"),
+ schema("b0987b874c2d348e9720ec11cf9618509b002618"),
__sql(NULL),
transaction_level(0)
{}
@@ -696,6 +710,12 @@ database::fetch(results & res,
SQLITE_STATIC);
}
break;
+ case query_param::integer:
+ {
+ uint32_t number = idx(query.args, param - 1).number;
+ sqlite3_bind_int(i->second.stmt(), param, number);
+ }
+ break;
default:
I(false);
}
@@ -898,30 +918,39 @@ database::put(hexenc<id> const & ident,
base64<gzip<data> > dat_packed;
pack(dat, dat_packed);
+
+ uint32_t dat_size = dat_packed().size();
- string insert = "INSERT INTO " + table + " VALUES(?, ?)";
+ string insert = "INSERT INTO " + table + " VALUES(?, ?, ?)";
execute(query(insert)
% text(ident())
- % text(dat_packed()));
+ % text(dat_packed())
+ % integer(dat_size));
}
void
database::put_delta(hexenc<id> const & ident,
hexenc<id> const & base,
delta const & del,
+ size_t parent_distance,
+ size_t parent_size,
string const & table)
{
- // nb: delta schema is (id, base, delta)
- I(ident() != "");
- I(base() != "");
+ I(!null_id(ident));
+ I(!null_id(base));
base64<gzip<delta> > del_packed;
pack(del, del_packed);
+
+ uint32_t del_size = del_packed().size();
- string insert = "INSERT INTO "+table+" VALUES(?, ?, ?)";
+ string insert = "INSERT INTO "+table+" VALUES(?, ?, ?, ?, ?, ?)";
execute(query(insert)
% text(ident())
% text(base())
- % text(del_packed()));
+ % text(del_packed())
+ % integer(parent_distance + 1)
+ % integer(parent_size + del_size)
+ % integer(del_size));
}
// static ticker cache_hits("vcache hits", "h", 1);
@@ -1146,12 +1175,14 @@ database::get_version(hexenc<id> const &
{
hexenc<id> const nxt = *i;
+ /*
if (!vcache.exists(curr))
{
string tmp;
app->finish(tmp);
vcache.put(curr, tmp);
}
+ */
L(FL("following delta %s -> %s\n") % curr % nxt);
delta del;
@@ -1182,17 +1213,68 @@ database::drop(hexenc<id> const & ident,
execute(query(drop) % text(ident()));
}
+void
+database::get_distance(hexenc<id> const & id,
+ uint32_t & distance,
+ uint32_t & size,
+ string const & data_table,
+ string const & delta_table)
+{
+ MM(id);
+ MM(data_table);
+ MM(delta_table);
+ {
+ results res;
+ query q("SELECT size FROM " + data_table + " WHERE id = ?");
+ fetch(res, one_col, any_rows, q % text(id()));
+ if (res.size() > 0)
+ {
+ distance = 0;
+ size = lexical_cast<uint32_t>(res[0][0]);
+ return;
+ }
+ }
+
+ {
+ results res;
+ query q("SELECT path_dist, path_size FROM " + delta_table + " WHERE id = ?");
+ fetch(res, 2, any_rows, q % text(id()));
+ if (res.size() > 0)
+ {
+ distance = lexical_cast<uint32_t>(res[0][0]);
+ size = lexical_cast<uint32_t>(res[0][1]);
+ return;
+ }
+ }
+ I(false);
+}
+
void
database::put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
delta const & del,
+ data const & dat,
string const & data_table,
string const & delta_table)
{
// TODO: add an invariant or something perhaps
+ static ticker put_full("full", "f", 1);
+ static ticker put_del("del", "d", 1);
transaction_guard guard(*this);
- put_delta(new_id, old_id, del, delta_table);
+ uint32_t parent_distance, parent_size;
+ get_distance(old_id, parent_distance, parent_size, data_table, delta_table);
+
+ if (parent_distance >= constants::max_delta_chain_length)
+ {
+ ++put_full;
+ put(new_id, dat, data_table);
+ }
+ else
+ {
+ ++put_del;
+ put_delta(new_id, old_id, del, parent_distance, parent_size, delta_table);
+ }
guard.commit();
}
@@ -1201,6 +1283,8 @@ database::remove_version(hexenc<id> cons
string const & data_table,
string const & delta_table)
{
+ N(false, F("please don't remove versions for now."));
+#if 0
// We have a one of two cases (for multiple 'older' nodes):
//
// 1. pre: older <- target <- newer
@@ -1271,6 +1355,7 @@ database::remove_version(hexenc<id> cons
}
guard.commit();
+#endif
}
@@ -1415,9 +1500,11 @@ database::put_file_version(file_id const
void
database::put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del)
+ file_delta const & del,
+ file_data const & dat)
{
- put_version(old_id.inner(), new_id.inner(), del.inner(),
+ put_version(old_id.inner(), new_id.inner(),
+ del.inner(), dat.inner(),
"files", "file_deltas");
}
@@ -1506,6 +1593,8 @@ database::make_fwd_deltas(string const &
void
database::make_fwd_deltas(string const & data_table, string const & delta_table)
{
+ N(false, F("it's not documented, don't run it"));
+#if 0
transaction_guard guard(*this);
set< hexenc<id> > del_bases, all_ids;
@@ -1576,6 +1665,7 @@ database::make_fwd_deltas(string const &
execute(query("DROP TABLE " + tmp_delta_table));
guard.commit();
+#endif
}
void
@@ -1608,7 +1698,7 @@ database::deltify_revision(revision_id c
file_delta del(delt);
drop(delta_entry_dst(j).inner(), "files");
drop(delta_entry_dst(j).inner(), "file_deltas");
- put_file_version(delta_entry_src(j), delta_entry_dst(j), del);
+ put_file_version(delta_entry_src(j), delta_entry_dst(j), del, new_data);
}
}
}
@@ -1659,9 +1749,12 @@ database::put_revision(revision_id const
base64<gzip<data> > d_packed;
pack(d.inner(), d_packed);
- execute(query("INSERT INTO revisions VALUES(?, ?)")
+ uint32_t d_size = d_packed().size();
+
+ execute(query("INSERT INTO revisions VALUES(?, ?, ?)")
% text(new_id.inner()())
- % text(d_packed()));
+ % text(d_packed())
+ % integer(d_size));
for (edge_map::const_iterator e = rev.edges.begin();
e != rev.edges.end(); ++e)
@@ -2804,7 +2897,7 @@ database::put_roster(revision_id const &
get_version(old_id, old_data, data_table, delta_table);
delta del;
diff(old_data, new_data, del);
- put_delta(new_id, old_id, del, delta_table);
+ put_version(old_id, new_id, del, new_data, data_table, delta_table);
delta_written = true;
break;
}
============================================================
--- database.hh 4ea836110ca59b61655724a3eb2dcd1c9a5a34cb
+++ database.hh b46b1ec4697ecd13dfe46d42078f7b514be8a022
@@ -137,13 +137,21 @@ class database
std::string const & table);
void drop(hexenc<id> const & base,
std::string const & table);
- void put_delta(hexenc<id> const & id,
+ void get_distance(hexenc<id> const & id,
+ uint32_t & distance,
+ uint32_t & size,
+ std::string const & data_table,
+ std::string const & delta_table);
+ void put_delta(hexenc<id> const & ident,
hexenc<id> const & base,
delta const & del,
+ size_t parent_distance,
+ size_t parent_size,
std::string const & table);
void put_version(hexenc<id> const & old_id,
hexenc<id> const & new_id,
delta const & del,
+ data const & dat,
std::string const & data_table,
std::string const & delta_table);
void remove_version(hexenc<id> const & target_id,
@@ -245,7 +253,8 @@ public:
// store new version and update old version to be a delta
void put_file_version(file_id const & old_id,
file_id const & new_id,
- file_delta const & del);
+ file_delta const & del,
+ file_data const & dat);
void get_file_delta(file_id const & id,
============================================================
--- packet.cc 06b93709b49bc581a5274afc7762f503abf89dc2
+++ packet.cc 8d4dbbc27b15701f8963858e708b37d85e5bb1b9
@@ -103,7 +103,7 @@ packet_db_writer::consume_file_delta(fil
patch(old_dat.inner(), del.inner(), new_dat);
calculate_ident(file_data(new_dat), confirm);
if (confirm == new_id)
- app.db.put_file_version(old_id, new_id, del);
+ app.db.put_file_version(old_id, new_id, del, new_dat);
else
{
W(F("reconstructed file from delta '%s' -> '%s' has wrong id '%s'\n")
============================================================
--- rcs_import.cc 0de2b62e2c973f4c94592cff4990f0a43eb70294
+++ rcs_import.cc 7a1852b10d39b49f220b7746b8cbef6d6c76ca23
@@ -456,7 +456,8 @@ rcs_put_raw_file_edge(hexenc<id> const &
{
I(db.exists(new_id, "files")
|| db.delta_exists(new_id, "file_deltas"));
- db.put_delta(old_id, new_id, del, "file_deltas");
+ N(false, F("cvs_import is broken here."));
+ //db.put_delta(old_id, new_id, del, "file_deltas");
}
}
============================================================
--- schema.sql f44144278a4158695818d8f7e1901ac6f89e39bb
+++ schema.sql 5e315b54e2c1df1f315c3aff2ad9a551332debb7
@@ -22,21 +22,26 @@ CREATE TABLE files
CREATE TABLE files
(
id primary key, -- strong hash of file contents
- data not null -- compressed, encoded contents of a file
+ data not null, -- compressed, encoded contents of a file
+ size integer not null -- length(data)
);
CREATE TABLE file_deltas
(
- id not null, -- strong hash of file contents
- base not null, -- joins with files.id or file_deltas.id
- delta not null, -- rdiff to construct current from base
+ id not null, -- strong hash of file contents
+ base not null, -- joins with files.id or file_deltas.id
+ delta not null, -- rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + path_size(base); size + size(base) if base is full
+ size integer not null, -- length(delta)
unique(id, base)
);
CREATE TABLE manifests
(
id primary key, -- strong hash of all the entries in a manifest
- data not null -- compressed, encoded contents of a manifest
+ data not null, -- compressed, encoded contents of a manifest
+ size integer not null -- length(data)
);
CREATE TABLE manifest_deltas
@@ -44,13 +49,17 @@ CREATE TABLE manifest_deltas
id not null, -- strong hash of all the entries in a manifest
base not null, -- joins with either manifest.id or manifest_deltas.id
delta not null, -- rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + path_size(base); size + size(base) if base is full
+ size integer not null, -- length(delta)
unique(id, base)
);
CREATE TABLE revisions
(
- id primary key, -- SHA1(text of revision)
- data not null -- compressed, encoded contents of a revision
+ id primary key, -- SHA1(text of revision)
+ data not null, -- compressed, encoded contents of a revision
+ size integer not null -- length(data)
);
CREATE TABLE revision_ancestry
@@ -63,7 +72,8 @@ CREATE TABLE rosters
CREATE TABLE rosters
(
id primary key, -- strong hash of the roster
- data not null -- compressed, encoded contents of the roster
+ data not null, -- compressed, encoded contents of the roster
+ size integer not null -- length(data)
);
CREATE TABLE roster_deltas
@@ -71,6 +81,9 @@ CREATE TABLE roster_deltas
id not null, -- strong hash of the roster
base not null, -- joins with either rosters.id or roster_deltas.id
delta not null, -- rdiff to construct current from base
+ path_dist integer not null, -- 1 if base is full, otherwise path_dist(base)+1
+ path_size integer not null, -- size + path_size(base); size + size(base) if base is full
+ size integer not null, -- length(delta)
unique(id, base)
);
============================================================
--- schema_migration.cc a13abc3a8c750aadb1ad07114c8f5b0cfc0cc67b
+++ schema_migration.cc 9f3a9b8a4a509edf8af01c796cbc6588e902e888
@@ -943,5 +943,7 @@ migrate_monotone_schema(sqlite3 *sql, ap
// also add a new migration test for the new schema version. See
// tests/t_migrate_schema.at for details.
- m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+ //m.migrate(sql, "1db80c7cee8fa966913db1a463ed50bf1b0e5b0e");
+ //m.migrate(sql, "8cd14946f11ae66218240f332ce416801db1e453");
+ m.migrate(sql, "b0987b874c2d348e9720ec11cf9618509b002618");
}