The unified diff between revisions [2449c78b..] and [ebdccab0..] is displayed below. It can also be downloaded as a raw diff.
This diff has been restricted to the following files: 'enumerator.cc'
#
#
# patch "enumerator.cc"
# from [9cd3afa7ef779cb7b0c53a053a9567b57b59f660]
# to [33a06c9ebfb955efb197f5eafb2e8d9beaee8a1d]
#
============================================================
--- enumerator.cc 9cd3afa7ef779cb7b0c53a053a9567b57b59f660
+++ enumerator.cc 33a06c9ebfb955efb197f5eafb2e8d9beaee8a1d
@@ -27,10 +27,14 @@ revision_enumerator::revision_enumerator
set<revision_id> const & terminal)
: cb(cb), app(app), terminal_nodes(terminal)
{
+ // TODO: needs sorting out with the toposort stuff.
+ I(false);
+ /*
for (set<revision_id>::const_iterator i = initial.begin();
i != initial.end(); ++i)
revs.push_back(*i);
- load_graphs();
+ */
+ load_revs();
}
revision_enumerator::revision_enumerator(enumerator_callbacks & cb,
@@ -38,226 +42,188 @@ revision_enumerator::revision_enumerator
: cb(cb), app(app)
{
revision_id root;
- revs.push_back(root);
- load_graphs();
+ load_revs();
}
void
-revision_enumerator::load_graphs()
+revision_enumerator::load_revs()
{
- app.db.get_revision_ancestry(graph);
- for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
- i != graph.end(); ++i)
+ // TODO: this totally disrespects the initial and terminal stuff.
+ // It should probably be integrated into the toposort code itself.
+ vector<revision_id> topo_vec;
+ toposort(topo_vec, app.db);
+ for (vector<revision_id>::const_iterator r = topo_vec.begin();
+ r != topo_vec.end(); r++)
{
- inverse_graph.insert(make_pair(i->second, i->first));
+ if (null_id(*r))
+ continue;
+ topo_revs.push_back(*r);
}
}
-bool
-revision_enumerator::all_parents_enumerated(revision_id const & child)
-{
- typedef multimap<revision_id, revision_id>::const_iterator ci;
- pair<ci,ci> range = inverse_graph.equal_range(child);
- for (ci i = range.first; i != range.second; ++i)
- {
- if (i->first == child)
- {
- if (enumerated_nodes.find(i->second) == enumerated_nodes.end())
- return false;
- }
- }
- return true;
-}
-
bool
revision_enumerator::done()
{
- return revs.empty() && items.empty();
+ return topo_revs.empty() && items.empty();
}
void
-revision_enumerator::files_for_revision(revision_id const & r,
- set<file_id> & full_files,
- set<pair<file_id,file_id> > & del_files)
+revision_enumerator::process_bunch()
{
- // when we're sending a merge, we have to be careful if we
- // want to send as little data as possible. see bug #15846
- //
- // njs's solution: "when sending the files for a revision,
- // look at both csets. If a given hash is not listed as new
- // in _both_ csets, throw it out. Now, for everything left
- // over, if one side says "add" and the other says "delta",
- // do a delta. If both sides say "add", do a data."
+ // we build up a set of files and revs to send
+ vector<revision_id> bunch_revs;
+ set<file_id> bunch_files;
+ multimap<file_id, file_id> bunch_file_deltas;
- set<file_id> file_adds;
- // map<dst, src>. src is arbitrary.
- map<file_id, file_id> file_deltas;
- map<file_id, size_t> file_edge_counts;
+ // files that should be sent first
+ set<file_id> top_files;
+ // files that will be sent either as full files are as the dst of deltas
+ set<file_id> dst_files;
- revision_set rs;
- MM(rs);
- app.db.get_revision(r, rs);
+ while (bunch_revs.size() < 100 && !topo_revs.empty())
+ {
+ revision_id r = topo_revs.front();
+ topo_revs.pop_front();
+ if (!cb.process_this_rev(r))
+ continue;
- for (edge_map::const_iterator i = rs.edges.begin();
- i != rs.edges.end(); ++i)
- {
- set<file_id> file_dsts;
- cset const & cs = edge_changes(i);
-
- // Queue up all the file-adds
- for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
- fa != cs.files_added.end(); ++fa)
+ bunch_revs.push_back(r);
+
+ revision_set rs;
+ app.db.get_revision(r, rs);
+ for (edge_map::const_iterator i = rs.edges.begin();
+ i != rs.edges.end(); ++i)
{
- file_adds.insert(fa->second);
- file_dsts.insert(fa->second);
- }
-
- // Queue up all the file-deltas
- for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
- = cs.deltas_applied.begin();
- fd != cs.deltas_applied.end(); ++fd)
- {
- file_deltas[fd->second.second] = fd->second.first;
- file_dsts.insert(fd->second.second);
- }
+ cset const & cs = edge_changes(i);
+
+ // Queue up all the file-adds
+ for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
+ fa != cs.files_added.end(); ++fa)
+ {
+ if (cb.queue_this_file(fa->second.inner()))
+ {
+ dst_files.insert(fa->second);
+ top_files.insert(fa->second);
+ bunch_files.insert(fa->second);
+ }
+ }
+
+ // Queue up all the file-deltas
+ for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
+ = cs.deltas_applied.begin();
+ fd != cs.deltas_applied.end(); ++fd)
+ {
+ if (cb.queue_this_file(fd->second.second.inner()))
+ {
+ if (dst_files.find(fd->second.first) == dst_files.end())
+ {
+ top_files.insert(fd->second.first);
+ }
- // we don't want to be counting files twice in a single edge
- for (set<file_id>::const_iterator i = file_dsts.begin();
- i != file_dsts.end(); i++)
- file_edge_counts[*i]++;
+ bunch_file_deltas.insert(make_pair(fd->second.first,
+ fd->second.second));
+ dst_files.insert(fd->second.second);
+ }
+ }
+ }
}
- del_files.clear();
- full_files.clear();
- size_t num_edges = rs.edges.size();
+ // XXX required?
+ set<file_id> sent_files;
- for (map<file_id, size_t>::const_iterator i = file_edge_counts.begin();
- i != file_edge_counts.end(); i++)
+ // now we can queue up the file items in order.
+ for (set<file_id>::const_iterator t = top_files.begin();
+ t != top_files.end(); t++)
{
- MM(i->first);
- if (i->second < num_edges)
- continue;
-
- // first preference is to send as a delta...
- map<file_id, file_id>::const_iterator fd = file_deltas.find(i->first);
- if (fd != file_deltas.end())
+ if (sent_files.find(*t) != sent_files.end())
{
- del_files.insert(make_pair(fd->second, fd->first));
+ L(FL("already sent top_file %s") % t->inner()());
continue;
}
- // ... otherwise as a full file.
- set<file_id>::const_iterator f = file_adds.find(i->first);
- if (f != file_adds.end())
+ L(FL("top_file %s") % t->inner()());
+
+ if (bunch_files.find(*t) != bunch_files.end())
{
- full_files.insert(*f);
- continue;
+ // a full file to send.
+ enumerator_item item;
+ item.tag = enumerator_item::fdata;
+ item.ident_a = t->inner();
+ items.push_back(item);
+ sent_files.insert(*t);
+ L(FL("send full_file %s") % t->inner()());
}
- I(false);
- }
-}
+ // XXX: doing BFS now, should try DFS too.
+ deque<file_id> frontier;
+ frontier.push_back(*t);
-void
-revision_enumerator::step()
-{
- while (!done())
- {
- if (items.empty() && !revs.empty())
+ while (!frontier.empty())
{
- revision_id r = revs.front();
- revs.pop_front();
+ file_id f = frontier.front();
+ frontier.pop_front();
- // It's possible we've enumerated this node elsewhere since last
- // time around. Cull rather than reprocess.
- if (enumerated_nodes.find(r) != enumerated_nodes.end())
- continue;
-
- if (!all_parents_enumerated(r))
+ L(FL("frontier %s") % f.inner()());
+
+ for (multimap<file_id,file_id>::const_iterator
+ d = bunch_file_deltas.lower_bound(f);
+ d != bunch_file_deltas.upper_bound(f);
+ d++)
{
- revs.push_back(r);
- continue;
- }
-
- if (terminal_nodes.find(r) == terminal_nodes.end())
- {
- typedef multimap<revision_id, revision_id>::const_iterator ci;
- pair<ci,ci> range = graph.equal_range(r);
- for (ci i = range.first; i != range.second; ++i)
+ if (sent_files.find(d->second) != sent_files.end())
{
- if (i->first == r)
- if (enumerated_nodes.find(i->first) == enumerated_nodes.end())
- revs.push_back(i->second);
+ L(FL("already sent delta %s") % d->second.inner()());
+ continue;
}
- }
+ sent_files.insert(d->second);
+ frontier.push_back(d->second);
- enumerated_nodes.insert(r);
+ enumerator_item item;
+ item.tag = enumerator_item::fdelta;
+ item.ident_a = d->first.inner();
+ item.ident_b = d->second.inner();
+ items.push_back(item);
+ L(FL("file_delta %s->%s") % d->first.inner()() % d->second.inner()());
+ }
+ }
+ }
- if (null_id(r))
- continue;
+ // and the revs
+ for (vector<revision_id>::const_iterator r = bunch_revs.begin();
+ r != bunch_revs.end(); r++)
+ {
+ enumerator_item item;
+ item.tag = enumerator_item::rev;
+ item.ident_a = r->inner();
+ items.push_back(item);
- if (cb.process_this_rev(r))
+ // Queue up some or all of the rev's certs
+ vector<hexenc<id> > hashes;
+ app.db.get_revision_certs(*r, hashes);
+ for (vector<hexenc<id> >::const_iterator i = hashes.begin();
+ i != hashes.end(); ++i)
+ {
+ if (cb.queue_this_cert(*i))
{
- L(FL("revision_enumerator::step expanding "
- "contents of rev '%d'\n") % r);
-
- // The rev's files and fdeltas
- {
- set<file_id> full_files;
- set<pair<file_id, file_id> > del_files;
- files_for_revision(r, full_files, del_files);
-
- for (set<file_id>::const_iterator f = full_files.begin();
- f != full_files.end(); f++)
- {
- if (cb.queue_this_file(f->inner()))
- {
- enumerator_item item;
- item.tag = enumerator_item::fdata;
- item.ident_a = f->inner();
- items.push_back(item);
- }
- }
-
- for (set<pair<file_id, file_id> >::const_iterator fd = del_files.begin();
- fd != del_files.end(); fd++)
- {
- if (cb.queue_this_file(fd->second.inner()))
- {
- enumerator_item item;
- item.tag = enumerator_item::fdelta;
- item.ident_a = fd->first.inner();
- item.ident_b = fd->second.inner();
- items.push_back(item);
- }
- }
- }
-
- // Queue up the rev itself
- {
- enumerator_item item;
- item.tag = enumerator_item::rev;
- item.ident_a = r.inner();
- items.push_back(item);
- }
+ enumerator_item item;
+ item.tag = enumerator_item::cert;
+ item.ident_a = *i;
+ items.push_back(item);
}
-
- // Queue up some or all of the rev's certs
- vector<hexenc<id> > hashes;
- app.db.get_revision_certs(r, hashes);
- for (vector<hexenc<id> >::const_iterator i = hashes.begin();
- i != hashes.end(); ++i)
- {
- if (cb.queue_this_cert(*i))
- {
- enumerator_item item;
- item.tag = enumerator_item::cert;
- item.ident_a = *i;
- items.push_back(item);
- }
- }
}
+ }
+}
+void
+revision_enumerator::step()
+{
+ while (!done())
+ {
+ if (items.empty() && !topo_revs.empty())
+ {
+ process_bunch();
+ }
+
if (!items.empty())
{
L(FL("revision_enumerator::step extracting item\n"));