The unified diff between revisions [2449c78b..] and [ebdccab0..] is displayed below. It can also be downloaded as a raw diff.

This diff has been restricted to the following files: 'enumerator.cc'

#
#
# patch "enumerator.cc"
#  from [9cd3afa7ef779cb7b0c53a053a9567b57b59f660]
#    to [33a06c9ebfb955efb197f5eafb2e8d9beaee8a1d]
#
============================================================
--- enumerator.cc	9cd3afa7ef779cb7b0c53a053a9567b57b59f660
+++ enumerator.cc	33a06c9ebfb955efb197f5eafb2e8d9beaee8a1d
@@ -27,10 +27,14 @@ revision_enumerator::revision_enumerator
                                          set<revision_id> const & terminal)
   : cb(cb), app(app), terminal_nodes(terminal)
 {
+  // TODO: needs sorting out with the toposort stuff.
+  I(false);
+  /*
   for (set<revision_id>::const_iterator i = initial.begin();
        i != initial.end(); ++i)
     revs.push_back(*i);
-  load_graphs();
+    */
+  load_revs();
 }

 revision_enumerator::revision_enumerator(enumerator_callbacks & cb,
@@ -38,226 +42,188 @@ revision_enumerator::revision_enumerator
   : cb(cb), app(app)
 {
   revision_id root;
-  revs.push_back(root);
-  load_graphs();
+  load_revs();
 }

 void
-revision_enumerator::load_graphs()
+revision_enumerator::load_revs()
 {
-  app.db.get_revision_ancestry(graph);
-  for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
-       i != graph.end(); ++i)
+  // TODO: this totally disrespects the initial and terminal stuff.
+  // It should probably be integrated into the toposort code itself.
+  vector<revision_id> topo_vec;
+  toposort(topo_vec, app.db);
+  for (vector<revision_id>::const_iterator r = topo_vec.begin();
+       r != topo_vec.end(); r++)
     {
-      inverse_graph.insert(make_pair(i->second, i->first));
+      if (null_id(*r))
+        continue;
+      topo_revs.push_back(*r);
     }
 }

-bool
-revision_enumerator::all_parents_enumerated(revision_id const & child)
-{
-  typedef multimap<revision_id, revision_id>::const_iterator ci;
-  pair<ci,ci> range = inverse_graph.equal_range(child);
-  for (ci i = range.first; i != range.second; ++i)
-    {
-      if (i->first == child)
-        {
-          if (enumerated_nodes.find(i->second) == enumerated_nodes.end())
-            return false;
-        }
-    }
-  return true;
-}
-
 bool
 revision_enumerator::done()
 {
-  return revs.empty() && items.empty();
+  return topo_revs.empty() && items.empty();
 }

 void
-revision_enumerator::files_for_revision(revision_id const & r,
-                                        set<file_id> & full_files,
-                                        set<pair<file_id,file_id> > & del_files)
+revision_enumerator::process_bunch()
 {
-  // when we're sending a merge, we have to be careful if we
-  // want to send as little data as possible. see bug #15846
-  //
-  // njs's solution: "when sending the files for a revision,
-  // look at both csets. If a given hash is not listed as new
-  // in _both_ csets, throw it out. Now, for everything left
-  // over, if one side says "add" and the other says "delta",
-  // do a delta. If both sides say "add", do a data."
+  // we build up a set of files and revs to send
+  vector<revision_id> bunch_revs;
+  set<file_id> bunch_files;
+  multimap<file_id, file_id> bunch_file_deltas;

-  set<file_id> file_adds;
-  // map<dst, src>.  src is arbitrary.
-  map<file_id, file_id> file_deltas;
-  map<file_id, size_t> file_edge_counts;
+  // files that should be sent first
+  set<file_id> top_files;
+  // files that will be sent either as full files are as the dst of deltas
+  set<file_id> dst_files;

-  revision_set rs;
-  MM(rs);
-  app.db.get_revision(r, rs);
+  while (bunch_revs.size() < 100 && !topo_revs.empty())
+    {
+      revision_id r = topo_revs.front();
+      topo_revs.pop_front();
+      if (!cb.process_this_rev(r))
+        continue;

-  for (edge_map::const_iterator i = rs.edges.begin();
-       i != rs.edges.end(); ++i)
-    {
-      set<file_id> file_dsts;
-      cset const & cs = edge_changes(i);
-
-      // Queue up all the file-adds
-      for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
-           fa != cs.files_added.end(); ++fa)
+      bunch_revs.push_back(r);
+
+      revision_set rs;
+      app.db.get_revision(r, rs);
+      for (edge_map::const_iterator i = rs.edges.begin();
+           i != rs.edges.end(); ++i)
         {
-          file_adds.insert(fa->second);
-          file_dsts.insert(fa->second);
-        }
-
-      // Queue up all the file-deltas
-      for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
-             = cs.deltas_applied.begin();
-           fd != cs.deltas_applied.end(); ++fd)
-        {
-          file_deltas[fd->second.second] = fd->second.first;
-          file_dsts.insert(fd->second.second);
-        }
+          cset const & cs = edge_changes(i);
+
+          // Queue up all the file-adds
+          for (map<split_path, file_id>::const_iterator fa = cs.files_added.begin();
+               fa != cs.files_added.end(); ++fa)
+            {
+              if (cb.queue_this_file(fa->second.inner()))
+                {
+                  dst_files.insert(fa->second);
+                  top_files.insert(fa->second);
+                  bunch_files.insert(fa->second);
+                }
+            }
+
+          // Queue up all the file-deltas
+          for (map<split_path, std::pair<file_id, file_id> >::const_iterator fd
+                 = cs.deltas_applied.begin();
+               fd != cs.deltas_applied.end(); ++fd)
+            {
+              if (cb.queue_this_file(fd->second.second.inner()))
+                {
+                  if (dst_files.find(fd->second.first) == dst_files.end())
+                    {
+                      top_files.insert(fd->second.first);
+                    }

-      // we don't want to be counting files twice in a single edge
-      for (set<file_id>::const_iterator i = file_dsts.begin();
-           i != file_dsts.end(); i++)
-        file_edge_counts[*i]++;
+                  bunch_file_deltas.insert(make_pair(fd->second.first,
+                                                     fd->second.second));
+                  dst_files.insert(fd->second.second);
+                }
+            }
+        }
     }

-  del_files.clear();
-  full_files.clear();
-  size_t num_edges = rs.edges.size();
+  // XXX required?
+  set<file_id> sent_files;

-  for (map<file_id, size_t>::const_iterator i = file_edge_counts.begin();
-       i != file_edge_counts.end(); i++)
+  // now we can queue up the file items in order.
+  for (set<file_id>::const_iterator t = top_files.begin();
+       t != top_files.end(); t++)
     {
-      MM(i->first);
-      if (i->second < num_edges)
-        continue;
-
-      // first preference is to send as a delta...
-      map<file_id, file_id>::const_iterator fd = file_deltas.find(i->first);
-      if (fd != file_deltas.end())
+      if (sent_files.find(*t) != sent_files.end())
         {
-          del_files.insert(make_pair(fd->second, fd->first));
+          L(FL("already sent top_file %s") % t->inner()());
           continue;
         }

-      // ... otherwise as a full file.
-      set<file_id>::const_iterator f = file_adds.find(i->first);
-      if (f != file_adds.end())
+      L(FL("top_file %s") % t->inner()());
+
+      if (bunch_files.find(*t) != bunch_files.end())
         {
-          full_files.insert(*f);
-          continue;
+          // a full file to send.
+          enumerator_item item;
+          item.tag = enumerator_item::fdata;
+          item.ident_a = t->inner();
+          items.push_back(item);
+          sent_files.insert(*t);
+          L(FL("send full_file %s") % t->inner()());
         }

-      I(false);
-    }
-}
+      // XXX: doing BFS now, should try DFS too.
+      deque<file_id> frontier;
+      frontier.push_back(*t);

-void
-revision_enumerator::step()
-{
-  while (!done())
-    {
-      if (items.empty() && !revs.empty())
+      while (!frontier.empty())
         {
-          revision_id r = revs.front();
-          revs.pop_front();
+          file_id f = frontier.front();
+          frontier.pop_front();

-          // It's possible we've enumerated this node elsewhere since last
-          // time around. Cull rather than reprocess.
-          if (enumerated_nodes.find(r) != enumerated_nodes.end())
-            continue;
-
-          if (!all_parents_enumerated(r))
+          L(FL("frontier %s") % f.inner()());
+
+          for (multimap<file_id,file_id>::const_iterator
+               d = bunch_file_deltas.lower_bound(f);
+               d != bunch_file_deltas.upper_bound(f);
+               d++)
             {
-              revs.push_back(r);
-              continue;
-            }
-
-          if (terminal_nodes.find(r) == terminal_nodes.end())
-            {
-              typedef multimap<revision_id, revision_id>::const_iterator ci;
-              pair<ci,ci> range = graph.equal_range(r);
-              for (ci i = range.first; i != range.second; ++i)
+              if (sent_files.find(d->second) != sent_files.end())
                 {
-                  if (i->first == r)
-                    if (enumerated_nodes.find(i->first) == enumerated_nodes.end())
-                      revs.push_back(i->second);
+                  L(FL("already sent delta %s") % d->second.inner()());
+                  continue;
                 }
-            }
+              sent_files.insert(d->second);
+              frontier.push_back(d->second);

-          enumerated_nodes.insert(r);
+              enumerator_item item;
+              item.tag = enumerator_item::fdelta;
+              item.ident_a = d->first.inner();
+              item.ident_b = d->second.inner();
+              items.push_back(item);
+              L(FL("file_delta %s->%s") % d->first.inner()() % d->second.inner()());
+            }
+        }
+    }

-          if (null_id(r))
-            continue;
+  // and the revs
+  for (vector<revision_id>::const_iterator r = bunch_revs.begin();
+       r != bunch_revs.end(); r++)
+    {
+      enumerator_item item;
+      item.tag = enumerator_item::rev;
+      item.ident_a = r->inner();
+      items.push_back(item);

-          if (cb.process_this_rev(r))
+      // Queue up some or all of the rev's certs
+      vector<hexenc<id> > hashes;
+      app.db.get_revision_certs(*r, hashes);
+      for (vector<hexenc<id> >::const_iterator i = hashes.begin();
+           i != hashes.end(); ++i)
+        {
+          if (cb.queue_this_cert(*i))
             {
-              L(FL("revision_enumerator::step expanding "
-                  "contents of rev '%d'\n") % r);
-
-              // The rev's files and fdeltas
-              {
-                set<file_id> full_files;
-                set<pair<file_id, file_id> > del_files;
-                files_for_revision(r, full_files, del_files);
-
-                for (set<file_id>::const_iterator f = full_files.begin();
-                     f != full_files.end(); f++)
-                  {
-                    if (cb.queue_this_file(f->inner()))
-                      {
-                        enumerator_item item;
-                        item.tag = enumerator_item::fdata;
-                        item.ident_a = f->inner();
-                        items.push_back(item);
-                      }
-                  }
-
-                for (set<pair<file_id, file_id> >::const_iterator fd = del_files.begin();
-                     fd != del_files.end(); fd++)
-                  {
-                    if (cb.queue_this_file(fd->second.inner()))
-                      {
-                        enumerator_item item;
-                        item.tag = enumerator_item::fdelta;
-                        item.ident_a = fd->first.inner();
-                        item.ident_b = fd->second.inner();
-                        items.push_back(item);
-                      }
-                  }
-              }
-
-              // Queue up the rev itself
-              {
-                enumerator_item item;
-                item.tag = enumerator_item::rev;
-                item.ident_a = r.inner();
-                items.push_back(item);
-              }
+              enumerator_item item;
+              item.tag = enumerator_item::cert;
+              item.ident_a = *i;
+              items.push_back(item);
             }
-
-          // Queue up some or all of the rev's certs
-          vector<hexenc<id> > hashes;
-          app.db.get_revision_certs(r, hashes);
-          for (vector<hexenc<id> >::const_iterator i = hashes.begin();
-               i != hashes.end(); ++i)
-            {
-              if (cb.queue_this_cert(*i))
-                {
-                  enumerator_item item;
-                  item.tag = enumerator_item::cert;
-                  item.ident_a = *i;
-                  items.push_back(item);
-                }
-            }
         }
+    }
+}

+void
+revision_enumerator::step()
+{
+  while (!done())
+    {
+      if (items.empty() && !topo_revs.empty())
+        {
+          process_bunch();
+        }
+
       if (!items.empty())
         {
           L(FL("revision_enumerator::step extracting item\n"));