Below is the file 'annotate.cc' from this revision. You can also download the file.
// copyright (C) 2005 emile snyder <emile@alumni.reed.edu> // all rights reserved. // licensed to the public under the terms of the GNU GPL (>= 2) // see the file COPYING for details #include <string> #include <sstream> #include <deque> #include <set> #include <boost/shared_ptr.hpp> #include "platform.hh" #include "vocab.hh" #include "sanity.hh" #include "revision.hh" #include "change_set.hh" #include "app_state.hh" #include "manifest.hh" #include "transforms.hh" #include "lcs.hh" #include "annotate.hh" class annotate_lineage_mapping; class annotate_context { public: annotate_context(file_id fid, app_state &app); boost::shared_ptr<annotate_lineage_mapping> initial_lineage() const; /// credit any uncopied lines (as recorded in copied_lines) to /// rev, and reset copied_lines. void evaluate(revision_id rev); void set_copied(int index); void set_touched(int index); void set_equivalent(int index, int index2); void annotate_equivalent_lines(); /// return true if we have no more unassigned lines bool is_complete() const; void dump() const; std::string get_line(int line_index) const { return file_lines[line_index]; } private: std::vector<std::string> file_lines; std::vector<revision_id> annotations; /// equivalent_lines[n] = m means that line n should be blamed to the same /// revision as line m std::map<int, int> equivalent_lines; /// keep a count so we can tell quickly whether we can terminate size_t annotated_lines_completed; // elements of the set are indexes into the array of lines in the UDOI // lineages add entries here when they notice that they copied a line from the UDOI std::set<size_t> copied_lines; // similarly, lineages add entries here for all the lines from the UDOI they know about that they didn't copy std::set<size_t> touched_lines; }; /* An annotate_lineage_mapping tells you, for each line of a file, where in the ultimate descendent of interest (UDOI) the line came from (a line not present in the UDOI is represented as -1). */ class annotate_lineage_mapping { public: annotate_lineage_mapping(const file_data &data); annotate_lineage_mapping(const std::vector<std::string> &lines); // debugging //bool equal_interned (const annotate_lineage_mapping &rhs) const; /// need a better name. does the work of setting copied bits in the context object. boost::shared_ptr<annotate_lineage_mapping> build_parent_lineage(boost::shared_ptr<annotate_context> acp, revision_id parent_rev, const file_data &parent_data) const; void merge(const annotate_lineage_mapping &other, const boost::shared_ptr<annotate_context> &acp); void credit_mapped_lines (boost::shared_ptr<annotate_context> acp) const; void set_copied_all_mapped (boost::shared_ptr<annotate_context> acp) const; private: void init_with_lines(const std::vector<std::string> &lines); static interner<long> in; // FIX, testing hack std::vector<long, QA(long)> file_interned; // maps an index into the vector of lines for our current version of the file // into an index into the vector of lines of the UDOI: // eg. if the line file_interned[i] will turn into line 4 in the UDOI, mapping[i] = 4 std::vector<int> mapping; }; interner<long> annotate_lineage_mapping::in; /* annotate_node_work encapsulates the input data needed to process the annotations for a given childrev, considering all the childrev -> parentrevN edges. */ struct annotate_node_work { annotate_node_work (boost::shared_ptr<annotate_context> annotations_, boost::shared_ptr<annotate_lineage_mapping> lineage_, revision_id node_revision_, file_id node_fid_, file_path node_fpath_) : annotations(annotations_), lineage(lineage_), node_revision(node_revision_), node_fid(node_fid_), node_fpath(node_fpath_) {} annotate_node_work (const annotate_node_work &w) : annotations(w.annotations), lineage(w.lineage), node_revision(w.node_revision), node_fid(w.node_fid), node_fpath(w.node_fpath) {} boost::shared_ptr<annotate_context> annotations; boost::shared_ptr<annotate_lineage_mapping> lineage; revision_id node_revision; file_id node_fid; file_path node_fpath; }; class lineage_merge_node { public: typedef boost::shared_ptr<annotate_lineage_mapping> splm; lineage_merge_node(const lineage_merge_node &m) : work(m.work), incoming_edges(m.incoming_edges), completed_edges(m.completed_edges) {} lineage_merge_node(annotate_node_work wu, size_t incoming) : work(wu), incoming_edges(incoming), completed_edges(1) {} void merge(splm incoming, const boost::shared_ptr<annotate_context> &acp) { work.lineage->merge(*incoming, acp); completed_edges++; } bool iscomplete () const { I(completed_edges <= incoming_edges); return incoming_edges == completed_edges; } annotate_node_work get_work() const { I(iscomplete()); return work; } private: annotate_node_work work; size_t incoming_edges; size_t completed_edges; }; annotate_context::annotate_context(file_id fid, app_state &app) : annotated_lines_completed(0) { // initialize file_lines file_data fpacked; app.db.get_file_version(fid, fpacked); std::string encoding = default_encoding; // FIXME split_into_lines(fpacked.inner()(), encoding, file_lines); L(F("annotate_context::annotate_context initialized with %d file lines\n") % file_lines.size()); // initialize annotations revision_id nullid; annotations.clear(); annotations.reserve(file_lines.size()); annotations.insert(annotations.begin(), file_lines.size(), nullid); L(F("annotate_context::annotate_context initialized with %d entries in annotations\n") % annotations.size()); // initialize copied_lines and touched_lines copied_lines.clear(); touched_lines.clear(); } boost::shared_ptr<annotate_lineage_mapping> annotate_context::initial_lineage() const { boost::shared_ptr<annotate_lineage_mapping> res(new annotate_lineage_mapping(file_lines)); return res; } void annotate_context::evaluate(revision_id rev) { revision_id nullid; I(copied_lines.size() <= annotations.size()); I(touched_lines.size() <= annotations.size()); // Find the lines that we touched but that no other parent copied. std::set<size_t> credit_lines; std::set_difference(touched_lines.begin(), touched_lines.end(), copied_lines.begin(), copied_lines.end(), inserter(credit_lines, credit_lines.begin())); std::set<size_t>::const_iterator i; for (i = credit_lines.begin(); i != credit_lines.end(); i++) { I(*i >= 0 && *i < annotations.size()); if (annotations[*i] == nullid) { //L(F("evaluate setting annotations[%d] -> %s, since touched_lines contained %d, copied_lines didn't and annotations[%d] was nullid\n") // % *i % rev % *i % *i); annotations[*i] = rev; annotated_lines_completed++; } else { //L(F("evaluate LEAVING annotations[%d] -> %s\n") % *i % annotations[*i]); } } copied_lines.clear(); touched_lines.clear(); } void annotate_context::set_copied(int index) { //L(F("annotate_context::set_copied %d\n") % index); if (index == -1) return; I(index >= 0 && index < (int)file_lines.size()); copied_lines.insert(index); } void annotate_context::set_touched(int index) { //L(F("annotate_context::set_touched %d\n") % index); if (index == -1) return; I(index >= 0 && index <= (int)file_lines.size()); touched_lines.insert(index); } void annotate_context::set_equivalent(int index, int index2) { L(F("annotate_context::set_equivalent index %d index2 %d\n") % index % index2); equivalent_lines[index] = index2; } void annotate_context::annotate_equivalent_lines() { revision_id null_id; for (size_t i=0; i<annotations.size(); i++) { if (annotations[i] == null_id) { std::map<int, int>::const_iterator j = equivalent_lines.find(i); if (j == equivalent_lines.end()) { L(F("annotate_equivalent_lines unable to find equivalent for line %d\n") % i); } I(j != equivalent_lines.end()); annotations[i] = annotations[j->second]; annotated_lines_completed++; } } } bool annotate_context::is_complete() const { if (annotated_lines_completed == annotations.size()) return true; I(annotated_lines_completed < annotations.size()); return false; } void annotate_context::dump() const { revision_id nullid; I(annotations.size() == file_lines.size()); revision_id lastid = nullid; for (size_t i=0; i<file_lines.size(); i++) { //I(! (annotations[i] == nullid) ); if (false) //(lastid == annotations[i]) std::cout << " : " << file_lines[i] << std::endl; else std::cout << annotations[i] << ": " << file_lines[i] << std::endl; lastid = annotations[i]; } } annotate_lineage_mapping::annotate_lineage_mapping(const file_data &data) { // split into lines std::vector<std::string> lines; std::string encoding = default_encoding; // FIXME split_into_lines (data.inner()().data(), encoding, lines); init_with_lines(lines); } annotate_lineage_mapping::annotate_lineage_mapping(const std::vector<std::string> &lines) { init_with_lines(lines); } /* bool annotate_lineage_mapping::equal_interned (const annotate_lineage_mapping &rhs) const { bool result = true; if (file_interned.size() != rhs.file_interned.size()) { L(F("annotate_lineage_mapping::equal_interned lhs size %d != rhs size %d\n") % file_interned.size() % rhs.file_interned.size()); result = false; } size_t limit = std::min(file_interned.size(), rhs.file_interned.size()); for (size_t i=0; i<limit; i++) { if (file_interned[i] != rhs.file_interned[i]) { L(F("annotate_lineage_mapping::equal_interned lhs[%d]:%ld != rhs[%d]:%ld\n") % i % file_interned[i] % i % rhs.file_interned[i]); result = false; } } return result; } */ void annotate_lineage_mapping::init_with_lines(const std::vector<std::string> &lines) { file_interned.clear(); file_interned.reserve(lines.size()); mapping.clear(); mapping.reserve(lines.size()); int count; std::vector<std::string>::const_iterator i; for (count=0, i = lines.begin(); i != lines.end(); i++, count++) { file_interned.push_back(in.intern(*i)); mapping.push_back(count); } L(F("annotate_lineage_mapping::init_with_lines ending with %d entries in mapping\n") % mapping.size()); } boost::shared_ptr<annotate_lineage_mapping> annotate_lineage_mapping::build_parent_lineage (boost::shared_ptr<annotate_context> acp, revision_id parent_rev, const file_data &parent_data) const { bool verbose = false; boost::shared_ptr<annotate_lineage_mapping> parent_lineage(new annotate_lineage_mapping(parent_data)); std::vector<long, QA(long)> lcs; std::back_insert_iterator< std::vector<long, QA(long)> > bii(lcs); longest_common_subsequence(file_interned.begin(), file_interned.end(), parent_lineage->file_interned.begin(), parent_lineage->file_interned.end(), std::min(file_interned.size(), parent_lineage->file_interned.size()), std::back_inserter(lcs)); if (verbose) L(F("build_parent_lineage: file_lines.size() == %d, parent.file_lines.size() == %d, lcs.size() == %d\n") % file_interned.size() % parent_lineage->file_interned.size() % lcs.size()); // do the copied lines thing for our annotate_context std::vector<long> lcs_src_lines; lcs_src_lines.resize(lcs.size()); size_t i, j; i = j = 0; while (i < file_interned.size() && j < lcs.size()) { //if (verbose) if (file_interned[i] == 14) L(F("%s file_interned[%d]: %ld\tlcs[%d]: %ld\tmapping[%d]: %ld\n") % parent_rev % i % file_interned[i] % j % lcs[j] % i % mapping[i]); if (file_interned[i] == lcs[j]) { acp->set_copied(mapping[i]); lcs_src_lines[j] = mapping[i]; j++; } else { acp->set_touched(mapping[i]); } i++; } if (verbose) L(F("loop ended with i: %d, j: %d, lcs.size(): %d\n") % i % j % lcs.size()); I(j == lcs.size()); // set touched for the rest of the lines in the file while (i < file_interned.size()) { acp->set_touched(mapping[i]); i++; } // determine the mapping for parent lineage if (verbose) L(F("build_parent_lineage: building mapping now for parent_rev %s\n") % parent_rev); i = j = 0; while (i < parent_lineage->file_interned.size() && j < lcs.size()) { if (parent_lineage->file_interned[i] == lcs[j]) { parent_lineage->mapping[i] = lcs_src_lines[j]; j++; } else { parent_lineage->mapping[i] = -1; } if (verbose) L(F("mapping[%d] -> %d\n") % i % parent_lineage->mapping[i]); i++; } I(j == lcs.size()); // set mapping for the rest of the lines in the file while (i < parent_lineage->file_interned.size()) { parent_lineage->mapping[i] = -1; if (verbose) L(F("mapping[%d] -> %d\n") % i % parent_lineage->mapping[i]); i++; } return parent_lineage; } void annotate_lineage_mapping::merge (const annotate_lineage_mapping &other, const boost::shared_ptr<annotate_context> &acp) { I(file_interned.size() == other.file_interned.size()); I(mapping.size() == other.mapping.size()); //I(equal_interned(other)); // expensive check for (size_t i=0; i<mapping.size(); i++) { if (mapping[i] == -1 && other.mapping[i] >= 0) mapping[i] = other.mapping[i]; if (mapping[i] >= 0 && other.mapping[i] >= 0) { //I(mapping[i] == other.mapping[i]); if (mapping[i] != other.mapping[i]) { // a given line in the current merged mapping will split and become // multiple lines in the UDOI. so we have to remember that whenever we // ultimately assign blame for mapping[i] we blame the same revision // on other.mapping[i]. acp->set_equivalent(other.mapping[i], mapping[i]); } } } } void annotate_lineage_mapping::credit_mapped_lines (boost::shared_ptr<annotate_context> acp) const { std::vector<int>::const_iterator i; for (i=mapping.begin(); i != mapping.end(); i++) { acp->set_touched(*i); } } void annotate_lineage_mapping::set_copied_all_mapped (boost::shared_ptr<annotate_context> acp) const { std::vector<int>::const_iterator i; for (i=mapping.begin(); i != mapping.end(); i++) { acp->set_copied(*i); } } static void do_annotate_node (const annotate_node_work &work_unit, app_state &app, std::deque<annotate_node_work> &nodes_to_process, std::set<revision_id> &nodes_complete, const std::map<revision_id, size_t> &paths_to_nodes, std::map<revision_id, lineage_merge_node> &pending_merge_nodes) { L(F("do_annotate_node for node %s\n") % work_unit.node_revision); I(nodes_complete.find(work_unit.node_revision) == nodes_complete.end()); //nodes_seen.insert(std::make_pair(work_unit.node_revision, work_unit.lineage)); revision_set rev; app.db.get_revision(work_unit.node_revision, rev); if (rev.edges.size() == 0) { L(F("do_annotate_node credit_mapped_lines to revision %s\n") % work_unit.node_revision); work_unit.lineage->credit_mapped_lines(work_unit.annotations); work_unit.annotations->evaluate(work_unit.node_revision); nodes_complete.insert(work_unit.node_revision); return; } // if all deltas backwards have to add the file, then we credit any mapped but // unassigned lines in our lineage to this revision. gotta count adds to compare to number // of parent revs. size_t added_in_parent_count = 0; // edges are from parent -> child where child is our work_unit node for (edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); i++) { revision_id parent_revision = edge_old_revision(i); L(F("do_annotate_node processing edge from parent %s to child %s\n") % parent_revision % work_unit.node_revision); change_set cs = edge_changes(i); if (cs.rearrangement.added_files.find(work_unit.node_fpath) != cs.rearrangement.added_files.end()) { L(F("file %s added in %s, continuing\n") % work_unit.node_fpath % work_unit.node_revision); added_in_parent_count++; continue; } file_path parent_fpath = apply_change_set_inverse(cs, work_unit.node_fpath); L(F("file %s in parent revision %s is %s\n") % work_unit.node_fpath % parent_revision % parent_fpath); I(!parent_fpath.empty()); change_set::delta_map::const_iterator fdelta_iter = cs.deltas.find(parent_fpath); file_id parent_fid = work_unit.node_fid; boost::shared_ptr<annotate_lineage_mapping> parent_lineage; if (fdelta_iter != cs.deltas.end()) // then the file changed { I(delta_entry_dst(fdelta_iter) == work_unit.node_fid); parent_fid = delta_entry_src(fdelta_iter); file_data data; app.db.get_file_version(parent_fid, data); L(F("building parent lineage for parent file %s\n") % parent_fid); parent_lineage = work_unit.lineage->build_parent_lineage(work_unit.annotations, parent_revision, data); } else { L(F("parent file identical, set copied all mapped and copy lineage\n")); parent_lineage = work_unit.lineage; parent_lineage->set_copied_all_mapped(work_unit.annotations); } // if this parent has not yet been queued for processing, create the work unit for it. std::map<revision_id, lineage_merge_node>::iterator lmn = pending_merge_nodes.find(parent_revision); if (lmn == pending_merge_nodes.end()) { annotate_node_work newunit(work_unit.annotations, parent_lineage, parent_revision, parent_fid, parent_fpath); std::map<revision_id, size_t>::const_iterator ptn = paths_to_nodes.find(parent_revision); if (ptn->second > 1) { lineage_merge_node nmn(newunit, ptn->second); pending_merge_nodes.insert(std::make_pair(parent_revision, nmn)); // just checking... //(pending_merge_nodes.find(parent_revision))->second.dump(); } else { //L(F("single path to node, just stick work on the queue\n")); nodes_to_process.push_back(newunit); } } else { // already a pending node, so we just have to merge the lineage and decide whether to move it // over to the nodes_to_process queue L(F("merging lineage from node %s to parent %s\n") % work_unit.node_revision % parent_revision); lmn->second.merge(parent_lineage, work_unit.annotations); //L(F("after merging from work revision %s to parent %s lineage_merge_node is:\n") % work_unit.node_revision % parent_revision); //lmn->second.dump(); if (lmn->second.iscomplete()) { nodes_to_process.push_back(lmn->second.get_work()); pending_merge_nodes.erase(lmn); } } } I(added_in_parent_count <= rev.edges.size()); if (added_in_parent_count == rev.edges.size()) { //L(F("added_in_parent_count == rev.edges.size(), credit_mapped_lines to %s\n") % work_unit.node_revision); work_unit.lineage->credit_mapped_lines(work_unit.annotations); } work_unit.annotations->evaluate(work_unit.node_revision); nodes_complete.insert(work_unit.node_revision); } void find_ancestors(app_state &app, revision_id rid, std::map<revision_id, size_t> &paths_to_nodes) { std::vector<revision_id> frontier; frontier.push_back(rid); while (!frontier.empty()) { revision_id rid = frontier.back(); frontier.pop_back(); if(!null_id(rid)) { std::set<revision_id> parents; app.db.get_revision_parents(rid, parents); for (std::set<revision_id>::const_iterator i = parents.begin(); i != parents.end(); ++i) { std::map<revision_id, size_t>::iterator found = paths_to_nodes.find(*i); if (found == paths_to_nodes.end()) { frontier.push_back(*i); paths_to_nodes.insert(std::make_pair(*i, 1)); } else { (found->second)++; } } } } } void do_annotate (app_state &app, file_path fpath, file_id fid, revision_id rid) { L(F("annotating file %s with id %s in revision %s\n") % fpath % fid % rid); boost::shared_ptr<annotate_context> acp(new annotate_context(fid, app)); boost::shared_ptr<annotate_lineage_mapping> lineage = acp->initial_lineage(); std::set<revision_id> nodes_complete; std::map<revision_id, size_t> paths_to_nodes; std::map<revision_id, lineage_merge_node> pending_merge_nodes; find_ancestors(app, rid, paths_to_nodes); // build node work unit std::deque<annotate_node_work> nodes_to_process; annotate_node_work workunit(acp, lineage, rid, fid, fpath); nodes_to_process.push_back(workunit); while (nodes_to_process.size() && !acp->is_complete()) { annotate_node_work work = nodes_to_process.front(); nodes_to_process.pop_front(); do_annotate_node(work, app, nodes_to_process, nodes_complete, paths_to_nodes, pending_merge_nodes); } I(pending_merge_nodes.size() == 0); acp->annotate_equivalent_lines(); I(acp->is_complete()); acp->dump(); }