Below is the file 'change_set.cc' from this revision. You can also download the file.
// -*- mode: C++; c-file-style: "gnu"; indent-tabs-mode: nil -*- // copyright (C) 2004 graydon hoare <graydon@pobox.com> // all rights reserved. // licensed to the public under the terms of the GNU GPL (>= 2) // see the file COPYING for details // this is how you "ask for" the C99 constant constructor macros. *and* // you have to do so before any other files accidentally include // stdint.h. awesome. #define __STDC_CONSTANT_MACROS #include <algorithm> #include <iterator> #include <iostream> #include <list> #include <vector> #include <boost/filesystem/path.hpp> #include <boost/shared_ptr.hpp> #include <boost/lexical_cast.hpp> #include <boost/dynamic_bitset.hpp> #include "basic_io.hh" #include "change_set.hh" #include "constants.hh" #include "diff_patch.hh" #include "file_io.hh" #include "numeric_vocab.hh" #include "sanity.hh" #include "smap.hh" #include "paths.hh" // our analyses in this file happen on one of two families of // related structures: a path_analysis or a directory_map. // // a path_analysis corresponds exactly to a normalized // path_rearrangement; they are two ways of writing the // same information // // the path_analysis stores two path_states. each path_state is a map from // transient identifiers (tids) to items. each item represents a semantic // element of a filesystem which has a type (file or directory), a name, // and a parent link (another tid). tids should be unique across a // path_analysis. typedef enum { ptype_directory, ptype_file } ptype; typedef u32 tid; static tid root_tid = 0; struct tid_source { tid ctr; tid_source() : ctr(root_tid + 1) {} tid next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; } }; struct path_item { tid parent; ptype ty; path_component name; inline path_item() {} inline path_item(tid p, ptype t, path_component n); inline path_item(path_item const & other); inline path_item const & operator=(path_item const & other); inline bool operator==(path_item const & other) const; }; template<typename T> struct identity { size_t operator()(T const & v) const { return static_cast<size_t>(v); } }; typedef smap<tid, path_item> path_state; typedef smap<tid, tid> state_renumbering; typedef std::pair<path_state, path_state> path_analysis; // nulls and tests static file_id null_ident; // a directory_map is a more "normal" representation of a directory tree, // which you can traverse more conveniently from root to tip // // tid -> [ name -> (ptype, tid), // name -> (ptype, tid), // ... ] // // tid -> [ name -> (ptype, tid), // name -> (ptype, tid), // ... ] // There is a bug in handling directory_nodes; the problem is that it is legal // to have multiple files named "", and, worse, to have both a file named "" // and a directory named "". Currently, we make this a map instead of an // smap, so whichever ""-named file is added first simply wins, and the rest // are ignored. FIXME: teach the code that uses directory_map's not to expect // there to be any entry at all for ""-named files. typedef std::map< path_component, std::pair<ptype,tid> > directory_node; typedef smap<tid, boost::shared_ptr<directory_node> > directory_map; static path_component directory_entry_name(directory_node::const_iterator const & i) { return i->first; } static ptype directory_entry_type(directory_node::const_iterator const & i) { return i->second.first; } static tid directory_entry_tid(directory_node::const_iterator const & i) { return i->second.second; } void change_set::add_file(file_path const & a) { I(rearrangement.added_files.find(a) == rearrangement.added_files.end()); rearrangement.added_files.insert(a); } void change_set::add_file(file_path const & a, file_id const & ident) { I(rearrangement.added_files.find(a) == rearrangement.added_files.end()); I(deltas.find(a) == deltas.end()); rearrangement.added_files.insert(a); deltas.insert(std::make_pair(a, std::make_pair(null_ident, ident))); } void change_set::apply_delta(file_path const & path, file_id const & src, file_id const & dst) { I(deltas.find(path) == deltas.end()); deltas.insert(std::make_pair(path, std::make_pair(src, dst))); } void change_set::delete_file(file_path const & d) { I(rearrangement.deleted_files.find(d) == rearrangement.deleted_files.end()); rearrangement.deleted_files.insert(d); } void change_set::delete_dir(file_path const & d) { I(rearrangement.deleted_dirs.find(d) == rearrangement.deleted_dirs.end()); rearrangement.deleted_dirs.insert(d); } void change_set::rename_file(file_path const & a, file_path const & b) { I(rearrangement.renamed_files.find(a) == rearrangement.renamed_files.end()); rearrangement.renamed_files.insert(std::make_pair(a,b)); } void change_set::rename_dir(file_path const & a, file_path const & b) { I(rearrangement.renamed_dirs.find(a) == rearrangement.renamed_dirs.end()); rearrangement.renamed_dirs.insert(std::make_pair(a,b)); } bool change_set::path_rearrangement::operator==(path_rearrangement const & other) const { return deleted_files == other.deleted_files && deleted_dirs == other.deleted_dirs && renamed_files == other.renamed_files && renamed_dirs == other.renamed_dirs && added_files == other.added_files; } bool change_set::path_rearrangement::empty() const { return deleted_files.empty() && deleted_dirs.empty() && renamed_files.empty() && renamed_dirs.empty() && added_files.empty(); } bool change_set::path_rearrangement::has_added_file(file_path const & file) const { return added_files.find(file) != added_files.end(); } bool change_set::path_rearrangement::has_deleted_file(file_path const & file) const { return deleted_files.find(file) != deleted_files.end(); } bool change_set::path_rearrangement::has_renamed_file_dst(file_path const & file) const { // FIXME: this is inefficient, but improvements would require a different // structure for renamed_files (or perhaps a second reverse map). For now // we'll assume that few files will be renamed per changeset. for (std::map<file_path,file_path>::const_iterator rf = renamed_files.begin(); rf != renamed_files.end(); ++rf) if (rf->second == file) return true; return false; } bool change_set::path_rearrangement::has_renamed_file_src(file_path const & file) const { return renamed_files.find(file) != renamed_files.end(); } bool change_set::empty() const { return deltas.empty() && rearrangement.empty(); } bool change_set::operator==(change_set const & other) const { return rearrangement == other.rearrangement && deltas == other.deltas; } // simple accessors inline tid const & path_item_parent(path_item const & p) { return p.parent; } inline ptype const & path_item_type(path_item const & p) { return p.ty; } inline path_component path_item_name(path_item const & p) { return p.name; } inline tid path_state_tid(path_state::const_iterator i) { return i->first; } inline path_item const & path_state_item(path_state::const_iterator i) { return i->second; } void dump(path_state const & st, std::string & out) { for (path_state::const_iterator i = st.begin(); i != st.end(); ++i) { std::vector<path_component> tmp_v; tmp_v.push_back(path_item_name(path_state_item(i))); file_path tmp_fp(tmp_v); out += (F("tid %d: parent %d, type %s, name %s\n") % path_state_tid(i) % path_item_parent(path_state_item(i)) % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file") % tmp_fp).str(); } } void dump(path_analysis const & analysis, std::string & out) { out = "pre-state:\n"; std::string tmp; dump(analysis.first, tmp); out += tmp; out += "post-state:\n"; tmp.clear(); dump(analysis.second, tmp); out += tmp; } void dump(state_renumbering const & r, std::string & out) { for (state_renumbering::const_iterator i = r.begin(); i != r.end(); ++i) out += (F("%d -> %d\n") % i->first % i->second).str(); } // structure dumping /* static void dump_renumbering(std::string const & s, state_renumbering const & r) { L(F("BEGIN dumping renumbering '%s'\n") % s); for (state_renumbering::const_iterator i = r.begin(); i != r.end(); ++i) { L(F("%d -> %d\n") % i->first % i->second); } L(F("END dumping renumbering '%s'\n") % s); } static void dump_state(std::string const & s, path_state const & st) { L(F("BEGIN dumping state '%s'\n") % s); for (path_state::const_iterator i = st.begin(); i != st.end(); ++i) { std::vector<path_component> tmp_v; tmp_v.push_back(path_item_name(path_state_item(i))); file_path tmp_fp(tmp_v); L(F("state '%s': tid %d, parent %d, type %s, name %s\n") % s % path_state_tid(i) % path_item_parent(path_state_item(i)) % (path_item_type(path_state_item(i)) == ptype_directory ? "dir" : "file") % tmp_fp); } L(F("END dumping state '%s'\n") % s); } static void dump_analysis(std::string const & s, path_analysis const & t) { L(F("BEGIN dumping tree '%s'\n") % s); dump_state(s + " first", t.first); dump_state(s + " second", t.second); L(F("END dumping tree '%s'\n") % s); } */ // sanity checking static void check_sets_disjoint(std::set<file_path> const & a, std::set<file_path> const & b) { std::set<file_path> isect; std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::inserter(isect, isect.begin())); if (!global_sanity.relaxed) { I(isect.empty()); } } change_set::path_rearrangement::path_rearrangement(path_rearrangement const & other) { other.check_sane(); deleted_files = other.deleted_files; deleted_dirs = other.deleted_dirs; renamed_files = other.renamed_files; renamed_dirs = other.renamed_dirs; added_files = other.added_files; } change_set::path_rearrangement const & change_set::path_rearrangement::operator=(path_rearrangement const & other) { other.check_sane(); deleted_files = other.deleted_files; deleted_dirs = other.deleted_dirs; renamed_files = other.renamed_files; renamed_dirs = other.renamed_dirs; added_files = other.added_files; return *this; } static void extract_pairs_and_insert(std::map<file_path, file_path> const & in, std::set<file_path> & firsts, std::set<file_path> & seconds) { for (std::map<file_path, file_path>::const_iterator i = in.begin(); i != in.end(); ++i) { firsts.insert(i->first); seconds.insert(i->second); } } template <typename A, typename B> static void extract_first(std::map<A, B> const & m, std::set<A> & s) { s.clear(); for (typename std::map<A, B>::const_iterator i = m.begin(); i != m.end(); ++i) { s.insert(i->first); } } static void extract_killed(path_analysis const & a, std::set<file_path> & killed); static void check_no_deltas_on_killed_files(path_analysis const & pa, change_set::delta_map const & del) { std::set<file_path> killed; std::set<file_path> delta_paths; extract_killed(pa, killed); extract_first(del, delta_paths); check_sets_disjoint(killed, delta_paths); } static void check_delta_entries_not_directories(path_analysis const & pa, change_set::delta_map const & dels); void analyze_rearrangement(change_set::path_rearrangement const & pr, path_analysis & pa, tid_source & ts); void sanity_check_path_analysis(path_analysis const & pr); void change_set::path_rearrangement::check_sane() const { delta_map del; this->check_sane(del); } void change_set::path_rearrangement::check_sane(delta_map const & deltas) const { tid_source ts; path_analysis pa; analyze_rearrangement(*this, pa, ts); sanity_check_path_analysis (pa); check_no_deltas_on_killed_files(pa, deltas); check_delta_entries_not_directories(pa, deltas); // FIXME: extend this as you manage to think of more invariants // which are cheap enough to check at this level. std::set<file_path> renamed_srcs, renamed_dsts; extract_pairs_and_insert(renamed_files, renamed_srcs, renamed_dsts); extract_pairs_and_insert(renamed_dirs, renamed_srcs, renamed_dsts); // Files cannot be split nor joined by renames. I(renamed_files.size() + renamed_dirs.size() == renamed_srcs.size()); I(renamed_files.size() + renamed_dirs.size() == renamed_dsts.size()); check_sets_disjoint(deleted_files, deleted_dirs); check_sets_disjoint(deleted_files, renamed_srcs); check_sets_disjoint(deleted_dirs, renamed_srcs); check_sets_disjoint(added_files, renamed_dsts); } change_set::change_set(change_set const & other) { other.check_sane(); rearrangement = other.rearrangement; deltas = other.deltas; } change_set const &change_set::operator=(change_set const & other) { other.check_sane(); rearrangement = other.rearrangement; deltas = other.deltas; return *this; } void change_set::check_sane() const { // FIXME: extend this as you manage to think of more invariants // which are cheap enough to check at this level. MM(*this); rearrangement.check_sane(this->deltas); for (std::set<file_path>::const_iterator i = rearrangement.added_files.begin(); i != rearrangement.added_files.end(); ++i) { delta_map::const_iterator j = deltas.find(*i); if (!global_sanity.relaxed) { I(j != deltas.end()); I(null_id(delta_entry_src(j))); I(!null_id(delta_entry_dst(j))); } } for (delta_map::const_iterator i = deltas.begin(); i != deltas.end(); ++i) { if (!global_sanity.relaxed) { I(!null_name(delta_entry_path(i))); I(!null_id(delta_entry_dst(i))); I(!(delta_entry_src(i) == delta_entry_dst(i))); if (null_id(delta_entry_src(i))) I(rearrangement.added_files.find(delta_entry_path(i)) != rearrangement.added_files.end()); } } } inline static void sanity_check_path_item(path_item const & pi) { } // recursive helper for confirm_proper_tree. // traverse up the tree from p, return true if the root tid is hit // in finite (ttl) time. static bool check_depth(path_state const & ps, path_item const & p, size_t ttl, boost::dynamic_bitset<> & checked, tid min_tid) { if (ttl == 0) return false; if (path_item_parent(p) == root_tid) return true; // can only use the checked cache if the name isn't null. // null names' parents need to be checked further on. if (!null_name(path_item_name(p)) && checked[path_item_parent(p) - min_tid]) return true; path_state::const_iterator par = ps.find(path_item_parent(p)); I(par != ps.end()); I(path_item_type(path_state_item(par)) == ptype_directory); // if we're null, our parent must also be null if (null_name(path_item_name(p))) I(null_name(path_item_name(path_state_item(par)))); // recurse bool ret = check_depth(ps, path_state_item(par), ttl - 1, checked, min_tid); checked[path_item_parent(p) - min_tid] = ret; return ret; } // Check that there are no loops in the path_state. // This can be done by recursing up the tree, and ensuring that the root // tid is hit in finite steps. static void confirm_proper_tree(path_state const & ps) { if (ps.empty()) return; I(ps.find(root_tid) == ps.end()); // Note that this find() also ensures // sortedness of ps... tid min_tid = ps.begin()->first; // ... which matters here tid max_tid = ps.rbegin()->first; size_t tid_range = max_tid - min_tid + 1; boost::dynamic_bitset<> checked(tid_range); for (path_state::const_iterator p = ps.begin(); p != ps.end(); p++) { // the path to the top must be finite, otherwise there are loops. I(check_depth(ps, path_state_item(p), constants::max_path_depth, checked, min_tid) == true); } } static void confirm_unique_entries_in_directories(path_state const & ps) { std::vector<std::pair<tid,path_component> > entries; for (path_state::const_iterator i = ps.begin(); i != ps.end(); ++i) { if (null_name(path_item_name(i->second))) { I(path_item_parent(i->second) == root_tid); continue; } std::pair<tid,path_component> p = std::make_pair(path_item_parent(i->second), path_item_name(i->second)); entries.push_back(p); } // Now we check that entries is unique if (entries.empty()) return; std::sort(entries.begin(), entries.end()); std::vector<std::pair<tid,path_component> >::const_iterator leader, lagged; leader = entries.begin(); lagged = entries.begin(); I(leader != entries.end()); ++leader; while (leader != entries.end()) { I(*leader != *lagged); ++leader; ++lagged; } } static void sanity_check_path_state(path_state const & ps) { MM(ps); confirm_proper_tree(ps); confirm_unique_entries_in_directories(ps); } path_item::path_item(tid p, ptype t, path_component n) : parent(p), ty(t), name(n) { sanity_check_path_item(*this); } path_item::path_item(path_item const & other) : parent(other.parent), ty(other.ty), name(other.name) { sanity_check_path_item(*this); } path_item const & path_item::operator=(path_item const & other) { parent = other.parent; ty = other.ty; name = other.name; sanity_check_path_item(*this); return *this; } bool path_item::operator==(path_item const & other) const { return this->parent == other.parent && this->ty == other.ty && this->name == other.name; } static void check_states_agree(path_state const & p1, path_state const & p2) { path_analysis pa; pa.first = p1; pa.second = p2; // dump_analysis("agreement", pa); for (path_state::const_iterator i = p1.begin(); i != p1.end(); ++i) { path_state::const_iterator j = p2.find(i->first); I(j != p2.end()); I(path_item_type(i->second) == path_item_type(j->second)); // I(! (null_name(path_item_name(i->second)) // && // null_name(path_item_name(j->second)))); } } void sanity_check_path_analysis(path_analysis const & pr) { sanity_check_path_state(pr.first); sanity_check_path_state(pr.second); check_states_agree(pr.first, pr.second); check_states_agree(pr.second, pr.first); } // construction helpers static boost::shared_ptr<directory_node> new_dnode() { return boost::shared_ptr<directory_node>(new directory_node()); } static boost::shared_ptr<directory_node> dnode(directory_map & dir, tid t) { boost::shared_ptr<directory_node> node; directory_map::const_iterator dirent = dir.find(t); if (dirent == dir.end()) { node = new_dnode(); dir.insert(std::make_pair(t, node)); } else node = dirent->second; return node; } static void get_full_path(path_state const & state, tid t, std::vector<path_component> & pth) { std::vector<path_component> tmp; while(t != root_tid) { path_state::const_iterator i = state.find(t); I(i != state.end()); tmp.push_back(path_item_name(i->second)); t = path_item_parent(i->second); } pth.clear(); std::copy(tmp.rbegin(), tmp.rend(), inserter(pth, pth.begin())); } static void get_full_path(path_state const & state, tid t, file_path & pth) { std::vector<path_component> tmp; get_full_path(state, t, tmp); // L(F("got %d-entry path for tid %d\n") % tmp.size() % t); pth = file_path(tmp); } static void clear_rearrangement(change_set::path_rearrangement & pr) { pr.deleted_files.clear(); pr.deleted_dirs.clear(); pr.renamed_files.clear(); pr.renamed_dirs.clear(); pr.added_files.clear(); } static void clear_change_set(change_set & cs) { clear_rearrangement(cs.rearrangement); cs.deltas.clear(); } static void compose_rearrangement(path_analysis const & pa, change_set::path_rearrangement & pr) { clear_rearrangement(pr); for (path_state::const_iterator i = pa.first.begin(); i != pa.first.end(); ++i) { tid curr(path_state_tid(i)); std::vector<path_component> old_name, new_name; file_path old_path, new_path; path_state::const_iterator j = pa.second.find(curr); I(j != pa.second.end()); path_item old_item(path_state_item(i)); path_item new_item(path_state_item(j)); // compose names if (!null_name(path_item_name(old_item))) { get_full_path(pa.first, curr, old_name); old_path = file_path(old_name); } if (!null_name(path_item_name(new_item))) { get_full_path(pa.second, curr, new_name); new_path = file_path(new_name); } if (old_path == new_path) { /* L(F("skipping preserved %s %d : '%s'\n") % (path_item_type(old_item) == ptype_directory ? "directory" : "file") % curr % old_path); */ continue; } /* L(F("analyzing %s %d : '%s' -> '%s'\n") % (path_item_type(old_item) == ptype_directory ? "directory" : "file") % curr % old_path % new_path); */ if (null_name(path_item_name(old_item))) { // an addition (which must be a file, not a directory) I(! null_name(path_item_name(new_item))); I(path_item_type(new_item) != ptype_directory); pr.added_files.insert(new_path); } else if (null_name(path_item_name(new_item))) { // a deletion I(! null_name(path_item_name(old_item))); switch (path_item_type(new_item)) { case ptype_directory: pr.deleted_dirs.insert(old_path); break; case ptype_file: pr.deleted_files.insert(old_path); break; } } else { // a generic rename switch (path_item_type(new_item)) { case ptype_directory: pr.renamed_dirs.insert(std::make_pair(old_path, new_path)); break; case ptype_file: pr.renamed_files.insert(std::make_pair(old_path, new_path)); break; } } } } static bool lookup_path(std::vector<path_component> const & pth, directory_map const & dir, tid & t) { t = root_tid; for (std::vector<path_component>::const_iterator i = pth.begin(); i != pth.end(); ++i) { directory_map::const_iterator dirent = dir.find(t); if (dirent != dir.end()) { boost::shared_ptr<directory_node> node = dirent->second; directory_node::const_iterator entry = node->find(*i); if (entry == node->end()) return false; t = directory_entry_tid(entry); } else return false; } return true; } static bool lookup_path(file_path const & pth, directory_map const & dir, tid & t) { std::vector<path_component> vec; pth.split(vec); return lookup_path(vec, dir, t); } static tid ensure_entry(directory_map & dmap, path_state & state, tid dir_tid, ptype entry_ty, path_component entry, tid_source & ts) { I(! null_name(entry)); if (dir_tid != root_tid) { path_state::const_iterator parent = state.find(dir_tid); I( parent != state.end()); // if our parent is null, we immediately become null too, and attach to // the root node (where all null entries reside) if (null_name(path_item_name(path_state_item(parent)))) { tid new_tid = ts.next(); state.insert(std::make_pair(new_tid, path_item(root_tid, entry_ty, the_null_component))); return new_tid; } } boost::shared_ptr<directory_node> node = dnode(dmap, dir_tid); directory_node::const_iterator node_entry = node->find(entry); if (node_entry != node->end()) { I(node_entry->second.first == entry_ty); return node_entry->second.second; } else { tid new_tid = ts.next(); state.insert(std::make_pair(new_tid, path_item(dir_tid, entry_ty, entry))); node->insert(std::make_pair(entry, std::make_pair(entry_ty, new_tid))); return new_tid; } } static tid ensure_dir_in_map (std::vector<path_component> pth, directory_map & dmap, path_state & state, tid_source & ts) { tid dir_tid = root_tid; for (std::vector<path_component>::const_iterator p = pth.begin(); p != pth.end(); ++p) { dir_tid = ensure_entry(dmap, state, dir_tid, ptype_directory, *p, ts); } return dir_tid; } static tid ensure_dir_in_map (file_path const & path, directory_map & dmap, path_state & state, tid_source & ts) { std::vector<path_component> components; path.split(components); return ensure_dir_in_map(components, dmap, state, ts); } static tid ensure_file_in_map (file_path const & path, directory_map & dmap, path_state & state, tid_source & ts) { std::vector<path_component> prefix; path_component leaf_path; path.split(prefix); leaf_path = prefix.back(); prefix.pop_back(); I(! null_name(leaf_path)); tid dir_tid = ensure_dir_in_map(prefix, dmap, state, ts); return ensure_entry(dmap, state, dir_tid, ptype_file, leaf_path, ts); } static void ensure_entries_exist (path_state const & self_state, directory_map & other_dmap, path_state & other_state, tid_source & ts) { for (path_state::const_iterator i = self_state.begin(); i != self_state.end(); ++i) { if (other_state.find(path_state_tid(i)) != other_state.end()) continue; if (null_name(path_item_name(path_state_item(i)))) continue; file_path full; get_full_path(self_state, path_state_tid(i), full); switch (path_item_type(path_state_item(i))) { case ptype_directory: ensure_dir_in_map(full, other_dmap, other_state, ts); break; case ptype_file: ensure_file_in_map(full, other_dmap, other_state, ts); break; } } } static void apply_state_renumbering(state_renumbering const & renumbering, path_state & state) { sanity_check_path_state(state); path_state tmp(state); state.clear(); for (path_state::const_iterator i = tmp.begin(); i != tmp.end(); ++i) { path_item item = path_state_item(i); tid t = path_state_tid(i); state_renumbering::const_iterator j = renumbering.find(t); if (j != renumbering.end()) t = j->second; j = renumbering.find(item.parent); if (j != renumbering.end()) item.parent = j->second; state.insert(std::make_pair(t, item)); } sanity_check_path_state(state); } static void apply_state_renumbering(state_renumbering const & renumbering, path_analysis & pa) { apply_state_renumbering(renumbering, pa.first); apply_state_renumbering(renumbering, pa.second); } // this takes a path in the path space defined by input_dir and rebuilds it // in the path space defined by output_space, including any changes to // parents in the path (rather than directly to the path leaf name). it // therefore *always* succeeds; sometimes it does nothing if there's no // affected parent, but you always get a rebuilt path in the output space. static void reconstruct_path(file_path const & input, directory_map const & input_dir, path_state const & output_space, file_path & output) { std::vector<path_component> vec; std::vector<path_component> rebuilt; // L(F("reconstructing path '%s' under analysis\n") % input); input.split(vec); tid t = root_tid; std::vector<path_component>::const_iterator pth = vec.begin(); while (pth != vec.end()) { directory_map::const_iterator dirent = input_dir.find(t); if (dirent == input_dir.end()) break; boost::shared_ptr<directory_node> node = dirent->second; directory_node::const_iterator entry = node->find(*pth); if (entry == node->end()) break; { // check to see if this is the image of an added or deleted entry // (i.e. null name in output space), if so it terminates our // search. path_state::const_iterator i = output_space.find(directory_entry_tid(entry)); I(i != output_space.end()); if (null_name(path_item_name(path_state_item(i)))) { // L(F("input path element '%s' is null in output space, mapping truncated\n") % *pth); break; } } // L(F("resolved entry '%s' in reconstruction\n") % *pth); ++pth; t = directory_entry_tid(entry); if (directory_entry_type(entry) != ptype_directory) break; } get_full_path(output_space, t, rebuilt); while(pth != vec.end()) { // L(F("copying tail entry '%s' in reconstruction\n") % *pth); rebuilt.push_back(*pth); ++pth; } output = file_path(rebuilt); // L(F("reconstructed path '%s' as '%s'\n") % input % output); } static void build_directory_map(path_state const & state, directory_map & dir) { sanity_check_path_state(state); dir.clear(); // L(F("building directory map for %d entries\n") % state.size()); for (path_state::const_iterator i = state.begin(); i != state.end(); ++i) { tid curr = path_state_tid(i); path_item item = path_state_item(i); tid parent = path_item_parent(item); path_component name = path_item_name(item); ptype type = path_item_type(item); // L(F("adding entry %s (%s %d) to directory node %d\n") // % name % (type == ptype_directory ? "dir" : "file") % curr % parent); dnode(dir, parent)->insert(std::make_pair(name,std::make_pair(type, curr))); // also, make sure to add current node if it's a directory, even if // there are no entries in it if (type == ptype_directory) dnode(dir, curr); } } void analyze_rearrangement(change_set::path_rearrangement const & pr, path_analysis & pa, tid_source & ts) { directory_map first_map, second_map; state_renumbering renumbering; std::set<tid> damaged_in_second; pa.first.clear(); pa.second.clear(); for (std::set<file_path>::const_iterator f = pr.deleted_files.begin(); f != pr.deleted_files.end(); ++f) { tid x = ensure_file_in_map(*f, first_map, pa.first, ts); pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_file, the_null_component))); } for (std::set<file_path>::const_iterator d = pr.deleted_dirs.begin(); d != pr.deleted_dirs.end(); ++d) { tid x = ensure_dir_in_map(*d, first_map, pa.first, ts); pa.second.insert(std::make_pair(x, path_item(root_tid, ptype_directory, the_null_component))); } for (std::map<file_path,file_path>::const_iterator rf = pr.renamed_files.begin(); rf != pr.renamed_files.end(); ++rf) { tid a = ensure_file_in_map(rf->first, first_map, pa.first, ts); tid b = ensure_file_in_map(rf->second, second_map, pa.second, ts); I(renumbering.find(a) == renumbering.end()); renumbering.insert(std::make_pair(b,a)); damaged_in_second.insert(b); } for (std::map<file_path,file_path>::const_iterator rd = pr.renamed_dirs.begin(); rd != pr.renamed_dirs.end(); ++rd) { tid a = ensure_dir_in_map(rd->first, first_map, pa.first, ts); tid b = ensure_dir_in_map(rd->second, second_map, pa.second, ts); I(renumbering.find(a) == renumbering.end()); renumbering.insert(std::make_pair(b,a)); damaged_in_second.insert(b); } for (std::set<file_path>::const_iterator a = pr.added_files.begin(); a != pr.added_files.end(); ++a) { tid x = ensure_file_in_map(*a, second_map, pa.second, ts); pa.first.insert(std::make_pair(x, path_item(root_tid, ptype_file, the_null_component))); damaged_in_second.insert(x); } // we now have two states which probably have a number of entries in // common. we know already of an interesting set of entries they have in // common: all the renamed_foo entries. for each such renamed_foo(a,b) // entry, we made an entry in our state_renumbering of the form b->a, // while building the states. // dump_analysis("analyzed", pa); // dump_renumbering("first", renumbering); apply_state_renumbering(renumbering, pa.second); build_directory_map(pa.first, first_map); build_directory_map(pa.second, second_map); renumbering.clear(); // dump_analysis("renumbered once", pa); // that only gets us half way, though: // // - every object which was explicitly moved (thus stayed alive) has been // renumbered in re.second to have the same tid as in re.first // // - every object which was merely mentionned in passing -- say due to // being an intermediate directory in a path -- and was not moved, still // has differing tids in re.first and re.second (or worse, may only // even have an *entry* in one of them) // // the second point here is what we need to correct: if a path didn't // move, wasn't destroyed, and wasn't added, we want it to have the same // tid. but that's a relatively easy condition to check; we've been // keeping sets of all the objects which were damaged on each side of // this business anyways. // pass #1 makes sure that all the entries in each state *exist* within // the other state, even if they have the wrong numbers ensure_entries_exist (pa.first, second_map, pa.second, ts); ensure_entries_exist (pa.second, first_map, pa.first, ts); // pass #2 identifies common un-damaged elements from 2->1 and inserts // renumberings for (path_state::const_iterator i = pa.second.begin(); i != pa.second.end(); ++i) { tid first_tid, second_tid; second_tid = path_state_tid(i); file_path full; if (pa.first.find(second_tid) != pa.first.end()) continue; get_full_path(pa.second, second_tid, full); if (damaged_in_second.find(second_tid) != damaged_in_second.end()) continue; if (null_name(path_item_name(path_state_item(i)))) continue; I(lookup_path(full, first_map, first_tid)); renumbering.insert(std::make_pair(second_tid, first_tid)); } // dump_renumbering("second", renumbering); apply_state_renumbering(renumbering, pa.second); // dump_analysis("renumbered again", pa); // that should be the whole deal; if we don't have consensus at this // point we have done something wrong. sanity_check_path_analysis (pa); } void normalize_path_rearrangement(change_set::path_rearrangement & norm) { path_analysis tmp; tid_source ts; analyze_rearrangement(norm, tmp, ts); clear_rearrangement(norm); compose_rearrangement(tmp, norm); } void normalize_change_set(change_set & norm) { normalize_path_rearrangement(norm.rearrangement); change_set::delta_map tmp = norm