Rev 3104: Initial attempt at proper handling of shortcuts. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
John Arbash Meinel
john at arbash-meinel.com
Fri Dec 14 21:29:38 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
------------------------------------------------------------
revno: 3104
revision-id:john at arbash-meinel.com-20071214212912-e9qgqb2z44k6nr01
parent: john at arbash-meinel.com-20071214184807-af9dw18c6qonpj2t
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-14 15:29:12 -0600
message:
Initial attempt at proper handling of shortcuts.
We add a second refining loop that keeps searching the common
nodes until only ancestors of all uncommon nodes remain.
It passes the test cases.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
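
As background for the diff below: find_difference() walks a breadth-first
searcher from each tip and initially treats everything that only one
searcher saw as unique, but a long shortcut can make a genuinely common
ancestor look unique simply because one side has not walked far enough yet.
Here is a minimal standalone sketch (illustrative names, not bzrlib API; a
plain dict stands in for the parents provider) of the full-ancestry
reference result that the new refining loop converges to without walking
all of history:

def ancestry(parent_map, tip):
    """All ancestors of tip (inclusive), found breadth-first."""
    seen = set()
    pending = set([tip])
    while pending:
        revision_id = pending.pop()
        if revision_id in seen or revision_id not in parent_map:
            continue
        seen.add(revision_id)
        pending.update(parent_map[revision_id])
    return seen

def reference_difference(parent_map, left_tip, right_tip):
    """The sets find_difference must produce: nodes unique to each side.

    This walks *all* history; the point of the refinement in the diff
    below is to reach the same answer while stopping much earlier.
    """
    left = ancestry(parent_map, left_tip)
    right = ancestry(parent_map, right_tip)
    return left.difference(right), right.difference(left)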
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-14 18:48:07 +0000
+++ b/bzrlib/graph.py 2007-12-14 21:29:12 +0000
@@ -243,13 +243,10 @@
"""Determine the graph difference between two revisions"""
border, common, searchers = self._find_border_ancestors(
[left_revision, right_revision])
- # Now that we have the superset of uncommon nodes, lets refine it with
- # a bit more searching.
+ self._search_for_extra_common(common, searchers)
left = searchers[0].seen
right = searchers[1].seen
- left_diff = left.difference(right).difference(common)
- right_diff = right.difference(left).difference(common)
- return left_diff, right_diff
+ return left.difference(right), right.difference(left)
def _make_breadth_first_searcher(self, revisions):
return _BreadthFirstSearcher(revisions, self)
@@ -280,10 +277,10 @@
next_to_search = set()
while True:
- parents = self.get_parent_map(next_to_search)
+ parent_map = self.get_parent_map(next_to_search)
newly_seen = set()
for searcher in searchers:
- new_ancestors = searcher.step(parents)
+ new_ancestors = searcher.step(parent_map)
if new_ancestors:
newly_seen.update(new_ancestors)
new_common = set()
@@ -361,13 +358,13 @@
active_searchers = dict(searchers)
# The first parents we will be accessing are just the candidate_heads,
# so ask the parents provider to pull them up
- parents = self.get_parent_map(candidate_heads)
+ parent_map = self.get_parent_map(candidate_heads)
# skip over the actual candidate for each searcher, and figure out what
# the next nodes are going to be.
next_to_search = set()
for searcher in active_searchers.itervalues():
- next_to_search.update(searcher.step(parents))
+ next_to_search.update(searcher.step(parent_map))
# The common walker finds nodes that are common to two or more of the
# input keys, so that we don't access all history when a currently
# uncommon search point actually meets up with something behind a
@@ -376,10 +373,10 @@
# accessing all history.
common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
- parents = self.get_parent_map(next_to_search)
+ parent_map = self.get_parent_map(next_to_search)
ancestors = set()
# advance searches
- new_common = common_walker.step(parents)
+ new_common = common_walker.step(parent_map)
for candidate in active_searchers.keys():
try:
searcher = active_searchers[candidate]
@@ -388,7 +385,7 @@
# through this for loop, because it was determined to be
# a descendant of another candidate.
continue
- next_ancestors = searcher.step(parents)
+ next_ancestors = searcher.step(parent_map)
if next_ancestors:
ancestors.update(next_ancestors)
else:
@@ -481,36 +478,171 @@
return set([candidate_descendant]) == self.heads(
[candidate_ancestor, candidate_descendant])
- def _remove_simple_descendants(self, revisions, parents):
+ def _search_for_extra_common(self, common, searchers):
+ """Make sure that unique nodes are genuinely unique.
+
+ After a simple search, we end up with genuine common nodes, but some
+ nodes classified as uncommon may actually be common as well; we just
+ didn't search far enough to see them from every side.
+
+ We know that we have searched enough when all common search tips are
+ descended from all unique (uncommon) nodes because we know that a node
+ cannot be an ancestor of its own ancestor.
+
+ :param common: A set of common nodes
+ :param searchers: The searchers returned from _find_border_ancestors
+ :return: None
+ """
+ # Basic algorithm...
+ # A) The passed in searchers should all be on the same tips, thus
+ # they should be considered the "common" searchers.
+ # B) We find the difference between the searchers, these are the
+ # "unique" nodes for each side.
+ # C) We do a quick culling so that we only start searching from the
+ # more interesting unique nodes. (A unique ancestor is more
+ # interesting than any of its children.)
+ # D) We start searching for ancestors common to all unique nodes.
+ # E) We have the common searchers stop searching any ancestors of
+ # nodes found by (D)
+ # F) When there are no more common search tips, we stop
+ assert len(searchers) == 2, (
+ "Algorithm not yet implemented for > 2 searchers")
+ common_searchers = searchers
+ left_searcher = searchers[0]
+ right_searcher = searchers[1]
+ unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
+ self._remove_simple_descendants(unique, self.get_parent_map(unique))
+ # TODO: jam 20071214 Would it be possible to seed these searchers with
+ # the revisions that we have already seen on each side?
+ # Maybe something like:
+ # unique_searchers = []
+ # for left_unique in left.difference(right):
+ # l_searcher = self._make_breadth_first_searcher(left_unique)
+ # l_searcher.seen.update(left.seen)
+ # ... (same for right.difference(left))
+ # This might also be easier to set up for the case of >2
+ # searchers.
+ unique_searchers = [self._make_breadth_first_searcher([r])
+ for r in unique]
+ # Aggregate all of the searchers into a single common searcher, would
+ # it be okay to do this?
+ # common_searcher = self._make_breadth_first_searcher([])
+ # for searcher in searchers:
+ # common_searcher.start_searching(searcher.will_search())
+ # common_searcher.seen.update(searcher.seen)
+ next_to_search = set()
+ next_to_search.update(left_searcher.will_search())
+ next_to_search.update(right_searcher.will_search())
+
+ common_ancestors_unique = set()
+
+ while True: # If we have no more nodes we have nothing to do
+ parent_map = self.get_parent_map(next_to_search)
+ # XXX: Any nodes here which don't match between searchers indicate
+ # that we have found genuinely unique nodes, which would not
+ # have been found by the other searching techniques
+ newly_seen_common = set()
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.step(parent_map))
+ newly_seen_unique = set()
+ for searcher in unique_searchers:
+ new_ancestors = searcher.step(parent_map)
+ if new_ancestors:
+ newly_seen_unique.update(new_ancestors)
+ new_common_unique = set()
+ for revision in newly_seen_unique:
+ if revision in common_ancestors_unique:
+ # TODO: Do we need to add it to new_common_unique, since it
+ # seems to have already been found... ?
+ new_common_unique.add(revision)
+ continue
+ for searcher in unique_searchers:
+ if revision not in searcher.seen:
+ break
+ else:
+ # This is a border because it is the first common node that we
+ # see after walking for a while.
+ new_common_unique.add(revision)
+ if newly_seen_common:
+ # These are nodes descended from one of the 'common' searchers.
+ # Make sure all searchers are on the same page
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.find_seen_ancestors(newly_seen_common))
+ for searcher in common_searchers:
+ searcher.start_searching(newly_seen_common)
+ if new_common_unique:
+ # We found some ancestors that are common, jump all the way to
+ # their most ancestral node that we have already seen.
+ for searcher in unique_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+ # Since these are common, we can grab another set of ancestors
+ # that we have seen
+ for searcher in common_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+
+ # Now we have a complete set of common nodes which are
+ # ancestors of the unique nodes.
+ # We can tell all of the unique searchers to start at these
+ # nodes, and tell all of the common searchers to *stop*
+ # searching these nodes
+ for searcher in unique_searchers:
+ searcher.start_searching(new_common_unique)
+ for searcher in common_searchers:
+ searcher.stop_searching_any(new_common_unique)
+ common_ancestors_unique.update(new_common_unique)
+
+ # Figure out what the searchers will be searching next
+ next_to_search_unique = set()
+ for searcher in unique_searchers:
+ next_to_search_unique.update(searcher.will_search())
+
+ # If the unique_searchers have nothing left to search, we are
+ # done
+ if not next_to_search_unique:
+ break
+
+ next_to_search = next_to_search_unique
+
+ # Next check the next nodes for the common searchers
+ next_to_search_common = set()
+ for searcher in common_searchers:
+ next_to_search_common.update(searcher.will_search())
+ # The common search has finished, we are done
+ if not next_to_search_common:
+ break
+ next_to_search.update(next_to_search_common)
+
+ def _remove_simple_descendants(self, revisions, parent_map):
"""remove revisions which are children of other ones in the set
This doesn't do any graph searching, it just checks the immediate
- parents to find if there are any children which can be removed.
+ parent_map entries to find any children which can be removed.
:param revisions: A set of revision_ids
:return: A set of revision_ids with the children removed
"""
simple_ancestors = revisions.copy()
# TODO: jam 20071214 we *could* restrict it to searching only the
- # parents of revisions already present in 'revisions', but
+ # parent_map entries of revisions already present in 'revisions', but
# considering the general use case, I think this is actually
# better.
# This is the same as the following loop. I don't know that it is any
# faster.
- ## simple_ancestors.difference_update(r for r, p_ids in parents.iteritems()
+ ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
## if p_ids is not None and revisions.intersection(p_ids))
## return simple_ancestors
# Yet Another Way, invert the parent map (which can be cached)
## descendants = {}
- ## for revision_id, parent_ids in parents.iteritems():
+ ## for revision_id, parent_ids in parent_map.iteritems():
## for p_id in parent_ids:
## descendants.setdefault(p_id, []).append(revision_id)
## for revision in revisions.intersection(descendants):
## simple_ancestors.difference_update(descendants[revision])
## return simple_ancestors
- for revision, parent_ids in parents.iteritems():
+ for revision, parent_ids in parent_map.iteritems():
if parent_ids is None:
continue
for parent_id in parent_ids:
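
A standalone version of the _remove_simple_descendants helper may make
culling step (C) above clearer. This is a sketch matching the final loop
in the hunk above, not the bzrlib method itself: it drops any candidate
whose parent is also a candidate, so the later search starts only from the
most ancestral unique nodes.

def remove_simple_descendants(revisions, parent_map):
    """Drop revisions that are direct children of others in the set.

    No graph searching is done: only the immediate parents in
    parent_map are consulted, since a child can never be more
    ancestral than its own parent.
    """
    simple_ancestors = set(revisions)
    for revision_id, parent_ids in parent_map.items():
        if parent_ids is None:
            continue
        for parent_id in parent_ids:
            if parent_id in revisions:
                # revision_id has a parent inside the set, so it is a
                # simple descendant and can be culled
                simple_ancestors.discard(revision_id)
                break
    return simple_ancestors

For example, with b -> a and c -> b, only 'a' survives:
remove_simple_descendants(set('abc'), {'a': [], 'b': ['a'], 'c': ['b']})
returns set(['a']).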
@@ -598,15 +730,16 @@
def will_search(self):
"""Give the list of nodes that will be searched next."""
- if self._search_revisions is None:
- return self._start
- else:
- return self._search_revisions
+ # will_search does not give a valid response before the first step,
+ # because that step needs no parents: it directly returns the start
+ # nodes
+ assert self._search_revisions is not None
+ return self._search_revisions
- def step(self, parents):
+ def step(self, parent_map):
"""Step to the next set of ancestors.
- :param parents: A dictionary mapping revision_id => parent_ids
+ :param parent_map: A dictionary mapping revision_id => parent_ids
It is assumed that all parents will be available. Callers should
use "will_search()" to find what revisions this searcher will need,
and then load them outside of this function.
@@ -617,7 +750,7 @@
else:
new_search_revisions = set()
for revision_id in self._search_revisions:
- parent_ids = parents.get(revision_id, None)
+ parent_ids = parent_map.get(revision_id, None)
if parent_ids is None:
continue
new_search_revisions.update(parent_ids)
@@ -632,8 +765,11 @@
Ancestors are returned in the order they are seen in a breadth-first
traversal. No ancestor will be returned more than once.
"""
- to_search = self.will_search()
- parent_map = self._parents_provider.get_parent_map(to_search)
+ if self._search_revisions is not None:
+ parent_map = self._parents_provider.get_parent_map(
+ self._search_revisions)
+ else:
+ parent_map = {}
next_revisions = self.step(parent_map)
if not next_revisions:
raise StopIteration()
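
To make the will_search()/step() contract above concrete, here is a
minimal stand-in for _BreadthFirstSearcher (a sketch following the same
protocol as the hunks above, not the real class): callers ask
will_search() which revisions the next step needs, batch-load their
parents once, and hand the resulting map to every searcher's step().

class SketchSearcher(object):
    """Minimal breadth-first searcher using the batched-parents API."""

    def __init__(self, revisions, parents_provider):
        self._start = set(revisions)
        self._search_revisions = None   # None until the first step
        self.seen = set(revisions)
        self._parents_provider = parents_provider

    def will_search(self):
        """The revisions whose parents the next step() will consult."""
        # Not valid before the first step: that step returns the start
        # nodes directly and needs no parents at all.
        assert self._search_revisions is not None
        return self._search_revisions

    def step(self, parent_map):
        """Advance one generation, using the pre-fetched parent_map."""
        if self._search_revisions is None:
            self._search_revisions = self._start
        else:
            new_search = set()
            for revision_id in self._search_revisions:
                parent_ids = parent_map.get(revision_id)
                if parent_ids is not None:
                    new_search.update(parent_ids)
            self._search_revisions = new_search.difference(self.seen)
        self.seen.update(self._search_revisions)
        return self._search_revisions

    def next(self):
        """Step alone, fetching the parents we need ourselves."""
        if self._search_revisions is not None:
            parent_map = self._parents_provider.get_parent_map(
                self._search_revisions)
        else:
            parent_map = {}   # the first step needs no parents
        next_revisions = self.step(parent_map)
        if not next_revisions:
            raise StopIteration()
        return next_revisions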
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-12-14 18:48:07 +0000
+++ b/bzrlib/tests/test_graph.py 2007-12-14 21:29:12 +0000
@@ -194,24 +194,6 @@
'i':['e', 'g'], 'j':['g'], 'k':['j'],
'l':['k'], 'm':['i', 'l'], 'n':['l', 'h']}
-# a--
-# |\ \
-# b | c
-# | | |
-# d | e
-# | | |
-# f | |
-# |\| |
-# | g |
-# | ./
-# | h
-# |/|
-# i
-# |
-# j
-# |
-# k
-
# NULL_REVISION
# |
# a
@@ -223,48 +205,26 @@
# d
# |\
# e f
-# |\|\
-# | g h
+# | |\
+# i | h
+# |\| |
+# | g |
# | | |
-# i j |
+# | j |
# | | |
# | k |
# | | |
# | l |
# |/|/
# m n
-complex_shortcut2 = {'a':[NULL_REVISION], 'b':['a'], 'c':['b'], 'd':['c'],
- 'e':['d'], 'f':['d'], 'g':['f', 'i'], 'h':['f'],
+complex_shortcut2 = {'d':[NULL_REVISION],
+ 'x':['d'], 'y':['x'],
+ 'e':['y'], 'f':['d'], 'g':['f', 'i'], 'h':['f'],
'i':['e'], 'j':['g'], 'k':['j'],
- 'l':['k'], 'm':['i', 'l'], 'n':['l', 'h']}
-## NULL_REVISION
-## |
-## a
-## |
-## b
-## |
-## c
-## |
-## d-.
-## |\ \
-## e f |
-## |\| |
-## | g h
-## | | |
-## i j |
-## | | |
-## | k |
-## | |/
-## | l
-## |/
-## m
-## |
-## n
-##
-##complex_shortcut2 = {'a':[NULL_REVISION], 'b':['a'], 'c':['b'], 'd':['c'],
-## 'e':['d'], 'f':['d'], 'g':['e', 'f'], 'h':['d'],
-## 'i':['e'], 'j':['g'], 'k':['j'],
-## 'l':['p', 'h'], 'm':['i', 'l'], 'n':['m']}
+ 'l':['k'], 'm':['i', 's'], 'n':['s', 'h'],
+ 'o':['l'], 'p':['o'], 'q':['p'],
+ 'r':['q'], 's':['r'],
+ }
# Shortcut with extra root
# We have a long history shortcut, and an extra root, which is why we can't
@@ -321,6 +281,8 @@
class TestGraph(TestCaseWithMemoryTransport):
def make_graph(self, ancestors):
+ # XXX: This seems valid, is there a reason to actually create a
+ # repository and put things in it?
return _mod_graph.Graph(_mod_graph.DictParentsProvider(ancestors))
tree = self.prepare_memory_tree('.')
self.build_ancestry(tree, ancestors)
@@ -525,9 +487,8 @@
graph.find_difference('m', 'n'))
def test_graph_difference_complex_shortcut2(self):
- import pdb; pdb.set_trace()
- graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(complex_shortcut2))
- self.assertEqual((set(['m', 'i']), set(['h', 'n'])),
+ graph = self.make_graph(complex_shortcut2)
+ self.assertEqual((set(['m']), set(['h', 'n'])),
graph.find_difference('m', 'n'))
def test_graph_difference_shortcut_extra_root(self):
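
The updated complex_shortcut2 expectation can be checked against the
full-ancestry reference sketched near the top of this message: 'i' drops
out of the left-unique set because, via the long s..j chain down to g, it
is an ancestor of 'n' as well as of 'm'. A hypothetical standalone check,
reusing reference_difference from that sketch (NULL_REVISION is bzrlib's
'null:' sentinel; any unreachable marker works here):

NULL_REVISION = 'null:'
complex_shortcut2 = {'d': [NULL_REVISION],
                     'x': ['d'], 'y': ['x'],
                     'e': ['y'], 'f': ['d'], 'g': ['f', 'i'], 'h': ['f'],
                     'i': ['e'], 'j': ['g'], 'k': ['j'],
                     'l': ['k'], 'm': ['i', 's'], 'n': ['s', 'h'],
                     'o': ['l'], 'p': ['o'], 'q': ['p'],
                     'r': ['q'], 's': ['r'],
                     }

left, right = reference_difference(complex_shortcut2, 'm', 'n')
assert (left, right) == (set(['m']), set(['h', 'n'])), (left, right)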