Rev 2929: (robertc) Prevent heads() calls from accessing all history unnecessarily. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Oct 23 09:21:14 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2929
revision-id: pqm at pqm.ubuntu.com-20071023082111-h6u34i4gvlb2nwch
parent: pqm at pqm.ubuntu.com-20071023030547-pjlo1thei6q984os
parent: robertc at robertcollins.net-20071023073514-r99iahf2lm257zrp
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-10-23 09:21:11 +0100
message:
(robertc) Prevent heads() calls from accessing all history unnecessarily. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
------------------------------------------------------------
revno: 2921.3.5
merged: robertc at robertcollins.net-20071023073514-r99iahf2lm257zrp
parent: robertc at robertcollins.net-20071022194931-ohs6yd08iy7deqlr
committer: Robert Collins <robertc at robertcollins.net>
branch nick: graph
timestamp: Tue 2007-10-23 17:35:14 +1000
message:
Used fixed rather than bugfixed at Ian's request in NEWS.
------------------------------------------------------------
revno: 2921.3.4
merged: robertc at robertcollins.net-20071022194931-ohs6yd08iy7deqlr
parent: robertc at robertcollins.net-20071022054449-v4kdk8bue5a7ckym
committer: Robert Collins <robertc at robertcollins.net>
branch nick: graph
timestamp: Tue 2007-10-23 05:49:31 +1000
message:
Review feedback.
------------------------------------------------------------
revno: 2921.3.3
merged: robertc at robertcollins.net-20071022054449-v4kdk8bue5a7ckym
parent: robertc at robertcollins.net-20071022054315-b7eddcxqlvy3q58j
committer: Robert Collins <robertc at robertcollins.net>
branch nick: graph
timestamp: Mon 2007-10-22 15:44:49 +1000
message:
Review feedback.
------------------------------------------------------------
revno: 2921.3.2
merged: robertc at robertcollins.net-20071022054315-b7eddcxqlvy3q58j
parent: robertc at robertcollins.net-20071022004427-x1pn4ywiuubx354f
committer: Robert Collins <robertc at robertcollins.net>
branch nick: graph
timestamp: Mon 2007-10-22 15:43:15 +1000
message:
Fix mistaken pdb import.
------------------------------------------------------------
revno: 2921.3.1
merged: robertc at robertcollins.net-20071022004427-x1pn4ywiuubx354f
parent: pqm at pqm.ubuntu.com-20071019201226-6z006xotgfe7zmu8
committer: Robert Collins <robertc at robertcollins.net>
branch nick: graph
timestamp: Mon 2007-10-22 10:44:27 +1000
message:
* Graph ``heads()`` queries have been bugfixed to no longer access all
history unnecessarily. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS 2007-10-22 21:25:20 +0000
+++ b/NEWS 2007-10-23 08:21:11 +0000
@@ -69,6 +69,9 @@
* XML inventory serialisation takes 20% less time while being stricter about
the contents. (Robert Collins)
+ * Graph ``heads()`` queries have been fixed to no longer access all history
+ unnecessarily. (Robert Collins)
+
IMPROVEMENTS:
* ``bzr+https://`` smart server across https now supported.
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-10-19 03:45:02 +0000
+++ b/bzrlib/graph.py 2007-10-22 19:49:31 +0000
@@ -91,7 +91,7 @@
specialized implementations for particular repository types. See
Repository.get_graph()
- :param parents_func: an object providing a get_parents call
+ :param parents_provider: An object providing a get_parents call
conforming to the behavior of StackedParentsProvider.get_parents
"""
self.get_parents = parents_provider.get_parents
@@ -239,7 +239,21 @@
# skip over the actual candidate for each searcher
for searcher in active_searchers.itervalues():
searcher.next()
+ # The common walker finds nodes that are common to two or more of the
+ # input keys, so that we don't access all history when a currently
+ # uncommon search point actually meets up with something behind a
+ # common search point. Common search points do not keep searches
+ # active; they just allow us to make searches inactive without
+ # accessing all history.
+ common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
+ ancestors = set()
+ # advance searches
+ try:
+ common_walker.next()
+ except StopIteration:
+ # No common points being searched at this time.
+ pass
for candidate in active_searchers.keys():
try:
searcher = active_searchers[candidate]
@@ -249,27 +263,40 @@
# a descendant of another candidate.
continue
try:
- ancestors = searcher.next()
+ ancestors.update(searcher.next())
except StopIteration:
del active_searchers[candidate]
continue
- for ancestor in ancestors:
- if ancestor in candidate_heads:
- candidate_heads.remove(ancestor)
- del searchers[ancestor]
- if ancestor in active_searchers:
- del active_searchers[ancestor]
+ # process found nodes
+ new_common = set()
+ for ancestor in ancestors:
+ if ancestor in candidate_heads:
+ candidate_heads.remove(ancestor)
+ del searchers[ancestor]
+ if ancestor in active_searchers:
+ del active_searchers[ancestor]
+ # it may meet up with a known common node
+ if ancestor in common_walker.seen:
+ # some searcher has encountered our known common nodes:
+ # just stop it
+ ancestor_set = set([ancestor])
+ for searcher in searchers.itervalues():
+ searcher.stop_searching_any(ancestor_set)
+ else:
+ # or it may have been just reached by all the searchers:
for searcher in searchers.itervalues():
if ancestor not in searcher.seen:
break
else:
- # if this revision was seen by all searchers, then it
- # is a descendant of all candidates, so we can stop
- # searching it, and any seen ancestors
+ # The final active searcher has just reached this node,
+ # making it be known as a descendant of all candidates,
+ # so we can stop searching it, and any seen ancestors
+ new_common.add(ancestor)
for searcher in searchers.itervalues():
seen_ancestors =\
searcher.find_seen_ancestors(ancestor)
searcher.stop_searching_any(seen_ancestors)
+ common_walker.start_searching(new_common)
return candidate_heads
def find_unique_lca(self, left_revision, right_revision):
@@ -306,69 +333,12 @@
def is_ancestor(self, candidate_ancestor, candidate_descendant):
"""Determine whether a revision is an ancestor of another.
- There are two possible outcomes: True and False, but there are three
- possible relationships:
-
- a) candidate_ancestor is an ancestor of candidate_descendant
- b) candidate_ancestor is an descendant of candidate_descendant
- c) candidate_ancestor is an sibling of candidate_descendant
-
- To check for a, we walk from candidate_descendant, looking for
- candidate_ancestor.
-
- To check for b, we walk from candidate_ancestor, looking for
- candidate_descendant.
-
- To make a and b more efficient, we can stop any searches that hit
- common ancestors.
-
- If we exhaust our searches, but neither a or b is true, then c is true.
-
- In order to find c efficiently, we must avoid searching from
- candidate_descendant or candidate_ancestor into common ancestors. But
- if we don't search common ancestors at all, we won't know if we hit
- common ancestors. So we have a walker for common ancestors. Note that
- its searches are not required to terminate in order to determine c to
- be true.
+ We answer this using heads() as heads() has the logic to perform the
+ smallest number of parent looksup to determine the ancestral
+ relationship between N revisions.
"""
- ancestor_walker = self._make_breadth_first_searcher(
- [candidate_ancestor])
- descendant_walker = self._make_breadth_first_searcher(
- [candidate_descendant])
- common_walker = self._make_breadth_first_searcher([])
- active_ancestor = True
- active_descendant = True
- while (active_ancestor or active_descendant):
- new_common = set()
- if active_descendant:
- try:
- nodes = descendant_walker.next()
- except StopIteration:
- active_descendant = False
- else:
- if candidate_ancestor in nodes:
- return True
- new_common.update(nodes.intersection(ancestor_walker.seen))
- if active_ancestor:
- try:
- nodes = ancestor_walker.next()
- except StopIteration:
- active_ancestor = False
- else:
- if candidate_descendant in nodes:
- return False
- new_common.update(nodes.intersection(
- descendant_walker.seen))
- try:
- new_common.update(common_walker.next())
- except StopIteration:
- pass
- for walker in (ancestor_walker, descendant_walker):
- for node in new_common:
- c_ancestors = walker.find_seen_ancestors(node)
- walker.stop_searching_any(c_ancestors)
- common_walker.start_searching(new_common)
- return False
+ return set([candidate_descendant]) == self.heads(
+ [candidate_ancestor, candidate_descendant])
class HeadsCache(object):
@@ -400,7 +370,7 @@
class _BreadthFirstSearcher(object):
- """Parallel search the breadth-first the ancestry of revisions.
+ """Parallel search breadth-first the ancestry of revisions.
This class implements the iterator protocol, but additionally
1. provides a set of seen ancestors, and
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-09-03 22:17:20 +0000
+++ b/bzrlib/tests/test_graph.py 2007-10-22 05:44:49 +0000
@@ -453,3 +453,57 @@
graph.heads(['rev2a', 'rev3b']))
self.assertEqual(set(['rev2c', 'rev3a']),
graph.heads(['rev2c', 'rev3a']))
+
+ def _run_heads_break_deeper(self, graph_dict, search):
+ """Run heads on a graph-as-a-dict.
+
+ If the search asks for the parents of 'deeper' the test will fail.
+ """
+ class stub(object):
+ pass
+ def get_parents(keys):
+ result = []
+ for key in keys:
+ if key == 'deeper':
+ self.fail('key deeper was accessed')
+ result.append(graph_dict[key])
+ return result
+ an_obj = stub()
+ an_obj.get_parents = get_parents
+ graph = _mod_graph.Graph(an_obj)
+ return graph.heads(search)
+
+ def test_heads_limits_search(self):
+ # test that a heads query does not search all of history
+ graph_dict = {
+ 'left':['common'],
+ 'right':['common'],
+ 'common':['deeper'],
+ }
+ self.assertEqual(set(['left', 'right']),
+ self._run_heads_break_deeper(graph_dict, ['left', 'right']))
+
+ def test_heads_limits_search_assymetric(self):
+ # test that a heads query does not search all of history
+ graph_dict = {
+ 'left':['midleft'],
+ 'midleft':['common'],
+ 'right':['common'],
+ 'common':['aftercommon'],
+ 'aftercommon':['deeper'],
+ }
+ self.assertEqual(set(['left', 'right']),
+ self._run_heads_break_deeper(graph_dict, ['left', 'right']))
+
+ def test_heads_limits_search_common_search_must_continue(self):
+ # test that common nodes are still queried, preventing
+ # all-the-way-to-origin behaviour in the following graph:
+ graph_dict = {
+ 'h1':['shortcut', 'common1'],
+ 'h2':['common1'],
+ 'shortcut':['common2'],
+ 'common1':['common2'],
+ 'common2':['deeper'],
+ }
+ self.assertEqual(set(['h1', 'h2']),
+ self._run_heads_break_deeper(graph_dict, ['h1', 'h2']))
More information about the bazaar-commits
mailing list