Rev 3387: Tweak _BreadthFirstSearcher.find_seen_ancestors() in http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_differences
John Arbash Meinel
john at arbash-meinel.com
Wed Apr 23 00:14:23 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.4-dev/find_differences
------------------------------------------------------------
revno: 3387
revision-id: john at arbash-meinel.com-20080422225828-l6qigns5f4t81dbi
parent: john at arbash-meinel.com-20080422210318-z0mc5q4hdxsur9qm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: find_differences
timestamp: Tue 2008-04-22 17:58:28 -0500
message:
Tweak _BreadthFirstSearcher.find_seen_ancestors()
The old function stepped too far: it would query the very tip nodes,
which had not actually been probed yet. This was further exacerbated
because it used a _BreadthFirstSearcher to do the sub-iteration,
which probably had the same problem.
Overall, this seems to be considerably faster.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
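
To make the new behaviour easy to follow before reading the diff, here is a
minimal standalone sketch of the reworked walk, with the searcher state passed
in explicitly. The function name, the 'not_searched_yet' argument, and the
get_parent_map callable are illustrative stand-ins, not bzrlib API:

def find_seen_ancestors_sketch(revisions, seen, get_parent_map,
                               not_searched_yet=()):
    """Walk ancestors of 'revisions', restricted to nodes already seen.

    Unlike a full sub-searcher, this only ever queries nodes that are
    known to be seen, and it never steps through tips listed in
    'not_searched_yet' (nodes returned but not yet probed).
    """
    pending = set(revisions).intersection(seen)
    seen_ancestors = set(pending)
    while pending:
        parent_map = get_parent_map(pending)
        all_parents = []
        for parent_ids in parent_map.values():
            all_parents.extend(parent_ids)
        # Only follow parents that were already seen and are new here.
        next_pending = set(all_parents).intersection(seen)
        next_pending.difference_update(seen_ancestors)
        seen_ancestors.update(next_pending)
        # Report unprobed tips as seen, but do not step through them.
        next_pending.difference_update(not_searched_yet)
        pending = next_pending
    return seen_ancestors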
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2008-04-22 21:03:18 +0000
+++ b/bzrlib/graph.py 2008-04-22 22:58:28 +0000
@@ -18,6 +18,7 @@
     errors,
     revision,
     symbol_versioning,
+    trace,
     tsort,
     )
 from bzrlib.deprecated_graph import (node_distances, select_farthest)
@@ -481,9 +482,9 @@
     def _search_for_extra_common(self, common, searchers):
         """Make sure that unique nodes are genuinely unique.
 
-        After a simple search, we end up with genuine common nodes, but some
-        uncommon nodes might actually be descended from common nodes, and we
-        just didn't search far enough.
+        After _find_border_ancestors, all nodes marked "common" are indeed
+        common. Some of the nodes considered unique are not, due to history
+        shortcuts stopping the searches early.
 
         We know that we have searched enough when all common search tips are
         descended from all unique (uncommon) nodes because we know that a node
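
A concrete example of the "history shortcuts" mentioned in the new docstring:
the tiny graph below has a node that is genuinely common but looks unique
after a naive lockstep search. All revision ids are invented for illustration.

# Hypothetical history where a shortcut stops the search early:
#
#        base
#        /  |
#       c   |     'R' has a shortcut edge straight to 'base'
#      / \  |
#     L    R
#
parent_map = {
    'L': ('c',),
    'R': ('c', 'base'),  # the shortcut edge
    'c': ('base',),
    'base': (),
}
# Searching breadth-first from L and R in lockstep: after one step the
# left searcher has seen {c} and the right one {c, base}. 'c' is
# common, so a naive search can stop here, leaving 'base' marked as
# unique to the right side even though it is an ancestor of both tips.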
@@ -500,19 +501,26 @@
         #    "unique" nodes for each side.
         # C) We do a quick culling so that we only start searching from the
         #    more interesting unique nodes. (A unique ancestor is more
-        #    interesting that any of its children.)
+        #    interesting than any of its children.)
         # D) We start searching for ancestors common to all unique nodes.
         # E) We have the common searchers stop searching any ancestors of
         #    nodes found by (D)
         # F) When there are no more common search tips, we stop
+
+        # TODO: We need a way to remove unique_searchers when they overlap with
+        #       other unique searchers.
         assert len(searchers) == 2, (
             "Algorithm not yet implemented for > 2 searchers")
         common_searchers = searchers
         left_searcher = searchers[0]
         right_searcher = searchers[1]
         unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
+        total_unique = len(unique)
         unique = self._remove_simple_descendants(unique,
                     self.get_parent_map(unique))
+        simple_unique = len(unique)
+        trace.mutter('Starting %s unique searchers for %s unique revisions',
+                     simple_unique, total_unique)
         # TODO: jam 20071214 Would it be possible to seed these searchers with
         #       the revisions that we have already seen on each side?
         #       Maybe something like:
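
To illustrate the culling in step (C): a minimal sketch of what a step like
_remove_simple_descendants can do, assuming the usual {revision: parent_ids}
parent-map shape. This is an illustration of the idea, not bzrlib's actual
implementation.

def remove_simple_descendants_sketch(candidates, parent_map):
    """Drop candidates that are a child of another candidate.

    A unique ancestor is more interesting than any of its children,
    so only the most ancestral candidates are kept as search seeds.
    """
    keep = set(candidates)
    for revision in candidates:
        for parent_id in parent_map.get(revision, ()):
            if parent_id in candidates:
                # This node has a candidate parent; the parent is the
                # more interesting place to start searching from.
                keep.discard(revision)
                break
    return keep

# With parent_map from the shortcut example above:
#   remove_simple_descendants_sketch(set(['L', 'c', 'base']), parent_map)
# keeps only set(['base']), the most ancestral candidate.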
@@ -568,12 +576,8 @@
             if new_common_unique:
                 # We found some ancestors that are common, jump all the way to
                 # their most ancestral node that we have already seen.
-                try:
-                    for searcher in unique_searchers:
-                        new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
-                except TypeError:
-                    import pdb; pdb.set_trace()
-                    raise
+                for searcher in unique_searchers:
+                    new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
                 # Since these are common, we can grab another set of ancestors
                 # that we have seen
                 for searcher in common_searchers:
@@ -854,16 +858,30 @@
 
     def find_seen_ancestors(self, revisions):
         """Find ancestors of these revisions that have already been seen."""
-        searcher = _BreadthFirstSearcher(revisions, self._parents_provider)
-        seen_ancestors = set()
-        for ancestors in searcher:
-            stop_nodes = set()
-            for ancestor in ancestors:
-                if ancestor not in self.seen:
-                    stop_nodes.add(ancestor)
-                else:
-                    seen_ancestors.add(ancestor)
-            searcher.stop_searching_any(stop_nodes)
+        all_seen = self.seen
+        pending = set(revisions).intersection(all_seen)
+        seen_ancestors = set(pending)
+
+        if self._returning == 'next':
+            # self.seen contains what nodes have been returned, not what nodes
+            # have been queried. We don't want to probe for nodes that haven't
+            # been searched yet.
+            not_searched_yet = self._next_query
+        else:
+            not_searched_yet = ()
+        get_parent_map = self._parents_provider.get_parent_map
+        while pending:
+            parent_map = get_parent_map(pending)
+            all_parents = []
+            # We don't care if it is a ghost, since it can't be seen if it is
+            # a ghost
+            for parent_ids in parent_map.itervalues():
+                all_parents.extend(parent_ids)
+            next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)
+            seen_ancestors.update(next_pending)
+            next_pending.difference_update(not_searched_yet)
+            pending = next_pending
+
         return seen_ancestors
 
     def stop_searching_any(self, revisions):
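
And a quick toy run of the standalone sketch from above, showing that an
unprobed tip is reported as seen but never queried. The revision ids and the
counting wrapper are made up for illustration:

parent_map = {
    'tip': ('mid',),
    'mid': ('base',),
    'base': ('older',),
    'older': (),
}
queried = []
def get_parent_map(keys):
    # Record every probe so we can check what got queried.
    queried.extend(keys)
    return dict((k, parent_map[k]) for k in keys if k in parent_map)

# 'base' was returned by the searcher but not yet queried ('next'
# mode), so the walk reports it and stops instead of probing it.
seen = set(['tip', 'mid', 'base'])
result = find_seen_ancestors_sketch(['tip'], seen, get_parent_map,
                                    not_searched_yet=set(['base']))
assert result == set(['tip', 'mid', 'base'])
assert 'base' not in queried  # the unprobed tip is never queried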