Rev 3104: Initial attempt at proper handling of shortcuts. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
John Arbash Meinel
john at arbash-meinel.com
Fri Dec 14 21:29:38 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
------------------------------------------------------------
revno: 3104
revision-id:john at arbash-meinel.com-20071214212912-e9qgqb2z44k6nr01
parent: john at arbash-meinel.com-20071214184807-af9dw18c6qonpj2t
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-14 15:29:12 -0600
message:
Initial attempt at proper handling of shortcuts.
We add a second refining loop that keeps searching the common
nodes until only ancestors of all uncommon nodes remain.
It passes the test cases.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
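
As background for the diff below: find_difference() walks a breadth-first
searcher from each tip and initially treats everything that only one
searcher saw as unique, but a long shortcut can make a genuinely common
ancestor look unique simply because one side has not walked far enough yet.
Here is a minimal standalone sketch (illustrative names, not bzrlib API; a
plain dict stands in for the parents provider) of the full-ancestry
reference result that the new refining loop converges to without walking
all of history:

def ancestry(parent_map, tip):
    """All ancestors of tip (inclusive), found breadth-first."""
    seen = set()
    pending = set([tip])
    while pending:
        revision_id = pending.pop()
        if revision_id in seen or revision_id not in parent_map:
            continue
        seen.add(revision_id)
        pending.update(parent_map[revision_id])
    return seen

def reference_difference(parent_map, left_tip, right_tip):
    """The sets find_difference must produce: nodes unique to each side.

    This walks *all* history; the point of the refinement in the diff
    below is to reach the same answer while stopping much earlier.
    """
    left = ancestry(parent_map, left_tip)
    right = ancestry(parent_map, right_tip)
    return left.difference(right), right.difference(left)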
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-14 18:48:07 +0000
+++ b/bzrlib/graph.py 2007-12-14 21:29:12 +0000
@@ -243,13 +243,10 @@
"""Determine the graph difference between two revisions"""
border, common, searchers = self._find_border_ancestors(
[left_revision, right_revision])
- # Now that we have the superset of uncommon nodes, lets refine it with
- # a bit more searching.
+ self._search_for_extra_common(common, searchers)
left = searchers[0].seen
right = searchers[1].seen
- left_diff = left.difference(right).difference(common)
- right_diff = right.difference(left).difference(common)
- return left_diff, right_diff
+ return left.difference(right), right.difference(left)
def _make_breadth_first_searcher(self, revisions):
return _BreadthFirstSearcher(revisions, self)
@@ -280,10 +277,10 @@
next_to_search = set()
while True:
- parents = self.get_parent_map(next_to_search)
+ parent_map = self.get_parent_map(next_to_search)
newly_seen = set()
for searcher in searchers:
- new_ancestors = searcher.step(parents)
+ new_ancestors = searcher.step(parent_map)
if new_ancestors:
newly_seen.update(new_ancestors)
new_common = set()
@@ -361,13 +358,13 @@
active_searchers = dict(searchers)
# The first parents we will be accessing are just the candidate_heads,
# so ask the parents provider to pull them up
- parents = self.get_parent_map(candidate_heads)
+ parent_map = self.get_parent_map(candidate_heads)
# skip over the actual candidate for each searcher, and figure out what
# the next nodes are going to be.
next_to_search = set()
for searcher in active_searchers.itervalues():
- next_to_search.update(searcher.step(parents))
+ next_to_search.update(searcher.step(parent_map))
# The common walker finds nodes that are common to two or more of the
# input keys, so that we don't access all history when a currently
# uncommon search point actually meets up with something behind a
@@ -376,10 +373,10 @@
# accessing all history.
common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
- parents = self.get_parent_map(next_to_search)
+ parent_map = self.get_parent_map(next_to_search)
ancestors = set()
# advance searches
- new_common = common_walker.step(parents)
+ new_common = common_walker.step(parent_map)
for candidate in active_searchers.keys():
try:
searcher = active_searchers[candidate]
@@ -388,7 +385,7 @@
# through this for loop, because it was determined to be
# a descendant of another candidate.
continue
- next_ancestors = searcher.step(parents)
+ next_ancestors = searcher.step(parent_map)
if next_ancestors:
ancestors.update(next_ancestors)
else:
@@ -481,36 +478,171 @@
return set([candidate_descendant]) == self.heads(
[candidate_ancestor, candidate_descendant])
- def _remove_simple_descendants(self, revisions, parents):
+ def _search_for_extra_common(self, common, searchers):
+ """Make sure that unique nodes are genuinely unique.
+
+ After a simple search, we end up with genuine common nodes, but some
+ nodes classified as uncommon may actually be common as well; we just
+ didn't search far enough to see them from every side.
+
+ We know that we have searched enough when all common search tips are
+ descended from all unique (uncommon) nodes because we know that a node
+ cannot be an ancestor of its own ancestor.
+
+ :param common: A set of common nodes
+ :param searchers: The searchers returned from _find_border_ancestors
+ :return: None
+ """
+ # Basic algorithm...
+ # A) The passed in searchers should all be on the same tips, thus
+ # they should be considered the "common" searchers.
+ # B) We find the difference between the searchers, these are the
+ # "unique" nodes for each side.
+ # C) We do a quick culling so that we only start searching from the
+ # more interesting unique nodes. (A unique ancestor is more
+ # interesting than any of its children.)
+ # D) We start searching for ancestors common to all unique nodes.
+ # E) We have the common searchers stop searching any ancestors of
+ # nodes found by (D)
+ # F) When there are no more common search tips, we stop
+ assert len(searchers) == 2, (
+ "Algorithm not yet implemented for > 2 searchers")
+ common_searchers = searchers
+ left_searcher = searchers[0]
+ right_searcher = searchers[1]
+ unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
+ self._remove_simple_descendants(unique, self.get_parent_map(unique))
+ # TODO: jam 20071214 Would it be possible to seed these searchers with
+ # the revisions that we have already seen on each side?
+ # Maybe something like:
+ # unique_searchers = []
+ # for left_unique in left.difference(right):
+ # l_searcher = self._make_breadth_first_searcher(left_unique)
+ # l_searcher.seen.update(left.seen)
+ # ... (same for right.difference(left))
+ # This might also be easier to set up for the case of >2
+ # searchers.
+ unique_searchers = [self._make_breadth_first_searcher([r])
+ for r in unique]
+ # Aggregate all of the searchers into a single common searcher, would
+ # it be okay to do this?
+ # common_searcher = self._make_breadth_first_searcher([])
+ # for searcher in searchers:
+ # common_searcher.start_searching(searcher.will_search())
+ # common_searcher.seen.update(searcher.seen)
+ next_to_search = set()
+ next_to_search.update(left_searcher.will_search())
+ next_to_search.update(right_searcher.will_search())
+
+ common_ancestors_unique = set()
+
+ while True: # If we have no more nodes we have nothing to do
+ parent_map = self.get_parent_map(next_to_search)
+ # XXX: Any nodes here which don't match between searchers indicate
+ # that we have found genuinely unique nodes, which would not
+ # have been found by the other searching techniques
+ newly_seen_common = set()
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.step(parent_map))
+ newly_seen_unique = set()
+ for searcher in unique_searchers:
+ new_ancestors = searcher.step(parent_map)
+ if new_ancestors:
+ newly_seen_unique.update(new_ancestors)
+ new_common_unique = set()
+ for revision in newly_seen_unique:
+ if revision in common_ancestors_unique:
+ # TODO: Do we need to add it to new_common_unique, since it
+ # seems to have already been found... ?
+ new_common_unique.add(revision)
+ continue
+ for searcher in unique_searchers:
+ if revision not in searcher.seen:
+ break
+ else:
+ # This is a border because it is the first common node that we
+ # see after walking for a while.
+ new_common_unique.add(revision)
+ if newly_seen_common:
+ # These are nodes descended from one of the 'common' searchers.
+ # Make sure all searchers are on the same page
+ for searcher in common_searchers:
+ newly_seen_common.update(searcher.find_seen_ancestors(newly_seen_common))
+ for searcher in common_searchers:
+ searcher.start_searching(newly_seen_common)
+ if new_common_unique:
+ # We found some ancestors that are common, jump all the way to
+ # their most ancestral node that we have already seen.
+ for searcher in unique_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+ # Since these are common, we can grab another set of ancestors
+ # that we have seen
+ for searcher in common_searchers:
+ new_common_unique.update(searcher.find_seen_ancestors(new_common_unique))
+
+ # Now we have a complete set of common nodes which are
+ # ancestors of the unique nodes.
+ # We can tell all of the unique searchers to start at these
+ # nodes, and tell all of the common searchers to *stop*
+ # searching these nodes
+ for searcher in unique_searchers:
+ searcher.start_searching(new_common_unique)
+ for searcher in common_searchers:
+ searcher.stop_searching_any(new_common_unique)
+ common_ancestors_unique.update(new_common_unique)
+
+ # Figure out what the searchers will be searching next
+ next_to_search_unique = set()
+ for searcher in unique_searchers:
+ next_to_search_unique.update(searcher.will_search())
+
+ # If the unique_searchers have nothing left to search, we are
+ # done
+ if not next_to_search_unique:
+ break
+
+ next_to_search = next_to_search_unique
+
+ # Next check the next nodes for the common searchers
+ next_to_search_common = set()
+ for searcher in common_searchers:
+ next_to_search_common.update(searcher.will_search())
+ # The common search has finished, we are done
+ if not next_to_search_common:
+ break
+ next_to_search.update(next_to_search_common)
+
+ def _remove_simple_descendants(self, revisions, parent_map):
"""remove revisions which are children of other ones in the set
This doesn't do any graph searching, it just checks the immediate
- parents to find if there are any children which can be removed.
+ parent_map entries to find any children which can be removed.
:param revisions: A set of revision_ids
:return: A set of revision_ids with the children removed
"""
simple_ancestors = revisions.copy()
# TODO: jam 20071214 we *could* restrict it to searching only the
- # parents of revisions already present in 'revisions', but
+ # parent_map entries of revisions already present in 'revisions', but
# considering the general use case, I think this is actually
# better.
# This is the same as the following loop. I don't know that it is any
# faster.
- ## simple_ancestors.difference_update(r for r, p_ids in parents.iteritems()
+ ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
## if p_ids is not None and revisions.intersection(p_ids))
## return simple_ancestors
# Yet Another Way, invert the parent map (which can be cached)
## descendants = {}
- ## for revision_id, parent_ids in parents.iteritems():
+ ## for revision_id, parent_ids in parent_map.iteritems():
## for p_id in parent_ids:
## descendants.setdefault(p_id, []).append(revision_id)
## for revision in revisions.intersection(descendants):
## simple_ancestors.difference_update(descendants[revision])
## return simple_ancestors
- for revision, parent_ids in parents.iteritems():
+ for revision, parent_ids in parent_map.iteritems():
if parent_ids is None:
continue
for parent_id in parent_ids:
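
A standalone version of the _remove_simple_descendants helper may make
culling step (C) above clearer. This is a sketch matching the final loop
in the hunk above, not the bzrlib method itself: it drops any candidate
whose parent is also a candidate, so the later search starts only from the
most ancestral unique nodes.

def remove_simple_descendants(revisions, parent_map):
    """Drop revisions that are direct children of others in the set.

    No graph searching is done: only the immediate parents in
    parent_map are consulted, since a child can never be more
    ancestral than its own parent.
    """
    simple_ancestors = set(revisions)
    for revision_id, parent_ids in parent_map.items():
        if parent_ids is None:
            continue
        for parent_id in parent_ids:
            if parent_id in revisions:
                # revision_id has a parent inside the set, so it is a
                # simple descendant and can be culled
                simple_ancestors.discard(revision_id)
                break
    return simple_ancestors

For example, with b -> a and c -> b, only 'a' survives:
remove_simple_descendants(set('abc'), {'a': [], 'b': ['a'], 'c': ['b']})
returns set(['a']).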
@@ -598,15 +730,16 @@
def will_search(self):
"""Give the list of nodes that will be searched next."""
- if self._search_revisions is None:
- return self._start
- else:
- return self._search_revisions
+ # will_search does not give a valid response before the first step,
+ # because that step needs no parents: it directly returns the start
+ # nodes
+ assert self._search_revisions is not None
+ return self._search_revisions
- def step(self, parents):
+ def step(self, parent_map):
"""Step to the next set of ancestors.
- :param parents: A dictionary mapping revision_id => parent_ids
+ :param parent_map: A dictionary mapping revision_id => parent_ids
It is assumed that all parents will be available. Callers should
use "will_search()" to find what revisions this searcher will need,
and then load them outside of this function.
@@ -617,7 +750,7 @@
else:
new_search_revisions = set()
for revision_id in self._search_revisions:
- parent_ids = parents.get(revision_id, None)
+ parent_ids = parent_map.get(revision_id, None)
if parent_ids is None:
continue
new_search_revisions.update(parent_ids)
@@ -632,8 +765,11 @@
Ancestors are returned in the order they are seen in a breadth-first
traversal. No ancestor will be returned more than once.
"""
- to_search = self.will_search()
- parent_map = self._parents_provider.get_parent_map(to_search)
+ if self._search_revisions is not None:
+ parent_map = self._parents_provider.get_parent_map(
+ self._search_revisions)
+ else:
+ parent_map = {}
next_revisions = self.step(parent_map)
if not next_revisions:
raise StopIteration()
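
To make the will_search()/step() contract above concrete, here is a
minimal stand-in for _BreadthFirstSearcher (a sketch following the same
protocol as the hunks above, not the real class): callers ask
will_search() which revisions the next step needs, batch-load their
parents once, and hand the resulting map to every searcher's step().

class SketchSearcher(object):
    """Minimal breadth-first searcher using the batched-parents API."""

    def __init__(self, revisions, parents_provider):
        self._start = set(revisions)
        self._search_revisions = None   # None until the first step
        self.seen = set(revisions)
        self._parents_provider = parents_provider

    def will_search(self):
        """The revisions whose parents the next step() will consult."""
        # Not valid before the first step: that step returns the start
        # nodes directly and needs no parents at all.
        assert self._search_revisions is not None
        return self._search_revisions

    def step(self, parent_map):
        """Advance one generation, using the pre-fetched parent_map."""
        if self._search_revisions is None:
            self._search_revisions = self._start
        else:
            new_search = set()
            for revision_id in self._search_revisions:
                parent_ids = parent_map.get(revision_id)
                if parent_ids is not None:
                    new_search.update(parent_ids)
            self._search_revisions = new_search.difference(self.seen)
        self.seen.update(self._search_revisions)
        return self._search_revisions

    def next(self):
        """Step alone, fetching the parents we need ourselves."""
        if self._search_revisions is not None:
            parent_map = self._parents_provider.get_parent_map(
                self._search_revisions)
        else:
            parent_map = {}   # the first step needs no parents
        next_revisions = self.step(parent_map)
        if not next_revisions:
            raise StopIteration()
        return next_revisions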
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-12-14 18:48:07 +0000
+++ b/bzrlib/tests/test_graph.py 2007-12-14 21:29:12 +0000
@@ -194,24 +194,6 @@
'i':['e', 'g'], 'j':['g'], 'k':['j'],
'l':['k'], 'm':['i', 'l'], 'n':['l', 'h']}
-# a--
-# |\ \
-# b | c
-# | | |
-# d | e
-# | | |
-# f | |
-# |\| |
-# | g |
-# | ./
-# | h
-# |/|
-# i
-# |
-# j
-# |
-# k
-
# NULL_REVISION
# |
# a
@@ -223,48 +205,26 @@
# d
# |\
# e f
-# |\|\
-# | g h
+# | |\
+# i | h
+# |\| |
+# | g |
# | | |
-# i j |
+# | j |
# | | |
# | k |
# | | |
# | l |
# |/|/
# m n
-complex_shortcut2 = {'a':[NULL_REVISION], 'b':['a'], 'c':['b'], 'd':['c'],
- 'e':['d'], 'f':['d'], 'g':['f', 'i'], 'h':['f'],
+complex_shortcut2 = {'d':[NULL_REVISION],
+ 'x':['d'], 'y':['x'],
+ 'e':['y'], 'f':['d'], 'g':['f', 'i'], 'h':['f'],
'i':['e'], 'j':['g'], 'k':['j'],
- 'l':['k'], 'm':['i', 'l'], 'n':['l', 'h']}
-## NULL_REVISION
-## |
-## a
-## |
-## b
-## |
-## c
-## |
-## d-.
-## |\ \
-## e f |
-## |\| |
-## | g h
-## | | |
-## i j |
-## | | |
-## | k |
-## | |/
-## | l
-## |/
-## m
-## |
-## n
-##
-##complex_shortcut2 = {'a':[NULL_REVISION], 'b':['a'], 'c':['b'], 'd':['c'],
-## 'e':['d'], 'f':['d'], 'g':['e', 'f'], 'h':['d'],
-## 'i':['e'], 'j':['g'], 'k':['j'],
-## 'l':['p', 'h'], 'm':['i', 'l'], 'n':['m']}
+ 'l':['k'], 'm':['i', 's'], 'n':['s', 'h'],
+ 'o':['l'], 'p':['o'], 'q':['p'],
+ 'r':['q'], 's':['r'],
+ }
# Shortcut with extra root
# We have a long history shortcut, and an extra root, which is why we can't
@@ -321,6 +281,8 @@
class TestGraph(TestCaseWithMemoryTransport):
def make_graph(self, ancestors):
+ # XXX: This seems valid, is there a reason to actually create a
+ # repository and put things in it?
return _mod_graph.Graph(_mod_graph.DictParentsProvider(ancestors))
tree = self.prepare_memory_tree('.')
self.build_ancestry(tree, ancestors)
@@ -525,9 +487,8 @@
graph.find_difference('m', 'n'))
def test_graph_difference_complex_shortcut2(self):
- import pdb; pdb.set_trace()
- graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(complex_shortcut2))
- self.assertEqual((set(['m', 'i']), set(['h', 'n'])),
+ graph = self.make_graph(complex_shortcut2)
+ self.assertEqual((set(['m']), set(['h', 'n'])),
graph.find_difference('m', 'n'))
def test_graph_difference_shortcut_extra_root(self):
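
The updated complex_shortcut2 expectation can be checked against the
full-ancestry reference sketched near the top of this message: 'i' drops
out of the left-unique set because, via the long s..j chain down to g, it
is an ancestor of 'n' as well as of 'm'. A hypothetical standalone check,
reusing reference_difference from that sketch (NULL_REVISION is bzrlib's
'null:' sentinel; any unreachable marker works here):

NULL_REVISION = 'null:'
complex_shortcut2 = {'d': [NULL_REVISION],
                     'x': ['d'], 'y': ['x'],
                     'e': ['y'], 'f': ['d'], 'g': ['f', 'i'], 'h': ['f'],
                     'i': ['e'], 'j': ['g'], 'k': ['j'],
                     'l': ['k'], 'm': ['i', 's'], 'n': ['s', 'h'],
                     'o': ['l'], 'p': ['o'], 'q': ['p'],
                     'r': ['q'], 's': ['r'],
                     }

left, right = reference_difference(complex_shortcut2, 'm', 'n')
assert (left, right) == (set(['m']), set(['h', 'n'])), (left, right)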