Rev 4414: A new heads() method based on gdfo. in file:///home/vila/src/bzr/reviews/vila-better-heads/

Wed Jun 17 13:32:22 BST 2009

At file:///home/vila/src/bzr/reviews/vila-better-heads/

------------------------------------------------------------
revno: 4414
revision-id: v.ladeuil+lp at free.fr-20090617123221-ijscdmxhxds026c0
parent: v.ladeuil+lp at free.fr-20090616134841-mu1e58vknaend6dz
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: vila-better-heads
timestamp: Wed 2009-06-17 14:32:21 +0200
message:
  A new heads() method based on gdfo.
  
  * bzrlib/tests/test__known_graph.py:
  (TestKnownGraph.test_heads_with_ghost): Add tests to ensure ghosts
  are corectly handled.
  
  * bzrlib/_known_graph_py.py:
  (KnownGraph.heads): A new version taking avantage of gdfo to
  reduce the number of nodes processed. This implementation passes
  all the currenlty defined tests and then some. Using time_graph.py
  reports 0.220s instead of the previous 7.350s.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'

--- a/bzrlib/_known_graph_py.py	2009-06-16 13:48:41 +0000
+++ b/bzrlib/_known_graph_py.py	2009-06-17 12:32:21 +0000
@@ -245,6 +245,65 @@
         This is done by searching the ancestries of each key.  Any key that is
         reachable from another key is not returned; all the others are.
 
+        This operation scales with the relative depth between any two keys. It
+        uses gdfo to avoid walking all ancestry.
+
+        :param keys: An iterable of keys.
+        :return: A set of the heads. Note that as a set there is no ordering
+            information. Callers will need to filter their input to create
+            order if they need it.
+        """
+        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
+        if revision.NULL_REVISION in candidate_nodes:
+            # NULL_REVISION is only a head if it is the only entry
+            candidate_nodes.pop(revision.NULL_REVISION)
+            if not candidate_nodes:
+                return frozenset([revision.NULL_REVISION])
+        if len(candidate_nodes) < 2:
+            # No or only one candidate
+            return frozenset(candidate_nodes)
+        heads_key = frozenset(candidate_nodes)
+        if heads_key != frozenset(keys):
+            # Mention duplicates
+            note('%s != %s', heads_key, frozenset(keys))
+        # Do we have a cached result ?
+        try:
+            heads = self._known_heads[heads_key]
+            return heads
+        except KeyError:
+            pass
+        # Let's compute the heads
+        seen = {}
+        pending = []
+        min_gdfo = None
+        for node in candidate_nodes.values():
+            if node.parent_keys: # protect against ghosts, jam, fixme ?
+                pending.extend(node.parent_keys)
+            if min_gdfo is None or node.gdfo < min_gdfo:
+                min_gdfo = node.gdfo
+        nodes = self._nodes
+        while pending:
+            node_key = pending.pop()
+            if node_key in seen:
+                # node already appears in some ancestry
+                continue
+            seen[node_key] = True
+            node = nodes[node_key]
+            if node.gdfo <= min_gdfo:
+                continue
+            if node.parent_keys: # protect against ghosts, jam, fixme ?
+                pending.extend(node.parent_keys)
+        heads = heads_key.difference(seen.keys())
+        if self.do_cache:
+            self._known_heads[heads_key] = heads
+        return heads
+
+    def xheads(self, keys):
+        """Return the heads from amongst keys.
+
+        This is done by searching the ancestries of each key.  Any key that is
+        reachable from another key is not returned; all the others are.
+
         This operation scales with the relative depth between any two keys. If
         any two keys are completely disconnected all ancestry of both sides
         will be retrieved.

=== modified file 'bzrlib/tests/test__known_graph.py'
--- a/bzrlib/tests/test__known_graph.py	2009-06-12 19:37:30 +0000
+++ b/bzrlib/tests/test__known_graph.py	2009-06-17 12:32:21 +0000
@@ -230,3 +230,14 @@
         self.assertEqual(set(['z']), graph.heads(['w', 's', 'z']))
         self.assertEqual(set(['w', 'q']), graph.heads(['w', 's', 'q']))
         self.assertEqual(set(['z']), graph.heads(['s', 'z']))
+
+    def test_heads_with_ghost(self):
+        graph = self.make_known_graph(test_graph.with_ghost)
+        self.assertEqual(set(['e', 'g']), graph.heads(['e', 'g']))
+        self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c']))
+        self.assertEqual(set(['a', 'g']), graph.heads(['a', 'g']))
+        self.assertEqual(set(['f', 'g']), graph.heads(['f', 'g']))
+        self.assertEqual(set(['c']), graph.heads(['c', 'g']))
+        self.assertEqual(set(['c']), graph.heads(['c', 'b', 'd', 'g']))
+        self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'e', 'g']))
+        self.assertEqual(set(['a', 'c']), graph.heads(['a', 'c', 'f']))