Rev 4411: Fixes for linear_nodes in the pyrex version. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

Fri Jun 12 21:21:17 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/1.16-better_heads

------------------------------------------------------------
revno: 4411
revision-id: john at arbash-meinel.com-20090612202108-blsyks1kdxod0lsk
parent: john at arbash-meinel.com-20090612193730-jwaocmo8a9m4t1jz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-better_heads
timestamp: Fri 2009-06-12 15:21:08 -0500
message:
  Fixes for linear_nodes in the pyrex version.
  Doesn't seem to specifically impact performance.
-------------- next part --------------
=== modified file 'bzrlib/_known_graph_py.py'

--- a/bzrlib/_known_graph_py.py	2009-06-12 19:37:30 +0000
+++ b/bzrlib/_known_graph_py.py	2009-06-12 20:21:08 +0000
@@ -186,36 +186,34 @@
                         child_node.gdfo = next_gdfo
                         heappush(todo, (next_gdfo, child_node))
 
-    def _get_dominators_to_keys(self, candidate_nodes):
-        """Get the reverse mapping from dominators => candidate_nodes.
+    def _get_dominators_to_nodes(self, candidate_nodes):
+        """Get the reverse mapping from dominator_key => candidate_nodes.
 
-        This might also exclude certain candidate_nodes if it is determined
-        that they share a dominator.
+        As a side effect, this can also remove potential candidate nodes if we
+        determine that they share a dominator.
         """
-        dom_to_key = {}
+        dom_to_node = {}
         keys_to_remove = []
         for node in candidate_nodes.values():
-            if node.linear_dominator in dom_to_key:
+            if node.linear_dominator in dom_to_node:
                 # This node already exists, resolve which node supersedes the
                 # other
-                other_node_key = dom_to_key[node.linear_dominator]
-                other_node = self._nodes[other_node_key]
+                other_node = dom_to_node[node.linear_dominator]
                 # There should be no way that nodes sharing a dominator could
                 # 'tie' for gdfo
                 assert other_node.gdfo != node.gdfo
                 if other_node.gdfo > node.gdfo:
                     # The other node has this node as an ancestor
                     keys_to_remove.append(node.key)
-                    continue
                 else:
                     # Replace the other node, and set this as the new key
                     keys_to_remove.append(other_node.key)
-                    dom_to_key[node.linear_dominator] = node.key
+                    dom_to_node[node.linear_dominator] = node
             else:
-                dom_to_key[node.linear_dominator] = node.key
+                dom_to_node[node.linear_dominator] = node
         for key in keys_to_remove:
             candidate_nodes.pop(key)
-        return dom_to_key
+        return dom_to_node
 
     def heads(self, keys):
         """Return the heads from amongst keys.
@@ -248,7 +246,7 @@
             return heads
         except KeyError:
             pass # compute it ourselves
-        dom_to_key = self._get_dominators_to_keys(candidate_nodes)
+        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
         if len(candidate_nodes) < 2:
             # We shrunk candidate_nodes and determined a new head
             return frozenset(candidate_nodes)
@@ -260,9 +258,9 @@
         if dom_heads_key in self._known_heads:
             # map back into the original keys
             heads = self._known_heads[dom_heads_key]
-            heads = frozenset([dom_to_key[key] for key in heads])
+            heads = frozenset([dom_to_node[key].key for key in heads])
             return heads
-        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_key)
+        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
         if self.do_cache:
             self._known_heads[heads_key] = heads
             # Cache the dominator heads
@@ -272,7 +270,7 @@
                 self._known_heads[dom_heads_key] = dom_heads
         return heads
 
-    def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_key):
+    def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
         queue = []
         to_cleanup = []
         to_cleanup_append = to_cleanup.append
@@ -325,11 +323,11 @@
                     candidate_nodes.pop(parent_key)
                     if len(candidate_nodes) <= 1:
                         break
-                if parent_key in dom_to_key:
-                    orig_key = dom_to_key[parent_key]
-                    if orig_key != node.key:
-                        if orig_key in candidate_nodes:
-                            candidate_nodes.pop(orig_key)
+                elif parent_key in dom_to_node:
+                    orig_node = dom_to_node[parent_key]
+                    if orig_node is not node:
+                        if orig_node.key in candidate_nodes:
+                            candidate_nodes.pop(orig_node.key)
                             if len(candidate_nodes) <= 1:
                                 break
                 parent_node = nodes[parent_key]

=== modified file 'bzrlib/_known_graph_pyx.pyx'
--- a/bzrlib/_known_graph_pyx.pyx	2009-06-12 04:44:24 +0000
+++ b/bzrlib/_known_graph_pyx.pyx	2009-06-12 20:21:08 +0000
@@ -404,16 +404,18 @@
             heads_key = PyFrozenSet_New(candidate_nodes)
         if len(candidate_nodes) < 2:
             return heads_key
-        # Check the linear dominators of these keys, to see if we already
-        # know the heads answer
-        dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes)
+        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
+        if PyDict_Size(candidate_nodes) < 2:
+            return frozenset(candidate_nodes)
+        dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes,
+                                                            dom_to_node)
         if heads is not None:
             if self.do_cache:
                 # This heads was not in the cache, or it would have been caught
                 # earlier, but the dom head *was*, so do the simple cache
                 PyDict_SetItem(self._known_heads, heads_key, heads)
             return heads
-        heads = self._heads_from_candidate_nodes(candidate_nodes)
+        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
         if self.do_cache:
             self._cache_heads(heads, heads_key, dom_lookup_key, candidate_nodes)
         return heads
@@ -434,9 +436,44 @@
         PyDict_SetItem(self._known_heads, dom_lookup_key,
                        PyFrozenSet_New(dom_heads))
 
-    cdef object _heads_from_dominators(self, candidate_nodes):
+    cdef _get_dominators_to_nodes(self, candidate_nodes):
+        """Get the reverse mapping from dominator_key => candidate_nodes.
+
+        As a side effect, this can also remove potential candidate nodes if we
+        determine that they share a dominator.
+        """
+        cdef Py_ssize_t pos
+        cdef _KnownGraphNode node, other_node
+        cdef PyObject *temp_node
+        cdef PyObject *maybe_node
+
+        dom_to_node = {}
+        keys_to_remove = []
+        pos = 0
+        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
+            node = <_KnownGraphNode>temp_node
+            dom_key = node.linear_dominator_node.key
+            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
+            if maybe_node == NULL:
+                PyDict_SetItem(dom_to_node, dom_key, node)
+            else:
+                other_node = <_KnownGraphNode>maybe_node
+                # These nodes share a dominator, one of them obviously
+                # supersedes the other, figure out which
+                if other_node.gdfo > node.gdfo:
+                    PyList_Append(keys_to_remove, node.key)
+                else:
+                    # This wins, replace the other
+                    PyList_Append(keys_to_remove, other_node.key)
+                    PyDict_SetItem(dom_to_node, dom_key, node)
+        for pos from 0 <= pos < PyList_GET_SIZE(keys_to_remove):
+            key = <object>PyList_GET_ITEM(keys_to_remove, pos)
+            candidate_nodes.pop(key)
+        return dom_to_node
+
+    cdef object _heads_from_dominators(self, candidate_nodes, dom_to_node):
         cdef PyObject *maybe_heads
-        cdef PyObject *maybe_key
+        cdef PyObject *maybe_node
         cdef _KnownGraphNode node
         cdef Py_ssize_t pos
         cdef PyObject *temp_node
@@ -452,33 +489,41 @@
             return dom_lookup_key, None
         # We need to map back from the dominator head to the original keys
         dom_heads = <object>maybe_heads
-        dom_to_key = {}
-        pos = 0
-        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
-            node = <_KnownGraphNode>temp_node
-            PyDict_SetItem(dom_to_key, node.linear_dominator_node.key,
-                                       node.key)
         heads = []
         for dom_key in dom_heads:
-            maybe_key = PyDict_GetItem(dom_to_key, dom_key)
-            if maybe_key == NULL:
+            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
+            if maybe_node == NULL:
                 # Should never happen
                 raise KeyError
-            PyList_Append(heads, <object>maybe_key)
+            node = <_KnownGraphNode>maybe_node
+            PyList_Append(heads, node.key)
         return dom_lookup_key, PyFrozenSet_New(heads)
 
     cdef int _process_parent(self, _KnownGraphNode node,
                              _KnownGraphNode parent_node,
-                             candidate_nodes,
+                             candidate_nodes, dom_to_node,
                              queue, int *replace_item) except -1:
         """Process the parent of a node, seeing if we need to walk it."""
         cdef PyObject *maybe_candidate
+        cdef PyObject *maybe_node
+        cdef _KnownGraphNode dom_child_node
         maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
         if maybe_candidate != NULL:
             candidate_nodes.pop(parent_node.key)
             # We could pass up a flag that tells the caller to stop processing,
             # but it doesn't help much, and makes the code uglier
             return 0
+        maybe_node = PyDict_GetItem(dom_to_node, parent_node.key)
+        if maybe_node != NULL:
+            # This is a dominator of a node
+            dom_child_node = <_KnownGraphNode>maybe_node
+            if dom_child_node is not node:
+                # It isn't a dominator of a node we are searching, so we should
+                # remove it from the search
+                maybe_candidate = PyDict_GetItem(candidate_nodes, dom_child_node.key)
+                if maybe_candidate != NULL:
+                    candidate_nodes.pop(dom_child_node.key)
+                    return 0
         if parent_node.ancestor_of is None:
             # This node hasn't been walked yet, so just project node's ancestor
             # info directly to parent_node, and enqueue it for later processing
@@ -499,7 +544,7 @@
             parent_node.ancestor_of = tuple(sorted(all_ancestors))
         return 0
 
-    cdef object _heads_from_candidate_nodes(self, candidate_nodes):
+    cdef object _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
         cdef _KnownGraphNode node
         cdef _KnownGraphNode parent_node
         cdef Py_ssize_t num_candidates
@@ -554,12 +599,12 @@
                 # We know that there is nothing between here and the tail
                 # that is interesting, so skip to the end
                 self._process_parent(node, node.linear_dominator_node,
-                                     candidate_nodes, queue, &replace_item)
+                                     candidate_nodes, dom_to_node, queue, &replace_item)
             else:
                 for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
                     parent_node = _get_parent(node.parents, pos)
                     self._process_parent(node, parent_node, candidate_nodes,
-                                         queue, &replace_item)
+                                         dom_to_node, queue, &replace_item)
         for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
             node = _get_list_node(self._to_cleanup, pos)
             node.ancestor_of = None