Rev 3096: Implement get_parent_map for all parent providers in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

John Arbash Meinel john at arbash-meinel.com
Sat Dec 8 02:08:30 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update

------------------------------------------------------------
revno: 3096
revision-id:john at arbash-meinel.com-20071208020754-8jo7xvj2e2l4dzrc
parent: john at arbash-meinel.com-20071207222622-wrglvu7ydcwnjh3r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-07 20:07:54 -0600
message:
  Implement get_parent_map for all parent providers
  Use it in preference to get_parents, since it has much lower API friction.
  We need a dict anyway, and most of the providers would rather supply it
  in that form.
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
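
As a quick illustration of the new contract (keys a source does not know
about are simply left out of the returned dict, and a first user commit
reports [NULL_REVISION] as its parent), here is a minimal, hypothetical
dict-backed provider in the same style as the graph.py stub in the diff
below; the class name and sample revision ids are made up for the example:

    from bzrlib import revision as _mod_revision

    class ExampleDictParentsProvider(object):
        """Toy parents provider backed by a plain dict (illustration only)."""

        def __init__(self, ancestry):
            self.ancestry = ancestry

        def get_parent_map(self, keys):
            # Only the keys this source knows about appear in the result.
            ancestry = self.ancestry
            return dict((k, ancestry[k]) for k in keys if k in ancestry)

        def get_parents(self, revision_ids):
            # The old list-returning API, expressed on top of get_parent_map.
            found = self.get_parent_map(revision_ids)
            return [found.get(r, None) for r in revision_ids]

    provider = ExampleDictParentsProvider({
        'rev1': [_mod_revision.NULL_REVISION],   # first user commit
        'rev2': ['rev1'],
        })
    print provider.get_parent_map(['rev2', 'ghost'])   # {'rev2': ['rev1']}
    print provider.get_parents(['rev2', 'ghost'])      # [['rev1'], None]

The callers in graph.py only ever need the dict form, which is why the
list-returning get_parents can now be a thin shim over get_parent_map.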
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2007-12-07 22:26:22 +0000
+++ b/bzrlib/graph.py	2007-12-08 02:07:54 +0000
@@ -55,6 +55,22 @@
     def get_parents(self, revisions):
         return [self.ancestry.get(r, None) for r in revisions]
 
+    def get_parent_map(self, keys):
+        """Get a mapping of keys => parents
+
+        A dictionary is returned with an entry for each key present in this
+        source. If this source doesn't have information about a key, it should
+        not include an entry.
+
+        [NULL_REVISION] is used as the parent of the first user-committed
+        revision.  Its parent list is empty.
+
+        :param keys: An iterable returning keys to check (eg revision_ids)
+        :return: A dictionary mapping each key to its parents
+        """
+        ancestry = self.ancestry
+        return dict((k, ancestry[k]) for k in keys if k in ancestry)
+
 
 class _StackedParentsProvider(object):
 
@@ -76,16 +92,32 @@
         If the revision is not present (i.e. a ghost), None is used in place
         of the list of parents.
         """
+        found = self.get_parent_map(revision_ids)
+        get = found.get
+        return [get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
+        """Get a mapping of keys => parents
+
+        A dictionary is returned with an entry for each key present in this
+        source. If this source doesn't have information about a key, it should
+        not include an entry.
+
+        [NULL_REVISION] is used as the parent of the first user-committed
+        revision.  Its parent list is empty.
+
+        :param keys: An iterable returning keys to check (eg revision_ids)
+        :return: A dictionary mapping each key to its parents
+        """
         found = {}
+        remaining = set(keys)
         for parents_provider in self._parent_providers:
-            pending_revisions = [r for r in revision_ids if r not in found]
-            parent_list = parents_provider.get_parents(pending_revisions)
-            new_found = dict((k, v) for k, v in zip(pending_revisions,
-                             parent_list) if v is not None)
+            new_found = parents_provider.get_parent_map(remaining)
             found.update(new_found)
-            if len(found) == len(revision_ids):
+            remaining.difference_update(new_found)
+            if not remaining:
                 break
-        return [found.get(r, None) for r in revision_ids]
+        return found
 
 
 class CachingParentsProvider(object):
@@ -114,23 +146,33 @@
         If the revision is not present (i.e. a ghost), None is used in place
         of the list of parents.
         """
+        found = self.get_parent_map(revision_ids)
+        get = found.get
+        return [get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
         needed = set()
-        # TODO: is it worth having the dict here, or is it better to just load
-        # the cache, and then return requests from it?
-        parents = {}
+        # If the _real_provider doesn't have a key, we cache a value of None,
+        # which we then later use to realize we cannot provide a value for that
+        # key.
+        parent_map = {}
         cache = self._cache
-        for revision_id in revision_ids:
-            if revision_id in cache:
-                parents[revision_id] = cache[revision_id]
+        for key in keys:
+            if key in cache:
+                value = cache[key]
+                if value is not None:
+                    parent_map[key] = value
             else:
-                needed.add(revision_id)
+                needed.add(key)
 
         if needed:
-            new_parents = dict(zip(needed,
-                                   self._real_provider.get_parents(needed)))
+            new_parents = self._real_provider.get_parent_map(needed)
             cache.update(new_parents)
-            parents.update(new_parents)
-        return [parents[r] for r in revision_ids]
+            parent_map.update(new_parents)
+            needed.difference_update(new_parents)
+            for key in needed:
+                cache[key] = None
+        return parent_map
 
 
 class Graph(object):
@@ -151,6 +193,7 @@
             conforming to the behavior of StackedParentsProvider.get_parents
         """
         self.get_parents = parents_provider.get_parents
+        self.get_parent_map = parents_provider.get_parent_map
         self._parents_provider = parents_provider
 
     def __repr__(self):
@@ -239,7 +282,7 @@
             if len(active_searchers) == 0:
                 return border_ancestors, common_ancestors, [s.seen for s in
                                                             searchers]
-            parents = self._preload_parents(next_to_search)
+            parents = self.get_parent_map(next_to_search)
             new_common = common_searcher.step(parents)
             if new_common:
                 common_ancestors.update(new_common)
@@ -279,15 +322,6 @@
             for searcher in active_searchers:
                 next_to_search.update(searcher.will_search())
 
-    def _preload_parents(self, keys):
-        """Call get_parents() for all of the keys we will need.
-
-        :return: A dictionary mapping key => parents
-        """
-        # TODO: Use izip? the number of keys here will probably always be
-        #       relatively small, it may be faster to just use zip
-        return dict(zip(keys, self.get_parents(keys)))
-
     def heads(self, keys):
         """Return the heads from amongst keys.
 
@@ -316,7 +350,7 @@
         active_searchers = dict(searchers)
         # The first parents we will be accessing are just the candidate_heads,
         # so ask the parents provider to pull them up
-        parents = self._preload_parents(candidate_heads)
+        parents = self.get_parent_map(candidate_heads)
 
         # skip over the actual candidate for each searcher, and figure out what
         # the next nodes are going to be.
@@ -331,7 +365,7 @@
         # accessing all history.
         common_walker = self._make_breadth_first_searcher([])
         while len(active_searchers) > 0:
-            parents = self._preload_parents(next_to_search)
+            parents = self.get_parent_map(next_to_search)
             ancestors = set()
             # advance searches
             new_common = common_walker.step(parents)
@@ -420,7 +454,7 @@
         An ancestor may sort after a descendant if the relationship is not
         visible in the supplied list of revisions.
         """
-        sorter = tsort.TopoSorter(zip(revisions, self.get_parents(revisions)))
+        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
         return sorter.iter_topo_order()
 
     def is_ancestor(self, candidate_ancestor, candidate_descendant):
@@ -525,7 +559,7 @@
         else:
             new_search_revisions = set()
             for revision_id in self._search_revisions:
-                parent_ids = parents[revision_id]
+                parent_ids = parents.get(revision_id, None)
                 if parent_ids is None:
                     continue
                 new_search_revisions.update(parent_ids)
@@ -541,9 +575,8 @@
         traversal.  No ancestor will be returned more than once.
         """
         to_search = self.will_search()
-        parent_ids = self._parents_provider.get_parents(to_search)
-        parents = dict(zip(to_search, parent_ids))
-        next_revisions = self.step(parents)
+        parent_map = self._parents_provider.get_parent_map(to_search)
+        next_revisions = self.step(parent_map)
         if not next_revisions:
             raise StopIteration()
         return next_revisions
@@ -558,7 +591,13 @@
         while revisions:
             seen_ancestors.update(revisions)
             next_revisions = set()
-            for parents in self._parents_provider.get_parents(revisions):
+            # TODO: couldn't we just iterate the parent_map, since if a key
+            #       isn't present we would skip it anyway?  It depends on
+            #       whether parent_map can ever contain keys that weren't
+            #       requested.
+            parent_map = self._parents_provider.get_parent_map(revisions)
+            for revision_id in revisions:
+                parents = parent_map.get(revision_id, None)
                 if parents is None:
                     continue
                 next_revisions.update(self.seen.intersection(parents))
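
(An aside on the graph.py changes above: the stacked and caching providers
now compose directly around get_parent_map.  A small, hypothetical usage
sketch, assuming the class names in this branch and using a made-up stub
provider:)

    from bzrlib.graph import _StackedParentsProvider, CachingParentsProvider

    class StubProvider(object):
        """Tiny dict-backed provider for the example (not part of the patch)."""
        def __init__(self, ancestry):
            self._ancestry = ancestry
        def get_parent_map(self, keys):
            a = self._ancestry
            return dict((k, a[k]) for k in keys if k in a)

    local = StubProvider({'rev2': ['rev1']})
    fallback = StubProvider({'rev1': ['null:']})

    stacked = _StackedParentsProvider([local, fallback])
    # 'rev2' is answered by the first provider, so only the still-missing
    # keys ('rev1' and 'ghost') are passed to the fallback, and 'ghost'
    # never appears in the result at all.
    print stacked.get_parent_map(['rev1', 'rev2', 'ghost'])
    # => {'rev1': ['null:'], 'rev2': ['rev1']}

    caching = CachingParentsProvider(stacked)
    caching.get_parent_map(['rev1', 'ghost'])
    # The miss for 'ghost' is remembered (cached as None internally), so a
    # repeat query does not touch the stacked providers again.
    print caching.get_parent_map(['rev1', 'ghost'])
    # => {'rev1': ['null:']}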

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-12-07 17:32:16 +0000
+++ b/bzrlib/index.py	2007-12-08 02:07:54 +0000
@@ -1008,21 +1008,22 @@
              * (NULL_REVISION,) when the key has no parents.
              * (parent_key, parent_key...) otherwise.
         """
-        search_keys = set(revision_ids)
-        search_keys.discard(NULL_REVISION)
-        found_parents = {NULL_REVISION:[]}
+        parent_map = self.get_parent_map(revision_ids)
+        return [parent_map.get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
+        search_keys = set(keys)
+        if NULL_REVISION in search_keys:
+            search_keys.discard(NULL_REVISION)
+            found_parents = {NULL_REVISION:[]}
+        else:
+            found_parents = {}
         for index, key, value, refs in self.iter_entries(search_keys):
             parents = refs[0]
             if not parents:
                 parents = (NULL_REVISION,)
             found_parents[key] = parents
-        result = []
-        for key in revision_ids:
-            try:
-                result.append(found_parents[key])
-            except KeyError:
-                result.append(None)
-        return result
+        return found_parents
 
     def insert_index(self, pos, index):
         """Insert a new index in the list of indices to query.

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-12-07 17:32:16 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-12-08 02:07:54 +0000
@@ -59,20 +59,24 @@
         return 'KnitParentsProvider(%r)' % self._knit
 
     def get_parents(self, revision_ids):
-        parents_list = []
-        for revision_id in revision_ids:
+        parent_map = self.get_parent_map(revision_ids)
+        return [parent_map.get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
+        parent_map = {}
+        for revision_id in keys:
             if revision_id == _mod_revision.NULL_REVISION:
-                parents = []
+                parent_map[revision_id] = []
             else:
                 try:
                     parents = self._knit.get_parents_with_ghosts(revision_id)
                 except errors.RevisionNotPresent:
-                    parents = None
+                    continue
                 else:
                     if len(parents) == 0:
                         parents = [_mod_revision.NULL_REVISION]
-            parents_list.append(parents)
-        return parents_list
+                parent_map[revision_id] = parents
+        return parent_map
 
 
 class KnitRepository(MetaDirRepository):

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-12-07 17:32:16 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-12-08 02:07:54 +0000
@@ -1833,13 +1833,21 @@
         This implementation accesses the combined revision index to provide
         answers.
         """
+        parent_map = self.get_parent_map(revision_ids)
+        return [parent_map.get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
         self._pack_collection.ensure_loaded()
         index = self._pack_collection.revision_index.combined_index
-        search_keys = set()
-        for revision_id in revision_ids:
-            if revision_id != _mod_revision.NULL_REVISION:
-                search_keys.add((revision_id,))
-        found_parents = {_mod_revision.NULL_REVISION:[]}
+        keys = set(keys)
+        if _mod_revision.NULL_REVISION in keys:
+            have_null = True
+            keys.discard(_mod_revision.NULL_REVISION)
+            found_parents = {_mod_revision.NULL_REVISION:[]}
+        else:
+            have_null = False
+            found_parents = {}
+        search_keys = set((revision_id,) for revision_id in keys)
         for index, key, value, refs in index.iter_entries(search_keys):
             parents = refs[0]
             if not parents:
@@ -1847,13 +1855,7 @@
             else:
                 parents = tuple(parent[0] for parent in parents)
             found_parents[key[0]] = parents
-        result = []
-        for revision_id in revision_ids:
-            try:
-                result.append(found_parents[revision_id])
-            except KeyError:
-                result.append(None)
-        return result
+        return found_parents
 
     def _make_parents_provider(self):
         return graph.CachingParentsProvider(self)
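
(One wrinkle in the pack_repo.py hunk above: the combined revision index
keys its entries as 1-tuples, so revision ids are wrapped before calling
iter_entries and unwrapped again via key[0] on the way out.  A standalone
sketch of that shape, with the index object left abstract and the helper
name made up for the example:)

    from bzrlib import revision as _mod_revision

    def index_parent_map(index, revision_ids):
        """Illustration only: plain revision ids against a tuple-keyed index."""
        search_keys = set((revision_id,) for revision_id in revision_ids)
        found = {}
        for idx, key, value, refs in index.iter_entries(search_keys):
            parents = refs[0]
            if not parents:
                # No parent references recorded: this was a first commit.
                parents = (_mod_revision.NULL_REVISION,)
            else:
                parents = tuple(parent[0] for parent in parents)
            found[key[0]] = parents          # unwrap the 1-tuple key
        return found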

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-12-07 17:32:16 +0000
+++ b/bzrlib/repository.py	2007-12-08 02:07:54 +0000
@@ -1645,20 +1645,36 @@
 
     def get_parents(self, revision_ids):
         """See StackedParentsProvider.get_parents"""
-        parents_list = []
-        for revision_id in revision_ids:
+        parent_map = self.get_parent_map(revision_ids)
+        return [parent_map.get(r, None) for r in revision_ids]
+
+    def get_parent_map(self, keys):
+        """Get a mapping of keys => parents
+
+        A dictionary is returned with an entry for each key present in this
+        source. If this source doesn't have information about a key, it should
+        not include an entry.
+
+        [NULL_REVISION] is used as the parent of the first user-committed
+        revision.  Its parent list is empty.
+
+        :param keys: An iterable returning keys to check (eg revision_ids)
+        :return: A dictionary mapping each key to its parents
+        """
+        parent_map = {}
+        for revision_id in keys:
             if revision_id == _mod_revision.NULL_REVISION:
-                parents = []
+                parent_map[revision_id] = []
             else:
                 try:
-                    parents = self.get_revision(revision_id).parent_ids
+                    parent_ids = self.get_revision(revision_id).parent_ids
                 except errors.NoSuchRevision:
-                    parents = None
+                    pass
                 else:
-                    if len(parents) == 0:
-                        parents = [_mod_revision.NULL_REVISION]
-            parents_list.append(parents)
-        return parents_list
+                    if len(parent_ids) == 0:
+                        parent_ids = [_mod_revision.NULL_REVISION]
+                    parent_map[revision_id] = parent_ids
+        return parent_map
 
     def _make_parents_provider(self):
         return self

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2007-12-07 22:26:22 +0000
+++ b/bzrlib/tests/test_graph.py	2007-12-08 02:07:54 +0000
@@ -185,6 +185,10 @@
         self.calls.extend(nodes)
         return self._real_parents_provider.get_parents(nodes)
 
+    def get_parent_map(self, nodes):
+        self.calls.extend(nodes)
+        return self._real_parents_provider.get_parent_map(nodes)
+
 
 class TestGraph(TestCaseWithMemoryTransport):
 
@@ -518,8 +522,16 @@
                     self.fail('key deeper was accessed')
                 result.append(graph_dict[key])
             return result
+        def get_parent_map(keys):
+            result = {}
+            for key in keys:
+                if key == 'deeper':
+                    self.fail('key deeper was accessed')
+                result[key] = graph_dict[key]
+            return result
         an_obj = stub()
         an_obj.get_parents = get_parents
+        an_obj.get_parent_map = get_parent_map
         graph = _mod_graph.Graph(an_obj)
         return graph.heads(search)
 


