Rev 3096: Implement get_parent_map for all parent providers in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
John Arbash Meinel
john at arbash-meinel.com
Sat Dec 8 02:08:30 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/graph_update
------------------------------------------------------------
revno: 3096
revision-id:john at arbash-meinel.com-20071208020754-8jo7xvj2e2l4dzrc
parent: john at arbash-meinel.com-20071207222622-wrglvu7ydcwnjh3r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: graph_update
timestamp: Fri 2007-12-07 20:07:54 -0600
message:
Implement get_parent_map for all parent providers
Use it in preference to get_parents, since it has much lower API friction.
We need a dict anyway, and many of the providers naturally produce their
answers in that form.
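
As a rough sketch of the difference (illustrative only, not part of this
commit): get_parents returns a list positionally aligned with the request,
padding unknown keys with None, while get_parent_map returns a dict that
simply omits keys the source knows nothing about. The DictParentsProvider
name below is a toy stand-in for any provider.

    # Illustrative sketch only; not code from this commit.
    class DictParentsProvider(object):
        """A toy provider backed by a plain {revision_id: parents} dict."""

        def __init__(self, ancestry):
            self.ancestry = ancestry

        def get_parents(self, revision_ids):
            # Old API: list aligned with the request, None for unknown keys.
            return [self.ancestry.get(r, None) for r in revision_ids]

        def get_parent_map(self, keys):
            # New API: dict with entries only for keys this source knows.
            ancestry = self.ancestry
            return dict((k, ancestry[k]) for k in keys if k in ancestry)

    provider = DictParentsProvider({'rev1': ['null:'], 'rev2': ['rev1']})
    provider.get_parents(['rev2', 'ghost'])     # => [['rev1'], None]
    provider.get_parent_map(['rev2', 'ghost'])  # => {'rev2': ['rev1']}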
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-12-07 22:26:22 +0000
+++ b/bzrlib/graph.py 2007-12-08 02:07:54 +0000
@@ -55,6 +55,22 @@
def get_parents(self, revisions):
return [self.ancestry.get(r, None) for r in revisions]
+ def get_parent_map(self, keys):
+ """Get a mapping of keys => parents
+
+ A dictionary is returned with an entry for each key present in this
+ source. If this source doesn't have information about a key, it should
+ not include an entry.
+
+ [NULL_REVISION] is used as the parent of the first user-committed
+ revision. Its parent list is empty.
+
+ :param keys: An iterable returning keys to check (eg revision_ids)
+ :return: A dictionary mapping each key to its parents
+ """
+ ancestry = self.ancestry
+ return dict((k, ancestry[k]) for k in keys if k in ancestry)
+
class _StackedParentsProvider(object):
@@ -76,16 +92,32 @@
If the revision is not present (i.e. a ghost), None is used in place
of the list of parents.
"""
+ found = self.get_parent_map(revision_ids)
+ get = found.get
+ return [get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
+ """Get a mapping of keys => parents
+
+ A dictionary is returned with an entry for each key present in this
+ source. If this source doesn't have information about a key, it should
+ not include an entry.
+
+ [NULL_REVISION] is used as the parent of the first user-committed
+ revision. Its parent list is empty.
+
+ :param keys: An iterable returning keys to check (eg revision_ids)
+ :return: A dictionary mapping each key to its parents
+ """
found = {}
+ remaining = set(keys)
for parents_provider in self._parent_providers:
- pending_revisions = [r for r in revision_ids if r not in found]
- parent_list = parents_provider.get_parents(pending_revisions)
- new_found = dict((k, v) for k, v in zip(pending_revisions,
- parent_list) if v is not None)
+ new_found = parents_provider.get_parent_map(remaining)
found.update(new_found)
- if len(found) == len(revision_ids):
+ remaining.difference_update(new_found)
+ if not remaining:
break
- return [found.get(r, None) for r in revision_ids]
+ return found
class CachingParentsProvider(object):
@@ -114,23 +146,33 @@
If the revision is not present (i.e. a ghost), None is used in place
of the list of parents.
"""
+ found = self.get_parent_map(revision_ids)
+ get = found.get
+ return [get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
needed = set()
- # TODO: is it worth having the dict here, or is it better to just load
- # the cache, and then return requests from it?
- parents = {}
+ # If the _real_provider doesn't have a key, we cache a value of None,
+ # which we then later use to realize we cannot provide a value for that
+ # key.
+ parent_map = {}
cache = self._cache
- for revision_id in revision_ids:
- if revision_id in cache:
- parents[revision_id] = cache[revision_id]
+ for key in keys:
+ if key in cache:
+ value = cache[key]
+ if value is not None:
+ parent_map[key] = value
else:
- needed.add(revision_id)
+ needed.add(key)
if needed:
- new_parents = dict(zip(needed,
- self._real_provider.get_parents(needed)))
+ new_parents = self._real_provider.get_parent_map(needed)
cache.update(new_parents)
- parents.update(new_parents)
- return [parents[r] for r in revision_ids]
+ parent_map.update(new_parents)
+ needed.difference_update(new_parents)
+ for key in needed:
+ cache[key] = None
+ return parent_map
class Graph(object):
@@ -151,6 +193,7 @@
conforming to the behavior of StackedParentsProvider.get_parents
"""
self.get_parents = parents_provider.get_parents
+ self.get_parent_map = parents_provider.get_parent_map
self._parents_provider = parents_provider
def __repr__(self):
@@ -239,7 +282,7 @@
if len(active_searchers) == 0:
return border_ancestors, common_ancestors, [s.seen for s in
searchers]
- parents = self._preload_parents(next_to_search)
+ parents = self.get_parent_map(next_to_search)
new_common = common_searcher.step(parents)
if new_common:
common_ancestors.update(new_common)
@@ -279,15 +322,6 @@
for searcher in active_searchers:
next_to_search.update(searcher.will_search())
- def _preload_parents(self, keys):
- """Call get_parents() for all of the keys we will need.
-
- :return: A dictionary mapping key => parents
- """
- # TODO: Use izip? the number of keys here will probably always be
- # relatively small, it may be faster to just use zip
- return dict(zip(keys, self.get_parents(keys)))
-
def heads(self, keys):
"""Return the heads from amongst keys.
@@ -316,7 +350,7 @@
active_searchers = dict(searchers)
# The first parents we will be accessing are just the candidate_heads,
# so ask the parents provider to pull them up
- parents = self._preload_parents(candidate_heads)
+ parents = self.get_parent_map(candidate_heads)
# skip over the actual candidate for each searcher, and figure out what
# the next nodes are going to be.
@@ -331,7 +365,7 @@
# accessing all history.
common_walker = self._make_breadth_first_searcher([])
while len(active_searchers) > 0:
- parents = self._preload_parents(next_to_search)
+ parents = self.get_parent_map(next_to_search)
ancestors = set()
# advance searches
new_common = common_walker.step(parents)
@@ -420,7 +454,7 @@
An ancestor may sort after a descendant if the relationship is not
visible in the supplied list of revisions.
"""
- sorter = tsort.TopoSorter(zip(revisions, self.get_parents(revisions)))
+ sorter = tsort.TopoSorter(self.get_parent_map(revisions))
return sorter.iter_topo_order()
def is_ancestor(self, candidate_ancestor, candidate_descendant):
@@ -525,7 +559,7 @@
else:
new_search_revisions = set()
for revision_id in self._search_revisions:
- parent_ids = parents[revision_id]
+ parent_ids = parents.get(revision_id, None)
if parent_ids is None:
continue
new_search_revisions.update(parent_ids)
@@ -541,9 +575,8 @@
traversal. No ancestor will be returned more than once.
"""
to_search = self.will_search()
- parent_ids = self._parents_provider.get_parents(to_search)
- parents = dict(zip(to_search, parent_ids))
- next_revisions = self.step(parents)
+ parent_map = self._parents_provider.get_parent_map(to_search)
+ next_revisions = self.step(parent_map)
if not next_revisions:
raise StopIteration()
return next_revisions
@@ -558,7 +591,13 @@
while revisions:
seen_ancestors.update(revisions)
next_revisions = set()
- for parents in self._parents_provider.get_parents(revisions):
+ # TODO: couldn't we just iterate the parent_map, since if a key
+ # isn't present, that means we would skip anyway
+ # It depends if parent_map can ever contain keys that weren't
+ # requested.
+ parent_map = self._parents_provider.get_parent_map(revisions)
+ for revision_id in revisions:
+ parents = parent_map.get(revision_id, None)
if parents is None:
continue
next_revisions.update(self.seen.intersection(parents))
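
The negative-cache comment in the CachingParentsProvider hunk above (missing
keys are cached as None so the real provider is never asked about them a
second time) can be illustrated with a small counting stub. A hedged sketch,
assuming CachingParentsProvider takes the underlying provider as its only
constructor argument, as the _real_provider/_cache attributes above suggest:

    # Illustrative sketch; exercises the interfaces shown in the hunks above.
    from bzrlib import graph

    class CountingProvider(object):
        """Counts how often the underlying source is actually queried."""

        def __init__(self, ancestry):
            self.ancestry = ancestry
            self.calls = 0

        def get_parent_map(self, keys):
            self.calls += 1
            ancestry = self.ancestry
            return dict((k, ancestry[k]) for k in keys if k in ancestry)

    real = CountingProvider({'rev1': ['null:']})
    caching = graph.CachingParentsProvider(real)

    caching.get_parent_map(['rev1', 'ghost'])  # real provider queried once
    caching.get_parent_map(['rev1', 'ghost'])  # 'rev1' served from the cache;
                                               # 'ghost' was cached as None,
                                               # so no second query is made
    assert real.calls == 1

The same shape applies to _StackedParentsProvider.get_parent_map: each
provider in the stack is only asked about the keys still missing, and the
loop stops as soon as nothing remains.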
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-12-07 17:32:16 +0000
+++ b/bzrlib/index.py 2007-12-08 02:07:54 +0000
@@ -1008,21 +1008,22 @@
* (NULL_REVISION,) when the key has no parents.
* (parent_key, parent_key...) otherwise.
"""
- search_keys = set(revision_ids)
- search_keys.discard(NULL_REVISION)
- found_parents = {NULL_REVISION:[]}
+ parent_map = self.get_parent_map(revision_ids)
+ return [parent_map.get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
+ search_keys = set(keys)
+ if NULL_REVISION in search_keys:
+ search_keys.discard(NULL_REVISION)
+ found_parents = {NULL_REVISION:[]}
+ else:
+ found_parents = {}
for index, key, value, refs in self.iter_entries(search_keys):
parents = refs[0]
if not parents:
parents = (NULL_REVISION,)
found_parents[key] = parents
- result = []
- for key in revision_ids:
- try:
- result.append(found_parents[key])
- except KeyError:
- result.append(None)
- return result
+ return found_parents
def insert_index(self, pos, index):
"""Insert a new index in the list of indices to query.
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-12-07 17:32:16 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-12-08 02:07:54 +0000
@@ -59,20 +59,24 @@
return 'KnitParentsProvider(%r)' % self._knit
def get_parents(self, revision_ids):
- parents_list = []
- for revision_id in revision_ids:
+ parent_map = self.get_parent_map(revision_ids)
+ return [parent_map.get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
+ parent_map = {}
+ for revision_id in keys:
if revision_id == _mod_revision.NULL_REVISION:
- parents = []
+ parent_map[revision_id] = []
else:
try:
parents = self._knit.get_parents_with_ghosts(revision_id)
except errors.RevisionNotPresent:
- parents = None
+ pass
else:
if len(parents) == 0:
parents = [_mod_revision.NULL_REVISION]
- parents_list.append(parents)
- return parents_list
+ parent_map[revision_id] = parents
+ return parent_map
class KnitRepository(MetaDirRepository):
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-12-07 17:32:16 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-12-08 02:07:54 +0000
@@ -1833,13 +1833,21 @@
This implementation accesses the combined revision index to provide
answers.
"""
+ parent_map = self.get_parent_map(revision_ids)
+ return [parent_map.get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
self._pack_collection.ensure_loaded()
index = self._pack_collection.revision_index.combined_index
- search_keys = set()
- for revision_id in revision_ids:
- if revision_id != _mod_revision.NULL_REVISION:
- search_keys.add((revision_id,))
- found_parents = {_mod_revision.NULL_REVISION:[]}
+ keys = set(keys)
+ if _mod_revision.NULL_REVISION in keys:
+ have_null = True
+ keys.discard(_mod_revision.NULL_REVISION)
+ found_parents = {_mod_revision.NULL_REVISION:[]}
+ else:
+ have_null = False
+ found_parents = {}
+ search_keys = set((revision_id,) for revision_id in keys)
for index, key, value, refs in index.iter_entries(search_keys):
parents = refs[0]
if not parents:
@@ -1847,13 +1855,7 @@
else:
parents = tuple(parent[0] for parent in parents)
found_parents[key[0]] = parents
- result = []
- for revision_id in revision_ids:
- try:
- result.append(found_parents[revision_id])
- except KeyError:
- result.append(None)
- return result
+ return found_parents
def _make_parents_provider(self):
return graph.CachingParentsProvider(self)
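
The pack repository answers these queries from the combined revision index,
which keys its entries by 1-tuples; revision ids are wrapped on the way in
and unwrapped on the way out. A minimal sketch of just that translation,
with the index argument standing in for
self._pack_collection.revision_index.combined_index and the helper name
invented for this sketch:

    # Illustrative sketch of the tuple-key translation used above.
    from bzrlib import revision as _mod_revision

    def tupled_parent_lookup(index, revision_ids):
        # The revision index stores each key as a ('revision-id',) 1-tuple.
        search_keys = set((revision_id,) for revision_id in revision_ids)
        found = {}
        for idx, key, value, refs in index.iter_entries(search_keys):
            parent_keys = refs[0]
            if not parent_keys:
                # No stored parents: the parent is NULL_REVISION.
                found[key[0]] = (_mod_revision.NULL_REVISION,)
            else:
                # key and each parent are 1-tuples; [0] recovers the ids.
                found[key[0]] = tuple(parent[0] for parent in parent_keys)
        return found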
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-12-07 17:32:16 +0000
+++ b/bzrlib/repository.py 2007-12-08 02:07:54 +0000
@@ -1645,20 +1645,36 @@
def get_parents(self, revision_ids):
"""See StackedParentsProvider.get_parents"""
- parents_list = []
- for revision_id in revision_ids:
+ parent_map = self.get_parent_map(revision_ids)
+ return [parent_map.get(r, None) for r in revision_ids]
+
+ def get_parent_map(self, keys):
+ """Get a mapping of keys => parents
+
+ A dictionary is returned with an entry for each key present in this
+ source. If this source doesn't have information about a key, it should
+ not include an entry.
+
+ [NULL_REVISION] is used as the parent of the first user-committed
+ revision. Its parent list is empty.
+
+ :param keys: An iterable returning keys to check (eg revision_ids)
+ :return: A dictionary mapping each key to its parents
+ """
+ parent_map = {}
+ for revision_id in keys:
if revision_id == _mod_revision.NULL_REVISION:
- parents = []
+ parent_map[revision_id] = []
else:
try:
- parents = self.get_revision(revision_id).parent_ids
+ parent_ids = self.get_revision(revision_id).parent_ids
except errors.NoSuchRevision:
- parents = None
+ pass
else:
- if len(parents) == 0:
- parents = [_mod_revision.NULL_REVISION]
- parents_list.append(parents)
- return parents_list
+ if len(parent_ids) == 0:
+ parent_ids = [_mod_revision.NULL_REVISION]
+ parent_map[revision_id] = parent_ids
+ return parent_map
def _make_parents_provider(self):
return self
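
The practical difference for callers: under get_parents a ghost shows up as
None in the positionally aligned list, while under get_parent_map it is
simply absent from the dict. A sketch of both caller idioms (the helper
names are invented for this sketch):

    # Illustrative caller idioms; the helper names are invented.
    def ghosts_via_get_parents(provider, revision_ids):
        parents_list = provider.get_parents(revision_ids)
        return [rev_id for rev_id, parents in zip(revision_ids, parents_list)
                if parents is None]

    def ghosts_via_get_parent_map(provider, revision_ids):
        parent_map = provider.get_parent_map(revision_ids)
        return [rev_id for rev_id in revision_ids if rev_id not in parent_map]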
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-12-07 22:26:22 +0000
+++ b/bzrlib/tests/test_graph.py 2007-12-08 02:07:54 +0000
@@ -185,6 +185,10 @@
self.calls.extend(nodes)
return self._real_parents_provider.get_parents(nodes)
+ def get_parent_map(self, nodes):
+ self.calls.extend(nodes)
+ return self._real_parents_provider.get_parent_map(nodes)
+
class TestGraph(TestCaseWithMemoryTransport):
@@ -518,8 +522,16 @@
self.fail('key deeper was accessed')
result.append(graph_dict[key])
return result
+ def get_parent_map(keys):
+ result = {}
+ for key in keys:
+ if key == 'deeper':
+ self.fail('key deeper was accessed')
+ result[key] = graph_dict[key]
+ return result
an_obj = stub()
an_obj.get_parents = get_parents
+ an_obj.get_parent_map = get_parent_map
graph = _mod_graph.Graph(an_obj)
return graph.heads(search)