Rev 6024: Prototype the walk-backwards-and-then-forwards code. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Tue Jul 26 14:11:02 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6024
revision-id: john at arbash-meinel.com-20110726141058-9lmlw4tzr693e10x
parent: john at arbash-meinel.com-20110726132849-6kno2j5jct20l4u3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Tue 2011-07-26 16:10:58 +0200
message:
Prototype the walk-backwards-and-then-forwards code.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2011-07-26 13:28:49 +0000
+++ b/bzrlib/graph.py 2011-07-26 14:10:58 +0000
@@ -1892,39 +1892,106 @@
def ignore():
- # TODO: If we use this heavily, then we should just cache the
- # reverse map. It certainly only changes based on newly
- # requested entries.
- parent_to_children_map = {}
- for child, parents in parents_map.iteritems():
- for p in parents:
- # Any given parent is likely to have only a small handful
- # of children, many will have only one.
- # TODO: Use static_tuple
- if p not in parent_to_children_map:
- parent_to_children_map[p] = (child,)
- else:
- parent_to_children_map[p] = parent_to_children_map[p] + (child,)
- stop_keys = set(keys)
- stop_keys.difference_update(self._unstacked_provider.missing_keys)
- # Just look at immediate children
- child_keys = set()
- for k in keys:
- child_keys.update(parent_to_children_map[k])
- # Without this line, we get the revision count wrong for 'bzr'. I'm
- # guessing a shortcut caused some revs to be found early, and then
- # not walked now. So without c for c in parents_map[k] we get *way*
- # too many keys, because the graph flood-fills. Without 'if c not
- # in child_keys' we stop before we start and get the wrong answer
- # that way.
- map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
- for k in child_keys])
- mutter('Faking search set _get_parent_map_rpc,'
- ' %d cache size, %d start keys'
- ' %d included_keys %d stop_keys',
- len(parents_map), len(child_keys), len(child_keys),
- len(keys))
- recipe = ('manual', child_keys, stop_keys, len(child_keys))
+ # TODO: If we use this heavily, then we should just cache the
+ # reverse map. It certainly only changes based on newly
+ # requested entries.
+ stop_keys = set(keys)
+ stop_keys.difference_update(self._unstacked_provider.missing_keys)
+ # Just look at immediate children
+ child_keys = set()
+ for k in keys:
+ child_keys.update(parent_to_children_map[k])
+ # Without this line, we get the revision count wrong for 'bzr'. I'm
+ # guessing a shortcut caused some revs to be found early, and then
+ # not walked now. So without c for c in parents_map[k] we get *way*
+ # too many keys, because the graph flood-fills. Without 'if c not
+ # in child_keys' we stop before we start and get the wrong answer
+ # that way.
+ map(stop_keys.update, [[c for c in parents_map[k] if c not in child_keys]
+ for k in child_keys])
+ mutter('Faking search set _get_parent_map_rpc,'
+ ' %d cache size, %d start keys'
+ ' %d included_keys %d stop_keys',
+ len(parents_map), len(child_keys), len(child_keys),
+ len(keys))
+ recipe = ('manual', child_keys, stop_keys, len(child_keys))
+
+
+def invert_parent_map(parent_map):
+ """Given a map from child => parents, create a map of parent=>children"""
+ child_map = {}
+ for child, parents in parent_map.iteritems():
+ for p in parents:
+ # Any given parent is likely to have only a small handful
+ # of children, many will have only one. So we avoid mem overhead of
+ # a list, in exchange for extra copying of tuples
+ # TODO: Use static_tuple
+ if p not in child_map:
+ child_map[p] = (child,)
+ else:
+ child_map[p] = child_map[p] + (child,)
+ return child_map
+
+
+def search_result_from_parent_map_with_keys(parent_map, missing_keys,
+ tip_keys, depth):
+ """Transform a parent_map that is searching 'tip_keys' into an
+ approximate SearchResult.
+
+ We should be able to generate a SearchResult from a given set of starting
+ keys, that covers a subset of parent_map that has the last step pointing at
+ tip_keys. This is to handle the case that really-long-searches shouldn't be
+ started from scratch on each get_parent_map request, but we *do* want to
+ filter out some of the keys that we've already seen, so we don't get
+ information that we already know about on every request.
+
+ The server will validate the search (that starting at start_keys and
+ stopping at stop_keys yields the exact key_count), so we have to be careful
+ to give an exact recipe.
+
+ Basic algorithm is:
+ 1) Invert parent_map to get child_map (todo: have it cached and pass it
+ in)
+ 2) Starting at tip_keys, walk towards children for 'depth' steps.
+ 3) At that point, we have the 'start' keys.
+ 4) Start walking parent_map from 'start' keys, counting how many keys
+ are seen, and generating stop_keys for anything that would walk
+ outside of the parent_map.
+
+ :param parent_map: A map from {child_id: (parent_ids,)}
+ :param missing_keys: parent_ids that we know are unavailable
+ :param tip_keys: the revision_ids that we are searching
+ :param depth: How far back to walk.
+ """
+ child_map = invert_parent_map(parent_map)
+ current_tips = set(tip_keys)
+ walked = set(current_tips)
+ while current_tips and depth:
+ depth -= 1
+ children = set()
+ for p in current_tips:
+ # Is it better to pre- or post- filter the children?
+ children.update[child_map[p]]
+ # TODO: Use the right set notation here since 'walked' grows large,
+ # while 'children' should usually be small
+ # Don't walk keys we've already walked
+ children.difference_update(walked)
+ walked.update(children)
+ ### if not children:
+ ### # We walked off the graph
+ ### break
+ current_tips = children
+ start_keys = current_tips
+ g = Graph(DictParentsProvider(parent_map))
+ s = g._make_breadth_first_searcher(start_keys)
+ while True:
+ try:
+ s.next_with_ghosts()
+ except StopIteration:
+ break
+ return s.get_result().get_recipe()
+
+
def search_result_from_parent_map(parent_map, missing_keys):
"""Transform a parent_map into SearchResult information."""
if not parent_map:
More information about the bazaar-commits
mailing list