Rev 6021: When the overall graph gets too big to replicate on the server, in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Thu Jul 21 14:21:17 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6021
revision-id: john at arbash-meinel.com-20110721142102-z333hn366uvpntn3
parent: pqm at pqm.ubuntu.com-20110718144335-8v4u0gbnwzm0vzmt
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Thu 2011-07-21 16:21:02 +0200
message:
When the overall graph gets too big to replicate on the server,
fall back to doing a *very* local search.
This avoids causing the server to iterate over most of history at each step.
In timings against gcc-linaro, it reduces the walk-all-history time from
5.5min down to 1m15s.
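
The idea, restated in standalone form: rather than serialising the client's
entire cached graph into the search recipe, invert the cached child->parents
map and start the server-side search at just the immediate children of the
keys being requested, stopping at the keys themselves. The sketch below is
illustrative only, not bzrlib API; `parents_map` and `keys` mirror the names
in the patch attached below.

def minimal_search_recipe(parents_map, keys):
    # parents_map: {child_revid: (parent_revid, ...)}, the client's cache.
    # keys: the revisions whose parents we are about to ask the server for.
    parent_to_children = {}
    for child, parents in parents_map.items():
        for p in parents:
            parent_to_children.setdefault(p, []).append(child)
    # Start the server's search one step above the requested keys and stop
    # it at the keys themselves, so each round trip walks a one-deep
    # frontier instead of most of history.
    start_keys = set()
    for k in keys:
        start_keys.update(parent_to_children.get(k, ()))
    stop_keys = set(keys)
    return ('manual', start_keys, stop_keys, len(start_keys))

# With a three-revision chain cached on the client:
cache = {'rev3': ('rev2',), 'rev2': ('rev1',)}
print(minimal_search_recipe(cache, ['rev1']))
# -> ('manual', {'rev2'}, {'rev1'}, 1)

One deliberate difference from the patch: the `.get(k, ())` guard skips a
requested key that no cached revision names as a parent, whereas the patch
indexes `parent_to_children_map[k]` directly and so assumes every requested
key already has a known child in the cache.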
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2011-06-18 13:57:17 +0000
+++ b/bzrlib/remote.py 2011-07-21 14:21:02 +0000
@@ -1744,27 +1744,61 @@
         if parents_map is None:
             # Repository is not locked, so there's no cache.
             parents_map = {}
-        # start_set is all the keys in the cache
-        start_set = set(parents_map)
-        # result set is all the references to keys in the cache
-        result_parents = set()
-        for parents in parents_map.itervalues():
-            result_parents.update(parents)
-        stop_keys = result_parents.difference(start_set)
-        # We don't need to send ghosts back to the server as a position to
-        # stop either.
-        stop_keys.difference_update(self._unstacked_provider.missing_keys)
-        key_count = len(parents_map)
-        if (NULL_REVISION in result_parents
-            and NULL_REVISION in self._unstacked_provider.missing_keys):
-            # If we pruned NULL_REVISION from the stop_keys because it's also
-            # in our cache of "missing" keys we need to increment our key count
-            # by 1, because the reconstituted SearchResult on the server will
-            # still consider NULL_REVISION to be an included key.
-            key_count += 1
-        included_keys = start_set.intersection(result_parents)
-        start_set.difference_update(included_keys)
-        recipe = ('manual', start_set, stop_keys, key_count)
+        if len(parents_map) < 10000:
+            # start_set is all the keys in the cache
+            start_set = set(parents_map)
+            # result set is all the references to keys in the cache
+            result_parents = set()
+            for parents in parents_map.itervalues():
+                result_parents.update(parents)
+            stop_keys = result_parents.difference(start_set)
+            # We don't need to send ghosts back to the server as a position to
+            # stop either.
+            stop_keys.difference_update(self._unstacked_provider.missing_keys)
+            key_count = len(parents_map)
+            if (NULL_REVISION in result_parents
+                and NULL_REVISION in self._unstacked_provider.missing_keys):
+                # If we pruned NULL_REVISION from the stop_keys because it's also
+                # in our cache of "missing" keys we need to increment our key count
+                # by 1, because the reconstituted SearchResult on the server will
+                # still consider NULL_REVISION to be an included key.
+                key_count += 1
+            included_keys = start_set.intersection(result_parents)
+            start_set.difference_update(included_keys)
+            mutter('Doing _get_parent_map_rpc,'
+                   ' %d cache size, %d start_set, %d result_parents,'
+                   ' %d stop_keys, %d included_keys, %d keys,'
+                   ' %d keys matching stop keys',
+                   len(parents_map), len(start_set), len(result_parents),
+                   len(stop_keys), len(included_keys), len(keys),
+                   len(keys.intersection(stop_keys)))
+            recipe = ('manual', start_set, stop_keys, key_count)
+        else:
+            import pdb; pdb.set_trace()
+            # We've searched too many revisions for it to be efficient to
+            # recreate our search on the server. So just create a 'minimal'
+            # search pattern.
+            parent_to_children_map = {}
+            for child, parents in parents_map.iteritems():
+                for p in parents:
+                    # Any given parent is likely to have only a small handful
+                    # of children; many will have only one.
+                    # TODO: Use static_tuple
+                    if p not in parent_to_children_map:
+                        parent_to_children_map[p] = (child,)
+                    else:
+                        parent_to_children_map[p] = parent_to_children_map[p] + (child,)
+            stop_keys = set(keys)
+            # Just look at immediate children
+            child_keys = set()
+            for k in keys:
+                child_keys.update(parent_to_children_map[k])
+            mutter('Faking search set _get_parent_map_rpc,'
+                   ' %d cache size, %d start keys,'
+                   ' %d included_keys, %d stop_keys',
+                   len(parents_map), len(child_keys), len(child_keys),
+                   len(keys))
+            recipe = ('manual', child_keys, stop_keys, len(child_keys))
         body = self._serialise_search_recipe(recipe)
         path = self.bzrdir._path_for_remote_call(self._client)
         for key in keys:
@@ -1773,6 +1807,7 @@
"key %r not a plain string" % (key,))
verb = 'Repository.get_parent_map'
args = (path, 'include-missing:') + tuple(keys)
+ mutter('keys: %r body: %r' % (keys, body))
try:
response = self._call_with_body_bytes_expecting_body(
verb, args, body)
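
Why the 10000-entry cutoff matters: in the pre-existing branch kept above for
small caches, `start_set` and `stop_keys` between them cover essentially the
whole client cache, so the serialised recipe body (and the search the server
rebuilds from it) grows with every revision walked so far. That is bug 388269:
each step of the walk makes the server iterate most of history again. A toy
rendering of that branch, under the hypothetical name `full_search_recipe`
(not bzrlib API):

NULL_REVISION = 'null:'  # bzrlib's sentinel revision id

def full_search_recipe(parents_map, missing_keys=frozenset()):
    start_set = set(parents_map)   # every key held locally
    result_parents = set()         # every parent those keys reference
    for parents in parents_map.values():
        result_parents.update(parents)
    # Stop at references that point outside the cache, except known ghosts.
    stop_keys = result_parents - start_set - set(missing_keys)
    key_count = len(parents_map)
    if NULL_REVISION in result_parents and NULL_REVISION in missing_keys:
        # Pruned from stop_keys as "missing", but the server-side
        # SearchResult still counts NULL_REVISION as included.
        key_count += 1
    # Cached keys that are parents of other cached keys are interior, not
    # heads, so they drop out of the start set.
    start_set -= result_parents
    return ('manual', start_set, stop_keys, key_count)

Both branches produce the same ('manual', start_keys, stop_keys, count)
shape; the fallback simply keeps start and stop proportional to the current
request rather than to everything walked so far.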
=== modified file 'bzrlib/vf_repository.py'
--- a/bzrlib/vf_repository.py 2011-06-28 17:25:26 +0000
+++ b/bzrlib/vf_repository.py 2011-07-21 14:21:02 +0000
@@ -2502,6 +2502,7 @@
         :param revision_ids: The start point for the search.
         :return: A set of revision ids.
         """
+        import pdb; pdb.set_trace()
         target_graph = self.target.get_graph()
         revision_ids = frozenset(revision_ids)
         if if_present_ids:
@@ -2514,12 +2515,21 @@
         searcher = source_graph._make_breadth_first_searcher(all_wanted_revs)
         null_set = frozenset([_mod_revision.NULL_REVISION])
         searcher_exhausted = False
+        search_step = rev_count = 0
+        gpm = searcher._parents_provider.get_parent_map
+        def get_parent_map_logging(revisions):
+            res = gpm(revisions)
+            mutter('step %d, requested %d returned %d'
+                   % (search_step, len(revisions), len(res)))
+            return res
+        searcher._parents_provider.get_parent_map = get_parent_map_logging
         while True:
             next_revs = set()
             ghosts = set()
             # Iterate the searcher until we have enough next_revs
             while len(next_revs) < self._walk_to_common_revisions_batch_size:
                 try:
+                    search_step += 1
                     next_revs_part, ghosts_part = searcher.next_with_ghosts()
                     next_revs.update(next_revs_part)
                     ghosts.update(ghosts_part)
@@ -2550,6 +2560,7 @@
                 searcher.stop_searching_any(stop_revs)
             if searcher_exhausted:
                 break
+        import pdb; pdb.set_trace()
         return searcher.get_result()
 
     @needs_read_lock
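
The vf_repository.py hunks are debugging instrumentation rather than part of
the fix (note the pdb traces): the searcher's parent provider is
monkey-patched so that every get_parent_map call logs how many revisions were
requested and how many came back. The same pattern in standalone form,
assuming only an object with a `get_parent_map(revisions)` method
(`instrument_parent_provider` is a hypothetical name):

import logging

def instrument_parent_provider(provider, log=logging.info):
    # Wrap provider.get_parent_map so each call logs how many revisions
    # were requested and how many were resolved.
    original = provider.get_parent_map
    state = {'step': 0}  # mutable cell; Python 2 has no 'nonlocal'
    def get_parent_map_logging(revisions):
        state['step'] += 1
        result = original(revisions)
        log('step %d, requested %d returned %d',
            state['step'], len(revisions), len(result))
        return result
    provider.get_parent_map = get_parent_map_logging
    return provider

The patch keeps its counter in the enclosing scope instead, bumping
search_step next to next_with_ghosts(); that works because the nested
function only ever reads search_step, it never assigns to it.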