Rev 6105: Merge fix for bug #388269 into trunk, resolve conflicts and add release notes. in http://bazaar.launchpad.net/~jameinel/bzr/2.5-get_cached_parent_map-388269
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 26 11:36:00 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.5-get_cached_parent_map-388269
------------------------------------------------------------
revno: 6105 [merge]
revision-id: john at arbash-meinel.com-20110826113550-jtk2c6s2is9b0th4
parent: pqm at pqm.ubuntu.com-20110826095404-1b0s2cpvcf97ox0z
parent: john at arbash-meinel.com-20110826112951-69npob6aoky0avmj
fixes bug(s): https://launchpad.net/bugs/388269
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.5-get_cached_parent_map-388269
timestamp: Fri 2011-08-26 13:35:50 +0200
message:
Merge fix for bug #388269 into trunk, resolve conflicts and add release notes.
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/tests/per_repository_reference/test_graph.py test_graph.py-20110208074011-b92ci87urjayd2q7-1
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
bzrlib/tests/test_remote.py test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2011-08-24 09:01:44 +0000
+++ b/bzrlib/graph.py 2011-08-26 11:35:50 +0000
@@ -58,6 +58,11 @@
def __repr__(self):
return 'DictParentsProvider(%r)' % self.ancestry
+ # Note: DictParentsProvider does not implement get_cached_parent_map
+ # Arguably, the data is clearly cached in memory. However, this class
+ # is mostly used for testing, and it keeps the tests clean to not
+ # change it.
+
def get_parent_map(self, keys):
"""See StackedParentsProvider.get_parent_map"""
ancestry = self.ancestry
@@ -66,10 +71,10 @@
class StackedParentsProvider(object):
"""A parents provider which stacks (or unions) multiple providers.
-
+
The providers are queries in the order of the provided parent_providers.
"""
-
+
def __init__(self, parent_providers):
self._parent_providers = parent_providers
@@ -91,6 +96,22 @@
"""
found = {}
remaining = set(keys)
+ # This adds getattr() overhead to each get_parent_map call. However,
+ # this is StackedParentsProvider, which means we're dealing with I/O
+ # (either local indexes, or remote RPCs), so CPU overhead should be
+ # minimal.
+ for parents_provider in self._parent_providers:
+ get_cached = getattr(parents_provider, 'get_cached_parent_map',
+ None)
+ if get_cached is None:
+ continue
+ new_found = get_cached(remaining)
+ found.update(new_found)
+ remaining.difference_update(new_found)
+ if not remaining:
+ break
+ if not remaining:
+ return found
for parents_provider in self._parent_providers:
new_found = parents_provider.get_parent_map(remaining)
found.update(new_found)
@@ -150,6 +171,17 @@
return None
return dict(self._cache)
+ def get_cached_parent_map(self, keys):
+ """Return items from the cache.
+
+ This returns the same info as get_parent_map, but explicitly does not
+ invoke the supplied ParentsProvider to search for uncached values.
+ """
+ cache = self._cache
+ if cache is None:
+ return {}
+ return dict([(key, cache[key]) for key in keys if key in cache])
+
def get_parent_map(self, keys):
"""See StackedParentsProvider.get_parent_map."""
cache = self._cache
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2011-08-25 07:51:37 +0000
+++ b/bzrlib/remote.py 2011-08-26 11:35:50 +0000
@@ -1685,6 +1685,10 @@
self._ensure_real()
return self._real_repository.iter_files_bytes(desired_files)
+ def get_cached_parent_map(self, revision_ids):
+ """See bzrlib.CachingParentsProvider.get_cached_parent_map"""
+ return self._unstacked_provider.get_cached_parent_map(revision_ids)
+
def get_parent_map(self, revision_ids):
"""See bzrlib.Graph.get_parent_map()."""
return self._make_parents_provider().get_parent_map(revision_ids)
=== modified file 'bzrlib/tests/per_repository_reference/test_graph.py'
--- a/bzrlib/tests/per_repository_reference/test_graph.py 2011-02-09 08:07:51 +0000
+++ b/bzrlib/tests/per_repository_reference/test_graph.py 2011-08-26 11:26:29 +0000
@@ -18,6 +18,11 @@
"""Tests for graph operations on stacked repositories."""
+from bzrlib import (
+ remote,
+ repository,
+ tests,
+ )
from bzrlib.tests.per_repository import TestCaseWithRepository
@@ -43,3 +48,67 @@
branch_c.set_stacked_on_url('../b')
revid_1 = wt_a.commit('first commit')
return branch_a, branch_b, branch_c, revid_1
+
+ def make_stacked_branch_with_long_history(self):
+ builder = self.make_branch_builder('source')
+ builder.start_series()
+ builder.build_snapshot('A', None, [
+ ('add', ('', 'directory', 'root-id', None))])
+ builder.build_snapshot('B', ['A'], [])
+ builder.build_snapshot('C', ['B'], [])
+ builder.build_snapshot('D', ['C'], [])
+ builder.build_snapshot('E', ['D'], [])
+ builder.build_snapshot('F', ['E'], [])
+ source_b = builder.get_branch()
+ master_b = self.make_branch('master')
+ master_b.pull(source_b, stop_revision='E')
+ stacked_b = self.make_branch('stacked')
+ stacked_b.set_stacked_on_url('../master')
+ stacked_b.pull(source_b, stop_revision='F')
+ builder.finish_series()
+ return master_b, stacked_b
+
+ def assertParentMapCalls(self, expected):
+ """Check that self.hpss_calls has the expected get_parent_map calls."""
+ get_parent_map_calls = []
+ for c in self.hpss_calls:
+ # Right now, the only RPCs that get called are get_parent_map. If
+ # this changes in the future, we can change this to:
+ # if c.call.method != 'Repository.get_parent_map':
+ # continue
+ self.assertEqual('Repository.get_parent_map', c.call.method)
+ args = c.call.args
+ location = args[0]
+ self.assertEqual('include-missing:', args[1])
+ revisions = sorted(args[2:])
+ get_parent_map_calls.append((location, revisions))
+ self.assertEqual(expected, get_parent_map_calls)
+
+ def test_doesnt_call_get_parent_map_on_all_fallback_revs(self):
+ if not isinstance(self.repository_format,
+ remote.RemoteRepositoryFormat):
+ raise tests.TestNotApplicable('only for RemoteRepository')
+ # bug #388269
+ master_b, stacked_b = self.make_stacked_branch_with_long_history()
+ self.addCleanup(stacked_b.lock_read().unlock)
+ self.make_repository('target_repo', shared=True)
+ target_b = self.make_branch('target_repo/branch')
+ self.addCleanup(target_b.lock_write().unlock)
+ self.setup_smart_server_with_call_log()
+ res = target_b.repository.search_missing_revision_ids(
+ stacked_b.repository, revision_ids=['F'],
+ find_ghosts=False)
+ self.assertParentMapCalls([
+ # One call to stacked to start, which returns F=>E, and that E
+ # itself is missing, so when we step, we won't look for it.
+ ('extra/stacked/', ['F']),
+ # One fallback call to extra/master, which will return the rest of
+ # the history.
+ ('extra/master/', ['E']),
+ # And then one get_parent_map call to the target, to see if it
+ # already has any of these revisions.
+ ('extra/target_repo/branch/', ['A', 'B', 'C', 'D', 'E', 'F']),
+ ])
+ # Before bug #388269 was fixed, there would be a bunch of extra calls
+ # to 'extra/stacked', ['D'] then ['C'], then ['B'], then ['A'].
+ # One-at-a-time for the rest of the ancestry.
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2011-08-24 09:01:44 +0000
+++ b/bzrlib/tests/test_graph.py 2011-08-26 11:35:50 +0000
@@ -424,11 +424,41 @@
def __init__(self, parents_provider):
self.calls = []
self._real_parents_provider = parents_provider
+ get_cached = getattr(parents_provider, 'get_cached_parent_map', None)
+ if get_cached is not None:
+ # Only expose the underlying 'get_cached_parent_map' function if
+ # the wrapped provider has it.
+ self.get_cached_parent_map = self._get_cached_parent_map
def get_parent_map(self, nodes):
self.calls.extend(nodes)
return self._real_parents_provider.get_parent_map(nodes)
+ def _get_cached_parent_map(self, nodes):
+ self.calls.append(('cached', sorted(nodes)))
+ return self._real_parents_provider.get_cached_parent_map(nodes)
+
+
+class SharedInstrumentedParentsProvider(object):
+
+ def __init__(self, parents_provider, calls, info):
+ self.calls = calls
+ self.info = info
+ self._real_parents_provider = parents_provider
+ get_cached = getattr(parents_provider, 'get_cached_parent_map', None)
+ if get_cached is not None:
+ # Only expose the underlying 'get_cached_parent_map' function if
+ # the wrapped provider has it.
+ self.get_cached_parent_map = self._get_cached_parent_map
+
+ def get_parent_map(self, nodes):
+ self.calls.append((self.info, sorted(nodes)))
+ return self._real_parents_provider.get_parent_map(nodes)
+
+ def _get_cached_parent_map(self, nodes):
+ self.calls.append((self.info, 'cached', sorted(nodes)))
+ return self._real_parents_provider.get_cached_parent_map(nodes)
+
class TestGraphBase(tests.TestCase):
@@ -673,31 +703,6 @@
self.assertEqual((set(['e']), set(['f', 'g'])),
graph.find_difference('e', 'f'))
-
- def test_stacked_parents_provider(self):
- parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})
- parents2 = _mod_graph.DictParentsProvider({'rev1': ['rev4']})
- stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
- self.assertEqual({'rev1':['rev4'], 'rev2':['rev3']},
- stacked.get_parent_map(['rev1', 'rev2']))
- self.assertEqual({'rev2':['rev3'], 'rev1':['rev4']},
- stacked.get_parent_map(['rev2', 'rev1']))
- self.assertEqual({'rev2':['rev3']},
- stacked.get_parent_map(['rev2', 'rev2']))
- self.assertEqual({'rev1':['rev4']},
- stacked.get_parent_map(['rev1', 'rev1']))
-
- def test_stacked_parents_provider_overlapping(self):
- # rev2 is availible in both providers.
- # 1
- # |
- # 2
- parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
- parents2 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
- stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
- self.assertEqual({'rev2': ['rev1']},
- stacked.get_parent_map(['rev2']))
-
def test_iter_topo_order(self):
graph = self.make_graph(ancestry_1)
args = ['rev2a', 'rev3', 'rev1']
@@ -1473,7 +1478,7 @@
def setUp(self):
super(TestCachingParentsProvider, self).setUp()
- dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
+ dict_pp = _mod_graph.DictParentsProvider({'a': ('b',)})
self.inst_pp = InstrumentedParentsProvider(dict_pp)
self.caching_pp = _mod_graph.CachingParentsProvider(self.inst_pp)
@@ -1518,6 +1523,14 @@
self.assertEqual([], self.inst_pp.calls)
self.assertEqual(set(['b']), self.caching_pp.missing_keys)
+ def test_get_cached_parent_map(self):
+ self.assertEqual({}, self.caching_pp.get_cached_parent_map(['a']))
+ self.assertEqual([], self.inst_pp.calls)
+ self.assertEqual({'a': ('b',)}, self.caching_pp.get_parent_map(['a']))
+ self.assertEqual(['a'], self.inst_pp.calls)
+ self.assertEqual({'a': ('b',)},
+ self.caching_pp.get_cached_parent_map(['a']))
+
class TestCachingParentsProviderExtras(tests.TestCaseWithTransport):
"""Test the behaviour when parents are provided that were not requested."""
@@ -1582,6 +1595,14 @@
self.caching_pp.get_parent_map(['rev2']))
self.assertEqual(['rev3'], self.inst_pp.calls)
+ def test_extras_using_cached(self):
+ self.assertEqual({}, self.caching_pp.get_cached_parent_map(['rev3']))
+ self.assertEqual({}, self.caching_pp.get_parent_map(['rev3']))
+ self.assertEqual({'rev2': ['rev1']},
+ self.caching_pp.get_cached_parent_map(['rev2']))
+ self.assertEqual(['rev3'], self.inst_pp.calls)
+
+
class TestCollapseLinearRegions(tests.TestCase):
@@ -1815,3 +1836,70 @@
extended_history_shortcut, (), ['a'], 1)
self.assertSearchResult(['f'], ['a'], 4,
extended_history_shortcut, (), ['a'], 2)
+
+
+class TestStackedParentsProvider(tests.TestCase):
+
+ def setUp(self):
+ super(TestStackedParentsProvider, self).setUp()
+ self.calls = []
+
+ def get_shared_provider(self, info, ancestry, has_cached):
+ pp = _mod_graph.DictParentsProvider(ancestry)
+ if has_cached:
+ pp.get_cached_parent_map = pp.get_parent_map
+ return SharedInstrumentedParentsProvider(pp, self.calls, info)
+
+ def test_stacked_parents_provider(self):
+ parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})
+ parents2 = _mod_graph.DictParentsProvider({'rev1': ['rev4']})
+ stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
+ self.assertEqual({'rev1':['rev4'], 'rev2':['rev3']},
+ stacked.get_parent_map(['rev1', 'rev2']))
+ self.assertEqual({'rev2':['rev3'], 'rev1':['rev4']},
+ stacked.get_parent_map(['rev2', 'rev1']))
+ self.assertEqual({'rev2':['rev3']},
+ stacked.get_parent_map(['rev2', 'rev2']))
+ self.assertEqual({'rev1':['rev4']},
+ stacked.get_parent_map(['rev1', 'rev1']))
+
+ def test_stacked_parents_provider_overlapping(self):
+ # rev2 is availible in both providers.
+ # 1
+ # |
+ # 2
+ parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
+ parents2 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
+ stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
+ self.assertEqual({'rev2': ['rev1']},
+ stacked.get_parent_map(['rev2']))
+
+ def test_handles_no_get_cached_parent_map(self):
+ # this shows that we both handle when a provider doesn't implement
+ # get_cached_parent_map
+ pp1 = self.get_shared_provider('pp1', {'rev2': ('rev1',)},
+ has_cached=False)
+ pp2 = self.get_shared_provider('pp2', {'rev2': ('rev1',)},
+ has_cached=True)
+ stacked = _mod_graph.StackedParentsProvider([pp1, pp2])
+ self.assertEqual({'rev2': ('rev1',)}, stacked.get_parent_map(['rev2']))
+ # No call on 'pp1' because it doesn't provide get_cached_parent_map
+ self.assertEqual([('pp2', 'cached', ['rev2'])], self.calls)
+
+ def test_query_order(self):
+ # We should call get_cached_parent_map on all providers before we call
+ # get_parent_map. Further, we should track what entries we have found,
+ # and not re-try them.
+ pp1 = self.get_shared_provider('pp1', {'a': ()}, has_cached=True)
+ pp2 = self.get_shared_provider('pp2', {'c': ('b',)}, has_cached=False)
+ pp3 = self.get_shared_provider('pp3', {'b': ('a',)}, has_cached=True)
+ stacked = _mod_graph.StackedParentsProvider([pp1, pp2, pp3])
+ self.assertEqual({'a': (), 'b': ('a',), 'c': ('b',)},
+ stacked.get_parent_map(['a', 'b', 'c', 'd']))
+ self.assertEqual([('pp1', 'cached', ['a', 'b', 'c', 'd']),
+ # No call to pp2, because it doesn't have cached
+ ('pp3', 'cached', ['b', 'c', 'd']),
+ ('pp1', ['c', 'd']),
+ ('pp2', ['c', 'd']),
+ ('pp3', ['d']),
+ ], self.calls)
=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py 2011-08-25 07:51:37 +0000
+++ b/bzrlib/tests/test_remote.py 2011-08-26 11:35:50 +0000
@@ -2274,6 +2274,32 @@
self.assertEqual({}, repo.get_parent_map(['non-existant']))
self.assertLength(0, self.hpss_calls)
+ def test_exposes_get_cached_parent_map(self):
+ """RemoteRepository exposes get_cached_parent_map from
+ _unstacked_provider
+ """
+ r1 = u'\u0e33'.encode('utf8')
+ r2 = u'\u0dab'.encode('utf8')
+ lines = [' '.join([r2, r1]), r1]
+ encoded_body = bz2.compress('\n'.join(lines))
+
+ transport_path = 'quack'
+ repo, client = self.setup_fake_client_and_repository(transport_path)
+ client.add_success_response_with_body(encoded_body, 'ok')
+ repo.lock_read()
+ # get_cached_parent_map should *not* trigger an RPC
+ self.assertEqual({}, repo.get_cached_parent_map([r1]))
+ self.assertEqual([], client._calls)
+ self.assertEqual({r2: (r1,)}, repo.get_parent_map([r2]))
+ self.assertEqual({r1: (NULL_REVISION,)},
+ repo.get_cached_parent_map([r1]))
+ self.assertEqual(
+ [('call_with_body_bytes_expecting_body',
+ 'Repository.get_parent_map', ('quack/', 'include-missing:', r2),
+ '\n\n0')],
+ client._calls)
+ repo.unlock()
+
class TestGetParentMapAllowsNew(tests.TestCaseWithTransport):
=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt 2011-08-26 00:50:59 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt 2011-08-26 11:35:50 +0000
@@ -122,6 +122,12 @@
that subfolder.
(Bastian Bowe, #809901)
+* Branching from a stacked branch no longer does a ``get_parent_map``
+ request for each revisions that is in the stacked-on repository while
+ determining what revisions need to be fetched. This mostly impacts
+ branching initialy into an empty shared repository when the source is
+ not the development focus. (John Arbash Meinel, #388269)
+
* Decode ``BZR_HOME`` with fs encoding on posix platforms to avoid unicode
errors. (Vincent Ladeuil, #822571)
More information about the bazaar-commits
mailing list