Rev 6105: Merge fix for bug #388269 into trunk, resolve conflicts and add release notes. in http://bazaar.launchpad.net/~jameinel/bzr/2.5-get_cached_parent_map-388269

Fri Aug 26 11:36:00 UTC 2011

At http://bazaar.launchpad.net/~jameinel/bzr/2.5-get_cached_parent_map-388269

------------------------------------------------------------
revno: 6105 [merge]
revision-id: john at arbash-meinel.com-20110826113550-jtk2c6s2is9b0th4
parent: pqm at pqm.ubuntu.com-20110826095404-1b0s2cpvcf97ox0z
parent: john at arbash-meinel.com-20110826112951-69npob6aoky0avmj
fixes bug(s): https://launchpad.net/bugs/388269
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.5-get_cached_parent_map-388269
timestamp: Fri 2011-08-26 13:35:50 +0200
message:
  Merge fix for bug #388269 into trunk, resolve conflicts and add release notes.
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/tests/per_repository_reference/test_graph.py test_graph.py-20110208074011-b92ci87urjayd2q7-1
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
  bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
  doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
-------------- next part --------------
=== modified file 'bzrlib/graph.py'

--- a/bzrlib/graph.py	2011-08-24 09:01:44 +0000
+++ b/bzrlib/graph.py	2011-08-26 11:35:50 +0000
@@ -58,6 +58,11 @@
     def __repr__(self):
         return 'DictParentsProvider(%r)' % self.ancestry
 
+    # Note: DictParentsProvider does not implement get_cached_parent_map
+    #       Arguably, the data is clearly cached in memory. However, this class
+    #       is mostly used for testing, and it keeps the tests clean to not
+    #       change it.
+
     def get_parent_map(self, keys):
         """See StackedParentsProvider.get_parent_map"""
         ancestry = self.ancestry
@@ -66,10 +71,10 @@
 
 class StackedParentsProvider(object):
     """A parents provider which stacks (or unions) multiple providers.
-    
+
     The providers are queries in the order of the provided parent_providers.
     """
-    
+
     def __init__(self, parent_providers):
         self._parent_providers = parent_providers
 
@@ -91,6 +96,22 @@
         """
         found = {}
         remaining = set(keys)
+        # This adds getattr() overhead to each get_parent_map call. However,
+        # this is StackedParentsProvider, which means we're dealing with I/O
+        # (either local indexes, or remote RPCs), so CPU overhead should be
+        # minimal.
+        for parents_provider in self._parent_providers:
+            get_cached = getattr(parents_provider, 'get_cached_parent_map',
+                                 None)
+            if get_cached is None:
+                continue
+            new_found = get_cached(remaining)
+            found.update(new_found)
+            remaining.difference_update(new_found)
+            if not remaining:
+                break
+        if not remaining:
+            return found
         for parents_provider in self._parent_providers:
             new_found = parents_provider.get_parent_map(remaining)
             found.update(new_found)
@@ -150,6 +171,17 @@
             return None
         return dict(self._cache)
 
+    def get_cached_parent_map(self, keys):
+        """Return items from the cache.
+
+        This returns the same info as get_parent_map, but explicitly does not
+        invoke the supplied ParentsProvider to search for uncached values.
+        """
+        cache = self._cache
+        if cache is None:
+            return {}
+        return dict([(key, cache[key]) for key in keys if key in cache])
+
     def get_parent_map(self, keys):
         """See StackedParentsProvider.get_parent_map."""
         cache = self._cache

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2011-08-25 07:51:37 +0000
+++ b/bzrlib/remote.py	2011-08-26 11:35:50 +0000
@@ -1685,6 +1685,10 @@
         self._ensure_real()
         return self._real_repository.iter_files_bytes(desired_files)
 
+    def get_cached_parent_map(self, revision_ids):
+        """See bzrlib.CachingParentsProvider.get_cached_parent_map"""
+        return self._unstacked_provider.get_cached_parent_map(revision_ids)
+
     def get_parent_map(self, revision_ids):
         """See bzrlib.Graph.get_parent_map()."""
         return self._make_parents_provider().get_parent_map(revision_ids)

=== modified file 'bzrlib/tests/per_repository_reference/test_graph.py'
--- a/bzrlib/tests/per_repository_reference/test_graph.py	2011-02-09 08:07:51 +0000
+++ b/bzrlib/tests/per_repository_reference/test_graph.py	2011-08-26 11:26:29 +0000
@@ -18,6 +18,11 @@
 """Tests for graph operations on stacked repositories."""
 
 
+from bzrlib import (
+    remote,
+    repository,
+    tests,
+    )
 from bzrlib.tests.per_repository import TestCaseWithRepository
 
 
@@ -43,3 +48,67 @@
         branch_c.set_stacked_on_url('../b')
         revid_1 = wt_a.commit('first commit')
         return branch_a, branch_b, branch_c, revid_1
+
+    def make_stacked_branch_with_long_history(self):
+        builder = self.make_branch_builder('source')
+        builder.start_series()
+        builder.build_snapshot('A', None, [
+            ('add', ('', 'directory', 'root-id', None))])
+        builder.build_snapshot('B', ['A'], [])
+        builder.build_snapshot('C', ['B'], [])
+        builder.build_snapshot('D', ['C'], [])
+        builder.build_snapshot('E', ['D'], [])
+        builder.build_snapshot('F', ['E'], [])
+        source_b = builder.get_branch()
+        master_b = self.make_branch('master')
+        master_b.pull(source_b, stop_revision='E')
+        stacked_b = self.make_branch('stacked')
+        stacked_b.set_stacked_on_url('../master')
+        stacked_b.pull(source_b, stop_revision='F')
+        builder.finish_series()
+        return master_b, stacked_b
+
+    def assertParentMapCalls(self, expected):
+        """Check that self.hpss_calls has the expected get_parent_map calls."""
+        get_parent_map_calls = []
+        for c in self.hpss_calls:
+            # Right now, the only RPCs that get called are get_parent_map. If
+            # this changes in the future, we can change this to:
+            # if c.call.method != 'Repository.get_parent_map':
+            #    continue
+            self.assertEqual('Repository.get_parent_map', c.call.method)
+            args = c.call.args
+            location = args[0]
+            self.assertEqual('include-missing:', args[1])
+            revisions = sorted(args[2:])
+            get_parent_map_calls.append((location, revisions))
+        self.assertEqual(expected, get_parent_map_calls)
+
+    def test_doesnt_call_get_parent_map_on_all_fallback_revs(self):
+        if not isinstance(self.repository_format,
+                          remote.RemoteRepositoryFormat):
+            raise tests.TestNotApplicable('only for RemoteRepository')
+        # bug #388269
+        master_b, stacked_b = self.make_stacked_branch_with_long_history()
+        self.addCleanup(stacked_b.lock_read().unlock)
+        self.make_repository('target_repo', shared=True)
+        target_b = self.make_branch('target_repo/branch')
+        self.addCleanup(target_b.lock_write().unlock)
+        self.setup_smart_server_with_call_log()
+        res = target_b.repository.search_missing_revision_ids(
+                stacked_b.repository, revision_ids=['F'],
+                find_ghosts=False)
+        self.assertParentMapCalls([
+            # One call to stacked to start, which returns F=>E, and that E
+            # itself is missing, so when we step, we won't look for it.
+            ('extra/stacked/', ['F']),
+            # One fallback call to extra/master, which will return the rest of
+            # the history.
+            ('extra/master/', ['E']),
+            # And then one get_parent_map call to the target, to see if it
+            # already has any of these revisions.
+            ('extra/target_repo/branch/', ['A', 'B', 'C', 'D', 'E', 'F']),
+            ])
+        # Before bug #388269 was fixed, there would be a bunch of extra calls
+        # to 'extra/stacked', ['D'] then ['C'], then ['B'], then ['A'].
+        # One-at-a-time for the rest of the ancestry.

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2011-08-24 09:01:44 +0000
+++ b/bzrlib/tests/test_graph.py	2011-08-26 11:35:50 +0000
@@ -424,11 +424,41 @@
     def __init__(self, parents_provider):
         self.calls = []
         self._real_parents_provider = parents_provider
+        get_cached = getattr(parents_provider, 'get_cached_parent_map', None)
+        if get_cached is not None:
+            # Only expose the underlying 'get_cached_parent_map' function if
+            # the wrapped provider has it.
+            self.get_cached_parent_map = self._get_cached_parent_map
 
     def get_parent_map(self, nodes):
         self.calls.extend(nodes)
         return self._real_parents_provider.get_parent_map(nodes)
 
+    def _get_cached_parent_map(self, nodes):
+        self.calls.append(('cached', sorted(nodes)))
+        return self._real_parents_provider.get_cached_parent_map(nodes)
+
+
+class SharedInstrumentedParentsProvider(object):
+
+    def __init__(self, parents_provider, calls, info):
+        self.calls = calls
+        self.info = info
+        self._real_parents_provider = parents_provider
+        get_cached = getattr(parents_provider, 'get_cached_parent_map', None)
+        if get_cached is not None:
+            # Only expose the underlying 'get_cached_parent_map' function if
+            # the wrapped provider has it.
+            self.get_cached_parent_map = self._get_cached_parent_map
+
+    def get_parent_map(self, nodes):
+        self.calls.append((self.info, sorted(nodes)))
+        return self._real_parents_provider.get_parent_map(nodes)
+
+    def _get_cached_parent_map(self, nodes):
+        self.calls.append((self.info, 'cached', sorted(nodes)))
+        return self._real_parents_provider.get_cached_parent_map(nodes)
+
 
 class TestGraphBase(tests.TestCase):
 
@@ -673,31 +703,6 @@
         self.assertEqual((set(['e']), set(['f', 'g'])),
                          graph.find_difference('e', 'f'))
 
-
-    def test_stacked_parents_provider(self):
-        parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})
-        parents2 = _mod_graph.DictParentsProvider({'rev1': ['rev4']})
-        stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
-        self.assertEqual({'rev1':['rev4'], 'rev2':['rev3']},
-                         stacked.get_parent_map(['rev1', 'rev2']))
-        self.assertEqual({'rev2':['rev3'], 'rev1':['rev4']},
-                         stacked.get_parent_map(['rev2', 'rev1']))
-        self.assertEqual({'rev2':['rev3']},
-                         stacked.get_parent_map(['rev2', 'rev2']))
-        self.assertEqual({'rev1':['rev4']},
-                         stacked.get_parent_map(['rev1', 'rev1']))
-    
-    def test_stacked_parents_provider_overlapping(self):
-        # rev2 is availible in both providers.
-        # 1
-        # |
-        # 2
-        parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
-        parents2 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
-        stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
-        self.assertEqual({'rev2': ['rev1']},
-                         stacked.get_parent_map(['rev2']))
-
     def test_iter_topo_order(self):
         graph = self.make_graph(ancestry_1)
         args = ['rev2a', 'rev3', 'rev1']
@@ -1473,7 +1478,7 @@
 
     def setUp(self):
         super(TestCachingParentsProvider, self).setUp()
-        dict_pp = _mod_graph.DictParentsProvider({'a':('b',)})
+        dict_pp = _mod_graph.DictParentsProvider({'a': ('b',)})
         self.inst_pp = InstrumentedParentsProvider(dict_pp)
         self.caching_pp = _mod_graph.CachingParentsProvider(self.inst_pp)
 
@@ -1518,6 +1523,14 @@
         self.assertEqual([], self.inst_pp.calls)
         self.assertEqual(set(['b']), self.caching_pp.missing_keys)
 
+    def test_get_cached_parent_map(self):
+        self.assertEqual({}, self.caching_pp.get_cached_parent_map(['a']))
+        self.assertEqual([], self.inst_pp.calls)
+        self.assertEqual({'a': ('b',)}, self.caching_pp.get_parent_map(['a']))
+        self.assertEqual(['a'], self.inst_pp.calls)
+        self.assertEqual({'a': ('b',)},
+                         self.caching_pp.get_cached_parent_map(['a']))
+
 
 class TestCachingParentsProviderExtras(tests.TestCaseWithTransport):
     """Test the behaviour when parents are provided that were not requested."""
@@ -1582,6 +1595,14 @@
                          self.caching_pp.get_parent_map(['rev2']))
         self.assertEqual(['rev3'], self.inst_pp.calls)
 
+    def test_extras_using_cached(self):
+        self.assertEqual({}, self.caching_pp.get_cached_parent_map(['rev3']))
+        self.assertEqual({}, self.caching_pp.get_parent_map(['rev3']))
+        self.assertEqual({'rev2': ['rev1']},
+                         self.caching_pp.get_cached_parent_map(['rev2']))
+        self.assertEqual(['rev3'], self.inst_pp.calls)
+
+
 
 class TestCollapseLinearRegions(tests.TestCase):
 
@@ -1815,3 +1836,70 @@
                                 extended_history_shortcut, (), ['a'], 1)
         self.assertSearchResult(['f'], ['a'], 4,
                                 extended_history_shortcut, (), ['a'], 2)
+
+
+class TestStackedParentsProvider(tests.TestCase):
+
+    def setUp(self):
+        super(TestStackedParentsProvider, self).setUp()
+        self.calls = []
+
+    def get_shared_provider(self, info, ancestry, has_cached):
+        pp = _mod_graph.DictParentsProvider(ancestry)
+        if has_cached:
+            pp.get_cached_parent_map = pp.get_parent_map
+        return SharedInstrumentedParentsProvider(pp, self.calls, info)
+
+    def test_stacked_parents_provider(self):
+        parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})
+        parents2 = _mod_graph.DictParentsProvider({'rev1': ['rev4']})
+        stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
+        self.assertEqual({'rev1':['rev4'], 'rev2':['rev3']},
+                         stacked.get_parent_map(['rev1', 'rev2']))
+        self.assertEqual({'rev2':['rev3'], 'rev1':['rev4']},
+                         stacked.get_parent_map(['rev2', 'rev1']))
+        self.assertEqual({'rev2':['rev3']},
+                         stacked.get_parent_map(['rev2', 'rev2']))
+        self.assertEqual({'rev1':['rev4']},
+                         stacked.get_parent_map(['rev1', 'rev1']))
+
+    def test_stacked_parents_provider_overlapping(self):
+        # rev2 is availible in both providers.
+        # 1
+        # |
+        # 2
+        parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
+        parents2 = _mod_graph.DictParentsProvider({'rev2': ['rev1']})
+        stacked = _mod_graph.StackedParentsProvider([parents1, parents2])
+        self.assertEqual({'rev2': ['rev1']},
+                         stacked.get_parent_map(['rev2']))
+
+    def test_handles_no_get_cached_parent_map(self):
+        # this shows that we both handle when a provider doesn't implement
+        # get_cached_parent_map
+        pp1 = self.get_shared_provider('pp1', {'rev2': ('rev1',)},
+                                       has_cached=False)
+        pp2 = self.get_shared_provider('pp2', {'rev2': ('rev1',)},
+                                       has_cached=True)
+        stacked = _mod_graph.StackedParentsProvider([pp1, pp2])
+        self.assertEqual({'rev2': ('rev1',)}, stacked.get_parent_map(['rev2']))
+        # No call on 'pp1' because it doesn't provide get_cached_parent_map
+        self.assertEqual([('pp2', 'cached', ['rev2'])], self.calls)
+
+    def test_query_order(self):
+        # We should call get_cached_parent_map on all providers before we call
+        # get_parent_map. Further, we should track what entries we have found,
+        # and not re-try them.
+        pp1 = self.get_shared_provider('pp1', {'a': ()}, has_cached=True)
+        pp2 = self.get_shared_provider('pp2', {'c': ('b',)}, has_cached=False)
+        pp3 = self.get_shared_provider('pp3', {'b': ('a',)}, has_cached=True)
+        stacked = _mod_graph.StackedParentsProvider([pp1, pp2, pp3])
+        self.assertEqual({'a': (), 'b': ('a',), 'c': ('b',)},
+                         stacked.get_parent_map(['a', 'b', 'c', 'd']))
+        self.assertEqual([('pp1', 'cached', ['a', 'b', 'c', 'd']),
+                          # No call to pp2, because it doesn't have cached
+                          ('pp3', 'cached', ['b', 'c', 'd']),
+                          ('pp1', ['c', 'd']),
+                          ('pp2', ['c', 'd']),
+                          ('pp3', ['d']),
+                         ], self.calls)

=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py	2011-08-25 07:51:37 +0000
+++ b/bzrlib/tests/test_remote.py	2011-08-26 11:35:50 +0000
@@ -2274,6 +2274,32 @@
         self.assertEqual({}, repo.get_parent_map(['non-existant']))
         self.assertLength(0, self.hpss_calls)
 
+    def test_exposes_get_cached_parent_map(self):
+        """RemoteRepository exposes get_cached_parent_map from
+        _unstacked_provider
+        """
+        r1 = u'\u0e33'.encode('utf8')
+        r2 = u'\u0dab'.encode('utf8')
+        lines = [' '.join([r2, r1]), r1]
+        encoded_body = bz2.compress('\n'.join(lines))
+
+        transport_path = 'quack'
+        repo, client = self.setup_fake_client_and_repository(transport_path)
+        client.add_success_response_with_body(encoded_body, 'ok')
+        repo.lock_read()
+        # get_cached_parent_map should *not* trigger an RPC
+        self.assertEqual({}, repo.get_cached_parent_map([r1]))
+        self.assertEqual([], client._calls)
+        self.assertEqual({r2: (r1,)}, repo.get_parent_map([r2]))
+        self.assertEqual({r1: (NULL_REVISION,)},
+            repo.get_cached_parent_map([r1]))
+        self.assertEqual(
+            [('call_with_body_bytes_expecting_body',
+              'Repository.get_parent_map', ('quack/', 'include-missing:', r2),
+              '\n\n0')],
+            client._calls)
+        repo.unlock()
+
 
 class TestGetParentMapAllowsNew(tests.TestCaseWithTransport):
 

=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt	2011-08-26 00:50:59 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt	2011-08-26 11:35:50 +0000
@@ -122,6 +122,12 @@
   that subfolder.
   (Bastian Bowe, #809901)
 
+* Branching from a stacked branch no longer does a ``get_parent_map``
+  request for each revisions that is in the stacked-on repository while
+  determining what revisions need to be fetched. This mostly impacts
+  branching initialy into an empty shared repository when the source is
+  not the development focus.  (John Arbash Meinel, #388269)
+
 * Decode ``BZR_HOME`` with fs encoding on posix platforms to avoid unicode
   errors.  (Vincent Ladeuil, #822571)