Rev 5839: (spiv) Make better use of smart server when a local repository is stacked on a remote repository, in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Mon May 9 03:23:27 UTC 2011

At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5839 [merge]
revision-id: pqm at pqm.ubuntu.com-20110509032323-fbl2v7298id5trxy
parent: pqm at pqm.ubuntu.com-20110508173705-uvxqfb8c0rtzqj2c
parent: andrew.bennetts at canonical.com-20110509023316-v1n4di9dh5lx5icj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2011-05-09 03:23:23 +0000
message:
  (spiv) Make better use of smart server when a local repository is stacked on
   a remote repository. (Andrew Bennetts)
added:
  bzrlib/tests/per_repository_reference/test__make_parents_provider.py test__make_parents_p-20110504034232-rvaa0psicyjc2i7h-1
modified:
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/groupcompress.py        groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/blackbox/test_branch.py test_branch.py-20060524161337-noms9gmcwqqrfi8y-1
  bzrlib/tests/per_repository_reference/__init__.py __init__.py-20080220025549-nnm2s80it1lvcwnc-2
  bzrlib/tests/per_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
  doc/en/whats-new/whats-new-in-2.4.txt whatsnewin2.4.txt-20110114044330-nipk1og7j729fy89-1
=== modified file 'bzrlib/graph.py'

--- a/bzrlib/graph.py	2011-04-05 17:57:02 +0000
+++ b/bzrlib/graph.py	2011-05-04 02:34:05 +0000
@@ -179,6 +179,23 @@
             self.missing_keys.add(key)
 
 
+class CallableToParentsProviderAdapter(object):
+    """A parents provider that adapts any callable to the parents provider API.
+
+    i.e. it accepts calls to self.get_parent_map and relays them to the
+    callable it was constructed with.
+    """
+
+    def __init__(self, a_callable):
+        self.callable = a_callable
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.callable)
+
+    def get_parent_map(self, keys):
+        return self.callable(keys)
+
+
 class Graph(object):
     """Provide incremental access to revision graphs.
 

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2011-05-05 06:11:43 +0000
+++ b/bzrlib/groupcompress.py	2011-05-09 02:33:16 +0000
@@ -48,7 +48,7 @@
     AbsentContentFactory,
     ChunkedContentFactory,
     FulltextContentFactory,
-    VersionedFiles,
+    VersionedFilesWithFallbacks,
     )
 
 # Minimum number of uncompressed bytes to try fetch at once when retrieving
@@ -1174,7 +1174,7 @@
         self.total_bytes = 0
 
 
-class GroupCompressVersionedFiles(VersionedFiles):
+class GroupCompressVersionedFiles(VersionedFilesWithFallbacks):
     """A group-compress based VersionedFiles implementation."""
 
     def __init__(self, index, access, delta=True, _unadded_refs=None,

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2011-04-15 11:26:00 +0000
+++ b/bzrlib/knit.py	2011-05-04 02:34:05 +0000
@@ -103,7 +103,7 @@
     ConstantMapper,
     ContentFactory,
     sort_groupcompress,
-    VersionedFiles,
+    VersionedFilesWithFallbacks,
     )
 
 
@@ -845,7 +845,7 @@
                 in all_build_index_memos.itervalues()])
 
 
-class KnitVersionedFiles(VersionedFiles):
+class KnitVersionedFiles(VersionedFilesWithFallbacks):
     """Storage for many versioned files using knit compression.
 
     Backend storage is managed by indices and data objects.
@@ -887,6 +887,12 @@
             self._index,
             self._access)
 
+    def without_fallbacks(self):
+        """Return a clone of this object without any fallbacks configured."""
+        return KnitVersionedFiles(self._index, self._access,
+            self._max_delta_chain, self._factory.annotated,
+            self._reload_func)
+
     def add_fallback_versioned_files(self, a_versioned_files):
         """Add a source of texts for texts not present in this knit.
 

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2011-04-27 10:55:49 +0000
+++ b/bzrlib/remote.py	2011-05-04 03:43:03 +0000
@@ -43,7 +43,7 @@
 from bzrlib.smart import client, vfs, repository as smart_repo
 from bzrlib.smart.client import _SmartClient
 from bzrlib.revision import NULL_REVISION
-from bzrlib.repository import RepositoryWriteLockResult
+from bzrlib.repository import RepositoryWriteLockResult, _LazyListJoin
 from bzrlib.trace import mutter, note, warning
 
 
@@ -2009,9 +2009,8 @@
         providers = [self._unstacked_provider]
         if other is not None:
             providers.insert(0, other)
-        providers.extend(r._make_parents_provider() for r in
-                         self._fallback_repositories)
-        return graph.StackedParentsProvider(providers)
+        return graph.StackedParentsProvider(_LazyListJoin(
+            providers, self._fallback_repositories))
 
     def _serialise_search_recipe(self, recipe):
         """Serialise a graph search recipe.

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2011-05-05 06:11:43 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2011-05-09 02:33:16 +0000
@@ -1269,6 +1269,9 @@
                         self._revision_keys)
         self.from_repository.revisions.clear_cache()
         self.from_repository.signatures.clear_cache()
+        # Clear the repo's get_parent_map cache too.
+        self.from_repository._unstacked_provider.disable_cache()
+        self.from_repository._unstacked_provider.enable_cache()
         s = self._get_inventory_stream(self._revision_keys)
         yield (s[0], wrap_and_count(pb, rc, s[1]))
         self.from_repository.inventories.clear_cache()

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2011-04-19 02:03:54 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2011-05-04 04:20:20 +0000
@@ -53,6 +53,7 @@
 from bzrlib.lock import LogicalLockResult
 from bzrlib.repository import (
     CommitBuilder,
+    _LazyListJoin,
     MetaDirRepository,
     MetaDirRepositoryFormat,
     RepositoryFormat,
@@ -1660,6 +1661,12 @@
         self._commit_builder_class = _commit_builder_class
         self._serializer = _serializer
         self._reconcile_fixes_text_parents = True
+        if self._format.supports_external_lookups:
+            self._unstacked_provider = graph.CachingParentsProvider(
+                self._make_parents_provider_unstacked())
+        else:
+            self._unstacked_provider = graph.CachingParentsProvider(self)
+        self._unstacked_provider.disable_cache()
 
     @needs_read_lock
     def _all_revision_ids(self):
@@ -1671,12 +1678,17 @@
         self._pack_collection._abort_write_group()
 
     def _make_parents_provider(self):
-        return graph.CachingParentsProvider(self)
+        if not self._format.supports_external_lookups:
+            return self._unstacked_provider
+        return graph.StackedParentsProvider(_LazyListJoin(
+            [self._unstacked_provider], self._fallback_repositories))
 
     def _refresh_data(self):
         if not self.is_locked():
             return
         self._pack_collection.reload_pack_names()
+        self._unstacked_provider.disable_cache()
+        self._unstacked_provider.enable_cache()
 
     def _start_write_group(self):
         self._pack_collection._start_write_group()
@@ -1684,6 +1696,10 @@
     def _commit_write_group(self):
         hint = self._pack_collection._commit_write_group()
         self.revisions._index._key_dependencies.clear()
+        # The commit may have added keys that were previously cached as
+        # missing, so reset the cache.
+        self._unstacked_provider.disable_cache()
+        self._unstacked_provider.enable_cache()
         return hint
 
     def suspend_write_group(self):
@@ -1730,6 +1746,7 @@
             if 'relock' in debug.debug_flags and self._prev_lock == 'w':
                 note('%r was write locked again', self)
             self._prev_lock = 'w'
+            self._unstacked_provider.enable_cache()
             for repo in self._fallback_repositories:
                 # Writes don't affect fallback repos
                 repo.lock_read()
@@ -1750,6 +1767,7 @@
             if 'relock' in debug.debug_flags and self._prev_lock == 'r':
                 note('%r was read locked again', self)
             self._prev_lock = 'r'
+            self._unstacked_provider.enable_cache()
             for repo in self._fallback_repositories:
                 repo.lock_read()
             self._refresh_data()
@@ -1787,6 +1805,7 @@
     def unlock(self):
         if self._write_lock_count == 1 and self._write_group is not None:
             self.abort_write_group()
+            self._unstacked_provider.disable_cache()
             self._transaction = None
             self._write_lock_count = 0
             raise errors.BzrError(
@@ -1802,6 +1821,7 @@
             self.control_files.unlock()
 
         if not self.is_locked():
+            self._unstacked_provider.disable_cache()
             for repo in self._fallback_repositories:
                 repo.unlock()
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2011-05-02 17:01:39 +0000
+++ b/bzrlib/repository.py	2011-05-04 04:20:20 +0000
@@ -2623,8 +2623,39 @@
                 result[revision_id] = (_mod_revision.NULL_REVISION,)
         return result
 
+    def _get_parent_map_no_fallbacks(self, revision_ids):
+        """Same as Repository.get_parent_map except doesn't query fallbacks."""
+        # revisions index works in keys; this just works in revisions
+        # therefore wrap and unwrap
+        query_keys = []
+        result = {}
+        for revision_id in revision_ids:
+            if revision_id == _mod_revision.NULL_REVISION:
+                result[revision_id] = ()
+            elif revision_id is None:
+                raise ValueError('get_parent_map(None) is not valid')
+            else:
+                query_keys.append((revision_id ,))
+        vf = self.revisions.without_fallbacks()
+        for ((revision_id,), parent_keys) in \
+                vf.get_parent_map(query_keys).iteritems():
+            if parent_keys:
+                result[revision_id] = tuple([parent_revid
+                    for (parent_revid,) in parent_keys])
+            else:
+                result[revision_id] = (_mod_revision.NULL_REVISION,)
+        return result
+
     def _make_parents_provider(self):
-        return self
+        if not self._format.supports_external_lookups:
+            return self
+        return graph.StackedParentsProvider(_LazyListJoin(
+            [self._make_parents_provider_unstacked()],
+            self._fallback_repositories))
+
+    def _make_parents_provider_unstacked(self):
+        return graph.CallableToParentsProviderAdapter(
+            self._get_parent_map_no_fallbacks)
 
     @needs_read_lock
     def get_known_graph_ancestry(self, revision_ids):
@@ -4517,3 +4548,28 @@
     except StopIteration:
         # No more history
         return
+
+
+class _LazyListJoin(object):
+    """An iterable yielding the contents of many lists as one list.
+
+    Each iterator made from this will reflect the current contents of the lists
+    at the time the iterator is made.
+    
+    This is used by Repository's _make_parents_provider implementation so that
+    it is safe to do::
+
+      pp = repo._make_parents_provider()      # uses a list of fallback repos
+      repo.add_fallback_repository(other_repo)  # appends to that list
+      result = pp.get_parent_map(...)
+      # The result will include revs from other_repo
+    """
+
+    def __init__(self, *list_parts):
+        self.list_parts = list_parts
+
+    def __iter__(self):
+        full_list = []
+        for list_part in self.list_parts:
+            full_list.extend(list_part)
+        return iter(full_list)

=== modified file 'bzrlib/tests/blackbox/test_branch.py'
--- a/bzrlib/tests/blackbox/test_branch.py	2011-05-05 16:24:16 +0000
+++ b/bzrlib/tests/blackbox/test_branch.py	2011-05-09 02:33:16 +0000
@@ -480,6 +480,25 @@
         # upwards without agreement from bzr's network support maintainers.
         self.assertLength(9, self.hpss_calls)
 
+    def test_branch_to_stacked_from_trivial_branch_streaming_acceptance(self):
+        self.setup_smart_server_with_call_log()
+        t = self.make_branch_and_tree('from')
+        for count in range(9):
+            t.commit(message='commit %d' % count)
+        self.reset_smart_call_log()
+        out, err = self.run_bzr(['branch', '--stacked', self.get_url('from'),
+            'local-target'])
+        # XXX: the number of hpss calls for this case isn't deterministic yet,
+        # so we can't easily assert about the number of calls.
+        #self.assertLength(XXX, self.hpss_calls)
+        # We can assert that none of the calls were readv requests for rix
+        # files, though (demonstrating that at least get_parent_map calls are
+        # not using VFS RPCs).
+        readvs_of_rix_files = [
+            c for c in self.hpss_calls
+            if c.call.method == 'readv' and c.call.args[-1].endswith('.rix')]
+        self.assertLength(0, readvs_of_rix_files)
+
 
 class TestRemoteBranch(TestCaseWithSFTPServer):
 

=== modified file 'bzrlib/tests/per_repository_reference/__init__.py'
--- a/bzrlib/tests/per_repository_reference/__init__.py	2011-02-09 06:35:00 +0000
+++ b/bzrlib/tests/per_repository_reference/__init__.py	2011-05-04 03:43:03 +0000
@@ -107,6 +107,7 @@
         'bzrlib.tests.per_repository_reference.test_get_rev_id_for_revno',
         'bzrlib.tests.per_repository_reference.test_graph',
         'bzrlib.tests.per_repository_reference.test_initialize',
+        'bzrlib.tests.per_repository_reference.test__make_parents_provider',
         'bzrlib.tests.per_repository_reference.test_unlock',
         ]
     # Parameterize per_repository_reference test modules by format.

=== added file 'bzrlib/tests/per_repository_reference/test__make_parents_provider.py'
--- a/bzrlib/tests/per_repository_reference/test__make_parents_provider.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/per_repository_reference/test__make_parents_provider.py	2011-05-04 03:43:03 +0000
@@ -0,0 +1,44 @@
+# Copyright (C) 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+"""Tests for _make_parents_provider on stacked repositories."""
+
+
+from bzrlib.tests.per_repository import TestCaseWithRepository
+
+
+class Test_MakeParentsProvider(TestCaseWithRepository):
+
+    def test_add_fallback_after_make_pp(self):
+        """Fallbacks added after _make_parents_provider are used by that
+        provider.
+        """
+        referring_repo = self.make_repository('repo')
+        pp = referring_repo._make_parents_provider()
+        # Initially referring_repo has no revisions and no fallbacks
+        self.addCleanup(referring_repo.lock_read().unlock)
+        self.assertEqual({}, pp.get_parent_map(['revid2']))
+        # Add a fallback repo with a commit
+        wt_a = self.make_branch_and_tree('fallback')
+        wt_a.commit('first commit', rev_id='revid1')
+        wt_a.commit('second commit', rev_id='revid2')
+        fallback_repo = wt_a.branch.repository
+        referring_repo.add_fallback_repository(fallback_repo)
+        # Now revid1 appears in pp's results.
+        self.assertEqual(('revid1',), pp.get_parent_map(['revid2'])['revid2'])
+
+

=== modified file 'bzrlib/tests/per_versionedfile.py'
--- a/bzrlib/tests/per_versionedfile.py	2011-02-10 17:03:01 +0000
+++ b/bzrlib/tests/per_versionedfile.py	2011-05-04 02:40:34 +0000
@@ -1482,6 +1482,18 @@
         else:
             return ('FileA',) + (suffix,)
 
+    def test_add_fallback_implies_without_fallbacks(self):
+        f = self.get_versionedfiles('files')
+        if getattr(f, 'add_fallback_versioned_files', None) is None:
+            raise TestNotApplicable("%s doesn't support fallbacks"
+                                    % (f.__class__.__name__,))
+        g = self.get_versionedfiles('fallback')
+        key_a = self.get_simple_key('a')
+        g.add_lines(key_a, [], ['\n'])
+        f.add_fallback_versioned_files(g)
+        self.assertTrue(key_a in f.get_parent_map([key_a]))
+        self.assertFalse(key_a in f.without_fallbacks().get_parent_map([key_a]))
+
     def test_add_lines(self):
         f = self.get_versionedfiles()
         key0 = self.get_simple_key('r0')

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2011-04-05 17:52:20 +0000
+++ b/bzrlib/versionedfile.py	2011-05-04 02:34:05 +0000
@@ -1422,6 +1422,20 @@
         return result
 
 
+class VersionedFilesWithFallbacks(VersionedFiles):
+
+    def without_fallbacks(self):
+        """Return a clone of this object without any fallbacks configured."""
+        raise NotImplementedError(self.without_fallbacks)
+
+    def add_fallback_versioned_files(self, a_versioned_files):
+        """Add a source of texts for texts not present in this object.
+
+        :param a_versioned_files: A VersionedFiles object.
+        """
+        raise NotImplementedError(self.add_fallback_versioned_files)
+
+
 class _PlanMergeVersionedFile(VersionedFiles):
     """A VersionedFile for uncommitted and committed texts.
 

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-05-07 23:58:36 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-05-09 02:33:16 +0000
@@ -26,6 +26,10 @@
 .. Improvements to existing commands, especially improved performance 
    or memory usage, or better results.
 
+* ``bzr branch --stacked`` from a smart server uses the network a little
+  more efficiently.  For a simple branch it reduces the number of
+  round-trips by about 20%.  (Andrew Bennetts)
+
 * Slightly reduced memory consumption when fetching into a 2a repository
   by reusing existing caching a little better.  (Andrew Bennetts)
 

=== modified file 'doc/en/whats-new/whats-new-in-2.4.txt'
--- a/doc/en/whats-new/whats-new-in-2.4.txt	2011-03-27 11:04:46 +0000
+++ b/doc/en/whats-new/whats-new-in-2.4.txt	2011-05-09 02:13:40 +0000
@@ -54,6 +54,14 @@
 format.  Refer to ``bzr help changelog_merge`` for documentation on how to
 enable it and what it can do.
 
+Faster stacked branches
+***********************
+
+Creating a stacked branch from a smart server with ``bzr branch
+--stacked`` is a bit faster now.  For small branches it does 20% fewer
+network round-trips.  Other operations where a local branch is stacked on a
+branch hosted on a smart server will also benefit.
+
 Further information
 *******************