Rev 4199: (robertc) Negatively cache ghosts and misses during read-locks in in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Mar 24 23:19:18 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4199
revision-id: pqm at pqm.ubuntu.com-20090324231912-rb0kgktzkvge8aea
parent: pqm at pqm.ubuntu.com-20090324175300-mnu99mqpmjkkzgq6
parent: robertc at robertcollins.net-20090324221924-y19e65gs9jw7u0ue
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-03-24 23:19:12 +0000
message:
  (robertc) Negatively cache ghosts and misses during read-locks in
  	RemoteRepository. (Robert Collins, Andrew Bennetts)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/memorytree.py           memorytree.py-20060906023413-4wlkalbdpsxi2r4y-1
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
  bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
  bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
  bzrlib/tests/test_smart.py     test_smart.py-20061122024551-ol0l0o0oofsu9b3t-2
    ------------------------------------------------------------
    revno: 4190.1.6
    revision-id: robertc at robertcollins.net-20090324221924-y19e65gs9jw7u0ue
    parent: robertc at robertcollins.net-20090324213428-rol001c1jypq9y30
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: integration
    timestamp: Wed 2009-03-25 09:19:24 +1100
    message:
      Missed some unit tests.
    modified:
      bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
    ------------------------------------------------------------
    revno: 4190.1.5
    revision-id: robertc at robertcollins.net-20090324213428-rol001c1jypq9y30
    parent: robertc at robertcollins.net-20090324070911-w59etn9q3f9xj4fu
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: Remote.negative_parents_cache
    timestamp: Wed 2009-03-25 08:34:28 +1100
    message:
      Review tweaks.
    modified:
      bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
      bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
    ------------------------------------------------------------
    revno: 4190.1.4
    revision-id: robertc at robertcollins.net-20090324070911-w59etn9q3f9xj4fu
    parent: robertc at robertcollins.net-20090324060735-nk6ti3wq06062lvz
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: Remote.negative_parents_cache
    timestamp: Tue 2009-03-24 18:09:11 +1100
    message:
      Cache ghosts when we can get them from a RemoteRepository in get_parent_map.
    modified:
      bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
      bzrlib/memorytree.py           memorytree.py-20060906023413-4wlkalbdpsxi2r4y-1
      bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
      bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
      bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
    ------------------------------------------------------------
    revno: 4190.1.3
    revision-id: robertc at robertcollins.net-20090324060735-nk6ti3wq06062lvz
    parent: robertc at robertcollins.net-20090324053736-rgxw7s77h2948gf2
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: Remote.negative_parents_cache
    timestamp: Tue 2009-03-24 17:07:35 +1100
    message:
      Allow optional inclusion of ghost data in server get_parent_map calls.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
      bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
      bzrlib/tests/test_smart.py     test_smart.py-20061122024551-ol0l0o0oofsu9b3t-2
    ------------------------------------------------------------
    revno: 4190.1.2
    revision-id: robertc at robertcollins.net-20090324053736-rgxw7s77h2948gf2
    parent: robertc at robertcollins.net-20090324051156-04bm37t0os1idrrs
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: Remote.negative_parents_cache
    timestamp: Tue 2009-03-24 16:37:36 +1100
    message:
      NEWS formatting.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
    ------------------------------------------------------------
    revno: 4190.1.1
    revision-id: robertc at robertcollins.net-20090324051156-04bm37t0os1idrrs
    parent: pqm at pqm.ubuntu.com-20090324015928-a4eisbr51odi0due
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: Remote.negative_parents_cache
    timestamp: Tue 2009-03-24 16:11:56 +1100
    message:
      Negatively cache misses during read-locks in RemoteRepository.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
      bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
      bzrlib/tests/test_graph.py     test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
      bzrlib/tests/test_remote.py    test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
=== modified file 'NEWS'
--- a/NEWS	2009-03-24 12:15:01 +0000
+++ b/NEWS	2009-03-24 23:19:12 +0000
@@ -211,6 +211,10 @@
   make visible data inserted into the repository by a smart server
   fetch operation. (Robert Collins, Andrew Bennetts)
 
+* ``RemoteRepository`` will now negatively cache missing revisions during
+  ``get_parent_map`` while read-locked. Write-locks are unaffected.
+  (Robert Collins, Andrew Bennetts)
+
 * Removed ``InterRemoteToOther``, ``InterOtherToRemote`` and
   ``InterPackToRemotePack`` classes, as they are now unnecessary.
   (Andrew Bennetts)
@@ -219,6 +223,11 @@
   whether the repository can efficiently generate deltas between trees
   regardless of tree size. (Robert Collins)
 
+* The smart server verb ``Repository.get_parent_map`` can now include
+  information about ghosts when the special revision ``include-missing:``
+  is in the requested parents map list. With this flag, ghosts are
+  included as ``missing:REVISION_ID``. (Robert Collins, Andrew Bennetts)
+
 * ``_walk_to_common_revisions`` will now batch up at least 50
   revisions before calling ``get_parent_map`` on the target,
   regardless of ``InterRepository``.

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2009-03-24 01:53:42 +0000
+++ b/bzrlib/graph.py	2009-03-24 23:19:12 +0000
@@ -121,8 +121,8 @@
             self._get_parent_map = self._real_provider.get_parent_map
         else:
             self._get_parent_map = get_parent_map
-        self._cache = {}
-        self._cache_misses = True
+        self._cache = None
+        self.enable_cache(True)
 
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self._real_provider)
@@ -133,38 +133,47 @@
             raise AssertionError('Cache enabled when already enabled.')
         self._cache = {}
         self._cache_misses = cache_misses
+        self.missing_keys = set()
 
     def disable_cache(self):
         """Disable and clear the cache."""
         self._cache = None
+        self._cache_misses = None
+        self.missing_keys = set()
 
     def get_cached_map(self):
         """Return any cached get_parent_map values."""
         if self._cache is None:
             return None
-        return dict((k, v) for k, v in self._cache.items()
-                    if v is not None)
+        return dict(self._cache)
 
     def get_parent_map(self, keys):
         """See _StackedParentsProvider.get_parent_map."""
-        # Hack to build up the caching logic.
-        ancestry = self._cache
-        if ancestry is None:
-            # Caching is disabled.
-            missing_revisions = set(keys)
-            ancestry = {}
+        cache = self._cache
+        if cache is None:
+            cache = self._get_parent_map(keys)
         else:
-            missing_revisions = set(key for key in keys if key not in ancestry)
-        if missing_revisions:
-            parent_map = self._get_parent_map(missing_revisions)
-            ancestry.update(parent_map)
-            if self._cache_misses:
-                # None is never a valid parents list, so it can be used to
-                # record misses.
-                ancestry.update(dict((k, None) for k in missing_revisions
-                                     if k not in parent_map))
-        present_keys = [k for k in keys if ancestry.get(k) is not None]
-        return dict((k, ancestry[k]) for k in present_keys)
+            needed_revisions = set(key for key in keys if key not in cache)
+            # Do not ask for negatively cached keys
+            needed_revisions.difference_update(self.missing_keys)
+            if needed_revisions:
+                parent_map = self._get_parent_map(needed_revisions)
+                cache.update(parent_map)
+                if self._cache_misses:
+                    for key in needed_revisions:
+                        if key not in parent_map:
+                            self.note_missing_key(key)
+        result = {}
+        for key in keys:
+            value = cache.get(key)
+            if value is not None:
+                result[key] = value
+        return result
+
+    def note_missing_key(self, key):
+        """Note that key is a missing key."""
+        if self._cache_misses:
+            self.missing_keys.add(key)
 
 
 class Graph(object):

=== modified file 'bzrlib/memorytree.py'
--- a/bzrlib/memorytree.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/memorytree.py	2009-03-24 23:19:12 +0000
@@ -212,8 +212,7 @@
 
     def _populate_from_branch(self):
         """Populate the in-tree state from the branch."""
-        self._basis_tree = self.branch.repository.revision_tree(
-            self._branch_revision_id)
+        self._set_basis()
         if self._branch_revision_id == _mod_revision.NULL_REVISION:
             self._parent_ids = []
         else:
@@ -280,13 +279,23 @@
             _mod_revision.check_not_reserved_id(revision_id)
         if len(revision_ids) == 0:
             self._parent_ids = []
-            self._basis_tree = self.branch.repository.revision_tree(
-                                    _mod_revision.NULL_REVISION)
+            self._branch_revision_id = _mod_revision.NULL_REVISION
         else:
             self._parent_ids = revision_ids
-            self._basis_tree = self.branch.repository.revision_tree(
-                                    revision_ids[0])
             self._branch_revision_id = revision_ids[0]
+        self._allow_leftmost_as_ghost = allow_leftmost_as_ghost
+        self._set_basis()
+    
+    def _set_basis(self):
+        try:
+            self._basis_tree = self.branch.repository.revision_tree(
+                self._branch_revision_id)
+        except errors.NoSuchRevision:
+            if self._allow_leftmost_as_ghost:
+                self._basis_tree = self.branch.repository.revision_tree(
+                    _mod_revision.NULL_REVISION)
+            else:
+                raise
 
     def set_parent_trees(self, parents_list, allow_leftmost_as_ghost=False):
         """See MutableTree.set_parent_trees()."""

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2009-03-24 06:40:26 +0000
+++ b/bzrlib/remote.py	2009-03-24 23:19:12 +0000
@@ -839,7 +839,7 @@
         if not self._lock_mode:
             self._lock_mode = 'r'
             self._lock_count = 1
-            self._unstacked_provider.enable_cache(cache_misses=False)
+            self._unstacked_provider.enable_cache(cache_misses=True)
             if self._real_repository is not None:
                 self._real_repository.lock_read()
         else:
@@ -1241,15 +1241,26 @@
         # TODO: Manage this incrementally to avoid covering the same path
         # repeatedly. (The server will have to on each request, but the less
         # work done the better).
+        #
+        # Negative caching notes:
+        # new server sends missing when a request including the revid
+        # 'include-missing:' is present in the request.
+        # missing keys are serialised as missing:X, and we then call
+        # provider.note_missing(X) for-all X
         parents_map = self._unstacked_provider.get_cached_map()
         if parents_map is None:
             # Repository is not locked, so there's no cache.
             parents_map = {}
+        # start_set is all the keys in the cache
         start_set = set(parents_map)
+        # result set is all the references to keys in the cache
         result_parents = set()
         for parents in parents_map.itervalues():
             result_parents.update(parents)
         stop_keys = result_parents.difference(start_set)
+        # We don't need to send ghosts back to the server as a position to
+        # stop either.
+        stop_keys.difference_update(self._unstacked_provider.missing_keys)
         included_keys = start_set.intersection(result_parents)
         start_set.difference_update(included_keys)
         recipe = ('manual', start_set, stop_keys, len(parents_map))
@@ -1260,7 +1271,7 @@
                 raise ValueError(
                     "key %r not a plain string" % (key,))
         verb = 'Repository.get_parent_map'
-        args = (path,) + tuple(keys)
+        args = (path, 'include-missing:') + tuple(keys)
         try:
             response = self._call_with_body_bytes_expecting_body(
                 verb, args, body)
@@ -1294,8 +1305,14 @@
                 if len(d) > 1:
                     revision_graph[d[0]] = d[1:]
                 else:
-                    # No parents - so give the Graph result (NULL_REVISION,).
-                    revision_graph[d[0]] = (NULL_REVISION,)
+                    # No parents:
+                    if d[0].startswith('missing:'):
+                        revid = d[0][8:]
+                        self._unstacked_provider.note_missing_key(revid)
+                    else:
+                        # no parents - so give the Graph result
+                        # (NULL_REVISION,).
+                        revision_graph[d[0]] = (NULL_REVISION,)
             return revision_graph
 
     @needs_read_lock

=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/smart/repository.py	2009-03-24 23:19:12 +0000
@@ -134,6 +134,10 @@
         from revision_ids is returned. The verb takes a body containing the
         current search state, see do_body for details.
 
+        If 'include-missing:' is in revision_ids, ghosts encountered in the
+        graph traversal for getting parent data are included in the result with
+        a prefix of 'missing:'.
+
         :param repository: The repository to query in.
         :param revision_ids: The utf8 encoded revision_id to answer for.
         """
@@ -158,6 +162,9 @@
     def _do_repository_request(self, body_bytes):
         repository = self._repository
         revision_ids = set(self._revision_ids)
+        include_missing = 'include-missing:' in revision_ids
+        if include_missing:
+            revision_ids.remove('include-missing:')
         body_lines = body_bytes.split('\n')
         search_result, error = self.recreate_search_from_recipe(
             repository, body_lines)
@@ -178,19 +185,29 @@
         while next_revs:
             queried_revs.update(next_revs)
             parent_map = repo_graph.get_parent_map(next_revs)
+            current_revs = next_revs
             next_revs = set()
-            for revision_id, parents in parent_map.iteritems():
-                # adjust for the wire
-                if parents == (_mod_revision.NULL_REVISION,):
-                    parents = ()
-                # prepare the next query
-                next_revs.update(parents)
-                if revision_id not in client_seen_revs:
+            for revision_id in current_revs:
+                missing_rev = False
+                parents = parent_map.get(revision_id)
+                if parents is not None:
+                    # adjust for the wire
+                    if parents == (_mod_revision.NULL_REVISION,):
+                        parents = ()
+                    # prepare the next query
+                    next_revs.update(parents)
+                    encoded_id = revision_id
+                else:
+                    missing_rev = True
+                    encoded_id = "missing:" + revision_id
+                    parents = []
+                if (revision_id not in client_seen_revs and
+                    (not missing_rev or include_missing)):
                     # Client does not have this revision, give it to it.
                     # add parents to the result
-                    result[revision_id] = parents
+                    result[encoded_id] = parents
                     # Approximate the serialized cost of this revision_id.
-                    size_so_far += 2 + len(revision_id) + sum(map(len, parents))
+                    size_so_far += 2 + len(encoded_id) + sum(map(len, parents))
             # get all the directly asked for parents, and then flesh out to
             # 64K (compressed) or so. We do one level of depth at a time to
             # stay in sync with the client. The 250000 magic number is

=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_graph.py	2009-03-24 23:19:12 +0000
@@ -1385,6 +1385,12 @@
 
 
 class TestCachingParentsProvider(tests.TestCase):
+    """These tests run with:
+
+    self.inst_pp, a recording parents provider with a graph of a->b, and b is a
+    ghost.
+    self.caching_pp, a CachingParentsProvider layered on inst_pp.
+    """
 
     def setUp(self):
         super(TestCachingParentsProvider, self).setUp()
@@ -1409,7 +1415,6 @@
         self.assertEqual({}, self.caching_pp.get_parent_map(['b']))
         # No new calls
         self.assertEqual(['b'], self.inst_pp.calls)
-        self.assertEqual({'b':None}, self.caching_pp._cache)
 
     def test_get_parent_map_mixed(self):
         """Anything that can be returned from cache, should be"""
@@ -1427,6 +1432,13 @@
         # only present 1 time.
         self.assertEqual(['a', 'b'], sorted(self.inst_pp.calls))
 
+    def test_note_missing_key(self):
+        """After noting that a key is missing it is cached."""
+        self.caching_pp.note_missing_key('b')
+        self.assertEqual({}, self.caching_pp.get_parent_map(['b']))
+        self.assertEqual([], self.inst_pp.calls)
+        self.assertEqual(set(['b']), self.caching_pp.missing_keys)
+
 
 class TestCachingParentsProviderExtras(tests.TestCaseWithTransport):
     """Test the behaviour when parents are provided that were not requested."""

=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py	2009-03-24 06:40:26 +0000
+++ b/bzrlib/tests/test_remote.py	2009-03-24 23:19:12 +0000
@@ -36,6 +36,7 @@
     repository,
     smart,
     tests,
+    treebuilder,
     urlutils,
     )
 from bzrlib.branch import Branch
@@ -1577,7 +1578,8 @@
         self.assertEqual({r1: (NULL_REVISION,)}, parents)
         self.assertEqual(
             [('call_with_body_bytes_expecting_body',
-              'Repository.get_parent_map', ('quack/', r2), '\n\n0')],
+              'Repository.get_parent_map', ('quack/', 'include-missing:', r2),
+              '\n\n0')],
             client._calls)
         repo.unlock()
         # now we call again, and it should use the second response.
@@ -1587,9 +1589,11 @@
         self.assertEqual({r1: (NULL_REVISION,)}, parents)
         self.assertEqual(
             [('call_with_body_bytes_expecting_body',
-              'Repository.get_parent_map', ('quack/', r2), '\n\n0'),
+              'Repository.get_parent_map', ('quack/', 'include-missing:', r2),
+              '\n\n0'),
              ('call_with_body_bytes_expecting_body',
-              'Repository.get_parent_map', ('quack/', r1), '\n\n0'),
+              'Repository.get_parent_map', ('quack/', 'include-missing:', r1),
+              '\n\n0'),
             ],
             client._calls)
         repo.unlock()
@@ -1604,7 +1608,8 @@
         parents = repo.get_parent_map([rev_id])
         self.assertEqual(
             [('call_with_body_bytes_expecting_body',
-              'Repository.get_parent_map', ('quack/', rev_id), '\n\n0'),
+              'Repository.get_parent_map', ('quack/', 'include-missing:',
+              rev_id), '\n\n0'),
              ('disconnect medium',),
              ('call_expecting_body', 'Repository.get_revision_graph',
               ('quack/', ''))],
@@ -1643,6 +1648,57 @@
             errors.UnexpectedSmartServerResponse,
             repo.get_parent_map, ['a-revision-id'])
 
+    def test_get_parent_map_negative_caches_missing_keys(self):
+        self.setup_smart_server_with_call_log()
+        repo = self.make_repository('foo')
+        self.assertIsInstance(repo, RemoteRepository)
+        repo.lock_read()
+        self.addCleanup(repo.unlock)
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing']))
+        self.assertLength(1, self.hpss_calls)
+        # No call if we repeat this
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing']))
+        self.assertLength(0, self.hpss_calls)
+        # Asking for more unknown keys makes a request.
+        self.reset_smart_call_log()
+        graph = repo.get_graph()
+        self.assertEqual({},
+            graph.get_parent_map(['some-missing', 'other-missing',
+                'more-missing']))
+        self.assertLength(1, self.hpss_calls)
+
+    def test_get_parent_map_gets_ghosts_from_result(self):
+        # asking for a revision should negatively cache close ghosts in its
+        # ancestry.
+        self.setup_smart_server_with_call_log()
+        tree = self.make_branch_and_memory_tree('foo')
+        tree.lock_write()
+        try:
+            builder = treebuilder.TreeBuilder()
+            builder.start_tree(tree)
+            builder.build([])
+            builder.finish_tree()
+            tree.set_parent_ids(['non-existant'], allow_leftmost_as_ghost=True)
+            rev_id = tree.commit('')
+        finally:
+            tree.unlock()
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        repo = tree.branch.repository
+        self.assertIsInstance(repo, RemoteRepository)
+        # ask for rev_id
+        repo.get_parent_map([rev_id])
+        self.reset_smart_call_log()
+        # Now asking for rev_id's ghost parent should not make calls
+        self.assertEqual({}, repo.get_parent_map(['non-existant']))
+        self.assertLength(0, self.hpss_calls)
+
 
 class TestGetParentMapAllowsNew(tests.TestCaseWithTransport):
 
@@ -2363,7 +2419,7 @@
         rev_ord, expected_revs = self.get_ordered_revs('knit', 'topological')
         self.assertEqual(expected_revs, rev_ord)
         # Getting topological sort requires VFS calls still
-        self.assertLength(14, self.hpss_calls)
+        self.assertLength(12, self.hpss_calls)
 
     def test_stacked_get_stream_groupcompress(self):
         # Repository._get_source.get_stream() from a stacked repository with

=== modified file 'bzrlib/tests/test_smart.py'
--- a/bzrlib/tests/test_smart.py	2009-03-24 06:40:26 +0000
+++ b/bzrlib/tests/test_smart.py	2009-03-24 23:19:12 +0000
@@ -931,11 +931,23 @@
 
         self.assertEqual(None,
             request.execute('', 'missing-id'))
-        # Note that it returns a body (of '' bzipped).
+        # Note that it returns a body that is bzipped.
         self.assertEqual(
             SuccessfulSmartServerResponse(('ok', ), bz2.compress('')),
             request.do_body('\n\n0\n'))
 
+    def test_trivial_include_missing(self):
+        backing = self.get_transport()
+        request = smart.repository.SmartServerRepositoryGetParentMap(backing)
+        tree = self.make_branch_and_memory_tree('.')
+
+        self.assertEqual(None,
+            request.execute('', 'missing-id', 'include-missing:'))
+        self.assertEqual(
+            SuccessfulSmartServerResponse(('ok', ),
+                bz2.compress('missing:missing-id')),
+            request.do_body('\n\n0\n'))
+
 
 class TestSmartServerRepositoryGetRevisionGraph(tests.TestCaseWithMemoryTransport):
 




More information about the bazaar-commits mailing list