Rev 3231: Implement basic repository supporting external references. in http://people.ubuntu.com/~robertc/baz2.0/shallow-branch

Fri Feb 22 01:10:39 GMT 2008

At http://people.ubuntu.com/~robertc/baz2.0/shallow-branch

------------------------------------------------------------
revno: 3231
revision-id:robertc at robertcollins.net-20080222011034-qsrzuxugew1k0eb9
parent: robertc at robertcollins.net-20080221230453-1l0e7r26sbmjkz11
parent: robertc at robertcollins.net-20080222002055-30veteyvioimdvfs
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Development1
timestamp: Fri 2008-02-22 12:10:34 +1100
message:
  Implement basic repository supporting external references.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_external_reference_implementations/test_all_revision_ids.py test_all_revision_id-20080220041905-1j2g4lyz3c6h34v4-2
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
  bzrlib/tests/test_selftest.py  test_selftest.py-20051202044319-c110a115d8c0456a
    ------------------------------------------------------------
    revno: 3227.1.5
    revision-id:robertc at robertcollins.net-20080222002055-30veteyvioimdvfs
    parent: robertc at robertcollins.net-20080221230321-v8cm2p91zd3vbvyv
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: external_reference_tests
    timestamp: Fri 2008-02-22 11:20:55 +1100
    message:
      Update repository parameterisation tests to match refactoring.
    modified:
      bzrlib/tests/repository_external_reference_implementations/test_all_revision_ids.py test_all_revision_id-20080220041905-1j2g4lyz3c6h34v4-2
      bzrlib/tests/test_selftest.py  test_selftest.py-20051202044319-c110a115d8c0456a
=== modified file 'bzrlib/repofmt/pack_repo.py'

--- a/bzrlib/repofmt/pack_repo.py	2008-02-20 00:42:39 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-02-22 01:10:34 +0000
@@ -59,6 +59,7 @@
 from bzrlib.repofmt.knitrepo import KnitRepository
 from bzrlib.repository import (
     CommitBuilder,
+    InterRepository,
     MetaDirRepository,
     MetaDirRepositoryFormat,
     RootCommitBuilder,
@@ -209,7 +210,7 @@
 
     def __repr__(self):
         return "<bzrlib.repofmt.pack_repo.Pack object at 0x%x, %s, %s" % (
-            id(self), self.transport, self.name)
+            id(self), self.pack_transport, self.name)
 
 
 class NewPack(Pack):
@@ -1197,8 +1198,10 @@
         :return: True if packing took place.
         """
         # XXX: Should not be needed when the management of indices is sane.
-        total_revisions = self.revision_index.combined_index.key_count()
-        total_packs = len(self._names)
+        
+        total_revisions = self._local_revision_index().key_count()
+        total_packs = len(list(collection for collection, sizes in
+            self._names.values() if collection is self))
         if self._max_pack_count(total_revisions) >= total_packs:
             return False
         # XXX: the following may want to be a class, to pack with a given
@@ -1210,6 +1213,8 @@
         pack_distribution = self.pack_distribution(total_revisions)
         existing_packs = []
         for pack in self.all_packs():
+            if self._names[pack.name][0] is not self:
+                continue
             revision_count = pack.get_revision_count()
             if revision_count == 0:
                 # revision less packs are not generated by normal operation,
@@ -1250,6 +1255,15 @@
         for revision_count, packs in pack_operations:
             self._obsolete_packs(packs)
 
+    def _local_revision_index(self):
+        """Return a combined index for all the local packs only."""
+        index = CombinedGraphIndex([])
+        for name, (collection, sizes) in self._names.items():
+            if collection is not self:
+                continue
+            index.insert_index(0, self.get_pack_by_name(name).revision_index)
+        return index
+
     def lock_names(self):
         """Acquire the mutex around the pack-names index.
         
@@ -1261,13 +1275,14 @@
     def pack(self):
         """Pack the pack collection totally."""
         self.ensure_loaded()
-        total_packs = len(self._names)
+        total_packs = len(list(collection for collection, sizes in
+            self._names.values() if collection is self))
         if total_packs < 2:
             # This is arguably wrong because we might not be optimal, but for
             # now lets leave it in. (e.g. reconcile -> one pack. But not
             # optimal.
             return
-        total_revisions = self.revision_index.combined_index.key_count()
+        total_revisions = self._local_revision_index().key_count()
         # XXX: the following may want to be a class, to pack with a given
         # policy.
         mutter('Packing repository %s, which has %d pack files, '
@@ -1277,6 +1292,8 @@
         pack_distribution = [1]
         pack_operations = [[0, []]]
         for pack in self.all_packs():
+            if self._names[pack.name][0] is not self:
+                continue
             pack_operations[-1][0] += pack.get_revision_count()
             pack_operations[-1][1].append(pack)
         self._execute_pack_operations(pack_operations, OptimisingPacker)
@@ -1330,14 +1347,28 @@
             raise errors.ObjectNotLocked(self.repo)
         if self._names is None:
             self._names = {}
+            # Get fallback repository packs.
+            # TODO: we really should try local packs first and thus order the
+            # indices appropriately.
+            self._names.update(self.fallback_packs_details())
+            # Now the local packs.
             self._packs_at_load = set()
             for index, key, value in self._iter_disk_pack_index():
                 name = key[0]
-                self._names[name] = self._parse_index_sizes(value)
+                self._names[name] = (self, self._parse_index_sizes(value))
                 self._packs_at_load.add((key, value))
         # populate all the metadata.
         self.all_packs()
 
+    def fallback_packs_details(self):
+        """Return a dict of name -> (collection, index) size tuples."""
+        result = {}
+        for repo in self.repo._fallback_repositories:
+            collection = repo._pack_collection
+            for index, key, value in collection._iter_disk_pack_index():
+                result[key[0]] = (collection, self._parse_index_sizes(value))
+        return result
+
     def _parse_index_sizes(self, value):
         """Parse a string of index sizes."""
         return tuple([int(digits) for digits in value.split(' ')])
@@ -1345,17 +1376,20 @@
     def get_pack_by_name(self, name):
         """Get a Pack object by name.
 
+        If previously accessed this returns from the self._packs_by_name cache.
+
         :param name: The name of the pack - e.g. '123456'
         :return: A Pack object.
         """
         try:
             return self._packs_by_name[name]
         except KeyError:
+            collection = self._names[name][0]
             rev_index = self._make_index(name, '.rix')
             inv_index = self._make_index(name, '.iix')
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
-            result = ExistingPack(self._pack_transport, name, rev_index,
+            result = ExistingPack(collection._pack_transport, name, rev_index,
                 inv_index, txt_index, sig_index)
             self.add_pack_to_memory(result)
             return result
@@ -1370,7 +1404,7 @@
         if a_new_pack.name in self._names:
             raise errors.BzrError(
                 'Pack %r already exists in %s' % (a_new_pack.name, self))
-        self._names[a_new_pack.name] = tuple(a_new_pack.index_sizes)
+        self._names[a_new_pack.name] = self, tuple(a_new_pack.index_sizes)
         self.add_pack_to_memory(a_new_pack)
 
     def _iter_disk_pack_index(self):
@@ -1384,11 +1418,12 @@
                 ).iter_all_entries()
 
     def _make_index(self, name, suffix):
+        collection = self._names[name][0]
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
-        index_size = self._names[name][size_offset]
+        index_size = self._names[name][1][size_offset]
         return GraphIndex(
-            self._index_transport, index_name, index_size)
+            collection._index_transport, index_name, index_size)
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -1563,7 +1598,9 @@
                 disk_nodes.add((key, value))
             # do a two-way diff against our original content
             current_nodes = set()
-            for name, sizes in self._names.iteritems():
+            for name, (collection, sizes) in self._names.iteritems():
+                if collection is not self:
+                    continue
                 current_nodes.add(
                     ((name, ), ' '.join(str(size) for size in sizes)))
             deleted_nodes = self._packs_at_load - current_nodes
@@ -1586,18 +1623,18 @@
         finally:
             self._unlock_names()
         # synchronise the memory packs list with what we just wrote:
-        new_names = dict(disk_nodes)
+        new_names = self.fallback_packs_details()
+        for key, value in disk_nodes:
+            new_names[key[0]] = self, self._parse_index_sizes(value)
         # drop no longer present nodes
         for pack in self.all_packs():
-            if (pack.name,) not in new_names:
+            if pack.name not in new_names:
                 self._remove_pack_from_memory(pack)
         # add new nodes/refresh existing ones
-        for key, value in disk_nodes:
-            name = key[0]
-            sizes = self._parse_index_sizes(value)
+        for name, (collection, sizes) in new_names.iteritems():
             if name in self._names:
                 # existing
-                if sizes != self._names[name]:
+                if sizes != self._names[name][1]:
                     # the pack for name has had its indices replaced - rare but
                     # important to handle. XXX: probably can never happen today
                     # because the three-way merge code above does not handle it
@@ -1611,7 +1648,7 @@
                     self.get_pack_by_name(name)
             else:
                 # new
-                self._names[name] = sizes
+                self._names[name] = collection, sizes
                 self.get_pack_by_name(name)
 
     def _start_write_group(self):
@@ -1856,6 +1893,15 @@
             return 'w'
         return 'r'
 
+    def _add_fallback_repository_check(self, repository):
+        """Check that this repository can fallback to repository safely.
+        
+        :param repository: A repository to fallback to.
+        :return: True if the repositories can stack ok.
+        """
+        return (InterRepository._same_model(self, repository) and
+            self._format.__class__ == repository._format.__class__)
+
     def _find_inconsistent_revision_parents(self):
         """Find revisions with incorrectly cached parents.
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-02-20 00:42:39 +0000
+++ b/bzrlib/repository.py	2008-02-22 01:10:34 +0000
@@ -502,8 +502,18 @@
         """
         if not self._format.supports_external_lookups:
             raise errors.UnstackableRepositoryFormat(self._format, self.base)
+        if not self._add_fallback_repository_check(repository):
+            raise errors.IncompatibleRepositories(self, repository)
         self._fallback_repositories.append(repository)
 
+    def _add_fallback_repository_check(self, repository):
+        """Check that this repository can fallback to repository safely.
+        
+        :param repository: A repository to fallback to.
+        :return: True if the repositories can stack ok.
+        """
+        return InterRepository._same_model(self, repository)
+
     def add_inventory(self, revision_id, inv, parents):
         """Add the inventory inv to the repository as revision_id.
         
@@ -579,10 +589,7 @@
         """
         if 'evil' in debug.debug_flags:
             mutter_callsite(2, "all_revision_ids is linear with history.")
-        all_ids = set(self._all_revision_ids())
-        for repository in self._fallback_repositories:
-            all_ids.update(repository.all_revision_ids())
-        return all_ids
+        return self._all_revision_ids()
 
     def _all_revision_ids(self):
         """Returns a list of all the revision ids in the repository. 

=== modified file 'bzrlib/tests/repository_external_reference_implementations/test_all_revision_ids.py'
--- a/bzrlib/tests/repository_external_reference_implementations/test_all_revision_ids.py	2008-02-20 04:45:00 +0000
+++ b/bzrlib/tests/repository_external_reference_implementations/test_all_revision_ids.py	2008-02-22 00:20:55 +0000
@@ -27,13 +27,13 @@
     def test_all_revision_ids_empty(self):
         base = self.make_repository('base')
         repo = self.make_referring('referring', 'base')
-        self.assertEqual(set([]), repo.all_revision_ids())
+        self.assertEqual(set([]), set(repo.all_revision_ids()))
 
     def test_all_revision_ids_from_base(self):
         tree = self.make_branch_and_tree('base')
         revid = tree.commit('one')
         repo = self.make_referring('referring', 'base')
-        self.assertEqual(set([revid]), repo.all_revision_ids())
+        self.assertEqual(set([revid]), set(repo.all_revision_ids()))
 
     def test_all_revision_ids_from_repo(self):
         tree = self.make_branch_and_tree('spare')
@@ -41,7 +41,7 @@
         base = self.make_repository('base')
         repo = self.make_referring('referring', 'base')
         repo.fetch(tree.branch.repository, revid)
-        self.assertEqual(set([revid]), repo.all_revision_ids())
+        self.assertEqual(set([revid]), set(repo.all_revision_ids()))
 
     def test_all_revision_ids_from_both(self):
         tree = self.make_branch_and_tree('spare')
@@ -50,7 +50,7 @@
         revid2 = base_tree.commit('two')
         repo = self.make_referring('referring', 'base')
         repo.fetch(tree.branch.repository, revid)
-        self.assertEqual(set([revid, revid2]), repo.all_revision_ids())
+        self.assertEqual(set([revid, revid2]), set(repo.all_revision_ids()))
 
     def test_duplicate_ids_do_not_affect_length(self):
         tree = self.make_branch_and_tree('spare')
@@ -59,6 +59,6 @@
         repo = self.make_referring('referring', 'base')
         repo.fetch(tree.branch.repository, revid)
         base.fetch(tree.branch.repository, revid)
-        self.assertEqual(set([revid]), repo.all_revision_ids())
+        self.assertEqual(set([revid]), set(repo.all_revision_ids()))
         self.assertEqual(1, len(repo.all_revision_ids()))
 

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2008-02-20 00:42:39 +0000
+++ b/bzrlib/tests/test_repository.py	2008-02-22 01:10:34 +0000
@@ -1234,7 +1234,84 @@
             t.get('format').read())
 
 
-class TestDevelopment1(TestKnitPackNoSubtrees):
+class TestExternalDevelopment1(object):
+
+    def test_error_mismatched_format(self):
+        # this first implementation uses the internal pack list from each
+        # repository and thus needs the same format in both sides.
+        base = self.make_repository('base', format='pack-0.92')
+        repo = self.make_repository('repo', format=self.get_format())
+        self.assertRaises(errors.IncompatibleRepositories,
+            repo.add_fallback_repository, base)
+
+    def test_ensure_loaded_gets_other_repositories(self):
+        tree = self.make_branch_and_tree('base', format=self.get_format())
+        repo = self.make_repository('repo', format=self.get_format())
+        repo.add_fallback_repository(tree.branch.repository)
+        tree.commit('foo')
+        repo.lock_read()
+        self.addCleanup(repo.unlock)
+        repo._pack_collection.ensure_loaded()
+        packs_repo = repo._pack_collection.all_packs()
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        packs_base = tree.branch.repository._pack_collection.all_packs()
+        self.assertEqual(packs_base, packs_repo)
+        self.assertEqual(1, len(packs_base))
+
+    def test_adding_pack_does_not_record_pack_names_from_other_repositories(self):
+        base = self.make_branch_and_tree('base', format=self.get_format())
+        base.commit('foo')
+        referencing = self.make_branch_and_tree('repo', format=self.get_format())
+        referencing.branch.repository.add_fallback_repository(base.branch.repository)
+        referencing.commit('bar')
+        new_instance = referencing.bzrdir.open_repository()
+        new_instance.lock_read()
+        self.addCleanup(new_instance.unlock)
+        new_instance._pack_collection.ensure_loaded()
+        self.assertEqual(1, len(new_instance._pack_collection.all_packs()))
+
+    def test_autopack_only_considers_main_repo_packs(self):
+        base = self.make_branch_and_tree('base', format=self.get_format())
+        base.commit('foo')
+        tree = self.make_branch_and_tree('repo', format=self.get_format())
+        tree.branch.repository.add_fallback_repository(base.branch.repository)
+        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
+        # This test could be a little cheaper by replacing the packs
+        # attribute on the repository to allow a different pack distribution
+        # and max packs policy - so we are checking the policy is honoured
+        # in the test. But for now 11 commits is not a big deal in a single
+        # test.
+        for x in range(9):
+            tree.commit('commit %s' % x)
+        # there should be 9 packs:
+        index = GraphIndex(trans, 'pack-names', None)
+        self.assertEqual(9, len(list(index.iter_all_entries())))
+        # committing one more should coalesce to 1 of 10.
+        tree.commit('commit triggering pack')
+        index = GraphIndex(trans, 'pack-names', None)
+        self.assertEqual(1, len(list(index.iter_all_entries())))
+        # packing should not damage data
+        tree = tree.bzrdir.open_workingtree()
+        check_result = tree.branch.repository.check(
+            [tree.branch.last_revision()])
+        # We should have 50 (10x5) files in the obsolete_packs directory.
+        obsolete_files = list(trans.list_dir('obsolete_packs'))
+        self.assertFalse('foo' in obsolete_files)
+        self.assertFalse('bar' in obsolete_files)
+        self.assertEqual(50, len(obsolete_files))
+        # XXX: Todo check packs obsoleted correctly - old packs and indices
+        # in the obsolete_packs directory.
+        large_pack_name = list(index.iter_all_entries())[0][1][0]
+        # finally, committing again should not touch the large pack.
+        tree.commit('commit not triggering pack')
+        index = GraphIndex(trans, 'pack-names', None)
+        self.assertEqual(2, len(list(index.iter_all_entries())))
+        pack_names = [node[1][0] for node in index.iter_all_entries()]
+        self.assertTrue(large_pack_name in pack_names)
+
+
+class TestDevelopment1(TestKnitPackNoSubtrees, TestExternalDevelopment1):
 
     def get_format(self):
         return bzrdir.format_registry.make_bzrdir(
@@ -1250,7 +1327,7 @@
         self.assertTrue(repo._format.supports_external_lookups)
 
 
-class TestDevelopment1Subtree(TestKnitPackNoSubtrees):
+class TestDevelopment1Subtree(TestKnitPackNoSubtrees, TestExternalDevelopment1):
 
     def get_format(self):
         return bzrdir.format_registry.make_bzrdir(
@@ -1429,14 +1506,11 @@
         name = packs.names()[0]
         pack_1 = packs.get_pack_by_name(name)
         # the pack should be correctly initialised
-        rev_index = GraphIndex(packs._index_transport, name + '.rix',
-            packs._names[name][0])
-        inv_index = GraphIndex(packs._index_transport, name + '.iix',
-            packs._names[name][1])
-        txt_index = GraphIndex(packs._index_transport, name + '.tix',
-            packs._names[name][2])
-        sig_index = GraphIndex(packs._index_transport, name + '.six',
-            packs._names[name][3])
+        sizes = packs._names[name][1]
+        rev_index = GraphIndex(packs._index_transport, name + '.rix', sizes[0])
+        inv_index = GraphIndex(packs._index_transport, name + '.iix', sizes[1])
+        txt_index = GraphIndex(packs._index_transport, name + '.tix', sizes[2])
+        sig_index = GraphIndex(packs._index_transport, name + '.six', sizes[3])
         self.assertEqual(pack_repo.ExistingPack(packs._pack_transport,
             name, rev_index, inv_index, txt_index, sig_index), pack_1)
         # and the same instance should be returned on successive calls.

=== modified file 'bzrlib/tests/test_selftest.py'
--- a/bzrlib/tests/test_selftest.py	2008-01-29 08:21:19 +0000
+++ b/bzrlib/tests/test_selftest.py	2008-02-22 00:20:55 +0000
@@ -230,35 +230,16 @@
             adapter.scenarios)
 
 
-class TestRepositoryProviderAdapter(TestCase):
+class TestRepositoryParameterisation(TestCase):
     """A group of tests that test the repository implementation test adapter."""
 
-    def test_constructor(self):
-        # check that constructor parameters are passed through to the
-        # scenarios.
-        from bzrlib.tests.repository_implementations import RepositoryTestProviderAdapter
-        server1 = "a"
-        server2 = "b"
-        formats = [("c", "C"), ("d", "D")]
-        adapter = RepositoryTestProviderAdapter(server1, server2, formats)
-        self.assertEqual([
-            ('str',
-             {'bzrdir_format': 'C',
-              'repository_format': 'c',
-              'transport_readonly_server': 'b',
-              'transport_server': 'a'}),
-            ('str',
-             {'bzrdir_format': 'D',
-              'repository_format': 'd',
-              'transport_readonly_server': 'b',
-              'transport_server': 'a'})],
-            adapter.scenarios)
-
     def test_setting_vfs_transport(self):
         """The vfs_transport_factory can be set optionally."""
-        from bzrlib.tests.repository_implementations import RepositoryTestProviderAdapter
-        formats = [("a", "b"), ("c", "d")]
-        adapter = RepositoryTestProviderAdapter(None, None, formats,
+        from bzrlib.tests.repository_implementations import formats_to_scenarios
+        scenarios = formats_to_scenarios(
+            [("a", "b"), ("c", "d")],
+            None,
+            None,
             vfs_transport_factory="vfs")
         self.assertEqual([
             ('str',
@@ -273,17 +254,17 @@
               'transport_readonly_server': None,
               'transport_server': None,
               'vfs_transport_factory': 'vfs'})],
-            adapter.scenarios)
+            scenarios)
 
     def test_formats_to_scenarios(self):
         """The adapter can generate all the scenarios needed."""
-        from bzrlib.tests.repository_implementations import RepositoryTestProviderAdapter
-        no_vfs_adapter = RepositoryTestProviderAdapter("server", "readonly",
-            [], None)
-        vfs_adapter = RepositoryTestProviderAdapter("server", "readonly",
-            [], vfs_transport_factory="vfs")
+        from bzrlib.tests.repository_implementations import formats_to_scenarios
+        formats = [("c", "C"), (1, "D")]
+        no_vfs_scenarios = formats_to_scenarios(formats, "server", "readonly",
+            None)
+        vfs_scenarios = formats_to_scenarios(formats, "server", "readonly",
+            vfs_transport_factory="vfs")
         # no_vfs generate scenarios without vfs_transport_factor
-        formats = [("c", "C"), (1, "D")]
         self.assertEqual([
             ('str',
              {'bzrdir_format': 'C',
@@ -295,7 +276,7 @@
               'repository_format': 1,
               'transport_readonly_server': 'readonly',
               'transport_server': 'server'})],
-            no_vfs_adapter.formats_to_scenarios(formats))
+            no_vfs_scenarios)
         self.assertEqual([
             ('str',
              {'bzrdir_format': 'C',
@@ -309,7 +290,7 @@
               'transport_readonly_server': 'readonly',
               'transport_server': 'server',
               'vfs_transport_factory': 'vfs'})],
-            vfs_adapter.formats_to_scenarios(formats))
+            vfs_scenarios)
 
 
 class TestTestScenarioApplier(TestCase):