Rev 3791: Add RepositoryPackCollection.reload_pack_names() in http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786

John Arbash Meinel john at arbash-meinel.com
Thu Oct 23 20:40:21 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786

------------------------------------------------------------
revno: 3791
revision-id: john at arbash-meinel.com-20081023193959-xdytlck4ym0vr6lt
parent: john at arbash-meinel.com-20081023191146-jiz88y5og2koaahs
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pack_retry_153786
timestamp: Thu 2008-10-23 14:39:59 -0500
message:
  Add RepositoryPackCollection.reload_pack_names()
  
  This refactors the _save_pack_names code into helper functions, and
  exposes a public method that callers can use to refresh the pack list.
  Reloading updates the internal names and refreshes the various indexes.
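  
  As a sketch of how a caller might use the new method (hypothetical code,
  not part of this patch; the path and revision id are placeholders, and it
  assumes a pack file that vanished mid-read surfaces as errors.NoSuchFile):
  
      from bzrlib import errors, repository
  
      repo = repository.Repository.open('tree')  # placeholder path
      repo.lock_read()
      try:
          revision_id = 'some-revision-id'  # placeholder revision id
          try:
              parents = repo.get_parent_map([revision_id])
          except errors.NoSuchFile:
              # Assumed failure mode: another process repacked. Refresh the
              # in-memory pack list (and its indexes) from the pack-names
              # file, then retry once.
              repo._pack_collection.reload_pack_names()
              parents = repo.get_parent_map([revision_id])
      finally:
          repo.unlock()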
-------------- next part --------------
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-10-01 05:40:45 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-10-23 19:39:59 +0000
@@ -1553,51 +1553,54 @@
         """Release the mutex around the pack-names index."""
         self.repo.control_files.unlock()
 
-    def _save_pack_names(self, clear_obsolete_packs=False):
-        """Save the list of packs.
-
-        This will take out the mutex around the pack names list for the
-        duration of the method call. If concurrent updates have been made, a
-        three-way merge between the current list and the current in memory list
-        is performed.
-
-        :param clear_obsolete_packs: If True, clear out the contents of the
-            obsolete_packs directory.
-        """
-        self.lock_names()
-        try:
-            builder = self._index_builder_class()
-            # load the disk nodes across
-            disk_nodes = set()
-            for index, key, value in self._iter_disk_pack_index():
-                disk_nodes.add((key, value))
-            # do a two-way diff against our original content
-            current_nodes = set()
-            for name, sizes in self._names.iteritems():
-                current_nodes.add(
-                    ((name, ), ' '.join(str(size) for size in sizes)))
-            deleted_nodes = self._packs_at_load - current_nodes
-            new_nodes = current_nodes - self._packs_at_load
-            disk_nodes.difference_update(deleted_nodes)
-            disk_nodes.update(new_nodes)
-            # TODO: handle same-name, index-size-changes here - 
-            # e.g. use the value from disk, not ours, *unless* we're the one
-            # changing it.
-            for key, value in disk_nodes:
-                builder.add_node(key, value)
-            self.transport.put_file('pack-names', builder.finish(),
-                mode=self.repo.bzrdir._get_file_mode())
-            # move the baseline forward
-            self._packs_at_load = disk_nodes
-            if clear_obsolete_packs:
-                self._clear_obsolete_packs()
-        finally:
-            self._unlock_names()
-        # synchronise the memory packs list with what we just wrote:
+    def _diff_pack_names(self):
+        """Read the pack names from disk and compare them to the in-memory set.
+
+        :return: (disk_nodes, deleted_nodes, new_nodes)
+            disk_nodes    The final set of nodes that should be referenced
+            deleted_nodes Nodes that have been removed since we started
+            new_nodes     Nodes that are newly introduced
+        """
+        # load the disk nodes across
+        disk_nodes = set()
+        for index, key, value in self._iter_disk_pack_index():
+            disk_nodes.add((key, value))
+
+        # do a two-way diff against our original content
+        current_nodes = set()
+        for name, sizes in self._names.iteritems():
+            current_nodes.add(
+                ((name, ), ' '.join(str(size) for size in sizes)))
+
+        # Packs that were present when we loaded the pack-names file, but
+        # have since been removed from our in-memory list
+        deleted_nodes = self._packs_at_load - current_nodes
+        # Packs which this process is adding
+        new_nodes = current_nodes - self._packs_at_load
+
+        # Update the disk_nodes set to include the ones we are adding, and
+        # remove the ones which were removed by someone else
+        disk_nodes.difference_update(deleted_nodes)
+        disk_nodes.update(new_nodes)
+
+        return disk_nodes, deleted_nodes, new_nodes
+
+    def _synchronize_pack_names_from_disk_nodes(self, disk_nodes):
+        """Given the correct set of pack files, update our saved info.
+
+        :return: (removed, added, modified)
+            removed     pack names removed from self._names
+            added       pack names added to self._names
+            modified    pack names whose value changed
+        """
+        removed = []
+        added = []
+        modified = []
         new_names = dict(disk_nodes)
         # drop no longer present nodes
         for pack in self.all_packs():
             if (pack.name,) not in new_names:
+                removed.append(pack.name)
                 self._remove_pack_from_memory(pack)
         # add new nodes/refresh existing ones
         for key, value in disk_nodes:
@@ -1617,10 +1620,57 @@
                     self._remove_pack_from_memory(self.get_pack_by_name(name))
                     self._names[name] = sizes
                     self.get_pack_by_name(name)
+                    modified.append(name)
             else:
                 # new
                 self._names[name] = sizes
                 self.get_pack_by_name(name)
+                added.append(name)
+        return removed, added, modified
+
+    def _save_pack_names(self, clear_obsolete_packs=False):
+        """Save the list of packs.
+
+        This will take out the mutex around the pack names list for the
+        duration of the method call. If concurrent updates have been made, a
+        three-way merge between the on-disk list and the in-memory list is
+        performed.
+
+        :param clear_obsolete_packs: If True, clear out the contents of the
+            obsolete_packs directory.
+        """
+        self.lock_names()
+        try:
+            builder = self._index_builder_class()
+            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+            # TODO: handle same-name, index-size-changes here - 
+            # e.g. use the value from disk, not ours, *unless* we're the one
+            # changing it.
+            for key, value in disk_nodes:
+                builder.add_node(key, value)
+            self.transport.put_file('pack-names', builder.finish(),
+                mode=self.repo.bzrdir._get_file_mode())
+            # move the baseline forward
+            self._packs_at_load = disk_nodes
+            if clear_obsolete_packs:
+                self._clear_obsolete_packs()
+        finally:
+            self._unlock_names()
+        # synchronise the memory packs list with what we just wrote:
+        self._synchronize_pack_names_from_disk_nodes(disk_nodes)
+
+    def reload_pack_names(self):
+        """Sync our pack listing with what is present in the repository.
+
+        This should be called when we find out that something we thought was
+        present is now missing. This happens when another process re-packs the
+        repository, etc.
+        """
+        # This is functionally similar to _save_pack_names, but we don't write
+        # out the new value.
+        disk_nodes, _, _ = self._diff_pack_names()
+        self._packs_at_load = disk_nodes
+        return self._synchronize_pack_names_from_disk_nodes(disk_nodes)
 
     def _clear_obsolete_packs(self):
         """Delete everything from the obsolete-packs directory.

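For illustration only (hypothetical pack names and sizes, not from the
patch), the set arithmetic in _diff_pack_names() merges this process's
changes with concurrent changes on disk roughly like this:

    # At load time the pack-names file listed packs 'a' and 'b'.
    packs_at_load = set([(('a',), '100'), (('b',), '200')])
    # Since then this process removed pack 'a' and created pack 'c'.
    current_nodes = set([(('b',), '200'), (('c',), '300')])
    # Meanwhile another process added pack 'd' on disk.
    disk_nodes = set([(('a',), '100'), (('b',), '200'), (('d',), '400')])

    deleted_nodes = packs_at_load - current_nodes  # our deletions: {a}
    new_nodes = current_nodes - packs_at_load      # our additions: {c}
    disk_nodes.difference_update(deleted_nodes)    # drop what we deleted
    disk_nodes.update(new_nodes)                   # add what we created
    # disk_nodes is now {b, c, d}: both sets of changes survive.
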
=== modified file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py	2008-10-23 19:11:46 +0000
+++ b/bzrlib/tests/test_pack_repository.py	2008-10-23 19:39:59 +0000
@@ -398,23 +398,21 @@
     def test_concurrent_pack_triggers_reload(self):
         # create 2 packs, which we will then collapse
         tree = self.make_branch_and_tree('tree')
-        rev1 = tree.commit('one')
-        rev2 = tree.commit('two')
-        r1 = tree.branch.repository
-        r2 = repository.Repository.open('tree')
-        r1.lock_write()
+        tree.lock_write()
         try:
+            rev1 = tree.commit('one')
+            rev2 = tree.commit('two')
+            r2 = repository.Repository.open('tree')
             r2.lock_read()
             try:
-                # Now r2 has read the pack-names file, but will need to reload
-                # it after r1 has repacked
+                # Now r2 has read the pack-names file, but will need to
+                # reload it after tree.branch.repository has repacked
-                r1.pack()
-                self.assertEqual({rev2:(rev1,)},
-                                 r2.get_parent_map([rev2]))
+                tree.branch.repository.pack()
+                self.assertEqual({rev2:(rev1,)}, r2.get_parent_map([rev2]))
             finally:
                 r2.unlock()
         finally:
-            r1.unlock()
+            tree.unlock()
 
     def test_lock_write_does_not_physically_lock(self):
         repo = self.make_repository('.', format=self.get_format())

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2008-09-29 07:03:55 +0000
+++ b/bzrlib/tests/test_repository.py	2008-10-23 19:39:59 +0000
@@ -917,6 +917,55 @@
         # and the same instance should be returned on successive calls.
         self.assertTrue(pack_1 is packs.get_pack_by_name(name))
 
+    def test_reload_pack_names_new_entry(self):
+        tree = self.make_branch_and_tree('.')
+        tree.lock_write()
+        self.addCleanup(tree.unlock)
+        rev1 = tree.commit('one')
+        rev2 = tree.commit('two')
+        r = repository.Repository.open('.')
+        r.lock_read()
+        self.addCleanup(r.unlock)
+        packs = r._pack_collection
+        packs.ensure_loaded()
+        names = packs.names()
+        # Add a new pack file into the repository
+        rev3 = tree.commit('three')
+        new_names = tree.branch.repository._pack_collection.names()
+        new_name = set(new_names).difference(names)
+        self.assertEqual(1, len(new_name))
+        new_name = new_name.pop()
+        # The old collection hasn't noticed yet
+        self.assertEqual(names, packs.names())
+        # [removed], [added], [modified]
+        self.assertEqual(([], [new_name], []), packs.reload_pack_names())
+        self.assertEqual(new_names, packs.names())
+        # And the repository can access the new revision
+        self.assertEqual({rev3:(rev2,)}, r.get_parent_map([rev3]))
+
+    def test_reload_pack_names_added_and_removed(self):
+        tree = self.make_branch_and_tree('.')
+        tree.lock_write()
+        self.addCleanup(tree.unlock)
+        rev1 = tree.commit('one')
+        rev2 = tree.commit('two')
+        r = repository.Repository.open('.')
+        r.lock_read()
+        self.addCleanup(r.unlock)
+        packs = r._pack_collection
+        packs.ensure_loaded()
+        names = packs.names()
+        # Now repack the whole thing
+        tree.branch.repository.pack()
+        new_names = tree.branch.repository._pack_collection.names()
+        # The other collection hasn't noticed yet
+        self.assertEqual(names, packs.names())
+        removed, added, modified = packs.reload_pack_names()
+        self.assertEqual(new_names, packs.names())
+        self.assertEqual((names, new_names, []),
+                         (sorted(removed), sorted(added), sorted(modified)))
+        self.assertEqual({rev2:(rev1,)}, r.get_parent_map([rev2]))
+
 
 class TestPack(TestCaseWithTransport):
     """Tests for the Pack object."""


