Rev 3791: Add RepositoryPackCollection.reload_pack_names() in http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786
John Arbash Meinel
john at arbash-meinel.com
Thu Oct 23 20:40:21 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.9-dev/pack_retry_153786
------------------------------------------------------------
revno: 3791
revision-id: john at arbash-meinel.com-20081023193959-xdytlck4ym0vr6lt
parent: john at arbash-meinel.com-20081023191146-jiz88y5og2koaahs
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pack_retry_153786
timestamp: Thu 2008-10-23 14:39:59 -0500
message:
Add RepositoryPackCollection.reload_pack_names()
This refactors the _save_pack_names code into helper functions, and
exposes a public method that outside callers can use to refresh the
pack list. It updates the internal names and also refreshes the
various indexes.
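
For context, a caller that trips over a vanished pack file could use the
new method roughly like this. This is only an illustrative sketch: the
exception type caught and the retry wrapper are assumptions, not part of
this commit; reload_pack_names() itself is.

    # Hypothetical retry wrapper. NoSuchFile is an *assumed* failure
    # mode for a pack that vanished mid-read; this commit does not
    # define which error is raised.
    from bzrlib import errors

    def get_parent_map_with_retry(repo, revision_ids):
        try:
            return repo.get_parent_map(revision_ids)
        except errors.NoSuchFile:
            # Another process may have repacked the repository out from
            # under us; refresh the cached pack list and retry once.
            repo._pack_collection.reload_pack_names()
            return repo.get_parent_map(revision_ids)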
-------------- next part --------------
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-10-01 05:40:45 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-10-23 19:39:59 +0000
@@ -1553,51 +1553,55 @@
"""Release the mutex around the pack-names index."""
self.repo.control_files.unlock()
- def _save_pack_names(self, clear_obsolete_packs=False):
- """Save the list of packs.
-
- This will take out the mutex around the pack names list for the
- duration of the method call. If concurrent updates have been made, a
- three-way merge between the current list and the current in memory list
- is performed.
-
- :param clear_obsolete_packs: If True, clear out the contents of the
- obsolete_packs directory.
- """
- self.lock_names()
- try:
- builder = self._index_builder_class()
- # load the disk nodes across
- disk_nodes = set()
- for index, key, value in self._iter_disk_pack_index():
- disk_nodes.add((key, value))
- # do a two-way diff against our original content
- current_nodes = set()
- for name, sizes in self._names.iteritems():
- current_nodes.add(
- ((name, ), ' '.join(str(size) for size in sizes)))
- deleted_nodes = self._packs_at_load - current_nodes
- new_nodes = current_nodes - self._packs_at_load
- disk_nodes.difference_update(deleted_nodes)
- disk_nodes.update(new_nodes)
- # TODO: handle same-name, index-size-changes here -
- # e.g. use the value from disk, not ours, *unless* we're the one
- # changing it.
- for key, value in disk_nodes:
- builder.add_node(key, value)
- self.transport.put_file('pack-names', builder.finish(),
- mode=self.repo.bzrdir._get_file_mode())
- # move the baseline forward
- self._packs_at_load = disk_nodes
- if clear_obsolete_packs:
- self._clear_obsolete_packs()
- finally:
- self._unlock_names()
- # synchronise the memory packs list with what we just wrote:
+ def _diff_pack_names(self):
+ """Read the pack names from disk, and compare them to the list in memory.
+
+ :return: (disk_nodes, deleted_nodes, new_nodes)
+ disk_nodes The final set of nodes that should be referenced
+ deleted_nodes Nodes which have been removed since we started
+ new_nodes Nodes that are newly introduced
+ """
+ # load the disk nodes across
+ disk_nodes = set()
+ for index, key, value in self._iter_disk_pack_index():
+ disk_nodes.add((key, value))
+
+ # do a two-way diff against our original content
+ current_nodes = set()
+ for name, sizes in self._names.iteritems():
+ current_nodes.add(
+ ((name, ), ' '.join(str(size) for size in sizes)))
+
+ # Packs no longer present in the repository, which were present when we
+ # locked the repository
+ deleted_nodes = self._packs_at_load - current_nodes
+ # Packs which this process is adding
+ new_nodes = current_nodes - self._packs_at_load
+
+ # Update the disk_nodes set to include the ones we are adding, and
+ # remove the ones which were removed by someone else
+ disk_nodes.difference_update(deleted_nodes)
+ disk_nodes.update(new_nodes)
+
+ return disk_nodes, deleted_nodes, new_nodes
+
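
(Aside: the set arithmetic above is easiest to see with plain data. A
standalone sketch with made-up pack names and size strings, not taken
from the diff itself:

    # Baseline read when we locked the names: packs a and b on disk.
    packs_at_load = set([(('pack-a',), '10 20'), (('pack-b',), '30 40')])
    # Our in-memory list: we obsoleted b and created c ourselves.
    current_nodes = set([(('pack-a',), '10 20'), (('pack-c',), '50 60')])
    # On disk right now: another process added d alongside a and b.
    disk_nodes = set([(('pack-a',), '10 20'), (('pack-b',), '30 40'),
                      (('pack-d',), '70 80')])

    deleted_nodes = packs_at_load - current_nodes  # {b}: we removed it
    new_nodes = current_nodes - packs_at_load      # {c}: we added it
    disk_nodes.difference_update(deleted_nodes)    # drop b
    disk_nodes.update(new_nodes)                   # keep d, add c
    # disk_nodes now names packs a, c and d: our changes merged with
    # the other process's additions.
)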
+ def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
+ """Given the correct set of pack files, update our saved info.
+
+ :return: (removed, added, modified)
+ removed pack names removed from self._names
+ added pack names added to self._names
+ modified pack names that had changed value
+ """
+ removed = []
+ added = []
+ modified = []
+ ## self._packs_at_load = disk_nodes
new_names = dict(disk_nodes)
# drop no longer present nodes
for pack in self.all_packs():
if (pack.name,) not in new_names:
+ removed.append(pack.name)
self._remove_pack_from_memory(pack)
# add new nodes/refresh existing ones
for key, value in disk_nodes:
@@ -1617,10 +1621,57 @@
self._remove_pack_from_memory(self.get_pack_by_name(name))
self._names[name] = sizes
self.get_pack_by_name(name)
+ modified.append(name)
else:
# new
self._names[name] = sizes
self.get_pack_by_name(name)
+ added.append(name)
+ return removed, added, modified
+
+ def _save_pack_names(self, clear_obsolete_packs=False):
+ """Save the list of packs.
+
+ This will take out the mutex around the pack names list for the
+ duration of the method call. If concurrent updates have been made, a
+ three-way merge between the current list and the current in memory list
+ is performed.
+
+ :param clear_obsolete_packs: If True, clear out the contents of the
+ obsolete_packs directory.
+ """
+ self.lock_names()
+ try:
+ builder = self._index_builder_class()
+ disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+ # TODO: handle same-name, index-size-changes here -
+ # e.g. use the value from disk, not ours, *unless* we're the one
+ # changing it.
+ for key, value in disk_nodes:
+ builder.add_node(key, value)
+ self.transport.put_file('pack-names', builder.finish(),
+ mode=self.repo.bzrdir._get_file_mode())
+ # move the baseline forward
+ self._packs_at_load = disk_nodes
+ if clear_obsolete_packs:
+ self._clear_obsolete_packs()
+ finally:
+ self._unlock_names()
+ # synchronise the memory packs list with what we just wrote:
+ self._syncronize_pack_names_from_disk_nodes(disk_nodes)
+
+ def reload_pack_names(self):
+ """Sync our pack listing with what is present in the repository.
+
+ This should be called when we find out that something we thought was
+ present is now missing. This happens when another process re-packs the
+ repository, etc.
+ """
+ # This is functionally similar to _save_pack_names, but we don't write
+ # out the new value.
+ disk_nodes, _, _ = self._diff_pack_names()
+ self._packs_at_load = disk_nodes
+ return self._syncronize_pack_names_from_disk_nodes(disk_nodes)
def _clear_obsolete_packs(self):
"""Delete everything from the obsolete-packs directory.
=== modified file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py 2008-10-23 19:11:46 +0000
+++ b/bzrlib/tests/test_pack_repository.py 2008-10-23 19:39:59 +0000
@@ -398,23 +398,21 @@
def test_concurrent_pack_triggers_reload(self):
# create 2 packs, which we will then collapse
tree = self.make_branch_and_tree('tree')
- rev1 = tree.commit('one')
- rev2 = tree.commit('two')
- r1 = tree.branch.repository
- r2 = repository.Repository.open('tree')
- r1.lock_write()
+ tree.lock_write()
try:
+ rev1 = tree.commit('one')
+ rev2 = tree.commit('two')
+ r2 = repository.Repository.open('tree')
r2.lock_read()
try:
# Now r2 has read the pack-names file, but will need to reload
# it after r1 has repacked
- r1.pack()
- self.assertEqual({rev2:(rev1,)},
- r2.get_parent_map([rev2]))
+ tree.branch.repository.pack()
+ self.assertEqual({rev2:(rev1,)}, r2.get_parent_map([rev2]))
finally:
r2.unlock()
finally:
- r1.unlock()
+ tree.unlock()
def test_lock_write_does_not_physically_lock(self):
repo = self.make_repository('.', format=self.get_format())
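
The rewritten test models the race this branch addresses. A condensed
timeline, following the names used in the test above:

    # 1. tree commits rev1 and rev2 -> two pack files on disk.
    # 2. r2 = Repository.open('tree'); r2.lock_read()
    #    r2 caches the pack-names listing at this point.
    # 3. tree.branch.repository.pack()
    #    collapses the two packs into one, so r2's cached listing is
    #    now stale and names a pack that no longer exists.
    # 4. r2.get_parent_map([rev2]) must detect the missing pack and
    #    reload the pack names rather than failing.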
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2008-09-29 07:03:55 +0000
+++ b/bzrlib/tests/test_repository.py 2008-10-23 19:39:59 +0000
@@ -917,6 +917,55 @@
# and the same instance should be returned on successive calls.
self.assertTrue(pack_1 is packs.get_pack_by_name(name))
+ def test_reload_pack_names_new_entry(self):
+ tree = self.make_branch_and_tree('.')
+ tree.lock_write()
+ self.addCleanup(tree.unlock)
+ rev1 = tree.commit('one')
+ rev2 = tree.commit('two')
+ r = repository.Repository.open('.')
+ r.lock_read()
+ self.addCleanup(r.unlock)
+ packs = r._pack_collection
+ packs.ensure_loaded()
+ names = packs.names()
+ # Add a new pack file into the repository
+ rev3 = tree.commit('three')
+ new_names = tree.branch.repository._pack_collection.names()
+ new_name = set(new_names).difference(names)
+ self.assertEqual(1, len(new_name))
+ new_name = new_name.pop()
+ # The old collection hasn't noticed yet
+ self.assertEqual(names, packs.names())
+ # [removed], [added], [modified]
+ self.assertEqual(([], [new_name], []), packs.reload_pack_names())
+ self.assertEqual(new_names, packs.names())
+ # And the repository can access the new revision
+ self.assertEqual({rev3:(rev2,)}, r.get_parent_map([rev3]))
+
+ def test_reload_pack_names_added_and_removed(self):
+ tree = self.make_branch_and_tree('.')
+ tree.lock_write()
+ self.addCleanup(tree.unlock)
+ rev1 = tree.commit('one')
+ rev2 = tree.commit('two')
+ r = repository.Repository.open('.')
+ r.lock_read()
+ self.addCleanup(r.unlock)
+ packs = r._pack_collection
+ packs.ensure_loaded()
+ names = packs.names()
+ # Now repack the whole thing
+ tree.branch.repository.pack()
+ new_names = tree.branch.repository._pack_collection.names()
+ # The other collection hasn't noticed yet
+ self.assertEqual(names, packs.names())
+ removed, added, modified = packs.reload_pack_names()
+ self.assertEqual(new_names, packs.names())
+ self.assertEqual((names, new_names, []),
+ (sorted(removed), sorted(added), sorted(modified)))
+ self.assertEqual({rev2:(rev1,)}, r.get_parent_map([rev2]))
+
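
The three buckets reload_pack_names() reports can be reproduced with
plain dicts. A minimal sketch with invented names; 'modified' is the
same-name, changed-sizes case flagged in the TODO in _save_pack_names:

    # Pack name -> recorded index sizes, before and after a reload.
    before = {'pack-1': (10, 20), 'pack-2': (30, 40)}
    after = {'pack-2': (30, 44), 'pack-3': (50, 60)}

    removed = sorted(set(before) - set(after))    # ['pack-1']
    added = sorted(set(after) - set(before))      # ['pack-3']
    modified = sorted(name for name in set(before) & set(after)
                      if before[name] != after[name])  # ['pack-2']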
class TestPack(TestCaseWithTransport):
"""Tests for the Pack object."""