Rev 2619: Add abort_write_group and wire write_groups into fetch and commit. in http://people.ubuntu.com/~robertc/baz2.0/repo-write-group

Robert Collins robertc at robertcollins.net
Tue Jul 17 17:04:14 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repo-write-group

------------------------------------------------------------
revno: 2619
revision-id: robertc at robertcollins.net-20070717160400-l1ng2wgmldsttebk
parent: robertc at robertcollins.net-20070716110120-9we93ynxjza948vd
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repo-write-group
timestamp: Wed 2007-07-18 02:04:00 +1000
message:
  Add abort_write_group and wire write_groups into fetch and commit.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/branch_implementations/test_branch.py testbranch.py-20050711070244-121d632bc37d7253
  bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
  bzrlib/tests/repository_implementations/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
  bzrlib/tests/repository_implementations/test_write_group.py test_write_group.py-20070716105516-89n34xtogq5frn0m-1
  bzrlib/tests/workingtree_implementations/test_inv.py test_inv.py-20070311221604-ighlq8tbn5xq0kuo-1
=== modified file 'NEWS'
--- a/NEWS	2007-07-16 11:01:20 +0000
+++ b/NEWS	2007-07-17 16:04:00 +0000
@@ -47,12 +47,12 @@
       the null revision, and consider using ``None`` for this purpose
       deprecated.  (Aaron Bentley)
 
-    * New method on Repository - ``start_write_group``, ``end_write_group``
-      and ``is_in_write_group`` - which provide a clean hook point for 
-      transactional Repositories - ones where all the data for a fetch or
-      commit needs to be made atomically available in one step. This allows
-      the write lock to remain while making a series of data insertions.
-      (e.g. data conversion). (Robert Collins)
+    * New methods on Repository - ``start_write_group``,
+      ``commit_write_group``, ``abort_write_group`` and ``is_in_write_group`` -
+      which provide a clean hook point for transactional Repositories - ones
+      where all the data for a fetch or commit needs to be made atomically
+      available in one step. This allows the write lock to remain while making
+      a series of data insertions.  (e.g. data conversion). (Robert Collins)
 
   TESTING:
 

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-07-13 02:23:34 +0000
+++ b/bzrlib/commit.py	2007-07-17 16:04:00 +0000
@@ -300,25 +300,30 @@
             self._emit_progress_set_stage("Collecting changes", show_entries=True)
             self.builder = self.branch.get_commit_builder(self.parents,
                 self.config, timestamp, timezone, committer, revprops, rev_id)
-            self._update_builder_with_changes()
-            self._check_pointless()
-
-            # TODO: Now the new inventory is known, check for conflicts.
-            # ADHB 2006-08-08: If this is done, populate_new_inv should not add
-            # weave lines, because nothing should be recorded until it is known
-            # that commit will succeed.
-            self._emit_progress_set_stage("Saving data locally")
-            self.builder.finish_inventory()
-
-            # Prompt the user for a commit message if none provided
-            message = message_callback(self)
-            assert isinstance(message, unicode), type(message)
-            self.message = message
-            self._escape_commit_message()
-
-            # Add revision data to the local branch
-            self.rev_id = self.builder.commit(self.message)
-            
+            try:
+                self._update_builder_with_changes()
+                self._check_pointless()
+
+                # TODO: Now the new inventory is known, check for conflicts.
+                # ADHB 2006-08-08: If this is done, populate_new_inv should not add
+                # weave lines, because nothing should be recorded until it is known
+                # that commit will succeed.
+                self._emit_progress_set_stage("Saving data locally")
+                self.builder.finish_inventory()
+
+                # Prompt the user for a commit message if none provided
+                message = message_callback(self)
+                assert isinstance(message, unicode), type(message)
+                self.message = message
+                self._escape_commit_message()
+
+                # Add revision data to the local branch
+                self.rev_id = self.builder.commit(self.message)
+            except:
+                # perhaps this should be done by the CommitBuilder ?
+                self.work_tree.branch.repository.abort_write_group()
+                raise
+
             # Upload revision data to the master.
             # this will propagate merged revisions too if needed.
             if self.bound_branch:

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2007-04-20 04:18:37 +0000
+++ b/bzrlib/fetch.py	2007-07-17 16:04:00 +0000
@@ -108,7 +108,14 @@
         try:
             self.to_repository.lock_write()
             try:
-                self.__fetch()
+                self.to_repository.start_write_group()
+                try:
+                    self.__fetch()
+                except:
+                    self.to_repository.abort_write_group()
+                    raise
+                else:
+                    self.to_repository.commit_write_group()
             finally:
                 if self.nested_pb is not None:
                     self.nested_pb.finished()

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2007-07-16 11:01:20 +0000
+++ b/bzrlib/remote.py	2007-07-17 16:04:00 +0000
@@ -249,16 +249,27 @@
         self._lock_count = 0
         self._leave_lock = False
 
-    def end_write_group(self):
-        """End a write group on the decorated repository.
-        
-        Smart methods peform operations in a single step so this api
-        is not really applicable except as a compatability thunk
-        for older plugins that don't use e.g. the CommitBuilder
-        facility.
-        """
-        self._ensure_real()
-        return self._real_repository.end_write_group()
+    def abort_write_group(self):
+        """Abort a write group on the decorated repository.
+        
+        Smart methods perform operations in a single step so this api
+        is not really applicable except as a compatibility thunk
+        for older plugins that don't use e.g. the CommitBuilder
+        facility.
+        """
+        self._ensure_real()
+        return self._real_repository.abort_write_group()
+
+    def commit_write_group(self):
+        """Commit a write group on the decorated repository.
+        
+        Smart methods perform operations in a single step so this api
+        is not really applicable except as a compatibility thunk
+        for older plugins that don't use e.g. the CommitBuilder
+        facility.
+        """
+        self._ensure_real()
+        return self._real_repository.commit_write_group()
 
     def _ensure_real(self):
         """Ensure that there is a _real_repository set.

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-07-10 21:18:54 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-07-17 16:04:00 +0000
@@ -275,8 +275,10 @@
         :param revision_id: Optional revision id.
         """
         revision_id = osutils.safe_revision_id(revision_id)
-        return RootCommitBuilder(self, parents, config, timestamp, timezone,
+        result = RootCommitBuilder(self, parents, config, timestamp, timezone,
                                  committer, revprops, revision_id)
+        self.start_write_group()
+        return result
 
 
 class RepositoryFormatKnit(MetaDirRepositoryFormat):

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-07-16 11:01:20 +0000
+++ b/bzrlib/repository.py	2007-07-17 16:04:00 +0000
@@ -81,6 +81,30 @@
         r'.*revision="(?P<revision_id>[^"]+)"'
         )
 
+    def abort_write_group(self):
+        """Abort the current write group.
+
+        :seealso: start_write_group.
+        """
+        if self._write_group is not self.get_transaction():
+            # has an unlock or relock occurred?
+            raise errors.BzrError('mismatched lock context and write group.')
+        self._abort_write_group()
+        self._write_group = None
+
+    def _abort_write_group(self):
+        """Template method for per-repository write group cleanup.
+        
+        This is called during abort before the write group is considered to be 
+        finished and should cleanup any internal state accrued during the write
+        group. There is no requirement that data handed to the repository be
+        *not* made available - this is not a rollback - but neither should any
+        attempt be made to ensure that data added is fully committed. Abort is
+        invoked when an error has occurred, so further disk or network
+        operations may not be possible or may error and, if possible, should
+        not be attempted.
+        """
+
     @needs_write_lock
     def add_inventory(self, revision_id, inv, parents):
         """Add the inventory inv to the repository as revision_id.
@@ -257,10 +281,13 @@
 
         XXX: this docstring is duplicated in many places, e.g. lockable_files.py
         """
-        return self.control_files.lock_write(token=token)
+        result = self.control_files.lock_write(token=token)
+        self._refresh_data()
+        return result
 
     def lock_read(self):
         self.control_files.lock_read()
+        self._refresh_data()
 
     def get_physical_lock_status(self):
         return self.control_files.get_physical_lock_status()
@@ -361,16 +388,26 @@
         revision_id = osutils.safe_revision_id(revision_id)
         return InterRepository.get(self, destination).copy_content(revision_id)
 
-    def end_write_group(self):
-        """End a write group in the repository.
+    def commit_write_group(self):
+        """Commit the contents accrued within the current write group.
 
         :seealso: start_write_group.
         """
         if self._write_group is not self.get_transaction():
             # has an unlock or relock occured ?
             raise errors.BzrError('mismatched lock context and write group.')
+        self._commit_write_group()
         self._write_group = None
 
+    def _commit_write_group(self):
+        """Template method for per-repository write group cleanup.
+        
+        This is called before the write group is considered to be 
+        finished and should ensure that all data handed to the repository
+        for writing during the write group is safely committed (to the 
+        extent possible considering file system caching etc).
+        """
+
     def fetch(self, source, revision_id=None, pb=None):
         """Fetch the content required to construct revision_id from source.
 
@@ -398,8 +435,10 @@
         :param revision_id: Optional revision id.
         """
         revision_id = osutils.safe_revision_id(revision_id)
-        return _CommitBuilder(self, parents, config, timestamp, timezone,
+        result =_CommitBuilder(self, parents, config, timestamp, timezone,
                               committer, revprops, revision_id)
+        self.start_write_group()
+        return result
 
     def unlock(self):
         if (self.control_files._lock_count == 1 and
@@ -431,16 +470,24 @@
         between file ids and backend store to manage the insertion of data from
         both fetch and commit operations.
 
-        A write lock is required around the start_write_group/end_write_group
+        A write lock is required around the start_write_group/commit_write_group
         for the support of lock-requiring repository formats.
         """
         if not self.is_locked() or self.control_files._lock_mode != 'w':
             raise errors.NotWriteLocked(self)
         if self._write_group:
             raise errors.BzrError('already in a write group')
-        # so we can detect unlock/relock.
+        self._start_write_group()
+        # so we can detect unlock/relock - the write group is now entered.
         self._write_group = self.get_transaction()
 
+    def _start_write_group(self):
+        """Template method for per-repository write group startup.
+        
+        This is called before the write group is considered to be 
+        entered.
+        """
+
     @needs_read_lock
     def sprout(self, to_bzrdir, revision_id=None):
         """Create a descendent repository for new development.
@@ -829,6 +876,16 @@
         reconciler.reconcile()
         return reconciler
 
+    def _refresh_data(self):
+        """Helper called from lock_* to ensure coherency with disk.
+
+        The default implementation does nothing; it is however possible
+        for repositories to maintain loaded indices across multiple locks
+        by checking inside their implementation of this method to see
+        whether their indices are still valid. This depends of course on
+        the disk format being validatable in this manner.
+        """
+
     @needs_read_lock
     def revision_tree(self, revision_id):
         """Return Tree for a revision on this branch.
@@ -1954,8 +2011,9 @@
                        revision_id=self._new_revision_id,
                        properties=self._revprops)
         rev.parent_ids = self.parents
-        self.repository.add_revision(self._new_revision_id, rev, 
+        self.repository.add_revision(self._new_revision_id, rev,
             self.new_inventory, self._config)
+        self.repository.commit_write_group()
         return self._new_revision_id
 
     def revision_tree(self):

=== modified file 'bzrlib/tests/branch_implementations/test_branch.py'
--- a/bzrlib/tests/branch_implementations/test_branch.py	2007-06-12 16:21:40 +0000
+++ b/bzrlib/tests/branch_implementations/test_branch.py	2007-07-17 16:04:00 +0000
@@ -367,8 +367,12 @@
         result.report_results(verbose=False)
 
     def test_get_commit_builder(self):
-        self.assertIsInstance(self.make_branch(".").get_commit_builder([]), 
-            repository.CommitBuilder)
+        branch = self.make_branch(".")
+        branch.lock_write()
+        builder = branch.get_commit_builder([])
+        self.assertIsInstance(builder, repository.CommitBuilder)
+        branch.repository.commit_write_group()
+        branch.unlock()
 
     def test_generate_revision_history(self):
         """Create a fake revision history easily."""

=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-03-07 01:54:13 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-07-17 16:04:00 +0000
@@ -182,6 +182,8 @@
         # this should ensure that the new versions of files are being checked
         # for during pull operations
         inv = source.get_inventory('a')
+        source.lock_write()
+        source.start_write_group()
         inv['id'].revision = 'b'
         inv.revision_id = 'b'
         sha1 = source.add_inventory('b', inv, ['a'])
@@ -193,6 +195,8 @@
                        revision_id='b')
         rev.parent_ids = ['a']
         source.add_revision('b', rev)
+        source.commit_write_group()
+        source.unlock()
         self.assertRaises(errors.RevisionNotPresent, target.fetch, source)
         self.assertFalse(target.has_revision('b'))
 

=== modified file 'bzrlib/tests/repository_implementations/test_commit_builder.py'
--- a/bzrlib/tests/repository_implementations/test_commit_builder.py	2007-04-11 05:58:16 +0000
+++ b/bzrlib/tests/repository_implementations/test_commit_builder.py	2007-07-17 16:04:00 +0000
@@ -26,9 +26,13 @@
 class TestCommitBuilder(TestCaseWithRepository):
 
     def test_get_commit_builder(self):
-        tree = self.make_branch_and_tree(".")
-        builder = tree.branch.get_commit_builder([])
+        branch = self.make_branch('.')
+        branch.repository.lock_write()
+        builder = branch.repository.get_commit_builder(
+            branch, [], branch.get_config())
         self.assertIsInstance(builder, CommitBuilder)
+        branch.repository.commit_write_group()
+        branch.repository.unlock()
 
     def record_root(self, builder, tree):
         if builder.record_root_entry is True:
@@ -43,21 +47,27 @@
 
     def test_finish_inventory(self):
         tree = self.make_branch_and_tree(".")
+        tree.lock_write()
         builder = tree.branch.get_commit_builder([])
         self.record_root(builder, tree)
         builder.finish_inventory()
+        tree.branch.repository.commit_write_group()
+        tree.unlock()
 
     def test_commit_message(self):
         tree = self.make_branch_and_tree(".")
+        tree.lock_write()
         builder = tree.branch.get_commit_builder([])
         self.record_root(builder, tree)
         builder.finish_inventory()
         rev_id = builder.commit('foo bar blah')
+        tree.unlock()
         rev = tree.branch.repository.get_revision(rev_id)
         self.assertEqual('foo bar blah', rev.message)
 
     def test_commit_with_revision_id(self):
         tree = self.make_branch_and_tree(".")
+        tree.lock_write()
         # use a unicode revision id to test more corner cases.
         # The repository layer is meant to handle this.
         revision_id = u'\xc8abc'.encode('utf8')
@@ -71,10 +81,12 @@
                     revision_id=revision_id)
         except CannotSetRevisionId:
             # This format doesn't support supplied revision ids
+            tree.unlock()
             return
         self.record_root(builder, tree)
         builder.finish_inventory()
         self.assertEqual(revision_id, builder.commit('foo bar'))
+        tree.unlock()
         self.assertTrue(tree.branch.repository.has_revision(revision_id))
         # the revision id must be set on the inventory when saving it. This
         # does not precisely test that - a repository that wants to can add it
@@ -86,12 +98,12 @@
     def test_commit_without_root(self):
         """This should cause a deprecation warning, not an assertion failure"""
         tree = self.make_branch_and_tree(".")
-        if tree.branch.repository.supports_rich_root():
-            raise tests.TestSkipped('Format requires root')
-        self.build_tree(['foo'])
-        tree.add('foo', 'foo-id')
         tree.lock_write()
         try:
+            if tree.branch.repository.supports_rich_root():
+                raise tests.TestSkipped('Format requires root')
+            self.build_tree(['foo'])
+            tree.add('foo', 'foo-id')
             entry = tree.inventory['foo-id']
             builder = tree.branch.get_commit_builder([])
             self.callDeprecated(['Root entry should be supplied to'
@@ -104,10 +116,12 @@
 
     def test_commit(self):
         tree = self.make_branch_and_tree(".")
+        tree.lock_write()
         builder = tree.branch.get_commit_builder([])
         self.record_root(builder, tree)
         builder.finish_inventory()
         rev_id = builder.commit('foo bar')
+        tree.unlock()
         self.assertNotEqual(None, rev_id)
         self.assertTrue(tree.branch.repository.has_revision(rev_id))
         # the revision id must be set on the inventory when saving it. This does not
@@ -117,10 +131,12 @@
 
     def test_revision_tree(self):
         tree = self.make_branch_and_tree(".")
+        tree.lock_write()
         builder = tree.branch.get_commit_builder([])
         self.record_root(builder, tree)
         builder.finish_inventory()
         rev_id = builder.commit('foo bar')
+        tree.unlock()
         rev_tree = builder.revision_tree()
         # Just a couple simple tests to ensure that it actually follows
         # the RevisionTree api.

=== modified file 'bzrlib/tests/repository_implementations/test_write_group.py'
--- a/bzrlib/tests/repository_implementations/test_write_group.py	2007-07-16 11:01:20 +0000
+++ b/bzrlib/tests/repository_implementations/test_write_group.py	2007-07-17 16:04:00 +0000
@@ -38,7 +38,7 @@
         repo = self.make_repository('.')
         repo.lock_write()
         self.assertEqual(None, repo.start_write_group())
-        repo.end_write_group()
+        repo.commit_write_group()
         repo.unlock()
 
     def test_start_write_group_twice_errors(self):
@@ -51,14 +51,14 @@
             # semantic information.
             self.assertRaises(errors.BzrError, repo.start_write_group)
         finally:
-            repo.end_write_group()
+            repo.commit_write_group()
             repo.unlock()
 
-    def test_end_write_group_gets_None(self):
+    def test_commit_write_group_gets_None(self):
         repo = self.make_repository('.')
         repo.lock_write()
         repo.start_write_group()
-        self.assertEqual(None, repo.end_write_group())
+        self.assertEqual(None, repo.commit_write_group())
         repo.unlock()
 
     def test_unlock_after_start_errors(self):
@@ -70,7 +70,7 @@
         # semantic information.
         self.assertRaises(errors.BzrError, repo.unlock)
         self.assertTrue(repo.is_locked())
-        repo.end_write_group()
+        repo.commit_write_group()
         repo.unlock()
 
     def test_is_in_write_group(self):
@@ -79,6 +79,18 @@
         repo.lock_write()
         repo.start_write_group()
         self.assertTrue(repo.is_in_write_group())
-        repo.end_write_group()
-        self.assertFalse(repo.is_in_write_group())
+        repo.commit_write_group()
+        self.assertFalse(repo.is_in_write_group())
+        # abort also removes the in_write_group status.
+        repo.start_write_group()
+        self.assertTrue(repo.is_in_write_group())
+        repo.abort_write_group()
+        self.assertFalse(repo.is_in_write_group())
+        repo.unlock()
+
+    def test_abort_write_group_gets_None(self):
+        repo = self.make_repository('.')
+        repo.lock_write()
+        repo.start_write_group()
+        self.assertEqual(None, repo.abort_write_group())
         repo.unlock()

=== modified file 'bzrlib/tests/workingtree_implementations/test_inv.py'
--- a/bzrlib/tests/workingtree_implementations/test_inv.py	2007-04-02 17:32:40 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_inv.py	2007-07-17 16:04:00 +0000
@@ -80,6 +80,7 @@
             'fileid', 
             self.branch.repository.get_transaction()).get_lines('2')
         self.assertEqual(lines, ['contents of subdir/file\n'])
+        self.wt.branch.repository.commit_write_group()
 
     def test_snapshot_unchanged(self):
         #This tests that a simple commit does not make a new entry for
@@ -94,6 +95,7 @@
         self.assertRaises(errors.RevisionNotPresent,
                           vf.get_lines,
                           '2')
+        self.wt.branch.repository.commit_write_group()
 
     def test_snapshot_merge_identical_different_revid(self):
         # This tests that a commit with two identical parents, one of which has
@@ -114,6 +116,7 @@
                                   {'1':self.file_1, 'other':other_ie},
                                   self.wt, self.builder)
         self.assertEqual(self.file_active.revision, '2')
+        self.wt.branch.repository.commit_write_group()
 
     def test_snapshot_changed(self):
         # This tests that a commit with one different parent results in a new
@@ -124,6 +127,7 @@
                                   self.wt, self.builder)
         # expected outcome - file_1 has a revision id of '2'
         self.assertEqual(self.file_active.revision, '2')
+        self.wt.branch.repository.commit_write_group()
 
 
 class TestApplyInventoryDelta(TestCaseWithWorkingTree):



More information about the bazaar-commits mailing list