Rev 2717: Incrementally closing in on a correct fetch for packs. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Wed Aug 15 01:42:52 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2717
revision-id: robertc at robertcollins.net-20070815004245-9vjai5ebacea2b1e
parent: robertc at robertcollins.net-20070814071804-71d340lob7qv3wep
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2007-08-15 10:42:45 +1000
message:
  Incrementally closing in on a correct fetch for packs.
modified:
  bzrlib/debug.py                debug.py-20061102062349-vdhrw9qdpck8cl35-1
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
=== modified file 'bzrlib/debug.py'
--- a/bzrlib/debug.py	2007-07-09 04:31:30 +0000
+++ b/bzrlib/debug.py	2007-08-15 00:42:45 +0000
@@ -23,6 +23,7 @@
 Options include:
     
  * error - show stack traces for all top level exceptions
+ * fetch - trace history copying between repositories
  * hooks 
  * hpss - trace smart protocol requests and responses
  * lock - trace when lockdir locks are taken or released

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-08-08 05:17:26 +0000
+++ b/bzrlib/knit.py	2007-08-15 00:42:45 +0000
@@ -870,9 +870,6 @@
         the requested versions and content_map contains the KnitContents.
         Both dicts take version_ids as their keys.
         """
-        for version_id in version_ids:
-            if not self.has_version(version_id):
-                raise RevisionNotPresent(version_id, self.filename)
         record_map = self._get_record_map(version_ids)
 
         text_map = {}

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-08-14 07:18:04 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-08-15 00:42:45 +0000
@@ -19,8 +19,10 @@
 from itertools import izip
 import math
 import md5
+import time
 
 from bzrlib import (
+        debug,
         pack,
         ui,
         )
@@ -195,7 +197,7 @@
         :param revision_ids: Either None, to copy all data, or a list
             of revision_ids to limit the copied data to the data they
             introduced.
-        :return: The number of revisions copied.
+        :return: A Pack object, or None if nothing was copied.
         """
         # open a pack - using the same name as the last temporary file
 # - which has already been flushed, so it's safe.
@@ -205,7 +207,18 @@
             raise errors.BzrError('call to create_pack_from_packs while '
                 'another pack is being written.')
         random_name = self.repo.control_files._lock.nonce + '.autopack'
+        if 'fetch' in debug.debug_flags:
+            plain_pack_list = ['%s%s' % (transport.base, name) for
+                transport, name in revision_index_map.itervalues()]
+            mutter('%s: create_pack: creating pack from source packs: %s%s %s t=0',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                plain_pack_list)
+            start_time = time.time()
         write_stream = self.repo._upload_transport.open_file_stream(random_name)
+        if 'fetch' in debug.debug_flags:
+            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         pack_hash = md5.new()
         def write_data(bytes, update=pack_hash.update):
             write_stream(bytes)
@@ -221,43 +234,102 @@
         revision_nodes = self._index_contents(revision_index_map)
         # copy revision keys and adjust values
         self._copy_nodes_graph(revision_nodes, revision_index_map, writer, revision_index)
+        if 'fetch' in debug.debug_flags:
+            mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                len(list(revision_index.iter_all_entries())),
+                time.time() - start_time)
         # select inventory keys
         inv_nodes = self._index_contents(inventory_index_map)
         # copy inventory keys and adjust values
         self._copy_nodes_graph(inv_nodes, inventory_index_map, writer, inv_index)
+        if 'fetch' in debug.debug_flags:
+            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                len(list(inv_index.iter_all_entries())),
+                time.time() - start_time)
         # select text keys
         text_nodes = self._index_contents(text_index_map)
         # copy text keys and adjust values
         self._copy_nodes_graph(text_nodes, text_index_map, writer, text_index)
+        if 'fetch' in debug.debug_flags:
+            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                len(list(text_index.iter_all_entries())),
+                time.time() - start_time)
         # select signature keys
         signature_nodes = self._index_contents(signature_index_map)
         # copy signature keys and adjust values
         self._copy_nodes(signature_nodes, signature_index_map, writer, signature_index)
+        if 'fetch' in debug.debug_flags:
+            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                len(list(signature_index.iter_all_entries())),
+                time.time() - start_time)
         # finish the pack
         writer.end()
         new_name = pack_hash.hexdigest()
+        # if nothing has been written, discard the new pack.
+        if 0 == sum((len(list(revision_index.iter_all_entries())),
+            len(list(inv_index.iter_all_entries())),
+            len(list(text_index.iter_all_entries())),
+            len(list(signature_index.iter_all_entries())),
+            )):
+            self.repo._upload_transport.delete(random_name)
+            return None
         # add to names
         self.allocate(new_name)
         # rename into place
         self.repo._upload_transport.close_file_stream(random_name)
         self.repo._upload_transport.rename(random_name, '../packs/' + new_name + '.pack')
+        result = Pack()
+        result.name = new_name
+        result.transport = self.repo._upload_transport.clone('../packs/')
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                result.transport, result.name,
+                time.time() - start_time)
         # write indices
         index_transport = self.repo._upload_transport.clone('../indices')
         rev_index_name = self.repo._revision_store.name_to_revision_index_name(new_name)
         index_transport.put_file(rev_index_name, revision_index.finish())
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: wrote revision index: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         inv_index_name = self.repo._inv_thunk.name_to_inv_index_name(new_name)
         index_transport.put_file(inv_index_name, inv_index.finish())
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: wrote inventory index: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         text_index_name = self.repo.weave_store.name_to_text_index_name(new_name)
         index_transport.put_file(text_index_name, text_index.finish())
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: wrote file texts index: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         signature_index_name = self.repo._revision_store.name_to_signature_index_name(new_name)
         index_transport.put_file(signature_index_name, signature_index.finish())
-        result = Pack()
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: wrote revision signatures index: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         result.revision_index = revision_index
         result.inventory_index = inv_index
         result.text_index = text_index
         result.signature_index = signature_index
-        result.name = new_name
-        result.transport = self.repo._upload_transport.clone('../packs/')
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: finished: %s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                time.time() - start_time)
         return result
 
     def _execute_pack_operations(self, pack_operations):
@@ -444,6 +516,7 @@
                 GraphIndex(self.transport, 'pack-names').iter_all_entries())
 
     def allocate(self, name):
+        self.ensure_loaded()
         if name in self._names:
             raise errors.DuplicateKey(name)
         self._names.add(name)

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-08-14 07:18:04 +0000
+++ b/bzrlib/repository.py	2007-08-15 00:42:45 +0000
@@ -1062,7 +1062,14 @@
     def sign_revision(self, revision_id, gpg_strategy):
         revision_id = osutils.safe_revision_id(revision_id)
         plaintext = Testament.from_revision(self, revision_id).as_short_text()
-        self.store_revision_signature(gpg_strategy, plaintext, revision_id)
+        self.start_write_group()
+        try:
+            self.store_revision_signature(gpg_strategy, plaintext, revision_id)
+        except:
+            self.abort_write_group()
+            raise
+        else:
+            self.commit_write_group()
 
     @needs_read_lock
     def has_signature_for_revision_id(self, revision_id):
@@ -1845,11 +1852,11 @@
         """
         from bzrlib.repofmt.pack_repo import RepositoryFormatPack
         try:
-            are_knits = (isinstance(source._format, RepositoryFormatPack) and
+            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                 isinstance(target._format, RepositoryFormatPack))
         except AttributeError:
             return False
-        return are_knits and InterRepository._same_model(source, target)
+        return are_packs and InterRepository._same_model(source, target)
 
     @needs_write_lock
     def fetch(self, revision_id=None, pb=None):
@@ -1860,15 +1867,15 @@
         revision_id = osutils.safe_revision_id(revision_id)
         self.count_copied = 0
         if revision_id is None:
-            # nothing to do.
-            return
-        if _mod_revision.is_null(revision_id):
             # TODO:
             # everything to do - use pack logic
             # to fetch from all packs to one without
             # inventory parsing etc.
             # till then:
             revision_ids = self.source.all_revision_ids()
+        elif _mod_revision.is_null(revision_id):
+            # nothing to do:
+            return
         else:
             try:
                 revision_ids = self.missing_revision_ids(revision_id)
@@ -1886,9 +1893,12 @@
             signature_index_map,
             revision_ids
             )
-        self.target._packs.save()
-        self.target._packs.add_pack_to_memory(pack)
-        return pack.get_revision_count()
+        if pack is not None:
+            self.target._packs.save()
+            self.target._packs.add_pack_to_memory(pack)
+            return pack.get_revision_count()
+        else:
+            return 0
 
     @needs_read_lock
     def missing_revision_ids(self, revision_id=None):

=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-07-17 16:04:00 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-08-15 00:42:45 +0000
@@ -237,10 +237,12 @@
         tree_a = self.make_branch_and_tree('a')
         self.bzrdir = tree_a.branch.bzrdir
         # add a corrupt inventory 'orphan'
-        inv_file = tree_a.branch.repository.control_weaves.get_weave(
-            'inventory', 
-            tree_a.branch.repository.get_transaction())
+        tree_a.branch.repository.lock_write()
+        tree_a.branch.repository.start_write_group()
+        inv_file = tree_a.branch.repository.get_inventory_weave()
         inv_file.add_lines('orphan', [], [])
+        tree_a.branch.repository.commit_write_group()
+        tree_a.branch.repository.unlock()
         # add a real revision 'rev1'
         tree_a.commit('rev1', rev_id='rev1', allow_pointless=True)
         # add a real revision 'rev2' based on rev1
@@ -271,7 +273,7 @@
         self.assertEqual(['rev1'],
                          repo_b.missing_revision_ids(repo_a, revision_id='rev1'))
         
-    def test_fetch_preserves_signatures(self):
+    def test_fetch_fetches_signatures_too(self):
         from_repo = self.bzrdir.open_repository()
         from_signature = from_repo.get_signature_text('rev2')
         to_repo = self.make_to_repository('target')



More information about the bazaar-commits mailing list