Rev 5773: (jelmer) Move default ReconcilePacker from bzrlib.repofmt.pack_repo to bzrlib.repofmt.knitpack_repo in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Fri Apr 8 15:03:48 UTC 2011

At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5773 [merge]
revision-id: pqm at pqm.ubuntu.com-20110408150331-pc8lu2zpvce2qw7f
parent: pqm at pqm.ubuntu.com-20110408133740-znydrnxzi0ucm2ep
parent: jelmer at samba.org-20110405231355-rojtfqe6yysd2462
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2011-04-08 15:03:31 +0000
message:
  (jelmer) Move default ReconcilePacker from bzrlib.repofmt.pack_repo to
   bzrlib.repofmt.knitpack_repo. (Jelmer Vernooij)
modified:
  bzrlib/repofmt/knitpack_repo.py knitpack_repo.py-20110405143430-6p75yrk99v6pb770-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
=== modified file 'bzrlib/repofmt/knitpack_repo.py'

--- a/bzrlib/repofmt/knitpack_repo.py	2011-04-05 15:07:09 +0000
+++ b/bzrlib/repofmt/knitpack_repo.py	2011-04-05 23:13:55 +0000
@@ -20,11 +20,16 @@
 lazy_import(globals(), """
 from bzrlib import (
     bzrdir,
+    knit,
+    osutils,
+    revision as _mod_revision,
+    tsort,
     xml5,
     xml6,
     xml7,
     )
 from bzrlib.knit import (
+    _KnitGraphIndex,
     KnitPlainFactory,
     KnitVersionedFiles,
     )
@@ -35,10 +40,12 @@
     )
 from bzrlib.index import (
     GraphIndex,
+    GraphIndexPrefixAdapter,
     InMemoryGraphIndex,
     )
 from bzrlib.repofmt.pack_repo import (
     RepositoryFormatPack,
+    Packer,
     PackCommitBuilder,
     PackRepository,
     PackRootCommitBuilder,
@@ -55,6 +62,10 @@
             return KnitPackStreamSource(self, to_format)
         return PackRepository._get_source(self, to_format)
 
+    def _reconcile_pack(self, collection, packs, extension, revs, pb):
+        packer = KnitReconcilePacker(collection, packs, extension, revs)
+        return packer.pack(pb)
+
 
 class RepositoryFormatKnitPack1(RepositoryFormatPack):
     """A no-subtrees parameterized Pack repository.
@@ -478,3 +489,146 @@
         yield self._get_filtered_inv_stream(revision_ids)
         yield self._get_text_stream()
 
+
+class KnitReconcilePacker(Packer):
+    """A packer which regenerates indices etc as it copies.
+
+    This is used by ``bzr reconcile`` to cause parent text pointers to be
+    regenerated.
+    """
+
+    def _extra_init(self):
+        self._data_changed = False
+
+    def _process_inventory_lines(self, inv_lines):
+        """Generate a text key reference map rather for reconciling with."""
+        repo = self._pack_collection.repo
+        refs = repo._serializer._find_text_key_references(inv_lines)
+        self._text_refs = refs
+        # during reconcile we:
+        #  - convert unreferenced texts to full texts
+        #  - correct texts which reference a text not copied to be full texts
+        #  - copy all others as-is but with corrected parents.
+        #  - so at this point we don't know enough to decide what becomes a full
+        #    text.
+        self._text_filter = None
+
+    def _copy_text_texts(self):
+        """generate what texts we should have and then copy."""
+        self.pb.update("Copying content texts", 3)
+        # we have three major tasks here:
+        # 1) generate the ideal index
+        repo = self._pack_collection.repo
+        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
+            _1, key, _2, refs in
+            self.new_pack.revision_index.iter_all_entries()])
+        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
+        # 2) generate a text_nodes list that contains all the deltas that can
+        #    be used as-is, with corrected parents.
+        ok_nodes = []
+        bad_texts = []
+        discarded_nodes = []
+        NULL_REVISION = _mod_revision.NULL_REVISION
+        text_index_map, text_nodes = self._get_text_nodes()
+        for node in text_nodes:
+            # 0 - index
+            # 1 - key
+            # 2 - value
+            # 3 - refs
+            try:
+                ideal_parents = tuple(ideal_index[node[1]])
+            except KeyError:
+                discarded_nodes.append(node)
+                self._data_changed = True
+            else:
+                if ideal_parents == (NULL_REVISION,):
+                    ideal_parents = ()
+                if ideal_parents == node[3][0]:
+                    # no change needed.
+                    ok_nodes.append(node)
+                elif ideal_parents[0:1] == node[3][0][0:1]:
+                    # the left most parent is the same, or there are no parents
+                    # today. Either way, we can preserve the representation as
+                    # long as we change the refs to be inserted.
+                    self._data_changed = True
+                    ok_nodes.append((node[0], node[1], node[2],
+                        (ideal_parents, node[3][1])))
+                    self._data_changed = True
+                else:
+                    # Reinsert this text completely
+                    bad_texts.append((node[1], ideal_parents))
+                    self._data_changed = True
+        # we're finished with some data.
+        del ideal_index
+        del text_nodes
+        # 3) bulk copy the ok data
+        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
+        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
+            self.new_pack.text_index, readv_group_iter, total_items))
+        # 4) adhoc copy all the other texts.
+        # We have to topologically insert all texts otherwise we can fail to
+        # reconcile when parts of a single delta chain are preserved intact,
+        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
+        # reinserted, and if d3 has incorrect parents it will also be
+        # reinserted. If we insert d3 first, d2 is present (as it was bulk
+        # copied), so we will try to delta, but d2 is not currently able to be
+        # extracted because its basis d1 is not present. Topologically sorting
+        # addresses this. The following generates a sort for all the texts that
+        # are being inserted without having to reference the entire text key
+        # space (we only topo sort the revisions, which is smaller).
+        topo_order = tsort.topo_sort(ancestors)
+        rev_order = dict(zip(topo_order, range(len(topo_order))))
+        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
+        transaction = repo.get_transaction()
+        file_id_index = GraphIndexPrefixAdapter(
+            self.new_pack.text_index,
+            ('blank', ), 1,
+            add_nodes_callback=self.new_pack.text_index.add_nodes)
+        data_access = knit._DirectPackAccess(
+                {self.new_pack.text_index:self.new_pack.access_tuple()})
+        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
+            self.new_pack.access_tuple())
+        output_texts = KnitVersionedFiles(
+            _KnitGraphIndex(self.new_pack.text_index,
+                add_callback=self.new_pack.text_index.add_nodes,
+                deltas=True, parents=True, is_locked=repo.is_locked),
+            data_access=data_access, max_delta_chain=200)
+        for key, parent_keys in bad_texts:
+            # We refer to the new pack to delta data being output.
+            # A possible improvement would be to catch errors on short reads
+            # and only flush then.
+            self.new_pack.flush()
+            parents = []
+            for parent_key in parent_keys:
+                if parent_key[0] != key[0]:
+                    # Graph parents must match the fileid
+                    raise errors.BzrError('Mismatched key parent %r:%r' %
+                        (key, parent_keys))
+                parents.append(parent_key[1])
+            text_lines = osutils.split_lines(repo.texts.get_record_stream(
+                [key], 'unordered', True).next().get_bytes_as('fulltext'))
+            output_texts.add_lines(key, parent_keys, text_lines,
+                random_id=True, check_content=False)
+        # 5) check that nothing inserted has a reference outside the keyspace.
+        missing_text_keys = self.new_pack.text_index._external_references()
+        if missing_text_keys:
+            raise errors.BzrCheckError('Reference to missing compression parents %r'
+                % (missing_text_keys,))
+        self._log_copied_texts()
+
+    def _use_pack(self, new_pack):
+        """Override _use_pack to check for reconcile having changed content."""
+        # XXX: we might be better checking this at the copy time.
+        original_inventory_keys = set()
+        inv_index = self._pack_collection.inventory_index.combined_index
+        for entry in inv_index.iter_all_entries():
+            original_inventory_keys.add(entry[1])
+        new_inventory_keys = set()
+        for entry in new_pack.inventory_index.iter_all_entries():
+            new_inventory_keys.add(entry[1])
+        if new_inventory_keys != original_inventory_keys:
+            self._data_changed = True
+        return new_pack.data_inserted() and self._data_changed
+
+
+

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2011-04-08 13:37:40 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2011-04-08 15:03:31 +0000
@@ -49,7 +49,6 @@
     errors,
     lockable_files,
     lockdir,
-    revision as _mod_revision,
     )
 
 from bzrlib.decorators import needs_write_lock, only_raises
@@ -1200,147 +1199,6 @@
         return new_pack
 
 
-class ReconcilePacker(Packer):
-    """A packer which regenerates indices etc as it copies.
-
-    This is used by ``bzr reconcile`` to cause parent text pointers to be
-    regenerated.
-    """
-
-    def _extra_init(self):
-        self._data_changed = False
-
-    def _process_inventory_lines(self, inv_lines):
-        """Generate a text key reference map rather for reconciling with."""
-        repo = self._pack_collection.repo
-        refs = repo._serializer._find_text_key_references(inv_lines)
-        self._text_refs = refs
-        # during reconcile we:
-        #  - convert unreferenced texts to full texts
-        #  - correct texts which reference a text not copied to be full texts
-        #  - copy all others as-is but with corrected parents.
-        #  - so at this point we don't know enough to decide what becomes a full
-        #    text.
-        self._text_filter = None
-
-    def _copy_text_texts(self):
-        """generate what texts we should have and then copy."""
-        self.pb.update("Copying content texts", 3)
-        # we have three major tasks here:
-        # 1) generate the ideal index
-        repo = self._pack_collection.repo
-        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
-            _1, key, _2, refs in
-            self.new_pack.revision_index.iter_all_entries()])
-        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
-        # 2) generate a text_nodes list that contains all the deltas that can
-        #    be used as-is, with corrected parents.
-        ok_nodes = []
-        bad_texts = []
-        discarded_nodes = []
-        NULL_REVISION = _mod_revision.NULL_REVISION
-        text_index_map, text_nodes = self._get_text_nodes()
-        for node in text_nodes:
-            # 0 - index
-            # 1 - key
-            # 2 - value
-            # 3 - refs
-            try:
-                ideal_parents = tuple(ideal_index[node[1]])
-            except KeyError:
-                discarded_nodes.append(node)
-                self._data_changed = True
-            else:
-                if ideal_parents == (NULL_REVISION,):
-                    ideal_parents = ()
-                if ideal_parents == node[3][0]:
-                    # no change needed.
-                    ok_nodes.append(node)
-                elif ideal_parents[0:1] == node[3][0][0:1]:
-                    # the left most parent is the same, or there are no parents
-                    # today. Either way, we can preserve the representation as
-                    # long as we change the refs to be inserted.
-                    self._data_changed = True
-                    ok_nodes.append((node[0], node[1], node[2],
-                        (ideal_parents, node[3][1])))
-                    self._data_changed = True
-                else:
-                    # Reinsert this text completely
-                    bad_texts.append((node[1], ideal_parents))
-                    self._data_changed = True
-        # we're finished with some data.
-        del ideal_index
-        del text_nodes
-        # 3) bulk copy the ok data
-        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
-        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
-            self.new_pack.text_index, readv_group_iter, total_items))
-        # 4) adhoc copy all the other texts.
-        # We have to topologically insert all texts otherwise we can fail to
-        # reconcile when parts of a single delta chain are preserved intact,
-        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
-        # reinserted, and if d3 has incorrect parents it will also be
-        # reinserted. If we insert d3 first, d2 is present (as it was bulk
-        # copied), so we will try to delta, but d2 is not currently able to be
-        # extracted because its basis d1 is not present. Topologically sorting
-        # addresses this. The following generates a sort for all the texts that
-        # are being inserted without having to reference the entire text key
-        # space (we only topo sort the revisions, which is smaller).
-        topo_order = tsort.topo_sort(ancestors)
-        rev_order = dict(zip(topo_order, range(len(topo_order))))
-        bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
-        transaction = repo.get_transaction()
-        file_id_index = GraphIndexPrefixAdapter(
-            self.new_pack.text_index,
-            ('blank', ), 1,
-            add_nodes_callback=self.new_pack.text_index.add_nodes)
-        data_access = _DirectPackAccess(
-                {self.new_pack.text_index:self.new_pack.access_tuple()})
-        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
-            self.new_pack.access_tuple())
-        output_texts = KnitVersionedFiles(
-            _KnitGraphIndex(self.new_pack.text_index,
-                add_callback=self.new_pack.text_index.add_nodes,
-                deltas=True, parents=True, is_locked=repo.is_locked),
-            data_access=data_access, max_delta_chain=200)
-        for key, parent_keys in bad_texts:
-            # We refer to the new pack to delta data being output.
-            # A possible improvement would be to catch errors on short reads
-            # and only flush then.
-            self.new_pack.flush()
-            parents = []
-            for parent_key in parent_keys:
-                if parent_key[0] != key[0]:
-                    # Graph parents must match the fileid
-                    raise errors.BzrError('Mismatched key parent %r:%r' %
-                        (key, parent_keys))
-                parents.append(parent_key[1])
-            text_lines = osutils.split_lines(repo.texts.get_record_stream(
-                [key], 'unordered', True).next().get_bytes_as('fulltext'))
-            output_texts.add_lines(key, parent_keys, text_lines,
-                random_id=True, check_content=False)
-        # 5) check that nothing inserted has a reference outside the keyspace.
-        missing_text_keys = self.new_pack.text_index._external_references()
-        if missing_text_keys:
-            raise errors.BzrCheckError('Reference to missing compression parents %r'
-                % (missing_text_keys,))
-        self._log_copied_texts()
-
-    def _use_pack(self, new_pack):
-        """Override _use_pack to check for reconcile having changed content."""
-        # XXX: we might be better checking this at the copy time.
-        original_inventory_keys = set()
-        inv_index = self._pack_collection.inventory_index.combined_index
-        for entry in inv_index.iter_all_entries():
-            original_inventory_keys.add(entry[1])
-        new_inventory_keys = set()
-        for entry in new_pack.inventory_index.iter_all_entries():
-            new_inventory_keys.add(entry[1])
-        if new_inventory_keys != original_inventory_keys:
-            self._data_changed = True
-        return new_pack.data_inserted() and self._data_changed
-
-
 class RepositoryPackCollection(object):
     """Management of packs within a repository.
 
@@ -2409,8 +2267,7 @@
         return reconciler
 
     def _reconcile_pack(self, collection, packs, extension, revs, pb):
-        packer = ReconcilePacker(collection, packs, extension, revs)
-        return packer.pack(pb)
+        raise NotImplementedError(self._reconcile_pack)
 
     @only_raises(errors.LockNotHeld, errors.LockBroken)
     def unlock(self):