Rev 2910: (Andrew Bennetts) Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Oct 16 03:42:36 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2910
revision-id: pqm at pqm.ubuntu.com-20071016024233-6fmmyqoh0cfnsni8
parent: pqm at pqm.ubuntu.com-20071015224423-kn8kb5xrc4pef7vu
parent: andrew.bennetts at canonical.com-20071015091030-t9f7qvqueo9lswgc
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-10-16 03:42:33 +0100
message:
  (Andrew Bennetts) Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches.
modified:
  bzrlib/reconcile.py            reweave_inventory.py-20051108164726-1e5e0934febac06e
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 2906.1.1
    merged: andrew.bennetts at canonical.com-20071015091030-t9f7qvqueo9lswgc
    parent: pqm at pqm.ubuntu.com-20071012085726-lyq36i8bo7ew28ba
    committer: Andrew Bennetts <andrew.bennetts at canonical.com>
    branch nick: reconcile-speed
    timestamp: Mon 2007-10-15 19:10:30 +1000
    message:
      Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches.
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py	2007-10-05 02:19:09 +0000
+++ b/bzrlib/reconcile.py	2007-10-15 09:10:30 +0000
@@ -372,12 +372,14 @@
         """
         transaction = self.repo.get_transaction()
         revision_versions = repository._RevisionTextVersionCache(self.repo)
+        versions = self.revisions.versions()
+        revision_versions.prepopulate_revs(versions)
         for num, file_id in enumerate(self.repo.weave_store):
             self.pb.update('Fixing text parents', num,
                            len(self.repo.weave_store))
             vf = self.repo.weave_store.get_weave(file_id, transaction)
             vf_checker = self.repo.get_versioned_file_checker(
-                self.revisions.versions(), revision_versions)
+                versions, revision_versions)
             versions_with_bad_parents = vf_checker.check_file_version_parents(
                 vf, file_id)
             if len(versions_with_bad_parents) == 0:

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-10-15 05:23:29 +0000
+++ b/bzrlib/repository.py	2007-10-16 02:42:33 +0000
@@ -2493,6 +2493,9 @@
     def __init__(self, repository):
         self.repository = repository
         self.revision_versions = {}
+        self.revision_parents = {}
+        self.repo_graph = self.repository.get_graph()
+        self.rev_heads = {}
 
     def add_revision_text_versions(self, tree):
         """Cache text version data from the supplied revision tree"""
@@ -2507,10 +2510,44 @@
         try:
             inv_revisions = self.revision_versions[revision_id]
         except KeyError:
-            tree = self.repository.revision_tree(revision_id)
-            inv_revisions = self.add_revision_text_versions(tree)
+            try:
+                tree = self.repository.revision_tree(revision_id)
+            except errors.RevisionNotPresent:
+                self.revision_versions[revision_id] = inv_revisions = {}
+            else:
+                inv_revisions = self.add_revision_text_versions(tree)
         return inv_revisions.get(file_id)
 
+    def prepopulate_revs(self, revision_ids):
+        # Filter out versions that we don't have an inventory for, so that the
+        # revision_trees() call won't fail.
+        inv_weave = self.repository.get_inventory_weave()
+        revs = [r for r in revision_ids if inv_weave.has_version(r)]
+        # XXX: this loop is very similar to
+        # bzrlib.fetch.Inter1and2Helper.iter_rev_trees.
+        while revs:
+            for tree in self.repository.revision_trees(revs[:100]):
+                if tree.inventory.revision_id is None:
+                    tree.inventory.revision_id = tree.get_revision_id()
+                self.add_revision_text_versions(tree)
+            revs = revs[100:]
+
+    def get_parents(self, revision_id):
+        try:
+            return self.revision_parents[revision_id]
+        except KeyError:
+            parents = self.repository.get_parents([revision_id])[0]
+            self.revision_parents[revision_id] = parents
+            return parents
+
+    def heads(self, revision_ids):
+        revision_ids = tuple(revision_ids)
+        try:
+            return self.rev_heads[revision_ids]
+        except KeyError:
+            heads = self.repo_graph.heads(revision_ids)
+            self.rev_heads[revision_ids] = heads
+            return heads
 
 class VersionedFileChecker(object):
 
@@ -2524,25 +2561,17 @@
             file_id, revision_id)
         if text_revision is None:
             return None
-        parents_of_text_revision = self.repository.get_parents(
-            [text_revision])[0]
+        parents_of_text_revision = self.revision_versions.get_parents(
+            text_revision)
         parents_from_inventories = []
         for parent in parents_of_text_revision:
             if parent == _mod_revision.NULL_REVISION:
                 continue
-            try:
-                inventory = self.repository.get_inventory(parent)
-            except errors.RevisionNotPresent:
-                pass
-            else:
-                try:
-                    introduced_in = inventory[file_id].revision
-                except errors.NoSuchId:
-                    pass
-                else:
-                    parents_from_inventories.append(introduced_in)
-        graph = self.repository.get_graph()
-        heads = set(graph.heads(parents_from_inventories))
+            introduced_in = self.revision_versions.get_text_version(file_id,
+                    parent)
+            if introduced_in is not None:
+                parents_from_inventories.append(introduced_in)
+        heads = set(self.revision_versions.heads(parents_from_inventories))
         new_parents = []
         for parent in parents_from_inventories:
             if parent in heads and parent not in new_parents:




More information about the bazaar-commits mailing list