Rev 2910: (Andrew Bennetts) Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Oct 16 03:42:36 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2910
revision-id: pqm at pqm.ubuntu.com-20071016024233-6fmmyqoh0cfnsni8
parent: pqm at pqm.ubuntu.com-20071015224423-kn8kb5xrc4pef7vu
parent: andrew.bennetts at canonical.com-20071015091030-t9f7qvqueo9lswgc
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-10-16 03:42:33 +0100
message:
(Andrew Bennetts) Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches.
modified:
bzrlib/reconcile.py reweave_inventory.py-20051108164726-1e5e0934febac06e
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
------------------------------------------------------------
revno: 2906.1.1
merged: andrew.bennetts at canonical.com-20071015091030-t9f7qvqueo9lswgc
parent: pqm at pqm.ubuntu.com-20071012085726-lyq36i8bo7ew28ba
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: reconcile-speed
timestamp: Mon 2007-10-15 19:10:30 +1000
message:
Speed up reconcile by not repeatedly fetching the full inventories, by caching heads and parents queries, and by fetching revision trees in batches.
=== modified file 'bzrlib/reconcile.py'
--- a/bzrlib/reconcile.py 2007-10-05 02:19:09 +0000
+++ b/bzrlib/reconcile.py 2007-10-15 09:10:30 +0000
@@ -372,12 +372,14 @@
"""
transaction = self.repo.get_transaction()
revision_versions = repository._RevisionTextVersionCache(self.repo)
+ versions = self.revisions.versions()
+ revision_versions.prepopulate_revs(versions)
for num, file_id in enumerate(self.repo.weave_store):
self.pb.update('Fixing text parents', num,
len(self.repo.weave_store))
vf = self.repo.weave_store.get_weave(file_id, transaction)
vf_checker = self.repo.get_versioned_file_checker(
- self.revisions.versions(), revision_versions)
+ versions, revision_versions)
versions_with_bad_parents = vf_checker.check_file_version_parents(
vf, file_id)
if len(versions_with_bad_parents) == 0:
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-10-15 05:23:29 +0000
+++ b/bzrlib/repository.py 2007-10-16 02:42:33 +0000
@@ -2493,6 +2493,9 @@
def __init__(self, repository):
self.repository = repository
self.revision_versions = {}
+ self.revision_parents = {}
+ self.repo_graph = self.repository.get_graph()
+ self.rev_heads = {}
def add_revision_text_versions(self, tree):
"""Cache text version data from the supplied revision tree"""
@@ -2507,10 +2510,44 @@
try:
inv_revisions = self.revision_versions[revision_id]
except KeyError:
- tree = self.repository.revision_tree(revision_id)
- inv_revisions = self.add_revision_text_versions(tree)
+ try:
+ tree = self.repository.revision_tree(revision_id)
+ except errors.RevisionNotPresent:
+ self.revision_versions[revision_id] = inv_revisions = {}
+ else:
+ inv_revisions = self.add_revision_text_versions(tree)
return inv_revisions.get(file_id)
+ def prepopulate_revs(self, revision_ids):
+ # Filter out versions that we don't have an inventory for, so that the
+ # revision_trees() call won't fail.
+ inv_weave = self.repository.get_inventory_weave()
+ revs = [r for r in revision_ids if inv_weave.has_version(r)]
+ # XXX: this loop is very similar to
+ # bzrlib.fetch.Inter1and2Helper.iter_rev_trees.
+ while revs:
+ for tree in self.repository.revision_trees(revs[:100]):
+ if tree.inventory.revision_id is None:
+ tree.inventory.revision_id = tree.get_revision_id()
+ self.add_revision_text_versions(tree)
+ revs = revs[100:]
+
+ def get_parents(self, revision_id):
+ try:
+ return self.revision_parents[revision_id]
+ except KeyError:
+ parents = self.repository.get_parents([revision_id])[0]
+ self.revision_parents[revision_id] = parents
+ return parents
+
+ def heads(self, revision_ids):
+ revision_ids = tuple(revision_ids)
+ try:
+ return self.rev_heads[revision_ids]
+ except KeyError:
+ heads = self.repo_graph.heads(revision_ids)
+ self.rev_heads[revision_ids] = heads
+ return heads
class VersionedFileChecker(object):
@@ -2524,25 +2561,17 @@
file_id, revision_id)
if text_revision is None:
return None
- parents_of_text_revision = self.repository.get_parents(
- [text_revision])[0]
+ parents_of_text_revision = self.revision_versions.get_parents(
+ text_revision)
parents_from_inventories = []
for parent in parents_of_text_revision:
if parent == _mod_revision.NULL_REVISION:
continue
- try:
- inventory = self.repository.get_inventory(parent)
- except errors.RevisionNotPresent:
- pass
- else:
- try:
- introduced_in = inventory[file_id].revision
- except errors.NoSuchId:
- pass
- else:
- parents_from_inventories.append(introduced_in)
- graph = self.repository.get_graph()
- heads = set(graph.heads(parents_from_inventories))
+ introduced_in = self.revision_versions.get_text_version(file_id,
+ parent)
+ if introduced_in is not None:
+ parents_from_inventories.append(introduced_in)
+ heads = set(self.revision_versions.heads(parents_from_inventories))
new_parents = []
for parent in parents_from_inventories:
if parent in heads and parent not in new_parents:
More information about the bazaar-commits mailing list