Rev 4835: (jam) Improve conversion efficiency by using a better heads() in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Nov 30 04:49:35 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4835 [merge]
revision-id: pqm at pqm.ubuntu.com-20091130044931-b5rjfh24zq1d3lju
parent: pqm at pqm.ubuntu.com-20091128071329-pb89sfsgsvxcyob9
parent: john at arbash-meinel.com-20091130033903-2n14h8xr1ebp9qna
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-11-30 04:49:31 +0000
message:
(jam) Improve conversion efficiency by using a better heads()
implementation.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
=== modified file 'NEWS'
--- a/NEWS 2009-11-27 10:43:47 +0000
+++ b/NEWS 2009-11-30 04:49:31 +0000
@@ -53,6 +53,10 @@
etc. Now only change slashes if there is something being glob expanded.
(John Arbash Meinel, #485771)
+* Use our faster ``KnownGraph.heads()`` functionality when computing the
+ new rich-root heads. This can cut a conversion time in half (mysql from
+ 13.5h => 6.2h) (John Arbash Meinel, #487632)
+
Improvements
************
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2009-08-07 04:29:36 +0000
+++ b/bzrlib/fetch.py 2009-11-30 03:34:09 +0000
@@ -28,6 +28,8 @@
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import (
+ graph as _mod_graph,
+ static_tuple,
tsort,
versionedfile,
)
@@ -36,10 +38,10 @@
from bzrlib import (
errors,
symbol_versioning,
+ ui,
)
from bzrlib.revision import NULL_REVISION
from bzrlib.trace import mutter
-import bzrlib.ui
class RepoFetcher(object):
@@ -96,7 +98,7 @@
# assert not missing
self.count_total = 0
self.file_ids_names = {}
- pb = bzrlib.ui.ui_factory.nested_progress_bar()
+ pb = ui.ui_factory.nested_progress_bar()
pb.show_pct = pb.show_count = False
try:
pb.update("Finding revisions", 0, 2)
@@ -123,7 +125,7 @@
raise errors.IncompatibleRepositories(
self.from_repository, self.to_repository,
"different rich-root support")
- pb = bzrlib.ui.ui_factory.nested_progress_bar()
+ pb = ui.ui_factory.nested_progress_bar()
try:
pb.update("Get stream source")
source = self.from_repository._get_source(
@@ -251,13 +253,22 @@
# yet, and are unlikely to in non-rich-root environments anyway.
root_id_order.sort(key=operator.itemgetter(0))
# Create a record stream containing the roots to create.
- from bzrlib.graph import FrozenHeadsCache
- graph = FrozenHeadsCache(graph)
+ if len(revs) > 100:
+ graph = _get_rich_root_heads_graph(self.source_repo, revs)
new_roots_stream = _new_root_data_stream(
root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
return [('texts', new_roots_stream)]
+def _get_rich_root_heads_graph(source_repo, revision_ids):
+ """Get a Graph object suitable for asking heads() for new rich roots."""
+ st = static_tuple.StaticTuple
+ revision_keys = [st(r_id).intern() for r_id in revision_ids]
+ known_graph = source_repo.revisions.get_known_graph_ancestry(
+ revision_keys)
+ return graph.GraphThunkIdsToKeys(known_graph)
+
+
def _new_root_data_stream(
root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
"""Generate a texts substream of synthesised root entries.
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2009-09-14 01:48:28 +0000
+++ b/bzrlib/graph.py 2009-11-30 03:16:22 +0000
@@ -1679,6 +1679,19 @@
return result
+class GraphThunkIdsToKeys(object):
+ """Forwards calls about 'ids' to be about keys internally."""
+
+ def __init__(self, graph):
+ self._graph = graph
+
+ def heads(self, ids):
+ """See Graph.heads()"""
+ as_keys = [(i,) for i in ids]
+ head_keys = self._graph.heads(as_keys)
+ return set([h[0] for h in head_keys])
+
+
_counters = [0,0,0,0,0,0,0]
try:
from bzrlib._known_graph_pyx import KnownGraph
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-11-19 15:06:47 +0000
+++ b/bzrlib/repository.py 2009-11-30 03:34:09 +0000
@@ -26,6 +26,7 @@
chk_map,
debug,
errors,
+ fetch as _mod_fetch,
fifo_cache,
generate_ids,
gpg,
@@ -2668,8 +2669,8 @@
for ((revision_id,), parent_keys) in \
self.revisions.get_parent_map(query_keys).iteritems():
if parent_keys:
- result[revision_id] = tuple(parent_revid
- for (parent_revid,) in parent_keys)
+ result[revision_id] = tuple([parent_revid
+ for (parent_revid,) in parent_keys])
else:
result[revision_id] = (_mod_revision.NULL_REVISION,)
return result
@@ -3412,8 +3413,7 @@
provided a default one will be created.
:return: None.
"""
- from bzrlib.fetch import RepoFetcher
- f = RepoFetcher(to_repository=self.target,
+ f = _mod_fetch.RepoFetcher(to_repository=self.target,
from_repository=self.source,
last_revision=revision_id,
fetch_spec=fetch_spec,
@@ -3819,13 +3819,15 @@
basis_id, delta, current_revision_id, parents_parents)
cache[current_revision_id] = parent_tree
- def _fetch_batch(self, revision_ids, basis_id, cache):
+ def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
"""Fetch across a few revisions.
:param revision_ids: The revisions to copy
:param basis_id: The revision_id of a tree that must be in cache, used
as a basis for delta when no other base is available
:param cache: A cache of RevisionTrees that we can use.
+ :param a_graph: A Graph object to determine the heads() of the
+ rich-root data stream.
:return: The revision_id of the last converted tree. The RevisionTree
for it will be in cache
"""
@@ -3895,10 +3897,9 @@
from_texts = self.source.texts
to_texts = self.target.texts
if root_keys_to_create:
- from bzrlib.fetch import _new_root_data_stream
- root_stream = _new_root_data_stream(
+ root_stream = _mod_fetch._new_root_data_stream(
root_keys_to_create, self._revision_id_to_root_id, parent_map,
- self.source)
+ self.source, graph=a_graph)
to_texts.insert_record_stream(root_stream)
to_texts.insert_record_stream(from_texts.get_record_stream(
text_keys, self.target._format._fetch_order,
@@ -3961,13 +3962,20 @@
cache[basis_id] = basis_tree
del basis_tree # We don't want to hang on to it here
hints = []
+ if self._converting_to_rich_root and len(revision_ids) > 100:
+ a_graph = _mod_fetch._get_rich_root_heads_graph(self.source,
+ revision_ids)
+ else:
+ a_graph = None
+
for offset in range(0, len(revision_ids), batch_size):
self.target.start_write_group()
try:
pb.update('Transferring revisions', offset,
len(revision_ids))
batch = revision_ids[offset:offset+batch_size]
- basis_id = self._fetch_batch(batch, basis_id, cache)
+ basis_id = self._fetch_batch(batch, basis_id, cache,
+ a_graph=a_graph)
except:
self.target.abort_write_group()
raise
@@ -4446,8 +4454,7 @@
fetching the inventory weave.
"""
if self._rich_root_upgrade():
- import bzrlib.fetch
- return bzrlib.fetch.Inter1and2Helper(
+ return _mod_fetch.Inter1and2Helper(
self.from_repository).generate_root_texts(revs)
else:
return []
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2009-08-04 04:36:34 +0000
+++ b/bzrlib/tests/test_graph.py 2009-11-30 03:16:22 +0000
@@ -1582,6 +1582,24 @@
self.assertCollapsed(d, d)
+class TestGraphThunkIdsToKeys(tests.TestCase):
+
+ def test_heads(self):
+ # A
+ # |\
+ # B C
+ # |/
+ # D
+ d = {('D',): [('B',), ('C',)], ('C',):[('A',)],
+ ('B',): [('A',)], ('A',): []}
+ g = _mod_graph.Graph(_mod_graph.DictParentsProvider(d))
+ graph_thunk = _mod_graph.GraphThunkIdsToKeys(g)
+ self.assertEqual(['D'], sorted(graph_thunk.heads(['D', 'A'])))
+ self.assertEqual(['D'], sorted(graph_thunk.heads(['D', 'B'])))
+ self.assertEqual(['D'], sorted(graph_thunk.heads(['D', 'C'])))
+ self.assertEqual(['B', 'C'], sorted(graph_thunk.heads(['B', 'C'])))
+
+
class TestPendingAncestryResultGetKeys(TestCaseWithMemoryTransport):
"""Tests for bzrlib.graph.PendingAncestryResult."""
More information about the bazaar-commits
mailing list