Rev 68: It now seems to be *correct* and is capable of importing all of bzr.dev. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Thu Apr 8 21:01:03 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 68
revision-id: john at arbash-meinel.com-20100408200049-2fo6izcgb05g6j7u
parent: john at arbash-meinel.com-20100408173452-sqro9wruo1xdsloy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Thu 2010-04-08 15:00:49 -0500
message:
It now seems to be *correct* and is capable of importing all of bzr.dev.
It's pretty darn slow, though. Instead of taking ~3min to import bzr.dev,
it now takes 23min...
Time to start profiling. I'm guessing the overhead of crossing between
Python and sqlite is the main problem, but I don't really know yet.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-08 17:34:52 +0000
+++ b/history_db.py 2010-04-08 20:00:49 +0000
@@ -210,13 +210,26 @@
# Assert that the result is valid
actual_ms = self._graph.merge_sort((tip_revision_id,))
actual_ms_iter = iter(actual_ms)
+
+ def assert_is_equal(x, y):
+ if x != y:
+ import pdb; pdb.set_trace()
for node in merge_sorted:
- node.key = (db_id_to_rev_id[node.key],)
+ try:
+ node.key = (db_id_to_rev_id[node.key],)
+ except KeyError: # Look this one up in the db
+ rev_res = self._cursor.execute(
+ "SELECT revision_id FROM revision WHERE db_id = ?",
+ (node.key,)).fetchone()
+ rev_id = rev_res[0]
+ db_id_to_rev_id[node.key] = rev_id
+ self._rev_id_to_db_id[rev_id] = node.key
+ node.key = (rev_id,)
actual_node = actual_ms_iter.next()
- assert node.key == actual_node.key
- assert node.revno == actual_node.revno
- assert node.merge_depth == actual_node.merge_depth
- assert node.end_of_merge == actual_node.end_of_merge
+ assert_is_equal(node.key, actual_node.key)
+ assert_is_equal(node.revno, actual_node.revno)
+ assert_is_equal(node.merge_depth, actual_node.merge_depth)
+ assert_is_equal(node.end_of_merge, actual_node.end_of_merge)
else:
merge_sorted = self._graph.merge_sort((tip_revision_id,))
try:
@@ -740,7 +753,7 @@
Either the data should be in _imported_dotted_revno, or the lh parent
should be in interesting_ancestor_ids (meaning we will number it).
"""
- pmap = self._parent_map
+ #XXX REMOVE: pmap = self._parent_map
missing_parent_ids = set()
for db_id in self._interesting_ancestor_ids:
parent_ids = self._get_parents(db_id)
@@ -854,6 +867,9 @@
def _push_node(self, db_id, merge_depth):
# TODO: Check if db_id is a ghost (not allowed on the stack)
+ if db_id not in self._interesting_ancestor_ids:
+ # This is a parent that we really don't need to number
+ return
parent_ids = self._get_parents(db_id)
if len(parent_ids) <= 0:
left_parent = None
@@ -973,6 +989,9 @@
self._depth_first_stack = []
self._scheduled_stack = []
self._seen_parents = set()
+ ## if not self._mainline_db_ids:
+ ## # Nothing to number
+ ## return
self._push_node(self._mainline_db_ids[0], 0)
while self._depth_first_stack:
=== modified file 'test_importer.py'
--- a/test_importer.py 2010-04-08 17:22:15 +0000
+++ b/test_importer.py 2010-04-08 20:00:49 +0000
@@ -643,3 +643,33 @@
[(self.D_id, (0, 1, 1), True, 1),
(self.E_id, (4,), False, 0),
])
+
+ def test_ignore_uninteresting_ancestors(self):
+ # Graph:
+ # A
+ # |\
+ # B C
+ # |X|
+ # D E
+ # |\|
+ # | F
+ # |/
+ # G
+ #
+ # Someone did work in C, while trunk evolved to B. C was landed, while
+ # concurrently someone tried to update C for the trunk changes.
+ # After trying to clean up, they had to do it again.
+ # If D is imported, we should only number E and F; we shouldn't try to
+ # include B or C.
+ # Note: This ancestry was taken from bzr.dev at 5114.1.1, which
+ # demonstrated the race condition.
+ ancestry = {'A': (), 'B': ('A',), 'C': ('A',), 'D': ('B', 'C'),
+ 'E': ('C', 'B'), 'F': ('E', 'D'), 'G': ('D', 'F')}
+ b = MockBranch(ancestry, 'G')
+ inc_merger = self.make_inc_merger(b, 'D', 'G')
+ inc_merger.topo_order()
+ self.assertScheduledStack(inc_merger,
+ [(self.E_id, (1, 1, 2), True, 1),
+ (self.F_id, (1, 1, 3), False, 1),
+ (self.G_id, (4,), False, 0),
+ ])
More information about the bazaar-commits
mailing list