Rev 52: Filter out tips we've already found interesting. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Wed Apr 7 16:08:00 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 52
revision-id: john at arbash-meinel.com-20100407150745-2t3a9o66annnsxdo
parent: john at arbash-meinel.com-20100406215530-ellqwfhrk1n2qy55
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Wed 2010-04-07 10:07:45 -0500
message:
Filter out tips we've already found interesting.
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-06 21:55:30 +0000
+++ b/history_db.py 2010-04-07 15:07:45 +0000
@@ -642,12 +642,15 @@
"SELECT parent, gdfo FROM parent, revision"
" WHERE parent=db_id AND child IN (%s)",
len(self._search_tips)), list(self._search_tips)).fetchall()
- # XXX: Filter out search tips that we've already searched via a
- # different path, either entries already in the old _search_tips,
- # or something in _interesting_ancestor_ids, etc. Note that by
- # construction, everything in _search_tips should be in
- # _interesting_ancestor_ids...
- self._search_tips = set([r[0] for r in res])
+ # Filter out search tips that we've already searched via a different
+ # path. By construction, if we are stepping the search tips, we know
+ # that all previous search tips are either in
+ # self._imported_dotted_revno or in self._interesting_ancestor_ids.
+ # _imported_dotted_revno will be filtered in the first
+ # _split_search_tips_by_gdfo call, so we just filter out already
+ # interesting ones.
+ interesting = self._interesting_ancestor_ids
+ self._search_tips = set([r[0] for r in res if r[0] not in interesting])
# TODO: For search tips we will be removing, we don't need to join
# against revision since we should already have them. There may
# be other ways that we already know gdfo. It may be cheaper to
=== modified file 'test_importer.py'
--- a/test_importer.py 2010-04-06 21:55:30 +0000
+++ b/test_importer.py 2010-04-07 15:07:45 +0000
@@ -235,7 +235,6 @@
b._tip_revision = 'D' # Something older
importer = history_db.Importer(':memory:', b, incremental=False)
importer.do_import()
- D_id = importer._rev_id_to_db_id['D']
self.assertEqual(1, importer._cursor.execute(
"SELECT count(*) FROM dotted_revno, revision"
" WHERE tip_revision = merged_revision"
@@ -244,7 +243,6 @@
# Now work on just importing G
importer._update_ancestry('G')
self.grab_interesting_ids(importer._rev_id_to_db_id)
- G_id = importer._rev_id_to_db_id['G']
inc_importer = history_db._IncrementalImporter(importer, self.G_id)
inc_importer._find_needed_mainline()
self.assertEqual([self.G_id], inc_importer._mainline_db_ids)
@@ -295,7 +293,6 @@
inc_importer._step_mainline()
self.assertEqual(self.A_id, inc_importer._imported_mainline_id)
self.assertEqual(1, inc_importer._imported_gdfo)
- self.C_id = importer._rev_id_to_db_id['C']
self.assertEqual({self.D_id: ((2,), 0, 0), self.C_id: ((1,1,2), 0, 1),
self.B_id: ((1,1,1), 1, 1),
}, inc_importer._imported_dotted_revno)
@@ -342,3 +339,47 @@
}, inc_importer._imported_dotted_revno)
self.assertEqual({(2,): self.D_id, (1,1,2): self.C_id,
(1,1,1): self.B_id}, inc_importer._dotted_to_db_id)
+
+ def test__incremental_step_skips_already_seen(self):
+ # Simpler graph:
+ # A
+ # |
+ # B
+ # |\
+ # | C
+ # | |\
+ # | D E
+ # |/|/
+ # F G
+ # |/
+ # H
+ # In this case, the first step should go to G & D. When stepping from
+ # there, G => D should not continue on D, since it has already been
+ # seen, but we should include E.
+ ancestry = {'A': (),
+ 'B': ('A',),
+ 'C': ('B',),
+ 'D': ('C',),
+ 'E': ('C',),
+ 'F': ('B', 'D'),
+ 'G': ('D', 'E'),
+ 'H': ('F', 'G'),
+ }
+ b = MockBranch(ancestry, 'B')
+ importer = history_db.Importer(':memory:', b, incremental=False)
+ importer.do_import()
+ importer._update_ancestry('H')
+ self.grab_interesting_ids(importer._rev_id_to_db_id)
+ inc_importer = history_db._IncrementalImporter(importer, self.H_id)
+ inc_importer._find_needed_mainline()
+ self.assertEqual([self.H_id, self.F_id], inc_importer._mainline_db_ids)
+ self.assertEqual(self.B_id, inc_importer._imported_mainline_id)
+ inc_importer._get_initial_search_tips()
+ self.assertEqual(set([self.D_id, self.G_id]), inc_importer._search_tips)
+ # Both have higher-than-mainline gdfos
+ self.assertEqual([],
+ inc_importer._split_search_tips_by_gdfo([self.D_id, self.G_id]))
+ inc_importer._step_search_tips()
+ # It should want to include D_id, but it should know that we've already
+ # been there
+ self.assertEqual(set([self.C_id, self.E_id]), inc_importer._search_tips)
More information about the bazaar-commits
mailing list