Rev 4508: A few more updates. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel
John Arbash Meinel
john at arbash-meinel.com
Tue Jun 30 23:28:27 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel
------------------------------------------------------------
revno: 4508
revision-id: john at arbash-meinel.com-20090630222821-yq4yubbcw0r5b1lz
parent: john at arbash-meinel.com-20090630211033-7tm7nmaz2xwwhcg0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-chk-multilevel
timestamp: Tue 2009-06-30 17:28:21 -0500
message:
A few more updates.
Split out the interesting chks into a separate set.
We will still filter based on what is in it, but
now we can also filter out uninteresting items without
running into them.
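
To make that concrete, here is a minimal sketch (illustrative names and
values only, not the bzrlib API) of what "filter out uninteresting items
without running into them" means at the leaf level: items seen on the
uninteresting side are collected into a set, and interesting leaf items
are yielded only if they are absent from it.

# Illustrative sketch only; keys and values are made up.
all_uninteresting_items = set([(('aab',), 'basis value')])
leaf_items = [(('aab',), 'basis value'), (('aac',), 'new value')]
unique_items = [item for item in leaf_items
                if item not in all_uninteresting_items]
# unique_items == [(('aac',), 'new value')]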
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-06-30 21:10:33 +0000
+++ b/bzrlib/chk_map.py 2009-06-30 22:28:21 +0000
@@ -1418,6 +1418,9 @@
# it out, yet.
self._all_uninteresting_chks = set(self._uninteresting_root_keys)
self._all_uninteresting_items = set()
+ # These are interesting items which were either read, or already in the
+ # interesting queue (so don't add these refs again)
+ self._processed_interesting_refs = set()
self._interesting_queued_refs = set()
# TODO: use this or delete it
self._interesting_queued_items = set()
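
Previously, interesting keys were folded into _all_uninteresting_chks just
to suppress re-queueing, which overloaded that set's meaning. With the
split, the guard behaves like this toy version (hypothetical names that
mirror the real ones):

import heapq

all_uninteresting_chks = set(['sha1:shared'])
processed_interesting_refs = set()
interesting_queue = []

def queue_interesting(prefix, ref):
    if (ref in all_uninteresting_chks
        or ref in processed_interesting_refs):
        # Either shared with the basis, or already queued by another root.
        return
    processed_interesting_refs.add(ref)
    heapq.heappush(interesting_queue, (prefix, None, ref))

queue_interesting('a', 'sha1:shared')  # dropped: shared with the basis
queue_interesting('a', 'sha1:new')     # queued
queue_interesting('a', 'sha1:new')     # dropped: already queued
# interesting_queue == [('a', None, 'sha1:new')]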
@@ -1488,17 +1491,17 @@
interesting_prefixes = set()
# We are about to yield all of these, so we don't want them getting
# added a second time
- self._all_uninteresting_chks.update(interesting_keys)
+ self._processed_interesting_refs.update(interesting_keys)
for record, node, prefix_refs, items in \
self._read_nodes_from_store(interesting_keys):
# At this level, we now know all the uninteresting references
# So we can go ahead and filter, and queue up whatever is remaining
for prefix, ref in prefix_refs:
if (ref in self._all_uninteresting_chks
- or ref in self._interesting_queued_refs):
+ or ref in self._processed_interesting_refs):
# Either in the uninteresting set, or added by another root
continue
- self._interesting_queued_refs.add(ref)
+ self._processed_interesting_refs.add(ref)
interesting_prefixes.update(
[prefix[:i+1] for i in xrange(len(prefix))])
heapq.heappush(self._interesting_queue,
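
(For reference, the interesting_prefixes update in this hunk collects
every leading prefix of a queued ref's search prefix, e.g.:)

prefix = 'abc'
prefixes = [prefix[:i+1] for i in xrange(len(prefix))]
# prefixes == ['a', 'ab', 'abc']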
@@ -1554,9 +1557,14 @@
refs = refs.difference(self._all_uninteresting_chks)
all_uninteresting_chks = self._all_uninteresting_chks
+ processed_interesting_refs = self._processed_interesting_refs
all_uninteresting_items = self._all_uninteresting_items
while refs:
- all_uninteresting_chks.update(refs)
+ # TODO: add a test for this
+ # The idea is that we saw an item in the queue (say during
+ # the root load), and since then we have seen that we
+ # shouldn't walk it after all.
+ # refs = refs.difference(all_uninteresting_chks)
next_refs = set()
next_refs_update = next_refs.update
# Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
@@ -1568,6 +1576,8 @@
yield record, items
next_refs_update([i[1] for i in prefix_refs])
next_refs = next_refs.difference(all_uninteresting_chks)
+ next_refs = next_refs.difference(processed_interesting_refs)
+ processed_interesting_refs.update(next_refs)
refs = next_refs
def _process_next_uninteresting(self):
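
A toy run of the two-stage filter added to the flush loop above: strip
refs shared with the basis, then refs already read or queued, and
remember the survivors so later nodes cannot re-queue them.

all_uninteresting_chks = set(['sha1:shared'])
processed_interesting_refs = set(['sha1:seen'])
next_refs = set(['sha1:shared', 'sha1:seen', 'sha1:fresh'])
next_refs = next_refs.difference(all_uninteresting_chks)
next_refs = next_refs.difference(processed_interesting_refs)
processed_interesting_refs.update(next_refs)
# next_refs == set(['sha1:fresh'])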
@@ -1576,17 +1586,19 @@
# out uninteresting nodes that are not referenced by interesting
# items (we *do* currently filter out uninteresting nodes
# referenced from the root.)
- prefix, ref = heapq.heappop(self._uninteresting_queue)
- for record, node, prefix_refs, items in \
- self._read_nodes_from_store([ref]):
+ # For now, since we don't ever abort from loading uninteresting items,
+ # just read the whole thing in at once, so that we get a single
+ # request, instead of multiple
+ refs = [pr[1] for pr in self._uninteresting_queue]
+ self._uninteresting_queue = []
+ all_uninteresting_chks = self._all_uninteresting_chks
+ for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
self._all_uninteresting_items.update(items)
- for prefix, ref in prefix_refs:
- # TODO: Get a test written that exercises this, and then
- # uncomment
- # if ref in self._all_uninteresting_chks:
- # continue
- self._all_uninteresting_chks.add(ref)
- heapq.heappush(self._uninteresting_queue, (prefix, ref))
+ prefix_refs = [pr for pr in prefix_refs
+ if pr[1] not in all_uninteresting_chks]
+ self._uninteresting_queue.extend(prefix_refs)
+ all_uninteresting_chks.update([pr[1] for pr in prefix_refs])
+ heapq.heapify(self._uninteresting_queue)
def _process_queues(self):
# Finish processing all of the items in the queue, for simplicity, we
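
The shape of the batched _process_next_uninteresting above, as a
standalone sketch (read_nodes stands in for _read_nodes_from_store):
drain the whole queue, issue one batched read, filter and extend, then
heapify once.

import heapq

def process_all_uninteresting(queue, read_nodes, seen_chks, seen_items):
    refs = [pr[1] for pr in queue]     # queue holds (prefix, ref) tuples
    del queue[:]                       # drain the queue in one go
    for record, node, prefix_refs, items in read_nodes(refs):
        seen_items.update(items)
        prefix_refs = [pr for pr in prefix_refs
                       if pr[1] not in seen_chks]
        queue.extend(prefix_refs)
        seen_chks.update([pr[1] for pr in prefix_refs])
    heapq.heapify(queue)               # restore heap order once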
@@ -1617,22 +1629,32 @@
if item not in self._all_uninteresting_items:
yield None, [item]
else:
- # Node, value == ref
+ # Node => value == ref
# if value in self._all_uninteresting_chks:
# continue
for record, node, prefix_refs, items in \
self._read_nodes_from_store([value]):
yield record, []
# This record has been yielded, mark it uninteresting
- self._all_uninteresting_chks.add(record.key)
+ assert record.key in self._processed_interesting_refs
for prefix, ref in prefix_refs:
if (ref in self._all_uninteresting_chks
- or ref in self._interesting_queued_refs):
+ or ref in self._processed_interesting_refs):
continue
- self._interesting_queued_refs.add(ref)
heapq.heappush(self._interesting_queue,
(prefix, None, ref))
+ self._processed_interesting_refs.add(ref)
# TODO: do we know if these are truly interesting yet?
+ # Initial guess is that we do not.
+ # interesting may reference 'abc' from a Leaf at
+ # 'a', while uninteresting may reference it from
+ # a leaf at 'ab', and unfortunately 'ab' comes
+ # before 'abc' but after 'a'.
+ # One possibility, check to see if the first
+ # uninteresting node has a prefix which is after
+ # the possible prefix for item. So if you had
+ # uninteresting at 'b' then you are sure you
+ # could just yield all items that begin with 'a'
for item in items:
if item in self._all_uninteresting_items:
continue
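
The ordering hazard in the TODO above comes down to plain string
comparison of prefixes, which is what orders the heap:

assert 'a' < 'ab' < 'abc'
# An interesting leaf at 'a' may reference items under 'abc', while an
# unread uninteresting leaf at 'ab' can still cover the same items, so
# popping 'a' first proves nothing about those items being unique.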
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2009-06-30 15:19:52 +0000
+++ b/bzrlib/tests/test_chk_map.py 2009-06-30 22:28:21 +0000
@@ -2442,6 +2442,38 @@
iterator._uninteresting_queue)
self.assertEqual([('a', None, key3_a)], iterator._interesting_queue)
+ def test__process_next_uninteresting_batched_no_dupes(self):
+ c_map = self.make_two_deep_map()
+ key1 = c_map.key()
+ c_map._dump_tree() # load everything
+ key1_a = c_map._root_node._items['a'].key()
+ key1_aa = c_map._root_node._items['a']._items['aa'].key()
+ key1_ab = c_map._root_node._items['a']._items['ab'].key()
+ key1_ac = c_map._root_node._items['a']._items['ac'].key()
+ key1_ad = c_map._root_node._items['a']._items['ad'].key()
+ c_map.map(('aaa',), 'new aaa value')
+ key2 = c_map._save()
+ key2_a = c_map._root_node._items['a'].key()
+ key2_aa = c_map._root_node._items['a']._items['aa'].key()
+ c_map.map(('acc',), 'new acc content')
+ key3 = c_map._save()
+ key3_a = c_map._root_node._items['a'].key()
+ key3_ac = c_map._root_node._items['a']._items['ac'].key()
+ iterator = self.get_iterator([key3], [key1, key2],
+ chk_map._search_key_plain)
+ root_results = [record.key for record in iterator._read_all_roots()]
+ self.assertEqual([key3], root_results)
+ self.assertEqual(sorted([('a', key1_a), ('a', key2_a)]),
+ sorted(iterator._uninteresting_queue))
+ self.assertEqual([('a', None, key3_a)], iterator._interesting_queue)
+ iterator._process_next_uninteresting()
+ # All of the uninteresting records should be brought in and queued up,
+ # but we should not have any duplicates
+ self.assertEqual(sorted([('aa', key1_aa), ('ab', key1_ab),
+ ('ac', key1_ac), ('ad', key1_ad),
+ ('aa', key2_aa),
+ ]), sorted(iterator._uninteresting_queue))
+
class TestIterInterestingNodes(TestCaseWithExampleMaps):