Rev 4415: Merge the chk_map.InternalNode._iter_nodes improvements. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct
John Arbash Meinel
john at arbash-meinel.com
Fri Jun 5 22:11:26 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.16-chk-direct
------------------------------------------------------------
revno: 4415 [merge]
revision-id: john at arbash-meinel.com-20090605211101-cu88w49o0ys97r3c
parent: john at arbash-meinel.com-20090605211029-1mc1tejpgyj7ww8g
parent: john at arbash-meinel.com-20090605195715-q2gcpaypbixwk4wg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-chk-direct
timestamp: Fri 2009-06-05 16:11:01 -0500
message:
Merge the chk_map.InternalNode._iter_nodes improvements.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
bzrlib/workingtree.py workingtree.py-20050511021032-29b6ec0a681e02e3
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-06-05 21:10:29 +0000
+++ b/bzrlib/chk_map.py 2009-06-05 21:11:01 +0000
@@ -968,34 +968,99 @@
# prefix is the key in self._items to use, key_filter is the key_filter
# entries that would match this node
keys = {}
+ shortcut = False
if key_filter is None:
+ # yielding all nodes, yield whatever we have, and queue up a read
+ # for whatever we are missing
+ shortcut = True
for prefix, node in self._items.iteritems():
- if type(node) == tuple:
+ if type(node) is tuple:
keys[node] = (prefix, None)
else:
yield node, None
- else:
- # XXX defaultdict ?
+ elif len(key_filter) == 1:
+ # Technically, this path could also be handled by the first check
+ # in 'self._node_width' in length_filters. However, we can handle
+ # this case without spending any time building up the
+ # prefix_to_keys, etc state.
+
+ # This is a bit ugly, but TIMEIT showed it to be by far the fastest
+ # 0.626us list(key_filter)[0]
+ # is a func() for list(), 2 mallocs, and a getitem
+ # 0.489us [k for k in key_filter][0]
+ # still has the mallocs, avoids the func() call
+ # 0.350us iter(key_filter).next()
+ # has a func() call, and mallocs an iterator
+ # 0.125us for key in key_filter: pass
+ # no func() overhead, might malloc an iterator
+ # 0.105us for key in key_filter: break
+ # no func() overhead, might malloc an iterator, probably
+ # avoids checking an 'else' clause as part of the for
+ for key in key_filter:
+ break
+ search_prefix = self._search_prefix_filter(key)
+ if len(search_prefix) == self._node_width:
+ # This item will match exactly, so just do a dict lookup, and
+ # see what we can return
+ shortcut = True
+ try:
+ node = self._items[search_prefix]
+ except KeyError:
+ # A given key can only match 1 child node, if it isn't
+ # there, then we can just return nothing
+ return
+ if node.__class__ is tuple:
+ keys[node] = (search_prefix, [key])
+ else:
+ # This is loaded, and the only thing that can match,
+ # return
+ yield node, [key]
+ return
+ if not shortcut:
+ # First, convert all keys into a list of search prefixes
+ # Aggregate common prefixes, and track the keys they come from
prefix_to_keys = {}
length_filters = {}
for key in key_filter:
- search_key = self._search_prefix_filter(key)
+ search_prefix = self._search_prefix_filter(key)
length_filter = length_filters.setdefault(
- len(search_key), set())
- length_filter.add(search_key)
- prefix_to_keys.setdefault(search_key, []).append(key)
- length_filters = length_filters.items()
- for prefix, node in self._items.iteritems():
- node_key_filter = []
- for length, length_filter in length_filters:
- sub_prefix = prefix[:length]
- if sub_prefix in length_filter:
- node_key_filter.extend(prefix_to_keys[sub_prefix])
- if node_key_filter: # this key matched something, yield it
+ len(search_prefix), set())
+ length_filter.add(search_prefix)
+ prefix_to_keys.setdefault(search_prefix, []).append(key)
+
+ if (self._node_width in length_filters
+ and len(length_filters) == 1):
+ # all of the search prefixes match exactly _node_width. This
+ # means that everything is an exact match, and we can do a
+ # lookup into self._items, rather than iterating over the items
+ # dict.
+ search_prefixes = length_filters[self._node_width]
+ for search_prefix in search_prefixes:
+ try:
+ node = self._items[search_prefix]
+ except KeyError:
+ # We can ignore this one
+ continue
+ node_key_filter = prefix_to_keys[search_prefix]
if type(node) == tuple:
- keys[node] = (prefix, node_key_filter)
+ keys[node] = (search_prefix, node_key_filter)
else:
yield node, node_key_filter
+ else:
+ # The slow way. We walk every item in self._items, and check to
+ # see if there are any matches
+ length_filters = length_filters.items()
+ for prefix, node in self._items.iteritems():
+ node_key_filter = []
+ for length, length_filter in length_filters:
+ sub_prefix = prefix[:length]
+ if sub_prefix in length_filter:
+ node_key_filter.extend(prefix_to_keys[sub_prefix])
+ if node_key_filter: # this key matched something, yield it
+ if type(node) == tuple:
+ keys[node] = (prefix, node_key_filter)
+ else:
+ yield node, node_key_filter
if keys:
# Look in the page cache for some more bytes
found_keys = set()
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2009-05-13 21:59:57 +0000
+++ b/bzrlib/tests/test_chk_map.py 2009-06-05 18:03:40 +0000
@@ -1560,13 +1560,66 @@
child.map(None, ("baz",), "val")
node.add_node("b", child)
- key_filter = (('foo',), ('fob',), ('bar',), ('baz',))
+ # Note that 'ram' doesn't match anything, so it should be freely
+ # ignored
+ key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
for child, node_key_filter in node._iter_nodes(None,
key_filter=key_filter):
- # each child could matches two key filters, so make sure they were
+ # each child could match two key filters, so make sure they were
# both included.
self.assertEqual(2, len(node_key_filter))
+ def make_fo_fa_node(self):
+ node = InternalNode('f')
+ child = LeafNode()
+ child.set_maximum_size(100)
+ child.map(None, ("foo",), "val")
+ child.map(None, ("fob",), "val")
+ node.add_node('fo', child)
+ child = LeafNode()
+ child.set_maximum_size(100)
+ child.map(None, ("far",), "val")
+ child.map(None, ("faz",), "val")
+ node.add_node("fa", child)
+ return node
+
+ def test__iter_nodes_single_entry(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(1, len(nodes))
+ self.assertEqual(key_filter, nodes[0][1])
+
+ def test__iter_nodes_single_entry_misses(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('bar',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(0, len(nodes))
+
+ def test__iter_nodes_mixed_key_width(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(1, len(nodes))
+ matches = key_filter[:]
+ matches.remove(('b',))
+ self.assertEqual(sorted(matches), sorted(nodes[0][1]))
+
+ def test__iter_nodes_match_all(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(2, len(nodes))
+
+ def test__iter_nodes_fixed_widths_and_misses(self):
+ node = self.make_fo_fa_node()
+ # foo and faa should both match one child, baz should miss
+ key_filter = [('foo',), ('faa',), ('baz',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(2, len(nodes))
+ for node, matches in nodes:
+ self.assertEqual(1, len(matches))
+
def test_iteritems_empty_new(self):
node = InternalNode()
self.assertEqual([], sorted(node.iteritems(None)))
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2009-06-03 01:43:22 +0000
+++ b/bzrlib/workingtree.py 2009-06-05 19:57:15 +0000
@@ -451,7 +451,7 @@
path = self.id2path(file_id)
file_obj = self.get_file_byname(path, filtered=False)
stat_value = _fstat(file_obj.fileno())
- if self.supports_content_filtering() and filtered:
+ if filtered and self.supports_content_filtering():
filters = self._content_filter_stack(path)
file_obj = filtered_input_file(file_obj, filters)
return (file_obj, stat_value)
@@ -462,7 +462,7 @@
def get_file_byname(self, filename, filtered=True):
path = self.abspath(filename)
f = file(path, 'rb')
- if self.supports_content_filtering() and filtered:
+ if filtered and self.supports_content_filtering():
filters = self._content_filter_stack(filename)
return filtered_input_file(f, filters)
else:
More information about the bazaar-commits
mailing list