Rev 3851: Fix the root cause of why _iter_nodes was not handling key_filter. in http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

John Arbash Meinel john at arbash-meinel.com
Fri Mar 6 20:25:37 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

------------------------------------------------------------
revno: 3851
revision-id: john at arbash-meinel.com-20090306202334-syc95qwv1pf7o8fn
parent: john at arbash-meinel.com-20090306142456-6zily2byb31185af
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: brisbane-core
timestamp: Fri 2009-03-06 14:23:34 -0600
message:
  Fix the root cause of why _iter_nodes was not handling key_filter.
  
  The _search_prefix_filter was not applying _search_key_func(key) when
  the key was not the same length as the internal key width.
  We now cap the maximum prefix at the width of a key, but when searching
  for just a prefix portion of the overall key, you do not want the extra
  nulls that _search_key() gives.
  Added chk_map tests to prove this.
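
For illustration, here is a minimal, self-contained sketch of the before/after
behaviour described above. It is not bzrlib code: toy_search_key and Node are
hypothetical stand-ins, and the CRC32-as-uppercase-hex formatting is only an
assumption chosen to reproduce the 'E8B7BE43'-style values asserted in the
new tests.

import zlib


def toy_search_key(key):
    """Hash each element of a key tuple and join the pieces with NULs."""
    return '\x00'.join(['%08X' % (zlib.crc32(e) & 0xFFFFFFFF) for e in key])


class Node(object):
    """Hypothetical node holding just enough state for the two filters."""

    def __init__(self, key_width, node_width):
        self._key_width = key_width      # elements in a full key tuple
        self._node_width = node_width    # serialised prefix width at this node
        self._search_key_func = toy_search_key

    def _search_prefix_filter_old(self, key):
        # Pre-fix behaviour: a partial key skipped the search key function,
        # so its prefix could never match the node's hashed prefixes.
        if len(key) == self._key_width:
            return self._search_key_func(key)
        return '\x00'.join(key)[:self._node_width]

    def _search_prefix_filter_new(self, key):
        # Post-fix behaviour: always hash, then cap at the node width.
        return self._search_key_func(key)[:self._node_width]


node = Node(key_width=2, node_width=4)
# Old: the partial key ('a',) stays unhashed and filters out everything.
assert node._search_prefix_filter_old(('a',)) == 'a'
# New: the partial key hashes to the same 4-character prefix as the full
# keys ('a', 'a') and ('a', 'b'), so prefix filtering works again.
assert node._search_prefix_filter_new(('a',)) == 'E8B7'
assert node._search_prefix_filter_new(('a', 'b')) == 'E8B7'
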
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-03-06 14:24:56 +0000
+++ b/bzrlib/chk_map.py	2009-03-06 20:23:34 +0000
@@ -934,7 +934,7 @@
         # when filtering (parent_file_id,) against the parent_id_basename
         # CHKMap. We probably need to fix something in iter_nodes but, for
         # now, this at least gets things working - IGC 20090306
-        for node in self._iter_nodes(store, key_filter=None):
+        for node in self._iter_nodes(store, key_filter=key_filter):
             for item in node.iteritems(store, key_filter=key_filter):
                 yield item
 
@@ -1123,9 +1123,7 @@
 
     def _search_prefix_filter(self, key):
         """Serialise key for use as a prefix filter in iteritems."""
-        if len(key) == self._key_width:
-            return self._search_key(key)
-        return '\x00'.join(key)[:self._node_width]
+        return self._search_key_func(key)[:self._node_width]
 
     def _split(self, offset):
         """Split this node into smaller nodes starting at offset.

=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2009-02-19 15:38:50 +0000
+++ b/bzrlib/tests/test_chk_map.py	2009-03-06 20:23:34 +0000
@@ -52,6 +52,8 @@
 
     def test_not_a_prefix(self):
         self.assertCommonPrefix('b', 'begin', 'b')
+
+
 class TestCaseWithStore(tests.TestCaseWithTransport):
 
     def get_chk_bytes(self):
@@ -64,12 +66,14 @@
         self.addCleanup(repo.abort_write_group)
         return repo.chk_bytes
 
-    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1):
+    def _get_map(self, a_dict, maximum_size=0, chk_bytes=None, key_width=1,
+                 search_key_func=None):
         if chk_bytes is None:
             chk_bytes = self.get_chk_bytes()
         root_key = CHKMap.from_dict(chk_bytes, a_dict,
-            maximum_size=maximum_size, key_width=key_width)
-        chkmap = CHKMap(chk_bytes, root_key)
+            maximum_size=maximum_size, key_width=key_width,
+            search_key_func=search_key_func)
+        chkmap = CHKMap(chk_bytes, root_key, search_key_func=search_key_func)
         return chkmap
 
     def read_bytes(self, chk_bytes, key):
@@ -875,6 +879,19 @@
             {("a", "a"): "content here", ("a", "b"): 'more content'},
             self.to_dict(chkmap, [("a",)]))
 
+    def test_iteritems_keys_prefixed_by_2_width_nodes_hashed(self):
+        search_key_func = chk_map.search_key_registry.get('hash-16-way')
+        self.assertEqual('E8B7BE43\x00E8B7BE43', search_key_func(('a', 'a')))
+        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
+        self.assertEqual('71BEEFF9\x0000000000', search_key_func(('b', '')))
+        chkmap = self._get_map(
+            {("a","a"):"content here", ("a", "b",):"more content",
+             ("b", ""): 'boring content'},
+            maximum_size=10, key_width=2, search_key_func=search_key_func)
+        self.assertEqual(
+            {("a", "a"): "content here", ("a", "b"): 'more content'},
+            self.to_dict(chkmap, [("a",)]))
+
     def test_iteritems_keys_prefixed_by_2_width_one_leaf(self):
         chkmap = self._get_map(
             {("a","a"):"content here", ("a", "b",):"more content",
@@ -1517,6 +1534,23 @@
         self.assertEqual([(('strange',), 'beast')],
             sorted(node.iteritems(None, [('strange',), ('weird',)])))
 
+    def test_iteritems_two_children_with_hash(self):
+        search_key_func = chk_map.search_key_registry.get('hash-255-way')
+        node = InternalNode(search_key_func=search_key_func)
+        leaf1 = LeafNode(search_key_func=search_key_func)
+        leaf1.map(None, ('foo bar',), 'quux')
+        leaf2 = LeafNode(search_key_func=search_key_func)
+        leaf2.map(None, ('strange',), 'beast')
+        self.assertEqual('\xbeF\x014', search_key_func(('foo bar',)))
+        self.assertEqual('\x85\xfa\xf7K', search_key_func(('strange',)))
+        node.add_node("\xbe", leaf1)
+        # This sets up a path that should not be followed - it will error if
+        # the code tries to.
+        node._items['\xbe'] = None
+        node.add_node("\x85", leaf2)
+        self.assertEqual([(('strange',), 'beast')],
+            sorted(node.iteritems(None, [('strange',), ('weird',)])))
+
     def test_iteritems_partial_empty(self):
         node = InternalNode()
         self.assertEqual([], sorted(node.iteritems([('missing',)])))
@@ -1607,6 +1641,15 @@
                              "      ('k23',) 'quux'\n"
                              , chkmap._dump_tree())
 
+    def test__search_prefix_filter_with_hash(self):
+        search_key_func = chk_map.search_key_registry.get('hash-16-way')
+        node = InternalNode(search_key_func=search_key_func)
+        node._key_width = 2
+        node._node_width = 4
+        self.assertEqual('E8B7BE43\x0071BEEFF9', search_key_func(('a', 'b')))
+        self.assertEqual('E8B7', node._search_prefix_filter(('a', 'b')))
+        self.assertEqual('E8B7', node._search_prefix_filter(('a',)))
+
     def test_unmap_k23_from_k1_k22_k23_gives_k1_k22_tree_new(self):
         chkmap = self._get_map(
             {('k1',):'foo', ('k22',):'bar', ('k23',): 'quux'}, maximum_size=10)


