Rev 3660: The flatten code now handles the no-ref-list case, in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

John Arbash Meinel john at arbash-meinel.com
Thu Aug 21 21:55:03 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

------------------------------------------------------------
revno: 3660
revision-id: john at arbash-meinel.com-20080821205458-cjk22p6p5yqfhmv7
parent: john at arbash-meinel.com-20080821202206-nvbybzwmfvagg4jm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 15:54:58 -0500
message:
  The flatten code now handles the no-ref-list case.
modified:
  bzrlib/_parse_btree_c.pyx      _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
  bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
-------------- next part --------------
=== modified file 'bzrlib/_parse_btree_c.pyx'
--- a/bzrlib/_parse_btree_c.pyx	2008-08-21 20:22:06 +0000
+++ b/bzrlib/_parse_btree_c.pyx	2008-08-21 20:54:58 +0000
@@ -24,9 +24,12 @@
 
     char *PyString_AsString(object p)
     object PyString_FromStringAndSize(char *, Py_ssize_t)
+    int PyString_CheckExact(object s)
     Py_ssize_t PyString_Size(object p)
+    int PyTuple_CheckExact(object t)
 
 cdef extern from "string.h":
+    void *memcpy(void *dest, void *src, size_t n)
     void *memchr(void *s, int c, size_t n)
     # GNU extension
     # void *memrchr(void *s, int c, size_t n)
@@ -225,7 +228,9 @@
         return 0
 
     def parse(self):
-        cdef int byte_count
+        cdef Py_ssize_t byte_count
+        if not PyString_CheckExact(self.bytes):
+            raise AssertionError('self.bytes is not a string.')
         byte_count = PyString_Size(self.bytes)
         self._cur_str = PyString_AsString(self.bytes)
         # This points to the last character in the string
@@ -250,16 +255,49 @@
         string_key  The serialized key for referencing this node
         flattened   A string with the serialized form for the contents
     """
+    cdef Py_ssize_t flat_len
+    cdef Py_ssize_t key_len
+    cdef char * value
+    cdef Py_ssize_t value_len
+    cdef char * s
+
+    # I don't expect that we can do faster than string.join()
+    string_key = '\x00'.join(node[1])
+
     # TODO: instead of using string joins, precompute the final string length,
     #       and then malloc a single string and copy everything in.
-    flattened_references = []
-    if reference_lists:
+
+    # TODO: We probably want to use PySequence_Fast, because we may be given
+    #       either lists or tuples, and we can't be sure which we will get.
+
+    # line := string_key NULL flat_refs NULL value LF
+    # string_key := BYTES (NULL BYTES)*
+    # flat_refs := ref_list (TAB ref_list)*
+    # ref_list := ref (CR ref)*
+    # ref := BYTES (NULL BYTES)*
+    # value := BYTES
+    if not reference_lists:
+        # Simple case: we only have the key and the value,
+        # so the line is just (key NULL NULL value LF)
+        key_len = PyString_Size(string_key)
+        value = PyString_AsString(node[2])
+        value_len = PyString_Size(node[2])
+        flat_len = (key_len + 1 + 1 + value_len + 1)
+        line = PyString_FromStringAndSize(NULL, flat_len)
+        # Get a pointer to the new buffer
+        s = PyString_AsString(line)
+        memcpy(s, PyString_AsString(string_key), key_len)
+        s[key_len] = c'\0'
+        s[key_len + 1] = c'\0'
+        memcpy(s + key_len + 2, value, value_len)
+        s[key_len + 2 + value_len] = c'\n'
+    else:
+        flattened_references = []
         for ref_list in node[3]:
             ref_keys = []
             for reference in ref_list:
                 ref_keys.append('\x00'.join(reference))
             flattened_references.append('\r'.join(ref_keys))
-    string_key = '\x00'.join(node[1])
-    line = ("%s\x00%s\x00%s\n" % (string_key,
-        '\t'.join(flattened_references), node[2]))
+        line = ("%s\x00%s\x00%s\n" % (string_key,
+            '\t'.join(flattened_references), node[2]))
     return string_key, line

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2008-08-21 20:22:06 +0000
+++ b/bzrlib/tests/test_btree_index.py	2008-08-21 20:54:58 +0000
@@ -913,15 +913,35 @@
             ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
             }, node.keys)
 
-    def assertFlattened(self, expected, key, value, refs, reference_lists):
+    def assertFlattened(self, expected, key, value, refs):
         flat_key, flat_line = self.parse_btree._flatten_node(
-            (None, key, value, refs), reference_lists)
+            (None, key, value, refs), bool(refs))
         self.assertEqual('\x00'.join(key), flat_key)
         self.assertEqual(expected, flat_line)
 
-    def test_flatten_node_to_line_no_references(self):
-        self.assertFlattened('key\x00\x00value\n',
-                             ('key',), 'value', [], False)
+    def test__flatten_node(self):
+        self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
+        self.assertFlattened('key\0tuple\0\0value str\n',
+                             ('key', 'tuple'), 'value str', [])
+        self.assertFlattened('key\0tuple\0triple\0\0value str\n',
+                             ('key', 'tuple', 'triple'), 'value str', [])
+        self.assertFlattened('k\0t\0s\0ref\0value str\n',
+                             ('k', 't', 's'), 'value str', [[('ref',)]])
+        self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
+                             ('key', 'tuple'), 'value str', [[('ref', 'key')]])
+        self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
+            ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
+        self.assertFlattened(
+            "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
+            ('00', '11'), 'value:1',
+                ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
+        self.assertFlattened(
+            "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
+            ('11', '33'), 'value:3',
+                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
+        self.assertFlattened(
+            "11\x0044\x00\t11\x00ref00\x00value:4\n",
+            ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
 
 
 class TestCompiledBtree(tests.TestCase):



More information about the bazaar-commits mailing list