Rev 3660: The flatten code now handles the no-ref-list case, in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
John Arbash Meinel
john at arbash-meinel.com
Thu Aug 21 21:55:03 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree
------------------------------------------------------------
revno: 3660
revision-id: john at arbash-meinel.com-20080821205458-cjk22p6p5yqfhmv7
parent: john at arbash-meinel.com-20080821202206-nvbybzwmfvagg4jm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 15:54:58 -0500
message:
The flatten code now handles the no-ref-list case.
modified:
bzrlib/_parse_btree_c.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
-------------- next part --------------
=== modified file 'bzrlib/_parse_btree_c.pyx'
--- a/bzrlib/_parse_btree_c.pyx 2008-08-21 20:22:06 +0000
+++ b/bzrlib/_parse_btree_c.pyx 2008-08-21 20:54:58 +0000
@@ -24,9 +24,12 @@
char *PyString_AsString(object p)
object PyString_FromStringAndSize(char *, Py_ssize_t)
+ int PyString_CheckExact(object s)
Py_ssize_t PyString_Size(object p)
+ int PyTuple_CheckExact(object t)
cdef extern from "string.h":
+ void *memcpy(void *dest, void *src, size_t n)
void *memchr(void *s, int c, size_t n)
# GNU extension
# void *memrchr(void *s, int c, size_t n)
@@ -225,7 +228,9 @@
return 0
def parse(self):
- cdef int byte_count
+ cdef Py_ssize_t byte_count
+ if not PyString_CheckExact(self.bytes):
+ raise AssertionError('self.bytes is not a string.')
byte_count = PyString_Size(self.bytes)
self._cur_str = PyString_AsString(self.bytes)
# This points to the last character in the string
@@ -250,16 +255,49 @@
string_key The serialized key for referencing this node
flattened A string with the serialized form for the contents
"""
+ cdef Py_ssize_t flat_len
+ cdef Py_ssize_t key_len
+ cdef char * value
+ cdef Py_ssize_t value_len
+ cdef char * s
+
+ # I don't expect that we can do faster than string.join()
+ string_key = '\x00'.join(node[1])
+
# TODO: instead of using string joins, precompute the final string length,
# and then malloc a single string and copy everything in.
- flattened_references = []
- if reference_lists:
+
+ # TODO: We probably want to use PySequenceFast, because we have lists and
+ # tuples, but we aren't sure which we will get.
+
+ # line := string_key NULL flat_refs NULL value LF
+ # string_key := BYTES (NULL BYTES)*
+ # flat_refs := ref_list (TAB ref_list)*
+ # ref_list := ref (CR ref)*
+ # ref := BYTES (NULL BYTES)*
+ # value := BYTES
+ if not reference_lists:
+ # Simple case, we only have the key and the value
+ # So we have the (key NULL NULL value LF)
+ key_len = PyString_Size(string_key)
+ value = PyString_AsString(node[2])
+ value_len = PyString_Size(node[2])
+ flat_len = (key_len + 1 + 1 + value_len + 1)
+ line = PyString_FromStringAndSize(NULL, flat_len)
+ # Get a pointer to the new buffer
+ s = PyString_AsString(line)
+ memcpy(s, PyString_AsString(string_key), key_len)
+ s[key_len] = c'\0'
+ s[key_len + 1] = c'\0'
+ memcpy(s + key_len + 2, value, value_len)
+ s[key_len + 2 + value_len] = c'\n'
+ else:
+ flattened_references = []
for ref_list in node[3]:
ref_keys = []
for reference in ref_list:
ref_keys.append('\x00'.join(reference))
flattened_references.append('\r'.join(ref_keys))
- string_key = '\x00'.join(node[1])
- line = ("%s\x00%s\x00%s\n" % (string_key,
- '\t'.join(flattened_references), node[2]))
+ line = ("%s\x00%s\x00%s\n" % (string_key,
+ '\t'.join(flattened_references), node[2]))
return string_key, line
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2008-08-21 20:22:06 +0000
+++ b/bzrlib/tests/test_btree_index.py 2008-08-21 20:54:58 +0000
@@ -913,15 +913,35 @@
('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
}, node.keys)
- def assertFlattened(self, expected, key, value, refs, reference_lists):
+ def assertFlattened(self, expected, key, value, refs):
flat_key, flat_line = self.parse_btree._flatten_node(
- (None, key, value, refs), reference_lists)
+ (None, key, value, refs), bool(refs))
self.assertEqual('\x00'.join(key), flat_key)
self.assertEqual(expected, flat_line)
- def test_flatten_node_to_line_no_references(self):
- self.assertFlattened('key\x00\x00value\n',
- ('key',), 'value', [], False)
+ def test__flatten_node(self):
+ self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
+ self.assertFlattened('key\0tuple\0\0value str\n',
+ ('key', 'tuple'), 'value str', [])
+ self.assertFlattened('key\0tuple\0triple\0\0value str\n',
+ ('key', 'tuple', 'triple'), 'value str', [])
+ self.assertFlattened('k\0t\0s\0ref\0value str\n',
+ ('k', 't', 's'), 'value str', [[('ref',)]])
+ self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
+ ('key', 'tuple'), 'value str', [[('ref', 'key')]])
+ self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
+ ('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
+ self.assertFlattened(
+ "00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
+ ('00', '11'), 'value:1',
+ ((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
+ self.assertFlattened(
+ "11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
+ ('11', '33'), 'value:3',
+ ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
+ self.assertFlattened(
+ "11\x0044\x00\t11\x00ref00\x00value:4\n",
+ ('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
class TestCompiledBtree(tests.TestCase):
More information about the bazaar-commits mailing list