Rev 4505: Start using the _LeafLineEntries directly in http://bazaar.launchpad.net/~jameinel/bzr/1.17-btree-faster

John Arbash Meinel john at arbash-meinel.com
Wed Jul 1 23:13:04 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-btree-faster

------------------------------------------------------------
revno: 4505
revision-id: john at arbash-meinel.com-20090701221254-5d901cn8py29zdes
parent: john at arbash-meinel.com-20090701215405-vu7kdewhxj51lh75
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-btree-faster
timestamp: Wed 2009-07-01 17:12:54 -0500
message:
  Start using the _LeafLineEntries directly
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-07-01 21:54:05 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-07-01 22:12:54 +0000
@@ -49,6 +49,8 @@
     void Py_INCREF(object)
 
     PyObject *PyDict_GetItem(object, object)
+    int PyDict_SetItem(object d, object k, object v) except -1
+
     object PyTuple_New(Py_ssize_t n)
     void PyTuple_SET_ITEM(object, Py_ssize_t offset, object)
 
@@ -311,6 +313,71 @@
         self.data_start = NULL
         self.data_end = NULL
 
+    cdef _ensure_value(self):
+        cdef char *temp_ptr
+
+        if self.value is not None:
+            return
+        temp_ptr = <char*>_my_memrchr(self.data_start, c'\0',
+                                      self.data_end - self.data_start)
+        if temp_ptr == NULL:
+            # Invalid line
+            raise ValueError('Could not find the value section for %s'
+                             % (self.key,))
+        # capture the value string
+        self.value = safe_string_from_size(temp_ptr + 1,
+                                           self.data_end - temp_ptr - 1)
+        # shrink the references end point
+        self.data_end = temp_ptr
+
+    cdef _ensure_refs(self, int key_length, int ref_list_length):
+        cdef int i
+        cdef char *temp_ptr, *ref_ptr, *start, *next_start, *last
+
+        if self.refs is not None:
+            return
+        # refs come before value, so make sure data_end has been updated
+        self._ensure_value()
+        if not ref_list_length:
+            self.refs = ()
+            return
+        ref_lists = PyTuple_New(ref_list_length)
+        last = self.data_end
+        start = self.data_start
+        for i from 0 <= i < ref_list_length:
+            # extract a reference list
+            if last < start:
+                raise ValueError('failed to find proper reference lists')
+            # find the next reference list end point:
+            temp_ptr = <char*>memchr(start, c'\t', last - start)
+            if temp_ptr == NULL:
+                # Only valid for the last list
+                if i + 1 != ref_list_length:
+                    raise AssertionError("invalid key")
+                else:
+                    # scan to the end of the ref list area
+                    ref_ptr = last
+                    next_start = last
+            else:
+                # scan to the end of this ref list
+                ref_ptr = temp_ptr
+                next_start = temp_ptr + 1
+            # Now, there may be multiple keys in the ref list.
+            ref_list = []
+            while start < ref_ptr:
+                # loop finding keys and extracting them
+                temp_ptr = <char*>memchr(start, c'\r', ref_ptr - start)
+                if temp_ptr == NULL:
+                    # key runs to the end
+                    temp_ptr = ref_ptr
+                key = _extract_key(&start, temp_ptr, key_length)
+                PyList_Append(ref_list, key)
+            ref_list = tuple(ref_list)
+            Py_INCREF(ref_list)
+            PyTuple_SET_ITEM(ref_lists, i, ref_list)
+            start = next_start
+        self.refs = ref_lists
+
 
 
 cdef class _LeafNode:
@@ -356,14 +423,20 @@
                 next_line = _LeafLineEntry(key)
                 next_line.data_start = start
                 next_line.data_end = c_bytes + pos
-                self._lines[key] = next_line
+                PyDict_SetItem(self._lines, key, next_line)
                 start = c_bytes + pos + 1
         # Do we check that the very last byte is '\n'?
 
     cdef _populate_keys(self):
         cdef _LeafLineEntry entry
-        keys = dict(_parse_leaf_lines(self._bytes, self._key_length,
-                                      self._ref_list_length))
+        keys = {}
+        for entry in self._lines.itervalues():
+            entry._ensure_refs(self._key_length, self._ref_list_length)
+            PyDict_SetItem(keys, entry.key, (entry.value, entry.refs))
+        # keys = dict(_parse_leaf_lines(self._bytes, self._key_length,
+        #                               self._ref_list_length))
+        # if keys1 != keys:
+        #     raise ValueError('%s != %s')
         self._keys = keys
 
     property keys:
@@ -377,10 +450,20 @@
         # return len(self._keys)
 
     def get(self, key):
-        if self._keys is None:
-            self._populate_keys()
-        # PyDict_GetItem...
-        return self._keys.get(key, None)
+        cdef _LeafLineEntry entry
+        cdef PyObject *maybe
+        if self._keys is not None:
+            maybe = PyDict_GetItem(self._keys, key)
+            if maybe == NULL:
+                return None
+            return <object> maybe
+        else:
+            maybe = PyDict_GetItem(self._lines, key)
+            if maybe == NULL:
+                return None
+            entry = <_LeafLineEntry>maybe
+            entry._ensure_refs(self._key_length, self._ref_list_length)
+            return (entry.value, entry.refs)
 
 
 def _flatten_node(node, reference_lists):



More information about the bazaar-commits mailing list