Rev 124: Bunch more work on the custom MOC implementation. in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

John Arbash Meinel john at arbash-meinel.com
Thu Dec 24 05:21:33 GMT 2009


At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection

------------------------------------------------------------
revno: 124
revision-id: john at arbash-meinel.com-20091224052116-q568ek1c9qgthyx2
parent: john at arbash-meinel.com-20091224033431-vbjptb3wsptqp49v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Wed 2009-12-23 23:21:16 -0600
message:
  Bunch more work on the custom MOC implementation.
  
  Still a ways from being 'ready', but getting there.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx	2009-12-24 03:34:31 +0000
+++ b/meliae/_loader.pyx	2009-12-24 05:21:16 +0000
@@ -1,14 +1,14 @@
 # Copyright (C) 2009 Canonical Ltd
-# 
+#
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 3 as
 # published by the Free Software Foundation.
-# 
+#
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
@@ -33,6 +33,8 @@
     int PyObject_RichCompareBool(PyObject *, PyObject *, int) except -1
     int Py_EQ
     void memset(void *, int, size_t)
+    # void fprintf(void *, char *, ...)
+    # void *stderr
 
 
 ctypedef struct RefList:
@@ -47,7 +49,7 @@
     may be slightly worse because it does 2 dict lookups?)
     """
     cdef PyObject *tmp
-    
+
     tmp = PyDict_GetItem(d, val)
     if tmp == NULL:
         PyDict_SetItem(d, val, val)
@@ -137,8 +139,93 @@
     unsigned long total_size
 
 
-cdef _MemObject *_dummy
-_dummy = <_MemObject*>(-1)
+cdef PyObject *_dummy
+dummy = object()
+_dummy = <PyObject *>dummy
+
+
+cdef class MemObjectCollection
+
+
+cdef class _MemObjectProxy:
+    """This class proxies between a real Python object and MOC's data.
+
+    MOC stores the data as a fairly efficient table, without the overhead of
+    having a regular python object for every data point. However, the rest of
+    python code needs to interact with a real python object, so we generate
+    these on-the-fly.
+    """
+
+    # Note that it is possible for the table to be mutated while this object is
+    # live, which is why we don't hold a direct pointer to the _MemObject, and
+    # instead go through a table lookup for all attributes.  An alternative
+    # option would be to hold a copy-by-value, and only go back to the original
+    # object in __set__ functions.
+    # Alternatively, we could keep track of all proxy objects returned, and
+    # point them to the new locations when things move.
+    # We may want to keep track of them anyway, so that we don't create 20
+    # proxy objects for the same address. TBD
+
+    cdef MemObjectCollection collection
+    cdef public object address
+
+    def __init__(self, address, collection):
+        self.address = address
+        self.collection = collection
+
+    cdef _MemObject *_get_obj(self) except NULL:
+        cdef _MemObject *slot
+
+        slot = self.collection._lookup(self.address)
+        if slot.address == NULL or slot.address == _dummy:
+            raise RuntimeError('we failed to lookup the obj at address %d'
+                               % (self.address,))
+        return slot
+
+    property type_str:
+        """The type of this object."""
+        def __get__(self):
+            cdef _MemObject *slot
+
+            slot = self._get_obj()
+            return <object>(slot.type_str)
+
+    property size:
+        """The number of bytes allocated for this object."""
+        def __get__(self):
+            cdef _MemObject *slot
+
+            slot = self._get_obj()
+            return slot.size
+
+        def __set__(self, value):
+            cdef _MemObject *slot
+
+            slot = self._get_obj()
+            slot.size = value
+
+    def __len__(self):
+        cdef _MemObject *slot
+
+        slot = self._get_obj()
+        if slot.ref_list == NULL:
+            return 0
+        return slot.ref_list.size
+
+    # def __getitem__(self, offset):
+    #     cdef _MemObject *slot
+    #     cdef PyObject *item_addr
+    #     cdef long off
+
+    #     slot = self._get_obj()
+    #     if slot.ref_list == NULL:
+    #         raise IndexError('%s has no references' % (self,))
+    #     off = offset
+    #     if off >= slot.ref_list.size:
+    #         raise IndexError('%s has only %d (not %d) references'
+    #                          % (self, slot.ref_list.size, offset))
+    #     item_addr = slot.ref_list.refs[off]
+    #     return self.collection[<object>item_addr]
 
 
 cdef class MemObjectCollection:
@@ -154,13 +241,15 @@
         self._table = <_MemObject*>PyMem_Malloc(sizeof(_MemObject)*1024)
         memset(self._table, 0, sizeof(_MemObject)*1024)
 
-    cdef _MemObject* _lookup(self, PyObject *address) except NULL:
+    cdef _MemObject* _lookup(self, address) except NULL:
         cdef long the_hash
         cdef size_t i, n_lookup
         cdef long mask
         cdef _MemObject *table, *slot, *free_slot
+        cdef PyObject *py_addr
 
-        the_hash = PyObject_Hash(address)
+        py_addr = <PyObject *>address
+        the_hash = PyObject_Hash(py_addr)
         i = <size_t>the_hash
         mask = self._table_mask
         table = self._table
@@ -174,53 +263,176 @@
                     return free_slot
                 else:
                     return slot
-            if slot.address == address:
+            if slot.address == py_addr:
                 # Found an exact pointer to the key
                 return slot
-            if slot == _dummy:
+            if slot.address == _dummy:
                 if free_slot == NULL:
                     free_slot = slot
-            elif PyObject_RichCompareBool(slot.address, address, Py_EQ):
+            elif PyObject_RichCompareBool(slot.address, py_addr, Py_EQ):
                 # Both py_key and cur belong in this slot, return it
                 return slot
             i = i + 1 + n_lookup
-        raise AssertionError('should never get here')
+        raise AssertionError('we failed to find an open slot')
+
+    cdef _clear_slot(self, int offset):
+        cdef _MemObject *cur
+
+        cur = self._table + offset
+        if cur.address == NULL: # Already cleared
+            return
+        Py_XDECREF(cur.address)
+        Py_XDECREF(cur.type_str)
+        cur.type_str = NULL
+        _free_ref_list(cur.ref_list)
+        cur.ref_list = NULL
+        Py_XDECREF(cur.value)
+        cur.value = NULL
+        Py_XDECREF(cur.name)
+        cur.name = NULL
+        _free_ref_list(cur.referrer_list)
+        cur.referrer_list = NULL
 
     def _test_lookup(self, address):
-        cdef _MemObject *slot 
+        cdef _MemObject *slot
 
-        slot = self._lookup(<PyObject *>address)
+        slot = self._lookup(address)
         return (slot - self._table)
 
-    # def __contains__(self, address):
-    #     pass
-
-    # def __getitem__(self, address):
-    #     pass
+    def __contains__(self, address):
+        cdef _MemObject *slot
+
+        slot = self._lookup(address)
+        if slot.address == NULL or slot.address == _dummy:
+            return False
+        return True
+
+    def __getitem__(self, address):
+        cdef _MemObject *slot
+
+        slot = self._lookup(address)
+        if slot.address == NULL or slot.address == _dummy:
+            raise KeyError('address %s not present' % (address,))
+        return _MemObjectProxy(address, self)
 
     # def __delitem__(self, address):
     #     pass
 
-    # def __setitem__(self, address, value):
-    #     pass
+    #def __setitem__(self, address, value):
+    #    """moc[address] = value"""
+    #    pass
+
+    cdef int _insert_clean(self, _MemObject *entry) except -1:
+        """Copy _MemObject into the table.
+
+        We know that this _MemObject is unique, and we know that self._table
+        contains no _dummy entries. So we can do the lookup cheaply, without
+        any equality checks, etc.
+        """
+        cdef long the_hash
+        cdef size_t i, n_lookup, mask
+        cdef _MemObject *slot, *table
+
+        assert entry != NULL and entry.address != NULL
+        mask = <size_t>self._table_mask
+        the_hash = <size_t>PyObject_Hash(entry.address)
+        o = int(<object>entry.address)
+        for n_lookup from 0 <= n_lookup < mask:
+            slot = &self._table[i & mask]
+            if slot.address == NULL:
+                slot[0] = entry[0]
+                self._filled += 1
+                self._active += 1
+                return 1
+            i = i + 1 + n_lookup
+        raise RuntimeError('could not find a free slot after %d lookups'
+                           % (n_lookup,))
+
+    cdef int _resize(self, int min_active) except -1:
+        """Resize the internal table.
+
+        We will be big enough to hold at least 'min_active' entries. We will
+        create a copy of all data, leaving out dummy entries.
+
+        :return: The new table size.
+        """
+        cdef int new_size, remaining
+        cdef size_t n_bytes
+        cdef _MemObject *old_table, *old_slot, *new_table
+
+        new_size = 1024
+        while new_size <= min_active and new_size > 0:
+            new_size <<= 1
+        if new_size <= 0:
+            raise MemoryError('table size too large for %d entries'
+                              % (min_active,))
+        n_bytes = sizeof(_MemObject)*new_size
+        new_table = <_MemObject*>PyMem_Malloc(n_bytes)
+        if new_table == NULL:
+            raise MemoryError('Failed to allocate %d bytes' % (n_bytes,))
+        memset(new_table, 0, n_bytes)
+        old_slot = old_table = self._table
+        self._table = new_table
+        self._table_mask = new_size - 1
+        remaining = self._active
+        self._filled = 0
+        self._active = 0
+        # fprintf(stderr, "malloced %d %d @%x\n", new_size, n_bytes, new_table)
+
+        while remaining > 0:
+            if old_slot.address == NULL:
+                pass # empty
+            elif old_slot.address == _dummy:
+                pass # dummy
+            else:
+                remaining -= 1
+                self._insert_clean(old_slot)
+            old_slot += 1
+        # Moving everything over is refcount neutral, so we just free the old
+        # table
+        PyMem_Free(old_table)
+        return new_size
+
+
+    def add(self, address, type_str, size, ref_list=(), length=0,
+            value=None, name=None, referrer_list=(), total_size=0):
+        """Add a new MemObject to this collection."""
+        cdef _MemObject *slot
+        cdef PyObject *addr
+
+        slot = self._lookup(address)
+        if slot.address != NULL and slot.address != _dummy:
+            # We are overwriting an existing entry, for now, fail
+            # Probably all we have to do is clear the slot first, then continue
+            assert False, "We don't support overwrite yet."
+        addr = <PyObject *>address
+        if slot.address == NULL:
+            self._filled += 1
+        self._active += 1
+        Py_INCREF(addr)
+        slot.address = addr
+        slot.type_str = <PyObject *>type_str
+        Py_INCREF(slot.type_str)
+        slot.size = size
+        slot.ref_list = _list_to_ref_list(ref_list)
+        slot.length = length
+        slot.value = <PyObject *>value
+        Py_INCREF(slot.value)
+        slot.name = <PyObject *>name
+        Py_INCREF(slot.name)
+        slot.referrer_list = _list_to_ref_list(referrer_list)
+        slot.total_size = total_size
+
+        if self._filled * 3 > (self._table_mask + 1) * 2:
+            # We need to grow
+            # fprintf(stderr, "resizing to %d\n", self._active * 2)
+            self._resize(self._active * 2)
 
     def __dealloc__(self):
         cdef long i
-        cdef _MemObject *cur
 
         for i from 0 <= i < self._table_mask:
-            cur = self._table + i
-            if cur.address != NULL:
-                Py_XDECREF(cur.type_str)
-                cur.type_str = NULL
-                _free_ref_list(cur.ref_list)
-                cur.ref_list = NULL
-                Py_XDECREF(cur.value)
-                cur.value = NULL
-                Py_XDECREF(cur.name)
-                cur.name = NULL
-                _free_ref_list(cur.referrer_list)
-                cur.referrer_list = NULL
+            self._clear_slot(i)
         PyMem_Free(self._table)
         self._table = NULL
 
@@ -369,7 +581,7 @@
                 total_size = total_size / 1024
                 order = 'GiB'
             total_size_str = ', %.1f%s' % (total_size, order)
-            
+
 
         return ('%s(%d, %s%s, %d bytes, %d refs%s%s%s%s%s)'
                 % (self.__class__.__name__, self.address, self.type_str,
@@ -416,3 +628,4 @@
         return '{"address": %d, "type": "%s", "size": %d, %s%s%s"refs": [%s]}' % (
             self.address, self.type_str, self.size, name, length, value,
             ', '.join(refs))
+

=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py	2009-12-24 03:34:31 +0000
+++ b/meliae/tests/test__loader.py	2009-12-24 05:21:16 +0000
@@ -132,3 +132,52 @@
         self.assertEqual(933, moc._test_lookup(933+1024))
         self.assertEqual(933, moc._test_lookup(933L+1024L))
         self.assertEqual(933, moc._test_lookup(933L+2**32-1))
+
+    def test__lookup_collide(self):
+        moc = _loader.MemObjectCollection()
+        self.assertEqual(1023, moc._table_mask)
+        self.assertEqual(0, moc._test_lookup(0))
+        self.assertEqual(0, moc._test_lookup(1024))
+        moc.add(0, 'foo', 100)
+        self.assertEqual(0, moc._test_lookup(0))
+        self.assertEqual(1, moc._test_lookup(1024))
+        moc.add(1024, 'bar', 200)
+        self.assertEqual(0, moc._test_lookup(0))
+        self.assertEqual(1, moc._test_lookup(1024))
+
+    def test__contains__(self):
+        moc = _loader.MemObjectCollection()
+        self.assertEqual(0, moc._test_lookup(0))
+        self.assertEqual(0, moc._test_lookup(1024))
+        self.assertFalse(0 in moc)
+        self.assertFalse(1024 in moc)
+        moc.add(0, 'foo', 100)
+        self.assertTrue(0 in moc)
+        self.assertFalse(1024 in moc)
+        moc.add(1024, 'bar', 200)
+        self.assertTrue(0 in moc)
+        self.assertTrue(1024 in moc)
+
+    def test__getitem__(self):
+        moc = _loader.MemObjectCollection()
+        def get(offset):
+            return moc[offset]
+        self.assertRaises(KeyError, get, 0)
+        self.assertRaises(KeyError, get, 1024)
+        moc.add(0, 'foo', 100)
+        mop = moc[0]
+        self.assertTrue(isinstance(mop, _loader._MemObjectProxy))
+        self.assertEqual('foo', mop.type_str)
+        self.assertEqual(100, mop.size)
+        self.assertRaises(KeyError, get, 1024)
+
+    def test_add_until_resize(self):
+        moc = _loader.MemObjectCollection()
+        for i in xrange(1025):
+            moc.add(i, 'foo', 100+i)
+        self.assertEqual(1025, moc._filled)
+        self.assertEqual(1025, moc._active)
+        self.assertEqual(2047, moc._table_mask)
+        mop = moc[1024]
+        self.assertEqual(1024, mop.address)
+        self.assertEqual(1124, mop.size)



More information about the bazaar-commits mailing list