Rev 156: Remove MemObject entirely. in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 29 21:35:15 GMT 2009
At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
------------------------------------------------------------
revno: 156
revision-id: john at arbash-meinel.com-20091229213451-fqddkg3pmr0e23nn
parent: john at arbash-meinel.com-20091229203715-0lu6cshqg59ac54w
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Tue 2009-12-29 15:34:51 -0600
message:
Remove MemObject entirely.
I'm not 100% satisfied with the new api, but it does make it clear
that the old way is no longer valid.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx 2009-12-29 15:59:46 +0000
+++ b/meliae/_loader.pyx 2009-12-29 21:34:51 +0000
@@ -180,6 +180,40 @@
PyObject *proxy
+cdef _MemObject *_new_mem_object(address, type_str, size, ref_list,
+ value, name, parent_list, total_size) except NULL:
+ cdef _MemObject *new_entry
+ cdef PyObject *addr
+
+ new_entry = <_MemObject *>PyMem_Malloc(sizeof(_MemObject))
+ if new_entry == NULL:
+ raise MemoryError('Failed to allocate %d bytes' % (sizeof(_MemObject),))
+ memset(new_entry, 0, sizeof(_MemObject))
+ addr = <PyObject *>address
+ Py_INCREF(addr)
+ new_entry.address = addr
+ new_entry.type_str = <PyObject *>type_str
+ Py_INCREF(new_entry.type_str)
+ new_entry.size = size
+ new_entry.ref_list = _list_to_ref_list(ref_list)
+ # TODO: The 'length' field was found wanting and removed
+ # if length is None:
+ # new_entry.length = -1
+ # else:
+ # new_entry.length = length
+ if value is not None and name is not None:
+ raise RuntimeError("We currently only support one of value or name"
+ " per object.")
+ if value is not None:
+ new_entry.value = <PyObject *>value
+ else:
+ new_entry.value = <PyObject *>name
+ Py_INCREF(new_entry.value)
+ new_entry.parent_list = _list_to_ref_list(parent_list)
+ new_entry.total_size = total_size
+ return new_entry
+
+
cdef int _free_mem_object(_MemObject *cur) except -1:
if cur == NULL: # Already cleared
return 0
@@ -209,6 +243,27 @@
cdef class MemObjectCollection
+cdef class _MemObjectProxy
+
+
+def _MemObjectProxy_from_args(address, type_str, size, ref_list=(), length=0,
+ value=None, name=None, parent_list=(),
+ total_size=0):
+ """Create a standalone _MemObjectProxy instance.
+
+ Note that things like '__getitem__' won't work, as they query the
+ collection for the actual data.
+ """
+ cdef _MemObject *new_entry
+ cdef _MemObjectProxy proxy
+
+ new_entry = _new_mem_object(address, type_str, size, ref_list,
+ value, name, parent_list, total_size)
+ proxy = _MemObjectProxy(None)
+ proxy._obj = new_entry
+ proxy._managed_obj = new_entry
+ new_entry.proxy = <PyObject *>proxy
+ return proxy
cdef class _MemObjectProxy:
@@ -420,6 +475,25 @@
self.type_str, self.address, self.size,
refs, parent_str, val, total_size_str)
+ def to_json(self):
+ """Convert this back into json."""
+ refs = []
+ for ref in sorted(self.ref_list):
+ refs.append(str(ref))
+ # Note: We've lost the info about whether this was a value or a name
+ # We've also lost the 'length' field.
+ if self.value is not None:
+ if self.type_str == 'int':
+ value = '"value": %s, ' % self.value
+ else:
+ # TODO: This isn't perfect, as it doesn't do proper JSON
+ # escaping (e.g. a quote or backslash in the value)
+ value = '"value": "%s", ' % self.value
+ else:
+ value = ''
+ return '{"address": %d, "type": "%s", "size": %d, %s"refs": [%s]}' % (
+ self.address, self.type_str, self.size, value, ', '.join(refs))
+
cdef class MemObjectCollection:
"""Track a bunch of _MemObject instances."""
@@ -635,7 +709,6 @@
"""Add a new MemObject to this collection."""
cdef _MemObject **slot, *new_entry
cdef _MemObjectProxy proxy
- cdef PyObject *addr
slot = self._lookup(address)
if slot[0] != NULL and slot[0] != _dummy:
@@ -644,41 +717,13 @@
assert False, "We don't support overwrite yet."
# TODO: These are fairly small and more subject to churn, maybe we
# should be using PyObject_Malloc instead...
- new_entry = <_MemObject *>PyMem_Malloc(sizeof(_MemObject))
- if new_entry == NULL:
- # TODO: as we are running out of memory here, we might want to
- # pre-allocate this object. Since it is likely to take as
- # much mem to create this object as _MemObject
- raise MemoryError('Failed to allocate %d bytes'
- % (sizeof(_MemObject),))
- memset(new_entry, 0, sizeof(_MemObject))
- addr = <PyObject *>address
+ new_entry = _new_mem_object(address, type_str, size, ref_list,
+ value, name, parent_list, total_size)
+
if slot[0] == NULL:
self._filled += 1
self._active += 1
slot[0] = new_entry
- Py_INCREF(addr)
- new_entry.address = addr
- new_entry.type_str = <PyObject *>type_str
- Py_INCREF(new_entry.type_str)
- new_entry.size = size
- new_entry.ref_list = _list_to_ref_list(ref_list)
- # TODO: Scheduled for removal
- # if length is None:
- # new_entry.length = -1
- # else:
- # new_entry.length = length
- if value is not None and name is not None:
- raise RuntimeError("We currently only support one of value or name"
- " per object.")
- if value is not None:
- new_entry.value = <PyObject *>value
- else:
- new_entry.value = <PyObject *>name
- Py_INCREF(new_entry.value)
- new_entry.parent_list = _list_to_ref_list(parent_list)
- new_entry.total_size = total_size
-
if self._filled * 3 > (self._table_mask + 1) * 2:
# We need to grow
self._resize(self._active * 2)
@@ -807,203 +852,3 @@
' %d, %d %d' % (<int>cur, self.table_pos,
self.collection._table_mask))
return self.collection._proxy_for(<object>cur.address, cur)
-
-
-cdef class MemObject:
- """This defines the information we know about the objects.
-
- We use a Pyrex class, since in python each object is 40 bytes, but you also
- have to include the size of all the objects referenced. (a 4-byte integer,
- becomes a 12-byte PyInt.)
-
- :ivar address: The address in memory of the original object. This is used
- as the 'handle' to this object.
- :ivar type_str: The type of this object
- :ivar size: The number of bytes consumed for just this object. So for a
- dict, this would be the basic_size + the size of the allocated array to
- store the reference pointers
- :ivar ref_list: A list of items referenced from this object
- :ivar num_refs: Count of references
- :ivar value: A PyObject representing the Value for this object. (For
- strings, it is the first 100 bytes, it may be None if we have no value,
- or it may be an integer, etc.)
- :ivar name: Some objects have associated names, like modules, classes, etc.
- """
-
- cdef readonly object address # We track the address by pointing to a PyInt
- # This is valid, because we put these objects
- # into a dict anyway, so we need a PyInt
- # And we can just share it
- cdef readonly object type_str # pointer to a PyString, this is expected to
- # be shared with many other instances, but
- # longer than 4 bytes
- cdef public long size # Number of bytes consumed by this instance
- # TODO: Right now this points to the integer offset, which we then look up
- # in the OM dict. However, if we are going to go with PyObject *, why
- # not just point to the final object anyway...
- cdef RefList *_ref_list # An array of addresses that this object
- # referenced. May be NULL if len() == 0
- # TODO: Scheduled for removal
- cdef readonly int length # Object length (ob_size), aka len(object)
- cdef public object value # May be None, a PyString or a PyInt
- cdef readonly object name # Name of this object (only valid for
- # modules, etc)
- cdef RefList *_referrer_list # An array of addresses that refer to this,
-
- cdef public unsigned long total_size # Size of everything referenced from
- # this object
-
- def __init__(self, address, type_str, size, ref_list, length=None,
- value=None, name=None):
- self.address = address
- self.type_str = type_str
- self.size = size
- self._ref_list = _list_to_ref_list(ref_list)
- if length is None:
- self.length = -1
- else:
- self.length = length
- self.value = value
- self.name = name
- self._referrer_list = NULL
- self.total_size = 0 # uncomputed yet
-
- property ref_list:
- """The list of objects referenced by this object."""
- def __get__(self):
- return _ref_list_to_list(self._ref_list)
-
- def __set__(self, value):
- _free_ref_list(self._ref_list)
- self._ref_list = _list_to_ref_list(value)
-
- property num_refs:
- """The length of the ref_list."""
- def __get__(self):
- if self._ref_list == NULL:
- return 0
- return self._ref_list.size
-
- def __len__(self):
- if self._ref_list == NULL:
- return 0
- return self._ref_list.size
-
- property referrers:
- """The list of objects that reference this object.
-
- Original set to None, can be computed on demand.
- """
- def __get__(self):
- return _ref_list_to_list(self._referrer_list)
-
- def __set__(self, value):
- _free_ref_list(self._referrer_list)
- self._referrer_list = _list_to_ref_list(value)
-
- property num_referrers:
- """The length of the referrers list."""
- def __get__(self):
- if self._referrer_list == NULL:
- return 0
- return self._referrer_list.size
-
- def __dealloc__(self):
- cdef long i
- _free_ref_list(self._ref_list)
- self._ref_list = NULL
- _free_ref_list(self._referrer_list)
- self._referrer_list = NULL
-
- def __repr__(self):
- cdef int i, max_refs
- cdef double total_size
- if self.name is not None:
- name_str = ', %s' % (self.name,)
- else:
- name_str = ''
- if self._ref_list == NULL:
- num_refs = 0
- ref_space = ''
- ref_str = ''
- else:
- num_refs = self._ref_list.size
- ref_str = _format_list(self._ref_list)
- ref_space = ' '
- if self._referrer_list == NULL:
- referrer_str = ''
- else:
- referrer_str = ', %d referrers %s' % (
- self._referrer_list.size,
- _format_list(self._referrer_list))
- if self.value is None:
- value_str = ''
- else:
- r = repr(self.value)
- if isinstance(self.value, basestring):
- if len(r) > 21:
- r = r[:18] + "..."
- value_str = ', %s' % (r,)
- if self.total_size == 0:
- total_size_str = ''
- else:
- total_size = self.total_size
- order = 'B'
- if total_size > 800.0:
- total_size = total_size / 1024
- order = 'KiB'
- if total_size > 800.0:
- total_size = total_size / 1024
- order = 'MiB'
- if total_size > 800.0:
- total_size = total_size / 1024
- order = 'GiB'
- total_size_str = ', %.1f%s' % (total_size, order)
-
-
- return ('%s(%d, %s%s, %d bytes, %d refs%s%s%s%s%s)'
- % (self.__class__.__name__, self.address, self.type_str,
- name_str, self.size, num_refs, ref_space, ref_str,
- referrer_str, value_str, total_size_str))
-
- def __getitem__(self, offset):
- cdef long off
- cdef PyObject *res
-
- if self._ref_list == NULL:
- raise IndexError('%s has no refs' % (self,))
- off = offset
- if off >= self._ref_list.size:
- raise IndexError('%s has only %d refs'
- % (self, self._ref_list.size))
- res = self._ref_list.refs[off]
- return <object>res
-
- def _intern_from_cache(self, cache):
- self.address = _set_default(cache, self.address)
- self.type_str = _set_default(cache, self.type_str)
-
- def to_json(self):
- """Convert this MemObject to json."""
- refs = []
- for ref in sorted(self.ref_list):
- refs.append(str(ref))
- if self.length != -1:
- length = '"len": %d, ' % self.length
- else:
- length = ''
- if self.value is not None:
- if self.type_str == 'int':
- value = '"value": %s, ' % self.value
- else:
- value = '"value": "%s", ' % self.value
- else:
- value = ''
- if self.name:
- name = '"name": "%s", ' % self.name
- else:
- name = ''
- return '{"address": %d, "type": "%s", "size": %d, %s%s%s"refs": [%s]}' % (
- self.address, self.type_str, self.size, name, length, value,
- ', '.join(refs))
-
=== modified file 'meliae/loader.py'
--- a/meliae/loader.py 2009-12-29 20:37:15 +0000
+++ b/meliae/loader.py 2009-12-29 21:34:51 +0000
@@ -17,6 +17,7 @@
Currently requires simplejson to parse.
"""
+import gc
import math
import os
import re
@@ -209,6 +210,7 @@
get_refs = referrers.get
total = len(self.objs)
tlast = timer()-20
+ gc.disable()
for idx, obj in enumerate(self.objs.itervalues()):
if self.show_progress and idx & 0x3f == 0:
tnow = timer()
@@ -267,6 +269,7 @@
obj.referrers = (refs,)
else:
obj.referrers = refs
+ gc.enable()
if self.show_progress:
sys.stderr.write('set referrers %8d / %8d \n'
% (idx, total))
@@ -382,15 +385,19 @@
# and reference a 'classobj' with the actual type name
collapsed = 0
total = len(self.objs)
+ tlast = timer()-20
for item_idx, (address, obj) in enumerate(self.objs.items()):
if obj.type_str in ('str', 'dict', 'tuple', 'list', 'type',
'function', 'wrapper_descriptor',
'code', 'classobj', 'int',
'weakref'):
continue
- if self.show_progress and item_idx & 0x5ff:
- sys.stderr.write('checked %8d / %8d collapsed %8d \r'
- % (item_idx, total, collapsed))
+ if self.show_progress and item_idx & 0x3f:
+ tnow = timer()
+ if tnow - tlast > 0.1:
+ tlast = tnow
+ sys.stderr.write('checked %8d / %8d collapsed %8d \r'
+ % (item_idx, total, collapsed))
if obj.type_str == 'module' and len(obj) == 1:
(dict_ref,) = obj.ref_list
extra_refs = []
@@ -418,14 +425,14 @@
# the dict from the collection
del self.objs[dict_ref]
if self.show_progress:
- sys.stderr.write('checked %8d / %8d collapsed %8d \r'
+ sys.stderr.write('checked %8d / %8d collapsed %8d \n'
% (item_idx, total, collapsed))
if collapsed:
self.compute_referrers()
def refs_as_dict(self, obj):
"""Expand the ref list considering it to be a 'dict' structure.
-
+
Often we have dicts that point to simple strings and ints, etc. This
tries to expand that as much as possible.
@@ -514,8 +521,8 @@
:param objs: Either None or a dict containing objects by address. If not
None, then duplicate objects will not be parsed or output.
:param factory: Use this to create new instances, if None, use
- _loader.MemObject
- :return: A generator of MemObjects.
+ _loader._MemObjectProxy_from_args
+ :return: A generator of memory objects.
"""
# TODO: cStringIO?
tstart = timer()
@@ -532,7 +539,7 @@
else:
decoder = _from_line
if factory is None:
- factory = _loader.MemObject
+ factory = _loader._MemObjectProxy_from_args
for line_num, line in enumerate(source):
bytes_read += len(line)
if line in ("[\n", "]\n"):
@@ -619,7 +626,7 @@
# Second pass, any object which refers to something in noref_objs will
# have that reference removed, and replaced with the null_memobj
num_expensive = len(noref_objs)
- null_memobj = _loader.MemObject(0, '<ex-reference>', 0, [])
+ null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
if not seen_zero:
yield (True, null_memobj)
if show_progress and total_objs == 0:
=== modified file 'meliae/tests/test__loader.py'
--- a/meliae/tests/test__loader.py 2009-12-29 15:40:23 +0000
+++ b/meliae/tests/test__loader.py 2009-12-29 21:34:51 +0000
@@ -20,100 +20,6 @@
)
-class TestMemObject(tests.TestCase):
-
- def test_test_simple_attributes(self):
- mem = _loader.MemObject(1234, 'int', 12, [])
- self.assertEqual(1234, mem.address)
- # Make sure we don't cast into PyLong
- self.assertTrue(isinstance(mem.address, int))
- self.assertEqual('int', mem.type_str)
- self.assertEqual(12, mem.size)
- self.assertTrue(isinstance(mem.size, int))
- self.assertEqual((), mem.ref_list)
- self.assertEqual(0, mem.total_size)
-
- def test_ref_list(self):
- mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
- self.assertEqual([4567, 8901], mem.ref_list)
- mem.ref_list = [999, 4567, 0]
- self.assertEqual([999, 4567, 0], mem.ref_list)
- self.assertEqual(3, mem.num_refs)
-
- def test_num_refs(self):
- mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
- self.assertEqual(2, mem.num_refs)
- mem = _loader.MemObject(1234, 'tuple', 12, [])
- self.assertEqual(0, mem.num_refs)
-
- def test__getitem__(self):
- mem = _loader.MemObject(1234, 'tuple', 12, [])
- def get(offset):
- return mem[offset]
- self.assertRaises(IndexError, get, 0)
- self.assertRaises(IndexError, get, 1)
- self.assertRaises(IndexError, get, -1)
- mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
- self.assertEqual(4567, mem[0])
- self.assertEqual(8901, mem[1])
-
- def test_num_referrers(self):
- mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
- mem.referrers = ()
- self.assertEqual(0, mem.num_referrers)
- self.assertEqual((), mem.referrers)
- mem.referrers = [1, 2, 3]
- self.assertEqual(3, mem.num_referrers)
- self.assertEqual([1, 2, 3], mem.referrers)
-
- def test_total_size(self):
- mem = _loader.MemObject(1234, 'tuple', 20, [4567, 8901])
- self.assertEqual(0, mem.total_size)
- mem.total_size = 100
- self.assertEqual(100, mem.total_size)
-
- def test__repr__(self):
- mem = _loader.MemObject(1234, 'str', 24, [])
- self.assertEqual('MemObject(1234, str, 24 bytes'
- ', 0 refs)', repr(mem))
- mem = _loader.MemObject(1234, 'tuple', 12, [4567, 8900])
- self.assertEqual('MemObject(1234, tuple, 12 bytes'
- ', 2 refs [4567, 8900])', repr(mem))
- mem = _loader.MemObject(1234, 'module', 12, [4567, 8900],
- name='named')
- self.assertEqual('MemObject(1234, module, named, 12 bytes'
- ', 2 refs [4567, 8900])', repr(mem))
- mem = _loader.MemObject(1234, 'module', 12, range(20))
- self.assertEqual('MemObject(1234, module, 12 bytes'
- ', 20 refs [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ...])',
- repr(mem))
- mem = _loader.MemObject(1234, 'foo', 12, [10])
- mem.referrers = [20, 30]
- self.assertEqual('MemObject(1234, foo, 12 bytes'
- ', 1 refs [10], 2 referrers [20, 30])',
- repr(mem))
- mem = _loader.MemObject(1234, 'str', 24, [])
- mem.value = 'teststr'
- self.assertEqual('MemObject(1234, str, 24 bytes'
- ', 0 refs, \'teststr\')', repr(mem))
- mem.value = 'averylongstringwithmorestuff'
- self.assertEqual('MemObject(1234, str, 24 bytes'
- ', 0 refs, \'averylongstringwi...)', repr(mem))
- mem = _loader.MemObject(1234, 'int', 12, [])
- mem.value = 12345
- self.assertEqual('MemObject(1234, int, 12 bytes'
- ', 0 refs, 12345)', repr(mem))
- mem.total_size = 12
- self.assertEqual('MemObject(1234, int, 12 bytes'
- ', 0 refs, 12345, 12.0B)', repr(mem))
- mem.total_size = 1024
- self.assertEqual('MemObject(1234, int, 12 bytes'
- ', 0 refs, 12345, 1.0KiB)', repr(mem))
- mem.total_size = int(1024*1024*10.5)
- self.assertEqual('MemObject(1234, int, 12 bytes'
- ', 0 refs, 12345, 10.5MiB)', repr(mem))
-
-
class TestMemObjectCollection(tests.TestCase):
def test__init__(self):
=== modified file 'meliae/tests/test_loader.py'
--- a/meliae/tests/test_loader.py 2009-12-29 20:37:15 +0000
+++ b/meliae/tests/test_loader.py 2009-12-29 21:34:51 +0000
@@ -166,9 +166,19 @@
class TestMemObj(tests.TestCase):
def test_to_json(self):
- objs = list(loader.iter_objs(_example_dump))
+ manager = loader.load(_example_dump, show_prog=False)
+ objs = manager.objs.values()
objs.sort(key=lambda x:x.address)
- expected = sorted(_example_dump)
+ expected = [
+'{"address": 1, "type": "tuple", "size": 20, "refs": [2, 3]}',
+'{"address": 2, "type": "dict", "size": 124, "refs": [4, 5, 6, 7]}',
+'{"address": 3, "type": "list", "size": 44, "refs": [3, 4, 5]}',
+'{"address": 4, "type": "int", "size": 12, "value": 2, "refs": []}',
+'{"address": 5, "type": "int", "size": 12, "value": 1, "refs": []}',
+'{"address": 6, "type": "str", "size": 29, "value": "a str", "refs": []}',
+'{"address": 7, "type": "tuple", "size": 20, "refs": [4, 5]}',
+'{"address": 8, "type": "module", "size": 60, "value": "mymod", "refs": [2]}',
+ ]
self.assertEqual(expected, [obj.to_json() for obj in objs])
More information about the bazaar-commits
mailing list