Rev 154: Speed up collection.items() tremendously by disabling gc. in http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 29 16:00:08 GMT 2009
At http://bazaar.launchpad.net/~jameinel/meliae/mem-object-collection
------------------------------------------------------------
revno: 154
revision-id: john at arbash-meinel.com-20091229155946-18hvabfyjvs39m59
parent: john at arbash-meinel.com-20091229154532-4drym9jwwkpy598i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: mem-object-collection
timestamp: Tue 2009-12-29 09:59:46 -0600
message:
Speed up collection.items() tremendously by disabling gc.
-------------- next part --------------
=== modified file 'meliae/_loader.pyx'
--- a/meliae/_loader.pyx 2009-12-29 15:40:23 +0000
+++ b/meliae/_loader.pyx 2009-12-29 15:59:46 +0000
@@ -24,6 +24,8 @@
long PyObject_Hash(PyObject *) except -1
+ object PyList_New(Py_ssize_t)
+ void PyList_SET_ITEM(object, Py_ssize_t, object)
PyObject *PyDict_GetItem(object d, object key)
PyObject *PyDict_GetItem_ptr "PyDict_GetItem" (object d, PyObject *key)
int PyDict_SetItem(object d, object key, object val) except -1
@@ -41,6 +43,8 @@
# void fprintf(void *, char *, ...)
# void *stderr
+import gc
+
ctypedef struct RefList:
long size
@@ -716,20 +720,35 @@
def items(self):
"""Iterate over (key, value) tuples."""
- cdef long i
+ cdef long i, out_idx
cdef _MemObject *cur
cdef _MemObjectProxy proxy
- # TODO: Pre-allocate the full size list
- values = []
- for i from 0 <= i < self._table_mask:
- cur = self._table[i]
- if cur == NULL or cur == _dummy:
- continue
- else:
- address = <object>cur.address
- proxy = self._proxy_for(address, cur)
- values.append((address, proxy))
+ enabled = gc.isenabled()
+ if enabled:
+ # We are going to be creating a lot of objects here, but not with
+ # cycles, so we disable gc temporarily
+ # With an object list of ~3M items, this drops the .items() time
+ # from 25s down to 1.3s
+ gc.disable()
+ try:
+ values = PyList_New(self._active)
+ out_idx = 0
+ for i from 0 <= i < self._table_mask:
+ cur = self._table[i]
+ if cur == NULL or cur == _dummy:
+ continue
+ else:
+ address = <object>cur.address
+ proxy = self._proxy_for(address, cur)
+ item = (address, proxy)
+ # SET_ITEM steals a reference
+ Py_INCREF(<PyObject *>item)
+ PyList_SET_ITEM(values, out_idx, item)
+ out_idx += 1
+ finally:
+ if enabled:
+ gc.enable()
return values
def itervalues(self):
=== modified file 'meliae/loader.py'
--- a/meliae/loader.py 2009-12-29 15:45:32 +0000
+++ b/meliae/loader.py 2009-12-29 15:59:46 +0000
@@ -577,7 +577,7 @@
# _fill_total_size(objs)
return ObjManager(objs, show_progress=show_prog)
-#_load = _load_moc
+_load = _load_moc
def remove_expensive_references(source, total_objs=0, show_progress=False):
More information about the bazaar-commits mailing list