Rev 19: Merge John. in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
Robert Collins
robertc at robertcollins.net
Fri Jul 25 04:13:27 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
------------------------------------------------------------
revno: 19
revision-id: robertc at robertcollins.net-20080725031326-2c1xq6xusfoefxsi
parent: robertc at robertcollins.net-20080725000927-d88agg6z2bfzcks2
parent: john at arbash-meinel.com-20080725014613-6pe4o1mjuhajfkwb
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-07-25 13:13:26 +1000
message:
Merge John.
modified:
_groupcompress_c.pyx _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
tests/test__groupcompress_c.py test__groupcompress_-20080724145854-koifwb7749cfzrvj-1
------------------------------------------------------------
revno: 14.1.33
revision-id: john at arbash-meinel.com-20080725014613-6pe4o1mjuhajfkwb
parent: john at arbash-meinel.com-20080725013428-hi3lvvasnz8qorp8
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress
timestamp: Thu 2008-07-24 20:46:13 -0500
message:
Use the cached match from the previous run, drops time from 2.4s => 2.0s on inventory.py
modified:
_groupcompress_c.pyx _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
------------------------------------------------------------
revno: 14.1.32
revision-id: john at arbash-meinel.com-20080725013428-hi3lvvasnz8qorp8
parent: john at arbash-meinel.com-20080724230854-rly3bvlhfl0p06jq
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress
timestamp: Thu 2008-07-24 20:34:28 -0500
message:
Switch away from += for older versions of pyrex,
and increase the internal hash array free space.
modified:
_groupcompress_c.pyx _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
tests/test__groupcompress_c.py test__groupcompress_-20080724145854-koifwb7749cfzrvj-1
=== modified file '_groupcompress_c.pyx'
--- a/_groupcompress_c.pyx 2008-07-25 00:09:27 +0000
+++ b/_groupcompress_c.pyx 2008-07-25 03:13:26 +0000
@@ -175,7 +175,7 @@
# size hash table. However, any collision would then have a long way to
# traverse before it could find a 'free' slot.
# So we set the minimum size to give us 33% empty slots.
- min_size = <Py_ssize_t>(needed * 1.5)
+ min_size = <Py_ssize_t>(needed * 2)
hash_size = 1
while hash_size < min_size:
hash_size = hash_size << 1
@@ -199,11 +199,11 @@
# We start off with a 8k hash (after doubling), because there isn't a
# lot of reason to go smaller than that (this class isn't one you'll be
# instantiating thousands of, and you are always likely to grow here.)
- hash_size = 4096
+ hash_size = 2048
while hash_size < needed:
hash_size = hash_size << 1
- # And we always give at least 2x blank space
- hash_size = hash_size << 1
+ # And we always give at least 4x blank space
+ hash_size = hash_size << 2
return hash_size
def _py_compute_recommended_hash_size(self, needed):
@@ -543,10 +543,30 @@
cdef int i
lst = []
for i from 0 <= i < count:
- lst.append(array[i])
+ PyList_Append(lst, array[i])
return lst
+cdef Py_ssize_t list_as_array(object lst, Py_ssize_t **array) except -1:
+ """Convert a Python sequence to an integer array.
+
+ :return: The size of the output, -1 on error
+ """
+ cdef int i
+ cdef PyObject *seq
+ cdef Py_ssize_t size
+
+ seq = PySequence_Fast(lst, "expected a sequence for lst")
+ try:
+ size = PySequence_Fast_GET_SIZE(seq)
+ array[0] = <Py_ssize_t*>safe_malloc(sizeof(Py_ssize_t) * size)
+ for i from 0 <= i < size:
+ array[0][i] = <object>PySequence_Fast_GET_ITEM(seq, i)
+ finally:
+ Py_DECREF(seq)
+ return size
+
+
def _get_longest_match(equivalence_table, pos, max_pos, locations):
"""Get the longest possible match for the current position."""
cdef EquivalenceTable eq
@@ -573,13 +593,16 @@
copy_count = 0
# TODO: Handle when locations is not None
+ if locations is not None:
+ match_count = list_as_array(locations, &matches)
while cpos < cmax_pos:
# TODO: Instead of grabbing the full list of matches and then doing an
# intersection, use a helper that does the intersection
# as-it-goes.
- line = PyList_GET_ITEM(eq._right_lines, cpos)
- safe_free(<void**>&matches)
- match_count = eq.get_raw_matches(line, &matches)
+ if matches == NULL:
+ line = PyList_GET_ITEM(eq._right_lines, cpos)
+ safe_free(<void**>&matches)
+ match_count = eq.get_raw_matches(line, &matches)
if match_count == 0:
# No more matches, just return whatever we have, but we know that
# this last position is not going to match anything
=== modified file 'tests/test__groupcompress_c.py'
--- a/tests/test__groupcompress_c.py 2008-07-24 23:08:54 +0000
+++ b/tests/test__groupcompress_c.py 2008-07-25 01:34:28 +0000
@@ -57,10 +57,10 @@
def test_minimum_hash_size(self):
eq = self._gc_module.EquivalenceTable([])
- # We request at least 33% free space in the hash (to make collisions
+ # We request at least 50% free space in the hash (to make collisions
# more bearable)
- self.assertEqual(1024, eq._py_compute_minimum_hash_size(683))
- self.assertEqual(2048, eq._py_compute_minimum_hash_size(684))
+ self.assertEqual(1024, eq._py_compute_minimum_hash_size(512))
+ self.assertEqual(2048, eq._py_compute_minimum_hash_size(513))
self.assertEqual(2048, eq._py_compute_minimum_hash_size(1000))
self.assertEqual(2048, eq._py_compute_minimum_hash_size(1024))
@@ -70,11 +70,11 @@
self.assertEqual(8192, eq._py_compute_recommended_hash_size(10))
self.assertEqual(8192, eq._py_compute_recommended_hash_size(1000))
self.assertEqual(8192, eq._py_compute_recommended_hash_size(2000))
- self.assertEqual(8192, eq._py_compute_recommended_hash_size(4000))
+ self.assertEqual(16384, eq._py_compute_recommended_hash_size(4000))
- # And we recommend at least 50% free slots
- self.assertEqual(8192, eq._py_compute_recommended_hash_size(4096))
- self.assertEqual(16384, eq._py_compute_recommended_hash_size(4097))
+ # And we recommend at least 75% free slots
+ self.assertEqual(16384, eq._py_compute_recommended_hash_size(4096))
+ self.assertEqual(32768, eq._py_compute_recommended_hash_size(4097))
def test__raw_lines(self):
eq = self._gc_module.EquivalenceTable([1, 2, 3])
More information about the bazaar-commits
mailing list