Rev 3910: Have _LazyGroupContentManager pre-extract everything it holds. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 19 03:06:08 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/lazy_gc_stream
------------------------------------------------------------
revno: 3910
revision-id: john at arbash-meinel.com-20090319030602-stjxub1g3yhq0u32
parent: john at arbash-meinel.com-20090317203354-77ub807e883l8qx1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lazy_gc_stream
timestamp: Wed 2009-03-18 22:06:02 -0500
message:
Have _LazyGroupContentManager pre-extract everything it holds.
This doesn't seem to speed up the decompression itself, but it does prevent us
from re-allocating the _content buffer for every record we extract.
Saves ~1s out of a 30s mysql-525 repack.
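
For context, the win is allocator churn rather than zlib speed: each record
extraction used to call _ensure_content() for only a few more bytes, so
self._content was re-grown one small step at a time. Below is a rough
standalone sketch of that effect -- not bzrlib code; the ~2MB buffer and 32kB
step come from the comment in _prepare_for_extract, and everything else
(names, payload) is invented for illustration:

import timeit
import zlib

# Stand-in for a group's _z_content: ~2MB of moderately compressible text.
RAW = b'some moderately compressible text\n' * 61681
Z_CONTENT = zlib.compress(RAW)

def incremental(step=32 * 1024):
    # Old pattern: every extraction asks for ~32kB more, so the growing
    # buffer is re-allocated and copied on each call.
    decomp = zlib.decompressobj()
    content = decomp.decompress(Z_CONTENT, step)
    while decomp.unconsumed_tail:
        content += decomp.decompress(decomp.unconsumed_tail, step)
    return content + decomp.flush()

def pre_extract():
    # New pattern: one allocation covering everything we know we will need.
    return zlib.decompress(Z_CONTENT)

assert incremental() == pre_extract()
print('incremental: %.3fs' % timeit.timeit(incremental, number=10))
print('pre-extract: %.3fs' % timeit.timeit(pre_extract, number=10))

The real manager can't assume the whole block is wanted, so
_prepare_for_extract() passes self._last_byte (the end of the furthest record
added via add_factory) as the one-shot target instead.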
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-17 20:33:54 +0000
+++ b/bzrlib/groupcompress.py 2009-03-19 03:06:02 +0000
@@ -227,17 +227,22 @@
             else:
                 # Start a zlib decompressor
                 assert self._compressor_name == 'zlib'
-                self._z_content_decompressor = zlib.decompressobj()
-                # Seed the decompressor with the uncompressed bytes, so that
-                # the rest of the code is simplified
-                self._content = self._z_content_decompressor.decompress(
-                    self._z_content, _ZLIB_DECOMP_WINDOW)
+                if num_bytes is None:
+                    self._content = zlib.decompress(self._z_content)
+                else:
+                    self._z_content_decompressor = zlib.decompressobj()
+                    # Seed the decompressor with the uncompressed bytes, so
+                    # that the rest of the code is simplified
+                    self._content = self._z_content_decompressor.decompress(
+                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
                 # Any bytes remaining to be decompressed will be in the
                 # decompressors 'unconsumed_tail'
-            self._z_content = None
         # Do we have enough bytes already?
         if num_bytes is not None and len(self._content) >= num_bytes:
             return
+        if num_bytes is None and self._z_content_decompressor is None:
+            # We must have already decompressed everything
+            return
         # If we got this far, and don't have a decompressor, something is wrong
         assert self._z_content_decompressor is not None
         remaining_decomp = self._z_content_decompressor.unconsumed_tail
@@ -508,6 +513,7 @@
             else:
                 return ''
         if storage_kind in ('fulltext', 'chunked'):
+            self._manager._prepare_for_extract()
             block = self._manager._block
             _, bytes = block.extract(self.key, self._start, self._end)
             if storage_kind == 'fulltext':
@@ -525,6 +531,7 @@
         self._block = block
         # We need to preserve the ordering
         self._factories = []
+        self._last_byte = 0

     def add_factory(self, key, parents, start, end):
         if not self._factories:
@@ -534,6 +541,7 @@
         # Note that this creates a reference cycle....
         factory = _LazyGroupCompressFactory(key, parents, self,
             start, end, first=first)
+        self._last_byte = max(end, self._last_byte)
         self._factories.append(factory)

     def get_record_stream(self):
@@ -570,6 +578,7 @@
             factory._start = cur_endpoint
             factory._end = end_point
             cur_endpoint = end_point
+        self._last_byte = cur_endpoint
         new_block = compressor.flush()
         # TODO: Should we check that new_block really *is* smaller than the old
         #       block? It seems hard to come up with a method that it would
@@ -581,6 +590,14 @@
                 ' %d bytes => %d bytes', delta, old_length,
                 self._block._content_length)

+    def _prepare_for_extract(self):
+        """A _LazyGroupCompressFactory is about to extract to fulltext."""
+        # We expect that if one child is going to fulltext, all will be. This
+        # helps prevent all of them from extracting a small amount at a time.
+        # Which in itself isn't terribly expensive, but resizing 2MB 32kB at a
+        # time (self._block._content) is a little expensive.
+        self._block._ensure_content(self._last_byte)
+
     def _check_rebuild_block(self):
         """Check to see if our block should be repacked."""
         total_bytes_used = 0
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-17 19:38:14 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-19 03:06:02 +0000
@@ -214,7 +214,7 @@
         self.assertEqual('', block._z_content)
         block._ensure_content()
         self.assertEqual('', block._content)
-        self.assertIs(None, block._z_content)
+        self.assertEqual('', block._z_content)
         block._ensure_content() # Ensure content is safe to call 2x

     def test_from_bytes_with_labels(self):
@@ -264,7 +264,7 @@
         self.assertEqual(z_content, block._z_content)
         self.assertIs(None, block._content)
         block._ensure_content()
-        self.assertIs(None, block._z_content)
+        self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)

     def test_from_old_bytes(self):
@@ -282,7 +282,7 @@
             z_bytes)
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         block._ensure_content()
-        self.assertIs(None, block._z_content)
+        self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)

     def test_add_entry(self):