Rev 4202: Start reducing duplication between gc implementations. in file:///home/vila/src/bzr/experimental/gc-py-bbc/
Vincent Ladeuil
v.ladeuil+lp at free.fr
Fri Mar 27 12:12:11 GMT 2009
At file:///home/vila/src/bzr/experimental/gc-py-bbc/
------------------------------------------------------------
revno: 4202
revision-id: v.ladeuil+lp at free.fr-20090327121210-3tk013ffpia10typ
parent: v.ladeuil+lp at free.fr-20090327103944-w1cj5fm56eloochs
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: groupcompress-python-only
timestamp: Fri 2009-03-27 13:12:10 +0100
message:
Start reducing duplication between gc implementations.
* bzrlib/tests/test__groupcompress.py:
Fix line too long and various cosmetic changes.
(load_tests): Fix docstring.
* bzrlib/groupcompress.py:
(_CommonGroupCompressor.compress,
_CommonGroupCompressor._compress): Start reducing duplication between
C and python versions.
* bzrlib/_groupcompress_py.py:
(make_delta): Add checks for test conformance.
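
For context, the shape of this refactoring is a classic template
method: the driver logic that both backends share now lives once in
_CommonGroupCompressor.compress(), which delegates the backend-specific
delta generation to a _compress() hook. A minimal sketch of the
pattern, with hashlib standing in for osutils.sha_string and the
empty-content and nostore_sha handling omitted:

    import hashlib

    class _CommonGroupCompressor(object):

        def compress(self, key, bytes, expected_sha, nostore_sha=None,
                     soft=False):
            # Shared policy: trust the caller's sha1 when given,
            # otherwise compute it, then delegate the delta work.
            if expected_sha is not None:
                sha1 = expected_sha
            else:
                sha1 = hashlib.sha1(bytes).hexdigest()
            # A delta bigger than half the fulltext is not worth it.
            return self._compress(key, bytes, sha1, len(bytes) / 2, soft)

        def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
            # Backend hook: the python and pyrex compressors each
            # override this with their own delta generation.
            raise NotImplementedError(self._compress)

The groupcompress.py hunk below does exactly this: compress() moves up
into the common base class, and the two existing compressors keep only
their _compress() bodies.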
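
On the conformance side, the isinstance() checks added to the
pure-python make_delta() make it raise the same TypeErrors as the
compiled pyrex version, so the parameterized tests in
test__groupcompress.py run unchanged against both implementations.
Illustrative calls (usage sketch only, not actual test code):

    from bzrlib._groupcompress_py import make_delta

    make_delta('a string', 'another string')   # accepted by both impls
    make_delta(u'not a str', 'a string')       # TypeError in both
    make_delta('a string', object())           # TypeError in both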
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py 2009-03-27 10:38:26 +0000
+++ b/bzrlib/_groupcompress_py.py 2009-03-27 12:12:10 +0000
@@ -264,6 +264,11 @@
def make_delta(source_bytes, target_bytes):
"""Create a delta from source to target."""
+ # TODO: The checks below may not be at the right place yet.
+ if not isinstance(source_bytes, str):
+ raise TypeError('source is not a str')
+ if not isinstance(target_bytes, str):
+ raise TypeError('target is not a str')
line_locations = EquivalenceTable([])
return None
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx 2009-03-24 20:02:26 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx 2009-03-27 12:12:10 +0000
@@ -89,10 +89,6 @@
cdef readonly unsigned int _max_num_sources
cdef public unsigned long _source_offset
- def __repr__(self):
- return '%s(%d, %d)' % (self.__class__.__name__,
- len(self._sources), self._source_offset)
-
def __init__(self, source=None):
self._sources = []
self._index = NULL
@@ -104,6 +100,10 @@
if source is not None:
self.add_source(source, 0)
+ def __repr__(self):
+ return '%s(%d, %d)' % (self.__class__.__name__,
+ len(self._sources), self._source_offset)
+
def __dealloc__(self):
if self._index != NULL:
free_delta_index(self._index)
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-27 10:38:26 +0000
+++ b/bzrlib/groupcompress.py 2009-03-27 12:12:10 +0000
@@ -741,6 +741,70 @@
self.labels_deltas = {}
self._block = GroupCompressBlock()
+ def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
+ """Compress lines with label key.
+
+ :param key: A key tuple. It is stored in the output
+ for identification of the text during decompression. If the last
+ element is 'None' it is replaced with the sha1 of the text -
+ e.g. sha1:xxxxxxx.
+ :param bytes: The bytes to be compressed
+ :param expected_sha: If non-None, the sha the lines are believed to
+ have. During compression the sha is calculated; a mismatch will
+ cause an error.
+ :param nostore_sha: If the computed sha1 sum matches, we will raise
+ ExistingContent rather than adding the text.
+ :param soft: Do a 'soft' compression. This means that we require larger
+ ranges to match to be considered for a copy command.
+
+ :return: The sha1 of lines, the start and end offsets in the delta, the
+ type ('fulltext' or 'delta') and the number of bytes accumulated in
+ the group output so far.
+
+ :seealso VersionedFiles.add_lines:
+ """
+ if not bytes: # empty, like a dir entry, etc
+ if nostore_sha == _null_sha1:
+ raise errors.ExistingContent()
+ self._block.add_entry(key, type='empty',
+ sha1=None, start=0,
+ length=0)
+ return _null_sha1, 0, 0, 'fulltext', 0
+ # we assume someone knew what they were doing when they passed it in
+ if expected_sha is not None:
+ sha1 = expected_sha
+ else:
+ sha1 = osutils.sha_string(bytes)
+ if nostore_sha is not None:
+ if sha1 == nostore_sha:
+ raise errors.ExistingContent()
+ if key[-1] is None:
+ key = key[:-1] + ('sha1:' + sha1,)
+
+ return self._compress(key, bytes, sha1, len(bytes) / 2, soft)
+
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """Compress lines with label key.
+
+ :param key: A key tuple. It is stored in the output for identification
+ of the text during decompression.
+
+ :param bytes: The bytes to be compressed
+
+ :param sha1: The sha1 for 'bytes'.
+
+ :param max_delta_size: The size above which we issue a fulltext instead
+ of a delta.
+
+ :param soft: Do a 'soft' compression. This means that we require larger
+ ranges to match to be considered for a copy command.
+
+ :return: The sha1 of lines, the start and end offsets in the delta, the
+ type ('fulltext' or 'delta') and the number of bytes accumulated in
+ the group output so far.
+ """
+ raise NotImplementedError(self._compress)
+
def extract(self, key):
"""Extract a key previously added to the compressor.
@@ -812,43 +876,14 @@
self.lines = self.line_locations.lines
self._present_prefixes = set()
- def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
- """Compress lines with label key.
-
- :param key: A key tuple. It is stored in the output
- for identification of the text during decompression. If the last
- element is 'None' it is replaced with the sha1 of the text -
- e.g. sha1:xxxxxxx.
- :param bytes: The bytes to be compressed
- :param expected_sha: If non-None, the sha the lines are believed to
- have. During compression the sha is calculated; a mismatch will
- cause an error.
- :param nostore_sha: If the computed sha1 sum matches, we will raise
- ExistingContent rather than adding the text.
- :param soft: Do a 'soft' compression. This means that we require larger
- ranges to match to be considered for a copy command.
- :return: The sha1 of lines, and the number of bytes accumulated in
- the group output so far.
- :seealso VersionedFiles.add_lines:
- """
- if not bytes: # empty, like a dir entry, etc
- if nostore_sha == _null_sha1:
- raise errors.ExistingContent()
- self._block.add_entry(key, type='empty',
- sha1=None, start=0,
- length=0)
- return _null_sha1, 0, 0, 'fulltext', 0
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """see _CommonGroupCompressor._compress"""
bytes_length = len(bytes)
new_lines = osutils.split_lines(bytes)
- sha1 = osutils.sha_string(bytes)
- if sha1 == nostore_sha:
- raise errors.ExistingContent()
- if key[-1] is None:
- key = key[:-1] + ('sha1:' + sha1,)
out_lines, index_lines = self.line_locations.make_delta(new_lines,
soft=soft)
delta_length = sum(map(len, out_lines))
- if delta_length * 2 > bytes_length:
+ if delta_length > max_delta_size:
# The delta is longer than allowed, insert a fulltext instead
type = 'fulltext'
out_lines = ['f', encode_base128_int(bytes_length)]
@@ -865,13 +900,13 @@
out_length = len(out_lines[3]) + 1 + delta_length
self._block.add_entry(key, type=type, sha1=sha1,
start=self.endpoint, length=out_length)
- start = self.endpoint # Keep it
+ start = self.endpoint # Before insertion
delta_start = (self.endpoint, len(self.lines))
self.output_lines(out_lines, index_lines)
self.input_bytes += bytes_length
delta_end = (self.endpoint, len(self.lines))
self.labels_deltas[key] = (delta_start, delta_end)
- return sha1, start, self.endpoint, 'delta', out_length
+ return sha1, start, self.endpoint, type, out_length
def flush(self):
self._block.set_content(''.join(self.lines))
@@ -917,42 +952,8 @@
self.num_keys = 0
self._delta_index = DeltaIndex()
- def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
- """Compress lines with label key.
-
- :param key: A key tuple. It is stored in the output
- for identification of the text during decompression. If the last
- element is 'None' it is replaced with the sha1 of the text -
- e.g. sha1:xxxxxxx.
- :param bytes: The bytes to be compressed
- :param expected_sha: If non-None, the sha the lines are believed to
- have. During compression the sha is calculated; a mismatch will
- cause an error.
- :param nostore_sha: If the computed sha1 sum matches, we will raise
- ExistingContent rather than adding the text.
- :param soft: Do a 'soft' compression. This means that we require larger
- ranges to match to be considered for a copy command.
- :return: The sha1 of lines, and the number of bytes accumulated in
- the group output so far.
- :seealso VersionedFiles.add_lines:
- """
- if not bytes: # empty, like a dir entry, etc
- if nostore_sha == _null_sha1:
- raise errors.ExistingContent()
- self._block.add_entry(key, type='empty',
- sha1=None, start=0,
- length=0)
- return _null_sha1, 0, 0, 'fulltext', 0
- # we assume someone knew what they were doing when they passed it in
- if expected_sha is not None:
- sha1 = expected_sha
- else:
- sha1 = osutils.sha_string(bytes)
- if nostore_sha is not None:
- if sha1 == nostore_sha:
- raise errors.ExistingContent()
- if key[-1] is None:
- key = key[:-1] + ('sha1:' + sha1,)
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """see _CommonGroupCompressor._compress"""
input_len = len(bytes)
# By having action/label/sha1/len, we can parse the group if the index
# was ever destroyed, we have the key in 'label', we know the final
@@ -967,7 +968,6 @@
raise AssertionError('_source_offset != endpoint'
' somehow the DeltaIndex got out of sync with'
' the output lines')
- max_delta_size = len(bytes) / 2
delta = self._delta_index.make_delta(bytes, max_delta_size)
if (delta is None):
type = 'fulltext'
@@ -988,7 +988,7 @@
self._delta_index.add_delta_source(delta, len_mini_header)
self._block.add_entry(key, type=type, sha1=sha1,
start=self.endpoint, length=length)
- start = self.endpoint
+ start = self.endpoint # Before insertion
delta_start = (self.endpoint, len(self.lines))
self.num_keys += 1
self.output_chunks(new_chunks)
=== modified file 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress.py 2009-03-25 20:58:16 +0000
+++ b/bzrlib/tests/test__groupcompress.py 2009-03-27 12:12:10 +0000
@@ -24,7 +24,7 @@
def load_tests(standard_tests, module, loader):
- """Parameterize tests for view-aware vs not."""
+ """Parameterize tests for all versions of groupcompress."""
to_adapt, result = tests.split_suite_by_condition(
standard_tests, tests.condition_isinstance(TestMakeAndApplyDelta))
scenarios = [
@@ -50,6 +50,7 @@
def feature_name(self):
return 'bzrlib._groupcompress_pyx'
+
CompiledGroupCompressFeature = _CompiledGroupCompressFeature()
_text1 = """\
@@ -120,14 +121,14 @@
def test_make_delta_is_typesafe(self):
self.make_delta('a string', 'another string')
- self.assertRaises(TypeError,
- self.make_delta, 'a string', object())
- self.assertRaises(TypeError,
- self.make_delta, 'a string', u'not a string')
- self.assertRaises(TypeError,
- self.make_delta, object(), 'a string')
- self.assertRaises(TypeError,
- self.make_delta, u'not a string', 'a string')
+
+ def _check_make_delta(string1, string2):
+ self.assertRaises(TypeError, self.make_delta, string1, string2)
+
+ _check_make_delta('a string', object())
+ _check_make_delta('a string', u'not a string')
+ _check_make_delta(object(), 'a string')
+ _check_make_delta(u'not a string', 'a string')
def test_make_noop_delta(self):
ident_delta = self.make_delta(_text1, _text1)
@@ -150,14 +151,11 @@
def test_apply_delta_is_typesafe(self):
self.apply_delta(_text1, 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, object(), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, unicode(_text1), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, _text1, u'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, _text1, object())
+ self.assertRaises(TypeError, self.apply_delta, object(), 'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta,
+ unicode(_text1), 'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta, _text1, u'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta, _text1, object())
def test_apply_delta(self):
target = self.apply_delta(_text1,
@@ -193,7 +191,8 @@
di.add_source(_first_text, 0)
self.assertEqual(len(_first_text), di._source_offset)
di.add_source(_second_text, 0)
- self.assertEqual(len(_first_text) + len(_second_text), di._source_offset)
+ self.assertEqual(len(_first_text) + len(_second_text),
+ di._source_offset)
delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(_first_text + _second_text, delta)
self.assertEqualDiff(_third_text, result)
@@ -230,8 +229,8 @@
second_delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(source, second_delta)
self.assertEqualDiff(_third_text, result)
- # We should be able to match against the 'previous text\nand has some...'
- # that was part of the delta bytes
+ # We should be able to match against the
+ # 'previous text\nand has some...' that was part of the delta bytes
# Note that we don't match the 'common with the', because it isn't long
# enough to match in the original text, and those bytes are not present
# in the delta for the second text.
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-25 21:20:18 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-27 12:12:10 +0000
@@ -30,7 +30,7 @@
def load_tests(standard_tests, module, loader):
- """Parameterize tests for view-aware vs not."""
+ """Parameterize tests for all versions of groupcompress."""
to_adapt, result = tests.split_suite_by_condition(
standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
scenarios = [
@@ -113,7 +113,7 @@
# get the first out
self.assertEqual(('strange\ncommon long line\n'
'that needs a 16 byte match\n', sha1_1),
- compressor.extract(('label',)))
+ compressor.extract(('label',)))
# and the second
self.assertEqual(('common long line\nthat needs a 16 byte match\n'
'different\n', sha1_2),