Rev 106: fix up the failing tests. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 4 15:36:11 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
------------------------------------------------------------
revno: 106
revision-id: john at arbash-meinel.com-20090304152748-iqp4zqlzvnq5pm23
parent: john at arbash-meinel.com-20090304150015-b6o2fru8grx5ubpm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Wed 2009-03-04 09:27:48 -0600
message:
fix up the failing tests.
The new delta code needs a 16-byte window to match, so to *know* that there will
be a match, you need ~32 bytes in common (this guarantees that some 16-byte run
within that 32-byte range will match).
Also, when setting 'max_delta', it is possible that we run out of bytes before
we actually find the last match -- a match that would have made things compress
better. This is rare in practice, because real texts are longer than 40 bytes,
but it happens in testing.
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-03 22:50:27 +0000
+++ b/groupcompress.py 2009-03-04 15:27:48 +0000
@@ -134,14 +134,14 @@
self.labels_deltas = {}
self._delta_index = _groupcompress_pyx.DeltaIndex()
- def compress(self, key, chunks, expected_sha, soft=False):
+ def compress(self, key, bytes, expected_sha, soft=False):
"""Compress lines with label key.
:param key: A key tuple. It is stored in the output
for identification of the text during decompression. If the last
element is 'None' it is replaced with the sha1 of the text -
e.g. sha1:xxxxxxx.
- :param chunks: The chunks to be compressed
+ :param bytes: The bytes to be compressed
:param expected_sha: If non-None, the sha the lines are believed to
have. During compression the sha is calculated; a mismatch will
cause an error.
@@ -150,9 +150,6 @@
:return: The sha1 of lines, and the number of bytes accumulated in
the group output so far.
"""
- # TODO: Change this to a bytes interface, since the output is now a
- # bytes interface anyway.
- bytes = ''.join(chunks)
if not _FAST or expected_sha is None:
sha1 = sha_string(bytes)
else:
@@ -629,7 +626,7 @@
groups += 1
last_prefix = prefix
found_sha1, end_point = self._compressor.compress(record.key,
- [bytes], record.sha1, soft=soft)
+ bytes, record.sha1, soft=soft)
if record.key[-1] is None:
key = record.key[:-1] + ('sha1:' + found_sha1,)
else:
=== modified file 'tests/test_groupcompress.py'
--- a/tests/test_groupcompress.py 2009-03-02 19:43:37 +0000
+++ b/tests/test_groupcompress.py 2009-03-04 15:27:48 +0000
@@ -20,7 +20,7 @@
import zlib
from bzrlib import tests
-from bzrlib.osutils import sha_strings
+from bzrlib.osutils import sha_string
from bzrlib.plugins.groupcompress_rabin import errors, groupcompress
from bzrlib.tests import (
TestCaseWithTransport,
@@ -60,8 +60,8 @@
# diff against NUKK
compressor = groupcompress.GroupCompressor(True)
sha1, end_point = compressor.compress(('label',),
- ['strange\n', 'common\n'], None)
- self.assertEqual(sha_strings(['strange\n', 'common\n']), sha1)
+ 'strange\ncommon\n', None)
+ self.assertEqual(sha_string('strange\ncommon\n'), sha1)
expected_lines = [
'fulltext\n',
'label:label\nsha1:%s\n' % sha1,
@@ -71,27 +71,44 @@
self.assertEqual(expected_lines, compressor.lines)
self.assertEqual(sum(map(len, expected_lines)), end_point)
+ def _chunks_to_repr_lines(self, chunks):
+ return '\n'.join(map(repr, ''.join(chunks).split('\n')))
+
+ def assertEqualDiffEncoded(self, expected, actual):
+ """Compare the actual content to the expected content.
+
+ :param expected: A group of chunks that we expect to see
+ :param actual: The measured 'chunks'
+
+ We will transform the chunks back into lines, and then run 'repr()'
+ over them to handle non-ascii characters.
+ """
+ self.assertEqualDiff(self._chunks_to_repr_lines(expected),
+ self._chunks_to_repr_lines(actual))
+
def test_two_nosha_delta(self):
compressor = groupcompress.GroupCompressor(True)
sha1_1, _ = compressor.compress(('label',),
- ['strange\n', 'common very very very long line\n'], None)
+ 'strange\ncommon long line\nthat needs a 16 byte match\n', None)
expected_lines = list(compressor.lines)
sha1_2, end_point = compressor.compress(('newlabel',),
- ['common very very very long line\n', 'different\n'], None)
- self.assertEqual(sha_strings(['common very very very long line\n',
- 'different\n']), sha1_2)
+ 'common long line\nthat needs a 16 byte match\ndifferent\n', None)
+ self.assertEqual(sha_string('common long line\n'
+ 'that needs a 16 byte match\n'
+ 'different\n'), sha1_2)
expected_lines.extend([
'delta\n'
'label:newlabel\n',
'sha1:%s\n' % sha1_2,
'len:16\n',
+ # source and target length
+ '\x7e\x36',
# copy the line common
- 'c,72,17\n',
+ '\x91\x52\x2c', #copy, offset 0x52, len 0x2c
# add the line different, and the trailing newline
- 'i,2\n',
- 'different\n',
+ '\x0adifferent\n', # insert 10 bytes
])
- self.assertEqualDiff(''.join(expected_lines), ''.join(compressor.lines))
+ self.assertEqualDiffEncoded(expected_lines, compressor.lines)
self.assertEqual(sum(map(len, expected_lines)), end_point)
def test_three_nosha_delta(self):
@@ -99,52 +116,50 @@
# both parents.
compressor = groupcompress.GroupCompressor(True)
sha1_1, end_point = compressor.compress(('label',),
- ['strange\n', 'common long line\n'], None)
+ 'strange\ncommon very very long line\nwith some extra text\n', None)
sha1_2, _ = compressor.compress(('newlabel',),
- ['common long line\n', 'different\n', 'moredifferent\n'], None)
+ 'different\nmoredifferent\nand then some more\n', None)
expected_lines = list(compressor.lines)
sha1_3, end_point = compressor.compress(('label3',),
- ['new\n', 'common long line\n', 'different\n', 'moredifferent\n'],
+ 'new\ncommon very very long line\nwith some extra text\n'
+ 'different\nmoredifferent\nand then some more\n',
None)
self.assertEqual(
- sha_strings(['new\n', 'common long line\n', 'different\n',
- 'moredifferent\n']),
+ sha_string('new\ncommon very very long line\nwith some extra text\n'
+ 'different\nmoredifferent\nand then some more\n'),
sha1_3)
expected_lines.extend([
'delta\n',
'label:label3\n',
'sha1:%s\n' % sha1_3,
- 'len:11\n',
+ 'len:13\n',
+ '\xfa\x01\x5f' # source and target length
# insert new
- 'i,1\n',
- 'new\n',
- # copy the line common
- 'c,72,17\n',
- # copy the lines different, moredifferent and trailing newline
- 'c,165,25\n',
+ '\x03new',
+ # Copy of first parent 'common' range
+ '\x91\x51\x31' # copy, offset 0x51, 0x31 bytes
+ # Copy of second parent 'different' range
+ '\x91\xcf\x2b' # copy, offset 0xcf, 0x2b bytes
])
- self.assertEqualDiff(''.join(expected_lines),
- ''.join(compressor.lines))
+ self.assertEqualDiffEncoded(expected_lines, compressor.lines)
self.assertEqual(sum(map(len, expected_lines)), end_point)
def test_stats(self):
compressor = groupcompress.GroupCompressor(True)
- compressor.compress(('label',),
- ['strange\n', 'common\n'], None)
+ compressor.compress(('label',), 'strange\ncommon\n', None)
compressor.compress(('newlabel',),
- ['common\n', 'different\n', 'moredifferent\n'], None)
+ 'common\ndifferent\nmoredifferent\n', None)
compressor.compress(('label3',),
- ['new\n', 'common\n', 'different\n', 'moredifferent\n'], None)
+ 'new\ncommon\ndifferent\nmoredifferent\n', None)
self.assertAlmostEqual(0.3, compressor.ratio(), 1)
def test_extract_from_compressor(self):
# Knit fetching will try to reconstruct texts locally which results in
# reading something that is in the compressor stream already.
compressor = groupcompress.GroupCompressor(True)
- sha_1, _ = compressor.compress(('label',),
- ['strange\n', 'common\n'], None)
+ sha_1, _ = compressor.compress(('label',), 'strange\ncommon\n', None)
sha_2, _ = compressor.compress(('newlabel',),
- ['common\n', 'different\n', 'moredifferent\n'], None)
+ 'common\ndifferent\nmoredifferent\n', None)
# get the first out
self.assertEqual((['strange\ncommon\n'], sha_1),
compressor.extract(('label',)))
More information about the bazaar-commits
mailing list