Rev 3911: For the simple test cases we have, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 25 20:00:45 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
------------------------------------------------------------
revno: 3911
revision-id: john at arbash-meinel.com-20090325200023-lfrhvlvdsv3swdpw
parent: john at arbash-meinel.com-20090325193113-7crd62vmi7ryobh5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Wed 2009-03-25 15:00:23 -0500
message:
For the simple test cases we have,
the python matcher works exactly the same as the compiled matcher.
Because the test case has exact line matching (no sub-line matches
are expected.)
Cheer!! Testing is easier.
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py 2009-03-25 17:20:33 +0000
+++ b/bzrlib/_groupcompress_py.py 2009-03-25 20:00:23 +0000
@@ -44,6 +44,7 @@
def _update_matching_lines(self, new_lines, index):
matches = self._matching_lines
start_idx = len(self.lines)
+ assert len(new_lines) == len(index)
for idx, do_index in enumerate(index):
if not do_index:
continue
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-25 19:31:13 +0000
+++ b/bzrlib/groupcompress.py 2009-03-25 20:00:23 +0000
@@ -770,6 +770,7 @@
self.endpoint = 0
self.input_bytes = 0
self.labels_deltas = {}
+ self._block = GroupCompressBlock()
def ratio(self):
"""Return the overall compression ratio."""
@@ -848,35 +849,63 @@
the group output so far.
:seealso VersionedFiles.add_lines:
"""
+ if not bytes: # empty, like a dir entry, etc
+ if nostore_sha == _null_sha1:
+ raise errors.ExistingContent()
+ self._block.add_entry(key, type='empty',
+ sha1=None, start=0,
+ length=0)
+ return _null_sha1, 0, 0, 'fulltext', 0
+ bytes_length = len(bytes)
new_lines = osutils.split_lines(bytes)
sha1 = osutils.sha_string(bytes)
+ if sha1 == nostore_sha:
+ raise errors.ExistingContent()
if key[-1] is None:
key = key[:-1] + ('sha1:' + sha1,)
- out_lines = []
- index_lines = []
+ # reserved for content type, content length, source_len, target_len
+ out_lines = ['', '', '', '']
+ index_lines = [False, False, False, False]
blocks = self.get_matching_blocks(new_lines, soft=soft)
current_line_num = 0
# We either copy a range (while there are reusable lines) or we
# insert new lines. To find reusable lines we traverse
for old_start, new_start, range_len in blocks:
if new_start != current_line_num:
- # non-matching region
- self.flush_insert(current_line_num, new_start,
- new_lines, out_lines, index_lines)
+ # non-matching region, insert the content
+ self._flush_insert(current_line_num, new_start,
+ new_lines, out_lines, index_lines)
current_line_num = new_start + range_len
- if not range_len:
- continue
- self._flush_copy(old_start, range_len,
- new_lines, out_lines, index_lines)
+ if range_len:
+ self._flush_copy(old_start, range_len, out_lines, index_lines)
+ delta_length = sum(map(len, out_lines))
+ if delta_length * 2 > bytes_length:
+ # The delta is longer than the fulltext, insert a fulltext
+ type = 'fulltext'
+ out_lines = ['f', encode_base128_int(bytes_length)]
+ out_lines.extend(new_lines)
+ index_lines = [False, False]
+ index_lines.extend([True] * len(new_lines))
+ out_length = len(out_lines[1]) + bytes_length + 1
+ else:
+ # this is a worthy delta, output it
+ type = 'delta'
+ out_lines[0] = 'd'
+ out_lines[1] = encode_base128_int(delta_length)
+ out_lines[2] = encode_base128_int(self.endpoint)
+ out_lines[3] = encode_base128_int(bytes_length)
+ out_length = (len(out_lines[1]) + len(out_lines[2])
+ + len(out_lines[3]) + 1 + delta_length)
+ self._block.add_entry(key, type=type, sha1=sha1,
+ start=self.endpoint, length=out_length)
start = self.endpoint # Keep it
delta_start = (self.endpoint, len(self.lines))
self.output_lines(out_lines, index_lines)
- length = len(bytes)
- self.input_bytes += length
+ self.input_bytes += bytes_length
delta_end = (self.endpoint, len(self.lines))
self.labels_deltas[key] = (delta_start, delta_end)
# FIXME: lot of guessing below
- return sha1, start, self.endpoint, 'delta', length
+ return sha1, start, self.endpoint, 'delta', out_length
def extract(self, key):
"""Extract a key previously added to the compressor.
@@ -923,33 +952,9 @@
# code, we will also limit it to a 64kB copy
for start_byte in xrange(first_byte, stop_byte, 64*1024):
num_bytes = min(64*1024, stop_byte - first_byte)
- copy_command, copy_bytes = encode_copy_instruction(start_byte,
- num_bytes)
-
- def flush_range(self, new_line_start, source_line_start, match_num_lines,
- new_lines, out_lines, index_lines):
- """Insert the control codes for this copy & insert instruction.
-
- :param range_start:
- """
- if copy_start is not None:
- # range stops, flush and start a new copy range
- stop_byte = self.line_offsets[copy_start + range_len - 1]
- if copy_start == 0:
- start_byte = 0
- else:
- start_byte = self.line_offsets[copy_start - 1]
- bytes = stop_byte - start_byte
- copy_byte = 0
- copy_control_instruction =0
- new_lines.append(copy_control_instruction)
+ copy_bytes = encode_copy_instruction(start_byte, num_bytes)
+ out_lines.append(copy_bytes)
index_lines.append(False)
- return
- # not copying, or inserting is shorter than copying, so insert.
- new_lines.append(insert_instruction)
- new_lines.extend(lines[range_start:range_start+range_len])
- index_lines.append(False)
- index_lines.extend([copy_start is None]*range_len)
def flush(self):
# FIXME: ugly hack to masquerade ourself as the pyrex version
@@ -999,11 +1004,10 @@
"""
def __init__(self):
- super(PythonGroupCompressor, self).__init__()
+ super(PyrexGroupCompressor, self).__init__()
self.num_keys = 0
self._last = None
self._delta_index = DeltaIndex()
- self._block = GroupCompressBlock()
def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
"""Compress lines with label key.
@@ -2002,6 +2006,12 @@
return node[0], start, stop, basis_end, delta_end
+from bzrlib._groupcompress_py import (
+ apply_delta,
+ EquivalenceTable,
+ _get_longest_match,
+ trim_encoding_newline,
+ )
try:
from bzrlib._groupcompress_pyx import (
apply_delta,
@@ -2009,11 +2019,5 @@
)
GroupCompressor = PyrexGroupCompressor
except ImportError:
- from bzrlib._groupcompress_py import (
- apply_delta,
- EquivalenceTable,
- _get_longest_match,
- trim_encoding_newline,
- )
GroupCompressor = PythonGroupCompressor
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-25 19:31:13 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-25 20:00:23 +0000
@@ -32,28 +32,56 @@
)
+def load_tests(standard_tests, module, loader):
+ """Parameterize tests for view-aware vs not."""
+ to_adapt, result = tests.split_suite_by_condition(
+ standard_tests, tests.condition_isinstance(TestGroupCompressor))
+ scenarios = [
+ ('python', {'compressor': groupcompress.PythonGroupCompressor}),
+ ]
+ if CompiledGroupcompressFeature.available():
+ scenarios.append(('C',
+ {'compressor': groupcompress.PyrexGroupCompressor}))
+ return multiply_tests(to_adapt, scenarios, result)
+
+
+class _CompiledGroupcompressFeature(tests.Feature):
+
+ def _probe(self):
+ try:
+ import bzrlib._groupcompress_pyx
+ except ImportError:
+ return False
+ return True
+
+ def feature_name(self):
+ return "bzrlib._groupcompress_pyx"
+
+CompiledGroupcompressFeature = _CompiledGroupcompressFeature()
+
+
class TestGroupCompressor(tests.TestCase):
"""Tests for GroupCompressor"""
+ compressor = None # Set by multiply_tests
+
def test_empty_delta(self):
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
self.assertEqual([], compressor.lines)
def test_one_nosha_delta(self):
# diff against NUKK
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
sha1, start_point, end_point, _, _ = compressor.compress(('label',),
'strange\ncommon\n', None)
self.assertEqual(sha_string('strange\ncommon\n'), sha1)
- expected_lines = [
- 'f', '\x0f', 'strange\ncommon\n',
- ]
- self.assertEqual(expected_lines, compressor.lines)
+ expected_lines = 'f' '\x0f' 'strange\ncommon\n'
+ self.assertEqual(expected_lines, ''.join(compressor.lines))
self.assertEqual(0, start_point)
self.assertEqual(sum(map(len, expected_lines)), end_point)
def test_empty_content(self):
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
# Adding empty bytes should return the 'null' record
sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
'', None)
@@ -89,7 +117,7 @@
self._chunks_to_repr_lines(actual))
def test_two_nosha_delta(self):
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
sha1_1, _, _, _, _ = compressor.compress(('label',),
'strange\ncommon long line\nthat needs a 16 byte match\n', None)
expected_lines = list(compressor.lines)
@@ -114,7 +142,7 @@
def test_three_nosha_delta(self):
# The first interesting test: make a change that should use lines from
# both parents.
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
sha1_1, _, _, _, _ = compressor.compress(('label',),
'strange\ncommon very very long line\nwith some extra text\n', None)
sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
@@ -144,7 +172,7 @@
self.assertEqual(sum(map(len, expected_lines)), end_point)
def test_stats(self):
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
compressor.compress(('label',), 'strange\ncommon long line\n'
'plus more text\n', None)
compressor.compress(('newlabel',),
@@ -158,7 +186,7 @@
def test_extract_from_compressor(self):
# Knit fetching will try to reconstruct texts locally which results in
# reading something that is in the compressor stream already.
- compressor = groupcompress.GroupCompressor()
+ compressor = self.compressor()
sha1_1, _, _, _, _ = compressor.compress(('label',),
'strange\ncommon long line\nthat needs a 16 byte match\n', None)
expected_lines = list(compressor.lines)
More information about the bazaar-commits
mailing list