Rev 3911: For the simple test cases we have, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

John Arbash Meinel john at arbash-meinel.com
Wed Mar 25 20:00:45 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

------------------------------------------------------------
revno: 3911
revision-id: john at arbash-meinel.com-20090325200023-lfrhvlvdsv3swdpw
parent: john at arbash-meinel.com-20090325193113-7crd62vmi7ryobh5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Wed 2009-03-25 15:00:23 -0500
message:
  For the simple test cases we have,
  the python matcher works exactly the same as the compiled matcher.
  Because the test case has exact line matching (no sub-line matches
  are expected).
  Cheers!! Testing is easier.
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py	2009-03-25 17:20:33 +0000
+++ b/bzrlib/_groupcompress_py.py	2009-03-25 20:00:23 +0000
@@ -44,6 +44,7 @@
     def _update_matching_lines(self, new_lines, index):
         matches = self._matching_lines
         start_idx = len(self.lines)
+        assert len(new_lines) == len(index)
         for idx, do_index in enumerate(index):
             if not do_index:
                 continue

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-25 19:31:13 +0000
+++ b/bzrlib/groupcompress.py	2009-03-25 20:00:23 +0000
@@ -770,6 +770,7 @@
         self.endpoint = 0
         self.input_bytes = 0
         self.labels_deltas = {}
+        self._block = GroupCompressBlock()
 
     def ratio(self):
         """Return the overall compression ratio."""
@@ -848,35 +849,63 @@
             the group output so far.
         :seealso VersionedFiles.add_lines:
         """
+        if not bytes: # empty, like a dir entry, etc
+            if nostore_sha == _null_sha1:
+                raise errors.ExistingContent()
+            self._block.add_entry(key, type='empty',
+                                  sha1=None, start=0,
+                                  length=0)
+            return _null_sha1, 0, 0, 'fulltext', 0
+        bytes_length = len(bytes)
         new_lines = osutils.split_lines(bytes)
         sha1 = osutils.sha_string(bytes)
+        if sha1 == nostore_sha:
+            raise errors.ExistingContent()
         if key[-1] is None:
             key = key[:-1] + ('sha1:' + sha1,)
-        out_lines = []
-        index_lines = []
+        # reserved for content type, content length, source_len, target_len
+        out_lines = ['', '', '', '']
+        index_lines = [False, False, False, False]
         blocks = self.get_matching_blocks(new_lines, soft=soft)
         current_line_num = 0
         # We either copy a range (while there are reusable lines) or we
         # insert new lines. To find reusable lines we traverse
         for old_start, new_start, range_len in blocks:
             if new_start != current_line_num:
-                # non-matching region
-                self.flush_insert(current_line_num, new_start,
-                                  new_lines, out_lines, index_lines)
+                # non-matching region, insert the content
+                self._flush_insert(current_line_num, new_start,
+                                   new_lines, out_lines, index_lines)
             current_line_num = new_start + range_len
-            if not range_len:
-                continue
-            self._flush_copy(old_start, range_len,
-                             new_lines, out_lines, index_lines)
+            if range_len:
+                self._flush_copy(old_start, range_len, out_lines, index_lines)
+        delta_length = sum(map(len, out_lines))
+        if delta_length * 2 > bytes_length:
+            # The delta is longer than the fulltext, insert a fulltext
+            type = 'fulltext'
+            out_lines = ['f', encode_base128_int(bytes_length)]
+            out_lines.extend(new_lines)
+            index_lines = [False, False]
+            index_lines.extend([True] * len(new_lines))
+            out_length = len(out_lines[1]) + bytes_length + 1
+        else:
+            # this is a worthy delta, output it
+            type = 'delta'
+            out_lines[0] = 'd'
+            out_lines[1] = encode_base128_int(delta_length)
+            out_lines[2] = encode_base128_int(self.endpoint)
+            out_lines[3] = encode_base128_int(bytes_length)
+            out_length = (len(out_lines[1]) + len(out_lines[2])
+                          + len(out_lines[3]) + 1 + delta_length)
+        self._block.add_entry(key, type=type, sha1=sha1,
+                              start=self.endpoint, length=out_length)
         start = self.endpoint # Keep it
         delta_start = (self.endpoint, len(self.lines))
         self.output_lines(out_lines, index_lines)
-        length = len(bytes)
-        self.input_bytes += length
+        self.input_bytes += bytes_length
         delta_end = (self.endpoint, len(self.lines))
         self.labels_deltas[key] = (delta_start, delta_end)
         # FIXME: lot of guessing below
-        return sha1, start, self.endpoint, 'delta', length
+        return sha1, start, self.endpoint, 'delta', out_length
 
     def extract(self, key):
         """Extract a key previously added to the compressor.
@@ -923,33 +952,9 @@
         # code, we will also limit it to a 64kB copy
         for start_byte in xrange(first_byte, stop_byte, 64*1024):
             num_bytes = min(64*1024, stop_byte - first_byte)
-            copy_command, copy_bytes = encode_copy_instruction(start_byte,
-                                                               num_bytes)
-
-    def flush_range(self, new_line_start, source_line_start, match_num_lines,
-                    new_lines, out_lines, index_lines):
-        """Insert the control codes for this copy & insert instruction.
-
-        :param range_start: 
-        """
-        if copy_start is not None:
-            # range stops, flush and start a new copy range
-            stop_byte = self.line_offsets[copy_start + range_len - 1]
-            if copy_start == 0:
-                start_byte = 0
-            else:
-                start_byte = self.line_offsets[copy_start - 1]
-            bytes = stop_byte - start_byte
-            copy_byte = 0
-            copy_control_instruction =0
-            new_lines.append(copy_control_instruction)
+            copy_bytes = encode_copy_instruction(start_byte, num_bytes)
+            out_lines.append(copy_bytes)
             index_lines.append(False)
-            return
-        # not copying, or inserting is shorter than copying, so insert.
-        new_lines.append(insert_instruction)
-        new_lines.extend(lines[range_start:range_start+range_len])
-        index_lines.append(False)
-        index_lines.extend([copy_start is None]*range_len)
 
     def flush(self):
         # FIXME: ugly hack to masquerade ourself as the pyrex version
@@ -999,11 +1004,10 @@
     """
 
     def __init__(self):
-        super(PythonGroupCompressor, self).__init__()
+        super(PyrexGroupCompressor, self).__init__()
         self.num_keys = 0
         self._last = None
         self._delta_index = DeltaIndex()
-        self._block = GroupCompressBlock()
 
     def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
         """Compress lines with label key.
@@ -2002,6 +2006,12 @@
         return node[0], start, stop, basis_end, delta_end
 
 
+from bzrlib._groupcompress_py import (
+    apply_delta,
+    EquivalenceTable,
+    _get_longest_match,
+    trim_encoding_newline,
+    )
 try:
     from bzrlib._groupcompress_pyx import (
         apply_delta,
@@ -2009,11 +2019,5 @@
         )
     GroupCompressor = PyrexGroupCompressor
 except ImportError:
-    from bzrlib._groupcompress_py import (
-        apply_delta,
-        EquivalenceTable,
-        _get_longest_match,
-        trim_encoding_newline,
-        )
     GroupCompressor = PythonGroupCompressor
 

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-25 19:31:13 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-25 20:00:23 +0000
@@ -32,28 +32,56 @@
     )
 
 
+def load_tests(standard_tests, module, loader):
+    """Parameterize tests for view-aware vs not."""
+    to_adapt, result = tests.split_suite_by_condition(
+        standard_tests, tests.condition_isinstance(TestGroupCompressor))
+    scenarios = [
+        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
+        ]
+    if CompiledGroupcompressFeature.available():
+        scenarios.append(('C',
+            {'compressor': groupcompress.PyrexGroupCompressor}))
+    return multiply_tests(to_adapt, scenarios, result)
+
+
+class _CompiledGroupcompressFeature(tests.Feature):
+
+    def _probe(self):
+        try:
+            import bzrlib._groupcompress_pyx
+        except ImportError:
+            return False
+        return True
+
+    def feature_name(self):
+        return "bzrlib._groupcompress_pyx"
+
+CompiledGroupcompressFeature = _CompiledGroupcompressFeature()
+
+
 class TestGroupCompressor(tests.TestCase):
     """Tests for GroupCompressor"""
 
+    compressor = None # Set by multiply_tests
+
     def test_empty_delta(self):
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         self.assertEqual([], compressor.lines)
 
     def test_one_nosha_delta(self):
         # diff against NUKK
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         sha1, start_point, end_point, _, _ = compressor.compress(('label',),
             'strange\ncommon\n', None)
         self.assertEqual(sha_string('strange\ncommon\n'), sha1)
-        expected_lines = [
-            'f', '\x0f', 'strange\ncommon\n',
-            ]
-        self.assertEqual(expected_lines, compressor.lines)
+        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
+        self.assertEqual(expected_lines, ''.join(compressor.lines))
         self.assertEqual(0, start_point)
         self.assertEqual(sum(map(len, expected_lines)), end_point)
 
     def test_empty_content(self):
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
         sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
             '', None)
@@ -89,7 +117,7 @@
                              self._chunks_to_repr_lines(actual))
 
     def test_two_nosha_delta(self):
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.lines)
@@ -114,7 +142,7 @@
     def test_three_nosha_delta(self):
         # The first interesting test: make a change that should use lines from
         # both parents.
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
@@ -144,7 +172,7 @@
         self.assertEqual(sum(map(len, expected_lines)), end_point)
 
     def test_stats(self):
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         compressor.compress(('label',), 'strange\ncommon long line\n'
                                         'plus more text\n', None)
         compressor.compress(('newlabel',),
@@ -158,7 +186,7 @@
     def test_extract_from_compressor(self):
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
-        compressor = groupcompress.GroupCompressor()
+        compressor = self.compressor()
         sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.lines)



More information about the bazaar-commits mailing list