Rev 4202: Start reducing duplication between gc implementations. in file:///home/vila/src/bzr/experimental/gc-py-bbc/
Vincent Ladeuil
v.ladeuil+lp at free.fr
Fri Mar 27 12:12:11 GMT 2009
At file:///home/vila/src/bzr/experimental/gc-py-bbc/
------------------------------------------------------------
revno: 4202
revision-id: v.ladeuil+lp at free.fr-20090327121210-3tk013ffpia10typ
parent: v.ladeuil+lp at free.fr-20090327103944-w1cj5fm56eloochs
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: groupcompress-python-only
timestamp: Fri 2009-03-27 13:12:10 +0100
message:
Start reducing duplication between gc implementations.
* bzrlib/tests/test__groupcompress.py:
Fix line too long and various cosmetic changes.
(load_tests): Fix docstring.
* bzrlib/groupcompress.py:
(_CommonGroupCompressor.compress,
_CommonGroupCompressor._compress): Start reducing duplication between
C and python versions.
* bzrlib/_groupcompress_py.py:
(make_delta): Add checks for test conformance.
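
For context, the shape of this refactoring is a classic template
method: the driver logic that both backends share now lives once in
_CommonGroupCompressor.compress(), which delegates the backend-specific
delta generation to a _compress() hook. A minimal sketch of the
pattern, with hashlib standing in for osutils.sha_string and the
empty-content and nostore_sha handling omitted:

    import hashlib

    class _CommonGroupCompressor(object):

        def compress(self, key, bytes, expected_sha, nostore_sha=None,
                     soft=False):
            # Shared policy: trust the caller's sha1 when given,
            # otherwise compute it, then delegate the delta work.
            if expected_sha is not None:
                sha1 = expected_sha
            else:
                sha1 = hashlib.sha1(bytes).hexdigest()
            # A delta bigger than half the fulltext is not worth it.
            return self._compress(key, bytes, sha1, len(bytes) / 2, soft)

        def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
            # Backend hook: the python and pyrex compressors each
            # override this with their own delta generation.
            raise NotImplementedError(self._compress)

The groupcompress.py hunk below does exactly this: compress() moves up
into the common base class, and the two existing compressors keep only
their _compress() bodies.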
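
On the conformance side, the isinstance() checks added to the
pure-python make_delta() make it raise the same TypeErrors as the
compiled pyrex version, so the parameterized tests in
test__groupcompress.py run unchanged against both implementations.
Illustrative calls (usage sketch only, not actual test code):

    from bzrlib._groupcompress_py import make_delta

    make_delta('a string', 'another string')   # accepted by both impls
    make_delta(u'not a str', 'a string')       # TypeError in both
    make_delta('a string', object())           # TypeError in both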
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py 2009-03-27 10:38:26 +0000
+++ b/bzrlib/_groupcompress_py.py 2009-03-27 12:12:10 +0000
@@ -264,6 +264,11 @@
def make_delta(source_bytes, target_bytes):
"""Create a delta from source to target."""
+ # TODO: The checks below may not be at the right place yet.
+ if not isinstance(source_bytes, str):
+ raise TypeError('source is not a str')
+ if not isinstance(target_bytes, str):
+ raise TypeError('target is not a str')
line_locations = EquivalenceTable([])
return None
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx 2009-03-24 20:02:26 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx 2009-03-27 12:12:10 +0000
@@ -89,10 +89,6 @@
cdef readonly unsigned int _max_num_sources
cdef public unsigned long _source_offset
- def __repr__(self):
- return '%s(%d, %d)' % (self.__class__.__name__,
- len(self._sources), self._source_offset)
-
def __init__(self, source=None):
self._sources = []
self._index = NULL
@@ -104,6 +100,10 @@
if source is not None:
self.add_source(source, 0)
+ def __repr__(self):
+ return '%s(%d, %d)' % (self.__class__.__name__,
+ len(self._sources), self._source_offset)
+
def __dealloc__(self):
if self._index != NULL:
free_delta_index(self._index)
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-27 10:38:26 +0000
+++ b/bzrlib/groupcompress.py 2009-03-27 12:12:10 +0000
@@ -741,6 +741,70 @@
self.labels_deltas = {}
self._block = GroupCompressBlock()
+ def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
+ """Compress lines with label key.
+
+ :param key: A key tuple. It is stored in the output
+ for identification of the text during decompression. If the last
+ element is 'None' it is replaced with the sha1 of the text -
+ e.g. sha1:xxxxxxx.
+ :param bytes: The bytes to be compressed
+ :param expected_sha: If non-None, the sha the lines are believed to
+ have. During compression the sha is calculated; a mismatch will
+ cause an error.
+ :param nostore_sha: If the computed sha1 sum matches, we will raise
+ ExistingContent rather than adding the text.
+ :param soft: Do a 'soft' compression. This means that we require larger
+ ranges to match to be considered for a copy command.
+
+ :return: The sha1 of lines, the start and end offsets in the delta, the
+ type ('fulltext' or 'delta') and the number of bytes accumulated in
+ the group output so far.
+
+ :seealso VersionedFiles.add_lines:
+ """
+ if not bytes: # empty, like a dir entry, etc
+ if nostore_sha == _null_sha1:
+ raise errors.ExistingContent()
+ self._block.add_entry(key, type='empty',
+ sha1=None, start=0,
+ length=0)
+ return _null_sha1, 0, 0, 'fulltext', 0
+ # we assume someone knew what they were doing when they passed it in
+ if expected_sha is not None:
+ sha1 = expected_sha
+ else:
+ sha1 = osutils.sha_string(bytes)
+ if nostore_sha is not None:
+ if sha1 == nostore_sha:
+ raise errors.ExistingContent()
+ if key[-1] is None:
+ key = key[:-1] + ('sha1:' + sha1,)
+
+ return self._compress(key, bytes, sha1, len(bytes) / 2, soft)
+
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """Compress lines with label key.
+
+ :param key: A key tuple. It is stored in the output for identification
+ of the text during decompression.
+
+ :param bytes: The bytes to be compressed
+
+ :param sha1: The sha1 for 'bytes'.
+
+ :param max_delta_size: The size above which we issue a fulltext instead
+ of a delta.
+
+ :param soft: Do a 'soft' compression. This means that we require larger
+ ranges to match to be considered for a copy command.
+
+ :return: The sha1 of lines, the start and end offsets in the delta, the
+ type ('fulltext' or 'delta') and the number of bytes accumulated in
+ the group output so far.
+ """
+ raise NotImplementedError(self._compress)
+
def extract(self, key):
"""Extract a key previously added to the compressor.
@@ -812,43 +876,14 @@
self.lines = self.line_locations.lines
self._present_prefixes = set()
- def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
- """Compress lines with label key.
-
- :param key: A key tuple. It is stored in the output
- for identification of the text during decompression. If the last
- element is 'None' it is replaced with the sha1 of the text -
- e.g. sha1:xxxxxxx.
- :param bytes: The bytes to be compressed
- :param expected_sha: If non-None, the sha the lines are believed to
- have. During compression the sha is calculated; a mismatch will
- cause an error.
- :param nostore_sha: If the computed sha1 sum matches, we will raise
- ExistingContent rather than adding the text.
- :param soft: Do a 'soft' compression. This means that we require larger
- ranges to match to be considered for a copy command.
- :return: The sha1 of lines, and the number of bytes accumulated in
- the group output so far.
- :seealso VersionedFiles.add_lines:
- """
- if not bytes: # empty, like a dir entry, etc
- if nostore_sha == _null_sha1:
- raise errors.ExistingContent()
- self._block.add_entry(key, type='empty',
- sha1=None, start=0,
- length=0)
- return _null_sha1, 0, 0, 'fulltext', 0
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """see _CommonGroupCompressor._compress"""
bytes_length = len(bytes)
new_lines = osutils.split_lines(bytes)
- sha1 = osutils.sha_string(bytes)
- if sha1 == nostore_sha:
- raise errors.ExistingContent()
- if key[-1] is None:
- key = key[:-1] + ('sha1:' + sha1,)
out_lines, index_lines = self.line_locations.make_delta(new_lines,
soft=soft)
delta_length = sum(map(len, out_lines))
- if delta_length * 2 > bytes_length:
+ if delta_length > max_delta_size:
# The delta is longer than allowed, insert a fulltext instead
type = 'fulltext'
out_lines = ['f', encode_base128_int(bytes_length)]
@@ -865,13 +900,13 @@
out_length = len(out_lines[3]) + 1 + delta_length
self._block.add_entry(key, type=type, sha1=sha1,
start=self.endpoint, length=out_length)
- start = self.endpoint # Keep it
+ start = self.endpoint # Before insertion
delta_start = (self.endpoint, len(self.lines))
self.output_lines(out_lines, index_lines)
self.input_bytes += bytes_length
delta_end = (self.endpoint, len(self.lines))
self.labels_deltas[key] = (delta_start, delta_end)
- return sha1, start, self.endpoint, 'delta', out_length
+ return sha1, start, self.endpoint, type, out_length
def flush(self):
self._block.set_content(''.join(self.lines))
@@ -917,42 +952,8 @@
self.num_keys = 0
self._delta_index = DeltaIndex()
- def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
- """Compress lines with label key.
-
- :param key: A key tuple. It is stored in the output
- for identification of the text during decompression. If the last
- element is 'None' it is replaced with the sha1 of the text -
- e.g. sha1:xxxxxxx.
- :param bytes: The bytes to be compressed
- :param expected_sha: If non-None, the sha the lines are believed to
- have. During compression the sha is calculated; a mismatch will
- cause an error.
- :param nostore_sha: If the computed sha1 sum matches, we will raise
- ExistingContent rather than adding the text.
- :param soft: Do a 'soft' compression. This means that we require larger
- ranges to match to be considered for a copy command.
- :return: The sha1 of lines, and the number of bytes accumulated in
- the group output so far.
- :seealso VersionedFiles.add_lines:
- """
- if not bytes: # empty, like a dir entry, etc
- if nostore_sha == _null_sha1:
- raise errors.ExistingContent()
- self._block.add_entry(key, type='empty',
- sha1=None, start=0,
- length=0)
- return _null_sha1, 0, 0, 'fulltext', 0
- # we assume someone knew what they were doing when they passed it in
- if expected_sha is not None:
- sha1 = expected_sha
- else:
- sha1 = osutils.sha_string(bytes)
- if nostore_sha is not None:
- if sha1 == nostore_sha:
- raise errors.ExistingContent()
- if key[-1] is None:
- key = key[:-1] + ('sha1:' + sha1,)
+ def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
+ """see _CommonGroupCompressor._compress"""
input_len = len(bytes)
# By having action/label/sha1/len, we can parse the group if the index
# was ever destroyed, we have the key in 'label', we know the final
@@ -967,7 +968,6 @@
raise AssertionError('_source_offset != endpoint'
' somehow the DeltaIndex got out of sync with'
' the output lines')
- max_delta_size = len(bytes) / 2
delta = self._delta_index.make_delta(bytes, max_delta_size)
if (delta is None):
type = 'fulltext'
@@ -988,7 +988,7 @@
self._delta_index.add_delta_source(delta, len_mini_header)
self._block.add_entry(key, type=type, sha1=sha1,
start=self.endpoint, length=length)
- start = self.endpoint
+ start = self.endpoint # Before insertion
delta_start = (self.endpoint, len(self.lines))
self.num_keys += 1
self.output_chunks(new_chunks)
=== modified file 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress.py 2009-03-25 20:58:16 +0000
+++ b/bzrlib/tests/test__groupcompress.py 2009-03-27 12:12:10 +0000
@@ -24,7 +24,7 @@
def load_tests(standard_tests, module, loader):
- """Parameterize tests for view-aware vs not."""
+ """Parameterize tests for all versions of groupcompress."""
to_adapt, result = tests.split_suite_by_condition(
standard_tests, tests.condition_isinstance(TestMakeAndApplyDelta))
scenarios = [
@@ -50,6 +50,7 @@
def feature_name(self):
return 'bzrlib._groupcompress_pyx'
+
CompiledGroupCompressFeature = _CompiledGroupCompressFeature()
_text1 = """\
@@ -120,14 +121,14 @@
def test_make_delta_is_typesafe(self):
self.make_delta('a string', 'another string')
- self.assertRaises(TypeError,
- self.make_delta, 'a string', object())
- self.assertRaises(TypeError,
- self.make_delta, 'a string', u'not a string')
- self.assertRaises(TypeError,
- self.make_delta, object(), 'a string')
- self.assertRaises(TypeError,
- self.make_delta, u'not a string', 'a string')
+
+ def _check_make_delta(string1, string2):
+ self.assertRaises(TypeError, self.make_delta, string1, string2)
+
+ _check_make_delta('a string', object())
+ _check_make_delta('a string', u'not a string')
+ _check_make_delta(object(), 'a string')
+ _check_make_delta(u'not a string', 'a string')
def test_make_noop_delta(self):
ident_delta = self.make_delta(_text1, _text1)
@@ -150,14 +151,11 @@
def test_apply_delta_is_typesafe(self):
self.apply_delta(_text1, 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, object(), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, unicode(_text1), 'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, _text1, u'MM\x90M')
- self.assertRaises(TypeError,
- self.apply_delta, _text1, object())
+ self.assertRaises(TypeError, self.apply_delta, object(), 'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta,
+ unicode(_text1), 'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta, _text1, u'MM\x90M')
+ self.assertRaises(TypeError, self.apply_delta, _text1, object())
def test_apply_delta(self):
target = self.apply_delta(_text1,
@@ -193,7 +191,8 @@
di.add_source(_first_text, 0)
self.assertEqual(len(_first_text), di._source_offset)
di.add_source(_second_text, 0)
- self.assertEqual(len(_first_text) + len(_second_text), di._source_offset)
+ self.assertEqual(len(_first_text) + len(_second_text),
+ di._source_offset)
delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(_first_text + _second_text, delta)
self.assertEqualDiff(_third_text, result)
@@ -230,8 +229,8 @@
second_delta = di.make_delta(_third_text)
result = self._gc_module.apply_delta(source, second_delta)
self.assertEqualDiff(_third_text, result)
- # We should be able to match against the 'previous text\nand has some...'
- # that was part of the delta bytes
+ # We should be able to match against the
+ # 'previous text\nand has some...' that was part of the delta bytes
# Note that we don't match the 'common with the', because it isn't long
# enough to match in the original text, and those bytes are not present
# in the delta for the second text.
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-25 21:20:18 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-27 12:12:10 +0000
@@ -30,7 +30,7 @@
def load_tests(standard_tests, module, loader):
- """Parameterize tests for view-aware vs not."""
+ """Parameterize tests for all versions of groupcompress."""
to_adapt, result = tests.split_suite_by_condition(
standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
scenarios = [
@@ -113,7 +113,7 @@
# get the first out
self.assertEqual(('strange\ncommon long line\n'
'that needs a 16 byte match\n', sha1_1),
- compressor.extract(('label',)))
+ compressor.extract(('label',)))
# and the second
self.assertEqual(('common long line\nthat needs a 16 byte match\n'
'different\n', sha1_2),