Rev 3926: Implement apply_delta_to_source which doesn't have to malloc another string. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

John Arbash Meinel john at arbash-meinel.com
Fri Mar 27 22:30:12 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

------------------------------------------------------------
revno: 3926
revision-id: john at arbash-meinel.com-20090327222955-utifmfm888zerixt
parent: john at arbash-meinel.com-20090327220537-loj7fdr9hi360qc3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Fri 2009-03-27 17:29:55 -0500
message:
  Implement apply_delta_to_source which doesn't have to malloc another string.
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py	2009-03-27 20:50:36 +0000
+++ b/bzrlib/_groupcompress_py.py	2009-03-27 22:29:55 +0000
@@ -362,7 +362,11 @@
         pos += 1
         if cmd & 0x80:
             offset, length, pos = decode_copy_instruction(delta, cmd, pos)
-            lines.append(basis[offset:offset+length])
+            last = offset + length
+            if last > len(basis):
+                raise ValueError('data would copy bytes past the'
+                                 'end of source')
+            lines.append(basis[offset:last])
         else: # Insert of 'cmd' bytes
             if cmd == 0:
                 raise ValueError('Command == 0 not supported yet')
@@ -373,3 +377,16 @@
         raise ValueError('Delta claimed to be %d long, but ended up'
                          ' %d long' % (target_length, len(bytes)))
     return bytes
+
+
+def apply_delta_to_source(source, delta_start, delta_end):
+    """Extract a delta from source bytes, and apply it."""
+    source_size = len(source)
+    if delta_start >= source_size:
+        raise ValueError('delta starts after source')
+    if delta_end > source_size:
+        raise ValueError('delta ends after source')
+    if delta_start >= delta_end:
+        raise ValueError('delta starts after it ends')
+    delta_bytes = source[delta_start:delta_end]
+    return apply_delta(source, delta_bytes)

=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx	2009-03-27 21:29:32 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx	2009-03-27 22:29:55 +0000
@@ -226,21 +226,15 @@
     cdef Py_ssize_t source_size
     cdef char *delta
     cdef Py_ssize_t delta_size
-    cdef unsigned char *data, *top
-    cdef unsigned char *dst_buf, *out, cmd
-    cdef Py_ssize_t size
-    cdef unsigned long cp_off, cp_size
 
     if not PyString_CheckExact(source_bytes):
         raise TypeError('source is not a str')
     if not PyString_CheckExact(delta_bytes):
         raise TypeError('delta is not a str')
-
     source = PyString_AS_STRING(source_bytes)
     source_size = PyString_GET_SIZE(source_bytes)
     delta = PyString_AS_STRING(delta_bytes)
     delta_size = PyString_GET_SIZE(delta_bytes)
-
     # Code taken from patch-delta.c, only brought here to give better error
     # handling, and to avoid double allocating memory
     if (delta_size < DELTA_SIZE_MIN):
@@ -248,6 +242,17 @@
         raise RuntimeError('delta_size %d smaller than min delta size %d'
                            % (delta_size, DELTA_SIZE_MIN))
 
+    return _apply_delta(source, source_size, delta, delta_size)
+
+
+cdef object _apply_delta(char *source, Py_ssize_t source_size,
+                         char *delta, Py_ssize_t delta_size):
+    """common functionality between apply_delta and apply_delta_to_source."""
+    cdef unsigned char *data, *top
+    cdef unsigned char *dst_buf, *out, cmd
+    cdef Py_ssize_t size
+    cdef unsigned long cp_off, cp_size
+
     data = <unsigned char *>delta
     top = data + delta_size
 
@@ -328,6 +333,34 @@
     return result
 
 
+def apply_delta_to_source(source, delta_start, delta_end):
+    """Extract a delta from source bytes, and apply it."""
+    cdef char *c_source
+    cdef Py_ssize_t c_source_size
+    cdef char *c_delta
+    cdef Py_ssize_t c_delta_size
+    cdef Py_ssize_t c_delta_start, c_delta_end
+
+    if not PyString_CheckExact(source):
+        raise TypeError('source is not a str')
+    c_source_size = PyString_GET_SIZE(source)
+    c_delta_start = delta_start
+    c_delta_end = delta_end
+    if c_delta_start >= c_source_size:
+        raise ValueError('delta starts after source')
+    if c_delta_end > c_source_size:
+        raise ValueError('delta ends after source')
+    if c_delta_start >= c_delta_end:
+        raise ValueError('delta starts after it ends')
+
+    c_delta_size = c_delta_end - c_delta_start
+    c_source = PyString_AS_STRING(source)
+    c_delta = c_source + c_delta_start
+    # We don't use source_size, because we know the delta should not refer to
+    # any bytes after it starts
+    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
+
+
 def encode_base128_int(val):
     """Convert an integer into a 7-bit lsb encoding."""
     cdef unsigned int c_val

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-27 22:05:37 +0000
+++ b/bzrlib/groupcompress.py	2009-03-27 22:29:55 +0000
@@ -248,11 +248,10 @@
         if end != content_start + content_len:
             raise ValueError('end != len according to field header'
                 ' %s != %s' % (end, content_start + content_len))
-        content = self._content[content_start:end]
         if c == 'f':
-            bytes = content
+            bytes = self._content[content_start:end]
         elif c == 'd':
-            bytes = apply_delta(self._content, content)
+            bytes = apply_delta_to_source(self._content, content_start, end)
         return bytes
 
     def set_content(self, content):
@@ -1641,6 +1640,7 @@
 
 from bzrlib._groupcompress_py import (
     apply_delta,
+    apply_delta_to_source,
     encode_base128_int,
     decode_base128_int,
     LinesDeltaIndex,
@@ -1648,6 +1648,7 @@
 try:
     from bzrlib._groupcompress_pyx import (
         apply_delta,
+        apply_delta_to_source,
         DeltaIndex,
         encode_base128_int,
         decode_base128_int,

=== modified file 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress.py	2009-03-27 21:29:32 +0000
+++ b/bzrlib/tests/test__groupcompress.py	2009-03-27 22:29:55 +0000
@@ -135,6 +135,7 @@
         super(TestMakeAndApplyDelta, self).setUp()
         self.make_delta = self._gc_module.make_delta
         self.apply_delta = self._gc_module.apply_delta
+        self.apply_delta_to_source = self._gc_module.apply_delta_to_source
 
     def test_make_delta_is_typesafe(self):
         self.make_delta('a string', 'another string')
@@ -201,6 +202,27 @@
                     'M\x90/\x1ebe matched\nagainst other text\n')
         self.assertEqual(_text1, target)
 
+    def test_apply_delta_to_source_is_safe(self):
+        self.assertRaises(TypeError,
+            self.apply_delta_to_source, object(), 0, 1)
+        self.assertRaises(TypeError,
+            self.apply_delta_to_source, u'unicode str', 0, 1)
+        # end > length
+        self.assertRaises(ValueError,
+            self.apply_delta_to_source, 'foo', 1, 4)
+        # start > length
+        self.assertRaises(ValueError,
+            self.apply_delta_to_source, 'foo', 5, 3)
+        # start > end
+        self.assertRaises(ValueError,
+            self.apply_delta_to_source, 'foo', 3, 2)
+
+    def test_apply_delta_to_source(self):
+        source_and_delta = (_text1
+                            + 'N\x90/\x1fdiffer from\nagainst other text\n')
+        self.assertEqual(_text2, self.apply_delta_to_source(source_and_delta,
+                                    len(_text1), len(source_and_delta)))
+
 
 class TestMakeAndApplyCompatible(tests.TestCase):
 



More information about the bazaar-commits mailing list