Rev 3926: Implement apply_delta_to_source which doesn't have to malloc another string, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 27 22:30:12 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
------------------------------------------------------------
revno: 3926
revision-id: john at arbash-meinel.com-20090327222955-utifmfm888zerixt
parent: john at arbash-meinel.com-20090327220537-loj7fdr9hi360qc3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Fri 2009-03-27 17:29:55 -0500
message:
Implement apply_delta_to_source which doesn't have to malloc another string.
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py 2009-03-27 20:50:36 +0000
+++ b/bzrlib/_groupcompress_py.py 2009-03-27 22:29:55 +0000
@@ -362,7 +362,11 @@
pos += 1
if cmd & 0x80:
offset, length, pos = decode_copy_instruction(delta, cmd, pos)
- lines.append(basis[offset:offset+length])
+ last = offset + length
+ if last > len(basis):
+ raise ValueError('data would copy bytes past the'
+ 'end of source')
+ lines.append(basis[offset:last])
else: # Insert of 'cmd' bytes
if cmd == 0:
raise ValueError('Command == 0 not supported yet')
@@ -373,3 +377,16 @@
raise ValueError('Delta claimed to be %d long, but ended up'
' %d long' % (target_length, len(bytes)))
return bytes
+
+
+def apply_delta_to_source(source, delta_start, delta_end):
+ """Extract a delta from source bytes, and apply it."""
+ source_size = len(source)
+ if delta_start >= source_size:
+ raise ValueError('delta starts after source')
+ if delta_end > source_size:
+ raise ValueError('delta ends after source')
+ if delta_start >= delta_end:
+ raise ValueError('delta starts after it ends')
+ delta_bytes = source[delta_start:delta_end]
+ return apply_delta(source, delta_bytes)
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx 2009-03-27 21:29:32 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx 2009-03-27 22:29:55 +0000
@@ -226,21 +226,15 @@
cdef Py_ssize_t source_size
cdef char *delta
cdef Py_ssize_t delta_size
- cdef unsigned char *data, *top
- cdef unsigned char *dst_buf, *out, cmd
- cdef Py_ssize_t size
- cdef unsigned long cp_off, cp_size
if not PyString_CheckExact(source_bytes):
raise TypeError('source is not a str')
if not PyString_CheckExact(delta_bytes):
raise TypeError('delta is not a str')
-
source = PyString_AS_STRING(source_bytes)
source_size = PyString_GET_SIZE(source_bytes)
delta = PyString_AS_STRING(delta_bytes)
delta_size = PyString_GET_SIZE(delta_bytes)
-
# Code taken from patch-delta.c, only brought here to give better error
# handling, and to avoid double allocating memory
if (delta_size < DELTA_SIZE_MIN):
@@ -248,6 +242,17 @@
raise RuntimeError('delta_size %d smaller than min delta size %d'
% (delta_size, DELTA_SIZE_MIN))
+ return _apply_delta(source, source_size, delta, delta_size)
+
+
+cdef object _apply_delta(char *source, Py_ssize_t source_size,
+ char *delta, Py_ssize_t delta_size):
+ """common functionality between apply_delta and apply_delta_to_source."""
+ cdef unsigned char *data, *top
+ cdef unsigned char *dst_buf, *out, cmd
+ cdef Py_ssize_t size
+ cdef unsigned long cp_off, cp_size
+
data = <unsigned char *>delta
top = data + delta_size
@@ -328,6 +333,34 @@
return result
+def apply_delta_to_source(source, delta_start, delta_end):
+ """Extract a delta from source bytes, and apply it."""
+ cdef char *c_source
+ cdef Py_ssize_t c_source_size
+ cdef char *c_delta
+ cdef Py_ssize_t c_delta_size
+ cdef Py_ssize_t c_delta_start, c_delta_end
+
+ if not PyString_CheckExact(source):
+ raise TypeError('source is not a str')
+ c_source_size = PyString_GET_SIZE(source)
+ c_delta_start = delta_start
+ c_delta_end = delta_end
+ if c_delta_start >= c_source_size:
+ raise ValueError('delta starts after source')
+ if c_delta_end > c_source_size:
+ raise ValueError('delta ends after source')
+ if c_delta_start >= c_delta_end:
+ raise ValueError('delta starts after it ends')
+
+ c_delta_size = c_delta_end - c_delta_start
+ c_source = PyString_AS_STRING(source)
+ c_delta = c_source + c_delta_start
+ # We don't use source_size, because we know the delta should not refer to
+ # any bytes after it starts
+ return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
+
+
def encode_base128_int(val):
"""Convert an integer into a 7-bit lsb encoding."""
cdef unsigned int c_val
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-27 22:05:37 +0000
+++ b/bzrlib/groupcompress.py 2009-03-27 22:29:55 +0000
@@ -248,11 +248,10 @@
if end != content_start + content_len:
raise ValueError('end != len according to field header'
' %s != %s' % (end, content_start + content_len))
- content = self._content[content_start:end]
if c == 'f':
- bytes = content
+ bytes = self._content[content_start:end]
elif c == 'd':
- bytes = apply_delta(self._content, content)
+ bytes = apply_delta_to_source(self._content, content_start, end)
return bytes
def set_content(self, content):
@@ -1641,6 +1640,7 @@
from bzrlib._groupcompress_py import (
apply_delta,
+ apply_delta_to_source,
encode_base128_int,
decode_base128_int,
LinesDeltaIndex,
@@ -1648,6 +1648,7 @@
try:
from bzrlib._groupcompress_pyx import (
apply_delta,
+ apply_delta_to_source,
DeltaIndex,
encode_base128_int,
decode_base128_int,
=== modified file 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress.py 2009-03-27 21:29:32 +0000
+++ b/bzrlib/tests/test__groupcompress.py 2009-03-27 22:29:55 +0000
@@ -135,6 +135,7 @@
super(TestMakeAndApplyDelta, self).setUp()
self.make_delta = self._gc_module.make_delta
self.apply_delta = self._gc_module.apply_delta
+ self.apply_delta_to_source = self._gc_module.apply_delta_to_source
def test_make_delta_is_typesafe(self):
self.make_delta('a string', 'another string')
@@ -201,6 +202,27 @@
'M\x90/\x1ebe matched\nagainst other text\n')
self.assertEqual(_text1, target)
+ def test_apply_delta_to_source_is_safe(self):
+ self.assertRaises(TypeError,
+ self.apply_delta_to_source, object(), 0, 1)
+ self.assertRaises(TypeError,
+ self.apply_delta_to_source, u'unicode str', 0, 1)
+ # end > length
+ self.assertRaises(ValueError,
+ self.apply_delta_to_source, 'foo', 1, 4)
+ # start > length
+ self.assertRaises(ValueError,
+ self.apply_delta_to_source, 'foo', 5, 3)
+ # start > end
+ self.assertRaises(ValueError,
+ self.apply_delta_to_source, 'foo', 3, 2)
+
+ def test_apply_delta_to_source(self):
+ source_and_delta = (_text1
+ + 'N\x90/\x1fdiffer from\nagainst other text\n')
+ self.assertEqual(_text2, self.apply_delta_to_source(source_and_delta,
+ len(_text1), len(source_and_delta)))
+
class TestMakeAndApplyCompatible(tests.TestCase):
More information about the bazaar-commits
mailing list