Rev 2908: work in some extra fields for KnitDataV2 in http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents
John Arbash Meinel
john at arbash-meinel.com
Fri Oct 26 22:28:12 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents
------------------------------------------------------------
revno: 2908
revision-id:john at arbash-meinel.com-20071026212731-cxsrlnhu4qhfemiu
parent: john at arbash-meinel.com-20071019164517-2q6gx1ynkn8mdrx4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parents
timestamp: Fri 2007-10-26 16:27:31 -0500
message:
work in some extra fields for KnitDataV2
And add a couple more tests for the format.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-10-19 16:45:17 +0000
+++ b/bzrlib/knit.py 2007-10-26 21:27:31 +0000
@@ -912,11 +912,14 @@
parent_texts, delta, self.factory.annotated,
left_matching_blocks)
+ has_eol = not ('no-eol' in options)
if delta:
options.append('line-delta')
+ delta_parent = parents[0]
store_lines = self.factory.lower_line_delta(delta_hunks)
size, bytes = self._data._record_to_data(version_id, digest,
- store_lines, parents=parents)
+ store_lines, parents=parents, has_eol=has_eol,
+ delta_parent=delta_parent)
else:
options.append('fulltext')
# isinstance is slower and we have no hierarchy.
@@ -924,13 +927,15 @@
# Use the already joined bytes saving iteration time in
# _record_to_data.
size, bytes = self._data._record_to_data(version_id, digest,
- lines, [line_bytes], parents=parents)
+ lines, [line_bytes], parents=parents, has_eol=has_eol,
+ delta_parent=None)
else:
# get mixed annotation + content and feed it into the
# serialiser.
store_lines = self.factory.lower_fulltext(content)
size, bytes = self._data._record_to_data(version_id, digest,
- store_lines, parents=parents)
+ store_lines, parents=parents, has_eol=has_eol,
+ delta_parent=None)
access_memo = self._data.add_raw_records([size], bytes)[0]
self._index.add_versions(
@@ -1991,16 +1996,21 @@
return self._access.open_file()
def _record_to_data(self, version_id, digest, lines, dense_lines=None,
- parents=None):
+ parents=None, has_eol=True, delta_parent=None):
"""Convert version_id, digest, lines into a raw data block.
-
+
:param dense_lines: The bytes of lines but in a denser form. For
instance, if lines is a list of 1000 bytestrings each ending in \n,
dense_lines may be a list with one line in it, containing all the
1000's lines and their \n's. Using dense_lines if it is already
known is a win because the string join to create bytes in this
function spends less time resizing the final string.
- :return: (len, a StringIO instance with the raw data ready to read.)
+ :param parents: The list of parents of this revision
+ :param has_eol: Does this text have a newline (\\n) as its last
+ character
+ :param delta_parent: If this is a delta, the revision id we are delta'd
+ against
+ :return: (len, a byte string with the compressed data)
"""
# Note: using a string copy here increases memory pressure with e.g.
# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
@@ -2172,33 +2182,43 @@
return components
-class _KnitDataWithParents(_KnitData):
+class _KnitDataV2(_KnitData):
def _record_to_data(self, version_id, digest, lines, dense_lines=None,
- parents=None):
+ parents=None, has_eol=True, delta_parent=None):
"""Convert version_id, digest, lines into a raw data block.
-
+
:param dense_lines: The bytes of lines but in a denser form. For
instance, if lines is a list of 1000 bytestrings each ending in \n,
dense_lines may be a list with one line in it, containing all the
1000's lines and their \n's. Using dense_lines if it is already
known is a win because the string join to create bytes in this
function spends less time resizing the final string.
- :return: (len, a StringIO instance with the raw data ready to read.)
+ :param parents: The list of parents of this revision
+ :param has_eol: Does this text have a newline (\\n) as its last
+ character
+ :param delta_parent: If this is a delta, the revision id we are delta'd
+ against
+ :return: (len, a byte string with the compressed data)
"""
# Note: using a string copy here increases memory pressure with e.g.
# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
# when doing the initial commit of a mozilla tree. RBC 20070921
if parents:
+ num_parents = len(parents)
parents_str = ' ' + ' '.join(parents)
else:
+ num_parents = 0
parents_str = ''
+ eol_str = has_eol and 'eol' or 'no-eol'
+ kind_str = delta_parent and 'linedelta' or 'full'
+ basis_str = delta_parent or ''
bytes = ''.join(chain(
- ["version %s %d %s%s\n" % (version_id,
- len(lines),
- digest,
- parents_str)],
+ ["version %s %d %s %s %s %s %d%s\n" % (version_id,
+ len(lines), digest, eol_str,
+ kind_str, basis_str, num_parents, parents_str,
+ )],
dense_lines or lines,
["end %s\n" % version_id]))
assert bytes.__class__ == str
@@ -2206,15 +2226,110 @@
return len(compressed_bytes), compressed_bytes
def _check_header(self, version_id, line):
- rec = line.split()
- if len(rec) >= 4:
+ # The old api just wanted the list of records
+ data = self._check_info_header(version_id, line)
+ return data[0]
+
+ def _check_info_header(self, version_id, line):
+ """Parse the header line, and extract all information.
+
+ :return: (raw_records, num_lines, sha1_digest, has_eol,
+ delta_or_full, basis, parent_ids)
+ """
+ # 0:'version', 1:version_id, 2:num_lines, 3:digest, 4:eol?,
+ # 5: delta/full 6: basis 7:num_parents, 8+:parent-ids
+ rec = line.split(' ')
+ if len(rec) < 8:
raise KnitCorrupt(self._access,
'unexpected number of elements in record header')
if rec[1] != version_id:
raise KnitCorrupt(self._access,
'unexpected version, wanted %r, got %r'
% (version_id, rec[1]))
- return rec
+ if not rec[-1].endswith('\n'):
+ raise KnitCorrupt(self._access,
+ 'missing trailing newline for version_id: %r'
+ % (version_id,))
+ rec[-1] = rec[-1][:-1] # Strip off the trailing newline
+
+ try:
+ num_lines = int(rec[2])
+ except ValueError:
+ raise KnitCorrupt(self._access,
+ 'Invalid number of lines. Expected an integer'
+ ' not: %r for version_id %r'
+ % (rec[2], version_id))
+
+ if rec[4] == 'eol':
+ has_eol = True
+ elif rec[4] == 'no-eol':
+ has_eol = False
+ else:
+ raise KnitCorrupt(self._access,
+ 'invalid EOL value expected eol or no-eol, got %s'
+ % (rec[4],))
+
+ text_kind = rec[5]
+ if text_kind not in ('linedelta', 'full'):
+ raise KnitCorrupt(self._access,
+ "invalid format, expected 'linedelta' or 'full'"
+ " not %r for version_id %r"
+ % (text_kind, version_id))
+ basis = rec[6]
+ if text_kind == 'full':
+ basis = None
+ if rec[6] != '':
+ raise KnitCorrupt(self._access,
+ "full texts have no basis,"
+ " not %r for version_id %r"
+ % (rec[6], version_id))
+ elif rec[6] == '':
+ raise KnitCorrupt(self._access,
+ "delta texts require a basis,"
+ " for version_id %r"
+ % (version_id,))
+ try:
+ num_parents = int(rec[7])
+ except ValueError:
+ raise KnitCorrupt(self._access,
+ "Invalid number of parents. Expected an integer"
+ " not %s for version_id %r"
+ % (rec[7], version_id))
+ expected_num_fields = 8 + num_parents
+ if len(rec) != expected_num_fields:
+ raise KnitCorrupt(self._access,
+ "Invalid number of fields. Expected %d not %d"
+ " version_id %r"
+ % (expected_num_fields, len(rec), version_id))
+ parents = rec[8:8+num_parents]
+ return (rec, num_lines, rec[3], has_eol, text_kind, basis, parents)
+
+ def read_info_record(self, version_id, index_memo):
+ """Read records"""
+ raw_data = list(self._access.get_raw_records([index_memo]))[0]
+ df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
+ lines = df.readlines()
+ df.close()
+ header_line = lines.pop(0)
+ last_line = lines.pop()
+ if last_line != 'end %s\n' % (version_id,):
+ raise KnitCorrupt(self._access,
+ 'unexpected version end line %r, wanted %r'
+ % (last_line, version_id))
+ try:
+ (rec, num_lines, digest, has_eol, text_kind, basis,
+ parents) = self._check_info_header(version_id, header_line)
+ except Exception, e:
+ raise KnitCorrupt(self._access,
+ "While reading {%s} got %s(%s)"
+ % (version_id, e.__class__.__name__, str(e)))
+ if len(lines) != num_lines:
+ raise KnitCorrupt(self._access,
+ 'incorrect number of lines %s != %s'
+ ' for version {%s}'
+ % (len(lines), num_lines,
+ version_id))
+ return lines, digest, has_eol, text_kind, basis, parents
class InterKnit(InterVersionedFile):
@@ -2327,12 +2442,17 @@
if 'fulltext' in options:
content = self.source.factory.parse_fulltext(data, version_id)
lines = self.target.factory.lower_fulltext(content)
+ delta_parent = None
else:
delta = self.source.factory.parse_line_delta(data, version_id,
plain=True)
lines = self.target.factory.lower_line_delta(delta)
+ delta_parent = parents[0]
+ has_eol = not ('no-eol' in options)
return self.target._data._record_to_data(version_id, digest, lines,
- parents=parents)
+ parents=parents,
+ has_eol=has_eol,
+ delta_parent=delta_parent)
InterVersionedFile.register_optimiser(InterKnit)
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2007-10-05 05:52:45 +0000
+++ b/bzrlib/tests/test_knit.py 2007-10-26 21:27:31 +0000
@@ -18,7 +18,6 @@
from cStringIO import StringIO
import difflib
-import gzip
import sha
import sys
@@ -27,6 +26,7 @@
generate_ids,
knit,
pack,
+ tuned_gzip,
)
from bzrlib.errors import (
RevisionAlreadyPresent,
@@ -363,11 +363,7 @@
class LowLevelKnitDataTests(TestCase):
def create_gz_content(self, text):
- sio = StringIO()
- gz_file = gzip.GzipFile(mode='wb', fileobj=sio)
- gz_file.write(text)
- gz_file.close()
- return sio.getvalue()
+ return tuned_gzip.bytes_to_gzip(text)
def test_valid_knit_data(self):
sha1sum = sha.new('foo\nbar\n').hexdigest()
@@ -479,6 +475,116 @@
self.assertRaises(errors.KnitCorrupt, list,
data.read_records_iter_raw(records))
+ def test__record_to_data_basic(self):
+ knit_data = _KnitData(None)
+ data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ ['two\n', 'lines\n'])
+ self.assertEqual(data_len, len(gz_data))
+ data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data), mode='rb').read()
+ self.assertEqualDiff('version version-id 2 digest\n'
+ 'two\n'
+ 'lines\n'
+ 'end version-id\n',
+ data)
+
+ def test__record_to_data_extra(self):
+ knit_data = _KnitData(None)
+ data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ ['two\n', 'lines\n'],
+ ['dense\nlines\n'], # Intentionally different
+ parents=['parent-version-id'],
+ has_eol=False,
+ delta_parent='parent-version-id',
+ )
+ self.assertEqual(data_len, len(gz_data))
+ data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data), mode='rb').read()
+ self.assertEqualDiff('version version-id 2 digest\n'
+ 'dense\n'
+ 'lines\n'
+ 'end version-id\n',
+ data)
+
+
+class LowLevelKnitDataV2Tests(TestCase):
+
+ def test_read_info_record(self):
+ sha1sum = sha.new('foo\nbar\n').hexdigest()
+ gz_txt = tuned_gzip.bytes_to_gzip('version rev-id-1 2 %s eol full 0\n'
+ 'foo\n'
+ 'bar\n'
+ 'end rev-id-1\n'
+ % (sha1sum,))
+ transport = MockTransport([gz_txt])
+ access = _KnitAccess(transport, 'filename', None, None, False, False)
+ data = knit._KnitDataV2(access=access)
+
+ index_memo = (None, 0, len(gz_txt))
+ self.assertEqual((['foo\n', 'bar\n'], sha1sum, True, 'full', None, []),
+ data.read_info_record('rev-id-1', index_memo))
+
+ def test__record_to_data_basic(self):
+ knit_data = knit._KnitDataV2(None)
+ data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ ['two\n', 'lines\n'])
+ self.assertEqual(data_len, len(gz_data))
+ data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data),
+ mode='rb').read()
+ self.assertEqualDiff('version version-id 2 digest eol full 0\n'
+ 'two\n'
+ 'lines\n'
+ 'end version-id\n',
+ data)
+ transport = MockTransport([gz_data])
+ access = _KnitAccess(transport, 'filename', None, None, False, False)
+ knit_data = knit._KnitDataV2(access=access)
+ index_memo = (None, 0, len(gz_data))
+ self.assertEqual((['two\n', 'lines\n'], 'digest', True, 'full', None, []),
+ knit_data.read_info_record('version-id', index_memo))
+
+ def test__record_to_data_extra(self):
+ knit_data = knit._KnitDataV2(None)
+ data_len, gz_data = knit_data._record_to_data('version-id', 'digest',
+ ['two\n', 'lines\n'],
+ ['dense\nlines\n'], # Intentionally different
+ parents=['parent-version-id'],
+ has_eol=False,
+ delta_parent='delta-version-id',
+ )
+ self.assertEqual(data_len, len(gz_data))
+ data = tuned_gzip.GzipFile(fileobj=StringIO(gz_data),
+ mode='rb').read()
+ self.assertEqualDiff('version version-id 2 digest no-eol'
+ ' linedelta delta-version-id'
+ ' 1 parent-version-id\n'
+ 'dense\n'
+ 'lines\n'
+ 'end version-id\n',
+ data)
+ transport = MockTransport([gz_data])
+ access = _KnitAccess(transport, 'filename', None, None, False, False)
+ knit_data = knit._KnitDataV2(access=access)
+ index_memo = (None, 0, len(gz_data))
+ self.assertEqual((['dense\n', 'lines\n'], 'digest', False,
+ 'linedelta', 'delta-version-id',
+ ['parent-version-id']),
+ knit_data.read_info_record('version-id', index_memo))
+
+ def test__check_header_v1(self):
+ # This is the v1 format
+ gz_data = tuned_gzip.bytes_to_gzip(
+ 'version rev-id-1 2 digest\n'
+ 'foo\n'
+ 'bar\n'
+ 'end rev-id-1\n'
+ )
+ transport = MockTransport([gz_data])
+ access = _KnitAccess(transport, 'filename', None, None, False, False)
+ knit_data = knit._KnitDataV2(access=access)
+ index_memo = (None, 0, len(gz_data))
+ self.assertRaises(errors.KnitCorrupt,
+ knit_data.read_info_record,
+ 'rev-id-1', index_memo)
+
class LowLevelKnitIndexTests(TestCase):
More information about the bazaar-commits
mailing list