Rev 3354: Finish adapters for annotated knits to unannotated knits and full texts. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files
Robert Collins
robertc at robertcollins.net
Fri Apr 18 06:02:54 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/versioned_files
------------------------------------------------------------
revno: 3354
revision-id: robertc at robertcollins.net-20080418050245-07ltcfhagcw3xhnb
parent: robertc at robertcollins.net-20080416040740-g3cjhoaez95b7gsr
committer: Robert Collins <robertc at robertcollins.net>
branch nick: data_stream_revamp
timestamp: Fri 2008-04-18 15:02:45 +1000
message:
Finish adapters for annotated knits to unannotated knits and full texts.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-04-16 04:07:40 +0000
+++ b/bzrlib/knit.py 2008-04-18 05:02:45 +0000
@@ -142,6 +142,76 @@
INDEX_SUFFIX = '.kndx'
+class KnitAdapter(object):
+ """Base class for knit record adaption."""
+
+ def __init__(self):
+ self._data = _KnitData(None)
+ self._annotate_factory = KnitAnnotateFactory()
+ self._plain_factory = KnitPlainFactory()
+
+
+class FTAnnotatedToUnannotated(KnitAdapter):
+ """An adapter from FT annotated knits to unannotated ones."""
+
+ def get_bytes(self, factory, annotated_compressed_bytes):
+ rec, contents = \
+ self._data._parse_record_unchecked(annotated_compressed_bytes)
+ content = self._annotate_factory.parse_fulltext(contents, rec[1])
+ size, bytes = self._data._record_to_data(rec[1], rec[3], content.text())
+ return bytes
+
+
+class DeltaAnnotatedToUnannotated(KnitAdapter):
+ """An adapter for deltas from annotated to unannotated."""
+
+ def get_bytes(self, factory, annotated_compressed_bytes):
+ rec, contents = \
+ self._data._parse_record_unchecked(annotated_compressed_bytes)
+ delta = self._annotate_factory.parse_line_delta(contents, rec[1],
+ plain=True)
+ contents = self._plain_factory.lower_line_delta(delta)
+ size, bytes = self._data._record_to_data(rec[1], rec[3], contents)
+ return bytes
+
+
+class FTAnnotatedToFullText(KnitAdapter):
+ """An adapter from FT annotated knits to full texts."""
+
+ def get_bytes(self, factory, annotated_compressed_bytes):
+ rec, contents = \
+ self._data._parse_record_unchecked(annotated_compressed_bytes)
+ content, delta = self._annotate_factory.parse_record(factory.key[0],
+ contents, factory._build_details, None)
+ return ''.join(content.text())
+
+
+class DeltaAnnotatedToFullText(KnitAdapter):
+ """An adapter for deltas from annotated knits to full texts."""
+
+ def __init__(self, basis_vf):
+ """Create an adapter which accesses full texts from basis_vf.
+
+ :param basis_vf: A versioned file to access basis texts of deltas from.
+ """
+ KnitAdapter.__init__(self)
+ self._basis_vf = basis_vf
+
+ def get_bytes(self, factory, annotated_compressed_bytes):
+ rec, contents = \
+ self._data._parse_record_unchecked(annotated_compressed_bytes)
+ delta = self._annotate_factory.parse_line_delta(contents, rec[1],
+ plain=True)
+ compression_parent = factory.parents[0][0]
+ basis_lines = self._basis_vf.get_lines(compression_parent)
+ # Manually apply the delta because we have one annotated content and
+ # one plain.
+ basis_content = PlainKnitContent(basis_lines, compression_parent)
+ basis_content.apply_delta(delta, rec[1])
+ basis_content._should_strip_eol = factory._build_details[1]
+ return ''.join(basis_content.text())
+
+
class KnitContentFactory(ContentFactory):
"""Content factory for streaming from knits.
@@ -280,8 +350,7 @@
% (e,))
if self._should_strip_eol:
- anno, line = lines[-1]
- lines[-1] = (anno, line.rstrip('\n'))
+ lines[-1] = lines[-1].rstrip('\n')
return lines
def copy(self):
@@ -2518,45 +2587,54 @@
% (version_id, e.__class__.__name__, str(e)))
return df, rec
- def _check_header(self, version_id, line):
+ def _split_header(self, line):
rec = line.split()
if len(rec) != 4:
raise KnitCorrupt(self._access,
'unexpected number of elements in record header')
+ return rec
+
+ def _check_header_version(self, rec, version_id):
if rec[1] != version_id:
raise KnitCorrupt(self._access,
'unexpected version, wanted %r, got %r'
% (version_id, rec[1]))
+
+ def _check_header(self, version_id, line):
+ rec = self._split_header(line)
+ self._check_header_version(rec, version_id)
return rec
- def _parse_record(self, version_id, data):
+ def _parse_record_unchecked(self, data):
# profiling notes:
# 4168 calls in 2880 217 internal
# 4168 calls to _parse_record_header in 2121
# 4168 calls to readlines in 330
df = GzipFile(mode='rb', fileobj=StringIO(data))
-
try:
record_contents = df.readlines()
except Exception, e:
- raise KnitCorrupt(self._access,
- "While reading {%s} got %s(%s)"
- % (version_id, e.__class__.__name__, str(e)))
+ raise KnitCorrupt(self._access, "Corrupt compressed record %r, got %s(%s)" %
+ (data, e.__class__.__name__, str(e)))
header = record_contents.pop(0)
- rec = self._check_header(version_id, header)
-
+ rec = self._split_header(header)
last_line = record_contents.pop()
if len(record_contents) != int(rec[2]):
raise KnitCorrupt(self._access,
'incorrect number of lines %s != %s'
' for version {%s}'
% (len(record_contents), int(rec[2]),
- version_id))
+ rec[1]))
if last_line != 'end %s\n' % rec[1]:
raise KnitCorrupt(self._access,
'unexpected version end line %r, wanted %r'
- % (last_line, version_id))
+ % (last_line, rec[1]))
df.close()
+ return rec, record_contents
+
+ def _parse_record(self, version_id, data):
+ rec, record_contents = self._parse_record_unchecked(data)
+ self._check_header_version(rec, version_id)
return record_contents, rec[3]
def read_records_iter_raw(self, records):
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2008-04-16 04:07:40 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2008-04-18 05:02:45 +0000
@@ -34,6 +34,7 @@
RevisionAlreadyPresent,
WeaveParentMismatch
)
+from bzrlib import knit as _mod_knit
from bzrlib.knit import (
make_file_knit,
KnitAnnotateFactory,
@@ -46,11 +47,39 @@
from bzrlib.transport import get_transport
from bzrlib.transport.memory import MemoryTransport
from bzrlib.tsort import topo_sort
+from bzrlib.tuned_gzip import GzipFile
import bzrlib.versionedfile as versionedfile
from bzrlib.weave import WeaveFile
from bzrlib.weavefile import read_weave, write_weave
+def get_diamond_vf(f, trailing_eol=True):
+ """Get a diamond graph to exercise deltas and merges.
+
+ :param trailing_eol: If True end the last line with \n.
+ """
+ parents = {
+ 'origin': (),
+ 'base': (('origin',),),
+ 'left': (('base',),),
+ 'right': (('base',),),
+ 'merged': (('left',), ('right',)),
+ }
+ # insert a diamond graph to exercise deltas and merges.
+ if trailing_eol:
+ last_char = '\n'
+ else:
+ last_char = ''
+ f.add_lines('origin', [], ['origin' + last_char])
+ f.add_lines('base', ['origin'], ['base' + last_char])
+ f.add_lines('left', ['base'], ['base\n', 'left' + last_char])
+ f.add_lines('right', ['base'],
+ ['base\n', 'right' + last_char])
+ f.add_lines('merged', ['left', 'right'],
+ ['base\n', 'left\n', 'right\n', 'merged' + last_char])
+ return f, parents
+
+
class VersionedFileTestMixIn(object):
"""A mixin test class for testing VersionedFiles.
@@ -93,26 +122,6 @@
entries = f.get_record_stream([], 'unordered', False)
self.assertEqual([], list(entries))
- def get_diamond_vf(self):
- """Get a diamond graph to exercise deltas and merges."""
- f = self.get_file()
- parents = {
- 'origin': (),
- 'base': (('origin',),),
- 'left': (('base',),),
- 'right': (('base',),),
- 'merged': (('left',), ('right',)),
- }
- # insert a diamond graph to exercise deltas and merges.
- f.add_lines('origin', [], [])
- f.add_lines('base', ['origin'], ['base\n'])
- f.add_lines('left', ['base'], ['base\n', 'left\n'])
- f.add_lines('right', ['base'],
- ['base\n', 'right\n'])
- f.add_lines('merged', ['left', 'right'],
- ['base\n', 'left\n', 'right\n', 'merged\n'])
- return f, parents
-
def assertValidStorageKind(self, storage_kind):
"""Assert that storage_kind is a valid storage_kind."""
self.assertSubset([storage_kind],
@@ -132,7 +141,7 @@
def test_get_record_stream_interface(self):
"""each item in a stream has to provide a regular interface."""
- f, parents = self.get_diamond_vf()
+ f, parents = get_diamond_vf(self.get_file())
entries = f.get_record_stream(['merged', 'left', 'right', 'base'],
'unordered', False)
seen = set()
@@ -142,7 +151,7 @@
def test_get_record_stream_interface_ordered(self):
"""each item in a stream has to provide a regular interface."""
- f, parents = self.get_diamond_vf()
+ f, parents = get_diamond_vf(self.get_file())
entries = f.get_record_stream(['merged', 'left', 'right', 'base'],
'topological', False)
seen = []
@@ -155,7 +164,7 @@
def test_get_record_stream_interface_ordered_with_delta_closure(self):
"""each item in a stream has to provide a regular interface."""
- f, parents = self.get_diamond_vf()
+ f, parents = get_diamond_vf(self.get_file())
entries = f.get_record_stream(['merged', 'left', 'right', 'base'],
'topological', True)
seen = []
@@ -176,7 +185,7 @@
def test_get_record_stream_unknown_storage_kind_raises(self):
"""Asking for a storage kind that the stream cannot supply raises."""
- f, parents = self.get_diamond_vf()
+ f, parents = get_diamond_vf(self.get_file())
entries = f.get_record_stream(['merged', 'left', 'right', 'base'],
'unordered', False)
# We track the contents because we should be able to try, fail a
@@ -1351,3 +1360,89 @@
overlappedInsertExpected = ['aaa', '<<<<<<< ', 'xxx', 'yyy', '=======',
'xxx', '>>>>>>> ', 'bbb']
+
+
+class TestContentFactoryAdaption(TestCaseWithMemoryTransport):
+
+ def test_select_adaptor(self):
+ """Test that selecting an adaptor works."""
+ # self.assertEqual(versionedfile.
+
+ def get_knit(self):
+ return make_file_knit('knit', self.get_transport('.'), delta=True,
+ create=True)
+
+ def helpGetBytes(self, f, ft_adapter, delta_adapter):
+ """grab the interested adapted texts for tests."""
+ # origin is a fulltext
+ entries = f.get_record_stream(['origin'], 'unordered', False)
+ base = entries.next()
+ ft_data = ft_adapter.get_bytes(base, base.get_bytes_as(base.storage_kind))
+ # merged is both a delta and multiple parents.
+ entries = f.get_record_stream(['merged'], 'unordered', False)
+ merged = entries.next()
+ delta_data = delta_adapter.get_bytes(merged,
+ merged.get_bytes_as(merged.storage_kind))
+ return ft_data, delta_data
+
+ def test_deannotation_noeol(self):
+ """Test converting annotated knits to unannotated knits."""
+ # we need a full text, and a delta
+ f, parents = get_diamond_vf(self.get_knit(), trailing_eol=False)
+ ft_data, delta_data = self.helpGetBytes(f,
+ _mod_knit.FTAnnotatedToUnannotated(),
+ _mod_knit.DeltaAnnotatedToUnannotated())
+ self.assertEqual(
+ 'version origin 1 b284f94827db1fa2970d9e2014f080413b547a7e\n'
+ 'origin\n'
+ 'end origin\n',
+ GzipFile(mode='rb', fileobj=StringIO(ft_data)).read())
+ self.assertEqual(
+ 'version merged 4 32c2e79763b3f90e8ccde37f9710b6629c25a796\n'
+ '1,2,3\nleft\nright\nmerged\nend merged\n',
+ GzipFile(mode='rb', fileobj=StringIO(delta_data)).read())
+
+ def test_deannotation(self):
+ """Test converting annotated knits to unannotated knits."""
+ # we need a full text, and a delta
+ f, parents = get_diamond_vf(self.get_knit())
+ ft_data, delta_data = self.helpGetBytes(f,
+ _mod_knit.FTAnnotatedToUnannotated(),
+ _mod_knit.DeltaAnnotatedToUnannotated())
+ self.assertEqual(
+ 'version origin 1 00e364d235126be43292ab09cb4686cf703ddc17\n'
+ 'origin\n'
+ 'end origin\n',
+ GzipFile(mode='rb', fileobj=StringIO(ft_data)).read())
+ self.assertEqual(
+ 'version merged 3 ed8bce375198ea62444dc71952b22cfc2b09226d\n'
+ '2,2,2\nright\nmerged\nend merged\n',
+ GzipFile(mode='rb', fileobj=StringIO(delta_data)).read())
+
+ def test_annotated_to_fulltext_no_eol(self):
+ """Test adapting annotated knits to full texts (for -> weaves)."""
+ # we need a full text, and a delta
+ f, parents = get_diamond_vf(self.get_knit(), trailing_eol=False)
+ # Reconstructing a full text requires a backing versioned file, and it
+ # must have the base lines requested from it.
+ logged_vf = versionedfile.RecordingVersionedFileDecorator(f)
+ ft_data, delta_data = self.helpGetBytes(f,
+ _mod_knit.FTAnnotatedToFullText(),
+ _mod_knit.DeltaAnnotatedToFullText(logged_vf))
+ self.assertEqual('origin', ft_data)
+ self.assertEqual('base\nleft\nright\nmerged', delta_data)
+ self.assertEqual([('get_lines', 'left')], logged_vf.calls)
+
+ def test_annotated_to_fulltext(self):
+ """Test adapting annotated knits to full texts (for -> weaves)."""
+ # we need a full text, and a delta
+ f, parents = get_diamond_vf(self.get_knit())
+ # Reconstructing a full text requires a backing versioned file, and it
+ # must have the base lines requested from it.
+ logged_vf = versionedfile.RecordingVersionedFileDecorator(f)
+ ft_data, delta_data = self.helpGetBytes(f,
+ _mod_knit.FTAnnotatedToFullText(),
+ _mod_knit.DeltaAnnotatedToFullText(logged_vf))
+ self.assertEqual('origin\n', ft_data)
+ self.assertEqual('base\nleft\nright\nmerged\n', delta_data)
+ self.assertEqual([('get_lines', 'left')], logged_vf.calls)
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-04-16 04:07:40 +0000
+++ b/bzrlib/versionedfile.py 2008-04-18 05:02:45 +0000
@@ -553,6 +553,28 @@
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
+class RecordingVersionedFileDecorator(object):
+ """A minimal versioned file that records calls made on it.
+
+ Only enough methods have been added to support tests using it to date.
+
+ :ivar calls: A list of the calls made; can be reset at any time by
+ assigning [] to it.
+ """
+
+ def __init__(self, backing_vf):
+ """Create a RecordingVersionedFileDecorator decorating backing_vf.
+
+ :param backing_vf: The versioned file to answer all methods.
+ """
+ self._backing_vf = backing_vf
+ self.calls = []
+
+ def get_lines(self, version_ids):
+ self.calls.append(("get_lines", version_ids))
+ return self._backing_vf.get_lines(version_ids)
+
+
class _PlanMergeVersionedFile(object):
"""A VersionedFile for uncommitted and committed texts.
More information about the bazaar-commits
mailing list