Rev 3032: Merge the data-stream ordering fix. in http://people.ubuntu.com/~robertc/baz2.0/pack.read-locks
Robert Collins
robertc at robertcollins.net
Mon Nov 26 21:02:05 GMT 2007
At http://people.ubuntu.com/~robertc/baz2.0/pack.read-locks
------------------------------------------------------------
revno: 3032
revision-id:robertc at robertcollins.net-20071126210129-rnbasaov7i0iia8g
parent: robertc at robertcollins.net-20071126204930-3ykndfwze7ju7l9e
parent: mbp at sourcefrog.net-20071126080428-ccgoqxa6hr4xawf0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack.read-locks
timestamp: Tue 2007-11-27 08:01:29 +1100
message:
Merge the data-stream ordering fix.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 3015.1.8.1.4
revision-id:mbp at sourcefrog.net-20071126080428-ccgoqxa6hr4xawf0
parent: mbp at sourcefrog.net-20071126072017-h939i3145omvpore
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: 164637-delta-order
timestamp: Mon 2007-11-26 19:04:28 +1100
message:
update NEWS
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
------------------------------------------------------------
revno: 3015.1.8.1.3
revision-id:mbp at sourcefrog.net-20071126072017-h939i3145omvpore
parent: mbp at sourcefrog.net-20071126064616-ds3a78w2s1erqjhl
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: 164637-delta-order
timestamp: Mon 2007-11-26 18:20:17 +1100
message:
Update tests for new ordering of results from get_data_stream - the order is not defined by the interface, but is stable
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 3015.1.8.1.2
revision-id:mbp at sourcefrog.net-20071126064616-ds3a78w2s1erqjhl
parent: mbp at sourcefrog.net-20071126023458-h63ii4ddopag1vq3
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: 164637-delta-order
timestamp: Mon 2007-11-26 17:46:16 +1100
message:
Fix KnitVersionedFile.get_data_stream to not assume .versions() is sorted. (lp:165106)
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
------------------------------------------------------------
revno: 3015.1.8.1.1
revision-id:mbp at sourcefrog.net-20071126023458-h63ii4ddopag1vq3
parent: pqm at pqm.ubuntu.com-20071125173141-g89p6qnnh90tk5zi
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: 164637-delta-order
timestamp: Mon 2007-11-26 13:34:58 +1100
message:
test_get_data_stream should indicate NotApplicable rather than silently passing
modified:
bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
=== modified file 'NEWS'
--- a/NEWS 2007-11-26 13:55:51 +0000
+++ b/NEWS 2007-11-26 21:01:29 +0000
@@ -60,6 +60,13 @@
BUG FIXES:
+ * Fix possible error in insert_data_stream when copying between
+ pack repositories over bzr+ssh or bzr+http.
+ KnitVersionedFile.get_data_stream now makes sure that requested
+ compression parents are sent before any delta hunks that depend
+ on them.
+ (Martin Pool, #164637)
+
* A progress bar has been added for knitpack -> knitpack fetching.
(Robert Collins, #157789)
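
The NEWS entry above describes the new ordering rule informally. The following is a minimal standalone sketch of that rule, not bzrlib code; the function name order_by_compression_parent and the {version: compression parent} input shape are illustrative assumptions. The same ready/deferred structure appears in the real KnitVersionedFile.get_data_stream change further down.

    def order_by_compression_parent(requested, compression_parent):
        """Return requested versions so each parent precedes its dependants.

        requested: iterable of version ids the client asked for.
        compression_parent: dict mapping version id -> compression parent
            id, or None for a fulltext.
        """
        requested_set = frozenset(requested)
        ready = []        # versions whose parent is absent or not requested
        deferred = {}     # parent id -> list of children waiting on it
        for version_id in requested:
            parent = compression_parent.get(version_id)
            if parent in requested_set:
                # must wait until the parent has been emitted
                deferred.setdefault(parent, []).append(version_id)
            else:
                # a fulltext, or a delta whose parent was not requested
                ready.append(version_id)
        result = []
        while ready:
            version_id = ready.pop(0)
            result.append(version_id)
            # children may follow as soon as their parent has been emitted
            ready[:0] = deferred.pop(version_id, [])
        assert not deferred, "requested versions left waiting on a parent"
        return result

    # Example: 'text-b' is a delta against 'text-a', so 'text-a' comes first
    # regardless of the order the caller used.
    print(order_by_compression_parent(
        ['text-b', 'text-a'], {'text-a': None, 'text-b': 'text-a'}))
    # -> ['text-a', 'text-b']
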
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-11-26 19:35:11 +0000
+++ b/bzrlib/knit.py 2007-11-26 21:01:29 +0000
@@ -576,7 +576,11 @@
"""Get a data stream for the specified versions.
Versions may be returned in any order, not necessarily the order
- specified.
+ specified. They are returned in a partial order by compression
+ parent, so that the deltas can be applied as the data stream is
+ inserted; however note that compression parents will not be sent
+ unless they were specifically requested, as the client may already
+ have them.
:param required_versions: The exact set of versions to be extracted.
Unlike some other knit methods, this is not used to generate a
@@ -585,35 +589,49 @@
:returns: format_signature, list of (version, options, length, parents),
reader_callable.
"""
- if not isinstance(required_versions, set):
- required_versions = set(required_versions)
- # we don't care about inclusions, the caller cares.
- # but we need to setup a list of records to visit.
+ required_version_set = frozenset(required_versions)
+ version_index = {}
+ # list of revisions that can just be sent without waiting for their
+ # compression parent
+ ready_to_send = []
+ # map from revision to the children based on it
+ deferred = {}
+ # first, read all relevant index data, enough to sort into the right
+ # order to return
for version_id in required_versions:
if not self.has_version(version_id):
raise RevisionNotPresent(version_id, self.filename)
- # Pick the desired versions out of the index in oldest-to-newest order
- version_list = []
- for version_id in self.versions():
- if version_id in required_versions:
- version_list.append(version_id)
-
- # create the list of version information for the result
- copy_queue_records = []
- copy_set = set()
- result_version_list = []
- for version_id in version_list:
options = self._index.get_options(version_id)
parents = self._index.get_parents_with_ghosts(version_id)
index_memo = self._index.get_position(version_id)
+ version_index[version_id] = (index_memo, options, parents)
+ if parents and parents[0] in required_version_set:
+ # must wait until the parent has been sent
+ deferred.setdefault(parents[0], []). \
+ append(version_id)
+ else:
+ # either a fulltext, or a delta whose parent the client did
+ # not ask for and presumably already has
+ ready_to_send.append(version_id)
+ # build a list of results to return, plus instructions for data to
+ # read from the file
+ copy_queue_records = []
+ result_version_list = []
+ while ready_to_send:
+ # XXX: pushing and popping lists may be a bit inefficient
+ version_id = ready_to_send.pop(0)
+ (index_memo, options, parents) = version_index[version_id]
copy_queue_records.append((version_id, index_memo))
none, data_pos, data_size = index_memo
- copy_set.add(version_id)
- # version, options, length, parents
result_version_list.append((version_id, options, data_size,
parents))
-
- # Read the compressed record data.
+ if version_id in deferred:
+ # now we can send all the children of this revision - we could
+ # put them in anywhere, but we hope that sending them soon
+ # after the fulltext will give good locality in the receiver
+ ready_to_send[:0] = deferred.pop(version_id)
+ assert len(deferred) == 0, \
+ "Still have compressed child versions waiting to be sent"
# XXX:
# From here down to the return should really be logic in the returned
# callable -- in a class that adapts read_records_iter_raw to read
@@ -623,7 +641,8 @@
(version_id2, options, _, parents) in \
izip(self._data.read_records_iter_raw(copy_queue_records),
result_version_list):
- assert version_id == version_id2, 'logic error, inconsistent results'
+ assert version_id == version_id2, \
+ 'logic error, inconsistent results'
raw_datum.append(raw_data)
pseudo_file = StringIO(''.join(raw_datum))
def read(length):
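
To see why the partial order matters on the receiving side, here is a hedged sketch of a stream consumer. It is not the real insert_data_stream API; the function name, parameters and error handling are assumptions for illustration. A line-delta can only be applied if its compression parent is already in the target or arrived earlier in the stream, which is what the ordering change above guarantees for requested parents.

    def apply_stream_records(present_versions, data_list, read):
        """Apply records in stream order; a delta needs its parent first.

        present_versions: set of version ids already in the target knit.
        data_list: list of (version_id, options, length, parents) tuples,
            in the order produced by get_data_stream.
        read: callable returning the next `length` raw bytes of the stream.
        """
        for version_id, options, length, parents in data_list:
            raw_bytes = read(length)
            if 'line-delta' in options:
                compression_parent = parents[0]
                if compression_parent not in present_versions:
                    raise ValueError(
                        'delta %r arrived before its compression parent %r'
                        % (version_id, compression_parent))
            # a real consumer would decompress raw_bytes and add the record
            present_versions.add(version_id)
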
=== modified file 'bzrlib/tests/repository_implementations/test_repository.py'
--- a/bzrlib/tests/repository_implementations/test_repository.py 2007-11-26 03:47:18 +0000
+++ b/bzrlib/tests/repository_implementations/test_repository.py 2007-11-26 21:01:29 +0000
@@ -29,7 +29,11 @@
from bzrlib.delta import TreeDelta
from bzrlib.inventory import Inventory, InventoryDirectory
from bzrlib.revision import NULL_REVISION, Revision
-from bzrlib.tests import TestCaseWithTransport, TestSkipped
+from bzrlib.tests import (
+ TestCaseWithTransport,
+ TestNotApplicable,
+ TestSkipped,
+ )
from bzrlib.tests.repository_implementations import TestCaseWithRepository
from bzrlib.transport import get_transport
from bzrlib.upgrade import upgrade
@@ -377,8 +381,8 @@
try:
stream = repo.get_data_stream(['rev_id'])
except NotImplementedError:
- # Not all repositories support streaming.
- return
+ raise TestNotApplicable("%s doesn't support get_data_stream"
+ % repo._format)
# The data stream is an iterator that yields (name, versioned_file)
# pairs for:
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2007-11-09 17:50:31 +0000
+++ b/bzrlib/tests/test_knit.py 2007-11-26 07:20:17 +0000
@@ -1605,6 +1605,44 @@
self.assertEqual(expected_data_list, data_list)
self.assertRecordContentEqual(k1, 'text-m', reader_callable(None))
+ def test_get_data_stream_unordered_index(self):
+ """Get a data stream when the knit index reports versions out of order.
+
+ https://bugs.launchpad.net/bzr/+bug/164637
+ """
+ k1 = self.make_test_knit()
+ test_data = [
+ ('text-a', [], TEXT_1),
+ ('text-b', ['text-a'], TEXT_1),
+ ('text-c', [], TEXT_1),
+ ('text-d', ['text-c'], TEXT_1),
+ ('text-m', ['text-b', 'text-d'], TEXT_1),
+ ]
+ for version_id, parents, lines in test_data:
+ k1.add_lines(version_id, parents, split_lines(lines))
+ # monkey-patch versions method to return out of order, as if coming
+ # from multiple independently indexed packs
+ original_versions = k1.versions
+ k1.versions = lambda: reversed(original_versions())
+ expected_data_list = [
+ ('text-a', ['fulltext'], 122, []),
+ ('text-b', ['line-delta'], 84, ['text-a'])]
+ # now check the fulltext is first and the delta second
+ format, data_list, _ = k1.get_data_stream(['text-a', 'text-b'])
+ self.assertEqual('knit-plain', format)
+ self.assertEqual(expected_data_list, data_list)
+ # and that's true if we ask for them in the opposite order too
+ format, data_list, _ = k1.get_data_stream(['text-b', 'text-a'])
+ self.assertEqual(expected_data_list, data_list)
+ # also try requesting more versions
+ format, data_list, _ = k1.get_data_stream([
+ 'text-m', 'text-b', 'text-a'])
+ self.assertEqual([
+ ('text-a', ['fulltext'], 122, []),
+ ('text-b', ['line-delta'], 84, ['text-a']),
+ ('text-m', ['line-delta'], 84, ['text-b', 'text-d']),
+ ], data_list)
+
def test_get_stream_ghost_parent(self):
"""Get a data stream for a version with a ghost parent."""
k1 = self.make_test_knit()
@@ -1635,14 +1673,17 @@
('text-d', ['text-c'], TEXT_1),
('text-m', ['text-b', 'text-d'], TEXT_1),
]
+ for version_id, parents, lines in test_data:
+ k1.add_lines(version_id, parents, split_lines(lines))
+
+ # This test is actually a bit strict as the order in which they're
+ # returned is not defined. This matches the current (deterministic)
+ # behaviour.
expected_data_list = [
# version, options, length, parents
+ ('text-d', ['line-delta'], 84, ['text-c']),
('text-b', ['line-delta'], 84, ['text-a']),
- ('text-d', ['line-delta'], 84, ['text-c']),
]
- for version_id, parents, lines in test_data:
- k1.add_lines(version_id, parents, split_lines(lines))
-
# Note that even though we request the revision IDs in a particular
# order, the data stream may return them in any order it likes. In this
# case, they'll be in the order they were inserted into the knit.
@@ -1650,8 +1691,9 @@
['text-d', 'text-b'])
self.assertEqual('knit-plain', format)
self.assertEqual(expected_data_list, data_list)
+ # must match order they're returned
+ self.assertRecordContentEqual(k1, 'text-d', reader_callable(84))
self.assertRecordContentEqual(k1, 'text-b', reader_callable(84))
- self.assertRecordContentEqual(k1, 'text-d', reader_callable(84))
self.assertEqual('', reader_callable(None),
"There should be no more bytes left to read.")
@@ -1673,17 +1715,20 @@
('text-d', ['text-c'], TEXT_1),
('text-m', ['text-b', 'text-d'], TEXT_1),
]
+ for version_id, parents, lines in test_data:
+ k1.add_lines(version_id, parents, split_lines(lines))
+
+ # This test is actually a bit strict as the order in which they're
+ # returned is not defined. This matches the current (deterministic)
+ # behaviour.
expected_data_list = [
# version, options, length, parents
('text-a', ['fulltext'], 122, []),
('text-b', ['line-delta'], 84, ['text-a']),
+ ('text-m', ['line-delta'], 84, ['text-b', 'text-d']),
('text-c', ['fulltext'], 121, []),
('text-d', ['line-delta'], 84, ['text-c']),
- ('text-m', ['line-delta'], 84, ['text-b', 'text-d']),
]
- for version_id, parents, lines in test_data:
- k1.add_lines(version_id, parents, split_lines(lines))
-
format, data_list, reader_callable = k1.get_data_stream(
['text-a', 'text-b', 'text-c', 'text-d', 'text-m'])
self.assertEqual('knit-plain', format)
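
A brief usage note on the contract these tests exercise: the reader_callable returned by get_data_stream yields raw record bytes in the same order as data_list, and an empty string once everything has been read. A minimal sketch, assuming k1 is a knit populated as in the tests above:

    format, data_list, reader_callable = k1.get_data_stream(['text-d', 'text-b'])
    for version_id, options, length, parents in data_list:
        # reads must follow data_list order; each call returns exactly the
        # raw bytes of the record at the same position in data_list
        raw_bytes = reader_callable(length)
    assert reader_callable(None) == '', 'no bytes should remain'
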
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2007-11-26 02:05:38 +0000
+++ b/bzrlib/tests/test_repository.py 2007-11-26 21:01:29 +0000
@@ -432,19 +432,22 @@
('text-d', ['text-c'], test_knit.TEXT_1),
('text-m', ['text-b', 'text-d'], test_knit.TEXT_1),
]
+ # This test is actually a bit strict as the order in which they're
+ # returned is not defined. This matches the current (deterministic)
+ # behaviour.
expected_data_list = [
# version, options, parents
('text-a', ['fulltext'], []),
('text-b', ['line-delta'], ['text-a']),
+ ('text-m', ['line-delta'], ['text-b', 'text-d']),
('text-c', ['fulltext'], []),
('text-d', ['line-delta'], ['text-c']),
- ('text-m', ['line-delta'], ['text-b', 'text-d']),
]
for version_id, parents, lines in test_data:
k1.add_lines(version_id, parents, test_knit.split_lines(lines))
bytes = knitrepo._get_stream_as_bytes(
- k1, ['text-a', 'text-b', 'text-c', 'text-d', 'text-m'])
+ k1, ['text-a', 'text-b', 'text-m', 'text-c', 'text-d', ])
data = bencode.bdecode(bytes)
format = data.pop(0)