Rev 3741: Teach VersionedFiles how to allocate keys based on content hashes. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Mon Sep 29 05:21:41 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3741
revision-id: robertc at robertcollins.net-20080929042134-f2jfg1dmxraydsbo
parent: robertc at robertcollins.net-20080926023230-fgakdzuk9d3li0tj
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2008-09-29 14:21:34 +1000
message:
Teach VersionedFiles how to allocate keys based on content hashes.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
=== modified file 'NEWS'
--- a/NEWS 2008-09-25 23:15:43 +0000
+++ b/NEWS 2008-09-29 04:21:34 +0000
@@ -120,6 +120,10 @@
* New win32utils.get_local_appdata_location() provides access to a local
directory for storing data. (Mark Hammond)
+ * Passing None in as the last element of a key tuple to
+ ``VersionedFiles.add`` will now cause a content-hash-key to be
+ allocated. (Robert Collins)
+
* The Repository model has been extended to allow some formats to
expose data via CHK based lookups (Though no formats support this as
yet). (Robert Collins)
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-09-11 04:16:24 +0000
+++ b/bzrlib/knit.py 2008-09-29 04:21:34 +0000
@@ -795,9 +795,13 @@
lines[-1] = lines[-1] + '\n'
line_bytes += '\n'
- for element in key:
+ for element in key[:-1]:
if type(element) != str:
raise TypeError("key contains non-strings: %r" % (key,))
+ if key[-1] is None:
+ key = key[:-1] + ('sha1:' + digest,)
+ elif type(key[-1]) != str:
+ raise TypeError("key contains non-strings: %r" % (key,))
# Knit hunks are still last-element only
version_id = key[-1]
content = self._factory.make(lines, version_id)
@@ -862,9 +866,10 @@
def _check_add(self, key, lines, random_id, check_content):
"""check that version_id and lines are safe to add."""
version_id = key[-1]
- if contains_whitespace(version_id):
- raise InvalidRevisionId(version_id, self)
- self.check_not_reserved_id(version_id)
+ if version_id is not None:
+ if contains_whitespace(version_id):
+ raise InvalidRevisionId(version_id, self)
+ self.check_not_reserved_id(version_id)
# TODO: If random_id==False and the key is already present, we should
# probably check that the existing content is identical to what is
# being inserted, and otherwise raise an exception. This would make
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2008-07-17 09:13:16 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2008-09-29 04:21:34 +0000
@@ -176,7 +176,7 @@
def get_diamond_files(files, key_length, trailing_eol=True, left_only=False,
- nograph=False):
+ nograph=False, nokeys=False):
"""Get a diamond graph to exercise deltas and merges.
This creates a 5-node graph in files. If files supports 2-length keys two
@@ -189,8 +189,12 @@
:param nograph: If True, do not provide parents to the add_lines calls;
this is useful for tests that need inserted data but have graphless
stores.
+ :param nokeys: If True, pass None in as the key for all insertions.
+ Currently implies nograph.
:return: The results of the add_lines calls.
"""
+ if nokeys:
+ nograph = True
if key_length == 1:
prefixes = [()]
else:
@@ -207,25 +211,30 @@
else:
result = [prefix + suffix for suffix in suffix_list]
return result
+ def get_key(suffix):
+ if nokeys:
+ return (None, )
+ else:
+ return (suffix,)
# we loop over each key because that spreads the inserts across prefixes,
# which is how commit operates.
for prefix in prefixes:
- result.append(files.add_lines(prefix + ('origin',), (),
+ result.append(files.add_lines(prefix + get_key('origin'), (),
['origin' + last_char]))
for prefix in prefixes:
- result.append(files.add_lines(prefix + ('base',),
+ result.append(files.add_lines(prefix + get_key('base'),
get_parents([('origin',)]), ['base' + last_char]))
for prefix in prefixes:
- result.append(files.add_lines(prefix + ('left',),
+ result.append(files.add_lines(prefix + get_key('left'),
get_parents([('base',)]),
['base\n', 'left' + last_char]))
if not left_only:
for prefix in prefixes:
- result.append(files.add_lines(prefix + ('right',),
+ result.append(files.add_lines(prefix + get_key('right'),
get_parents([('base',)]),
['base\n', 'right' + last_char]))
for prefix in prefixes:
- result.append(files.add_lines(prefix + ('merged',),
+ result.append(files.add_lines(prefix + get_key('merged'),
get_parents([('left',), ('right',)]),
['base\n', 'left\n', 'right\n', 'merged' + last_char]))
return result
@@ -1485,10 +1494,11 @@
"""Each parameterised test can be constructed on a transport."""
files = self.get_versionedfiles()
- def get_diamond_files(self, files, trailing_eol=True, left_only=False):
+ def get_diamond_files(self, files, trailing_eol=True, left_only=False,
+ nokeys=False):
return get_diamond_files(files, self.key_length,
trailing_eol=trailing_eol, nograph=not self.graph,
- left_only=left_only)
+ left_only=left_only, nokeys=nokeys)
def test_add_lines_return(self):
files = self.get_versionedfiles()
@@ -1521,6 +1531,60 @@
('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
results)
+ def test_add_lines_no_key_generates_chk_key(self):
+ files = self.get_versionedfiles()
+ # save code by using the stock data insertion helper.
+ adds = self.get_diamond_files(files, nokeys=True)
+ results = []
+ # We can only validate the first 2 elements returned from add_lines.
+ for add in adds:
+ self.assertEqual(3, len(add))
+ results.append(add[:2])
+ if self.key_length == 1:
+ self.assertEqual([
+ ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+ ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+ ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+ ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+ ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
+ results)
+ # Check the added items got CHK keys.
+ self.assertEqual(set([
+ ('sha1:00e364d235126be43292ab09cb4686cf703ddc17',),
+ ('sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44',),
+ ('sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1',),
+ ('sha1:a8478686da38e370e32e42e8a0c220e33ee9132f',),
+ ('sha1:ed8bce375198ea62444dc71952b22cfc2b09226d',),
+ ]),
+ files.keys())
+ elif self.key_length == 2:
+ self.assertEqual([
+ ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+ ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+ ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+ ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+ ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+ ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+ ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+ ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+ ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23),
+ ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
+ results)
+ # Check the added items got CHK keys.
+ self.assertEqual(set([
+ ('FileA', 'sha1:00e364d235126be43292ab09cb4686cf703ddc17'),
+ ('FileA', 'sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44'),
+ ('FileA', 'sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1'),
+ ('FileA', 'sha1:a8478686da38e370e32e42e8a0c220e33ee9132f'),
+ ('FileA', 'sha1:ed8bce375198ea62444dc71952b22cfc2b09226d'),
+ ('FileB', 'sha1:00e364d235126be43292ab09cb4686cf703ddc17'),
+ ('FileB', 'sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44'),
+ ('FileB', 'sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1'),
+ ('FileB', 'sha1:a8478686da38e370e32e42e8a0c220e33ee9132f'),
+ ('FileB', 'sha1:ed8bce375198ea62444dc71952b22cfc2b09226d'),
+ ]),
+ files.keys())
+
def test_empty_lines(self):
"""Empty files can be stored."""
f = self.get_versionedfiles()
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-07-17 09:40:51 +0000
+++ b/bzrlib/versionedfile.py 2008-09-29 04:21:34 +0000
@@ -714,7 +714,8 @@
check_content=True):
"""Add a text to the store.
- :param key: The key tuple of the text to add.
+ :param key: The key tuple of the text to add. If the last element is
+ None, a CHK string will be generated during the addition.
:param parents: The parents key tuples of the text to add.
:param lines: A list of lines. Each line must be a bytestring. And all
of them except the last must be terminated with \n and contain no
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2008-07-16 18:14:23 +0000
+++ b/bzrlib/weave.py 2008-09-29 04:21:34 +0000
@@ -399,6 +399,7 @@
version_id
Symbolic name for this version.
(Typically the revision-id of the revision that added it.)
+ If None, a name will be allocated based on the hash. (sha1:SHAHASH)
parents
List or set of direct parent version numbers.
@@ -414,6 +415,8 @@
sha1 = sha_strings(lines)
if sha1 == nostore_sha:
raise errors.ExistingContent
+ if version_id is None:
+ version_id = "sha1:" + sha1
if version_id in self._name_map:
return self._check_repeated_add(version_id, parents, lines, sha1)
More information about the bazaar-commits
mailing list