Rev 3741: Teach VersionedFiles how to allocate keys based on content hashes. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Sep 29 05:21:41 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 3741
revision-id: robertc at robertcollins.net-20080929042134-f2jfg1dmxraydsbo
parent: robertc at robertcollins.net-20080926023230-fgakdzuk9d3li0tj
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2008-09-29 14:21:34 +1000
message:
  Teach VersionedFiles how to allocate keys based on content hashes.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
=== modified file 'NEWS'
--- a/NEWS	2008-09-25 23:15:43 +0000
+++ b/NEWS	2008-09-29 04:21:34 +0000
@@ -120,6 +120,10 @@
     * New win32utils.get_local_appdata_location() provides access to a local
       directory for storing data.  (Mark Hammond)
 
+    * Passing None in as the last element of a key tuple to
+      ``VersionedFiles.add`` will now cause a content-hash-key to be
+      allocated. (Robert Collins)
+
     * The Repository model has been extended to allow some formats to
       expose data via CHK based lookups (Though no formats support this as
       yet). (Robert Collins)

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-09-11 04:16:24 +0000
+++ b/bzrlib/knit.py	2008-09-29 04:21:34 +0000
@@ -795,9 +795,13 @@
                 lines[-1] = lines[-1] + '\n'
                 line_bytes += '\n'
 
-        for element in key:
+        for element in key[:-1]:
             if type(element) != str:
                 raise TypeError("key contains non-strings: %r" % (key,))
+        if key[-1] is None:
+            key = key[:-1] + ('sha1:' + digest,)
+        elif type(key[-1]) != str:
+                raise TypeError("key contains non-strings: %r" % (key,))
         # Knit hunks are still last-element only
         version_id = key[-1]
         content = self._factory.make(lines, version_id)
@@ -862,9 +866,10 @@
     def _check_add(self, key, lines, random_id, check_content):
         """check that version_id and lines are safe to add."""
         version_id = key[-1]
-        if contains_whitespace(version_id):
-            raise InvalidRevisionId(version_id, self)
-        self.check_not_reserved_id(version_id)
+        if version_id is not None:
+            if contains_whitespace(version_id):
+                raise InvalidRevisionId(version_id, self)
+            self.check_not_reserved_id(version_id)
         # TODO: If random_id==False and the key is already present, we should
         # probably check that the existing content is identical to what is
         # being inserted, and otherwise raise an exception.  This would make

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-07-17 09:13:16 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-09-29 04:21:34 +0000
@@ -176,7 +176,7 @@
 
 
 def get_diamond_files(files, key_length, trailing_eol=True, left_only=False,
-    nograph=False):
+    nograph=False, nokeys=False):
     """Get a diamond graph to exercise deltas and merges.
 
     This creates a 5-node graph in files. If files supports 2-length keys two
@@ -189,8 +189,12 @@
     :param nograph: If True, do not provide parents to the add_lines calls;
         this is useful for tests that need inserted data but have graphless
         stores.
+    :param nokeys: If True, pass None is as the key for all insertions.
+        Currently implies nograph.
     :return: The results of the add_lines calls.
     """
+    if nokeys:
+        nograph = True
     if key_length == 1:
         prefixes = [()]
     else:
@@ -207,25 +211,30 @@
         else:
             result = [prefix + suffix for suffix in suffix_list]
             return result
+    def get_key(suffix):
+        if nokeys:
+            return (None, )
+        else:
+            return (suffix,)
     # we loop over each key because that spreads the inserts across prefixes,
     # which is how commit operates.
     for prefix in prefixes:
-        result.append(files.add_lines(prefix + ('origin',), (),
+        result.append(files.add_lines(prefix + get_key('origin'), (),
             ['origin' + last_char]))
     for prefix in prefixes:
-        result.append(files.add_lines(prefix + ('base',),
+        result.append(files.add_lines(prefix + get_key('base'),
             get_parents([('origin',)]), ['base' + last_char]))
     for prefix in prefixes:
-        result.append(files.add_lines(prefix + ('left',),
+        result.append(files.add_lines(prefix + get_key('left'),
             get_parents([('base',)]),
             ['base\n', 'left' + last_char]))
     if not left_only:
         for prefix in prefixes:
-            result.append(files.add_lines(prefix + ('right',),
+            result.append(files.add_lines(prefix + get_key('right'),
                 get_parents([('base',)]),
                 ['base\n', 'right' + last_char]))
         for prefix in prefixes:
-            result.append(files.add_lines(prefix + ('merged',),
+            result.append(files.add_lines(prefix + get_key('merged'),
                 get_parents([('left',), ('right',)]),
                 ['base\n', 'left\n', 'right\n', 'merged' + last_char]))
     return result
@@ -1485,10 +1494,11 @@
         """Each parameterised test can be constructed on a transport."""
         files = self.get_versionedfiles()
 
-    def get_diamond_files(self, files, trailing_eol=True, left_only=False):
+    def get_diamond_files(self, files, trailing_eol=True, left_only=False,
+        nokeys=False):
         return get_diamond_files(files, self.key_length,
             trailing_eol=trailing_eol, nograph=not self.graph,
-            left_only=left_only)
+            left_only=left_only, nokeys=nokeys)
 
     def test_add_lines_return(self):
         files = self.get_versionedfiles()
@@ -1521,6 +1531,60 @@
                 ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
                 results)
 
+    def test_add_lines_no_key_generates_chk_key(self):
+        files = self.get_versionedfiles()
+        # save code by using the stock data insertion helper.
+        adds = self.get_diamond_files(files, nokeys=True)
+        results = []
+        # We can only validate the first 2 elements returned from add_lines.
+        for add in adds:
+            self.assertEqual(3, len(add))
+            results.append(add[:2])
+        if self.key_length == 1:
+            self.assertEqual([
+                ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+                ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+                ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+                ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+                ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
+                results)
+            # Check the added items got CHK keys.
+            self.assertEqual(set([
+                ('sha1:00e364d235126be43292ab09cb4686cf703ddc17',),
+                ('sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44',),
+                ('sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1',),
+                ('sha1:a8478686da38e370e32e42e8a0c220e33ee9132f',),
+                ('sha1:ed8bce375198ea62444dc71952b22cfc2b09226d',),
+                ]),
+                files.keys())
+        elif self.key_length == 2:
+            self.assertEqual([
+                ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+                ('00e364d235126be43292ab09cb4686cf703ddc17', 7),
+                ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+                ('51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44', 5),
+                ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+                ('a8478686da38e370e32e42e8a0c220e33ee9132f', 10),
+                ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+                ('9ef09dfa9d86780bdec9219a22560c6ece8e0ef1', 11),
+                ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23),
+                ('ed8bce375198ea62444dc71952b22cfc2b09226d', 23)],
+                results)
+            # Check the added items got CHK keys.
+            self.assertEqual(set([
+                ('FileA', 'sha1:00e364d235126be43292ab09cb4686cf703ddc17'),
+                ('FileA', 'sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44'),
+                ('FileA', 'sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1'),
+                ('FileA', 'sha1:a8478686da38e370e32e42e8a0c220e33ee9132f'),
+                ('FileA', 'sha1:ed8bce375198ea62444dc71952b22cfc2b09226d'),
+                ('FileB', 'sha1:00e364d235126be43292ab09cb4686cf703ddc17'),
+                ('FileB', 'sha1:51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44'),
+                ('FileB', 'sha1:9ef09dfa9d86780bdec9219a22560c6ece8e0ef1'),
+                ('FileB', 'sha1:a8478686da38e370e32e42e8a0c220e33ee9132f'),
+                ('FileB', 'sha1:ed8bce375198ea62444dc71952b22cfc2b09226d'),
+                ]),
+                files.keys())
+
     def test_empty_lines(self):
         """Empty files can be stored."""
         f = self.get_versionedfiles()

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-07-17 09:40:51 +0000
+++ b/bzrlib/versionedfile.py	2008-09-29 04:21:34 +0000
@@ -714,7 +714,8 @@
         check_content=True):
         """Add a text to the store.
 
-        :param key: The key tuple of the text to add.
+        :param key: The key tuple of the text to add. If the last element is
+            None, a CHK string will be generated during the addition.
         :param parents: The parents key tuples of the text to add.
         :param lines: A list of lines. Each line must be a bytestring. And all
             of them except the last must be terminated with \n and contain no

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-07-16 18:14:23 +0000
+++ b/bzrlib/weave.py	2008-09-29 04:21:34 +0000
@@ -399,6 +399,7 @@
         version_id
             Symbolic name for this version.
             (Typically the revision-id of the revision that added it.)
+            If None, a name will be allocated based on the hash. (sha1:SHAHASH)
 
         parents
             List or set of direct parent version numbers.
@@ -414,6 +415,8 @@
             sha1 = sha_strings(lines)
         if sha1 == nostore_sha:
             raise errors.ExistingContent
+        if version_id is None:
+            version_id = "sha1:" + sha1
         if version_id in self._name_map:
             return self._check_repeated_add(version_id, parents, lines, sha1)
 




More information about the bazaar-commits mailing list