Rev 3167: Basic deserialisation to tuples done. in http://people.ubuntu.com/~robertc/baz2.0/inventory.journalled

Robert Collins robertc at robertcollins.net
Thu Jan 3 22:45:44 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/inventory.journalled

------------------------------------------------------------
revno: 3167
revision-id:robertc at robertcollins.net-20080103224539-l3vrykn2ia1h23yt
parent: robertc at robertcollins.net-20080103220818-ivwzayoouj30tlvn
committer: Robert Collins <robertc at robertcollins.net>
branch nick: inventory.journalled
timestamp: Fri 2008-01-04 09:45:39 +1100
message:
  Basic deserialisation to tuples done.
modified:
  bzrlib/journalled_inventory.py journalled_inventory-20080103020931-0ht5n40kwc0p7fy1-1
  bzrlib/tests/test_journalled_inv.py test_journalled_inv.-20080103012121-ny2w9slze5jgty8i-1
  doc/developers/inventory.txt   inventory.txt-20080103013957-opkrhxy6lmywmx4i-1
=== modified file 'bzrlib/journalled_inventory.py'
--- a/bzrlib/journalled_inventory.py	2008-01-03 22:08:18 +0000
+++ b/bzrlib/journalled_inventory.py	2008-01-03 22:45:39 +0000
@@ -34,7 +34,7 @@
     
     :param entry: An InventoryDirectory.
     """
-    return "dir"
+    return "dir\x00\x00"
 
 
 def _file_content(entry):
@@ -45,7 +45,7 @@
     size_sha = (entry.text_size, entry.text_sha1)
     if None in size_sha:
         raise errors.BzrError('Missing size or sha for %s' % entry.file_id)
-    return "file %s %s" % size_sha
+    return "file\x00%s\x00%s" % size_sha
 
 
 def _link_content(entry):
@@ -56,7 +56,7 @@
     target = entry.symlink_target
     if target is None:
         raise errors.BzrError('Missing target for %s' % entry.file_id)
-    return "link %s" % target
+    return "link\x00%s\x00" % target
 
 
 def _reference_content(entry):
@@ -67,21 +67,26 @@
     tree_revision = entry.reference_revision
     if tree_revision is None:
         raise errors.BzrError('Missing reference revision for %s' % entry.file_id)
-    return "tree %s" % tree_revision
+    return "tree\x00%s\x00" % tree_revision
 
 
 class _JournalEntry(object):
     """An individual entry in a journalled inventory."""
 
-    def __init__(self, parent_revision, tuples):
+    def __init__(self, parent_revision, by_id, versioned_root,
+        tree_references):
         """Create a _JournalEntry.
 
         :param parent_revision: The parent revision this entry is written
             against. null: indicates the start of a new delta chain.
-        :param tuples: The text-split items in this entry.
+        :param by_id: The text-split items in this entry indexed by id.
+        :param versioned_root: True if / paths will be versioned.
+        :param tree_references: True if tree references are supported.
         """
         self.parent_revision = parent_revision
-        self.tuples = tuples
+        self.by_id = by_id
+        self.versioned_root = versioned_root
+        self.tree_references = tree_references
 
 
 class EntryAccess(object):
@@ -101,6 +106,7 @@
         :param tree_references: If True support tree-reference entries.
         """
         self._versioned_root = versioned_root
+        self._tree_references = tree_references
         self._entry_to_content = {
             'directory': _directory_content,
             'file': _file_content,
@@ -155,7 +161,7 @@
             if last_modified is None:
                 raise errors.BzrError("no version for fileid %s" % file_id)
             content = self._entry_to_content[entry.kind](entry)
-        return ("%s %s %s %s %s\n" %
+        return ("%s\x00%s\x00%s\x00%s\x00%s\n" %
             (newpath_utf8, file_id, parent_id, last_modified, content))
 
     def parse_text_bytes(self, bytes):
@@ -171,4 +177,15 @@
         if len(lines) < 2 or not lines[1].startswith('parent: '):
             raise errors.BzrError('missing parent: marker')
         parent_id = lines[1][8:]
-        return _JournalEntry(parent_id, [])
+        by_id = {}
+        for line in lines[2:]:
+            newpath_utf8, file_id, parent_id, last_modified, content \
+                = line.split('\x00', 4)
+            parent_id = parent_id or None
+            by_id[file_id] = (newpath_utf8, file_id, parent_id,
+                last_modified, tuple(content.split('\x00')))
+        if len(by_id) + 2 != len(lines):
+            raise errors.BzrError(
+                "duplicate file id in journal entry %r" % lines)
+        return _JournalEntry(parent_id, by_id, self._versioned_root,
+            self._tree_references)

=== modified file 'bzrlib/tests/test_journalled_inv.py'
--- a/bzrlib/tests/test_journalled_inv.py	2008-01-03 22:08:18 +0000
+++ b/bzrlib/tests/test_journalled_inv.py	2008-01-03 22:45:39 +0000
@@ -37,24 +37,18 @@
 
 root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
-/ an-id  a@e\xe5ample.com--2004 dir
+/\x00an-id\x00\x00a@e\xe5ample.com--2004\x00dir\x00\x00
 """
 
 root_only_unversioned = """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
-/ TREE_ROOT  null: dir
-"""
-
-reference_lines = """format: bzr journalled inventory v1 (bzr 1.1)
-parent: null:
-/ TREE_ROOT  a@e\xe5ample.com--2004 dir
-/foo id TREE_ROOT changed tree subtree-version
-"""
-
-reference_lines = """format: bzr journalled inventory v1 (bzr 1.1)
-parent: null:
-/ TREE_ROOT  a@e\xe5ample.com--2004 dir
-/foo id TREE_ROOT changed tree subtree-version
+/\x00TREE_ROOT\x00\x00null:\x00dir\x00\x00
+"""
+
+reference_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+parent: null:
+/\x00TREE_ROOT\x00\x00a@e\xe5ample.com--2004\x00dir\x00\x00
+/foo\x00id\x00TREE_ROOT\x00changed\x00tree\x00subtree-version\x00
 """
 
 
@@ -232,31 +226,60 @@
             'format: bzr journalled inventory v1 (bzr 1.1)\n')
 
     def test_parse_empty(self):
-        journal = journalled_inventory.InventoryJournal(versioned_root=True,
-            tree_references=True)
-        journal_entry = journal.parse_text_bytes(empty_lines)
-        self.assertEqual(NULL_REVISION, journal_entry.parent_revision)
-        self.assertEqual([], journal_entry.tuples)
+        # quick loop to check that the parameters propagate to the generated
+        # entry.
+        for versioned_root in (False, True):
+            for tree_references in (False, True):
+                journal = journalled_inventory.InventoryJournal(
+                    versioned_root=versioned_root,
+                    tree_references=tree_references)
+                journal_entry = journal.parse_text_bytes(empty_lines)
+                self.assertEqual(NULL_REVISION, journal_entry.parent_revision)
+                self.assertEqual({}, journal_entry.by_id)
+                self.assertEqual(versioned_root, journal_entry.versioned_root)
+                self.assertEqual(tree_references, journal_entry.tree_references)
+
+    def test_parse_duplicate_key_errors(self):
+        journal = journalled_inventory.InventoryJournal(versioned_root=True,
+            tree_references=True)
+        double_root_lines = \
+"""format: bzr journalled inventory v1 (bzr 1.1)
+parent: null:
+/\x00an-id\x00\x00a@e\xe5ample.com--2004\x00dir\x00\x00
+/\x00an-id\x00\x00a@e\xe5ample.com--2004\x00dir\x00\x00
+"""
+        self.assertRaises(errors.BzrError,
+            journal.parse_text_bytes, double_root_lines)
+
+    def test_parse_versioned_root_only(self):
+        journal = journalled_inventory.InventoryJournal(versioned_root=True,
+            tree_references=True)
+        journal_entry = journal.parse_text_bytes(root_only_lines)
+        self.assertEqual({
+            'an-id':('/', 'an-id', None, 'a@e\xe5ample.com--2004',
+                     ('dir', '', '')),
+            },
+            journal_entry.by_id)
 
 
 class TestContent(TestCase):
 
     def test_dir(self):
         entry = inventory.make_entry('directory', 'a dir', None)
-        self.assertEqual('dir', journalled_inventory._directory_content(entry))
+        self.assertEqual('dir\x00\x00', journalled_inventory._directory_content(entry))
 
     def test_file_0_short_sha(self):
         file_entry = inventory.make_entry('file', 'a file', None, 'file-id')
         file_entry.text_sha1 = ''
         file_entry.text_size = 0
-        self.assertEqual('file 0 ',
+        self.assertEqual('file\x000\x00',
             journalled_inventory._file_content(file_entry))
 
     def test_file_10_foo(self):
         file_entry = inventory.make_entry('file', 'a file', None, 'file-id')
         file_entry.text_sha1 = 'foo'
         file_entry.text_size = 10
-        self.assertEqual('file 10 foo',
+        self.assertEqual('file\x0010\x00foo',
             journalled_inventory._file_content(file_entry))
 
     def test_file_without_size(self):
@@ -274,7 +297,13 @@
     def test_link_empty_target(self):
         entry = inventory.make_entry('symlink', 'a link', None)
         entry.symlink_target = ''
-        self.assertEqual('link ',
+        self.assertEqual('link\x00\x00',
+            journalled_inventory._link_content(entry))
+
+    def test_link_space_target(self):
+        entry = inventory.make_entry('symlink', 'a link', None)
+        entry.symlink_target = ' '
+        self.assertEqual('link\x00 \x00',
             journalled_inventory._link_content(entry))
 
     def test_link_no_target(self):
@@ -285,13 +314,13 @@
     def test_reference_null(self):
         entry = inventory.make_entry('tree-reference', 'a tree', None)
         entry.reference_revision = NULL_REVISION
-        self.assertEqual('tree null:',
+        self.assertEqual('tree\x00null:\x00',
             journalled_inventory._reference_content(entry))
 
     def test_reference_revision(self):
         entry = inventory.make_entry('tree-reference', 'a tree', None)
         entry.reference_revision = 'foo@\xe5b-lah'
-        self.assertEqual('tree foo@\xe5b-lah',
+        self.assertEqual('tree\x00foo@\xe5b-lah\x00',
             journalled_inventory._reference_content(entry))
 
     def test_reference_no_reference(self):

=== modified file 'doc/developers/inventory.txt'
--- a/doc/developers/inventory.txt	2008-01-03 20:30:16 +0000
+++ b/doc/developers/inventory.txt	2008-01-03 22:45:39 +0000
@@ -95,18 +95,19 @@
 'format: bzr journalled inventory v1' NL
 'parent:' SP BASIS_INVENTORY NL DELTA_LINES
 DELTA_LINES ::= (DELTA_LINE NL)*
-DELTA_LINE ::= NEWPATH SP file-id SP PARENT_ID SP LAST_MODIFIED SP CONTENT
+DELTA_LINE ::= NEWPATH NULL file-id NULL PARENT_ID NULL LAST_MODIFIED NULL CONTENT
 SP ::= ' '
+NULL ::= \x00
 NEWPATH ::= NONE | PATH
 NONE ::= 'None'
-PATH ::= '/' path
+PATH ::= path
 PARENT_ID ::= FILE_ID | ''
 CONTENT ::= DELETED_CONTENT | FILE_CONTENT | DIR_CONTENT | TREE_CONTENT | LINK_CONTENT
-DELETED_CONTENT ::= 'deleted'
-FILE_CONTENT ::= 'file' text_size text_sha1
-DIR_CONTENT ::= 'dir'
-TREE_CONTENT ::= 'tree' tree-revision
-LINK_CONTENT ::= 'link' link-target
+DELETED_CONTENT ::= 'deleted' NULL NULL
+FILE_CONTENT ::= 'file' NULL text_size NULL text_sha1
+DIR_CONTENT ::= 'dir' NULL NULL
+TREE_CONTENT ::= 'tree' NULL tree-revision NULL
+LINK_CONTENT ::= 'link' NULL link-target NULL
 BASIS_INVENTORY ::= NULL_OR_REVISION
 LAST_MODIFIED ::= NULL_OR_REVISION
 NULL_OR_REVISION ::= 'null:' | REVISION
@@ -117,7 +118,8 @@
 Some explanation is in order. When NEWPATH is 'None' a delete has been
 recorded, and because this journalled inventory is not attempting to be a
 reversible journal, the only other valid fields is 'file-id'. PARENT_ID is ''
-when a delete has been recorded or when recording a new root entry.
+when a delete has been recorded or when recording a new root entry. Content
+always has 2 NULL delimiters in it to allow easy parsing.
 
 At any commit the validator is the root of a tree. Changing the deltas -
 e.g. by rewriting an inventory in the history to be a full inventory



More information about the bazaar-commits mailing list