Rev 2362: (working), fix dirstate to use utf8 file ids. in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

John Arbash Meinel john at arbash-meinel.com
Thu Feb 22 15:40:35 GMT 2007


At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

------------------------------------------------------------
revno: 2362
revision-id: john at arbash-meinel.com-20070222153923-cpynsmaz50pdncre
parent: john at arbash-meinel.com-20070222150435-vtrqx5i64319z8pz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Thu 2007-02-22 09:39:23 -0600
message:
  (working), fix dirstate to use utf8 file ids.
  Also fix a bug in _generate_inventory for non-ascii paths: it was
  combining the decoded (unicode) path with the utf8-encoded prefix and
  assuming the whole thing was utf8.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/tree_implementations/__init__.py __init__.py-20060717075546-420s7b0bj9hzeowi-2
  bzrlib/tests/tree_implementations/test_test_trees.py test_tree_trees.py-20060720091921-3nwi5h21lf06vf5p-1
  bzrlib/tests/tree_implementations/test_walkdirs.py test_walkdirs.py-20060729160421-gmjnkotqgxdh98ce-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-22 01:14:15 +0000
+++ b/bzrlib/dirstate.py	2007-02-22 15:39:23 +0000
@@ -306,7 +306,9 @@
         # faster than three separate encodes.
         utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
         dirname, basename = os.path.split(utf8path)
-        entry_key = (dirname, basename, file_id.encode('utf8'))
+        assert file_id.__class__ == str, \
+            "must be a utf8 file_id not %s" % (type(file_id))
+        entry_key = (dirname, basename, file_id)
         self._read_dirblocks_if_needed()
         block_index, present = self._find_block_index_from_key(entry_key)
         if not present:
@@ -756,7 +758,7 @@
             id.
         """
         kind = inv_entry.kind
-        tree_data = inv_entry.revision.encode('utf8')
+        tree_data = inv_entry.revision
         assert len(tree_data) > 0, 'empty revision for the inv_entry.'
         if kind == 'directory':
             fingerprint = ''
@@ -966,13 +968,11 @@
 
         :param path: The path inside the tree to set - '' is the root, 'foo'
             is the path foo in the root.
-        :param new_id: The new id to assign to the path. If unicode, it will
-            be encoded to utf8. In future this will be deprecated: avoid using
-            unicode ids if possible.
+        :param new_id: The new id to assign to the path. This must be a utf8
+            file id (not unicode, and not None).
         """
         # TODO: start warning here.
-        if new_id.__class__ == unicode:
-            new_id = new_id.encode('utf8')
+        assert new_id.__class__ == str
         self._read_dirblocks_if_needed()
         if len(path):
             import pdb;pdb.set_trace()
@@ -1067,7 +1067,7 @@
                 # new entry at this path: by adding the id->path mapping last,
                 # all the mappings are valid and have correct relocation
                 # records where needed. 
-                file_id = entry.file_id.encode('utf8')
+                file_id = entry.file_id
                 path_utf8 = path.encode('utf8')
                 dirname, basename = os.path.split(path_utf8)
                 new_entry_key = (dirname, basename, file_id)
@@ -1165,7 +1165,7 @@
                 # convert new into dirblock style
                 new_path_utf8 = current_new[0].encode('utf8')
                 new_dirname, new_basename = os.path.split(new_path_utf8)
-                new_id = current_new[1].file_id.encode('utf8')
+                new_id = current_new[1].file_id
                 new_entry_key = (new_dirname, new_basename, new_id)
             else:
                 # for safety disable variables

=== modified file 'bzrlib/tests/tree_implementations/__init__.py'
--- a/bzrlib/tests/tree_implementations/__init__.py	2007-02-22 15:04:35 +0000
+++ b/bzrlib/tests/tree_implementations/__init__.py	2007-02-22 15:39:23 +0000
@@ -200,7 +200,7 @@
         ids = [
             '2file',
             '1top-dir',
-            u'0utf\u1234file',
+            u'0utf\u1234file'.encode('utf8'),
             '1file-in-1topdir',
             '0dir-in-1topdir'
             ]
@@ -228,15 +228,21 @@
         # bzr itself does not create unicode file ids, but we want them for
         # testing.
         file_ids = [u'TREE_ROOT',
-                    u'f\xf6-id',
-                    u'b\xe5r-id',
-                    u'b\xe1z-id',
+                    u'f\xf6-id'.encode('utf8'),
+                    u'b\xe5r-id'.encode('utf8'),
+                    u'b\xe1z-id'.encode('utf8'),
                    ]
         try:
             self.build_tree(paths[1:])
         except UnicodeError:
             raise tests.TestSkipped('filesystem does not support unicode.')
-        tree.add(paths, file_ids)
+        if tree.path2id('') is None:
+            # Some trees do not have a root yet.
+            tree.add(paths, file_ids)
+        else:
+            # Some trees will already have a root
+            tree.set_root_id(file_ids[0])
+            tree.add(paths[1:], file_ids[1:])
         try:
             tree.commit(u'in\xedtial', rev_id=u'r\xe9v-1'.encode('utf8'))
         except errors.NonAsciiRevisionId:
@@ -247,7 +253,7 @@
         self._create_tree_with_utf8(tree)
         tree2 = tree.bzrdir.sprout('tree2').open_workingtree()
         self.build_tree([u'tree2/b\xe5r/z\xf7z'])
-        tree2.add([u'b\xe5r/z\xf7z'], [u'z\xf7z-id'])
+        tree2.add([u'b\xe5r/z\xf7z'], [u'z\xf7z-id'.encode('utf8')])
         tree2.commit(u'to m\xe9rge', rev_id=u'r\xe9v-2'.encode('utf8'))
 
         tree.merge_from_branch(tree2.branch)

=== modified file 'bzrlib/tests/tree_implementations/test_test_trees.py'
--- a/bzrlib/tests/tree_implementations/test_test_trees.py	2007-02-22 15:04:35 +0000
+++ b/bzrlib/tests/tree_implementations/test_test_trees.py	2007-02-22 15:39:23 +0000
@@ -160,7 +160,7 @@
                 '1top-dir',
                 '1file-in-1topdir',
                 '0dir-in-1topdir',
-                 u'0utf\u1234file',
+                 u'0utf\u1234file'.encode('utf8'),
                 'symlink',
                  ]),
             set(iter(tree)))
@@ -170,7 +170,7 @@
             [('', tree_root, 'directory'),
              ('0file', '2file', 'file'),
              ('1top-dir', '1top-dir', 'directory'),
-             (u'2utf\u1234file', u'0utf\u1234file', 'file'),
+             (u'2utf\u1234file', u'0utf\u1234file'.encode('utf8'), 'file'),
              ('symlink', 'symlink', 'symlink'),
              ('1top-dir/0file-in-1topdir', '1file-in-1topdir', 'file'),
              ('1top-dir/1dir-in-1topdir', '0dir-in-1topdir', 'directory')],

=== modified file 'bzrlib/tests/tree_implementations/test_walkdirs.py'
--- a/bzrlib/tests/tree_implementations/test_walkdirs.py	2007-02-17 04:06:47 +0000
+++ b/bzrlib/tests/tree_implementations/test_walkdirs.py	2007-02-22 15:39:23 +0000
@@ -27,7 +27,8 @@
             [
              ('0file', '0file', 'file', None, '2file', 'file'),
              ('1top-dir', '1top-dir', 'directory', None, '1top-dir', 'directory'),
-             (u'2utf\u1234file', u'2utf\u1234file', 'file', None, u'0utf\u1234file', 'file'),
+             (u'2utf\u1234file', u'2utf\u1234file', 'file', None,
+                                     u'0utf\u1234file'.encode('utf8'), 'file'),
              ('symlink', 'symlink', 'symlink', None, 'symlink', 'symlink')
             ]),
             (('1top-dir', '1top-dir'),

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-22 05:06:58 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-22 15:39:23 +0000
@@ -232,7 +232,7 @@
         state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         root_key, current_entry = self._get_entry(path='')
-        current_id = root_key[2].decode('utf8')
+        current_id = root_key[2]
         assert current_entry[0][0] == 'directory'
         inv = Inventory(root_id=current_id)
         # we could do this straight out of the dirstate; it might be fast
@@ -249,10 +249,11 @@
                 if entry[0][0] in ('absent', 'relocated'):
                     # a parent tree only entry
                     continue
-                name = key[1].decode('utf8')
-                file_id = key[2].decode('utf8')
+                name = key[1]
+                name_unicode = name.decode('utf8')
+                file_id = key[2]
                 kind, link_or_sha1, size, executable, stat = entry[0]
-                inv_entry = entry_factory[kind](file_id, name, parent_id)
+                inv_entry = entry_factory[kind](file_id, name_unicode, parent_id)
                 if kind == 'file':
                     # not strictly needed: working tree
                     #entry.executable = executable
@@ -279,8 +280,6 @@
         if file_id is None and path is None:
             raise errors.BzrError('must supply file_id or path')
         state = self.current_dirstate()
-        if file_id is not None:
-            file_id = file_id.encode('utf8')
         if path is not None:
             path = path.encode('utf8')
         return state._get_entry(0, fileid_utf8=file_id, path_utf8=path)
@@ -316,11 +315,11 @@
     @needs_read_lock
     def get_root_id(self):
         """Return the id of this trees root"""
-        return self._get_entry(path='')[0][2].decode('utf8')
+        return self._get_entry(path='')[0][2]
 
     def has_id(self, file_id):
         state = self.current_dirstate()
-        fileid_utf8 = file_id.encode('utf8')
+        file_id = osutils.safe_file_id(file_id)
         row, parents = self._get_entry(file_id=file_id)
         if row is None:
             return False
@@ -330,8 +329,8 @@
     @needs_read_lock
     def id2path(self, fileid):
         state = self.current_dirstate()
-        fileid_utf8 = fileid.encode('utf8')
-        key, tree_details = state._get_entry(0, fileid_utf8=fileid_utf8)
+        fileid = osutils.safe_file_id(fileid)
+        key, tree_details = state._get_entry(0, fileid_utf8=fileid)
         return os.path.join(*key[0:2]).decode('utf8')
 
     @needs_read_lock
@@ -348,7 +347,7 @@
                 continue
             path = pathjoin(self.basedir, key[0].decode('utf8'), key[1].decode('utf8'))
             if osutils.lexists(path):
-                result.append(key[2].decode('utf8'))
+                result.append(key[2])
         return iter(result)
 
     @needs_read_lock
@@ -406,7 +405,7 @@
             update_inventory = True
             inv = self.inventory
             to_dir_ie = inv[to_dir_id]
-            to_dir_id = to_entry[0][2].decode('utf8')
+            to_dir_id = to_entry[0][2]
         else:
             update_inventory = False
 
@@ -420,7 +419,7 @@
                 raise errors.BzrMoveFailedError(from_rel,to_dir,
                     errors.NotVersionedError(path=str(from_rel)))
 
-            from_id = from_entry[0][2].decode('utf8')
+            from_id = from_entry[0][2]
             to_rel = pathjoin(to_dir, from_tail)
             item_to_entry = self._get_entry(path=to_rel)
             if item_to_entry != (None, None):
@@ -548,7 +547,7 @@
         entry = self._get_entry(path=path)
         if entry == (None, None):
             return None
-        return entry[0][2].decode('utf8')
+        return entry[0][2]
 
     def paths2ids(self, paths, trees=[], require_versioned=True):
         """See Tree.paths2ids().
@@ -793,8 +792,8 @@
         state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         ids_to_unversion = set()
-        for fileid in file_ids:
-            ids_to_unversion.add(fileid.encode('utf8'))
+        for file_id in file_ids:
+            ids_to_unversion.add(osutils.safe_file_id(file_id))
         paths_to_unversion = set()
         # sketch:
         # check if the root is to be unversioned, if so, assert for now.
@@ -988,8 +987,7 @@
         """
         if file_id is None and path is None:
             raise errors.BzrError('must supply file_id or path')
-        if file_id is not None:
-            file_id = file_id.encode('utf8')
+        file_id = osutils.safe_file_id(file_id)
         if path is not None:
             path = path.encode('utf8')
         parent_index = self._dirstate.get_parent_ids().index(self._revision_id) + 1
@@ -1011,7 +1009,7 @@
         # This is identical now to the WorkingTree _generate_inventory except
         # for the tree index use.
         root_key, current_entry = self._dirstate._get_entry(parent_index, path_utf8='')
-        current_id = root_key[2].decode('utf8')
+        current_id = root_key[2]
         assert current_entry[parent_index][0] == 'directory'
         inv = Inventory(root_id=current_id, revision_id=self._revision_id)
         inv.root.revision = current_entry[parent_index][4]
@@ -1021,7 +1019,7 @@
         for block in self._dirstate._dirblocks[1:]: #skip root
             dirname = block[0]
             try:
-                parent_id = parent_ids[block[0]]
+                parent_id = parent_ids[dirname]
             except KeyError:
                 # all the paths in this block are not versioned in this tree
                 continue
@@ -1029,10 +1027,11 @@
                 if entry[parent_index][0] in ('absent', 'relocated'):
                     # not this tree
                     continue
-                name = key[1].decode('utf8')
-                file_id = key[2].decode('utf8')
+                name = key[1]
+                name_unicode = name.decode('utf8')
+                file_id = key[2]
                 kind, link_or_sha1, size, executable, revid = entry[parent_index]
-                inv_entry = entry_factory[kind](file_id, name, parent_id)
+                inv_entry = entry_factory[kind](file_id, name_unicode, parent_id)
                 inv_entry.revision = revid
                 if kind == 'file':
                     inv_entry.executable = executable
@@ -1123,7 +1122,7 @@
         entry = self._get_entry(path=path)
         if entry == (None, None):
             return None
-        return entry[0][2].decode('utf8')
+        return entry[0][2]
 
     def unlock(self):
         """Unlock, freeing any cache memory used during the lock."""



More information about the bazaar-commits mailing list