Rev 2362: (working), fix dirstate to use utf8 file ids. in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
John Arbash Meinel
john at arbash-meinel.com
Thu Feb 22 15:40:35 GMT 2007
At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
------------------------------------------------------------
revno: 2362
revision-id: john at arbash-meinel.com-20070222153923-cpynsmaz50pdncre
parent: john at arbash-meinel.com-20070222150435-vtrqx5i64319z8pz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Thu 2007-02-22 09:39:23 -0600
message:
(working), fix dirstate to use utf8 file ids.
Also fix a bug in _generate_inventory for non-ascii paths: the code was
combining the decoded (unicode) path with the utf8-encoded prefix and
assuming the whole thing was utf8.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/tree_implementations/__init__.py __init__.py-20060717075546-420s7b0bj9hzeowi-2
bzrlib/tests/tree_implementations/test_test_trees.py test_tree_trees.py-20060720091921-3nwi5h21lf06vf5p-1
bzrlib/tests/tree_implementations/test_walkdirs.py test_walkdirs.py-20060729160421-gmjnkotqgxdh98ce-1
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-22 01:14:15 +0000
+++ b/bzrlib/dirstate.py 2007-02-22 15:39:23 +0000
@@ -306,7 +306,9 @@
# faster than three separate encodes.
utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
dirname, basename = os.path.split(utf8path)
- entry_key = (dirname, basename, file_id.encode('utf8'))
+ assert file_id.__class__ == str, \
+ "must be a utf8 file_id not %s" % (type(file_id))
+ entry_key = (dirname, basename, file_id)
self._read_dirblocks_if_needed()
block_index, present = self._find_block_index_from_key(entry_key)
if not present:
@@ -756,7 +758,7 @@
id.
"""
kind = inv_entry.kind
- tree_data = inv_entry.revision.encode('utf8')
+ tree_data = inv_entry.revision
assert len(tree_data) > 0, 'empty revision for the inv_entry.'
if kind == 'directory':
fingerprint = ''
@@ -966,13 +968,11 @@
:param path: The path inside the tree to set - '' is the root, 'foo'
is the path foo in the root.
- :param new_id: The new id to assign to the path. If unicode, it will
- be encoded to utf8. In future this will be deprecated: avoid using
- unicode ids if possible.
+ :param new_id: The new id to assign to the path. This must be a utf8
+ file id (not unicode, and not None).
"""
# TODO: start warning here.
- if new_id.__class__ == unicode:
- new_id = new_id.encode('utf8')
+ assert new_id.__class__ == str
self._read_dirblocks_if_needed()
if len(path):
import pdb;pdb.set_trace()
@@ -1067,7 +1067,7 @@
# new entry at this path: by adding the id->path mapping last,
# all the mappings are valid and have correct relocation
# records where needed.
- file_id = entry.file_id.encode('utf8')
+ file_id = entry.file_id
path_utf8 = path.encode('utf8')
dirname, basename = os.path.split(path_utf8)
new_entry_key = (dirname, basename, file_id)
@@ -1165,7 +1165,7 @@
# convert new into dirblock style
new_path_utf8 = current_new[0].encode('utf8')
new_dirname, new_basename = os.path.split(new_path_utf8)
- new_id = current_new[1].file_id.encode('utf8')
+ new_id = current_new[1].file_id
new_entry_key = (new_dirname, new_basename, new_id)
else:
# for safety disable variables
=== modified file 'bzrlib/tests/tree_implementations/__init__.py'
--- a/bzrlib/tests/tree_implementations/__init__.py 2007-02-22 15:04:35 +0000
+++ b/bzrlib/tests/tree_implementations/__init__.py 2007-02-22 15:39:23 +0000
@@ -200,7 +200,7 @@
ids = [
'2file',
'1top-dir',
- u'0utf\u1234file',
+ u'0utf\u1234file'.encode('utf8'),
'1file-in-1topdir',
'0dir-in-1topdir'
]
@@ -228,15 +228,21 @@
# bzr itself does not create unicode file ids, but we want them for
# testing.
file_ids = [u'TREE_ROOT',
- u'f\xf6-id',
- u'b\xe5r-id',
- u'b\xe1z-id',
+ u'f\xf6-id'.encode('utf8'),
+ u'b\xe5r-id'.encode('utf8'),
+ u'b\xe1z-id'.encode('utf8'),
]
try:
self.build_tree(paths[1:])
except UnicodeError:
raise tests.TestSkipped('filesystem does not support unicode.')
- tree.add(paths, file_ids)
+ if tree.path2id('') is None:
+ # Some trees do not have a root yet.
+ tree.add(paths, file_ids)
+ else:
+ # Some trees will already have a root
+ tree.set_root_id(file_ids[0])
+ tree.add(paths[1:], file_ids[1:])
try:
tree.commit(u'in\xedtial', rev_id=u'r\xe9v-1'.encode('utf8'))
except errors.NonAsciiRevisionId:
@@ -247,7 +253,7 @@
self._create_tree_with_utf8(tree)
tree2 = tree.bzrdir.sprout('tree2').open_workingtree()
self.build_tree([u'tree2/b\xe5r/z\xf7z'])
- tree2.add([u'b\xe5r/z\xf7z'], [u'z\xf7z-id'])
+ tree2.add([u'b\xe5r/z\xf7z'], [u'z\xf7z-id'.encode('utf8')])
tree2.commit(u'to m\xe9rge', rev_id=u'r\xe9v-2'.encode('utf8'))
tree.merge_from_branch(tree2.branch)
=== modified file 'bzrlib/tests/tree_implementations/test_test_trees.py'
--- a/bzrlib/tests/tree_implementations/test_test_trees.py 2007-02-22 15:04:35 +0000
+++ b/bzrlib/tests/tree_implementations/test_test_trees.py 2007-02-22 15:39:23 +0000
@@ -160,7 +160,7 @@
'1top-dir',
'1file-in-1topdir',
'0dir-in-1topdir',
- u'0utf\u1234file',
+ u'0utf\u1234file'.encode('utf8'),
'symlink',
]),
set(iter(tree)))
@@ -170,7 +170,7 @@
[('', tree_root, 'directory'),
('0file', '2file', 'file'),
('1top-dir', '1top-dir', 'directory'),
- (u'2utf\u1234file', u'0utf\u1234file', 'file'),
+ (u'2utf\u1234file', u'0utf\u1234file'.encode('utf8'), 'file'),
('symlink', 'symlink', 'symlink'),
('1top-dir/0file-in-1topdir', '1file-in-1topdir', 'file'),
('1top-dir/1dir-in-1topdir', '0dir-in-1topdir', 'directory')],
=== modified file 'bzrlib/tests/tree_implementations/test_walkdirs.py'
--- a/bzrlib/tests/tree_implementations/test_walkdirs.py 2007-02-17 04:06:47 +0000
+++ b/bzrlib/tests/tree_implementations/test_walkdirs.py 2007-02-22 15:39:23 +0000
@@ -27,7 +27,8 @@
[
('0file', '0file', 'file', None, '2file', 'file'),
('1top-dir', '1top-dir', 'directory', None, '1top-dir', 'directory'),
- (u'2utf\u1234file', u'2utf\u1234file', 'file', None, u'0utf\u1234file', 'file'),
+ (u'2utf\u1234file', u'2utf\u1234file', 'file', None,
+ u'0utf\u1234file'.encode('utf8'), 'file'),
('symlink', 'symlink', 'symlink', None, 'symlink', 'symlink')
]),
(('1top-dir', '1top-dir'),
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-02-22 05:06:58 +0000
+++ b/bzrlib/workingtree_4.py 2007-02-22 15:39:23 +0000
@@ -232,7 +232,7 @@
state = self.current_dirstate()
state._read_dirblocks_if_needed()
root_key, current_entry = self._get_entry(path='')
- current_id = root_key[2].decode('utf8')
+ current_id = root_key[2]
assert current_entry[0][0] == 'directory'
inv = Inventory(root_id=current_id)
# we could do this straight out of the dirstate; it might be fast
@@ -249,10 +249,11 @@
if entry[0][0] in ('absent', 'relocated'):
# a parent tree only entry
continue
- name = key[1].decode('utf8')
- file_id = key[2].decode('utf8')
+ name = key[1]
+ name_unicode = name.decode('utf8')
+ file_id = key[2]
kind, link_or_sha1, size, executable, stat = entry[0]
- inv_entry = entry_factory[kind](file_id, name, parent_id)
+ inv_entry = entry_factory[kind](file_id, name_unicode, parent_id)
if kind == 'file':
# not strictly needed: working tree
#entry.executable = executable
@@ -279,8 +280,6 @@
if file_id is None and path is None:
raise errors.BzrError('must supply file_id or path')
state = self.current_dirstate()
- if file_id is not None:
- file_id = file_id.encode('utf8')
if path is not None:
path = path.encode('utf8')
return state._get_entry(0, fileid_utf8=file_id, path_utf8=path)
@@ -316,11 +315,11 @@
@needs_read_lock
def get_root_id(self):
"""Return the id of this trees root"""
- return self._get_entry(path='')[0][2].decode('utf8')
+ return self._get_entry(path='')[0][2]
def has_id(self, file_id):
state = self.current_dirstate()
- fileid_utf8 = file_id.encode('utf8')
+ file_id = osutils.safe_file_id(file_id)
row, parents = self._get_entry(file_id=file_id)
if row is None:
return False
@@ -330,8 +329,8 @@
@needs_read_lock
def id2path(self, fileid):
state = self.current_dirstate()
- fileid_utf8 = fileid.encode('utf8')
- key, tree_details = state._get_entry(0, fileid_utf8=fileid_utf8)
+ fileid = osutils.safe_file_id(fileid)
+ key, tree_details = state._get_entry(0, fileid_utf8=fileid)
return os.path.join(*key[0:2]).decode('utf8')
@needs_read_lock
@@ -348,7 +347,7 @@
continue
path = pathjoin(self.basedir, key[0].decode('utf8'), key[1].decode('utf8'))
if osutils.lexists(path):
- result.append(key[2].decode('utf8'))
+ result.append(key[2])
return iter(result)
@needs_read_lock
@@ -406,7 +405,7 @@
update_inventory = True
inv = self.inventory
to_dir_ie = inv[to_dir_id]
- to_dir_id = to_entry[0][2].decode('utf8')
+ to_dir_id = to_entry[0][2]
else:
update_inventory = False
@@ -420,7 +419,7 @@
raise errors.BzrMoveFailedError(from_rel,to_dir,
errors.NotVersionedError(path=str(from_rel)))
- from_id = from_entry[0][2].decode('utf8')
+ from_id = from_entry[0][2]
to_rel = pathjoin(to_dir, from_tail)
item_to_entry = self._get_entry(path=to_rel)
if item_to_entry != (None, None):
@@ -548,7 +547,7 @@
entry = self._get_entry(path=path)
if entry == (None, None):
return None
- return entry[0][2].decode('utf8')
+ return entry[0][2]
def paths2ids(self, paths, trees=[], require_versioned=True):
"""See Tree.paths2ids().
@@ -793,8 +792,8 @@
state = self.current_dirstate()
state._read_dirblocks_if_needed()
ids_to_unversion = set()
- for fileid in file_ids:
- ids_to_unversion.add(fileid.encode('utf8'))
+ for file_id in file_ids:
+ ids_to_unversion.add(osutils.safe_file_id(file_id))
paths_to_unversion = set()
# sketch:
# check if the root is to be unversioned, if so, assert for now.
@@ -988,8 +987,7 @@
"""
if file_id is None and path is None:
raise errors.BzrError('must supply file_id or path')
- if file_id is not None:
- file_id = file_id.encode('utf8')
+ file_id = osutils.safe_file_id(file_id)
if path is not None:
path = path.encode('utf8')
parent_index = self._dirstate.get_parent_ids().index(self._revision_id) + 1
@@ -1011,7 +1009,7 @@
# This is identical now to the WorkingTree _generate_inventory except
# for the tree index use.
root_key, current_entry = self._dirstate._get_entry(parent_index, path_utf8='')
- current_id = root_key[2].decode('utf8')
+ current_id = root_key[2]
assert current_entry[parent_index][0] == 'directory'
inv = Inventory(root_id=current_id, revision_id=self._revision_id)
inv.root.revision = current_entry[parent_index][4]
@@ -1021,7 +1019,7 @@
for block in self._dirstate._dirblocks[1:]: #skip root
dirname = block[0]
try:
- parent_id = parent_ids[block[0]]
+ parent_id = parent_ids[dirname]
except KeyError:
# all the paths in this block are not versioned in this tree
continue
@@ -1029,10 +1027,11 @@
if entry[parent_index][0] in ('absent', 'relocated'):
# not this tree
continue
- name = key[1].decode('utf8')
- file_id = key[2].decode('utf8')
+ name = key[1]
+ name_unicode = name.decode('utf8')
+ file_id = key[2]
kind, link_or_sha1, size, executable, revid = entry[parent_index]
- inv_entry = entry_factory[kind](file_id, name, parent_id)
+ inv_entry = entry_factory[kind](file_id, name_unicode, parent_id)
inv_entry.revision = revid
if kind == 'file':
inv_entry.executable = executable
@@ -1123,7 +1122,7 @@
entry = self._get_entry(path=path)
if entry == (None, None):
return None
- return entry[0][2].decode('utf8')
+ return entry[0][2]
def unlock(self):
"""Unlock, freeing any cache memory used during the lock."""
More information about the bazaar-commits mailing list