Rev 5892: (mbp) better handling of subprocesses with non-ascii encodings and filenames in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed May 18 16:12:00 UTC 2011
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5892 [merge]
revision-id: pqm at pqm.ubuntu.com-20110518161105-pbt4yc8mgl0y3qsy
parent: pqm at pqm.ubuntu.com-20110518130252-ky96qcvzt6o0zg3f
parent: songofacandy at gmail.com-20110506174036-tdz8qfeo5ad83n9a
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2011-05-18 16:11:05 +0000
message:
(mbp) better handling of subprocesses with non-ascii encodings and filenames
(bug 523746) (INADA Naoki)
modified:
bzrlib/diff.py diff.py-20050309040759-26944fbbf2ebbf36
bzrlib/tests/test_diff.py testdiff.py-20050727164403-d1a3496ebb12e339
=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py 2011-04-07 10:36:24 +0000
+++ b/bzrlib/diff.py 2011-05-06 17:40:36 +0000
@@ -745,8 +745,18 @@
def _get_command(self, old_path, new_path):
my_map = {'old_path': old_path, 'new_path': new_path}
- return [AtTemplate(t).substitute(my_map) for t in
- self.command_template]
+ command = [AtTemplate(t).substitute(my_map) for t in
+ self.command_template]
+ if sys.platform == 'win32': # Popen doesn't accept unicode on win32
+ command_encoded = []
+ for c in command:
+ if isinstance(c, unicode):
+ command_encoded.append(c.encode('mbcs'))
+ else:
+ command_encoded.append(c)
+ return command_encoded
+ else:
+ return command
def _execute(self, old_path, new_path):
command = self._get_command(old_path, new_path)
@@ -772,12 +782,42 @@
raise
return True
+ @staticmethod
+ def _fenc():
+ """Returns safe encoding for passing file path to diff tool"""
+ if sys.platform == 'win32':
+ return 'mbcs'
+ else:
+ # Don't fallback to 'utf-8' because subprocess may not be able to
+ # handle utf-8 correctly when locale is not utf-8.
+ return sys.getfilesystemencoding() or 'ascii'
+
+ def _is_safepath(self, path):
+ """Return true if `path` may be able to pass to subprocess."""
+ fenc = self._fenc()
+ try:
+ return path == path.encode(fenc).decode(fenc)
+ except UnicodeError:
+ return False
+
+ def _safe_filename(self, prefix, relpath):
+ """Replace unsafe character in `relpath` then join `self._root`,
+ `prefix` and `relpath`."""
+ fenc = self._fenc()
+ # encoded_str.replace('?', '_') may break multibyte char.
+ # So we should encode, decode, then replace(u'?', u'_')
+ relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
+ relpath_tmp = relpath_tmp.replace(u'?', u'_')
+ return osutils.pathjoin(self._root, prefix, relpath_tmp)
+
def _write_file(self, file_id, tree, prefix, relpath, force_temp=False,
allow_write=False):
if not force_temp and isinstance(tree, WorkingTree):
- return tree.abspath(tree.id2path(file_id))
-
- full_path = osutils.pathjoin(self._root, prefix, relpath)
+ full_path = tree.abspath(tree.id2path(file_id))
+ if self._is_safepath(full_path):
+ return full_path
+
+ full_path = self._safe_filename(prefix, relpath)
if not force_temp and self._try_symlink_root(tree, prefix):
return full_path
parent_dir = osutils.dirname(full_path)
@@ -840,13 +880,13 @@
"""
old_path = self.old_tree.id2path(file_id)
new_path = self.new_tree.id2path(file_id)
- new_abs_path = self._prepare_files(file_id, old_path, new_path,
- allow_write_new=True,
- force_temp=True)[1]
- command = self._get_command(osutils.pathjoin('old', old_path),
- osutils.pathjoin('new', new_path))
+ old_abs_path, new_abs_path = self._prepare_files(
+ file_id, old_path, new_path,
+ allow_write_new=True,
+ force_temp=True)
+ command = self._get_command(old_abs_path, new_abs_path)
subprocess.call(command, cwd=self._root)
- new_file = open(new_abs_path, 'r')
+ new_file = open(new_abs_path, 'rb')
try:
return new_file.read()
finally:
=== modified file 'bzrlib/tests/test_diff.py'
--- a/bzrlib/tests/test_diff.py 2011-05-13 12:51:05 +0000
+++ b/bzrlib/tests/test_diff.py 2011-05-18 16:11:05 +0000
@@ -33,7 +33,7 @@
transform,
)
from bzrlib.symbol_versioning import deprecated_in
-from bzrlib.tests import features
+from bzrlib.tests import features, EncodingAdapter
from bzrlib.tests.blackbox.test_diff import subst_dates
@@ -1421,6 +1421,52 @@
diff_obj._prepare_files('file2-id', 'oldname2', 'newname2')
+class TestDiffFromToolEncodedFilename(tests.TestCaseWithTransport):
+
+ def test_encodable_filename(self):
+ # Just checks file path for external diff tool.
+ # We cannot change CPython's internal encoding used by os.exec*.
+ import sys
+ diffobj = diff.DiffFromTool(['dummy', '@old_path', '@new_path'],
+ None, None, None)
+ for _, scenario in EncodingAdapter.encoding_scenarios:
+ encoding = scenario['encoding']
+ dirname = scenario['info']['directory']
+ filename = scenario['info']['filename']
+
+ self.overrideAttr(diffobj, '_fenc', lambda: encoding)
+ relpath = dirname + u'/' + filename
+ fullpath = diffobj._safe_filename('safe', relpath)
+ self.assertEqual(
+ fullpath,
+ fullpath.encode(encoding).decode(encoding)
+ )
+ self.assert_(fullpath.startswith(diffobj._root + '/safe'))
+
+ def test_unencodable_filename(self):
+ import sys
+ diffobj = diff.DiffFromTool(['dummy', '@old_path', '@new_path'],
+ None, None, None)
+ for _, scenario in EncodingAdapter.encoding_scenarios:
+ encoding = scenario['encoding']
+ dirname = scenario['info']['directory']
+ filename = scenario['info']['filename']
+
+ if encoding == 'iso-8859-1':
+ encoding = 'iso-8859-2'
+ else:
+ encoding = 'iso-8859-1'
+
+ self.overrideAttr(diffobj, '_fenc', lambda: encoding)
+ relpath = dirname + u'/' + filename
+ fullpath = diffobj._safe_filename('safe', relpath)
+ self.assertEqual(
+ fullpath,
+ fullpath.encode(encoding).decode(encoding)
+ )
+ self.assert_(fullpath.startswith(diffobj._root + '/safe'))
+
+
class TestGetTreesAndBranchesToDiffLocked(tests.TestCaseWithTransport):
def call_gtabtd(self, path_list, revision_specs, old_url, new_url):
More information about the bazaar-commits
mailing list