Rev 5892: (mbp) better handling of subprocesses with non-ascii encodings and filenames in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed May 18 16:12:00 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5892 [merge]
revision-id: pqm at pqm.ubuntu.com-20110518161105-pbt4yc8mgl0y3qsy
parent: pqm at pqm.ubuntu.com-20110518130252-ky96qcvzt6o0zg3f
parent: songofacandy at gmail.com-20110506174036-tdz8qfeo5ad83n9a
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2011-05-18 16:11:05 +0000
message:
  (mbp) better handling of subprocesses with non-ascii encodings and filenames
   (bug 523746) (INADA Naoki)
modified:
  bzrlib/diff.py                 diff.py-20050309040759-26944fbbf2ebbf36
  bzrlib/tests/test_diff.py      testdiff.py-20050727164403-d1a3496ebb12e339
=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py	2011-04-07 10:36:24 +0000
+++ b/bzrlib/diff.py	2011-05-06 17:40:36 +0000
@@ -745,8 +745,18 @@
 
     def _get_command(self, old_path, new_path):
         my_map = {'old_path': old_path, 'new_path': new_path}
-        return [AtTemplate(t).substitute(my_map) for t in
-                self.command_template]
+        command = [AtTemplate(t).substitute(my_map) for t in
+                   self.command_template]
+        if sys.platform == 'win32': # Popen doesn't accept unicode on win32
+            command_encoded = []
+            for c in command:
+                if isinstance(c, unicode):
+                    command_encoded.append(c.encode('mbcs'))
+                else:
+                    command_encoded.append(c)
+            return command_encoded
+        else:
+            return command
 
     def _execute(self, old_path, new_path):
         command = self._get_command(old_path, new_path)
@@ -772,12 +782,42 @@
                 raise
         return True
 
+    @staticmethod
+    def _fenc():
+        """Returns safe encoding for passing file path to diff tool"""
+        if sys.platform == 'win32':
+            return 'mbcs'
+        else:
+            # Don't fallback to 'utf-8' because subprocess may not be able to
+            # handle utf-8 correctly when locale is not utf-8.
+            return sys.getfilesystemencoding() or 'ascii'
+
+    def _is_safepath(self, path):
+        """Return true if `path` may be able to pass to subprocess."""
+        fenc = self._fenc()
+        try:
+            return path == path.encode(fenc).decode(fenc)
+        except UnicodeError:
+            return False
+
+    def _safe_filename(self, prefix, relpath):
+        """Replace unsafe character in `relpath` then join `self._root`,
+        `prefix` and `relpath`."""
+        fenc = self._fenc()
+        # encoded_str.replace('?', '_') may break multibyte char.
+        # So we should encode, decode, then replace(u'?', u'_')
+        relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
+        relpath_tmp = relpath_tmp.replace(u'?', u'_')
+        return osutils.pathjoin(self._root, prefix, relpath_tmp)
+
     def _write_file(self, file_id, tree, prefix, relpath, force_temp=False,
                     allow_write=False):
         if not force_temp and isinstance(tree, WorkingTree):
-            return tree.abspath(tree.id2path(file_id))
-        
-        full_path = osutils.pathjoin(self._root, prefix, relpath)
+            full_path = tree.abspath(tree.id2path(file_id))
+            if self._is_safepath(full_path):
+                return full_path
+
+        full_path = self._safe_filename(prefix, relpath)
         if not force_temp and self._try_symlink_root(tree, prefix):
             return full_path
         parent_dir = osutils.dirname(full_path)
@@ -840,13 +880,13 @@
         """
         old_path = self.old_tree.id2path(file_id)
         new_path = self.new_tree.id2path(file_id)
-        new_abs_path = self._prepare_files(file_id, old_path, new_path,
-                                           allow_write_new=True,
-                                           force_temp=True)[1]
-        command = self._get_command(osutils.pathjoin('old', old_path),
-                                    osutils.pathjoin('new', new_path))
+        old_abs_path, new_abs_path = self._prepare_files(
+                                            file_id, old_path, new_path,
+                                            allow_write_new=True,
+                                            force_temp=True)
+        command = self._get_command(old_abs_path, new_abs_path)
         subprocess.call(command, cwd=self._root)
-        new_file = open(new_abs_path, 'r')
+        new_file = open(new_abs_path, 'rb')
         try:
             return new_file.read()
         finally:

=== modified file 'bzrlib/tests/test_diff.py'
--- a/bzrlib/tests/test_diff.py	2011-05-13 12:51:05 +0000
+++ b/bzrlib/tests/test_diff.py	2011-05-18 16:11:05 +0000
@@ -33,7 +33,7 @@
     transform,
     )
 from bzrlib.symbol_versioning import deprecated_in
-from bzrlib.tests import features
+from bzrlib.tests import features, EncodingAdapter
 from bzrlib.tests.blackbox.test_diff import subst_dates
 
 
@@ -1421,6 +1421,52 @@
         diff_obj._prepare_files('file2-id', 'oldname2', 'newname2')
 
 
+class TestDiffFromToolEncodedFilename(tests.TestCaseWithTransport):
+
+    def test_encodable_filename(self):
+        # Just checks file path for external diff tool.
+        # We cannot change CPython's internal encoding used by os.exec*.
+        import sys
+        diffobj = diff.DiffFromTool(['dummy', '@old_path', '@new_path'],
+                                    None, None, None)
+        for _, scenario in EncodingAdapter.encoding_scenarios:
+            encoding = scenario['encoding']
+            dirname  = scenario['info']['directory']
+            filename = scenario['info']['filename']
+
+            self.overrideAttr(diffobj, '_fenc', lambda: encoding)
+            relpath = dirname + u'/' + filename
+            fullpath = diffobj._safe_filename('safe', relpath)
+            self.assertEqual(
+                    fullpath,
+                    fullpath.encode(encoding).decode(encoding)
+                    )
+            self.assert_(fullpath.startswith(diffobj._root + '/safe'))
+
+    def test_unencodable_filename(self):
+        import sys
+        diffobj = diff.DiffFromTool(['dummy', '@old_path', '@new_path'],
+                                    None, None, None)
+        for _, scenario in EncodingAdapter.encoding_scenarios:
+            encoding = scenario['encoding']
+            dirname  = scenario['info']['directory']
+            filename = scenario['info']['filename']
+
+            if encoding == 'iso-8859-1':
+                encoding = 'iso-8859-2'
+            else:
+                encoding = 'iso-8859-1'
+
+            self.overrideAttr(diffobj, '_fenc', lambda: encoding)
+            relpath = dirname + u'/' + filename
+            fullpath = diffobj._safe_filename('safe', relpath)
+            self.assertEqual(
+                    fullpath,
+                    fullpath.encode(encoding).decode(encoding)
+                    )
+            self.assert_(fullpath.startswith(diffobj._root + '/safe'))
+
+
 class TestGetTreesAndBranchesToDiffLocked(tests.TestCaseWithTransport):
 
     def call_gtabtd(self, path_list, revision_specs, old_url, new_url):




More information about the bazaar-commits mailing list