Rev 5046: (mbp, for doxx) better win32 cmdline splitter in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Feb 18 03:02:31 GMT 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5046 [merge]
revision-id: pqm at pqm.ubuntu.com-20100218030228-dtbmt8pdtf21ndo7
parent: pqm at pqm.ubuntu.com-20100218011016-cx9drbdydd2xmu2p
parent: mbp at sourcefrog.net-20100218021548-kegv1m3k54jxjc2p
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2010-02-18 03:02:28 +0000
message:
(mbp, for doxx) better win32 cmdline splitter
added:
bzrlib/cmdline.py bzrlibcmdline.py-20100202043522-83yorxx3tcigi7ap-1
bzrlib/tests/test_cmdline.py bzrlibteststest_cmdl-20100202043522-83yorxx3tcigi7ap-2
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commands.py bzr.py-20050309040720-d10f4714595cf8c3
bzrlib/diff.py diff.py-20050309040759-26944fbbf2ebbf36
bzrlib/rules.py properties.py-20080506032617-9k06uqalkf09ck0z-1
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/test_commands.py test_command.py-20051019190109-3b17be0f52eaa7a8
bzrlib/tests/test_diff.py testdiff.py-20050727164403-d1a3496ebb12e339
bzrlib/tests/test_win32utils.py test_win32utils.py-20070713181630-8xsrjymd3e8mgw23-108
bzrlib/win32utils.py win32console.py-20051021033308-123c6c929d04973d
=== modified file 'NEWS'
--- a/NEWS 2010-02-17 15:50:09 +0000
+++ b/NEWS 2010-02-18 03:02:28 +0000
@@ -40,6 +40,10 @@
automatically or by running ``apport-bug``. No information is sent
without specific permission from the user. (Martin Pool, #515052)
+* Parsing of command lines, for example in ``diff --using``, no longer
+ treats backslash as an escape character on Windows. (Gordon Tyler,
+ #392248)
+
* Tree-shape conflicts can be resolved by providing ``--take-this`` and
``--take-other`` to the ``bzr resolve`` command. Just marking the conflict
as resolved is still accessible via the ``--done`` default action.
=== added file 'bzrlib/cmdline.py'
--- a/bzrlib/cmdline.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/cmdline.py 2010-02-12 05:40:17 +0000
@@ -0,0 +1,160 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Unicode-compatible command-line splitter for all platforms."""
+
+import re
+
+
+_whitespace_match = re.compile(u'\s', re.UNICODE).match
+
+
+class _PushbackSequence(object):
+ def __init__(self, orig):
+ self._iter = iter(orig)
+ self._pushback_buffer = []
+
+ def next(self):
+ if len(self._pushback_buffer) > 0:
+ return self._pushback_buffer.pop()
+ else:
+ return self._iter.next()
+
+ def pushback(self, char):
+ self._pushback_buffer.append(char)
+
+ def __iter__(self):
+ return self
+
+
+class _Whitespace(object):
+ def process(self, next_char, context):
+ if _whitespace_match(next_char):
+ if len(context.token) > 0:
+ return None
+ else:
+ return self
+ elif next_char in context.allowed_quote_chars:
+ context.quoted = True
+ return _Quotes(next_char, self)
+ elif next_char == u'\\':
+ return _Backslash(self)
+ else:
+ context.token.append(next_char)
+ return _Word()
+
+
+class _Quotes(object):
+ def __init__(self, quote_char, exit_state):
+ self.quote_char = quote_char
+ self.exit_state = exit_state
+
+ def process(self, next_char, context):
+ if next_char == u'\\':
+ return _Backslash(self)
+ elif next_char == self.quote_char:
+ return self.exit_state
+ else:
+ context.token.append(next_char)
+ return self
+
+
+class _Backslash(object):
+ # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
+ def __init__(self, exit_state):
+ self.exit_state = exit_state
+ self.count = 1
+
+ def process(self, next_char, context):
+ if next_char == u'\\':
+ self.count += 1
+ return self
+ elif next_char in context.allowed_quote_chars:
+ # 2N backslashes followed by a quote are N backslashes
+ context.token.append(u'\\' * (self.count/2))
+ # 2N+1 backslashes followed by a quote are N backslashes followed by
+ # the quote which should not be processed as the start or end of
+ # the quoted arg
+ if self.count % 2 == 1:
+ # odd number of \ escapes the quote
+ context.token.append(next_char)
+ else:
+ # let exit_state handle next_char
+ context.seq.pushback(next_char)
+ self.count = 0
+ return self.exit_state
+ else:
+ # N backslashes not followed by a quote are just N backslashes
+ if self.count > 0:
+ context.token.append(u'\\' * self.count)
+ self.count = 0
+ # let exit_state handle next_char
+ context.seq.pushback(next_char)
+ return self.exit_state
+
+ def finish(self, context):
+ if self.count > 0:
+ context.token.append(u'\\' * self.count)
+
+
+class _Word(object):
+ def process(self, next_char, context):
+ if _whitespace_match(next_char):
+ return None
+ elif next_char in context.allowed_quote_chars:
+ return _Quotes(next_char, self)
+ elif next_char == u'\\':
+ return _Backslash(self)
+ else:
+ context.token.append(next_char)
+ return self
+
+
+class Splitter(object):
+ def __init__(self, command_line, single_quotes_allowed):
+ self.seq = _PushbackSequence(command_line)
+ self.allowed_quote_chars = u'"'
+ if single_quotes_allowed:
+ self.allowed_quote_chars += u"'"
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ quoted, token = self._get_token()
+ if token is None:
+ raise StopIteration
+ return quoted, token
+
+ def _get_token(self):
+ self.quoted = False
+ self.token = []
+ state = _Whitespace()
+ for next_char in self.seq:
+ state = state.process(next_char, self)
+ if state is None:
+ break
+ if not state is None and not getattr(state, 'finish', None) is None:
+ state.finish(self)
+ result = u''.join(self.token)
+ if not self.quoted and result == '':
+ result = None
+ return self.quoted, result
+
+
+def split(unsplit, single_quotes_allowed=True):
+ splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed)
+ return [arg for quoted, arg in splitter]
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py 2010-02-11 01:13:46 +0000
+++ b/bzrlib/commands.py 2010-02-12 04:02:50 +0000
@@ -41,6 +41,7 @@
import bzrlib
from bzrlib import (
cleanup,
+ cmdline,
debug,
errors,
option,
@@ -873,11 +874,6 @@
return ret
-def shlex_split_unicode(unsplit):
- import shlex
- return [u.decode('utf-8') for u in shlex.split(unsplit.encode('utf-8'))]
-
-
def get_alias(cmd, config=None):
"""Return an expanded alias, or None if no alias exists.
@@ -893,7 +889,7 @@
config = bzrlib.config.GlobalConfig()
alias = config.get_alias(cmd)
if (alias):
- return shlex_split_unicode(alias)
+ return cmdline.split(alias)
return None
=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py 2010-01-20 23:26:31 +0000
+++ b/bzrlib/diff.py 2010-02-02 06:30:43 +0000
@@ -31,7 +31,7 @@
from bzrlib import (
branch as _mod_branch,
bzrdir,
- commands,
+ cmdline,
errors,
osutils,
patiencediff,
@@ -683,7 +683,7 @@
@classmethod
def from_string(klass, command_string, old_tree, new_tree, to_file,
path_encoding='utf-8'):
- command_template = commands.shlex_split_unicode(command_string)
+ command_template = cmdline.split(command_string)
if '@' not in command_string:
command_template.extend(['@old_path', '@new_path'])
return klass(command_template, old_tree, new_tree, to_file,
=== modified file 'bzrlib/rules.py'
--- a/bzrlib/rules.py 2009-05-07 05:08:46 +0000
+++ b/bzrlib/rules.py 2010-02-02 06:30:43 +0000
@@ -21,7 +21,7 @@
from bzrlib import (
config,
- commands,
+ cmdline,
errors,
globbing,
osutils,
@@ -81,8 +81,7 @@
self.pattern_to_section = {}
for s in sections:
if s.startswith(FILE_PREFS_PREFIX):
- file_patterns = commands.shlex_split_unicode(
- s[FILE_PREFS_PREFIX_LEN:])
+ file_patterns = cmdline.split(s[FILE_PREFS_PREFIX_LEN:])
patterns.extend(file_patterns)
for fp in file_patterns:
self.pattern_to_section[fp] = s
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2010-02-17 05:12:01 +0000
+++ b/bzrlib/tests/__init__.py 2010-02-18 02:15:48 +0000
@@ -3628,6 +3628,7 @@
'bzrlib.tests.test_chunk_writer',
'bzrlib.tests.test_clean_tree',
'bzrlib.tests.test_cleanup',
+ 'bzrlib.tests.test_cmdline',
'bzrlib.tests.test_commands',
'bzrlib.tests.test_commit',
'bzrlib.tests.test_commit_merge',
=== added file 'bzrlib/tests/test_cmdline.py'
--- a/bzrlib/tests/test_cmdline.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_cmdline.py 2010-02-18 02:15:48 +0000
@@ -0,0 +1,93 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+from bzrlib import (
+ cmdline,
+ tests)
+
+class TestSplitter(tests.TestCase):
+
+ def assertAsTokens(self, expected, line, single_quotes_allowed=False):
+ s = cmdline.Splitter(line, single_quotes_allowed=single_quotes_allowed)
+ self.assertEqual(expected, list(s))
+
+ def test_simple(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
+ u'foo bar baz')
+
+ def test_ignore_multiple_spaces(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
+
+ def test_ignore_leading_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
+
+ def test_ignore_trailing_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
+
+ def test_posix_quotations(self):
+ self.assertAsTokens([(True, u'foo bar')], u"'foo bar'",
+ single_quotes_allowed=True)
+ self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'",
+ single_quotes_allowed=True)
+ self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"',
+ single_quotes_allowed=True)
+ self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"',
+ single_quotes_allowed=True)
+
+ def test_nested_quotations(self):
+ self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"",
+ single_quotes_allowed=True)
+ self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'",
+ single_quotes_allowed=True)
+
+ def test_empty_result(self):
+ self.assertAsTokens([], u'')
+ self.assertAsTokens([], u' ')
+
+ def test_quoted_empty(self):
+ self.assertAsTokens([(True, '')], u'""')
+ self.assertAsTokens([(False, u"''")], u"''")
+ self.assertAsTokens([(True, '')], u"''", single_quotes_allowed=True)
+
+ def test_unicode_chars(self):
+ self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
+ u'f\xb5\xee \u1234\u3456')
+
+ def test_newline_in_quoted_section(self):
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'",
+ single_quotes_allowed=True)
+
+ def test_escape_chars(self):
+ self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
+
+ def test_escape_quote(self):
+ self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
+ self.assertAsTokens([(True, u'foo\\"bar')], u'"foo\\\\\\"bar"')
+ self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\"bar"')
+
+ def test_double_escape(self):
+ self.assertAsTokens([(True, u'foo\\\\bar')], u'"foo\\\\bar"')
+ self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
+
+ def test_multiple_quoted_args(self):
+ self.assertAsTokens([(True, u'x x'), (True, u'y y')],
+ u'"x x" "y y"')
+ self.assertAsTokens([(True, u'x x'), (True, u'y y')],
+ u'"x x" \'y y\'', single_quotes_allowed=True)
=== modified file 'bzrlib/tests/test_commands.py'
--- a/bzrlib/tests/test_commands.py 2009-05-23 21:01:51 +0000
+++ b/bzrlib/tests/test_commands.py 2010-01-14 13:17:33 +0000
@@ -111,7 +111,7 @@
def test_unicode(self):
my_config = self._get_config("[ALIASES]\n"
- u"iam=whoami 'Erik B\u00e5gfors <erik at bagfors.nu>'\n")
+ u'iam=whoami "Erik B\u00e5gfors <erik at bagfors.nu>"\n')
self.assertEqual([u'whoami', u'Erik B\u00e5gfors <erik at bagfors.nu>'],
commands.get_alias("iam", config=my_config))
=== modified file 'bzrlib/tests/test_diff.py'
--- a/bzrlib/tests/test_diff.py 2009-12-22 15:50:40 +0000
+++ b/bzrlib/tests/test_diff.py 2010-01-14 13:17:33 +0000
@@ -45,6 +45,8 @@
from bzrlib.revisiontree import RevisionTree
from bzrlib.revisionspec import RevisionSpec
+from bzrlib.tests.test_win32utils import BackslashDirSeparatorFeature
+
class _AttribFeature(Feature):
@@ -1292,12 +1294,22 @@
diff_obj.command_template)
def test_from_string_u5(self):
- diff_obj = DiffFromTool.from_string('diff -u\\ 5', None, None, None)
+ diff_obj = DiffFromTool.from_string('diff "-u 5"', None, None, None)
self.addCleanup(diff_obj.finish)
self.assertEqual(['diff', '-u 5', '@old_path', '@new_path'],
diff_obj.command_template)
self.assertEqual(['diff', '-u 5', 'old-path', 'new-path'],
diff_obj._get_command('old-path', 'new-path'))
+
+ def test_from_string_path_with_backslashes(self):
+ self.requireFeature(BackslashDirSeparatorFeature)
+ tool = 'C:\\Tools\\Diff.exe'
+ diff_obj = DiffFromTool.from_string(tool, None, None, None)
+ self.addCleanup(diff_obj.finish)
+ self.assertEqual(['C:\\Tools\\Diff.exe', '@old_path', '@new_path'],
+ diff_obj.command_template)
+ self.assertEqual(['C:\\Tools\\Diff.exe', 'old-path', 'new-path'],
+ diff_obj._get_command('old-path', 'new-path'))
def test_execute(self):
output = StringIO()
=== modified file 'bzrlib/tests/test_win32utils.py'
--- a/bzrlib/tests/test_win32utils.py 2010-01-25 17:48:22 +0000
+++ b/bzrlib/tests/test_win32utils.py 2010-02-02 06:39:31 +0000
@@ -288,70 +288,15 @@
-class TestUnicodeShlex(tests.TestCase):
-
- def assertAsTokens(self, expected, line):
- s = win32utils.UnicodeShlex(line)
- self.assertEqual(expected, list(s))
-
- def test_simple(self):
- self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
- u'foo bar baz')
-
- def test_ignore_multiple_spaces(self):
- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
-
- def test_ignore_leading_space(self):
- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
-
- def test_ignore_trailing_space(self):
- self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
-
- def test_posix_quotations(self):
- self.assertAsTokens([(True, u'foo bar')], u'"foo bar"')
- self.assertAsTokens([(False, u"'fo''o"), (False, u"b''ar'")],
- u"'fo''o b''ar'")
- self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"')
- self.assertAsTokens([(True, u"fo'o"), (True, u"b'ar")],
- u'"fo"\'o b\'"ar"')
-
- def test_nested_quotations(self):
- self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
- self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
-
- def test_empty_result(self):
- self.assertAsTokens([], u'')
- self.assertAsTokens([], u' ')
-
- def test_quoted_empty(self):
- self.assertAsTokens([(True, '')], u'""')
- self.assertAsTokens([(False, u"''")], u"''")
-
- def test_unicode_chars(self):
- self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
- u'f\xb5\xee \u1234\u3456')
-
- def test_newline_in_quoted_section(self):
- self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
-
- def test_escape_chars(self):
- self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
-
- def test_escape_quote(self):
- self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
-
- def test_double_escape(self):
- self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
- self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
-
class Test_CommandLineToArgv(tests.TestCaseInTempDir):
- def assertCommandLine(self, expected, line):
+ def assertCommandLine(self, expected, line, single_quotes_allowed=False):
# Strictly speaking we should respect parameter order versus glob
# expansions, but it's not really worth the effort here
- self.assertEqual(expected,
- sorted(win32utils._command_line_to_argv(line)))
+ argv = win32utils._command_line_to_argv(line,
+ single_quotes_allowed=single_quotes_allowed)
+ self.assertEqual(expected, sorted(argv))
def test_glob_paths(self):
self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
@@ -367,19 +312,25 @@
self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
self.assertCommandLine([u'a/*.c'], '"a/*.c"')
self.assertCommandLine([u"'a/*.c'"], "'a/*.c'")
+ self.assertCommandLine([u'a/*.c'], "'a/*.c'",
+ single_quotes_allowed=True)
def test_slashes_changed(self):
# Quoting doesn't change the supplied args
self.assertCommandLine([u'a\\*.c'], '"a\\*.c"')
+ self.assertCommandLine([u'a\\*.c'], "'a\\*.c'",
+ single_quotes_allowed=True)
# Expands the glob, but nothing matches, swaps slashes
self.assertCommandLine([u'a/*.c'], 'a\\*.c')
self.assertCommandLine([u'a/?.c'], 'a\\?.c')
# No glob, doesn't touch slashes
self.assertCommandLine([u'a\\foo.c'], 'a\\foo.c')
- def test_no_single_quote_supported(self):
+ def test_single_quote_support(self):
self.assertCommandLine(["add", "let's-do-it.txt"],
"add let's-do-it.txt")
+ self.assertCommandLine(["add", "lets do it.txt"],
+ "add 'lets do it.txt'", single_quotes_allowed=True)
def test_case_insensitive_globs(self):
self.requireFeature(tests.CaseInsCasePresFilenameFeature)
=== modified file 'bzrlib/win32utils.py'
--- a/bzrlib/win32utils.py 2010-02-04 16:06:36 +0000
+++ b/bzrlib/win32utils.py 2010-02-12 04:02:50 +0000
@@ -25,6 +25,7 @@
import struct
import sys
+from bzrlib import cmdline
# Windows version
if sys.platform == 'win32':
@@ -522,112 +523,21 @@
trace.mutter('Unable to set hidden attribute on %r: %s', path, e)
-
-class UnicodeShlex(object):
- """This is a very simplified version of shlex.shlex.
-
- The main change is that it supports non-ascii input streams. The internal
- structure is quite simplified relative to shlex.shlex, since we aren't
- trying to handle multiple input streams, etc. In fact, we don't use a
- file-like api either.
- """
-
- def __init__(self, uni_string):
- self._input = uni_string
- self._input_iter = iter(self._input)
- self._whitespace_match = re.compile(u'\s').match
- self._word_match = re.compile(u'\S').match
- self._quote_chars = u'"'
- # self._quote_match = re.compile(u'[\'"]').match
- self._escape_match = lambda x: None # Never matches
- self._escape = '\\'
- # State can be
- # ' ' - after whitespace, starting a new token
- # 'a' - after text, currently working on a token
- # '"' - after ", currently in a "-delimited quoted section
- # "\" - after '\', checking the next char
- self._state = ' '
- self._token = [] # Current token being parsed
-
- def _get_token(self):
- # Were there quote chars as part of this token?
- quoted = False
- quoted_state = None
- for nextchar in self._input_iter:
- if self._state == ' ':
- if self._whitespace_match(nextchar):
- # if self._token: return token
- continue
- elif nextchar in self._quote_chars:
- self._state = nextchar # quoted state
- elif self._word_match(nextchar):
- self._token.append(nextchar)
- self._state = 'a'
- else:
- raise AssertionError('wtttf?')
- elif self._state in self._quote_chars:
- quoted = True
- if nextchar == self._state: # End of quote
- self._state = 'a' # posix allows 'foo'bar to translate to
- # foobar
- elif self._state == '"' and nextchar == self._escape:
- quoted_state = self._state
- self._state = nextchar
- else:
- self._token.append(nextchar)
- elif self._state == self._escape:
- if nextchar == '\\':
- self._token.append('\\')
- elif nextchar == '"':
- self._token.append(nextchar)
- else:
- self._token.append('\\' + nextchar)
- self._state = quoted_state
- elif self._state == 'a':
- if self._whitespace_match(nextchar):
- if self._token:
- break # emit this token
- else:
- continue # no token to emit
- elif nextchar in self._quote_chars:
- # Start a new quoted section
- self._state = nextchar
- # escape?
- elif (self._word_match(nextchar)
- or nextchar in self._quote_chars
- # or whitespace_split?
- ):
- self._token.append(nextchar)
- else:
- raise AssertionError('state == "a", char: %r'
- % (nextchar,))
- else:
- raise AssertionError('unknown state: %r' % (self._state,))
- result = ''.join(self._token)
- self._token = []
- if not quoted and result == '':
- result = None
- return quoted, result
-
- def __iter__(self):
- return self
-
- def next(self):
- quoted, token = self._get_token()
- if token is None:
- raise StopIteration
- return quoted, token
-
-
-def _command_line_to_argv(command_line):
- """Convert a Unicode command line into a set of argv arguments.
-
- This does wildcard expansion, etc. It is intended to make wildcards act
- closer to how they work in posix shells, versus how they work by default on
- Windows.
- """
- s = UnicodeShlex(command_line)
- # Now that we've split the content, expand globs
+def _command_line_to_argv(command_line, single_quotes_allowed=False):
+ """Convert a Unicode command line into a list of argv arguments.
+
+ It performs wildcard expansion to make wildcards act closer to how they
+ work in posix shells, versus how they work by default on Windows. Quoted
+ arguments are left untouched.
+
+ :param command_line: The unicode string to split into an arg list.
+ :param single_quotes_allowed: Whether single quotes are accepted as quoting
+ characters like double quotes. False by
+ default.
+ :return: A list of unicode strings.
+ """
+ s = cmdline.Splitter(command_line, single_quotes_allowed=single_quotes_allowed)
+ # Now that we've split the content, expand globs if necessary
# TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like
# '**/' style globs
args = []
@@ -641,14 +551,12 @@
if has_ctypes and winver != 'Windows 98':
def get_unicode_argv():
- LPCWSTR = ctypes.c_wchar_p
- INT = ctypes.c_int
- POINTER = ctypes.POINTER
- prototype = ctypes.WINFUNCTYPE(LPCWSTR)
- GetCommandLine = prototype(("GetCommandLineW",
- ctypes.windll.kernel32))
- prototype = ctypes.WINFUNCTYPE(POINTER(LPCWSTR), LPCWSTR, POINTER(INT))
- command_line = GetCommandLine()
+ prototype = ctypes.WINFUNCTYPE(ctypes.c_wchar_p)
+ GetCommandLineW = prototype(("GetCommandLineW",
+ ctypes.windll.kernel32))
+ command_line = GetCommandLineW()
+ if command_line is None:
+ raise ctypes.WinError()
# Skip the first argument, since we only care about parameters
argv = _command_line_to_argv(command_line)[1:]
if getattr(sys, 'frozen', None) is None:
More information about the bazaar-commits
mailing list