Rev 6063: (gz) Making logging non-ascii strings more robust with new in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/2.4/

Patch Queue Manager pqm at pqm.ubuntu.com
Mon Dec 5 09:48:01 UTC 2011


At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/2.4/

------------------------------------------------------------
revno: 6063 [merge]
revision-id: pqm at pqm.ubuntu.com-20111205094801-tcph3bazkwcwwubp
parent: pqm at pqm.ubuntu.com-20111201013001-346ixnbkd9n7d6be
parent: martin.packman at canonical.com-20111130224717-07oi0fd5j1i7unmu
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: 2.4
timestamp: Mon 2011-12-05 09:48:01 +0000
message:
  (gz) Making logging non-ascii strings more robust with new
   EncodedStreamHandler class (Martin Packman)
modified:
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_trace.py     testtrace.py-20051110225523-a21117fc7a07eeff
  bzrlib/trace.py                trace.py-20050309040759-c8ed824bdcd4748a
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2011-11-14 09:37:40 +0000
+++ b/bzrlib/tests/__init__.py	2011-11-30 22:47:17 +0000
@@ -1939,8 +1939,8 @@
 
         self.log('run bzr: %r', args)
         # FIXME: don't call into logging here
-        handler = logging.StreamHandler(stderr)
-        handler.setLevel(logging.INFO)
+        handler = trace.EncodedStreamHandler(stderr, errors="replace",
+            level=logging.INFO)
         logger = logging.getLogger('')
         logger.addHandler(handler)
         old_ui_factory = ui.ui_factory

=== modified file 'bzrlib/tests/test_trace.py'
--- a/bzrlib/tests/test_trace.py	2011-05-27 20:46:01 +0000
+++ b/bzrlib/tests/test_trace.py	2011-11-30 22:44:28 +0000
@@ -20,6 +20,7 @@
 
 from cStringIO import StringIO
 import errno
+import logging
 import os
 import re
 import sys
@@ -345,6 +346,72 @@
         self.assertEqual(0, get_verbosity_level())
 
 
+class TestLogging(TestCase):
+    """Check logging functionality robustly records information"""
+
+    def test_note(self):
+        trace.note("Noted")
+        self.assertEqual("    INFO  Noted\n", self.get_log())
+
+    def test_warning(self):
+        trace.warning("Warned")
+        self.assertEqual(" WARNING  Warned\n", self.get_log())
+
+    def test_log(self):
+        logging.getLogger("bzr").error("Errored")
+        self.assertEqual("   ERROR  Errored\n", self.get_log())
+
+    def test_log_sub(self):
+        logging.getLogger("bzr.test_log_sub").debug("Whispered")
+        self.assertEqual("   DEBUG  Whispered\n", self.get_log())
+
+    def test_log_unicode_msg(self):
+        logging.getLogger("bzr").debug(u"\xa7")
+        self.assertEqual(u"   DEBUG  \xa7\n", self.get_log())
+
+    def test_log_unicode_arg(self):
+        logging.getLogger("bzr").debug("%s", u"\xa7")
+        self.assertEqual(u"   DEBUG  \xa7\n", self.get_log())
+
+    def test_log_utf8_msg(self):
+        logging.getLogger("bzr").debug("\xc2\xa7")
+        self.assertEqual(u"   DEBUG  \xa7\n", self.get_log())
+
+    def test_log_utf8_arg(self):
+        logging.getLogger("bzr").debug("%s", "\xc2\xa7")
+        self.assertEqual(u"   DEBUG  \xa7\n", self.get_log())
+
+    def test_log_bytes_msg(self):
+        logging.getLogger("bzr").debug("\xa7")
+        log = self.get_log()
+        self.assertContainsString(log, "UnicodeDecodeError: ")
+        self.assertContainsString(log,
+            "Logging record unformattable: '\\xa7' % ()\n")
+
+    def test_log_bytes_arg(self):
+        logging.getLogger("bzr").debug("%s", "\xa7")
+        log = self.get_log()
+        self.assertContainsString(log, "UnicodeDecodeError: ")
+        self.assertContainsString(log,
+            "Logging record unformattable: '%s' % ('\\xa7',)\n")
+
+    def test_log_mixed_strings(self):
+        logging.getLogger("bzr").debug(u"%s", "\xa7")
+        log = self.get_log()
+        self.assertContainsString(log, "UnicodeDecodeError: ")
+        self.assertContainsString(log,
+            "Logging record unformattable: u'%s' % ('\\xa7',)\n")
+
+    def test_log_repr_broken(self):
+        class BadRepr(object):
+            def __repr__(self):
+                raise ValueError("Broken object")
+        logging.getLogger("bzr").debug("%s", BadRepr())
+        log = self.get_log()
+        self.assertContainsRe(log, "ValueError: Broken object\n")
+        self.assertContainsRe(log, "Logging record unformattable: '%s' % .*\n")
+
+
 class TestBzrLog(TestCaseInTempDir):
 
     def test_log_rollover(self):

=== modified file 'bzrlib/trace.py'
--- a/bzrlib/trace.py	2011-05-19 09:32:38 +0000
+++ b/bzrlib/trace.py	2011-11-30 22:45:49 +0000
@@ -54,7 +54,6 @@
 # increased cost of logging.py is not so bad, and we could standardize on
 # that.
 
-import codecs
 import logging
 import os
 import sys
@@ -116,8 +115,6 @@
 
     :return: None
     """
-    # FIXME note always emits utf-8, regardless of the terminal encoding
-    #
     # FIXME: clearing the ui and then going through the abstract logging
     # framework is whack; we should probably have a logging Handler that
     # deals with terminal output if needed.
@@ -308,7 +305,6 @@
     """
     start_time = osutils.format_local_date(_bzr_log_start_time,
                                            timezone='local')
-    # create encoded wrapper around stderr
     bzr_log_file = _open_bzr_log()
     if bzr_log_file is not None:
         bzr_log_file.write(start_time.encode('utf-8') + '\n')
@@ -317,11 +313,8 @@
         r'%Y-%m-%d %H:%M:%S')
     # after hooking output into bzr_log, we also need to attach a stderr
     # handler, writing only at level info and with encoding
-    term_encoding = osutils.get_terminal_encoding()
-    writer_factory = codecs.getwriter(term_encoding)
-    encoded_stderr = writer_factory(sys.stderr, errors='replace')
-    stderr_handler = logging.StreamHandler(encoded_stderr)
-    stderr_handler.setLevel(logging.INFO)
+    stderr_handler = EncodedStreamHandler(sys.stderr,
+        osutils.get_terminal_encoding(), 'replace', level=logging.INFO)
     logging.getLogger('bzr').addHandler(stderr_handler)
     return memento
 
@@ -336,8 +329,7 @@
     """
     global _trace_file
     # make a new handler
-    new_handler = logging.StreamHandler(to_file)
-    new_handler.setLevel(logging.DEBUG)
+    new_handler = EncodedStreamHandler(to_file, "utf-8", level=logging.DEBUG)
     if log_format is None:
         log_format = '%(levelname)8s  %(message)s'
     new_handler.setFormatter(logging.Formatter(log_format, date_format))
@@ -597,6 +589,49 @@
         _trace_file.flush()
 
 
+class EncodedStreamHandler(logging.Handler):
+    """Robustly write logging events to a stream using the specified encoding
+
+    Messages are expected to be formatted to unicode, but UTF-8 byte strings
+    are also accepted. An error during formatting or a str message in another
+    encoding will be quitely noted as an error in the Bazaar log file.
+
+    The stream is not closed so sys.stdout or sys.stderr may be passed.
+    """
+
+    def __init__(self, stream, encoding=None, errors='strict', level=0):
+        logging.Handler.__init__(self, level)
+        self.stream = stream
+        if encoding is None:
+            encoding = getattr(stream, "encoding", "ascii")
+        self.encoding = encoding
+        self.errors = errors
+
+    def flush(self):
+        flush = getattr(self.stream, "flush", None)
+        if flush is not None:
+            flush()
+
+    def emit(self, record):
+        try:
+            line = self.format(record)
+            if not isinstance(line, unicode):
+                line = line.decode("utf-8")
+            self.stream.write(line.encode(self.encoding, self.errors) + "\n")
+        except Exception:
+            log_exception_quietly()
+            # Try saving the details that would have been logged in some form
+            msg = args = "<Unformattable>"
+            try:
+                msg = repr(record.msg).encode("ascii")
+                args = repr(record.args).encode("ascii")
+            except Exception:
+                pass
+            # Using mutter() bypasses the logging module and writes directly
+            # to the file so there's no danger of getting into a loop here.
+            mutter("Logging record unformattable: %s %% %s", msg, args)
+
+
 class Config(object):
     """Configuration of message tracing in bzrlib.
 




More information about the bazaar-commits mailing list