Rev 6035: (mbp) add ContentFilterTree decorator and use it for cat and export (Martin in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Jul 21 07:51:25 UTC 2011
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 6035 [merge]
revision-id: pqm at pqm.ubuntu.com-20110721075122-l9d0n4d3pk4p3hfg
parent: pqm at pqm.ubuntu.com-20110719130644-efx0i6dq30myjhmk
parent: mbp at canonical.com-20110721070805-1fne0y2fn8vnziwj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2011-07-21 07:51:22 +0000
message:
(mbp) add ContentFilterTree decorator and use it for cat and export (Martin
Pool)
added:
bzrlib/filter_tree.py filter_tree.py-20110614035703-dpvxzqjyrhoc47mt-1
bzrlib/tests/test_filter_tree.py test_filter_tree.py-20110614035703-dpvxzqjyrhoc47mt-2
modified:
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/export/__init__.py __init__.py-20051114235828-1ba62cb4062304e6
bzrlib/export/dir_exporter.py dir_exporter.py-20051114235828-b51397f56bc7b117
bzrlib/export/tar_exporter.py tar_exporter.py-20051114235828-1f6349a2f090a5d0
bzrlib/export/zip_exporter.py zip_exporter.py-20051114235828-8f57f954fba6497e
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/fixtures.py fixtures.py-20100514150609-1kpa1jqaciel01wn-1
bzrlib/tree.py tree.py-20050309040759-9d5f2496be663e77
doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py 2011-07-12 11:09:57 +0000
+++ b/bzrlib/builtins.py 2011-07-21 07:08:05 +0000
@@ -3073,6 +3073,10 @@
old_file_id = rev_tree.path2id(relpath)
+ # TODO: Split out this code to something that generically finds the
+ # best id for a path across one or more trees; it's like
+ # find_ids_across_trees but restricted to find just one. -- mbp
+ # 20110705.
if name_from_revision:
# Try in revision if requested
if old_file_id is None:
@@ -3080,41 +3084,26 @@
"%r is not present in revision %s" % (
filename, rev_tree.get_revision_id()))
else:
- content = rev_tree.get_file_text(old_file_id)
+ actual_file_id = old_file_id
else:
cur_file_id = tree.path2id(relpath)
- found = False
- if cur_file_id is not None:
- # Then try with the actual file id
- try:
- content = rev_tree.get_file_text(cur_file_id)
- found = True
- except errors.NoSuchId:
- # The actual file id didn't exist at that time
- pass
- if not found and old_file_id is not None:
- # Finally try with the old file id
- content = rev_tree.get_file_text(old_file_id)
- found = True
- if not found:
- # Can't be found anywhere
+ if cur_file_id is not None and rev_tree.has_id(cur_file_id):
+ actual_file_id = cur_file_id
+ elif old_file_id is not None:
+ actual_file_id = old_file_id
+ else:
raise errors.BzrCommandError(
"%r is not present in revision %s" % (
filename, rev_tree.get_revision_id()))
if filtered:
- from bzrlib.filters import (
- ContentFilterContext,
- filtered_output_bytes,
- )
- filters = rev_tree._content_filter_stack(relpath)
- chunks = content.splitlines(True)
- content = filtered_output_bytes(chunks, filters,
- ContentFilterContext(relpath, rev_tree))
- self.cleanup_now()
- self.outf.writelines(content)
+ from bzrlib.filter_tree import ContentFilterTree
+ filter_tree = ContentFilterTree(rev_tree,
+ rev_tree._content_filter_stack)
+ content = filter_tree.get_file_text(actual_file_id)
else:
- self.cleanup_now()
- self.outf.write(content)
+ content = rev_tree.get_file_text(actual_file_id)
+ self.cleanup_now()
+ self.outf.write(content)
class cmd_local_time_offset(Command):
=== modified file 'bzrlib/export/__init__.py'
--- a/bzrlib/export/__init__.py 2011-06-13 16:34:53 +0000
+++ b/bzrlib/export/__init__.py 2011-07-08 23:38:46 +0000
@@ -19,6 +19,8 @@
import os
import time
+import warnings
+
from bzrlib import (
errors,
pyutils,
@@ -58,10 +60,10 @@
When requesting a specific type of export, load the respective path.
"""
- def _loader(tree, dest, root, subdir, filtered, force_mtime, fileobj):
+ def _loader(tree, dest, root, subdir, force_mtime, fileobj):
func = pyutils.get_named_object(module, funcname)
- return func(tree, dest, root, subdir, filtered=filtered,
- force_mtime=force_mtime, fileobj=fileobj)
+ return func(tree, dest, root, subdir, force_mtime=force_mtime,
+ fileobj=fileobj)
register_exporter(scheme, extensions, _loader)
@@ -91,7 +93,8 @@
a directory to start exporting from.
:param filtered: If True, content filtering is applied to the exported
- files.
+ files. Deprecated in favour of passing a ContentFilterTree
+ as the source.
:param per_file_timestamps: Whether to use the timestamp stored in the tree
rather than now(). This will do a revision lookup for every file so
@@ -122,13 +125,20 @@
trace.mutter('export version %r', tree)
+ if filtered:
+ from bzrlib.filter_tree import ContentFilterTree
+ warnings.warn(
+ "passing filtered=True to export is deprecated in bzr 2.4",
+ stacklevel=2)
+ tree = ContentFilterTree(tree, tree._content_filter_stack)
+ # We don't want things re-filtered by the specific exporter.
+ filtered = False
+
+ tree.lock_read()
try:
- tree.lock_read()
-
- for _ in _exporters[format](tree, dest, root, subdir,
- filtered=filtered,
- force_mtime=force_mtime, fileobj=fileobj):
-
+ for _ in _exporters[format](
+ tree, dest, root, subdir,
+ force_mtime=force_mtime, fileobj=fileobj):
yield
finally:
tree.unlock()
@@ -153,7 +163,7 @@
the entire tree, and anything else should specify the relative path to
a directory to start exporting from.
:param filtered: If True, content filtering is applied to the
- files exported.
+ files exported. Deprecated in favor of passing a ContentFilterTree.
:param per_file_timestamps: Whether to use the timestamp stored in the
tree rather than now(). This will do a revision lookup
for every file so will be significantly slower.
=== modified file 'bzrlib/export/dir_exporter.py'
--- a/bzrlib/export/dir_exporter.py 2011-06-28 13:55:39 +0000
+++ b/bzrlib/export/dir_exporter.py 2011-07-04 21:55:35 +0000
@@ -21,13 +21,9 @@
from bzrlib import errors, osutils
from bzrlib.export import _export_iter_entries
-from bzrlib.filters import (
- ContentFilterContext,
- filtered_output_bytes,
- )
-
-
-def dir_exporter_generator(tree, dest, root, subdir=None, filtered=False,
+
+
+def dir_exporter_generator(tree, dest, root, subdir=None,
force_mtime=None, fileobj=None):
"""Return a generator that exports this tree to a new directory.
@@ -79,10 +75,6 @@
# the directories
flags = os.O_CREAT | os.O_TRUNC | os.O_WRONLY | getattr(os, 'O_BINARY', 0)
for (relpath, executable), chunks in tree.iter_files_bytes(to_fetch):
- if filtered:
- filters = tree._content_filter_stack(relpath)
- context = ContentFilterContext(relpath, tree, ie)
- chunks = filtered_output_bytes(chunks, filters, context)
fullpath = osutils.pathjoin(dest, relpath)
# We set the mode and let the umask sort out the file info
mode = 0666
=== modified file 'bzrlib/export/tar_exporter.py'
--- a/bzrlib/export/tar_exporter.py 2011-07-11 00:59:24 +0000
+++ b/bzrlib/export/tar_exporter.py 2011-07-21 07:08:05 +0000
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-"""Export a Tree to a non-versioned directory."""
+"""Export a tree to a tarball."""
import os
import StringIO
@@ -26,14 +26,9 @@
osutils,
)
from bzrlib.export import _export_iter_entries
-from bzrlib.filters import (
- ContentFilterContext,
- filtered_output_bytes,
- )
-
-
-def prepare_tarball_item(tree, root, final_path, entry, filtered=False,
- force_mtime=None):
+
+
+def prepare_tarball_item(tree, root, final_path, entry, force_mtime=None):
"""Prepare a tarball item for exporting
:param tree: Tree to export
@@ -42,8 +37,6 @@
:param entry: Entry to export
- :param filtered: Whether to apply filters
-
:param force_mtime: Option mtime to force, instead of using tree
timestamps.
@@ -61,17 +54,13 @@
item.mode = 0755
else:
item.mode = 0644
- if filtered:
- chunks = tree.get_file_lines(entry.file_id)
- filters = tree._content_filter_stack(final_path)
- context = ContentFilterContext(final_path, tree, entry)
- contents = filtered_output_bytes(chunks, filters, context)
- content = ''.join(contents)
- item.size = len(content)
- fileobj = StringIO.StringIO(content)
- else:
- item.size = tree.get_file_size(entry.file_id)
- fileobj = tree.get_file(entry.file_id)
+ # This brings the whole file into memory, but that's almost needed for
+ # the tarfile contract, which wants the size of the file up front. We
+ # want to make sure it doesn't change, and we need to read it in one
+ # go for content filtering.
+ content = tree.get_file_text(entry.file_id)
+ item.size = len(content)
+ fileobj = StringIO.StringIO(content)
elif entry.kind == "directory":
item.type = tarfile.DIRTYPE
item.name += '/'
@@ -90,8 +79,7 @@
return (item, fileobj)
-def export_tarball_generator(tree, ball, root, subdir=None, filtered=False,
- force_mtime=None):
+def export_tarball_generator(tree, ball, root, subdir=None, force_mtime=None):
"""Export tree contents to a tarball.
:returns: A generator that will repeatedly produce None as each file is
@@ -103,8 +91,6 @@
:param ball: Tarball to export to; it will be closed when writing is
complete.
- :param filtered: Whether to apply filters
-
:param subdir: Sub directory to export
:param force_mtime: Option mtime to force, instead of using tree
@@ -113,15 +99,15 @@
try:
for final_path, entry in _export_iter_entries(tree, subdir):
(item, fileobj) = prepare_tarball_item(
- tree, root, final_path, entry, filtered, force_mtime)
+ tree, root, final_path, entry, force_mtime)
ball.addfile(item, fileobj)
yield
finally:
ball.close()
-def tgz_exporter_generator(tree, dest, root, subdir, filtered=False,
- force_mtime=None, fileobj=None):
+def tgz_exporter_generator(tree, dest, root, subdir, force_mtime=None,
+ fileobj=None):
"""Export this tree to a new tar file.
`dest` will be created holding the contents of this tree; if it
@@ -161,7 +147,7 @@
zipstream = gzip.GzipFile(basename, 'w', fileobj=stream)
ball = tarfile.open(None, 'w|', fileobj=zipstream)
for _ in export_tarball_generator(
- tree, ball, root, subdir, filtered, force_mtime):
+ tree, ball, root, subdir, force_mtime):
yield
# Closing zipstream may trigger writes to stream
zipstream.close()
@@ -170,7 +156,7 @@
stream.close()
-def tbz_exporter_generator(tree, dest, root, subdir, filtered=False,
+def tbz_exporter_generator(tree, dest, root, subdir,
force_mtime=None, fileobj=None):
"""Export this tree to a new tar file.
@@ -189,12 +175,11 @@
# Python 2.6.5 and 2.7b1)
ball = tarfile.open(dest.encode(osutils._fs_enc), 'w:bz2')
return export_tarball_generator(
- tree, ball, root, subdir, filtered, force_mtime)
+ tree, ball, root, subdir, force_mtime)
def plain_tar_exporter_generator(tree, dest, root, subdir, compression=None,
- filtered=False, force_mtime=None,
- fileobj=None):
+ force_mtime=None, fileobj=None):
"""Export this tree to a new tar file.
`dest` will be created holding the contents of this tree; if it
@@ -208,16 +193,16 @@
stream = open(dest, 'wb')
ball = tarfile.open(None, 'w|', stream)
return export_tarball_generator(
- tree, ball, root, subdir, filtered, force_mtime)
-
-
-def tar_xz_exporter_generator(tree, dest, root, subdir, filtered=False,
+ tree, ball, root, subdir, force_mtime)
+
+
+def tar_xz_exporter_generator(tree, dest, root, subdir,
force_mtime=None, fileobj=None):
- return tar_lzma_exporter_generator(tree, dest, root, subdir, filtered,
+ return tar_lzma_exporter_generator(tree, dest, root, subdir,
force_mtime, fileobj, "xz")
-def tar_lzma_exporter_generator(tree, dest, root, subdir, filtered=False,
+def tar_lzma_exporter_generator(tree, dest, root, subdir,
force_mtime=None, fileobj=None,
compression_format="alone"):
"""Export this tree to a new .tar.lzma file.
@@ -240,4 +225,4 @@
options={"format": compression_format})
ball = tarfile.open(None, 'w:', fileobj=stream)
return export_tarball_generator(
- tree, ball, root, subdir, filtered=filtered, force_mtime=force_mtime)
+ tree, ball, root, subdir, force_mtime=force_mtime)
=== modified file 'bzrlib/export/zip_exporter.py'
--- a/bzrlib/export/zip_exporter.py 2011-06-13 16:34:53 +0000
+++ b/bzrlib/export/zip_exporter.py 2011-07-04 21:55:35 +0000
@@ -27,10 +27,6 @@
osutils,
)
from bzrlib.export import _export_iter_entries
-from bzrlib.filters import (
- ContentFilterContext,
- filtered_output_bytes,
- )
from bzrlib.trace import mutter
@@ -44,7 +40,7 @@
_DIR_ATTR = stat.S_IFDIR | ZIP_DIRECTORY_BIT | DIR_PERMISSIONS
-def zip_exporter_generator(tree, dest, root, subdir=None, filtered=False,
+def zip_exporter_generator(tree, dest, root, subdir=None,
force_mtime=None, fileobj=None):
""" Export this tree to a new zip file.
@@ -77,14 +73,7 @@
date_time=date_time)
zinfo.compress_type = compression
zinfo.external_attr = _FILE_ATTR
- if filtered:
- chunks = tree.get_file_lines(file_id)
- filters = tree._content_filter_stack(dp)
- context = ContentFilterContext(dp, tree, ie)
- contents = filtered_output_bytes(chunks, filters, context)
- content = ''.join(contents)
- else:
- content = tree.get_file_text(file_id)
+ content = tree.get_file_text(file_id)
zipf.writestr(zinfo, content)
elif ie.kind == "directory":
# Directories must contain a trailing slash, to indicate
=== added file 'bzrlib/filter_tree.py'
--- a/bzrlib/filter_tree.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/filter_tree.py 2011-07-04 21:10:37 +0000
@@ -0,0 +1,82 @@
+# Copyright (C) 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Content-filtered view of any tree.
+"""
+
+
+from bzrlib import (
+ tree,
+ )
+from bzrlib.filters import (
+ ContentFilter,
+ ContentFilterContext,
+ filtered_input_file,
+ filtered_output_bytes,
+ _get_filter_stack_for,
+ _get_registered_names,
+ internal_size_sha_file_byname,
+ register_filter_stack_map,
+ )
+
+
+class ContentFilterTree(tree.Tree):
+    """A virtual tree that applies content filters to an underlying tree.
+
+    Not every operation is supported yet.
+    """
+
+    def __init__(self, backing_tree, filter_stack_callback):
+        """Construct a new filtered tree view.
+
+        :param backing_tree: An underlying tree to wrap.
+        :param filter_stack_callback: A callable taking a path that returns
+            the filter stack that should be used for that path.
+        """
+        self.backing_tree = backing_tree
+        self.filter_stack_callback = filter_stack_callback
+
+    def get_file_text(self, file_id, path=None):
+        """Return the text of file_id after applying its content filters."""
+        chunks = self.backing_tree.get_file_lines(file_id, path)
+        if path is None:
+            path = self.backing_tree.id2path(file_id)
+        # Resolve the path first: the callback is documented to take a path.
+        filters = self.filter_stack_callback(path)
+        context = ContentFilterContext(path, self, None)
+        return ''.join(filtered_output_bytes(chunks, filters, context))
+
+    def has_filename(self, filename):
+        return self.backing_tree.has_filename(filename)
+
+    def is_executable(self, file_id, path=None):
+        return self.backing_tree.is_executable(file_id, path)
+
+    def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=None):
+        # NB: This simply returns the parent tree's entries; the length may be
+        # wrong but it can't easily be calculated without filtering the whole
+        # text.  Currently all callers cope with this; perhaps they should be
+        # updated to a narrower interface that only provides things guaranteed
+        # cheaply available across all trees. -- mbp 20110705
+        return self.backing_tree.iter_entries_by_dir(
+            specific_file_ids=specific_file_ids,
+            yield_parents=yield_parents)
+
+    def lock_read(self):
+        return self.backing_tree.lock_read()
+
+    def unlock(self):
+        return self.backing_tree.unlock()
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2011-07-15 09:22:16 +0000
+++ b/bzrlib/tests/__init__.py 2011-07-21 07:08:05 +0000
@@ -16,12 +16,6 @@
"""Testing framework extensions"""
-# TODO: Perhaps there should be an API to find out if bzr running under the
-# test suite -- some plugins might want to avoid making intrusive changes if
-# this is the case. However, we want behaviour under to test to diverge as
-# little as possible, so this should be used rarely if it's added at all.
-# (Suggestion from j-a-meinel, 2005-11-24)
-
# NOTE: Some classes in here use camelCaseNaming() rather than
# underscore_naming(). That's for consistency with unittest; it's not the
# general style of bzrlib. Please continue that consistency when adding e.g.
@@ -3896,6 +3890,7 @@
'bzrlib.tests.test_fixtures',
'bzrlib.tests.test_fifo_cache',
'bzrlib.tests.test_filters',
+ 'bzrlib.tests.test_filter_tree',
'bzrlib.tests.test_ftp_transport',
'bzrlib.tests.test_foreign',
'bzrlib.tests.test_generate_docs',
=== modified file 'bzrlib/tests/fixtures.py'
--- a/bzrlib/tests/fixtures.py 2011-02-09 06:36:35 +0000
+++ b/bzrlib/tests/fixtures.py 2011-07-04 20:17:44 +0000
@@ -125,3 +125,16 @@
source.set_last_revision_info(1, 'rev-1')
return source
+
+def make_branch_and_populated_tree(testcase):
+    """Build a branch and working tree with a pending addition.
+
+    A 'hello' file is written into the tree and added under the id
+    'hello-id'; nothing is committed.
+    """
+    # TODO: Either accept or return the names of the files, so the caller
+    # doesn't need to be bound to the particular files created? -- mbp
+    # 20110705
+    populated = testcase.make_branch_and_tree('t')
+    testcase.build_tree_contents([('t/hello', 'hello world')])
+    populated.add(['hello'], ['hello-id'])
+    return populated
=== added file 'bzrlib/tests/test_filter_tree.py'
--- a/bzrlib/tests/test_filter_tree.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_filter_tree.py 2011-07-04 21:15:24 +0000
@@ -0,0 +1,68 @@
+# Copyright (C) 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for ContentFilterTree"""
+
+import tarfile
+import zipfile
+
+from bzrlib import (
+ export,
+ filter_tree,
+ tests,
+ )
+from bzrlib.tests import (
+ fixtures,
+ )
+from bzrlib.tests.test_filters import _stack_1
+
+
+class TestFilterTree(tests.TestCaseWithTransport):
+    """Exercise ContentFilterTree directly and through the exporters."""
+
+    def make_tree(self):
+        """Create the fixture tree and a filtered view that uses _stack_1."""
+        self.underlying_tree = fixtures.make_branch_and_populated_tree(self)
+        def stack_callback(path):
+            return _stack_1
+        self.filter_tree = filter_tree.ContentFilterTree(
+            self.underlying_tree, stack_callback)
+        return self.filter_tree
+
+    def test_get_file_text(self):
+        self.make_tree()
+        self.assertEquals(
+            'hello world',
+            self.underlying_tree.get_file_text('hello-id'))
+        self.assertEquals(
+            'HELLO WORLD',
+            self.filter_tree.get_file_text('hello-id'))
+
+    def test_tar_export_content_filter_tree(self):
+        # TODO: this could usefully be run generically across all exporters.
+        self.make_tree()
+        export.export(self.filter_tree, "out.tgz")
+        ball = tarfile.open("out.tgz", "r:gz")
+        # Close the archive so the test directory can be removed afterwards.
+        self.addCleanup(ball.close)
+        self.assertEquals(
+            'HELLO WORLD',
+            ball.extractfile('out/hello').read())
+
+    def test_zip_export_content_filter_tree(self):
+        self.make_tree()
+        export.export(self.filter_tree, 'out.zip')
+        zipf = zipfile.ZipFile('out.zip', 'r')
+        # Close the archive so the test directory can be removed afterwards.
+        self.addCleanup(zipf.close)
+        self.assertEquals(
+            'HELLO WORLD',
+            zipf.read('out/hello'))
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py 2011-06-19 02:24:39 +0000
+++ b/bzrlib/tree.py 2011-07-04 20:17:44 +0000
@@ -277,8 +277,11 @@
:param file_id: The file_id of the file.
:param path: The path of the file.
+
If both file_id and path are supplied, an implementation may use
either one.
+
+ :returns: A single byte string for the whole file.
"""
my_file = self.get_file(file_id, path)
try:
=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt 2011-07-19 13:06:44 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt 2011-07-21 07:08:05 +0000
@@ -89,6 +89,12 @@
* Remove ``TransportListRegistry.set_default_transport``, as the concept of
a default transport is currently unused. (Jelmer Vernooij)
+* There is a new class ``ContentFilterTree`` that provides a facade for
+ content filtering. The ``filtered`` parameter to ``export`` is deprecated
+ in favor of passing a filtered tree, and the specific exporter plugins
+ no longer support it.
+ (Martin Pool)
+
Internals
*********
More information about the bazaar-commits
mailing list