Rev 32: Play around with doing deltas and then packing them after the fact. in http://bzr.arbash-meinel.com/plugins/xdelta_test
John Arbash Meinel
john at arbash-meinel.com
Fri Jun 22 16:49:05 BST 2007
At http://bzr.arbash-meinel.com/plugins/xdelta_test
------------------------------------------------------------
revno: 32
revision-id: john at arbash-meinel.com-20070622154902-5rvjc20mrrt373zg
parent: john at arbash-meinel.com-20070621142809-r7oxr1k8og1vln4j
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: xdelta_test
timestamp: Fri 2007-06-22 10:49:02 -0500
message:
Play around with doing deltas and then packing them after the fact.
modified:
bench_important_algorithms/bench_all_texts.py bench_all_texts.py-20070528175343-sl9y3m4xrylwr5n9-3
compression_algorithms.py compression_algorith-20070528163119-4mg41krgj6fz5xen-2
-------------- next part --------------
=== modified file 'bench_important_algorithms/bench_all_texts.py'
--- a/bench_important_algorithms/bench_all_texts.py 2007-05-29 14:31:26 +0000
+++ b/bench_important_algorithms/bench_all_texts.py 2007-06-22 15:49:02 +0000
@@ -14,6 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+import bz2
import sys
plugin_name = __name__.rsplit('.', 2)[0]
@@ -102,6 +103,27 @@
self._size_to_bytes(total_comp_len-fulltext_len),
))
+ def double_compress_and_report(self):
+ texts = self.get_all_texts()
+ ancestry_graph = self.get_ancestry_graph()
+ version_to_offsets = self.get_version_to_offsets()
+ base_versions = self.base_algorithm(ancestry_graph, version_to_offsets,
+ texts)
+
+ total_len = sum(len(text) for text in texts)
+ compressed = self.time(self._compress_all_texts_in_mem,
+ texts, version_to_offsets, base_versions)
+ final = bz2.compress(''.join(compressed))
+ total_comp_len = len(final)
+ fulltexts = [v for v, b in base_versions.iteritems() if b is None]
+ sys.stdout.write('\n %9s => %9s, %5.0f:1 (%d full, %d delta)\t'
+ % (self._size_to_bytes(total_len),
+ self._size_to_bytes(total_comp_len),
+ round(float(total_len) / total_comp_len),
+ len(fulltexts),
+ len(compressed) - len(fulltexts),
+ ))
+
def decompress_and_report(self, texts, base_offsets):
texts = self.get_all_texts()
ancestry_graph = self.get_ancestry_graph()
@@ -139,3 +161,7 @@
texts, base_offsets = self.get_lh_parent_base()
self.decompress_and_report(texts, base_offsets)
texts = self.get_all_texts()
+
+ # def test_compress_twice(self):
+ # """Compress the texts linearly, and then run bzip2 over the lot."""
+ # self.double_compress_and_report()
=== modified file 'compression_algorithms.py'
--- a/compression_algorithms.py 2007-05-29 17:51:11 +0000
+++ b/compression_algorithms.py 2007-06-22 15:49:02 +0000
@@ -360,6 +360,9 @@
important_algorithms = [CompressionAlgorithm.find_algorithm(x) for x in
- ['xd3-default', 'xd3-djw', 'xd3-NOCOMPRESS+zlib',
+ ['xd3-default', 'xd3-djw',
+ #'xd3-NOCOMPRESS',
+ 'xd3-NOCOMPRESS+zlib',
+ #'bdiff-one',
'bdiff-one+zlib', 'bdiff-multi+zlib',
]]
More information about the bazaar-commits mailing list