Rev 43: Slightly different handling of large texts, in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 5 03:47:25 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/internal_index
------------------------------------------------------------
revno: 43
revision-id: john at arbash-meinel.com-20090305034657-t3qbsogy187yul4z
parent: john at arbash-meinel.com-20090305032949-ffww56phklv1vhbj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: internal_index
timestamp: Wed 2009-03-04 21:46:57 -0600
message:
Slightly different handling of large texts.
We should only use 2*max_fulltext as a minimum size if we are still working
on the same file. That allows us to avoid packing all texts in
after an ISO.
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-03-05 03:29:49 +0000
+++ b/groupcompress.py 2009-03-05 03:46:57 +0000
@@ -766,6 +766,7 @@
last_prefix = None
last_fulltext_len = None
max_fulltext_len = 0
+ max_fulltext_prefix = None
for record in stream:
# Raise an error when a record is missing.
if record.storage_kind == 'absent':
@@ -778,20 +779,30 @@
bytes = adapter.get_bytes(record)
if len(record.key) > 1:
prefix = record.key[0]
+ soft = (prefix == last_prefix)
else:
prefix = None
- max_fulltext_len = max(max_fulltext_len, len(bytes))
+ soft = False
+ if max_fulltext_len < len(bytes):
+ max_fulltext_len = len(bytes)
+ max_fulltext_prefix = prefix
(found_sha1, end_point, type,
length) = self._compressor.compress(record.key,
- bytes, record.sha1)
+ bytes, record.sha1, soft=soft)
+ # delta_ratio = float(len(bytes)) / length
# Check if we want to continue to include that text
- start_new_block = False
- if end_point > 2 * max_fulltext_len:
- if end_point > 4*1024*1024:
- start_new_block = True
- elif (prefix is not None and prefix != last_prefix
- and end_point > 2*1024*1024):
- start_new_block = True
+ if (prefix == max_fulltext_prefix
+ and end_point < 2 * max_fulltext_len):
+ # As long as we are on the same file_id, we will fill at least
+ # 2 * max_fulltext_len
+ start_new_block = False
+ elif end_point > 4*1024*1024:
+ start_new_block = True
+ elif (prefix is not None and prefix != last_prefix
+ and end_point > 2*1024*1024):
+ start_new_block = True
+ else:
+ start_new_block = False
# if type == 'fulltext':
# # If this is the first text, we don't do anything
# if self._compressor.num_keys > 1:
More information about the bazaar-commits
mailing list