Rev 4175: Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize(). in lp:///~jameinel/bzr/1.14-btree_spill
John Arbash Meinel
john at arbash-meinel.com
Mon Mar 23 19:35:50 GMT 2009
At lp:///~jameinel/bzr/1.14-btree_spill
------------------------------------------------------------
revno: 4175
revision-id: john at arbash-meinel.com-20090323193538-3d01aetz07jsyd3w
parent: john at arbash-meinel.com-20090323192057-eh1l34z1ab5x3qt4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.14-btree_spill
timestamp: Mon 2009-03-23 14:35:38 -0500
message:
Add 'combine_backing_indices' as a flag for GraphIndex.set_optimize().
Update the Packer code so that it sets combine_backing_indices=False, as we know that
we won't be making extra queries.
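
For illustration only (not part of the commit): a minimal sketch of how a caller
might use the new flag, using the same BTreeBuilder arguments as the updated
test below; the keys and values here are made up.

    from bzrlib import btree_index

    # Spill to disk every 2 nodes so the backing-index code path is exercised.
    builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
    # We only add nodes and then serialise, never query while building, so
    # skip combining the spilled backing indices (the behaviour the Packer
    # now relies on).
    builder.set_optimize(combine_backing_indices=False)
    for key, value in [(('key1',), 'value1'), (('key2',), 'value2'),
                       (('key3',), 'value3')]:
        builder.add_node(key, value)
    index_file = builder.finish()  # file-like object with the serialised index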
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-03-23 19:20:57 +0000
+++ b/bzrlib/btree_index.py 2009-03-23 19:35:38 +0000
@@ -140,7 +140,6 @@
# Indicate it hasn't been built yet
self._nodes_by_key = None
self._optimize_for_size = False
- self._combine_spilled_indices = True
def add_node(self, key, value, references=()):
"""Add a node to the index.
@@ -181,7 +180,7 @@
combine mem with the first and second indexes, creating a new one of
size 4x. On the fifth create a single new one, etc.
"""
- if self._combine_spilled_indices:
+ if self._combine_backing_indices:
(new_backing_file, size,
backing_pos) = self._spill_mem_keys_and_combine()
else:
@@ -193,7 +192,7 @@
base_name, size)
# GC will clean up the file
new_backing._file = new_backing_file
- if self._combine_spilled_indices:
+ if self._combine_backing_indices:
if len(self._backing_indices) == backing_pos:
self._backing_indices.append(None)
self._backing_indices[backing_pos] = new_backing
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2009-02-23 15:29:35 +0000
+++ b/bzrlib/index.py 2009-03-23 19:35:38 +0000
@@ -99,6 +99,7 @@
self._nodes_by_key = None
self._key_length = key_elements
self._optimize_for_size = False
+ self._combine_backing_indices = True
def _check_key(self, key):
"""Raise BadIndexKey if key is not a valid key for this index."""
@@ -315,16 +316,23 @@
(len(result.getvalue()), expected_bytes))
return result
- def set_optimize(self, for_size=True):
+ def set_optimize(self, for_size=None, combine_backing_indices=None):
"""Change how the builder tries to optimize the result.
:param for_size: Tell the builder to try and make the index as small as
possible.
+ :param combine_backing_indices: If the builder spills to disk to save
+ memory, should the on-disk indices be combined. Set to True if you
+ are going to be probing the index, but to False if you are not. (If
+ you are not querying, then the time spent combining is wasted.)
:return: None
"""
# GraphIndexBuilder itself doesn't pay attention to the flag yet, but
# other builders do.
- self._optimize_for_size = for_size
+ if for_size is not None:
+ self._optimize_for_size = for_size
+ if combine_backing_indices is not None:
+ self._combine_backing_indices = combine_backing_indices
class GraphIndex(object):
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-03-16 05:33:31 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-03-23 19:35:38 +0000
@@ -725,8 +725,15 @@
def open_pack(self):
"""Open a pack for the pack we are creating."""
- return NewPack(self._pack_collection, upload_suffix=self.suffix,
+ new_pack = NewPack(self._pack_collection, upload_suffix=self.suffix,
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
+ # We know that we will process all nodes in order, and don't need to
+ # query, so don't combine any indices spilled to disk until we are done
+ new_pack.revision_index.set_optimize(combine_backing_indices=False)
+ new_pack.inventory_index.set_optimize(combine_backing_indices=False)
+ new_pack.text_index.set_optimize(combine_backing_indices=False)
+ new_pack.signature_index.set_optimize(combine_backing_indices=False)
+ return new_pack
def _update_pack_order(self, entries, index_to_pack_map):
"""Determine how we want our packs to be ordered.
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2009-03-23 19:20:57 +0000
+++ b/bzrlib/tests/test_btree_index.py 2009-03-23 19:35:38 +0000
@@ -433,7 +433,7 @@
def test_spill_index_stress_1_1_no_combine(self):
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
- builder._combine_spilled_indices = False
+ builder.set_optimize(for_size=False, combine_backing_indices=False)
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
builder.add_node(*nodes[0])
# Test the parts of the index that take up memory are doing so
@@ -510,6 +510,16 @@
self.assertTrue(builder._optimize_for_size)
builder.set_optimize(for_size=False)
self.assertFalse(builder._optimize_for_size)
+ # test that we can set combine_backing_indices without affecting
+ # _optimize_for_size
+ obj = object()
+ builder._optimize_for_size = obj
+ builder.set_optimize(combine_backing_indices=False)
+ self.assertFalse(builder._combine_backing_indices)
+ self.assertIs(obj, builder._optimize_for_size)
+ builder.set_optimize(combine_backing_indices=True)
+ self.assertTrue(builder._combine_backing_indices)
+ self.assertIs(obj, builder._optimize_for_size)
def test_spill_index_stress_2_2(self):
# test that references and longer keys don't confuse things.
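
As a side note (not part of the patch): the combining schedule sketched in the
_spill_mem_keys_to_disk docstring above (1x, then 2x, then 4x, then a fresh 1x
on the fifth spill) can be modelled with a few lines of standalone Python. The
slot layout below is only an illustration of that description, not bzrlib's
actual data structures.

    # Toy model: each spill either lands in the first empty slot, after
    # merging the in-memory batch with every filled slot before it, or
    # appends a new slot at the end. Consumed slots become empty again.
    def model_combining(num_spills):
        slots = []  # slots[i] is the size of the backing index in slot i, or None
        for spill in range(1, num_spills + 1):
            combined = 1               # the in-memory batch being spilled
            target = len(slots)        # default: append a new slot at the end
            for pos, size in enumerate(slots):
                if size is None:
                    target = pos       # first empty slot receives the merge
                    break
                combined += size       # merge every filled slot before it
            if target == len(slots):
                slots.append(None)
            slots[target] = combined
            for pos in range(target):  # slots that were merged are now empty
                slots[pos] = None
            print("spill %d -> backing sizes %r" % (spill, slots))

    model_combining(5)
    # spill 1 -> [1]
    # spill 2 -> [None, 2]
    # spill 3 -> [1, 2]
    # spill 4 -> [None, None, 4]
    # spill 5 -> [1, None, 4]

With combine_backing_indices=False the merging step is skipped entirely and
every spill simply appends another small backing index, which is why skipping
it saves time when, as in the Packer, nothing will query the builder.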