Rev 5075: Add an offset flag to BTreeGraphIndex. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 5 17:21:54 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset
------------------------------------------------------------
revno: 5075
revision-id: john at arbash-meinel.com-20100305172120-fmc6vu6kdqqcnq5t
parent: pqm at pqm.ubuntu.com-20100303113037-51ffw5xyk93yzgl0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-btree-offset
timestamp: Fri 2010-03-05 11:21:20 -0600
message:
Add an offset flag to BTreeGraphIndex.
For the combined pack-and-index case, we need to be able to
start a btree index at an arbitrary offset. Robert did this
in bzr-search using a 'FileView' Transport abstraction. However,
this seems a bit abstract versus just telling the btree it
needs to look at offset X.
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2010-02-17 17:11:16 +0000
+++ b/bzrlib/btree_index.py 2010-03-05 17:21:20 +0000
@@ -647,7 +647,8 @@
memory except when very large walks are done.
"""
- def __init__(self, transport, name, size, unlimited_cache=False):
+ def __init__(self, transport, name, size, unlimited_cache=False,
+ offset=0):
"""Create a B+Tree index object on the index name.
:param transport: The transport to read data for the index from.
@@ -660,6 +661,8 @@
:param unlimited_cache: If set to True, then instead of using an
LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always
cache all leaf nodes.
+ :param offset: The start of the btree index data isn't byte 0 of the
+ file. Instead it starts at some point later.
"""
self._transport = transport
self._name = name
@@ -667,6 +670,7 @@
self._file = None
self._recommended_pages = self._compute_recommended_pages()
self._root_node = None
+ self._base_offset = offset
# Default max size is 100,000 leave values
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
if unlimited_cache:
@@ -1494,8 +1498,9 @@
# list of (offset, length) regions of the file that should, evenually
# be read in to data_ranges, either from 'bytes' or from the transport
ranges = []
+ base_offset = self._base_offset
for index in nodes:
- offset = index * _PAGE_SIZE
+ offset = (index * _PAGE_SIZE)
size = _PAGE_SIZE
if index == 0:
# Root node - special case
@@ -1505,9 +1510,11 @@
# The only case where we don't know the size, is for very
# small indexes. So we read the whole thing
bytes = self._transport.get_bytes(self._name)
- self._size = len(bytes)
+ num_bytes = len(bytes)
+ self._size = num_bytes - base_offset
# the whole thing should be parsed out of 'bytes'
- ranges.append((0, len(bytes)))
+ ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
+ for start in xrange(base_offset, num_bytes, _PAGE_SIZE)]
break
else:
if offset > self._size:
@@ -1515,13 +1522,13 @@
' of the file %s > %s'
% (offset, self._size))
size = min(size, self._size - offset)
- ranges.append((offset, size))
+ ranges.append((base_offset + offset, size))
if not ranges:
return
elif bytes is not None:
# already have the whole file
- data_ranges = [(start, bytes[start:start+_PAGE_SIZE])
- for start in xrange(0, len(bytes), _PAGE_SIZE)]
+ data_ranges = [(start, bytes[start:start+size])
+ for start, size in ranges]
elif self._file is None:
data_ranges = self._transport.readv(self._name, ranges)
else:
@@ -1530,6 +1537,7 @@
self._file.seek(offset)
data_ranges.append((offset, self._file.read(size)))
for offset, data in data_ranges:
+ offset -= base_offset
if offset == 0:
# extract the header
offset, data = self._parse_header_from_bytes(data)
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2010-02-23 07:43:11 +0000
+++ b/bzrlib/tests/test_btree_index.py 2010-03-05 17:21:20 +0000
@@ -611,6 +611,18 @@
size = trans.put_file('index', stream)
return btree_index.BTreeGraphIndex(trans, 'index', size)
+ def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
+ offset=0):
+ builder = btree_index.BTreeBuilder(key_elements=key_elements,
+ reference_lists=ref_lists)
+ builder.add_nodes(nodes)
+ transport = self.get_transport('')
+ content = builder.finish().read()
+ size = len(content)
+ transport.put_bytes('index', (' '*offset)+content)
+ return btree_index.BTreeGraphIndex(transport, 'index', size=size,
+ offset=offset)
+
def test_clear_cache(self):
nodes = self.make_nodes(160, 2, 2)
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
@@ -686,6 +698,25 @@
transport._activity)
self.assertEqual(1173, size)
+ def test_with_offset_no_size(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=1234,
+ nodes=self.make_nodes(200, 1, 1))
+ index._size = None # throw away the size info
+ self.assertEqual(200, index.key_count())
+
+ def test_with_small_offset(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=1234,
+ nodes=self.make_nodes(200, 1, 1))
+ self.assertEqual(200, index.key_count())
+
+ def test_with_large_offset(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=123456,
+ nodes=self.make_nodes(200, 1, 1))
+ self.assertEqual(200, index.key_count())
+
def test__read_nodes_no_size_one_page_reads_once(self):
self.make_index(nodes=[(('key',), 'value', ())])
trans = get_transport('trace+' + self.get_url())
More information about the bazaar-commits
mailing list