Rev 5084: Use a bencode string for the meta-info. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 5 19:41:14 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5084
revision-id: john at arbash-meinel.com-20100305194041-psk80jrojuznuzrt
parent: john at arbash-meinel.com-20100305184828-7w4d0sz875k13ws5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Fri 2010-03-05 13:40:41 -0600
message:
Use a bencode string for the meta-info.
The main reason is that we can then copy this string verbatim into the
overall pack-names file. It is also nice for the two to be identical,
rather than merely representing the same information.
-------------- next part --------------
=== modified file 'bzrlib/sack.py'
--- a/bzrlib/sack.py 2010-03-05 18:48:28 +0000
+++ b/bzrlib/sack.py 2010-03-05 19:40:41 +0000
@@ -24,8 +24,9 @@
import struct
from bzrlib import (
+ bencode,
+ btree_index,
errors,
- btree_index,
)
_HEADER_BASE = '\nBazaar Sack v'
@@ -38,7 +39,7 @@
It describes the version of the file (also present at the beginning),
as well as some basic information about where data can be found.
- | header | btree index | 8-byte start-of-header | 4-byte version |
+ | header | index info | 8-byte start-of-header | 4-byte version |
The last two records are using fixed-width MSB encoding, so that we always
know how much to parse.
@@ -50,14 +51,14 @@
def __init__(self, start_offset):
self.start_offset = start_offset
self.version = _VERSION
- self._index_builder = btree_index.BTreeBuilder(reference_lists=0,
- key_elements=1)
+ self._index_info = {}
def add_index_info(self, index_type, start, length):
# Note: bzr-search uses a ContainerWriter to write out the bytes, and
# then adjusts the offsets so that it skips the 'Pack' overhead bytes.
# I guess I don't really see the benefit versus the crufty overhead...
- self._index_builder.add_node((index_type,), '%d %d' % (start, length))
+ assert index_type not in self._index_info
+ self._index_info[index_type] = (start, length)
def finish(self):
# TODO: Perhaps this should be more like BTreeBuilder and return a
@@ -67,8 +68,10 @@
# about memory pressure, etc.
chunks = []
chunks.append('%s%d\n' % (_HEADER_BASE, self.version))
- chunks.append(self._index_builder.finish().read())
- self._index_builder = None
+ # TODO: Should this bencode chunk be zlib compressed? I don't expect
+ # it will be particularly long, but it is ascii, and probably
+ # will compress well.
+ chunks.append(bencode.bencode(self._index_info))
chunks.append(struct.pack('!QI', self.start_offset, self.version))
return ''.join(chunks)
@@ -99,20 +102,14 @@
def _read_named_sections(self, end_of_file):
expected_header = '%s%d\n' % (_HEADER_BASE, self.version)
- _, start = self._transport.readv(self._filename,
- [(self.start_offset, len(expected_header))]).next()
- assert start == expected_header
- root_start = self.start_offset + len(expected_header)
- root_end = end_of_file - 12
- self._section_file_map['root-index'] = (root_start, root_end)
- named_sections = btree_index.BTreeGraphIndex(self._transport,
- self._filename, root_end - root_start, offset=root_start)
+ _, tail = self._transport.readv(self._filename,
+ [(self.start_offset, end_of_file - self.start_offset)]).next()
+ assert tail.startswith(expected_header)
+ index_info_bytes = tail[len(expected_header):-12]
+ index_info = bencode.bdecode_as_tuple(index_info_bytes)
+ assert type(index_info) is dict
# Ensure that we have entries
- for _, key, value in named_sections.iter_all_entries():
- start, length = map(int, value.split())
- assert len(key) == 1
- name, = key
- self._section_file_map[name] = (start, start+length)
+ self._section_file_map.update(index_info)
@staticmethod
def parse_tail_bytes(bytes):
@@ -141,10 +138,17 @@
:param **kwargs: Any other named arguments will be passed to the index
constructor
"""
- start, end = self._section_file_map[name]
- return index_class(self._transport, self._filename, size=(end-start),
+ start, length = self._section_file_map[name]
+ return index_class(self._transport, self._filename, size=length,
**kwargs)
+ def get_indicies_memo(self):
+ """Get a string giving the hints about where indices are located.
+
+ This is used to aggregate indices across separate pack files into a
+ single meta-index. (eg 'pack-names').
+ """
+
class Sack(object):
"""A self-contained pack file.
=== modified file 'bzrlib/tests/test_sack.py'
--- a/bzrlib/tests/test_sack.py 2010-03-04 23:04:18 +0000
+++ b/bzrlib/tests/test_sack.py 2010-03-05 19:40:41 +0000
@@ -19,6 +19,7 @@
import struct
from bzrlib import (
+ bencode,
btree_index,
errors,
sack,
@@ -61,7 +62,8 @@
self.assertEqual(ti.version, version)
self.assertEqual(ti.start_offset, offset)
index_bytes = content[len(header):-12]
- assert_btree_matches(self, index_content, index_bytes)
+ index_dict = bencode.bdecode_as_tuple(index_bytes)
+ self.assertEqual(index_content, index_dict)
def test_tail_info(self):
self.assertAsBytes({}, sack.TrailingIndexBuilder(0))
@@ -77,9 +79,9 @@
builder.add_index_info('revisions', 0, 100)
builder.add_index_info('inventories', 100, 50)
builder.add_index_info('texts', 150, 350)
- self.assertAsBytes({('revisions',): ('0 100',),
- ('inventories',): ('100 50',),
- ('texts',): ('150 350',),
+ self.assertAsBytes({'revisions': (0, 100),
+ 'inventories': (100, 50),
+ 'texts': (150, 350),
}, builder)
@@ -109,9 +111,7 @@
t.put_bytes('test.sack', ' '*500 + content)
ti = sack.TrailingIndex.from_transport(t, 'test.sack')
# We skip the 16-byte header at the beginning, and the 12-byte tail
- self.assertEqual({'root-index': (516, 500+len(content)-12),
- 'texts': (150, 500),
- }, ti._section_file_map)
+ self.assertEqual({'texts': (150, 350)}, ti._section_file_map)
def test_get_named_index(self):
index_builder = btree_index.BTreeBuilder(0, 1)
@@ -122,13 +122,12 @@
trailing_builder = sack.TrailingIndexBuilder(
start_offset=trail_start)
trailing_builder.add_index_info('texts', 0, trail_start)
- content = text_idx_content + trailing_builder.finish()
+ trailing_content = trailing_builder.finish()
+ content = text_idx_content + trailing_content
t = memory.MemoryTransport('')
t.put_bytes('test.sack', content)
ti = sack.TrailingIndex.from_transport(t, 'test.sack')
- self.assertEqual({'root-index': (trail_start+16, len(content)-12),
- 'texts': (0, trail_start),
- }, ti._section_file_map)
+ self.assertEqual({'texts': (0, trail_start)}, ti._section_file_map)
text_index = ti.get_named_index('texts', btree_index.BTreeGraphIndex)
assert_index_content(self, {('key1',): ('value1',),
('key2',): ('value2',),
More information about the bazaar-commits mailing list