Rev 5084: Use a bencode string for the meta-info. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

Fri Mar 5 19:41:14 GMT 2010

At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5084
revision-id: john at arbash-meinel.com-20100305194041-psk80jrojuznuzrt
parent: john at arbash-meinel.com-20100305184828-7w4d0sz875k13ws5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Fri 2010-03-05 13:40:41 -0600
message:
  Use a bencode string for the meta-info.
  
  The main reason is that we can then copy this string verbatim into the
  overall pack-names file. It is also nice for the two to be identical,
  rather than merely representing the same information.
-------------- next part --------------
=== modified file 'bzrlib/sack.py'

--- a/bzrlib/sack.py	2010-03-05 18:48:28 +0000
+++ b/bzrlib/sack.py	2010-03-05 19:40:41 +0000
@@ -24,8 +24,9 @@
 import struct
 
 from bzrlib import (
+    bencode,
+    btree_index,
     errors,
-    btree_index,
     )
 
 _HEADER_BASE = '\nBazaar Sack v'
@@ -38,7 +39,7 @@
     It describes the version of the file (also present at the beginning),
     as well as some basic information about where data can be found.
 
-    | header | btree index | 8-byte start-of-header | 4-byte version |
+    | header | index info | 8-byte start-of-header | 4-byte version |
 
     The last two records are using fixed-width MSB encoding, so that we always
     know how much to parse.
@@ -50,14 +51,14 @@
     def __init__(self, start_offset):
         self.start_offset = start_offset
         self.version = _VERSION
-        self._index_builder = btree_index.BTreeBuilder(reference_lists=0,
-                                                       key_elements=1)
+        self._index_info = {}
 
     def add_index_info(self, index_type, start, length):
         # Note: bzr-search uses a ContainerWriter to write out the bytes, and
         # then adjusts the offsets so that it skips the 'Pack' overhead bytes.
         # I guess I don't really see the benefit versus the crufty overhead...
-        self._index_builder.add_node((index_type,), '%d %d' % (start, length))
+        assert index_type not in self._index_info
+        self._index_info[index_type] = (start, length)
 
     def finish(self):
         # TODO: Perhaps this should be more like BTreeBuilder and return a
@@ -67,8 +68,10 @@
         #       about memory pressure, etc.
         chunks = []
         chunks.append('%s%d\n' % (_HEADER_BASE, self.version))
-        chunks.append(self._index_builder.finish().read())
-        self._index_builder = None
+        # TODO: Should this bencode chunk be zlib compressed? I don't expect
+        #       it will be particularly long, but it is ascii, and probably
+        #       will compress well.
+        chunks.append(bencode.bencode(self._index_info))
         chunks.append(struct.pack('!QI', self.start_offset, self.version))
         return ''.join(chunks)
 
@@ -99,20 +102,14 @@
 
     def _read_named_sections(self, end_of_file):
         expected_header = '%s%d\n' % (_HEADER_BASE, self.version)
-        _, start = self._transport.readv(self._filename,
-            [(self.start_offset, len(expected_header))]).next()
-        assert start == expected_header
-        root_start = self.start_offset + len(expected_header)
-        root_end = end_of_file - 12
-        self._section_file_map['root-index'] = (root_start, root_end)
-        named_sections = btree_index.BTreeGraphIndex(self._transport,
-            self._filename, root_end - root_start, offset=root_start)
+        _, tail = self._transport.readv(self._filename,
+            [(self.start_offset, end_of_file - self.start_offset)]).next()
+        assert tail.startswith(expected_header)
+        index_info_bytes = tail[len(expected_header):-12]
+        index_info = bencode.bdecode_as_tuple(index_info_bytes)
+        assert type(index_info) is dict
         # Ensure that we have entries
-        for _, key, value in named_sections.iter_all_entries():
-            start, length = map(int, value.split())
-            assert len(key) == 1
-            name, = key
-            self._section_file_map[name] = (start, start+length)
+        self._section_file_map.update(index_info)
 
     @staticmethod
     def parse_tail_bytes(bytes):
@@ -141,10 +138,17 @@
         :param **kwargs: Any other named arguments will be passed to the index
             constructor
         """
-        start, end = self._section_file_map[name]
-        return index_class(self._transport, self._filename, size=(end-start),
+        start, length = self._section_file_map[name]
+        return index_class(self._transport, self._filename, size=length,
                            **kwargs)
 
+    def get_indicies_memo(self):
+        """Get a string giving the hints about where indices are located.
+
+        This is used to aggregate indices across separate pack files into a
+        single meta-index. (eg 'pack-names').
+        """
+
 
 class Sack(object):
     """A self-contained pack file.

=== modified file 'bzrlib/tests/test_sack.py'
--- a/bzrlib/tests/test_sack.py	2010-03-04 23:04:18 +0000
+++ b/bzrlib/tests/test_sack.py	2010-03-05 19:40:41 +0000
@@ -19,6 +19,7 @@
 import struct
 
 from bzrlib import (
+    bencode,
     btree_index,
     errors,
     sack,
@@ -61,7 +62,8 @@
         self.assertEqual(ti.version, version)
         self.assertEqual(ti.start_offset, offset)
         index_bytes = content[len(header):-12]
-        assert_btree_matches(self, index_content, index_bytes)
+        index_dict = bencode.bdecode_as_tuple(index_bytes)
+        self.assertEqual(index_content, index_dict)
 
     def test_tail_info(self):
         self.assertAsBytes({}, sack.TrailingIndexBuilder(0))
@@ -77,9 +79,9 @@
         builder.add_index_info('revisions', 0, 100)
         builder.add_index_info('inventories', 100, 50)
         builder.add_index_info('texts', 150, 350)
-        self.assertAsBytes({('revisions',): ('0 100',),
-                            ('inventories',): ('100 50',),
-                            ('texts',): ('150 350',),
+        self.assertAsBytes({'revisions': (0, 100),
+                            'inventories': (100, 50),
+                            'texts': (150, 350),
                            }, builder)
 
 
@@ -109,9 +111,7 @@
         t.put_bytes('test.sack', ' '*500 + content)
         ti = sack.TrailingIndex.from_transport(t, 'test.sack')
         # We skip the 16-byte header at the beginning, and the 12-byte tail
-        self.assertEqual({'root-index': (516, 500+len(content)-12),
-                          'texts': (150, 500),
-                         }, ti._section_file_map)
+        self.assertEqual({'texts': (150, 350)}, ti._section_file_map)
 
     def test_get_named_index(self):
         index_builder = btree_index.BTreeBuilder(0, 1)
@@ -122,13 +122,12 @@
         trailing_builder = sack.TrailingIndexBuilder(
                                 start_offset=trail_start)
         trailing_builder.add_index_info('texts', 0, trail_start)
-        content = text_idx_content + trailing_builder.finish()
+        trailing_content = trailing_builder.finish()
+        content = text_idx_content + trailing_content
         t = memory.MemoryTransport('')
         t.put_bytes('test.sack', content)
         ti = sack.TrailingIndex.from_transport(t, 'test.sack')
-        self.assertEqual({'root-index': (trail_start+16, len(content)-12),
-                          'texts': (0, trail_start),
-                         }, ti._section_file_map)
+        self.assertEqual({'texts': (0, trail_start)}, ti._section_file_map)
         text_index = ti.get_named_index('texts', btree_index.BTreeGraphIndex)
         assert_index_content(self, {('key1',): ('value1',),
                                     ('key2',): ('value2',),