Rev 5077: Switch over the meta-index to using another BTreeIndex for tracking the data. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Thu Mar 4 21:49:18 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5077
revision-id: john at arbash-meinel.com-20100304214850-4a0r305ahuocgh11
parent: john at arbash-meinel.com-20100304211802-fxoa6vtwg03vov8w
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Thu 2010-03-04 15:48:50 -0600
message:
  Switch over the meta-index to using another BTreeIndex for tracking the data.
  
  It isn't a huge difference, but I guess it will be a bit smaller on disk.
-------------- next part --------------
=== modified file 'bzrlib/sack.py'
--- a/bzrlib/sack.py	2010-03-04 20:34:35 +0000
+++ b/bzrlib/sack.py	2010-03-04 21:48:50 +0000
@@ -20,68 +20,55 @@
 rather than storing them in a separate disk structure.
 """
 
+import cStringIO
 import struct
 
-
-class IndexInfo(object):
-    """Information that we track for a single index."""
-
-    def __init__(self):
-        self.start_offset = 0
-        self.length = 0
-        self.index_type = None # 'revisions', etc
-
-    def to_bytes(self):
-        pass
-
-
-class SackTie(object):
+from bzrlib import (
+    errors,
+    btree_index,
+    )
+from bzrlib.transport import file_view
+
+
+class TrailingIndexBuilder(object):
     """This is the final bit that gets written to the sack content.
 
     It describes the version of the file (also present at the beginning),
     as well as some basic information about where data can be found.
 
-    The tie byte layout is basically:
-
-    | header | index count | index information
-        | 8-byte start-of-header | 4-byte version |
+    | header | btree index | 8-byte start-of-header | 4-byte version |
 
     The last two records are using fixed-width MSB encoding, so that we always
-    know how much to parse. The rest is more free-form text.
-
-    :ivar start_offset: The beginning of the tie information
-    :ivar version: The serialized version of this content
-    :ivar index_info: Information about individual indexes
+    know how much to parse.
     """
 
     # Note that the header intentionally starts with '\n' so that it separates
     # from the rest of the data if you open it in a text editor
     _HEADER_BASE = '\nBazaar Sack v'
 
-    def __init__(self, version):
-        self.start_offset = 0
+    def __init__(self, version, start_offset):
+        self.start_offset = start_offset
         self.version = version
-        self.index_info = []
-
-    def to_bytes(self, start_offset):
+        self._index_builder = btree_index.BTreeBuilder(reference_lists=0,
+                                                       key_elements=1)
+
+    def add_index_info(self, index_type, start, length):
+        return
+        self._index_builder.add_node(index_type, '%d %d' % (start, length))
+
+    def finish(self):
         chunks = []
-        chunks.append('%s%d\n'
-                      'num_indexes: %d\n'
-                      % (self._HEADER_BASE, self.version,
-                         len(self.index_info))
-                     )
-        for ii in self.index_info:
-            chunks.append(ii.to_bytes())
-        chunks.append(struct.pack('!QI', start_offset, self.version))
-        # TODO: 'set self.start_offset' ?
+        chunks.append('%s%d\n' % (self._HEADER_BASE, self.version))
+        chunks.append(self._index_builder.finish().read())
+        self._index_builder = None
+        chunks.append(struct.pack('!QI', self.start_offset, self.version))
         return ''.join(chunks)
 
     @classmethod
     def from_tail(cls, bytes):
         """Get the meta-info out of the last 12 bytes of content."""
         offset, version = struct.unpack('!QI', bytes[-12:])
-        tie = cls(version)
-        tie.start_offset = offset
+        tie = cls(version, offset)
         return tie
 
     @classmethod
@@ -89,6 +76,12 @@
         pass
 
 
+class TrailingIndex(object):
+    """Track the root-structure for the Sack.
+
+    See TrailingIndexBuilder for the structure of this tail.
+    """
+
 
 class Sack(object):
     """A self-contained pack file.

=== modified file 'bzrlib/tests/test_sack.py'
--- a/bzrlib/tests/test_sack.py	2010-03-04 20:34:35 +0000
+++ b/bzrlib/tests/test_sack.py	2010-03-04 21:48:50 +0000
@@ -19,11 +19,14 @@
 import struct
 
 from bzrlib import (
+    btree_index,
     errors,
     sack,
     tests,
     )
 
+from bzrlib.transport import memory
+
 
 class TestSack(tests.TestCaseWithMemoryTransport):
     
@@ -31,54 +34,47 @@
         pass
 
 
-
-class TestSackTie(tests.TestCase):
-
-    def assertToBytes(self, expected, tie, start_offset):
-        content = tie.to_bytes(start_offset)
+def assert_btree_matches(test, a_dict, bytes):
+    t = memory.MemoryTransport('')
+    t.put_bytes('index', bytes)
+    index = btree_index.BTreeGraphIndex(t, 'index', len(bytes))
+    as_dict = {}
+    for info in index.iter_all_entries():
+        key = info[1]
+        value_refs = info[2:]
+        as_dict[key] = value_refs
+    test.assertEqual(a_dict, as_dict)
+
+
+class TestTrailingIndexBuilder(tests.TestCase):
+
+    def assertAsBytes(self, index_content, ti):
+        content = ti.finish()
+        header = '\nBazaar Sack v%d\n' % (ti.version,)
+        self.assertStartsWith(content, header)
         tail = content[-12:]
-        content = content[:-12]
         offset, version = struct.unpack('!QI', tail)
-        self.assertEqual(tie.version, version)
-        self.assertEqual(start_offset, offset)
-        self.assertEqualDiff(expected, content)
+        self.assertEqual(ti.version, version)
+        self.assertEqual(ti.start_offset, offset)
+        index_bytes = content[len(header):-12]
+        assert_btree_matches(self, index_content, index_bytes)
 
     def assertFromTail(self, start_offset, version, bytes):
-        tie = sack.SackTie.from_tail(bytes)
-        self.assertIsInstance(tie, sack.SackTie)
-        self.assertEqual(start_offset, tie.start_offset)
-        self.assertEqual(version, tie.version)
+        ti = sack.TrailingIndexBuilder.from_tail(bytes)
+        self.assertIsInstance(ti, sack.TrailingIndexBuilder)
+        self.assertEqual(start_offset, ti.start_offset)
+        self.assertEqual(version, ti.version)
 
-    def test_to_bytes(self):
-        tie = sack.SackTie(1)
-        self.assertToBytes('\nBazaar Sack v1\n'
-                           'num_indexes: 0\n',
-                           tie, 0)
-        self.assertToBytes('\nBazaar Sack v1\n'
-                           'num_indexes: 0\n',
-                           tie, 1)
-        self.assertToBytes('\nBazaar Sack v1\n'
-                           'num_indexes: 0\n',
-                           tie, 2**48-1)
-        self.assertToBytes('\nBazaar Sack v1\n'
-                           'num_indexes: 0\n',
-                           tie, 2**64-1)
+    def test_as_bytes(self):
+        self.assertAsBytes({}, sack.TrailingIndexBuilder(1, 0))
+        self.assertAsBytes({}, sack.TrailingIndexBuilder(1, 12345))
+        self.assertAsBytes({}, sack.TrailingIndexBuilder(3, 2**48-1))
+        self.assertAsBytes({}, sack.TrailingIndexBuilder(3, 2**64-1))
 
     def test_from_tail(self):
         self.assertFromTail(12345, 1,
-                            '\nBazaar Sack v1\n'
-                            'num_indexes: 0\n'
                             '\x00\x00\x00\x00\x00\x00\x30\x39'
                             '\x00\x00\x00\x01')
         self.assertFromTail(12345, 123,
-                            '\nBazaar Sack v123\n'
-                            'num_indexes: 0\n'
                             '\x00\x00\x00\x00\x00\x00\x30\x39'
                             '\x00\x00\x00\x7b')
-
-
-class TestIndexInfo(tests.TestCase):
-
-    def assertToBytes(self, expected, index_info):
-        content = index_info.to_bytes()
-        self.assertEqualDiff(expected, content)



More information about the bazaar-commits mailing list