Rev 3754: Partial multi-layer chk dictionary trees. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Wed Nov 5 06:31:11 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3754
revision-id: robertc at robertcollins.net-20081105063100-t4ghwsz33mycoisz
parent: robertc at robertcollins.net-20081022064059-wb4jp5sdx2t2whn5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2008-11-05 17:31:00 +1100
message:
Partial multi-layer chk dictionary trees.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/per_repository_chk/test_supported.py test_supported.py-20080925063728-k65ry0n2rhta6t34-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
bzrlib/tests/test_inv.py testinv.py-20050722220913-1dc326138d1a5892
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2008-10-22 06:40:59 +0000
+++ b/bzrlib/chk_map.py 2008-11-05 06:31:00 +0000
@@ -67,18 +67,23 @@
return stream.next().get_bytes_as('fulltext')
@classmethod
- def from_dict(klass, store, initial_value):
+ def from_dict(klass, store, initial_value, maximum_size=0):
"""Create a CHKMap in store with initial_value as the content.
:param store: The store to record initial_value in, a VersionedFiles
object with 1-tuple keys supporting CHK key generation.
:param initial_value: A dict to store in store. Its keys and values
must be bytestrings.
+ :param maximum_size: The maximum_size rule to apply to nodes. This
+ determines the size at which no new data is added to a single node.
:return: The root chk of the resulting CHKMap.
"""
result = CHKMap(store, None)
+ result._root_node.set_maximum_size(maximum_size)
+ delta = []
for key, value in initial_value.items():
- result._map(key, value)
+ delta.append((None, key, value))
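+ # (None, key, value): an insert in apply_delta's (old, new, value) format.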
+ result.apply_delta(delta)
return result._save()
def iteritems(self, key_filter=None):
@@ -141,13 +146,295 @@
return result
-class RootNode(object):
+class Node(object):
+ """Base class defining the protocol for CHK Map nodes."""
+
+ def __init__(self, key_width=1):
+ """Create a node.
+
+ :param key_width: The width of keys for this node.
+ """
+ self._key = None
+ # Current number of elements
+ self._len = 0
+ self._maximum_size = 0
+ self._key_width = key_width
+ # current size in bytes
+ self._size = 0
+ # The pointers/values this node has - meaning defined by child classes.
+ self._items = {}
+
+ def __len__(self):
+ return self._len
+
+ @property
+ def maximum_size(self):
+ """What is the upper limit for adding references to a node."""
+ return self._maximum_size
+
+ def set_maximum_size(self, new_size):
+ """Set the size threshold for nodes.
+
+ :param new_size: The size at which no data is added to a node. 0 for
+ unlimited.
+ """
+ self._maximum_size = new_size
+
+
+class LeafNode(Node):
+ """A node containing actual key:value pairs."""
+
+ def __init__(self):
+ Node.__init__(self)
+ # The size of a leaf node with default values and no children.
+ self._size = 12
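+ # (12 = len('chkleaf:\n') plus the three '\n' terminators of the
+ # maximum_size, key_width and length lines; their digits are counted
+ # in _current_size.)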
+
+ def _current_size(self):
+ """Answer the current serialised size of this node."""
+ return (self._size + len(str(self._len)) + len(str(self._key_width)) +
+ len(str(self._maximum_size)))
+
+ @classmethod
+ def deserialise(klass, bytes, key):
+ """Deserialise bytes, with key key, into a LeafNode.
+
+ :param bytes: The bytes of the node.
+ :param key: The key that the serialised node has.
+ """
+ result = LeafNode()
+ lines = bytes.splitlines()
+ items = {}
+ if lines[0] != 'chkleaf:':
+ raise ValueError("not a serialised leaf node: %r" % bytes)
+ maximum_size = int(lines[1])
+ width = int(lines[2])
+ length = int(lines[3])
+ for line in lines[4:]:
+ elements = line.split('\x00')
+ items[tuple(elements[:-1])] = elements[-1]
+ if len(items) != length:
+ raise AssertionError("item count mismatch")
+ result._items = items
+ result._len = length
+ result._maximum_size = maximum_size
+ result._key = key
+ result._key_width = width
+ result._size = len(bytes)
+ return result
+
+ def iteritems(self, key_filter=None):
+ if key_filter is not None:
+ for item in self._items.iteritems():
+ if item[0] in key_filter:
+ yield item
+ else:
+ for item in self._items.iteritems():
+ yield item
+
+ def key(self):
+ return self._key
+
+ def map(self, key, value):
+ """Map key to value."""
+ if key in self._items:
+ self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
+ self._len -= 1
+ self._items[key] = value
+ self._size += 2 + len('\x00'.join(key)) + len(value)
+ self._len += 1
+ self._key = None
+ if (self._maximum_size and self._current_size() > self._maximum_size and
+ self._len > 1):
+ result = InternalNode()
+ result.set_maximum_size(self._maximum_size)
+ result._key_width = self._key_width
+ result._add_node(self)
+ return result
+ else:
+ return self
+
+ def serialise(self, store):
+ """Serialise the tree to store.
+
+ :param store: A VersionedFiles honouring the CHK extensions.
+ :return: An iterable of the keys inserted by this operation.
+ """
+ lines = ["chkleaf:\n"]
+ lines.append("%d\n" % self._maximum_size)
+ lines.append("%d\n" % self._key_width)
+ lines.append("%d\n" % self._len)
+ for key, value in sorted(self._items.items()):
+ lines.append("%s\x00%s\n" % ('\x00'.join(key), value))
+ sha1, _, _ = store.add_lines((None,), (), lines)
+ self._key = ("sha1:" + sha1,)
+ return [self._key]
+
+ def unmap(self, key):
+ """Unmap key from the node."""
+ self._size -= 2 + len('\x00'.join(key)) + len(self._items[key])
+ self._len -= 1
+ del self._items[key]
+ self._key = None
+ return self
+
+
+class InternalNode(Node):
+ """A node that contains references to other nodes."""
+
+ def __init__(self):
+ Node.__init__(self)
+ # The size of an InternalNode with default values and no children.
+ # self._size = 12
+ # How many octets wide the key prefixes within this node are.
+ self._node_width = 0
+
+ def _add_node(self, node):
+ """Add a node to the InternalNode.
+
+ :param node: An existing node to add. The node will be examined to see
+ if it is over or undersized and rebalanced if needed across this
+ node's children.
+ """
+ if self._len == 0:
+ # new tree level, we're being populated by upspill from an overfull
+ # tree.
+ # Cheap-to-code-but-slow?
+ elements = {}
+ max_width = 0
+ # suck in all the values
+ for key, value in node.iteritems():
+ # We work on the serialised keys
+ serialised_key = '\x00'.join(key)
+ elements[serialised_key] = (key, value)
+ max_width = max(len(serialised_key), max_width)
+ # Determine the maximum common key width we will internally handle.
+ # Start with the full key width; if that exceeds our node size
+ # shrink it until we are within the node limit.
+ self._node_width = max_width
+ width = self._node_width
+ # Populate all the resulting keys:
+ items = self._items
+ for serialised_key, key_value in elements.iteritems():
+ actual_key = self._serialised_key(key_value[0])
+ child = items.get(actual_key, None)
+ if not child:
+ child = LeafNode()
+ child.set_maximum_size(self._maximum_size)
+ child._key_width = self._key_width
+ items[actual_key] = child
+ child.map(key_value[0], key_value[1])
+ self._len += 1
+ else:
+ raise NotImplementedError(self._add_node)
+
+ def _current_size(self):
+ """Answer the current serialised size of this node."""
+ return (self._size + len(str(self._len)) + len(str(self._key_width)) +
+ len(str(self._maximum_size)))
+
+ def iteritems(self, key_filter=None):
+ if key_filter is None:
+ for child in self._items.itervalues():
+ for item in child.iteritems():
+ yield item
+ else:
+ serialised_filter = set([self._serialised_key(key) for key in
+ key_filter])
+ for key, child in self._items.iteritems():
+ if key in serialised_filter:
+ for item in child.iteritems(key_filter):
+ yield item
+
+ def map(self, key, value):
+ """Map key to value."""
+ serialised_key = self._serialised_key(key)
+ try:
+ child = self._items[serialised_key]
+ except KeyError:
+ child = LeafNode()
+ child.set_maximum_size(self._maximum_size)
+ child._key_width = self._key_width
+ self._items[serialised_key] = child
+ old_len = len(child)
+ new_child = child.map(key, value)
+ # TODO: rebalance/enforce balance
+ if new_child is not child:
+ # The child has exceeded its size; if we take more bytes off the
+ # key prefix for the child, that may fit into our node;
+ # How many more bytes can we fit?
+ remaining_size = max(0, self.maximum_size - self._current_size())
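+ # Assumed per-row cost: the node-width prefix, a 45 byte CHK
+ # ('sha1:' plus 40 hex digits), and 2 bytes of separator/newline.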
+ size_per_row = (self._node_width + 45 + 2)
+ # without increasing the key width:
+ extra_rows = remaining_size / size_per_row
+ if extra_rows:
+ # What is the minimum node width increase to split new_child:
+ offset_bytes = [1]
+ offset = self._node_width - 1
+ while len(offset_bytes) == 1 and offset < new_child._node_width:
+ offset += 1
+ offset_bytes = set(child_key[offset] for child_key in
+ new_child._items.keys())
+ if len(offset_bytes) > 1:
+ # We've found the fan out point
+ increase = self._node_width - offset
+ # calculate how many more pointers we need to carry
+ new_keys = len(offset_bytes)
+ for subnode in self._items.values():
+ new_keys += subnode._key_count(self._node_width, offset)
+ if (new_keys * (offset + 45 + 2) +
+ self._prelude_size() > self._maximum_size):
+ # can't fit it all, accept the new child
+ self._items[serialised_key] = new_child
+ else:
+ # increasing the node width would make room for the split
+ # children; that rebalancing is not implemented yet
+ pass
+ else:
+ # it didn't fan out! wtf!
+ raise AssertionError("no fan out")
+ else:
+ # leave it split
+ self._items[serialised_key] = new_child
+ self._len += 1
+ return self
+
+ def _serialised_key(self, key):
+ """Return the serialised key for key in this node."""
+ return ('\x00'.join(key) + '\x00'*self._node_width)[:self._node_width]
+
+ def _key_count(self, parent_width, cut_width):
+ """Return the number of keys in/under this node between two widths.
+
+ :param parent_width: The start offset in keys to consider.
+ :param cut_width: The width to stop considering at.
+ """
+ if cut_width > self._node_width:
+ raise NotImplementedError(self._key_count)
+ # Generate a list of unique key substrings between the two offsets
+ # and count them - a sketch of the intended behaviour; self._items
+ # is keyed by serialised keys here.
+ return len(set(key[parent_width:cut_width] for key in self._items))
+
+
+ def unmap(self, key):
+ """Remove key from this node and it's children."""
+ serialised_key = self._serialised_key(key)
+ child = self._items[serialised_key]
+ new_child = child.unmap(key)
+ # TODO shrink/rebalance
+ if not len(new_child):
+ del self._items[serialised_key]
+ if len(self._items) == 1:
+ return self._items.values()[0]
+ elif new_child is not child:
+ self._items[serialised_key] = new_child
+ return self
+
+
+class RootNode(Node):
"""A root node in a CHKMap."""
def __init__(self):
+ Node.__init__(self)
self._nodes = {}
- self._key = None
- self._len = 0
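+ # 12 = len('chkroot:\n') plus three '\n' terminators, as in LeafNode.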
+ self._size = 12
+ self.prefix_width = 0
def add_child(self, name, child):
"""Add a child to the node.
@@ -157,9 +444,20 @@
:param name: The name of the child. A bytestring.
:param child: The child, a key tuple for the child's value.
"""
+ if self._maximum_size and self._current_size() >= self._maximum_size:
+ return False
+ if name in self._nodes:
+ self.remove_child(name)
self._nodes[name] = child
self._len += 1
self._key = None
+ self._size += len(name) + len(child[0]) + 2
+ return True
+
+ def _current_size(self):
+ """Answer the current serialised size of this node."""
+ return (self._size + len(str(self._maximum_size)) + len(str(self._len))
+ + len(str(self.prefix_width)))
def deserialise(self, bytes, key):
"""Set the nodes value to that contained in bytes.
@@ -171,16 +469,17 @@
nodes = {}
if lines[0] != 'chkroot:':
raise ValueError("not a serialised root node: %r" % bytes)
- length = int(lines[1])
- for line in lines[2:]:
+ maximum_size = int(lines[1])
+ prefix_width = int(lines[2])
+ length = int(lines[3])
+ for line in lines[4:]:
name, value = line.split('\x00')
nodes[name] = (value,)
self._nodes = nodes
self._len = length
+ self._maximum_size = maximum_size
self._key = key
-
- def __len__(self):
- return self._len
+ self.prefix_width = prefix_width
def refs(self):
"""Get the CHK key references this node holds."""
@@ -193,7 +492,8 @@
:param name: The name to remove from the node.
"""
- del self._nodes[name]
+ node = self._nodes.pop(name)
+ self._size -= 2 + len(name) + len(node[0])
self._len -= 1
self._key = None
@@ -203,6 +503,8 @@
:return: A bytestring.
"""
lines = ["chkroot:\n"]
+ lines.append("%d\n" % self._maximum_size)
+ lines.append("%d\n" % self.prefix_width)
lines.append("%d\n" % self._len)
for name, child in sorted(self._nodes.items()):
lines.append("%s\x00%s\n" % (name, child[0]))
=== modified file 'bzrlib/tests/per_repository_chk/test_supported.py'
--- a/bzrlib/tests/per_repository_chk/test_supported.py 2008-10-22 06:40:59 +0000
+++ b/bzrlib/tests/per_repository_chk/test_supported.py 2008-11-05 06:31:00 +0000
@@ -65,13 +65,15 @@
repo.unlock()
def test_pack_preserves_chk_bytes_store(self):
+ expected_set = set([('sha1:062ef095245d8617d7671e50a71b529d13d93022',),
+ ('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',)])
repo = self.make_repository('.')
repo.lock_write()
try:
repo.start_write_group()
try:
- repo.chk_bytes.add_lines((None,), None, ["chkroot:\n", "0\n"],
- random_id=True)
+ repo.chk_bytes.add_lines((None,), None,
+ ["chkroot:\n", "0\n", "0\n"], random_id=True)
except:
repo.abort_write_group()
raise
@@ -87,19 +89,13 @@
else:
repo.commit_write_group()
repo.pack()
- self.assertEqual(
- set([('sha1:062ef095245d8617d7671e50a71b529d13d93022',),
- ('sha1:8e13ef930ff710425830ffd5077146b259b49534',)]),
- repo.chk_bytes.keys())
+ self.assertEqual(expected_set, repo.chk_bytes.keys())
finally:
repo.unlock()
# and reopening
repo = repo.bzrdir.open_repository()
repo.lock_read()
try:
- self.assertEqual(
- set([('sha1:062ef095245d8617d7671e50a71b529d13d93022',),
- ('sha1:8e13ef930ff710425830ffd5077146b259b49534',)]),
- repo.chk_bytes.keys())
+ self.assertEqual(expected_set, repo.chk_bytes.keys())
finally:
repo.unlock()
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2008-10-22 06:40:59 +0000
+++ b/bzrlib/tests/test_chk_map.py 2008-11-05 06:31:00 +0000
@@ -16,11 +16,11 @@
"""Tests for maps built on a CHK versionedfiles facility."""
-from bzrlib.chk_map import CHKMap, RootNode, ValueNode
+from bzrlib import chk_map
+from bzrlib.chk_map import CHKMap, RootNode, InternalNode, LeafNode, ValueNode
from bzrlib.tests import TestCaseWithTransport
-class TestDumbMap(TestCaseWithTransport):
+class TestCaseWithStore(TestCaseWithTransport):
def get_chk_bytes(self):
# The easiest way to get a CHK store is a development3 repository and
@@ -36,10 +36,16 @@
stream = chk_bytes.get_record_stream([key], 'unordered', True)
return stream.next().get_bytes_as("fulltext")
+ def to_dict(self, node):
+ return dict(node.iteritems())
+
+
+class TestMap(TestCaseWithStore):
+
def assertHasABMap(self, chk_bytes):
- root_key = ('sha1:674c986f86eacc7d9e4fbee45eda625e5689f9a4',)
+ root_key = ('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',)
self.assertEqual(
- "chkroot:\n1\na\x00sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b\n",
+ "chkroot:\n0\n1\na\x00sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b\n",
self.read_bytes(chk_bytes, root_key))
self.assertEqual(
"chkvalue:\nb",
@@ -47,26 +53,26 @@
("sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b",)))
def assertHasEmptyMap(self, chk_bytes):
- root_key = ('sha1:8e13ef930ff710425830ffd5077146b259b49534',)
- self.assertEqual("chkroot:\n0\n", self.read_bytes(chk_bytes, root_key))
+ root_key = ('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',)
+ self.assertEqual("chkroot:\n0\n0\n", self.read_bytes(chk_bytes, root_key))
- def _get_map(self, a_dict):
+ def _get_map(self, a_dict, maximum_size=0):
chk_bytes = self.get_chk_bytes()
- root_key = CHKMap.from_dict(chk_bytes, a_dict)
+ root_key = CHKMap.from_dict(chk_bytes, a_dict, maximum_size=maximum_size)
chkmap = CHKMap(chk_bytes, root_key)
return chkmap
def test_from_dict_empty(self):
chk_bytes = self.get_chk_bytes()
root_key = CHKMap.from_dict(chk_bytes, {})
- self.assertEqual(('sha1:8e13ef930ff710425830ffd5077146b259b49534',),
+ self.assertEqual(('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',),
root_key)
self.assertHasEmptyMap(chk_bytes)
def test_from_dict_ab(self):
chk_bytes = self.get_chk_bytes()
root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
- self.assertEqual(('sha1:674c986f86eacc7d9e4fbee45eda625e5689f9a4',),
+ self.assertEqual(('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',),
root_key)
self.assertHasABMap(chk_bytes)
@@ -77,7 +83,7 @@
root_key = CHKMap.from_dict(chk_bytes, {})
chkmap = CHKMap(chk_bytes, root_key)
new_root = chkmap.apply_delta([(None, "a", "b")])
- self.assertEqual(('sha1:674c986f86eacc7d9e4fbee45eda625e5689f9a4',),
+ self.assertEqual(('sha1:29f1da33ce2323d754485fd308abc5ff17f3856e',),
new_root)
self.assertHasABMap(chk_bytes)
# The update should have left us with an in memory root node, with an
@@ -91,7 +97,7 @@
root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
chkmap = CHKMap(chk_bytes, root_key)
new_root = chkmap.apply_delta([("a", None, None)])
- self.assertEqual(('sha1:8e13ef930ff710425830ffd5077146b259b49534',),
+ self.assertEqual(('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',),
new_root)
self.assertHasEmptyMap(chk_bytes)
# The update should have left us with an in memory root node, with an
@@ -122,16 +128,76 @@
self.assertEqual(0, len(chkmap))
def test___len__2(self):
- chkmap = self._get_map( {"foo":"bar", "gam":"quux"})
- self.assertEqual(2, len(chkmap))
+ chkmap = self._get_map({"foo":"bar", "gam":"quux"})
+ self.assertEqual(2, len(chkmap))
+
+ def test_max_size_100_bytes(self):
+ # When there is a 100 byte upper node limit, a tree is formed.
+ chkmap = self._get_map({"k1"*50:"v1", "k2"*50:"v2"}, maximum_size=100)
+ # We expect three nodes:
+ # a root with two children, and two key prefixes - 'k1' pointing at
+ # one child and 'k2' at the other - as our node splitting is only
+ # just being developed.
+ # The maximum size should be embedded in the root node:
+ chkmap._ensure_root()
+ self.assertEqual(100, chkmap._root_node.maximum_size)
+ # There should be two child nodes, and a prefix width of 2 bytes:
+ self.assertEqual(2, len(chkmap._root_node._nodes))
+ self.assertEqual(2, chkmap._root_node.prefix_width)
+ # The actual nodes pointed at will change as serialisers change; so
+ # here we test that the key prefix is correct; then load the nodes and
+ # check they have the right pointed at key; whether they have the
+ # pointed at value inline or not is also unrelated to this test so we
+ # don't check that.
+ nodes = sorted(chkmap._root_node._nodes.items())
+ ptr1 = nodes[0]
+ ptr2 = nodes[1]
+ self.assertEqual('k1', ptr1[0])
+ self.assertEqual('k2', ptr2[0])
+ node1 = chk_map._deserialise(chkmap._read_bytes(ptr1[1]), ptr1[1])
+ self.assertEqual(1, len(node1._nodes))
+ self.assertEqual(['k1'*50], node1._nodes.keys())
+ node2 = chk_map._deserialise(chkmap._read_bytes(ptr2[1]), ptr2[1])
+ self.assertEqual(1, len(node2._nodes))
+ self.assertEqual(['k2'*50], node2._nodes.keys())
+ # Having checked we have a good structure, check that the content is
+ # still accessible.
+ self.assertEqual(2, len(chkmap))
+ self.assertEqual([("k1"*50, "v1"), ("k2"*50, "v2")],
+ sorted(list(chkmap.iteritems())))
class TestRootNode(TestCaseWithTransport):
+ def test__current_size(self):
+ node = RootNode()
+ self.assertEqual(15, node._current_size())
+ node.add_child("cd", ("sha1:12345",))
+ self.assertEqual(29, node._current_size())
+ self.assertEqual(29, len(node.serialise()))
+ node.add_child("cd", ("sha1:123456",))
+ self.assertEqual(30, node._current_size())
+ self.assertEqual(30, len(node.serialise()))
+ node.remove_child("cd")
+ self.assertEqual(15, node._current_size())
+ self.assertEqual(15, len(node.serialise()))
+ node.set_maximum_size(100)
+ self.assertEqual(17, node._current_size())
+
def test_serialise_empty(self):
node = RootNode()
bytes = node.serialise()
- self.assertEqual("chkroot:\n0\n", bytes)
+ self.assertEqual("chkroot:\n0\n0\n0\n", bytes)
+
+ def test_add_child_over_limit(self):
+ node = RootNode()
+ node.set_maximum_size(20)
+ node.add_child("abcdef", ("sha1:12345",))
+ size = node._current_size()
+ self.assertTrue(20 < size)
+ self.assertEqual(False, node.add_child("12345", ("sha1:34",)))
+ # Nothing should have changed
+ self.assertEqual(size, node._current_size())
+ self.assertEqual(1, len(node))
def test_add_child_resets_key(self):
node = RootNode()
@@ -139,6 +205,11 @@
node.add_child("c", ("sha1:1234",))
self.assertEqual(None, node._key)
+ def test_add_child_returns_True(self):
+ node = RootNode()
+ node._key = ("something",)
+ self.assertEqual(True, node.add_child("c", ("sha1:1234",)))
+
def test_add_child_increases_len(self):
node = RootNode()
node._key = ("something",)
@@ -171,16 +242,28 @@
# deserialising from a bytestring & key sets the nodes and the known
# key.
node = RootNode()
- node.deserialise("chkroot:\n1\nc\x00sha1:1234\n", ("foo",))
+ node.deserialise("chkroot:\n0\n0\n1\nc\x00sha1:1234\n", ("foo",))
self.assertEqual({"c": ("sha1:1234",)}, node._nodes)
self.assertEqual(("foo",), node._key)
self.assertEqual(1, len(node))
+ self.assertEqual(0, node.maximum_size)
def test_serialise_with_child(self):
node = RootNode()
node.add_child("c", ("sha1:1234",))
bytes = node.serialise()
- self.assertEqual("chkroot:\n1\nc\x00sha1:1234\n", bytes)
+ # layout: type line, maximum_size, prefix_width, length, then name\x00CHK rows
+ self.assertEqual("chkroot:\n0\n0\n1\nc\x00sha1:1234\n", bytes)
+
+ def test_deserialise_max_size(self):
+ node = RootNode()
+ node.deserialise("chkroot:\n100\n0\n1\nc\x00sha1:1234\n", ("foo",))
+ self.assertEqual(100, node.maximum_size)
+
+ def test_deserialise_key_prefix(self):
+ node = RootNode()
+ node.deserialise("chkroot:\n100\n10\n1\nc\x00sha1:1234\n", ("foo",))
+ self.assertEqual(10, node.prefix_width)
class TestValueNode(TestCaseWithTransport):
@@ -193,3 +276,297 @@
node = ValueNode("b")
bytes = node.serialise()
self.assertEqual("chkvalue:\nb", bytes)
+
+
+class TestLeafNode(TestCaseWithStore):
+
+ def test_current_size_empty(self):
+ node = LeafNode()
+ self.assertEqual(15, node._current_size())
+
+ def test_current_size_size_changed(self):
+ node = LeafNode()
+ node.set_maximum_size(10)
+ self.assertEqual(16, node._current_size())
+
+ def test_current_size_width_changed(self):
+ node = LeafNode()
+ node._key_width = 10
+ self.assertEqual(16, node._current_size())
+
+ def test_current_size_items(self):
+ node = LeafNode()
+ base_size = node._current_size()
+ node = node.map(("foo bar",), "baz")
+ self.assertEqual(base_size + 12, node._current_size())
+
+ def test_deserialise_empty(self):
+ node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
+ self.assertEqual(0, len(node))
+ self.assertEqual(10, node.maximum_size)
+ self.assertEqual(("sha1:1234",), node.key())
+
+ def test_deserialise_items(self):
+ node = LeafNode.deserialise(
+ "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
+ self.assertEqual(2, len(node))
+ self.assertEqual([(("foo bar",), "baz"), (("quux",), "blarh")],
+ sorted(node.iteritems()))
+
+ def test_iteritems_selected_one_of_two_items(self):
+ node = LeafNode.deserialise(
+ "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
+ self.assertEqual(2, len(node))
+ self.assertEqual([(("quux",), "blarh")],
+ sorted(node.iteritems([("quux",), ("qaz",)])))
+
+ def test_key_new(self):
+ node = LeafNode()
+ self.assertEqual(None, node.key())
+
+ def test_key_after_map(self):
+ node = LeafNode.deserialise("chkleaf:\n10\n1\n0\n", ("sha1:1234",))
+ node = node.map(("foo bar",), "baz quux")
+ self.assertEqual(None, node.key())
+
+ def test_key_after_unmap(self):
+ node = LeafNode.deserialise(
+ "chkleaf:\n0\n1\n2\nfoo bar\x00baz\nquux\x00blarh\n", ("sha1:1234",))
+ node = node.unmap(("foo bar",))
+ self.assertEqual(None, node.key())
+
+ def test_map_exceeding_max_size_only_entry(self):
+ node = LeafNode()
+ node.set_maximum_size(10)
+ result = node.map(("foo bar",), "baz quux")
+ self.assertEqual(result, node)
+ self.assertTrue(10 < result._current_size())
+
+ def test_map_exceeding_max_size_second_entry_early_difference(self):
+ node = LeafNode()
+ node.set_maximum_size(10)
+ node = node.map(("foo bar",), "baz quux")
+ result = node.map(("blue",), "red")
+ self.assertIsInstance(result, InternalNode)
+ # should have copied the data in:
+ self.assertEqual(2, len(result))
+ self.assertEqual({('blue',): 'red', ('foo bar',): 'baz quux'},
+ self.to_dict(result))
+ self.assertEqual(10, result.maximum_size)
+ self.assertEqual(1, result._key_width)
+
+ def test_map_exceeding_max_size_second_entry_last_octet_changed(self):
+ node = LeafNode()
+ node.set_maximum_size(10)
+ node = node.map(("foo bar",), "baz quux")
+ result = node.map(("foo baz",), "red")
+ self.assertIsInstance(result, InternalNode)
+ # should have copied the data in:
+ self.assertEqual(2, len(result))
+ self.assertEqual({('foo baz',): 'red', ('foo bar',): 'baz quux'},
+ self.to_dict(result))
+ self.assertEqual(10, result.maximum_size)
+ self.assertEqual(1, result._key_width)
+
+ def test_map_first(self):
+ node = LeafNode()
+ result = node.map(("foo bar",), "baz quux")
+ self.assertEqual(result, node)
+ self.assertEqual({("foo bar",):"baz quux"}, self.to_dict(node))
+ self.assertEqual(1, len(node))
+
+ def test_map_second(self):
+ node = LeafNode()
+ node = node.map(("foo bar",), "baz quux")
+ result = node.map(("bingo",), "bango")
+ self.assertEqual(result, node)
+ self.assertEqual({("foo bar",):"baz quux", ("bingo",):"bango"},
+ self.to_dict(node))
+ self.assertEqual(2, len(node))
+
+ def test_map_replacement(self):
+ node = LeafNode()
+ node = node.map(("foo bar",), "baz quux")
+ result = node.map(("foo bar",), "bango")
+ self.assertEqual(result, node)
+ self.assertEqual({("foo bar",): "bango"},
+ self.to_dict(node))
+ self.assertEqual(1, len(node))
+
+ def test_serialise_empty(self):
+ store = self.get_chk_bytes()
+ node = LeafNode()
+ node.set_maximum_size(10)
+ expected_key = ("sha1:62cc3565b48b0e830216e652cf99c6bd6b05b4b9",)
+ self.assertEqual([expected_key],
+ list(node.serialise(store)))
+ self.assertEqual("chkleaf:\n10\n1\n0\n", self.read_bytes(store, expected_key))
+ self.assertEqual(expected_key, node.key())
+
+ def test_serialise_items(self):
+ store = self.get_chk_bytes()
+ node = LeafNode()
+ node.set_maximum_size(10)
+ node = node.map(("foo bar",), "baz quux")
+ expected_key = ("sha1:d44cb6f0299b7e047da7f9e98f810e98f1dce1a7",)
+ self.assertEqual([expected_key],
+ list(node.serialise(store)))
+ self.assertEqual("chkleaf:\n10\n1\n1\nfoo bar\x00baz quux\n",
+ self.read_bytes(store, expected_key))
+ self.assertEqual(expected_key, node.key())
+
+ def test_unmap_missing(self):
+ node = LeafNode()
+ self.assertRaises(KeyError, node.unmap, ("foo bar",))
+
+ def test_unmap_present(self):
+ node = LeafNode()
+ node = node.map(("foo bar",), "baz quux")
+ result = node.unmap(("foo bar",))
+ self.assertEqual(result, node)
+ self.assertEqual({}, self.to_dict(node))
+ self.assertEqual(0, len(node))
+
+
+class TestInternalNode(TestCaseWithStore):
+
+ def test_add_node_empty_oversized_no_common_sets_prefix(self):
+ # Adding an oversized node with two items generates two new leaf
+ # nodes, and a prefix width equal to the longest serialised key,
+ # which is sufficient to guarantee a split.
+ overpacked = LeafNode()
+ overpacked.set_maximum_size(10)
+ overpacked.map(("foo bar",), "baz")
+ overpacked.map(("strange thing",), "it is")
+ # at this point, map returned a new internal node that is already
+ # packed, but that should have preserved the old node due to the
+ # functional idioms - check to be sure:
+ self.assertTrue(overpacked.maximum_size < overpacked._current_size())
+ node = InternalNode()
+ # We're not testing that the internal node rebalances yet
+ node.set_maximum_size(0)
+ node._add_node(overpacked)
+ # 13 is the length of 'strange thing' serialised; as there is no node size
+ # set, we pack the internal node as densely as possible.
+ self.assertEqual(13, node._node_width)
+ self.assertEqual(set(["strange thing", "foo bar\x00\x00\x00\x00\x00\x00"]),
+ set(node._items.keys()))
+ self.assertEqual(2, len(node))
+ self.assertEqual({('strange thing',): 'it is'},
+ self.to_dict(node._items["strange thing"]))
+ self.assertEqual({('foo bar',): 'baz'},
+ self.to_dict(node._items["foo bar\x00\x00\x00\x00\x00\x00"]))
+
+ def test_iteritems_empty(self):
+ node = InternalNode()
+ self.assertEqual([], sorted(node.iteritems()))
+
+ def test_iteritems_two_children(self):
+ node = InternalNode()
+ leaf1 = LeafNode()
+ leaf1.map(('foo bar',), 'quux')
+ leaf2 = LeafNode()
+ leaf2.map(('strange',), 'beast')
+ node._items['foo ba'] = leaf1
+ node._items['strang'] = leaf2
+ self.assertEqual([(('foo bar',), 'quux'), (('strange',), 'beast')],
+ sorted(node.iteritems()))
+
+ def test_iteritems_two_children_partial(self):
+ node = InternalNode()
+ leaf2 = LeafNode()
+ leaf2.map(('strange',), 'beast')
+ # This sets up a path that should not be followed - it will error if
+ # the code tries to.
+ node._items['foo ba'] = None
+ node._items['strang'] = leaf2
+ node._node_width = 6
+ self.assertEqual([(('strange',), 'beast')],
+ sorted(node.iteritems([('strange',), ('weird',)])))
+
+ def test_iteritems_partial_empty(self):
+ node = InternalNode()
+ self.assertEqual([], sorted(node.iteritems([('missing',)])))
+
+ def test_map_to_existing_child(self):
+ # mapping a new key which is in a child of an internal node maps
+ # recursively.
+ overpacked = LeafNode()
+ overpacked.set_maximum_size(10)
+ overpacked.map(("foo bar",), "baz")
+ node = overpacked.map(("foo baz",), "it is")
+ self.assertIsInstance(node, InternalNode)
+ # Now, increase the maximum size limit on the subnode for foo bar
+ child = node._items[node._serialised_key(("foo bar",))]
+ child.set_maximum_size(200)
+ # And map a new key into node, which will land in the same child node
+ result = node.map(("foo bar baz",), "new value")
+ self.assertTrue(result is node)
+ self.assertEqual(3, len(result))
+ self.assertEqual(2, len(child))
+ self.assertEqual({('foo bar',): 'baz',
+ ('foo bar baz',): 'new value', ('foo baz',): 'it is'},
+ self.to_dict(node))
+
+ def test_map_to_existing_child_exceed_child_size_not_internal_size(self):
+ # mapping a new key which is in a child of an internal node maps
+ # recursively, and when the child splits that is accommodated within the
+ # internal node if there is room for another child pointer.
+ overpacked = LeafNode()
+ overpacked.set_maximum_size(40)
+ overpacked.map(("foo bar",), "baz baz baz baz baz baz baz")
+ node = overpacked.map(("foo baz",), "it is it is it is it is it is")
+ self.assertIsInstance(node, InternalNode)
+ # And map a new key into node, which will land in the same child path
+ # within node, but trigger a spill event on the child, and should end
+ # up with 3 pointers in node (as the pointers can fit in the node
+ # space).
+ result = node.map(("foo bar baz",),
+ "new value new value new value new value new value")
+ self.assertTrue(result is node)
+ self.assertEqual(3, len(result))
+ # We should have one child for foo bar
+ child = node._items[node._serialised_key(("foo bar\x00",))]
+ self.assertIsInstance(child, LeafNode)
+ self.assertEqual(1, len(child))
+ # And one for 'foo bar '
+ child = node._items[node._serialised_key(("foo bar ",))]
+ self.assertIsInstance(child, LeafNode)
+ self.assertEqual(1, len(child))
+ self.assertEqual({('foo bar',): 'baz baz baz baz baz baz baz',
+ ('foo bar baz',): 'new value new value new value new value new value',
+ ('foo baz',): 'it is it is it is it is it is'},
+ self.to_dict(node))
+
+ def test_map_to_new_child(self):
+ # mapping a new key which is in a child of an internal node maps
+ # recursively.
+ overpacked = LeafNode()
+ overpacked.set_maximum_size(10)
+ overpacked.map(("foo bar",), "baz")
+ node = overpacked.map(("foo baz",), "it is")
+ self.assertIsInstance(node, InternalNode)
+ # Map a new key into node, which will land in a new child node
+ result = node.map(("quux",), "new value")
+ # Fetch the new child node created for quux:
+ child = node._items[node._serialised_key(("quux",))]
+ self.assertTrue(result is node)
+ self.assertEqual(3, len(result))
+ self.assertEqual(1, len(child))
+ self.assertEqual({('foo bar',): 'baz',
+ ('quux',): 'new value', ('foo baz',): 'it is'},
+ self.to_dict(node))
+
+ def test_unmap_second_last_shrinks_to_other_branch(self):
+ # unmapping the second last child of an internal node downgrades it to
+ # a leaf node.
+ overpacked = LeafNode()
+ overpacked.set_maximum_size(10)
+ overpacked.map(("foo bar",), "baz")
+ node = overpacked.map(("strange thing",), "it is")
+ self.assertIsInstance(node, InternalNode)
+ result = node.unmap(("foo bar",))
+ self.assertIsInstance(result, LeafNode)
+ self.assertEqual({("strange thing",): "it is"}, self.to_dict(result))
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2008-10-22 06:40:59 +0000
+++ b/bzrlib/tests/test_inv.py 2008-11-05 06:31:00 +0000
@@ -219,7 +219,7 @@
'chkinventory:\n',
'revision_id: foo\n',
'root_id: TREE_ROOT\n',
- 'id_to_entry: sha1:6fc2838d88d40dc7331087fb69d3ea72c023b3ee\n'
+ 'id_to_entry: sha1:3c84f56e89a6089ee0c6cc25becdcaa368e83632\n'
],
chk_inv.to_lines())