Rev 2623: Build a combined graph index to use multiple indices at once. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Fri Jul 13 14:10:33 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2623
revision-id: robertc at robertcollins.net-20070713131030-xbo7bbks9zovdya4
parent: robertc at robertcollins.net-20070713112454-dsj464911g0l8wpa
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 23:10:30 +1000
message:
Build a combined graph index to use multiple indices at once.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-13 11:24:54 +0000
+++ b/bzrlib/index.py 2007-07-13 13:10:30 +0000
@@ -264,3 +264,55 @@
# iter_all validates completely at the moment, so just do that.
for node in self.iter_all_entries():
pass
+
+
+class CombinedGraphIndex(object):
+ """A GraphIndex made up from smaller GraphIndices.
+
+ The backing indices must implement GraphIndex, and are presumed to be
+ static data.
+ """
+
+ def __init__(self, indices):
+ """Create a CombinedGraphIndex backed by indices.
+
+ :param indices: The indices to query for data.
+ """
+ self._indices = indices
+
+ def iter_all_entries(self):
+ """Iterate over all keys within the index
+
+ :return: An iterable of (key, reference_lists, value). There is no
+ defined order for the result iteration - it will be in the most
+ efficient order for the index.
+ """
+ seen_keys = set()
+ for index in self._indices:
+ for node in index.iter_all_entries():
+ if node[0] not in seen_keys:
+ yield node
+ seen_keys.add(node[0])
+
+ def iter_entries(self, keys):
+ """Iterate over keys within the index.
+
+ :param keys: An iterable providing the keys to be retrieved.
+ :return: An iterable of (key, reference_lists, value). There is no
+ defined order for the result iteration - it will be in the most
+ efficient order for the index.
+ """
+ found = set()
+ keys = set(keys)
+ for node in self.iter_all_entries():
+ if node[0] in keys:
+ yield node
+ found.add(node[0])
+ missing = keys.difference(found)
+ if missing:
+ raise errors.MissingKey(self, missing.pop())
+
+ def validate(self):
+ """Validate that everything in the index can be accessed."""
+ for index in self._indices:
+ index.validate()
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2007-07-13 11:24:54 +0000
+++ b/bzrlib/tests/test_index.py 2007-07-13 13:10:30 +0000
@@ -17,7 +17,7 @@
"""Tests for indices."""
from bzrlib import errors
-from bzrlib.index import GraphIndexBuilder, GraphIndex
+from bzrlib.index import CombinedGraphIndex, GraphIndexBuilder, GraphIndex
from bzrlib.tests import TestCaseWithMemoryTransport
@@ -258,7 +258,7 @@
set(index.iter_entries(['name'])))
self.assertRaises(errors.MissingKey, list, index.iter_entries(['ref']))
- def test_iter_nothing_empty(self):
+ def test_iter_all_keys(self):
index = self.make_index(1, nodes=[
('name', (['ref'], ), 'data'),
('ref', ([], ), 'refdata')])
@@ -266,7 +266,7 @@
('ref', ((), ), 'refdata')]),
set(index.iter_entries(['name', 'ref'])))
- def test_iter_all_keys(self):
+ def test_iter_nothing_empty(self):
index = self.make_index()
self.assertEqual([], list(index.iter_entries([])))
@@ -312,3 +312,117 @@
def test_validate_no_refs_content(self):
index = self.make_index(nodes=[('key', (), 'value')])
index.validate()
+
+
+class TestCombinedGraphIndex(TestCaseWithMemoryTransport):
+
+ def make_index(self, name, ref_lists=0, nodes=[]):
+ builder = GraphIndexBuilder(ref_lists)
+ for node, references, value in nodes:
+ builder.add_node(node, references, value)
+ stream = builder.finish()
+ trans = self.get_transport()
+ trans.put_file(name, stream)
+ return GraphIndex(trans, name)
+
+ def test_open_missing_index_no_error(self):
+ trans = self.get_transport()
+ index1 = GraphIndex(trans, 'missing')
+ index = CombinedGraphIndex([index1])
+
+ def test_iter_all_entries_empty(self):
+ index = CombinedGraphIndex([])
+ self.assertEqual([], list(index.iter_all_entries()))
+
+ def test_iter_all_entries_children_empty(self):
+ index1 = self.make_index('name')
+ index = CombinedGraphIndex([index1])
+ self.assertEqual([], list(index.iter_all_entries()))
+
+ def test_iter_all_entries_simple(self):
+ index1 = self.make_index('name', nodes=[('name', (), 'data')])
+ index = CombinedGraphIndex([index1])
+ self.assertEqual([('name', (), 'data')],
+ list(index.iter_all_entries()))
+
+ def test_iter_all_entries_two_indices(self):
+ index1 = self.make_index('name1', nodes=[('name', (), 'data')])
+ index2 = self.make_index('name2', nodes=[('2', (), '')])
+ index = CombinedGraphIndex([index1, index2])
+ self.assertEqual([('name', (), 'data'),
+ ('2', (), '')],
+ list(index.iter_all_entries()))
+
+ def test_iter_all_entries_two_indices_dup_key(self):
+ index1 = self.make_index('name1', nodes=[('name', (), 'data')])
+ index2 = self.make_index('name2', nodes=[('name', (), 'data')])
+ index = CombinedGraphIndex([index1, index2])
+ self.assertEqual([('name', (), 'data')],
+ list(index.iter_all_entries()))
+
+ def test_iter_nothing_empty(self):
+ index = CombinedGraphIndex([])
+ self.assertEqual([], list(index.iter_entries([])))
+
+ def test_iter_nothing_children_empty(self):
+ index1 = self.make_index('name')
+ index = CombinedGraphIndex([index1])
+ self.assertEqual([], list(index.iter_entries([])))
+
+ def test_iter_all_keys(self):
+ index1 = self.make_index('1', 1, nodes=[
+ ('name', (['ref'], ), 'data')])
+ index2 = self.make_index('2', 1, nodes=[
+ ('ref', ([], ), 'refdata')])
+ index = CombinedGraphIndex([index1, index2])
+ self.assertEqual(set([('name', (('ref',),), 'data'),
+ ('ref', ((), ), 'refdata')]),
+ set(index.iter_entries(['name', 'ref'])))
+
+ def test_iter_all_keys_dup_entry(self):
+ index1 = self.make_index('1', 1, nodes=[
+ ('name', (['ref'], ), 'data'),
+ ('ref', ([], ), 'refdata')])
+ index2 = self.make_index('2', 1, nodes=[
+ ('ref', ([], ), 'refdata')])
+ index = CombinedGraphIndex([index1, index2])
+ self.assertEqual(set([('name', (('ref',),), 'data'),
+ ('ref', ((), ), 'refdata')]),
+ set(index.iter_entries(['name', 'ref'])))
+
+ def test_iter_missing_entry_empty(self):
+ index = CombinedGraphIndex([])
+ self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+
+ def test_iter_missing_entry_one_index(self):
+ index1 = self.make_index('1')
+ index = CombinedGraphIndex([index1])
+ self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+
+ def test_iter_missing_entry_two_index(self):
+ index1 = self.make_index('1')
+ index2 = self.make_index('2')
+ index = CombinedGraphIndex([index1, index2])
+ self.assertRaises(errors.MissingKey, list, index.iter_entries(['a']))
+
+ def test_iter_entry_present_one_index_only(self):
+ index1 = self.make_index('1', nodes=[('key', (), '')])
+ index2 = self.make_index('2', nodes=[])
+ index = CombinedGraphIndex([index1, index2])
+ self.assertEqual([('key', (), '')],
+ list(index.iter_entries(['key'])))
+ # and in the other direction
+ index = CombinedGraphIndex([index2, index1])
+ self.assertEqual([('key', (), '')],
+ list(index.iter_entries(['key'])))
+
+ def test_validate_bad_child_index_errors(self):
+ trans = self.get_transport()
+ trans.put_bytes('name', "not an index\n")
+ index1 = GraphIndex(trans, 'name')
+ index = CombinedGraphIndex([index1])
+ self.assertRaises(errors.BadIndexFormatSignature, index.validate)
+
+ def test_validate_empty(self):
+ index = CombinedGraphIndex([])
+ index.validate()
More information about the bazaar-commits
mailing list