Rev 2614: Node references are byte offsets. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Fri Jul 13 09:10:11 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2614
revision-id: robertc at robertcollins.net-20070713081009-uouct3cvz4dz1rtl
parent: robertc at robertcollins.net-20070713072951-zyno1jr1tjyo819y
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 18:10:09 +1000
message:
Node references are byte offsets.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-13 07:28:18 +0000
+++ b/bzrlib/index.py 2007-07-13 08:10:09 +0000
@@ -83,10 +83,56 @@
def finish(self):
lines = [_SIGNATURE]
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
+ prefix_length = len(lines[0]) + len(lines[1])
+ # references are byte offsets. To avoid having to do nasty
+ # polynomial work to resolve offsets (references to later in the
+ # file cannot be determined until all the inbetween references have
+ # been calculated too) we pad the offsets with 0's to make them be
+ # of consistent length. Using binary offsets would break the trivial
+ # file parsing.
+ # to calculate the width of zero's needed we do three passes:
+ # one to gather all the non-reference data and the number of references.
+ # one to pad all the data with reference-length and determine entry
+ # addresses.
+ # One to serialise.
+ non_ref_bytes = prefix_length
+ total_references = 0
+ # XXX: support 'a' field here.
+ for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+ # key is literal, value is literal, there are 3 null's, 1 NL
+ # (ref_lists -1) tabs, (ref-1 cr's per ref_list)
+ non_ref_bytes += len(key) + 3 + 1 + self.reference_lists - 1
+ for ref_list in references:
+ # how many references across the whole file?
+ total_references += len(ref_list)
+ # accrue reference separators
+ non_ref_bytes += len(ref_list) - 1
+ # how many digits are needed to represent the total byte count?
+ digits = 1
+ possible_total_bytes = non_ref_bytes + total_references*digits
+ while 10 ** digits < possible_total_bytes:
+ digits += 1
+ possible_total_bytes = non_ref_bytes + total_references*digits
+ # resolve key addresses.
+ key_addresses = {}
+ current_offset = prefix_length
+ for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+ key_addresses[key] = current_offset
+ current_offset += len(key) + 3 + 1 + self.reference_lists - 1
+ for ref_list in references:
+ # accrue reference separators
+ current_offset += len(ref_list) - 1
+ # accrue reference bytes
+ current_offset += digits * len(ref_list)
+ # serialise
+ format_string = '%%0%sd' % digits
for key, (references, value) in sorted(self._nodes.items(),reverse=True):
flattened_references = []
for ref_list in references:
- flattened_references.append('')
+ ref_addresses = []
+ for reference in ref_list:
+ ref_addresses.append(format_string % key_addresses[reference])
+ flattened_references.append('\r'.join(ref_addresses))
lines.append("%s\0\0%s\0%s\n" % (key,
'\t'.join(flattened_references), value))
lines.append('\n')
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2007-07-13 07:29:51 +0000
+++ b/bzrlib/tests/test_index.py 2007-07-13 08:10:09 +0000
@@ -92,6 +92,17 @@
"key\0\0\t\0data\n"
"\n", contents)
+ def test_node_references_are_byte_offsets(self):
+ builder = GraphIndexBuilder(reference_lists=1)
+ builder.add_node('reference', ([], ), 'data')
+ builder.add_node('key', (['reference'], ), 'data')
+ stream = builder.finish()
+ contents = stream.read()
+ self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
+ "reference\0\0\0data\n"
+ "key\0\x0038\0data\n"
+ "\n", contents)
+
def test_add_node_bad_key(self):
builder = GraphIndexBuilder()
for bad_char in '\t\n\x0b\x0c\r\x00 ':
More information about the bazaar-commits
mailing list