Rev 2344: Rewrite the inner parsing loop, needs performance testing. in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
John Arbash Meinel
john at arbash-meinel.com
Wed Feb 21 20:17:31 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
------------------------------------------------------------
revno: 2344
revision-id: john at arbash-meinel.com-20070221201710-siq76rbcfpj68gqx
parent: robertc at robertcollins.net-20070220200447-pvv3g67iyk2vro42
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Wed 2007-02-21 14:17:10 -0600
message:
Rewrite the inner parsing loop, needs performance testing.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-20 20:04:47 +0000
+++ b/bzrlib/dirstate.py 2007-02-21 20:17:10 +0000
@@ -224,6 +224,12 @@
# of using int conversion rather than a dict here. AND BLAME ANDREW IF
# it is faster.
+ # TODO: jam 20070221 Make sure we handle when there are duplicated records
+ # (like when we remove + add the same path, or we have a rename)
+ # TODO: jam 20070221 Figure out what to do if we have a record that exceeds
+ # the BISECT_PAGE_SIZE.
+ BISECT_PAGE_SIZE = 4096
+
NOT_IN_MEMORY = 0
IN_MEMORY_UNMODIFIED = 1
IN_MEMORY_MODIFIED = 2
@@ -435,6 +441,19 @@
entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]
return '\0'.join(entire_entry)
+ def _fields_per_row(self):
+ """How many null separated fields should be in each entry row.
+
+ Each line now has an extra '\n' field which is not used
+ so we just skip over it
+ entry size:
+ 3 fields for the key
+ + number of fields per tree_data (5) * tree count
+ + newline
+ """
+ tree_count = 1 + self._num_present_parents()
+ return 3 + 5 * tree_count + 1
+
def _find_block(self, key, add_if_missing=False):
"""Return the block that key should be present in.
@@ -605,6 +624,91 @@
"""Create a line for the state file for parents information."""
return '\0'.join([str(len(parent_ids))] + parent_ids)
+ def _get_fields_to_entry(self):
+ """Get a function which converts entry fields into a entry record.
+
+ This handles kind, size, and executable, as well as parent records.
+
+ :return: A function which takes a list of fields, and returns an
+ appropriate record for storing in memory.
+ """
+ # This is intentionally unrolled for performance
+ num_present_parents = self._num_present_parents()
+ if num_present_parents == 0:
+ def fields_to_entry_0_parents(fields, _int=int, _tuple=tuple,
+ _mini_to_kind=self._minikind_to_kind):
+ path_name_file_id_key = _tuple(fields[:3])
+ return (path_name_file_id_key, [
+ ( # Current tree
+ _mini_to_kind[fields[3]], # kind
+ fields[4], # fingerprint
+ _int(fields[5]), # size
+ fields[6] == 'y', # executable
+ fields[7], # packed_stat or revision_id
+ )])
+ return fields_to_entry_0_parents
+ elif num_present_parents == 1:
+ def fields_to_entry_1_parent(fields, _int=int, _tuple=tuple,
+ _mini_to_kind=self._minikind_to_kind):
+ path_name_file_id_key = _tuple(fields[:3])
+ return (path_name_file_id_key, [
+ ( # Current tree
+ _mini_to_kind[fields[3]], # kind
+ fields[4], # fingerprint
+ _int(fields[5]), # size
+ fields[6] == 'y', # executable
+ fields[7], # packed_stat or revision_id
+ ),
+ ( # Parent 1
+ _mini_to_kind[fields[8]], # kind
+ fields[9], # fingerprint
+ _int(fields[10]), # size
+ fields[11] == 'y', # executable
+ fields[12], # packed_stat or revision_id
+ ),
+ ])
+ return fields_to_entry_1_parent
+ elif num_present_parents == 2:
+ def fields_to_entry_2_parents(fields, _int=int, _tuple=tuple,
+ _mini_to_kind=self._minikind_to_kind):
+ path_name_file_id_key = _tuple(fields[:3])
+ return (path_name_file_id_key, [
+ ( # Current tree
+ _mini_to_kind[fields[3]], # kind
+ fields[4], # fingerprint
+ _int(fields[5]), # size
+ fields[6] == 'y', # executable
+ fields[7], # packed_stat or revision_id
+ ),
+ ( # Parent 1
+ _mini_to_kind[fields[8]], # kind
+ fields[9], # fingerprint
+ _int(fields[10]), # size
+ fields[11] == 'y', # executable
+ fields[12], # packed_stat or revision_id
+ ),
+ ( # Parent 2
+ _mini_to_kind[fields[13]],# kind
+ fields[14], # fingerprint
+ _int(fields[15]), # size
+ fields[16] == 'y', # executable
+ fields[17], # packed_stat or revision_id
+ ),
+ ])
+ return fields_to_entry_2_parents
+ else:
+ def fields_to_entry_n_parents(fields, _int=int, _tuple=tuple,
+ _mini_to_kind=self._minikind_to_kind):
+ path_name_file_id_key = _tuple(fields[:3])
+ trees = [(_mini_to_kind[fields[cur]], # kind
+ fields[cur+1], # fingerprint
+ _int(fields[cur+2]), # size
+ fields[cur+3] == 'y', # executable
+ fields[cur+4], # stat or revision_id
+ ) for cur in xrange(3, len(fields)-1, 5)]
+ return path_name_file_id_key, trees
+ return fields_to_entry_n_parents
+
def get_parent_ids(self):
"""Return a list of the parent tree ids for the directory state."""
self._read_header_if_needed()
@@ -799,6 +903,10 @@
return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,
''), parents
+ def _num_present_parents(self):
+ """The number of parent entries in each record row."""
+ return len(self._parents) - len(self._ghosts)
+
@staticmethod
def on_file(path):
"""Construct a DirState on the file at path path."""
@@ -834,9 +942,9 @@
# 3 fields for the key
# + number of fields per tree_data (5) * tree count
# + newline
- num_present_parents = len(self._parents) - len(self._ghosts)
+ num_present_parents = self._num_present_parents()
tree_count = 1 + num_present_parents
- entry_size = 3 + 5 * tree_count + 1
+ entry_size = self._fields_per_row()
expected_field_count = entry_size * self._num_entries
if len(fields) - cur > expected_field_count:
fields = fields[:expected_field_count + cur]
@@ -850,51 +958,10 @@
field_count - cur, expected_field_count, entry_size,
self._num_entries, fields)
- # Fast path the case where there are 1 or 2 parents
- if num_present_parents == 0:
- # key, [current tree]
- entries = [(tuple(fields[pos:pos + 3]), [fields[pos + 3:pos + 8]])
- for pos in xrange(cur, field_count, entry_size)]
- elif num_present_parents == 1:
- # key,
- entries = [(tuple(fields[pos:pos + 3]),
- # [current tree, parent 1]
- [fields[pos + 3:pos + 8], fields[pos + 8:pos + 13], ])
- for pos in xrange(cur, field_count, entry_size)]
- elif num_present_parents == 2:
- # key,
- entries = [(tuple(fields[pos:pos + 3]),
- # [current tree, parent 1,
- [fields[pos + 3:pos + 8], fields[pos + 8:pos + 13],
- # parent 2]
- fields[pos + 13:pos + 18], ])
- for pos in xrange(cur, field_count, entry_size)]
- else:
- entries = [(
- tuple(fields[pos:pos+3]), #key
- tuple([fields[chunk:chunk+5] for
- chunk in xrange(pos + 3, pos+entry_size-1, 5)]))
- for pos in xrange(cur, field_count, entry_size)
- ]
-
- assert len(entries) == self._num_entries, '%s != %s entries' % (len(entries),
- self._num_entries)
-
- def _line_to_entry(line):
- """Convert freshly read tree details to the final form.
-
- This converts size and minikind for use and makes it into a
- tuple.
- """
- for tree in line[1]:
- # convert the minikind to kind
- tree[0] = self._minikind_to_kind[tree[0]]
- # convert the size to an int
- tree[2] = int(tree[2])
- tree[3] = tree[3] == 'y'
- return line[0], map(tuple, line[1])
- new_entries = map(_line_to_entry, entries)
- self._entries_to_current_state(new_entries)
+ fields_to_entry = self._get_fields_to_entry()
+ entries = [fields_to_entry(fields[pos:pos+entry_size])
+ for pos in xrange(cur, field_count, entry_size)]
+ self._entries_to_current_state(entries)
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
def _read_header(self):
More information about the bazaar-commits
mailing list