Rev 5853: Document the split_dirs reason. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-uncommit-faster
John Arbash Meinel
john at arbash-meinel.com
Wed May 11 01:58:57 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-uncommit-faster
------------------------------------------------------------
revno: 5853
revision-id: john at arbash-meinel.com-20110511015847-qi4i70gertbq6lj2
parent: john at arbash-meinel.com-20110510151545-ifc1h4a5yc3pg0wu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-uncommit-faster
timestamp: Wed 2011-05-11 03:58:47 +0200
message:
Document the split_dirs reason.
Shave another 3-500ms off by turning more tuples into StaticTuples.
Direct times with a pre-warmed RevisionTree is ~2.4s down to 2.0s
for set_parent_trees().
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2011-05-10 15:15:45 +0000
+++ b/bzrlib/dirstate.py 2011-05-11 01:58:47 +0000
@@ -2141,7 +2141,8 @@
executable = False
else:
raise Exception("can't pack %s" % inv_entry)
- return (minikind, fingerprint, size, executable, tree_data)
+ return static_tuple.StaticTuple(minikind, fingerprint, size,
+ executable, tree_data)
def _iter_child_entries(self, tree_index, path_utf8):
"""Iterate over all the entries that are children of path_utf.
@@ -2525,6 +2526,7 @@
parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
# how many trees do we end up with
parent_count = len(parent_trees)
+ st = static_tuple.StaticTuple
# one: the current tree
for entry in self._iter_entries():
@@ -2547,6 +2549,7 @@
# the suffix is from tree_index+1:parent_count+1.
new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
# now stitch in all the entries from this tree
+ last_dirname = None
for path, entry in tree.iter_entries_by_dir():
# here we process each trees details for each item in the tree.
# we first update any existing entries for the id at other paths,
@@ -2560,10 +2563,16 @@
file_id = entry.file_id
path_utf8 = path.encode('utf8')
dirname, basename = osutils.split(path_utf8)
- new_entry_key = (dirname, basename, file_id)
+ if dirname == last_dirname:
+ # Try to re-use objects as much as possible
+ dirname = last_dirname
+ else:
+ last_dirname = dirname
+ new_entry_key = st(dirname, basename, file_id)
# tree index consistency: All other paths for this id in this tree
# index must point to the correct path.
- for entry_key in id_index.get(file_id, ()):
+ entry_keys = id_index.get(file_id, ())
+ for entry_key in entry_keys:
# TODO:PROFILING: It might be faster to just update
# rather than checking if we need to, and then overwrite
# the one we are located at.
@@ -2572,12 +2581,14 @@
# other trees, so put absent pointers there
# This is the vertical axis in the matrix, all pointing
# to the real path.
- by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
- # by path consistency: Insert into an existing path record (trivial), or
- # add a new one with relocation pointers for the other tree indexes.
- entry_keys = id_index.get(file_id, ())
+ by_path[entry_key][tree_index] = st('r', path_utf8, 0,
+ False, '')
+ # by path consistency: Insert into an existing path record
+ # (trivial), or add a new one with relocation pointers for the
+ # other tree indexes.
if new_entry_key in entry_keys:
- # there is already an entry where this data belongs, just insert it.
+ # there is already an entry where this data belongs, just
+ # insert it.
by_path[new_entry_key][tree_index] = \
self._inv_entry_to_details(entry)
else:
@@ -2593,17 +2604,16 @@
new_details.append(DirState.NULL_PARENT_DETAILS)
else:
# grab any one entry, use it to find the right path.
- # TODO: optimise this to reduce memory use in highly
- # fragmented situations by reusing the relocation
- # records.
a_key = iter(entry_keys).next()
if by_path[a_key][lookup_index][0] in ('r', 'a'):
- # its a pointer or missing statement, use it as is.
+ # its a pointer or missing statement, use it as
+ # is.
new_details.append(by_path[a_key][lookup_index])
else:
# we have the right key, make a pointer to it.
real_path = ('/'.join(a_key[0:2])).strip('/')
- new_details.append(('r', real_path, 0, False, ''))
+ new_details.append(st('r', real_path, 0, False,
+ ''))
new_details.append(self._inv_entry_to_details(entry))
new_details.extend(new_location_suffix)
by_path[new_entry_key] = new_details
@@ -2625,6 +2635,10 @@
try to keep everything in sorted blocks all the time, but sometimes
it's easier to sort after the fact.
"""
+ # When sorting, we usually have 10x more entries than directories. (69k
+ # total entries, 4k directories). So cache the results of splitting.
+ # Saving time and objects. Also, use StaticTuple to avoid putting all
+ # of these object into python's garbage collector.
split_dirs = {}
def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
# sort by: directory parts, file name, file id
More information about the bazaar-commits
mailing list