Rev 4736: Shave off another 19MB in the test script by making the caches lazy. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-chk-memory
John Arbash Meinel
john at arbash-meinel.com
Thu Oct 8 22:44:04 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-chk-memory
------------------------------------------------------------
revno: 4736
revision-id: john at arbash-meinel.com-20091008214343-bff832zjcytjf1q9
parent: john at arbash-meinel.com-20091008212628-q5oh7rdg7ikvy7jo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-chk-memory
timestamp: Thu 2009-10-08 16:43:43 -0500
message:
Shave off another 19MB in the test script by making the caches lazy.
Point them to None until we actually have use for them.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-10-08 21:26:28 +0000
+++ b/bzrlib/inventory.py 2009-10-08 21:43:43 +0000
@@ -960,7 +960,7 @@
descend(self.root, u'')
return accum
- def path2id(self, name):
+ def path2id(self, relpath):
"""Walk down through directories to return entry of last component.
names may be either a list of path components, or a single
@@ -971,10 +971,10 @@
Returns None IFF the path is not found.
"""
- if isinstance(name, basestring):
- name = osutils.splitpath(name)
+ if isinstance(relpath, basestring):
+ relpath = osutils.splitpath(relpath)
- # mutter("lookup path %r" % name)
+ # mutter("lookup path %r" % relpath)
try:
parent = self.root
@@ -983,7 +983,7 @@
return None
if parent is None:
return None
- for f in name:
+ for f in relpath:
try:
children = getattr(parent, 'children', None)
if children is None:
@@ -1511,8 +1511,8 @@
# Note: if just loading all CHKInventory objects, these two empty
# dictionaries end up costing 7MB, because you have 25k
# inventories * 2 dicts * 140bytes for an empty dictionary.
- self._fileid_to_entry_cache = {}
- self._path_to_fileid_cache = {}
+ self._fileid_to_entry_cache = None
+ self._path_to_fileid_cache = None
self._search_key_name = search_key_name
self.root_id = None
@@ -1616,8 +1616,6 @@
interesting.add(None) # this will auto-filter it in the loop
remaining_parents.discard(None)
while remaining_parents:
- if None in remaining_parents:
- import pdb; pdb.set_trace()
next_parents = set()
for entry in self._getitems(remaining_parents):
next_parents.add(entry.parent_id)
@@ -1673,7 +1671,6 @@
try:
remaining_children = collections.deque(parent_to_children[self.root_id])
except:
- import pdb; pdb.set_trace()
raise
while remaining_children:
file_id = remaining_children.popleft()
@@ -1726,6 +1723,8 @@
result.revision = sections[3]
if result.parent_id == '':
result.parent_id = None
+ if self._fileid_to_entry_cache is None:
+ self._fileid_to_entry_cache = {}
self._fileid_to_entry_cache[result.file_id] = result
return result
@@ -1755,7 +1754,9 @@
result = CHKInventory(self._search_key_name)
if propagate_caches:
# Just propagate the path-to-fileid cache for now
- result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems())
+ result._path_to_fileid_cache = dict(self._path_to_fileid_cache)
+ else:
+ result._path_to_fileid_cache = {}
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
self.id_to_entry._ensure_root()
maximum_size = self.id_to_entry._root_node.maximum_size
@@ -2019,7 +2020,10 @@
"""map a single file_id -> InventoryEntry."""
if file_id is None:
raise errors.NoSuchId(self, file_id)
- result = self._fileid_to_entry_cache.get(file_id, None)
+ if self._fileid_to_entry_cache is None:
+ result = None
+ else:
+ result = self._fileid_to_entry_cache.get(file_id, None)
if result is not None:
return result
try:
@@ -2037,22 +2041,25 @@
"""
result = []
remaining = []
- for file_id in file_ids:
- entry = self._fileid_to_entry_cache.get(file_id, None)
- if entry is None:
- remaining.append(file_id)
- else:
- result.append(entry)
+ if self._fileid_to_entry_cache is None:
+ # We know that nothing will be in the cache anyway
+ self._fileid_to_entry_cache = {}
+ else:
+ for file_id in file_ids:
+ entry = self._fileid_to_entry_cache.get(file_id, None)
+ if entry is None:
+ remaining.append(file_id)
+ else:
+ result.append(entry)
file_keys = [(f,) for f in remaining]
for file_key, value in self.id_to_entry.iteritems(file_keys):
entry = self._bytes_to_entry(value)
result.append(entry)
- self._fileid_to_entry_cache[entry.file_id] = entry
return result
def has_id(self, file_id):
# Perhaps have an explicit 'contains' method on CHKMap ?
- if self._fileid_to_entry_cache.get(file_id, None) is not None:
+ if file_id in self._fileid_to_entry_cache:
return True
return len(list(self.id_to_entry.iteritems([(file_id,)]))) == 1
@@ -2082,12 +2089,13 @@
XXX: We may not want to merge this into bzr.dev.
"""
- for key, entry in self.id_to_entry.iteritems():
+ if self._fileid_to_entry_cache is None:
+ self._fileid_to_entry_cache = {}
+ for key, bytes in self.id_to_entry.iteritems():
file_id = key[0]
ie = self._fileid_to_entry_cache.get(file_id, None)
if ie is None:
- ie = self._bytes_to_entry(entry)
- self._fileid_to_entry_cache[file_id] = ie
+ ie = self._bytes_to_entry(bytes)
yield ie
def iter_changes(self, basis):
@@ -2181,7 +2189,6 @@
old_path = None
if self_value is not None:
entry = self._bytes_to_entry(self_value)
- self._fileid_to_entry_cache[file_id] = entry
new_path = self.id2path(file_id)
else:
entry = None
@@ -2189,16 +2196,18 @@
delta.append((old_path, new_path, file_id, entry))
return delta
- def path2id(self, name):
+ def path2id(self, relpath):
"""See CommonInventory.path2id()."""
# TODO: perhaps support negative hits?
- result = self._path_to_fileid_cache.get(name, None)
+ if self._path_to_fileid_cache is None:
+ self._path_to_fileid_cache = {}
+ result = self._path_to_fileid_cache.get(relpath, None)
if result is not None:
return result
- if isinstance(name, basestring):
- names = osutils.splitpath(name)
+ if isinstance(relpath, basestring):
+ names = osutils.splitpath(relpath)
else:
- names = name
+ names = relpath
current_id = self.root_id
if current_id is None:
return None
@@ -2217,7 +2226,7 @@
if file_id is None:
return None
current_id = file_id
- self._path_to_fileid_cache[name] = current_id
+ self._path_to_fileid_cache[relpath] = current_id
return current_id
def to_lines(self):
@@ -2278,28 +2287,15 @@
if self._chk_inventory.parent_id_basename_to_file_id is None:
raise AssertionError("Inventories without"
" parent_id_basename_to_file_id are no longer supported")
- result = {}
# XXX: Todo - use proxy objects for the children rather than loading
# all when the attribute is referenced.
parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
- child_keys = set()
+ child_ids = set()
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
key_filter=[(self.file_id,)]):
- child_keys.add((file_id,))
- cached = set()
- for file_id_key in child_keys:
- entry = self._chk_inventory._fileid_to_entry_cache.get(
- file_id_key[0], None)
- if entry is not None:
- result[entry.name] = entry
- cached.add(file_id_key)
- child_keys.difference_update(cached)
- # populate; todo: do by name
- id_to_entry = self._chk_inventory.id_to_entry
- for file_id_key, bytes in id_to_entry.iteritems(child_keys):
- entry = self._chk_inventory._bytes_to_entry(bytes)
- result[entry.name] = entry
- self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
+ child_ids.add(file_id)
+ child_entries = self._chk_inventory._getitems(child_ids)
+ result = dict([(entry.name, entry) for entry in child_entries])
self._children = result
return result
More information about the bazaar-commits
mailing list