Rev 4736: Shave off another 19MB in the test script by making the caches lazy. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-chk-memory

Thu Oct 8 22:44:04 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/2.1-chk-memory

------------------------------------------------------------
revno: 4736
revision-id: john at arbash-meinel.com-20091008214343-bff832zjcytjf1q9
parent: john at arbash-meinel.com-20091008212628-q5oh7rdg7ikvy7jo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-chk-memory
timestamp: Thu 2009-10-08 16:43:43 -0500
message:
  Shave off another 19MB in the test script by making the caches lazy.
  
  Point them to None until we actually have use for them.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'

--- a/bzrlib/inventory.py	2009-10-08 21:26:28 +0000
+++ b/bzrlib/inventory.py	2009-10-08 21:43:43 +0000
@@ -960,7 +960,7 @@
         descend(self.root, u'')
         return accum
 
-    def path2id(self, name):
+    def path2id(self, relpath):
         """Walk down through directories to return entry of last component.
 
         names may be either a list of path components, or a single
@@ -971,10 +971,10 @@
 
         Returns None IFF the path is not found.
         """
-        if isinstance(name, basestring):
-            name = osutils.splitpath(name)
+        if isinstance(relpath, basestring):
+            relpath = osutils.splitpath(relpath)
 
-        # mutter("lookup path %r" % name)
+        # mutter("lookup path %r" % relpath)
 
         try:
             parent = self.root
@@ -983,7 +983,7 @@
             return None
         if parent is None:
             return None
-        for f in name:
+        for f in relpath:
             try:
                 children = getattr(parent, 'children', None)
                 if children is None:
@@ -1511,8 +1511,8 @@
         # Note: if just loading all CHKInventory objects, these two empty
         #       dictionaries end up costing 7MB, because you have 25k
         #       inventories * 2 dicts * 140bytes for an empty dictionary.
-        self._fileid_to_entry_cache = {}
-        self._path_to_fileid_cache = {}
+        self._fileid_to_entry_cache = None
+        self._path_to_fileid_cache = None
         self._search_key_name = search_key_name
         self.root_id = None
 
@@ -1616,8 +1616,6 @@
         interesting.add(None) # this will auto-filter it in the loop
         remaining_parents.discard(None) 
         while remaining_parents:
-            if None in remaining_parents:
-                import pdb; pdb.set_trace()
             next_parents = set()
             for entry in self._getitems(remaining_parents):
                 next_parents.add(entry.parent_id)
@@ -1673,7 +1671,6 @@
         try:
             remaining_children = collections.deque(parent_to_children[self.root_id])
         except:
-            import pdb; pdb.set_trace()
             raise
         while remaining_children:
             file_id = remaining_children.popleft()
@@ -1726,6 +1723,8 @@
         result.revision = sections[3]
         if result.parent_id == '':
             result.parent_id = None
+        if self._fileid_to_entry_cache is None:
+            self._fileid_to_entry_cache = {}
         self._fileid_to_entry_cache[result.file_id] = result
         return result
 
@@ -1755,7 +1754,9 @@
         result = CHKInventory(self._search_key_name)
         if propagate_caches:
             # Just propagate the path-to-fileid cache for now
-            result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems())
+            result._path_to_fileid_cache = dict(self._path_to_fileid_cache)
+        else:
+            result._path_to_fileid_cache = {}
         search_key_func = chk_map.search_key_registry.get(self._search_key_name)
         self.id_to_entry._ensure_root()
         maximum_size = self.id_to_entry._root_node.maximum_size
@@ -2019,7 +2020,10 @@
         """map a single file_id -> InventoryEntry."""
         if file_id is None:
             raise errors.NoSuchId(self, file_id)
-        result = self._fileid_to_entry_cache.get(file_id, None)
+        if self._fileid_to_entry_cache is None:
+            result = None
+        else:
+            result = self._fileid_to_entry_cache.get(file_id, None)
         if result is not None:
             return result
         try:
@@ -2037,22 +2041,25 @@
         """
         result = []
         remaining = []
-        for file_id in file_ids:
-            entry = self._fileid_to_entry_cache.get(file_id, None)
-            if entry is None:
-                remaining.append(file_id)
-            else:
-                result.append(entry)
+        if self._fileid_to_entry_cache is None:
+            # We know that nothing will be in the cache anyway
+            self._fileid_to_entry_cache = {}
+        else:
+            for file_id in file_ids:
+                entry = self._fileid_to_entry_cache.get(file_id, None)
+                if entry is None:
+                    remaining.append(file_id)
+                else:
+                    result.append(entry)
         file_keys = [(f,) for f in remaining]
         for file_key, value in self.id_to_entry.iteritems(file_keys):
             entry = self._bytes_to_entry(value)
             result.append(entry)
-            self._fileid_to_entry_cache[entry.file_id] = entry
         return result
 
     def has_id(self, file_id):
         # Perhaps have an explicit 'contains' method on CHKMap ?
-        if self._fileid_to_entry_cache.get(file_id, None) is not None:
+        if file_id in self._fileid_to_entry_cache:
             return True
         return len(list(self.id_to_entry.iteritems([(file_id,)]))) == 1
 
@@ -2082,12 +2089,13 @@
 
         XXX: We may not want to merge this into bzr.dev.
         """
-        for key, entry in self.id_to_entry.iteritems():
+        if self._fileid_to_entry_cache is None:
+            self._fileid_to_entry_cache = {}
+        for key, bytes in self.id_to_entry.iteritems():
             file_id = key[0]
             ie = self._fileid_to_entry_cache.get(file_id, None)
             if ie is None:
-                ie = self._bytes_to_entry(entry)
-                self._fileid_to_entry_cache[file_id] = ie
+                ie = self._bytes_to_entry(bytes)
             yield ie
 
     def iter_changes(self, basis):
@@ -2181,7 +2189,6 @@
                 old_path = None
             if self_value is not None:
                 entry = self._bytes_to_entry(self_value)
-                self._fileid_to_entry_cache[file_id] = entry
                 new_path = self.id2path(file_id)
             else:
                 entry = None
@@ -2189,16 +2196,18 @@
             delta.append((old_path, new_path, file_id, entry))
         return delta
 
-    def path2id(self, name):
+    def path2id(self, relpath):
         """See CommonInventory.path2id()."""
         # TODO: perhaps support negative hits?
-        result = self._path_to_fileid_cache.get(name, None)
+        if self._path_to_fileid_cache is None:
+            self._path_to_fileid_cache = {}
+        result = self._path_to_fileid_cache.get(relpath, None)
         if result is not None:
             return result
-        if isinstance(name, basestring):
-            names = osutils.splitpath(name)
+        if isinstance(relpath, basestring):
+            names = osutils.splitpath(relpath)
         else:
-            names = name
+            names = relpath
         current_id = self.root_id
         if current_id is None:
             return None
@@ -2217,7 +2226,7 @@
             if file_id is None:
                 return None
             current_id = file_id
-        self._path_to_fileid_cache[name] = current_id
+        self._path_to_fileid_cache[relpath] = current_id
         return current_id
 
     def to_lines(self):
@@ -2278,28 +2287,15 @@
         if self._chk_inventory.parent_id_basename_to_file_id is None:
             raise AssertionError("Inventories without"
                 " parent_id_basename_to_file_id are no longer supported")
-        result = {}
         # XXX: Todo - use proxy objects for the children rather than loading
         # all when the attribute is referenced.
         parent_id_index = self._chk_inventory.parent_id_basename_to_file_id
-        child_keys = set()
+        child_ids = set()
         for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
             key_filter=[(self.file_id,)]):
-            child_keys.add((file_id,))
-        cached = set()
-        for file_id_key in child_keys:
-            entry = self._chk_inventory._fileid_to_entry_cache.get(
-                file_id_key[0], None)
-            if entry is not None:
-                result[entry.name] = entry
-                cached.add(file_id_key)
-        child_keys.difference_update(cached)
-        # populate; todo: do by name
-        id_to_entry = self._chk_inventory.id_to_entry
-        for file_id_key, bytes in id_to_entry.iteritems(child_keys):
-            entry = self._chk_inventory._bytes_to_entry(bytes)
-            result[entry.name] = entry
-            self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry
+            child_ids.add(file_id)
+        child_entries = self._chk_inventory._getitems(child_ids)
+        result = dict([(entry.name, entry) for entry in child_entries])
         self._children = result
         return result