Rev 4475: Assert that entries in the annotation cache also get cleaned up. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

Thu Jun 18 22:02:22 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate

------------------------------------------------------------
revno: 4475
revision-id: john at arbash-meinel.com-20090618210201-o1dwswy4e6x7fpmb
parent: john at arbash-meinel.com-20090618204840-x1q1wiilwdmjh94q
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Thu 2009-06-18 16:02:01 -0500
message:
  Assert that entries in the annotation cache also get cleaned up.
  This actually drops peak memory by almost 100MB; I wouldn't have thought the
  annotations would be that expensive...
  Anyway, the texts themselves are still held by the VF stream, so we only save
  memory on the other things that we might be caching.
-------------- next part --------------
=== modified file 'bzrlib/_annotator_py.py'

--- a/bzrlib/_annotator_py.py	2009-06-18 20:48:40 +0000
+++ b/bzrlib/_annotator_py.py	2009-06-18 21:02:01 +0000
@@ -22,6 +22,7 @@
     graph as _mod_graph,
     osutils,
     patiencediff,
+    ui,
     )
 
 
@@ -60,7 +61,7 @@
         keys = parent_map.keys()
         return keys
 
-    def _get_needed_texts(self, key):
+    def _get_needed_texts(self, key, pb=None):
         """Get the texts we need to properly annotate key.
 
         :param key: A Key that is present in self._vf
@@ -70,7 +71,12 @@
             future improvements may change this to a simple text string.
         """
         keys = self._get_needed_keys(key)
-        for record in self._vf.get_record_stream(keys, 'topological', True):
+        if pb is not None:
+            pb.update('getting stream', 0, len(keys))
+        stream  = self._vf.get_record_stream(keys, 'topological', True)
+        for idx, record in enumerate(stream):
+            if pb is not None:
+                pb.update('extracting', 0, len(keys))
             this_key = record.key
             lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
             num_lines = len(lines)
@@ -118,6 +124,9 @@
         # TODO: consider making all annotations unique and then using 'is'
         #       everywhere. Current results claim that isn't any faster,
         #       because of the time spent deduping
+        #       Deduping also saves a bit of memory: for NEWS it saves ~1MB,
+        #       but that is out of 200-300MB for extracting everything, so a
+        #       fairly trivial amount.
         for parent_idx, lines_idx, match_len in matching_blocks:
             # For lines which match this parent, we will now resolve whether
             # this parent wins over the current annotation
@@ -166,6 +175,7 @@
             num -= 1
             if num == 0:
                 del self._text_cache[parent_key]
+                del self._annotations_cache[parent_key]
                 # Do we want to clean up _num_needed_children at this point as
                 # well?
             self._num_needed_children[parent_key] = num
@@ -173,17 +183,21 @@
     def annotate(self, key):
         """Return annotated fulltext for the given key."""
         keys = self._get_needed_texts(key)
-        for text_key, text, num_lines in self._get_needed_texts(key):
-            (this_annotation,
-             annotations) = self._init_annotations(text_key, num_lines)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
+                (this_annotation,
+                 annotations) = self._init_annotations(text_key, num_lines)
 
-            parent_keys = self._parent_map[text_key]
-            if parent_keys:
-                self._update_from_one_parent(annotations, text, parent_keys[0])
-                for parent in parent_keys[1:]:
-                    self._update_from_other_parents(annotations, text,
-                                                    this_annotation, parent)
-            self._record_annotation(text_key, parent_keys, annotations)
+                parent_keys = self._parent_map[text_key]
+                if parent_keys:
+                    self._update_from_one_parent(annotations, text, parent_keys[0])
+                    for parent in parent_keys[1:]:
+                        self._update_from_other_parents(annotations, text,
+                                                        this_annotation, parent)
+                self._record_annotation(text_key, parent_keys, annotations)
+        finally:
+            pb.finished()
         try:
             annotations = self._annotations_cache[key]
         except KeyError:

=== modified file 'bzrlib/tests/test__annotator.py'
--- a/bzrlib/tests/test__annotator.py	2009-06-18 20:35:31 +0000
+++ b/bzrlib/tests/test__annotator.py	2009-06-18 21:02:01 +0000
@@ -268,6 +268,7 @@
                           self.ff_key: 1,
                          }, self.ann._num_needed_children)
         self.assertTrue(self.fa_key in self.ann._text_cache)
+        self.assertTrue(self.fa_key in self.ann._annotations_cache)
         self.ann._record_annotation(self.fd_key, [self.fb_key, self.fc_key], [])
         self.assertEqual({self.fa_key: 2,
                           self.fb_key: 0,
@@ -277,5 +278,8 @@
                           self.ff_key: 1,
                          }, self.ann._num_needed_children)
         self.assertTrue(self.fa_key in self.ann._text_cache)
+        self.assertTrue(self.fa_key in self.ann._annotations_cache)
         self.assertFalse(self.fb_key in self.ann._text_cache)
+        self.assertFalse(self.fa_key in self.ann._annotations_cache)
         self.assertFalse(self.fc_key in self.ann._text_cache)
+        self.assertFalse(self.fa_key in self.ann._annotations_cache)