Rev 2989: Refactor fetch's xml inventory parsing into a core routine that extracts the data and a separate one that filters for fetch. in http://people.ubuntu.com/~robertc/baz2.0/reconcile

Robert Collins robertc at robertcollins.net
Wed Nov 14 02:25:43 GMT 2007


At http://people.ubuntu.com/~robertc/baz2.0/reconcile

------------------------------------------------------------
revno: 2989
revision-id:robertc at robertcollins.net-20071114022530-prb0wv25n7os0ee1
parent: pqm at pqm.ubuntu.com-20071114000124-2rkxwrcwid2wgcqn
committer: Robert Collins <robertc at robertcollins.net>
branch nick: find_text_key_references
timestamp: Wed 2007-11-14 13:25:30 +1100
message:
  Refactor fetch's xml inventory parsing into a core routine that extracts the data and a separate one that filters for fetch.
modified:
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-11-09 17:50:31 +0000
+++ b/bzrlib/repository.py	2007-11-14 02:25:30 +0000
@@ -1055,20 +1055,18 @@
                                                          signature,
                                                          self.get_transaction())
 
-    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
-        revision_ids):
-        """Helper routine for fileids_altered_by_revision_ids.
+    def _find_text_key_references_from_xml_inventory_lines(self,
+        line_iterator):
+        """Core routine for extracting references to texts from inventories.
 
         This performs the translation of xml lines to revision ids.
 
         :param line_iterator: An iterator of lines, origin_version_id
-        :param revision_ids: The revision ids to filter for. This should be a
-            set or other type which supports efficient __contains__ lookups, as
-            the revision id from each parsed line will be looked up in the
-            revision_ids filter.
-        :return: a dictionary mapping altered file-ids to an iterable of
-        revision_ids. Each altered file-ids has the exact revision_ids that
-        altered it listed explicitly.
+        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
+            to whether they were referred to by the inventory of the
+            revision_id that they contain. Note that if that revision_id was
+            not part of the line_iterator's output then False will be given -
+            even though it may actually refer to that key.
         """
         result = {}
 
@@ -1115,15 +1113,46 @@
                 unescape_revid_cache[revision_id] = unescaped
                 revision_id = unescaped
 
+            # Note that unescaping always means that on a fulltext cached
+            # inventory we deserialised every fileid, which for general 'pull'
+            # is not great, but we don't really want to have so many
+            # fulltexts that this matters anyway. RBC 20071114.
+            try:
+                file_id = unescape_fileid_cache[file_id]
+            except KeyError:
+                unescaped = unescape(file_id)
+                unescape_fileid_cache[file_id] = unescaped
+                file_id = unescaped
+
+            key = (file_id, revision_id)
+            setdefault(key, False)
+            if revision_id == version_id:
+                result[key] = True
+        return result
+
+    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
+        revision_ids):
+        """Helper routine for fileids_altered_by_revision_ids.
+
+        This performs the translation of xml lines to revision ids.
+
+        :param line_iterator: An iterator of lines, origin_version_id
+        :param revision_ids: The revision ids to filter for. This should be a
+            set or other type which supports efficient __contains__ lookups, as
+            the revision id from each parsed line will be looked up in the
+            revision_ids filter.
+        :return: a dictionary mapping altered file-ids to an iterable of
+        revision_ids. Each altered file-id has the exact revision_ids that
+        altered it listed explicitly.
+        """
+        result = {}
+        setdefault = result.setdefault
+        for file_id, revision_id in \
+            self._find_text_key_references_from_xml_inventory_lines(
+                line_iterator).iterkeys():
             # once data is all ensured-consistent; then this is
             # if revision_id == version_id
             if revision_id in revision_ids:
-                try:
-                    file_id = unescape_fileid_cache[file_id]
-                except KeyError:
-                    unescaped = unescape(file_id)
-                    unescape_fileid_cache[file_id] = unescaped
-                    file_id = unescaped
                 setdefault(file_id, set()).add(revision_id)
         return result
 



More information about the bazaar-commits mailing list