Rev 1: Initial work on some simple prototypes for how we could store in http://bzr.arbash-meinel.com/plugins/per_file_graph

John Arbash Meinel john at arbash-meinel.com
Thu Oct 29 15:18:43 GMT 2009


At http://bzr.arbash-meinel.com/plugins/per_file_graph

------------------------------------------------------------
revno: 1
revision-id: john at arbash-meinel.com-20091029151830-qn01ubl669owb7wm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: per_file_graph
timestamp: Thu 2009-10-29 10:18:30 -0500
message:
  Initial work on some simple prototypes for how we could store
  the per-file graphs.
-------------- next part --------------
=== added file '__init__.py'
--- a/__init__.py	1970-01-01 00:00:00 +0000
+++ b/__init__.py	2009-10-29 15:18:30 +0000
@@ -0,0 +1,63 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Prototype of various storage methods for handling the per-file graph."""
+
+from bzrlib import (
+    commands,
+    errors,
+    option,
+    registry
+    )
+
+format_registry = registry.Registry()
+format_registry.register_lazy('simple-btree',
+    'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder',
+    help='a regular BTreeGraphIndex with obvious layout')
+format_registry.register_lazy('simple-btree-8k',
+    'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder8k',
+    help='a regular BTreeGraphIndex with 8k pages')
+format_registry.register_lazy('simple-btree-16k',
+    'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder16k',
+    help='a regular BTreeGraphIndex with 16k pages')
+format_registry.default_key = 'simple-btree'
+
+
+class cmd_build_per_file_graph(commands.Command):
+    """Build a per-file-graph in a given format."""
+
+    takes_options = [option.RegistryOption('format',
+                     help='Create the per-file-graph in the given format.',
+                     value_switches=True,
+                     title='Possible Formats',
+                     registry=format_registry),
+                    ]
+    takes_args = ['source', 'target']
+
+    def run(self, source, target, format=None):
+        from bzrlib import repository
+        if format is None:
+            format = format_registry.get()
+        repo = repository.Repository.open(source)
+        repo.lock_read()
+        try:
+            graph_builder = format(target, repo)
+            graph_builder.build()
+        finally:
+            repo.unlock()
+
+
+commands.register_command(cmd_build_per_file_graph)

=== added file 'builder.py'
--- a/builder.py	1970-01-01 00:00:00 +0000
+++ b/builder.py	2009-10-29 15:18:30 +0000
@@ -0,0 +1,46 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Basic interface for builders"""
+
+from bzrlib import osutils, transport, ui
+
+
+class Builder(object):
+
+    def __init__(self, target_filename, repo):
+        self.target_filename = target_filename
+        self.repo = repo
+        dirname, filename = osutils.split(target_filename)
+        self.transport = transport.get_transport(dirname)
+        self.filename = filename
+
+    def get_per_file_graph(self):
+        """Return the per-file graph from this repository."""
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            pb.update('Finding text keys')
+            keys = self.repo.texts.keys()
+            pb.update('Found %d text keys, getting parent map' % (len(keys),))
+            pm = self.repo.texts.get_parent_map(keys)
+            pb.update('loaded parent map')
+        finally:
+            pb.finished()
+        return pm
+
+    def build(self):
+        """Actually build the index."""
+        raise NotImplementedError(self.build)

=== added file 'simple_btree.py'
--- a/simple_btree.py	1970-01-01 00:00:00 +0000
+++ b/simple_btree.py	2009-10-29 15:18:30 +0000
@@ -0,0 +1,83 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""A per-file graph that just stores the simple values in a btree."""
+
+from bzrlib import btree_index, ui
+
+from bzrlib.plugins.per_file_graph import builder
+
+
+class SimpleBTreeBuilder(builder.Builder):
+    """Builds a simple BTreeGraphIndex with no 'values'."""
+
+    def build(self):
+        """Actually build the index."""
+        parent_map = self.get_per_file_graph()
+        # Avoid spilling to disk, it takes time, but more importantly, if we
+        # change the page size, then we will fail to read the index that was
+        # spilled
+        builder = btree_index.BTreeBuilder(reference_lists=1, key_elements=2,
+                                           spill_at=1*1000*1000)
+        total = len(parent_map)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            count = 0
+            for key, parents in parent_map.iteritems():
+                count += 1
+                if count & 0xFF == 0:
+                    pb.update('building', count, total)
+                builder.add_node(key, '', (parents,))
+            self.mark_writing(pb, total, builder)
+            self.finish(builder)
+        finally:
+            pb.finished()
+
+    def mark_writing(self, pb, total, builder):
+        """Just override some btree functions to give progress indication."""
+        orig = builder.iter_all_entries
+        def iter_all_entries():
+            for idx, e in enumerate(orig()):
+                if idx & 0xFF:
+                    pb.update('writing', idx, total)
+                yield e
+        builder.iter_all_entries = iter_all_entries
+
+    def finish(self, builder):
+        stream = builder.finish()
+        self.transport.put_file(self.filename, stream)
+
+
+class SimpleBTreeBuilder8k(SimpleBTreeBuilder):
+    """Use 8k pages in the btree graph.
+    
+    Note that this is a 'hack' that makes incompatible btrees. But we'll give
+    it a shot.
+    """
+
+    _NEW_PAGE_SIZE = 8*1024
+
+    def finish(self, builder):
+        old = btree_index._PAGE_SIZE
+        try:
+            btree_index._PAGE_SIZE = self._NEW_PAGE_SIZE
+            super(SimpleBTreeBuilder8k, self).finish(builder)
+        finally:
+            btree_index._PAGE_SIZE = old
+
+
+class SimpleBTreeBuilder16k(SimpleBTreeBuilder8k):
+    _NEW_PAGE_SIZE = 16*1024



More information about the bazaar-commits mailing list