Rev 1: Initial work on some simple prototypes for how we could store in http://bzr.arbash-meinel.com/plugins/per_file_graph
John Arbash Meinel
john at arbash-meinel.com
Thu Oct 29 15:18:43 GMT 2009
At http://bzr.arbash-meinel.com/plugins/per_file_graph
------------------------------------------------------------
revno: 1
revision-id: john at arbash-meinel.com-20091029151830-qn01ubl669owb7wm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: per_file_graph
timestamp: Thu 2009-10-29 10:18:30 -0500
message:
Initial work on some simple prototypes for how we could store
the per-file graphs.
-------------- next part --------------
=== added file '__init__.py'
--- a/__init__.py 1970-01-01 00:00:00 +0000
+++ b/__init__.py 2009-10-29 15:18:30 +0000
@@ -0,0 +1,63 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Prototype of various storage methods for handling the per-file graph."""
+
+from bzrlib import (
+ commands,
+ errors,
+ option,
+ registry
+ )
+
+format_registry = registry.Registry()
+format_registry.register_lazy('simple-btree',
+ 'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder',
+ help='a regular BTreeGraphIndex with obvious layout')
+format_registry.register_lazy('simple-btree-8k',
+ 'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder8k',
+ help='a regular BTreeGraphIndex with 8k pages')
+format_registry.register_lazy('simple-btree-16k',
+ 'bzrlib.plugins.per_file_graph.simple_btree', 'SimpleBTreeBuilder16k',
+ help='a regular BTreeGraphIndex with 16k pages')
+format_registry.default_key = 'simple-btree'
+
+
+class cmd_build_per_file_graph(commands.Command):
+ """Build a per-file-graph in a given format."""
+
+ takes_options = [option.RegistryOption('format',
+ help='Create the per-file-graph in the given format.',
+ value_switches=True,
+ title='Possible Formats',
+ registry=format_registry),
+ ]
+ takes_args = ['source', 'target']
+
+ def run(self, source, target, format=None):
+ from bzrlib import repository
+ if format is None:
+ format = format_registry.get()
+ repo = repository.Repository.open(source)
+ repo.lock_read()
+ try:
+ graph_builder = format(target, repo)
+ graph_builder.build()
+ finally:
+ repo.unlock()
+
+
+commands.register_command(cmd_build_per_file_graph)
=== added file 'builder.py'
--- a/builder.py 1970-01-01 00:00:00 +0000
+++ b/builder.py 2009-10-29 15:18:30 +0000
@@ -0,0 +1,46 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Basic interface for builders"""
+
+from bzrlib import osutils, transport, ui
+
+
+class Builder(object):
+
+ def __init__(self, target_filename, repo):
+ self.target_filename = target_filename
+ self.repo = repo
+ dirname, filename = osutils.split(target_filename)
+ self.transport = transport.get_transport(dirname)
+ self.filename = filename
+
+ def get_per_file_graph(self):
+ """Return the per-file graph from this repository."""
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ pb.update('Finding text keys')
+ keys = self.repo.texts.keys()
+ pb.update('Found %d text keys, getting parent map' % (len(keys),))
+ pm = self.repo.texts.get_parent_map(keys)
+ pb.update('loaded parent map')
+ finally:
+ pb.finished()
+ return pm
+
+ def build(self):
+ """Actually build the index."""
+ raise NotImplementedError(self.build)
=== added file 'simple_btree.py'
--- a/simple_btree.py 1970-01-01 00:00:00 +0000
+++ b/simple_btree.py 2009-10-29 15:18:30 +0000
@@ -0,0 +1,83 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""A per-file graph that just stores the simple values in a btree."""
+
+from bzrlib import btree_index, ui
+
+from bzrlib.plugins.per_file_graph import builder
+
+
+class SimpleBTreeBuilder(builder.Builder):
+ """Builds a simple BTreeGraphIndex with no 'values'."""
+
+ def build(self):
+ """Actually build the index."""
+ parent_map = self.get_per_file_graph()
+ # Avoid spilling to disk, it takes time, but more importantly, if we
+ # change the page size, then we will fail to read the index that was
+ # spilled
+ builder = btree_index.BTreeBuilder(reference_lists=1, key_elements=2,
+ spill_at=1*1000*1000)
+ total = len(parent_map)
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ count = 0
+ for key, parents in parent_map.iteritems():
+ count += 1
+ if count & 0xFF == 0:
+ pb.update('building', count, total)
+ builder.add_node(key, '', (parents,))
+ self.mark_writing(pb, total, builder)
+ self.finish(builder)
+ finally:
+ pb.finished()
+
+ def mark_writing(self, pb, total, builder):
+ """Just override some btree functions to give progress indication."""
+ orig = builder.iter_all_entries
+ def iter_all_entries():
+ for idx, e in enumerate(orig()):
+ if idx & 0xFF:
+ pb.update('writing', idx, total)
+ yield e
+ builder.iter_all_entries = iter_all_entries
+
+ def finish(self, builder):
+ stream = builder.finish()
+ self.transport.put_file(self.filename, stream)
+
+
+class SimpleBTreeBuilder8k(SimpleBTreeBuilder):
+ """Use 8k pages in the btree graph.
+
+ Note that this is a 'hack' that makes incompatible btrees. But we'll give
+ it a shot.
+ """
+
+ _NEW_PAGE_SIZE = 8*1024
+
+ def finish(self, builder):
+ old = btree_index._PAGE_SIZE
+ try:
+ btree_index._PAGE_SIZE = self._NEW_PAGE_SIZE
+ super(SimpleBTreeBuilder8k, self).finish(builder)
+ finally:
+ btree_index._PAGE_SIZE = old
+
+
+class SimpleBTreeBuilder16k(SimpleBTreeBuilder8k):
+ _NEW_PAGE_SIZE = 16*1024
More information about the bazaar-commits
mailing list