Rev 78: Start working on adding the hooks into bzrlib infrastructure. in http://bzr.arbash-meinel.com/plugins/history_db

John Arbash Meinel john at arbash-meinel.com
Mon Apr 12 20:15:06 BST 2010


At http://bzr.arbash-meinel.com/plugins/history_db

------------------------------------------------------------
revno: 78
revision-id: john at arbash-meinel.com-20100412191459-x75cvx7mc17j8p3h
parent: john at arbash-meinel.com-20100409205654-0crla2ssb43k29xo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-04-12 14:14:59 -0500
message:
  Start working on adding the hooks into bzrlib infrastructure.
  
  This should make stuff like 'revision_id_to_dotted_revno()' just faster, as well
  as log, etc.
  
  Also, add db query support for mapping back from dotted_revno => revision_ids.
  It shows the fairly expected performance so far (80ms for a really-old revno).
  Which I'm happy to see.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py	2010-04-09 18:25:25 +0000
+++ b/__init__.py	2010-04-12 19:14:59 +0000
@@ -16,13 +16,20 @@
 
 """Store history information in a database."""
 
+import time
+
 from bzrlib import (
+    branch,
     commands,
+    lazy_import,
     option,
     registry,
     trace,
     )
-import time
+
+lazy_import.lazy_import(globals(), """
+from bzrlib.plugins.history_db import history_db as _mod_history_db
+""")
 
 
 class cmd_create_history_db(commands.Command):
@@ -43,13 +50,12 @@
     def run(self, directory='.', db=None, expand_all=False, incremental=False,
             validate=False):
         import pprint
-        from bzrlib.plugins.history_db import history_db
         from bzrlib import branch
         b = branch.Branch.open(directory)
         b.lock_read()
         try:
-            importer = history_db.Importer(db, b, incremental=incremental,
-                                           validate=validate)
+            importer = _mod_history_db.Importer(db, b, incremental=incremental,
+                                                validate=validate)
             importer.do_import(expand_all=expand_all)
             importer.build_mainline_cache()
         finally:
@@ -81,7 +87,6 @@
     def run(self, directory='.', db=None, revision=None,
             method=None):
         import pprint
-        from bzrlib.plugins.history_db import history_db
         from bzrlib import branch
         b = branch.Branch.open(directory)
         if revision is None:
@@ -102,7 +107,7 @@
                 finally:
                     b2.unlock()
             else:
-                query = history_db.Querier(db, b)
+                query = _mod_history_db.Querier(db, b)
                 if method == 'db-db-id':
                     revnos = [(query.get_dotted_revno_db_ids(rev_id), rev_id)
                               for rev_id in rev_ids]
@@ -135,6 +140,72 @@
         trace.note('Time: %.3fs' % (tdelta,))
 
 
+_dotted_to_rev_walk_types = registry.Registry()
+_dotted_to_rev_walk_types.register('db-range', None)
+_dotted_to_rev_walk_types.register('bzr', None)
+
+class cmd_dotted_to_rev(commands.Command):
+    """Query the db for a dotted revno => revision_id
+    """
+
+    takes_args = ['revno+']
+    takes_options = [option.Option('db', type=unicode,
+                        help='Use this as the database for storage'),
+                     option.Option('directory', type=unicode, short_name='d',
+                        help='Import this location instead of "."'),
+                     'revision',
+                     option.RegistryOption('method',
+                        help='How do you want to do the walking.',
+                        converter=str, registry=_dotted_to_rev_walk_types)
+                    ]
+
+    def run(self, directory='.', db=None, revno_list=None,
+            method=None):
+        import pprint
+        from bzrlib import branch
+        b = branch.Branch.open(directory)
+        b.lock_read()
+        try:
+            # Map back into integer dotted revnos
+            revno_list = [tuple(map(int, r.split('.'))) for r in revno_list]
+            t = time.time()
+            if method == 'bzr':
+                b2 = b.bzrdir.open_branch()
+                b2.lock_read()
+                try:
+                    _orig_do_dotted_revno
+                    revision_ids = [(r, _orig_do_dotted_revno(b2, r))
+                                    for r in revno_list]
+                finally:
+                    b2.unlock()
+            else:
+                query = _mod_history_db.Querier(db, b)
+                if method == 'db-range':
+                    revision_ids = [(r, query.get_revision_id(r))
+                                     for r in revno_list]
+                else:
+                    assert method == 'db-range-multi'
+                    revno_map = query.get_dotted_revno_range_multi(rev_ids)
+                    revnos = [(revno_map[rev_id], rev_id) for rev_id in rev_ids]
+                trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
+            tdelta = time.time() - t
+            revno_strs = []
+            max_len = 0
+            for revno, rev_id in revision_ids:
+                if revno is None:
+                    s = '?'
+                else:
+                    s = '.'.join(map(str, revno))
+                if len(s) > max_len:
+                    max_len = len(s)
+                revno_strs.append((s, rev_id))
+            self.outf.write(''.join(['%*s %s\n' % (max_len, s, r)
+                                     for s, r in revno_strs]))
+        finally:
+            b.unlock()
+        trace.note('Time: %.3fs' % (tdelta,))
+
+
 _mainline_walk_types = registry.Registry()
 _mainline_walk_types.register('db-rev-id', None)
 _mainline_walk_types.register('db-db-id', None)
@@ -154,7 +225,6 @@
                     ]
 
     def run(self, directory='.', db=None, method=None):
-        from bzrlib.plugins.history_db import history_db
         from bzrlib import branch
         import pprint
         import time
@@ -163,7 +233,7 @@
         self.add_cleanup(b.unlock)
         t = time.time()
         if method.startswith('db'):
-            query = history_db.Querier(db, b)
+            query = _mod_history_db.Querier(db, b)
             if method == 'db-db-id':
                 mainline = query.walk_mainline_db_ids()
             elif method == 'db-rev-id':
@@ -211,7 +281,6 @@
                     ]
 
     def run(self, directory='.', db=None, method=None):
-        from bzrlib.plugins.history_db import history_db
         from bzrlib import branch
         import pprint
         b = branch.Branch.open(directory)
@@ -219,7 +288,7 @@
         self.add_cleanup(b.unlock)
         t = time.time()
         if method.startswith('db'):
-            query = history_db.Querier(db, b)
+            query = _mod_history_db.Querier(db, b)
             if method == 'db-db-id':
                 ancestors = query.walk_ancestry_db_ids()
             elif method == 'db-rev-id':
@@ -242,10 +311,100 @@
 
 commands.register_command(cmd_create_history_db)
 commands.register_command(cmd_get_dotted_revno)
+commands.register_command(cmd_dotted_to_rev)
 commands.register_command(cmd_walk_mainline)
 commands.register_command(cmd_walk_ancestry)
 
 
+_orig_do_dotted_revno = getattr(branch.Branch,
+    '_do_dotted_revno_to_revision_id', None)
+_orig_iter_merge_sorted = getattr(branch.Branch,
+    'iter_merge_sorted_revisions', None)
+
+
+def _get_history_db_path(a_branch):
+    """Return the path to the history DB cache or None."""
+    # TODO: Consider allowing a relative path to the branch root
+    #       Or to the repository, or ?
+    #       For now, the user could just configure an absolute path on the
+    #       Repository in locations.conf and have that inherited to the
+    #       branches.
+    path = a_branch.get_config().get_user_option('history_db_path')
+    return path
+
+
+def _history_db_iter_merge_sorted_revisions(self, start_revision_id=None,
+    stop_revision_id=None, stop_rule='exclude', direction='reverse'):
+    """See Branch.iter_merge_sorted_revisions()
+
+    This is a monkeypatch that overrides the default behavior, extracting data
+    from the history db if it is enabled.
+    """
+    history_db_path = _get_history_db_path(self)
+    if history_db_path is None:
+        # TODO: Consider other cases where we may want to fall back, like
+        #       special arguments, etc that we don't handle well yet.
+        trace.mutter('history_db falling back to original'
+                     'iter_merge_sorted_revisions, "history_db_path" not set')
+        return _orig_iter_merge_sorted(start_revision_id=start_revision_id,
+            stop_revision_id=stop_revision_id, stop_rule=stop_rule,
+            direction=direction)
+    # TODO: Consider what to do if the branch has not been imported yet. My gut
+    #       feeling is that we really want to do the import at this time. Since
+    #       the user would want the data, and it is possible to update a cache
+    #       with new values and return them *faster* than you could get them
+    #       out from scratch.
+    return _orig_iter_merge_sorted(start_revision_id=start_revision_id,
+        stop_revision_id=stop_revision_id, stop_rule=stop_rule,
+        direction=direction)
+
+
+def _history_db_dotted_revno_to_revision_id(self, revno):
+    """See Branch._do_dotted_revno_to_revision_id."""
+    # revno should be a dotted revno, aka either 1-part or 3-part tuple
+    history_db_path = _get_history_db_path(self)
+    if history_db_path is None:
+        trace.mutter('history_db falling back to original'
+                     'dotted_revno => revision_id, "history_db_path" not set')
+        return _orig_do_dotted_revno(self, revno)
+    query = _mod_history_db.Querier(history_db_path, b)
+    return _orig_do_dotted_revno(self, revno)
+
+
+def _history_db_post_change_branch_tip_hook(params):
+    """Run when the tip of a branch changes revision_id."""
+    t0 = time.clock()
+    history_db_path = _get_history_db_path(self)
+    if history_db_path is None:
+        trace.mutter('Note updating history-db, "history_db_path"'
+                     ' not configured')
+        return
+    importer = _mod_history_db.Importer(db, params.branch, incremental=True)
+    t1 = time.clock()
+    importer.do_import()
+    t2 = time.clock()
+    importer.build_mainline_cache()
+    t3 = time.clock()
+    trace.note('History DB took %.3fs, %.3fs to init, %.3fs to import'
+               % (t3-t0, t1-t0, t2-t1))
+    trace.note('Stats:\n%s' % (pprint.pformat(dict(importer._stats)),))
+
+
+
+def _register_history_db_hooks():
+    if _orig_do_dotted_revno is None:
+        trace.mutter('Unable to enable history-db, needs bzr 1.12 or later')
+        return
+    branch.Branch._do_dotted_revno_to_revision_id = \
+        _history_db_dotted_revno_to_revision_id
+    branch.Branch.iter_merge_sorted_revisions = \
+        _history_db_iter_merge_sorted_revisions
+    branch.Branch.hoosk.install_named_hook('post_change_branch_tip',
+        _history_db_post_change_branch_tip_hook, 'history_db')
+
+
+# _register_history_db_hooks()
+
 def load_tests(standard_tests, module, loader):
     standard_tests.addTests(loader.loadTestsFromModuleNames([
         (__name__ + '.' + x) for x in [

=== modified file 'history_db.py'
--- a/history_db.py	2010-04-09 20:56:54 +0000
+++ b/history_db.py	2010-04-12 19:14:59 +0000
@@ -1338,6 +1338,50 @@
         self._stats['query_time'] += (time.time() - t)
         return revnos
 
+    def get_revision_id(self, revno):
+        """Map from a dotted-revno back into a revision_id."""
+        t = time.time()
+        tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+        # TODO: If tip_db_id is None, maybe we want to raise an exception here?
+        #       To indicate that the branch has not been imported yet
+        revno_str = '.'.join(map(str, revno))
+        merged_revision_id = None
+        while tip_db_id is not None:
+            self._stats['num_steps'] += 1
+            range_res = self._cursor.execute(
+                "SELECT pkey, tail"
+                "  FROM mainline_parent_range"
+                " WHERE head = ?"
+                " ORDER BY count DESC LIMIT 1",
+                (tip_db_id,)).fetchone()
+            if range_res is None:
+                revision_res = self._cursor.execute(
+                    "SELECT revision_id, revno"
+                    "  FROM dotted_revno, revision"
+                    " WHERE merged_revision = revision.db_id"
+                    "   tip_revision = ?"
+                    "   AND revno = ?",
+                    (tip_db_id, revno_str)).fetchall()
+                next_db_id = self._get_lh_parent_db_id(tip_db_id)
+            else:
+                pkey, next_db_id = range_res
+                revision_res = self._cursor.execute(
+                    "SELECT revision_id, revno"
+                    "  FROM dotted_revno, mainline_parent, revision"
+                    " WHERE tip_revision = mainline_parent.revision"
+                    "   AND merged_revision = revision.db_id"
+                    "   AND mainline_parent.range = ?"
+                    "   AND revno = ?",
+                    (pkey, revno_str)).fetchall()
+            tip_db_id = next_db_id
+            if revision_res:
+                assert len(revision_res) == 1
+                merged_revision_id, db_revno_str = revision_res[0]
+                assert db_revno_str == revno_str
+                break
+        self._stats['query_time'] += (time.time() - t)
+        return merged_revision_id
+
     def walk_mainline(self):
         """Walk the db, and grab all the mainline identifiers."""
         t = time.time()



More information about the bazaar-commits mailing list