Rev 28: Get ready to do something similar grabbing all ancestors. in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Fri Apr 2 23:27:47 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 28
revision-id: john at arbash-meinel.com-20100402222713-05ufbktsvladp3lm
parent: john at arbash-meinel.com-20100402222006-79xj0syl4lodf352
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Fri 2010-04-02 17:27:13 -0500
message:
Get ready to do something similar grabbing all ancestors.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py 2010-04-02 22:20:06 +0000
+++ b/__init__.py 2010-04-02 22:27:13 +0000
@@ -155,6 +155,7 @@
_ancestry_walk_types = registry.Registry()
_ancestry_walk_types.register('db-rev-id', None)
_ancestry_walk_types.register('db-db-id', None)
+_ancestry_walk_types.register('db-range', None)
_ancestry_walk_types.register('bzr-iter-anc', None)
_ancestry_walk_types.register('bzr-kg', None)
@@ -184,6 +185,9 @@
count = query.walk_ancestry_db_ids()
elif method == 'db-rev-id':
count = query.walk_ancestry()
+ else:
+ assert method == 'db-range'
+ count = query.walk_ancestry_range()
trace.note('Stats:\n%s' % (pprint.pformat(dict(query._stats)),))
elif method == 'bzr-iter-anc':
g = b.repository.get_graph()
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-02 22:17:01 +0000
+++ b/history_db.py 2010-04-02 22:27:13 +0000
@@ -355,34 +355,45 @@
self._stats['query_time'] += (time.time() - t)
return all_ids
+ def _get_mainline_range_starting_at(self, head_db_id):
+ """Try to find a range at this tip.
+
+ If a range cannot be found, just find the next parent.
+ :return: (range_or_None, next_db_id)
+ """
+ range_res = self._cursor.execute(
+ "SELECT pkey, tail"
+ " FROM mainline_parent_range"
+ " WHERE head = ?"
+ " ORDER BY count DESC LIMIT 1",
+ (head_db_id,)).fetchone()
+ if range_res is None:
+ parent_db_id = self._get_lh_parent_db_id(head_db_id)
+ return None, parent_db_id
+ range_key, tail_db_id = range_res
+ # TODO: Is ORDER BY dist ASC expensive? We know a priori that the list
+ # is probably already in sorted order, but does sqlite know that?
+ range_db_ids = self._cursor.execute(
+ "SELECT revision FROM mainline_parent"
+ " WHERE range = ? ORDER BY dist ASC",
+ (range_key,)).fetchall()
+ db_ids = [r[0] for r in range_db_ids]
+ return db_ids, tail_db_id
+
def walk_mainline_using_ranges(self):
t = time.time()
db_id = self._get_db_id(self._branch_tip_rev_id)
all_ids = []
while db_id is not None:
self._stats['num_steps'] += 1
- range_res = self._cursor.execute(
- "SELECT pkey, tail"
- " FROM mainline_parent_range"
- " WHERE head = ?"
- " ORDER BY count DESC LIMIT 1",
- (db_id,)).fetchone()
- if range_res is None:
+ next_range, next_db_id = self._get_mainline_range_starting_at(db_id)
+ if next_range is None:
# No range, so switch to using by-parent search
all_ids.append(db_id)
- db_id = self._get_lh_parent_db_id(db_id)
else:
- # We have a range, so read in the whole range, and append the
- # info. Note that we already have db_id itself, so
- range_key, tail_db_id = range_res
- range_db_ids = self._cursor.execute(
- "SELECT revision FROM mainline_parent"
- " WHERE range = ? ORDER BY dist ASC",
- (range_key,)).fetchall()
- db_ids = [r[0] for r in range_db_ids]
- assert db_ids[0] == db_id
- all_ids.extend(db_ids)
- db_id = tail_db_id
+ assert next_range[0] == db_id
+ all_ids.extend(next_range)
+ db_id = next_db_id
self._stats['query_time'] += (time.time() - t)
return all_ids
@@ -422,6 +433,27 @@
remaining.extend(next_p)
return len(all_ancestors)
+ def walk_ancestry_range(self):
+ """Walk the whole ancestry.
+
+ Use the mainline_parent_range/mainline_parent table to speed things up.
+ """
+ _exec = self._cursor.execute
+ all_ancestors = set()
+ db_id = self._get_db_id(self._branch_tip_rev_id)
+ all_ancestors.add(db_id)
+ remaining = [db_id]
+ while remaining:
+ self._stats['num_steps'] += 1
+ next = remaining[:1000]
+ remaining = remaining[len(next):]
+ res = _exec("SELECT parent FROM parent WHERE child in (%s)"
+ % (', '.join('?'*len(next))), tuple(next))
+ next_p = [p[0] for p in res if p[0] not in all_ancestors]
+ all_ancestors.update(next_p)
+ remaining.extend(next_p)
+ return len(all_ancestors)
+
def heads(self, revision_ids):
"""Compute Graph.heads() on the given data."""
raise NotImplementedError(self.heads)
More information about the bazaar-commits
mailing list