Rev 4349: Check revisions as we cross check the revision index, rather than in a separate pass. in http://people.ubuntu.com/~robertc/baz2.0/check
Robert Collins
robertc at robertcollins.net
Tue May 12 06:34:17 BST 2009
At http://people.ubuntu.com/~robertc/baz2.0/check
------------------------------------------------------------
revno: 4349
revision-id: robertc at robertcollins.net-20090512053415-20rkucg2p211zdui
parent: robertc at robertcollins.net-20090512042457-y91nn2suuc7syinj
committer: Robert Collins <robertc at robertcollins.net>
branch nick: check
timestamp: Tue 2009-05-12 15:34:15 +1000
message:
Check revisions as we cross check the revision index, rather than in a separate pass.
=== modified file 'bzrlib/check.py'
--- a/bzrlib/check.py 2009-05-12 04:21:17 +0000
+++ b/bzrlib/check.py 2009-05-12 05:34:15 +0000
@@ -68,7 +68,7 @@
self.repository = repository
self.checked_text_cnt = 0
self.checked_rev_cnt = 0
- self.ghosts = []
+ self.ghosts = set()
self.repeated_text_cnt = 0
self.missing_parent_links = {}
self.missing_inventory_sha_cnt = 0
@@ -82,6 +82,11 @@
self.text_key_references = {}
self.check_repo = check_repo
self.other_results = []
+ # Plain text lines to include in the report
+ self._report_items = []
+ # Sha1 expectations; may be large and need spilling to disk.
+ # key->(sha1, first-referer)
+ self.expected_sha1 = {}
# Ancestors map for all of revisions being checked; while large helper
# functions we call would create it anyway, so better to have once and
# keep.
@@ -107,7 +112,7 @@
self.progress.update('checking revision', revno,
len(self.planned_revisions))
revno += 1
- self.check_one_rev(rev_id)
+ self._check_revision_tree(rev_id)
# check_weaves is done after the revision scan so that
# revision index is known to be valid.
self.check_weaves()
@@ -156,13 +161,34 @@
self.progress.finished()
self.repository.unlock()
+ def check_revisions(self, revisions_iterator):
+ """Check revision objects by decorating a generator.
+
+ :param revisions_iterator: An iterator of(revid, Revision-or-None).
+ :return: A generator of the contents of revisions_iterator.
+ """
+ self.planned_revisions = set()
+ for revid, revision in revisions_iterator:
+ yield revid, revision
+ self._check_one_rev(revid, revision)
+
def check_revision_graph(self):
+ revision_iterator = self.repository._iter_revisions(None)
+ revision_iterator = self.check_revisions(revision_iterator)
+ # We read the all revisions here:
+ # - doing this allows later code to depend on the revision index.
+ # - we can fill out existence flags at this point
+ # - we can read the revision inventory sha at this point
+ # - we can check properties and serialisers etc.
if not self.repository.revision_graph_can_have_wrong_parents():
- # This check is not necessary.
+ # The check against the index isn't needed.
self.revs_with_bad_parents_in_index = None
- return
- bad_revisions = self.repository._find_inconsistent_revision_parents()
- self.revs_with_bad_parents_in_index = list(bad_revisions)
+ for thing in revision_iterator:
+ pass
+ else:
+ bad_revisions = self.repository._find_inconsistent_revision_parents(
+ revision_iterator)
+ self.revs_with_bad_parents_in_index = list(bad_revisions)
def plan_revisions(self):
repository = self.repository
@@ -188,8 +214,13 @@
note('%6d file-ids', len(self.checked_weaves))
note('%6d unique file texts', self.checked_text_cnt)
note('%6d repeated file texts', self.repeated_text_cnt)
- note('%6d unreferenced text versions',
- len(self.unreferenced_versions))
+ if verbose:
+ note('%6d unreferenced text versions',
+ len(self.unreferenced_versions))
+ if verbose and len(self.unreferenced_versions):
+ for file_id, revision_id in self.unreferenced_versions:
+ log_error('unreferenced version: {%s} in %s', revision_id,
+ file_id)
if self.missing_inventory_sha_cnt:
note('%6d revisions are missing inventory_sha1',
self.missing_inventory_sha_cnt)
@@ -209,10 +240,6 @@
note(' %s should be in the ancestry for:', link)
for linker in linkers:
note(' * %s', linker)
- if verbose:
- for file_id, revision_id in self.unreferenced_versions:
- log_error('unreferenced version: {%s} in %s', revision_id,
- file_id)
if len(self.inconsistent_parents):
note('%6d inconsistent parents', len(self.inconsistent_parents))
if verbose:
@@ -232,49 +259,53 @@
' %s has wrong parents in index: '
'%r should be %r',
revision_id, index_parents, actual_parents)
-
- def check_one_rev(self, rev_id):
- """Check one revision.
-
- rev_id - the one to check
+ for item in self._report_items:
+ note(item)
+
+ def _check_one_rev(self, rev_id, rev):
+ """Cross-check one revision.
+
+ :param rev_id: A revision id to check.
+ :param rev: A revision or None to indicate a missing revision.
"""
- rev = self.repository.get_revision(rev_id)
-
if rev.revision_id != rev_id:
- raise BzrCheckError('wrong internal revision id in revision {%s}'
- % rev_id)
-
+ self._report_items.append(
+ 'Mismatched internal revid {%s} and index revid {%s}' % (
+ rev.revision_id, rev_id))
+ rev_id = rev.revision_id
+ # Check this revision tree etc, and count as seen when we encounter a
+ # reference to it.
+ self.planned_revisions.add(rev_id)
+ # It is not a ghost
+ self.ghosts.discard(rev_id)
+ # Count all parents as ghosts if we haven't seen them yet.
for parent in rev.parent_ids:
if not parent in self.planned_revisions:
- # rev has a parent we didn't know about.
- missing_links = self.missing_parent_links.get(parent, [])
- missing_links.append(rev_id)
- self.missing_parent_links[parent] = missing_links
- # list based so somewhat slow,
- # TODO have a planned_revisions list and set.
- if self.repository.has_revision(parent):
- missing_ancestry = self.repository.get_ancestry(parent)
- for missing in missing_ancestry:
- if (missing is not None
- and missing not in self.planned_revisions):
- self.planned_revisions.append(missing)
- else:
- self.ghosts.append(rev_id)
-
+ self.ghosts.add(parent)
+
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
+ # If the revision has an inventory sha, we want to cross check it later.
if rev.inventory_sha1:
- # Loopback - this is currently circular logic as the
- # knit get_inventory_sha1 call returns rev.inventory_sha1.
- # Repository.py's get_inventory_sha1 should instead return
- # inventories.get_record_stream([(revid,)]).next().sha1 or
- # similar.
- inv_sha1 = self.repository.get_inventory_sha1(rev_id)
- if inv_sha1 != rev.inventory_sha1:
- raise BzrCheckError('Inventory sha1 hash doesn\'t match'
- ' value in revision {%s}' % rev_id)
- self._check_revision_tree(rev_id)
+ self.add_sha_check(rev_id, ('inventories', rev_id),
+ rev.inventory_sha1)
self.checked_rev_cnt += 1
+ def add_sha_check(self, referer, key, sha1):
+ """Add a reference to a sha1 to be cross checked against a key.
+
+ :param referer: The referer that expects key to have sha1.
+ :param key: A storage key e.g. ('texts', 'foo at bar-20040504-1234')
+ :param sha1: A hex sha1.
+ """
+ existing = self.expected_sha1.get(key)
+ if existing:
+ if sha1 != existing[0]:
+ self._report_items.append('Multiple expected sha1s for %s. {%s}'
+ ' expects {%s}, {%s} expects {%s}', (
+ key, referer, sha1, existing[1], existing[0]))
+ else:
+ self.expected_sha1[key] = (sha1, referer)
+
def check_weaves(self):
"""Check all the weaves we can get our hands on.
"""
=== modified file 'bzrlib/smart/medium.py'
--- a/bzrlib/smart/medium.py 2009-04-04 02:50:01 +0000
+++ b/bzrlib/smart/medium.py 2009-05-12 05:34:15 +0000
@@ -37,7 +37,6 @@
from bzrlib import (
debug,
errors,
- osutils,
symbol_versioning,
trace,
ui,
@@ -46,7 +45,8 @@
from bzrlib.smart import client, protocol
from bzrlib.transport import ssh
""")
-
+#usually already imported, and getting IllegalScoperReplacer on it here.
+from bzrlib import osutils
# We must not read any more than 64k at a time so we don't risk "no buffer
# space available" errors on some platforms. Windows in particular is likely
=== modified file 'bzrlib/tests/blackbox/test_check.py'
--- a/bzrlib/tests/blackbox/test_check.py 2009-05-11 03:48:49 +0000
+++ b/bzrlib/tests/blackbox/test_check.py 2009-05-12 05:34:15 +0000
@@ -41,7 +41,7 @@
r" 0 file-ids\n"
r" 0 unique file texts\n"
r" 0 repeated file texts\n"
- r" 0 unreferenced text versions\n")
+ )
self.assertContainsRe(err, r"Checking branch at '.*'\.\n")
self.assertContainsRe(err, r"checked branch.*")
@@ -61,8 +61,8 @@
r" 1 revisions\n"
r" 0 file-ids\n"
r" 0 unique file texts\n"
- r" 0 repeated file texts\n"
- r" 0 unreferenced text versions$")
+ r" 0 repeated file texts\n$"
+ )
def test_check_tree(self):
tree = self.make_branch_and_tree('.')
=== modified file 'bzrlib/tests/per_repository/test_check.py'
--- a/bzrlib/tests/per_repository/test_check.py 2009-05-11 03:48:49 +0000
+++ b/bzrlib/tests/per_repository/test_check.py 2009-05-12 05:34:15 +0000
@@ -43,9 +43,7 @@
check_object = tree.branch.repository.check([revid1, revid2])
check_object.report_results(verbose=True)
log = self._get_log(keep_log_file=True)
- self.assertContainsRe(
- log,
- "0 unreferenced text versions")
+ self.assertContainsRe(log, "0 unreferenced text versions")
class TestFindInconsistentRevisionParents(TestCaseWithBrokenRevisionIndex):
More information about the bazaar-commits
mailing list