Rev 4509: Repeated path/id corruption detected. in http://people.ubuntu.com/~robertc/baz2.0/pending/apply-inventory-delta
Robert Collins
robertc at robertcollins.net
Wed Jul 8 03:25:44 BST 2009
At http://people.ubuntu.com/~robertc/baz2.0/pending/apply-inventory-delta
------------------------------------------------------------
revno: 4509
revision-id: robertc at robertcollins.net-20090708022539-lse8fiw0zbmdhgu0
parent: robertc at robertcollins.net-20090707043425-3cs9g40rei1siu1q
committer: Robert Collins <robertc at robertcollins.net>
branch nick: apply-inventory-delta
timestamp: Wed 2009-07-08 12:25:39 +1000
message:
Repeated path/id corruption detected.
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2009-06-22 14:32:48 +0000
+++ b/bzrlib/dirstate.py 2009-07-08 02:25:39 +0000
@@ -1400,6 +1400,9 @@
# inventory entries to dirstate.
root_only = ('', '')
for old_path, new_path, file_id, inv_entry in delta:
+ if inv_entry is not None and file_id != inv_entry.file_id:
+ raise errors.InconsistentDelta(new_path, file_id,
+ "mismatched entry file_id %r" % inv_entry)
if old_path is None:
adds.append((None, encode(new_path), file_id,
inv_to_entry(inv_entry), True))
@@ -1453,12 +1456,20 @@
changes.append((encode(old_path), encode(new_path), file_id,
inv_to_entry(inv_entry)))
- # Finish expunging deletes/first half of renames.
- self._update_basis_apply_deletes(deletes)
- # Reinstate second half of renames and new paths.
- self._update_basis_apply_adds(adds)
- # Apply in-situ changes.
- self._update_basis_apply_changes(changes)
+ try:
+ # Finish expunging deletes/first half of renames.
+ self._update_basis_apply_deletes(deletes)
+ # Reinstate second half of renames and new paths.
+ self._update_basis_apply_adds(adds)
+ # Apply in-situ changes.
+ self._update_basis_apply_changes(changes)
+ except errors.BzrError:
+ # _get_entry raises BzrError when a request is inconsistent; we
+ # want such errors to be shown as InconsistentDelta - and that
+ # fits the behaviour we trigger. Partof this is driven by dirstate
+ # only supporting deltas that turn the basis into a closer fit to
+ # the active tree.
+ raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
self._header_state = DirState.IN_MEMORY_MODIFIED
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-07-07 04:34:25 +0000
+++ b/bzrlib/inventory.py 2009-07-08 02:25:39 +0000
@@ -1127,12 +1127,11 @@
"""
# Check that the delta is legal. It would be nice if this could be
# done within the loops below but it's safer to validate the delta
- # before starting to mutate the inventory.
- unique_file_ids = set([f for _, _, f, _ in delta])
- if len(unique_file_ids) != len(delta):
- raise errors.InconsistentDeltaDelta(delta,
- "a file-id appears multiple times")
- del unique_file_ids
+ # before starting to mutate the inventory, as there isn't a rollback
+ # facility.
+ list(_check_delta_unique_ids(_check_delta_unique_new_paths(
+ _check_delta_unique_old_paths(_check_delta_ids_match_entry(
+ delta)))))
children = {}
# Remove all affected items which were in the original inventory,
@@ -1269,9 +1268,10 @@
entry.parent_id)
if entry.name in parent.children:
- raise BzrError("%s is already versioned" %
- osutils.pathjoin(self.id2path(parent.file_id),
- entry.name).encode('utf-8'))
+ raise errors.InconsistentDelta(
+ self.id2path(parent.children[entry.name].file_id),
+ entry.file_id,
+ "Path already versioned")
parent.children[entry.name] = entry
return self._add_child(entry)
@@ -1619,9 +1619,17 @@
result.parent_id_basename_to_file_id = None
result.root_id = self.root_id
id_to_entry_delta = []
- id_set = set()
+ # inventory_delta is only traversed once, so we just update the
+ # variable.
+ # Check for repeated file ids
+ inventory_delta = _check_delta_unique_ids(inventory_delta)
+ # Repeated old paths
+ inventory_delta = _check_delta_unique_old_paths(inventory_delta)
+ # Check for repeated new paths
+ inventory_delta = _check_delta_unique_new_paths(inventory_delta)
+ # Check for entries that don't match the fileid
+ inventory_delta = _check_delta_ids_match_entry(inventory_delta)
for old_path, new_path, file_id, entry in inventory_delta:
- id_set.add(file_id)
# file id changes
if new_path == '':
result.root_id = file_id
@@ -1663,9 +1671,6 @@
# If the two keys are the same, the value will be unchanged
# as its always the file id.
parent_id_basename_delta.append((old_key, new_key, new_value))
- if len(id_set) != len(inventory_delta):
- raise errors.InconsistentDeltaDelta(inventory_delta,
- 'repeated file id')
result.id_to_entry.apply_delta(id_to_entry_delta)
if parent_id_basename_delta:
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
@@ -2077,3 +2082,64 @@
_NAME_RE = re.compile(r'^[^/\\]+$')
return bool(_NAME_RE.match(name))
+
+
+def _check_delta_unique_ids(delta):
+ """Decorate a delta and check that the file ids in it are unique.
+
+ :return: A generator over delta.
+ """
+ ids = set()
+ for item in delta:
+ length = len(ids) + 1
+ ids.add(item[2])
+ if len(ids) != length:
+ raise errors.InconsistentDelta(item[0] or item[1], item[2],
+ "repeated file_id")
+ yield item
+
+
+def _check_delta_unique_new_paths(delta):
+ """Decorate a delta and check that the new paths in it are unique.
+
+ :return: A generator over delta.
+ """
+ paths = set()
+ for item in delta:
+ length = len(paths) + 1
+ path = item[1]
+ if path is not None:
+ paths.add(path)
+ if len(paths) != length:
+ raise errors.InconsistentDelta(path, item[2], "repeated path")
+ yield item
+
+
+def _check_delta_unique_old_paths(delta):
+ """Decorate a delta and check that the old paths in it are unique.
+
+ :return: A generator over delta.
+ """
+ paths = set()
+ for item in delta:
+ length = len(paths) + 1
+ path = item[0]
+ if path is not None:
+ paths.add(path)
+ if len(paths) != length:
+ raise errors.InconsistentDelta(path, item[2], "repeated path")
+ yield item
+
+
+def _check_delta_ids_match_entry(delta):
+ """Decorate a delta and check that the ids in it match the entry.file_id.
+
+ :return: A generator over delta.
+ """
+ for item in delta:
+ entry = item[3]
+ if entry is not None:
+ if entry.file_id != item[2]:
+ raise errors.InconsistentDelta(item[0] or item[1], item[2],
+ "mismatched id with %r" % entry)
+ yield item
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2009-07-07 04:34:25 +0000
+++ b/bzrlib/tests/test_inv.py 2009-07-08 02:25:39 +0000
@@ -75,6 +75,10 @@
"""Apply delta to basis and return the result.
This sets the parent and then calls update_basis_by_delta.
+ It also puts the basis in the repository under both 'basis' and 'result' to
+ allow safety checks made by the WT to succeed, and finally ensures that all
+ items in the delta with a new path are present in the WT before calling
+ update_basis_by_delta.
:param basis: An inventory to be used as the basis.
:param delta: The inventory delta to apply:
@@ -109,6 +113,30 @@
# This reads basis from the repo and puts it into the tree's local
# cache, if it has one.
tree.set_parent_ids(['basis'])
+ inv = tree.inventory
+ paths = {}
+ parents = set()
+ for old, new, id, entry in delta:
+ if entry is None:
+ continue
+ paths[new] = (entry.file_id, entry.kind)
+ parents.add(osutils.dirname(new))
+ parents = osutils.minimum_path_selection(parents)
+ parents.discard('')
+ if parents:
+ # Put place holders in the tree to permit adding the other entries.
+ import pdb;pdb.set_trace()
+ if paths:
+ # Many deltas may cause this mini-apply to fail, but we want to see what
+ # the delta application code says, not the prep that we do to deal with
+ # limitations of dirstate's update_basis code.
+ for path, (file_id, kind) in sorted(paths.items()):
+ try:
+ tree.add([path], [file_id], [kind])
+ except (KeyboardInterrupt, SystemExit):
+ raise
+ except:
+ pass
finally:
tree.unlock()
# Fresh lock, reads disk again.
@@ -218,6 +246,52 @@
self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
inv, delta)
+ def test_repeated_new_path(self):
+ inv = self.get_empty_inventory()
+ file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+ file1.revision = 'result'
+ file1.text_size = 0
+ file1.text_sha1 = ""
+ file2 = inventory.InventoryFile('id2', 'path', inv.root.file_id)
+ file2.revision = 'result'
+ file2.text_size = 0
+ file2.text_sha1 = ""
+ delta = [(None, 'path', 'id1', file1), (None, 'path', 'id2', file2)]
+ self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+ inv, delta)
+
+ def test_repeated_old_path(self):
+ inv = self.get_empty_inventory()
+ file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+ file1.revision = 'result'
+ file1.text_size = 0
+ file1.text_sha1 = ""
+ # We can't *create* a source inventory with the same path, but
+ # a badly generated partial delta might claim the same source twice.
+ # This would be buggy in two ways: the path is repeated in the delta,
+ # And the path for one of the file ids doesn't match the source
+ # location. Alternatively, we could have a repeated fileid, but that
+ # is separately checked for.
+ file2 = inventory.InventoryFile('id2', 'path2', inv.root.file_id)
+ file2.revision = 'result'
+ file2.text_size = 0
+ file2.text_sha1 = ""
+ inv.add(file1)
+ inv.add(file2)
+ delta = [('path', None, 'id1', None), ('path', None, 'id2', None)]
+ self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+ inv, delta)
+
+ def test_mismatched_id_entry_id(self):
+ inv = self.get_empty_inventory()
+ file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+ file1.revision = 'result'
+ file1.text_size = 0
+ file1.text_sha1 = ""
+ delta = [(None, 'path', 'id', file1)]
+ self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+ inv, delta)
+
class TestInventoryEntry(TestCase):
More information about the bazaar-commits
mailing list