Rev 2930: * Commit updates the state of the working tree via a delta rather than in http://people.ubuntu.com/~robertc/baz2.0/dirstate.update_basis
Robert Collins
robertc at robertcollins.net
Tue Oct 23 23:14:50 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/dirstate.update_basis
------------------------------------------------------------
revno: 2930
revision-id:robertc at robertcollins.net-20071023221432-j8zndh1oiegql3cu
parent: pqm at pqm.ubuntu.com-20071023082111-h6u34i4gvlb2nwch
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate.update_basis
timestamp: Wed 2007-10-24 08:14:32 +1000
message:
* Commit updates the state of the working tree via a delta rather than
supplying entirely new basis trees. For commit of a single specified file
this reduces the wall clock time for commit by roughly a 30%.
(Robert Collins, Martin Pool)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/mutabletree.py mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
bzrlib/tests/test_dirstate.py test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
bzrlib/tests/workingtree_implementations/test_parents.py test_set_parents.py-20060807231740-yicmnlci1mj8smu1-1
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS 2007-10-23 08:21:11 +0000
+++ b/NEWS 2007-10-23 22:14:32 +0000
@@ -52,6 +52,11 @@
changed to determine if the tree is unchanged rather than recalculating
it at the end of the commit process. (Robert Collins)
+ * Commit updates the state of the working tree via a delta rather than
+ supplying entirely new basis trees. For commit of a single specified file
+ this reduces the wall clock time for commit by roughly a 30%.
+ (Robert Collins, Martin Pool)
+
* Inventory serialisation no longer double-sha's the content.
(Robert Collins)
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-10-15 05:02:05 +0000
+++ b/bzrlib/commit.py 2007-10-23 22:14:32 +0000
@@ -383,17 +383,8 @@
# Make the working tree up to date with the branch
self._set_progress_stage("Updating the working tree")
- rev_tree = self.builder.revision_tree()
- # XXX: This will need to be changed if we support doing a
- # selective commit while a merge is still pending - then we'd
- # still have multiple parents after the commit.
- #
- # XXX: update_basis_by_delta is slower at present because it works
- # on inventories, so this is not active until there's a native
- # dirstate implementation.
- ## self.work_tree.update_basis_by_delta(self.rev_id,
- ## self._basis_delta)
- self.work_tree.set_parent_trees([(self.rev_id, rev_tree)])
+ self.work_tree.update_basis_by_delta(self.rev_id,
+ self._basis_delta)
self.reporter.completed(new_revno, self.rev_id)
self._process_post_hooks(old_revno, new_revno)
finally:
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-10-22 20:05:12 +0000
+++ b/bzrlib/dirstate.py 2007-10-23 22:14:32 +0000
@@ -212,6 +212,7 @@
import zlib
from bzrlib import (
+ cache_utf8,
debug,
errors,
inventory,
@@ -305,15 +306,6 @@
def __init__(self, path):
"""Create a DirState object.
- Attributes of note:
-
- :attr _root_entrie: The root row of the directory/file information,
- - contains the path to / - '', ''
- - kind of 'directory',
- - the file id of the root in utf8
- - size of 0
- - a packed state
- - and no sha information.
:param path: The path at which the dirstate file on disk should live.
"""
# _header_state and _dirblock_state represent the current state
@@ -897,6 +889,48 @@
processed_dirs.update(pending_dirs)
return found
+ def _discard_merge_parents(self):
+ """Discard any parents trees beyond the first.
+
+ Note that if this fails the dirstate is corrupted.
+
+ After this function returns the dirstate contains 2 trees, neither of
+ which are ghosted.
+ """
+ self._read_header_if_needed()
+ parents = self.get_parent_ids()
+ if len(parents) < 1:
+ return
+ # only require all dirblocks if we are doing a full-pass removal.
+ self._read_dirblocks_if_needed()
+ dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])
+ def iter_entries_removable():
+ for block in self._dirblocks:
+ deleted_positions = []
+ for pos, entry in enumerate(block[1]):
+ yield entry
+ if (entry[1][0][0], entry[1][1][0]) in dead_patterns:
+ deleted_positions.append(pos)
+ if deleted_positions:
+ if len(deleted_positions) == len(block):
+ del block[1][:]
+ else:
+ for pos in reversed(deleted_positions):
+ del block[1][pos]
+ # if the first parent is a ghost:
+ if parents[0] in self.get_ghosts():
+ empty_parent = [DirState.NULL_PARENT_DETAILS]
+ for entry in iter_entries_removable():
+ entry[1][1:] = empty_parent
+ else:
+ for entry in iter_entries_removable():
+ del entry[1][2:]
+
+ self._ghosts = []
+ self._parents = [parents[0]]
+ self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+ self._header_state = DirState.IN_MEMORY_MODIFIED
+
def _empty_parent_info(self):
return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
len(self._ghosts))
@@ -1127,6 +1161,184 @@
raise
return result
+ def update_basis_by_delta(self, delta, new_revid):
+ """Update the parents of this tree after a commit.
+
+ This gives the tree one parent, with revision id new_revid. The
+ inventory delta is applied to the current basis tree to generate the
+ inventory for the parent new_revid, and all other parent trees are
+ discarded.
+
+ Note that an exception during the operation of this method will leave
+ the dirstate in a corrupt state where it should not be saved.
+
+ Finally, we expect all changes to be synchronising the basis tree with
+ the working tree.
+
+ :param new_revid: The new revision id for the trees parent.
+ :param delta: An inventory delta (see apply_inventory_delta) describing
+ the changes from the current left most parent revision to new_revid.
+ """
+ self._read_dirblocks_if_needed()
+ self._discard_merge_parents()
+ if self._ghosts != []:
+ raise NotImplementedError(self.update_basis_by_delta)
+ if len(self._parents) == 0:
+ # setup a blank tree, the most simple way.
+ empty_parent = DirState.NULL_PARENT_DETAILS
+ for entry in self._iter_entries():
+ entry[1].append(empty_parent)
+ self._parents.append(new_revid)
+
+ self._parents[0] = new_revid
+
+ delta = sorted(delta, reverse=True)
+ adds = []
+ changes = []
+ deletes = []
+ # The paths this function accepts are unicode and must be encoded as we
+ # go.
+ encode = cache_utf8.encode
+ inv_to_entry = self._inv_entry_to_details
+ # delta is now (deletes, changes), (adds) in reverse lexographical
+ # order.
+ # deletes in reverse lexographic order are safe to process in situ.
+ # renames are not, as a rename from any path could go to a path
+ # lexographically lower, so we transform renames into delete, add pairs,
+ # expanding them recursively as needed.
+ # At the same time, to reduce interface friction we convert the input
+ # inventory entries to dirstate.
+
+ for old_path, new_path, file_id, inv_entry in delta:
+ if old_path is None:
+ adds.append((None, encode(new_path), file_id,
+ inv_to_entry(inv_entry)))
+ elif new_path is None:
+ deletes.append((encode(old_path), None, file_id, None))
+ elif old_path != new_path:
+ # Renames:
+ # Because renames must preserve their children we must have
+ # processed all reloations and removes before hand. The sort
+ # order ensures we've examined the child paths, but we also
+ # have to execute the removals, or the split to an add/delete
+ # pair will result in the deleted item being reinserted, or
+ # renamed items being reinserted twice - and possibly at the
+ # wrong place. Splitting into a delete/add pair also simplifies
+ # the handling of entries with ('f', ...), ('r' ...) because
+ # the target of the 'r' is old_path here, and we add that to
+ # deletes, meaning that the add handler does not need to check
+ # for 'r' items on every pass.
+ self._update_basis_apply_deletes(deletes)
+ deletes = []
+ new_path_utf8 = encode(new_path)
+ # Split into an add/delete pair recursively.
+ adds.append((None, new_path_utf8, file_id,
+ inv_to_entry(inv_entry)))
+ # Remove the current contents of the tree at orig_path, and
+ # reinsert at the correct new path.
+ new_deletes = list(reversed(list(self._iter_child_entries(1,
+ encode(old_path)))))
+ for entry in new_deletes:
+ source_path = '/'.join(entry[0][0:2])
+ target_path = new_path_utf8 + source_path[len(old_path):]
+ adds.append((None, target_path, entry[0][2], entry[1][1]))
+ deletes.append((source_path, None, entry[0][2], None))
+ deletes.append((encode(old_path), None, file_id, None))
+ else:
+ changes.append((encode(old_path), encode(new_path), file_id,
+ inv_to_entry(inv_entry)))
+
+ self._update_basis_apply_deletes(deletes)
+ self._update_basis_apply_changes(changes)
+ self._update_basis_apply_adds(adds)
+
+ # remove all deletes
+ self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+ self._header_state = DirState.IN_MEMORY_MODIFIED
+ return
+
+ def _update_basis_apply_adds(self, adds):
+ """Apply a sequence of adds to tree 1 during update_basis_by_delta.
+
+ They may be adds, or renames that have been split into add/delete
+ pairs.
+
+ :param adds: A sequence of adds. Each add is a tuple:
+ (None, new_path_utf8, file_id, (entry_details))
+ """
+ # Adds are accumulated partly from renames, so can be in any input
+ # order - sort it.
+ adds.sort()
+ # adds is now in lexographic order, which places all parents before
+ # their children, so we can process it linearly.
+ absent = ('r', 'a')
+ for old_path, new_path, file_id, new_details in adds:
+ assert old_path is None
+ # the entry for this file_id must be in tree 0.
+ entry = self._get_entry(0, file_id, new_path)
+ if entry[0][2] != file_id:
+ raise errors.BzrError('dirstate: cannot apply delta, working'
+ ' tree does not contain new entry %r %r' %
+ (new_path, file_id))
+ if entry[1][1][0] not in absent:
+ raise errors.BzrError('dirstate: inconsistent delta, with '
+ 'tree 0. %r %r' % (new_path, file_id))
+ # We don't need to update the target of an 'r' because the handling
+ # of renames turns all 'r' situations into a delete at the original
+ # location.
+ entry[1][1] = new_details
+
+ def _update_basis_apply_changes(self, changes):
+ """Apply a sequence of changes to tree 1 during update_basis_by_delta.
+
+ :param adds: A sequence of changes. Each change is a tuple:
+ (path_utf8, path_utf8, file_id, (entry_details))
+ """
+ absent = ('a', 'r')
+ for old_path, new_path, file_id, new_details in changes:
+ assert old_path == new_path
+ # the entry for this file_id must be in tree 0.
+ entry = self._get_entry(0, file_id, new_path)
+ if entry[0][2] != file_id:
+ raise errors.BzrError('dirstate: cannot apply delta, working'
+ ' tree does not contain new entry %r %r' %
+ (new_path, file_id))
+ if (entry[1][0][0] in absent or
+ entry[1][1][0] in absent):
+ raise errors.BzrError('dirstate: inconsistent delta, with '
+ 'tree 0. %r %r' % (new_path, file_id))
+ entry[1][1] = new_details
+
+ def _update_basis_apply_deletes(self, deletes):
+ """Apply a sequence of deletes to tree 1 during update_basis_by_delta.
+
+ They may be deletes, or renames that have been split into add/delete
+ pairs.
+
+ :param adds: A sequence of deletes. Each delete is a tuple:
+ (old_path_utf8, None, file_id, None)
+ """
+ absent = ('r', 'a')
+ # Deletes are accumulated in lexographical order.
+ for old_path, new_path, file_id, _ in deletes:
+ assert new_path is None
+ # the entry for this file_id must be in tree 1.
+ dirname, basename = osutils.split(old_path)
+ block_index, entry_index, dir_present, file_present = \
+ self._get_block_entry_index(dirname, basename, 1)
+ if not file_present:
+ raise errors.BzrError('dirstate: cannot apply delta, basis'
+ ' tree does not contain new entry %r %r' %
+ (old_path, file_id))
+ entry = self._dirblocks[block_index][1][entry_index]
+ if entry[0][2] != file_id:
+ raise errors.BzrError('mismatched file_id in tree 1 %r %r' %
+ (old_path, file_id))
+ if entry[1][0][0] not in absent:
+ raise errors.BzrError('dirstate: inconsistent delta, with '
+ 'tree 0. %r %r' % (old_path, file_id))
+ del self._dirblocks[block_index][1][entry_index]
+
def update_entry(self, entry, abspath, stat_value,
_stat_to_minikind=_stat_to_minikind,
_pack_stat=pack_stat):
@@ -1526,6 +1738,37 @@
raise Exception("can't pack %s" % inv_entry)
return (minikind, fingerprint, size, executable, tree_data)
+ def _iter_child_entries(self, tree_index, path_utf8):
+ """Iterate over all the entries that are children of path_utf.
+
+ This only returns entries that are present (not in 'a', 'r') in
+ tree_index. tree_index data is not refreshed, so if tree 0 is used,
+ results may differ from that obtained if paths were statted to
+ determine what ones were directories.
+
+ Asking for the children of a non-directory will return an empty
+ iterator.
+ """
+ pending_dirs = []
+ next_pending_dirs = [path_utf8]
+ absent = ('r', 'a')
+ while next_pending_dirs:
+ pending_dirs = next_pending_dirs
+ next_pending_dirs = []
+ for path in pending_dirs:
+ block_index, present = self._find_block_index_from_key(
+ (path, '', ''))
+ if not present:
+ # children of a non-directory asked for.
+ continue
+ block = self._dirblocks[block_index]
+ for entry in block[1]:
+ kind = entry[1][tree_index][0]
+ if kind not in absent:
+ yield entry
+ if kind == 'd':
+ next_pending_dirs.append('/'.join(entry[0][0:2]))
+
def _iter_entries(self):
"""Iterate over all the entries in the dirstate.
=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py 2007-10-12 08:00:07 +0000
+++ b/bzrlib/mutabletree.py 2007-10-23 22:14:32 +0000
@@ -437,6 +437,13 @@
inventory for the parent new_revid, and all other parent trees are
discarded.
+ All the changes in the delta should be changes synchronising the basis
+ tree with some or all of the working tree, with a change to a directory
+ requiring that its contents have been recursively included. That is,
+ this is not a general purpose tree modification routine, but a helper
+ for commit which is not required to handle situations that do not arise
+ outside of commit.
+
:param new_revid: The new revision id for the trees parent.
:param delta: An inventory delta (see apply_inventory_delta) describing
the changes from the current left most parent revision to new_revid.
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2007-10-10 00:07:04 +0000
+++ b/bzrlib/tests/test_dirstate.py 2007-10-23 22:14:32 +0000
@@ -1394,6 +1394,107 @@
state.unlock()
+class TestIterChildEntries(TestCaseWithDirState):
+
+ def create_dirstate_with_two_trees(self):
+ """This dirstate contains multiple files and directories.
+
+ / a-root-value
+ a/ a-dir
+ b/ b-dir
+ c c-file
+ d d-file
+ a/e/ e-dir
+ a/f f-file
+ b/g g-file
+ b/h\xc3\xa5 h-\xc3\xa5-file #This is u'\xe5' encoded into utf-8
+
+ Notice that a/e is an empty directory.
+
+ There is one parent tree, which has the same shape with the following variations:
+ b/g in the parent is gone.
+ b/h in the parent has a different id
+ b/i is new in the parent
+ c is renamed to b/j in the parent
+
+ :return: The dirstate, still write-locked.
+ """
+ packed_stat = 'AAAAREUHaIpFB2iKAAADAQAtkqUAAIGk'
+ null_sha = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+ NULL_PARENT_DETAILS = dirstate.DirState.NULL_PARENT_DETAILS
+ root_entry = ('', '', 'a-root-value'), [
+ ('d', '', 0, False, packed_stat),
+ ('d', '', 0, False, 'parent-revid'),
+ ]
+ a_entry = ('', 'a', 'a-dir'), [
+ ('d', '', 0, False, packed_stat),
+ ('d', '', 0, False, 'parent-revid'),
+ ]
+ b_entry = ('', 'b', 'b-dir'), [
+ ('d', '', 0, False, packed_stat),
+ ('d', '', 0, False, 'parent-revid'),
+ ]
+ c_entry = ('', 'c', 'c-file'), [
+ ('f', null_sha, 10, False, packed_stat),
+ ('r', 'b/j', 0, False, ''),
+ ]
+ d_entry = ('', 'd', 'd-file'), [
+ ('f', null_sha, 20, False, packed_stat),
+ ('f', 'd', 20, False, 'parent-revid'),
+ ]
+ e_entry = ('a', 'e', 'e-dir'), [
+ ('d', '', 0, False, packed_stat),
+ ('d', '', 0, False, 'parent-revid'),
+ ]
+ f_entry = ('a', 'f', 'f-file'), [
+ ('f', null_sha, 30, False, packed_stat),
+ ('f', 'f', 20, False, 'parent-revid'),
+ ]
+ g_entry = ('b', 'g', 'g-file'), [
+ ('f', null_sha, 30, False, packed_stat),
+ NULL_PARENT_DETAILS,
+ ]
+ h_entry1 = ('b', 'h\xc3\xa5', 'h-\xc3\xa5-file1'), [
+ ('f', null_sha, 40, False, packed_stat),
+ NULL_PARENT_DETAILS,
+ ]
+ h_entry2 = ('b', 'h\xc3\xa5', 'h-\xc3\xa5-file2'), [
+ NULL_PARENT_DETAILS,
+ ('f', 'h', 20, False, 'parent-revid'),
+ ]
+ i_entry = ('b', 'i', 'i-file'), [
+ NULL_PARENT_DETAILS,
+ ('f', 'h', 20, False, 'parent-revid'),
+ ]
+ j_entry = ('b', 'j', 'c-file'), [
+ ('r', 'c', 0, False, ''),
+ ('f', 'j', 20, False, 'parent-revid'),
+ ]
+ dirblocks = []
+ dirblocks.append(('', [root_entry]))
+ dirblocks.append(('', [a_entry, b_entry, c_entry, d_entry]))
+ dirblocks.append(('a', [e_entry, f_entry]))
+ dirblocks.append(('b', [g_entry, h_entry1, h_entry2, i_entry, j_entry]))
+ state = dirstate.DirState.initialize('dirstate')
+ state._validate()
+ try:
+ state._set_data(['parent'], dirblocks)
+ except:
+ state.unlock()
+ raise
+ return state, dirblocks
+
+ def test_iter_children_b(self):
+ state, dirblocks = self.create_dirstate_with_two_trees()
+ self.addCleanup(state.unlock)
+ expected_result = []
+ expected_result.append(dirblocks[3][1][2]) # h2
+ expected_result.append(dirblocks[3][1][3]) # i
+ expected_result.append(dirblocks[3][1][4]) # j
+ self.assertEqual(expected_result,
+ list(state._iter_child_entries(1, 'b')))
+
+
class TestDirstateSortOrder(TestCaseWithTransport):
"""Test that DirState adds entries in the right order."""
=== modified file 'bzrlib/tests/workingtree_implementations/test_parents.py'
--- a/bzrlib/tests/workingtree_implementations/test_parents.py 2007-10-12 08:00:07 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_parents.py 2007-10-23 22:14:32 +0000
@@ -260,7 +260,11 @@
def assertDeltaApplicationResultsInExpectedBasis(self, tree, revid, delta,
expected_inventory):
- tree.update_basis_by_delta(revid, delta)
+ tree.lock_write()
+ try:
+ tree.update_basis_by_delta(revid, delta)
+ finally:
+ tree.unlock()
# check the last revision was adjusted to rev_id
self.assertEqual(revid, tree.last_revision())
# check the parents are what we expect
@@ -354,6 +358,8 @@
parents.append(extra_parent)
tree.set_parent_ids(parents)
self.fake_up_revision(tree, new_revid, new_shape)
+ # give tree an inventory of new_shape
+ tree._write_inventory(new_shape)
self.assertDeltaApplicationResultsInExpectedBasis(tree, new_revid,
delta, new_shape)
osutils.rmtree('tree')
@@ -575,3 +581,21 @@
self.add_link(new_shape, new_revid, 'link-id-B', 'root-id', 'B', 'C')
self.assertTransitionFromBasisToShape(basis_shape, old_revid,
new_shape, new_revid)
+
+ def test_move_moves_children_recursively(self):
+ old_revid = 'old-parent'
+ basis_shape = Inventory(root_id=None)
+ self.add_dir(basis_shape, old_revid, 'root-id', None, '')
+ self.add_dir(basis_shape, old_revid, 'dir-id-A', 'root-id', 'A')
+ self.add_dir(basis_shape, old_revid, 'dir-id-B', 'dir-id-A', 'B')
+ self.add_link(basis_shape, old_revid, 'link-id-C', 'dir-id-B', 'C', 'D')
+ new_revid = 'new-parent'
+ new_shape = Inventory(root_id=None)
+ self.add_new_root(new_shape, old_revid, new_revid)
+ # the moved path:
+ self.add_dir(new_shape, new_revid, 'dir-id-A', 'root-id', 'B')
+ # unmoved children.
+ self.add_dir(new_shape, old_revid, 'dir-id-B', 'dir-id-A', 'B')
+ self.add_link(new_shape, old_revid, 'link-id-C', 'dir-id-B', 'C', 'D')
+ self.assertTransitionFromBasisToShape(basis_shape, old_revid,
+ new_shape, new_revid)
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-10-17 17:03:06 +0000
+++ b/bzrlib/workingtree_4.py 2007-10-23 22:14:32 +0000
@@ -1220,6 +1220,11 @@
for file_id in file_ids:
self._inventory.remove_recursive_id(file_id)
+ def update_basis_by_delta(self, new_revid, delta):
+ """See MutableTree.update_basis_by_delta."""
+ assert self.last_revision() != new_revid
+ self.current_dirstate().update_basis_by_delta(delta, new_revid)
+
@needs_read_lock
def _validate(self):
self._dirstate._validate()
@@ -1227,7 +1232,8 @@
@needs_tree_write_lock
def _write_inventory(self, inv):
"""Write inventory as the current inventory."""
- assert not self._dirty, "attempting to write an inventory when the dirstate is dirty will cause data loss"
+ assert not self._dirty, ("attempting to write an inventory when the "
+ "dirstate is dirty will cause data loss")
self.current_dirstate().set_state_from_inventory(inv)
self._make_dirty(reset_inventory=False)
if self._inventory is not None:
More information about the bazaar-commits
mailing list