Rev 3722: Integrate in stat-cache-updates-during-commit with faster iter-changes. in http://people.ubuntu.com/~robertc/baz2.0/readdir

Robert Collins robertc at robertcollins.net
Thu Sep 25 02:11:43 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/readdir

------------------------------------------------------------
revno: 3722
revision-id: robertc at robertcollins.net-20080925011130-2ct9kz8v1cvptavi
parent: robertc at robertcollins.net-20080924030814-67isjttmo6jksr8p
parent: robertc at robertcollins.net-20080923224605-vqw2yezm8f8m2non
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-updates
timestamp: Thu 2008-09-25 11:11:30 +1000
message:
  Integrate in stat-cache-updates-during-commit with faster iter-changes.
added:
  bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py test_get_file_with_s-20080922035909-lhdovrr36jpxmu0v-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
  bzrlib/tests/test__dirstate_helpers.py test_dirstate_helper-20070504035751-jsbn00xodv0y1eve-2
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  bzrlib/tests/test_selftest.py  test_selftest.py-20051202044319-c110a115d8c0456a
  bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
  bzrlib/tests/workingtree_implementations/__init__.py __init__.py-20060203003124-b2aa5aca21a8bfad
  bzrlib/workingtree.py          workingtree.py-20050511021032-29b6ec0a681e02e3
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
    ------------------------------------------------------------
    revno: 3696.35.3
    revision-id: robertc at robertcollins.net-20080923224605-vqw2yezm8f8m2non
    parent: robertc at robertcollins.net-20080922051520-uhr3pn61w141kagv
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-dirstate
    timestamp: Wed 2008-09-24 08:46:05 +1000
    message:
      NEWS for the record_entry_contents change.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
    ------------------------------------------------------------
    revno: 3696.35.2
    revision-id: robertc at robertcollins.net-20080922051520-uhr3pn61w141kagv
    parent: robertc at robertcollins.net-20080919065341-5t5w1p2gi926nfia
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-dirstate
    timestamp: Mon 2008-09-22 15:15:20 +1000
    message:
      Race-free stat-fingerprint updating during commit via a new method get_file_with_stat.
    added:
      bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py test_get_file_with_s-20080922035909-lhdovrr36jpxmu0v-1
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
      bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
      bzrlib/tests/test_selftest.py  test_selftest.py-20051202044319-c110a115d8c0456a
      bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
      bzrlib/tests/workingtree_implementations/__init__.py __init__.py-20060203003124-b2aa5aca21a8bfad
      bzrlib/workingtree.py          workingtree.py-20050511021032-29b6ec0a681e02e3
      bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
    ------------------------------------------------------------
    revno: 3696.35.1
    revision-id: robertc at robertcollins.net-20080919065341-5t5w1p2gi926nfia
    parent: pqm at pqm.ubuntu.com-20080916010540-7l7uexkq5aelzv5p
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-dirstate
    timestamp: Fri 2008-09-19 16:53:41 +1000
    message:
      First cut - make it work - at updating the tree stat cache during commit.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
      bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
      bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
      bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
      bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
      bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS	2008-09-24 00:09:49 +0000
+++ b/NEWS	2008-09-25 01:11:30 +0000
@@ -26,6 +26,9 @@
       push.  It is also faster if there are no tags in the local branch.
       (Andrew Bennetts)
 
+    * File changes during a commit will update the tree stat cache.
+      (Robert Collins)
+
     * Location aliases can now accept a trailing path.  (Micheal Hudson)
 
     * New hooks ``Lock.hooks`` when LockDirs are acquired and released. 
@@ -72,6 +75,10 @@
 
   API CHANGES:
 
+    * ``CommitBuilder.record_entry_contents`` returns one more element in
+      its result tuple - an optional file system hash for the hash cache
+      to use. (Robert Collins)
+
   TESTING:
 
     * ``bzrlib.tests.repository_implementations`` has been renamed to
@@ -97,6 +104,9 @@
       tree walking, and modular directory listing code to aid future
       performance optimisations and refactoring. (Robert Collins)
 
+    * New race-free method on MutableTree ``get_file_with_stat`` for use
+      when generating stat cache results. (Robert Collins)
+
     * New win32utils.get_local_appdata_location() provides access to a local
       directory for storing data.  (Mark Hammond)
 

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2008-08-05 04:10:43 +0000
+++ b/bzrlib/commit.py	2008-09-19 06:53:41 +0000
@@ -690,11 +690,12 @@
                 # required after that changes.
                 if len(self.parents) > 1:
                     ie.revision = None
-                delta, version_recorded = self.builder.record_entry_contents(
+                delta, version_recorded, _ = self.builder.record_entry_contents(
                     ie, self.parent_invs, path, self.basis_tree, None)
                 if version_recorded:
                     self.any_entries_changed = True
-                if delta: self._basis_delta.append(delta)
+                if delta:
+                    self._basis_delta.append(delta)
 
     def _report_and_accumulate_deletes(self):
         # XXX: Could the list of deleted paths and ids be instead taken from
@@ -843,14 +844,18 @@
         else:
             ie = existing_ie.copy()
             ie.revision = None
-        delta, version_recorded = self.builder.record_entry_contents(ie,
-            self.parent_invs, path, self.work_tree, content_summary)
+        # For carried over entries we don't care about the fs hash - the repo
+        # isn't generating a sha, so we're not saving computation time.
+        delta, version_recorded, fs_hash = self.builder.record_entry_contents(
+            ie, self.parent_invs, path, self.work_tree, content_summary)
         if delta:
             self._basis_delta.append(delta)
         if version_recorded:
             self.any_entries_changed = True
         if report_changes:
             self._report_change(ie, path)
+        if fs_hash:
+            self.work_tree._observed_sha1(ie.file_id, path, fs_hash)
         return ie
 
     def _report_change(self, ie, path):

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2008-09-24 03:08:14 +0000
+++ b/bzrlib/dirstate.py	2008-09-25 01:11:30 +0000
@@ -1507,6 +1507,29 @@
                     # it is being resurrected here, so blank it out temporarily.
                     self._dirblocks[block_index][1][entry_index][1][1] = null
 
+    def _observed_sha1(self, entry, sha1, stat_value,
+        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
+        """Note the sha1 of a file.
+
+        :param entry: The entry the sha1 is for.
+        :param sha1: The observed sha1.
+        :param stat_value: The os.lstat for the file.
+        """
+        try:
+            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
+        except KeyError:
+            # Unhandled kind
+            return None
+        packed_stat = _pack_stat(stat_value)
+        if minikind == 'f':
+            if self._cutoff_time is None:
+                self._sha_cutoff_time()
+            if (stat_value.st_mtime < self._cutoff_time
+                and stat_value.st_ctime < self._cutoff_time):
+                entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
+                    packed_stat)
+                self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+
     def _sha_cutoff_time(self):
         """Return cutoff time.
 

=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py	2008-05-08 04:33:38 +0000
+++ b/bzrlib/mutabletree.py	2008-09-22 05:15:20 +0000
@@ -201,6 +201,20 @@
         """Helper function for add - sets the entries of kinds."""
         raise NotImplementedError(self._gather_kinds)
 
+    def get_file_with_stat(self, file_id, path=None):
+        """Get a file handle and stat object for file_id.
+
+        The default implementation returns (self.get_file, None) for backwards
+        compatibility.
+
+        :param file_id: The file id to read.
+        :param path: The path of the file, if it is known.
+        :return: A tuple (file_handle, stat_value_or_None). If the tree has
+            no stat facility, or need for a stat cache feedback during commit,
+            it may return None for the second element of the tuple.
+        """
+        return (self.get_file(file_id, path), None)
+
     @needs_read_lock
     def last_revision(self):
         """Return the revision id of the last commit performed in this tree.
@@ -247,6 +261,23 @@
         """
         raise NotImplementedError(self.mkdir)
 
+    def _observed_sha1(self, file_id, path, (sha1, stat_value)):
+        """Tell the tree we have observed a path's sha1.
+
+        The intent of this function is to allow trees that have a hashcache to
+        update the hashcache during commit. If the observed file is too new
+        (based on the stat_value) to be safely hash-cached the tree will ignore
+        it. 
+
+        The default implementation does nothing.
+
+        :param file_id: The file id
+        :param path: The file path
+        :param sha1: The sha 1 that was observed.
+        :param stat_value: A stat result for the file the sha1 was read from.
+        :return: None
+        """
+
     @needs_write_lock
     def put_file_bytes_non_atomic(self, file_id, bytes):
         """Update the content of a file in the tree.

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-09-04 14:22:34 +0000
+++ b/bzrlib/repository.py	2008-09-23 22:46:05 +0000
@@ -241,12 +241,16 @@
             content - stat, length, exec, sha/link target. This is only
             accessed when the entry has a revision of None - that is when it is
             a candidate to commit.
-        :return: A tuple (change_delta, version_recorded). change_delta is 
-            an inventory_delta change for this entry against the basis tree of
-            the commit, or None if no change occured against the basis tree.
+        :return: A tuple (change_delta, version_recorded, fs_hash).
+            change_delta is an inventory_delta change for this entry against
+            the basis tree of the commit, or None if no change occurred against
+            the basis tree.
             version_recorded is True if a new version of the entry has been
             recorded. For instance, committing a merge where a file was only
             changed on the other side will return (delta, False).
+            fs_hash is either None, or the hash details for the path (currently
+            a tuple of the contents sha1 and the statvalue returned by
+            tree.get_file_with_stat()).
         """
         if self.new_inventory.root is None:
             if ie.parent_id is not None:
@@ -287,7 +291,7 @@
                 else:
                     # add
                     delta = (None, path, ie.file_id, ie)
-                return delta, False
+                return delta, False, None
             else:
                 # we don't need to commit this, because the caller already
                 # determined that an existing revision of this file is
@@ -298,7 +302,7 @@
                     raise AssertionError("Impossible situation, a skipped "
                         "inventory entry (%r) claims to be modified in this "
                         "commit (%r).", (ie, self._new_revision_id))
-                return None, False
+                return None, False, None
         # XXX: Friction: parent_candidates should return a list not a dict
         #      so that we don't have to walk the inventories again.
         parent_candiate_entries = ie.parent_candidates(parent_invs)
@@ -334,6 +338,9 @@
             # if the kind changed the content obviously has
             if kind != parent_entry.kind:
                 store = True
+        # Stat cache fingerprint feedback for the caller - None as we usually
+        # don't generate one.
+        fingerprint = None
         if kind == 'file':
             if content_summary[2] is None:
                 raise ValueError("Files must not have executable = None")
@@ -350,7 +357,7 @@
                     ie.text_size = parent_entry.text_size
                     ie.text_sha1 = parent_entry.text_sha1
                     ie.executable = parent_entry.executable
-                    return self._get_delta(ie, basis_inv, path), False
+                    return self._get_delta(ie, basis_inv, path), False, None
                 else:
                     # Either there is only a hash change(no hash cache entry,
                     # or same size content change), or there is no change on
@@ -363,10 +370,16 @@
                 # absence of a content change in the file.
                 nostore_sha = None
             ie.executable = content_summary[2]
-            lines = tree.get_file(ie.file_id, path).readlines()
+            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
+            try:
+                lines = file_obj.readlines()
+            finally:
+                file_obj.close()
             try:
                 ie.text_sha1, ie.text_size = self._add_text_to_weave(
                     ie.file_id, lines, heads, nostore_sha)
+                # Let the caller know we generated a stat fingerprint.
+                fingerprint = (ie.text_sha1, stat_value)
             except errors.ExistingContent:
                 # Turns out that the file content was unchanged, and we were
                 # only going to store a new node if it was changed. Carry over
@@ -375,13 +388,13 @@
                 ie.text_size = parent_entry.text_size
                 ie.text_sha1 = parent_entry.text_sha1
                 ie.executable = parent_entry.executable
-                return self._get_delta(ie, basis_inv, path), False
+                return self._get_delta(ie, basis_inv, path), False, None
         elif kind == 'directory':
             if not store:
                 # all data is meta here, nothing specific to directory, so
                 # carry over:
                 ie.revision = parent_entry.revision
-                return self._get_delta(ie, basis_inv, path), False
+                return self._get_delta(ie, basis_inv, path), False, None
             lines = []
             self._add_text_to_weave(ie.file_id, lines, heads, None)
         elif kind == 'symlink':
@@ -395,7 +408,7 @@
                 # unchanged, carry over.
                 ie.revision = parent_entry.revision
                 ie.symlink_target = parent_entry.symlink_target
-                return self._get_delta(ie, basis_inv, path), False
+                return self._get_delta(ie, basis_inv, path), False, None
             ie.symlink_target = current_link_target
             lines = []
             self._add_text_to_weave(ie.file_id, lines, heads, None)
@@ -407,14 +420,14 @@
                 # unchanged, carry over.
                 ie.reference_revision = parent_entry.reference_revision
                 ie.revision = parent_entry.revision
-                return self._get_delta(ie, basis_inv, path), False
+                return self._get_delta(ie, basis_inv, path), False, None
             ie.reference_revision = content_summary[3]
             lines = []
             self._add_text_to_weave(ie.file_id, lines, heads, None)
         else:
             raise NotImplementedError('unknown kind')
         ie.revision = self._new_revision_id
-        return self._get_delta(ie, basis_inv, path), True
+        return self._get_delta(ie, basis_inv, path), True, fingerprint
 
     def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
         # Note: as we read the content directly from the tree, we know its not

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2008-09-23 16:56:38 +0000
+++ b/bzrlib/tests/__init__.py	2008-09-25 01:11:30 +0000
@@ -877,6 +877,21 @@
         self.assertEqual(mode, mode_test,
                          'mode mismatch %o != %o' % (mode, mode_test))
 
+    def assertEqualStat(self, expected, actual):
+        """assert that expected and actual are the same stat result.
+
+        :param expected: A stat result.
+        :param actual: A stat result.
+        :raises AssertionError: If the expected and actual stat values differ
+            other than by atime.
+        """
+        self.assertEqual(expected.st_size, actual.st_size)
+        self.assertEqual(expected.st_mtime, actual.st_mtime)
+        self.assertEqual(expected.st_ctime, actual.st_ctime)
+        self.assertEqual(expected.st_dev, actual.st_dev)
+        self.assertEqual(expected.st_ino, actual.st_ino)
+        self.assertEqual(expected.st_mode, actual.st_mode)
+
     def assertPositive(self, val):
         """Assert that val is greater than 0."""
         self.assertTrue(val > 0, 'expected a positive value, but got %s' % val)

=== modified file 'bzrlib/tests/per_repository/test_commit_builder.py'
--- a/bzrlib/tests/per_repository/test_commit_builder.py	2008-09-04 20:32:04 +0000
+++ b/bzrlib/tests/per_repository/test_commit_builder.py	2008-09-22 05:15:20 +0000
@@ -151,7 +151,7 @@
         try:
             ie = inventory.make_entry('directory', '', None,
                     tree.get_root_id())
-            delta, version_recorded = builder.record_entry_contents(
+            delta, version_recorded, fs_hash = builder.record_entry_contents(
                 ie, [parent_tree.inventory], '', tree,
                 tree.path_content_summary(''))
             self.assertFalse(version_recorded)
@@ -164,6 +164,8 @@
                     delta)
             else:
                 self.assertEqual(None, delta)
+            # Directories do not get hashed.
+            self.assertEqual(None, fs_hash)
             builder.abort()
         except:
             builder.abort()
@@ -288,10 +290,12 @@
         os.symlink('target', 'link')
         self._add_commit_check_unchanged(tree, 'link')
 
-    def _add_commit_renamed_check_changed(self, tree, name):
+    def _add_commit_renamed_check_changed(self, tree, name,
+        expect_fs_hash=False):
         def rename():
             tree.rename_one(name, 'new_' + name)
-        self._add_commit_change_check_changed(tree, name, rename)
+        self._add_commit_change_check_changed(tree, name, rename,
+            expect_fs_hash=expect_fs_hash)
 
     def test_last_modified_revision_after_rename_dir_changes(self):
         # renaming a dir changes the last modified.
@@ -303,7 +307,8 @@
         # renaming a file changes the last modified.
         tree = self.make_branch_and_tree('.')
         self.build_tree(['file'])
-        self._add_commit_renamed_check_changed(tree, 'file')
+        self._add_commit_renamed_check_changed(tree, 'file',
+            expect_fs_hash=True)
 
     def test_last_modified_revision_after_rename_link_changes(self):
         # renaming a link changes the last modified.
@@ -312,12 +317,14 @@
         os.symlink('target', 'link')
         self._add_commit_renamed_check_changed(tree, 'link')
 
-    def _add_commit_reparent_check_changed(self, tree, name):
+    def _add_commit_reparent_check_changed(self, tree, name,
+        expect_fs_hash=False):
         self.build_tree(['newparent/'])
         tree.add(['newparent'])
         def reparent():
             tree.rename_one(name, 'newparent/new_' + name)
-        self._add_commit_change_check_changed(tree, name, reparent)
+        self._add_commit_change_check_changed(tree, name, reparent,
+            expect_fs_hash=expect_fs_hash)
 
     def test_last_modified_revision_after_reparent_dir_changes(self):
         # reparenting a dir changes the last modified.
@@ -329,7 +336,8 @@
         # reparenting a file changes the last modified.
         tree = self.make_branch_and_tree('.')
         self.build_tree(['file'])
-        self._add_commit_reparent_check_changed(tree, 'file')
+        self._add_commit_reparent_check_changed(tree, 'file',
+            expect_fs_hash=True)
 
     def test_last_modified_revision_after_reparent_link_changes(self):
         # reparenting a link changes the last modified.
@@ -338,11 +346,13 @@
         os.symlink('target', 'link')
         self._add_commit_reparent_check_changed(tree, 'link')
 
-    def _add_commit_change_check_changed(self, tree, name, changer):
+    def _add_commit_change_check_changed(self, tree, name, changer,
+        expect_fs_hash=False):
         tree.add([name], [name + 'id'])
         rev1 = tree.commit('')
         changer()
-        rev2 = self.mini_commit(tree, name, tree.id2path(name + 'id'))
+        rev2 = self.mini_commit(tree, name, tree.id2path(name + 'id'),
+            expect_fs_hash=expect_fs_hash)
         tree1, tree2 = self._get_revtrees(tree, [rev1, rev2])
         self.assertEqual(rev1, tree1.inventory[name + 'id'].revision)
         self.assertEqual(rev2, tree2.inventory[name + 'id'].revision)
@@ -353,7 +363,7 @@
         self.assertFileGraph(expected_graph, tree, (file_id, rev2))
 
     def mini_commit(self, tree, name, new_name, records_version=True,
-        delta_against_basis=True):
+        delta_against_basis=True, expect_fs_hash=False):
         """Perform a miniature commit looking for record entry results.
         
         :param tree: The tree to commit.
@@ -363,6 +373,8 @@
             record a new version.
         :param delta_against_basis: True of the commit of new_name is expected
             to have a delta against the basis.
+        :param expect_fs_hash: True or false to indicate whether we expect a
+            file hash to be returned from the record_entry_contents call.
         """
         tree.lock_write()
         try:
@@ -396,18 +408,24 @@
                 commit_id(parent_id)
             # because a change of some sort is meant to have occurred,
             # recording the entry must return True.
-            delta, version_recorded = commit_id(file_id)
+            delta, version_recorded, fs_hash = commit_id(file_id)
             if records_version:
                 self.assertTrue(version_recorded)
             else:
                 self.assertFalse(version_recorded)
+            if expect_fs_hash:
+                tree_file_stat = tree.get_file_with_stat(file_id)
+                tree_file_stat[0].close()
+                self.assertEqual(2, len(fs_hash))
+                self.assertEqual(tree.get_file_sha1(file_id), fs_hash[0])
+                self.assertEqualStat(tree_file_stat[1], fs_hash[1])
+            else:
+                self.assertEqual(None, fs_hash)
             new_entry = builder.new_inventory[file_id]
             if delta_against_basis:
                 expected_delta = (name, new_name, file_id, new_entry)
             else:
                 expected_delta = None
-            if expected_delta != delta:
-                import pdb;pdb.set_trace()
             self.assertEqual(expected_delta, delta)
             builder.finish_inventory()
             rev2 = builder.commit('')
@@ -434,7 +452,8 @@
         self.build_tree(['file'])
         def change_file():
             tree.put_file_bytes_non_atomic('fileid', 'new content')
-        self._add_commit_change_check_changed(tree, 'file', change_file)
+        self._add_commit_change_check_changed(tree, 'file', change_file,
+            expect_fs_hash=True)
 
     def test_last_modified_revision_after_content_link_changes(self):
         # changing a link changes the last modified.
@@ -455,13 +474,14 @@
         tree.rename_one(name, 'new_' + name)
         return tree.commit('')
 
-    def _commit_sprout_rename_merge(self, tree1, name):
+    def _commit_sprout_rename_merge(self, tree1, name, expect_fs_hash=False):
         rev1, tree2 = self._commit_sprout(tree1, name)
         # change both sides equally
         rev2 = self._rename_in_tree(tree1, name)
         rev3 = self._rename_in_tree(tree2, name)
         tree1.merge_from_branch(tree2.branch)
-        rev4 = self.mini_commit(tree1, 'new_' + name, 'new_' + name)
+        rev4 = self.mini_commit(tree1, 'new_' + name, 'new_' + name,
+            expect_fs_hash=expect_fs_hash)
         tree3, = self._get_revtrees(tree1, [rev4])
         self.assertEqual(rev4, tree3.inventory[name + 'id'].revision)
         file_id = name + 'id'
@@ -482,7 +502,7 @@
         # merge a file changes the last modified.
         tree1 = self.make_branch_and_tree('t1')
         self.build_tree(['t1/file'])
-        self._commit_sprout_rename_merge(tree1, 'file')
+        self._commit_sprout_rename_merge(tree1, 'file', expect_fs_hash=True)
 
     def test_last_modified_revision_after_merge_link_changes(self):
         # merge a link changes the last modified.
@@ -534,7 +554,7 @@
         self.requireFeature(tests.SymlinkFeature)
         os.symlink('target', name)
 
-    def _check_kind_change(self, make_before, make_after):
+    def _check_kind_change(self, make_before, make_after, expect_fs_hash=False):
         tree = self.make_branch_and_tree('.')
         path = 'name'
         make_before(path)
@@ -543,16 +563,19 @@
             osutils.delete_any(path)
             make_after(path)
 
-        self._add_commit_change_check_changed(tree, path, change_kind)
+        self._add_commit_change_check_changed(tree, path, change_kind,
+            expect_fs_hash=expect_fs_hash)
 
     def test_last_modified_dir_file(self):
-        self._check_kind_change(self.make_dir, self.make_file)
+        self._check_kind_change(self.make_dir, self.make_file,
+            expect_fs_hash=True)
 
     def test_last_modified_dir_link(self):
         self._check_kind_change(self.make_dir, self.make_link)
 
     def test_last_modified_link_file(self):
-        self._check_kind_change(self.make_link, self.make_file)
+        self._check_kind_change(self.make_link, self.make_file,
+            expect_fs_hash=True)
 
     def test_last_modified_link_dir(self):
         self._check_kind_change(self.make_link, self.make_dir)

=== modified file 'bzrlib/tests/test__dirstate_helpers.py'
--- a/bzrlib/tests/test__dirstate_helpers.py	2008-09-18 01:37:47 +0000
+++ b/bzrlib/tests/test__dirstate_helpers.py	2008-09-25 01:11:30 +0000
@@ -18,6 +18,7 @@
 
 import bisect
 import os
+import time
 
 from bzrlib import (
     dirstate,
@@ -800,6 +801,28 @@
     def set_update_entry(self):
         self.update_entry = dirstate.py_update_entry
 
+    def test_observed_sha1_cachable(self):
+        state, entry = self.get_state_with_a()
+        atime = time.time() - 10
+        self.build_tree(['a'])
+        statvalue = os.lstat('a')
+        statvalue = test_dirstate._FakeStat(statvalue.st_size, atime, atime,
+            statvalue.st_dev, statvalue.st_ino, statvalue.st_mode)
+        state._observed_sha1(entry, "foo", statvalue)
+        self.assertEqual('foo', entry[1][0][1])
+        packed_stat = dirstate.pack_stat(statvalue)
+        self.assertEqual(packed_stat, entry[1][0][4])
+
+    def test_observed_sha1_not_cachable(self):
+        state, entry = self.get_state_with_a()
+        oldval = entry[1][0][1]
+        oldstat = entry[1][0][4]
+        self.build_tree(['a'])
+        statvalue = os.lstat('a')
+        state._observed_sha1(entry, "foo", statvalue)
+        self.assertEqual(oldval, entry[1][0][1])
+        self.assertEqual(oldstat, entry[1][0][4])
+
     def test_update_entry(self):
         state, entry = self.get_state_with_a()
         self.build_tree(['a'])

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2008-09-24 03:08:14 +0000
+++ b/bzrlib/tests/test_dirstate.py	2008-09-25 01:11:30 +0000
@@ -18,7 +18,6 @@
 
 import bisect
 import os
-import time
 
 from bzrlib import (
     dirstate,

=== modified file 'bzrlib/tests/test_selftest.py'
--- a/bzrlib/tests/test_selftest.py	2008-09-04 21:05:18 +0000
+++ b/bzrlib/tests/test_selftest.py	2008-09-22 05:15:20 +0000
@@ -502,6 +502,19 @@
         self.assertIsSameRealPath(self.test_dir, cwd)
         self.assertIsSameRealPath(self.test_home_dir, os.environ['HOME'])
 
+    def test_assertEqualStat_equal(self):
+        from bzrlib.tests.test_dirstate import _FakeStat
+        self.build_tree(["foo"])
+        real = os.lstat("foo")
+        fake = _FakeStat(real.st_size, real.st_mtime, real.st_ctime,
+            real.st_dev, real.st_ino, real.st_mode)
+        self.assertEqualStat(real, fake)
+
+    def test_assertEqualStat_notequal(self):
+        self.build_tree(["foo", "bar"])
+        self.assertRaises(AssertionError, self.assertEqualStat,
+            os.lstat("foo"), os.lstat("bar"))
+
 
 class TestTestCaseWithMemoryTransport(TestCaseWithMemoryTransport):
 

=== modified file 'bzrlib/tests/test_workingtree_4.py'
--- a/bzrlib/tests/test_workingtree_4.py	2008-07-08 14:55:19 +0000
+++ b/bzrlib/tests/test_workingtree_4.py	2008-09-22 05:15:20 +0000
@@ -18,6 +18,7 @@
 """Tests for WorkingTreeFormat4"""
 
 import os
+import time
 
 from bzrlib import (
     bzrdir,
@@ -577,6 +578,71 @@
         self.assertEqual([], changes)
         self.assertEqual(['', 'versioned', 'versioned2'], returned)
 
+    def get_tree_with_cachable_file_foo(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        # A 4 second old timestamp is always hashable. Delaying the test
+        # suite is unfortunate, but leaving this untested would be worse.
+        time.sleep(4)
+        return tree
+
+    def test_commit_updates_hash_cache(self):
+        tree = self.get_tree_with_cachable_file_foo()
+        tree.commit('a commit')
+        # tree's dirstate should now have a valid stat entry for foo.
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        entry = tree._get_entry(path='foo')
+        self.assertEqual(osutils.sha_file_by_name('foo'), entry[1][0][1])
+
+    def test_observed_sha1_cachable(self):
+        tree = self.get_tree_with_cachable_file_foo()
+        sha1 = osutils.sha_file_by_name('foo')
+        observation = (sha1, os.lstat("foo"))
+        tree.lock_write()
+        try:
+            tree._observed_sha1("foo-id", "foo", observation)
+            self.assertEqual(sha1, tree._get_entry(path="foo")[1][0][1])
+        finally:
+            tree.unlock()
+        # A freshly opened tree must see the same sha1 in its entry.
+        reopened = tree.bzrdir.open_workingtree()
+        reopened.lock_read()
+        self.addCleanup(reopened.unlock)
+        self.assertEqual(sha1, reopened._get_entry(path="foo")[1][0][1])
+
+    def test_observed_sha1_new_file(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        # Record the sha1 held in the entry before anything is observed.
+        tree.lock_read()
+        try:
+            sha1_before = tree._get_entry(path="foo")[1][0][1]
+        finally:
+            tree.unlock()
+        tree.lock_write()
+        try:
+            observation = (osutils.sha_file_by_name('foo'), os.lstat("foo"))
+            tree._observed_sha1("foo-id", "foo", observation)
+            # The entry's sha1 must not have changed.
+            self.assertEqual(sha1_before, tree._get_entry(path="foo")[1][0][1])
+        finally:
+            tree.unlock()
+
+    def test_get_file_with_stat_id_only(self):
+        # Explicit check that WT4 trees hand back an lstat-equal stat value
+        # with the file object when given only a file id.
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id')
+        self.assertEqualStat(os.lstat('foo'), statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
 
 class TestCorruptDirstate(TestCaseWithTransport):
     """Tests for how we handle when the dirstate has been corrupted."""

=== modified file 'bzrlib/tests/workingtree_implementations/__init__.py'
--- a/bzrlib/tests/workingtree_implementations/__init__.py	2008-07-17 00:42:40 +0000
+++ b/bzrlib/tests/workingtree_implementations/__init__.py	2008-09-22 05:15:20 +0000
@@ -101,6 +101,7 @@
         'bzrlib.tests.workingtree_implementations.test_commit',
         'bzrlib.tests.workingtree_implementations.test_executable',
         'bzrlib.tests.workingtree_implementations.test_flush',
+        'bzrlib.tests.workingtree_implementations.test_get_file_with_stat',
         'bzrlib.tests.workingtree_implementations.test_get_file_mtime',
         'bzrlib.tests.workingtree_implementations.test_get_parent_ids',
         'bzrlib.tests.workingtree_implementations.test_inv',

=== added file 'bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py'
--- a/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py	2008-09-22 05:15:20 +0000
@@ -0,0 +1,49 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Test that all WorkingTrees implement get_file_with_stat."""
+
+import os
+
+from bzrlib.tests.workingtree_implementations import TestCaseWithWorkingTree
+
+
+class TestGetFileWithStat(TestCaseWithWorkingTree):
+
+    def test_get_file_with_stat_id_only(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id')
+        # A None stat value is tolerated; only real stat values are checked.
+        if statvalue is not None:
+            self.assertEqualStat(os.lstat('foo'), statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())
+
+    def test_get_file_with_stat_id_and_path(self):
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['foo'])
+        tree.add(['foo'], ['foo-id'])
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        file_obj, statvalue = tree.get_file_with_stat('foo-id', 'foo')
+        # Same contract as the id-only lookup: a None stat value is tolerated,
+        # but a real one must match what lstat reports for the file.
+        if statvalue is not None:
+            self.assertEqualStat(os.lstat('foo'), statvalue)
+        self.assertEqual(["contents of foo\n"], file_obj.readlines())

=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	2008-09-22 09:43:40 +0000
+++ b/bzrlib/workingtree.py	2008-09-25 01:11:30 +0000
@@ -424,9 +424,14 @@
         return osutils.lexists(self.abspath(filename))
 
     def get_file(self, file_id, path=None):
+        return self.get_file_with_stat(file_id, path)[0]
+
+    def get_file_with_stat(self, file_id, path=None, _fstat=os.fstat):
+        """See MutableTree.get_file_with_stat."""
         if path is None:
             path = self.id2path(file_id)
-        return self.get_file_byname(path)
+        file_obj = self.get_file_byname(path)
+        return (file_obj, _fstat(file_obj.fileno()))
 
     def get_file_text(self, file_id):
         return self.get_file(file_id).read()

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2008-09-24 01:54:18 +0000
+++ b/bzrlib/workingtree_4.py	2008-09-25 01:11:30 +0000
@@ -543,6 +543,12 @@
                 # path is missing on disk.
                 continue
 
+    def _observed_sha1(self, file_id, path, (sha1, statvalue)):
+        """See MutableTree._observed_sha1."""
+        # Forward the observation to the dirstate for this file's entry.
+        self.current_dirstate()._observed_sha1(
+            self._get_entry(file_id=file_id, path=path), sha1, statvalue)
+
     def kind(self, file_id):
         """Return the kind of a file.
 




More information about the bazaar-commits mailing list