Rev 2354: Use the raw encoder for a bit more performance boost. Is it worth the ugliness? (about 100ms) in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

John Arbash Meinel john at arbash-meinel.com
Thu Feb 22 00:56:39 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

------------------------------------------------------------
revno: 2354
revision-id: john at arbash-meinel.com-20070222005637-nx0n1qppaobl0j6s
parent: john at arbash-meinel.com-20070222003946-7huladldtgb9iz8v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Wed 2007-02-21 18:56:37 -0600
message:
  Use the raw encoder for a bit more performance boost. Is it worth the ugliness? (about 100ms)
modified:
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-22 00:39:46 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-22 00:56:37 +0000
@@ -96,12 +96,8 @@
 from bzrlib.workingtree import WorkingTree, WorkingTree3, WorkingTreeFormat3
 
 
-def _utf8_encode(unicode_str, _encoder=cache_utf8._utf8_encode):
-    return _encoder(unicode_str)[0]
-
-
-def _utf8_decode(utf8_str, _decoder=cache_utf8._utf8_decode):
-    return _decoder(utf8_str)[0]
+_utf8_encode = cache_utf8._utf8_encode
+_utf8_decode = cache_utf8._utf8_decode
 
 
 class WorkingTree4(WorkingTree3):
@@ -215,7 +211,7 @@
         # TODO we want a paths_to_dirblocks helper I think
         encode = _utf8_encode
         for path in paths:
-            dirname, basename = os.path.split(encode(path))
+            dirname, basename = os.path.split(encode(path)[0])
             _, _, _, path_is_versioned = state._get_block_entry_index(
                 dirname, basename, 0)
             if path_is_versioned:
@@ -242,7 +238,7 @@
         state._read_dirblocks_if_needed()
         root_key, current_entry = self._get_entry(path='')
         decode = _utf8_decode
-        current_id = decode(root_key[2])
+        current_id = decode(root_key[2])[0]
         assert current_entry[0][0] == 'directory'
         inv = Inventory(root_id=current_id)
         # we could do this straight out of the dirstate; it might be fast
@@ -259,8 +255,8 @@
                 if entry[0][0] in ('absent', 'relocated'):
                     # a parent tree only entry
                     continue
-                name = decode(key[1])
-                file_id = decode(key[2])
+                name = decode(key[1])[0]
+                file_id = decode(key[2])[0]
                 kind, link_or_sha1, size, executable, stat = entry[0]
                 inv_entry = entry_factory[kind](file_id, name, parent_id)
                 if kind == 'file':
@@ -291,9 +287,9 @@
         state = self.current_dirstate()
         encode = _utf8_encode
         if file_id is not None:
-            file_id = encode(file_id)
+            file_id = encode(file_id)[0]
         if path is not None:
-            path = encode(path)
+            path = encode(path)[0]
         return state._get_entry(0, fileid_utf8=file_id, path_utf8=path)
 
     def get_file_sha1(self, file_id, path=None, stat_value=None):
@@ -303,7 +299,7 @@
         # TODO:
         # if row stat is valid, use cached sha1, else, get a new sha1.
         if path is None:
-            path = _utf8_decode(os.path.join(*key[0:2]))
+            path = _utf8_decode(os.path.join(*key[0:2]))[0]
         return self._hashcache.get_sha1(path, stat_value)
 
     def _get_inventory(self):
@@ -327,7 +323,7 @@
     @needs_read_lock
     def get_root_id(self):
         """Return the id of this trees root"""
-        return _utf8_decode(self._get_entry(path='')[0][2])
+        return _utf8_decode(self._get_entry(path='')[0][2])[0]
 
     def has_id(self, file_id):
         state = self.current_dirstate()
@@ -336,14 +332,14 @@
             return False
         decode = _utf8_decode
         return osutils.lexists(pathjoin(
-                    self.basedir, decode(row[0]), decode(row[1])))
+                    self.basedir, decode(row[0])[0], decode(row[1])[0]))
 
     @needs_read_lock
     def id2path(self, fileid):
         state = self.current_dirstate()
         fileid_utf8 = fileid.encode('utf8')
         key, tree_details = state._get_entry(0, fileid_utf8=fileid_utf8)
-        return _utf8_decode(os.path.join(*key[0:2]))
+        return _utf8_decode(os.path.join(*key[0:2]))[0]
 
     @needs_read_lock
     def __iter__(self):
@@ -358,9 +354,9 @@
                 # not relevant to the working tree
                 continue
             decode = _utf8_decode
-            path = pathjoin(self.basedir, decode(key[0]), decode(key[1]))
+            path = pathjoin(self.basedir, decode(key[0])[0], decode(key[1])[0])
             if osutils.lexists(path):
-                result.append(decode(key[2]))
+                result.append(decode(key[2])[0])
         return iter(result)
 
     @needs_read_lock
@@ -393,7 +389,7 @@
         assert not isinstance(from_paths, basestring)
         encode = _utf8_encode
         decode = _utf8_decode
-        to_dir_utf8 = encode(to_dir)
+        to_dir_utf8 = encode(to_dir)[0]
         to_entry_dirname, to_basename = os.path.split(to_dir_utf8)
         # check destination directory
         # get the details for it
@@ -420,7 +416,7 @@
             update_inventory = True
             inv = self.inventory
             to_dir_ie = inv[to_dir_id]
-            to_dir_id = decode(to_entry[0][2])
+            to_dir_id = decode(to_entry[0][2])[0]
         else:
             update_inventory = False
 
@@ -428,13 +424,13 @@
         for from_rel in from_paths:
             # from_rel is 'pathinroot/foo/bar'
             from_dirname, from_tail = os.path.split(from_rel)
-            from_dirname = encode(from_dirname)
+            from_dirname = encode(from_dirname)[0]
             from_entry = self._get_entry(path=from_rel)
             if from_entry == (None, None):
                 raise errors.BzrMoveFailedError(from_rel,to_dir,
                     errors.NotVersionedError(path=str(from_rel)))
 
-            from_id = decode(from_entry[0][2])
+            from_id = decode(from_entry[0][2])[0]
             to_rel = pathjoin(to_dir, from_tail)
             item_to_entry = self._get_entry(path=to_rel)
             if item_to_entry != (None, None):
@@ -500,7 +496,7 @@
                         lambda: inv.rename(from_id, current_parent, from_tail))
                 # finally do the rename in the dirstate, which is a little
                 # tricky to rollback, but least likely to need it.
-                basename = encode(from_tail)
+                basename = encode(from_tail)[0]
                 old_block_index, old_entry_index, dir_present, file_present = \
                     state._get_block_entry_index(from_dirname, basename, 0)
                 old_block = state._dirblocks[old_block_index][1]
@@ -518,7 +514,7 @@
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
                         id_index=state._get_id_index(),
-                        path_utf8=encode(from_rel)))
+                        path_utf8=encode(from_rel)[0]))
                 # create new row in current block
                 state.update_minimal(to_key,
                         old_entry_details[0][0],
@@ -528,7 +524,7 @@
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
                         id_index=state._get_id_index(),
-                        path_utf8=encode(to_rel))
+                        path_utf8=encode(to_rel)[0])
                 added_entry_index, _ = state._find_entry_index(to_key, to_block[1])
                 new_entry = to_block[added_entry_index]
                 rollbacks.append(lambda:state._make_absent(new_entry))
@@ -562,7 +558,7 @@
         entry = self._get_entry(path=path)
         if entry == (None, None):
             return None
-        return _utf8_decode(entry[0][2])
+        return _utf8_decode(entry[0][2])[0]
 
     def read_working_inventory(self):
         """Read the working inventory.
@@ -696,7 +692,7 @@
         ids_to_unversion = set()
         encode = _utf8_encode
         for fileid in file_ids:
-            ids_to_unversion.add(encode(fileid))
+            ids_to_unversion.add(encode(fileid)[0])
         paths_to_unversion = set()
         # sketch:
         # check if the root is to be unversioned, if so, assert for now.
@@ -894,7 +890,7 @@
         # for the tree index use.
         root_key, current_entry = self._dirstate._get_entry(parent_index, path_utf8='')
         decode = _utf8_decode
-        current_id = decode(root_key[2])
+        current_id = decode(root_key[2])[0]
         assert current_entry[parent_index][0] == 'directory'
         inv = Inventory(root_id=current_id, revision_id=self._revision_id)
         inv.root.revision = current_entry[parent_index][4]
@@ -912,8 +908,8 @@
                 if entry[parent_index][0] in ('absent', 'relocated'):
                     # not this tree
                     continue
-                name = decode(key[1])
-                file_id = decode(key[2])
+                name = decode(key[1])[0]
+                file_id = decode(key[2])[0]
                 kind, link_or_sha1, size, executable, revid = entry[parent_index]
                 inv_entry = entry_factory[kind](file_id, name, parent_id)
                 inv_entry.revision = revid
@@ -926,7 +922,7 @@
                 elif kind == 'symlink':
                     inv_entry.executable = False
                     inv_entry.text_size = size
-                    inv_entry.symlink_target = decode(link_or_sha1)
+                    inv_entry.symlink_target = decode(link_or_sha1)[0]
                 else:
                     raise Exception, kind
                 inv.add(inv_entry)



More information about the bazaar-commits mailing list