Rev 3899: Shave 5s->3.3s in add_source by copying old entries across directly. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/gc_delta_index_room
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 19 04:45:06 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/gc_delta_index_room
------------------------------------------------------------
revno: 3899
revision-id: john at arbash-meinel.com-20090319044457-py567xqq3nyzwau4
parent: john at arbash-meinel.com-20090319040218-pqex5298ifl1ds54
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: gc_delta_index_room
timestamp: Wed 2009-03-18 23:44:57 -0500
message:
Shave 5s->3.3s in add_source by copying old entries across directly.
This works quite a bit better than copying them over into the hash
and then copying them *back* into the final packed form.
-------------- next part --------------
=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c 2009-03-19 04:02:18 +0000
+++ b/bzrlib/diff-delta.c 2009-03-19 04:44:57 +0000
@@ -209,12 +209,12 @@
static struct delta_index *
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
- unsigned int num_entries)
+ unsigned int num_entries, const struct delta_index *old_index)
{
- unsigned int i, j, memsize;
+ unsigned int i, j, hmask, memsize;
struct unpacked_index_entry *entry;
struct delta_index *index;
- struct index_entry *packed_entry, **packed_hash;
+ struct index_entry *packed_entry, **packed_hash, *old_entry;
struct index_entry null_entry = {0};
void *mem;
/*
@@ -230,10 +230,14 @@
return NULL;
}
+ hmask = hsize - 1;
index = mem;
index->memsize = memsize;
- index->hash_mask = hsize - 1;
+ index->hash_mask = hmask;
index->num_entries = num_entries;
+ if (old_index) {
+ assert(hmask >= old_index->hash_mask);
+ }
mem = index->hash;
packed_hash = mem;
@@ -246,6 +250,22 @@
* into consecutive array entries.
*/
packed_hash[i] = packed_entry;
+ /* Old comes earlier as a source, so it always comes first in a given
+ * hash bucket.
+ */
+ if (old_index) {
+ /* Could we optimize this to use memcpy when hmask ==
+ * old_index->hash_mask? Would it make any real difference?
+ */
+ j = i & old_index->hash_mask;
+ for (old_entry = old_index->hash[j];
+ old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL;
+ old_entry++) {
+ if ((old_entry->val & hmask) == i) {
+ *packed_entry++ = *old_entry;
+ }
+ }
+ }
for (entry = hash[i]; entry; entry = entry->next) {
*packed_entry++ = entry->entry;
}
@@ -270,37 +290,6 @@
return index;
}
-void include_entries_from_index(struct unpacked_index_entry **hash,
- unsigned int *hash_count,
- unsigned int hsize,
- struct unpacked_index_entry *entry,
- const struct delta_index *old)
-{
- unsigned int hmask, masked_val;
- struct index_entry *old_entry;
-
- hmask = hsize - 1;
- /* Iterate backwards through the existing entries
- * This is so that matches early in the file end up being the first pointer
- * in the linked list.
- * TODO: We know that all old entries are going to occur before the new
- * entries, and that we are going to follow this with a limit and
- * then pack step. There is probably a way we could optimize this
- * step, so that we wouldn't have to copy everything into a new
- * buffer, and then copy everything again into yet another buffer.
- */
- for (old_entry = old->last_entry; old_entry >= old->hash[0]; old_entry--) {
- masked_val = old_entry->val & hmask;
- if (old_entry->ptr != NULL) {
- /* Skip the null entries */
- entry->entry = *old_entry;
- entry->next = hash[masked_val];
- hash[masked_val] = entry++;
- hash_count[masked_val]++;
- }
- }
-}
-
struct delta_index *
create_index_from_old_and_hash(const struct delta_index *old,
@@ -501,17 +490,11 @@
hash_count[i]++;
}
}
- /* If we have an existing delta_index, we want to bring its info into the
- * new structure. We assume that the existing structure's objects all occur
- * before the new source, so they get first precedence in the index.
- */
- if (old != NULL)
- include_entries_from_index(hash, hash_count, hsize, entry, old);
-
+ /* TODO: It would be nice to limit_hash_buckets at a better time. */
total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
total_num_entries);
free(hash_count);
- index = pack_delta_index(hash, hsize, total_num_entries);
+ index = pack_delta_index(hash, hsize, total_num_entries, old);
index->last_src = src;
free(hash);
if (!index) {
@@ -530,7 +513,7 @@
_put_entries_into_hash(struct index_entry *entries, unsigned int num_entries,
unsigned int hsize)
{
- unsigned int i, hash_offset, hmask, memsize;
+ unsigned int hash_offset, hmask, memsize;
struct index_entry *entry, *last_entry;
struct index_entry_linked_list *out_entry, **hash;
void *mem;
More information about the bazaar-commits
mailing list