Rev 49: Bring in the missing update from 'trunk' in http://bazaar.launchpad.net/%7Ejameinel/bzr-groupcompress/experimental
John Arbash Meinel
john@arbash-meinel.com
Thu Feb 26 22:11:27 GMT 2009
At http://bazaar.launchpad.net/%7Ejameinel/bzr-groupcompress/experimental
------------------------------------------------------------
revno: 49
revision-id: john@arbash-meinel.com-20090226220934-lnqvbe6uqle8eoum
parent: john@arbash-meinel.com-20090226215937-4n69g2lfbjm3yyip
parent: john@arbash-meinel.com-20090225230422-4oigw03k7fq62eyb
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: experimental
timestamp: Thu 2009-02-26 16:09:34 -0600
message:
Bring in the missing update from 'trunk'
modified:
groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
------------------------------------------------------------
revno: 45.1.3
revision-id: john@arbash-meinel.com-20090225230422-4oigw03k7fq62eyb
parent: john@arbash-meinel.com-20090225225958-jnsftmx4zcmmlo2a
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-02-25 17:04:22 -0600
message:
Setting _fetch_order='topological' gives sub-optimal ordering for gc=>gc fetches.
Because the 'autopack' code rewrites packs in 'gc-optimal' order, an
'unordered' fetch simply streams the data back out along that same
'gc-optimal' route, whereas 'topological' would reshuffle it.
modified:
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
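In practice this entry amounts to the one-line _fetch_order switch in the
repofmt.py diff below. A minimal sketch of the resulting behaviour, using a
hypothetical format class (only the three attributes are the real ones):

    # Minimal sketch; 'DemoGCRepositoryFormat' is hypothetical, but the
    # three attributes are exactly those set in repofmt.py below.
    class DemoGCRepositoryFormat(object):
        def __init__(self):
            # 'unordered' streams records back in on-disk order, which
            # autopack has already made 'gc-optimal'; 'topological'
            # would reshuffle groups that are already well laid out.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False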
------------------------------------------------------------
revno: 45.1.2
revision-id: john@arbash-meinel.com-20090225225958-jnsftmx4zcmmlo2a
parent: john@arbash-meinel.com-20090225222123-frjzu1meidd6qafi
parent: john@arbash-meinel.com-20090225221429-l0is3qxy1hvzuhes
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-02-25 16:59:58 -0600
message:
Groupcompress now supports 'autopack' and 'pack'.
It does this by creating a new pack file, wrapping a GroupCompressVersionedFiles
around it, and streaming the data into it in 'gc-optimal' ordering.
This actually seems to work fairly well.
modified:
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
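Stripped of progress reporting and error handling, the repack pattern
described above reduces to the following sketch (source_vf, target_vf, and
keys stand in for the objects built in the repofmt.py diff below):

    # Sketch only: 'source_vf'/'target_vf' are GroupCompressVersionedFiles
    # around the old and new packs, 'keys' the union of all source keys.
    def repack_one_index(source_vf, target_vf, keys):
        # Pull every record in 'gc-optimal' order and write it straight
        # into the new pack; compression groups are rebuilt on insert.
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        target_vf.insert_record_stream(stream)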
------------------------------------------------------------
revno: 45.1.1
revision-id: john@arbash-meinel.com-20090225222123-frjzu1meidd6qafi
parent: john@arbash-meinel.com-20090225221102-z0qgfbrfwwe2yscf
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-02-25 16:21:23 -0600
message:
Implement new handling of get_bytes_as(), and add get_missing_compression_parent_keys().
This now works with bzr.dev's new streaming code.
modified:
groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
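The get_bytes_as() change follows this shape (a sketch of the pattern in the
groupcompress.py hunk below; the real code routes the fallback through a
storage-kind adapter rather than a second get_bytes_as() call):

    try:
        # Most records can hand their content over as a list of chunks.
        lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
    except errors.UnavailableRepresentation:
        # Fall back to a fulltext when chunks are not available; the
        # diff below uses an adapter keyed on record.storage_kind here.
        lines = osutils.split_lines(record.get_bytes_as('fulltext'))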
-------------- next part --------------
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-02-26 21:57:57 +0000
+++ b/groupcompress.py 2009-02-26 22:09:34 +0000
@@ -556,6 +556,16 @@
         # recipe, and we often want to end up with lines anyway.
         return plain, split_lines(plain[index_memo[3]:index_memo[4]])

+    def get_missing_compression_parent_keys(self):
+        """Return the keys of missing compression parents.
+
+        Missing compression parents occur when a record stream was missing
+        basis texts, or an index was scanned that had missing basis texts.
+        """
+        # GroupCompress cannot currently reference texts that are not in the
+        # group, so this is valid for now.
+        return frozenset()
+
     def get_record_stream(self, keys, ordering, include_delta_closure):
         """Get a stream of records for keys.

@@ -692,9 +702,9 @@
             # Raise an error when a record is missing.
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent([record.key], self)
-            elif record.storage_kind in ('chunked', 'fulltext'):
+            try:
                 lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-            else:
+            except errors.UnavailableRepresentation:
                 adapter_key = record.storage_kind, 'fulltext'
                 adapter = get_adapter(adapter_key)
                 bytes = adapter.get_bytes(record,
=== modified file 'repofmt.py'
--- a/repofmt.py 2009-02-26 21:59:37 +0000
+++ b/repofmt.py 2009-02-26 22:09:34 +0000
@@ -20,7 +20,14 @@
 import md5
 import time

-from bzrlib import debug, errors, pack, repository
+from bzrlib import (
+    debug,
+    errors,
+    knit,
+    pack,
+    repository,
+    ui,
+    )
 from bzrlib.btree_index import (
     BTreeBuilder,
     BTreeGraphIndex,
@@ -276,25 +283,49 @@
             raise errors.BzrError('call to %s.pack() while another pack is'
                                   ' being written.'
                                   % (self.__class__.__name__,))
+        new_pack = self.pack_factory(self, 'autopack',
+            self.repo.bzrdir._get_file_mode())
+        new_pack.set_write_cache_size(1024*1024)
         # TODO: A better alternative is to probably use Packer.open_pack(), and
         #       then create a GroupCompressVersionedFiles() around the
         #       target pack to insert into.
-        self._start_write_group()
+        pb = ui.ui_factory.nested_progress_bar()
         try:
-            for index_name, vf_name in to_copy:
+            for idx, (index_name, vf_name) in enumerate(to_copy):
+                pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                 keys = set()
-                new_index = getattr(self._new_pack, index_name)
+                new_index = getattr(new_pack, index_name)
                 new_index.set_optimize(for_size=True)
                 for pack in packs:
                     source_index = getattr(pack, index_name)
                     keys.update(e[1] for e in source_index.iter_all_entries())
-                vf = getattr(self.repo, vf_name)
-                stream = vf.get_record_stream(keys, 'gc-optimal', True)
-                vf.insert_record_stream(stream)
+                source_vf = getattr(self.repo, vf_name)
+                target_access = knit._DirectPackAccess({})
+                target_access.set_writer(new_pack._writer, new_index,
+                                         new_pack.access_tuple())
+                target_vf = GroupCompressVersionedFiles(
+                    _GCGraphIndex(new_index,
+                        add_callback=new_index.add_nodes,
+                        parents=source_vf._index._parents,
+                        is_locked=self.repo.is_locked),
+                    access=target_access,
+                    delta=source_vf._delta)
+                stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
+                target_vf.insert_record_stream(stream)
+            new_pack._check_references() # shouldn't be needed
         except:
-            self._abort_write_group()
+            pb.finished()
+            new_pack.abort()
+            raise
         else:
-            self._commit_write_group()
+            pb.finished()
+            if not new_pack.data_inserted():
+                raise AssertionError('We copied from pack files,'
+                    ' but had no data copied')
+            # we need to abort somehow, because we don't want to remove
+            # the other packs
+            new_pack.finish()
+            self.allocate(new_pack)
         for pack in packs:
             self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
@@ -382,7 +413,7 @@
         # because the source can be smart about extracting multiple
         # in-a-row (and sharing strings). Topological is better for
         # remote, because we access less data.
-        self._fetch_order = 'topological'
+        self._fetch_order = 'unordered'
         self._fetch_gc_optimal = True
         self._fetch_uses_deltas = False

@@ -446,7 +477,7 @@
         self._reconcile_does_inventory_gc = True
         self._reconcile_fixes_text_parents = True
         self._reconcile_backsup_inventory = False
-        self._fetch_order = 'topological'
+        self._fetch_order = 'unordered'
         self._fetch_gc_optimal = True
         self._fetch_uses_deltas = False

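As a closing note on the new API: a caller on the insert side (hypothetical,
not part of this diff) could use get_missing_compression_parent_keys() like
so; for GroupCompress it always returns frozenset(), since groups never
reference texts outside themselves:

    # Hypothetical caller; 'target_vf' and 'stream' are assumed.
    target_vf.insert_record_stream(stream)
    missing = target_vf.get_missing_compression_parent_keys()
    if missing:
        # A delta-based target could legitimately report keys here;
        # GroupCompress never does.
        raise errors.RevisionNotPresent(sorted(missing)[0], target_vf)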