diff --git a/backup_collector.cc b/backup_collector.cc index 79cc478..0a2ee08 100644 --- a/backup_collector.cc +++ b/backup_collector.cc @@ -14,11 +14,11 @@ void BundleCollector::startIndex( string const & indexFn ) void BundleCollector::finishIndex( string const & indexFn ) { + verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n", + indexUsedChunks, indexTotalChunks, indexKeptBundles, + indexModifiedBundles, indexRemovedBundles ); if ( indexModified ) { - verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n", - indexUsedChunks, indexTotalChunks, indexKeptBundles, - indexModifiedBundles, indexRemovedBundles ); filesToUnlink.push_back( indexFn ); commit(); } @@ -75,19 +75,40 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons } else { - if ( !deepGC ) - { - chunkStorageWriter->addBundle( info, savedId ); - dPrintf( "Keeping %s bundle\n", i.c_str() ); - indexKeptBundles++; - } - else + if ( gcRepack ) { filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); indexModified = true; copyUsedChunks( info ); indexModifiedBundles++; } + else + { + if ( 0 == totalChunks ) + { + if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() ) + { + overallBundleSet.insert( bundleId ); + dPrintf( "Deleting %s bundle\n", i.c_str() ); + filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); + indexModified = true; + indexRemovedBundles++; + } + else + { + // trigger index update + indexModified = true; + } + } + else + { + if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() ) + overallBundleSet.insert( bundleId ); + chunkStorageWriter->addBundle( info, savedId ); + dPrintf( "Keeping %s bundle\n", i.c_str() ); + indexKeptBundles++; + } + } } } diff --git a/backup_collector.hh b/backup_collector.hh index 5e1a93d..8144051 100644 --- a/backup_collector.hh +++ b/backup_collector.hh @@ -23,6 +23,7 @@ private: bool indexModified, indexNecessary; vector< string > 
filesToUnlink; BackupRestorer::ChunkSet overallChunkSet; + std::set< Bundle::Id > overallBundleSet; void copyUsedChunks( BundleInfo const & info ); @@ -31,7 +32,7 @@ public: ChunkStorage::Reader *chunkStorageReader; ChunkStorage::Writer *chunkStorageWriter; BackupRestorer::ChunkSet usedChunkSet; - bool deepGC; + bool gcRepack; void startIndex( string const & indexFn ); diff --git a/bundle.hh b/bundle.hh index 709a126..f800774 100644 --- a/bundle.hh +++ b/bundle.hh @@ -42,6 +42,8 @@ struct Id { return memcmp( blob, other.blob, sizeof( blob ) ) == 0; } bool operator != ( Id const & other ) const { return ! operator == ( other ); } + bool operator < ( Id const & other ) const + { return memcmp( blob, other.blob, sizeof( blob ) ) < 0; } }; STATIC_ASSERT( sizeof( Id ) == IdSize ); diff --git a/config.cc b/config.cc index 967d566..cdeba8c 100644 --- a/config.cc +++ b/config.cc @@ -104,8 +104,8 @@ void Config::prefillKeywords() "cache-size", Config::oRuntime_cacheSize, Config::Runtime, - "Cache size to use in restore process\n" - "Affects restore process speed directly\n" + "Cache size to use in restore process.\n" + "Affects restore process speed directly.\n" VALID_SUFFIXES "Default is %sMiB", Utils::numberToString( runtime.cacheSize / 1024 / 1024 ) @@ -114,25 +114,23 @@ void Config::prefillKeywords() "exchange", Config::oRuntime_exchange, Config::Runtime, - "Data to exchange between repositories in import/export process\n" - "Can be specified multiple times\n" + "Data to exchange between repositories in import/export process.\n" + "Can be specified multiple times.\n" "Valid values:\n" "backups - exchange backup instructions (files in backups/ directory)\n" "bundles - exchange bundles with data (files in bunles/ directory)\n" "indexes - exchange indexes of chunks (files in index/ directory)\n" - "No default value, you should specify it explicitly" + "No default value, you should specify it explicitly." 
}, { - "gc-deep", - Config::oRuntime_gcDeep, + "gc-repack", + Config::oRuntime_gcRepack, Config::Runtime, - "Perform inter-bundle and inter-index deduplication\n" - "during garbage collection\n" - "You would probably need it after exchange operation\n" - "You could also use this switch to repack all bundles\n" - "Beware that this switch causes very intensive IO!\n" - "This switch is not used by default, specify to enable" + "Repack indexes and bundles during garbage collection.\n" + "Normally you would not need this.\n" + "Beware that this option causes very intensive IO!\n" + "Not default, you should specify it explicitly." }, { "", Config::oBadOption, Config::None } @@ -468,10 +466,10 @@ bool Config::parseOrValidate( const string & option, const OptionType type, /* NOTREACHED */ break; - case oRuntime_gcDeep: - runtime.gcDeep = true; + case oRuntime_gcRepack: + runtime.gcRepack = true; - dPrintf( "runtime[gcDeep] = true\n" ); + dPrintf( "runtime[gcRepack] = true\n" ); return true; /* NOTREACHED */ diff --git a/config.hh b/config.hh index 31cd5be..a721055 100644 --- a/config.hh +++ b/config.hh @@ -29,13 +29,13 @@ public: size_t threads; size_t cacheSize; bitset< BackupExchanger::Flags > exchange; - bool gcDeep; + bool gcRepack; // Default runtime config RuntimeConfig(): threads( getNumberOfCpus() ), cacheSize( 40 * 1024 * 1024 ), // 40 MB - gcDeep ( false ) + gcRepack ( false ) { } }; @@ -60,7 +60,7 @@ public: oRuntime_threads, oRuntime_cacheSize, oRuntime_exchange, - oRuntime_gcDeep, + oRuntime_gcRepack, oDeprecated, oUnsupported } OpCodes; diff --git a/zutils.cc b/zutils.cc index a31bad1..fbffce5 100644 --- a/zutils.cc +++ b/zutils.cc @@ -316,15 +316,13 @@ void ZCollector::gc() string fileName; - Dir::Entry entry; - BundleCollector collector; collector.bundlesPath = getBundlesPath(); collector.chunkStorageReader = &this->chunkStorageReader; collector.chunkStorageWriter = &chunkStorageWriter; - collector.deepGC = config.runtime.gcDeep; + collector.gcRepack = 
config.runtime.gcRepack; - verbosePrintf( "Checking used chunks...\n" ); + verbosePrintf( "Performing garbage collection...\n" ); verbosePrintf( "Searching for backups...\n" ); vector< string > backups = BackupExchanger::findOrRebuild( getBackupsPath() ); @@ -356,6 +354,7 @@ void ZCollector::gc() string bundlesPath = getBundlesPath(); Dir::Listing bundleLst( bundlesPath ); + Dir::Entry entry; while( bundleLst.getNext( entry ) ) { const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());