diff --git a/backup_collector.cc b/backup_collector.cc index 147f874..0a2ee08 100644 --- a/backup_collector.cc +++ b/backup_collector.cc @@ -3,39 +3,31 @@ #include "backup_collector.hh" -#include -#include - -#include "bundle.hh" -#include "chunk_index.hh" -#include "backup_restorer.hh" -#include "backup_file.hh" -#include "backup_exchanger.hh" - -#include "debug.hh" - using std::string; void BundleCollector::startIndex( string const & indexFn ) { - indexModified = false; + indexModified = indexNecessary = false; indexTotalChunks = indexUsedChunks = 0; indexModifiedBundles = indexKeptBundles = indexRemovedBundles = 0; } void BundleCollector::finishIndex( string const & indexFn ) { + verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n", + indexUsedChunks, indexTotalChunks, indexKeptBundles, + indexModifiedBundles, indexRemovedBundles ); if ( indexModified ) { - verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n", - indexUsedChunks, indexTotalChunks, indexKeptBundles, - indexModifiedBundles, indexRemovedBundles); filesToUnlink.push_back( indexFn ); commit(); } else { chunkStorageWriter->reset(); + if ( !indexNecessary ) + // this index was a complete copy so we don't need it + filesToUnlink.push_back( indexFn ); } } @@ -48,10 +40,16 @@ void BundleCollector::startBundle( Bundle::Id const & bundleId ) void BundleCollector::processChunk( ChunkId const & chunkId ) { + if ( overallChunkSet.find ( chunkId ) == overallChunkSet.end() ) + overallChunkSet.insert( chunkId ); + else + return; + totalChunks++; if ( usedChunkSet.find( chunkId ) != usedChunkSet.end() ) { usedChunks++; + indexNecessary = true; } } @@ -60,38 +58,74 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons string i = Bundle::generateFileName( savedId, "", false ); indexTotalChunks += totalChunks; indexUsedChunks += usedChunks; - if ( usedChunks == 0 ) + if ( 0 == usedChunks && 0 != totalChunks ) { - verbosePrintf( "Deleting %s bundle\n", i.c_str() ); + dPrintf( "Deleting %s bundle\n", i.c_str() ); filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); indexModified = true; indexRemovedBundles++; } else if ( usedChunks < totalChunks ) { - verbosePrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks ); + dPrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks ); filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); indexModified = true; - // Copy used chunks to the new index - string chunk; - size_t chunkSize; - for ( int x = info.chunk_record_size(); x--; ) - { - BundleInfo_ChunkRecord const & record = info.chunk_record( x ); - ChunkId id( record.id() ); - if ( usedChunkSet.find( id ) != usedChunkSet.end() ) - { - chunkStorageReader->get( id, chunk, chunkSize ); - chunkStorageWriter->add( id, chunk.data(), chunkSize ); - } - } + copyUsedChunks( info ); indexModifiedBundles++; } else { - chunkStorageWriter->addBundle( info, savedId ); - verbosePrintf( "Keeping %s bundle\n", i.c_str() ); - indexKeptBundles++; + if ( gcRepack ) + { + filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); + indexModified = true; + copyUsedChunks( info ); + indexModifiedBundles++; + } + else + { + if ( 0 == totalChunks ) + { + if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() ) + { + overallBundleSet.insert( bundleId ); + dPrintf( "Deleting %s bundle\n", i.c_str() ); + filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) ); + indexModified = true; + indexRemovedBundles++; + } + else + { + // trigger index update + indexModified = true; + } + } + else + { + if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() ) + overallBundleSet.insert( bundleId ); + chunkStorageWriter->addBundle( info, savedId ); + dPrintf( "Keeping %s bundle\n", i.c_str() ); + indexKeptBundles++; + } + } + } +} + +void BundleCollector::copyUsedChunks( BundleInfo const & info ) +{ + // Copy used chunks to the new index + string chunk; + size_t chunkSize; + for ( int x = info.chunk_record_size(); x--; ) + { + BundleInfo_ChunkRecord const & record = info.chunk_record( x ); + ChunkId id( record.id() ); + if ( usedChunkSet.find( id ) != usedChunkSet.end() ) + { + chunkStorageReader->get( id, chunk, chunkSize ); + chunkStorageWriter->add( id, chunk.data(), chunkSize ); + } } } @@ -99,6 +133,7 @@ void BundleCollector::commit() { for ( int i = filesToUnlink.size(); i--; ) { + dPrintf( "Unlinking %s\n", filesToUnlink[i].c_str() ); unlink( filesToUnlink[i].c_str() ); } filesToUnlink.clear(); diff --git a/backup_collector.hh b/backup_collector.hh index 96669e7..8144051 100644 --- a/backup_collector.hh +++ b/backup_collector.hh @@ -4,18 +4,11 @@ #ifndef BACKUP_COLLECTOR_HH_INCLUDED #define BACKUP_COLLECTOR_HH_INCLUDED -#include "zbackup_base.hh" -#include "chunk_storage.hh" - #include #include -#include -#include "bundle.hh" -#include "chunk_index.hh" #include "backup_restorer.hh" #include "backup_file.hh" -#include "backup_exchanger.hh" #include "debug.hh" @@ -27,14 +20,19 @@ private: Bundle::Id savedId; int totalChunks, usedChunks, indexTotalChunks, indexUsedChunks; int indexModifiedBundles, indexKeptBundles, indexRemovedBundles; - bool indexModified; + bool indexModified, indexNecessary; vector< string > filesToUnlink; + BackupRestorer::ChunkSet overallChunkSet; + std::set< Bundle::Id > overallBundleSet; + + void copyUsedChunks( BundleInfo const & info ); public: string bundlesPath; ChunkStorage::Reader *chunkStorageReader; ChunkStorage::Writer *chunkStorageWriter; BackupRestorer::ChunkSet usedChunkSet; + bool gcRepack; void startIndex( string const & indexFn ); diff --git a/backup_exchanger.hh b/backup_exchanger.hh index 457c48f..d32ae73 100644 --- a/backup_exchanger.hh +++ b/backup_exchanger.hh @@ -17,7 +17,7 @@ using std::pair; enum { backups, bundles, - index, + indexes, Flags }; diff --git a/bundle.hh b/bundle.hh index 709a126..f800774 100644 --- a/bundle.hh +++ b/bundle.hh @@ -42,6 +42,8 @@ struct Id { return memcmp( blob, other.blob, sizeof( blob ) ) == 0; } bool operator != ( Id const & other ) const { return ! operator == ( other ); } + bool operator < ( Id const & other ) const + { return memcmp( blob, other.blob, sizeof( blob ) ) < 0; } }; STATIC_ASSERT( sizeof( Id ) == IdSize ); diff --git a/chunk_index.hh b/chunk_index.hh index bd34a18..09108a4 100644 --- a/chunk_index.hh +++ b/chunk_index.hh @@ -87,7 +87,7 @@ public: DEF_EX( Ex, "Chunk index exception", std::exception ) DEF_EX( exIncorrectChunkIdSize, "Incorrect chunk id size encountered", Ex ) - ChunkIndex( EncryptionKey const &, TmpMgr &, string const & indexPath, bool prohibitChunkIndexLoading ); + ChunkIndex( EncryptionKey const &, TmpMgr &, string const & indexPath, bool ); struct ChunkInfoInterface { diff --git a/config.cc b/config.cc index d29195c..cdeba8c 100644 --- a/config.cc +++ b/config.cc @@ -104,8 +104,8 @@ void Config::prefillKeywords() "cache-size", Config::oRuntime_cacheSize, Config::Runtime, - "Cache size to use in restore process\n" - "Affects restore process speed directly\n" + "Cache size to use in restore process.\n" + "Affects restore process speed directly.\n" VALID_SUFFIXES "Default is %sMiB", Utils::numberToString( runtime.cacheSize / 1024 / 1024 ) @@ -114,13 +114,23 @@ void Config::prefillKeywords() "exchange", Config::oRuntime_exchange, Config::Runtime, - "Data to exchange between repositories in import/export process\n" - "Can be specified multiple times\n" + "Data to exchange between repositories in import/export process.\n" + "Can be specified multiple times.\n" "Valid values:\n" "backups - exchange backup instructions (files in backups/ directory)\n" "bundles - exchange bundles with data (files in bunles/ directory)\n" - "index - exchange indicies of chunks (files in index/ directory)\n" - "No default value, you should specify it explicitly" + "indexes - exchange indexes of chunks (files in index/ directory)\n" + "No default value, you should specify it explicitly." + }, + + { + "gc-repack", + Config::oRuntime_gcRepack, + Config::Runtime, + "Repack indexes and bundles during garbage collection.\n" + "Normally you would not need this.\n" + "Beware that this options causes very intensive IO!\n" + "Not default, you should specify it explicitly." }, { "", Config::oBadOption, Config::None } @@ -439,12 +449,13 @@ bool Config::parseOrValidate( const string & option, const OptionType type, if ( strcmp( optionValue, "bundles" ) == 0 ) runtime.exchange.set( BackupExchanger::bundles ); else - if ( strcmp( optionValue, "index" ) == 0 ) - runtime.exchange.set( BackupExchanger::index ); + if ( strcmp( optionValue, "indexes" ) == 0 || + strcmp( optionValue, "index" ) == 0 ) + runtime.exchange.set( BackupExchanger::indexes ); else { fprintf( stderr, "Invalid exchange value specified: %s\n" - "Must be one of the following: backups, bundles, index.\n", + "Must be one of the following: backups, bundles, indexes.\n", optionValue ); return false; } @@ -455,6 +466,15 @@ bool Config::parseOrValidate( const string & option, const OptionType type, /* NOTREACHED */ break; + case oRuntime_gcRepack: + runtime.gcRepack = true; + + dPrintf( "runtime[gcRepack] = true\n" ); + + return true; + /* NOTREACHED */ + break; + case oBadOption: default: return false; diff --git a/config.hh b/config.hh index 871501d..a721055 100644 --- a/config.hh +++ b/config.hh @@ -29,11 +29,13 @@ public: size_t threads; size_t cacheSize; bitset< BackupExchanger::Flags > exchange; + bool gcRepack; // Default runtime config RuntimeConfig(): threads( getNumberOfCpus() ), - cacheSize( 40 * 1024 * 1024 ) // 40 MB + cacheSize( 40 * 1024 * 1024 ), // 40 MB + gcRepack ( false ) { } }; @@ -58,6 +60,7 @@ public: oRuntime_threads, oRuntime_cacheSize, oRuntime_exchange, + oRuntime_gcRepack, oDeprecated, oUnsupported } OpCodes; diff --git a/zbackup.cc b/zbackup.cc index 04479a2..59a7eb2 100644 --- a/zbackup.cc +++ b/zbackup.cc @@ -172,7 +172,8 @@ invalid_option: " performs import from source to destination storage,\n" " for export/import storage path must be\n" " a valid (initialized) storage\n" -" gc - performs chunk garbage collection\n" +" gc [chunks|indexes] - performs garbage\n" +" collection (default is chunks)\n" " passwd - changes repo info file passphrase\n" //" info - shows repo information\n" " config [show|edit|set|reset] - performs\n" @@ -278,14 +279,16 @@ invalid_option: else if ( strcmp( args[ 0 ], "gc" ) == 0 ) { - // Perform the restore + // Perform the garbage collection if ( args.size() != 2 ) { fprintf( stderr, "Usage: %s %s \n", *argv, args[ 0 ] ); return EXIT_FAILURE; } - ZCollector zc( args[ 1 ], passwords[ 0 ], config ); + + ZCollector zc( ZBackupBase::deriveStorageDirFromBackupsFile( args[ 1 ], true ), + passwords[ 0 ], config ); zc.gc(); } else diff --git a/zutils.cc b/zutils.cc index d57a524..fbffce5 100644 --- a/zutils.cc +++ b/zutils.cc @@ -207,13 +207,13 @@ void ZExchange::exchange() verbosePrintf( "Bundle exchange completed.\n" ); } - if ( config.runtime.exchange.test( BackupExchanger::index ) ) + if ( config.runtime.exchange.test( BackupExchanger::indexes ) ) { - verbosePrintf( "Searching for indicies...\n" ); - vector< string > indicies = BackupExchanger::findOrRebuild( + verbosePrintf( "Searching for indexes...\n" ); + vector< string > indexes = BackupExchanger::findOrRebuild( srcZBackupBase.getIndexPath(), dstZBackupBase.getIndexPath() ); - for ( std::vector< string >::iterator it = indicies.begin(); it != indicies.end(); ++it ) + for ( std::vector< string >::iterator it = indexes.begin(); it != indexes.end(); ++it ) { verbosePrintf( "Processing index file %s... ", it->c_str() ); string outputFileName ( Dir::addPath( dstZBackupBase.getIndexPath(), *it ) ); @@ -316,14 +316,13 @@ void ZCollector::gc() string fileName; - Dir::Entry entry; - BundleCollector collector; collector.bundlesPath = getBundlesPath(); collector.chunkStorageReader = &this->chunkStorageReader; collector.chunkStorageWriter = &chunkStorageWriter; + collector.gcRepack = config.runtime.gcRepack; - verbosePrintf( "Checking used chunks...\n" ); + verbosePrintf( "Performing garbage collection...\n" ); verbosePrintf( "Searching for backups...\n" ); vector< string > backups = BackupExchanger::findOrRebuild( getBackupsPath() ); @@ -355,6 +354,7 @@ void ZCollector::gc() string bundlesPath = getBundlesPath(); Dir::Listing bundleLst( bundlesPath ); + Dir::Entry entry; while( bundleLst.getNext( entry ) ) { const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());