Initial implementation of deep GC

master
Vladimir Stackov 2015-08-18 17:27:27 +03:00
parent a064d9a1d1
commit 0a042c4bd2
5 changed files with 57 additions and 39 deletions

View File

@ -18,14 +18,15 @@ void BundleCollector::finishIndex( string const & indexFn )
{
verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n",
indexUsedChunks, indexTotalChunks, indexKeptBundles,
indexModifiedBundles, indexRemovedBundles);
indexModifiedBundles, indexRemovedBundles );
filesToUnlink.push_back( indexFn );
commit();
}
else
{
chunkStorageWriter->reset();
if ( indexGC && !indexNecessary )
if ( !indexNecessary )
// this index was a complete copy so we don't need it
filesToUnlink.push_back( indexFn );
}
}
@ -39,13 +40,11 @@ void BundleCollector::startBundle( Bundle::Id const & bundleId )
void BundleCollector::processChunk( ChunkId const & chunkId )
{
if ( indexGC )
{
if ( overallChunkSet.find ( chunkId ) == overallChunkSet.end() )
overallChunkSet.insert( chunkId );
else
return;
}
if ( overallChunkSet.find ( chunkId ) == overallChunkSet.end() )
overallChunkSet.insert( chunkId );
else
return;
totalChunks++;
if ( usedChunkSet.find( chunkId ) != usedChunkSet.end() )
{
@ -71,26 +70,41 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons
dPrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
// Copy used chunks to the new index
string chunk;
size_t chunkSize;
for ( int x = info.chunk_record_size(); x--; )
{
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
ChunkId id( record.id() );
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
{
chunkStorageReader->get( id, chunk, chunkSize );
chunkStorageWriter->add( id, chunk.data(), chunkSize );
}
}
copyUsedChunks( info );
indexModifiedBundles++;
}
else
{
chunkStorageWriter->addBundle( info, savedId );
dPrintf( "Keeping %s bundle\n", i.c_str() );
indexKeptBundles++;
if ( !deepGC )
{
chunkStorageWriter->addBundle( info, savedId );
dPrintf( "Keeping %s bundle\n", i.c_str() );
indexKeptBundles++;
}
else
{
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
copyUsedChunks( info );
indexModifiedBundles++;
}
}
}
// Copy used chunks to the new index.
//
// Walks the chunk records of `info` from the last one down to the first.
// Every chunk whose id is present in usedChunkSet is fetched through
// chunkStorageReader and handed to chunkStorageWriter; chunks that are not
// in usedChunkSet are skipped.
void BundleCollector::copyUsedChunks( BundleInfo const & info )
{
  // Scratch buffer and size filled in by the reader, reused for each chunk
  string data;
  size_t dataSize;

  int index = info.chunk_record_size();
  while ( index-- )
  {
    ChunkId id( info.chunk_record( index ).id() );

    // Not referenced anymore - nothing to carry over
    if ( usedChunkSet.find( id ) == usedChunkSet.end() )
      continue;

    chunkStorageReader->get( id, data, dataSize );
    chunkStorageWriter->add( id, data.data(), dataSize );
  }
}

View File

@ -24,12 +24,14 @@ private:
vector< string > filesToUnlink;
BackupRestorer::ChunkSet overallChunkSet;
void copyUsedChunks( BundleInfo const & info );
public:
string bundlesPath;
ChunkStorage::Reader *chunkStorageReader;
ChunkStorage::Writer *chunkStorageWriter;
BackupRestorer::ChunkSet usedChunkSet;
bool indexGC;
bool deepGC;
void startIndex( string const & indexFn );

View File

@ -124,13 +124,15 @@ void Config::prefillKeywords()
},
{
"gc-indexes",
Config::oRuntime_gcIndexes,
"gc-deep",
Config::oRuntime_gcDeep,
Config::Runtime,
"Purge duplicated indexes from repo during\n"
"garbage collection\n"
"Normally you would not need this\n"
"No value, specify to enable"
"Perform inter-bundle and inter-index deduplication\n"
"during garbage collection\n"
"You would probably need it after exchange operation\n"
"You could also use this switch to repack all bundles\n"
"Beware that this switch causes very intensive IO!\n"
"This switch is not used by default, specify to enable"
},
{ "", Config::oBadOption, Config::None }
@ -466,10 +468,10 @@ bool Config::parseOrValidate( const string & option, const OptionType type,
/* NOTREACHED */
break;
case oRuntime_gcIndexes:
runtime.gcIndexes = true;
case oRuntime_gcDeep:
runtime.gcDeep = true;
dPrintf( "runtime[gcIndexes] = true\n" );
dPrintf( "runtime[gcDeep] = true\n" );
return true;
/* NOTREACHED */

View File

@ -29,13 +29,13 @@ public:
size_t threads;
size_t cacheSize;
bitset< BackupExchanger::Flags > exchange;
bool gcIndexes;
bool gcDeep;
// Default runtime config
RuntimeConfig():
threads( getNumberOfCpus() ),
cacheSize( 40 * 1024 * 1024 ), // 40 MB
gcIndexes ( false )
gcDeep ( false )
{
}
};
@ -60,7 +60,7 @@ public:
oRuntime_threads,
oRuntime_cacheSize,
oRuntime_exchange,
oRuntime_gcIndexes,
oRuntime_gcDeep,
oDeprecated, oUnsupported
} OpCodes;

View File

@ -322,7 +322,7 @@ void ZCollector::gc()
collector.bundlesPath = getBundlesPath();
collector.chunkStorageReader = &this->chunkStorageReader;
collector.chunkStorageWriter = &chunkStorageWriter;
collector.indexGC = config.runtime.gcIndexes;
collector.deepGC = config.runtime.gcDeep;
verbosePrintf( "Checking used chunks...\n" );