mirror of https://github.com/vitalif/zbackup
Initial implementation of deep GC
parent
a064d9a1d1
commit
0a042c4bd2
|
@ -18,14 +18,15 @@ void BundleCollector::finishIndex( string const & indexFn )
|
|||
{
|
||||
verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n",
|
||||
indexUsedChunks, indexTotalChunks, indexKeptBundles,
|
||||
indexModifiedBundles, indexRemovedBundles);
|
||||
indexModifiedBundles, indexRemovedBundles );
|
||||
filesToUnlink.push_back( indexFn );
|
||||
commit();
|
||||
}
|
||||
else
|
||||
{
|
||||
chunkStorageWriter->reset();
|
||||
if ( indexGC && !indexNecessary )
|
||||
if ( !indexNecessary )
|
||||
// this index was a complete copy so we don't need it
|
||||
filesToUnlink.push_back( indexFn );
|
||||
}
|
||||
}
|
||||
|
@ -39,13 +40,11 @@ void BundleCollector::startBundle( Bundle::Id const & bundleId )
|
|||
|
||||
void BundleCollector::processChunk( ChunkId const & chunkId )
|
||||
{
|
||||
if ( indexGC )
|
||||
{
|
||||
if ( overallChunkSet.find ( chunkId ) == overallChunkSet.end() )
|
||||
overallChunkSet.insert( chunkId );
|
||||
else
|
||||
return;
|
||||
}
|
||||
if ( overallChunkSet.find ( chunkId ) == overallChunkSet.end() )
|
||||
overallChunkSet.insert( chunkId );
|
||||
else
|
||||
return;
|
||||
|
||||
totalChunks++;
|
||||
if ( usedChunkSet.find( chunkId ) != usedChunkSet.end() )
|
||||
{
|
||||
|
@ -71,26 +70,41 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons
|
|||
dPrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks );
|
||||
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
|
||||
indexModified = true;
|
||||
// Copy used chunks to the new index
|
||||
string chunk;
|
||||
size_t chunkSize;
|
||||
for ( int x = info.chunk_record_size(); x--; )
|
||||
{
|
||||
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
|
||||
ChunkId id( record.id() );
|
||||
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
|
||||
{
|
||||
chunkStorageReader->get( id, chunk, chunkSize );
|
||||
chunkStorageWriter->add( id, chunk.data(), chunkSize );
|
||||
}
|
||||
}
|
||||
copyUsedChunks( info );
|
||||
indexModifiedBundles++;
|
||||
}
|
||||
else
|
||||
{
|
||||
chunkStorageWriter->addBundle( info, savedId );
|
||||
dPrintf( "Keeping %s bundle\n", i.c_str() );
|
||||
indexKeptBundles++;
|
||||
if ( !deepGC )
|
||||
{
|
||||
chunkStorageWriter->addBundle( info, savedId );
|
||||
dPrintf( "Keeping %s bundle\n", i.c_str() );
|
||||
indexKeptBundles++;
|
||||
}
|
||||
else
|
||||
{
|
||||
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
|
||||
indexModified = true;
|
||||
copyUsedChunks( info );
|
||||
indexModifiedBundles++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BundleCollector::copyUsedChunks( BundleInfo const & info )
|
||||
{
|
||||
// Copy used chunks to the new index
|
||||
string chunk;
|
||||
size_t chunkSize;
|
||||
for ( int x = info.chunk_record_size(); x--; )
|
||||
{
|
||||
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
|
||||
ChunkId id( record.id() );
|
||||
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
|
||||
{
|
||||
chunkStorageReader->get( id, chunk, chunkSize );
|
||||
chunkStorageWriter->add( id, chunk.data(), chunkSize );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,12 +24,14 @@ private:
|
|||
vector< string > filesToUnlink;
|
||||
BackupRestorer::ChunkSet overallChunkSet;
|
||||
|
||||
void copyUsedChunks( BundleInfo const & info );
|
||||
|
||||
public:
|
||||
string bundlesPath;
|
||||
ChunkStorage::Reader *chunkStorageReader;
|
||||
ChunkStorage::Writer *chunkStorageWriter;
|
||||
BackupRestorer::ChunkSet usedChunkSet;
|
||||
bool indexGC;
|
||||
bool deepGC;
|
||||
|
||||
void startIndex( string const & indexFn );
|
||||
|
||||
|
|
20
config.cc
20
config.cc
|
@ -124,13 +124,15 @@ void Config::prefillKeywords()
|
|||
},
|
||||
|
||||
{
|
||||
"gc-indexes",
|
||||
Config::oRuntime_gcIndexes,
|
||||
"gc-deep",
|
||||
Config::oRuntime_gcDeep,
|
||||
Config::Runtime,
|
||||
"Purge duplicated indexes from repo during\n"
|
||||
"garbage collection\n"
|
||||
"Normally you would not need this\n"
|
||||
"No value, specify to enable"
|
||||
"Perform inter-bundle and inter-index deduplication\n"
|
||||
"during garbage collection\n"
|
||||
"You would probably need it after exchange operation\n"
|
||||
"You could also use this switch to repack all bundles\n"
|
||||
"Beware that this switch causes very intensive IO!\n"
|
||||
"This switch is not used by default, specify to enable"
|
||||
},
|
||||
|
||||
{ "", Config::oBadOption, Config::None }
|
||||
|
@ -466,10 +468,10 @@ bool Config::parseOrValidate( const string & option, const OptionType type,
|
|||
/* NOTREACHED */
|
||||
break;
|
||||
|
||||
case oRuntime_gcIndexes:
|
||||
runtime.gcIndexes = true;
|
||||
case oRuntime_gcDeep:
|
||||
runtime.gcDeep = true;
|
||||
|
||||
dPrintf( "runtime[gcIndexes] = true\n" );
|
||||
dPrintf( "runtime[gcDeep] = true\n" );
|
||||
|
||||
return true;
|
||||
/* NOTREACHED */
|
||||
|
|
|
@ -29,13 +29,13 @@ public:
|
|||
size_t threads;
|
||||
size_t cacheSize;
|
||||
bitset< BackupExchanger::Flags > exchange;
|
||||
bool gcIndexes;
|
||||
bool gcDeep;
|
||||
|
||||
// Default runtime config
|
||||
RuntimeConfig():
|
||||
threads( getNumberOfCpus() ),
|
||||
cacheSize( 40 * 1024 * 1024 ), // 40 MB
|
||||
gcIndexes ( false )
|
||||
gcDeep ( false )
|
||||
{
|
||||
}
|
||||
};
|
||||
|
@ -60,7 +60,7 @@ public:
|
|||
oRuntime_threads,
|
||||
oRuntime_cacheSize,
|
||||
oRuntime_exchange,
|
||||
oRuntime_gcIndexes,
|
||||
oRuntime_gcDeep,
|
||||
|
||||
oDeprecated, oUnsupported
|
||||
} OpCodes;
|
||||
|
|
|
@ -322,7 +322,7 @@ void ZCollector::gc()
|
|||
collector.bundlesPath = getBundlesPath();
|
||||
collector.chunkStorageReader = &this->chunkStorageReader;
|
||||
collector.chunkStorageWriter = &chunkStorageWriter;
|
||||
collector.indexGC = config.runtime.gcIndexes;
|
||||
collector.deepGC = config.runtime.gcDeep;
|
||||
|
||||
verbosePrintf( "Checking used chunks...\n" );
|
||||
|
||||
|
|
Loading…
Reference in New Issue