mirror of https://github.com/vitalif/zbackup
GC collects duplicates among all repo data
parent
0a042c4bd2
commit
ff13dd72ad
|
@ -13,12 +13,12 @@ void BundleCollector::startIndex( string const & indexFn )
|
||||||
}
|
}
|
||||||
|
|
||||||
void BundleCollector::finishIndex( string const & indexFn )
|
void BundleCollector::finishIndex( string const & indexFn )
|
||||||
{
|
|
||||||
if ( indexModified )
|
|
||||||
{
|
{
|
||||||
verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n",
|
verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n",
|
||||||
indexUsedChunks, indexTotalChunks, indexKeptBundles,
|
indexUsedChunks, indexTotalChunks, indexKeptBundles,
|
||||||
indexModifiedBundles, indexRemovedBundles );
|
indexModifiedBundles, indexRemovedBundles );
|
||||||
|
if ( indexModified )
|
||||||
|
{
|
||||||
filesToUnlink.push_back( indexFn );
|
filesToUnlink.push_back( indexFn );
|
||||||
commit();
|
commit();
|
||||||
}
|
}
|
||||||
|
@ -75,19 +75,40 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( !deepGC )
|
if ( gcRepack )
|
||||||
{
|
|
||||||
chunkStorageWriter->addBundle( info, savedId );
|
|
||||||
dPrintf( "Keeping %s bundle\n", i.c_str() );
|
|
||||||
indexKeptBundles++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
|
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
|
||||||
indexModified = true;
|
indexModified = true;
|
||||||
copyUsedChunks( info );
|
copyUsedChunks( info );
|
||||||
indexModifiedBundles++;
|
indexModifiedBundles++;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( 0 == totalChunks )
|
||||||
|
{
|
||||||
|
if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() )
|
||||||
|
{
|
||||||
|
overallBundleSet.insert( bundleId );
|
||||||
|
dPrintf( "Deleting %s bundle\n", i.c_str() );
|
||||||
|
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
|
||||||
|
indexModified = true;
|
||||||
|
indexRemovedBundles++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// trigger index update
|
||||||
|
indexModified = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() )
|
||||||
|
overallBundleSet.insert( bundleId );
|
||||||
|
chunkStorageWriter->addBundle( info, savedId );
|
||||||
|
dPrintf( "Keeping %s bundle\n", i.c_str() );
|
||||||
|
indexKeptBundles++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ private:
|
||||||
bool indexModified, indexNecessary;
|
bool indexModified, indexNecessary;
|
||||||
vector< string > filesToUnlink;
|
vector< string > filesToUnlink;
|
||||||
BackupRestorer::ChunkSet overallChunkSet;
|
BackupRestorer::ChunkSet overallChunkSet;
|
||||||
|
std::set< Bundle::Id > overallBundleSet;
|
||||||
|
|
||||||
void copyUsedChunks( BundleInfo const & info );
|
void copyUsedChunks( BundleInfo const & info );
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ public:
|
||||||
ChunkStorage::Reader *chunkStorageReader;
|
ChunkStorage::Reader *chunkStorageReader;
|
||||||
ChunkStorage::Writer *chunkStorageWriter;
|
ChunkStorage::Writer *chunkStorageWriter;
|
||||||
BackupRestorer::ChunkSet usedChunkSet;
|
BackupRestorer::ChunkSet usedChunkSet;
|
||||||
bool deepGC;
|
bool gcRepack;
|
||||||
|
|
||||||
void startIndex( string const & indexFn );
|
void startIndex( string const & indexFn );
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,8 @@ struct Id
|
||||||
{ return memcmp( blob, other.blob, sizeof( blob ) ) == 0; }
|
{ return memcmp( blob, other.blob, sizeof( blob ) ) == 0; }
|
||||||
bool operator != ( Id const & other ) const
|
bool operator != ( Id const & other ) const
|
||||||
{ return ! operator == ( other ); }
|
{ return ! operator == ( other ); }
|
||||||
|
bool operator < ( Id const & other ) const
|
||||||
|
{ return memcmp( blob, other.blob, sizeof( blob ) ) < 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
STATIC_ASSERT( sizeof( Id ) == IdSize );
|
STATIC_ASSERT( sizeof( Id ) == IdSize );
|
||||||
|
|
30
config.cc
30
config.cc
|
@ -104,8 +104,8 @@ void Config::prefillKeywords()
|
||||||
"cache-size",
|
"cache-size",
|
||||||
Config::oRuntime_cacheSize,
|
Config::oRuntime_cacheSize,
|
||||||
Config::Runtime,
|
Config::Runtime,
|
||||||
"Cache size to use in restore process\n"
|
"Cache size to use in restore process.\n"
|
||||||
"Affects restore process speed directly\n"
|
"Affects restore process speed directly.\n"
|
||||||
VALID_SUFFIXES
|
VALID_SUFFIXES
|
||||||
"Default is %sMiB",
|
"Default is %sMiB",
|
||||||
Utils::numberToString( runtime.cacheSize / 1024 / 1024 )
|
Utils::numberToString( runtime.cacheSize / 1024 / 1024 )
|
||||||
|
@ -114,25 +114,23 @@ void Config::prefillKeywords()
|
||||||
"exchange",
|
"exchange",
|
||||||
Config::oRuntime_exchange,
|
Config::oRuntime_exchange,
|
||||||
Config::Runtime,
|
Config::Runtime,
|
||||||
"Data to exchange between repositories in import/export process\n"
|
"Data to exchange between repositories in import/export process.\n"
|
||||||
"Can be specified multiple times\n"
|
"Can be specified multiple times.\n"
|
||||||
"Valid values:\n"
|
"Valid values:\n"
|
||||||
"backups - exchange backup instructions (files in backups/ directory)\n"
|
"backups - exchange backup instructions (files in backups/ directory)\n"
|
||||||
"bundles - exchange bundles with data (files in bunles/ directory)\n"
|
"bundles - exchange bundles with data (files in bunles/ directory)\n"
|
||||||
"indexes - exchange indexes of chunks (files in index/ directory)\n"
|
"indexes - exchange indexes of chunks (files in index/ directory)\n"
|
||||||
"No default value, you should specify it explicitly"
|
"No default value, you should specify it explicitly."
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"gc-deep",
|
"gc-repack",
|
||||||
Config::oRuntime_gcDeep,
|
Config::oRuntime_gcRepack,
|
||||||
Config::Runtime,
|
Config::Runtime,
|
||||||
"Perform inter-bundle and inter-index deduplication\n"
|
"Repack indexes and bundles during garbage collection.\n"
|
||||||
"during garbage collection\n"
|
"Normally you would not need this.\n"
|
||||||
"You would probably need it after exchange operation\n"
|
"Beware that this options causes very intensive IO!\n"
|
||||||
"You could also use this switch to repack all bundles\n"
|
"Not default, you should specify it explicitly."
|
||||||
"Beware that this switch causes very intensive IO!\n"
|
|
||||||
"This switch is not used by default, specify to enable"
|
|
||||||
},
|
},
|
||||||
|
|
||||||
{ "", Config::oBadOption, Config::None }
|
{ "", Config::oBadOption, Config::None }
|
||||||
|
@ -468,10 +466,10 @@ bool Config::parseOrValidate( const string & option, const OptionType type,
|
||||||
/* NOTREACHED */
|
/* NOTREACHED */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case oRuntime_gcDeep:
|
case oRuntime_gcRepack:
|
||||||
runtime.gcDeep = true;
|
runtime.gcRepack = true;
|
||||||
|
|
||||||
dPrintf( "runtime[gcDeep] = true\n" );
|
dPrintf( "runtime[gcRepack] = true\n" );
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
/* NOTREACHED */
|
/* NOTREACHED */
|
||||||
|
|
|
@ -29,13 +29,13 @@ public:
|
||||||
size_t threads;
|
size_t threads;
|
||||||
size_t cacheSize;
|
size_t cacheSize;
|
||||||
bitset< BackupExchanger::Flags > exchange;
|
bitset< BackupExchanger::Flags > exchange;
|
||||||
bool gcDeep;
|
bool gcRepack;
|
||||||
|
|
||||||
// Default runtime config
|
// Default runtime config
|
||||||
RuntimeConfig():
|
RuntimeConfig():
|
||||||
threads( getNumberOfCpus() ),
|
threads( getNumberOfCpus() ),
|
||||||
cacheSize( 40 * 1024 * 1024 ), // 40 MB
|
cacheSize( 40 * 1024 * 1024 ), // 40 MB
|
||||||
gcDeep ( false )
|
gcRepack ( false )
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -60,7 +60,7 @@ public:
|
||||||
oRuntime_threads,
|
oRuntime_threads,
|
||||||
oRuntime_cacheSize,
|
oRuntime_cacheSize,
|
||||||
oRuntime_exchange,
|
oRuntime_exchange,
|
||||||
oRuntime_gcDeep,
|
oRuntime_gcRepack,
|
||||||
|
|
||||||
oDeprecated, oUnsupported
|
oDeprecated, oUnsupported
|
||||||
} OpCodes;
|
} OpCodes;
|
||||||
|
|
|
@ -316,15 +316,13 @@ void ZCollector::gc()
|
||||||
|
|
||||||
string fileName;
|
string fileName;
|
||||||
|
|
||||||
Dir::Entry entry;
|
|
||||||
|
|
||||||
BundleCollector collector;
|
BundleCollector collector;
|
||||||
collector.bundlesPath = getBundlesPath();
|
collector.bundlesPath = getBundlesPath();
|
||||||
collector.chunkStorageReader = &this->chunkStorageReader;
|
collector.chunkStorageReader = &this->chunkStorageReader;
|
||||||
collector.chunkStorageWriter = &chunkStorageWriter;
|
collector.chunkStorageWriter = &chunkStorageWriter;
|
||||||
collector.deepGC = config.runtime.gcDeep;
|
collector.gcRepack = config.runtime.gcRepack;
|
||||||
|
|
||||||
verbosePrintf( "Checking used chunks...\n" );
|
verbosePrintf( "Performing garbage collection...\n" );
|
||||||
|
|
||||||
verbosePrintf( "Searching for backups...\n" );
|
verbosePrintf( "Searching for backups...\n" );
|
||||||
vector< string > backups = BackupExchanger::findOrRebuild( getBackupsPath() );
|
vector< string > backups = BackupExchanger::findOrRebuild( getBackupsPath() );
|
||||||
|
@ -356,6 +354,7 @@ void ZCollector::gc()
|
||||||
|
|
||||||
string bundlesPath = getBundlesPath();
|
string bundlesPath = getBundlesPath();
|
||||||
Dir::Listing bundleLst( bundlesPath );
|
Dir::Listing bundleLst( bundlesPath );
|
||||||
|
Dir::Entry entry;
|
||||||
while( bundleLst.getNext( entry ) )
|
while( bundleLst.getNext( entry ) )
|
||||||
{
|
{
|
||||||
const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());
|
const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());
|
||||||
|
|
Loading…
Reference in New Issue