Merge pull request #87 from Am1GO/master

Updated GC logic
master
Am1GO 2015-08-18 19:09:24 +03:00
commit c32ce15e51
9 changed files with 125 additions and 64 deletions

View File

@ -3,39 +3,31 @@
#include "backup_collector.hh"
#include <string>
#include <vector>
#include "bundle.hh"
#include "chunk_index.hh"
#include "backup_restorer.hh"
#include "backup_file.hh"
#include "backup_exchanger.hh"
#include "debug.hh"
using std::string;
// Resets the per-index bookkeeping before the bundles of one index file are
// processed. indexFn is not used by this function.
void BundleCollector::startIndex( string const & indexFn )
{
  // Both flags start out false; processChunk() and finishBundle() raise them
  // while the index is being walked
  indexModified = indexNecessary = false;
  // Chunk counters accumulated over all bundles of this index
  indexTotalChunks = indexUsedChunks = 0;
  // Per-index bundle statistics reported by finishIndex()
  indexModifiedBundles = indexKeptBundles = indexRemovedBundles = 0;
}
// Reports the statistics gathered for one index file and decides what to do
// with that file:
//  - index modified: the file is queued for unlinking and commit() is called;
//  - index untouched: the chunk storage writer is reset, and the file is
//    queued for unlinking only when none of its chunks turned out to be needed.
// indexFn is the path of the index file that has just been processed.
void BundleCollector::finishIndex( string const & indexFn )
{
  // Printed exactly once per index, regardless of which branch is taken below
  verbosePrintf( "Chunks used: %d/%d, bundles: %d kept, %d modified, %d removed\n",
                 indexUsedChunks, indexTotalChunks, indexKeptBundles,
                 indexModifiedBundles, indexRemovedBundles );
  if ( indexModified )
  {
    filesToUnlink.push_back( indexFn );
    commit();
  }
  else
  {
    // Nothing changed, so discard whatever the writer has accumulated
    chunkStorageWriter->reset();
    if ( !indexNecessary )
      // this index was a complete copy so we don't need it
      filesToUnlink.push_back( indexFn );
  }
}
@ -48,10 +40,16 @@ void BundleCollector::startBundle( Bundle::Id const & bundleId )
// Accounts for a single chunk id. An id that is already recorded in
// overallChunkSet is ignored; otherwise it is recorded, counted in
// totalChunks and, when it is also present in usedChunkSet, counted in
// usedChunks and the current index is flagged as necessary.
void BundleCollector::processChunk( ChunkId const & chunkId )
{
  // Already recorded: do not count it a second time
  if ( overallChunkSet.find( chunkId ) != overallChunkSet.end() )
    return;
  overallChunkSet.insert( chunkId );

  ++totalChunks;

  bool const inUse = usedChunkSet.find( chunkId ) != usedChunkSet.end();
  if ( !inUse )
    return;

  ++usedChunks;
  indexNecessary = true;
}
@ -60,38 +58,74 @@ void BundleCollector::finishBundle( Bundle::Id const & bundleId, BundleInfo cons
string i = Bundle::generateFileName( savedId, "", false );
indexTotalChunks += totalChunks;
indexUsedChunks += usedChunks;
if ( usedChunks == 0 )
if ( 0 == usedChunks && 0 != totalChunks )
{
verbosePrintf( "Deleting %s bundle\n", i.c_str() );
dPrintf( "Deleting %s bundle\n", i.c_str() );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
indexRemovedBundles++;
}
else if ( usedChunks < totalChunks )
{
verbosePrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks );
dPrintf( "%s: used %d/%d chunks\n", i.c_str(), usedChunks, totalChunks );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
// Copy used chunks to the new index
string chunk;
size_t chunkSize;
for ( int x = info.chunk_record_size(); x--; )
{
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
ChunkId id( record.id() );
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
{
chunkStorageReader->get( id, chunk, chunkSize );
chunkStorageWriter->add( id, chunk.data(), chunkSize );
}
}
copyUsedChunks( info );
indexModifiedBundles++;
}
else
{
chunkStorageWriter->addBundle( info, savedId );
verbosePrintf( "Keeping %s bundle\n", i.c_str() );
indexKeptBundles++;
if ( gcRepack )
{
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
copyUsedChunks( info );
indexModifiedBundles++;
}
else
{
if ( 0 == totalChunks )
{
if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() )
{
overallBundleSet.insert( bundleId );
dPrintf( "Deleting %s bundle\n", i.c_str() );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
indexRemovedBundles++;
}
else
{
// trigger index update
indexModified = true;
}
}
else
{
if ( overallBundleSet.find ( bundleId ) == overallBundleSet.end() )
overallBundleSet.insert( bundleId );
chunkStorageWriter->addBundle( info, savedId );
dPrintf( "Keeping %s bundle\n", i.c_str() );
indexKeptBundles++;
}
}
}
}
// Re-adds every chunk of the given bundle that is present in usedChunkSet:
// each such chunk is fetched through chunkStorageReader and handed to
// chunkStorageWriter. Chunks absent from usedChunkSet are skipped.
void BundleCollector::copyUsedChunks( BundleInfo const & info )
{
  string data;
  size_t dataSize;

  // Records are visited from the last one down to the first one
  for ( int pos = info.chunk_record_size() - 1; pos >= 0; --pos )
  {
    ChunkId id( info.chunk_record( pos ).id() );

    if ( usedChunkSet.find( id ) == usedChunkSet.end() )
      continue;

    chunkStorageReader->get( id, data, dataSize );
    chunkStorageWriter->add( id, data.data(), dataSize );
  }
}
@ -99,6 +133,7 @@ void BundleCollector::commit()
{
for ( int i = filesToUnlink.size(); i--; )
{
dPrintf( "Unlinking %s\n", filesToUnlink[i].c_str() );
unlink( filesToUnlink[i].c_str() );
}
filesToUnlink.clear();

View File

@ -4,18 +4,11 @@
#ifndef BACKUP_COLLECTOR_HH_INCLUDED
#define BACKUP_COLLECTOR_HH_INCLUDED
#include "zbackup_base.hh"
#include "chunk_storage.hh"
#include <string>
#include <vector>
#include <unistd.h>
#include "bundle.hh"
#include "chunk_index.hh"
#include "backup_restorer.hh"
#include "backup_file.hh"
#include "backup_exchanger.hh"
#include "debug.hh"
@ -27,14 +20,19 @@ private:
Bundle::Id savedId;
int totalChunks, usedChunks, indexTotalChunks, indexUsedChunks;
int indexModifiedBundles, indexKeptBundles, indexRemovedBundles;
bool indexModified;
bool indexModified, indexNecessary;
vector< string > filesToUnlink;
BackupRestorer::ChunkSet overallChunkSet;
std::set< Bundle::Id > overallBundleSet;
void copyUsedChunks( BundleInfo const & info );
public:
string bundlesPath;
ChunkStorage::Reader *chunkStorageReader;
ChunkStorage::Writer *chunkStorageWriter;
BackupRestorer::ChunkSet usedChunkSet;
bool gcRepack;
void startIndex( string const & indexFn );

View File

@ -17,7 +17,7 @@ using std::pair;
// Kinds of repository data selectable for exchange. Each enumerator is used
// as a bit position in the runtime `exchange` bitset.
enum {
backups,
bundles,
// NOTE(review): both `index` and `indexes` are listed here; this looks like
// the old and the new spelling of the same flag appearing together - confirm
// that only one of them is intended, since an extra enumerator shifts Flags.
index,
indexes,
// Not a flag itself: used as the size of bitset< BackupExchanger::Flags >
Flags
};

View File

@ -42,6 +42,8 @@ struct Id
{ return memcmp( blob, other.blob, sizeof( blob ) ) == 0; }
bool operator != ( Id const & other ) const
{ return ! operator == ( other ); }
// Orders ids by a byte-wise memcmp over the whole blob, consistent with
// operator == above. Presumably this is what allows Id to be used as the key
// of an ordered container such as std::set - confirm against callers.
bool operator < ( Id const & other ) const
{ return memcmp( blob, other.blob, sizeof( blob ) ) < 0; }
};
STATIC_ASSERT( sizeof( Id ) == IdSize );

View File

@ -87,7 +87,7 @@ public:
DEF_EX( Ex, "Chunk index exception", std::exception )
DEF_EX( exIncorrectChunkIdSize, "Incorrect chunk id size encountered", Ex )
ChunkIndex( EncryptionKey const &, TmpMgr &, string const & indexPath, bool prohibitChunkIndexLoading );
ChunkIndex( EncryptionKey const &, TmpMgr &, string const & indexPath, bool );
struct ChunkInfoInterface
{

View File

@ -104,8 +104,8 @@ void Config::prefillKeywords()
"cache-size",
Config::oRuntime_cacheSize,
Config::Runtime,
"Cache size to use in restore process\n"
"Affects restore process speed directly\n"
"Cache size to use in restore process.\n"
"Affects restore process speed directly.\n"
VALID_SUFFIXES
"Default is %sMiB",
Utils::numberToString( runtime.cacheSize / 1024 / 1024 )
@ -114,13 +114,23 @@ void Config::prefillKeywords()
"exchange",
Config::oRuntime_exchange,
Config::Runtime,
"Data to exchange between repositories in import/export process\n"
"Can be specified multiple times\n"
"Data to exchange between repositories in import/export process.\n"
"Can be specified multiple times.\n"
"Valid values:\n"
"backups - exchange backup instructions (files in backups/ directory)\n"
"bundles - exchange bundles with data (files in bunles/ directory)\n"
"index - exchange indicies of chunks (files in index/ directory)\n"
"No default value, you should specify it explicitly"
"indexes - exchange indexes of chunks (files in index/ directory)\n"
"No default value, you should specify it explicitly."
},
{
"gc-repack",
Config::oRuntime_gcRepack,
Config::Runtime,
"Repack indexes and bundles during garbage collection.\n"
"Normally you would not need this.\n"
"Beware that this options causes very intensive IO!\n"
"Not default, you should specify it explicitly."
},
{ "", Config::oBadOption, Config::None }
@ -439,12 +449,13 @@ bool Config::parseOrValidate( const string & option, const OptionType type,
if ( strcmp( optionValue, "bundles" ) == 0 )
runtime.exchange.set( BackupExchanger::bundles );
else
if ( strcmp( optionValue, "index" ) == 0 )
runtime.exchange.set( BackupExchanger::index );
if ( strcmp( optionValue, "indexes" ) == 0 ||
strcmp( optionValue, "index" ) == 0 )
runtime.exchange.set( BackupExchanger::indexes );
else
{
fprintf( stderr, "Invalid exchange value specified: %s\n"
"Must be one of the following: backups, bundles, index.\n",
"Must be one of the following: backups, bundles, indexes.\n",
optionValue );
return false;
}
@ -455,6 +466,15 @@ bool Config::parseOrValidate( const string & option, const OptionType type,
/* NOTREACHED */
break;
case oRuntime_gcRepack:
runtime.gcRepack = true;
dPrintf( "runtime[gcRepack] = true\n" );
return true;
/* NOTREACHED */
break;
case oBadOption:
default:
return false;

View File

@ -29,11 +29,13 @@ public:
size_t threads;
size_t cacheSize;
bitset< BackupExchanger::Flags > exchange;
bool gcRepack;
// Default runtime config
RuntimeConfig():
  threads( getNumberOfCpus() ),   // value reported by getNumberOfCpus()
  cacheSize( 40 * 1024 * 1024 ),  // 40 MiB
  gcRepack( false )               // repacking during gc is off unless requested
{
  // `exchange` is left default-constructed, i.e. no exchange flag is set
}
};
@ -58,6 +60,7 @@ public:
oRuntime_threads,
oRuntime_cacheSize,
oRuntime_exchange,
oRuntime_gcRepack,
oDeprecated, oUnsupported
} OpCodes;

View File

@ -172,7 +172,8 @@ invalid_option:
" performs import from source to destination storage,\n"
" for export/import storage path must be\n"
" a valid (initialized) storage\n"
" gc <storage path> - performs chunk garbage collection\n"
" gc [chunks|indexes] <storage path> - performs garbage\n"
" collection (default is chunks)\n"
" passwd <storage path> - changes repo info file passphrase\n"
//" info <storage path> - shows repo information\n"
" config [show|edit|set|reset] <storage path> - performs\n"
@ -278,14 +279,16 @@ invalid_option:
else
if ( strcmp( args[ 0 ], "gc" ) == 0 )
{
// Perform the restore
// Perform the garbage collection
if ( args.size() != 2 )
{
fprintf( stderr, "Usage: %s %s <storage path>\n",
*argv, args[ 0 ] );
return EXIT_FAILURE;
}
ZCollector zc( args[ 1 ], passwords[ 0 ], config );
ZCollector zc( ZBackupBase::deriveStorageDirFromBackupsFile( args[ 1 ], true ),
passwords[ 0 ], config );
zc.gc();
}
else

View File

@ -207,13 +207,13 @@ void ZExchange::exchange()
verbosePrintf( "Bundle exchange completed.\n" );
}
if ( config.runtime.exchange.test( BackupExchanger::index ) )
if ( config.runtime.exchange.test( BackupExchanger::indexes ) )
{
verbosePrintf( "Searching for indicies...\n" );
vector< string > indicies = BackupExchanger::findOrRebuild(
verbosePrintf( "Searching for indexes...\n" );
vector< string > indexes = BackupExchanger::findOrRebuild(
srcZBackupBase.getIndexPath(), dstZBackupBase.getIndexPath() );
for ( std::vector< string >::iterator it = indicies.begin(); it != indicies.end(); ++it )
for ( std::vector< string >::iterator it = indexes.begin(); it != indexes.end(); ++it )
{
verbosePrintf( "Processing index file %s... ", it->c_str() );
string outputFileName ( Dir::addPath( dstZBackupBase.getIndexPath(), *it ) );
@ -316,14 +316,13 @@ void ZCollector::gc()
string fileName;
Dir::Entry entry;
BundleCollector collector;
collector.bundlesPath = getBundlesPath();
collector.chunkStorageReader = &this->chunkStorageReader;
collector.chunkStorageWriter = &chunkStorageWriter;
collector.gcRepack = config.runtime.gcRepack;
verbosePrintf( "Checking used chunks...\n" );
verbosePrintf( "Performing garbage collection...\n" );
verbosePrintf( "Searching for backups...\n" );
vector< string > backups = BackupExchanger::findOrRebuild( getBackupsPath() );
@ -355,6 +354,7 @@ void ZCollector::gc()
string bundlesPath = getBundlesPath();
Dir::Listing bundleLst( bundlesPath );
Dir::Entry entry;
while( bundleLst.getNext( entry ) )
{
const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());