Refactoring to extract the gc code from ZRestore and make the world a slightly better place

Signed-off-by: Gleb Golubitsky <sectoid@gnolltech.org>
master
Gleb Golubitsky 2014-11-27 18:20:41 +02:00
parent d90d793c5f
commit e5cebd17e6
6 changed files with 410 additions and 325 deletions

View File

@ -26,124 +26,12 @@
#include "zbackup.hh"
#include "index_file.hh"
#include "bundle.hh"
#include "zcollector.hh"
using std::vector;
using std::bitset;
using std::iterator;
/// Stores the root directory of the storage; the get*Path() members build
/// their results from it.
Paths::Paths( string const & storageDir ):
  storageDir( storageDir )
{
}
/// Returns Dir::addPath() applied to the storage directory and "tmp".
string Paths::getTmpPath()
{
  return Dir::addPath( storageDir, "tmp" );
}
/// Returns Dir::addPath() applied to the storage directory and "bundles".
string Paths::getBundlesPath()
{
  return Dir::addPath( storageDir, "bundles" );
}
/// Returns Dir::addPath() applied to the storage directory and "info".
string Paths::getStorageInfoPath()
{
  return Dir::addPath( storageDir, "info" );
}
/// Returns Dir::addPath() applied to the storage directory and "index".
string Paths::getIndexPath()
{
  return Dir::addPath( storageDir, "index" );
}
/// Returns Dir::addPath() applied to the storage directory and "backups".
string Paths::getBackupsPath()
{
  return Dir::addPath( storageDir, "backups" );
}
/// Opens the storage rooted at storageDir. Passes false as the last ChunkIndex
/// argument, i.e. the value the three-argument constructor receives as
/// prohibitChunkIndexLoading.
ZBackupBase::ZBackupBase( string const & storageDir, string const & password ):
  Paths( storageDir ),
  // The Paths base is complete at this point, so loadStorageInfo() may use it
  storageInfo( loadStorageInfo() ),
  // The key is only handed over when the storage info carries one
  encryptionkey( password,
                 storageInfo.has_encryption_key() ?
                   &storageInfo.encryption_key() : 0 ),
  tmpMgr( getTmpPath() ),
  chunkIndex( encryptionkey, tmpMgr, getIndexPath(), false )
{
}
/// Opens the storage rooted at storageDir, forwarding
/// prohibitChunkIndexLoading unchanged as the last ChunkIndex argument.
ZBackupBase::ZBackupBase( string const & storageDir, string const & password,
                          bool prohibitChunkIndexLoading ):
  Paths( storageDir ),
  // The Paths base is complete at this point, so loadStorageInfo() may use it
  storageInfo( loadStorageInfo() ),
  // The key is only handed over when the storage info carries one
  encryptionkey( password,
                 storageInfo.has_encryption_key() ?
                   &storageInfo.encryption_key() : 0 ),
  tmpMgr( getTmpPath() ),
  chunkIndex( encryptionkey, tmpMgr, getIndexPath(),
              prohibitChunkIndexLoading )
{
}
/// Loads the StorageInfo record from the file named by getStorageInfoPath()
/// and returns it by value.
StorageInfo ZBackupBase::loadStorageInfo()
{
  // Named differently from the storageInfo member to avoid shadowing it
  StorageInfo loaded;

  StorageInfoFile::load( getStorageInfoPath(), loaded );

  return loaded;
}
/// Creates a new storage in storageDir: makes the storage, bundles, backups
/// and index directories where they do not exist yet and saves a fresh
/// StorageInfo record. When isEncrypted is set, an encryption key is generated
/// from password and stored in that record.
/// Throws exWontOverwrite if the storage info file already exists.
void ZBackupBase::initStorage( string const & storageDir,
                               string const & password,
                               bool isEncrypted )
{
  StorageInfo info;

  // TODO: make the following configurable
  info.set_chunk_max_size( 65536 );
  info.set_bundle_max_payload_size( 0x200000 );

  if ( isEncrypted )
    EncryptionKey::generate( password, *info.mutable_encryption_key() );

  Paths paths( storageDir );

  // Listed in creation order: the storage root has to come first
  string const requiredDirs[] = { storageDir,
                                  paths.getBundlesPath(),
                                  paths.getBackupsPath(),
                                  paths.getIndexPath() };
  size_t const requiredDirCount =
    sizeof( requiredDirs ) / sizeof( requiredDirs[ 0 ] );

  for ( size_t n = 0; n < requiredDirCount; ++n )
  {
    if ( !Dir::exists( requiredDirs[ n ] ) )
      Dir::create( requiredDirs[ n ] );
  }

  string const infoPath( paths.getStorageInfoPath() );

  // Never clobber the info file of an existing storage
  if ( File::exists( infoPath ) )
    throw exWontOverwrite( infoPath );

  StorageInfoFile::save( infoPath, info );
}
/// Works out the storage directory for a path located under backups/.
/// With allowOutside set, the resolved real path of backupsFile itself is
/// returned and no further checks are made. Otherwise throws
/// exCantDeriveStorageDir when the resolved directory of backupsFile neither
/// ends in "/backups" nor contains "/backups/".
string ZBackupBase::deriveStorageDirFromBackupsFile( string const &
                                                     backupsFile,
                                                     bool allowOutside )
{
  // TODO: handle cases when there's a backups/ folder within the backups/
  // folder correctly
  if ( allowOutside )
    return Dir::getRealPath( backupsFile );

  string const resolvedDir =
    Dir::getRealPath( Dir::getDirName( backupsFile ) );
  size_t const length = resolvedDir.size();

  // The directory is .../backups itself: the storage dir is what precedes it
  if ( length >= 8 &&
       strcmp( resolvedDir.c_str() + length - 8, "/backups" ) == 0 )
    return resolvedDir.substr( 0, length - 8 );

  // Otherwise cut at the last "/backups/" component, if there is one
  size_t const cut = resolvedDir.rfind( "/backups/" );

  if ( cut == string::npos )
    throw exCantDeriveStorageDir( backupsFile );

  return resolvedDir.substr( 0, cut );
}
ZBackup::ZBackup( string const & storageDir, string const & password,
size_t threads ):
ZBackupBase( storageDir, password ),
@ -291,166 +179,6 @@ void ZRestore::restoreToStdin( string const & inputFileName )
throw exChecksumError();
}
/// Garbage-collects the storage. Every entry of the backups directory is
/// loaded and run through BackupRestorer with a ChunkSet attached (presumably
/// this records the ids of all chunks the backups reference -- confirm in
/// BackupRestorer). The chunk index is then walked bundle by bundle: bundles
/// with no used chunks are scheduled for deletion, partially used bundles have
/// their used chunks copied through a new writer and are scheduled for
/// deletion, fully used bundles are re-registered as they are. Finally, empty
/// subdirectories of the bundles directory are removed.
void ZRestore::gc()
{
// The new index is built on a separate ChunkIndex created with "true" as its
// last argument (the parameter ZBackupBase calls prohibitChunkIndexLoading)
ChunkIndex chunkReindex( encryptionkey, tmpMgr, getIndexPath(), true );
ChunkStorage::Writer chunkStorageWriter( storageInfo, encryptionkey, tmpMgr, chunkReindex,
getBundlesPath(), getIndexPath(), threads );
// NOTE(review): fileName is never used in this function
string fileName;
string backupsPath = getBackupsPath();
Dir::Listing lst( backupsPath );
Dir::Entry entry;
// Index visitor that decides the fate of every bundle based on usedChunkSet
class BundleChecker: public IndexProcessor
{
private:
// Id of the bundle currently being processed, saved by startBundle()
Bundle::Id savedId;
// Per-bundle counters (totalChunks, usedChunks) and per-index-file counters
int totalChunks, usedChunks, indexTotalChunks, indexUsedChunks;
int indexModifiedBundles, indexKeptBundles, indexRemovedBundles;
// Set once any bundle of the current index file is deleted or rewritten
bool indexModified;
// Files to be removed by commit()
vector< string > filesToUnlink;
public:
// The fields below have no initializer here; gc() assigns them before use
string bundlesPath;
bool verbose;
ChunkStorage::Reader *chunkStorageReader;
ChunkStorage::Writer *chunkStorageWriter;
BackupRestorer::ChunkSet usedChunkSet;
// Resets the per-index-file statistics
void startIndex( string const & indexFn )
{
indexModified = false;
indexTotalChunks = indexUsedChunks = 0;
indexModifiedBundles = indexKeptBundles = indexRemovedBundles = 0;
}
// Reports statistics for a modified index file and schedules it for removal
void finishIndex( string const & indexFn )
{
if ( indexModified )
{
verbosePrintf( "Chunks: %d used / %d total, bundles: %d kept / %d modified / %d removed\n",
indexUsedChunks, indexTotalChunks, indexKeptBundles, indexModifiedBundles, indexRemovedBundles);
filesToUnlink.push_back( indexFn );
}
}
// Remembers the bundle id and resets the per-bundle counters
void startBundle( Bundle::Id const & bundleId )
{
savedId = bundleId;
totalChunks = 0;
usedChunks = 0;
}
// Counts the chunk, and counts it as used when it is in usedChunkSet
void processChunk( ChunkId const & chunkId )
{
totalChunks++;
if ( usedChunkSet.find( chunkId ) != usedChunkSet.end() )
{
usedChunks++;
}
}
// Decides what happens to the bundle; note that savedId from startBundle()
// is used rather than the bundleId argument
void finishBundle( Bundle::Id const & bundleId, BundleInfo const & info )
{
string i = Bundle::generateFileName( savedId, "", false );
indexTotalChunks += totalChunks;
indexUsedChunks += usedChunks;
if ( usedChunks == 0 )
{
// Nothing in this bundle is referenced: schedule the file for deletion
if ( verbose )
printf( "delete %s\n", i.c_str() );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
indexRemovedBundles++;
}
else if ( usedChunks < totalChunks )
{
// Partially used: schedule the file for deletion and carry the used chunks
// over through the writer
if ( verbose )
printf( "%s: used %d/%d\n", i.c_str(), usedChunks, totalChunks );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
// Copy used chunks to the new index
string chunk;
size_t chunkSize;
for ( int x = info.chunk_record_size(); x--; )
{
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
ChunkId id( record.id() );
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
{
chunkStorageReader->get( id, chunk, chunkSize );
chunkStorageWriter->add( id, chunk.data(), chunkSize );
}
}
indexModifiedBundles++;
}
else
{
// Fully used: hand the bundle info to the writer under its existing id
chunkStorageWriter->addBundle( info, savedId );
if ( verbose )
printf( "keep %s\n", i.c_str() );
indexKeptBundles++;
}
}
// Removes the scheduled files, then commits the writer.
// NOTE(review): files are unlinked before the writer commits and the result
// of unlink() is not checked -- confirm this ordering is intended
void commit()
{
for ( int i = filesToUnlink.size(); i--; )
{
unlink( filesToUnlink[i].c_str() );
}
filesToUnlink.clear();
chunkStorageWriter->commit();
}
} checker;
checker.bundlesPath = getBundlesPath();
checker.chunkStorageReader = &this->chunkStorageReader;
checker.chunkStorageWriter = &chunkStorageWriter;
checker.verbose = false;
verbosePrintf( "Checking used chunks...\n" );
// Every entry of the backups directory is loaded as a backup file
while( lst.getNext( entry ) )
{
verbosePrintf( "Checking backup %s...\n", entry.getFileName().c_str() );
BackupInfo backupInfo;
BackupFile::load( Dir::addPath( backupsPath, entry.getFileName() ), encryptionkey, backupInfo );
string backupData;
// Both calls receive the chunk set; the second one is given NULL as its
// third argument
BackupRestorer::restoreIterations( chunkStorageReader, backupInfo, backupData, &checker.usedChunkSet );
BackupRestorer::restore( chunkStorageReader, backupData, NULL, &checker.usedChunkSet );
}
verbosePrintf( "Checking bundles...\n" );
// Feeds the index to the checker (presumably through the start*/process*/
// finish* callbacks above -- confirm in ChunkIndex::loadIndex)
chunkIndex.loadIndex( checker );
checker.commit();
verbosePrintf( "Cleaning up...\n" );
// Remove subdirectories of the bundles directory that are now empty
string bundlesPath = getBundlesPath();
Dir::Listing bundleLst( bundlesPath );
while( bundleLst.getNext( entry ) )
{
const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());
if (entry.isDir() && Dir::isDirEmpty(dirPath)) {
Dir::remove(dirPath);
}
}
verbosePrintf( "Garbage collection complete\n" );
}
ZExchange::ZExchange( string const & srcStorageDir, string const & srcPassword,
string const & dstStorageDir, string const & dstPassword,
bool prohibitChunkIndexLoading ):
@ -842,7 +570,7 @@ int main( int argc, char *argv[] )
*argv );
return EXIT_FAILURE;
}
ZRestore zr( args[ 1 ], passwords[ 0 ], threads, cacheSizeMb * 1048576 );
ZCollector zr( args[ 1 ], passwords[ 0 ], threads, cacheSizeMb * 1048576 );
zr.gc();
}
else

View File

@ -17,63 +17,13 @@
#include "ex.hh"
#include "tmp_mgr.hh"
#include "zbackup.pb.h"
#include "zbackup_base.hh"
#include "backup_exchanger.hh"
using std::string;
using std::vector;
using std::bitset;
/// Holds the root directory of a storage and hands out paths derived from it.
struct Paths
{
  /// Root directory of the storage
  string storageDir;

  Paths( string const & storageDir );

  string getTmpPath();

  // NOTE(review): no definition of the next two members is visible in the
  // accompanying sources -- confirm they are still needed
  string getRestorePath();
  string getCreatePath();

  string getBundlesPath();
  string getStorageInfoPath();
  string getIndexPath();
  string getBackupsPath();
};
/// Base class that opens a storage: it owns the storage info, the encryption
/// key, the temporary file manager and the chunk index, and declares the
/// exception types below.
class ZBackupBase: public Paths
{
public:
  DEF_EX( Ex, "ZBackup exception", std::exception )
  DEF_EX_STR( exWontOverwrite, "Won't overwrite existing file", Ex )
  DEF_EX( exStdinError, "Error reading from standard input", Ex )
  DEF_EX( exWontReadFromTerminal, "Won't read data from a terminal", exStdinError )
  DEF_EX( exStdoutError, "Error writing to standard output", Ex )
  DEF_EX( exWontWriteToTerminal, "Won't write data to a terminal", exStdoutError )
  DEF_EX( exSerializeError, "Failed to serialize data", Ex )
  DEF_EX( exParseError, "Failed to parse data", Ex )
  DEF_EX( exChecksumError, "Checksum error", Ex )
  DEF_EX_STR( exCantDeriveStorageDir, "The path must be within the backups/ dir:", Ex )

  /// Opens the storage
  ZBackupBase( string const & storageDir, string const & password );
  ZBackupBase( string const & storageDir, string const & password,
               bool prohibitChunkIndexLoading );

  /// Creates new storage
  static void initStorage( string const & storageDir,
                           string const & password,
                           bool isEncrypted );

  /// For a given file within the backups/ dir in the storage, returns its
  /// storage dir or throws an exception
  static string deriveStorageDirFromBackupsFile( string const & backupsFile,
                                                 bool allowOutside = false );

  // Keep this declaration order: the constructors build encryptionkey from
  // storageInfo, and chunkIndex from encryptionkey and tmpMgr
  StorageInfo storageInfo;
  EncryptionKey encryptionkey;
  TmpMgr tmpMgr;
  ChunkIndex chunkIndex;

private:
  StorageInfo loadStorageInfo();
};
class ZBackup: public ZBackupBase
{
ChunkStorage::Writer chunkStorageWriter;
@ -97,6 +47,16 @@ public:
/// Restores the data to stdin
void restoreToStdin( string const & inputFileName );
};
/// Storage front-end that declares the gc() operation.
// NOTE(review): the accompanying sources define and use a class named
// ZCollector (zcollector.hh / zcollector.cc) with the same members; no
// definition of ZCollect's constructor or gc() is visible -- confirm whether
// this declaration is a leftover
class ZCollect: public ZBackupBase
{
private:
  ChunkStorage::Reader chunkStorageReader;
  size_t threads;

public:
  ZCollect( string const & storageDir, string const & password,
            size_t threads, size_t cacheSize );

  void gc();
};

121
zbackup_base.cc Normal file
View File

@ -0,0 +1,121 @@
// Copyright (c) 2012-2014 Konstantin Isakov <ikm@zbackup.org>
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
#include "zbackup_base.hh"
#include "storage_info_file.hh"
using std::string;
/// Records the storage root; all get*Path() members are computed from it.
Paths::Paths( string const & storageDir ):
  storageDir( storageDir )
{
}
/// Path of the "tmp" entry under the storage directory, built by
/// Dir::addPath().
string Paths::getTmpPath()
{
  return Dir::addPath( storageDir, "tmp" );
}
/// Path of the "bundles" entry under the storage directory, built by
/// Dir::addPath().
string Paths::getBundlesPath()
{
  return Dir::addPath( storageDir, "bundles" );
}
/// Path of the "info" entry under the storage directory, built by
/// Dir::addPath().
string Paths::getStorageInfoPath()
{
  return Dir::addPath( storageDir, "info" );
}
/// Path of the "index" entry under the storage directory, built by
/// Dir::addPath().
string Paths::getIndexPath()
{
  return Dir::addPath( storageDir, "index" );
}
/// Path of the "backups" entry under the storage directory, built by
/// Dir::addPath().
string Paths::getBackupsPath()
{
  return Dir::addPath( storageDir, "backups" );
}
/// Opens the storage in storageDir. The last ChunkIndex argument is false,
/// which is the slot the three-argument constructor fills with
/// prohibitChunkIndexLoading.
ZBackupBase::ZBackupBase( string const & storageDir, string const & password ):
  Paths( storageDir ),
  // Safe to call here: the Paths base has already been constructed
  storageInfo( loadStorageInfo() ),
  // A null key pointer is passed when the storage info has no key
  encryptionkey( password,
                 storageInfo.has_encryption_key() ?
                   &storageInfo.encryption_key() : 0 ),
  tmpMgr( getTmpPath() ),
  chunkIndex( encryptionkey, tmpMgr, getIndexPath(), false )
{
}
/// Opens the storage in storageDir and passes prohibitChunkIndexLoading
/// through as the last ChunkIndex argument.
ZBackupBase::ZBackupBase( string const & storageDir, string const & password,
                          bool prohibitChunkIndexLoading ):
  Paths( storageDir ),
  // Safe to call here: the Paths base has already been constructed
  storageInfo( loadStorageInfo() ),
  // A null key pointer is passed when the storage info has no key
  encryptionkey( password,
                 storageInfo.has_encryption_key() ?
                   &storageInfo.encryption_key() : 0 ),
  tmpMgr( getTmpPath() ),
  chunkIndex( encryptionkey, tmpMgr, getIndexPath(),
              prohibitChunkIndexLoading )
{
}
/// Reads the StorageInfo record from the file at getStorageInfoPath() and
/// returns it by value.
StorageInfo ZBackupBase::loadStorageInfo()
{
  // Deliberately not called storageInfo, which is also a member name
  StorageInfo result;

  StorageInfoFile::load( getStorageInfoPath(), result );

  return result;
}
/// Sets up a new storage in storageDir: creates the storage, bundles, backups
/// and index directories that are missing and writes a new StorageInfo record.
/// An encryption key derived from password is stored in the record when
/// isEncrypted is set.
/// Throws exWontOverwrite if the storage info file is already present.
void ZBackupBase::initStorage( string const & storageDir,
                               string const & password,
                               bool isEncrypted )
{
  StorageInfo newInfo;

  // TODO: make the following configurable
  newInfo.set_chunk_max_size( 65536 );
  newInfo.set_bundle_max_payload_size( 0x200000 );

  if ( isEncrypted )
    EncryptionKey::generate( password, *newInfo.mutable_encryption_key() );

  Paths paths( storageDir );

  // In creation order: the storage root must exist before its children
  string const neededDirs[] = { storageDir,
                                paths.getBundlesPath(),
                                paths.getBackupsPath(),
                                paths.getIndexPath() };
  size_t const neededDirCount =
    sizeof( neededDirs ) / sizeof( neededDirs[ 0 ] );

  for ( size_t n = 0; n < neededDirCount; ++n )
  {
    if ( !Dir::exists( neededDirs[ n ] ) )
      Dir::create( neededDirs[ n ] );
  }

  string const infoFilePath( paths.getStorageInfoPath() );

  // Refuse to replace the info file of an already initialised storage
  if ( File::exists( infoFilePath ) )
    throw exWontOverwrite( infoFilePath );

  StorageInfoFile::save( infoFilePath, newInfo );
}
/// Works out the storage directory for a path located under backups/.
///
/// With allowOutside set, the resolved real path of backupsFile itself is
/// returned and no further checks are made. Otherwise the directory holding
/// backupsFile is resolved and everything before its trailing "/backups" -- or
/// failing that, before its last "/backups/" component -- is returned.
///
/// Throws exCantDeriveStorageDir when neither form is found.
string ZBackupBase::deriveStorageDirFromBackupsFile( string const &
                                                     backupsFile,
                                                     bool allowOutside )
{
  // TODO: handle cases when there's a backups/ folder within the backups/
  // folder correctly
  if ( allowOutside )
    return Dir::getRealPath( backupsFile );

  // The suffix length is taken from the literal itself so the two cannot
  // drift apart
  static char const suffix[] = "/backups";
  size_t const suffixLen = sizeof( suffix ) - 1;

  string const resolvedDir =
    Dir::getRealPath( Dir::getDirName( backupsFile ) );
  size_t const length = resolvedDir.size();

  // string::compare is used instead of strcmp so that this file does not
  // depend on <cstring>, which it does not include
  if ( length >= suffixLen &&
       resolvedDir.compare( length - suffixLen, suffixLen, suffix ) == 0 )
    return resolvedDir.substr( 0, length - suffixLen );

  // Otherwise cut at the last "/backups/" component, if there is one
  size_t const cut = resolvedDir.rfind( "/backups/" );

  if ( cut == string::npos )
    throw exCantDeriveStorageDir( backupsFile );

  return resolvedDir.substr( 0, cut );
}

64
zbackup_base.hh Normal file
View File

@ -0,0 +1,64 @@
// Copyright (c) 2012-2014 Konstantin Isakov <ikm@zbackup.org>
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
#ifndef ZBACKUP_BASE_HH_INCLUDED__
#define ZBACKUP_BASE_HH_INCLUDED__
#include <exception>
#include <string>
#include "ex.hh"
#include "chunk_index.hh"
/// Holds the root directory of a storage and hands out paths derived from it.
struct Paths
{
  /// Root directory of the storage
  std::string storageDir;

  Paths( std::string const & storageDir );

  std::string getTmpPath();

  // NOTE(review): zbackup_base.cc defines neither of the next two members --
  // confirm they are still needed
  std::string getRestorePath();
  std::string getCreatePath();

  std::string getBundlesPath();
  std::string getStorageInfoPath();
  std::string getIndexPath();
  std::string getBackupsPath();
};
/// Base class that opens a storage: it owns the storage info, the encryption
/// key, the temporary file manager and the chunk index, and declares the
/// exception types below.
class ZBackupBase: public Paths
{
public:
  DEF_EX( Ex, "ZBackup exception", std::exception )
  DEF_EX_STR( exWontOverwrite, "Won't overwrite existing file", Ex )
  DEF_EX( exStdinError, "Error reading from standard input", Ex )
  DEF_EX( exWontReadFromTerminal, "Won't read data from a terminal", exStdinError )
  DEF_EX( exStdoutError, "Error writing to standard output", Ex )
  DEF_EX( exWontWriteToTerminal, "Won't write data to a terminal", exStdoutError )
  DEF_EX( exSerializeError, "Failed to serialize data", Ex )
  DEF_EX( exParseError, "Failed to parse data", Ex )
  DEF_EX( exChecksumError, "Checksum error", Ex )
  DEF_EX_STR( exCantDeriveStorageDir, "The path must be within the backups/ dir:", Ex )

  /// Opens the storage
  ZBackupBase( std::string const & storageDir, std::string const & password );
  ZBackupBase( std::string const & storageDir, std::string const & password,
               bool prohibitChunkIndexLoading );

  /// Creates new storage
  static void initStorage( std::string const & storageDir,
                           std::string const & password,
                           bool isEncrypted );

  /// For a given file within the backups/ dir in the storage, returns its
  /// storage dir or throws an exception
  static std::string deriveStorageDirFromBackupsFile(
    std::string const & backupsFile, bool allowOutside = false );

  // Keep this declaration order: the constructors build encryptionkey from
  // storageInfo, and chunkIndex from encryptionkey and tmpMgr
  StorageInfo storageInfo;
  EncryptionKey encryptionkey;
  TmpMgr tmpMgr;
  ChunkIndex chunkIndex;

private:
  StorageInfo loadStorageInfo();
};
#endif

190
zcollector.cc Normal file
View File

@ -0,0 +1,190 @@
// Copyright (c) 2012-2014 Konstantin Isakov <ikm@zbackup.org>
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
#include "zcollector.hh"
#include <string>
#include <vector>
#include "bundle.hh"
#include "chunk_index.hh"
#include "backup_restorer.hh"
#include "backup_file.hh"
#include "debug.hh"
using std::string;
namespace {
class BundleCollector: public IndexProcessor
{
private:
Bundle::Id savedId;
int totalChunks, usedChunks, indexTotalChunks, indexUsedChunks;
int indexModifiedBundles, indexKeptBundles, indexRemovedBundles;
bool indexModified;
vector< string > filesToUnlink;
public:
string bundlesPath;
bool verbose;
ChunkStorage::Reader *chunkStorageReader;
ChunkStorage::Writer *chunkStorageWriter;
BackupRestorer::ChunkSet usedChunkSet;
void startIndex( string const & indexFn )
{
indexModified = false;
indexTotalChunks = indexUsedChunks = 0;
indexModifiedBundles = indexKeptBundles = indexRemovedBundles = 0;
}
void finishIndex( string const & indexFn )
{
if ( indexModified )
{
verbosePrintf( "Chunks: %d used / %d total, bundles: %d kept / %d modified / %d removed\n",
indexUsedChunks, indexTotalChunks, indexKeptBundles, indexModifiedBundles, indexRemovedBundles);
filesToUnlink.push_back( indexFn );
}
}
void startBundle( Bundle::Id const & bundleId )
{
savedId = bundleId;
totalChunks = 0;
usedChunks = 0;
}
void processChunk( ChunkId const & chunkId )
{
totalChunks++;
if ( usedChunkSet.find( chunkId ) != usedChunkSet.end() )
{
usedChunks++;
}
}
void finishBundle( Bundle::Id const & bundleId, BundleInfo const & info )
{
string i = Bundle::generateFileName( savedId, "", false );
indexTotalChunks += totalChunks;
indexUsedChunks += usedChunks;
if ( usedChunks == 0 )
{
if ( verbose )
printf( "delete %s\n", i.c_str() );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
indexRemovedBundles++;
}
else if ( usedChunks < totalChunks )
{
if ( verbose )
printf( "%s: used %d/%d\n", i.c_str(), usedChunks, totalChunks );
filesToUnlink.push_back( Dir::addPath( bundlesPath, i ) );
indexModified = true;
// Copy used chunks to the new index
string chunk;
size_t chunkSize;
for ( int x = info.chunk_record_size(); x--; )
{
BundleInfo_ChunkRecord const & record = info.chunk_record( x );
ChunkId id( record.id() );
if ( usedChunkSet.find( id ) != usedChunkSet.end() )
{
chunkStorageReader->get( id, chunk, chunkSize );
chunkStorageWriter->add( id, chunk.data(), chunkSize );
}
}
indexModifiedBundles++;
}
else
{
chunkStorageWriter->addBundle( info, savedId );
if ( verbose )
printf( "keep %s\n", i.c_str() );
indexKeptBundles++;
}
}
void commit()
{
for ( int i = filesToUnlink.size(); i--; )
{
unlink( filesToUnlink[i].c_str() );
}
filesToUnlink.clear();
chunkStorageWriter->commit();
}
};
}
/// Opens the storage through ZBackupBase, sets up a chunk reader over the
/// bundles directory with the given cache size, and stores the thread count
/// that gc() later passes to its chunk writer.
ZCollector::ZCollector( string const & storageDir, string const & password,
                        size_t threads, size_t cacheSize ):
  ZBackupBase( storageDir, password ),
  chunkStorageReader( storageInfo, encryptionkey, chunkIndex,
                      getBundlesPath(), cacheSize ),
  // Inside the parentheses the name refers to the constructor parameter
  threads( threads )
{
}
void ZCollector::gc()
{
ChunkIndex chunkReindex( encryptionkey, tmpMgr, getIndexPath(), true );
ChunkStorage::Writer chunkStorageWriter( storageInfo, encryptionkey, tmpMgr, chunkReindex,
getBundlesPath(), getIndexPath(), threads );
string fileName;
string backupsPath = getBackupsPath();
Dir::Listing lst( backupsPath );
Dir::Entry entry;
BundleCollector collector;
collector.bundlesPath = getBundlesPath();
collector.chunkStorageReader = &this->chunkStorageReader;
collector.chunkStorageWriter = &chunkStorageWriter;
collector.verbose = false;
verbosePrintf( "Checking used chunks...\n" );
while( lst.getNext( entry ) )
{
verbosePrintf( "Checking backup %s...\n", entry.getFileName().c_str() );
BackupInfo backupInfo;
BackupFile::load( Dir::addPath( backupsPath, entry.getFileName() ), encryptionkey, backupInfo );
string backupData;
BackupRestorer::restoreIterations( chunkStorageReader, backupInfo, backupData, &collector.usedChunkSet );
BackupRestorer::restore( chunkStorageReader, backupData, NULL, &collector.usedChunkSet );
}
verbosePrintf( "Checking bundles...\n" );
chunkIndex.loadIndex( collector );
collector.commit();
verbosePrintf( "Cleaning up...\n" );
string bundlesPath = getBundlesPath();
Dir::Listing bundleLst( bundlesPath );
while( bundleLst.getNext( entry ) )
{
const string dirPath = Dir::addPath( bundlesPath, entry.getFileName());
if (entry.isDir() && Dir::isDirEmpty(dirPath)) {
Dir::remove(dirPath);
}
}
verbosePrintf( "Garbage collection complete\n" );
}

22
zcollector.hh Normal file
View File

@ -0,0 +1,22 @@
// Copyright (c) 2012-2014 Konstantin Isakov <ikm@zbackup.org>
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
#ifndef Z_COLLECTOR_HH_INCLUDED__
#define Z_COLLECTOR_HH_INCLUDED__
#include "zbackup_base.hh"
#include "chunk_storage.hh"
/// Storage front-end that performs garbage collection through gc().
class ZCollector: public ZBackupBase
{
private:
  // Declared in this order on purpose: members are initialised in declaration
  // order
  ChunkStorage::Reader chunkStorageReader;
  size_t threads;

public:
  ZCollector( std::string const & storageDir, std::string const & password,
              size_t threads, size_t cacheSize );

  void gc();
};
#endif