zbackup/chunk_index.cc

198 lines
4.9 KiB
C++
Raw Permalink Normal View History

2014-12-11 10:50:15 +03:00
// Copyright (c) 2012-2014 Konstantin Isakov <ikm@zbackup.org> and ZBackup contributors, see CONTRIBUTORS
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
2013-07-18 21:33:25 +04:00
#include <stdio.h>
#include <string.h>
#include <new>
#include <utility>
#include "chunk_index.hh"
#include "debug.hh"
#include "dir.hh"
#include "index_file.hh"
#include "zbackup.pb.h"
// Creates an unlinked chain node (next is null) that stores the given chunk
// size and bundle id pointer, together with a copy of the crypto hash part of
// the chunk id.
ChunkIndex::Chain::Chain( ChunkId const & id, uint32_t size,
                          Bundle::Id const * bundleId ):
  next( 0 ),
  size( size ),
  bundleId( bundleId )
{
  // Only the crypto hash is copied out of the id; no other part of it is
  // stored in the node
  memcpy( cryptoHash, id.cryptoHash, sizeof( cryptoHash ) );
}
// Returns true when the crypto hash stored in this node is byte-for-byte
// identical to the crypto hash of the given chunk id.
bool ChunkIndex::Chain::equalsTo( ChunkId const & id )
{
  int const difference = memcmp( cryptoHash, id.cryptoHash, sizeof( cryptoHash ) );

  return difference == 0;
}
2014-07-14 00:11:01 +04:00
// Reads every entry listed in the index directory and reports its contents to
// the given processor:
//   - startIndex()/finishIndex() bracket each index file,
//   - startBundle()/finishBundle() bracket each bundle record in the file,
//   - processChunk() is called once per chunk record of the bundle, going from
//     the last record to the first.
// A std::exception raised while handling a file is reported via verbosePrintf
// and loading goes on with the next directory entry; finishIndex() is not
// called for such a file.
void ChunkIndex::loadIndex( IndexProcessor & ip )
{
  Dir::Listing listing( indexPath );
  Dir::Entry dirEntry;

  verbosePrintf( "Loading index...\n" );

  while ( listing.getNext( dirEntry ) )
  {
    verbosePrintf( "Loading index file %s...\n", dirEntry.getFileName().c_str() );

    try
    {
      string indexFn = Dir::addPath( indexPath, dirEntry.getFileName() );
      IndexFile::Reader reader( key, indexFn );

      ip.startIndex( indexFn );

      BundleInfo info;
      Bundle::Id bundleId;

      while ( reader.readNextRecord( info, bundleId ) )
      {
        // The processor gets a reference to a copy placed in memory obtained
        // from storage, not to the loop-local bundleId that is handed to
        // readNextRecord() again on the next iteration
        Bundle::Id * savedId = storage.allocateObjects< Bundle::Id >( 1 );
        memcpy( savedId, &bundleId, sizeof( bundleId ) );

        ChunkId id;

        ip.startBundle( *savedId );

        // Count down so that records are visited from the last to the first
        int recordIdx = info.chunk_record_size();
        while ( recordIdx-- )
        {
          BundleInfo_ChunkRecord const & record = info.chunk_record( recordIdx );

          if ( record.id().size() != ChunkId::BlobSize )
            throw exIncorrectChunkIdSize();

          id.setFromBlob( record.id().data() );
          ip.processChunk( id, record.size() );
        }

        ip.finishBundle( *savedId, info );
      }

      ip.finishIndex( indexFn );
    }
    catch ( std::exception const & e )
    {
      // Report the problem and move on to the next directory entry
      verbosePrintf( "error: %s\n", e.what() );
    }
  }

  verbosePrintf( "Index loaded.\n" );
}
2014-07-14 00:11:01 +04:00
// Called by loadIndex() before the records of an index file are read.
// ChunkIndex takes no action here; the file name is ignored.
void ChunkIndex::startIndex( string const & /* indexFn */ )
{
  // Intentionally empty
}
// Called by loadIndex() at the start of each bundle record. Remembers the
// address of the bundle id (no copy is made) so that processChunk() can attach
// it to the chunks that follow.
void ChunkIndex::startBundle( Bundle::Id const & bundleId )
{
  Bundle::Id const * const current = &bundleId;

  lastBundleId = current;
}
// Called by loadIndex() for each chunk record. Registers the chunk under the
// bundle id remembered by the latest startBundle() call. The result of the
// registration is not inspected, so a chunk id that is already present is
// silently left as it was.
void ChunkIndex::processChunk( ChunkId const & chunkId, uint32_t size )
{
  Bundle::Id const * const owner = lastBundleId;

  registerNewChunkId( chunkId, size, owner );
}
// Called by loadIndex() after all chunk records of a bundle were processed.
// ChunkIndex takes no action here; both arguments are ignored.
void ChunkIndex::finishBundle( Bundle::Id const & /* bundleId */,
                               BundleInfo const & /* info */ )
{
  // Intentionally empty
}
// Called by loadIndex() once an index file was read through to its end.
// ChunkIndex takes no action here; the file name is ignored.
void ChunkIndex::finishIndex( string const & /* indexFn */ )
{
  // Intentionally empty
}
2013-07-18 21:33:25 +04:00
// Sets up an index over the given index directory. Unless
// prohibitChunkIndexLoading is set, the index files are loaded right away with
// this object acting as the processor, i.e. every chunk found gets registered
// in the hash table.
ChunkIndex::ChunkIndex( EncryptionKey const & key, TmpMgr & tmpMgr,
                        string const & indexPath, bool prohibitChunkIndexLoading ):
  key( key ),
  tmpMgr( tmpMgr ),
  indexPath( indexPath ),
  storage( 65536, 1 ),
  lastBundleId( NULL )
{
  if ( !prohibitChunkIndexLoading )
  {
    loadIndex( *this );
  }

  dPrintf( "%s for %s is instantiated and initialized, hasKey: %s\n",
           __CLASS, indexPath.c_str(), key.hasKey() ? "true" : "false" );
}
// Looks a chunk up by its rolling hash first and by its full id second.
// chunkInfo.getChunkId() is called only when the hash table has an entry for
// rollingHash, and then exactly once. Returns the bundle id pointer stored for
// the matching chunk, or NULL when no chunk matches. When a match is found and
// size is non-null, *size receives the stored chunk size.
Bundle::Id const * ChunkIndex::findChunk( ChunkId::RollingHashPart rollingHash,
                                          ChunkInfoInterface & chunkInfo, uint32_t *size )
{
  HashTable::iterator bucket = hashTable.find( rollingHash );

  if ( bucket == hashTable.end() )
    return NULL;

  // The rolling hash matched, so the full chunk id is needed from here on
  ChunkId const & wanted = chunkInfo.getChunkId();

  // Walk the chain of chunks sharing this rolling hash
  for ( Chain * node = bucket->second; node; node = node->next )
  {
    if ( !node->equalsTo( wanted ) )
      continue;

    if ( size )
      *size = node->size;

    return node->bundleId;
  }

  return NULL;
}
namespace {

// ChunkInfoInterface implementation for the case where the complete chunk id
// is already at hand: getChunkId() just returns it. Only a reference is kept,
// so the referenced ChunkId has to outlive this object.
struct ChunkInfoImmediate: public ChunkIndex::ChunkInfoInterface
{
  ChunkId const & id;

  ChunkInfoImmediate( ChunkId const & chunkId ):
    id( chunkId )
  {
  }

  virtual ChunkId const & getChunkId()
  {
    return id;
  }
};

}
// Convenience overload for callers that already have the complete chunk id.
// Wraps the id into a ChunkInfoImmediate and forwards to the rolling-hash
// based findChunk(); the return value and the handling of size are those of
// that overload.
Bundle::Id const * ChunkIndex::findChunk( ChunkId const & chunkId, uint32_t *size )
{
  ChunkInfoImmediate immediate( chunkId );

  Bundle::Id const * const found =
    findChunk( chunkId.rollingHash, immediate, size );

  return found;
}
// Adds a chunk to the hash table unless a chunk with the same id is already
// there. Returns the newly created chain node, or NULL if the chunk was
// already registered (in which case the existing node is left unchanged).
// The node is constructed in memory obtained from storage.
ChunkIndex::Chain * ChunkIndex::registerNewChunkId( ChunkId const & id, uint32_t size,
                                                    Bundle::Id const * bundleId )
{
  // Fetch the entry for this rolling hash, creating one with an empty chain
  // if there was none
  HashTable::iterator slot =
    hashTable.insert( std::make_pair( id.rollingHash,
                                      static_cast< Chain * >( 0 ) ) ).first;

  // 'link' always points at the pointer that would have to be updated in
  // order to append a node: first the chain head, then each node's 'next'
  Chain ** link = &slot->second;

  while ( *link )
  {
    if ( ( *link )->equalsTo( id ) )
      return NULL; // The entry existed already

    link = &( *link )->next;
  }

  // Reached the end of the chain without a match - append a new node
  void * rawNode = storage.allocateObjects< Chain >( 1 );
  *link = new ( rawNode ) Chain( id, size, bundleId );

  return *link;
}
// Registers a chunk as belonging to the given bundle. Returns false, changing
// nothing, if the chunk id is already present; returns true once the chunk has
// been added. A copy of bundleId is placed in memory obtained from storage
// only when it differs from the bundle id used last (or when there is none
// yet); otherwise the previously stored copy is shared.
bool ChunkIndex::addChunk( ChunkId const & id, uint32_t size, Bundle::Id const & bundleId )
{
  // The bundle id is filled in below, once it is known which copy to use
  Chain * const added = registerNewChunkId( id, size, NULL );

  if ( !added )
    return false;

  bool const needsOwnCopy = !lastBundleId || *lastBundleId != bundleId;

  if ( needsOwnCopy )
  {
    Bundle::Id * stored = storage.allocateObjects< Bundle::Id >( 1 );
    memcpy( stored, &bundleId, Bundle::IdSize );
    lastBundleId = stored;
  }

  added->bundleId = lastBundleId;

  return true;
}