2013-10-03 05:51:24 +04:00
|
|
|
// Copyright (c) 2012-2013 Konstantin Isakov <ikm@zbackup.org>
|
|
|
|
// Part of ZBackup. Licensed under GNU GPLv2 or later
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
#include <string>
|
|
|
|
|
2013-10-03 05:51:24 +04:00
|
|
|
#include "compression.hh"
|
|
|
|
#include "check.hh"
|
|
|
|
|
|
|
|
// Out-of-line definitions of EnDecoder's special member functions.
// All three bodies are empty.

// Default constructor.
EnDecoder::EnDecoder()
{
}

// Copy constructor; it copies nothing from its argument.
EnDecoder::EnDecoder( const EnDecoder& )
{
}

// Destructor.
EnDecoder::~EnDecoder()
{
}
|
2013-10-03 05:51:24 +04:00
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Out-of-line destructor of Compression; the body is empty.
Compression::~Compression()
{
}
|
2013-10-03 05:51:24 +04:00
|
|
|
|
|
|
|
|
|
|
|
// LZMA
|
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
#include <lzma.h>
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZMAEnDecoder : public EnDecoder
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
protected:
|
|
|
|
static lzma_stream init_value;
|
|
|
|
lzma_stream strm;
|
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
LZMAEnDecoder()
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
strm = init_value;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void setInput( const void* data, size_t size )
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
strm.next_in = (const uint8_t *) data;
|
|
|
|
strm.avail_in = size;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void setOutput( void* data, size_t size )
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
strm.next_out = (uint8_t *) data;
|
|
|
|
strm.avail_out = size;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t getAvailableInput()
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
return strm.avail_in;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t getAvailableOutput()
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
return strm.avail_out;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool process( bool finish )
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
lzma_ret ret = lzma_code( &strm, ( finish ? LZMA_FINISH : LZMA_RUN ) );
|
|
|
|
|
|
|
|
CHECK( ret == LZMA_OK || ret == LZMA_STREAM_END, "lzma_code error: %d", (int) ret );
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
return ( ret == LZMA_STREAM_END );
|
2013-10-03 05:51:24 +04:00
|
|
|
}
|
|
|
|
};
|
|
|
|
// Template stream state that the LZMAEnDecoder constructor copies into strm.
lzma_stream LZMAEnDecoder::init_value = LZMA_STREAM_INIT;
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZMAEncoder : public LZMAEnDecoder
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
LZMAEncoder()
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
uint32_t preset = 6; // TODO: make this customizable, although 6 seems to be
|
|
|
|
// the best option
|
|
|
|
lzma_ret ret = lzma_easy_encoder( &strm, preset, LZMA_CHECK_CRC64 );
|
|
|
|
CHECK( ret == LZMA_OK, "lzma_easy_encoder error: %d", (int) ret );
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZMADecoder : public LZMAEnDecoder
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
LZMADecoder()
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
lzma_ret ret = lzma_stream_decoder( &strm, UINT64_MAX, 0 );
|
|
|
|
CHECK( ret == LZMA_OK,"lzma_stream_decoder error: %d", (int) ret );
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZMACompression : public Compression
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
sptr<EnDecoder> createEncoder() const
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
return new LZMAEncoder();
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
sptr<EnDecoder> createDecoder() const
|
|
|
|
{
|
2013-10-03 05:51:24 +04:00
|
|
|
return new LZMADecoder();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string getName() const { return "lzma"; }
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
// LZO
|
|
|
|
|
|
|
|
// liblzo implements a lot of algorithms "for unlimited backward compatibility"
|
|
|
|
|
|
|
|
// The web site says:
|
|
|
|
// "My experiments have shown that LZO1B is good with a large blocksize
|
|
|
|
// or with very redundant data, LZO1F is good with a small blocksize or
|
|
|
|
// with binary data and that LZO1X is often the best choice of all.
|
|
|
|
// LZO1Y and LZO1Z are almost identical to LZO1X - they can achieve a
|
|
|
|
// better compression ratio on some files.
|
|
|
|
// Beware, your mileage may vary."
|
|
|
|
// => I'm using LZO1X, as suggested
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// Unfortunately, liblzo always works with the whole data, so it doesn't support
|
|
|
|
// the streaming approach that most other libraries use. This means that we have
|
|
|
|
// to use a big buffer for the data. The class NoStreamEnDecoder implements this
|
|
|
|
// so we can use it, if there is another library like liblzo.
|
|
|
|
|
|
|
|
// Collect all data and process it in one pass
|
2013-10-08 03:17:15 +04:00
|
|
|
class NoStreamEnDecoder : public EnDecoder
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
std::string acc_data_in, acc_data_out;
|
|
|
|
const char* data_in;
|
|
|
|
char* data_out;
|
|
|
|
size_t avail_in, avail_out;
|
|
|
|
bool processed;
|
|
|
|
size_t pos_in_acc_data_out;
|
|
|
|
protected:
|
|
|
|
// you must implement these:
|
|
|
|
|
|
|
|
// Should we try with the existing output buffer which has avail_out
|
|
|
|
// bytes of free space? If you know that this will fail, return false.
|
|
|
|
// You may peek into data_in which contains the complete compressed data.
|
|
|
|
virtual bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) =0;
|
|
|
|
|
|
|
|
// We will allocate a buffer for the output data. How big should it be?
|
|
|
|
// You may peek into data_in which contains the complete compressed data.
|
|
|
|
virtual size_t suggestOutputSize( const char* data_in, size_t avail_in ) =0;
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
// Is this input complete?
|
|
|
|
// An encoder should return false.
|
|
|
|
virtual bool isCompleteInput( const char* data_in, size_t avail_in ) =0;
|
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
// Process the data in data_in and put the result into data_out. You musn't
|
|
|
|
// write more than avail_out bytes! If the output buffer is big enough,
|
|
|
|
// process the data and store the output size in output_size. If the output
|
|
|
|
// buffer is too small, return false and we will give you a bigger one. If
|
|
|
|
// any other error occurrs, abort the program. We don't have any better
|
|
|
|
// error handling. Sorry. Do NOT return false for errors that won't be
|
|
|
|
// remedied by a bigger buffer!
|
|
|
|
virtual bool do_process( const char* data_in, size_t avail_in,
|
|
|
|
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
2013-10-03 11:32:58 +04:00
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void setUnusedInput( size_t unused )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
this->data_in += avail_in - unused;
|
|
|
|
this->avail_in = unused;
|
|
|
|
}
|
2013-10-03 07:59:43 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
NoStreamEnDecoder()
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
data_in = data_out = NULL;
|
|
|
|
avail_in = avail_out = pos_in_acc_data_out = 0;
|
|
|
|
processed = false;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void setInput( const void* data, size_t size )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
data_in = (const char *) data;
|
|
|
|
avail_in = size;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void setOutput( void* data, size_t size )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
data_out = (char *) data;
|
|
|
|
avail_out = size;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t getAvailableInput()
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
return avail_in;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t getAvailableOutput()
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
return avail_out;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool process( bool finish )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// try to process the input, if we haven't done it, yet
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( !processed )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// data has not been encoded
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( acc_data_in.empty() )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// this is the first piece of data
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( finish || isCompleteInput( data_in, avail_in ) )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// special case: all the data has been passed at once
|
|
|
|
// -> process it without using acc_data_in
|
|
|
|
process_finish( data_in, avail_in );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we didn't process the data, put it into accumulator
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( !processed )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// accumulate data in acc_data_in
|
|
|
|
acc_data_in.append( data_in, avail_in );
|
|
|
|
|
|
|
|
// If this was the last bit of data, we process it, now.
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( finish || isCompleteInput( acc_data_in.data(), acc_data_in.size() ) )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
process_finish( acc_data_in.data(), acc_data_in.size() );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the input has been processed, try to copy some of it to the output buffer.
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( processed )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// data has been encoded or decoded, remaining output is in acc_data_out
|
|
|
|
// -> copy to output
|
2013-10-08 03:17:15 +04:00
|
|
|
if (avail_out > 0 && acc_data_out.size() - pos_in_acc_data_out > 0)
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
size_t sz = avail_out;
|
|
|
|
if ( sz > acc_data_out.size() - pos_in_acc_data_out )
|
|
|
|
sz = acc_data_out.size() - pos_in_acc_data_out;
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
memcpy( data_out, acc_data_out.data() + pos_in_acc_data_out, sz );
|
2013-10-03 07:59:43 +04:00
|
|
|
data_out += sz;
|
|
|
|
avail_out -= sz;
|
|
|
|
pos_in_acc_data_out += sz;
|
|
|
|
}
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
// no more data left? -> return true
|
|
|
|
return ( acc_data_out.size() - pos_in_acc_data_out == 0 );
|
|
|
|
} else {
|
|
|
|
// not yet processed, so we cannot be done
|
|
|
|
return false;
|
2013-10-03 07:59:43 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2013-10-08 03:17:15 +04:00
|
|
|
void process_finish( const char* data_in, size_t avail_in )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// should we try with the existing output buffer?
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( shouldTryWith( data_in, avail_in, avail_out ) )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
size_t output_size;
|
2013-10-08 03:17:15 +04:00
|
|
|
if ( do_process( data_in, avail_in, data_out, avail_out, output_size ) )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// it worked :-)
|
|
|
|
processed = true;
|
2013-10-03 11:32:58 +04:00
|
|
|
avail_out -= output_size;
|
2013-10-03 07:59:43 +04:00
|
|
|
return ;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we use our own buffer
|
|
|
|
size_t buffer_size = suggestOutputSize( data_in, avail_in );
|
|
|
|
do {
|
|
|
|
acc_data_out.resize(buffer_size);
|
|
|
|
|
|
|
|
size_t output_size;
|
|
|
|
//TODO doc says we mustn't modify the pointer returned by data()...
|
2013-10-03 11:32:58 +04:00
|
|
|
if ( do_process( data_in, avail_in,
|
2013-10-08 03:17:15 +04:00
|
|
|
(char*) acc_data_out.data(), buffer_size, output_size ) )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// buffer is big enough
|
|
|
|
acc_data_out.resize( output_size );
|
2013-10-03 11:32:58 +04:00
|
|
|
processed = true;
|
2013-10-03 07:59:43 +04:00
|
|
|
return ;
|
|
|
|
}
|
|
|
|
|
|
|
|
// try a bigger one
|
|
|
|
buffer_size *= 2;
|
|
|
|
} while (true);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#include <endian.h>
|
|
|
|
|
|
|
|
// like NoStreamEnDecoder, but also adds the uncompressed size before the stream
|
|
|
|
//NOTE You should make sure that the compression function doesn't overwrite any
|
|
|
|
// memory, if this information is corrupted! This could be exploited by a
|
|
|
|
// malicious person and there is nothing I can do about it. I could check for
|
|
|
|
// an overflow, but when control gets back to this class, it is already too
|
|
|
|
// late, as one 'ret' instruction is enough to do harm.
|
2013-10-08 03:17:15 +04:00
|
|
|
class NoStreamAndUnknownSizeDecoder : public NoStreamEnDecoder
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
protected:
|
|
|
|
// You implement this one:
|
|
|
|
// If you don't know the real decoded size, don't change output_size.
|
|
|
|
virtual bool do_process_no_size( const char* data_in, size_t avail_in,
|
|
|
|
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
|
|
|
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
return suggestOutputSize( data_in, avail_in ) <= avail_out;
|
|
|
|
}
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
// Is this input complete?
|
2013-10-08 03:17:15 +04:00
|
|
|
bool isCompleteInput( const char* data_in, size_t avail_in )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
if ( avail_in < 2*sizeof(uint64_t) )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
data_in += sizeof(uint64_t);
|
|
|
|
|
|
|
|
size_t input_size = le32toh( *(uint32_t*) data_in );
|
|
|
|
|
|
|
|
return ( avail_in >= input_size + 2*sizeof(uint64_t) );
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t suggestOutputSize( const char* data_in, size_t avail_in )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
CHECK( avail_in >= sizeof(uint64_t), "not enough input data" );
|
|
|
|
// We're not using size_t because we need a type that has the same size on all
|
|
|
|
// architectures. A 32-bit host won't be able to open files with more than
|
|
|
|
// 4GB (actually much less), so 4 byte are enough. Even a 64-bit host would
|
|
|
|
// have some trouble with allocating 8GB of RAM just for our buffers ;-)
|
|
|
|
//NOTE If your compiler doesn't accept this cast, your size_t is smaller than
|
|
|
|
// uint32_t. In that case, you are in trouble...
|
|
|
|
size_t output_size = le32toh( *(uint32_t*) data_in );
|
|
|
|
return output_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool do_process( const char* data_in, size_t avail_in,
|
2013-10-08 03:17:15 +04:00
|
|
|
char* data_out, size_t avail_out, size_t& output_size )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
if ( avail_in < 2*sizeof( uint64_t ) )
|
2013-10-03 07:59:43 +04:00
|
|
|
return false;
|
|
|
|
|
|
|
|
//NOTE We skip 8 bytes. If we later decide to drop compatibility with 32-bit
|
|
|
|
// hosts, we can save a 64-bit size. Well, that will be much later, when
|
|
|
|
// we can easily hold two copies of a 4GB file in main memory :-D
|
2013-10-03 11:32:58 +04:00
|
|
|
|
|
|
|
size_t needed_output_size = le32toh( *(uint32_t*) data_in );
|
|
|
|
data_in += sizeof(uint64_t);
|
|
|
|
size_t input_size = le32toh( *(uint32_t*) data_in );
|
|
|
|
data_in += sizeof(uint64_t);
|
|
|
|
if ( output_size < needed_output_size )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
avail_in -= 2*sizeof( uint64_t );
|
|
|
|
|
|
|
|
// We might not need all of our input data.
|
|
|
|
setUnusedInput( avail_in - input_size );
|
|
|
|
avail_in = input_size;
|
2013-10-03 07:59:43 +04:00
|
|
|
|
|
|
|
size_t reported_output_size = needed_output_size;
|
|
|
|
if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, reported_output_size ) )
|
|
|
|
return false;
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
CHECK( reported_output_size == needed_output_size,
|
|
|
|
"Size of decoded data is different than expected" );
|
2013-10-03 07:59:43 +04:00
|
|
|
|
|
|
|
output_size = needed_output_size;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// encoder for NoStreamAndUnknownSizeDecoder
|
2013-10-08 03:17:15 +04:00
|
|
|
class NoStreamAndUnknownSizeEncoder : public NoStreamEnDecoder
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
protected:
|
|
|
|
// You implement this one:
|
|
|
|
virtual bool do_process_no_size( const char* data_in, size_t avail_in,
|
|
|
|
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
|
|
|
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool shouldTryWith( const char*, size_t, size_t avail_out )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// If the compression doesn't use any spaces...
|
|
|
|
return avail_out > sizeof( uint64_t );
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool isCompleteInput( const char* data_in, size_t avail_in )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// We cannot know whether the user wants to send more data.
|
|
|
|
// -> return false; user must use finish=true to signal end of data
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t getOverhead()
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
return 2*sizeof( uint64_t );
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t suggestOutputSize( const char*, size_t avail_in )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// We assume that the compression won't make the data any bigger.
|
2013-10-03 11:32:58 +04:00
|
|
|
return avail_in + getOverhead();
|
2013-10-03 07:59:43 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
bool do_process( const char* data_in, size_t avail_in,
|
2013-10-08 03:17:15 +04:00
|
|
|
char* data_out, size_t avail_out, size_t& output_size )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
CHECK( avail_in <= UINT32_MAX,
|
|
|
|
"You want to compress more than 4GB of data?! Sorry, we don't support that, yet." );
|
|
|
|
|
|
|
|
memcpy(data_out, "ABCDEFGHIJKLMNOP", 16);
|
2013-10-03 07:59:43 +04:00
|
|
|
|
|
|
|
// store size
|
2013-10-03 11:32:58 +04:00
|
|
|
*(uint32_t*)data_out = htole32( avail_in );
|
|
|
|
uint32_t* compressed_size = (uint32_t*) ( data_out + sizeof( uint64_t ) );
|
2013-10-03 07:59:43 +04:00
|
|
|
|
|
|
|
// compressed data goes after the size
|
|
|
|
// We skip more than we actually use; see NoStreamAndUnknownSizeDecoder::do_process(...).
|
2013-10-03 11:32:58 +04:00
|
|
|
data_out += getOverhead();
|
|
|
|
avail_out -= getOverhead();
|
2013-10-03 07:59:43 +04:00
|
|
|
|
|
|
|
if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, output_size ) )
|
|
|
|
return false;
|
|
|
|
|
2013-10-03 11:32:58 +04:00
|
|
|
CHECK( output_size <= UINT32_MAX,
|
|
|
|
"The compressed data is more than 4GB?! Sorry, we don't support that, yet." );
|
|
|
|
*compressed_size = htole32( (uint32_t) output_size );
|
|
|
|
|
|
|
|
output_size += getOverhead();
|
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_LIBLZO
|
|
|
|
|
|
|
|
#include <lzo/lzo1x.h>
|
|
|
|
|
|
|
|
// finally, we can implement lzo
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZO1X_1_Decoder : public NoStreamAndUnknownSizeDecoder
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
protected:
|
|
|
|
bool do_process_no_size( const char* data_in, size_t avail_in,
|
2013-10-08 03:17:15 +04:00
|
|
|
char* data_out, size_t avail_out, size_t& output_size )
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
// same argument is used for available output size and size of decompressed data
|
|
|
|
output_size = avail_out;
|
|
|
|
int ret = lzo1x_decompress_safe( (const lzo_bytep) data_in, avail_in,
|
|
|
|
(lzo_bytep) data_out, &output_size, NULL );
|
2013-10-03 11:32:58 +04:00
|
|
|
|
|
|
|
if ( ret == LZO_E_OUTPUT_OVERRUN )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CHECK( ret >= LZO_E_OK, "lzo1x_decompress_safe failed (code %d)", ret );
|
|
|
|
|
|
|
|
return true;
|
2013-10-03 07:59:43 +04:00
|
|
|
}
|
|
|
|
};
|
|
|
|
class LZO1X_1_Compression;
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZO1X_1_Encoder : public NoStreamAndUnknownSizeEncoder
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
const LZO1X_1_Compression* compression;
|
2013-10-03 11:32:58 +04:00
|
|
|
static size_t calcMaxCompressedSize(size_t avail_in);
|
2013-10-03 07:59:43 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
LZO1X_1_Encoder(const LZO1X_1_Compression* compression)
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
this->compression = compression;
|
|
|
|
}
|
2013-10-03 11:32:58 +04:00
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
protected:
|
|
|
|
bool do_process_no_size( const char* data_in, size_t avail_in,
|
|
|
|
char* data_out, size_t avail_out, size_t& output_size );
|
2013-10-03 11:32:58 +04:00
|
|
|
bool shouldTryWith( const char*, size_t, size_t avail_out );
|
|
|
|
size_t suggestOutputSize( const char*, size_t avail_in );
|
2013-10-03 07:59:43 +04:00
|
|
|
};
|
2013-10-08 03:17:15 +04:00
|
|
|
class LZO1X_1_Compression : public Compression
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
static bool initialized;
|
2013-10-08 03:17:15 +04:00
|
|
|
static void init()
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
//TODO This is not thread-safe. Does it have to be?
|
2013-10-08 03:17:15 +04:00
|
|
|
if (!initialized)
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
int ret = lzo_init();
|
|
|
|
CHECK( ret == LZO_E_OK, "lzo_init failed (%d)", ret );
|
|
|
|
initialized = true;
|
|
|
|
}
|
|
|
|
}
|
2013-10-03 07:59:43 +04:00
|
|
|
public:
|
2013-10-08 03:17:15 +04:00
|
|
|
sptr<EnDecoder> createEncoder() const
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
init();
|
2013-10-03 07:59:43 +04:00
|
|
|
return new LZO1X_1_Encoder(this);
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
sptr<EnDecoder> createDecoder() const
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
init();
|
2013-10-03 07:59:43 +04:00
|
|
|
return new LZO1X_1_Decoder();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string getName() const { return "lzo1x_1"; }
|
|
|
|
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
lzo_voidp getWorkmem( size_t size ) const
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
return new char[size];
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
void giveBackWorkmem( lzo_voidp wrkmem ) const
|
|
|
|
{
|
2013-10-03 07:59:43 +04:00
|
|
|
//TODO I think we should keep the memory around and reuse it. After all
|
|
|
|
// it is only a few kilobytes and we will need it a lot. However, I
|
|
|
|
// won't risk anything here because I don't know whether this will be
|
|
|
|
// called by more than one thread.
|
|
|
|
delete[] (char*)wrkmem;
|
|
|
|
}
|
|
|
|
};
|
2013-10-03 11:32:58 +04:00
|
|
|
|
|
|
|
// Set to true by LZO1X_1_Compression::init() after lzo_init() has succeeded.
bool LZO1X_1_Compression::initialized = false;
|
|
|
|
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Returns the output space we provide for avail_in bytes of input.
size_t LZO1X_1_Encoder::calcMaxCompressedSize( size_t avail_in )
{
  // lzo1x_1_compress doesn't seem to check the size of the output buffer.
  // The documentation points to example/simple.c, which provides "a little
  // more output space" for incompressible input. This is its formula.
  const size_t extraSpace = avail_in / 16 + 64 + 3;
  return avail_in + extraSpace;
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool LZO1X_1_Encoder::shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
return avail_out >= suggestOutputSize( data_in, avail_in );
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
size_t LZO1X_1_Encoder::suggestOutputSize( const char*, size_t avail_in )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// It seems that lzo1x_1_compress does NOT check whether the buffer is big enough.
|
|
|
|
// The documentation refers to example/simple.c which says:
|
|
|
|
// "Because the input block may be incompressible, we must provide a little more
|
|
|
|
// output space in case that compression is not possible."
|
|
|
|
// -> We use the same formula.
|
|
|
|
return calcMaxCompressedSize( avail_in ) + getOverhead();
|
|
|
|
}
|
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
bool LZO1X_1_Encoder::do_process_no_size( const char* data_in, size_t avail_in,
|
2013-10-08 03:17:15 +04:00
|
|
|
char* data_out, size_t avail_out, size_t& output_size )
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// It seems that lzo1x_1_compress does NOT check whether the buffer is big enough.
|
|
|
|
// Therefore, we won't try it unless we are sure that the buffer is big enough.
|
|
|
|
if ( avail_out < calcMaxCompressedSize( avail_in ) )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// same argument is used for available output size (haha, see above)
|
|
|
|
// and size of decompressed data
|
2013-10-03 07:59:43 +04:00
|
|
|
output_size = avail_out;
|
|
|
|
|
|
|
|
lzo_voidp wrkmem = compression->getWorkmem(LZO1X_1_MEM_COMPRESS);
|
2013-10-03 12:24:47 +04:00
|
|
|
int ret = lzo1x_1_compress( (const lzo_bytep) data_in, avail_in,
|
2013-10-03 07:59:43 +04:00
|
|
|
(lzo_bytep) data_out, &output_size, wrkmem );
|
|
|
|
compression->giveBackWorkmem(wrkmem);
|
2013-10-03 11:32:58 +04:00
|
|
|
|
|
|
|
if ( ret == LZO_E_OUTPUT_OVERRUN )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CHECK( ret >= LZO_E_OK, "lzo1x_1_compress failed (code %d)", ret );
|
|
|
|
|
|
|
|
return true;
|
2013-10-03 07:59:43 +04:00
|
|
|
}
|
2013-10-03 05:51:24 +04:00
|
|
|
|
2013-10-03 07:59:43 +04:00
|
|
|
#endif // HAVE_LIBLZO
|
2013-10-03 05:51:24 +04:00
|
|
|
|
|
|
|
|
|
|
|
// register them
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// All compression methods that have been compiled in. The first entry becomes
// Compression::default_compression.
static const const_sptr<Compression> compressions[] =
{
  new LZMACompression(),
#ifdef HAVE_LIBLZO
  new LZO1X_1_Compression(),
#endif
  // NULL entry marks end of list. Don't remove it!
  NULL
};
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// The default method is the first entry of the compressions table.
const_sptr<Compression> Compression::default_compression = compressions[0];
|
2013-10-03 05:51:24 +04:00
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Returns the compression method whose getName() equals name. If there is no
// such method, NULL is returned when optional is set; otherwise the CHECK
// fails.
const_sptr<Compression> Compression::findCompression( const std::string& name, bool optional )
{
  // the table ends with a NULL entry
  for ( const const_sptr<Compression>* c = compressions; *c; ++c )
  {
    if ( (*c)->getName() == name )
      return *c;
  }

  // Not found. This is only acceptable if the caller said so. (The condition
  // used to be inverted, which failed for optional lookups and returned NULL
  // for mandatory ones.)
  CHECK( optional, "Couldn't find compression method '%s'", name.c_str() );
  return NULL;
}
|
2013-10-03 11:32:58 +04:00
|
|
|
|
|
|
|
// iterator over compressions
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Creates an iterator that points to the given entry of the table.
Compression::iterator::iterator( const const_sptr<Compression>* ptr )
  : ptr( ptr )
{
}
|
|
|
|
// Creates an iterator that points to the same entry as it.
Compression::iterator::iterator( const iterator& it )
  : ptr( it.ptr )
{
}
|
2013-10-03 11:32:58 +04:00
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Makes this iterator point to the same entry as it.
Compression::iterator& Compression::iterator::operator =( const iterator& it )
{
  ptr = it.ptr;
  return *this;
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool Compression::iterator::operator ==( const iterator& other ) const
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
// special case: one has ptr==NULL (end iterator returned by end()) and the
|
|
|
|
// other has *ptr==NULL (end iterator obtained by calling ++)
|
|
|
|
if ( !ptr && ( !other.ptr || !*other.ptr ) )
|
|
|
|
return true;
|
|
|
|
else if ( !other.ptr && ( !ptr || !*ptr ) )
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return (ptr == other.ptr);
|
|
|
|
}
|
2013-10-08 03:17:15 +04:00
|
|
|
bool Compression::iterator::operator !=( const iterator& other ) const
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
return !( *this == other );
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
bool Compression::iterator::at_end() const
|
|
|
|
{
|
2013-10-03 11:32:58 +04:00
|
|
|
return !ptr || !*ptr;
|
|
|
|
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Moves to the next entry. Incrementing an end iterator fails the CHECK.
Compression::iterator& Compression::iterator::operator ++()
{
  CHECK( ptr && *ptr, "Cannot increment the end iterator" );

  ptr = ptr + 1;
  return *this;
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Returns the entry this iterator points to. Dereferencing an end iterator
// fails the CHECK.
const_sptr<Compression> Compression::iterator::operator *()
{
  CHECK( ptr && *ptr, "Cannot dereference the end iterator" );

  const const_sptr<Compression>& current = *ptr;
  return current;
}
|
|
|
|
|
2013-10-08 03:17:15 +04:00
|
|
|
// Returns an iterator that points to the first entry of the compressions
// table.
Compression::iterator Compression::begin()
{
  iterator first( compressions );
  return first;
}
|
2013-10-08 03:17:15 +04:00
|
|
|
// Returns the end iterator, which holds a NULL pointer.
Compression::iterator Compression::end()
{
  iterator pastTheEnd( NULL );
  return pastTheEnd;
}
|