mirror of https://github.com/vitalif/zbackup
Support for lzo; only internal and not tested
parent
6656866687
commit
272ff36f8b
|
@ -34,6 +34,15 @@ find_package( Threads REQUIRED )
|
|||
find_package( LibLZMA REQUIRED )
|
||||
include_directories( ${LIBLZMA_INCLUDE_DIRS} )
|
||||
|
||||
find_package( LibLZO COMPONENTS LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE LIBLZO_HAS_LZO1X_1_COMPRESS )
|
||||
if (LIBLZO_FOUND)
|
||||
ADD_DEFINITIONS(-DHAVE_LIBLZO)
|
||||
include_directories( ${LIBLZO_INCLUDE_DIRS} )
|
||||
else (LIBLZO_FOUND)
|
||||
set(LIBLZO_LIBRARIES)
|
||||
endif (LIBLZO_FOUND)
|
||||
|
||||
|
||||
file( GLOB sourceFiles "*.cc" )
|
||||
add_executable( zbackup ${sourceFiles} ${protoSrcs} ${protoHdrs} )
|
||||
|
||||
|
@ -43,6 +52,7 @@ target_link_libraries( zbackup
|
|||
${CMAKE_THREAD_LIBS_INIT}
|
||||
${ZLIB_LIBRARIES}
|
||||
${LIBLZMA_LIBRARIES}
|
||||
${LIBLZO_LIBRARIES}
|
||||
)
|
||||
|
||||
install( TARGETS zbackup DESTINATION bin )
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
# - Find LibLZO
|
||||
# Find LibLZO headers and library
|
||||
#
|
||||
# LIBLZO_FOUND - True if liblzo is found.
|
||||
# LIBLZO_INCLUDE_DIRS - Directory where liblzo headers are located.
|
||||
# LIBLZO_LIBRARIES - Lzma libraries to link against.
|
||||
# LIBLZO_HAS_AUTO_DECODER - True if lzo_auto_decoder() is found (required).
|
||||
# LIBLZO_HAS_EASY_ENCODER - True if lzo_easy_encoder() is found (required).
|
||||
# LIBLZO_HAS_LZO_PRESET - True if lzo_lzo_preset() is found (required).
|
||||
# LIBLZO_VERSION_MAJOR - The major version of lzo
|
||||
# LIBLZO_VERSION_MINOR - The minor version of lzo
|
||||
# LIBLZO_VERSION_PATCH - The patch version of lzo
|
||||
# LIBLZO_VERSION_STRING - version number as a string (ex: "5.0.3")
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2008 Per Øyvind Karlsen <peroyvind@mandriva.org>
|
||||
# Copyright 2009 Alexander Neundorf <neundorf@kde.org>
|
||||
# Copyright 2009 Helio Chissini de Castro <helio@kde.org>
|
||||
# Copyright 2012 Mario Bensi <mbensi@ipsquad.net>
|
||||
# Adapted for liblzo (instead of liblzma) by Benjamin Koch <bbbsnowball@gmail.com>
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License"):
|
||||
#
|
||||
# CMake - Cross Platform Makefile Generator
|
||||
# Copyright 2000-2011 Kitware, Inc., Insight Software Consortium
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
|
||||
# nor the names of their contributors may be used to endorse or promote
|
||||
# products derived from this software without specific prior written
|
||||
# permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# ------------------------------------------------------------------------------
|
||||
#
|
||||
# The above copyright and license notice applies to distributions of
|
||||
# CMake in source and binary form. Some source files contain additional
|
||||
# notices of original copyright by their contributors; see each source
|
||||
# for details. Third-party software packages supplied with CMake under
|
||||
# compatible licenses provide their own copyright notices documented in
|
||||
# corresponding subdirectories.
|
||||
#
|
||||
# ------------------------------------------------------------------------------
|
||||
#
|
||||
# CMake was initially developed by Kitware with the following sponsorship:
|
||||
#
|
||||
# * National Library of Medicine at the National Institutes of Health
|
||||
# as part of the Insight Segmentation and Registration Toolkit (ITK).
|
||||
#
|
||||
# * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
|
||||
# Visualization Initiative.
|
||||
#
|
||||
# * National Alliance for Medical Image Computing (NAMIC) is funded by the
|
||||
# National Institutes of Health through the NIH Roadmap for Medical Research,
|
||||
# Grant U54 EB005149.
|
||||
#
|
||||
# * Kitware, Inc.
|
||||
#=============================================================================
|
||||
|
||||
|
||||
find_path(LIBLZO_INCLUDE_DIR lzo/lzo1x.h )
|
||||
find_library(LIBLZO_LIBRARY lzo2)
|
||||
|
||||
if(LIBLZO_INCLUDE_DIR AND EXISTS "${LIBLZO_INCLUDE_DIR}/lzo/version.h")
|
||||
file(STRINGS "${LIBLZO_INCLUDE_DIR}/lzo/version.h" LIBLZO_HEADER_CONTENTS REGEX "#define LZO_VERSION_[A-Z]+ [0-9]+")
|
||||
|
||||
string(REGEX REPLACE ".*#define LZO_VERSION_MAJOR ([0-9]+).*" "\\1" LIBLZO_VERSION_MAJOR "${LIBLZO_HEADER_CONTENTS}")
|
||||
string(REGEX REPLACE ".*#define LZO_VERSION_MINOR ([0-9]+).*" "\\1" LIBLZO_VERSION_MINOR "${LIBLZO_HEADER_CONTENTS}")
|
||||
string(REGEX REPLACE ".*#define LZO_VERSION_PATCH ([0-9]+).*" "\\1" LIBLZO_VERSION_PATCH "${LIBLZO_HEADER_CONTENTS}")
|
||||
|
||||
set(LIBLZO_VERSION_STRING "${LIBLZO_VERSION_MAJOR}.${LIBLZO_VERSION_MINOR}.${LIBLZO_VERSION_PATCH}")
|
||||
unset(LIBLZO_HEADER_CONTENTS)
|
||||
endif()
|
||||
|
||||
# We're just using two functions.
|
||||
if (LIBLZO_LIBRARY)
|
||||
include(CheckLibraryExists)
|
||||
CHECK_LIBRARY_EXISTS(${LIBLZO_LIBRARY} lzo1x_decompress_safe "" LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE)
|
||||
CHECK_LIBRARY_EXISTS(${LIBLZO_LIBRARY} lzo1x_1_compress "" LIBLZO_HAS_LZO1X_1_COMPRESS)
|
||||
endif ()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibLZO DEFAULT_MSG LIBLZO_INCLUDE_DIR
|
||||
LIBLZO_LIBRARY
|
||||
LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE
|
||||
LIBLZO_HAS_LZO1X_1_COMPRESS
|
||||
)
|
||||
|
||||
if (LIBLZO_FOUND)
|
||||
set(LIBLZO_LIBRARIES ${LIBLZO_LIBRARY})
|
||||
set(LIBLZO_INCLUDE_DIRS ${LIBLZO_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
||||
mark_as_advanced( LIBLZO_INCLUDE_DIR LIBLZO_LIBRARY )
|
312
compression.cc
312
compression.cc
|
@ -1,8 +1,6 @@
|
|||
// Copyright (c) 2012-2013 Konstantin Isakov <ikm@zbackup.org>
|
||||
// Part of ZBackup. Licensed under GNU GPLv2 or later
|
||||
|
||||
#include <lzma.h>
|
||||
|
||||
#include "compression.hh"
|
||||
#include "check.hh"
|
||||
|
||||
|
@ -15,6 +13,8 @@ Compression::~Compression() {}
|
|||
|
||||
// LZMA
|
||||
|
||||
#include <lzma.h>
|
||||
|
||||
class LZMAEnDecoder : public EnDecoder {
|
||||
protected:
|
||||
static lzma_stream init_value;
|
||||
|
@ -84,15 +84,319 @@ public:
|
|||
};
|
||||
|
||||
|
||||
// LZOP
|
||||
// LZO
|
||||
|
||||
//TODO
|
||||
// liblzo implements a lot of algorithms "for unlimited backward compatibility"
|
||||
|
||||
// The web site says:
|
||||
// "My experiments have shown that LZO1B is good with a large blocksize
|
||||
// or with very redundant data, LZO1F is good with a small blocksize or
|
||||
// with binary data and that LZO1X is often the best choice of all.
|
||||
// LZO1Y and LZO1Z are almost identical to LZO1X - they can achieve a
|
||||
// better compression ratio on some files.
|
||||
// Beware, your mileage may vary."
|
||||
// => I'm using LZO1X, as suggested
|
||||
|
||||
#include <string.h>
|
||||
|
||||
// Unfortunately, liblzo always works with the whole data, so it doesn't support
|
||||
// the streaming approach that most other libraries use. This means that we have
|
||||
// to use a big buffer for the data. The class NoStreamEnDecoder implements this
|
||||
// so we can use it, if there is another library like liblzo.
|
||||
|
||||
// Collect all data and process it in one pass
|
||||
class NoStreamEnDecoder : public EnDecoder {
|
||||
std::string acc_data_in, acc_data_out;
|
||||
const char* data_in;
|
||||
char* data_out;
|
||||
size_t avail_in, avail_out;
|
||||
bool processed;
|
||||
size_t pos_in_acc_data_out;
|
||||
protected:
|
||||
// you must implement these:
|
||||
|
||||
// Should we try with the existing output buffer which has avail_out
|
||||
// bytes of free space? If you know that this will fail, return false.
|
||||
// You may peek into data_in which contains the complete compressed data.
|
||||
virtual bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) =0;
|
||||
|
||||
// We will allocate a buffer for the output data. How big should it be?
|
||||
// You may peek into data_in which contains the complete compressed data.
|
||||
virtual size_t suggestOutputSize( const char* data_in, size_t avail_in ) =0;
|
||||
|
||||
// Process the data in data_in and put the result into data_out. You musn't
|
||||
// write more than avail_out bytes! If the output buffer is big enough,
|
||||
// process the data and store the output size in output_size. If the output
|
||||
// buffer is too small, return false and we will give you a bigger one. If
|
||||
// any other error occurrs, abort the program. We don't have any better
|
||||
// error handling. Sorry. Do NOT return false for errors that won't be
|
||||
// remedied by a bigger buffer!
|
||||
virtual bool do_process( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
||||
public:
|
||||
NoStreamEnDecoder() {
|
||||
data_in = data_out = NULL;
|
||||
avail_in = avail_out = pos_in_acc_data_out = 0;
|
||||
processed = false;
|
||||
}
|
||||
|
||||
void setInput(const void* data, size_t size) {
|
||||
data_in = (const char *) data;
|
||||
avail_in = size;
|
||||
}
|
||||
|
||||
void setOutput(void* data, size_t size) {
|
||||
data_out = (char *) data;
|
||||
avail_out = size;
|
||||
}
|
||||
|
||||
size_t getAvailableInput() {
|
||||
return avail_in;
|
||||
}
|
||||
|
||||
size_t getAvailableOutput() {
|
||||
return avail_out;
|
||||
}
|
||||
|
||||
bool process(bool finish) {
|
||||
if ( processed ) {
|
||||
// data has been encoded or decoded, remaining output is in acc_data_out
|
||||
// -> copy to output
|
||||
if (avail_out > 0 && acc_data_out.size() - pos_in_acc_data_out > 0) {
|
||||
size_t sz = avail_out;
|
||||
if ( sz > acc_data_out.size() - pos_in_acc_data_out )
|
||||
sz = acc_data_out.size() - pos_in_acc_data_out;
|
||||
|
||||
memcpy( data_out, acc_data_out.c_str(), sz );
|
||||
data_out += sz;
|
||||
avail_out -= sz;
|
||||
pos_in_acc_data_out += sz;
|
||||
|
||||
// no more data left? -> return true
|
||||
return ( acc_data_out.size() - pos_in_acc_data_out == 0 );
|
||||
}
|
||||
} else {
|
||||
// data has not been encoded
|
||||
if ( finish && acc_data_in.empty() ) {
|
||||
// special case: all the data has been passed at once
|
||||
// -> process it without using acc_data_in
|
||||
process_finish( data_in, avail_in );
|
||||
return true;
|
||||
} else {
|
||||
// accumulate data in acc_data_in
|
||||
acc_data_in.append( data_in, avail_in );
|
||||
|
||||
// If this was the last bit of data, we process it, now.
|
||||
if ( finish ) {
|
||||
process_finish( acc_data_in.c_str(), acc_data_in.size() );
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void process_finish(const char* data_in, size_t avail_in) {
|
||||
// should we try with the existing output buffer?
|
||||
if ( shouldTryWith( data_in, avail_in, avail_out ) ) {
|
||||
if ( do_process( data_in, avail_in, data_out, avail_out, avail_out ) ) {
|
||||
// it worked :-)
|
||||
processed = true;
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
// we use our own buffer
|
||||
size_t buffer_size = suggestOutputSize( data_in, avail_in );
|
||||
do {
|
||||
acc_data_out.resize(buffer_size);
|
||||
|
||||
size_t output_size;
|
||||
//TODO doc says we mustn't modify the pointer returned by data()...
|
||||
if ( do_process( data_in, avail_in, (char*) acc_data_out.data(), avail_out, output_size ) ) {
|
||||
// buffer is big enough
|
||||
acc_data_out.resize( output_size );
|
||||
return ;
|
||||
}
|
||||
|
||||
// try a bigger one
|
||||
buffer_size *= 2;
|
||||
} while (true);
|
||||
}
|
||||
};
|
||||
|
||||
#include <endian.h>
|
||||
|
||||
// like NoStreamEnDecoder, but also adds the uncompressed size before the stream
|
||||
//NOTE You should make sure that the compression function doesn't overwrite any
|
||||
// memory, if this information is corrupted! This could be exploited by a
|
||||
// malicious person and there is nothing I can do about it. I could check for
|
||||
// an overflow, but when control gets back to this class, it is already too
|
||||
// late, as one 'ret' instruction is enough to do harm.
|
||||
class NoStreamAndUnknownSizeDecoder : public NoStreamEnDecoder {
|
||||
protected:
|
||||
// You implement this one:
|
||||
// If you don't know the real decoded size, don't change output_size.
|
||||
virtual bool do_process_no_size( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
||||
|
||||
|
||||
bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) {
|
||||
return suggestOutputSize( data_in, avail_in ) <= avail_out;
|
||||
}
|
||||
|
||||
size_t suggestOutputSize( const char* data_in, size_t avail_in ) {
|
||||
CHECK( avail_in >= sizeof(uint64_t), "not enough input data" );
|
||||
// We're not using size_t because we need a type that has the same size on all
|
||||
// architectures. A 32-bit host won't be able to open files with more than
|
||||
// 4GB (actually much less), so 4 byte are enough. Even a 64-bit host would
|
||||
// have some trouble with allocating 8GB of RAM just for our buffers ;-)
|
||||
//NOTE If your compiler doesn't accept this cast, your size_t is smaller than
|
||||
// uint32_t. In that case, you are in trouble...
|
||||
size_t output_size = le32toh( *(uint32_t*) data_in );
|
||||
return output_size;
|
||||
}
|
||||
|
||||
bool do_process( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) {
|
||||
size_t needed_output_size = le32toh( *(uint32_t*) data_in );
|
||||
if ( output_size < needed_output_size )
|
||||
return false;
|
||||
|
||||
//NOTE We skip 8 bytes. If we later decide to drop compatibility with 32-bit
|
||||
// hosts, we can save a 64-bit size. Well, that will be much later, when
|
||||
// we can easily hold two copies of a 4GB file in main memory :-D
|
||||
data_in += sizeof( uint64_t );
|
||||
|
||||
size_t reported_output_size = needed_output_size;
|
||||
if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, reported_output_size ) )
|
||||
return false;
|
||||
|
||||
CHECK( reported_output_size == needed_output_size, "Size of decoded data is different than expected" );
|
||||
|
||||
output_size = needed_output_size;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// encoder for NoStreamAndUnknownSizeDecoder
|
||||
class NoStreamAndUnknownSizeEncoder : public NoStreamEnDecoder {
|
||||
protected:
|
||||
// You implement this one:
|
||||
virtual bool do_process_no_size( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) =0;
|
||||
|
||||
|
||||
bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) {
|
||||
// If the compression doesn't use any spaces...
|
||||
return avail_out > sizeof( uint64_t );
|
||||
}
|
||||
|
||||
size_t suggestOutputSize( const char* data_in, size_t avail_in ) {
|
||||
// We assume that the compression won't make the data any bigger.
|
||||
return avail_in + sizeof( uint64_t );
|
||||
}
|
||||
|
||||
bool do_process( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) {
|
||||
CHECK( avail_in <= UINT32_MAX, "You want to compress more than 4GB of data?! Sorry, we don't support that, yet." );
|
||||
|
||||
// store size
|
||||
*(uint32_t*)data_in = htole32( avail_in );
|
||||
|
||||
// compressed data goes after the size
|
||||
// We skip more than we actually use; see NoStreamAndUnknownSizeDecoder::do_process(...).
|
||||
data_in += sizeof( uint64_t );
|
||||
|
||||
if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, output_size ) )
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#ifdef HAVE_LIBLZO
|
||||
|
||||
#include <lzo/lzo1x.h>
|
||||
|
||||
// finally, we can implement lzo
|
||||
class LZO1X_1_Decoder : public NoStreamAndUnknownSizeDecoder {
|
||||
protected:
|
||||
// You implement this one:
|
||||
bool do_process_no_size( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) {
|
||||
// same argument is used for available output size and size of decompressed data
|
||||
output_size = avail_out;
|
||||
int ret = lzo1x_decompress_safe( (const lzo_bytep) data_in, avail_in,
|
||||
(lzo_bytep) data_out, &output_size, NULL );
|
||||
//TODO look up exit codes
|
||||
CHECK( ret == 0, "lzo1x_decompress_safe probably failed" );
|
||||
}
|
||||
};
|
||||
class LZO1X_1_Compression;
|
||||
class LZO1X_1_Encoder : public NoStreamAndUnknownSizeEncoder {
|
||||
const LZO1X_1_Compression* compression;
|
||||
public:
|
||||
LZO1X_1_Encoder(const LZO1X_1_Compression* compression) {
|
||||
this->compression = compression;
|
||||
}
|
||||
protected:
|
||||
// You implement this one:
|
||||
bool do_process_no_size( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size );
|
||||
};
|
||||
class LZO1X_1_Compression : public Compression {
|
||||
public:
|
||||
EnDecoder* getEncoder() const {
|
||||
return new LZO1X_1_Encoder(this);
|
||||
}
|
||||
|
||||
EnDecoder* getDecoder() const {
|
||||
return new LZO1X_1_Decoder();
|
||||
}
|
||||
|
||||
std::string getName() const { return "lzo1x_1"; }
|
||||
|
||||
|
||||
lzo_voidp getWorkmem(size_t size) const {
|
||||
return new char[size];
|
||||
}
|
||||
|
||||
void giveBackWorkmem(lzo_voidp wrkmem) const {
|
||||
//TODO I think we should keep the memory around and reuse it. After all
|
||||
// it is only a few kilobytes and we will need it a lot. However, I
|
||||
// won't risk anything here because I don't know whether this will be
|
||||
// called by more than one thread.
|
||||
delete[] (char*)wrkmem;
|
||||
}
|
||||
};
|
||||
bool LZO1X_1_Encoder::do_process_no_size( const char* data_in, size_t avail_in,
|
||||
char* data_out, size_t avail_out, size_t& output_size ) {
|
||||
// same argument is used for available output size and size of decompressed data
|
||||
output_size = avail_out;
|
||||
|
||||
lzo_voidp wrkmem = compression->getWorkmem(LZO1X_1_MEM_COMPRESS);
|
||||
int ret = lzo1x_1_compress( (const lzo_bytep) data_in, avail_in,
|
||||
(lzo_bytep) data_out, &output_size, wrkmem );
|
||||
compression->giveBackWorkmem(wrkmem);
|
||||
//TODO look up exit codes
|
||||
CHECK( ret == 0, "lzo1x_decompress_safe probably failed" );
|
||||
}
|
||||
|
||||
#endif // HAVE_LIBLZO
|
||||
|
||||
|
||||
// register them
|
||||
|
||||
static const Compression* compressions[] = {
|
||||
new LZMACompression(),
|
||||
# ifdef HAVE_LIBLZO
|
||||
new LZO1X_1_Compression(),
|
||||
# endif
|
||||
// NULL entry marks end of list. Don't remove it!
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue