Support for lzo; only internal and not tested

master
benny 2013-10-03 05:59:43 +02:00
parent 6656866687
commit 272ff36f8b
3 changed files with 434 additions and 4 deletions

View File

@ -34,6 +34,15 @@ find_package( Threads REQUIRED )
# LZMA is mandatory: configuration stops if it cannot be found.
find_package( LibLZMA REQUIRED )
include_directories( ${LIBLZMA_INCLUDE_DIRS} )

# LZO is optional. When it is found, HAVE_LIBLZO is defined for the compiler
# and the LZO headers are added to the include path; otherwise
# LIBLZO_LIBRARIES is cleared so that it expands to nothing where it is used.
find_package( LibLZO COMPONENTS LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE LIBLZO_HAS_LZO1X_1_COMPRESS )
if( LIBLZO_FOUND )
  add_definitions( -DHAVE_LIBLZO )
  include_directories( ${LIBLZO_INCLUDE_DIRS} )
else()
  set( LIBLZO_LIBRARIES )
endif()

# Every *.cc file in this directory goes into the zbackup executable, along
# with whatever is listed in protoSrcs and protoHdrs.
file( GLOB sourceFiles "*.cc" )
add_executable( zbackup ${sourceFiles} ${protoSrcs} ${protoHdrs} )
@ -43,6 +52,7 @@ target_link_libraries( zbackup
${CMAKE_THREAD_LIBS_INIT}
${ZLIB_LIBRARIES}
${LIBLZMA_LIBRARIES}
${LIBLZO_LIBRARIES}
)
# Install the zbackup executable into the "bin" directory of the install tree.
install( TARGETS zbackup DESTINATION bin )

116
cmake/FindLibLZO.cmake Normal file
View File

@ -0,0 +1,116 @@
# - Find LibLZO
# Find LibLZO headers and library
#
# LIBLZO_FOUND - True if liblzo is found.
# LIBLZO_INCLUDE_DIRS - Directory where liblzo headers are located.
# LIBLZO_LIBRARIES - LZO libraries to link against.
# LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE - True if lzo1x_decompress_safe() is found (required).
# LIBLZO_HAS_LZO1X_1_COMPRESS - True if lzo1x_1_compress() is found (required).
# LIBLZO_VERSION_MAJOR - The major version of lzo
# LIBLZO_VERSION_MINOR - The minor version of lzo
# LIBLZO_VERSION_PATCH - The patch version of lzo
# LIBLZO_VERSION_STRING - version number as a string ("<major>.<minor>.<patch>")
# The LIBLZO_VERSION_* variables are only set if lzo/version.h is found.
#=============================================================================
# Copyright 2008 Per Øyvind Karlsen <peroyvind@mandriva.org>
# Copyright 2009 Alexander Neundorf <neundorf@kde.org>
# Copyright 2009 Helio Chissini de Castro <helio@kde.org>
# Copyright 2012 Mario Bensi <mbensi@ipsquad.net>
# Adapted for liblzo (instead of liblzma) by Benjamin Koch <bbbsnowball@gmail.com>
#
# Distributed under the OSI-approved BSD License (the "License"):
#
# CMake - Cross Platform Makefile Generator
# Copyright 2000-2011 Kitware, Inc., Insight Software Consortium
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ------------------------------------------------------------------------------
#
# The above copyright and license notice applies to distributions of
# CMake in source and binary form. Some source files contain additional
# notices of original copyright by their contributors; see each source
# for details. Third-party software packages supplied with CMake under
# compatible licenses provide their own copyright notices documented in
# corresponding subdirectories.
#
# ------------------------------------------------------------------------------
#
# CMake was initially developed by Kitware with the following sponsorship:
#
# * National Library of Medicine at the National Institutes of Health
# as part of the Insight Segmentation and Registration Toolkit (ITK).
#
# * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
# Visualization Initiative.
#
# * National Alliance for Medical Image Computing (NAMIC) is funded by the
# National Institutes of Health through the NIH Roadmap for Medical Research,
# Grant U54 EB005149.
#
# * Kitware, Inc.
#=============================================================================
# Locate the directory that contains lzo/lzo1x.h and the lzo2 library itself.
find_path(LIBLZO_INCLUDE_DIR lzo/lzo1x.h)
find_library(LIBLZO_LIBRARY lzo2)

# Version detection. The LIBLZO_VERSION_* variables are set only when
# lzo/version.h exists and defines all of LZO_VERSION_MAJOR, LZO_VERSION_MINOR
# and LZO_VERSION_PATCH. Each component is extracted with MATCHES because
# string(REGEX REPLACE) hands back its whole input when the pattern does not
# match, which would silently produce a garbage version.
# NOTE(review): LZO 2.x appears to keep its version in lzo/lzoconf.h
# (LZO_VERSION_STRING) rather than in lzo/version.h -- confirm, and extend this
# block if version information is actually needed.
if(LIBLZO_INCLUDE_DIR AND EXISTS "${LIBLZO_INCLUDE_DIR}/lzo/version.h")
  file(STRINGS "${LIBLZO_INCLUDE_DIR}/lzo/version.h" LIBLZO_HEADER_CONTENTS
       REGEX "#define LZO_VERSION_[A-Z]+ [0-9]+")

  set(LIBLZO_VERSION_COMPLETE TRUE)
  foreach(LIBLZO_VERSION_PART MAJOR MINOR PATCH)
    if("${LIBLZO_HEADER_CONTENTS}" MATCHES "#define LZO_VERSION_${LIBLZO_VERSION_PART} ([0-9]+)")
      set(LIBLZO_VERSION_${LIBLZO_VERSION_PART} "${CMAKE_MATCH_1}")
    else()
      # Component not present in the header: leave it undefined.
      unset(LIBLZO_VERSION_${LIBLZO_VERSION_PART})
      set(LIBLZO_VERSION_COMPLETE FALSE)
    endif()
  endforeach()

  if(LIBLZO_VERSION_COMPLETE)
    set(LIBLZO_VERSION_STRING "${LIBLZO_VERSION_MAJOR}.${LIBLZO_VERSION_MINOR}.${LIBLZO_VERSION_PATCH}")
  else()
    unset(LIBLZO_VERSION_STRING)
  endif()

  # Do not leak the temporaries into the including scope.
  unset(LIBLZO_VERSION_COMPLETE)
  unset(LIBLZO_VERSION_PART)
  unset(LIBLZO_HEADER_CONTENTS)
endif()
# Only two liblzo functions are used, so probe the library for exactly those.
# Each result variable is later required by the "found" decision.
if(LIBLZO_LIBRARY)
  include(CheckLibraryExists)
  check_library_exists(${LIBLZO_LIBRARY} lzo1x_decompress_safe "" LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE)
  check_library_exists(${LIBLZO_LIBRARY} lzo1x_1_compress "" LIBLZO_HAS_LZO1X_1_COMPRESS)
endif()
# LibLZO counts as found only if the header directory, the library and both
# required functions were detected.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibLZO DEFAULT_MSG
  LIBLZO_INCLUDE_DIR
  LIBLZO_LIBRARY
  LIBLZO_HAS_LZO1X_DECOMPRESS_SAFE
  LIBLZO_HAS_LZO1X_1_COMPRESS
)

# Publish the conventional plural result variables for consumers.
if(LIBLZO_FOUND)
  set(LIBLZO_INCLUDE_DIRS ${LIBLZO_INCLUDE_DIR})
  set(LIBLZO_LIBRARIES ${LIBLZO_LIBRARY})
endif()

# Hide the raw search results from the default cache view.
mark_as_advanced(LIBLZO_LIBRARY LIBLZO_INCLUDE_DIR)

View File

@ -1,8 +1,6 @@
// Copyright (c) 2012-2013 Konstantin Isakov <ikm@zbackup.org>
// Part of ZBackup. Licensed under GNU GPLv2 or later
#include <lzma.h>
#include "compression.hh"
#include "check.hh"
@ -15,6 +13,8 @@ Compression::~Compression() {}
// LZMA
#include <lzma.h>
class LZMAEnDecoder : public EnDecoder {
protected:
static lzma_stream init_value;
@ -84,15 +84,319 @@ public:
};
// LZOP
// LZO
//TODO
// liblzo implements a lot of algorithms "for unlimited backward compatibility"
// The web site says:
// "My experiments have shown that LZO1B is good with a large blocksize
// or with very redundant data, LZO1F is good with a small blocksize or
// with binary data and that LZO1X is often the best choice of all.
// LZO1Y and LZO1Z are almost identical to LZO1X - they can achieve a
// better compression ratio on some files.
// Beware, your mileage may vary."
// => I'm using LZO1X, as suggested
#include <string.h>
// Unfortunately, liblzo always works on the whole data at once, so it does not
// support the streaming approach that most other libraries use. This means
// that we have to buffer the data. NoStreamEnDecoder implements that buffering
// so it can be reused for any other library that behaves like liblzo.
//
// It collects all input, processes it in one pass once `finish` is signalled,
// and then hands the result out piece by piece through the output buffer.
class NoStreamEnDecoder : public EnDecoder {
  // acc_data_in:  input collected over several process() calls
  // acc_data_out: result that did not go straight into the caller's buffer
  std::string acc_data_in, acc_data_out;
  // Caller-supplied input and output windows and their remaining sizes.
  const char* data_in;
  char* data_out;
  size_t avail_in, avail_out;
  // True once do_process() has succeeded.
  bool processed;
  // Number of bytes of acc_data_out already copied to the caller.
  size_t pos_in_acc_data_out;

protected:
  // You must implement these:

  // Should we try with the existing output buffer which has avail_out
  // bytes of free space? If you know that this will fail, return false.
  // You may peek into data_in which contains the complete input data.
  virtual bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) =0;

  // We will allocate a buffer for the output data. How big should it be?
  // You may peek into data_in which contains the complete input data.
  virtual size_t suggestOutputSize( const char* data_in, size_t avail_in ) =0;

  // Process the data in data_in and put the result into data_out. You mustn't
  // write more than avail_out bytes! If the output buffer is big enough,
  // process the data and store the output size in output_size. If the output
  // buffer is too small, return false and we will give you a bigger one. If
  // any other error occurs, abort the program. We don't have any better
  // error handling. Sorry. Do NOT return false for errors that won't be
  // remedied by a bigger buffer!
  // On entry output_size holds the same value as avail_out.
  virtual bool do_process( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size ) =0;

public:
  NoStreamEnDecoder() {
    data_in = data_out = NULL;
    avail_in = avail_out = pos_in_acc_data_out = 0;
    processed = false;
  }

  // Sets the next chunk of input; it is consumed by the next process() call.
  void setInput(const void* data, size_t size) {
    data_in = (const char *) data;
    avail_in = size;
  }

  // Sets the buffer that receives output.
  void setOutput(void* data, size_t size) {
    data_out = (char *) data;
    avail_out = size;
  }

  // Number of input bytes that have not been consumed yet.
  size_t getAvailableInput() {
    return avail_in;
  }

  // Number of bytes still free in the current output buffer.
  size_t getAvailableOutput() {
    return avail_out;
  }

  // Consumes the pending input. Once `finish` is true the collected data is
  // processed in one go. Returns true when all output has been handed to the
  // caller, false if more calls are needed (more input, or more output space).
  bool process(bool finish) {
    if ( !processed ) {
      if ( finish && acc_data_in.empty() ) {
        // special case: all the data has been passed at once
        // -> process it without copying it into acc_data_in
        process_finish( data_in, avail_in );
      } else {
        // accumulate data in acc_data_in
        if ( avail_in > 0 )
          acc_data_in.append( data_in, avail_in );

        // If this was the last bit of data, we process it now.
        if ( finish )
          process_finish( acc_data_in.data(), acc_data_in.size() );
      }

      // The input window is fully consumed (processed or accumulated), so the
      // caller must not see it as pending any more.
      if ( avail_in > 0 ) {
        data_in += avail_in;
        avail_in = 0;
      }
    }

    if ( !processed )
      return false;   // still waiting for the rest of the input

    // Data has been encoded or decoded; whatever did not go directly into the
    // caller's buffer sits in acc_data_out -> copy as much as fits.
    size_t remaining = acc_data_out.size() - pos_in_acc_data_out;
    size_t sz = avail_out < remaining ? avail_out : remaining;
    if ( sz > 0 ) {
      // Continue where the previous call stopped, not at the beginning.
      memcpy( data_out, acc_data_out.data() + pos_in_acc_data_out, sz );
      data_out += sz;
      avail_out -= sz;
      pos_in_acc_data_out += sz;
    }

    // no more data left? -> return true
    return pos_in_acc_data_out == acc_data_out.size();
  }

private:
  // Runs do_process() on the complete input [in, in + in_size). The result
  // goes directly into the caller's output buffer if possible, otherwise into
  // acc_data_out, which is grown until do_process() accepts it.
  void process_finish(const char* in, size_t in_size) {
    // should we try with the existing output buffer?
    if ( shouldTryWith( in, in_size, avail_out ) ) {
      // Pre-loaded with the capacity for implementations that read it.
      size_t output_size = avail_out;
      if ( do_process( in, in_size, data_out, avail_out, output_size ) ) {
        // it worked :-) -> account for the bytes written
        data_out += output_size;
        avail_out -= output_size;
        processed = true;
        return ;
      }
    }

    // we use our own buffer
    size_t buffer_size = suggestOutputSize( in, in_size );
    if ( buffer_size == 0 )
      buffer_size = 1;   // doubling zero would never grow the buffer

    for (;;) {
      acc_data_out.resize( buffer_size );

      size_t output_size = buffer_size;
      //TODO doc says we mustn't modify the pointer returned by data()...
      if ( do_process( in, in_size, (char*) acc_data_out.data(), buffer_size, output_size ) ) {
        // buffer is big enough
        acc_data_out.resize( output_size );
        pos_in_acc_data_out = 0;
        processed = true;
        return ;
      }

      // try a bigger one
      buffer_size *= 2;
    }
  }
};
#include <endian.h>
// Like NoStreamEnDecoder, but expects the uncompressed size in front of the
// stream: an 8-byte header whose first 4 bytes hold the size as a
// little-endian 32-bit value.
//NOTE You should make sure that the decompression function doesn't overwrite
//     any memory, if this information is corrupted! This could be exploited by
//     a malicious person and there is nothing I can do about it. I could check
//     for an overflow, but when control gets back to this class, it is already
//     too late, as one 'ret' instruction is enough to do harm.
class NoStreamAndUnknownSizeDecoder : public NoStreamEnDecoder {
protected:
  // You implement this one:
  // Decode data_in (header already stripped) into data_out, which has room
  // for avail_out bytes. Return false if the output buffer is too small.
  // If you don't know the real decoded size, don't change output_size.
  virtual bool do_process_no_size( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size ) =0;

  // The existing buffer is worth a try if it can hold the announced size.
  bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) {
    return suggestOutputSize( data_in, avail_in ) <= avail_out;
  }

  // The header tells us exactly how much output to expect.
  size_t suggestOutputSize( const char* data_in, size_t avail_in ) {
    return read_stored_size( data_in, avail_in );
  }

  bool do_process( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size ) {
    size_t needed_output_size = read_stored_size( data_in, avail_in );

    // We need a bigger buffer to store the data. The decision is based on the
    // space we were given, not on the output_size out-parameter.
    if ( avail_out < needed_output_size )
      return false;

    //NOTE We skip 8 bytes. If we later decide to drop compatibility with 32-bit
    //     hosts, we can save a 64-bit size. Well, that will be much later, when
    //     we can easily hold two copies of a 4GB file in main memory :-D
    data_in += sizeof( uint64_t );
    avail_in -= sizeof( uint64_t );   // the header is not part of the payload

    size_t reported_output_size = needed_output_size;
    if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, reported_output_size ) )
      return false;

    CHECK( reported_output_size == needed_output_size, "Size of decoded data is different than expected" );

    output_size = needed_output_size;

    return true;
  }

private:
  // Reads the decoded size from the header at the start of data_in.
  // We're not using size_t in the file format because we need a type that has
  // the same size on all architectures. A 32-bit host won't be able to open
  // files with more than 4GB (actually much less), so 4 bytes are enough. Even
  // a 64-bit host would have some trouble with allocating 8GB of RAM just for
  // our buffers ;-)
  size_t read_stored_size( const char* data_in, size_t avail_in ) {
    CHECK( avail_in >= sizeof(uint64_t), "not enough input data" );

    // memcpy instead of a pointer cast: the input need not be aligned for
    // uint32_t and is const.
    uint32_t stored_size;
    memcpy( &stored_size, data_in, sizeof( stored_size ) );
    return le32toh( stored_size );
  }
};
// Encoder for NoStreamAndUnknownSizeDecoder: puts an 8-byte header in front of
// the compressed data. The first 4 bytes hold the uncompressed size as a
// little-endian 32-bit value, the remaining 4 bytes are written as zero.
class NoStreamAndUnknownSizeEncoder : public NoStreamEnDecoder {
protected:
  // You implement this one:
  // Compress data_in into data_out, which has room for avail_out bytes, and
  // store the compressed size in output_size. Return false if the output
  // buffer is too small.
  virtual bool do_process_no_size( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size ) =0;

  // Try the existing buffer whenever it has room for more than the header.
  bool shouldTryWith( const char* data_in, size_t avail_in, size_t avail_out ) {
    return avail_out > sizeof( uint64_t );
  }

  // First guess: we assume that the compression won't make the data any
  // bigger. If do_process() rejects this size, the base class retries with a
  // larger buffer.
  size_t suggestOutputSize( const char* data_in, size_t avail_in ) {
    return avail_in + sizeof( uint64_t );
  }

  bool do_process( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size ) {
    CHECK( avail_in <= UINT32_MAX, "You want to compress more than 4GB of data?! Sorry, we don't support that, yet." );

    // The header itself must fit; a bigger buffer fixes that.
    if ( avail_out < sizeof( uint64_t ) )
      return false;

    // Store the size in the OUTPUT. The input is read-only and must reach the
    // compressor untouched.
    uint32_t stored_size = htole32( (uint32_t) avail_in );
    memset( data_out, 0, sizeof( uint64_t ) );
    memcpy( data_out, &stored_size, sizeof( stored_size ) );

    // compressed data goes after the size
    // We skip more than we actually use; see NoStreamAndUnknownSizeDecoder::do_process(...).
    data_out += sizeof( uint64_t );
    avail_out -= sizeof( uint64_t );

    if ( !do_process_no_size( data_in, avail_in, data_out, avail_out, output_size ) )
      return false;

    // The header is part of what we produced.
    output_size += sizeof( uint64_t );

    return true;
  }
};
#ifdef HAVE_LIBLZO
#include <lzo/lzo1x.h>
// finally, we can implement lzo
class LZO1X_1_Decoder : public NoStreamAndUnknownSizeDecoder {
protected:
// You implement this one:
bool do_process_no_size( const char* data_in, size_t avail_in,
char* data_out, size_t avail_out, size_t& output_size ) {
// same argument is used for available output size and size of decompressed data
output_size = avail_out;
int ret = lzo1x_decompress_safe( (const lzo_bytep) data_in, avail_in,
(lzo_bytep) data_out, &output_size, NULL );
//TODO look up exit codes
CHECK( ret == 0, "lzo1x_decompress_safe probably failed" );
}
};
// Defined further down; the encoder only stores a pointer to it here.
class LZO1X_1_Compression;

// Encoder built on lzo1x_1_compress(). It remembers the LZO1X_1_Compression
// object that created it; do_process_no_size() is defined after that class.
class LZO1X_1_Encoder : public NoStreamAndUnknownSizeEncoder {
  const LZO1X_1_Compression* compression;

public:
  LZO1X_1_Encoder( const LZO1X_1_Compression* owner )
    : compression( owner ) {
  }

protected:
  // Compresses data_in into data_out; implemented out of line.
  bool do_process_no_size( const char* data_in, size_t avail_in,
      char* data_out, size_t avail_out, size_t& output_size );
};
// Compression method "lzo1x_1": creates the matching encoder and decoder and
// hands out the work memory the encoder needs.
class LZO1X_1_Compression : public Compression {
public:
  // Returns a new encoder that refers back to this object.
  EnDecoder* getEncoder() const {
    EnDecoder* encoder = new LZO1X_1_Encoder( this );
    return encoder;
  }

  // Returns a new decoder.
  EnDecoder* getDecoder() const {
    EnDecoder* decoder = new LZO1X_1_Decoder();
    return decoder;
  }

  std::string getName() const {
    return "lzo1x_1";
  }

  // Allocates `size` bytes of work memory; hand it back with giveBackWorkmem().
  lzo_voidp getWorkmem(size_t size) const {
    char* buffer = new char[size];
    return buffer;
  }

  // Frees work memory obtained from getWorkmem().
  void giveBackWorkmem(lzo_voidp wrkmem) const {
    //TODO I think we should keep the memory around and reuse it. After all
    //     it is only a few kilobytes and we will need it a lot. However, I
    //     won't risk anything here because I don't know whether this will be
    //     called by more than one thread.
    char* buffer = (char*) wrkmem;
    delete[] buffer;
  }
};
bool LZO1X_1_Encoder::do_process_no_size( const char* data_in, size_t avail_in,
char* data_out, size_t avail_out, size_t& output_size ) {
// same argument is used for available output size and size of decompressed data
output_size = avail_out;
lzo_voidp wrkmem = compression->getWorkmem(LZO1X_1_MEM_COMPRESS);
int ret = lzo1x_1_compress( (const lzo_bytep) data_in, avail_in,
(lzo_bytep) data_out, &output_size, wrkmem );
compression->giveBackWorkmem(wrkmem);
//TODO look up exit codes
CHECK( ret == 0, "lzo1x_decompress_safe probably failed" );
}
#endif // HAVE_LIBLZO
// Table of the available compression methods. LZMA is always present; the
// LZO entry exists only when the build defines HAVE_LIBLZO.
static const Compression* compressions[] = {
  new LZMACompression(),
#ifdef HAVE_LIBLZO
  new LZO1X_1_Compression(),
#endif
  // The NULL entry marks the end of the list. Don't remove it!
  NULL
};