Skip to content

Commit

Permalink
Add support for reading raw extracted data via a callback
Browse files Browse the repository at this point in the history
Close issue #122
  • Loading branch information
rikyoz committed Jun 18, 2024
1 parent 302752d commit 367428f
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ set( HEADERS
src/internal/cmultivolumeinstream.hpp
src/internal/cmultivolumeoutstream.hpp
src/internal/com.hpp
src/internal/crawoutstream.hpp
src/internal/cstdinstream.hpp
src/internal/cstdoutstream.hpp
src/internal/csymlinkinstream.hpp
Expand Down Expand Up @@ -96,6 +97,7 @@ set( HEADERS
src/internal/operationcategory.hpp
src/internal/operationresult.hpp
src/internal/processeditem.hpp
src/internal/rawdataextractcallback.hpp
src/internal/renameditem.hpp
src/internal/sequentialextractcallback.hpp
src/internal/stdinputitem.hpp
Expand Down Expand Up @@ -141,6 +143,7 @@ set( SOURCES
src/internal/cfixedbufferoutstream.cpp
src/internal/cmultivolumeinstream.cpp
src/internal/cmultivolumeoutstream.cpp
src/internal/crawoutstream.cpp
src/internal/cstdinstream.cpp
src/internal/cstdoutstream.cpp
src/internal/csymlinkinstream.cpp
Expand All @@ -165,6 +168,7 @@ set( SOURCES
src/internal/operationcategory.cpp
src/internal/operationresult.cpp
src/internal/processeditem.cpp
src/internal/rawdataextractcallback.cpp
src/internal/renameditem.cpp
src/internal/sequentialextractcallback.cpp
src/internal/stdinputitem.cpp
Expand Down
5 changes: 5 additions & 0 deletions include/bit7z/bitabstractarchivehandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ using FileCallback = std::function< void( tstring ) >;
*/
using PasswordCallback = std::function< tstring() >;

/**
* @brief A function providing the raw extracted data and its size to the user.
*
* The first argument points to the extracted bytes, and the second one is the number of bytes available.
* The function must return true to let the extraction continue; when it returns false,
* the output stream calling it reports an abort error (E_ABORT) for that write.
*/
using RawDataCallback = std::function< bool(const byte_t*, std::size_t) >;

/**
* @brief Enumeration representing how a handler should deal when an output file already exists.
*/
Expand Down
14 changes: 14 additions & 0 deletions include/bit7z/bitinputarchive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,20 @@ class BitInputArchive {
*/
void extractTo( std::map< tstring, buffer_t >& outMap ) const;

/**
* @brief Extracts the content of the archive to the given raw data callback.
*
* @note No list of item indices is passed to the extraction in this overload.
*
* @param callback a function providing the extracted raw data to the user;
*                 it is taken by value and moved into the extraction callback object.
*/
void extractTo( RawDataCallback callback ) const;

/**
* @brief Extracts the raw content of the specified items of the archive to the given callback.
*
* @param callback a function providing the extracted raw data to the user.
* @param indices  the indices of the items to be extracted; each index must be
*                 in the valid range [0, itemsCount() - 1].
*
* @throws BitException (with the BitError::InvalidIndex error code) if any of the given indices is not valid.
*/
void extractTo( RawDataCallback callback, const std::vector< uint32_t >& indices ) const;

/**
* @brief Tests the archive without extracting its content.
*
Expand Down
18 changes: 18 additions & 0 deletions src/bitinputarchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "internal/fixedbufferextractcallback.hpp"
#include "internal/opencallback.hpp"
#include "internal/operationresult.hpp"
#include "internal/rawdataextractcallback.hpp"
#include "internal/sequentialextractcallback.hpp"
#include "internal/streamextractcallback.hpp"
#include "internal/stringutil.hpp"
Expand Down Expand Up @@ -331,6 +332,23 @@ void BitInputArchive::extractTo( std::map< tstring, buffer_t >& outMap ) const {
extractArchive( filesIndices, extractCallback, NAskMode::kExtract );
}

void BitInputArchive::extractTo( RawDataCallback callback ) const {
    // This overload does not select specific items: an empty list of indices is given to extractArchive.
    const std::vector< uint32_t > noIndices{};

    // The user's callback is moved into the COM callback object driving the extraction.
    auto rawDataCallback = bit7z::make_com< RawDataExtractCallback, ExtractCallback >( *this,
                                                                                       std::move( callback ) );
    extractArchive( noIndices, rawDataCallback, NAskMode::kExtract );
}

void BitInputArchive::extractTo( RawDataCallback callback, const std::vector< uint32_t >& indices ) const {
    // Reject the whole request if at least one index is outside the valid range [0, itemsCount() - 1].
    const auto firstInvalid = findInvalidIndex( indices );
    const bool allIndicesValid = ( firstInvalid == indices.cend() );
    if ( !allIndicesValid ) {
        throw BitException( "Cannot extract item at the index " + std::to_string( *firstInvalid ),
                            make_error_code( BitError::InvalidIndex ) );
    }

    // The user's callback is moved into the COM callback object driving the extraction.
    auto rawDataCallback = bit7z::make_com< RawDataExtractCallback, ExtractCallback >( *this,
                                                                                       std::move( callback ) );
    extractArchive( indices, rawDataCallback, NAskMode::kExtract );
}

void BitInputArchive::test() const {
testArchive( {} );
}
Expand Down
40 changes: 40 additions & 0 deletions src/internal/crawoutstream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com

/*
* bit7z - A C++ static library to interface with the 7-zip shared libraries.
* Copyright (c) 2014-2024 Riccardo Ostani - All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include <utility>

#include "internal/crawoutstream.hpp"

namespace bit7z {

// Stores the given callback (moved in), so that Write() can forward the written data to it.
CRawOutStream::CRawOutStream( RawDataCallback callback )
    : mOutputCallback( std::move( callback ) ) {}

/*
 * Forwards the written chunk to the user's raw data callback.
 *
 * data          : pointer to the bytes to be written.
 * size          : number of bytes available at data.
 * processedSize : optional output; set to 0 on entry, and to size once the callback has returned.
 *
 * Returns S_OK if size is zero or the callback returned true, E_ABORT if the callback returned false,
 * and E_FAIL if the callback threw (this includes std::bad_function_call for an empty callback).
 */
COM_DECLSPEC_NOTHROW
STDMETHODIMP CRawOutStream::Write( const void* data, UInt32 size, UInt32* processedSize ) noexcept {
    if ( processedSize != nullptr ) {
        *processedSize = 0;
    }

    if ( size == 0 ) {
        return S_OK;
    }

    bool callbackResult = false;
    try {
        callbackResult = mOutputCallback( static_cast< const byte_t* >( data ), size );
    } catch ( ... ) {
        // This method is noexcept: an exception escaping from the user's callback would
        // call std::terminate, so it is converted into a failure code instead.
        // processedSize is left at 0 since the data was not successfully consumed.
        return E_FAIL;
    }

    if ( processedSize != nullptr ) {
        *processedSize = size;
    }

    return callbackResult ? S_OK : E_ABORT;
}

} // namespace bit7z
50 changes: 50 additions & 0 deletions src/internal/crawoutstream.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* bit7z - A C++ static library to interface with the 7-zip shared libraries.
* Copyright (c) 2014-2024 Riccardo Ostani - All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#ifndef CRAWOUTSTREAM_HPP
#define CRAWOUTSTREAM_HPP

#include "bitabstractarchivehandler.hpp"
#include "internal/com.hpp"
#include "internal/guids.hpp"
#include "internal/macros.hpp"

#include <7zip/IStream.h>

#include <ostream>

namespace bit7z {

/**
* @brief A sequential output stream that hands every chunk of written data to a RawDataCallback.
*
* Write() passes the data pointer and its size to the stored callback,
* and maps the callback's boolean result to an HRESULT.
*/
class CRawOutStream : public ISequentialOutStream, public CMyUnknownImp {
public:
// Stores the callback that will receive the data passed to Write().
explicit CRawOutStream( RawDataCallback callback );

// The stream is neither copyable nor movable.
CRawOutStream( const CRawOutStream& ) = delete;

CRawOutStream( CRawOutStream&& ) = delete;

auto operator=( const CRawOutStream& ) -> CRawOutStream& = delete;

auto operator=( CRawOutStream&& ) -> CRawOutStream& = delete;

MY_UNKNOWN_VIRTUAL_DESTRUCTOR( ~CRawOutStream() ) = default;

// ISequentialOutStream
BIT7Z_STDMETHOD( Write, void const* data, UInt32 size, UInt32* processedSize );

// NOLINTNEXTLINE(modernize-use-noexcept, modernize-use-trailing-return-type, readability-identifier-length)
MY_UNKNOWN_IMP1( ISequentialOutStream ) //-V2507 //-V2511 //-V835

private:
// The user's callback invoked by Write() with the written data.
RawDataCallback mOutputCallback;
};

} // namespace bit7z

#endif // CRAWOUTSTREAM_HPP
64 changes: 64 additions & 0 deletions src/internal/rawdataextractcallback.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com

/*
* bit7z - A C++ static library to interface with the 7-zip shared libraries.
* Copyright (c) 2014-2024 Riccardo Ostani - All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include "internal/rawdataextractcallback.hpp"

#include "bitinputarchive.hpp"
#include "bitpropvariant.hpp"
#include "internal/crawoutstream.hpp"
#include "internal/extractcallback.hpp"
#include "internal/util.hpp"

#include <cstdint>
#include <ostream>
#include <utility>

using namespace NWindows;

namespace bit7z {

RawDataExtractCallback::RawDataExtractCallback( const BitInputArchive& inputArchive, RawDataCallback callback )
: ExtractCallback( inputArchive ),
mCallback( std::move( callback ) ) {}

// Drops the reference this callback holds to the output stream stored by getOutStream().
void RawDataExtractCallback::releaseStream() {
mCallbackStream.Release();
}

/*
 * Provides the output stream for the item at the given index.
 *
 * Folders get no stream (S_OK is returned without touching outStream).
 * For any other item, the handler's file callback (if set) is notified with the item's path,
 * then a new CRawOutStream built from the user's raw data callback is stored in this object
 * and returned through outStream.
 *
 * Returns E_FAIL if the item's path property is neither empty nor a string, S_OK otherwise.
 */
auto RawDataExtractCallback::getOutStream( uint32_t index, ISequentialOutStream** outStream ) -> HRESULT {
    if ( isItemFolder( index ) ) {
        return S_OK;
    }

    // Resolving the path of the item; an empty path property falls back to the empty-file alias.
    const BitPropVariant pathProperty = itemProperty( index, BitProperty::Path );
    tstring itemPath;
    if ( pathProperty.isEmpty() ) {
        itemPath = kEmptyFileAlias;
    } else {
        if ( !pathProperty.isString() ) {
            return E_FAIL;
        }
        itemPath = pathProperty.getString();
    }

    // Notifying the user (if requested) about the file that is going to be extracted.
    const auto notifyFile = mHandler.fileCallback();
    if ( notifyFile ) {
        notifyFile( itemPath );
    }

    // One reference to the new stream stays in mCallbackStream (until releaseStream()),
    // while the other one is detached and handed over to the caller.
    auto rawStream = bit7z::make_com< CRawOutStream, ISequentialOutStream >( mCallback );
    mCallbackStream = rawStream;
    *outStream = rawStream.Detach();
    return S_OK;
}

} // namespace bit7z
47 changes: 47 additions & 0 deletions src/internal/rawdataextractcallback.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* bit7z - A C++ static library to interface with the 7-zip shared libraries.
* Copyright (c) 2014-2024 Riccardo Ostani - All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#ifndef RAWDATAEXTRACTCALLBACK_HPP
#define RAWDATAEXTRACTCALLBACK_HPP

#include "bitabstractarchivehandler.hpp"
#include "internal/extractcallback.hpp"

#include <cstdint>

namespace bit7z {

class BitInputArchive;

/**
* @brief An extract callback that routes the data of the extracted items to a RawDataCallback.
*/
class RawDataExtractCallback final : public ExtractCallback {
public:
// Builds the callback for the given input archive, storing the user's raw data callback.
RawDataExtractCallback( const BitInputArchive& inputArchive, RawDataCallback callback );

// The callback object is neither copyable nor movable.
RawDataExtractCallback( const RawDataExtractCallback& ) = delete;

RawDataExtractCallback( RawDataExtractCallback&& ) = delete;

auto operator=( const RawDataExtractCallback& ) -> RawDataExtractCallback& = delete;

auto operator=( RawDataExtractCallback&& ) -> RawDataExtractCallback& = delete;

~RawDataExtractCallback() override = default;

private:
// The user's callback, passed to every output stream created by getOutStream().
RawDataCallback mCallback;
// The output stream most recently created by getOutStream(); released by releaseStream().
CMyComPtr< ISequentialOutStream > mCallbackStream;

// Releases the reference held in mCallbackStream.
void releaseStream() override;

// Creates the output stream for the item at the given index (folders get no stream).
auto getOutStream( std::uint32_t index, ISequentialOutStream** outStream ) -> HRESULT override;
};

} // namespace bit7z

#endif // RAWDATAEXTRACTCALLBACK_HPP
40 changes: 40 additions & 0 deletions tests/src/test_bitinputarchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1297,4 +1297,44 @@ TEMPLATE_TEST_CASE( "BitInputArchive: Finding files in an archive", "[bitinputar
REQUIRE_FALSE( info.contains( BIT7Z_STRING( "folder\\clouds.jpg" ) ) );
#endif
}
}

// Checks that extracting a single-file archive through a raw data callback
// provides data whose total size and CRC32 match the ones expected for the test file.
// NOLINTNEXTLINE(*-err58-cpp)
TEMPLATE_TEST_CASE( "BitInputArchive: Extract to raw data callback",
"[bitinputarchive]", tstring, buffer_t, stream_t ) {
const TestDirectory testDir{ fs::path{ test_archives_dir } / "extraction" / "single_file" };

// The archive formats on which the test is repeated.
const auto testArchive = GENERATE( as< TestInputFormat >(),
TestInputFormat{ "7z", BitFormat::SevenZip },
TestInputFormat{ "bz2", BitFormat::BZip2 },
TestInputFormat{ "gz", BitFormat::GZip },
TestInputFormat{ "iso", BitFormat::Iso },
TestInputFormat{ "lzh", BitFormat::Lzh },
TestInputFormat{ "lzma", BitFormat::Lzma },
TestInputFormat{ "rar4.rar", BitFormat::Rar },
TestInputFormat{ "rar5.rar", BitFormat::Rar5 },
TestInputFormat{ "tar", BitFormat::Tar },
TestInputFormat{ "wim", BitFormat::Wim },
TestInputFormat{ "xz", BitFormat::Xz },
TestInputFormat{ "zip", BitFormat::Zip } );

DYNAMIC_SECTION( "Archive format: " << testArchive.extension ) {
const fs::path arcFileName = fs::path{ clouds.name }.concat( "." + testArchive.extension );

TestType inputArchive{};
getInputArchive( arcFileName, inputArchive );
const Bit7zLibrary lib{ test::sevenzip_lib_path() };
BitArchiveReader info( lib, inputArchive, testArchive.format );

// Accumulators updated by the callback on every chunk of data it receives.
std::size_t totalSize = 0;
std::uint32_t crcValue = 0;

// The previous CRC value is fed back as the initial value for the next chunk;
// returning true asks for the extraction to continue.
info.extractTo([&totalSize, &crcValue]( const byte_t* data, std::size_t length ) {
totalSize += length;
crcValue = crc32( data, length, crcValue );
return true;
});
REQUIRE( totalSize == clouds.size );
REQUIRE( crcValue == clouds.crc32 );
}
}
11 changes: 5 additions & 6 deletions tests/src/utils/crc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,9 @@ template< std::size_t Bits >
using crc_table = std::array< const uint_t< Bits >, 256 >;

template< std::size_t Bits,
uint_t< Bits > initial,
uint_t< Bits > mask,
typename = typename std::enable_if< is_power_of_two( Bits ) >::type >
inline auto crc( const crc_table< Bits >& table, const void* buffer, std::size_t length ) noexcept -> uint_t< Bits > {
auto crc( const crc_table< Bits >& table, uint_t< Bits > initial, const void* buffer, std::size_t length ) noexcept -> uint_t< Bits > {
static constexpr auto last_byte_mask = 0xFFu;

if ( buffer == nullptr ) {
Expand All @@ -113,20 +112,20 @@ inline auto crc( const crc_table< Bits >& table, const void* buffer, std::size_t
const auto* byte_buffer = static_cast< const uint8_t* >( buffer );
for ( std::size_t i = 0; i < length; ++i ) {
// NOLINTNEXTLINE(*-pro-bounds-pointer-arithmetic, *-pro-bounds-constant-array-index)
crc = table[ byte_buffer[ i ] ^ ( crc & last_byte_mask ) ] ^ (crc >> 8);
crc = table[ byte_buffer[ i ] ^ ( crc & last_byte_mask ) ] ^ (crc >> 8U);
}
return crc ^ mask;
}

// NOLINTNEXTLINE(*-easily-swappable-parameters)
auto crc32( const void* buffer, std::size_t length ) noexcept -> uint32_t {
auto crc32( const void* buffer, std::size_t length, std::uint32_t initial ) noexcept -> uint32_t {
static constexpr auto crc32_mask = 0xFFFFFFFFu;
return crc< 32, 0, crc32_mask >( crc32_table, buffer, length );
return crc< 32, crc32_mask >( crc32_table, initial, buffer, length );
}

// NOLINTNEXTLINE(*-easily-swappable-parameters)
auto crc16( const void* buffer, std::size_t length ) noexcept -> uint16_t {
return crc< 16, 0, 0 >( crc16_table, buffer, length );
return crc< 16, 0 >( crc16_table, 0, buffer, length );
}

} // namespace test
Expand Down
Loading

0 comments on commit 367428f

Please sign in to comment.