From 59d8c3e4c32f3e521a09dc199b66a485716e2d99 Mon Sep 17 00:00:00 2001 From: vladikcomper Date: Fri, 4 Jun 2021 21:08:55 +0300 Subject: [PATCH] Introduce Comper-X compression format and toolset. --- CMakeLists.txt | 5 + include/mdcomp/comperx.hh | 41 +++++++ src/asm/ComperX.asm | 142 +++++++++++++++++++++++++ src/lib/comperx.cc | 217 ++++++++++++++++++++++++++++++++++++++ src/tools/comperx.cc | 159 ++++++++++++++++++++++++++++ 5 files changed, 564 insertions(+) create mode 100644 include/mdcomp/comperx.hh create mode 100644 src/asm/ComperX.asm create mode 100644 src/lib/comperx.cc create mode 100644 src/tools/comperx.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 3734258..aa0ba74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -305,6 +305,7 @@ endfunction() define_lib(artc42 "src/lib/artc42.cc" "include/mdcomp/artc42.hh") define_lib(comper "src/lib/comper.cc" "include/mdcomp/comper.hh") +define_lib(comperx "src/lib/comperx.cc" "include/mdcomp/comperx.hh") define_lib(enigma "src/lib/enigma.cc" "include/mdcomp/enigma.hh") define_lib(kosinski "src/lib/kosinski.cc" "include/mdcomp/kosinski.hh") define_lib(kosplus "src/lib/kosplus.cc" "include/mdcomp/kosplus.hh") @@ -315,6 +316,7 @@ define_lib(saxman "src/lib/saxman.cc" "include/mdcomp/saxman.hh") define_lib(snkrle "src/lib/snkrle.cc" "include/mdcomp/snkrle.hh") define_exe(compercmp "src/tools/compcmp.cc" comper compcmp) +define_exe(comperxcmp "src/tools/comperx.cc" comperx comperx) define_exe(enigmacmp "src/tools/enicmp.cc" enigma enicmp) define_exe(kosinskicmp "src/tools/koscmp.cc" kosinski koscmp) define_exe(kospluscmp "src/tools/kosplus.cc" kosplus kosplus) @@ -336,6 +338,7 @@ install( bigendian_io artc42 comper + comperx enigma kosinski kosplus @@ -345,6 +348,7 @@ install( saxman snkrle compercmp + comperxcmp enigmacmp kosinskicmp kospluscmp @@ -372,6 +376,7 @@ export( bigendian_io artc42 comper + comperx enigma kosinski kosplus diff --git a/include/mdcomp/comperx.hh b/include/mdcomp/comperx.hh new file 
mode 100644
index 0000000..196a82a
--- /dev/null
+++ b/include/mdcomp/comperx.hh
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) Flamewing 2013-2016
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef LIB_COMPERX_HH
+#define LIB_COMPERX_HH
+
+#include // NOTE(review): include targets look truncated in this copy of the patch; confirm upstream
+#include
+
+#include
+
+class comperx;
+using basic_comperx = BasicDecoder; // NOTE(review): template arguments look truncated here and on the next line; confirm upstream
+using moduled_comperx = ModuledAdaptor;
+
+class comperx : public basic_comperx, public moduled_comperx {
+    friend basic_comperx;   // both bases are friends, so they may call the
+    friend moduled_comperx; // private encode() overload declared below
+    static bool encode(std::ostream& Dst, uint8_t const* data, size_t Size);
+
+public:
+    using basic_comperx::encode; // make the base-class encode() name public
+    static bool decode(std::istream& Src, std::iostream& Dst);
+};
+
+#endif // LIB_COMPERX_HH
diff --git a/src/asm/ComperX.asm b/src/asm/ComperX.asm
new file mode 100644
index 0000000..9c0ef23
--- /dev/null
+++ b/src/asm/ComperX.asm
@@ -0,0 +1,142 @@
+; -----------------------------------------------------------------------------
+; Comper-X a newer, much faster implementation of Comper compression
+;
+; (c) 2021, vladikcomper
+; -----------------------------------------------------------------------------
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+; WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+; MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+; ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+; ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+; OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+; -----------------------------------------------------------------------------
+; INPUT:
+;       a0 - Source Offset
+;       a1 - Destination Offset
+;
+; USES:
+;       d0-d4, a2
+; -----------------------------------------------------------------------------
+
+; -----------------------------------------------------------------------------
+; Copy device for RLE transfers
+;
+; This is located above the decompressor for accessibility reasons.
+; -----------------------------------------------------------------------------
+
+        rts                                     ; copy length = 0 stops decompression
+        rept 127
+        move.l  d4, (a1)+
+        endm
+ComperXDec_CopyDevice_RLE:
+        dbf     d3, ComperXDec.fetch_flag       ; if bits counter remains, parse the next word
+        ;bra    ComperXDec.load_flags_field     ; ... fall through ...
+
+; -----------------------------------------------------------------------------
+; Decompressor starts here ...
+; -----------------------------------------------------------------------------
+
+ComperXDec:
+        moveq   #-1, d1                         ; d1 is used for negative sign-extended displacement
+        moveq   #0, d2                          ; d2 is used as 8-bit index for copy jump tables
+
+.load_flags_field:
+        moveq   #16-1, d3                       ; d3 = description field bits counter
+        move.w  (a0)+, d0                       ; d0 = description field data
+
+.fetch_flag:
+        add.w   d0, d0                          ; roll description field
+        bcs.s   .flag                           ; if a flag issued, branch
+        move.w  (a0)+, (a1)+                    ; otherwise, do uncompressed data
+
+.flag_next:
+        dbf     d3, .fetch_flag                 ; if bits counter remains, parse the next word
+        bra.s   .load_flags_field               ; start a new block
+
+; -----------------------------------------------------------------------------
+.end    rts
+
+; -----------------------------------------------------------------------------
+.flag:  move.b  (a0)+, d1                       ; d1 = Displacement (words) (sign-extended)
+        beq.s   .copy_rle                       ; displacement value of 0 (-1) triggers RLE mode
+        move.b  (a0)+, d2                       ; d2 = Copy length field
+
+        add.w   d1, d1                          ; d1 = Displacement * 2 (sign-extended)
+        lea     -2(a1,d1.w), a2                 ; a2 = Start copy address
+
+        moveq   #-1, d1                         ; restore the value of d1 now ...
+
+        add.b   d2, d2                          ; test MSB of copy length field ...
+        bcc.s   .copy_long_start                ; if not set, then transfer is even words, branch ...
+        move.w  (a2)+, (a1)+                    ; otherwise, copy odd word before falling into longwords loop ...
+
+.copy_long_start:
+        jmp     ComperXDec_CopyDevice(pc,d2.w)  ; d2 = 0..$FE
+
+; -----------------------------------------------------------------------------
+.copy_rle:
+        move.b  (a0)+, d1                       ; d1 = - $100 + Copy length
+
+        move.w  -(a1), d4
+        swap    d4
+        move.w  (a1)+, d4                       ; d4 = data to copy
+
+        add.b   d1, d1                          ; test MSB of copy length field ...
+        bcc.s   .copy_long_rle_start            ; if not set, then transfer is even words, branch ...
+        move.w  d4, (a1)+                       ; otherwise, copy odd word before falling into longwords loop ...
+
+.copy_long_rle_start:
+        jmp     ComperXDec_CopyDevice_RLE(pc,d1.w) ; d1 = -$100..-2
+
+; -----------------------------------------------------------------------------
+; Copy device for dictionary (non-RLE) transfers
+;
+; This is located below the decompressor for accessibility reasons.
+; -----------------------------------------------------------------------------
+
+ComperXDec_CopyDevice:
+        rts                                     ; copy length = 0 stops decompression
+        rept 127
+        move.l  (a2)+, (a1)+
+        endm
+        dbf     d3, ComperXDec.fetch_flag       ; if bits counter remains, parse the next word
+        bra     ComperXDec.load_flags_field
+
+; =============================================================================
+
+
+
+
+
+
+
+
+
+; =============================================================================
+; -----------------------------------------------------------------------------
+; Subroutine to decompress Moduled Comper-X
+; -----------------------------------------------------------------------------
+; INPUT:
+;       a0 - Source Offset
+;       a1 - Destination buffer
+; -----------------------------------------------------------------------------
+
+ComperXMDec:
+        lea     ComperXDec(pc), a3              ; a3 = decompressor entry point (a3 is clobbered)
+
+        move.w  (a0)+, d0
+        subq.w  #1, d0                          ; this is a trick to reduce number of blocks by one if size is modulo $1000
+        rol.w   #5, d0
+        and.w   #$1E, d0                        ; d0 = Number of blocks to decompress * 2 (0..1E)
+        neg.w   d0
+        jmp     .decompress_device(pc,d0)       ; enter the jsr run below d0 bytes before its end
+
+        rept 16-1
+        jsr     (a3)
+        endm
+.decompress_device:
+        jmp     (a3)                            ; final block is entered with jmp, not jsr
diff --git a/src/lib/comperx.cc b/src/lib/comperx.cc
new file mode 100644
index 0000000..278a267
--- /dev/null
+++ b/src/lib/comperx.cc
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) Flamewing 2013-2016
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ */
+
+#include // NOTE(review): include targets and template argument lists look truncated throughout this file; confirm upstream
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+using std::array;
+using std::ios;
+using std::iostream;
+using std::istream;
+using std::make_signed_t;
+using std::numeric_limits;
+using std::ostream;
+using std::streamsize;
+using std::stringstream;
+
+template <>
+size_t moduled_comperx::PadMaskBits = 1U;
+
+class comperx_internal {
+    // NOTE: This has to be changed for other LZSS-based compression schemes.
+    struct ComperXAdaptor {
+        using stream_t = uint16_t;
+        using stream_endian_t = BigEndian;
+        using descriptor_t = uint16_t;
+        using descriptor_endian_t = BigEndian;
+        using SlidingWindow_t = SlidingWindow;
+        enum class EdgeType : size_t { invalid, symbolwise, dictionary };
+        // Number of bits on descriptor bitfield.
+        constexpr static size_t const NumDescBits = sizeof(descriptor_t) * 8;
+        // Number of bits used in descriptor bitfield to signal the end-of-file
+        // marker sequence.
+        constexpr static size_t const NumTermBits = 1;
+        // Number of bits for end-of-file marker.
+        constexpr static size_t const TerminatorWeight = NumTermBits + 2 * 8;
+        // Flag that tells the compressor that a new descriptor field is needed
+        // when a new bit is needed and all bits in the previous one have been
+        // used up.
+        constexpr static bool const NeedEarlyDescriptor = false;
+        // Flag that marks the descriptor bits as being in big-endian bit
+        // order (that is, highest bits come out first).
+        constexpr static bool const DescriptorLittleEndianBits = false;
+        // How many characters to skip looking for matches for at the start.
+        constexpr static size_t const FirstMatchPosition = 0;
+        // Size of the search buffer.
+        constexpr static size_t const SearchBufSize = 256;
+        // Size of the look-ahead buffer.
+        constexpr static size_t const LookAheadBufSize = 255;
+        // Total size of the sliding window.
+        constexpr static size_t const SlidingWindowSize
+                = SearchBufSize + LookAheadBufSize;
+        // Creates the (multilayer) sliding window structure.
+        static auto create_sliding_window(
+                stream_t const* dt, size_t const size) noexcept {
+            return array{SlidingWindow_t{
+                    dt, size, SearchBufSize, 2, LookAheadBufSize,
+                    EdgeType::dictionary}};
+        }
+        // Given an edge type, computes how many bits are used in the descriptor
+        // field.
+        constexpr static size_t desc_bits(EdgeType const type) noexcept {
+            // Comper-X always uses a single bit descriptor.
+            ignore_unused_variable_warning(type);
+            return 1;
+        }
+        // Given an edge type, computes how many bits are used in total by this
+        // edge. A return of "numeric_limits::max()" means "infinite",
+        // or "no edge".
+        constexpr static size_t edge_weight(
+                EdgeType const type, size_t length) noexcept {
+            ignore_unused_variable_warning(length);
+            switch (type) {
+            case EdgeType::symbolwise:
+                // 16-bit value.
+                return desc_bits(type) + 16;
+            case EdgeType::dictionary:
+                // 8-bit distance, 8-bit length.
+                return desc_bits(type) + 8 + 8;
+            case EdgeType::invalid:
+                return numeric_limits::max();
+            }
+            __builtin_unreachable();
+        }
+        // ComperX finds no additional matches over normal LZSS.
+        constexpr static bool extra_matches(
+                stream_t const* data, size_t const basenode,
+                size_t const ubound, size_t const lbound,
+                std::vector>& matches) noexcept {
+            ignore_unused_variable_warning(
+                    data, basenode, ubound, lbound, matches);
+            // Do normal matches.
+            return false;
+        }
+        // ComperX needs no additional padding at the end-of-file.
+        constexpr static size_t get_padding(size_t const totallen) noexcept {
+            ignore_unused_variable_warning(totallen);
+            return 0;
+        }
+    };
+
+public:
+    static void decode(istream& in, iostream& Dst) {
+        using CompIStream = LZSSIStream;
+
+        CompIStream src(in);
+
+        while (in.good()) {
+            if (src.descbit() == 0U) {
+                // Symbolwise match.
+                BigEndian::Write2(Dst, BigEndian::Read2(in));
+            } else {
+                // Dictionary match: a raw distance byte of 0 selects the word
+                // just written, and a raw length byte of 0 ends the stream.
+                uint8_t raw_dist = src.getbyte();
+                uint8_t raw_len = src.getbyte();
+
+                if (raw_len == 0) { /* Stop processing */
+                    break;
+                }
+
+                size_t const distance = raw_dist ? (0x100 - raw_dist + 1) * 2 : 2; // in bytes
+                size_t const length = (0x100 - ((raw_len & 0x7F) << 1)) + ((raw_len & 0x80) >> 7); // in 16-bit words
+
+                for (size_t i = 0; i < length; i++) {
+                    size_t const Pointer = Dst.tellp();
+                    Dst.seekg(Pointer - distance);
+                    uint16_t const Word = BigEndian::Read2(Dst);
+                    Dst.seekp(Pointer);
+                    BigEndian::Write2(Dst, Word);
+                }
+            }
+        }
+    }
+
+    static void encode(ostream& Dst, uint8_t const* Data, size_t const Size) {
+        using EdgeType = typename ComperXAdaptor::EdgeType;
+        using CompOStream = LZSSOStream;
+
+        // Compute optimal Comper-X parsing of input file.
+        auto list = find_optimal_lzss_parse(Data, Size, ComperXAdaptor{});
+        CompOStream out(Dst);
+
+        // Go through each edge in the optimal path.
+        for (auto const& edge : list) {
+            switch (edge.get_type()) {
+            case EdgeType::symbolwise: {
+                size_t const value = edge.get_symbol();
+                size_t const high = (value >> 8U) & 0xFFU;
+                size_t const low = (value & 0xFFU);
+                out.descbit(0);
+                out.putbyte(high);
+                out.putbyte(low);
+                break;
+            }
+            case EdgeType::dictionary: {
+                size_t const len = edge.get_length();
+                size_t const dist = edge.get_distance();
+
+                out.descbit(1);
+                out.putbyte(-dist+1);
+                out.putbyte((0x7F - ((len - 2) >> 1)) | ((len & 1) << 7));
+                break;
+            }
+            case EdgeType::invalid:
+                // This should be unreachable.
+                std::cerr << "Compression produced invalid edge type "
+                          << static_cast(edge.get_type()) << std::endl;
+                __builtin_unreachable();
+            }
+        }
+
+        // Push descriptor for end-of-file marker.
+        out.descbit(1);
+
+        out.putbyte(-1);
+        out.putbyte(0); // zero length byte: decode() stops on it
+    }
+};
+
+bool comperx::decode(istream& Src, iostream& Dst) {
+    size_t const Location = Src.tellg();
+    stringstream in(ios::in | ios::out | ios::binary);
+    extract(Src, in);
+
+    comperx_internal::decode(in, Dst);
+    Src.seekg(Location + in.tellg()); // leave Src just past the bytes decode() consumed
+    return true;
+}
+
+bool comperx::encode(ostream& Dst, uint8_t const* data, size_t const Size) {
+    comperx_internal::encode(Dst, data, Size);
+    return true;
+}
diff --git a/src/tools/comperx.cc b/src/tools/comperx.cc
new file mode 100644
index 0000000..a2c9ccb
--- /dev/null
+++ b/src/tools/comperx.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) Flamewing 2013-2015
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ */
+
+#include // NOTE(review): include targets look truncated in this copy of the patch; confirm upstream
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+using std::cerr;
+using std::endl;
+using std::fstream;
+using std::ifstream;
+using std::ios;
+using std::ofstream;
+using std::stringstream;
+
+static void usage(char* prog) {
+    cerr << "Usage: " << prog
+         << " [-c|--crunch|-x|--extract=[{pointer}]] [-m|--moduled] "
+            "{input_filename} "
+            "{output_filename}"
+         << endl;
+    cerr << endl;
+    cerr << "\t-x,--extract\tExtract from {pointer} address in file."
<< endl;
+    cerr << "\t-c,--crunch \tAssume input file is Comper-X-compressed and "
+            "recompress to output file."
+         << endl
+         << "\t \tIf --crunch is in effect, a missing "
+            "output_filename means recompress"
+         << endl
+         << "\t \tto input_filename." << endl;
+    cerr << "\t-m,--moduled\tUse compression in modules of 4096 bytes." << endl;
+}
+// Command-line front end: compresses by default, -x extracts, -c recompresses.
+int main(int argc, char* argv[]) {
+    static constexpr const std::array long_options{
+            option{"extract", optional_argument, nullptr, 'x'},
+            option{"moduled", no_argument, nullptr, 'm'},
+            option{"crunch", no_argument, nullptr, 'c'},
+            option{nullptr, 0, nullptr, 0}};
+
+    bool extract = false;
+    bool moduled = false;
+    bool crunch = false;
+    size_t pointer = 0; // start offset in the input; only set by -x/--extract
+
+    while (true) {
+        int option_index = 0;
+        int c = getopt_long(
+                argc, argv, "x::mc", long_options.data(), &option_index);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case 'x':
+            extract = true;
+            if (optarg != nullptr) {
+                pointer = strtoul(optarg, nullptr, 0);
+            }
+            break;
+        case 'c':
+            crunch = true;
+            break;
+        case 'm':
+            moduled = true;
+            break;
+        default:
+            break;
+        }
+    }
+
+    if ((!crunch && argc - optind < 2) || (crunch && argc - optind < 1)) {
+        usage(argv[0]);
+        return 1;
+    }
+
+    if (extract && crunch) {
+        cerr << "Error: --extract and --crunch can't be used at the same time."
+             << endl
+             << endl;
+        return 4;
+    }
+
+    const char* outfile // with --crunch and a single filename, recompress in place
+            = crunch && argc - optind < 2 ? argv[optind] : argv[optind + 1];
+
+    ifstream fin(argv[optind], ios::in | ios::binary);
+    if (!fin.good()) {
+        cerr << "Input file '" << argv[optind] << "' could not be opened."
+             << endl
+             << endl;
+        return 2;
+    }
+
+    if (crunch) {
+        stringstream buffer(ios::in | ios::out | ios::binary);
+        fin.seekg(pointer);
+        comperx::decode(fin, buffer);
+        fin.close(); // release the input first: outfile may name the same file
+        buffer.seekg(0);
+
+        fstream fout(outfile, ios::in | ios::out | ios::binary | ios::trunc);
+        if (!fout.good()) {
+            cerr << "Output file '" << outfile // not argv[optind + 1]: null for in-place crunch
+                 << "' could not be opened." << endl
+                 << endl;
+            return 3;
+        }
+        if (moduled) {
+            comperx::moduled_encode(buffer, fout);
+        } else {
+            comperx::encode(buffer, fout);
+        }
+    } else {
+        fstream fout(outfile, ios::in | ios::out | ios::binary | ios::trunc);
+        if (!fout.good()) {
+            cerr << "Output file '" << outfile
+                 << "' could not be opened." << endl
+                 << endl;
+            return 3;
+        }
+
+        if (extract) {
+            fin.seekg(pointer);
+            if (moduled) {
+                comperx::moduled_decode(fin, fout);
+            } else {
+                comperx::decode(fin, fout);
+            }
+        } else {
+            if (moduled) {
+                comperx::moduled_encode(fin, fout);
+            } else {
+                comperx::encode(fin, fout);
+            }
+        }
+    }
+
+    return 0;
+}