Merge remote-tracking branch 'origin/GP-4391_ryanmkurtz_lzfse--SQUASHED'

This commit is contained in:
Ryan Kurtz
2024-03-20 13:41:21 -04:00
22 changed files with 5775 additions and 3 deletions
+10
View File
@@ -18,6 +18,8 @@ apply from: "$rootProject.projectDir/gradle/javaProject.gradle"
apply from: "$rootProject.projectDir/gradle/helpProject.gradle"
apply from: "$rootProject.projectDir/gradle/jacocoProject.gradle"
apply from: "$rootProject.projectDir/gradle/javaTestProject.gradle"
apply from: "$rootProject.projectDir/gradle/nativeProject.gradle"
apply from: "buildNatives.gradle"
apply plugin: 'eclipse'
eclipse.project.name = 'Features FileFormats'
@@ -51,6 +53,14 @@ dependencies {
testImplementation project(path: ':Base', configuration: 'integrationTestArtifacts')
}
// Include buildable native source in distribution
// Copies this project's src/lzfse tree into the distribution so the lzfse
// native can be rebuilt from an installed distribution.
rootProject.assembleDistribution {
from (this.project.projectDir.toString()) {
// Only the lzfse native sources are taken from the project directory.
include "src/lzfse/**"
// Destination inside the distribution is computed lazily by getZipPath
// (a helper defined elsewhere in the build; presumably this module's
// path within the zip -- confirm).
into { getZipPath(this.project) }
}
}
// ***********************************************************************************************
// Sevenzip native library extract task
// ***********************************************************************************************
@@ -0,0 +1,54 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Native build files are already applied in development mode (indicated by presence of the
// Generic project). Only need to apply them if we are in a distribution.
// The absence of the ':Generic' project is used as the marker for "not in a
// development checkout".
if (findProject(':Generic') == null) {
// running from within Ghidra install
// Pull in the shared native build support scripts from the GPL directory
// three levels up (presumably resolved relative to this module's
// directory -- confirm).
apply from: "../../../GPL/utils.gradle"
apply from: "../../../GPL/nativePlatforms.gradle"
apply from: "../../../GPL/nativeBuildProperties.gradle"
}
// Declares the "lzfse" native executable: a C program built from the sources
// listed below for each of the listed target platforms.
model {
components {
lzfse(NativeExecutableSpec) {
// Base name of the produced executable.
baseName "lzfse"
// Platforms the executable is built for. The platform names are not
// defined in this script (presumably they come from the shared native
// platform build scripts -- confirm).
targetPlatform "win_x86_64"
targetPlatform "linux_x86_64"
targetPlatform "linux_arm_64"
targetPlatform "mac_x86_64"
targetPlatform "mac_arm_64"
sources {
c {
source {
// Only the files explicitly included here are compiled; any other
// file under src/lzfse is not part of this source set.
srcDir "src/lzfse"
include "lzfse_encode.c"
include "lzfse_decode.c"
include "lzfse_encode_base.c"
include "lzfse_decode_base.c"
include "lzvn_encode_base.c"
include "lzvn_decode_base.c"
include "lzfse_fse.c"
include "lzfse_main.c"
}
}
}
}
}
}
@@ -1,6 +1,7 @@
##VERSION: 2.0
##MODULE IP: Apache License 2.0
##MODULE IP: Bouncy Castle License
##MODULE IP: BSD-3-APPLE
##MODULE IP: BSD-3-GRUVER
##MODULE IP: Copyright Distribution Permitted
##MODULE IP: Creative Commons Attribution 2.5
@@ -0,0 +1,139 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LZFSE_H
#define LZFSE_H
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_MSC_VER) && !defined(__clang__)
# define __attribute__(X)
# pragma warning(disable : 4068)
#endif
#if defined(LZFSE_DLL)
# if defined(_WIN32) || defined(__CYGWIN__)
# if defined(LZFSE_DLL_EXPORTS)
# define LZFSE_API __declspec(dllexport)
# else
# define LZFSE_API __declspec(dllimport)
# endif
# endif
#endif
#if !defined(LZFSE_API)
# if __GNUC__ >= 4
# define LZFSE_API __attribute__((visibility("default")))
# else
# define LZFSE_API
# endif
#endif
/*! @abstract Get the required scratch buffer size to compress using LZFSE.
 *
 * @return
 * The size in bytes of the workspace that lzfse_encode_buffer( ) may use when
 * it is given a non-NULL scratch_buffer. */
LZFSE_API size_t lzfse_encode_scratch_size(void);
/*! @abstract Compress a buffer using LZFSE.
*
* @param dst_buffer
* Pointer to the first byte of the destination buffer.
*
* @param dst_size
* Size of the destination buffer in bytes.
*
* @param src_buffer
* Pointer to the first byte of the source buffer.
*
* @param src_size
* Size of the source buffer in bytes.
*
* @param scratch_buffer
* If non-NULL, a pointer to scratch space for the routine to use as workspace;
* the routine may use up to lzfse_encode_scratch_size( ) bytes of workspace
* during its operation, and will not perform any internal allocations. If
* NULL, the routine may allocate its own memory to use during operation via
* a single call to malloc( ), and will release it by calling free( ) prior
* to returning. For most use, passing NULL is perfectly satisfactory, but if
* you require strict control over allocation, you will want to pass an
* explicit scratch buffer.
*
* @return
* The number of bytes written to the destination buffer if the input is
* successfully compressed. If the input cannot be compressed to fit into
* the provided buffer, or an error occurs, zero is returned, and the
* contents of dst_buffer are unspecified. */
LZFSE_API size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer,
size_t dst_size,
const uint8_t *__restrict src_buffer,
size_t src_size,
void *__restrict scratch_buffer);
/*! @abstract Get the required scratch buffer size to decompress using LZFSE.
 *
 * @return
 * The size in bytes of the workspace that lzfse_decode_buffer( ) may use when
 * it is given a non-NULL scratch_buffer. */
LZFSE_API size_t lzfse_decode_scratch_size(void);
/*! @abstract Decompress a buffer using LZFSE.
*
* @param dst_buffer
* Pointer to the first byte of the destination buffer.
*
* @param dst_size
* Size of the destination buffer in bytes.
*
* @param src_buffer
* Pointer to the first byte of the source buffer.
*
* @param src_size
* Size of the source buffer in bytes.
*
* @param scratch_buffer
* If non-NULL, a pointer to scratch space for the routine to use as workspace;
* the routine may use up to lzfse_decode_scratch_size( ) bytes of workspace
* during its operation, and will not perform any internal allocations. If
* NULL, the routine may allocate its own memory to use during operation via
* a single call to malloc( ), and will release it by calling free( ) prior
* to returning. For most use, passing NULL is perfectly satisfactory, but if
* you require strict control over allocation, you will want to pass an
* explicit scratch buffer.
*
* @return
* The number of bytes written to the destination buffer if the input is
* successfully decompressed. If there is not enough space in the destination
* buffer to hold the entire expanded output, only the first dst_size bytes
* will be written to the buffer and dst_size is returned. Note that this
* behavior differs from that of lzfse_encode_buffer.
* Zero is returned if decoding fails, or if scratch_buffer is NULL and the
* internal workspace allocation fails. */
LZFSE_API size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer,
size_t dst_size,
const uint8_t *__restrict src_buffer,
size_t src_size,
void *__restrict scratch_buffer);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* LZFSE_H */
@@ -0,0 +1,75 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// LZFSE decode API
#include "lzfse.h"
#include "lzfse_internal.h"
// Scratch space needed by the decoder: exactly one lzfse_decoder_state.
// Declared with (void) so the definition is a proper prototype taking no
// arguments.
size_t lzfse_decode_scratch_size(void) { return sizeof(lzfse_decoder_state); }
// Decode src_buffer into dst_buffer, using scratch_buffer as the decoder state.
//
// scratch_buffer must be non-NULL: it is zeroed and then used as an
// lzfse_decoder_state, so it has to be at least that large.
//
// Returns dst_size when the decoder reports LZFSE_STATUS_DST_FULL, 0 on any
// other non-OK status, and otherwise the number of bytes written.
size_t lzfse_decode_buffer_with_scratch(uint8_t *__restrict dst_buffer,
                         size_t dst_size, const uint8_t *__restrict src_buffer,
                         size_t src_size, void *__restrict scratch_buffer) {
  lzfse_decoder_state *state = (lzfse_decoder_state *)scratch_buffer;

  // Start from a fully cleared state, then describe the input and output
  // windows to the decoder.
  memset(state, 0x00, sizeof(*state));
  state->src_begin = src_buffer;
  state->src = src_buffer;
  state->src_end = src_buffer + src_size;
  state->dst_begin = dst_buffer;
  state->dst = dst_buffer;
  state->dst_end = dst_buffer + dst_size;

  const int status = lzfse_decode(state);

  // Output truncated to the destination capacity.
  if (status == LZFSE_STATUS_DST_FULL)
    return dst_size;

  // Success reports the bytes produced; every other status is a failure.
  return (status == LZFSE_STATUS_OK) ? (size_t)(state->dst - dst_buffer) : 0;
}
// Public decode entry point. Uses the caller's scratch buffer when one is
// supplied; otherwise allocates a temporary one for the duration of the call.
//
// Returns whatever lzfse_decode_buffer_with_scratch returns, or 0 if a
// temporary scratch buffer was needed and could not be allocated.
size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size,
                           const uint8_t *__restrict src_buffer,
                           size_t src_size, void *__restrict scratch_buffer) {
  void *owned_scratch = NULL; // non-NULL only when allocated here
  void *workspace = scratch_buffer;

  if (workspace == NULL) {
    // +1 in case scratch size could be zero
    owned_scratch = malloc(lzfse_decode_scratch_size() + 1);
    if (owned_scratch == NULL)
      return 0;
    workspace = owned_scratch;
  }

  const size_t written = lzfse_decode_buffer_with_scratch(
      dst_buffer, dst_size, src_buffer, src_size, workspace);

  // free(NULL) is a no-op, so this only releases a buffer allocated above.
  free(owned_scratch);
  return written;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,166 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// LZFSE encode API
#include "lzfse.h"
#include "lzfse_internal.h"
// Scratch space needed by the encoder. The same buffer is used either as an
// lzfse_encoder_state or as LZVN scratch, so the requirement is the larger of
// the two. Declared with (void) so the definition is a proper prototype taking
// no arguments.
size_t lzfse_encode_scratch_size(void) {
  const size_t lzfse_bytes = sizeof(lzfse_encoder_state);
  const size_t lzvn_bytes = lzvn_encode_scratch_size();
  return (lzfse_bytes > lzvn_bytes) ? lzfse_bytes : lzvn_bytes;
}
// Compress src_buffer into dst_buffer using caller-provided scratch space.
//
// Strategy, in order:
//   1. Inputs shorter than LZVN_ENCODE_MIN_SRC_SIZE skip compression.
//   2. Inputs shorter than LZFSE_ENCODE_LZVN_THRESHOLD are encoded with LZVN.
//   3. Everything else is encoded with LZFSE, in chunks when the input is too
//      large for 32-bit offsets.
// Whenever a step is skipped or fails, the input is stored as a single
// uncompressed block if that fits in dst_buffer.
//
// scratch_buffer must be non-NULL (the LZFSE path zeroes it and uses it as an
// lzfse_encoder_state; the LZVN path hands it to lzvn_encode_buffer).
//
// Returns the number of bytes written to dst_buffer, or 0 if neither a
// compressed nor an uncompressed encoding fits.
size_t lzfse_encode_buffer_with_scratch(uint8_t *__restrict dst_buffer,
                       size_t dst_size, const uint8_t *__restrict src_buffer,
                       size_t src_size, void *__restrict scratch_buffer) {
  // src_size is consumed by the chunked path below; keep the caller's value
  // for the uncompressed fallback.
  const size_t original_size = src_size;

  // If input is really really small, go directly to uncompressed buffer
  // (because LZVN will refuse to encode it, and we will report a failure)
  if (src_size < LZVN_ENCODE_MIN_SRC_SIZE)
    goto try_uncompressed;

  // If input is too small, try encoding with LZVN
  if (src_size < LZFSE_ENCODE_LZVN_THRESHOLD) {
    // need header + end-of-stream marker
    size_t extra_size = 4 + sizeof(lzvn_compressed_block_header);
    if (dst_size <= extra_size)
      goto try_uncompressed; // DST is really too small, give up

    size_t sz = lzvn_encode_buffer(
        dst_buffer + sizeof(lzvn_compressed_block_header),
        dst_size - extra_size, src_buffer, src_size, scratch_buffer);
    if (sz == 0 || sz >= src_size)
      goto try_uncompressed; // failed, or no compression, fall back to
                             // uncompressed block

    // If we could encode, setup header and end-of-stream marker (we left room
    // for them, no need to test)
    lzvn_compressed_block_header header;
    header.magic = LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC;
    header.n_raw_bytes = (uint32_t)src_size;
    header.n_payload_bytes = (uint32_t)sz;
    memcpy(dst_buffer, &header, sizeof(header));
    store4(dst_buffer + sizeof(lzvn_compressed_block_header) + sz,
           LZFSE_ENDOFSTREAM_BLOCK_MAGIC);

    return sz + extra_size;
  }

  // Try encoding with LZFSE
  {
    lzfse_encoder_state *state = scratch_buffer;
    memset(state, 0x00, sizeof *state);
    if (lzfse_encode_init(state) != LZFSE_STATUS_OK)
      goto try_uncompressed;
    state->dst = dst_buffer;
    state->dst_begin = dst_buffer;
    state->dst_end = &dst_buffer[dst_size];
    state->src = src_buffer;
    state->src_encode_i = 0;

    if (src_size >= 0xffffffffU) {
      // lzfse only uses 32 bits for offsets internally, so if the input
      // buffer is really huge, we need to process it in smaller chunks.
      // NOTE(review): the upstream comment says this path is taken for sizes
      // much smaller than 2GB because switching early is faster, but the
      // threshold actually tested above is 0xffffffff -- confirm the intent.
      // The first chunk, we just process normally.
      const lzfse_offset encoder_block_size = 262144;
      state->src_end = encoder_block_size;
      if (lzfse_encode_base(state) != LZFSE_STATUS_OK)
        goto try_uncompressed;
      src_size -= encoder_block_size;
      while (src_size >= encoder_block_size) {
        // All subsequent chunks require a translation to keep the offsets
        // from getting too big.  Note that we are always going from
        // encoder_block_size up to 2*encoder_block_size so that the
        // offsets remain positive (as opposed to resetting to zero and
        // having negative offsets).
        state->src_end = 2 * encoder_block_size;
        if (lzfse_encode_base(state) != LZFSE_STATUS_OK)
          goto try_uncompressed;
        lzfse_encode_translate(state, encoder_block_size);
        src_size -= encoder_block_size;
      }
      // Set the end for the final chunk.
      state->src_end = encoder_block_size + (lzfse_offset)src_size;
    }
    // If the source buffer is small enough to use 32-bit offsets, we simply
    // encode the whole thing in a single chunk.
    else
      state->src_end = (lzfse_offset)src_size;

    // This is either the trailing chunk (if the source file is huge), or
    // the whole source file.
    if (lzfse_encode_base(state) != LZFSE_STATUS_OK)
      goto try_uncompressed;
    if (lzfse_encode_finish(state) != LZFSE_STATUS_OK)
      goto try_uncompressed;

    // No error occurred, return compressed size.
    return state->dst - dst_buffer;
  }

try_uncompressed:
  // Compression failed for some reason.  If we can fit the data into the
  // output buffer uncompressed, go ahead and do that instead.
  // The range check comes first so that the "+ 12" (presumably block header
  // plus 4-byte end-of-stream marker -- confirm) cannot wrap around.
  if (original_size < INT32_MAX && original_size + 12 <= dst_size) {
    // Use original_size, not src_size: the chunked LZFSE path above
    // decrements src_size, and the header must describe the same byte count
    // that is copied below.
    uncompressed_block_header header = {.magic = LZFSE_UNCOMPRESSED_BLOCK_MAGIC,
                                        .n_raw_bytes = (uint32_t)original_size};
    uint8_t *dst_end = dst_buffer;

    memcpy(dst_end, &header, sizeof header);
    dst_end += sizeof header;
    memcpy(dst_end, src_buffer, original_size);
    dst_end += original_size;
    store4(dst_end, LZFSE_ENDOFSTREAM_BLOCK_MAGIC);
    dst_end += 4;

    return dst_end - dst_buffer;
  }

  // Otherwise, there's nothing we can do, so return zero.
  return 0;
}
// Public encode entry point. Uses the caller's scratch buffer when one is
// supplied; otherwise allocates a temporary one for the duration of the call.
//
// Returns whatever lzfse_encode_buffer_with_scratch returns, or 0 if a
// temporary scratch buffer was needed and could not be allocated.
size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size,
                           const uint8_t *__restrict src_buffer,
                           size_t src_size, void *__restrict scratch_buffer) {
  void *owned_scratch = NULL; // non-NULL only when allocated here
  void *workspace = scratch_buffer;

  if (workspace == NULL) {
    // +1 in case scratch size could be zero
    owned_scratch = malloc(lzfse_encode_scratch_size() + 1);
    if (owned_scratch == NULL)
      return 0;
    workspace = owned_scratch;
  }

  const size_t written = lzfse_encode_buffer_with_scratch(
      dst_buffer, dst_size, src_buffer, src_size, workspace);

  // free(NULL) is a no-op, so this only releases a buffer allocated above.
  free(owned_scratch);
  return written;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,221 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LZFSE_ENCODE_TABLES_H
#define LZFSE_ENCODE_TABLES_H
#if defined(_MSC_VER) && !defined(__clang__)
# define inline __inline
#endif
// Look up the base symbol for an L value (presumably a literal-run length --
// confirm against the encoder).
//
// The table has LZFSE_ENCODE_MAX_L_VALUE + 1 entries and maps:
//   0..15   -> the value itself
//   16..19  -> 16
//   20..27  -> 17
//   28..59  -> 18
//   60..315 -> 19
//
// No bounds check is performed: the caller must pass
// 0 <= value <= LZFSE_ENCODE_MAX_L_VALUE, otherwise the read is out of range.
static inline uint8_t l_base_from_value(int32_t value) {
static const uint8_t sym[LZFSE_ENCODE_MAX_L_VALUE + 1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16,
16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
// Direct table lookup; value is used as the index unchanged.
return sym[value];
}
// Look up the base symbol for an M value (presumably a match length --
// confirm against the encoder).
//
// The table has LZFSE_ENCODE_MAX_M_VALUE + 1 entries and maps:
//   0..15     -> the value itself
//   16..23    -> 16
//   24..55    -> 17
//   56..311   -> 18
//   312..2359 -> 19
//
// No bounds check is performed: the caller must pass
// 0 <= value <= LZFSE_ENCODE_MAX_M_VALUE, otherwise the read is out of range.
static inline uint8_t m_base_from_value(int32_t value) {
static const uint8_t sym[LZFSE_ENCODE_MAX_M_VALUE + 1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16,
16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19};
// Direct table lookup; value is used as the index unchanged.
return sym[value];
}
// Look up the base symbol for a D value (presumably a match distance --
// confirm against the encoder).
//
// The 256-entry table is split into four 64-entry segments, one per value
// range, each indexed with a coarser granularity than the previous one:
//   [0, 60)          -> segment 0, one slot per value
//   [60, 1020)       -> segment 1, one slot per 16 values
//   [1020, 16380)    -> segment 2, one slot per 256 values
//   [16380, 262140)  -> segment 3, one slot per 4096 values
// Values outside [0, 262140) map to slot 0, i.e. symbol 0.
static inline uint8_t d_base_from_value(int32_t value) {
  static const uint8_t sym[64 * 4] = {
      0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  8,  8,  9,  9,
      9,  9,  10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12,
      13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15,
      15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 17, 18, 19, 20, 20, 21, 21,
      22, 22, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27,
      27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29,
      30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
      32, 32, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 40,
      41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44,
      44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46,
      47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 50, 51, 52, 52,
      53, 53, 54, 54, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58,
      59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61,
      61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63,
      0,  0,  0,  0};

  // Pick the table slot for the range that contains value. The ranges are
  // disjoint, so at most one branch applies; the largest slot produced is
  // 192 + 59 = 251, which is inside the table.
  int slot;
  if (value < 0)
    slot = 0;
  else if (value < 60)
    slot = value;
  else if (value < 1020)
    slot = 64 + ((value - 60) >> 4);
  else if (value < 16380)
    slot = 128 + ((value - 1020) >> 8);
  else if (value < 262140)
    slot = 192 + ((value - 16380) >> 12);
  else
    slot = 0;

  return sym[slot];
}
#endif // LZFSE_ENCODE_TABLES_H
@@ -0,0 +1,219 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lzfse_internal.h"
// Initialize encoder table T[NSYMBOLS].
//
// NSTATES  = sum FREQ[i] is the number of states (a power of 2).
// NSYMBOLS is the number of symbols.
// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with
// FREQ[i] >= 0. Symbols with a 0 frequency are skipped (their entries in T
// are left untouched) and must not be present in the data.
//
// For every used symbol the entry receives:
//   k      - shift such that NSTATES <= (FREQ[i] << k) < 2 * NSTATES
//   s0     - (FREQ[i] << k) - NSTATES
//   delta0 - OFFSET - FREQ[i] + (NSTATES >> k)
//   delta1 - OFFSET - FREQ[i] + (NSTATES >> (k - 1))
// where OFFSET is the running sum of the frequencies of the preceding symbols.
void fse_init_encoder_table(int nstates, int nsymbols,
                            const uint16_t *__restrict freq,
                            fse_encoder_entry *__restrict t) {
  int offset = 0; // sum of the frequencies of the symbols handled so far
  int n_clz = __builtin_clz(nstates);
  for (int i = 0; i < nsymbols; i++) {
    int f = (int)freq[i];
    if (f == 0)
      continue; // skip this symbol, no occurrences
    int k =
        __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<<K) < 2*N
    // k == 0 happens when a single symbol owns every state (f == nstates).
    // Shifting by k - 1 == -1 is undefined behavior in C, so that term is
    // pinned to 0 instead. s0 is 0 in that case as well.
    // NOTE(review): delta1 is presumably only consulted for states below s0,
    // which cannot exist when s0 == 0 -- confirm against the encoder.
    int nstates_shifted_k1 = (k > 0) ? (nstates >> (k - 1)) : 0;
    t[i].s0 = (int16_t)((f << k) - nstates);
    t[i].k = (int16_t)k;
    t[i].delta0 = (int16_t)(offset - f + (nstates >> k));
    t[i].delta1 = (int16_t)(offset - f + nstates_shifted_k1);
    offset += f;
  }
}
// Build the decoder table T[NSTATES] from a normalized histogram.
//
// NSTATES is the number of states (a power of 2, equal to the sum of FREQ).
// NSYMBOLS is the number of symbols (asserted to be at most 256).
// FREQ[NSYMBOLS] holds the normalized frequency of each symbol; symbols with
// a frequency of 0 get no states and must not appear in the data.
//
// One packed fse_decoder_entry is written per state, symbol after symbol.
// Returns 0 on success, or -1 as soon as the running sum of frequencies
// exceeds NSTATES (the table is then only partially written).
int fse_init_decoder_table(int nstates, int nsymbols,
                           const uint16_t *__restrict freq,
                           int32_t *__restrict t) {
  assert(nsymbols <= 256);
  assert(fse_check_freq(freq, nsymbols, nstates) == 0);

  const int nstates_clz = __builtin_clz(nstates);
  int states_used = 0;

  for (int sym = 0; sym < nsymbols; sym++) {
    const int count = (int)freq[sym];
    if (count != 0) {
      // Refuse histograms that claim more states than the table holds.
      states_used += count;
      if (states_used > nstates)
        return -1;

      // Shift K such that N <= (F << K) < 2 * N.
      const int k = __builtin_clz(count) - nstates_clz;
      // The first j0 states of this symbol use k bits, the rest use k - 1.
      const int j0 = ((2 * nstates) >> k) - count;

      for (int j = 0; j < count; j++, t++) {
        fse_decoder_entry entry;
        entry.symbol = (uint8_t)sym;
        if (j >= j0) {
          entry.k = (int8_t)(k - 1);
          entry.delta = (int16_t)((j - j0) << (k - 1));
        } else {
          entry.k = (int8_t)k;
          entry.delta = (int16_t)(((count + j) << k) - nstates);
        }
        // The table is exposed as int32_t words; copy the packed entry in.
        memcpy(t, &entry, sizeof(entry));
      }
    }
  }
  return 0; // OK
}
// Initialize value decoder table T[NSTATES].
//
// NSTATES = sum FREQ[i] is the number of states (a power of 2).
// NSYMBOLS is the number of symbols (asserted to be at most 256).
// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with
// FREQ[i] >= 0. Symbols with a 0 frequency get no states and must not be
// present in the data.
// SYMBOL_VBITS[NSYMBOLS] and SYMBOL_VBASE[NSYMBOLS] are the number of value
// bits to read and the base value for each symbol; both are copied into every
// state entry created for that symbol.
//
// One fse_value_decoder_entry is written per state, symbol after symbol.
void fse_init_value_decoder_table(int nstates, int nsymbols,
                                  const uint16_t *__restrict freq,
                                  const uint8_t *__restrict symbol_vbits,
                                  const int32_t *__restrict symbol_vbase,
                                  fse_value_decoder_entry *__restrict t) {
  assert(nsymbols <= 256);
  assert(fse_check_freq(freq, nsymbols, nstates) == 0);
  int n_clz = __builtin_clz(nstates);
  for (int i = 0; i < nsymbols; i++) {
    int f = (int)freq[i];
    if (f == 0)
      continue; // skip this symbol, no occurrences
    int k =
        __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<<K) < 2*N
    // The first j0 states of this symbol consume k state bits, the others k-1.
    int j0 = ((2 * nstates) >> k) - f;
    // Template entry carrying the per-symbol value information.
    fse_value_decoder_entry ei = {0};
    ei.value_bits = symbol_vbits[i];
    ei.vbase = symbol_vbase[i];
    // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F
    for (int j = 0; j < f; j++) {
      fse_value_decoder_entry e = ei;
      if (j < j0) {
        e.total_bits = (uint8_t)k + e.value_bits;
        e.delta = (int16_t)(((f + j) << k) - nstates);
      } else {
        e.total_bits = (uint8_t)(k - 1) + e.value_bits;
        e.delta = (int16_t)((j - j0) << (k - 1));
      }
      // Copy the whole entry; sizeof keeps this in step with the struct
      // definition instead of relying on a hard-coded byte count.
      memcpy(t, &e, sizeof(e));
      t++;
    }
  }
}
// Remove states from symbols until the correct number of states is used.
//
// FREQ[NSYMBOLS] is adjusted in place; OVERRUN is the number of states that
// must be given back. Only symbols with a frequency above 1 are reduced, so a
// used symbol never drops to 0. Each pass removes up to
// (FREQ[sym] - 1) >> SHIFT states per symbol, with SHIFT going 3, 2, 1, 0 so
// that early passes take a little from large symbols and later passes take
// progressively more.
//
// After the SHIFT == 0 pass every frequency is at most 1 and nothing more can
// be removed, so the loop stops there even if OVERRUN is still nonzero. This
// keeps SHIFT from going negative (an undefined shift) and guarantees
// termination.
static void fse_adjust_freqs(uint16_t *freq, int overrun, int nsymbols) {
  for (int shift = 3; overrun != 0 && shift >= 0; shift--) {
    for (int sym = 0; sym < nsymbols; sym++) {
      if (freq[sym] > 1) {
        int n = (freq[sym] - 1) >> shift;
        if (n > overrun)
          n = overrun; // never remove more than is still owed
        freq[sym] -= n;
        overrun -= n;
        if (overrun == 0)
          break;
      }
    }
  }
}
// Convert raw occurrence counts T[NSYMBOLS] into normalized frequencies
// FREQ[NSYMBOLS] that add up to NSTATES.
//
// Each count is rescaled with fixed-point arithmetic and rounded to nearest;
// a symbol that occurred at least once always receives a frequency of at
// least 1. Whatever difference is left between the assigned total and NSTATES
// is then reconciled at the end.
void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t,
                        uint16_t *__restrict freq) {
  const int shift = __builtin_clz(nstates) - 1;

  // Total number of symbol occurrences.
  uint32_t total = 0;
  for (int i = 0; i < nsymbols; i++)
    total += t[i];

  // Fixed-point scale factor 2^31 / total; 0 when no symbol was used.
  const uint32_t step = (total != 0) ? (((uint32_t)1 << 31) / total) : 0;

  int assigned = 0; // states handed out so far
  int top_freq = 0; // largest normalized frequency seen
  int top_sym = 0;  // symbol holding top_freq
  for (int i = 0; i < nsymbols; i++) {
    // Keep one extra fractional bit, then add 1 and halve to round to nearest.
    const uint32_t scaled = (t[i] * step) >> shift;
    int f = (int)((scaled + 1) >> 1);
    // A symbol that was used must keep a nonzero frequency.
    if (t[i] != 0 && f == 0)
      f = 1;
    freq[i] = f;
    assigned += f;
    if (f > top_freq) {
      top_freq = f;
      top_sym = i;
    }
  }

  // Signed on purpose: negative means states are still unassigned, positive
  // means too many were handed out.
  const int overrun = assigned - nstates;
  if (overrun < (top_freq >> 2)) {
    // Spare states, or a minor overrun: settle it on the most frequent symbol.
    freq[top_sym] -= overrun;
  } else {
    // Major overrun: use the slower adjustment algorithm.
    fse_adjust_freqs(freq, overrun, nsymbols);
  }
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,339 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// LZFSE command line tool
// Make sure _POSIX_C_SOURCE is at least 200112L before any system header is
// included; a lower or missing value is replaced.
#if !defined(_POSIX_C_SOURCE) || (_POSIX_C_SOURCE < 200112L)
# undef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200112L
#endif
// MSVC-only settings. NOTE(review): the two _CRT_* macros presumably silence
// MSVC warnings about the POSIX-named and "unsafe" CRT functions used below --
// confirm against the MSVC CRT documentation.
#if defined(_MSC_VER)
# if !defined(_CRT_NONSTDC_NO_DEPRECATE)
# define _CRT_NONSTDC_NO_DEPRECATE
# endif
# if !defined(_CRT_SECURE_NO_WARNINGS)
# define _CRT_SECURE_NO_WARNINGS
# endif
// Map the `inline` keyword to `__inline` when building with MSVC but not
// clang.
# if !defined(__clang__)
# define inline __inline
# endif
#endif
#include "lzfse.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(_MSC_VER)
# include <io.h>
# include <windows.h>
#else
# include <sys/time.h>
# include <unistd.h>
#endif
// Same as realloc(x,s), except x is freed when realloc fails.
//
// Returns the resized buffer, or 0 on failure, in which case x has already
// been released and must not be used or freed again by the caller.
//
// A request for 0 bytes is rounded up to 1 byte. realloc(x, 0) may free x and
// return 0 on some C libraries; treating that as a failure and calling
// free(x) again would be a double free.
static inline void *lzfse_reallocf(void *x, size_t s) {
  void *y = realloc(x, s != 0 ? s : 1);
  if (y == 0)
    free(x); // realloc left x untouched; release it as promised
  return y;
}
// Current time in seconds, as a double, for measuring elapsed intervals.
//
// MSVC builds use the performance counter and fall back to GetTickCount()
// (milliseconds) when it is unavailable. Other builds use gettimeofday(); if
// that call fails the error is reported and the process exits with status 1.
static double get_time() {
#if defined(_MSC_VER)
  LARGE_INTEGER ticks, ticks_per_second;
  if (!QueryPerformanceFrequency(&ticks_per_second) ||
      !QueryPerformanceCounter(&ticks)) {
    return 1.0e-3 * (double)GetTickCount();
  }
  return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
#else
  struct timeval now;
  const int status = gettimeofday(&now, 0);
  if (status != 0) {
    perror("gettimeofday");
    exit(1);
  }
  const double seconds = (double)now.tv_sec;
  const double microseconds = (double)now.tv_usec;
  return seconds + 1.0e-6 * microseconds;
#endif
}
//--------------------
// Operation selected on the command line: -encode sets LZFSE_ENCODE (0),
// -decode sets LZFSE_DECODE (1). main() uses -1 for "not chosen yet".
enum { LZFSE_ENCODE = 0, LZFSE_DECODE };
// Print the one-line command synopsis to stderr, using argv[0] as the program
// name. argc is accepted for symmetry with main() but is not used.
void usage(int argc, char **argv) {
  (void)argc;
  const char *program = argv[0];
  fprintf(
      stderr,
      "Usage: %s -encode|-decode [-i input_file] [-o output_file] [-h] [-v]\n",
      program);
}
// Print the usage line and exit with status 0 (used for -h).
#define USAGE(argc, argv) \
do { \
usage(argc, argv); \
exit(0); \
} while (0)
// Print the usage line followed by a printf-style error message on stderr,
// then exit with status 1. The variadic arguments are passed to fprintf
// unchanged, so the first one must be the format string.
#define USAGE_MSG(argc, argv, ...) \
do { \
usage(argc, argv); \
fprintf(stderr, __VA_ARGS__); \
exit(1); \
} while (0)
// Command line entry point.
//
// Reads the whole input (file given with -i, otherwise stdin) into memory,
// encodes or decodes it in one call, and writes the result to the file given
// with -o, otherwise stdout. -v prints progress and timing on stderr, -h
// prints the usage line. Any failure is reported on stderr and ends the
// process with status 1; success returns 0.
int main(int argc, char **argv) {
  const char *in_file = 0;  // input path, 0 = stdin
  const char *out_file = 0; // output path, 0 = stdout
  int op = -1;              // LZFSE_ENCODE / LZFSE_DECODE, -1 = not chosen
  int verbosity = 0;        // number of -v flags seen

  // Parse options
  for (int i = 1; i < argc;) {
    // Flags without an argument
    const char *a = argv[i++];
    if (strcmp(a, "-h") == 0)
      USAGE(argc, argv);
    if (strcmp(a, "-v") == 0) {
      verbosity++;
      continue;
    }
    if (strcmp(a, "-encode") == 0) {
      op = LZFSE_ENCODE;
      continue;
    }
    if (strcmp(a, "-decode") == 0) {
      op = LZFSE_DECODE;
      continue;
    }

    // Flags with one argument. Each is accepted only once: a repeated -i or
    // -o matches neither branch and is reported as an invalid flag below.
    const char **arg_var = 0;
    if (strcmp(a, "-i") == 0 && in_file == 0)
      arg_var = &in_file;
    else if (strcmp(a, "-o") == 0 && out_file == 0)
      arg_var = &out_file;
    if (arg_var != 0) {
      // Flag is recognized. Check if there is an argument.
      if (i == argc)
        USAGE_MSG(argc, argv, "Error: Missing arg after %s\n", a);
      *arg_var = argv[i++];
      continue;
    }

    USAGE_MSG(argc, argv, "Error: invalid flag %s\n", a);
  }
  if (op < 0)
    USAGE_MSG(argc, argv, "Error: -encode|-decode required\n");

  // Info
  if (verbosity > 0) {
    if (op == LZFSE_ENCODE)
      fprintf(stderr, "LZFSE encode\n");
    if (op == LZFSE_DECODE)
      fprintf(stderr, "LZFSE decode\n");
    fprintf(stderr, "Input: %s\n", in_file ? in_file : "stdin");
    fprintf(stderr, "Output: %s\n", out_file ? out_file : "stdout");
  }

  // Load input
  size_t in_allocated = 0; // allocated in IN
  size_t in_size = 0;      // used in IN
  uint8_t *in = 0;         // input buffer
  int in_fd = -1;          // input file desc
  if (in_file != 0) {
    // If we have a file name, open it, and allocate the exact input size
    struct stat st;
#if defined(_WIN32)
    in_fd = open(in_file, O_RDONLY | O_BINARY);
#else
    in_fd = open(in_file, O_RDONLY);
#endif
    if (in_fd < 0) {
      perror(in_file);
      exit(1);
    }
    if (fstat(in_fd, &st) != 0) {
      perror(in_file);
      exit(1);
    }
    if (st.st_size > SIZE_MAX) {
      fprintf(stderr, "File is too large\n");
      exit(1);
    }
    in_allocated = (size_t)st.st_size;
    // An empty file would give a zero-byte buffer: malloc(0) may return 0 and
    // doubling 0 never grows. Always keep at least one byte.
    if (in_allocated == 0)
      in_allocated = 1;
  } else {
    // Otherwise, read from stdin, and allocate to 1 MB, grow as needed
    in_allocated = 1 << 20;
    in_fd = 0;
#if defined(_WIN32)
    if (setmode(in_fd, O_BINARY) == -1) {
      perror("setmode");
      exit(1);
    }
#endif
  }
  in = (uint8_t *)malloc(in_allocated);
  if (in == 0) {
    perror("malloc");
    exit(1);
  }

  // Read until end of file, growing the buffer whenever it is full
  while (1) {
    // re-alloc if needed
    if (in_size == in_allocated) {
      if (in_allocated < (100 << 20))
        in_allocated <<= 1; // double it
      else
        in_allocated += (100 << 20); // or add 100 MB if already large
      in = lzfse_reallocf(in, in_allocated);
      if (in == 0) {
        perror("malloc");
        exit(1);
      }
    }

    ptrdiff_t r = read(in_fd, in + in_size, in_allocated - in_size);
    if (r < 0) {
      perror("read");
      exit(1);
    }
    if (r == 0)
      break; // end of file
    in_size += (size_t)r;
  }

  if (in_file != 0) {
    close(in_fd);
    in_fd = -1;
  }

  // Size info
  if (verbosity > 0) {
    fprintf(stderr, "Input size: %zu B\n", in_size);
  }

  // Encode/decode
  // Compute size for result buffer; we assume here that encode shrinks size,
  // and that decode grows by no more than 4x. These are reasonable common-
  // case guidelines, but are not formally guaranteed to be satisfied, which
  // is why the loop below retries with a larger buffer.
  size_t out_allocated = in_size;
  if (op == LZFSE_DECODE && in_size <= SIZE_MAX / 4)
    out_allocated = 4 * in_size; // skipped if the product would wrap around
  // A zero first guess (empty input) could never be grown by doubling.
  if (out_allocated == 0)
    out_allocated = 1;
  size_t out_size = 0;
  size_t aux_allocated = (op == LZFSE_ENCODE) ? lzfse_encode_scratch_size()
                                              : lzfse_decode_scratch_size();
  void *aux = aux_allocated ? malloc(aux_allocated) : 0;
  if (aux_allocated != 0 && aux == 0) {
    perror("malloc");
    exit(1);
  }
  uint8_t *out = (uint8_t *)malloc(out_allocated);
  if (out == 0) {
    perror("malloc");
    exit(1);
  }

  double c0 = get_time();
  while (1) {
    if (op == LZFSE_ENCODE)
      out_size = lzfse_encode_buffer(out, out_allocated, in, in_size, aux);
    else
      out_size = lzfse_decode_buffer(out, out_allocated, in, in_size, aux);

    // If output buffer was too small, grow and retry. A decode that fills the
    // buffer exactly is treated as "too small" as well.
    if (out_size == 0 || (op == LZFSE_DECODE && out_size == out_allocated)) {
      if (verbosity > 0)
        fprintf(stderr, "Output buffer was too small, increasing size...\n");
      // Doubling past SIZE_MAX would wrap to a tiny size; give up instead.
      if (out_allocated > SIZE_MAX / 2) {
        fprintf(stderr, "Output is too large\n");
        exit(1);
      }
      out_allocated <<= 1;
      out = (uint8_t *)lzfse_reallocf(out, out_allocated);
      if (out == 0) {
        perror("malloc");
        exit(1);
      }
      continue;
    }

    break;
  }
  double c1 = get_time();

  if (verbosity > 0) {
    fprintf(stderr, "Output size: %zu B\n", out_size);
    size_t raw_size = (op == LZFSE_ENCODE) ? in_size : out_size;
    size_t compressed_size = (op == LZFSE_ENCODE) ? out_size : in_size;
    fprintf(stderr, "Compression ratio: %.3f\n",
            (double)raw_size / (double)compressed_size);
    double ns_per_byte = 1.0e9 * (c1 - c0) / (double)raw_size;
    double mb_per_s = (double)raw_size / 1024.0 / 1024.0 / (c1 - c0);
    fprintf(stderr, "Speed: %.2f ns/B, %.2f MB/s\n", ns_per_byte, mb_per_s);
  }

  // Write output
  int out_fd = -1;
  if (out_file) {
#if defined(_WIN32)
    out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
      S_IWRITE);
#else
    out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
#endif
    if (out_fd < 0) {
      perror(out_file);
      exit(1);
    }
  } else {
    out_fd = 1; // stdout
#if defined(_WIN32)
    if (setmode(out_fd, O_BINARY) == -1) {
      perror("setmode");
      exit(1);
    }
#endif
  }

  // write() may accept fewer bytes than requested; loop until all are out
  for (size_t out_pos = 0; out_pos < out_size;) {
    ptrdiff_t w = write(out_fd, out + out_pos, out_size - out_pos);
    if (w < 0) {
      perror("write");
      exit(1);
    }
    if (w == 0) {
      fprintf(stderr, "Failed to write to output file\n");
      exit(1);
    }
    out_pos += (size_t)w;
  }
  if (out_file != 0) {
    close(out_fd);
    out_fd = -1;
  }

  free(in);
  free(out);
  free(aux);
  return 0; // OK
}
@@ -0,0 +1,63 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LZFSE_TUNABLES_H
#define LZFSE_TUNABLES_H
// Parameters controlling details of the LZ-style match search. These values
// may be modified to fine-tune compression ratio vs. encoding speed, while
// keeping the compressed format compatible with LZFSE. Note that
// modifying them will also change the amount of work space required by
// the encoder. The values here are those used in the compression library
// on iOS and OS X.
// Number of bits for the hash function to produce. Should be in the range
// [10, 16]. Larger values reduce the number of false positives found during
// the match search, and expand the history table, which may allow additional
// matches to be found, generally improving the achieved compression ratio.
// Larger values also increase the workspace size, and make it less likely
// that the history table will be present in cache, which reduces performance.
#define LZFSE_ENCODE_HASH_BITS 14
// Number of positions to store for each line in the history table. May
// be either 4 or 8. Using 8 doubles the size of the history table, which
// increases the chance of finding matches (thus improving compression ratio),
// but also increases the workspace size.
#define LZFSE_ENCODE_HASH_WIDTH 4
// Match length in bytes that causes immediate emission. Generally speaking,
// LZFSE maintains multiple candidate matches and waits to decide which match
// to emit until more information is available. When a match exceeds this
// threshold, it is emitted immediately. Thus, smaller values may give
// somewhat better performance, and larger values may give somewhat better
// compression ratios.
#define LZFSE_ENCODE_GOOD_MATCH 40
// When the source buffer is very small, LZFSE doesn't compress as well as
// some simpler algorithms. To maintain reasonable compression for these
// cases, we transition to use LZVN instead if the size of the source buffer
// (in bytes) is below this threshold.
#define LZFSE_ENCODE_LZVN_THRESHOLD 4096
#endif // LZFSE_TUNABLES_H
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,71 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// LZVN low-level decoder (v2)
// Functions in the low-level API should switch to these at some point.
// Apr 2014
#ifndef LZVN_DECODE_BASE_H
#define LZVN_DECODE_BASE_H
#include "lzfse_internal.h"
/*! @abstract Base decoder state.
 *  @discussion Groups the source and destination cursors with the state the
 *  decoder carries between calls: a partially expanded match, the distance of
 *  the last emitted match, and an end-of-stream flag. */
typedef struct {
// Decoder I/O
// Next byte to read in source buffer
const unsigned char *src;
// Next byte after source buffer
const unsigned char *src_end;
// Next byte to write in destination buffer (by decoder)
unsigned char *dst;
// Valid range for destination buffer is [dst_begin, dst_end - 1]
unsigned char *dst_begin;
unsigned char *dst_end;
// Next byte to read in destination buffer (modified by caller)
unsigned char *dst_current;
// Decoder state
// Partially expanded match, or 0,0,0 when no match is pending.
// While a match is pending, src points to the next literal to copy, or the
// next op-code if L==0.
// NOTE(review): L, M and D are presumably literal length, match length and
// match distance -- confirm against the decoder implementation.
size_t L, M, D;
// Distance for last emitted match, or 0
lzvn_offset d_prev;
// Did we decode end-of-stream? (nonzero once it has been seen)
int end_of_stream;
} lzvn_decoder_state;
/*! @abstract Decode source to destination.
 * @param state Decoder state holding the source and destination ranges; it
 * is both input and output of the call.
 * Updates \p state (src,dst,d_prev). */
void lzvn_decode(lzvn_decoder_state *state);
#endif // LZVN_DECODE_BASE_H
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,119 @@
/* ###
* IP: BSD-3-APPLE
*/
/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// LZVN low-level encoder
#ifndef LZVN_ENCODE_BASE_H
#define LZVN_ENCODE_BASE_H
#include "lzfse_internal.h"
// ===============================================================
// Types and Constants
// Each macro's value sits on the continuation line, followed by its
// description. Values marked MUST are fixed; the comments list no alternative.
// Width of the match-search hash, in bits.
#define LZVN_ENCODE_HASH_BITS \
14 // number of bits returned by the hash function [10, 16]
// Depth of the offset stack kept per hash value.
#define LZVN_ENCODE_OFFSETS_PER_HASH \
4 // stored offsets stack for each hash value, MUST be 4
// Hash table size, derived from LZVN_ENCODE_HASH_BITS.
#define LZVN_ENCODE_HASH_VALUES \
(1 << LZVN_ENCODE_HASH_BITS) // number of entries in hash table
// Largest match distance the encoding can express.
#define LZVN_ENCODE_MAX_DISTANCE \
0xffff // max match distance we can represent with LZVN encoding, MUST be
// 0xFFFF
// Safety margin kept between the current position and the end of the source.
#define LZVN_ENCODE_MIN_MARGIN \
8 // min number of bytes required between current and end during encoding,
// MUST be >= 8
// Pending-literal count beyond which a long literal is emitted.
#define LZVN_ENCODE_MAX_LITERAL_BACKLOG \
400 // if the number of pending literals exceeds this size, emit a long
// literal, MUST be >= 271
/*! @abstract Type of table entry.
 *  @discussion One hash-table slot: four source positions and the 32-bit
 *  value recorded for each. The array length of 4 is written out literally;
 *  it matches LZVN_ENCODE_OFFSETS_PER_HASH, which is documented as fixed
 *  at 4. */
typedef struct {
int32_t indices[4]; // signed indices in source buffer
uint32_t values[4]; // corresponding 32-bit values
} lzvn_encode_entry_type;
// Work size: bytes needed for the whole hash table, i.e. one
// lzvn_encode_entry_type per hash value.
#define LZVN_ENCODE_WORK_SIZE \
(LZVN_ENCODE_HASH_VALUES * sizeof(lzvn_encode_entry_type))
/*! @abstract Match
 *  @discussion Describes one candidate match found by the encoder. All
 *  members are lzvn_offset values. */
typedef struct {
lzvn_offset m_begin; // beginning of match, current position
lzvn_offset m_end; // end of match, this is where the next literal would begin
// if we emit the entire match
lzvn_offset M; // match length M: m_end - m_begin
lzvn_offset D; // match distance D
lzvn_offset K; // match gain: M - distance storage (L not included)
} lzvn_match_info;
// ===============================================================
// Internal encoder state
/*! @abstract Base encoder state and I/O.
 *  @discussion Source positions are stored as lzvn_offset indices relative to
 *  \c src, while destination positions are plain pointers. The hash table is
 *  referenced through \c table, not embedded in the struct. */
typedef struct {
// Encoder I/O
// Source buffer
const unsigned char *src;
// Valid range in source buffer: we can access src[i] for src_begin <= i <
// src_end. src_begin may be negative.
lzvn_offset src_begin;
lzvn_offset src_end;
// Next byte to process in source buffer
lzvn_offset src_current;
// Next byte after the last byte to process in source buffer. We MUST have:
// src_current_end + 8 <= src_end.
// (8 is also the value of LZVN_ENCODE_MIN_MARGIN.)
lzvn_offset src_current_end;
// Next byte to encode in source buffer, may be before or after src_current.
lzvn_offset src_literal;
// Next byte to write in destination buffer
unsigned char *dst;
// Valid range in destination buffer: [dst_begin, dst_end - 1]
unsigned char *dst_begin;
unsigned char *dst_end;
// Encoder state
// Pending match
lzvn_match_info pending;
// Distance for last emitted match, or 0
lzvn_offset d_prev;
// Hash table used to find matches. Stores LZVN_ENCODE_OFFSETS_PER_HASH 32-bit
// signed indices in the source buffer, and the corresponding 4-byte values.
// The number of entries in the table is LZVN_ENCODE_HASH_VALUES, so the
// buffer behind this pointer needs LZVN_ENCODE_WORK_SIZE bytes.
lzvn_encode_entry_type *table;
} lzvn_encoder_state;
/*! @abstract Encode source to destination.
 * @param state Encoder state describing the source range, the destination
 * range and the hash table; it is both input and output of the call.
 * Update \p state.
 * The call ensures \c src_literal is never left too far behind \c src_current. */
void lzvn_encode(lzvn_encoder_state *state);
#endif // LZVN_ENCODE_BASE_H
@@ -37,6 +37,8 @@ public class CompLzssFileSystem implements GFileSystem {
public CompLzssFileSystem(FSRLRoot fsrl, ByteProvider provider, FileSystemService fsService,
TaskMonitor monitor) throws IOException, CancelledException {
this.fsFSRL = fsrl;
monitor.setMessage("Decompressing LZSS...");
try (ByteProvider tmpBP = new ByteProviderWrapper(provider, LzssConstants.HEADER_LENGTH,
@@ -32,9 +32,14 @@ public class CompLzssFileSystemFactory
public CompLzssFileSystem create(FSRLRoot targetFSRL, ByteProvider byteProvider,
FileSystemService fsService, TaskMonitor monitor)
throws IOException, CancelledException {
CompLzssFileSystem fs =
new CompLzssFileSystem(targetFSRL, byteProvider, fsService, monitor);
return fs;
try {
CompLzssFileSystem fs =
new CompLzssFileSystem(targetFSRL, byteProvider, fsService, monitor);
return fs;
}
finally {
byteProvider.close();
}
}
@Override
@@ -0,0 +1,114 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.file.formats.lzfse;
import java.io.File;
import java.io.IOException;
import java.util.List;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.bin.ByteProviderWrapper;
import ghidra.formats.gfilesystem.*;
import ghidra.formats.gfilesystem.annotations.FileSystemInfo;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
/**
 * A {@link GFileSystem} for LZFSE compressed files. The filesystem holds exactly one
 * payload file: the decompressed contents of the container.
 *
 * @see <a href="https://github.com/lzfse/lzfse">lzfse reference implementation</a>
 */
@FileSystemInfo(type = "lzfse", description = "LZFSE", factory = LzfseFileSystemFactory.class, priority = FileSystemInfo.PRIORITY_HIGH)
public class LzfseFileSystem implements GFileSystem {

	/** Name given to the single payload file inside this filesystem. */
	private static final String PAYLOAD_NAME = "lzfse_decompressed";

	private FSRLRoot rootFsrl;
	private SingleFileSystemIndexHelper index;
	private FileSystemRefManager refManager = new FileSystemRefManager(this);
	/** Provider over the decompressed bytes; {@code null} once the filesystem is closed. */
	private ByteProvider payloadProvider;

	/**
	 * Creates a new {@link LzfseFileSystem} around an already decompressed file.
	 * <p>
	 * NOTE: Successful completion of this constructor will result in {@code decompressedFile}
	 * being deleted, because the file is pushed into the file cache.
	 *
	 * @param fsrlRoot This filesystem's {@link FSRLRoot}
	 * @param decompressedFile The decompressed lzfse {@link File file} (will be deleted after use)
	 * @param fsService The {@link FileSystemService}
	 * @param monitor {@link TaskMonitor}
	 * @throws IOException If there was an IO-related error
	 * @throws CancelledException If the user cancelled the operation
	 */
	public LzfseFileSystem(FSRLRoot fsrlRoot, File decompressedFile, FileSystemService fsService,
			TaskMonitor monitor) throws IOException, CancelledException {
		monitor.setMessage("Decompressing LZFSE...");

		this.rootFsrl = fsrlRoot;
		this.payloadProvider = fsService.pushFileToCache(decompressedFile,
			rootFsrl.appendPath(PAYLOAD_NAME), monitor);

		long payloadLength = payloadProvider.length();
		String payloadMd5 = payloadProvider.getFSRL().getMD5();
		this.index = new SingleFileSystemIndexHelper(this, rootFsrl, PAYLOAD_NAME,
			payloadLength, payloadMd5);
	}

	@Override
	public FSRLRoot getFSRL() {
		return rootFsrl;
	}

	@Override
	public String getName() {
		return rootFsrl.getContainer().getName();
	}

	@Override
	public FileSystemRefManager getRefManager() {
		return refManager;
	}

	@Override
	public boolean isClosed() {
		return payloadProvider == null;
	}

	@Override
	public void close() throws IOException {
		refManager.onClose();
		if (payloadProvider != null) {
			payloadProvider.close();
			payloadProvider = null;
		}
		index.clear();
	}

	@Override
	public ByteProvider getByteProvider(GFile file, TaskMonitor monitor) throws IOException {
		// Only the single payload file has contents
		if (!index.isPayloadFile(file)) {
			return null;
		}
		return new ByteProviderWrapper(payloadProvider, file.getFSRL());
	}

	@Override
	public List<GFile> getListing(GFile directory) throws IOException {
		return index.getListing(directory);
	}

	@Override
	public GFile lookup(String path) throws IOException {
		return index.lookup(path);
	}
}
@@ -0,0 +1,145 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.file.formats.lzfse;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import ghidra.app.util.bin.ByteProvider;
import ghidra.formats.gfilesystem.*;
import ghidra.formats.gfilesystem.factory.GFileSystemFactoryByteProvider;
import ghidra.formats.gfilesystem.factory.GFileSystemProbeBytesOnly;
import ghidra.framework.Application;
import ghidra.framework.OperatingSystem;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
/**
* Factory to identify and create instances of a {@link LzfseFileSystem}
*
* @see <a href="https://github.com/lzfse/lzfse">lzfse reference implementation</a>
*/
public class LzfseFileSystemFactory
implements GFileSystemFactoryByteProvider<LzfseFileSystem>, GFileSystemProbeBytesOnly {
// Number of leading container bytes the probe needs: one 4-byte block magic
private static final int START_BYTES_REQUIRED = 4;
// Base name of the native decompressor binary (".exe" is appended on Windows)
private static final String LZFSE_NATIVE_BINARY_NAME = "lzfse";
// Prefix used when creating temporary files
private static final String LZFSE_TEMP_PREFIX = "lzfse";
// NOTE(review): presumably the maximum time to wait for the native binary -- confirm at use site
private static final int LZFSE_NATIVE_TIMEOUT_SECONDS = 10;
// Block magic values, as read from the first 4 bytes in little-endian order
private static final int LZFSE_ENDOFSTREAM_BLOCK_MAGIC = 0x24787662; // bvx$ (end of stream)
private static final int LZFSE_UNCOMPRESSED_BLOCK_MAGIC = 0x2d787662; // bvx- (raw data)
private static final int LZFSE_COMPRESSEDV1_BLOCK_MAGIC = 0x31787662; // bvx1 (lzfse compressed, uncompressed tables)
private static final int LZFSE_COMPRESSEDV2_BLOCK_MAGIC = 0x32787662; // bvx2 (lzfse compressed, compressed tables)
private static final int LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC = 0x6e787662; // bvxn (lzvn compressed)
@Override
public int getBytesRequired() {
return START_BYTES_REQUIRED;
}
@Override
public boolean probeStartBytes(FSRL containerFSRL, byte[] startBytes) {
int startValue = ByteBuffer.wrap(startBytes).order(ByteOrder.LITTLE_ENDIAN).getInt();
return switch (startValue) {
case LZFSE_ENDOFSTREAM_BLOCK_MAGIC:
case LZFSE_UNCOMPRESSED_BLOCK_MAGIC:
case LZFSE_COMPRESSEDV1_BLOCK_MAGIC:
case LZFSE_COMPRESSEDV2_BLOCK_MAGIC:
case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC:
yield true;
default:
yield false;
};
}
@Override
public GFileSystem create(FSRLRoot targetFSRL, ByteProvider byteProvider,
FileSystemService fsService, TaskMonitor monitor)
throws IOException, CancelledException {
File compressedFile = null;
File decompressedFile = null;
try {
compressedFile =
fsService.createPlaintextTempFile(byteProvider, LZFSE_TEMP_PREFIX, monitor);
decompressedFile = lzfseDecompress(compressedFile);
return new LzfseFileSystem(targetFSRL, decompressedFile, fsService, monitor);
}
finally {
byteProvider.close();
if (compressedFile != null && compressedFile.exists()) {
compressedFile.delete();
}
if (decompressedFile != null && decompressedFile.exists()) {
decompressedFile.delete();
}
}
}
/**
* Uses the native lzfse decompressor to decompress the given compressed file
*
* @param compressedFile The lzfse-compressed {@link File file} to decompress
* @return The lzfse-decompressed {@link File}
* @throws IOException If there was an IO-related error
*/
private File lzfseDecompress(File compressedFile) throws IOException {
String lzfseName = LZFSE_NATIVE_BINARY_NAME;
if (OperatingSystem.CURRENT_OPERATING_SYSTEM.equals(OperatingSystem.WINDOWS)) {
lzfseName += ".exe";
}
File lzfseNativeBinary = Application.getOSFile(lzfseName);
File decompressedFile = Application.createTempFile(LZFSE_TEMP_PREFIX,
Long.toString(System.currentTimeMillis()));
List<String> command = new ArrayList<>();
command.add(lzfseNativeBinary.getPath());
command.add("-decode");
command.add("-i");
command.add(compressedFile.getPath());
command.add("-o");
command.add(decompressedFile.getPath());
Process p = new ProcessBuilder(command).start();
boolean success = false;
try {
if (!p.waitFor(LZFSE_NATIVE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
p.destroyForcibly();
throw new IOException("lzfse native decompressor timed out");
}
if (p.exitValue() != 0) {
throw new IOException(
"lzfse native decompressor failed with exit code: " + p.exitValue());
}
success = true;
return decompressedFile;
}
catch (InterruptedException e) {
throw new IOException(e);
}
finally {
if (!success) {
decompressedFile.delete();
}
}
}
}