/*
* Copyright (c) 2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#ifndef __COMPRESSION_H
#define __COMPRESSION_H
#include <stdint.h>
#include <stddef.h>
#include <os/base.h>
/*!
* @enum compression_algorithm_t
* @abstract Tag used to select a compression algorithm.
* @discussion Further details on the supported formats, and their implementation:
*
* - LZ4 is an extremely high-performance compressor. The open source version
* is already one of the fastest compressors of which we are aware, and we
* have optimized it still further in our implementation. The encoded format
* we produce and consume is compatible with the open source version, except
* that we add a very simple frame to the raw stream to allow some additional
* validation and functionality.
*
* The frame is documented here so that you can easily wrap another LZ4
* encoder/decoder to produce/consume the same data stream if necessary. An
* LZ4 encoded buffer is a sequence of blocks, each of which begins with a
* header. There are three possible headers:
*
* a "compressed block header" is (hex) 62 76 34 31, followed by the
* size in bytes of the decoded (plaintext) data represented by the
* block and the size (in bytes) of the encoded data stored in the
* block. Both size fields are stored as (possibly unaligned) 32-bit
* little-endian values. The compressed block header is followed
* immediately by the actual lz4-encoded data stream.
*
* an "uncompressed block header" is (hex) 62 76 34 2d, followed by the
* size of the data stored in the uncompressed block as a (possibly
* unaligned) 32-bit little-endian value. The uncompressed block header
* is followed immediately by the uncompressed data buffer of the
* specified size.
*
* an "end of stream header" is (hex) 62 76 34 24, and marks the end
* of the lz4 frame. No further data may be written or read beyond
* this header.
*
* - SMB (Server Message Block) is a protocol for sharing files, printers
* and other abstractions over a computer network. SMB supports compression
* to speed up transfers. The following SMB compression algorithms are
* supported:
*
* ---------------|---------|---------|-------|---------------------------
* Algorithm | Encoder | Decoder | Ratio | Encoder / decoder memory
* ---------------|---------|---------|-------|---------------------------
* LZ77 | fastest | fastest | 2.3x | 66 KB / 0 KB
* LZ77+Huffman | slowest | slowest | 2.8x | 172 KB / 6 KB
* LZNT1 | fast | fastest | 2.0x | 33 KB / 0 KB
* ---------------|---------|---------|-------|---------------------------
*/
typedef enum {
    /* LZ4 wrapped in the simple frame format; usable with the buffer and stream APIs. */
    COMPRESSION_LZ4       = 0x100,
    /* LZ4; usable with the buffer API only. */
    COMPRESSION_LZ4_RAW   = 0x101,
    /* SMB LZNT1; usable with the buffer API only. */
    COMPRESSION_SMB_LZNT1 = 0xC00,
    /* SMB LZ77; usable with the buffer API only. */
    COMPRESSION_SMB_LZ77  = 0xC10,
    /* SMB LZ77-HUFF; usable with the buffer API only. */
    COMPRESSION_SMB_LZ77H = 0xC20,
} compression_algorithm_t;
// =================================================================================================================
#pragma mark - Buffer API
/*!
 * @abstract Reports the smallest scratch buffer the encoder of the given algorithm can work with.
 * @param algorithm The compression algorithm the scratch space is intended for.
 * @return How many bytes the caller should allocate as scratch space when encoding
 * with that algorithm. The result may be 0.
 */
typedef size_t (*compression_encode_scratch_buffer_size_proc)(
    compression_algorithm_t algorithm);
/*!
 * @abstract Compresses the contents of one buffer into another.
 * @param dst_buffer Address of the first byte of the destination buffer.
 * @param dst_size Capacity of the destination buffer, in bytes.
 * @param src_buffer Address of the first byte of the source buffer.
 * @param src_size Length of the source buffer, in bytes.
 * @param scratch_buffer Temporary working storage the routine may use while
 * compressing. Call compression_encode_scratch_buffer_size(algorithm) to find
 * out how much space to allocate for it. The same scratch space may be re-used
 * across multiple (serial) calls to _encode and _decode. May be NULL when the
 * algorithm does not need any scratch space.
 * @param algorithm The compression algorithm to use.
 * @return On success, the number of bytes written to the destination buffer.
 * Returns 0 if the entire input cannot be compressed to fit into the provided
 * destination buffer, or if an error occurs.
 */
typedef size_t (*compression_encode_buffer_proc)(
    uint8_t *dst_buffer,
    size_t dst_size,
    const uint8_t *src_buffer,
    size_t src_size,
    void *scratch_buffer,
    compression_algorithm_t algorithm);
/*!
 * @abstract Reports the smallest scratch buffer the decoder of the given algorithm can work with.
 * @param algorithm The compression algorithm the scratch space is intended for.
 * @return How many bytes the caller should allocate as scratch space when decoding
 * with that algorithm. The result may be 0.
 */
typedef size_t (*compression_decode_scratch_buffer_size_proc)(
    compression_algorithm_t algorithm);
/*!
 * @abstract Decompresses the contents of one buffer into another.
 * @param dst_buffer Address of the first byte of the destination buffer.
 * @param dst_size Capacity of the destination buffer, in bytes.
 * @param src_buffer Address of the first byte of the source buffer.
 * @param src_size Length of the source buffer, in bytes.
 * @param scratch_buffer Temporary working storage the routine may use while
 * decompressing. Call compression_decode_scratch_buffer_size(algorithm) to find
 * out how much space to allocate for it. The same scratch space may be re-used
 * across multiple (serial) calls to _encode and _decode. May be NULL when the
 * algorithm does not need any scratch space.
 * @param algorithm The compression algorithm to use.
 * @return On success, the number of bytes written to the destination buffer.
 * When the destination buffer is too small to hold the entire expanded output,
 * only the first dst_size bytes are written and dst_size is returned; note that
 * this differs from the behavior of compression_encode. Returns 0 if an error
 * occurs.
 * SMB algorithms do not support truncated decodes.
 * SMB algorithms expect src_size to be exactly the size of the compressed input.
 */
typedef size_t (*compression_decode_buffer_proc)(
    uint8_t *dst_buffer,
    size_t dst_size,
    const uint8_t *src_buffer,
    size_t src_size,
    void *scratch_buffer,
    compression_algorithm_t algorithm);
// =================================================================================================================
#pragma mark - Stream API
/* Status codes reported by the compression_stream functions. */
typedef enum {
    COMPRESSION_STATUS_OK    = 0,
    COMPRESSION_STATUS_ERROR = -1,
    COMPRESSION_STATUS_END   = 1,
} compression_status_t;
/* Direction in which a compression stream operates. */
typedef enum {
    /* Encode to a compressed stream */
    COMPRESSION_STREAM_ENCODE = 0,
    /* Decode from a compressed stream */
    COMPRESSION_STREAM_DECODE = 1,
} compression_stream_operation_t;
/* Bit values accepted in the flags argument of compression_stream_process. */
typedef enum {
    COMPRESSION_STREAM_FINALIZE = 0x0001,
} compression_stream_flags_t;
typedef struct {
    /*
     * The caller shares responsibility for the dst_ptr, dst_size, src_ptr,
     * and src_size fields: they must describe valid memory buffers before
     * every call to compression_stream_process. Before it returns,
     * compression_stream_process updates them to account for the bytes of
     * the src and dst buffers that were successfully processed.
     */
    uint8_t       *dst_ptr;
    size_t         dst_size;
    const uint8_t *src_ptr;
    size_t         src_size;

    /*
     * Stream state object, managed by the compression_stream functions.
     * You should not ever directly access this field.
     */
    void          *state;
} compression_stream_t;
/* There are two critical features of the stream interfaces:
*
* - They allow encoding and decoding to be resumed from where it ended
* when the end of a source or destination block was reached.
*
* - When resuming, the new source and destination blocks need not be
* contiguous with earlier blocks in the stream; all necessary state
* to resume compression is represented by the compression_stream_t object.
*
* These two properties enable tasks like:
*
* - Decoding a compressed stream into a buffer with the ability to grow
* the buffer and resume decoding if the expanded stream is too large
* to fit without repeating any work.
*
* - Encoding a stream as pieces of it become available without ever needing
* to create an allocation large enough to hold all the uncompressed data.
*
* The basic workflow for using the stream interface is as follows:
*
* 1. initialize the state of your compression_stream object by calling
* compression_stream_init with the operation parameter set to specify
* whether you will be encoding or decoding, and the chosen algorithm
* specified by the algorithm parameter. This will allocate storage
* for the state that allows encoding or decoding to be resumed
* across calls.
*
* 2. set the dst_ptr, dst_size, src_ptr, and src_size fields of
* the compression_stream object to point to the next blocks to be
* processed.
*
* 3. call compression_stream_process. If no further input will be added
* to the stream via subsequent calls, COMPRESSION_STREAM_FINALIZE should
* be set in the flags argument.
* If compression_stream_process returns COMPRESSION_STATUS_END, there
* will be no further output from the stream.
*
* 4. repeat steps 2 and 3 as necessary to process the entire stream.
*
* 5. call compression_stream_destroy to free the state object in the
* compression_stream.
*/
/*!
 * @abstract Prepares a compression_stream for encoding (when operation is
 * COMPRESSION_STREAM_ENCODE) or for decoding (when operation is
 * COMPRESSION_STREAM_DECODE).
 * @param stream The compression_stream object to initialize.
 * @param operation Selects whether the stream is to be initialized for encoding
 * or for decoding. Must be either COMPRESSION_STREAM_ENCODE or
 * COMPRESSION_STREAM_DECODE.
 * @param algorithm The compression algorithm to use. Must be one of the values
 * listed in the compression_algorithm_t enum.
 * @discussion Every field of the compression_stream is set to zero except state.
 * For state, this routine allocates storage that captures the internal state of
 * the encoding or decoding process so that it can be resumed; that storage is
 * tracked through the state field.
 * @return COMPRESSION_STATUS_OK when the stream was initialized successfully,
 * COMPRESSION_STATUS_ERROR when an error occurred.
 */
typedef compression_status_t (*compression_stream_init_proc)(
    compression_stream_t *stream,
    compression_stream_operation_t operation,
    compression_algorithm_t algorithm);
/*!
 * @abstract Behaves like compression_stream_destroy followed by
 * compression_stream_init, except that the already-allocated state buffer is kept.
 * @return The status of the virtual compression_stream_init call.
 */
typedef compression_status_t (*compression_stream_reinit_proc)(
    compression_stream_t *stream,
    compression_stream_operation_t operation,
    compression_algorithm_t algorithm);
/*!
 * @abstract Releases the state information held by a compression_stream object.
 * @discussion Call this to free the memory that compression_stream_init allocated.
 * Once it has been called, the compression_stream object must be re-initialized
 * before it can be used again.
 */
typedef compression_status_t (*compression_stream_destroy_proc)(
    compression_stream_t *stream);
/*!
 * @abstract Runs one encode or decode step over a block of the stream.
 * @param stream The compression_stream object to operate on. Before calling,
 * the stream must have been initialized with compression_stream_init and its
 * user-managed fields must describe your input and output buffers. On return,
 * those fields have been updated to account for the bytes that were
 * successfully encoded or decoded during the call.
 * @param flags Binary OR of zero or more compression_stream_flags:
 *     COMPRESSION_STREAM_FINALIZE
 *         When set, tells the routine that no further input will be added to
 *         the stream, and therefore that the end of stream should be indicated
 *         once the input block has been completely processed.
 * @discussion The buffers described by the stream object are processed until
 * one of the following happens: the source buffer becomes empty, the
 * destination buffer becomes full, the entire stream has been processed, or an
 * error is encountered.
 * @return
 *     Encoding: COMPRESSION_STATUS_END is returned only if all input has been
 *     read from the source, all output (including an end-of-stream marker) has
 *     been written to the destination, and the COMPRESSION_STREAM_FINALIZE bit
 *     is set.
 *
 *     Decoding: COMPRESSION_STATUS_END is returned only if all input (including
 *     an end-of-stream marker) has been read from the source, and all output
 *     has been written to the destination.
 *
 *     COMPRESSION_STATUS_OK is returned if all data in the source buffer is
 *     consumed, or all space in the destination buffer is used. Further calls
 *     to compression_stream_process are then expected, supplying more data in
 *     the source buffer or more space in the destination buffer.
 *
 *     COMPRESSION_STATUS_ERROR is returned if an error is encountered (for
 *     example, if the encoded data is corrupted).
 *
 *     When a valid stream is decoded, the end of stream is detected from the
 *     contents of the input, and COMPRESSION_STATUS_END is returned in that
 *     case even if COMPRESSION_STREAM_FINALIZE is not set or more input is
 *     provided.
 *
 *     When a corrupted or truncated stream is decoded without
 *     COMPRESSION_STREAM_FINALIZE set to tell the decoder that no more input is
 *     coming, the decoder does not consume or produce any data and returns
 *     COMPRESSION_STATUS_OK. Client code will then call
 *     compression_stream_process again with the same state, entering an
 *     infinite loop. To avoid this, it is strongly advised to always set
 *     COMPRESSION_STREAM_FINALIZE when no more input is expected, for both
 *     encoding and decoding.
 */
typedef compression_status_t (*compression_stream_process_proc)(
    compression_stream_t *stream,
    int flags);
/*!
 * @abstract Determines which compression algorithm produced some compressed data,
 * based on its first 4 bytes.
 * @param data Address of the 4 bytes found at the beginning of the compressed data.
 * @discussion The given data bytes are inspected to identify the compression
 * algorithm that generated them.
 * @return A valid compression_algorithm_t on success, or -1 when the data bytes
 * do not correspond to any supported algorithm.
 */
typedef int (*compression_stream_identify_algorithm_proc)(
    const uint8_t *data);
// =================================================================================================================
#pragma mark - Kernel interface
/*
 * Table of function pointers making up the compression interface; each member
 * holds a pointer of the correspondingly named *_proc type.
 */
typedef struct {
    /* Stream API */
    compression_stream_init_proc                compression_stream_init;
    compression_stream_reinit_proc              compression_stream_reinit;
    compression_stream_destroy_proc             compression_stream_destroy;
    compression_stream_process_proc             compression_stream_process;
    compression_stream_identify_algorithm_proc  compression_stream_identify_algorithm;

    /* Buffer API */
    compression_encode_scratch_buffer_size_proc compression_encode_scratch_buffer_size;
    compression_encode_buffer_proc              compression_encode_buffer;
    compression_decode_scratch_buffer_size_proc compression_decode_scratch_buffer_size;
    compression_decode_buffer_proc              compression_decode_buffer;
} compression_ki_t;
__BEGIN_DECLS
/**
 * @abstract Pointer to the compression interface that was registered.
 */
extern const compression_ki_t *compression_ki_ptr;
/**
 * @abstract Makes the compression kext interface available for use within the kernel proper.
 * @param ki The interface to register.
 * @discussion Call this routine at most once, and only before late-const has
 * been applied to kernel memory.
 */
OS_EXPORT OS_NONNULL1 void
compression_interface_register(const compression_ki_t *ki);
/* The declarations below are only visible when PRIVATE evaluates to non-zero. */
#if PRIVATE
/* Callback type: pointer to a function that takes no arguments and returns nothing. */
typedef void (*registration_callback_t)(void);
/*
 * Supplies a registration callback to the compression interface.
 * NOTE(review): presumably the callback is invoked when
 * compression_interface_register is called; confirm against the implementation,
 * including whether passing NULL is allowed.
 */
void compression_interface_set_registration_callback(registration_callback_t callback);
#endif /* PRIVATE */
__END_DECLS
#endif // __COMPRESSION_H