Range coder implementation provided here is not the same as the traditional arithmetic coder covered by historical patents. Range coding was developed as a patent-free alternative to arithmetic coding. Implemented by the book. Performance and memory footprint - no goals. Simplicity is.
Range coder with simple adaptive frequency model.
#include <errno.h>
#include <stdint.h>
#include <string.h>
// https://gist.github.com/leok7v/9e09be9319a77a3d7e2da4d780d9905f
enum { // https://en.wikipedia.org/wiki/Range_coding
range_coder_bits = 15,
range_coder_sym_max = 1u << range_coder_bits,
};
struct range_coder {
// state:
uint64_t low;
uint64_t range;
uint64_t code;
// adaptive model:
uint64_t freq[range_coder_sym_max]; // Symbol frequencies
uint64_t tree[range_coder_sym_max + 1]; // Fenwick Tree (1-based indexing)
uint64_t total; // sum of all freq
// stream:
void* stream;
errno_t (*write)(struct range_coder* rc, uint8_t b);
errno_t (*read)(struct range_coder* rc, uint8_t* b);
};
static void rc_ft_init(uint64_t* tree, size_t size) { // Fenwick Tree
memset(tree, 0, sizeof(uint64_t) * size);
}
static void rc_ft_update(uint64_t* tree, size_t size, uint32_t i, int64_t v) {
// Update Fenwick Tree at position index 'i' by value 'v'
i++; // Convert to 1-based i
while (i < size) {
tree[i] += v;
i += i & (~i + 1); // Extract least significant set bit
}
}
static uint64_t rc_ft_query(uint64_t* tree, uint32_t i) {
uint64_t sum = 0; // cumulative frequency up to and including 'i'
i++; // Convert to 1-based i
while (i > 0) {
sum += tree[i];
i -= i & (~i + 1); // Extract least significant set bit
}
return sum;
}
static void rc_am_update(struct range_coder* rc, uint16_t symbol) {
// Update the rc after encoding/decoding a s
if (rc->total >= INT64_MAX / 2) {
// Rescale freq to prevent overflow
rc->total = 0;
rc_ft_init(rc->tree, range_coder_sym_max + 1);
for (uint32_t i = 0; i < range_coder_sym_max; i++) {
rc->freq[i] = (rc->freq[i] + 1) / 2;
rc_ft_update(rc->tree, range_coder_sym_max + 1, i, rc->freq[i]);
rc->total += rc->freq[i];
}
}
rc->freq[symbol]++;
rc_ft_update(rc->tree, range_coder_sym_max + 1, symbol, 1);
rc->total++;
}
static void rc_init_am(struct range_coder* rc) { // init adaptive model
rc->total = 0;
for (uint32_t i = 0; i < range_coder_sym_max; i++) {
rc->freq[i] = 1;
}
rc_ft_init(rc->tree, range_coder_sym_max + 1);
for (uint32_t i = 0; i < range_coder_sym_max; i++) {
rc_ft_update(rc->tree, range_coder_sym_max + 1, i, rc->freq[i]);
rc->total += rc->freq[i];
}
}
static void rc_init_encoder(struct range_coder* rc) {
rc->code = 0;
rc->low = 0;
rc->range = UINT64_MAX;
rc_init_am(rc);
}
static errno_t rc_init_decoder(struct range_coder* rc) {
uint64_t code = 0; // first 8 bytes
errno_t r = 0;
for (int i = 0; i < 8 && r == 0; i++) {
uint8_t b = 0;
r = rc->read(rc, &b);
code = (code << 8) | b;
}
if (r == 0) {
rc->code = code; // first 64 bits from input stream as is
rc->low = 0;
rc->range = UINT64_MAX;
rc_init_am(rc);
}
return r;
}
#define range_coder_top (1uLL << (range_coder_bits * 3 - 1))
#define range_coder_bottom (1uLL << (range_coder_bits * 3 - 5))
errno_t rc_encode(struct range_coder* rc, uint16_t symbol) {
uint64_t total = rc->total;
uint64_t cum_freq = symbol > 0 ? rc_ft_query(rc->tree, symbol - 1) : 0;
uint64_t freq = rc->freq[symbol];
uint64_t r = rc->range / total;
rc->low += r * cum_freq;
rc->range = r * freq;
while (1) {
if ((rc->low ^ (rc->low + rc->range)) < range_coder_top) {
errno_t r = rc->write(rc, rc->low >> (64 - 8));
if (r != 0) { return r; }
rc->range <<= 8;
rc->low <<= 8;
} else if (rc->range < range_coder_bottom) {
rc->range <<= 8;
rc->low <<= 8;
errno_t r = rc->write(rc, rc->low >> (64 - 8));
if (r != 0) { return r; }
} else {
break;
}
}
rc_am_update(rc, symbol);
return 0;
}
errno_t rc_flush(struct range_coder* rc) { // finish encode
for (int i = 0; i < 8; i++) {
errno_t r = rc->write(rc, rc->low >> (64 - 8));
if (r != 0) { return r; }
rc->low <<= 8;
}
return 0;
}
errno_t rc_decode(struct range_coder* rc, uint16_t* symbol) {
uint64_t total = rc->total;
uint64_t r = rc->range / total;
uint64_t count = (rc->code - rc->low) / r;
// Binary search to find the s
uint16_t s = 0;
uint32_t low = 0, high = range_coder_sym_max - 1;
while (low <= high) {
uint32_t mid = (low + high) / 2;
uint64_t cum_freq = rc_ft_query(rc->tree, mid);
if (cum_freq <= count) {
low = mid + 1;
} else {
high = mid - 1;
}
}
s = low;
uint64_t cum_freq_symbol = s > 0 ? rc_ft_query(rc->tree, s - 1) : 0;
uint64_t freq = rc->freq[s];
rc->low += r * cum_freq_symbol;
rc->range = r * freq;
while (1) {
if ((rc->low ^ (rc->low + rc->range)) < range_coder_top) {
rc->range <<= 8;
rc->low <<= 8;
uint8_t c = 0;
errno_t r = rc->read(rc, &c);
if (r != 0) { return r; }
rc->code = (rc->code << 8) | (uint8_t)c;
} else if (rc->range < range_coder_bottom) {
rc->range <<= 8;
rc->low <<= 8;
uint8_t c = 0;
errno_t r = rc->read(rc, &c);
if (r != 0) { return r; }
rc->code = (rc->code << 8) | (uint8_t)c;
} else {
break;
}
}
rc_am_update(rc, s);
*symbol = s;
return 0;
}
// test
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
static inline errno_t write_byte(struct range_coder* rc, uint8_t b) {
size_t written = fwrite(&b, 1, 1, (FILE*)rc->stream);
return written == 1 ? 0 : errno;
}
static inline errno_t read_byte(struct range_coder* rc, uint8_t* b) {
size_t read = fread(b, 1, 1, (FILE*)rc->stream);
return read == 1 ? 0 : errno;
}
static errno_t test(void) {
errno_t r = 0;
uint16_t data[] = {1234, 2345, 3456, 4567, 8901, 9012};
size_t data_size = sizeof(data) / sizeof(data[0]);
{
FILE* out = fopen("encoded.bin", "wb");
if (!out) {
perror("Failed to create and open output file");
return 1;
}
static struct range_coder rc_encoder;
rc_init_encoder(&rc_encoder);
rc_encoder.stream = out;
rc_encoder.write = write_byte;
for (size_t i = 0; i < data_size && r == 0; i++) {
r = rc_encode(&rc_encoder, data[i]);
}
if (r == 0) {
r = rc_flush(&rc_encoder);
}
if (fclose(out) != 0 && r == 0) {
r = errno;
}
if (r != 0) {
fprintf(stderr, "i/o error %d %s", r, strerror(r));
return r;
}
}
{ // decode:
FILE* in = fopen("encoded.bin", "rb");
if (in == NULL) {
perror("Failed to open input file");
return 1;
}
static struct range_coder rc_decoder;
rc_decoder.stream = in;
rc_decoder.read = read_byte;
r = rc_init_decoder(&rc_decoder);
if (r != 0) {
fprintf(stderr, "i/o error %d %s", r, strerror(r));
return r;
}
for (size_t i = 0; i < data_size && r == 0; i++) {
uint16_t symbol = 0;
r = rc_decode(&rc_decoder, &symbol);
if (r == 0) {
if (data[i] != symbol) {
fprintf(stderr, "expected %u got %u\n", data[i], symbol);
r = EINVAL;
} else {
printf("Decoded symbol: %u\n", symbol);
}
}
}
fclose(in); // not expected to fail for input file ever
if (r != 0) {
fprintf(stderr, "i/o error %d %s", r, strerror(r));
return r;
}
}
return r;
}
for (uint32_t i = 0; i < range_coder_sym_max; i++) {use typeuint32_tfori? I could seesize_t,unsigned, or evenint, but where diduint32_tcome from? \$\endgroup\$