I originally premiered this code to the internet in this StackOverflow post, and per a recommendation through one of the comments, I am reposting it here.
I am making a floating-point number set implementation using bitwise operations (for fun of course, I have no idea if this is performant). Feel free to render thoughts, and of course check out the original post if you are interested in the question I had about it. The main class (NumberSet) is the last class in the file.
/*
* This is pretty complicated and otherwise minimally commented so I feel as though I owe a bit of an explanation here.
* We want an intrinsically sorted set that can contain ANY valid JS number. So we first convert the number into bytes,
* by sharing an ArrayBuffer between a Uint8Array and Float64Array. Then we use the first 7 bytes, going from MSB to
* LSB, to traverse through a tree until we find the "ByteSet" that defines the values within the set for the
* last 8 bits. An optimization is also implemented where if it is known that all bytes after the one represented by
* the current node being traversed are 0, the presence of that exact number within the set is stored in that node
* and NO children are made. This takes advantage of the fact that integers in IEEE754 will typically end in long runs
* of 0.
*/
/**
* A simple set that can contain values from 0 to 255 (inclusive).
*/
class ByteSet {
private readonly data;
constructor() {
// 256 bits can fit in 32 bytes
this.data = new Uint8Array(32);
}
has(index: number): boolean {
const flag = 1 << (index & 7);
return (this.data[index >> 3] & flag) !== 0;
}
add(index: number): void {
this.set(index, true);
}
remove(index: number): void {
this.set(index, false);
}
private set(index: number, value: boolean): void {
const flag = 1 << (index & 7);
index >>= 3;
if (value) {
this.data[index] |= flag;
} else {
this.data[index] &= (~flag);
}
}
/**
* Iterates over this set and yields values that arise when a value in this set is inserted into ``buf8`` at
* ``offset`` and evaluated as a float through ``buf64``. This is used by ``NumberSetNode`` to permute through
* stored least significant bytes given the common upper 7 bytes of a float.
*/
*iterator(buf8: Uint8Array, buf64: Float64Array, offset: number): Generator<number> {
let head: number = -1;
let base: number;
let flag: number;
while ((++head) < 32) {
flag = this.data[head];
if (flag === 0) continue;
base = head << 3;
for (let i=0; i < 8; i++) {
if (flag & (1 << i)) {
buf8[offset] = (base | i);
yield buf64[0];
}
}
}
}
}
/*
It is imperative that we probe the platform's endianness so that we know which byte is most significant.
*/
const IS_BIG_ENDIAN: boolean = (() => {
const ab = new ArrayBuffer(2);
const u8 = new Uint8Array(ab);
const u16 = new Uint16Array(ab);
u8[0] = 0xAA;
u8[1] = 0xBB;
return u16[0] === 0xAABB;
})();
/*
If BE: Traverse from 0 (MSB) to 7 (LSB)
If LE: Traverse from 7 (MSB) to 0 (LSB)
*/
const TRAVERSAL_DIRECTION: 1 | -1 = IS_BIG_ENDIAN ? 1 : -1;
const TRAVERSAL_START: 0 | 7 = IS_BIG_ENDIAN ? 0 : 7;
const TRAVERSAL_END: 0 | 7 = IS_BIG_ENDIAN ? 7 : 0;
/**
* A node in the tree of a NumberSet. This covers the general case of diverting calls to the appropriate
* NumberSetNode or ByteSet, or using the stored value when appropriate.
*/
class NumberSetNode {
// @ts-ignore The value IS defined in the constructor, but though invocation of clear0
private value: boolean;
// @ts-ignore The value IS defined in the constructor, but though invocation of clear0
private children: { [index: number]: NumberSetNode | ByteSet };
constructor() {
this.clear0();
}
protected clear0(): void {
this.value = false;
this.children = {};
}
protected has0(bytes: Uint8Array, index: number, atWhichAllZero: number): boolean {
if (index === atWhichAllZero) return this.value;
const determinant: number = bytes[index];
const child = this.children[determinant];
if (typeof child === "undefined") return false;
index += TRAVERSAL_DIRECTION;
if (index === TRAVERSAL_END) {
return (child as ByteSet).has(bytes[index]);
} else {
return (child as NumberSetNode).has0(bytes, index, atWhichAllZero);
}
}
protected add0(bytes: Uint8Array, index: number, atWhichAllZero: number): void {
if (index === atWhichAllZero) {
this.value = true;
return;
}
const determinant: number = bytes[index];
let child: NumberSetNode | ByteSet = this.children[determinant];
index += TRAVERSAL_DIRECTION;
const isTail: boolean = (index === TRAVERSAL_END);
if (typeof child === "undefined") {
child = isTail ? new ByteSet() : new NumberSetNode();
this.children[determinant] = child;
}
if (isTail) {
(child as ByteSet).add(bytes[index]);
} else {
(child as NumberSetNode).add0(bytes, index, atWhichAllZero);
}
}
protected remove0(bytes: Uint8Array, index: number, atWhichAllZero: number): void {
if (index === atWhichAllZero) {
this.value = false;
return;
}
const determinant: number = bytes[index];
const child: NumberSetNode | ByteSet = this.children[determinant];
if (typeof child === "undefined") return;
index += TRAVERSAL_DIRECTION;
if (index === TRAVERSAL_END) {
(child as ByteSet).remove(bytes[index]);
} else {
(child as NumberSetNode).remove0(bytes, index, atWhichAllZero);
}
}
protected *iterator(buf8: Uint8Array, buf64: Float64Array, offset: number): Generator<number> {
if (this.value) {
// Fill remaining bytes with 0 in case a previous iteration would have placed information in this area.
// If this case is not triggered, a fill is not necessary since a complete overwrite is guaranteed.
IS_BIG_ENDIAN ? buf8.fill(0, offset, 8) : buf8.fill(0, 0, offset + 1);
yield buf64[0];
}
const next: number = offset + TRAVERSAL_DIRECTION;
const isTail: boolean = next === TRAVERSAL_END;
let child: NumberSetNode | ByteSet;
for (let k of Object.keys(this.children)) {
buf8[offset] = parseInt(k);
child = this.children[k as unknown as number];
if (isTail) {
for (let n of (child as ByteSet).iterator(buf8, buf64, next)) yield n;
} else {
for (let n of (child as NumberSetNode).iterator(buf8, buf64, next)) yield n;
}
}
}
}
/**
* An iterable set that can contain any number representable in JS
*/
export class NumberSet extends NumberSetNode implements Iterable<number> {
private readonly buf8: Uint8Array;
private readonly buf64: Float64Array;
private valueFlags: number = 0;
constructor() {
super();
const buf = new ArrayBuffer(8);
this.buf8 = new Uint8Array(buf);
this.buf64 = new Float64Array(buf);
}
/**
* NumberSet inherits has0, add0 and remove0 from NumberSetNode. This method converts a number
* to the 3 arguments that those functions require: a Uint8Array containing the IEEE754 representation
* of the number, an offset into the array that is significant for the current node, and an offset into the
* array at which all further values are 0.
*/
private parameterize(n: number): [ Uint8Array, number, number ] {
if (isNaN(n)) throw new Error("Cannot add NaN to NumberSet");
if (n === 0) n = 0; // JS says -0 === 0
this.buf64[0] = n;
let atWhichAllZero: number = TRAVERSAL_END + TRAVERSAL_DIRECTION;
let i: number = atWhichAllZero;
while (i !== TRAVERSAL_START) {
i -= TRAVERSAL_DIRECTION;
if (this.buf8[i] !== 0) break;
atWhichAllZero = i;
}
return [ this.buf8, TRAVERSAL_START, atWhichAllZero ];
}
/**
* Returns true if the NumberSet contains the given number
*/
has(n: number): boolean {
return this.has0(...this.parameterize(n));
}
/**
* Adds the given number to the NumberSet
*/
add(n: number): void {
if (n < 0) this.valueFlags |= 1; // Remember if this set may contain negative values
if (n !== Math.trunc(n)) this.valueFlags |= 2; // Remember if this set may contain fractional values
return this.add0(...this.parameterize(n));
}
/**
* Removes the given number from the NumberSet
*/
remove(n: number): void {
return this.remove0(...this.parameterize(n));
}
/**
* Clears the NumberSet
*/
clear(): void {
this.valueFlags = 0;
this.clear0();
}
[Symbol.iterator](): Iterator<number> {
return this.iterator(this.buf8, this.buf64, TRAVERSAL_START);
}
/**
* Similar to ``Array.from(this)``, but is guaranteed to be efficiently sorted.
*/
toArray(): number[] {
/*
* JS guarantees that numeric object keys will be sorted lexicographically. This allows us to take a shortcut.
* https://stackoverflow.com/questions/78731915/viable-to-sort-ieee754-floats-by-msb
*/
let arr: Float64Array | number[];
let typed: boolean;
// We want to return a number[] always for API stability, but the code path for ``this.valueFlags & 1`` takes
// advantage of efficient methods that only exist on Float64Array. So we keep track of whether or not the array
// must be converted to a number[].
if (this.valueFlags & 1) {
arr = new Float64Array(this);
if (arr.length < 2) return Array.from(arr);
typed = true;
// Negative values will appear after positive values during collection, flip them.
let firstNegativeValue: number = (arr.length >> 1);
let isNegative: boolean;
while (true) {
isNegative = arr[firstNegativeValue] < 0;
if (isNegative) {
if (firstNegativeValue === 0) return Array.from(arr);
isNegative = arr[firstNegativeValue - 1] < 0;
if (!isNegative) break;
firstNegativeValue--;
} else if ((++firstNegativeValue) >= arr.length) {
this.valueFlags ^= 1;
return Array.from(arr);
}
}
const negativeValueCount: number = arr.length - firstNegativeValue;
const positiveValues: Float64Array = arr.slice(0, firstNegativeValue);
arr.copyWithin(0, firstNegativeValue, arr.length);
if (negativeValueCount > 1) arr.subarray(0, negativeValueCount).reverse();
arr.set(positiveValues, negativeValueCount);
} else {
arr = Array.from(this);
if (arr.length < 2) return arr;
typed = false;
}
if (this.valueFlags & 2) {
// This shortcut may not work for some fractional numbers, sort if not already sorted.
let last: number = arr[0];
let cur: number;
for (let i = 1; i < arr.length; i++) {
cur = arr[i];
if (cur < last) {
arr = arr.sort();
break;
}
last = cur;
}
}
return typed ? Array.from(arr) : arr as number[];
}
}
EDIT: Post is outdated, you can see the latest iteration of the code on a GitHub repository here