Proof of concept: 3D Hilbert curves in O(log(n))

Back in my earlier post about 2D Hilbert curves in logarithmic time, I hinted that it would be possible but likely not efficient to extend the same approach to higher dimensions. Just as a proof of concept, I've done just that for the Hilbert Index -> XYZ mapping in 3D. Most of the actual code is auto-generated, and I haven't spent much effort optimizing it in the same way as the 2D version, as it is rather obviously useless in practice. I'm only presenting the 32bit version here, but the code easily extends to arbitrary bit widths with an appropriate large integer library.

Applying the same would also be possible for the XYZ -> Hilbert Index mapping, however the code would be at least 10x-20x slower due to the exponential blowup of the technique, and probably rather tedious to generate.

// Shamelessly borrowed from http://and-what-happened.blogspot.de/2011/08/fast-2d-and-3d-hilbert-curves-and.html
void Morton_3D_Decode_10bit(const uint32_t morton,
uint32_t& index1, uint32_t& index2, uint32_t& index3)
{ // unpack 3 10-bit indices from a 30-bit Morton code
uint32_t value1 = morton;
uint32_t value2 = (value1 >> 1);
uint32_t value3 = (value1 >> 2);
value1 &= 0x09249249;
value2 &= 0x09249249;
value3 &= 0x09249249;
value1 |= (value1 >> 2);
value2 |= (value2 >> 2);
value3 |= (value3 >> 2);
value1 &= 0x030c30c3;
value2 &= 0x030c30c3;
value3 &= 0x030c30c3;
value1 |= (value1 >> 4);
value2 |= (value2 >> 4);
value3 |= (value3 >> 4);
value1 &= 0x0300f00f;
value2 &= 0x0300f00f;
value3 &= 0x0300f00f;
value1 |= (value1 >> 8);
value2 |= (value2 >> 8);
value3 |= (value3 >> 8);
value1 &= 0x030000ff;
value2 &= 0x030000ff;
value3 &= 0x030000ff;
value1 |= (value1 >> 16);
value2 |= (value2 >> 16);
value3 |= (value3 >> 16);
value1 &= 0x000003ff;
value2 &= 0x000003ff;
value3 &= 0x000003ff;
index1 = value1;
index2 = value2;
index3 = value3;
}

void transformFromIndex_parallel(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t& t0, uint32_t& t1, uint32_t& t2, uint32_t& t3)
{
t0 = i2 | (i0 & i1);
t1 = (~i0 & ~i1 & ~i2) | (i0 & i1 & ~i2) | (~i0 & ~i1 & i2);
t2 = ~(i0 & i1) ^ i2;
t3 = (i0 & ~i1 & i2) | (~i0 & i1 & i2);
}

void multiplyTransform_parallel(
uint32_t A0, uint32_t A1, uint32_t A2, uint32_t A3,
uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
uint32_t& C0, uint32_t& C1, uint32_t& C2, uint32_t& C3)
{
C0 = (A0 & ~A1 & ~A2 & B0 & B1 & ~B2 & ~B3) | (A0 & ~A2 & ~A3 & ~B0 & B2) | (A0 & ~A1 & ~A2 & ~A3 & B0 & ~B1 & B3) | (A0 & ~A2 & ~A3 & ~B0 & B1 & B3) | (A0 & A1 & A3 & ~B0 & B1 & ~B2 & ~B3) | (A1 & ~A3 & ~B0 & B1 & ~B2 & ~B3) | (~A0 & A1 & ~A2 & ~A3 & B1 & B3) | (A1 & A2 & B0 & ~B1 & ~B2 & ~B3) | (A0 & ~A1 & A3 & B0 & ~B2 & ~B3) | (~A0 & ~A1 & A3 & B0 & B1 & ~B3) | (A1 & ~A2 & ~A3 & ~B0 & ~B1 & B2) | (~A0 & A1 & A2 & B0 & B2) | (~A0 & A1 & ~A2 & ~B0 & ~B1 & B3) | (A0 & A1 & ~A2 & ~B0 & ~B1 & ~B2) | (~A0 & A1 & ~A3 & B0 & ~B1 & B2) | (~A0 & A2 & ~B0 & ~B1 & B3) | (~A0 & A1 & A3 & ~B0 & B2) | (~A0 & ~A1 & ~A2 & ~A3 & B0) | (A0 & A1 & A3 & ~B1 & ~B3) | (A0 & ~B0 & ~B1 & ~B2 & ~B3) | (A0 & A1 & ~A3 & ~B0 & ~B3) | (~A0 & A2 & B0 & B1 & B3) | (~A1 & A3 & B0 & ~B1 & B3) | (~A0 & A3 & B0 & ~B1 & B3) | (~A0 & ~A1 & A2 & B3) | (A0 & A2 & ~B2 & ~B3) | (~A1 & A2 & B0 & B2) | (~A0 & A3 & B1 & B3) | (~A1 & A3 & B1 & B2);
C1 = (~A0 & A3 & B0 & B1 & ~B2 & ~B3) | (A0 & ~A1 & ~A2 & ~A3 & B0 & ~B1 & B3) | (~A0 & ~A2 & ~A3 & B0 & B1 & B2) | (A0 & A1 & A3 & ~B0 & B1 & ~B2 & ~B3) | (A0 & ~A3 & B0 & ~B1 & ~B2 & ~B3) | (~A0 & ~A1 & A3 & B0 & ~B2 & ~B3) | (~A1 & A2 & ~B0 & B1 & ~B2 & ~B3) | (A0 & ~A1 & A3 & B0 & ~B2 & ~B3) | (A0 & ~A1 & A3 & B1 & ~B2 & ~B3) | (~A1 & ~A2 & ~A3 & ~B0 & B1 & B2) | (~A0 & A1 & A2 & B0 & B2) | (A0 & A1 & ~A2 & ~B0 & ~B1 & ~B2) | (~A0 & A1 & ~A3 & B0 & ~B1 & B2) | (~A0 & A2 & B1 & ~B2 & ~B3) | (~A0 & ~A1 & A2 & ~B1 & B2) | (A1 & ~A2 & ~A3 & ~B1 & B3) | (A0 & ~A1 & ~A2 & ~B0 & B2) | (~A0 & ~A3 & ~B0 & B1 & B3) | (~A0 & ~A1 & ~A3 & B1 & ~B2) | (A0 & A1 & A3 & B1 & B2) | (A0 & ~A3 & ~B0 & ~B1 & B3) | (A1 & ~B0 & ~B1 & ~B2 & ~B3) | (A0 & A1 & ~A3 & ~B1 & ~B2) | (A0 & A1 & ~A2 & ~A3 & ~B1) | (~A0 & A3 & B0 & ~B1 & B3) | (A1 & A3 & ~B0 & B1 & B3) | (~A0 & A3 & ~B1 & B2) | (A0 & A2 & B1 & B2) | (~A1 & B0 & B1 & B3);
C2 = (A0 & ~A1 & A2 & ~B1 & ~B2 & ~B3) | (~A0 & A1 & ~A2 & B0 & B1 & ~B2 & ~B3) | (A0 & A1 & ~A2 & B0 & ~B1 & ~B2 & ~B3) | (A0 & ~A1 & ~A2 & B0 & B1 & ~B2 & ~B3) | (A0 & A1 & A2 & ~B0 & ~B2 & ~B3) | (~A0 & ~A1 & A3 & B0 & ~B1 & B2) | (~A1 & ~A2 & ~A3 & B0 & ~B1 & B2) | (~A0 & A1 & A2 & ~B0 & B1 & B2) | (~A0 & ~A1 & A3 & ~B0 & B1 & ~B3) | (A1 & A3 & B0 & ~B1 & ~B2 & ~B3) | (~A0 & A1 & A2 & B0 & ~B1 & B2) | (A0 & ~A1 & ~A2 & ~B0 & ~B1 & B3) | (~A0 & ~A1 & ~A3 & ~B0 & ~B1 & B2) | (A1 & ~A2 & ~A3 & B0 & B1 & B2) | (~A0 & ~A1 & ~A3 & B0 & B1 & B2) | (A0 & A1 & A3 & ~B0 & B1 & ~B2 & ~B3) | (A0 & A1 & ~A2 & ~B0 & B1 & B3) | (A0 & A1 & A3 & B0 & ~B1 & ~B2) | (A0 & ~A1 & A3 & B1 & ~B2 & ~B3) | (~A1 & ~A2 & ~A3 & ~B0 & B1 & B2) | (A1 & ~A2 & ~A3 & ~B0 & ~B1 & B2) | (~A0 & A1 & ~A2 & ~B0 & ~B1 & B3) | (A2 & ~B0 & ~B1 & ~B2 & ~B3) | (A0 & ~A1 & A2 & B1 & B3) | (A0 & A1 & A2 & B0 & B3) | (~A0 & A2 & B0 & B1 & B3) | (~A1 & A3 & B0 & ~B1 & B3) | (A1 & A3 & ~B0 & B1 & B3);
C3 = (A0 & A1 & ~A2 & ~A3 & ~B0 & B1 & ~B2 & ~B3) | (A0 & ~A1 & ~A2 & ~A3 & B0 & B1 & B2) | (A0 & A1 & ~A2 & ~A3 & ~B0 & ~B1 & B3) | (A0 & A1 & A3 & B0 & B1 & ~B2 & ~B3) | (~A0 & ~A1 & A2 & B0 & ~B1 & ~B3) | (A0 & ~A1 & ~A2 & ~B0 & B1 & B3) | (~A0 & A1 & ~A2 & ~B0 & B1 & B2) | (A0 & ~A1 & ~A2 & ~A3 & B0 & ~B1 & B3) | (~A0 & A1 & ~A3 & B0 & ~B1 & B3) | (A0 & A1 & ~A3 & B0 & ~B1 & B2) | (~A0 & ~A1 & A3 & B0 & ~B2 & ~B3) | (~A1 & A2 & ~B0 & B1 & ~B2 & ~B3) | (~A0 & A1 & ~A2 & ~A3 & B1 & B3) | (A1 & A2 & B0 & ~B1 & ~B2 & ~B3) | (~A0 & ~A1 & A3 & B0 & B1 & ~B3) | (~A0 & ~A1 & ~A2 & ~A3 & B3) | (~A1 & A2 & ~B0 & ~B1 & B3) | (A3 & ~B0 & ~B1 & ~B2 & ~B3) | (A0 & ~A1 & A2 & ~B1 & B2) | (~A2 & ~A3 & B0 & B1 & B3) | (~A0 & A1 & A3 & ~B0 & ~B3) | (A1 & A2 & B0 & B1 & ~B3) | (A0 & A3 & ~B0 & B2);
}

void octantFromIndex_parallel(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t& x, uint32_t& y, uint32_t& z)
{
x = i0 ^ i1;
y = i1 ^ i2;
z = i2;
}

void applyTransform_parallel(uint32_t t0, uint32_t t1, uint32_t t2, uint32_t t3, uint32_t x, uint32_t y, uint32_t z, uint32_t& o0, uint32_t& o1, uint32_t& o2)
{
o2 = (~t3 | t1 | t0 | ~z)&(t3 | t1 | ~t0 | ~y)&(~t3 | ~t2 | ~z | ~y | x)&(~t3 | t2 | t1 | ~t0 | ~x)&(t3 | ~t2 | t1 | t0 | x)&(t3 | ~t2 | ~t1 | t0 | y)&(~t3 | t2 | ~t1 | t0 | y)&(t3 | ~t2 | ~t1 | ~t0 | ~x)&(~t3 | t2 | ~t1 | ~t0 | ~z)&(t3 | t2 | t1 | t0 | z)&(t3 | t2 | ~t1 | t0 | x)&(t3 | t2 | ~t1 | ~t0 | z)&(~t3 | ~t2 | t1 | ~z | y | x)&(~t3 | ~t2 | t1 | z | y | ~x)&(~t3 | ~t2 | t0 | z | ~y | ~x)&(~t3 | ~t2 | ~t1 | ~t0 | y | x)&(~t3 | t0 | ~z | y | ~x)&(~t2 | t1 | t0 | ~y | x);
o1 = (~t3 | ~t2 | ~y | ~x)&(~t3 | ~t1 | t0 | ~x)&(~t3 | t1 | t0 | ~y)&(~t3 | ~t2 | t1 | ~z | ~x)&(t3 | ~t2 | z | x)&(~t3 | ~t2 | t1 | z | ~y)&(~t2 | t1 | t0 | z | ~x)&(~t3 | ~t2 | t0 | z | ~y)&(~t2 | ~t1 | ~t0 | z | ~x)&(~t3 | ~t2 | ~t1 | y | x)&(~t3 | t2 | t1 | ~t0 | ~z)&(~t3 | t2 | ~t1 | ~t0 | y)&(t3 | t2 | t1 | t0 | y)&(t3 | t2 | t1 | ~t0 | ~x)&(t2 | ~t1 | t0 | ~z | ~x)&(t3 | ~t1 | t0 | ~z | x)&(t3 | t2 | ~t1 | ~t0 | ~y)&(~t3 | ~t2 | t1 | ~z | y | x)&(t3 | ~t2 | t1 | ~t0 | ~z | x)&(~t3 | ~t2 | ~t0 | z | y | x);
o0 = (~t3 | ~t2 | ~y | ~x)&(~t2 | ~t1 | t0 | z)&(t2 | t1 | t0 | x)&(t2 | ~t1 | ~t0 | ~x)&(~t3 | t0 | ~z | x)&(~t3 | ~t2 | t1 | ~z | ~x)&(~t3 | ~t2 | t1 | z | ~y)&(~t3 | ~t2 | ~z | ~y | x)&(~t3 | ~t2 | ~t1 | z | ~x)&(t3 | ~t2 | t1 | t0 | y)&(~t3 | t2 | t1 | ~t0 | y)&(~t2 | t1 | ~t0 | ~z | ~x)&(~t3 | t2 | ~t1 | ~z | ~x)&(t3 | ~t2 | ~t1 | ~t0 | ~y)&(t3 | t2 | t1 | ~t0 | z)&(t3 | t2 | ~t1 | t0 | ~y)&(t3 | ~t2 | t1 | ~t0 | ~z | x)&(~t3 | ~t2 | ~t0 | z | y | x)&(~t2 | t0 | z | y | ~x);
}

void hilbertToMorton3D_logarithmic(uint32_t hilbertIndex, uint32_t bits, uint32_t& x, uint32_t& y, uint32_t& z)
{
uint32_t i0, i1, i2;
Morton_3D_Decode_10bit(hilbertIndex, i0, i1, i2);
i0 <<= 32 - bits;
i1 <<= 32 - bits;
i2 <<= 32 - bits;

uint32_t t0, t1, t2, t3;
transformFromIndex_parallel(i0, i1, i2, t0, t1, t2, t3);

{
t0 >>= 1;
t1 >>= 1;
t2 >>= 1;
t3 >>= 1;
}

for (uint32_t shift = 1; shift <= 16; shift *= 2)
{
uint32_t s0, s1, s2, s3;

s0 = t0 >> shift;
s1 = t1 >> shift;
s2 = t2 >> shift;
s3 = t3 >> shift;

uint32_t T0, T1, T2, T3;
multiplyTransform_parallel(t0, t1, t2, t3, s0, s1, s2, s3, T0, T1, T2, T3);

t0 = T0;
t1 = T1;
t2 = T2;
t3 = T3;
}

uint32_t o0, o1, o2;
octantFromIndex_parallel(i0, i1, i2, o0, o1, o2);
applyTransform_parallel(t0, t1, t2, t3, o0, o1, o2, x, y, z);

x >>= 32 - bits;
y >>= 32 - bits;
z >>= 32 - bits;
}