12-bit Packed RGB Format

The 12-bit filled or unpacked (48 bit) integer RGBA format is a standard video format in which each pixel component consumes 12 bits for a total of 48. The alpha channel is effectively ignored. This method of pixel packing wastes 25% of storage, but requires less CPU to manipulate once in memory. On systems where storage is at a premium, the Visual Effects and Finishing applications allow the artist to work in 12-bit packed RGB format (also known as 36-bit format), which is a non-standard Visual Effects and Finishing application-specific format. The alpha component is not present or used in this format, so the pixels are packed together to fit in a 36-bit space. This saves 25% of storage at the expense of increased CPU time required to extract the components into proper memory-aligned integers.

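The table below describes the pixel component layout of one group of eight pixels over 9 words (36 bytes), where R, G, and B represent the pixel components, and the integers represent the zero-based pixel index. For example, B2 is the blue component of the third pixel. Each blue component is split into three 4-bit pieces stored in three consecutive words, which is why the same blue label appears in several rows.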

             | +0  |   +1  |  +2  |  +3 |
w0 byte 0  : | G0    |B0|    R0      |B1|
w1 byte 4  : | G1    |B0|    R1      |B1|
w2 byte 8  : | G2    |B0|    R2      |B1|
w3 byte 12 : | G3    |B2|    R3      |B3|
w4 byte 16 : | G4    |B2|    R4      |B3|
w5 byte 20 : | G5    |B2|    R5      |B3|
w6 byte 24 : | G6    |B4|    R6      |B5|
w7 byte 28 : | G7    |B4|    R7      |B5|
w8 byte 32 : | B6    |B4|    B7      |B5|

Unpacking Algorithm

The algorithm required to unpack 12-bit packed RGB format can be seen in the (un-optimized) utility macro below, where src and dst are the source and destination buffers, respectively.

/*
 * UNPACK36GL(src, dst)
 *
 * Unpacks one group of eight 12-bit packed RGB pixels. Reads nine 32-bit
 * words (36 bytes) starting at src and writes sixteen 32-bit words
 * (64 bytes) starting at dst.
 *
 * src, dst : buffer expressions. The macro adds byte offsets (0, 4, 8, ...)
 *            to them and casts the result to UInt32*, so they are presumably
 *            byte pointers suitably aligned for 32-bit access
 *            (NOTE(review): confirm against callers). Each argument is
 *            evaluated several times, so it must not have side effects.
 *
 * How it works:
 *   - The low 4 bits of each 16-bit half of a source word (mask 0x000F000F)
 *     hold one piece of a blue component. The pieces from three consecutive
 *     words are shifted to bit positions 4, 8 and 12 and OR-ed together,
 *     giving two blue values per 32-bit word (one in each 16-bit half).
 *   - w8 is used twice: its masked low bits complete b4, and the whole
 *     word is stored as b6.
 *   - Output is written as word pairs: the assembled blue word (shifted
 *     right by 16 for the first pixel of a pair, unshifted for the second),
 *     followed by the source word w0..w7 copied through unchanged.
 *
 * Expands to a brace-enclosed block that declares its own temporaries.
 */
#define UNPACK36GL(src, dst)                          \
{                                                     \
    /* Unpacking : 6.625 cycles per pixel */          \
    UInt32 w0, w1, w2, w3, w4, w5, w6, w7, w8;        \
    UInt32 b0, b2, b4, b6;                            \
                                                      \
    /* 9 cycles: load the nine packed words */        \
    w0 = *((UInt32*)((src) + 0));                     \
    w1 = *((UInt32*)((src) + 4));                     \
    w2 = *((UInt32*)((src) + 8));                     \
    w3 = *((UInt32*)((src) + 12));                    \
    w4 = *((UInt32*)((src) + 16));                    \
    w5 = *((UInt32*)((src) + 20));                    \
    w6 = *((UInt32*)((src) + 24));                    \
    w7 = *((UInt32*)((src) + 28));                    \
    w8 = *((UInt32*)((src) + 32));                    \
                                                      \
    /* 24 cycles: gather the 4-bit blue pieces */     \
    b0 = (((w0 & 0x000F000F) << 4) |                  \
          ((w1 & 0x000F000F) << 8) |                  \
          ((w2 & 0x000F000F) << 12));                 \
    b2 = (((w3 & 0x000F000F) << 4) |                  \
          ((w4 & 0x000F000F) << 8) |                  \
          ((w5 & 0x000F000F) << 12));                 \
    b4 = (((w6 & 0x000F000F) << 4) |                  \
          ((w7 & 0x000F000F) << 8) |                  \
          ((w8 & 0x000F000F) << 12));                 \
    /* w8 is stored as-is for the last two pixels */  \
    b6 = w8;                                          \
                                                      \
    /* 20 cycles: store blue word + source word */    \
    *((UInt32*)((dst) + 0)) = b0 >> 16;               \
    *((UInt32*)((dst) + 4)) = w0;                     \
    *((UInt32*)((dst) + 8)) = b0;                     \
    *((UInt32*)((dst) + 12)) = w1;                    \
    *((UInt32*)((dst) + 16)) = b2 >> 16;              \
    *((UInt32*)((dst) + 20)) = w2;                    \
    *((UInt32*)((dst) + 24)) = b2;                    \
    *((UInt32*)((dst) + 28)) = w3;                    \
    *((UInt32*)((dst) + 32)) = b4 >> 16;              \
    *((UInt32*)((dst) + 36)) = w4;                    \
    *((UInt32*)((dst) + 40)) = b4;                    \
    *((UInt32*)((dst) + 44)) = w5;                    \
    *((UInt32*)((dst) + 48)) = b6 >> 16;              \
    *((UInt32*)((dst) + 52)) = w6;                    \
    *((UInt32*)((dst) + 56)) = b6;                    \
    *((UInt32*)((dst) + 60)) = w7;                    \
}