Large Datasets from Procedurals - Arnold Developer Guide
This tutorial shows how to create as much geometry as possible in a finite amount of RAM in a procedural.
Procedurally generated geometry for rendering in Arnold goes through 3 phases of RAM consumption. The first phase is the generation of data, next is the filling of Arnold's data structures, and last is the ray acceleration data structure creation (the Bounding Volume Hierarchy, or BVH, is usually the largest). When this is all done, the actual rendering begins, which does not use much more RAM. Directly building arrays of data into Arnold's data structures is the most efficient method, if it is possible. Users do not have control over RAM consumption of the BVH.
The Mandelbulb
In our example, we are generating a "Mandelbulb" 3D version of the Mandelbrot and Julia sets. This algorithm involves iterating a function (Z^n+C) and seeing if it exits a sphere of radius 2; if it stays inside for a set number of steps, it is considered a "prisoner point" and a small sphere is put there. Rendering a Mandelbulb as a dense grid of spheres is neither the most elegant nor the most efficient way to display this mathematical entity; we are just using this as a method to create large amounts of data for the purposes of this tutorial.
Breaking Generation into Chunks
It is not possible to know how many spheres we will have when we are done, so we cannot fill Arnold's arrays directly; instead, we fill a linked list and then use that to fill the arrays in a second pass. If we fill RAM with a giant linked list of all spheres, we would then need to allocate an array for Arnold to copy the data into; we would only be able to use half of the RAM in a system using this method. Instead, in our example, we break the task into smaller chunks, with each chunk filling a linked list, allocating an array, and then deleting the linked list. This allows us to fill our RAM to the top with renderable geometry. For our example, we simply broke the Mandelbulb up into slabs on the X-axis.
Multi-Threading
Running the math that generates the points takes CPU power, and many modern systems have access to multiple CPUs on a single system. In order to fill the RAM as efficiently as possible, we take each of our chunks and break it into sub-chunks, allowing a single CPU to fill each of those in parallel, using Arnold's AiThreadCreate(). There are numerous other accelerations that can be incorporated into this, such as SIMD sin/cos functions or possibly offloading computations to a GPU or other methods, but we leave these types of optimizations out of our example.
Source Code
Below is the source code for the procedural:
mandelbulb.cpp
#include <ai.h>
#include <cstring>
#include <cstdlib>
#include <vector>
using namespace std;
// procedural parameters
struct Mandelbulb
{
int grid_size; // sample grid resolution
int max_iterations; // max Z^n+C iterations to try
float power; // exponent, the "n" in Z^n+C
float sphere_scale; // scales the spheres
float orbit_threshold; // clears out a hollow center in the set
int chunks; // number of "chunks" for RAM management
int threads; // number of threads to use
bool julia; // mandelbrot/julia set switch
AtVector julia_C; // C value for julia sets (unused for mandelbrot)
AtUniverse *universe; // Universe to add the point shape to
int counter;
};
// returns Z^n + C
// for more info: http://www.skytopia.com/project/fractal/2mandelbulb.html#formula
// this function is called often and is not very fast,
// this would be an obvious place to add optimizations,
// such as SIMD sin() functions or whatnot
static AtVector iterate(AtVector Z, float n, AtVector C)
{
AtVector Zsquared;
float r2 = AiV3Dot(Z, Z);
float theta = atan2f(sqrtf(Z.x * Z.x + Z.y * Z.y), Z.z);
float phi = atan2f(Z.y, Z.x);
float r_n;
if (n == 8)
r_n = r2 * r2;
else
r_n = powf(r2,n*.5f);
const float sin_thetan = sinf(theta * n);
Zsquared.x = r_n * sin_thetan * cosf(phi * n);
Zsquared.y = r_n * sin_thetan * sinf(phi * n);
Zsquared.z = r_n * cosf(theta * n);
return Zsquared + C;
}
AI_PROCEDURAL_NODE_EXPORT_METHODS(MandelbulbMtd);
node_parameters
{
AiParameterInt("grid_size" , 1000);
AiParameterInt("max_iterations" , 10);
AiParameterFlt("power" , 8);
AiParameterFlt("sphere_scale" , 1);
AiParameterFlt("orbit_threshold", 0.05);
AiParameterInt("chunks" , 30);
AiParameterInt("threads" , 4);
AiParameterBool("julia" , false);
AiParameterVec("julia_C" , 0, 0, 0);
}
// we read the UI parameters into their global vars
procedural_init
{
Mandelbulb *bulb = new Mandelbulb();
*user_ptr = bulb;
// get the universe this procedural belongs to, and use it create all ginstances
bulb->universe = AiNodeGetUniverse(node);
bulb->grid_size = AiNodeGetInt(node, AtString("grid_size"));
bulb->max_iterations = AiNodeGetInt(node, AtString("max_iterations"));
bulb->power = AiNodeGetFlt(node, AtString("power"));
bulb->sphere_scale = AiNodeGetFlt(node, AtString("sphere_scale"));
bulb->orbit_threshold = AiSqr(AiNodeGetFlt(node, AtString("orbit_threshold")));
bulb->chunks = AiNodeGetInt(node, AtString("chunks"));
bulb->threads = AiClamp(AiNodeGetInt(node, AtString("threads")), 1, AI_MAX_THREADS);
bulb->julia = AiNodeGetBool(node, AtString("julia"));
bulb->julia_C = AiNodeGetVec(node, AtString("julia_C"));
bulb->counter = 0;
return true;
}
procedural_cleanup
{
Mandelbulb *bulb = (Mandelbulb*)user_ptr;
delete bulb;
return true;
}
// we will create one node per chunk as set in the UI
procedural_num_nodes
{
Mandelbulb *bulb = (Mandelbulb*)user_ptr;
return bulb->chunks;
}
// this is the function that gets run on each thread
// the function (Z^n+C) is sampled at all points in a regular grid
// prisoner points with an orbit greater than bulb->orbit_threshold are added
// the total grid is broken into bulb->chnks number of slabs on the X axis
// each slab is broken into bulb->threads number of sub slabs
// and the start and end value in X is passed in and that section is sampled
void fillList(Mandelbulb *bulb, int start, int end, int chunknum, vector<AtVector>& list)
{
float inv_grid_size = 1.0f / bulb->grid_size;
// these vars are for a crude counter for percent completed
unsigned int modder = static_cast<unsigned int>(float(end-start) / 5);
int levelcount = 0;
int percent = 0;
int localcounter = 0;
// only samples X in the range "start" to "end"
for (int X = start; X < end; X++) {
levelcount++;
// echo out some completion info
if (modder > 0) {
if ((levelcount%modder) ==0 ) {
percent += 10;
AiMsgInfo("[mandelbulb] %d percent of chunk %d", percent, chunknum);
}
}
// samples all points in Y and Z
for (int Y = 0; Y < bulb->grid_size; Y++) {
for (int Z = 0; Z < bulb->grid_size; Z++) {
AtVector sample;
sample.x = (X * inv_grid_size - 0.5f) * 2.5f;
sample.y = (Y * inv_grid_size - 0.5f) * 2.5f;
sample.z = (Z * inv_grid_size - 0.5f) * 2.5f;
// init the iterator
AtVector iterator = sample;
// now iterate the Z^n+C function bulb->max_iterations number of times
for (int iter = 0; iter < bulb->max_iterations; iter++) {
if (AiV3Dot(iterator,iterator) > 4)
break; //orbit has left the max radius of 2....
if (bulb->julia) {
// each UI value of C creates a full Julia set
iterator = iterate(iterator,bulb->power,bulb->julia_C);
} else {
// Mandelbrot set is the centerpoints of all Julia sets
iterator = iterate(iterator,bulb->power,sample);
}
}
// tiny orbits are usually inside the set, disallow them.
bool allowit = AiV3Dist2(sample,iterator) >= bulb->orbit_threshold;
// if the orbit is inside radius 2
// and its endpoint travelled greater than orbittthresh
if (AiV3Dot(iterator, iterator) < 4 && allowit) {
bulb->counter++; // increment global counter
localcounter++; // increment local counter
// this is a prisoner point, add it to the set
list.push_back(sample);
}
}
}
}
AiMsgInfo("[mandelbulb] finished 1 thread of chunk %d, new total new points %d", chunknum, localcounter);
}
// this builds the "points" node in Arnold and sets
// the point locations, radius, and sets it to sphere mode
static AtNode *build_node(const AtNode *parent, Mandelbulb *bulb, vector<AtVector>& list)
{
AtArray *pointarr = AiArrayConvert(list.size(), 1, AI_TYPE_VECTOR, &list[0]);
vector<AtVector>().swap(list); // clear data used by points vector.
AtNode *currentInstance = AiNode(bulb->universe, AtString("points"), AtString("mandelbulb"), parent); // initialize node as child of procedural
AiNodeSetArray(currentInstance, AtString("points"), pointarr);
AiNodeSetFlt(currentInstance, AtString("radius"), (2.0f/bulb->grid_size) * bulb->sphere_scale);
AiNodeSetInt(currentInstance, AtString("mode"), 1);
return currentInstance;
}
// a data structure to hold the arguments for the thread
// corresponds to the arguments to the fillList() function
struct ThreadArgs {
Mandelbulb *bulb;
int start;
int end;
int i;
vector<AtVector> list;
};
// a function to be passed for the thread to execute
// basically a wrapper to the fillList() function
unsigned int threadloop(void *pointer)
{
ThreadArgs *thread_args = (ThreadArgs*) pointer;
fillList(thread_args->bulb, thread_args->start, thread_args->end, thread_args->i, thread_args->list);
return 0;
}
// this is the function that Arnold calls to request the nodes
// that this procedural creates.
procedural_get_node
{
Mandelbulb *bulb = (Mandelbulb*)user_ptr;
// determine the start and end point of this chunk
float chunksize = float(bulb->grid_size) / float(bulb->chunks);
int start = static_cast<int>(i*chunksize);
int end = static_cast<int>((i+1)*chunksize);
if (end>bulb->grid_size)
end = bulb->grid_size;
float range = end - start;
// make an array of arguments for the threads
vector<ThreadArgs> thread_args(bulb->threads);
vector<void*> threads(bulb->threads);
// now loop through and launch the threads
for (int tnum = 0; tnum < bulb->threads; tnum++) {
// figure out the threads start and end points for the sub-chunks
int tstart = start + static_cast<int>((range/bulb->threads)*tnum);
int tend = start + static_cast<int>((range/bulb->threads)*(tnum+1));
thread_args[tnum].start = tstart;
thread_args[tnum].end = tend;
thread_args[tnum].i = i;
thread_args[tnum].bulb = bulb;
threads[tnum] = AiThreadCreate(threadloop, &thread_args[tnum], 0);
}
// using AiThreadWait, wait 'til the threads finish
size_t listlength = 0;
for (int tnum = 0; tnum < bulb->threads; tnum++) {
AiThreadWait(threads[tnum]);
// sum up the length of all threads lists
listlength += thread_args[tnum].list.size();
}
// a vector to hold all the point data
vector<AtVector> allpoints;
allpoints.reserve(listlength);
// concatenate all the vectors returned by the threads
for (int tnum = 0; tnum < bulb->threads; tnum++) {
allpoints.insert(allpoints.end(), thread_args[tnum].list.begin(), thread_args[tnum].list.end());
vector<AtVector>().swap(thread_args[tnum].list); // clear data
}
for (int k = 0; k < bulb->threads; k++)
AiThreadClose(threads[k]);
AiMsgInfo("[mandelbulb] total sphere count: %d", bulb->counter);
// if it's empty, return a null and Arnold handles it well.
// passing a node with no geometry causes errors.
if (listlength == 0)
return NULL;
// build the AtNode
return build_node(node, bulb, allpoints);
}
node_loader
{
if (i>0)
return false;
node->methods = MandelbulbMtd;
node->output_type = AI_TYPE_NONE;
node->name = "mandelbulb";
node->node_type = AI_NODE_SHAPE_PROCEDURAL;
strcpy(node->version, AI_VERSION);
return true;
}
Example Scene
The following .ass file generates an image similar to the one at the top of this page:
mandelbulb.ass
options
{
AA_samples 9
outputs "RGBA RGBA /out/arnold1:gaussian_filter /out/arnold1:jpeg"
xres 960
yres 540
GI_diffuse_depth 1
GI_specular_depth 1
GI_diffuse_samples 3
}
driver_jpeg
{
name /out/arnold1:jpeg
filename "mandelbulb.jpg"
}
gaussian_filter
{
name /out/arnold1:gaussian_filter
}
persp_camera
{
name /obj/cam1
fov 54.512329
focus_distance 1 2 FLOAT 3.0099871 3.01
aperture_size 0.004
aperture_blades 5
matrix 1 2 MATRIX
0.9011746 0.010277364 0.43333444 0
0.15803696 0.92311317 -0.35055155 0
-0.4036195 0.38439101 0.83026195 0
-1.38442 1.3010246 2.6316857 1
0.90098524 0.010277364 0.43372801 0
0.15819006 0.92311317 -0.35048249 0
-0.40398207 0.38439101 0.83008558 0
-1.386554 1.3010246 2.6282051 1
shutter_start 0.25
shutter_end 0.75
}
skydome_light
{
name mysky
intensity 1
}
utility
{
name /shop/utility1
shade_mode "lambert"
color 0.5 0.5 0.5
}
plane
{
name /obj/FLOOR:/shop/utility1:plane_0
point 0.0 -0.5 0.0
normal 0.0 1.0 0.0
shader "/shop/utility1"
}
standard_surface
{
name /shop/standard1
base 0.9
base_color 0.7 0.7 0.7
specular_roughness 0.167138
}
mandelbulb
{
name /obj/MandelJulia_Procedural1
shader "/shop/standard1"
grid_size 1600
max_iterations 10
power 8
sphere_scale 1
orbit_threshold 0.05
chunks 30
threads 16
julia off
julia_C -0.161224 1.04 0.183673
}
point_light
{
name /obj/arnold_light2
radius 5
matrix
1 0 0 0
0 1 0 0
0 0 1 0
-10 10 0 1
color 1 0.5996 0.076
intensity 300
samples 2
}
Here is a high-resolution, 2.5 billion sphere render by Thiago Ize and some close-ups rendered by Lee Griggs.