Misc GL/CL fixes and additions.

Made native calls package-private so that they can now be used from the high-level API. Added support for "render-to-texture" in the fractal demo.
2026-04-09 00:14:04 +00:00 · 2010-09-30 17:21:50 +00:00 · 2010-09-30 17:21:50 +00:00 · 2b79b68501
commit 2b79b68501
parent a8bcb7fd25
19 changed files with 1211 additions and 510 deletions
--- a/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java
+++ b/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java
@ -43,18 +43,23 @@ import org.lwjgl.util.Color;
 import org.lwjgl.util.ReadableColor;

 import java.io.*;
+import java.nio.ByteBuffer;
 import java.nio.IntBuffer;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;

 import static java.lang.Math.*;
 import static org.lwjgl.opencl.CL10.*;
 import static org.lwjgl.opencl.CL10GL.*;
 import static org.lwjgl.opencl.KHRGLEvent.*;
+import static org.lwjgl.opengl.AMDDebugOutput.*;
 import static org.lwjgl.opengl.ARBCLEvent.*;
+import static org.lwjgl.opengl.ARBDebugOutput.*;
 import static org.lwjgl.opengl.ARBSync.*;
 import static org.lwjgl.opengl.GL11.*;
-import static org.lwjgl.opengl.GL12.*;
 import static org.lwjgl.opengl.GL15.*;
+import static org.lwjgl.opengl.GL20.*;
 import static org.lwjgl.opengl.GL21.*;

 /*
@ -118,24 +123,35 @@ public class DemoFractal {
 	// max number of used GPUs
 	private static final int MAX_PARALLELISM_LEVEL = 8;

-	// max per pixel iterations to compute the fractal
-	private static final int MAX_ITERATIONS = 500;
+	private static final int COLOR_MAP_SIZE = 32 * 2 * 4;
+
+	private Set<String> params;

 	private CLContext clContext;
 	private CLCommandQueue[] queues;
 	private CLKernel[] kernels;
 	private CLProgram[] programs;

-	private CLMem[] pboBuffers;
-	private IntBuffer pboIDs;
+	private CLMem[] glBuffers;
+	private IntBuffer glIDs;
+
+	private boolean useTextures;
+
+	// Texture rendering
+	private int dlist;
+	private int vsh;
+	private int fsh;
+	private int program;

 	private CLMem[] colorMap;
-	private IntBuffer[] colorMapBuffer;

 	private final PointerBuffer kernel2DGlobalWorkSize;

-	private int width;
-	private int height;
+	// max per pixel iterations to compute the fractal
+	private int maxIterations = 500;
+
+	private int width = 512;
+	private int height = 512;

 	private double minX = -2f;
 	private double minY = -1.2f;
@ -174,13 +190,375 @@ public class DemoFractal {
 	private GLSync glSync;
 	private CLEvent glEvent;

-	public DemoFractal(int width, int height) {
-		kernel2DGlobalWorkSize = BufferUtils.createPointerBuffer(2);
+	/**
+	 * Creates the demo and configures it from the command-line arguments.
+	 *
+	 * Every argument must start with '-' or '/'. Recognised options (case-insensitive) are
+	 * forcePBO, forceCPU, debugGL, iterations &lt;count&gt; and res &lt;width&gt; &lt;height&gt;.
+	 * Options that are not recognised are ignored.
+	 *
+	 * @param args the command-line arguments
+	 *
+	 * @throws IllegalArgumentException if an argument is empty or lacks the '-' or '/' prefix,
+	 *                                  or if an option value is missing, not a number or less than 1
+	 */
+	public DemoFractal(final String[] args) {
+		params = new HashSet<String>();

-		this.width = width;
-		this.height = height;
+		for ( int i = 0; i < args.length; i++ ) {
+			final String arg = args[i];
+
+			// An empty argument has no first character to inspect; report it instead of failing in charAt(0)
+			if ( arg.length() == 0 || (arg.charAt(0) != '-' && arg.charAt(0) != '/') )
+				throw new IllegalArgumentException("Invalid command-line argument: " + args[i]);
+
+			final String param = arg.substring(1);
+
+			if ( "forcePBO".equalsIgnoreCase(param) )
+				params.add("forcePBO");
+			else if ( "forceCPU".equalsIgnoreCase(param) )
+				params.add("forceCPU");
+			else if ( "debugGL".equalsIgnoreCase(param) )
+				params.add("debugGL");
+			else if ( "iterations".equalsIgnoreCase(param) ) {
+				// needs one value after the option name
+				if ( args.length < i + 1 + 1 )
+					throw new IllegalArgumentException("Invalid iterations argument specified.");
+
+				final int iterations;
+				try {
+					iterations = Integer.parseInt(args[++i]);
+				} catch (NumberFormatException e) {
+					throw new IllegalArgumentException("Invalid number of iterations specified.", e);
+				}
+
+				// reject non-positive counts, the same way non-positive resolutions are rejected below
+				if ( iterations < 1 )
+					throw new IllegalArgumentException("Invalid number of iterations specified.");
+
+				this.maxIterations = iterations;
+			} else if ( "res".equalsIgnoreCase(param) ) {
+				// needs two values (width, height) after the option name
+				if ( args.length < i + 2 + 1 )
+					throw new IllegalArgumentException("Invalid res argument specified.");
+
+				try {
+					this.width = Integer.parseInt(args[++i]);
+					this.height = Integer.parseInt(args[++i]);
+				} catch (NumberFormatException e) {
+					throw new IllegalArgumentException("Invalid res dimensions specified.", e);
+				}
+
+				if ( width < 1 || height < 1 )
+					throw new IllegalArgumentException("Invalid res dimensions specified.");
+			}
+		}
+
+		kernel2DGlobalWorkSize = BufferUtils.createPointerBuffer(2);
 	}

+	/**
+	 * Program entry point: builds the demo from the command line, initialises it and
+	 * then hands control to the rendering cycle.
+	 *
+	 * @param args the command-line arguments, passed on to the constructor
+	 */
+	public static void main(String args[]) {
+		final DemoFractal fractal = new DemoFractal(args);
+
+		fractal.init();
+		fractal.run();
+	}
+
+	/**
+	 * Creates the OpenCL runtime and the display, initialises the shared CL/GL state and
+	 * prepares the GL objects and kernel arguments used by the rendering cycle.
+	 *
+	 * @throws RuntimeException wrapping any failure raised while creating the display or
+	 *                          while initialising OpenCL
+	 */
+	public void init() {
+		try {
+			CL.create();
+			Display.setDisplayMode(new DisplayMode(width, height));
+			Display.setTitle("OpenCL Fractal Demo");
+			Display.setSwapInterval(0);
+			// a debug GL context is requested only when -debugGL was given
+			Display.create(new PixelFormat(), new ContextAttribs().withDebug(params.contains("debugGL")));
+		} catch (LWJGLException e) {
+			throw new RuntimeException(e);
+		}
+
+		try {
+			initCL(Display.getDrawable());
+		} catch (Exception e) {
+			// release what was created so far before propagating the failure
+			if ( clContext != null )
+				clReleaseContext(clContext);
+			Display.destroy();
+			throw new RuntimeException(e);
+		}
+
+		glDisable(GL_DEPTH_TEST);
+		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+
+		initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight());
+
+		initGLObjects();
+		glFinish();
+
+		// the kernel arguments reference the GL objects created above
+		setKernelConstants();
+	}
+
+	/**
+	 * Initialises OpenCL for the given drawable.
+	 *
+	 * Takes the first platform, selects its devices that support KHR_gl_sharing (GPU devices
+	 * unless -forceCPU was given, falling back to CPU devices), creates a context for them
+	 * together with the drawable, and creates one command queue and one colour map buffer per slice.
+	 * It then checks the required OpenGL version, installs a GL debug callback when available,
+	 * prints the chosen configuration, decides between texture and PBO sharing, builds the
+	 * programs, detects the synchronisation methods and, in texture mode, creates the display
+	 * list and shader program used for drawing.
+	 *
+	 * @param drawable the GL drawable the OpenCL context is created with
+	 *
+	 * @throws Exception if no suitable platform or device is found, the required OpenGL version is
+	 *                   missing, or one of the CL/GL calls fails
+	 */
+	private void initCL(Drawable drawable) throws Exception {
+		// Find a platform
+		List<CLPlatform> platforms = CLPlatform.getPlatforms();
+		if ( platforms == null )
+			throw new RuntimeException("No OpenCL platforms found.");
+
+		final CLPlatform platform = platforms.get(0); // just grab the first one
+
+		// Find devices with GL sharing support
+		final Filter<CLDevice> glSharingFilter = new Filter<CLDevice>() {
+			public boolean accept(final CLDevice device) {
+				final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
+				return caps.CL_KHR_gl_sharing;
+			}
+		};
+		int device_type = params.contains("forceCPU") ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU;
+		List<CLDevice> devices = platform.getDevices(device_type, glSharingFilter);
+		if ( devices == null ) {
+			// nothing found for the requested type: retry with CPU devices before giving up
+			device_type = CL_DEVICE_TYPE_CPU;
+			devices = platform.getDevices(device_type, glSharingFilter);
+			if ( devices == null )
+				throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support.");
+		}
+
+		// Create the context
+		clContext = CLContext.create(platform, devices, new CLContextCallback() {
+			protected void handleMessage(final String errinfo, final ByteBuffer private_info) {
+				System.out.println("[CONTEXT MESSAGE] " + errinfo);
+			}
+		}, drawable, null);
+
+		// one slice per device, capped at MAX_PARALLELISM_LEVEL
+		slices = min(devices.size(), MAX_PARALLELISM_LEVEL);
+
+		// create command queues for every GPU, setup colormap and init kernels
+		queues = new CLCommandQueue[slices];
+		kernels = new CLKernel[slices];
+		colorMap = new CLMem[slices];
+
+		for ( int i = 0; i < slices; i++ ) {
+			colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, COLOR_MAP_SIZE, null);
+			colorMap[i].checkValid();
+
+			// create command queue and upload color map buffer on each used device
+			queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null);
+			queues[i].checkValid();
+
+			// the colour map is written through a blocking map of the CL buffer, then unmapped
+			final ByteBuffer colorMapBuffer = clEnqueueMapBuffer(queues[i], colorMap[i], CL_TRUE, CL_MAP_WRITE, 0, COLOR_MAP_SIZE, null, null, null);
+			initColorMap(colorMapBuffer.asIntBuffer(), 32, Color.BLUE, Color.GREEN, Color.RED);
+			clEnqueueUnmapMemObject(queues[i], colorMap[i], colorMapBuffer, null, null);
+		}
+
+		// check if we have 64bit FP support on all devices
+		// if yes we can use only one program for all devices + one kernel per device.
+		// if not we will have to create (at least) one program for 32 and one for 64bit devices.
+		// since there are different vendor extensions for double FP we use one program per device.
+		// (the OpenCL spec is not very clear about these use cases)
+		boolean all64bit = true;
+		for ( CLDevice device : devices ) {
+			if ( !isDoubleFPAvailable(device) ) {
+				all64bit = false;
+				break;
+			}
+		}
+
+		// load program(s)
+		programs = new CLProgram[all64bit ? 1 : slices];
+
+		final ContextCapabilities caps = GLContext.getCapabilities();
+
+		if ( !caps.OpenGL20 )
+			throw new RuntimeException("OpenGL 2.0 is required to run this demo.");
+		else if ( device_type == CL_DEVICE_TYPE_CPU && !caps.OpenGL21 )
+			throw new RuntimeException("OpenGL 2.1 is required to run this demo.");
+
+		// install a GL debug message callback, preferring the ARB extension over the AMD one
+		if ( caps.GL_ARB_debug_output )
+			glDebugMessageCallbackARB(new ARBDebugOutputCallback());
+		else if ( caps.GL_AMD_debug_output )
+			glDebugMessageCallbackAMD(new AMDDebugOutputCallback());
+
+		if ( device_type == CL_DEVICE_TYPE_GPU )
+			System.out.println("OpenCL Device Type: GPU (Use -forceCPU to use CPU)");
+		else
+			System.out.println("OpenCL Device Type: CPU");
+		for ( int i = 0; i < devices.size(); i++ )
+			System.out.println("OpenCL Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event);
+
+		System.out.println("\nMax Iterations: " + maxIterations + " (Use -iterations <count> to change)");
+		System.out.println("Display resolution: " + width + "x" + height + " (Use -res <width> <height> to change)");
+
+		System.out.println("\nOpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync);
+		System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event);
+
+		// Textures are used only with GPU devices; -forcePBO switches a GPU to PBOs only when OpenGL 2.1 is available.
+		// CPU devices always use PBOs.
+		useTextures = device_type == CL_DEVICE_TYPE_GPU && (!caps.OpenGL21 || !params.contains("forcePBO"));
+		if ( useTextures ) {
+			System.out.println("\nCL/GL Sharing method: TEXTURES (use -forcePBO to use PBO + DrawPixels)");
+			System.out.println("Rendering method: Shader on a fullscreen quad");
+		} else {
+			System.out.println("\nCL/GL Sharing method: PIXEL BUFFER OBJECTS");
+			System.out.println("Rendering method: DrawPixels");
+		}
+
+		// must run after useTextures is decided: the build options depend on it
+		buildPrograms();
+
+		// Detect GLtoCL synchronization method
+		syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied
+		if ( syncGLtoCL ) {
+			clEvents = new CLEvent[slices];
+			clSyncs = new GLSync[slices];
+			System.out.println("\nGL to CL sync: Using OpenCL events");
+		} else
+			System.out.println("\nGL to CL sync: Using clFinish");
+
+		// Detect CLtoGL synchronization method
+		syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync;
+		if ( syncCLtoGL ) {
+			// every device has to support KHR_gl_event, otherwise fall back to glFinish
+			for ( CLDevice device : devices ) {
+				if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) {
+					syncCLtoGL = false;
+					break;
+				}
+			}
+		}
+		if ( syncCLtoGL ) {
+			System.out.println("CL to GL sync: Using OpenGL sync objects");
+		} else
+			System.out.println("CL to GL sync: Using glFinish");
+
+		if ( useTextures ) {
+			// display list with a single textured quad covering width x height
+			dlist = glGenLists(1);
+
+			glNewList(dlist, GL_COMPILE);
+			glBegin(GL_QUADS);
+			{
+				glTexCoord2f(0.0f, 0.0f);
+				glVertex2f(0, 0);
+
+				glTexCoord2f(0.0f, 1.0f);
+				glVertex2i(0, height);
+
+				glTexCoord2f(1.0f, 1.0f);
+				glVertex2f(width, height);
+
+				glTexCoord2f(1.0f, 0.0f);
+				glVertex2f(width, 0);
+			}
+			glEnd();
+			glEndList();
+
+			// pass-through vertex shader: forwards the first texture coordinate set
+			vsh = glCreateShader(GL_VERTEX_SHADER);
+			glShaderSource(vsh, "varying vec2 texCoord;\n" +
+			                    "\n" +
+			                    "void main(void) {\n" +
+			                    "\tgl_Position = ftransform();\n" +
+			                    "\ttexCoord = gl_MultiTexCoord0.xy;\n" +
+			                    "}");
+			glCompileShader(vsh);
+
+			// fragment shader: samples the "mandelbrot" texture at the interpolated coordinate
+			fsh = glCreateShader(GL_FRAGMENT_SHADER);
+			glShaderSource(fsh, "uniform sampler2D mandelbrot;\n" +
+			                    "\n" +
+			                    "varying vec2 texCoord;\n" +
+			                    "\n" +
+			                    "void main(void) {\n" +
+			                    "\tgl_FragColor = texture2D(mandelbrot, texCoord);" +
+			                    "}");
+			glCompileShader(fsh);
+
+			// NOTE(review): compile and link status are not checked here
+			program = glCreateProgram();
+			glAttachShader(program, vsh);
+			glAttachShader(program, fsh);
+			glLinkProgram(program);
+
+			// the program is left bound; the sampler reads from texture unit 0
+			glUseProgram(program);
+			glUniform1i(glGetUniformLocation(program, "mandelbrot"), 0);
+		}
+
+		System.out.println("");
+	}
+
+	/**
+	 * (Re)creates and builds the OpenCL program(s) and creates one "mandelbrot" kernel per slice.
+	 *
+	 * Previously created programs are released first. Each program is built with options that
+	 * depend on the sharing method (USE_TEXTURE) and on whether double precision is requested
+	 * and available on the device (DOUBLE_FP, AMD_FP). The build log is printed even when the
+	 * build fails. Clears the rebuild flag.
+	 *
+	 * @throws RuntimeException if the program sources cannot be read
+	 */
+	private void buildPrograms() {
+		/*
+		 * workaround: The driver keeps using the old binaries for some reason.
+		 * to solve this we simply create a new program and release the old one.
+		 * however rebuilding programs should be possible -> remove when drivers are fixed.
+		 * (again: the spec is not very clear about this kind of usage)
+		 */
+		if ( programs[0] != null ) {
+			for ( CLProgram oldProgram : programs )
+				clReleaseProgram(oldProgram);
+		}
+
+		try {
+			createPrograms();
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+
+		// disable 64bit floating point math if not available
+		// NOTE(review): when a single program is shared, it is built only for the first queue's device - confirm this is sufficient
+		for ( int i = 0; i < programs.length; i++ ) {
+			final CLDevice device = queues[i].getCLDevice();
+
+			final StringBuilder options = new StringBuilder(useTextures ? " -D USE_TEXTURE" : "");
+			final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
+			if ( doublePrecision && isDoubleFPAvailable(device) ) {
+				//cl_khr_fp64
+				options.append(" -D DOUBLE_FP");
+
+				//AMD's version of double precision floating point math
+				if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 )
+					options.append(" -D AMD_FP");
+			}
+
+			System.out.println("\nOpenCL COMPILER OPTIONS: " + options);
+
+			try {
+				clBuildProgram(programs[i], device, options, null);
+			} finally {
+				// print the log on failure as well, it carries the compiler diagnostics
+				System.out.println("BUILD LOG: " + programs[i].getBuildInfoString(device, CL_PROGRAM_BUILD_LOG));
+			}
+		}
+
+		rebuild = false;
+
+		// init kernel with constants
+		// With one shared program every kernel must use index 0, so clamp to the last valid index.
+		// Clamping to programs.length itself overruns the array for the second kernel.
+		final int lastProgram = programs.length - 1;
+		for ( int i = 0; i < kernels.length; i++ )
+			kernels[i] = clCreateKernel(programs[min(i, lastProgram)], "mandelbrot", null);
+	}
+
+	/**
+	 * (Re)creates the per-slice OpenGL objects and the OpenCL memory objects created from them.
+	 *
+	 * On the first call the holder arrays are allocated; on later calls the existing CL memory
+	 * objects and GL objects are released before new ones are generated. In texture mode each
+	 * slice gets an RGBA texture of (width / slices) x height, otherwise a pixel buffer object of
+	 * width * height * 4 / slices bytes. Sets buffersInitialized when done.
+	 */
+	private void initGLObjects() {
+		final boolean firstCall = glBuffers == null;
+		if ( firstCall ) {
+			glBuffers = new CLMem[slices];
+			glIDs = BufferUtils.createIntBuffer(slices);
+		} else {
+			// release the CL memory objects first, then the GL objects they were created from
+			for ( CLMem sharedObject : glBuffers )
+				clReleaseMemObject(sharedObject);
+
+			if ( useTextures )
+				glDeleteTextures(glIDs);
+			else
+				glDeleteBuffers(glIDs);
+		}
+
+		if ( useTextures ) {
+			glGenTextures(glIDs);
+
+			// one empty texture per slice
+			for ( int slice = 0; slice < slices; slice++ ) {
+				final int textureID = glIDs.get(slice);
+
+				glBindTexture(GL_TEXTURE_2D, textureID);
+				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width / slices, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, (ByteBuffer)null);
+				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+				glBuffers[slice] = clCreateFromGLTexture2D(clContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, textureID, null);
+			}
+			glBindTexture(GL_TEXTURE_2D, 0);
+		} else {
+			glGenBuffers(glIDs);
+
+			// one empty PBO per slice
+			for ( int slice = 0; slice < slices; slice++ ) {
+				final int bufferID = glIDs.get(slice);
+
+				glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bufferID);
+				glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW);
+
+				glBuffers[slice] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, bufferID, null);
+			}
+			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+		}
+
+		buffersInitialized = true;
+	}
+
+	// init kernels with constants
+
+	/**
+	 * Sets the kernel arguments that stay the same between frames (arguments 6 to 9):
+	 * the shared GL object, the colour map buffer, the colour map size and the iteration limit.
+	 */
+	private void setKernelConstants() {
+		for ( int slice = 0; slice < slices; slice++ ) {
+			final CLKernel kernel = kernels[slice];
+
+			kernel.setArg(6, glBuffers[slice]).setArg(7, colorMap[slice]);
+			// NOTE(review): COLOR_MAP_SIZE is the buffer size in bytes - confirm the kernel expects bytes and not an entry count
+			kernel.setArg(8, COLOR_MAP_SIZE).setArg(9, maxIterations);
+		}
+	}
+
+	// rendering cycle
+
 	private void run() {
 		long startTime = System.currentTimeMillis() + 5000;
 		long fps = 0;
@ -206,10 +584,147 @@ public class DemoFractal {
 			}
 		}

+		clReleaseContext(clContext);
+
+		if ( useTextures ) {
+			glDeleteProgram(program);
+			glDeleteShader(fsh);
+			glDeleteShader(vsh);
+
+			glDeleteLists(dlist, 1);
+		}
+
 		CL.destroy();
 		Display.destroy();
 	}

+	/**
+	 * Produces one frame: waits until GL is done with the shared objects, recreates the GL
+	 * objects or rebuilds the programs if that was requested, then computes and renders.
+	 */
+	public void display() {
+		// TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented.
+
+		// make sure GL does not use our objects before we start computing
+		final boolean waitOnGLEvent = syncCLtoGL && glEvent != null;
+		if ( !waitOnGLEvent )
+			glFinish();
+		else {
+			for ( final CLCommandQueue commandQueue : queues )
+				clEnqueueWaitForEvents(commandQueue, glEvent);
+		}
+
+		// new GL objects mean the kernels have to be pointed at them again
+		if ( !buffersInitialized ) {
+			initGLObjects();
+			setKernelConstants();
+		}
+
+		// rebuilt programs produce new kernels, which need their constant arguments again
+		if ( rebuild ) {
+			buildPrograms();
+			setKernelConstants();
+		}
+
+		compute(doublePrecision);
+		render();
+	}
+
+	// OpenCL
+
+	/**
+	 * Enqueues the fractal computation for every slice on its own command queue.
+	 *
+	 * Each slice covers an equal part of the visible X range. The view arguments are passed as
+	 * doubles only when is64bit is set and the slice's device supports double precision,
+	 * otherwise as floats. When GL to CL event synchronisation is not used, the method blocks
+	 * until all queues have finished.
+	 *
+	 * @param is64bit whether double precision should be used where available
+	 */
+	private void compute(final boolean is64bit) {
+		final int sliceWidth = (int)(width / (float)slices);
+		final double rangeX = (maxX - minX) / slices;
+		final double rangeY = (maxY - minY);
+
+		kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height);
+
+		// start computation
+		for ( int slice = 0; slice < slices; slice++ ) {
+			final CLCommandQueue queue = queues[slice];
+			final CLKernel kernel = kernels[slice];
+			final double sliceMinX = minX + rangeX * slice;
+
+			kernel.setArg(0, sliceWidth).setArg(1, height);
+
+			final boolean useDoubles = is64bit && isDoubleFPAvailable(queue.getCLDevice());
+			if ( useDoubles ) {
+				kernel
+					.setArg(2, sliceMinX).setArg(3, minY)
+					.setArg(4, rangeX).setArg(5, rangeY);
+			} else {
+				kernel
+					.setArg(2, (float)sliceMinX).setArg(3, (float)minY)
+					.setArg(4, (float)rangeX).setArg(5, (float)rangeY);
+			}
+
+			// acquire the GL object, run the kernel over the slice, then hand the object back to GL
+			clEnqueueAcquireGLObjects(queue, glBuffers[slice], null, null);
+
+			clEnqueueNDRangeKernel(queue, kernel, 2, null, kernel2DGlobalWorkSize, null, null, null);
+
+			// the release event is only requested when it will be turned into a GL sync object
+			clEnqueueReleaseGLObjects(queue, glBuffers[slice], null, syncGLtoCL ? syncBuffer : null);
+			if ( syncGLtoCL ) {
+				clEvents[slice] = queue.getCLEvent(syncBuffer.get(0));
+				clSyncs[slice] = glCreateSyncFromCLeventARB(queue.getParent(), clEvents[slice], 0);
+			}
+		}
+
+		// block until done (important: finish before doing further gl work)
+		if ( !syncGLtoCL ) {
+			for ( final CLCommandQueue queue : queues )
+				clFinish(queue);
+		}
+	}
+
+	// OpenGL
+
+	/**
+	 * Draws the computed slices side by side.
+	 *
+	 * When GL to CL event synchronisation is in use, GL first waits on the sync object of every
+	 * slice. In texture mode each slice texture is drawn on a quad placed in the slice's own column;
+	 * in PBO mode each slice is drawn with glDrawPixels at the slice's raster position. When CL to
+	 * GL synchronisation is in use, a fence is created afterwards and turned into a CL event.
+	 */
+	private void render() {
+		glClear(GL_COLOR_BUFFER_BIT);
+
+		if ( syncGLtoCL ) {
+			for ( int i = 0; i < slices; i++ )
+				glWaitSync(clSyncs[i], 0, 0);
+		}
+
+		//draw slices
+		int sliceWidth = width / slices;
+
+		if ( useTextures ) {
+			// The display list holds one quad covering the whole view. Without a per-slice transform
+			// every slice would be drawn over the previous one, so each quad is squeezed to one slice
+			// width and shifted to its column (same placement as the PBO path below).
+			// The transform goes on the current matrix stack, which ftransform() in the vertex shader
+			// picks up - assumes the current matrix mode is modelview or projection (TODO confirm).
+			final float sliceScale = sliceWidth / (float)width;
+
+			for ( int i = 0; i < slices; i++ ) {
+				int separatorOffset = drawSeparator ? i : 0;
+
+				glBindTexture(GL_TEXTURE_2D, glIDs.get(i));
+
+				glPushMatrix();
+				glTranslatef(sliceWidth * i + separatorOffset, 0.0f, 0.0f);
+				glScalef(sliceScale, 1.0f, 1.0f);
+				glCallList(dlist);
+				glPopMatrix();
+			}
+		} else {
+			for ( int i = 0; i < slices; i++ ) {
+				int separatorOffset = drawSeparator ? i : 0;
+
+				glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glIDs.get(i));
+				glRasterPos2i(sliceWidth * i + separatorOffset, 0);
+
+				// with a PBO bound, the last argument is an offset into the buffer
+				glDrawPixels(sliceWidth, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);
+			}
+			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+		}
+
+		if ( syncCLtoGL ) {
+			// display() makes the CL queues wait on this event before the next computation
+			glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+			glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null);
+		}
+
+		//draw info text
+		/*
+		textRenderer.beginRendering(width, height, false);
+
+		textRenderer.draw("device/time/precision", 10, height - 15);
+
+		for ( int i = 0; i < slices; i++ ) {
+			CLDevice device = queues[i].getDevice();
+			boolean doubleFP = doublePrecision && isDoubleFPAvailable(device);
+			CLEvent event = probes.getEvent(i);
+			long start = event.getProfilingInfo(START);
+			long end = event.getProfilingInfo(END);
+			textRenderer.draw(device.getType().toString() + i + " "
+			                  + (int)((end - start) / 1000000.0f) + "ms @"
+			                  + (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i)));
+		}
+
+		textRenderer.endRendering();
+		*/
+	}
+
 	private void handleIO() {
 		if ( Keyboard.getNumKeyboardEvents() != 0 ) {
 			while ( Keyboard.next() ) {
@ -282,7 +797,8 @@ public class DemoFractal {
 				if ( eventBtn == -1 ) {
 					final int dwheel = Mouse.getEventDWheel();
 					if ( dwheel != 0 ) {
-						double scale = dwheel > 0 ? 0.05 : -0.05;
+						double scaleFactor = Keyboard.isKeyDown(Keyboard.KEY_LCONTROL) || Keyboard.isKeyDown(Keyboard.KEY_RCONTROL) ? 0.25 : 0.05;
+						double scale = dwheel > 0 ? scaleFactor : -scaleFactor;

 						double deltaX = scale * (maxX - minX);
 						double deltaY = scale * (maxY - minY);
@ -305,146 +821,9 @@ public class DemoFractal {
 		}
 	}

-	public void init() {
-		try {
-			CL.create();
-			Display.setDisplayMode(new DisplayMode(width, height));
-			Display.setTitle("OpenCL Fractal Demo");
-			Display.create();
-		} catch (LWJGLException e) {
-			throw new RuntimeException(e);
-		}
-
-		try {
-			initCL(Display.getDrawable());
-		} catch (Exception e) {
-			if ( clContext != null )
-				clReleaseContext(clContext);
-			Display.destroy();
-			throw new RuntimeException(e);
-		}
-
-		Display.setSwapInterval(0);
-		glDisable(GL_DEPTH_TEST);
-		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
-
-		initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight());
-
-		initPBO();
-		glFinish();
-
-		setKernelConstants();
-	}
-
-	private void initCL(Drawable drawable) throws Exception {
-		// Find a platform
-		List<CLPlatform> platforms = CLPlatform.getPlatforms();
-		if ( platforms == null )
-			throw new RuntimeException("No OpenCL platforms found.");
-
-		final CLPlatform platform = platforms.get(0); // just grab the first one
-
-		// Find devices with GL sharing support
-		final Filter<CLDevice> glSharingFilter = new Filter<CLDevice>() {
-			public boolean accept(final CLDevice device) {
-				final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
-				return caps.CL_KHR_gl_sharing;
-			}
-		};
-		List<CLDevice> devices = platform.getDevices(CL_DEVICE_TYPE_GPU, glSharingFilter);
-		if ( devices == null ) {
-			devices = platform.getDevices(CL_DEVICE_TYPE_CPU, glSharingFilter);
-			if ( devices == null )
-				throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support.");
-		}
-
-		// Create the context
-		final PointerBuffer deviceIDs = BufferUtils.createPointerBuffer(devices.size());
-		for ( CLDevice device : devices )
-			deviceIDs.put(device);
-		deviceIDs.flip();
-
-		final PointerBuffer contextProps = BufferUtils.createPointerBuffer(2 + 4 + 1);
-		contextProps.put(CL_CONTEXT_PLATFORM).put(platform);
-
-		drawable.setCLSharingProperties(contextProps); // Enable GL sharing
-
-		contextProps.put(0);
-		contextProps.flip();
-		clContext = clCreateContext(contextProps, deviceIDs, null, null);
-
-		slices = min(devices.size(), MAX_PARALLELISM_LEVEL);
-
-		// create command queues for every GPU, setup colormap and init kernels
-		queues = new CLCommandQueue[slices];
-		kernels = new CLKernel[slices];
-		colorMap = new CLMem[slices];
-		colorMapBuffer = new IntBuffer[slices];
-
-		for ( int i = 0; i < slices; i++ ) {
-			colorMapBuffer[i] = BufferUtils.createIntBuffer(32 * 2);
-			colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, colorMapBuffer[i].capacity() * 4, null);
-			colorMap[i].checkValid();
-
-			initColorMap(colorMapBuffer[i], 32, Color.BLUE, Color.GREEN, Color.RED);
-
-			// create command queue and upload color map buffer on each used device
-			queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null);
-			queues[i].checkValid();
-			clEnqueueWriteBuffer(queues[i], colorMap[i], CL_TRUE, 0, colorMapBuffer[i], null, null); // blocking upload
-
-		}
-
-		// check if we have 64bit FP support on all devices
-		// if yes we can use only one program for all devices + one kernel per device.
-		// if not we will have to create (at least) one program for 32 and one for 64bit devices.
-		// since there are different vendor extensions for double FP we use one program per device.
-		// (OpenCL spec is not very clear about this usecases)
-		boolean all64bit = true;
-		for ( CLDevice device : devices ) {
-			if ( !isDoubleFPAvailable(device) ) {
-				all64bit = false;
-				break;
-			}
-		}
-
-		// load program(s)
-		programs = new CLProgram[all64bit ? 1 : slices];
-
-		buildPrograms();
-
-		final ContextCapabilities caps = GLContext.getCapabilities();
-
-		System.out.println("OpenGL caps.OpenGL32 = " + caps.OpenGL32);
-		System.out.println("OpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync);
-		System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event);
-		for ( int i = 0; i < devices.size(); i++ ) {
-			System.out.println("Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event);
-		}
-
-		// Detect GLtoCL synchronization method
-		syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied
-		if ( syncGLtoCL ) {
-			clEvents = new CLEvent[slices];
-			clSyncs = new GLSync[slices];
-			System.out.println("GL to CL sync: Using OpenCL events");
-		} else
-			System.out.println("GL to CL sync: Using clFinish");
-
-		// Detect CLtoGL synchronization method
-		syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync;
-		if ( syncCLtoGL ) {
-			for ( CLDevice device : devices ) {
-				if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) {
-					syncCLtoGL = false;
-					break;
-				}
-			}
-		}
-		if ( syncCLtoGL ) {
-			System.out.println("CL to GL sync: Using OpenGL sync objects");
-		} else
-			System.out.println("CL to GL sync: Using glFinish");
+	/**
+	 * Tells whether the device reports double precision floating point support, either through
+	 * the KHR fp64 capability or through AMD's fp64 capability.
+	 *
+	 * @param device the OpenCL device to query
+	 *
+	 * @return true if either fp64 capability flag is set for the device
+	 */
+	private static boolean isDoubleFPAvailable(CLDevice device) {
+		final CLDeviceCapabilities deviceCaps = CLCapabilities.getDeviceCapabilities(device);
+		if ( deviceCaps.CL_KHR_fp64 )
+			return true;
+		return deviceCaps.CL_AMD_fp64;
 	}

 	private void createPrograms() throws IOException {
@ -492,10 +871,9 @@ public class DemoFractal {
 				int r = (int)(r0 + alpha * deltaR);
 				int g = (int)(g0 + alpha * deltaG);
 				int b = (int)(b0 + alpha * deltaB);
-				colorMap.put((r << 16) | (g << 8) | (b << 0));
+				colorMap.put((r << 0) | (g << 8) | (b << 16));
 			}
 		}
-		colorMap.rewind();
 	}

 	private static void initView(int width, int height) {
@ -509,219 +887,4 @@ public class DemoFractal {
 		glOrtho(0.0, width, 0.0, height, 0.0, 1.0);
 	}

-	private void initPBO() {
-		if ( pboBuffers == null ) {
-			pboBuffers = new CLMem[slices];
-			pboIDs = BufferUtils.createIntBuffer(slices);
-		} else {
-			for ( CLMem pboBuffer : pboBuffers )
-				clReleaseMemObject(pboBuffer);
-			glDeleteBuffers(pboIDs);
-		}
-
-		glGenBuffers(pboIDs);
-
-		// setup one empty PBO per slice
-		for ( int i = 0; i < slices; i++ ) {
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i));
-			glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW);
-
-			pboBuffers[i] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, pboIDs.get(i), null);
-		}
-		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
-		buffersInitialized = true;
-	}
-
-	private void buildPrograms() {
-		/*
-		 * workaround: The driver keeps using the old binaries for some reason.
-		 * to solve this we simple create a new program and release the old.
-		 * however rebuilding programs should be possible -> remove when drivers are fixed.
-		 * (again: the spec is not very clear about this kind of usages)
-		 */
-		if ( programs[0] != null ) {
-			for ( CLProgram program : programs )
-				clReleaseProgram(program);
-		}
-
-		try {
-			createPrograms();
-		} catch (IOException e) {
-			throw new RuntimeException(e);
-		}
-
-		// disable 64bit floating point math if not available
-		for ( int i = 0; i < programs.length; i++ ) {
-			final CLDevice device = queues[i].getCLDevice();
-
-			final StringBuilder options = new StringBuilder("-cl-fast-relaxed-math");
-			final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
-			if ( doublePrecision && isDoubleFPAvailable(device) ) {
-				//cl_khr_fp64
-				options.append(" -D DOUBLE_FP");
-
-				//amd's verson of double precision floating point math
-				if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 )
-					options.append(" -D AMD_FP");
-			}
-
-			System.out.println("COMPILER OPTIONS: " + options);
-
-			clBuildProgram(programs[i], device, options, null);
-		}
-
-		rebuild = false;
-
-		for ( int i = 0; i < kernels.length; i++ ) {
-			// init kernel with constants
-			kernels[i] = clCreateKernel(programs[min(i, programs.length)], "mandelbrot", null);
-		}
-	}
-
-	// init kernels with constants
-
-	private void setKernelConstants() {
-		for ( int i = 0; i < slices; i++ ) {
-			kernels[i]
-				.setArg(6, pboBuffers[i])
-				.setArg(7, colorMap[i])
-				.setArg(8, colorMapBuffer[i].capacity())
-				.setArg(9, MAX_ITERATIONS);
-		}
-	}
-
-	// rendering cycle
-
-	public void display() {
-		// TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented.
-
-		// make sure GL does not use our objects before we start computing
-		if ( syncCLtoGL ) {
-			for ( final CLCommandQueue queue : queues )
-				clEnqueueWaitForEvents(queue, glEvent);
-		} else
-			glFinish();
-
-		if ( !buffersInitialized ) {
-			initPBO();
-			setKernelConstants();
-		}
-
-		if ( rebuild ) {
-			buildPrograms();
-			setKernelConstants();
-		}
-		compute(doublePrecision);
-
-		render();
-	}
-
-	// OpenCL
-
-	private void compute(final boolean is64bit) {
-		int sliceWidth = (int)(width / (float)slices);
-		double rangeX = (maxX - minX) / slices;
-		double rangeY = (maxY - minY);
-
-		kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height);
-
-		// start computation
-		for ( int i = 0; i < slices; i++ ) {
-			kernels[i].setArg(0, sliceWidth).setArg(1, height);
-			if ( !is64bit || !isDoubleFPAvailable(queues[i].getCLDevice()) ) {
-				kernels[i]
-					.setArg(2, (float)(minX + rangeX * i)).setArg(3, (float)minY)
-					.setArg(4, (float)rangeX).setArg(5, (float)rangeY);
-			} else {
-				kernels[i]
-					.setArg(2, minX + rangeX * i).setArg(3, minY)
-					.setArg(4, rangeX).setArg(5, rangeY);
-			}
-
-			// aquire GL objects, and enqueue a kernel with a probe from the list
-			clEnqueueAcquireGLObjects(queues[i], pboBuffers[i], null, null);
-
-			clEnqueueNDRangeKernel(queues[i], kernels[i], 2,
-			                       null,
-			                       kernel2DGlobalWorkSize,
-			                       null,
-			                       null, null);
-
-			clEnqueueReleaseGLObjects(queues[i], pboBuffers[i], null, syncBuffer);
-			if ( syncGLtoCL ) {
-				clEvents[i] = queues[i].getCLEvent(syncBuffer.get(0));
-				clSyncs[i] = glCreateSyncFromCLeventARB(queues[i].getParent(), clEvents[i], 0);
-			}
-		}
-
-		// block until done (important: finish before doing further gl work)
-		if ( !syncGLtoCL ) {
-			for ( int i = 0; i < slices; i++ )
-				clFinish(queues[i]);
-		}
-	}
-
-	// OpenGL
-
-	private void render() {
-		glClear(GL_COLOR_BUFFER_BIT);
-
-		if ( syncGLtoCL ) {
-			for ( int i = 0; i < slices; i++ )
-				glWaitSync(clSyncs[i], 0, 0);
-		}
-
-		//draw slices
-		int sliceWidth = width / slices;
-
-		for ( int i = 0; i < slices; i++ ) {
-			int seperatorOffset = drawSeparator ? i : 0;
-
-			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i));
-			glRasterPos2i(sliceWidth * i + seperatorOffset, 0);
-
-			glDrawPixels(sliceWidth, height, GL_BGRA, GL_UNSIGNED_BYTE, 0);
-
-		}
-		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
-		if ( syncCLtoGL ) {
-			glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-			glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null);
-
-		}
-
-		//draw info text
-		/*
-		textRenderer.beginRendering(width, height, false);
-
-		textRenderer.draw("device/time/precision", 10, height - 15);
-
-		for ( int i = 0; i < slices; i++ ) {
-			CLDevice device = queues[i].getDevice();
-			boolean doubleFP = doublePrecision && isDoubleFPAvailable(device);
-			CLEvent event = probes.getEvent(i);
-			long start = event.getProfilingInfo(START);
-			long end = event.getProfilingInfo(END);
-			textRenderer.draw(device.getType().toString() + i + " "
-			                  + (int)((end - start) / 1000000.0f) + "ms @"
-			                  + (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i)));
-		}
-
-		textRenderer.endRendering();
-		*/
-	}
-
-	private static boolean isDoubleFPAvailable(CLDevice device) {
-		final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
-		return caps.CL_KHR_fp64 || caps.CL_AMD_fp64;
-	}
-
-	public static void main(String args[]) {
-		DemoFractal demo = new DemoFractal(512, 512);
-		demo.init();
-		demo.run();
-	}
-
 }