mirror of
https://github.com/shadowfacts/lwjgl2-arm64.git
synced 2026-04-09 00:14:04 +00:00
Misc GL/CL fixes and additions.
Made native calls package private, we can now use them from the high-level API. Added support for "render-to-texture" in the fractal demo.
This commit is contained in:
parent
a8bcb7fd25
commit
2b79b68501
19 changed files with 1211 additions and 510 deletions
|
|
@ -43,18 +43,23 @@ import org.lwjgl.util.Color;
|
|||
import org.lwjgl.util.ReadableColor;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static java.lang.Math.*;
|
||||
import static org.lwjgl.opencl.CL10.*;
|
||||
import static org.lwjgl.opencl.CL10GL.*;
|
||||
import static org.lwjgl.opencl.KHRGLEvent.*;
|
||||
import static org.lwjgl.opengl.AMDDebugOutput.*;
|
||||
import static org.lwjgl.opengl.ARBCLEvent.*;
|
||||
import static org.lwjgl.opengl.ARBDebugOutput.*;
|
||||
import static org.lwjgl.opengl.ARBSync.*;
|
||||
import static org.lwjgl.opengl.GL11.*;
|
||||
import static org.lwjgl.opengl.GL12.*;
|
||||
import static org.lwjgl.opengl.GL15.*;
|
||||
import static org.lwjgl.opengl.GL20.*;
|
||||
import static org.lwjgl.opengl.GL21.*;
|
||||
|
||||
/*
|
||||
|
|
@ -118,24 +123,35 @@ public class DemoFractal {
|
|||
// max number of used GPUs
|
||||
private static final int MAX_PARALLELISM_LEVEL = 8;
|
||||
|
||||
// max per pixel iterations to compute the fractal
|
||||
private static final int MAX_ITERATIONS = 500;
|
||||
private static final int COLOR_MAP_SIZE = 32 * 2 * 4;
|
||||
|
||||
private Set<String> params;
|
||||
|
||||
private CLContext clContext;
|
||||
private CLCommandQueue[] queues;
|
||||
private CLKernel[] kernels;
|
||||
private CLProgram[] programs;
|
||||
|
||||
private CLMem[] pboBuffers;
|
||||
private IntBuffer pboIDs;
|
||||
private CLMem[] glBuffers;
|
||||
private IntBuffer glIDs;
|
||||
|
||||
private boolean useTextures;
|
||||
|
||||
// Texture rendering
|
||||
private int dlist;
|
||||
private int vsh;
|
||||
private int fsh;
|
||||
private int program;
|
||||
|
||||
private CLMem[] colorMap;
|
||||
private IntBuffer[] colorMapBuffer;
|
||||
|
||||
private final PointerBuffer kernel2DGlobalWorkSize;
|
||||
|
||||
private int width;
|
||||
private int height;
|
||||
// max per pixel iterations to compute the fractal
|
||||
private int maxIterations = 500;
|
||||
|
||||
private int width = 512;
|
||||
private int height = 512;
|
||||
|
||||
private double minX = -2f;
|
||||
private double minY = -1.2f;
|
||||
|
|
@ -174,13 +190,375 @@ public class DemoFractal {
|
|||
private GLSync glSync;
|
||||
private CLEvent glEvent;
|
||||
|
||||
/**
 * Creates a demo with an explicit window size and otherwise default settings.
 * Kept so that callers using the width/height form keep working; it delegates
 * to the argument-parsing constructor with an empty argument list.
 *
 * @param width  the window width in pixels
 * @param height the window height in pixels
 */
public DemoFractal(int width, int height) {
	this(new String[0]);

	this.width = width;
	this.height = height;
}

/**
 * Creates a demo configured from command-line arguments.
 * <p>
 * Every option must start with '-' or '/'. Recognized options (case-insensitive):
 * <ul>
 * <li>{@code forcePBO}, {@code forceCPU}, {@code debugGL} - flags stored in {@code params}</li>
 * <li>{@code iterations <count>} - sets {@code maxIterations}</li>
 * <li>{@code res <width> <height>} - sets the window size, both values must be at least 1</li>
 * </ul>
 * Options that start with a valid switch character but are not recognized are ignored.
 *
 * @param args the command-line arguments
 *
 * @throws IllegalArgumentException if an argument is empty, lacks the switch character,
 *                                  or an option value is missing or not a valid number
 */
public DemoFractal(final String[] args) {
	params = new HashSet<String>();

	for ( int i = 0; i < args.length; i++ ) {
		final String arg = args[i];

		// An empty argument has no switch character; reject it here instead of failing inside charAt(0).
		if ( arg == null || arg.length() == 0 || (arg.charAt(0) != '-' && arg.charAt(0) != '/') )
			throw new IllegalArgumentException("Invalid command-line argument: " + arg);

		final String param = arg.substring(1);

		if ( "forcePBO".equalsIgnoreCase(param) )
			params.add("forcePBO");
		else if ( "forceCPU".equalsIgnoreCase(param) )
			params.add("forceCPU");
		else if ( "debugGL".equalsIgnoreCase(param) )
			params.add("debugGL");
		else if ( "iterations".equalsIgnoreCase(param) ) {
			// one value must follow the option
			if ( args.length < i + 1 + 1 )
				throw new IllegalArgumentException("Invalid iterations argument specified.");

			try {
				this.maxIterations = Integer.parseInt(args[++i]);
			} catch (NumberFormatException e) {
				throw new IllegalArgumentException("Invalid number of iterations specified.", e);
			}
		} else if ( "res".equalsIgnoreCase(param) ) {
			// two values (width, height) must follow the option
			if ( args.length < i + 2 + 1 )
				throw new IllegalArgumentException("Invalid res argument specified.");

			try {
				this.width = Integer.parseInt(args[++i]);
				this.height = Integer.parseInt(args[++i]);

				if ( width < 1 || height < 1 )
					throw new IllegalArgumentException("Invalid res dimensions specified.");
			} catch (NumberFormatException e) {
				throw new IllegalArgumentException("Invalid res dimensions specified.", e);
			}
		}
	}

	kernel2DGlobalWorkSize = BufferUtils.createPointerBuffer(2);
}
|
||||
|
||||
public static void main(String args[]) {
|
||||
DemoFractal demo = new DemoFractal(args);
|
||||
demo.init();
|
||||
demo.run();
|
||||
}
|
||||
|
||||
public void init() {
|
||||
try {
|
||||
CL.create();
|
||||
Display.setDisplayMode(new DisplayMode(width, height));
|
||||
Display.setTitle("OpenCL Fractal Demo");
|
||||
Display.setSwapInterval(0);
|
||||
Display.create(new PixelFormat(), new ContextAttribs().withDebug(params.contains("debugGL")));
|
||||
} catch (LWJGLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try {
|
||||
initCL(Display.getDrawable());
|
||||
} catch (Exception e) {
|
||||
if ( clContext != null )
|
||||
clReleaseContext(clContext);
|
||||
Display.destroy();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
|
||||
initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight());
|
||||
|
||||
initGLObjects();
|
||||
glFinish();
|
||||
|
||||
setKernelConstants();
|
||||
}
|
||||
|
||||
private void initCL(Drawable drawable) throws Exception {
|
||||
// Find a platform
|
||||
List<CLPlatform> platforms = CLPlatform.getPlatforms();
|
||||
if ( platforms == null )
|
||||
throw new RuntimeException("No OpenCL platforms found.");
|
||||
|
||||
final CLPlatform platform = platforms.get(0); // just grab the first one
|
||||
|
||||
// Find devices with GL sharing support
|
||||
final Filter<CLDevice> glSharingFilter = new Filter<CLDevice>() {
|
||||
public boolean accept(final CLDevice device) {
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
return caps.CL_KHR_gl_sharing;
|
||||
}
|
||||
};
|
||||
int device_type = params.contains("forceCPU") ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU;
|
||||
List<CLDevice> devices = platform.getDevices(device_type, glSharingFilter);
|
||||
if ( devices == null ) {
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
devices = platform.getDevices(device_type, glSharingFilter);
|
||||
if ( devices == null )
|
||||
throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support.");
|
||||
}
|
||||
|
||||
// Create the context
|
||||
clContext = CLContext.create(platform, devices, new CLContextCallback() {
|
||||
protected void handleMessage(final String errinfo, final ByteBuffer private_info) {
|
||||
System.out.println("[CONTEXT MESSAGE] " + errinfo);
|
||||
}
|
||||
}, drawable, null);
|
||||
|
||||
slices = min(devices.size(), MAX_PARALLELISM_LEVEL);
|
||||
|
||||
// create command queues for every GPU, setup colormap and init kernels
|
||||
queues = new CLCommandQueue[slices];
|
||||
kernels = new CLKernel[slices];
|
||||
colorMap = new CLMem[slices];
|
||||
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, COLOR_MAP_SIZE, null);
|
||||
colorMap[i].checkValid();
|
||||
|
||||
// create command queue and upload color map buffer on each used device
|
||||
queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null);
|
||||
queues[i].checkValid();
|
||||
|
||||
final ByteBuffer colorMapBuffer = clEnqueueMapBuffer(queues[i], colorMap[i], CL_TRUE, CL_MAP_WRITE, 0, COLOR_MAP_SIZE, null, null, null);
|
||||
initColorMap(colorMapBuffer.asIntBuffer(), 32, Color.BLUE, Color.GREEN, Color.RED);
|
||||
clEnqueueUnmapMemObject(queues[i], colorMap[i], colorMapBuffer, null, null);
|
||||
}
|
||||
|
||||
// check if we have 64bit FP support on all devices
|
||||
// if yes we can use only one program for all devices + one kernel per device.
|
||||
// if not we will have to create (at least) one program for 32 and one for 64bit devices.
|
||||
// since there are different vendor extensions for double FP we use one program per device.
|
||||
// (OpenCL spec is not very clear about this usecases)
|
||||
boolean all64bit = true;
|
||||
for ( CLDevice device : devices ) {
|
||||
if ( !isDoubleFPAvailable(device) ) {
|
||||
all64bit = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// load program(s)
|
||||
programs = new CLProgram[all64bit ? 1 : slices];
|
||||
|
||||
final ContextCapabilities caps = GLContext.getCapabilities();
|
||||
|
||||
if ( !caps.OpenGL20 )
|
||||
throw new RuntimeException("OpenGL 2.0 is required to run this demo.");
|
||||
else if ( device_type == CL_DEVICE_TYPE_CPU && !caps.OpenGL21 )
|
||||
throw new RuntimeException("OpenGL 2.1 is required to run this demo.");
|
||||
|
||||
if ( caps.GL_ARB_debug_output )
|
||||
glDebugMessageCallbackARB(new ARBDebugOutputCallback());
|
||||
else if ( caps.GL_AMD_debug_output )
|
||||
glDebugMessageCallbackAMD(new AMDDebugOutputCallback());
|
||||
|
||||
if ( device_type == CL_DEVICE_TYPE_GPU )
|
||||
System.out.println("OpenCL Device Type: GPU (Use -forceCPU to use CPU)");
|
||||
else
|
||||
System.out.println("OpenCL Device Type: CPU");
|
||||
for ( int i = 0; i < devices.size(); i++ )
|
||||
System.out.println("OpenCL Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event);
|
||||
|
||||
System.out.println("\nMax Iterations: " + maxIterations + " (Use -iterations <count> to change)");
|
||||
System.out.println("Display resolution: " + width + "x" + height + " (Use -res <width> <height> to change)");
|
||||
|
||||
System.out.println("\nOpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync);
|
||||
System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event);
|
||||
|
||||
// Use PBO if we're on a CPU implementation
|
||||
useTextures = device_type == CL_DEVICE_TYPE_GPU && (!caps.OpenGL21 || !params.contains("forcePBO"));
|
||||
if ( useTextures ) {
|
||||
System.out.println("\nCL/GL Sharing method: TEXTURES (use -forcePBO to use PBO + DrawPixels)");
|
||||
System.out.println("Rendering method: Shader on a fullscreen quad");
|
||||
} else {
|
||||
System.out.println("\nCL/GL Sharing method: PIXEL BUFFER OBJECTS");
|
||||
System.out.println("Rendering method: DrawPixels");
|
||||
}
|
||||
|
||||
buildPrograms();
|
||||
|
||||
// Detect GLtoCL synchronization method
|
||||
syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied
|
||||
if ( syncGLtoCL ) {
|
||||
clEvents = new CLEvent[slices];
|
||||
clSyncs = new GLSync[slices];
|
||||
System.out.println("\nGL to CL sync: Using OpenCL events");
|
||||
} else
|
||||
System.out.println("\nGL to CL sync: Using clFinish");
|
||||
|
||||
// Detect CLtoGL synchronization method
|
||||
syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync;
|
||||
if ( syncCLtoGL ) {
|
||||
for ( CLDevice device : devices ) {
|
||||
if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) {
|
||||
syncCLtoGL = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( syncCLtoGL ) {
|
||||
System.out.println("CL to GL sync: Using OpenGL sync objects");
|
||||
} else
|
||||
System.out.println("CL to GL sync: Using glFinish");
|
||||
|
||||
if ( useTextures ) {
|
||||
dlist = glGenLists(1);
|
||||
|
||||
glNewList(dlist, GL_COMPILE);
|
||||
glBegin(GL_QUADS);
|
||||
{
|
||||
glTexCoord2f(0.0f, 0.0f);
|
||||
glVertex2f(0, 0);
|
||||
|
||||
glTexCoord2f(0.0f, 1.0f);
|
||||
glVertex2i(0, height);
|
||||
|
||||
glTexCoord2f(1.0f, 1.0f);
|
||||
glVertex2f(width, height);
|
||||
|
||||
glTexCoord2f(1.0f, 0.0f);
|
||||
glVertex2f(width, 0);
|
||||
}
|
||||
glEnd();
|
||||
glEndList();
|
||||
|
||||
vsh = glCreateShader(GL_VERTEX_SHADER);
|
||||
glShaderSource(vsh, "varying vec2 texCoord;\n" +
|
||||
"\n" +
|
||||
"void main(void) {\n" +
|
||||
"\tgl_Position = ftransform();\n" +
|
||||
"\ttexCoord = gl_MultiTexCoord0.xy;\n" +
|
||||
"}");
|
||||
glCompileShader(vsh);
|
||||
|
||||
fsh = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
glShaderSource(fsh, "uniform sampler2D mandelbrot;\n" +
|
||||
"\n" +
|
||||
"varying vec2 texCoord;\n" +
|
||||
"\n" +
|
||||
"void main(void) {\n" +
|
||||
"\tgl_FragColor = texture2D(mandelbrot, texCoord);" +
|
||||
"}");
|
||||
glCompileShader(fsh);
|
||||
|
||||
program = glCreateProgram();
|
||||
glAttachShader(program, vsh);
|
||||
glAttachShader(program, fsh);
|
||||
glLinkProgram(program);
|
||||
|
||||
glUseProgram(program);
|
||||
glUniform1i(glGetUniformLocation(program, "mandelbrot"), 0);
|
||||
}
|
||||
|
||||
System.out.println("");
|
||||
}
|
||||
|
||||
private void buildPrograms() {
|
||||
/*
|
||||
* workaround: The driver keeps using the old binaries for some reason.
|
||||
* to solve this we simple create a new program and release the old.
|
||||
* however rebuilding programs should be possible -> remove when drivers are fixed.
|
||||
* (again: the spec is not very clear about this kind of usages)
|
||||
*/
|
||||
if ( programs[0] != null ) {
|
||||
for ( CLProgram program : programs )
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
|
||||
try {
|
||||
createPrograms();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// disable 64bit floating point math if not available
|
||||
for ( int i = 0; i < programs.length; i++ ) {
|
||||
final CLDevice device = queues[i].getCLDevice();
|
||||
|
||||
final StringBuilder options = new StringBuilder(useTextures ? " -D USE_TEXTURE" : "");
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
if ( doublePrecision && isDoubleFPAvailable(device) ) {
|
||||
//cl_khr_fp64
|
||||
options.append(" -D DOUBLE_FP");
|
||||
|
||||
//amd's verson of double precision floating point math
|
||||
if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 )
|
||||
options.append(" -D AMD_FP");
|
||||
}
|
||||
|
||||
System.out.println("\nOpenCL COMPILER OPTIONS: " + options);
|
||||
|
||||
try {
|
||||
clBuildProgram(programs[i], device, options, null);
|
||||
} finally {
|
||||
System.out.println("BUILD LOG: " + programs[i].getBuildInfoString(device, CL_PROGRAM_BUILD_LOG));
|
||||
}
|
||||
}
|
||||
|
||||
rebuild = false;
|
||||
|
||||
// init kernel with constants
|
||||
for ( int i = 0; i < kernels.length; i++ )
|
||||
kernels[i] = clCreateKernel(programs[min(i, programs.length)], "mandelbrot", null);
|
||||
}
|
||||
|
||||
private void initGLObjects() {
|
||||
if ( glBuffers == null ) {
|
||||
glBuffers = new CLMem[slices];
|
||||
glIDs = BufferUtils.createIntBuffer(slices);
|
||||
} else {
|
||||
for ( CLMem mem : glBuffers )
|
||||
clReleaseMemObject(mem);
|
||||
|
||||
if ( useTextures )
|
||||
glDeleteTextures(glIDs);
|
||||
else
|
||||
glDeleteBuffers(glIDs);
|
||||
}
|
||||
|
||||
if ( useTextures )
|
||||
glGenTextures(glIDs);
|
||||
else
|
||||
glGenBuffers(glIDs);
|
||||
|
||||
if ( useTextures ) {
|
||||
// Init textures
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
glBindTexture(GL_TEXTURE_2D, glIDs.get(i));
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width / slices, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, (ByteBuffer)null);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
glBuffers[i] = clCreateFromGLTexture2D(clContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, glIDs.get(i), null);
|
||||
}
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
} else {
|
||||
// setup one empty PBO per slice
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glIDs.get(i));
|
||||
glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW);
|
||||
|
||||
glBuffers[i] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, glIDs.get(i), null);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
buffersInitialized = true;
|
||||
}
|
||||
|
||||
// init kernels with constants
|
||||
|
||||
private void setKernelConstants() {
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
kernels[i]
|
||||
.setArg(6, glBuffers[i])
|
||||
.setArg(7, colorMap[i])
|
||||
.setArg(8, COLOR_MAP_SIZE)
|
||||
.setArg(9, maxIterations);
|
||||
}
|
||||
}
|
||||
|
||||
// rendering cycle
|
||||
|
||||
private void run() {
|
||||
long startTime = System.currentTimeMillis() + 5000;
|
||||
long fps = 0;
|
||||
|
|
@ -206,10 +584,147 @@ public class DemoFractal {
|
|||
}
|
||||
}
|
||||
|
||||
clReleaseContext(clContext);
|
||||
|
||||
if ( useTextures ) {
|
||||
glDeleteProgram(program);
|
||||
glDeleteShader(fsh);
|
||||
glDeleteShader(vsh);
|
||||
|
||||
glDeleteLists(dlist, 1);
|
||||
}
|
||||
|
||||
CL.destroy();
|
||||
Display.destroy();
|
||||
}
|
||||
|
||||
public void display() {
|
||||
// TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented.
|
||||
|
||||
// make sure GL does not use our objects before we start computing
|
||||
if ( syncCLtoGL && glEvent != null ) {
|
||||
for ( final CLCommandQueue queue : queues )
|
||||
clEnqueueWaitForEvents(queue, glEvent);
|
||||
} else
|
||||
glFinish();
|
||||
|
||||
if ( !buffersInitialized ) {
|
||||
initGLObjects();
|
||||
setKernelConstants();
|
||||
}
|
||||
|
||||
if ( rebuild ) {
|
||||
buildPrograms();
|
||||
setKernelConstants();
|
||||
}
|
||||
compute(doublePrecision);
|
||||
|
||||
render();
|
||||
}
|
||||
|
||||
// OpenCL
|
||||
|
||||
private void compute(final boolean is64bit) {
|
||||
int sliceWidth = (int)(width / (float)slices);
|
||||
double rangeX = (maxX - minX) / slices;
|
||||
double rangeY = (maxY - minY);
|
||||
|
||||
kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height);
|
||||
|
||||
// start computation
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
kernels[i].setArg(0, sliceWidth).setArg(1, height);
|
||||
if ( !is64bit || !isDoubleFPAvailable(queues[i].getCLDevice()) ) {
|
||||
kernels[i]
|
||||
.setArg(2, (float)(minX + rangeX * i)).setArg(3, (float)minY)
|
||||
.setArg(4, (float)rangeX).setArg(5, (float)rangeY);
|
||||
} else {
|
||||
kernels[i]
|
||||
.setArg(2, minX + rangeX * i).setArg(3, minY)
|
||||
.setArg(4, rangeX).setArg(5, rangeY);
|
||||
}
|
||||
|
||||
// acquire GL objects, and enqueue a kernel with a probe from the list
|
||||
clEnqueueAcquireGLObjects(queues[i], glBuffers[i], null, null);
|
||||
|
||||
clEnqueueNDRangeKernel(queues[i], kernels[i], 2,
|
||||
null,
|
||||
kernel2DGlobalWorkSize,
|
||||
null,
|
||||
null, null);
|
||||
|
||||
clEnqueueReleaseGLObjects(queues[i], glBuffers[i], null, syncGLtoCL ? syncBuffer : null);
|
||||
if ( syncGLtoCL ) {
|
||||
clEvents[i] = queues[i].getCLEvent(syncBuffer.get(0));
|
||||
clSyncs[i] = glCreateSyncFromCLeventARB(queues[i].getParent(), clEvents[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
// block until done (important: finish before doing further gl work)
|
||||
if ( !syncGLtoCL ) {
|
||||
for ( int i = 0; i < slices; i++ )
|
||||
clFinish(queues[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// OpenGL
|
||||
|
||||
private void render() {
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
if ( syncGLtoCL ) {
|
||||
for ( int i = 0; i < slices; i++ )
|
||||
glWaitSync(clSyncs[i], 0, 0);
|
||||
}
|
||||
|
||||
//draw slices
|
||||
int sliceWidth = width / slices;
|
||||
|
||||
if ( useTextures ) {
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
int seperatorOffset = drawSeparator ? i : 0;
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, glIDs.get(i));
|
||||
glCallList(dlist);
|
||||
}
|
||||
} else {
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
int seperatorOffset = drawSeparator ? i : 0;
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glIDs.get(i));
|
||||
glRasterPos2i(sliceWidth * i + seperatorOffset, 0);
|
||||
|
||||
glDrawPixels(sliceWidth, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
if ( syncCLtoGL ) {
|
||||
glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null);
|
||||
}
|
||||
|
||||
//draw info text
|
||||
/*
|
||||
textRenderer.beginRendering(width, height, false);
|
||||
|
||||
textRenderer.draw("device/time/precision", 10, height - 15);
|
||||
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
CLDevice device = queues[i].getDevice();
|
||||
boolean doubleFP = doublePrecision && isDoubleFPAvailable(device);
|
||||
CLEvent event = probes.getEvent(i);
|
||||
long start = event.getProfilingInfo(START);
|
||||
long end = event.getProfilingInfo(END);
|
||||
textRenderer.draw(device.getType().toString() + i + " "
|
||||
+ (int)((end - start) / 1000000.0f) + "ms @"
|
||||
+ (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i)));
|
||||
}
|
||||
|
||||
textRenderer.endRendering();
|
||||
*/
|
||||
}
|
||||
|
||||
private void handleIO() {
|
||||
if ( Keyboard.getNumKeyboardEvents() != 0 ) {
|
||||
while ( Keyboard.next() ) {
|
||||
|
|
@ -282,7 +797,8 @@ public class DemoFractal {
|
|||
if ( eventBtn == -1 ) {
|
||||
final int dwheel = Mouse.getEventDWheel();
|
||||
if ( dwheel != 0 ) {
|
||||
double scale = dwheel > 0 ? 0.05 : -0.05;
|
||||
double scaleFactor = Keyboard.isKeyDown(Keyboard.KEY_LCONTROL) || Keyboard.isKeyDown(Keyboard.KEY_RCONTROL) ? 0.25 : 0.05;
|
||||
double scale = dwheel > 0 ? scaleFactor : -scaleFactor;
|
||||
|
||||
double deltaX = scale * (maxX - minX);
|
||||
double deltaY = scale * (maxY - minY);
|
||||
|
|
@ -305,146 +821,9 @@ public class DemoFractal {
|
|||
}
|
||||
}
|
||||
|
||||
public void init() {
|
||||
try {
|
||||
CL.create();
|
||||
Display.setDisplayMode(new DisplayMode(width, height));
|
||||
Display.setTitle("OpenCL Fractal Demo");
|
||||
Display.create();
|
||||
} catch (LWJGLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try {
|
||||
initCL(Display.getDrawable());
|
||||
} catch (Exception e) {
|
||||
if ( clContext != null )
|
||||
clReleaseContext(clContext);
|
||||
Display.destroy();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
Display.setSwapInterval(0);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
|
||||
initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight());
|
||||
|
||||
initPBO();
|
||||
glFinish();
|
||||
|
||||
setKernelConstants();
|
||||
}
|
||||
|
||||
private void initCL(Drawable drawable) throws Exception {
|
||||
// Find a platform
|
||||
List<CLPlatform> platforms = CLPlatform.getPlatforms();
|
||||
if ( platforms == null )
|
||||
throw new RuntimeException("No OpenCL platforms found.");
|
||||
|
||||
final CLPlatform platform = platforms.get(0); // just grab the first one
|
||||
|
||||
// Find devices with GL sharing support
|
||||
final Filter<CLDevice> glSharingFilter = new Filter<CLDevice>() {
|
||||
public boolean accept(final CLDevice device) {
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
return caps.CL_KHR_gl_sharing;
|
||||
}
|
||||
};
|
||||
List<CLDevice> devices = platform.getDevices(CL_DEVICE_TYPE_GPU, glSharingFilter);
|
||||
if ( devices == null ) {
|
||||
devices = platform.getDevices(CL_DEVICE_TYPE_CPU, glSharingFilter);
|
||||
if ( devices == null )
|
||||
throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support.");
|
||||
}
|
||||
|
||||
// Create the context
|
||||
final PointerBuffer deviceIDs = BufferUtils.createPointerBuffer(devices.size());
|
||||
for ( CLDevice device : devices )
|
||||
deviceIDs.put(device);
|
||||
deviceIDs.flip();
|
||||
|
||||
final PointerBuffer contextProps = BufferUtils.createPointerBuffer(2 + 4 + 1);
|
||||
contextProps.put(CL_CONTEXT_PLATFORM).put(platform);
|
||||
|
||||
drawable.setCLSharingProperties(contextProps); // Enable GL sharing
|
||||
|
||||
contextProps.put(0);
|
||||
contextProps.flip();
|
||||
clContext = clCreateContext(contextProps, deviceIDs, null, null);
|
||||
|
||||
slices = min(devices.size(), MAX_PARALLELISM_LEVEL);
|
||||
|
||||
// create command queues for every GPU, setup colormap and init kernels
|
||||
queues = new CLCommandQueue[slices];
|
||||
kernels = new CLKernel[slices];
|
||||
colorMap = new CLMem[slices];
|
||||
colorMapBuffer = new IntBuffer[slices];
|
||||
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
colorMapBuffer[i] = BufferUtils.createIntBuffer(32 * 2);
|
||||
colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, colorMapBuffer[i].capacity() * 4, null);
|
||||
colorMap[i].checkValid();
|
||||
|
||||
initColorMap(colorMapBuffer[i], 32, Color.BLUE, Color.GREEN, Color.RED);
|
||||
|
||||
// create command queue and upload color map buffer on each used device
|
||||
queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null);
|
||||
queues[i].checkValid();
|
||||
clEnqueueWriteBuffer(queues[i], colorMap[i], CL_TRUE, 0, colorMapBuffer[i], null, null); // blocking upload
|
||||
|
||||
}
|
||||
|
||||
// check if we have 64bit FP support on all devices
|
||||
// if yes we can use only one program for all devices + one kernel per device.
|
||||
// if not we will have to create (at least) one program for 32 and one for 64bit devices.
|
||||
// since there are different vendor extensions for double FP we use one program per device.
|
||||
// (OpenCL spec is not very clear about this usecases)
|
||||
boolean all64bit = true;
|
||||
for ( CLDevice device : devices ) {
|
||||
if ( !isDoubleFPAvailable(device) ) {
|
||||
all64bit = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// load program(s)
|
||||
programs = new CLProgram[all64bit ? 1 : slices];
|
||||
|
||||
buildPrograms();
|
||||
|
||||
final ContextCapabilities caps = GLContext.getCapabilities();
|
||||
|
||||
System.out.println("OpenGL caps.OpenGL32 = " + caps.OpenGL32);
|
||||
System.out.println("OpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync);
|
||||
System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event);
|
||||
for ( int i = 0; i < devices.size(); i++ ) {
|
||||
System.out.println("Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event);
|
||||
}
|
||||
|
||||
// Detect GLtoCL synchronization method
|
||||
syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied
|
||||
if ( syncGLtoCL ) {
|
||||
clEvents = new CLEvent[slices];
|
||||
clSyncs = new GLSync[slices];
|
||||
System.out.println("GL to CL sync: Using OpenCL events");
|
||||
} else
|
||||
System.out.println("GL to CL sync: Using clFinish");
|
||||
|
||||
// Detect CLtoGL synchronization method
|
||||
syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync;
|
||||
if ( syncCLtoGL ) {
|
||||
for ( CLDevice device : devices ) {
|
||||
if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) {
|
||||
syncCLtoGL = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( syncCLtoGL ) {
|
||||
System.out.println("CL to GL sync: Using OpenGL sync objects");
|
||||
} else
|
||||
System.out.println("CL to GL sync: Using glFinish");
|
||||
private static boolean isDoubleFPAvailable(CLDevice device) {
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
return caps.CL_KHR_fp64 || caps.CL_AMD_fp64;
|
||||
}
|
||||
|
||||
private void createPrograms() throws IOException {
|
||||
|
|
@ -492,10 +871,9 @@ public class DemoFractal {
|
|||
int r = (int)(r0 + alpha * deltaR);
|
||||
int g = (int)(g0 + alpha * deltaG);
|
||||
int b = (int)(b0 + alpha * deltaB);
|
||||
colorMap.put((r << 16) | (g << 8) | (b << 0));
|
||||
colorMap.put((r << 0) | (g << 8) | (b << 16));
|
||||
}
|
||||
}
|
||||
colorMap.rewind();
|
||||
}
|
||||
|
||||
private static void initView(int width, int height) {
|
||||
|
|
@ -509,219 +887,4 @@ public class DemoFractal {
|
|||
glOrtho(0.0, width, 0.0, height, 0.0, 1.0);
|
||||
}
|
||||
|
||||
private void initPBO() {
|
||||
if ( pboBuffers == null ) {
|
||||
pboBuffers = new CLMem[slices];
|
||||
pboIDs = BufferUtils.createIntBuffer(slices);
|
||||
} else {
|
||||
for ( CLMem pboBuffer : pboBuffers )
|
||||
clReleaseMemObject(pboBuffer);
|
||||
glDeleteBuffers(pboIDs);
|
||||
}
|
||||
|
||||
glGenBuffers(pboIDs);
|
||||
|
||||
// setup one empty PBO per slice
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i));
|
||||
glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW);
|
||||
|
||||
pboBuffers[i] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, pboIDs.get(i), null);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
|
||||
buffersInitialized = true;
|
||||
}
|
||||
|
||||
private void buildPrograms() {
|
||||
/*
|
||||
* workaround: The driver keeps using the old binaries for some reason.
|
||||
* to solve this we simple create a new program and release the old.
|
||||
* however rebuilding programs should be possible -> remove when drivers are fixed.
|
||||
* (again: the spec is not very clear about this kind of usages)
|
||||
*/
|
||||
if ( programs[0] != null ) {
|
||||
for ( CLProgram program : programs )
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
|
||||
try {
|
||||
createPrograms();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// disable 64bit floating point math if not available
|
||||
for ( int i = 0; i < programs.length; i++ ) {
|
||||
final CLDevice device = queues[i].getCLDevice();
|
||||
|
||||
final StringBuilder options = new StringBuilder("-cl-fast-relaxed-math");
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
if ( doublePrecision && isDoubleFPAvailable(device) ) {
|
||||
//cl_khr_fp64
|
||||
options.append(" -D DOUBLE_FP");
|
||||
|
||||
//amd's verson of double precision floating point math
|
||||
if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 )
|
||||
options.append(" -D AMD_FP");
|
||||
}
|
||||
|
||||
System.out.println("COMPILER OPTIONS: " + options);
|
||||
|
||||
clBuildProgram(programs[i], device, options, null);
|
||||
}
|
||||
|
||||
rebuild = false;
|
||||
|
||||
for ( int i = 0; i < kernels.length; i++ ) {
|
||||
// init kernel with constants
|
||||
kernels[i] = clCreateKernel(programs[min(i, programs.length)], "mandelbrot", null);
|
||||
}
|
||||
}
|
||||
|
||||
// init kernels with constants
|
||||
|
||||
private void setKernelConstants() {
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
kernels[i]
|
||||
.setArg(6, pboBuffers[i])
|
||||
.setArg(7, colorMap[i])
|
||||
.setArg(8, colorMapBuffer[i].capacity())
|
||||
.setArg(9, MAX_ITERATIONS);
|
||||
}
|
||||
}
|
||||
|
||||
// rendering cycle
|
||||
|
||||
public void display() {
|
||||
// TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented.
|
||||
|
||||
// make sure GL does not use our objects before we start computing
|
||||
if ( syncCLtoGL ) {
|
||||
for ( final CLCommandQueue queue : queues )
|
||||
clEnqueueWaitForEvents(queue, glEvent);
|
||||
} else
|
||||
glFinish();
|
||||
|
||||
if ( !buffersInitialized ) {
|
||||
initPBO();
|
||||
setKernelConstants();
|
||||
}
|
||||
|
||||
if ( rebuild ) {
|
||||
buildPrograms();
|
||||
setKernelConstants();
|
||||
}
|
||||
compute(doublePrecision);
|
||||
|
||||
render();
|
||||
}
|
||||
|
||||
// OpenCL
|
||||
|
||||
private void compute(final boolean is64bit) {
|
||||
int sliceWidth = (int)(width / (float)slices);
|
||||
double rangeX = (maxX - minX) / slices;
|
||||
double rangeY = (maxY - minY);
|
||||
|
||||
kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height);
|
||||
|
||||
// start computation
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
kernels[i].setArg(0, sliceWidth).setArg(1, height);
|
||||
if ( !is64bit || !isDoubleFPAvailable(queues[i].getCLDevice()) ) {
|
||||
kernels[i]
|
||||
.setArg(2, (float)(minX + rangeX * i)).setArg(3, (float)minY)
|
||||
.setArg(4, (float)rangeX).setArg(5, (float)rangeY);
|
||||
} else {
|
||||
kernels[i]
|
||||
.setArg(2, minX + rangeX * i).setArg(3, minY)
|
||||
.setArg(4, rangeX).setArg(5, rangeY);
|
||||
}
|
||||
|
||||
// aquire GL objects, and enqueue a kernel with a probe from the list
|
||||
clEnqueueAcquireGLObjects(queues[i], pboBuffers[i], null, null);
|
||||
|
||||
clEnqueueNDRangeKernel(queues[i], kernels[i], 2,
|
||||
null,
|
||||
kernel2DGlobalWorkSize,
|
||||
null,
|
||||
null, null);
|
||||
|
||||
clEnqueueReleaseGLObjects(queues[i], pboBuffers[i], null, syncBuffer);
|
||||
if ( syncGLtoCL ) {
|
||||
clEvents[i] = queues[i].getCLEvent(syncBuffer.get(0));
|
||||
clSyncs[i] = glCreateSyncFromCLeventARB(queues[i].getParent(), clEvents[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
// block until done (important: finish before doing further gl work)
|
||||
if ( !syncGLtoCL ) {
|
||||
for ( int i = 0; i < slices; i++ )
|
||||
clFinish(queues[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// OpenGL
|
||||
|
||||
private void render() {
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
if ( syncGLtoCL ) {
|
||||
for ( int i = 0; i < slices; i++ )
|
||||
glWaitSync(clSyncs[i], 0, 0);
|
||||
}
|
||||
|
||||
//draw slices
|
||||
int sliceWidth = width / slices;
|
||||
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
int seperatorOffset = drawSeparator ? i : 0;
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i));
|
||||
glRasterPos2i(sliceWidth * i + seperatorOffset, 0);
|
||||
|
||||
glDrawPixels(sliceWidth, height, GL_BGRA, GL_UNSIGNED_BYTE, 0);
|
||||
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
|
||||
if ( syncCLtoGL ) {
|
||||
glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null);
|
||||
|
||||
}
|
||||
|
||||
//draw info text
|
||||
/*
|
||||
textRenderer.beginRendering(width, height, false);
|
||||
|
||||
textRenderer.draw("device/time/precision", 10, height - 15);
|
||||
|
||||
for ( int i = 0; i < slices; i++ ) {
|
||||
CLDevice device = queues[i].getDevice();
|
||||
boolean doubleFP = doublePrecision && isDoubleFPAvailable(device);
|
||||
CLEvent event = probes.getEvent(i);
|
||||
long start = event.getProfilingInfo(START);
|
||||
long end = event.getProfilingInfo(END);
|
||||
textRenderer.draw(device.getType().toString() + i + " "
|
||||
+ (int)((end - start) / 1000000.0f) + "ms @"
|
||||
+ (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i)));
|
||||
}
|
||||
|
||||
textRenderer.endRendering();
|
||||
*/
|
||||
}
|
||||
|
||||
private static boolean isDoubleFPAvailable(CLDevice device) {
|
||||
final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device);
|
||||
return caps.CL_KHR_fp64 || caps.CL_AMD_fp64;
|
||||
}
|
||||
|
||||
public static void main(String args[]) {
|
||||
DemoFractal demo = new DemoFractal(512, 512);
|
||||
demo.init();
|
||||
demo.run();
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue