From 2b79b6850124e7705cb6630c3e184dd78c827272 Mon Sep 17 00:00:00 2001 From: Ioannis Tsakpinis Date: Thu, 30 Sep 2010 17:21:50 +0000 Subject: [PATCH] Misc GL/CL fixes and additions. Made native calls package private, we can now use them from the high-level API. Added support for "render-to-texture" in the fractal demo. --- src/java/org/lwjgl/BufferChecks.java | 49 +- src/java/org/lwjgl/LWJGLUtil.java | 97 +- .../org/lwjgl/PointerWrapperAbstract.java | 2 +- src/java/org/lwjgl/opencl/APIUtil.java | 49 +- src/java/org/lwjgl/opencl/CLChecks.java | 34 +- src/java/org/lwjgl/opencl/CLContext.java | 114 +++ src/java/org/lwjgl/opencl/CLMem.java | 68 ++ src/java/org/lwjgl/opencl/CLProgram.java | 11 + .../org/lwjgl/opencl/InfoUtilAbstract.java | 7 +- .../org/lwjgl/opencl/InfoUtilFactory.java | 149 ++- src/java/org/lwjgl/opencl/Util.java | 24 +- .../org/lwjgl/opencl/api/CLBufferRegion.java | 62 ++ .../org/lwjgl/opencl/api/CLImageFormat.java | 60 ++ .../org/lwjgl/test/opencl/gl/DemoFractal.java | 903 +++++++++++------- .../org/lwjgl/test/opencl/gl/Mandelbrot.cl | 51 +- .../util/generator/JavaMethodsGenerator.java | 2 +- .../lwjgl/opengl/EXT_direct_state_access.java | 20 +- src/templates/org/lwjgl/opengl/GL40.java | 1 + .../org/lwjgl/opengl/NV_half_float.java | 18 +- 19 files changed, 1211 insertions(+), 510 deletions(-) create mode 100644 src/java/org/lwjgl/opencl/api/CLBufferRegion.java create mode 100644 src/java/org/lwjgl/opencl/api/CLImageFormat.java diff --git a/src/java/org/lwjgl/BufferChecks.java b/src/java/org/lwjgl/BufferChecks.java index 86a64896..5eb52c44 100644 --- a/src/java/org/lwjgl/BufferChecks.java +++ b/src/java/org/lwjgl/BufferChecks.java @@ -31,13 +31,7 @@ */ package org.lwjgl; -import java.nio.Buffer; -import java.nio.ByteBuffer; -import java.nio.DoubleBuffer; -import java.nio.FloatBuffer; -import java.nio.IntBuffer; -import java.nio.ShortBuffer; -import java.nio.LongBuffer; +import java.nio.*; /** *

A class to check buffer boundaries in general. If there is unsufficient space @@ -89,21 +83,21 @@ public class BufferChecks { /** Helper method to ensure an IntBuffer is null-terminated */ public static void checkNullTerminated(IntBuffer buf) { - if ( buf.get(buf.limit() - 1) != 0 ) { + if ( LWJGLUtil.CHECKS && buf.get(buf.limit() - 1) != 0 ) { throw new IllegalArgumentException("Missing null termination"); } } /** Helper method to ensure a LongBuffer is null-terminated */ public static void checkNullTerminated(LongBuffer buf) { - if ( buf.get(buf.limit() - 1) != 0 ) { + if ( LWJGLUtil.CHECKS && buf.get(buf.limit() - 1) != 0 ) { throw new IllegalArgumentException("Missing null termination"); } } /** Helper method to ensure a PointerBuffer is null-terminated */ public static void checkNullTerminated(PointerBuffer buf) { - if ( buf.get(buf.limit() - 1) != 0 ) { + if ( LWJGLUtil.CHECKS && buf.get(buf.limit() - 1) != 0 ) { throw new IllegalArgumentException("Missing null termination"); } } @@ -196,6 +190,41 @@ public class BufferChecks { } } + /** + * Detects the buffer type and performs the corresponding check + * and also returns the buffer position in bytes. + * + * @param buffer the buffer to check + * @param size the size to check + * + * @return the buffer position in bytes + */ + public static int checkBuffer(final Buffer buffer, final int size) { + final int posShift; + if ( buffer instanceof ByteBuffer ) { + BufferChecks.checkBuffer((ByteBuffer)buffer, size); + posShift = 0; + } else if ( buffer instanceof ShortBuffer ) { + BufferChecks.checkBuffer((ShortBuffer)buffer, size); + posShift = 1; + } else if ( buffer instanceof IntBuffer ) { + BufferChecks.checkBuffer((IntBuffer)buffer, size); + posShift = 2; + } else if ( buffer instanceof LongBuffer ) { + BufferChecks.checkBuffer((LongBuffer)buffer, size); + posShift = 4; + } else if ( buffer instanceof FloatBuffer ) { + BufferChecks.checkBuffer((FloatBuffer)buffer, size); + posShift = 2; + } else if ( buffer instanceof DoubleBuffer ) { + BufferChecks.checkBuffer((DoubleBuffer)buffer, size); + posShift = 4; + } else + throw new IllegalArgumentException("Unsupported Buffer type specified: " + buffer.getClass()); + + return buffer.position() << posShift; + } + public static void checkBuffer(ByteBuffer buf, int size) { if ( LWJGLUtil.CHECKS ) { checkBufferSize(buf, size); diff --git a/src/java/org/lwjgl/LWJGLUtil.java b/src/java/org/lwjgl/LWJGLUtil.java index fb70a3c5..8faea8cc 100644 --- a/src/java/org/lwjgl/LWJGLUtil.java +++ b/src/java/org/lwjgl/LWJGLUtil.java @@ -32,16 +32,15 @@ package org.lwjgl; import java.io.File; +import java.lang.reflect.Field; import java.lang.reflect.Method; +import java.lang.reflect.Modifier; import java.nio.ByteBuffer; import java.security.AccessController; import java.security.PrivilegedAction; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.List; -import java.util.StringTokenizer; - +import java.util.*; /** *

@@ -488,4 +487,92 @@ public class LWJGLUtil { return major > major_required || (major == major_required && minor >= minor_required); } -} + /** + * Returns a map of public static final integer fields in the specified classes, to their String representations. + * An optional filter can be specified to only include specific fields. The target map may be null, in which + * case a new map is allocated and returned. + *

+ * This method is useful when debugging to quickly identify values returned from the AL/GL/CL APIs. + * + * @param filter the filter to use (optional) + * @param target the target map (optional) + * @param tokenClasses an array of classes to get tokens from + * + * @return the token map + */ + + public static Map getClassTokens(final TokenFilter filter, final Map target, final Class ... tokenClasses) { + return getClassTokens(filter, target, Arrays.asList(tokenClasses)); + } + + /** + * Returns a map of public static final integer fields in the specified classes, to their String representations. + * An optional filter can be specified to only include specific fields. The target map may be null, in which + * case a new map is allocated and returned. + *

+ * This method is useful when debugging to quickly identify values returned from the AL/GL/CL APIs. + * + * @param filter the filter to use (optional) + * @param target the target map (optional) + * @param tokenClasses the classes to get tokens from + * + * @return the token map + */ + public static Map getClassTokens(final TokenFilter filter, Map target, final Iterable tokenClasses) { + if ( target == null ) + target = new HashMap(); + + final int TOKEN_MODIFIERS = Modifier.PUBLIC | Modifier.STATIC | Modifier.FINAL; + + for ( final Class tokenClass : tokenClasses ) { + for ( final Field field : tokenClass.getDeclaredFields() ) { + // Get only fields. + if ( (field.getModifiers() & TOKEN_MODIFIERS) == TOKEN_MODIFIERS && field.getType() == int.class ) { + try { + final int value = field.getInt(null); + if ( filter != null && !filter.accept(field, value) ) + continue; + + if ( target.containsKey(value) ) // Print colliding tokens in their hex representation. + target.put(value, toHexString(value)); + else + target.put(value, field.getName()); + } catch (IllegalAccessException e) { + // Ignore + } + } + } + } + + return target; + } + + /** + * Returns a string representation of the integer argument as an + * unsigned integer in base 16. The string will be uppercase + * and will have a leading '0x'. + * + * @param value the integer value + * + * @return the hex string representation + */ + public static String toHexString(final int value) { + return "0x" + Integer.toHexString(value).toUpperCase(); + } + + /** Simple interface for Field filtering. */ + public interface TokenFilter { + + /** + * Should return true if the specified Field passes the filter. + * + * @param field the Field to test + * @param value the integer value of the field + * + * @result true if the Field is accepted + */ + boolean accept(Field field, int value); + + } + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/PointerWrapperAbstract.java b/src/java/org/lwjgl/PointerWrapperAbstract.java index 7652cc1b..b6182044 100644 --- a/src/java/org/lwjgl/PointerWrapperAbstract.java +++ b/src/java/org/lwjgl/PointerWrapperAbstract.java @@ -63,7 +63,7 @@ public abstract class PointerWrapperAbstract implements PointerWrapper { */ public final void checkValid() { if ( LWJGLUtil.DEBUG && !isValid() ) - throw new IllegalStateException("This pointer is not valid."); + throw new IllegalStateException("This " + getClass().getSimpleName() + " pointer is not valid."); } public final long getPointer() { diff --git a/src/java/org/lwjgl/opencl/APIUtil.java b/src/java/org/lwjgl/opencl/APIUtil.java index 76bdf098..0f99081a 100644 --- a/src/java/org/lwjgl/opencl/APIUtil.java +++ b/src/java/org/lwjgl/opencl/APIUtil.java @@ -35,18 +35,15 @@ import org.lwjgl.BufferUtils; import org.lwjgl.LWJGLUtil; import org.lwjgl.PointerBuffer; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; import java.nio.*; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; -import java.util.*; - -import static org.lwjgl.opencl.CL10.*; +import java.util.HashSet; +import java.util.Set; +import java.util.StringTokenizer; /** * Utility class for OpenCL API calls. - * TODO: Remove useless stuff * * @author spasi */ @@ -356,38 +353,6 @@ final class APIUtil { return (int)size; } - static String toHexString(final int value) { - return "0x" + Integer.toHexString(value).toUpperCase(); - } - - static void getClassTokens(final Class[] tokenClasses, final Map target, final TokenFilter filter) { - getClassTokens(Arrays.asList(tokenClasses), target, filter); - } - - static void getClassTokens(final Iterable tokenClasses, final Map target, final TokenFilter filter) { - final int TOKEN_MODIFIERS = Modifier.PUBLIC | Modifier.STATIC | Modifier.FINAL; - - for ( final Class tokenClass : tokenClasses ) { - for ( final Field field : tokenClass.getDeclaredFields() ) { - // Get only fields. - if ( (field.getModifiers() & TOKEN_MODIFIERS) == TOKEN_MODIFIERS && field.getType() == int.class ) { - try { - final int value = field.getInt(null); - if ( filter != null && !filter.accept(field, value) ) - continue; - - if ( target.containsKey(value) ) // Print colliding tokens in their hex representation. - target.put(value, toHexString(value)); - else - target.put(value, field.getName()); - } catch (IllegalAccessException e) { - // Ignore - } - } - } - } - } - /** * A mutable CharSequence with very large initial length. We can wrap this in a re-usable CharBuffer for decoding. * We cannot subclass CharBuffer because of {@link java.nio.CharBuffer#toString(int,int)}. @@ -451,14 +416,6 @@ final class APIUtil { } - /** Simple interface for Field filtering */ - interface TokenFilter { - - /** Should return true if the specified Field passes the filter. */ - boolean accept(Field field, int value); - - } - /* ------------------------------------------------------------------------ --------------------------------------------------------------------------- OPENCL API UTILITIES BELOW diff --git a/src/java/org/lwjgl/opencl/CLChecks.java b/src/java/org/lwjgl/opencl/CLChecks.java index 26d9365d..bb608366 100644 --- a/src/java/org/lwjgl/opencl/CLChecks.java +++ b/src/java/org/lwjgl/opencl/CLChecks.java @@ -68,24 +68,24 @@ final class CLChecks { final long y = origin.get(1); final long z = origin.get(2); - if ( x < 0 || y < 0 || z < 0 ) + if ( LWJGLUtil.DEBUG && (x < 0 || y < 0 || z < 0) ) throw new IllegalArgumentException("Invalid cl_mem host origin: " + x + ", " + y + ", " + z); final long w = region.get(0); final long h = region.get(1); final long d = region.get(2); - if ( w < 1 || h < 1 || d < 1 ) + if ( LWJGLUtil.DEBUG && (w < 1 || h < 1 || d < 1) ) throw new IllegalArgumentException("Invalid cl_mem rectangle region dimensions: " + w + " x " + h + " x " + d); if ( row_pitch == 0 ) row_pitch = w; - else if ( row_pitch < w ) + else if ( LWJGLUtil.DEBUG && row_pitch < w ) throw new IllegalArgumentException("Invalid host row pitch specified: " + row_pitch); if ( slice_pitch == 0 ) slice_pitch = row_pitch * h; - else if ( slice_pitch < (row_pitch * h) ) + else if ( LWJGLUtil.DEBUG && slice_pitch < (row_pitch * h) ) throw new IllegalArgumentException("Invalid host slice pitch specified: " + slice_pitch); return (int)((z * slice_pitch + y * row_pitch + x) + (w * h * d)); @@ -110,17 +110,17 @@ final class CLChecks { final long h = region.get(1); final long d = region.get(2); - if ( w < 1 || h < 1 || d < 1 ) + if ( LWJGLUtil.DEBUG && (w < 1 || h < 1 || d < 1) ) throw new IllegalArgumentException("Invalid cl_mem image region dimensions: " + w + " x " + h + " x " + d); if ( row_pitch == 0 ) row_pitch = w; - else if ( row_pitch < w ) + else if ( LWJGLUtil.DEBUG && row_pitch < w ) throw new IllegalArgumentException("Invalid row pitch specified: " + row_pitch); if ( slice_pitch == 0 ) slice_pitch = row_pitch * h; - else if ( slice_pitch < (row_pitch * h) ) + else if ( LWJGLUtil.DEBUG && slice_pitch < (row_pitch * h) ) throw new IllegalArgumentException("Invalid slice pitch specified: " + slice_pitch); return (int)(slice_pitch * d); @@ -138,14 +138,17 @@ final class CLChecks { * @return the 2D image size in bytes */ static int calculateImage2DSize(final ByteBuffer format, final long w, final long h, long row_pitch) { - if ( LWJGLUtil.CHECKS && (w < 1 || h < 1) ) + if ( !LWJGLUtil.CHECKS ) + return 0; + + if ( LWJGLUtil.DEBUG && (w < 1 || h < 1) ) throw new IllegalArgumentException("Invalid 2D image dimensions: " + w + " x " + h); final int elementSize = getElementSize(format); if ( row_pitch == 0 ) row_pitch = w * elementSize; - else if ( LWJGLUtil.CHECKS && ((row_pitch < w * elementSize) || (row_pitch % elementSize != 0)) ) + else if ( LWJGLUtil.DEBUG && ((row_pitch < w * elementSize) || (row_pitch % elementSize != 0)) ) throw new IllegalArgumentException("Invalid image_row_pitch specified: " + row_pitch); return (int)(row_pitch * h); @@ -164,19 +167,22 @@ final class CLChecks { * @return the 3D image size in bytes */ static int calculateImage3DSize(final ByteBuffer format, final long w, final long h, final long d, long row_pitch, long slice_pitch) { - if ( LWJGLUtil.CHECKS && (w < 1 || h < 1 || d < 2) ) + if ( !LWJGLUtil.CHECKS ) + return 0; + + if ( LWJGLUtil.DEBUG && (w < 1 || h < 1 || d < 2) ) throw new IllegalArgumentException("Invalid 3D image dimensions: " + w + " x " + h + " x " + d); final int elementSize = getElementSize(format); if ( row_pitch == 0 ) row_pitch = w * elementSize; - else if ( LWJGLUtil.CHECKS && ((row_pitch < w * elementSize) || (row_pitch % elementSize != 0)) ) + else if ( LWJGLUtil.DEBUG && ((row_pitch < w * elementSize) || (row_pitch % elementSize != 0)) ) throw new IllegalArgumentException("Invalid image_row_pitch specified: " + row_pitch); if ( slice_pitch == 0 ) slice_pitch = row_pitch * h; - else if ( LWJGLUtil.CHECKS && ((row_pitch < row_pitch * h) || (slice_pitch % row_pitch != 0)) ) + else if ( LWJGLUtil.DEBUG && ((row_pitch < row_pitch * h) || (slice_pitch % row_pitch != 0)) ) throw new IllegalArgumentException("Invalid image_slice_pitch specified: " + row_pitch); return (int)(slice_pitch * d); @@ -223,7 +229,7 @@ final class CLChecks { case CL_ARGB: return 4; default: - throw new IllegalArgumentException("Invalid cl_channel_order specified: " + APIUtil.toHexString(channelOrder)); + throw new IllegalArgumentException("Invalid cl_channel_order specified: " + LWJGLUtil.toHexString(channelOrder)); } } @@ -255,7 +261,7 @@ final class CLChecks { case CL_FLOAT: return 4; default: - throw new IllegalArgumentException("Invalid cl_channel_type specified: " + APIUtil.toHexString(channelType)); + throw new IllegalArgumentException("Invalid cl_channel_type specified: " + LWJGLUtil.toHexString(channelType)); } } diff --git a/src/java/org/lwjgl/opencl/CLContext.java b/src/java/org/lwjgl/opencl/CLContext.java index 369dfdf2..17ec1585 100644 --- a/src/java/org/lwjgl/opencl/CLContext.java +++ b/src/java/org/lwjgl/opencl/CLContext.java @@ -31,6 +31,12 @@ */ package org.lwjgl.opencl; +import org.lwjgl.LWJGLException; +import org.lwjgl.opencl.api.CLImageFormat; +import org.lwjgl.opencl.api.Filter; +import org.lwjgl.opengl.Drawable; + +import java.nio.IntBuffer; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -124,6 +130,100 @@ public final class CLContext extends CLObjectChild { // ---------------[ UTILITY METHODS ]--------------- + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param devices the devices to use + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext create(final CLPlatform platform, final List devices, final IntBuffer errcode_ret) throws LWJGLException { + return create(platform, devices, null, null, errcode_ret); + } + + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param devices the devices to use + * @param pfn_notify the context callback function + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext create(final CLPlatform platform, final List devices, final CLContextCallback pfn_notify, final IntBuffer errcode_ret) throws LWJGLException { + return create(platform, devices, pfn_notify, null, errcode_ret); + } + + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param devices the devices to use + * @param share_drawable the OpenGL drawable to share objects with + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext create(final CLPlatform platform, final List devices, final CLContextCallback pfn_notify, final Drawable share_drawable, final IntBuffer errcode_ret) throws LWJGLException { + return util.create(platform, devices, pfn_notify, share_drawable, errcode_ret); + } + + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param device_type the device type to use + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext createFromType(final CLPlatform platform, final long device_type, final IntBuffer errcode_ret) throws LWJGLException { + return util.createFromType(platform, device_type, null, null, errcode_ret); + } + + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param device_type the device type to use + * @param pfn_notify the context callback function + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext createFromType(final CLPlatform platform, final long device_type, final CLContextCallback pfn_notify, final IntBuffer errcode_ret) throws LWJGLException { + return util.createFromType(platform, device_type, pfn_notify, null, errcode_ret); + } + + /** + * Creates a new CLContext. + * + * @param platform the platform to use + * @param device_type the device type to use + * @param share_drawable the OpenGL drawable to share objects with + * @param errcode_ret the error code result + * + * @return the new CLContext + * + * @throws LWJGLException if an exception occurs while creating the context + */ + public static CLContext createFromType(final CLPlatform platform, final long device_type, final CLContextCallback pfn_notify, final Drawable share_drawable, final IntBuffer errcode_ret) throws LWJGLException { + return util.createFromType(platform, device_type, pfn_notify, share_drawable, errcode_ret); + } + /** * Returns the integer value of the specified parameter. * @@ -144,11 +244,25 @@ public final class CLContext extends CLObjectChild { return util.getInfoDevices(this); } + public List getSupportedImageFormats(final long flags, final int image_type) { + return getSupportedImageFormats(flags, image_type, null); + } + + public List getSupportedImageFormats(final long flags, final int image_type, final Filter filter) { + return util.getSupportedImageFormats(this, flags, image_type, filter); + } + /** CLContext utility methods interface. */ interface CLContextUtil extends InfoUtil { List getInfoDevices(CLContext context); + CLContext create(CLPlatform platform, List devices, CLContextCallback pfn_notify, Drawable share_drawable, IntBuffer errcode_ret) throws LWJGLException; + + CLContext createFromType(CLPlatform platform, long device_type, CLContextCallback pfn_notify, Drawable share_drawable, IntBuffer errcode_ret) throws LWJGLException; + + List getSupportedImageFormats(CLContext context, final long flags, final int image_type, Filter filter); + } // -------[ IMPLEMENTATION STUFF BELOW ]------- diff --git a/src/java/org/lwjgl/opencl/CLMem.java b/src/java/org/lwjgl/opencl/CLMem.java index 1e3f2fb0..28af2c6b 100644 --- a/src/java/org/lwjgl/opencl/CLMem.java +++ b/src/java/org/lwjgl/opencl/CLMem.java @@ -31,7 +31,12 @@ */ package org.lwjgl.opencl; +import org.lwjgl.opencl.api.CLBufferRegion; +import org.lwjgl.opencl.api.CLImageFormat; + +import java.nio.Buffer; import java.nio.ByteBuffer; +import java.nio.IntBuffer; /** * This class is a wrapper around a cl_mem pointer. @@ -50,6 +55,52 @@ public final class CLMem extends CLObjectChild { // ---------------[ UTILITY METHODS ]--------------- + /** + * Creates a new 2D image object. + * + * @param context the context on which to create the image object + * @param flags the memory object flags + * @param image_format the image format + * @param image_width the image width + * @param image_height the image height + * @param image_row_pitch the image row pitch + * @param host_ptr the host buffer from which to read image data (optional) + * @param errcode_ret the error code result + * + * @return the new CLMem object + */ + public static CLMem createImage2D(final CLContext context, final long flags, final CLImageFormat image_format, + final long image_width, final long image_height, final long image_row_pitch, + final Buffer host_ptr, final IntBuffer errcode_ret) { + return util.createImage2D(context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret); + } + + /** + * Creates a new 3D image object. + * + * @param context the context on which to create the image object + * @param flags the memory object flags + * @param image_format the image format + * @param image_width the image width + * @param image_height the image height + * @param image_depth the image depth + * @param image_row_pitch the image row pitch + * @param image_slice_pitch the image slice pitch + * @param host_ptr the host buffer from which to read image data (optional) + * @param errcode_ret the error code result + * + * @return the new CLMem object + */ + public static CLMem createImage3D(final CLContext context, final long flags, final CLImageFormat image_format, + final long image_width, final long image_height, final long image_depth, final long image_row_pitch, final long image_slice_pitch, + final Buffer host_ptr, final IntBuffer errcode_ret) { + return util.createImage3D(context, flags, image_format, image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, errcode_ret); + } + + public CLMem createSubBuffer(final long flags, final int buffer_create_type, final CLBufferRegion buffer_create_info, final IntBuffer errcode_ret) { + return util.createSubBuffer(this, flags, buffer_create_type, buffer_create_info, errcode_ret); + } + /** * Returns the integer value of the specified parameter. * @@ -108,6 +159,15 @@ public final class CLMem extends CLObjectChild { return util.getImageInfoSize(this, param_name); } + /** + * Returns the image format. Applicable to image objects only. + * + * @return the parameter value + */ + public CLImageFormat getImageFormat() { + return util.getImageInfoFormat(this); + } + /** * Returns the image channel order. Applicable to image objects only. * @@ -165,10 +225,18 @@ public final class CLMem extends CLObjectChild { /** CLMem utility methods interface. */ interface CLMemUtil extends InfoUtil { + CLMem createImage2D(CLContext context, long flags, CLImageFormat image_format, long image_width, long image_height, long image_row_pitch, Buffer host_ptr, IntBuffer errcode_ret); + + CLMem createImage3D(CLContext context, long flags, CLImageFormat image_format, long image_width, long image_height, long image_depth, long image_row_pitch, long image_slice_pitch, Buffer host_ptr, IntBuffer errcode_ret); + + CLMem createSubBuffer(CLMem mem, long flags, int buffer_create_type, CLBufferRegion buffer_create_info, IntBuffer errcode_ret); + ByteBuffer getInfoHostBuffer(CLMem mem); long getImageInfoSize(CLMem mem, int param_name); + CLImageFormat getImageInfoFormat(CLMem mem); + int getImageInfoFormat(CLMem mem, int index); int getGLObjectType(CLMem mem); diff --git a/src/java/org/lwjgl/opencl/CLProgram.java b/src/java/org/lwjgl/opencl/CLProgram.java index 57bf853f..13182768 100644 --- a/src/java/org/lwjgl/opencl/CLProgram.java +++ b/src/java/org/lwjgl/opencl/CLProgram.java @@ -69,6 +69,15 @@ public final class CLProgram extends CLObjectChild { // ---------------[ UTILITY METHODS ]--------------- + /** + * Creates kernel objects for all kernels functions in this program. + * + * @return a CLKernel array + */ + public CLKernel[] createKernelsInProgram() { + return util.createKernelsInProgram(this); + } + /** * Returns the String value of the specified parameter. * @@ -168,6 +177,8 @@ public final class CLProgram extends CLObjectChild { /** CLProgram utility methods interface. */ interface CLProgramUtil extends InfoUtil { + CLKernel[] createKernelsInProgram(CLProgram program); + CLDevice[] getInfoDevices(CLProgram program); ByteBuffer getInfoBinaries(CLProgram program, ByteBuffer target); diff --git a/src/java/org/lwjgl/opencl/InfoUtilAbstract.java b/src/java/org/lwjgl/opencl/InfoUtilAbstract.java index b28d457e..25f854dd 100644 --- a/src/java/org/lwjgl/opencl/InfoUtilAbstract.java +++ b/src/java/org/lwjgl/opencl/InfoUtilAbstract.java @@ -31,6 +31,7 @@ */ package org.lwjgl.opencl; +import org.lwjgl.LWJGLUtil; import org.lwjgl.PointerBuffer; import java.nio.ByteBuffer; @@ -112,8 +113,8 @@ abstract class InfoUtilAbstract implements InfoUtil { object.checkValid(); final int bytes = getSizeRet(object, param_name); - if ( bytes == 0 ) - return ""; + if ( bytes <= 1 ) + return null; final ByteBuffer buffer = APIUtil.getBufferByte(bytes); getInfo(object, param_name, buffer, null); @@ -126,7 +127,7 @@ abstract class InfoUtilAbstract implements InfoUtil { final PointerBuffer bytes = APIUtil.getBufferPointer(); final int errcode = getInfo(object, param_name, null, bytes); if ( errcode != CL_SUCCESS ) - throw new IllegalArgumentException("Invalid parameter specified: " + APIUtil.toHexString(param_name)); + throw new IllegalArgumentException("Invalid parameter specified: " + LWJGLUtil.toHexString(param_name)); return (int)bytes.get(0); } diff --git a/src/java/org/lwjgl/opencl/InfoUtilFactory.java b/src/java/org/lwjgl/opencl/InfoUtilFactory.java index d73726ca..41fbba2a 100644 --- a/src/java/org/lwjgl/opencl/InfoUtilFactory.java +++ b/src/java/org/lwjgl/opencl/InfoUtilFactory.java @@ -32,8 +32,12 @@ package org.lwjgl.opencl; import org.lwjgl.*; +import org.lwjgl.opencl.api.CLBufferRegion; +import org.lwjgl.opencl.api.CLImageFormat; import org.lwjgl.opencl.api.Filter; +import org.lwjgl.opengl.Drawable; +import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.ArrayList; @@ -49,7 +53,6 @@ import static org.lwjgl.opencl.CL11.*; * so that they can be compiled for the generator. * * @author Spasi - * @since 28 Σεπ 2010 */ final class InfoUtilFactory { @@ -92,6 +95,70 @@ final class InfoUtilFactory { } + /** Custom clCreateContext implementation (reuses APIUtil.getBufferPointer) */ + public CLContext create(final CLPlatform platform, final List devices, final CLContextCallback pfn_notify, final Drawable share_drawable, final IntBuffer errcode_ret) throws LWJGLException { + final int propertyCount = 2 + (share_drawable == null ? 0 : 4) + 1; + + final PointerBuffer buffer = APIUtil.getBufferPointer(propertyCount + devices.size()); + buffer.put(CL_CONTEXT_PLATFORM).put(platform); + if ( share_drawable != null ) + share_drawable.setCLSharingProperties(buffer); + buffer.put(0); + + buffer.position(propertyCount); // Make sure we're at the right offset, setCLSharingProperties might not use all 4 positions. + for ( CLDevice device : devices ) + buffer.put(device); + + final long function_pointer = CLCapabilities.clCreateContext; + BufferChecks.checkFunctionAddress(function_pointer); + final long user_data = pfn_notify == null || pfn_notify.isCustom() ? 0 : CallbackUtil.createGlobalRef(pfn_notify); + CLContext __result = null; + try { + __result = new CLContext(nclCreateContext(buffer.getBuffer(), 0, devices.size(), buffer.getBuffer(), propertyCount * PointerBuffer.getPointerSize(), pfn_notify == null ? 0 : pfn_notify.getPointer(), user_data, errcode_ret, errcode_ret != null ? errcode_ret.position() : 0, function_pointer), platform); + return __result; + } finally { + CallbackUtil.registerCallback(__result, user_data); + } + } + + public CLContext createFromType(final CLPlatform platform, final long device_type, final CLContextCallback pfn_notify, final Drawable share_drawable, final IntBuffer errcode_ret) throws LWJGLException { + final int propertyCount = 2 + (share_drawable == null ? 0 : 4) + 1; + + final PointerBuffer properties = APIUtil.getBufferPointer(propertyCount); + properties.put(CL_CONTEXT_PLATFORM).put(platform); + if ( share_drawable != null ) + share_drawable.setCLSharingProperties(properties); + properties.put(0); + properties.flip(); + + return clCreateContextFromType(properties, device_type, pfn_notify, errcode_ret); + } + + public List getSupportedImageFormats(final CLContext context, final long flags, final int image_type, final Filter filter) { + final IntBuffer numBuffer = APIUtil.getBufferInt(); + clGetSupportedImageFormats(context, flags, image_type, null, numBuffer); + + final int num_image_formats = numBuffer.get(0); + if ( num_image_formats == 0 ) + return null; + + final ByteBuffer formatBuffer = BufferUtils.createByteBuffer(num_image_formats * CLImageFormat.STRUCT_SIZE); + clGetSupportedImageFormats(context, flags, image_type, formatBuffer, null); + + final List formats = new ArrayList(num_image_formats); + for ( int i = 0; i < num_image_formats; i++ ) { + final int offset = num_image_formats * CLImageFormat.STRUCT_SIZE; + final CLImageFormat format = new CLImageFormat( + formatBuffer.getInt(offset), + formatBuffer.getInt(offset + 4) + ); + if ( filter == null || filter.accept(format) ) + formats.add(format); + } + + return formats.size() == 0 ? null : formats; + } + } static final InfoUtil CL_DEVICE_UTIL = new CLDeviceUtil(); @@ -106,7 +173,7 @@ final class InfoUtilFactory { case CL_DEVICE_MAX_WORK_ITEM_SIZES: return getInfoInt(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); default: - throw new IllegalArgumentException("Unsupported parameter: " + APIUtil.toHexString(param_name)); + throw new IllegalArgumentException("Unsupported parameter: " + LWJGLUtil.toHexString(param_name)); } } @@ -183,7 +250,7 @@ final class InfoUtilFactory { size = 3; break; default: - throw new IllegalArgumentException("Unsupported parameter: " + APIUtil.toHexString(param_name)); + throw new IllegalArgumentException("Unsupported parameter: " + LWJGLUtil.toHexString(param_name)); } final PointerBuffer buffer = APIUtil.getBufferPointer(size); @@ -215,6 +282,44 @@ final class InfoUtilFactory { return clGetMemObjectInfo(mem, param_name, param_value, param_value_size_ret); } + public CLMem createImage2D(final CLContext context, final long flags, final CLImageFormat image_format, final long image_width, final long image_height, final long image_row_pitch, final Buffer host_ptr, final IntBuffer errcode_ret) { + final ByteBuffer formatBuffer = APIUtil.getBufferByte(2 * 4); + formatBuffer.putInt(0, image_format.getChannelOrder()); + formatBuffer.putInt(4, image_format.getChannelType()); + + final long function_pointer = CLCapabilities.clCreateImage2D; + BufferChecks.checkFunctionAddress(function_pointer); + if ( errcode_ret != null ) + BufferChecks.checkBuffer(errcode_ret, 1); + return new CLMem(nclCreateImage2D(context.getPointer(), flags, formatBuffer, 0, image_width, image_height, image_row_pitch, host_ptr, + host_ptr != null ? BufferChecks.checkBuffer(host_ptr, CLChecks.calculateImage2DSize(formatBuffer, image_width, image_height, image_row_pitch)) : 0, + errcode_ret, errcode_ret != null ? errcode_ret.position() : 0, function_pointer), context); + } + + public CLMem createImage3D(final CLContext context, final long flags, final CLImageFormat image_format, final long image_width, final long image_height, final long image_depth, final long image_row_pitch, final long image_slice_pitch, final Buffer host_ptr, final IntBuffer errcode_ret) { + final ByteBuffer formatBuffer = APIUtil.getBufferByte(2 * 4); + formatBuffer.putInt(0, image_format.getChannelOrder()); + formatBuffer.putInt(4, image_format.getChannelType()); + + final long function_pointer = CLCapabilities.clCreateImage3D; + BufferChecks.checkFunctionAddress(function_pointer); + if ( errcode_ret != null ) + BufferChecks.checkBuffer(errcode_ret, 1); + return new CLMem(nclCreateImage3D(context.getPointer(), flags, formatBuffer, 0, image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, + host_ptr != null ? BufferChecks.checkBuffer(host_ptr, CLChecks.calculateImage3DSize(formatBuffer, image_width, image_height, image_depth, image_row_pitch, image_slice_pitch)) : 0, + errcode_ret, errcode_ret != null ? errcode_ret.position() : 0, function_pointer), context); + } + + public CLMem createSubBuffer(final CLMem mem, final long flags, final int buffer_create_type, final CLBufferRegion buffer_create_info, final IntBuffer errcode_ret) { + final PointerBuffer infoBuffer = APIUtil.getBufferPointer(2); + + infoBuffer.put(buffer_create_info.getOrigin()); + infoBuffer.put(buffer_create_info.getSize()); + + return clCreateSubBuffer(mem, flags, buffer_create_type, infoBuffer.getBuffer(), errcode_ret); + + } + public ByteBuffer getInfoHostBuffer(final CLMem mem) { mem.checkValid(); @@ -242,6 +347,16 @@ final class InfoUtilFactory { return buffer.get(0); } + public CLImageFormat getImageInfoFormat(final CLMem mem) { + mem.checkValid(); + + final ByteBuffer format = APIUtil.getBufferByte(2 * 4); + + clGetImageInfo(mem, CL_IMAGE_FORMAT, format, null); + + return new CLImageFormat(format.getInt(0), format.getInt(4)); + } + public int getImageInfoFormat(final CLMem mem, final int index) { mem.checkValid(); @@ -346,10 +461,28 @@ final class InfoUtilFactory { case CL_PROGRAM_BINARY_SIZES: return getInfoInt(program, CL_PROGRAM_NUM_DEVICES); default: - throw new IllegalArgumentException("Unsupported parameter: " + APIUtil.toHexString(param_name)); + throw new IllegalArgumentException("Unsupported parameter: " + LWJGLUtil.toHexString(param_name)); } } + public CLKernel[] createKernelsInProgram(final CLProgram program) { + final IntBuffer numBuffer = APIUtil.getBufferInt(); + clCreateKernelsInProgram(program, null, numBuffer); + + final int num_kernels = numBuffer.get(0); + if ( num_kernels == 0 ) + return null; + + final PointerBuffer kernelIDs = APIUtil.getBufferPointer(num_kernels); + clCreateKernelsInProgram(program, kernelIDs, null); + + final CLKernel[] kernels = new CLKernel[num_kernels]; + for ( int i = 0; i < num_kernels; i++ ) + kernels[i] = program.getCLKernel(kernelIDs.get(i)); + + return kernels; + } + public CLDevice[] getInfoDevices(final CLProgram program) { program.checkValid(); @@ -413,8 +546,8 @@ final class InfoUtilFactory { program.checkValid(); final int bytes = getBuildSizeRet(program, device, param_name); - if ( bytes == 0 ) - throw new IllegalArgumentException("Invalid parameter specified: " + APIUtil.toHexString(param_name)); + if ( bytes <= 1 ) + return null; final ByteBuffer buffer = APIUtil.getBufferByte(bytes); clGetProgramBuildInfo(program, device, param_name, buffer, null); @@ -435,8 +568,10 @@ final class InfoUtilFactory { private static int getBuildSizeRet(final CLProgram program, final CLDevice device, final int param_name) { final PointerBuffer bytes = APIUtil.getBufferPointer(); final int errcode = clGetProgramBuildInfo(program, device, param_name, null, bytes); + if ( errcode != CL_SUCCESS ) + throw new IllegalArgumentException("Invalid parameter specified: " + LWJGLUtil.toHexString(param_name)); - return errcode == CL_SUCCESS ? (int)bytes.get(0) : 0; + return (int)bytes.get(0); } } diff --git a/src/java/org/lwjgl/opencl/Util.java b/src/java/org/lwjgl/opencl/Util.java index dd300823..acb03800 100644 --- a/src/java/org/lwjgl/opencl/Util.java +++ b/src/java/org/lwjgl/opencl/Util.java @@ -31,8 +31,9 @@ */ package org.lwjgl.opencl; +import org.lwjgl.LWJGLUtil; + import java.lang.reflect.Field; -import java.util.HashMap; import java.util.Map; /** @@ -43,20 +44,11 @@ import java.util.Map; public final class Util { /** Maps OpenCL error token values to their String representations. */ - private static final Map CL_ERROR_TOKENS = new HashMap(64); - - static { - APIUtil.getClassTokens(new Class[] { - CL10.class, CL11.class, - KHRGLSharing.class, KHRICD.class, - APPLEGLSharing.class, - EXTDeviceFission.class, - }, CL_ERROR_TOKENS, new APIUtil.TokenFilter() { - public boolean accept(final Field field, final int value) { - return value < 0; // Currently, all OpenCL errors have negative values. - } - }); - } + private static final Map CL_ERROR_TOKENS = LWJGLUtil.getClassTokens(new LWJGLUtil.TokenFilter() { + public boolean accept(final Field field, final int value) { + return value < 0; // Currently, all OpenCL errors have negative values. + } + }, null, CL10.class, CL11.class, KHRGLSharing.class, KHRICD.class, APPLEGLSharing.class, EXTDeviceFission.class); private Util() { } @@ -70,7 +62,7 @@ public final class Util { String errname = CL_ERROR_TOKENS.get(errcode); if ( errname == null ) errname = "UNKNOWN"; - throw new OpenCLException("Error Code: " + errname + " (" + APIUtil.toHexString(errcode) + ")"); + throw new OpenCLException("Error Code: " + errname + " (" + LWJGLUtil.toHexString(errcode) + ")"); } } \ No newline at end of file diff --git a/src/java/org/lwjgl/opencl/api/CLBufferRegion.java b/src/java/org/lwjgl/opencl/api/CLBufferRegion.java new file mode 100644 index 00000000..b0d72d31 --- /dev/null +++ b/src/java/org/lwjgl/opencl/api/CLBufferRegion.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2002-2010 LWJGL Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of 'LWJGL' nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.lwjgl.opencl.api; + +import org.lwjgl.PointerBuffer; + +/** + * Simple container for cl_buffer_region struct values. + * + * @author Spasi + */ +public final class CLBufferRegion { + + /** The cl_buffer_region struct size in bytes. */ + public static final int STRUCT_SIZE = 2 * PointerBuffer.getPointerSize(); + + private final int origin; + private final int size; + + public CLBufferRegion(final int origin, final int size) { + this.origin = origin; + this.size = size; + } + + public int getOrigin() { + return origin; + } + + public int getSize() { + return size; + } + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/opencl/api/CLImageFormat.java b/src/java/org/lwjgl/opencl/api/CLImageFormat.java new file mode 100644 index 00000000..f0cd0236 --- /dev/null +++ b/src/java/org/lwjgl/opencl/api/CLImageFormat.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2002-2010 LWJGL Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of 'LWJGL' nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.lwjgl.opencl.api; + +/** + * Simple container for cl_image_format struct values. + * + * @author Spasi + */ +public final class CLImageFormat { + + /** The cl_image_format struct size in bytes. */ + public static final int STRUCT_SIZE = 2 * 4; + + private final int channelOrder; + private final int channelType; + + public CLImageFormat(final int channelOrder, final int channelType) { + this.channelOrder = channelOrder; + this.channelType = channelType; + } + + public int getChannelOrder() { + return channelOrder; + } + + public int getChannelType() { + return channelType; + } + +} \ No newline at end of file diff --git a/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java b/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java index a4629e44..f7676eba 100644 --- a/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java +++ b/src/java/org/lwjgl/test/opencl/gl/DemoFractal.java @@ -43,18 +43,23 @@ import org.lwjgl.util.Color; import org.lwjgl.util.ReadableColor; import java.io.*; +import java.nio.ByteBuffer; import java.nio.IntBuffer; +import java.util.HashSet; import java.util.List; +import java.util.Set; import static java.lang.Math.*; import static org.lwjgl.opencl.CL10.*; import static org.lwjgl.opencl.CL10GL.*; import static org.lwjgl.opencl.KHRGLEvent.*; +import static org.lwjgl.opengl.AMDDebugOutput.*; import static org.lwjgl.opengl.ARBCLEvent.*; +import static org.lwjgl.opengl.ARBDebugOutput.*; import static org.lwjgl.opengl.ARBSync.*; import static org.lwjgl.opengl.GL11.*; -import static org.lwjgl.opengl.GL12.*; import static org.lwjgl.opengl.GL15.*; +import static org.lwjgl.opengl.GL20.*; import static org.lwjgl.opengl.GL21.*; /* @@ -118,24 +123,35 @@ public class DemoFractal { // max number of used GPUs private static final int MAX_PARALLELISM_LEVEL = 8; - // max per pixel iterations to compute the fractal - private static final int MAX_ITERATIONS = 500; + private static final int COLOR_MAP_SIZE = 32 * 2 * 4; + + private Set params; private CLContext clContext; private CLCommandQueue[] queues; private CLKernel[] kernels; private CLProgram[] programs; - private CLMem[] pboBuffers; - private IntBuffer pboIDs; + private CLMem[] glBuffers; + private IntBuffer glIDs; + + private boolean useTextures; + + // Texture rendering + private int dlist; + private int vsh; + private int fsh; + private int program; private CLMem[] colorMap; - private IntBuffer[] colorMapBuffer; private final PointerBuffer kernel2DGlobalWorkSize; - private int width; - private int height; + // max per pixel iterations to compute the fractal + private int maxIterations = 500; + + private int width = 512; + private int height = 512; private double minX = -2f; private double minY = -1.2f; @@ -174,13 +190,375 @@ public class DemoFractal { private GLSync glSync; private CLEvent glEvent; - public DemoFractal(int width, int height) { - kernel2DGlobalWorkSize = BufferUtils.createPointerBuffer(2); + public DemoFractal(final String[] args) { + params = new HashSet(); - this.width = width; - this.height = height; + for ( int i = 0; i < args.length; i++ ) { + final String arg = args[i]; + + if ( arg.charAt(0) != '-' && arg.charAt(0) != '/' ) + throw new IllegalArgumentException("Invalid command-line argument: " + args[i]); + + final String param = arg.substring(1); + + if ( "forcePBO".equalsIgnoreCase(param) ) + params.add("forcePBO"); + else if ( "forceCPU".equalsIgnoreCase(param) ) + params.add("forceCPU"); + else if ( "debugGL".equalsIgnoreCase(param) ) + params.add("debugGL"); + else if ( "iterations".equalsIgnoreCase(param) ) { + if ( args.length < i + 1 + 1 ) + throw new IllegalArgumentException("Invalid iterations argument specified."); + + try { + this.maxIterations = Integer.parseInt(args[++i]); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid number of iterations specified."); + } + } else if ( "res".equalsIgnoreCase(param) ) { + if ( args.length < i + 2 + 1 ) + throw new IllegalArgumentException("Invalid res argument specified."); + + try { + this.width = Integer.parseInt(args[++i]); + this.height = Integer.parseInt(args[++i]); + + if ( width < 1 || height < 1 ) + throw new IllegalArgumentException("Invalid res dimensions specified."); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid res dimensions specified."); + } + } + } + + kernel2DGlobalWorkSize = BufferUtils.createPointerBuffer(2); } + public static void main(String args[]) { + DemoFractal demo = new DemoFractal(args); + demo.init(); + demo.run(); + } + + public void init() { + try { + CL.create(); + Display.setDisplayMode(new DisplayMode(width, height)); + Display.setTitle("OpenCL Fractal Demo"); + Display.setSwapInterval(0); + Display.create(new PixelFormat(), new ContextAttribs().withDebug(params.contains("debugGL"))); + } catch (LWJGLException e) { + throw new RuntimeException(e); + } + + try { + initCL(Display.getDrawable()); + } catch (Exception e) { + if ( clContext != null ) + clReleaseContext(clContext); + Display.destroy(); + throw new RuntimeException(e); + } + + glDisable(GL_DEPTH_TEST); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + + initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight()); + + initGLObjects(); + glFinish(); + + setKernelConstants(); + } + + private void initCL(Drawable drawable) throws Exception { + // Find a platform + List platforms = CLPlatform.getPlatforms(); + if ( platforms == null ) + throw new RuntimeException("No OpenCL platforms found."); + + final CLPlatform platform = platforms.get(0); // just grab the first one + + // Find devices with GL sharing support + final Filter glSharingFilter = new Filter() { + public boolean accept(final CLDevice device) { + final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); + return caps.CL_KHR_gl_sharing; + } + }; + int device_type = params.contains("forceCPU") ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU; + List devices = platform.getDevices(device_type, glSharingFilter); + if ( devices == null ) { + device_type = CL_DEVICE_TYPE_CPU; + devices = platform.getDevices(device_type, glSharingFilter); + if ( devices == null ) + throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support."); + } + + // Create the context + clContext = CLContext.create(platform, devices, new CLContextCallback() { + protected void handleMessage(final String errinfo, final ByteBuffer private_info) { + System.out.println("[CONTEXT MESSAGE] " + errinfo); + } + }, drawable, null); + + slices = min(devices.size(), MAX_PARALLELISM_LEVEL); + + // create command queues for every GPU, setup colormap and init kernels + queues = new CLCommandQueue[slices]; + kernels = new CLKernel[slices]; + colorMap = new CLMem[slices]; + + for ( int i = 0; i < slices; i++ ) { + colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, COLOR_MAP_SIZE, null); + colorMap[i].checkValid(); + + // create command queue and upload color map buffer on each used device + queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null); + queues[i].checkValid(); + + final ByteBuffer colorMapBuffer = clEnqueueMapBuffer(queues[i], colorMap[i], CL_TRUE, CL_MAP_WRITE, 0, COLOR_MAP_SIZE, null, null, null); + initColorMap(colorMapBuffer.asIntBuffer(), 32, Color.BLUE, Color.GREEN, Color.RED); + clEnqueueUnmapMemObject(queues[i], colorMap[i], colorMapBuffer, null, null); + } + + // check if we have 64bit FP support on all devices + // if yes we can use only one program for all devices + one kernel per device. + // if not we will have to create (at least) one program for 32 and one for 64bit devices. + // since there are different vendor extensions for double FP we use one program per device. + // (OpenCL spec is not very clear about this usecases) + boolean all64bit = true; + for ( CLDevice device : devices ) { + if ( !isDoubleFPAvailable(device) ) { + all64bit = false; + break; + } + } + + // load program(s) + programs = new CLProgram[all64bit ? 1 : slices]; + + final ContextCapabilities caps = GLContext.getCapabilities(); + + if ( !caps.OpenGL20 ) + throw new RuntimeException("OpenGL 2.0 is required to run this demo."); + else if ( device_type == CL_DEVICE_TYPE_CPU && !caps.OpenGL21 ) + throw new RuntimeException("OpenGL 2.1 is required to run this demo."); + + if ( caps.GL_ARB_debug_output ) + glDebugMessageCallbackARB(new ARBDebugOutputCallback()); + else if ( caps.GL_AMD_debug_output ) + glDebugMessageCallbackAMD(new AMDDebugOutputCallback()); + + if ( device_type == CL_DEVICE_TYPE_GPU ) + System.out.println("OpenCL Device Type: GPU (Use -forceCPU to use CPU)"); + else + System.out.println("OpenCL Device Type: CPU"); + for ( int i = 0; i < devices.size(); i++ ) + System.out.println("OpenCL Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event); + + System.out.println("\nMax Iterations: " + maxIterations + " (Use -iterations to change)"); + System.out.println("Display resolution: " + width + "x" + height + " (Use -res to change)"); + + System.out.println("\nOpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync); + System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event); + + // Use PBO if we're on a CPU implementation + useTextures = device_type == CL_DEVICE_TYPE_GPU && (!caps.OpenGL21 || !params.contains("forcePBO")); + if ( useTextures ) { + System.out.println("\nCL/GL Sharing method: TEXTURES (use -forcePBO to use PBO + DrawPixels)"); + System.out.println("Rendering method: Shader on a fullscreen quad"); + } else { + System.out.println("\nCL/GL Sharing method: PIXEL BUFFER OBJECTS"); + System.out.println("Rendering method: DrawPixels"); + } + + buildPrograms(); + + // Detect GLtoCL synchronization method + syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied + if ( syncGLtoCL ) { + clEvents = new CLEvent[slices]; + clSyncs = new GLSync[slices]; + System.out.println("\nGL to CL sync: Using OpenCL events"); + } else + System.out.println("\nGL to CL sync: Using clFinish"); + + // Detect CLtoGL synchronization method + syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync; + if ( syncCLtoGL ) { + for ( CLDevice device : devices ) { + if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) { + syncCLtoGL = false; + break; + } + } + } + if ( syncCLtoGL ) { + System.out.println("CL to GL sync: Using OpenGL sync objects"); + } else + System.out.println("CL to GL sync: Using glFinish"); + + if ( useTextures ) { + dlist = glGenLists(1); + + glNewList(dlist, GL_COMPILE); + glBegin(GL_QUADS); + { + glTexCoord2f(0.0f, 0.0f); + glVertex2f(0, 0); + + glTexCoord2f(0.0f, 1.0f); + glVertex2i(0, height); + + glTexCoord2f(1.0f, 1.0f); + glVertex2f(width, height); + + glTexCoord2f(1.0f, 0.0f); + glVertex2f(width, 0); + } + glEnd(); + glEndList(); + + vsh = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vsh, "varying vec2 texCoord;\n" + + "\n" + + "void main(void) {\n" + + "\tgl_Position = ftransform();\n" + + "\ttexCoord = gl_MultiTexCoord0.xy;\n" + + "}"); + glCompileShader(vsh); + + fsh = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fsh, "uniform sampler2D mandelbrot;\n" + + "\n" + + "varying vec2 texCoord;\n" + + "\n" + + "void main(void) {\n" + + "\tgl_FragColor = texture2D(mandelbrot, texCoord);" + + "}"); + glCompileShader(fsh); + + program = glCreateProgram(); + glAttachShader(program, vsh); + glAttachShader(program, fsh); + glLinkProgram(program); + + glUseProgram(program); + glUniform1i(glGetUniformLocation(program, "mandelbrot"), 0); + } + + System.out.println(""); + } + + private void buildPrograms() { + /* + * workaround: The driver keeps using the old binaries for some reason. + * to solve this we simple create a new program and release the old. + * however rebuilding programs should be possible -> remove when drivers are fixed. + * (again: the spec is not very clear about this kind of usages) + */ + if ( programs[0] != null ) { + for ( CLProgram program : programs ) + clReleaseProgram(program); + } + + try { + createPrograms(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + // disable 64bit floating point math if not available + for ( int i = 0; i < programs.length; i++ ) { + final CLDevice device = queues[i].getCLDevice(); + + final StringBuilder options = new StringBuilder(useTextures ? " -D USE_TEXTURE" : ""); + final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); + if ( doublePrecision && isDoubleFPAvailable(device) ) { + //cl_khr_fp64 + options.append(" -D DOUBLE_FP"); + + //amd's verson of double precision floating point math + if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 ) + options.append(" -D AMD_FP"); + } + + System.out.println("\nOpenCL COMPILER OPTIONS: " + options); + + try { + clBuildProgram(programs[i], device, options, null); + } finally { + System.out.println("BUILD LOG: " + programs[i].getBuildInfoString(device, CL_PROGRAM_BUILD_LOG)); + } + } + + rebuild = false; + + // init kernel with constants + for ( int i = 0; i < kernels.length; i++ ) + kernels[i] = clCreateKernel(programs[min(i, programs.length)], "mandelbrot", null); + } + + private void initGLObjects() { + if ( glBuffers == null ) { + glBuffers = new CLMem[slices]; + glIDs = BufferUtils.createIntBuffer(slices); + } else { + for ( CLMem mem : glBuffers ) + clReleaseMemObject(mem); + + if ( useTextures ) + glDeleteTextures(glIDs); + else + glDeleteBuffers(glIDs); + } + + if ( useTextures ) + glGenTextures(glIDs); + else + glGenBuffers(glIDs); + + if ( useTextures ) { + // Init textures + for ( int i = 0; i < slices; i++ ) { + glBindTexture(GL_TEXTURE_2D, glIDs.get(i)); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width / slices, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, (ByteBuffer)null); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + glBuffers[i] = clCreateFromGLTexture2D(clContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, glIDs.get(i), null); + } + glBindTexture(GL_TEXTURE_2D, 0); + } else { + // setup one empty PBO per slice + for ( int i = 0; i < slices; i++ ) { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glIDs.get(i)); + glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW); + + glBuffers[i] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, glIDs.get(i), null); + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + buffersInitialized = true; + } + + // init kernels with constants + + private void setKernelConstants() { + for ( int i = 0; i < slices; i++ ) { + kernels[i] + .setArg(6, glBuffers[i]) + .setArg(7, colorMap[i]) + .setArg(8, COLOR_MAP_SIZE) + .setArg(9, maxIterations); + } + } + + // rendering cycle + private void run() { long startTime = System.currentTimeMillis() + 5000; long fps = 0; @@ -206,10 +584,147 @@ public class DemoFractal { } } + clReleaseContext(clContext); + + if ( useTextures ) { + glDeleteProgram(program); + glDeleteShader(fsh); + glDeleteShader(vsh); + + glDeleteLists(dlist, 1); + } + CL.destroy(); Display.destroy(); } + public void display() { + // TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented. + + // make sure GL does not use our objects before we start computing + if ( syncCLtoGL && glEvent != null ) { + for ( final CLCommandQueue queue : queues ) + clEnqueueWaitForEvents(queue, glEvent); + } else + glFinish(); + + if ( !buffersInitialized ) { + initGLObjects(); + setKernelConstants(); + } + + if ( rebuild ) { + buildPrograms(); + setKernelConstants(); + } + compute(doublePrecision); + + render(); + } + + // OpenCL + + private void compute(final boolean is64bit) { + int sliceWidth = (int)(width / (float)slices); + double rangeX = (maxX - minX) / slices; + double rangeY = (maxY - minY); + + kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height); + + // start computation + for ( int i = 0; i < slices; i++ ) { + kernels[i].setArg(0, sliceWidth).setArg(1, height); + if ( !is64bit || !isDoubleFPAvailable(queues[i].getCLDevice()) ) { + kernels[i] + .setArg(2, (float)(minX + rangeX * i)).setArg(3, (float)minY) + .setArg(4, (float)rangeX).setArg(5, (float)rangeY); + } else { + kernels[i] + .setArg(2, minX + rangeX * i).setArg(3, minY) + .setArg(4, rangeX).setArg(5, rangeY); + } + + // acquire GL objects, and enqueue a kernel with a probe from the list + clEnqueueAcquireGLObjects(queues[i], glBuffers[i], null, null); + + clEnqueueNDRangeKernel(queues[i], kernels[i], 2, + null, + kernel2DGlobalWorkSize, + null, + null, null); + + clEnqueueReleaseGLObjects(queues[i], glBuffers[i], null, syncGLtoCL ? syncBuffer : null); + if ( syncGLtoCL ) { + clEvents[i] = queues[i].getCLEvent(syncBuffer.get(0)); + clSyncs[i] = glCreateSyncFromCLeventARB(queues[i].getParent(), clEvents[i], 0); + } + } + + // block until done (important: finish before doing further gl work) + if ( !syncGLtoCL ) { + for ( int i = 0; i < slices; i++ ) + clFinish(queues[i]); + } + } + + // OpenGL + + private void render() { + glClear(GL_COLOR_BUFFER_BIT); + + if ( syncGLtoCL ) { + for ( int i = 0; i < slices; i++ ) + glWaitSync(clSyncs[i], 0, 0); + } + + //draw slices + int sliceWidth = width / slices; + + if ( useTextures ) { + for ( int i = 0; i < slices; i++ ) { + int seperatorOffset = drawSeparator ? i : 0; + + glBindTexture(GL_TEXTURE_2D, glIDs.get(i)); + glCallList(dlist); + } + } else { + for ( int i = 0; i < slices; i++ ) { + int seperatorOffset = drawSeparator ? i : 0; + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glIDs.get(i)); + glRasterPos2i(sliceWidth * i + seperatorOffset, 0); + + glDrawPixels(sliceWidth, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + if ( syncCLtoGL ) { + glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null); + } + + //draw info text + /* + textRenderer.beginRendering(width, height, false); + + textRenderer.draw("device/time/precision", 10, height - 15); + + for ( int i = 0; i < slices; i++ ) { + CLDevice device = queues[i].getDevice(); + boolean doubleFP = doublePrecision && isDoubleFPAvailable(device); + CLEvent event = probes.getEvent(i); + long start = event.getProfilingInfo(START); + long end = event.getProfilingInfo(END); + textRenderer.draw(device.getType().toString() + i + " " + + (int)((end - start) / 1000000.0f) + "ms @" + + (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i))); + } + + textRenderer.endRendering(); + */ + } + private void handleIO() { if ( Keyboard.getNumKeyboardEvents() != 0 ) { while ( Keyboard.next() ) { @@ -282,7 +797,8 @@ public class DemoFractal { if ( eventBtn == -1 ) { final int dwheel = Mouse.getEventDWheel(); if ( dwheel != 0 ) { - double scale = dwheel > 0 ? 0.05 : -0.05; + double scaleFactor = Keyboard.isKeyDown(Keyboard.KEY_LCONTROL) || Keyboard.isKeyDown(Keyboard.KEY_RCONTROL) ? 0.25 : 0.05; + double scale = dwheel > 0 ? scaleFactor : -scaleFactor; double deltaX = scale * (maxX - minX); double deltaY = scale * (maxY - minY); @@ -305,146 +821,9 @@ public class DemoFractal { } } - public void init() { - try { - CL.create(); - Display.setDisplayMode(new DisplayMode(width, height)); - Display.setTitle("OpenCL Fractal Demo"); - Display.create(); - } catch (LWJGLException e) { - throw new RuntimeException(e); - } - - try { - initCL(Display.getDrawable()); - } catch (Exception e) { - if ( clContext != null ) - clReleaseContext(clContext); - Display.destroy(); - throw new RuntimeException(e); - } - - Display.setSwapInterval(0); - glDisable(GL_DEPTH_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - - initView(Display.getDisplayMode().getWidth(), Display.getDisplayMode().getHeight()); - - initPBO(); - glFinish(); - - setKernelConstants(); - } - - private void initCL(Drawable drawable) throws Exception { - // Find a platform - List platforms = CLPlatform.getPlatforms(); - if ( platforms == null ) - throw new RuntimeException("No OpenCL platforms found."); - - final CLPlatform platform = platforms.get(0); // just grab the first one - - // Find devices with GL sharing support - final Filter glSharingFilter = new Filter() { - public boolean accept(final CLDevice device) { - final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); - return caps.CL_KHR_gl_sharing; - } - }; - List devices = platform.getDevices(CL_DEVICE_TYPE_GPU, glSharingFilter); - if ( devices == null ) { - devices = platform.getDevices(CL_DEVICE_TYPE_CPU, glSharingFilter); - if ( devices == null ) - throw new RuntimeException("No OpenCL devices found with KHR_gl_sharing support."); - } - - // Create the context - final PointerBuffer deviceIDs = BufferUtils.createPointerBuffer(devices.size()); - for ( CLDevice device : devices ) - deviceIDs.put(device); - deviceIDs.flip(); - - final PointerBuffer contextProps = BufferUtils.createPointerBuffer(2 + 4 + 1); - contextProps.put(CL_CONTEXT_PLATFORM).put(platform); - - drawable.setCLSharingProperties(contextProps); // Enable GL sharing - - contextProps.put(0); - contextProps.flip(); - clContext = clCreateContext(contextProps, deviceIDs, null, null); - - slices = min(devices.size(), MAX_PARALLELISM_LEVEL); - - // create command queues for every GPU, setup colormap and init kernels - queues = new CLCommandQueue[slices]; - kernels = new CLKernel[slices]; - colorMap = new CLMem[slices]; - colorMapBuffer = new IntBuffer[slices]; - - for ( int i = 0; i < slices; i++ ) { - colorMapBuffer[i] = BufferUtils.createIntBuffer(32 * 2); - colorMap[i] = clCreateBuffer(clContext, CL_MEM_READ_ONLY, colorMapBuffer[i].capacity() * 4, null); - colorMap[i].checkValid(); - - initColorMap(colorMapBuffer[i], 32, Color.BLUE, Color.GREEN, Color.RED); - - // create command queue and upload color map buffer on each used device - queues[i] = clCreateCommandQueue(clContext, devices.get(i), CL_QUEUE_PROFILING_ENABLE, null); - queues[i].checkValid(); - clEnqueueWriteBuffer(queues[i], colorMap[i], CL_TRUE, 0, colorMapBuffer[i], null, null); // blocking upload - - } - - // check if we have 64bit FP support on all devices - // if yes we can use only one program for all devices + one kernel per device. - // if not we will have to create (at least) one program for 32 and one for 64bit devices. - // since there are different vendor extensions for double FP we use one program per device. - // (OpenCL spec is not very clear about this usecases) - boolean all64bit = true; - for ( CLDevice device : devices ) { - if ( !isDoubleFPAvailable(device) ) { - all64bit = false; - break; - } - } - - // load program(s) - programs = new CLProgram[all64bit ? 1 : slices]; - - buildPrograms(); - - final ContextCapabilities caps = GLContext.getCapabilities(); - - System.out.println("OpenGL caps.OpenGL32 = " + caps.OpenGL32); - System.out.println("OpenGL caps.GL_ARB_sync = " + caps.GL_ARB_sync); - System.out.println("OpenGL caps.GL_ARB_cl_event = " + caps.GL_ARB_cl_event); - for ( int i = 0; i < devices.size(); i++ ) { - System.out.println("Device #" + (i + 1) + " supports KHR_gl_event = " + CLCapabilities.getDeviceCapabilities(devices.get(i)).CL_KHR_gl_event); - } - - // Detect GLtoCL synchronization method - syncGLtoCL = caps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied - if ( syncGLtoCL ) { - clEvents = new CLEvent[slices]; - clSyncs = new GLSync[slices]; - System.out.println("GL to CL sync: Using OpenCL events"); - } else - System.out.println("GL to CL sync: Using clFinish"); - - // Detect CLtoGL synchronization method - syncCLtoGL = caps.OpenGL32 || caps.GL_ARB_sync; - if ( syncCLtoGL ) { - for ( CLDevice device : devices ) { - if ( !CLCapabilities.getDeviceCapabilities(device).CL_KHR_gl_event ) { - syncCLtoGL = false; - break; - } - } - } - if ( syncCLtoGL ) { - System.out.println("CL to GL sync: Using OpenGL sync objects"); - } else - System.out.println("CL to GL sync: Using glFinish"); + private static boolean isDoubleFPAvailable(CLDevice device) { + final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); + return caps.CL_KHR_fp64 || caps.CL_AMD_fp64; } private void createPrograms() throws IOException { @@ -492,10 +871,9 @@ public class DemoFractal { int r = (int)(r0 + alpha * deltaR); int g = (int)(g0 + alpha * deltaG); int b = (int)(b0 + alpha * deltaB); - colorMap.put((r << 16) | (g << 8) | (b << 0)); + colorMap.put((r << 0) | (g << 8) | (b << 16)); } } - colorMap.rewind(); } private static void initView(int width, int height) { @@ -509,219 +887,4 @@ public class DemoFractal { glOrtho(0.0, width, 0.0, height, 0.0, 1.0); } - private void initPBO() { - if ( pboBuffers == null ) { - pboBuffers = new CLMem[slices]; - pboIDs = BufferUtils.createIntBuffer(slices); - } else { - for ( CLMem pboBuffer : pboBuffers ) - clReleaseMemObject(pboBuffer); - glDeleteBuffers(pboIDs); - } - - glGenBuffers(pboIDs); - - // setup one empty PBO per slice - for ( int i = 0; i < slices; i++ ) { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i)); - glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 4 / slices, GL_STREAM_DRAW); - - pboBuffers[i] = clCreateFromGLBuffer(clContext, CL_MEM_WRITE_ONLY, pboIDs.get(i), null); - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - buffersInitialized = true; - } - - private void buildPrograms() { - /* - * workaround: The driver keeps using the old binaries for some reason. - * to solve this we simple create a new program and release the old. - * however rebuilding programs should be possible -> remove when drivers are fixed. - * (again: the spec is not very clear about this kind of usages) - */ - if ( programs[0] != null ) { - for ( CLProgram program : programs ) - clReleaseProgram(program); - } - - try { - createPrograms(); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // disable 64bit floating point math if not available - for ( int i = 0; i < programs.length; i++ ) { - final CLDevice device = queues[i].getCLDevice(); - - final StringBuilder options = new StringBuilder("-cl-fast-relaxed-math"); - final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); - if ( doublePrecision && isDoubleFPAvailable(device) ) { - //cl_khr_fp64 - options.append(" -D DOUBLE_FP"); - - //amd's verson of double precision floating point math - if ( !caps.CL_KHR_fp64 && caps.CL_AMD_fp64 ) - options.append(" -D AMD_FP"); - } - - System.out.println("COMPILER OPTIONS: " + options); - - clBuildProgram(programs[i], device, options, null); - } - - rebuild = false; - - for ( int i = 0; i < kernels.length; i++ ) { - // init kernel with constants - kernels[i] = clCreateKernel(programs[min(i, programs.length)], "mandelbrot", null); - } - } - - // init kernels with constants - - private void setKernelConstants() { - for ( int i = 0; i < slices; i++ ) { - kernels[i] - .setArg(6, pboBuffers[i]) - .setArg(7, colorMap[i]) - .setArg(8, colorMapBuffer[i].capacity()) - .setArg(9, MAX_ITERATIONS); - } - } - - // rendering cycle - - public void display() { - // TODO: Need to clean-up events, test when ARB_cl_events & KHR_gl_event are implemented. - - // make sure GL does not use our objects before we start computing - if ( syncCLtoGL ) { - for ( final CLCommandQueue queue : queues ) - clEnqueueWaitForEvents(queue, glEvent); - } else - glFinish(); - - if ( !buffersInitialized ) { - initPBO(); - setKernelConstants(); - } - - if ( rebuild ) { - buildPrograms(); - setKernelConstants(); - } - compute(doublePrecision); - - render(); - } - - // OpenCL - - private void compute(final boolean is64bit) { - int sliceWidth = (int)(width / (float)slices); - double rangeX = (maxX - minX) / slices; - double rangeY = (maxY - minY); - - kernel2DGlobalWorkSize.put(0, sliceWidth).put(1, height); - - // start computation - for ( int i = 0; i < slices; i++ ) { - kernels[i].setArg(0, sliceWidth).setArg(1, height); - if ( !is64bit || !isDoubleFPAvailable(queues[i].getCLDevice()) ) { - kernels[i] - .setArg(2, (float)(minX + rangeX * i)).setArg(3, (float)minY) - .setArg(4, (float)rangeX).setArg(5, (float)rangeY); - } else { - kernels[i] - .setArg(2, minX + rangeX * i).setArg(3, minY) - .setArg(4, rangeX).setArg(5, rangeY); - } - - // aquire GL objects, and enqueue a kernel with a probe from the list - clEnqueueAcquireGLObjects(queues[i], pboBuffers[i], null, null); - - clEnqueueNDRangeKernel(queues[i], kernels[i], 2, - null, - kernel2DGlobalWorkSize, - null, - null, null); - - clEnqueueReleaseGLObjects(queues[i], pboBuffers[i], null, syncBuffer); - if ( syncGLtoCL ) { - clEvents[i] = queues[i].getCLEvent(syncBuffer.get(0)); - clSyncs[i] = glCreateSyncFromCLeventARB(queues[i].getParent(), clEvents[i], 0); - } - } - - // block until done (important: finish before doing further gl work) - if ( !syncGLtoCL ) { - for ( int i = 0; i < slices; i++ ) - clFinish(queues[i]); - } - } - - // OpenGL - - private void render() { - glClear(GL_COLOR_BUFFER_BIT); - - if ( syncGLtoCL ) { - for ( int i = 0; i < slices; i++ ) - glWaitSync(clSyncs[i], 0, 0); - } - - //draw slices - int sliceWidth = width / slices; - - for ( int i = 0; i < slices; i++ ) { - int seperatorOffset = drawSeparator ? i : 0; - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIDs.get(i)); - glRasterPos2i(sliceWidth * i + seperatorOffset, 0); - - glDrawPixels(sliceWidth, height, GL_BGRA, GL_UNSIGNED_BYTE, 0); - - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - if ( syncCLtoGL ) { - glSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glEvent = clCreateEventFromGLsyncKHR(clContext, glSync, null); - - } - - //draw info text - /* - textRenderer.beginRendering(width, height, false); - - textRenderer.draw("device/time/precision", 10, height - 15); - - for ( int i = 0; i < slices; i++ ) { - CLDevice device = queues[i].getDevice(); - boolean doubleFP = doublePrecision && isDoubleFPAvailable(device); - CLEvent event = probes.getEvent(i); - long start = event.getProfilingInfo(START); - long end = event.getProfilingInfo(END); - textRenderer.draw(device.getType().toString() + i + " " - + (int)((end - start) / 1000000.0f) + "ms @" - + (doubleFP ? "64bit" : "32bit"), 10, height - (20 + 16 * (slices - i))); - } - - textRenderer.endRendering(); - */ - } - - private static boolean isDoubleFPAvailable(CLDevice device) { - final CLDeviceCapabilities caps = CLCapabilities.getDeviceCapabilities(device); - return caps.CL_KHR_fp64 || caps.CL_AMD_fp64; - } - - public static void main(String args[]) { - DemoFractal demo = new DemoFractal(512, 512); - demo.init(); - demo.run(); - } - } \ No newline at end of file diff --git a/src/java/org/lwjgl/test/opencl/gl/Mandelbrot.cl b/src/java/org/lwjgl/test/opencl/gl/Mandelbrot.cl index 2fc959c2..c9a9eeb6 100644 --- a/src/java/org/lwjgl/test/opencl/gl/Mandelbrot.cl +++ b/src/java/org/lwjgl/test/opencl/gl/Mandelbrot.cl @@ -9,18 +9,24 @@ typedef float varfloat; #endif +#ifdef USE_TEXTURE + typedef __write_only image2d_t OUTPUT_TYPE; +#else + typedef global uint * OUTPUT_TYPE; +#endif + /** * For a description of this algorithm please refer to * http://en.wikipedia.org/wiki/Mandelbrot_set * @author Michael Bien */ kernel void mandelbrot( - const int width, const int height, - const varfloat x0, const varfloat y0, - const varfloat rangeX, const varfloat rangeY, - global uint *output, global uint *colorMap, - const int colorMapSize, const int maxIterations) { - + const int width, const int height, + const varfloat x0, const varfloat y0, + const varfloat rangeX, const varfloat rangeY, + OUTPUT_TYPE output, global uint *colorMap, + const int colorMapSize, const int maxIterations +) { unsigned int ix = get_global_id(0); unsigned int iy = get_global_id(1); @@ -33,7 +39,7 @@ kernel void mandelbrot( varfloat magnitudeSquared = 0; int iteration = 0; - while (magnitudeSquared < 4 && iteration < maxIterations) { + while ( magnitudeSquared < 4 && iteration < maxIterations ) { varfloat x2 = x*x; varfloat y2 = y*y; y = 2 * x * y + i; @@ -42,14 +48,31 @@ kernel void mandelbrot( iteration++; } - if (iteration == maxIterations) { - output[iy * width + ix] = 0; - }else { + if ( iteration == maxIterations ) { + #ifdef USE_TEXTURE + write_imagef(output, (int2)(ix, iy), (float4)0); + #else + output[iy * width + ix] = 0; + #endif + } else { varfloat alpha = (varfloat)iteration / maxIterations; int colorIndex = (int)(alpha * colorMapSize); - output[iy * width + ix] = colorMap[colorIndex]; - // monochrom - // output[iy * width + ix] = 255*iteration/maxIterations; + #ifdef USE_TEXTURE + // We could have changed colorMap to a texture + sampler, but the + // unpacking below has minimal overhead and it's kinda interesting. + // We could also use an R32UI texture and do the unpacking in GLSL, + // but then we'd require OpenGL 3.0 (GLSL 1.30). + uint c = colorMap[colorIndex]; + float3 oc = (float3)( + (c & 0xFF) >> 0, + (c & 0xFF00) >> 8, + (c & 0xFF0000) >> 16 + ); + write_imagef(output, (int2)(ix, iy), (float4)(oc / 255.0, 1.0)); + #else + output[iy * width + ix] = colorMap[colorIndex]; + #endif + // monochrom + //output[iy * width + ix] = 255*iteration/maxIterations; } - } \ No newline at end of file diff --git a/src/java/org/lwjgl/util/generator/JavaMethodsGenerator.java b/src/java/org/lwjgl/util/generator/JavaMethodsGenerator.java index fa091789..f18d9a95 100644 --- a/src/java/org/lwjgl/util/generator/JavaMethodsGenerator.java +++ b/src/java/org/lwjgl/util/generator/JavaMethodsGenerator.java @@ -94,7 +94,7 @@ public class JavaMethodsGenerator { private static void printJavaNativeStub(PrintWriter writer, MethodDeclaration method, Mode mode, boolean generate_error_checks, boolean context_specific) { if (Utils.isMethodIndirect(generate_error_checks, context_specific, method)) { - writer.print("\tprivate static native "); + writer.print("\tstatic native "); } else { Utils.printDocComment(writer, method); writer.print("\tpublic static native "); diff --git a/src/templates/org/lwjgl/opengl/EXT_direct_state_access.java b/src/templates/org/lwjgl/opengl/EXT_direct_state_access.java index 98e5ded2..f0ab746d 100644 --- a/src/templates/org/lwjgl/opengl/EXT_direct_state_access.java +++ b/src/templates/org/lwjgl/opengl/EXT_direct_state_access.java @@ -520,11 +520,11 @@ public interface EXT_direct_state_access { value parameters */ - @Optional(reason = "AMD does not expose this (last driver checked: 10.5)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @Dependent("OpenGL30") void glEnableClientStateiEXT(@GLenum int array, @GLuint int index); - @Optional(reason = "AMD does not expose this (last driver checked: 10.5)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @Dependent("OpenGL30") void glDisableClientStateiEXT(@GLenum int array, @GLuint int index); @@ -566,7 +566,7 @@ public interface EXT_direct_state_access { and before state value parameters */ - @Optional(reason = "AMD does not expose this (last driver checked: 10.5)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @Dependent("OpenGL30") @StripPostfix("params") void glGetFloati_vEXT(@GLenum int pname, @GLuint int index, @OutParameter @Check("16") FloatBuffer params); @@ -577,7 +577,7 @@ public interface EXT_direct_state_access { @StripPostfix("params") void glGetFloati_vEXT2(@GLenum int pname, @GLuint int index, @OutParameter FloatBuffer params); - @Optional(reason = "AMD does not expose this (last driver checked: 10.5)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @Dependent("OpenGL30") @StripPostfix("params") void glGetDoublei_vEXT(@GLenum int pname, @GLuint int index, @OutParameter @Check("16") DoubleBuffer params); @@ -588,7 +588,7 @@ public interface EXT_direct_state_access { @StripPostfix("params") void glGetDoublei_vEXT2(@GLenum int pname, @GLuint int index, @OutParameter DoubleBuffer params); - @Optional(reason = "AMD does not expose this (last driver checked: 10.5)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @Dependent("OpenGL30") @StripPostfix(value = "params", hasPostfix = false) void glGetPointeri_vEXT(@GLenum int pname, @GLuint int index, @Result @GLvoid ByteBuffer params); @@ -817,6 +817,7 @@ public interface EXT_direct_state_access { @GLvoid ByteBuffer data); + @Dependent("OpenGL13") void glGetCompressedMultiTexImageEXT(@GLenum int texunit, @GLenum int target, int level, @OutParameter @@ -826,15 +827,6 @@ public interface EXT_direct_state_access { @GLshort @GLint Buffer img); - @Dependent("OpenGL13") - void glGetCompressedMultiTexImage(@GLenum int texunit, @GLenum int target, int level, - @OutParameter - @BufferObject(BufferKind.PackPBO) - @Check - @GLbyte - @GLshort - @GLint Buffer img); - /* OpenGL 1.3: New transpose matrix commands add "Matrix" suffix to name, drops "Matrix" suffix from name, and add initial "enum diff --git a/src/templates/org/lwjgl/opengl/GL40.java b/src/templates/org/lwjgl/opengl/GL40.java index a11ef8d1..e7c1e26e 100644 --- a/src/templates/org/lwjgl/opengl/GL40.java +++ b/src/templates/org/lwjgl/opengl/GL40.java @@ -184,6 +184,7 @@ public interface GL40 { */ int GL_MIN_SAMPLE_SHADING_VALUE = 0x8C37; + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") void glMinSampleShading(@GLclampf float value); // --------------------------------------------------------------------- diff --git a/src/templates/org/lwjgl/opengl/NV_half_float.java b/src/templates/org/lwjgl/opengl/NV_half_float.java index 1a48662c..b0eada44 100644 --- a/src/templates/org/lwjgl/opengl/NV_half_float.java +++ b/src/templates/org/lwjgl/opengl/NV_half_float.java @@ -98,42 +98,42 @@ public interface NV_half_float { @NoErrorCheck void glSecondaryColor3hNV(@GLhalf short red, @GLhalf short green, @GLhalf short blue); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck void glVertexWeighthNV(@GLhalf short weight); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck void glVertexAttrib1hNV(@GLuint int index, @GLhalf short x); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck void glVertexAttrib2hNV(@GLuint int index, @GLhalf short x, @GLhalf short y); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck void glVertexAttrib3hNV(@GLuint int index, @GLhalf short x, @GLhalf short y, @GLhalf short z); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck void glVertexAttrib4hNV(@GLuint int index, @GLhalf short x, @GLhalf short y, @GLhalf short z, @GLhalf short w); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck @StripPostfix("attribs") void glVertexAttribs1hvNV(@GLuint int index, @AutoSize("attribs") @GLsizei int n, @Const @GLhalf ShortBuffer attribs); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck @StripPostfix("attribs") void glVertexAttribs2hvNV(@GLuint int index, @AutoSize(value = "attribs", expression = " >> 1") @GLsizei int n, @Const @GLhalf ShortBuffer attribs); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck @StripPostfix("attribs") void glVertexAttribs3hvNV(@GLuint int index, @AutoSize(value = "attribs", expression = " / 3") @GLsizei int n, @Const @GLhalf ShortBuffer attribs); - @Optional(reason = "AMD does not expose this (last driver checked: 10.3)") + @Optional(reason = "AMD does not expose this (last driver checked: 10.9)") @NoErrorCheck @StripPostfix("attribs") void glVertexAttribs4hvNV(@GLuint int index, @AutoSize(value = "attribs", expression = " >> 2") @GLsizei int n, @Const @GLhalf ShortBuffer attribs);