Hi!
Great library this LWJGL. Been testing it for a while now. I've taken some interest in OpenCL and started learning yesterday.
After some frustrating debugging and comparing with Visual C version of my program I think I found some problems with
CL12.clCreateImage method.
Here's the relevant portion from the template:
@Check(value = "errcode_ret", canBeNull = true)
@PointerWrapper(value = "cl_mem", params = "context")
CLMem clCreateImage(@PointerWrapper("cl_context") CLContext context,
@NativeType("cl_mem_flags") long flags,
@Check("2 * 4") @Const @NativeType("cl_image_format") ByteBuffer image_format,
@Check("3 * 4 + 7 * PointerBuffer.getPointerSize()") @Const @NativeType("cl_image_desc") ByteBuffer image_desc,
@Check
@cl_byte
@cl_short
@cl_int
@cl_float Buffer host_ptr,
@OutParameter @Check(value = "1", canBeNull = true) @cl_int IntBuffer errcode_ret);
Problem #1:On 64-bit Windows the
cl_image_desc struct is actually 72 bytes, not 3x4+7x8=68. There seems to be padding after the first 32-bit
cl_mem_object_type field to align the following 64-bit fields.
Here's the struct from AMD-APP-SDK-v2.8-Windows-64:
typedef struct _cl_image_desc {
cl_mem_object_type image_type;
size_t image_width;
size_t image_height;
size_t image_depth;
size_t image_array_size;
size_t image_row_pitch;
size_t image_slice_pitch;
cl_uint num_mip_levels;
cl_uint num_samples;
cl_mem buffer;
} cl_image_desc;
Problem #2:The parameter
Buffer host_ptr should be nullable. It may be null when creating a result image.
See e.g. SimpleImage sample in AMD APP SDK samples:
// Create 2D input image
inputImage2D = clCreateImage(context,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
&imageFormat,
&imageDesc,
inputImageData,
&status);
CHECK_OPENCL_ERROR(status,"clCreateImage failed. (inputImage2D)");
// Create 2D output image
outputImage2D = clCreateImage(context,
CL_MEM_WRITE_ONLY,
&imageFormat,
&imageDesc,
0,
&status);
CHECK_OPENCL_ERROR(status,"clCreateImage failed. (outputImage2D)");
Other stuff:my environment:
- LWJGL 2.8.4 (OpenCL seems to be unchanged in 2.8.5)
- 64-bit Windows 7
- AMD Radeon HD 7950
- java 7 64-bit
- visual C (2010)
Oh, and if you find it useful, I created a class to represent the
cl_image_desc struct. It's conceptually modeled after
CLImageFormat class. Additionally it can output itself as a ByteBuffer (with the necessary padding on 64-bit machines). Field comments are from http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/imageDescriptor.html (http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/imageDescriptor.html)
Feel free to do whatever you want with it. Since I'm still learning OpenCL (started yesterday), I won't give any warranties/promises ;).
import java.nio.ByteBuffer;
import org.lwjgl.BufferUtils;
import org.lwjgl.MemoryUtil;
import org.lwjgl.PointerBuffer;
/**
* Simple container for cl_image_desc struct values.
*
* @author Uku Hirvesoo
*/
public final class CLImageDesc
{
/** The cl_image_desc struct size in bytes. */
public final int STRUCT_SIZE;
// Describes the image type and must be either CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, or CL_MEM_OBJECT_IMAGE3D.
private final int image_type; // cl_mem_object_type = cl_uint
// The width of the image in pixels. For a 2D image and image array, the image width must be ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH. For a 3D image, the image width must be ≤ CL_DEVICE_IMAGE3D_MAX_WIDTH. For a 1D image buffer, the image width must be ≤ CL_DEVICE_IMAGE_MAX_BUFFER_SIZE. For a 1D image and 1D image array, the image width must be ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH.
private final long image_width; // size_t
// The height of the image in pixels. This is only used if the image is a 2D, 3D or 2D image array. For a 2D image or image array, the image height must be ≤ CL_DEVICE_IMAGE2D_MAX_HEIGHT. For a 3D image, the image height must be ≤ CL_DEVICE_IMAGE3D_MAX_HEIGHT.
private final long image_height; // size_t
// The depth of the image in pixels. This is only used if the image is a 3D image and must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE3D_MAX_DEPTH.
private final long image_depth; // size_t
// The number of images in the image array. This is only used if the image is a 1D or 2D image array. The values for image_array_size, if specified, must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE_MAX_ARRAY_SIZE.
// Note that reading and writing 2D image arrays from a kernel with image_array_size = 1 may be lower performance than 2D images.
private final long image_array_size; // size_t
// The scan-line pitch in bytes. This must be 0 if host_ptr is NULL and can be either 0 or ≥ image_width * size of element in bytes if host_ptr is not NULL. If host_ptr is not NULL and image_row_pitch = 0, image_row_pitch is calculated as image_width * size of element in bytes. If image_row_pitch is not 0, it must be a multiple of the image element size in bytes.
private final long image_row_pitch; // size_t
// The size in bytes of each 2D slice in the 3D image or the size in bytes of each image in a 1D or 2D image array. This must be 0 if host_ptr is NULL. If host_ptr is not NULL, image_slice_pitch can be either 0 or ≥ image_row_pitch * image_height for a 2D image array or 3D image and can be either 0 or ≥ image_row_pitch for a 1D image array. If host_ptr is not NULL and image_slice_pitch = 0, image_slice_pitch is calculated as image_row_pitch * image_height for a 2D image array or 3D image and image_row_pitch for a 1D image array. If image_slice_pitch is not 0, it must be a multiple of the image_row_pitch.
private final long image_slice_pitch; // size_t
// Must be 0.
private final int num_mip_levels = 0; // cl_uint
// Must be 0.
private final int num_samples = 0; // cl_uint
// Refers to a valid buffer memory object if image_type is CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. For a 1D image buffer object, the image pixels are taken from the buffer object's data store. When the contents of a buffer object's data store are modified, those changes are reflected in the contents of the 1D image buffer object and vice-versa at corresponding sychronization points. The image_width * size of element in bytes must be ≤ size of buffer object data store.
private final PointerBuffer buffer; // cl_mem
public CLImageDesc(final int image_type, final int image_width, final int image_height, final int image_depth, final int image_array_size, final int image_row_pitch, final int image_slice_pitch, final PointerBuffer buffer)
{
if(PointerBuffer.getPointerSize() == 4)
{
STRUCT_SIZE = 40; // 10 x 4 byte fields in struct
}
else if(PointerBuffer.getPointerSize() == 8)
{
STRUCT_SIZE = 72; // 3 * 4 byte fields + 7 * 8 byte fields + 4 byte padding after first int;
}
else
{
throw new RuntimeException("strange environment");
}
this.image_type = image_type;
this.image_width = image_width;
this.image_height = image_height;
this.image_depth = image_depth;
this.image_array_size = image_array_size;
this.image_row_pitch = image_row_pitch;
this.image_slice_pitch = image_slice_pitch;
this.buffer = buffer;
}
public int getImage_type()
{
return image_type;
}
public long getImage_width()
{
return image_width;
}
public long getImage_height()
{
return image_height;
}
public long getImage_depth()
{
return image_depth;
}
public long getImage_array_size()
{
return image_array_size;
}
public long getImage_row_pitch()
{
return image_row_pitch;
}
public long getImage_slice_pitch()
{
return image_slice_pitch;
}
public int getNum_mip_levels()
{
return num_mip_levels;
}
public int getNum_samples()
{
return num_samples;
}
public PointerBuffer getBuffer()
{
return buffer;
}
public ByteBuffer getAsByteBuffer()
{
ByteBuffer ret = BufferUtils.createByteBuffer(STRUCT_SIZE);
if(PointerBuffer.getPointerSize() == 4)
{
ret.putInt(image_type);
ret.putInt((int)image_width);
ret.putInt((int)image_height);
ret.putInt((int)image_depth);
ret.putInt((int)image_array_size);
ret.putInt((int)image_row_pitch);
ret.putInt((int)image_slice_pitch);
ret.putInt(num_mip_levels);
ret.putInt(num_samples);
ret.putInt((int)MemoryUtil.getAddressSafe(buffer, 0));
}
else if(PointerBuffer.getPointerSize() == 8)
{
ret.putInt(image_type);
ret.putInt(0xCCCCCCCC); // random padding to align the following longs
ret.putLong(image_width);
ret.putLong(image_height);
ret.putLong(image_depth);
ret.putLong(image_array_size);
ret.putLong(image_row_pitch);
ret.putLong(image_slice_pitch);
ret.putInt(num_mip_levels);
ret.putInt(num_samples);
ret.putLong(MemoryUtil.getAddressSafe(buffer, 0));
}
else
{
throw new RuntimeException("strange environment");
}
ret.rewind();
return ret;
}
}