[FIXED] OpenCL CL12.clCreateImage has few bugs

Started by snoukkis, March 09, 2013, 18:42:56

Previous topic - Next topic

snoukkis

Hi!

Great library this LWJGL. Been testing it for a while now. I've taken some interest in OpenCL and started learning yesterday.

After some frustrating debugging and comparing with Visual C version of my program I think I found some problems with CL12.clCreateImage method.

Here's the relevant portion from the template:
@Check(value = "errcode_ret", canBeNull = true)
@PointerWrapper(value = "cl_mem", params = "context")
CLMem clCreateImage(@PointerWrapper("cl_context") CLContext context,
                    @NativeType("cl_mem_flags") long flags,
                    @Check("2 * 4") @Const @NativeType("cl_image_format") ByteBuffer image_format,
                    @Check("3 * 4 + 7 * PointerBuffer.getPointerSize()") @Const @NativeType("cl_image_desc") ByteBuffer image_desc,
                    @Check
                    @cl_byte
                    @cl_short
                    @cl_int
                    @cl_float Buffer host_ptr,
                    @OutParameter @Check(value = "1", canBeNull = true) @cl_int IntBuffer errcode_ret);


Problem #1:
On 64-bit Windows the cl_image_desc struct is actually 72 bytes, not 3x4+7x8=68. There seems to be padding after the first 32-bit cl_mem_object_type field to align the following 64-bit fields.

Here's the struct from AMD-APP-SDK-v2.8-Windows-64:
typedef struct _cl_image_desc {
    cl_mem_object_type      image_type;
    size_t                  image_width;
    size_t                  image_height;
    size_t                  image_depth;
    size_t                  image_array_size;
    size_t                  image_row_pitch;
    size_t                  image_slice_pitch;
    cl_uint                 num_mip_levels;
    cl_uint                 num_samples;
    cl_mem                  buffer;
} cl_image_desc;


Problem #2:
The parameter Buffer host_ptr should be nullable. It may be null when creating a result image.

See e.g. SimpleImage sample in AMD APP SDK samples:
   // Create 2D input image
    inputImage2D = clCreateImage(context,
                                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                 &imageFormat,
                                 &imageDesc,
                                 inputImageData,
                                 &status);
    CHECK_OPENCL_ERROR(status,"clCreateImage failed. (inputImage2D)");

    // Create 2D output image
    outputImage2D = clCreateImage(context,
                                  CL_MEM_WRITE_ONLY,
                                  &imageFormat,
                                  &imageDesc,
                                  0,
                                  &status);
    CHECK_OPENCL_ERROR(status,"clCreateImage failed. (outputImage2D)");



Other stuff:


my environment:

  • LWJGL 2.8.4 (OpenCL seems to be unchanged in 2.8.5)
  • 64-bit Windows 7
  • AMD Radeon HD 7950
  • java 7 64-bit
  • visual C (2010)

Oh, and if you find it useful, I created a class to represent the cl_image_desc struct. It's conceptually modeled after CLImageFormat class. Additionally it can output itself as a ByteBuffer (with the necessary padding on 64-bit machines). Field comments are from http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/imageDescriptor.html
Feel free to do whatever you want with it. Since I'm still learning OpenCL (started yesterday), I won't give any warranties/promises ;).

import java.nio.ByteBuffer;

import org.lwjgl.BufferUtils;
import org.lwjgl.MemoryUtil;
import org.lwjgl.PointerBuffer;

/**
 * Simple container for cl_image_desc struct values.
 *
 * @author Uku Hirvesoo
 */
public final class CLImageDesc
{
	/** The cl_image_desc struct size in bytes. */
	public final int STRUCT_SIZE;
	
	// Describes the image type and must be either CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, or CL_MEM_OBJECT_IMAGE3D. 
	private final int image_type; // cl_mem_object_type = cl_uint
	
	// The width of the image in pixels. For a 2D image and image array, the image width must be ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH. For a 3D image, the image width must be ≤ CL_DEVICE_IMAGE3D_MAX_WIDTH. For a 1D image buffer, the image width must be ≤ CL_DEVICE_IMAGE_MAX_BUFFER_SIZE. For a 1D image and 1D image array, the image width must be ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH.
	private final long image_width; // size_t
	
	// The height of the image in pixels. This is only used if the image is a 2D, 3D or 2D image array. For a 2D image or image array, the image height must be ≤ CL_DEVICE_IMAGE2D_MAX_HEIGHT. For a 3D image, the image height must be ≤ CL_DEVICE_IMAGE3D_MAX_HEIGHT.
	private final long image_height; // size_t
	
	// The depth of the image in pixels. This is only used if the image is a 3D image and must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE3D_MAX_DEPTH.
	private final long image_depth; // size_t
	
	// The number of images in the image array. This is only used if the image is a 1D or 2D image array. The values for image_array_size, if specified, must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE_MAX_ARRAY_SIZE.
    // Note that reading and writing 2D image arrays from a kernel with image_array_size = 1 may be lower performance than 2D images.
	private final long image_array_size; // size_t
	
	// The scan-line pitch in bytes. This must be 0 if host_ptr is NULL and can be either 0 or ≥ image_width * size of element in bytes if host_ptr is not NULL. If host_ptr is not NULL and image_row_pitch = 0, image_row_pitch is calculated as image_width * size of element in bytes. If image_row_pitch is not 0, it must be a multiple of the image element size in bytes.
	private final long image_row_pitch; // size_t
	
	// The size in bytes of each 2D slice in the 3D image or the size in bytes of each image in a 1D or 2D image array. This must be 0 if host_ptr is NULL. If host_ptr is not NULL, image_slice_pitch can be either 0 or ≥ image_row_pitch * image_height for a 2D image array or 3D image and can be either 0 or ≥ image_row_pitch for a 1D image array. If host_ptr is not NULL and image_slice_pitch = 0, image_slice_pitch is calculated as image_row_pitch * image_height for a 2D image array or 3D image and image_row_pitch for a 1D image array. If image_slice_pitch is not 0, it must be a multiple of the image_row_pitch.
	private final long image_slice_pitch; // size_t
	
	// Must be 0.
	private final int num_mip_levels = 0; // cl_uint
	
	// Must be 0.
	private final int num_samples = 0; // cl_uint
	
	// Refers to a valid buffer memory object if image_type is CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. For a 1D image buffer object, the image pixels are taken from the buffer object's data store. When the contents of a buffer object's data store are modified, those changes are reflected in the contents of the 1D image buffer object and vice-versa at corresponding sychronization points. The image_width * size of element in bytes must be ≤ size of buffer object data store.
	private final PointerBuffer buffer; // cl_mem
	
	public CLImageDesc(final int image_type, final int image_width, final int image_height, final int image_depth, final int image_array_size, final int image_row_pitch, final int image_slice_pitch, final PointerBuffer buffer)
	{
		if(PointerBuffer.getPointerSize() == 4)
		{
			STRUCT_SIZE = 40; // 10 x 4 byte fields in struct
		}
		else if(PointerBuffer.getPointerSize() == 8)
		{
			STRUCT_SIZE = 72; // 3 * 4 byte fields + 7 * 8 byte fields + 4 byte padding after first int;
		}
		else
		{
			throw new RuntimeException("strange environment");
		}
		
		this.image_type = image_type;
		this.image_width = image_width;
		this.image_height = image_height;
		this.image_depth = image_depth;
		this.image_array_size = image_array_size;
		this.image_row_pitch = image_row_pitch;
		this.image_slice_pitch = image_slice_pitch;
		this.buffer = buffer;
	}
	
	public int getImage_type()
	{
		return image_type;
	}

	public long getImage_width()
	{
		return image_width;
	}

	public long getImage_height()
	{
		return image_height;
	}

	public long getImage_depth()
	{
		return image_depth;
	}

	public long getImage_array_size()
	{
		return image_array_size;
	}

	public long getImage_row_pitch()
	{
		return image_row_pitch;
	}

	public long getImage_slice_pitch()
	{
		return image_slice_pitch;
	}

	public int getNum_mip_levels()
	{
		return num_mip_levels;
	}

	public int getNum_samples()
	{
		return num_samples;
	}

	public PointerBuffer getBuffer()
	{
		return buffer;
	}

	public ByteBuffer getAsByteBuffer()
	{
		ByteBuffer ret = BufferUtils.createByteBuffer(STRUCT_SIZE);
		
		if(PointerBuffer.getPointerSize() == 4)
		{
			ret.putInt(image_type);
			ret.putInt((int)image_width);
			ret.putInt((int)image_height);
			ret.putInt((int)image_depth);
			ret.putInt((int)image_array_size);
			ret.putInt((int)image_row_pitch);
			ret.putInt((int)image_slice_pitch);
			ret.putInt(num_mip_levels);
			ret.putInt(num_samples);
			ret.putInt((int)MemoryUtil.getAddressSafe(buffer, 0));
		}
		else if(PointerBuffer.getPointerSize() == 8)
		{
			ret.putInt(image_type);
			ret.putInt(0xCCCCCCCC); // random padding to align the following longs
			ret.putLong(image_width);
			ret.putLong(image_height);
			ret.putLong(image_depth);
			ret.putLong(image_array_size);
			ret.putLong(image_row_pitch);
			ret.putLong(image_slice_pitch);
			ret.putInt(num_mip_levels);
			ret.putInt(num_samples);
			ret.putLong(MemoryUtil.getAddressSafe(buffer, 0));
		}
		else
		{
			throw new RuntimeException("strange environment");
		}
		
		ret.rewind();
		
		return ret;
	}
}

spasi

Thank you snoukkis, both issues have been fixed.