OpenCL problem, out of stack space [solved]

Started by petarts, September 30, 2016, 19:37:05

Previous topic - Next topic

petarts

When I read the Javadoc for clBuildProgram, it says
"user_data can be NULL."
I don't know how to pass null for it, so I pass zero instead, but the program then fails with the error shown below the code:
/*
 * Copyright LWJGL. All rights reserved.
 * License terms: https://www.lwjgl.org/license
 */
package Main;

import org.lwjgl.BufferUtils;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.*;
import org.lwjgl.system.MemoryStack;

import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.*;
import static Main.InfoUtil.*;
import static org.lwjgl.system.MemoryStack.*;
import static org.lwjgl.system.MemoryUtil.*;

/**
 * Minimal OpenCL demo as originally posted: takes the first platform and its first device,
 * builds a kernel named {@code sum} from inline source, enqueues it over 100 work items and
 * prints the host-side {@code out} array.
 *
 * <p>NOTE(review): this is the failing version discussed in the thread; the inline review notes
 * mark the places where the working version posted in the reply differs.
 */
public final class CLDemo {

	/** Not instantiable; the class only hosts static entry points. */
	private CLDemo() {
	}

	/**
	 * Obtains a {@link MemoryStack} via {@code stackPush()}, hands it to {@link #demo(MemoryStack)}
	 * and lets try-with-resources close it afterwards.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try ( MemoryStack stack = stackPush() ) {
			demo(stack);
		}
	}

	/**
	 * Sets up a context and command queue on the first device of the first platform, builds the
	 * {@code sum} kernel, enqueues it and prints {@code out}.
	 *
	 * @param stack stack allocator used for the count, id, property and error-code buffers
	 * @throws RuntimeException if no platform is reported, or (via {@code checkCLError}) when a
	 *                          checked call reports an error
	 */
	private static void demo(MemoryStack stack) {
		// First call passes no id buffer and only fills `pi` with the platform count.
		IntBuffer pi = stack.mallocInt(1);
		checkCLError(clGetPlatformIDs(null, pi));
		if ( pi.get(0) == 0 )
			throw new RuntimeException("No OpenCL platforms found.");
		// Second call fills a buffer sized from that count with the platform ids.
		PointerBuffer platforms = stack.mallocPointer(pi.get(0));
		checkCLError(clGetPlatformIDs(platforms, (IntBuffer)null));

		// Context properties: { CL_CONTEXT_PLATFORM, <platform>, 0 }; slot 1 is filled in below.
		PointerBuffer ctxProps = stack.mallocPointer(3);
		ctxProps
			.put(0, CL_CONTEXT_PLATFORM)
			.put(2, 0);

		IntBuffer errcode_ret = stack.callocInt(1);
			// Only the first platform is used.
			long platform = platforms.get(0);
			ctxProps.put(1, platform);

			CLCapabilities platformCaps = CL.createPlatformCapabilities(platform);

			// Same count-then-fill pattern for devices; `pi` is reused for the device count.
			// NOTE(review): unlike the platform count, a device count of 0 is not checked here.
			checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, null, pi));

			PointerBuffer devices = stack.mallocPointer(pi.get(0));
			checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, devices, (IntBuffer)null));
				// Only the first device is used.
				long device = devices.get(0);
				// NOTE(review): `caps` is not referenced again in this method.
				CLCapabilities caps = CL.createDeviceCapabilities(device, platformCaps);
				// The callback prints any error info reported for the context to stderr.
				// NOTE(review): `contextCB` is never referenced again in this method; LWJGL callback
				// objects presumably need an explicit free() - confirm.
				CLContextCallback contextCB;
				long context = clCreateContext(ctxProps, device, contextCB = CLContextCallback.create((errinfo, private_info, cb, user_data) -> {
					System.err.println("[LWJGL] cl_context_callback");
					System.err.println("\tInfo: " + memUTF8(errinfo));
				}), NULL, errcode_ret);
				checkCLError(errcode_ret);
				// NOTE(review): errcode_ret is not checked after creating the queue.
				long que=clCreateCommandQueue(context, device, NULL, errcode_ret);
				// Kernel source: result[i] = a[2*i] + a[2*i+1] for i < size.
				// NOTE(review): the qualifiers are written with a single underscore (`_kernel`,
				// `_global`); the working version in the reply uses `kernel` / `global` and flags
				// exactly this line.
				CharSequence add=
				"_kernel void sum(_global const float* a, _global float* result, int const size) {\n"+
				"	const int itemId = get_global_id(0); \n"+
				"	if(itemId < size) {\n"+
				"		result[itemId] = a[itemId*2] + a[itemId*2+1];\n"+
				"	}\n"+
				"}";
				// NOTE(review): no error-code buffer is passed, so a creation failure is not detected.
				long sumProgram=CL10.clCreateProgramWithSource(context, add, null);
				// Builds for the first device with empty options, no notify callback and 0 as user_data.
				int error = CL10.clBuildProgram(sumProgram, devices.get(0), "", null,0);
				checkCLError(error);
				// NOTE(review): a null error-code array is passed, so a failure here is not detected.
				long sumKernel=CL10.clCreateKernel(sumProgram, "sum", (int[])null);
				float[] in=new float[200];
				float[] out=new float[100];
				// NOTE(review): each iteration writes in[i] and in[i+1], and the next iteration
				// overwrites in[i+1]; the net effect is in[k] = k for k < 100 and in[100] = 99, while
				// in[101..199] are never written. The working version fills all 200 elements.
				for(int i=0;i<100;i++){
					in[i]=i;
					in[i+1]=i;
				}
				// Copy the host array into a direct buffer and reset its position for reading.
				FloatBuffer aBuff = BufferUtils.createFloatBuffer(200);
				aBuff.put(in);
				aBuff.rewind();
				IntBuffer errorBuff = BufferUtils.createIntBuffer(1); // Error buffer

				// NOTE(review): the kernel only reads `a`, yet this buffer is created WRITE_ONLY;
				// the working version uses CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR.
				long _in = CL10.clCreateBuffer(context, CL10.CL_MEM_WRITE_ONLY | CL10.CL_MEM_COPY_HOST_PTR, aBuff, errorBuff);
				checkCLError(errorBuff.get(0));
				// NOTE(review): the kernel writes `result`, yet this buffer is created READ_ONLY;
				// the working version uses CL_MEM_READ_WRITE. 400 is presumably 100 floats * 4 bytes.
				long _out = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_ONLY, 400, errorBuff);
				checkCLError(errorBuff.get(0));
				// NOTE(review): the results of the clSetKernelArg* calls are ignored here; the working
				// version wraps them in checkCLError.
				CL10.clSetKernelArg1p(sumKernel,0,_in);
				CL10.clSetKernelArg1p(sumKernel,1,_out);
				// NOTE(review): argument 2 is declared `int` in the kernel but is set with the
				// pointer-typed setter; the working version uses clSetKernelArg1i.
				CL10.clSetKernelArg1p(sumKernel, 2, 100);
				// One-dimensional launch with a global size of 100 work items.
				PointerBuffer globalWorkSize = BufferUtils.createPointerBuffer(1);
				globalWorkSize.put(0, 100);
				// NOTE(review): the result of the enqueue call is ignored here.
				clEnqueueNDRangeKernel(que, sumKernel, 1, null, globalWorkSize, null, null, null);
				CL10.clFinish(que);
				// NOTE(review): nothing in this method copies `_out` back into `out`, so this loop
				// prints the array's initial values; the working version adds clEnqueueReadBuffer.
				for(int i=0;i<100;i++){
					System.out.println(out[i]);
				}
				
	}


}


Exception in thread "main" java.lang.RuntimeException: OpenCL error [0xFFFFFFF5]
	at Main.InfoUtil.checkCLError(InfoUtil.java:130)
	at Main.CLDemo.demo(CLDemo.java:71)
	at Main.CLDemo.main(CLDemo.java:26)

Kai

You should re-read Spasi's last post.

Long story short, here is a working version of your reduction program:
private static void demo(MemoryStack stack) {
    IntBuffer counts = stack.mallocInt(1);
    checkCLError(clGetPlatformIDs(null, counts));
    int platformCount = counts.get(0);
    if (platformCount == 0)
        throw new RuntimeException("No OpenCL platforms found.");
    PointerBuffer platforms = stack.mallocPointer(platformCount);
    checkCLError(clGetPlatformIDs(platforms, (IntBuffer) null));
    PointerBuffer ctxProps = stack.mallocPointer(3);
    ctxProps.put(0, CL_CONTEXT_PLATFORM).put(2, 0);
    IntBuffer errcode_ret = stack.callocInt(1);
    long platform = platforms.get(0);
    ctxProps.put(1, platform);
    checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, null, counts));
    int deviceCount = counts.get(0);
    if (deviceCount == 0)
      throw new RuntimeException("No OpenCL devices found.");
    PointerBuffer devices = stack.mallocPointer(deviceCount);
    checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, devices, (IntBuffer) null));
    long device = devices.get(0);
    long context = clCreateContext(ctxProps, device, null, NULL, errcode_ret);
    checkCLError(errcode_ret);
    long que = clCreateCommandQueue(context, device, NULL, errcode_ret);
    checkCLError(errcode_ret);
    CharSequence add =
    "kernel void sum(global const float* a, global float* result, int const size) {\n"+ // <- 'kernel' and 'global' !
    "   const int itemId = get_global_id(0); \n"+
    "   if(itemId < size) {\n"+
    "       result[itemId] = a[itemId*2] + a[itemId*2+1];\n"+
    "   }\n"+
    "}";
    long sumProgram = CL10.clCreateProgramWithSource(context, add, null);
    checkCLError(CL10.clBuildProgram(sumProgram, devices.get(0), "", null,0));
    checkCLError(errcode_ret);
    long sumKernel = CL10.clCreateKernel(sumProgram, "sum", errcode_ret);
    checkCLError(errcode_ret);
    float[] in  = new float[200];
    float[] out = new float[100];
    for (int i = 0; i < 200; i++) {
        in[i] = i;
    }
    FloatBuffer aBuff = stack.mallocFloat(200);
    aBuff.put(in).rewind();
    long _in = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_ONLY | CL10.CL_MEM_COPY_HOST_PTR, aBuff, errcode_ret); // <- READ_ONLY !
    checkCLError(errcode_ret);
    long _out = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_WRITE, 400, errcode_ret); // <- READ_WRITE !
    checkCLError(errcode_ret);
    checkCLError(CL10.clSetKernelArg1p(sumKernel, 0, _in));
    checkCLError(CL10.clSetKernelArg1p(sumKernel, 1, _out));
    checkCLError(CL10.clSetKernelArg1i(sumKernel, 2, 100)); // <- clSetKernelArg1i !
    PointerBuffer globalWorkSize = stack.mallocPointer(1);
    globalWorkSize.put(0, 100);
    PointerBuffer kernelEvent = stack.mallocPointer(1);
    checkCLError(clEnqueueNDRangeKernel(que, sumKernel, 1, null, globalWorkSize, null, null, kernelEvent));
    PointerBuffer readEvent = stack.mallocPointer(1);
    checkCLError(clEnqueueReadBuffer(que, _out, 1, 0, out, kernelEvent, readEvent)); // <- read back results !
    checkCLError(clWaitForEvents(readEvent));
    for (int i = 0; i < 100; i++) {
        System.out.println(out[i]);
    }
}

petarts

Okay, thank you. I will now go through what you have done and see which mistakes I made in my program.