Hello Guest

OpenCL problem, out of stack space [solved]

  • 17 Replies
  • 7762 Views
Re: OpenCL problem, out of stack space
« Reply #15 on: October 02, 2016, 07:40:15 »
When I read the Javadoc for clBuildProgram, it says
"user_data can be NULL."
but I don't know how to pass NULL, so I pass zero instead, and it gives me an error:
Code: [Select]
/*
 * Copyright LWJGL. All rights reserved.
 * License terms: https://www.lwjgl.org/license
 */
package Main;

import org.lwjgl.BufferUtils;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.*;
import org.lwjgl.system.MemoryStack;

import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.opencl.CL10.*;
import static Main.InfoUtil.*;
import static org.lwjgl.system.MemoryStack.*;
import static org.lwjgl.system.MemoryUtil.*;

/**
 * Minimal OpenCL demo: picks the first platform and its first device, builds a
 * {@code sum} kernel from source, enqueues it over 100 work items and prints the
 * contents of a 100-element host array.
 *
 * <p>NOTE(review): this is the failing listing the post asks about; the code is left
 * untouched and the review notes below only point at what the statements themselves show.
 */
public final class CLDemo {

// Not instantiable: the class only has static entry points.
private CLDemo() {
}

/** Pushes a MemoryStack frame for the duration of the demo (try-with-resources closes it). */
public static void main(String[] args) {
try ( MemoryStack stack = stackPush() ) {
demo(stack);
}
}

/**
 * Runs the whole demo: platform/device lookup, context and queue creation, program
 * build, buffer setup, kernel launch, and printing of {@code out}.
 *
 * @param stack stack allocator used for the small query and property buffers
 * @throws RuntimeException if the platform count query returns zero
 */
private static void demo(MemoryStack stack) {
// Two-step query: first ask for the number of platforms, then fetch that many ids.
IntBuffer pi = stack.mallocInt(1);
checkCLError(clGetPlatformIDs(null, pi));
if ( pi.get(0) == 0 )
throw new RuntimeException("No OpenCL platforms found.");
PointerBuffer platforms = stack.mallocPointer(pi.get(0));
checkCLError(clGetPlatformIDs(platforms, (IntBuffer)null));

// Context property list: { CL_CONTEXT_PLATFORM, <platform>, 0 }; slot 1 is filled in below.
PointerBuffer ctxProps = stack.mallocPointer(3);
ctxProps
.put(0, CL_CONTEXT_PLATFORM)
.put(2, 0);

IntBuffer errcode_ret = stack.callocInt(1);
long platform = platforms.get(0);
ctxProps.put(1, platform);

CLCapabilities platformCaps = CL.createPlatformCapabilities(platform);

// Same two-step query for devices; pi is reused as the count buffer.
checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, null, pi));

PointerBuffer devices = stack.mallocPointer(pi.get(0));
checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, devices, (IntBuffer)null));
long device = devices.get(0);
// NOTE(review): caps is not read anywhere else in this method.
CLCapabilities caps = CL.createDeviceCapabilities(device, platformCaps);
// NOTE(review): contextCB is assigned inside the call below and never referenced again here.
CLContextCallback contextCB;
long context = clCreateContext(ctxProps, device, contextCB = CLContextCallback.create((errinfo, private_info, cb, user_data) -> {
System.err.println("[LWJGL] cl_context_callback");
System.err.println("\tInfo: " + memUTF8(errinfo));
}), NULL, errcode_ret);
checkCLError(errcode_ret);
// NOTE(review): errcode_ret is not checked after creating the queue.
long que=clCreateCommandQueue(context, device, NULL, errcode_ret);
// Kernel source: result[i] = a[2*i] + a[2*i+1] for i < size.
// NOTE(review): the qualifiers are spelled "_kernel" / "_global" here; the working version
// posted in reply #16 of this thread spells them "kernel" / "global" and flags that as a fix.
CharSequence add=
"_kernel void sum(_global const float* a, _global float* result, int const size) {\n"+
" const int itemId = get_global_id(0); \n"+
" if(itemId < size) {\n"+
" result[itemId] = a[itemId*2] + a[itemId*2+1];\n"+
" }\n"+
"}";
// The error-code argument is null, so a failure to create the program is not reported here.
long sumProgram=CL10.clCreateProgramWithSource(context, add, null);
// The question in the post is about the last argument (user_data), passed as literal 0.
int error = CL10.clBuildProgram(sumProgram, devices.get(0), "", null,0);
// NOTE(review): the stack trace posted with this listing shows checkCLError throwing inside
// demo(); presumably it is this call, i.e. the build itself failed (0xFFFFFFF5 is -11) — confirm.
checkCLError(error);
// Error-code argument is null again, so kernel creation is unchecked.
long sumKernel=CL10.clCreateKernel(sumProgram, "sum", (int[])null);
float[] in=new float[200];
float[] out=new float[100];
// Each pass writes in[i] and in[i+1]; the highest index written is 100, so
// in[101..199] keep their initial 0.0f.
for(int i=0;i<100;i++){
in[i]=i;
in[i+1]=i;
}
// Copy the host array into a direct buffer and rewind it so it is read from position 0.
FloatBuffer aBuff = BufferUtils.createFloatBuffer(200);
aBuff.put(in);
aBuff.rewind();
IntBuffer errorBuff = BufferUtils.createIntBuffer(1); // Error buffer

// NOTE(review): _in is bound to kernel argument 0 (a, which the kernel reads) but is created
// CL_MEM_WRITE_ONLY; _out is bound to argument 1 (result, which the kernel writes) but is
// created CL_MEM_READ_ONLY. Reply #16 uses READ_ONLY and READ_WRITE respectively.
long _in = CL10.clCreateBuffer(context, CL10.CL_MEM_WRITE_ONLY | CL10.CL_MEM_COPY_HOST_PTR, aBuff, errorBuff);
checkCLError(errorBuff.get(0));
// 400 is the buffer size in bytes — presumably 100 floats x 4 bytes.
long _out = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_ONLY, 400, errorBuff);
checkCLError(errorBuff.get(0));
// Return codes of the three clSetKernelArg1p calls are not checked.
CL10.clSetKernelArg1p(sumKernel,0,_in);
CL10.clSetKernelArg1p(sumKernel,1,_out);
// NOTE(review): argument 2 is declared "int const size" in the kernel source but is set with
// the same 1p call used for the buffer handles; reply #16 uses clSetKernelArg1i here.
CL10.clSetKernelArg1p(sumKernel, 2, 100);
// One-dimensional launch with a global size of 100; no local size, wait list or event.
PointerBuffer globalWorkSize = BufferUtils.createPointerBuffer(1);
globalWorkSize.put(0, 100);
clEnqueueNDRangeKernel(que, sumKernel, 1, null, globalWorkSize, null, null, null);
CL10.clFinish(que);
// NOTE(review): out is never passed to any CL call in this method, so this loop prints the
// array's initial values; reply #16 adds a clEnqueueReadBuffer to read the results back.
for(int i=0;i<100;i++){
System.out.println(out[i]);
}

}


}

Code: [Select]
Exception in thread "main" java.lang.RuntimeException: OpenCL error [0xFFFFFFF5]
at Main.InfoUtil.checkCLError(InfoUtil.java:130)
at Main.CLDemo.demo(CLDemo.java:71)
at Main.CLDemo.main(CLDemo.java:26)
« Last Edit: October 02, 2016, 09:38:46 by petarts »

*

Kai

Re: OpenCL problem, out of stack space
« Reply #16 on: October 02, 2016, 10:14:13 »
You should re-read Spasi's last post.

Long story short, here is a working version of your reduction program:
Code: [Select]
private static void demo(MemoryStack stack) {
    IntBuffer counts = stack.mallocInt(1);
    checkCLError(clGetPlatformIDs(null, counts));
    int platformCount = counts.get(0);
    if (platformCount == 0)
        throw new RuntimeException("No OpenCL platforms found.");
    PointerBuffer platforms = stack.mallocPointer(platformCount);
    checkCLError(clGetPlatformIDs(platforms, (IntBuffer) null));
    PointerBuffer ctxProps = stack.mallocPointer(3);
    ctxProps.put(0, CL_CONTEXT_PLATFORM).put(2, 0);
    IntBuffer errcode_ret = stack.callocInt(1);
    long platform = platforms.get(0);
    ctxProps.put(1, platform);
    checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, null, counts));
    int deviceCount = counts.get(0);
    if (deviceCount == 0)
      throw new RuntimeException("No OpenCL devices found.");
    PointerBuffer devices = stack.mallocPointer(deviceCount);
    checkCLError(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, devices, (IntBuffer) null));
    long device = devices.get(0);
    long context = clCreateContext(ctxProps, device, null, NULL, errcode_ret);
    checkCLError(errcode_ret);
    long que = clCreateCommandQueue(context, device, NULL, errcode_ret);
    checkCLError(errcode_ret);
    CharSequence add =
    "kernel void sum(global const float* a, global float* result, int const size) {\n"+ // <- 'kernel' and 'global' !
    "   const int itemId = get_global_id(0); \n"+
    "   if(itemId < size) {\n"+
    "       result[itemId] = a[itemId*2] + a[itemId*2+1];\n"+
    "   }\n"+
    "}";
    long sumProgram = CL10.clCreateProgramWithSource(context, add, null);
    checkCLError(CL10.clBuildProgram(sumProgram, devices.get(0), "", null,0));
    checkCLError(errcode_ret);
    long sumKernel = CL10.clCreateKernel(sumProgram, "sum", errcode_ret);
    checkCLError(errcode_ret);
    float[] in  = new float[200];
    float[] out = new float[100];
    for (int i = 0; i < 200; i++) {
        in[i] = i;
    }
    FloatBuffer aBuff = stack.mallocFloat(200);
    aBuff.put(in).rewind();
    long _in = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_ONLY | CL10.CL_MEM_COPY_HOST_PTR, aBuff, errcode_ret); // <- READ_ONLY !
    checkCLError(errcode_ret);
    long _out = CL10.clCreateBuffer(context, CL10.CL_MEM_READ_WRITE, 400, errcode_ret); // <- READ_WRITE !
    checkCLError(errcode_ret);
    checkCLError(CL10.clSetKernelArg1p(sumKernel, 0, _in));
    checkCLError(CL10.clSetKernelArg1p(sumKernel, 1, _out));
    checkCLError(CL10.clSetKernelArg1i(sumKernel, 2, 100)); // <- clSetKernelArg1i !
    PointerBuffer globalWorkSize = stack.mallocPointer(1);
    globalWorkSize.put(0, 100);
    PointerBuffer kernelEvent = stack.mallocPointer(1);
    checkCLError(clEnqueueNDRangeKernel(que, sumKernel, 1, null, globalWorkSize, null, null, kernelEvent));
    PointerBuffer readEvent = stack.mallocPointer(1);
    checkCLError(clEnqueueReadBuffer(que, _out, 1, 0, out, kernelEvent, readEvent)); // <- read back results !
    checkCLError(clWaitForEvents(readEvent));
    for (int i = 0; i < 100; i++) {
        System.out.println(out[i]);
    }
}
« Last Edit: October 02, 2016, 11:17:35 by Kai »

Re: OpenCL problem, out of stack space
« Reply #17 on: October 02, 2016, 12:06:24 »
Okay, thank you. I will now look at what you have done and see what mistakes I made in my program.