Can you detail what “a number of … iterations” means? 10? 1000? 100000?
I just assembled the snippets that you provided into this example, and it has already passed the 5000-iteration mark as I am writing this…
package test;
import static org.jocl.CL.*;
import org.jocl.*;
/**
 * Stress test that repeatedly initializes OpenCL through JOCL, enqueues a
 * trivial (empty) kernel and releases everything again, printing a running
 * counter after each completed cycle. The loop in {@link #main} never
 * terminates on its own; it only stops when an OpenCL call fails (exceptions
 * are enabled) or the process is killed.
 */
public class RepeatedInitAndShutdownTest
{
    /**
     * Index of the platform to use when at least that many platforms exist.
     * If fewer platforms are available, the last available one is used
     * instead of failing with an out-of-bounds access.
     */
    private static final int PREFERRED_PLATFORM_INDEX = 1;

    /** Source of the kernel: takes two global float buffers and does nothing. */
    private static final String programSource =
        "__kernel void "+
        "compute(__global float *a,"+
        " __global float *b)"+
        "{"+
        "}";

    // OpenCL handles shared between initGPU, makeSomethingOnGPU and
    // destroyGPU. A field is null whenever it holds no live handle.
    private static cl_platform_id clPlatform;
    private static cl_device_id clDevice;
    private static cl_context clContext;
    private static cl_command_queue clCommandQueue;
    private static cl_program clProgram;
    private static cl_kernel clKernel;

    /**
     * The entry point of this sample. Runs the init / compute / destroy
     * cycle forever and prints the number of the run that just finished.
     *
     * @param args Not used
     */
    public static void main(String args[])
    {
        CL.setExceptionsEnabled(true);
        int count = 0;
        while (true)
        {
            try
            {
                initGPU();
                makeSomethingOnGPU();
            }
            finally
            {
                // Release whatever was created, even if the cycle failed
                // partway through.
                destroyGPU();
            }
            System.out.println("Run "+count+" done");
            count++;
        }
    }

    /**
     * Selects a platform and its first GPU device, then creates the context
     * and the command queue that the other methods use.
     *
     * @throws IllegalStateException if no platform or no GPU device is
     *         reported
     */
    private static void initGPU()
    {
        // Obtain the number of platforms
        int numPlatformsArray[] = new int[1];
        clGetPlatformIDs(0, null, numPlatformsArray);
        int numPlatforms = numPlatformsArray[0];
        if (numPlatforms == 0)
        {
            throw new IllegalStateException("No OpenCL platform available");
        }

        // Obtain a platform ID. The preferred index is clamped so that
        // machines with a single platform still work.
        cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
        clGetPlatformIDs(platforms.length, platforms, null);
        int platformIndex =
            Math.min(PREFERRED_PLATFORM_INDEX, numPlatforms - 1);
        clPlatform = platforms[platformIndex];

        // Initialize the context properties
        cl_context_properties contextProperties = new cl_context_properties();
        contextProperties.addProperty(CL_CONTEXT_PLATFORM, clPlatform);

        // Obtain the number of GPU devices for the platform
        int numDevicesArray[] = new int[1];
        clGetDeviceIDs(clPlatform, CL_DEVICE_TYPE_GPU, 0, null, numDevicesArray);
        int numDevices = numDevicesArray[0];
        if (numDevices == 0)
        {
            throw new IllegalStateException(
                "No GPU device on OpenCL platform " + platformIndex);
        }

        // Obtain a device ID
        cl_device_id devices[] = new cl_device_id[numDevices];
        clGetDeviceIDs(clPlatform, CL_DEVICE_TYPE_GPU, numDevices, devices, null);
        clDevice = devices[0];

        // Create a context for the selected device
        clContext = clCreateContext(contextProperties, 1,
            new cl_device_id[]{clDevice}, null, null, null);

        // Create a command-queue for the selected device
        clCommandQueue = clCreateCommandQueue(clContext, clDevice, 0, null);
    }

    /**
     * Creates two buffers, builds the program, creates the kernel and
     * enqueues it once over 1000 work items. The buffers are released before
     * returning; the program and kernel stay in the static fields until
     * {@link #destroyGPU()} is called.
     */
    static void makeSomethingOnGPU()
    {
        int n = 1000;
        // Each buffer gets its own host array: the OpenCL specification
        // leaves operations on multiple CL_MEM_USE_HOST_PTR buffers that
        // share a host region undefined.
        float inputData[] = new float[n];
        float outputData[] = new float[n];
        cl_mem clustersInMem = null;
        cl_mem clusterOutMem = null;
        try
        {
            clustersInMem = clCreateBuffer(clContext,
                CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                n * Sizeof.cl_float, Pointer.to(inputData), null);
            clusterOutMem = clCreateBuffer(clContext,
                CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                n * Sizeof.cl_float, Pointer.to(outputData), null);

            clProgram = clCreateProgramWithSource(
                clContext, 1, new String[]{ programSource }, null, null);
            clBuildProgram(clProgram, 0, null, null, null, null);
            clKernel = clCreateKernel(clProgram, "compute", null);

            clSetKernelArg(clKernel, 0, Sizeof.cl_mem, Pointer.to(clustersInMem));
            clSetKernelArg(clKernel, 1, Sizeof.cl_mem, Pointer.to(clusterOutMem));

            long global_work_size[] = new long[]{n};
            clEnqueueNDRangeKernel(clCommandQueue, clKernel, 1, null,
                global_work_size, null, 0, null, null);
        }
        finally
        {
            // Release the buffers even if building or enqueueing failed.
            if (clustersInMem != null)
            {
                clReleaseMemObject(clustersInMem);
            }
            if (clusterOutMem != null)
            {
                clReleaseMemObject(clusterOutMem);
            }
        }
    }

    /**
     * Releases the kernel, program, command queue and context, in that
     * order, skipping any that are not set. Each field is reset to null
     * after release so that a repeated call, or a call after a partially
     * failed initialization, never releases the same handle twice.
     */
    static void destroyGPU()
    {
        if (clKernel != null)
        {
            clReleaseKernel(clKernel);
            clKernel = null;
        }
        if (clProgram != null)
        {
            clReleaseProgram(clProgram);
            clProgram = null;
        }
        if (clCommandQueue != null)
        {
            clReleaseCommandQueue(clCommandQueue);
            clCommandQueue = null;
        }
        if (clContext != null)
        {
            clReleaseContext(clContext);
            clContext = null;
        }
    }
}
Can you confirm that this works in your case as well?