This is my class that uses OpenCL. The method calcNr is called in a loop; each call performs one part of the computation and, at the end, reads the output with the ReadOutput method and frees the device memory with the clearMemory method. I have optimized the algorithm so that I now have only 2 output buffers, but ReadOutput still leaves a really large amount of uncollected garbage. Somehow I was unable to PM this message to you…
import java.lang.Math.*;
import static org.jocl.CL.*;
import org.jocl.*;
public class ZefirMath {
// OpenCL C source of the program; handed to clCreateProgramWithSource in createContext().
String MathKernel;
// Device selector read by createContext(): 0 = all device types, 1 = CPU, anything else = GPU.
int AccType;
// dg: step added to G2/G3 while calcNr walks the grid (grid values are scaled by 10000 there).
// n:  number of grid points of the latest calcNr call; also used as the global work size.
int dg, n;
// Threshold the NRoutput values are compared against in ReadOutput's best-mass search.
float NRz;
// OpenCL handles created in createContext() and released in clearKernel().
cl_context context;
cl_kernel kernel;
cl_command_queue commandQueue;
// Six read-only device buffers holding the kernel inputs (kernel arguments 0..5).
cl_mem memObjects[];
// Two device buffers the kernel writes its results to (kernel arguments 6 and 7).
cl_mem outputObject[];
cl_program program;
// Host-side copies of the two output buffers, filled by ReadOutput.
float NRoutput[];
float Moutput[];
// Host-side input arrays, one element per grid point, filled by calcNr.
float H1input[];
float H2input[];
float H3input[];
float G1input[];
float G2input[];
float G3input[];
// JOCL pointers wrapping H1input..G3input (in that order); created in calcNr.
Pointer Arg1;
Pointer Arg2;
Pointer Arg3;
Pointer Arg4;
Pointer Arg5;
Pointer Arg6;
// Results of the latest calcNr call: ReadOutput(false) and ReadOutput(true) respectively.
Params BestNr;
Params BestMass;
/**
 * Stores the run configuration, then prepares the kernel source and the OpenCL objects.
 *
 * @param acc device selector stored in AccType (see createContext for how it is interpreted)
 * @param idg grid step stored in dg
 * @param nr  threshold stored in NRz
 * @param Fw  forwarded unchanged to setMathKernel
 * @param E   forwarded unchanged to setMathKernel
 * @param Ro  forwarded unchanged to setMathKernel
 * @param d   forwarded unchanged to setMathKernel
 * @param Mg  forwarded unchanged to setMathKernel
 * @param D1  forwarded unchanged to setMathKernel
 * @param D2  forwarded unchanged to setMathKernel
 * @param D3  forwarded unchanged to setMathKernel
 */
public void init(int acc, int idg, float nr, float Fw, float E, float Ro, float d, float Mg, float D1, float D2, float D3)
{
    // Plain configuration state; the element count starts at zero until calcNr runs.
    this.n = 0;
    this.NRz = nr;
    this.dg = idg;
    this.AccType = acc;

    // The kernel source has to be prepared before createContext() builds the program from it.
    this.setMathKernel(Fw, E, Ro, d, Mg, D1, D2, D3);
    this.createContext();
}
/**
 * Creates the OpenCL context, command queue, program and the "Integrator" kernel.
 *
 * The device type is chosen from AccType: 0 selects all device types, 1 selects CPU
 * devices and any other value selects GPU devices. The program is built from the source
 * held in MathKernel and the first device of the context is used for the command queue.
 *
 * @throws IllegalStateException if no platform, context or device could be obtained
 */
public final void createContext()
{
    // Enable exceptions before the first CL call so that platform lookup and context
    // creation failures are reported instead of silently yielding unusable handles.
    CL.setExceptionsEnabled(true);

    cl_platform_id platforms[] = new cl_platform_id[1];
    clGetPlatformIDs(platforms.length, platforms, null);
    if (platforms[0] == null)
    {
        throw new IllegalStateException("No OpenCL platform available");
    }

    cl_context_properties contextProperties = new cl_context_properties();
    contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]);

    final long deviceType;
    if (AccType == 0)
    {
        deviceType = CL_DEVICE_TYPE_ALL;
    }
    else if (AccType == 1)
    {
        deviceType = CL_DEVICE_TYPE_CPU;
    }
    else
    {
        deviceType = CL_DEVICE_TYPE_GPU;
    }

    context = clCreateContextFromType(contextProperties, deviceType, null, null, null);
    if (context == null)
    {
        // Continuing with a null context would only fail later in a less obvious place.
        throw new IllegalStateException("Unable to create a context");
    }

    // First query the size of the device list, then fetch the list itself.
    long numBytes[] = new long[1];
    clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, null, numBytes);
    int numDevices = (int) (numBytes[0] / Sizeof.cl_device_id);
    if (numDevices < 1)
    {
        throw new IllegalStateException("The OpenCL context contains no devices");
    }
    cl_device_id devices[] = new cl_device_id[numDevices];
    clGetContextInfo(context, CL_CONTEXT_DEVICES, numBytes[0], Pointer.to(devices), null);

    commandQueue = clCreateCommandQueue(context, devices[0], 0, null);
    program = clCreateProgramWithSource(context, 1, new String[]{ MathKernel }, null, null);
    clBuildProgram(program, 0, null, null, null, null);
    kernel = clCreateKernel(program, "Integrator", null);
}
/**
 * Releases the six input buffers and the two output buffers.
 *
 * Safe to call when no buffers exist yet and safe to call repeatedly: every released
 * handle is cleared from its array so it cannot be released a second time.
 */
public void clearMemory()
{
    releaseHandles(memObjects);
    releaseHandles(outputObject);
}

/** Releases every non-null buffer in the given array and clears its slot. */
private static void releaseHandles(cl_mem handles[])
{
    if (handles == null)
    {
        return;
    }
    for (int i = 0; i < handles.length; i++)
    {
        if (handles[i] != null)
        {
            clReleaseMemObject(handles[i]);
            handles[i] = null;
        }
    }
}
/**
 * Releases the kernel, program, command queue and context, in that order.
 *
 * Handles that were never created are skipped, and each field is cleared after its
 * release so that calling this method twice does not release the same object again.
 */
public void clearKernel()
{
    if (kernel != null)
    {
        clReleaseKernel(kernel);
        kernel = null;
    }
    if (program != null)
    {
        clReleaseProgram(program);
        program = null;
    }
    if (commandQueue != null)
    {
        clReleaseCommandQueue(commandQueue);
        commandQueue = null;
    }
    if (context != null)
    {
        clReleaseContext(context);
        context = null;
    }
}
Params ReadOutput(boolean bestmass)
{
Pointer dst = Pointer.to(NRoutput);
clEnqueueReadBuffer(commandQueue, outputObject[0], CL_TRUE, 0, n * Sizeof.cl_float, dst, 0, null, null);
Pointer dst1 = Pointer.to(Moutput);
clEnqueueReadBuffer(commandQueue, outputObject[1], CL_TRUE, 0, n * Sizeof.cl_float, dst1, 0, null, null);
clFinish(commandQueue);
float bestNr=0;
float bestM=99999;
int bestId=0;
if(bestmass)
for(int i=0; i<n; i++)
{
if(Moutput** < bestM && NRoutput** > NRz)
{
bestId=i;
bestNr=NRoutput**;
bestM=Moutput**;
}
}
else
for(int i=0; i<n; i++)
{
if(NRoutput**>bestNr)
{
bestId=i;
bestNr=NRoutput**;
bestM=Moutput**;
}
}
Params Best = new Params(NRoutput[bestId], 0, Moutput[bestId], //Nr, Mr, M
H1input[bestId],H2input[bestId],H3input[bestId], //H1,H2,H3
G1input[bestId],G2input[bestId],G3input[bestId], //G1,G2,G3
0,0,0, //M1,M2,M3
0,0,0); //F1,F2,F3
return new Params();//Best;
}
// NOTE(review): the body of this method is elided in this listing, so its behavior cannot
// be documented from here. It takes the same eight physical parameters as setMathKernel
// plus one H1..H3 / G1..G3 combination and returns a Params — presumably the values
// recomputed on the host for that single combination; confirm against the full source.
Params Recalculate(float Fw, float E, float Ro, float d, float Mg,
float D1, float D2, float D3,
float H1, float H2, float H3,
float G1, float G2, float G3)
{ ... }
// Called by init() before createContext(). NOTE(review): the body is elided in this listing;
// presumably it assembles the OpenCL source stored in MathKernel from these parameters,
// since createContext() builds the program from MathKernel — confirm against the full source.
final void setMathKernel(float Fw, float E, float Ro, float d, float Mg, float D1, float D2, float D3)
{ ... }
/**
 * Evaluates the kernel over a grid of (G2, G3) values and returns the best result.
 *
 * The grid is walked in steps of dg on values scaled by 10000, starting at G2min/G3min and
 * continuing while the stepped value is below G2max/G3max. At least one point is always
 * produced per dimension. H1, H2, G1 and H3 (H - H1 - H2, rounded to one decimal place) are
 * the same for every grid point. The loops only terminate if repeatedly adding dg eventually
 * reaches the upper bounds.
 *
 * Side effects: updates n, the host input/output arrays, BestNr and BestMass. The device
 * buffers created for the call are released before the method returns, even on failure.
 *
 * @return the result of ReadOutput(false), which is also stored in BestNr
 */
Params calcNr(float H, float H1, float H2, float G1, float G2min, float G3min, float G2max, float G3max)
{
    // Pass 1: count the grid points. The do/while loops run at least once, so the count
    // is never zero.
    int count = 0;
    float G2 = G2min * 10000;
    do
    {
        G2 = G2 + dg;
        float G3 = G3min * 10000;
        do
        {
            count++;
            G3 = G3 + dg;
        }
        while (G3 < G3max * 10000);
    }
    while (G2 < G2max * 10000);
    n = count;

    // (Re)allocate the host arrays whenever the grid outgrows them. Allocating only on the
    // first call would overflow the arrays as soon as a later call needs more points.
    if (H1input == null || H1input.length < n)
    {
        H1input = new float[n];
        H2input = new float[n];
        H3input = new float[n];
        G1input = new float[n];
        G2input = new float[n];
        G3input = new float[n];
        Arg1 = Pointer.to(H1input);
        Arg2 = Pointer.to(H2input);
        Arg3 = Pointer.to(H3input);
        Arg4 = Pointer.to(G1input);
        Arg5 = Pointer.to(G2input);
        Arg6 = Pointer.to(G3input);
    }
    if (NRoutput == null || NRoutput.length < n)
    {
        NRoutput = new float[n];
        Moutput = new float[n];
    }

    // H3 is whatever remains of H, rounded to one decimal place.
    float H3 = H - H1 - H2;
    H3 = (float) java.lang.Math.round(H3 * 10) / 10;

    // Pass 2: fill the host arrays, walking the grid exactly as in pass 1.
    int j = 0;
    G2 = G2min * 10000;
    do
    {
        float G3 = G3min * 10000;
        do
        {
            H1input[j] = H1;
            H2input[j] = H2;
            H3input[j] = H3;
            G1input[j] = G1;
            G2input[j] = G2 / 10000;
            G3input[j] = G3 / 10000;
            G3 = G3 + dg;
            j++;
        }
        while (G3 < G3max * 10000);
        G2 = G2 + dg;
    }
    while (G2 < G2max * 10000);

    createDeviceBuffers();
    try
    {
        // Kernel arguments 0..5 are the inputs, 6 and 7 the outputs.
        for (int i = 0; i < memObjects.length; i++)
        {
            clSetKernelArg(kernel, i, Sizeof.cl_mem, Pointer.to(memObjects[i]));
        }
        for (int i = 0; i < outputObject.length; i++)
        {
            clSetKernelArg(kernel, memObjects.length + i, Sizeof.cl_mem, Pointer.to(outputObject[i]));
        }

        long global_work_size[] = new long[]{n};
        clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, global_work_size, null, 0, null, null);
        clFinish(commandQueue);

        BestNr = ReadOutput(false);
        BestMass = ReadOutput(true);
        return BestNr;
    }
    finally
    {
        // Release the device buffers even when the launch or the read-back throws;
        // otherwise every failed call would leak eight buffers.
        clearMemory();
    }
}

/**
 * Creates the six input buffers (initialised from Arg1..Arg6) and the two output buffers
 * for n elements and stores them in memObjects / outputObject. If any creation fails, the
 * buffers created so far are released and the failure is rethrown, leaving the fields as
 * they were.
 */
private void createDeviceBuffers()
{
    // Computed as long so that a very large n cannot overflow the int multiplication.
    final long byteCount = (long) Sizeof.cl_float * n;
    final Pointer hostInputs[] = { Arg1, Arg2, Arg3, Arg4, Arg5, Arg6 };
    final cl_mem inputs[] = new cl_mem[hostInputs.length];
    final cl_mem outputs[] = new cl_mem[2];
    try
    {
        for (int i = 0; i < inputs.length; i++)
        {
            inputs[i] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, byteCount, hostInputs[i], null);
        }
        for (int i = 0; i < outputs.length; i++)
        {
            outputs[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, byteCount, null, null);
        }
    }
    catch (RuntimeException e)
    {
        for (cl_mem created : inputs)
        {
            if (created != null)
            {
                clReleaseMemObject(created);
            }
        }
        for (cl_mem created : outputs)
        {
            if (created != null)
            {
                clReleaseMemObject(created);
            }
        }
        throw e;
    }
    memObjects = inputs;
    outputObject = outputs;
}
}