hello all,
im currently writing on a gameengine similar to ultima 1-6 , since this only requires reacting to events i don’t care much
about performance , but it would be a nice addition if i can speed it up at a few points. One of the major bottlenecks is
the blitting function in my FrameBuffer class. I’ve written a simple CL-blitting class that should help me out here , problem
is that my target FrameBuffer remains completly black, so if somebody could point out my errors it would be awesome
public HWBlitter() {
String programSource = "__kernel void swapblit(__global int * outC,__global int * inputC,__global float * outD,__global float * inputD)
"+
"{
"+
" size_t tid = get_global_id(0);
"+
" if(inputD[tid] <= outD[tid])
"+
" {
"+
" outD[tid] = inputD[tid];
"+
" outC[tid] = inputC[tid];
"+
" }
"+
"}
";
long numBytes[] = new long[1];
cl_platform_id platforms[] = new cl_platform_id[1];
clGetPlatformIDs(platforms.length, platforms, null);
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]);
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, null, null, null);
if (context == null) {
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, null, null, null);
if (context == null) {
}
}
CL.setExceptionsEnabled(true);
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, null, numBytes);
int numDevices = (int) numBytes[0] / Sizeof.cl_device_id;
devices = new cl_device_id[numDevices];
clGetContextInfo(context, CL_CONTEXT_DEVICES, numBytes[0], Pointer.to(devices), null);
commandQueue = clCreateCommandQueue(context, devices[0], 0, null);
program = clCreateProgramWithSource(context,1, new String[]{ programSource }, null, null);
clBuildProgram(program, 0, null, null, null, null);
kernel = clCreateKernel(program, "swapblit", null);
}
public void finalize() {
if(kernel != null) {
clReleaseKernel(kernel);
}
if(program != null) {
clReleaseProgram(program);
}
if(commandQueue != null) {
clReleaseCommandQueue(commandQueue);
}
if(context != null) {
clReleaseContext(context);
}
}
public void Execute(FrameBuffer target,FrameBuffer source) {
cl_mem memObjects[] = new cl_mem[4];
memObjects[0] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_int*(target.colorBuffer.length), Pointer.to(target.colorBuffer), null);
memObjects[1] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_int*(source.colorBuffer.length), Pointer.to(source.colorBuffer), null);
memObjects[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_float*(target.colorBuffer.length), Pointer.to(target.colorBuffer), null);
memObjects[3] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_float*(source.depthBuffer.length), Pointer.to(source.depthBuffer), null);
clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2]));
clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[3]));
long global_work_size[] = new long[]{source.colorBuffer.length};
long local_work_size[] = new long[]{1};
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,global_work_size, local_work_size, 0, null, null);
clEnqueueReadBuffer(commandQueue, memObjects[0], CL_TRUE, 0,Sizeof.cl_int*(target.colorBuffer.length), Pointer.to(target.colorBuffer), 0, null, null);
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,Sizeof.cl_int*(target.depthBuffer.length), Pointer.to(target.depthBuffer), 0, null, null);
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[2]);
clReleaseMemObject(memObjects[3]);
}
}
private static HWBlitter hwBlitter = null;
/**
*
* @param source
* @param sx
* @param sy
* @param dx
* @param dy
* @param w
* @param h
* @param alpha
*/
protected void blitSwapFrameBuffer(FrameBuffer source,int sx,int sy,int dx,int dy,int w,int h,int alpha) {
this.needsBlockProperty = true;
int sw = this.Width - sx;
int dw = source.Width - dx;
int dh = source.Height - dy;
if(dh <= 0) {
return;
} else if(dh > h) {
dh = h;
}
if(dw <= 0) {
return;
} else if(dw > w) {
dw = w;
}
if(sw <= 0) {
return;
} else if(sw > w ) {
sw = source.Width;
}
//add opencl
if(FrameBuffer.hwBlitter != null && FrameBuffer.hwBlitter.context != null) {
FrameBuffer.hwBlitter.Execute(this, source);
} else {
for(int y=0; (y< dh) && ((y+sy) < this.Height); y++) {
for (int x=0; (x<sw) && (x< dw); x++) {
float d = this.depthBuffer[( y+sy) * this.Width + (x+sx)];
float db = source.depthBuffer[(y+dy) * source.Width + (x+dx)];
if (db <= d && source.colorBuffer[( y+dy) * source.Width + (x+dx)] != alpha) {
this.depthBuffer[(y+sy) * this.Width + (x+sx)] = db;
this.colorBuffer[(y+sy) * this.Width + (x+sx)] = source.colorBuffer[( y+dy) * source.Width + (x+dx)];
}
}
}
}
this.needsBlockProperty = false;
}