Blitting with JOCL

hello all,
im currently writing on a gameengine similar to ultima 1-6 , since this only requires reacting to events i don’t care much
about performance , but it would be a nice addition if i can speed it up at a few points. One of the major bottlenecks is
the blitting function in my FrameBuffer class. I’ve written a simple CL-blitting class that should help me out here , problem
is that my target FrameBuffer remains completly black, so if somebody could point out my errors it would be awesome :smiley:




        public HWBlitter() {
            String programSource = "__kernel void swapblit(__global int *  outC,__global int * inputC,__global float * outD,__global float *  inputD)    
"+
                                   "{                                                                                                                  
"+
                                   "    size_t tid = get_global_id(0);                                                                                  
"+
                                   "    if(inputD[tid] <= outD[tid])                                                                                   
"+
                                   "       {                                                                                                              
"+
                                   "        outD[tid] = inputD[tid];                                                                                  
"+
                                   "        outC[tid] = inputC[tid];                                                                                  
"+
                                   "       }                                                                                                              
"+
                                   "}                                                                                                                  
";
            long numBytes[] = new long[1];
            cl_platform_id platforms[] = new cl_platform_id[1];
            clGetPlatformIDs(platforms.length, platforms, null);
            cl_context_properties contextProperties = new cl_context_properties();
            contextProperties.addProperty(CL_CONTEXT_PLATFORM, platforms[0]);
            context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, null, null, null);
            
            if (context == null) {
                context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, null, null, null);
                if (context == null) {
                    
                }
            }
            
            CL.setExceptionsEnabled(true);
            clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, null, numBytes); 
            int numDevices = (int) numBytes[0] / Sizeof.cl_device_id;
            devices = new cl_device_id[numDevices];
            clGetContextInfo(context, CL_CONTEXT_DEVICES, numBytes[0],  Pointer.to(devices), null);        
            commandQueue =  clCreateCommandQueue(context, devices[0], 0, null);
            program = clCreateProgramWithSource(context,1, new String[]{ programSource }, null, null);
            clBuildProgram(program, 0, null, null, null, null);
            kernel = clCreateKernel(program, "swapblit", null);
            
            
        }
        public void finalize() {
            if(kernel != null) {
                clReleaseKernel(kernel);
            }
            if(program != null) {
                clReleaseProgram(program);
            }
            if(commandQueue != null) {
                clReleaseCommandQueue(commandQueue);
            }
            
            if(context != null) {
                clReleaseContext(context);
            }
        }
        
        public void Execute(FrameBuffer target,FrameBuffer source) {
            
            cl_mem memObjects[] = new cl_mem[4];
            memObjects[0] = clCreateBuffer(context, 
                    CL_MEM_READ_WRITE, 
                    Sizeof.cl_int*(target.colorBuffer.length), Pointer.to(target.colorBuffer), null);
        
            memObjects[1] = clCreateBuffer(context, 
                    CL_MEM_READ_WRITE, 
                    Sizeof.cl_int*(source.colorBuffer.length), Pointer.to(source.colorBuffer), null);
            memObjects[2] = clCreateBuffer(context, 
                    CL_MEM_READ_WRITE, 
                    Sizeof.cl_float*(target.colorBuffer.length), Pointer.to(target.colorBuffer), null);
            memObjects[3] = clCreateBuffer(context, 
                    CL_MEM_READ_WRITE, 
                    Sizeof.cl_float*(source.depthBuffer.length), Pointer.to(source.depthBuffer), null);                
            
            clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
            clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
            clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2]));
            clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[3]));
            
            long global_work_size[] = new long[]{source.colorBuffer.length};
            long local_work_size[] = new long[]{1};
            clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,global_work_size, local_work_size, 0, null, null);
            clEnqueueReadBuffer(commandQueue, memObjects[0], CL_TRUE,  0,Sizeof.cl_int*(target.colorBuffer.length),  Pointer.to(target.colorBuffer), 0, null, null);
            clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE,  0,Sizeof.cl_int*(target.depthBuffer.length),  Pointer.to(target.depthBuffer), 0, null, null);
            clReleaseMemObject(memObjects[0]);
            clReleaseMemObject(memObjects[1]);
            clReleaseMemObject(memObjects[2]);
            clReleaseMemObject(memObjects[3]);
            
        }
    }

    private static HWBlitter hwBlitter = null;
    /**
     * 
     * @param source
     * @param sx
     * @param sy
     * @param dx
     * @param dy
     * @param w
     * @param h
     * @param alpha
     */
    protected void blitSwapFrameBuffer(FrameBuffer source,int sx,int sy,int dx,int dy,int w,int h,int alpha) {

        this.needsBlockProperty = true;
        int sw = this.Width - sx;
        int dw = source.Width - dx;
        int dh = source.Height - dy;
        
        if(dh <= 0) {
            return;
        } else if(dh > h) {
            dh = h;
        }         
        
        if(dw <= 0) {
            return;
        } else if(dw > w) {
            dw = w;
        } 
        
        if(sw <= 0) {
            return;
        } else if(sw > w ) {
            sw = source.Width;
        }    
        //add opencl
        if(FrameBuffer.hwBlitter != null && FrameBuffer.hwBlitter.context != null) {
            FrameBuffer.hwBlitter.Execute(this, source);
        } else {
            for(int y=0; (y< dh) && ((y+sy) < this.Height); y++) {
                for (int x=0; (x<sw) && (x< dw); x++) {
                    float d = this.depthBuffer[( y+sy) * this.Width + (x+sx)]; 
                    float db = source.depthBuffer[(y+dy) * source.Width + (x+dx)];
                    if (db <= d && source.colorBuffer[( y+dy) * source.Width + (x+dx)] != alpha) {
                        this.depthBuffer[(y+sy) * this.Width + (x+sx)] = db;
                        this.colorBuffer[(y+sy) * this.Width + (x+sx)] = source.colorBuffer[( y+dy) * source.Width + (x+dx)];
                    }
                }
            }
        }
        

        this.needsBlockProperty = false;
    }

Hello

Although it’s hard (or nearly impossible) to understand from this snippet what’s going on there, when quickly skimming it, the lines


memObjects[0] = clCreateBuffer(..., Pointer.to(target.colorBuffer), null);
memObjects[1] = clCreateBuffer(..., Pointer.to(source.colorBuffer), null);
memObjects[2] = clCreateBuffer(..., Pointer.to(target.colorBuffer), null);
memObjects[3] = clCreateBuffer(..., Pointer.to(source.depthBuffer), null);                

at least look somehow “inconsistent” concerning the usage of color- and depth buffers… Maybe another concentrated reading after some hours of sleep will bring more insights…

bye

haha yeah sorry about that lol

But I assume the problem still exists?
I’m not sure when I’ll find the time to dig deeper through the code. Presumably, it’s difficult to convert it into a compileable test case…?

Well i could send you a copy of my source just got add some comments