Hi guys,
I want to bind a pitch linear memory to 2D texture so that I could get the memory data using tex2D in the kernel and then modify the memory data. I use the driver API to do this reading/writing on the same pitch memory:
public class TextureTest2{
//
private static int sizeY = 2;
private static int sizeX = 4;
private static int fltLn = 1;
public static void main(String[] args){
// initializing the driver API
CUDAUtil.initialization();
// Load the CUBIN file containing the kernels
KernelLauncher kl=KernelLauncher.create("d:/GPU/TextureTest2.cu","TextureTest");
CUdeviceptr p1=new CUdeviceptr();
long[] pPitch=new long[1];
cuMemAllocPitch(p1,pPitch,sizeX*Sizeof.FLOAT*fltLn,sizeY,Sizeof.FLOAT);
// Copy the host input to the array
float[] data=new float[sizeX*sizeY*fltLn];
int ptr=0;
for(int i=0;i<sizeX*sizeY;i++)
for(int j=0;j<fltLn;j++) data[ptr++]=i;
System.out.println(Arrays.toString(data));
CUDA_MEMCPY2D copyHD = new CUDA_MEMCPY2D();
copyHD.srcMemoryType = CUmemorytype.CU_MEMORYTYPE_HOST;
copyHD.srcHost = Pointer.to(data);
copyHD.srcPitch = sizeX * Sizeof.FLOAT * fltLn;
copyHD.dstMemoryType = CUmemorytype.CU_MEMORYTYPE_DEVICE;
copyHD.dstDevice=p1;
copyHD.WidthInBytes = sizeX * Sizeof.FLOAT * fltLn;
copyHD.Height = sizeY;
cuMemcpy2D(copyHD);
CUDA_ARRAY_DESCRIPTOR ad=new CUDA_ARRAY_DESCRIPTOR();
ad.Format=CU_AD_FORMAT_FLOAT;
ad.Width =sizeX;
ad.Height=sizeY;
ad.NumChannels=fltLn;
// Set up the texture reference
CUtexref texref = new CUtexref();
cuModuleGetTexRef(texref, kl.getModule(), "texture_float4_2D");
cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT);
cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP);
cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP);
cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES);
cuTexRefSetFormat(texref, CU_AD_FORMAT_FLOAT, fltLn);
cuTexRefSetAddress2D(texref,ad,p1,pPitch[0]);
// Prepare the output device memory
CUdeviceptr dOutput = CUDAUtil.newDeviceData1D(new float[sizeY*sizeX*fltLn]);
kl.setup(new dim3(1,1,1),new dim3(sizeX,sizeY,1));
kl.call(dOutput,p1);
float[] hOutput=new float[sizeX*sizeY*fltLn];
cuMemcpyDtoH(Pointer.to(hOutput), dOutput, sizeX*sizeY*Sizeof.FLOAT * fltLn);
// Print the results
System.out.println(Arrays.toString(hOutput));
cuMemFree(p1);
// Clean up
CUDAUtil.finalization();
}
}
The following is the kernel code:
texture<float, 2, cudaReadModeElementType> texture_float4_2D;
extern "C"
__global__ void TextureTest(float* output,float* in){
int WIDTH =4;
int HEIGHT=2;
int tx=threadIdx.x+blockIdx.x*blockDim.x;
int ty=threadIdx.y+blockIdx.y*blockDim.y;
if(tx<WIDTH&&ty<HEIGHT){
output[ty*WIDTH+tx]=tex2D(texture_float4_2D, tx, ty)+1;
}
}
The output of the program is like:
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
not like expected:
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
It seems that the tex2D in the kernel do not return the value I want. Maybe the binding is not successful but I cannot find anything wrong in the code.
Please give me some help. Thanks very much.