Hello, I'm starting with JCuda and Eclipse, and I have implemented your example:
package prueba;
import jcuda.LogLevel;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.CUmodule;
import jcuda.driver.JCudaDriver;
public class TestCUBINCall {
// Run attributes, not really important but define vector element size.
// This is from another larger test program I was working on.
private static int maxParticleCount = 4;
private static int dimensions = 3;
// Calculate vector needs
private static int vectorElementCount = maxParticleCount*dimensions;
private static int vectorMemSize = vectorElementCount*Sizeof.FLOAT;
// All input and result arrays are the same size
private static float particleCoordinates[] = new float[vectorElementCount];
private static float particlePositionModifiers[] = new float[vectorElementCount];
private static float results[] = new float[vectorElementCount];
private static int threads_per_block = 256;
/**
* Simple test method to calculate the sum of 2 vectors
*
* [1,1,1,1,1,1,1,1,1...] + [2,2,2,2,2,2,2,2,2...] = [3,3,3,3,3,3,3,3,3...]
*
*/
public static void testCUBIN(){
// Initialize the driver and create a context for the first device.
JCudaDriver.cuInit(0);
CUcontext pctx = new CUcontext();
CUdevice dev = new CUdevice();
JCudaDriver.cuDeviceGet(dev, 0);
JCudaDriver.cuCtxCreate(pctx, 0, dev);
// Load the CUBIN file.
CUmodule module = new CUmodule();
JCudaDriver.cuModuleLoad(module, "vector_add.sm_10.cubin");
// Obtain a function pointer to the "sampleKernel" function.
CUfunction function = new CUfunction();
JCudaDriver.cuModuleGetFunction(function, module, "add_vectors_kernel");
// Prepare host test data
for(int i = 0; i < vectorElementCount; i++){
particleCoordinates** = 1;
particlePositionModifiers** = 2;
results** = 0;
}
// Define pointers to input vectors
CUdeviceptr positionDevicePointer = new CUdeviceptr();
CUdeviceptr modificationDevicePointer = new CUdeviceptr();
CUdeviceptr outputDevicePointer = new CUdeviceptr();
// Allocate memory space on the GPU
JCudaDriver.cuMemAlloc(positionDevicePointer, vectorMemSize);
JCudaDriver.cuMemAlloc(modificationDevicePointer, vectorMemSize);
JCudaDriver.cuMemAlloc(outputDevicePointer, vectorMemSize);
// Copy data from host to device
JCudaDriver.cuMemcpyHtoD(positionDevicePointer, Pointer.to(particleCoordinates), vectorMemSize);
JCudaDriver.cuMemcpyHtoD(modificationDevicePointer, Pointer.to(particlePositionModifiers),vectorMemSize);
// Set up the execution parameters.
int num_blocks = (int) ((float) (vectorElementCount + threads_per_block - 1) / (float) threads_per_block);
int max_blocks_per_dimension = 65535;
int num_blocks_y = (int) ((float) (num_blocks + max_blocks_per_dimension - 1) / (float) max_blocks_per_dimension);
int num_blocks_x = (int) ((float) (num_blocks + num_blocks_y - 1) / (float) num_blocks_y);
JCudaDriver.cuFuncSetBlockShape(function, num_blocks_x, num_blocks_y, 1);
// Set up the parameters for the function call
Pointer dInPositions = Pointer.to(positionDevicePointer);
Pointer dInModifiers = Pointer.to(modificationDevicePointer);
Pointer dOut = Pointer.to(outputDevicePointer);
Pointer vectorSize = Pointer.to(new int[]{vectorElementCount});
// Accumulate offset used for function call
int offset = 0;
// Position
offset = JCudaDriver.align(offset, Sizeof.POINTER);
JCudaDriver.cuParamSetv(function, offset, dInPositions, Sizeof.POINTER);
offset += Sizeof.POINTER;
// Modifier
offset = JCudaDriver.align(offset, Sizeof.POINTER);
JCudaDriver.cuParamSetv(function, offset, dInModifiers, Sizeof.POINTER);
offset += Sizeof.POINTER;
// Results
offset = JCudaDriver.align(offset, Sizeof.POINTER);
JCudaDriver.cuParamSetv(function, offset, dOut, Sizeof.POINTER);
offset += Sizeof.POINTER;
// Vector Size
offset = JCudaDriver.align(offset, Sizeof.INT);
JCudaDriver.cuParamSetv(function, offset, vectorSize, Sizeof.INT);
offset += Sizeof.INT;
JCudaDriver.cuParamSetSize(function, offset);
// Call the function.
JCudaDriver.cuLaunch(function);
JCudaDriver.cuCtxSynchronize();
// Copy the device output to the host.
JCudaDriver.cuMemcpyDtoH(Pointer.to(results), outputDevicePointer, vectorMemSize);
// Verify the result via simple output
// All values should be 3, ie 3,3,3,3,3,3,3,3,3,3........
for(int i = 0; i < vectorElementCount; i++){
System.out.print(results**+",");
}
// Clean up.
JCudaDriver.cuMemFree(positionDevicePointer);
JCudaDriver.cuMemFree(modificationDevicePointer);
JCudaDriver.cuMemFree(outputDevicePointer);
}
/**
* Kick off the test
* @param args
*/
public static void main(String[] args){
TestCUBINCall test = new TestCUBINCall();
test.testCUBIN();
}
}
But I have run into the following problem:
Error while loading native library with base name “JCudaDriver”
Operating system name: Windows 7
Architecture : amd64
Architecture bit size: 64
Exception in thread “main” java.lang.UnsatisfiedLinkError: Could not load native library
at jcuda.LibUtils.loadLibrary(LibUtils.java:79)
at jcuda.driver.JCudaDriver.<clinit>(JCudaDriver.java:107)
at prueba.TestCUBINCall.testCUBIN(TestCUBINCall.java:42)
at prueba.TestCUBINCall.main(TestCUBINCall.java:147)
I have added the external JAR "jcuda-0.3.1.jar".
Thanks