UnsatisfiedLinkError on Windows 7, 64bit

Hello, I'm starting with JCuda and Eclipse, and I have implemented your example:

package prueba;

import jcuda.LogLevel;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.CUmodule;
import jcuda.driver.JCudaDriver;

public class TestCUBINCall {
    // Run attributes, not really important but define vector element size. 
    // This is from another larger test program I was working on.
    private static int maxParticleCount = 4;
    private static int dimensions = 3;
    // Calculate vector needs
    private static int vectorElementCount = maxParticleCount*dimensions;
    private static int vectorMemSize = vectorElementCount*Sizeof.FLOAT;
    // All input and result arrays are the same size
    private static float particleCoordinates[] = new float[vectorElementCount];
    private static float particlePositionModifiers[] = new float[vectorElementCount];
    private static float results[] = new float[vectorElementCount];
    private static int threads_per_block = 256;

    /**
     * Simple test method to calculate the sum of 2 vectors
     * [1,1,1,1,1,1,1,1,1...] + [2,2,2,2,2,2,2,2,2...] = [3,3,3,3,3,3,3,3,3...]
     */
    public static void testCUBIN(){
        // Initialize the driver and create a context for the first device.
        CUcontext pctx = new CUcontext();
        CUdevice dev = new CUdevice();
        JCudaDriver.cuDeviceGet(dev, 0);
        JCudaDriver.cuCtxCreate(pctx, 0, dev);

        // Load the CUBIN file.
        CUmodule module = new CUmodule();
        JCudaDriver.cuModuleLoad(module, "vector_add.sm_10.cubin");

        // Obtain a function pointer to the "sampleKernel" function.
        CUfunction function = new CUfunction();
        JCudaDriver.cuModuleGetFunction(function, module, "add_vectors_kernel");

        // Prepare host test data
        for(int i = 0; i < vectorElementCount; i++){
            particleCoordinates[i] = 1;
            particlePositionModifiers[i] = 2;
            results[i] = 0;
        }

        // Define pointers to input vectors
        CUdeviceptr positionDevicePointer = new CUdeviceptr();
        CUdeviceptr modificationDevicePointer = new CUdeviceptr();
        CUdeviceptr outputDevicePointer = new CUdeviceptr();
        // Allocate memory space on the GPU
        JCudaDriver.cuMemAlloc(positionDevicePointer, vectorMemSize);
        JCudaDriver.cuMemAlloc(modificationDevicePointer, vectorMemSize);
        JCudaDriver.cuMemAlloc(outputDevicePointer, vectorMemSize);

        // Copy data from host to device
        JCudaDriver.cuMemcpyHtoD(positionDevicePointer, Pointer.to(particleCoordinates), vectorMemSize);
        JCudaDriver.cuMemcpyHtoD(modificationDevicePointer, Pointer.to(particlePositionModifiers),vectorMemSize);

        // Set up the execution parameters.
        int num_blocks = (int) ((float) (vectorElementCount + threads_per_block - 1) / (float) threads_per_block);
        int max_blocks_per_dimension = 65535;
        int num_blocks_y = (int) ((float) (num_blocks + max_blocks_per_dimension - 1) / (float) max_blocks_per_dimension);
        int num_blocks_x = (int) ((float) (num_blocks + num_blocks_y - 1) / (float) num_blocks_y);
        JCudaDriver.cuFuncSetBlockShape(function, num_blocks_x, num_blocks_y, 1);

        // Set up the parameters for the function call
        Pointer dInPositions = Pointer.to(positionDevicePointer);
        Pointer dInModifiers = Pointer.to(modificationDevicePointer);
        Pointer dOut = Pointer.to(outputDevicePointer);
        Pointer vectorSize = Pointer.to(new int[]{vectorElementCount});
        // Accumulate offset used for function call
        int offset = 0;
        // Position
        offset = JCudaDriver.align(offset, Sizeof.POINTER);
        JCudaDriver.cuParamSetv(function, offset, dInPositions, Sizeof.POINTER);
        offset += Sizeof.POINTER;
        // Modifier
        offset = JCudaDriver.align(offset, Sizeof.POINTER);
        JCudaDriver.cuParamSetv(function, offset, dInModifiers, Sizeof.POINTER);
        offset += Sizeof.POINTER;
        // Results
        offset = JCudaDriver.align(offset, Sizeof.POINTER);
        JCudaDriver.cuParamSetv(function, offset, dOut, Sizeof.POINTER);
        offset += Sizeof.POINTER;

        // Vector Size
        offset = JCudaDriver.align(offset, Sizeof.INT);
        JCudaDriver.cuParamSetv(function, offset, vectorSize, Sizeof.INT);
        offset += Sizeof.INT;
        JCudaDriver.cuParamSetSize(function, offset);

        // Call the function.

        // Copy the device output to the host.
        JCudaDriver.cuMemcpyDtoH(Pointer.to(results), outputDevicePointer, vectorMemSize);

        // Verify the result via simple output
        // All values should be 3, ie 3,3,3,3,3,3,3,3,3,3........
        for(int i = 0; i < vectorElementCount; i++){

        // Clean up.
    /**
     * Kick off the test
     * @param args
     */
    public static void main(String[] args){
        TestCUBINCall test = new TestCUBINCall();

But I have run into problems like this:

Error while loading native library with base name “JCudaDriver”
Operating system name: Windows 7
Architecture : amd64
Architecture bit size: 64
Exception in thread “main” java.lang.UnsatisfiedLinkError: Could not load native library
at jcuda.LibUtils.loadLibrary(LibUtils.java:79)
at jcuda.driver.JCudaDriver.<clinit>(JCudaDriver.java:107)
at prueba.TestCUBINCall.testCUBIN(TestCUBINCall.java:42)
at prueba.TestCUBINCall.main(TestCUBINCall.java:147)

I have added the external JAR “jcuda-0.3.1.jar”.


Hello Fran,

Usually the error message
“UnsatisfiedLinkError: Could not load native library”
means that the native DLLs cannot be found. You should put these DLLs (all the files such as “JCudaRuntime-windows-x86_64.dll” and so on) into the root directory of your project.

Alternatively, you could add
-Djava.library.path=C:\your\path\to\the\DLLs
to the VM arguments in Eclipse.


Thanks Marco. That was the problem.

However, now I must compile my file.cu to file.cubin.

So I typed “nvcc file.cu --cubin” in a Windows console,

but it says “Cannot find compiler ‘cl.exe’ in PATH”.

I think this is the last problem I need to solve to finish the implementation.

What can I do???

Hello Franja,

NVCC requires a C compiler in the background, as described in the NVCC documentation in the C:\CUDA directory. (In contrast, OpenCL, for example, has a built-in compiler, so you don’t need another C compiler or CUBIN files, but can compile the kernel source code directly.) That means that you also need Visual Studio to compile your own CUBIN files.


Thank you very much. I will get Visual Studio to finish my first CUDA implementation.

I don't want to bother you, but let me tell you what I need to finish my work (maybe you have something similar):

  • a Java method to multiply two matrices with CUDA parallelism (N-way multicore processing, where N is a parameter of the method): public double[][] multiply(double a[][], double b[][], int N)
  • a Java method to calculate the inverse of a matrix (N-way multicore processing, where N is a parameter of the method): public double[][] inverse(double a[][], int N)

Now I will work on the compilation with Visual Studio, but I would appreciate it if you could help me with my real problem.


The remainder of this thread was about matrix operations, so the thread has been split and the discussion about matrix operations is continued here