I have a kernel function that processes some strings and returns a char** array.
Now I want to copy this string array to constant device memory from Java via cudaMemcpyToSymbol, but this fails each time with the error “cudaErrorInvalidSymbol”.
For simplicity, I created an int array of size one and tried to copy it to constant device memory instead.
Kernel File:
// Constant-memory symbol named "testInt". Note that it is declared as a
// pointer to int (int*), not as an int value.
__device__ __constant__ int* testInt;
/*
 * Writes the value 7 into the first element of the output array.
 * Only the thread with threadIdx.x == 0 performs the store; all other
 * threads return without touching memory.
 *
 * @param maxValue int* output array; element 0 is written
 */
__global__ void getValue(int* maxValue)
{
    // Every thread except the first one of a block has nothing to do.
    if (threadIdx.x != 0)
    {
        return;
    }
    maxValue[0] = 7;
}
The pointer max_d holds the kernel output array with the single int element {7}, so my kernel function is working properly.
// Copy Sizeof.INT bytes from the device pointer max_d into the symbol
// referred to by the name "testInt" (device-to-device copy).
int error1 = JCuda.cudaMemcpyToSymbol("testInt", max_d, Sizeof.INT, 0, cudaMemcpyDeviceToDevice);
// Host-side array with one int that receives the value read back below.
int[] testInt = {0};
// Copy Sizeof.INT bytes from the symbol referred to by the name "testInt"
// into the host array (device-to-host copy).
int error2 = JCuda.cudaMemcpyFromSymbol(Pointer.to(testInt), "testInt", Sizeof.INT, 0, cudaMemcpyDeviceToHost);
You are already loading the module with the driver API, and you also have to obtain the pointer to the constant memory using the driver API. Here is an example (I thought I already posted it somewhere, but could not find it). It shows how to obtain the pointer to constant memory, and use this memory in the kernel:
import static jcuda.driver.JCudaDriver.*;
import jcuda.*;
import jcuda.driver.*;
import jcuda.utils.KernelLauncher;
public class ConstantMemoryTest
{
public static void main(String args[])
{
JCudaDriver.setExceptionsEnabled(true);
// Compile the input file and obtain the module
System.out.println("Initializing...");
KernelLauncher kernelLauncher =
KernelLauncher.create("constantMemoryTestKernel.cu", "test");
CUmodule module = kernelLauncher.getModule();
// Obtain a pointer to the constant symbol
System.out.println("Obtaining pointer to constant symbol...");
CUdeviceptr constantMemoryPointer = new CUdeviceptr();
long sizeArray[] = {0};
cuModuleGetGlobal(constantMemoryPointer, sizeArray, module, "constantMemory");
int size = (int)sizeArray[0];
int numElements = size / Sizeof.INT;
System.out.println("constantMemoryPointer: "+constantMemoryPointer+" size "+size);
// Prepare the input- and output memory
System.out.println("Preparing input and output...");
int hostInput[] = new int[numElements];
for (int i=0; i<numElements; i++)
{
hostInput** = i;
}
cuMemcpyHtoD(constantMemoryPointer, Pointer.to(hostInput), size);
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, size);
// Invoke the kernel
System.out.println("Launching the kernel...");
int blockSize = 32;
int gridSize = size / blockSize;
kernelLauncher.setBlockSize(blockSize, 1, 1);
kernelLauncher.setGridSize(gridSize, 1);
kernelLauncher.call(deviceOutput);
// Copy the device memory to the host and print the result
System.out.println("Obtaining results...");
int hostOutput[] = new int[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, size);
for(int i=0;i<Math.min(numElements, 30);i++)
{
System.out.print(hostOutput**+" ");
}
System.out.println("...");
cuMemFree(deviceOutput);
}
}
Kernel:
// Array of 1024 ints in constant memory. It is not written by the kernel
// below; its contents have to be set from the host before the launch.
__constant__ int constantMemory[1024];

/*
 * Copies constantMemory element-wise into the given output array.
 *
 * Uses a one-dimensional index (blockDim.x * blockIdx.x + threadIdx.x), so
 * it expects a 1D grid of 1D blocks. Threads whose index lies beyond the
 * end of constantMemory do nothing.
 *
 * @param output int* destination array; must provide room for as many ints
 *               as constantMemory has elements
 */
extern "C"
__global__ void test(int *output)
{
    // Element count derived from the declaration, so it follows the array
    // size automatically if that is ever changed.
    const int numElements = sizeof(constantMemory) / sizeof(constantMemory[0]);

    int i = blockDim.x * blockIdx.x + threadIdx.x;

    // Guard against grids that launch more threads than there are elements.
    if (i < numElements)
    {
        output[i] = constantMemory[i];
    }
}