I modified the CudaVectorAdd example as follows:
// Make every driver call throw a CudaException on failure. None of the
// return codes below are checked, so without this a failed module load or
// function lookup would go unnoticed until a later call misbehaves.
setExceptionsEnabled(true);

// Create the PTX file by calling the NVCC.
// (Disabled: the precompiled PTX file below is loaded instead.)
//String ptxFileName = preparePtxFile("H:\\Dokumente und Einstellungen\\gbarbieri\\Eigene Dateien\\NetBeansProjects\\Cuda sqrt\\src\\cuda\\sqrt\\src\\CudaSqrt.cu");
// Backslashes must be escaped inside a Java string literal.
String ptxFileName = "H:\\CudaSqrt.ptx";

// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

// Load the ptx file.
CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFileName);

// Obtain a function pointer to the "sqrt" function.
// The lookup is by the entry name as it appears in the PTX file, so the
// string must match that name exactly. NOTE(review): if the kernel was
// compiled without extern "C", the PTX name is C++-mangled and this plain
// name will not be found - check the .entry line in the PTX file.
CUfunction function = new CUfunction();
cuModuleGetFunction(function, module, "sqrt");
And this is the kernel source that the PTX file is compiled from:
// Adds 1 to each of the first n elements of a, one element per thread.
// Despite the name, the body shown does not compute a square root.
//
// Launch layout: the index uses only the x dimension of the grid and block,
// so launch 1D with at least n threads in total; threads with i >= n do
// nothing.
//
// Parameters:
//   n - number of elements to process
//   a - array updated in place (presumably a device pointer to at least
//       n floats - confirm against the host code)
//
// NOTE(review): with extern "C" commented out, the entry name in the PTX is
// C++-mangled, so a driver-API lookup by the plain name "sqrt" will not
// find it. Enabling extern "C" under this name is likely to clash with the
// C math library's sqrt declaration; renaming the kernel would avoid both.
//extern "C"
__global__ void sqrt(int n, float *a) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        a[i] = a[i] + 1.0f;
}
I moved the PTX file to the drive root (H:\CudaSqrt.ptx) because nvcc doesn't like spaces in the path…