First of all, thanks for this awesome library!
My question:
I’ve been trying to call cusolverDnSSgels using JCusolver, but I am getting a segmentation fault.
I am following the CUDA example from this StackOverflow question: "Trying to run a CusolverSSgels testcase, however it is not working".
The java version of this example is below at the end of my post.
The segmentation fault that occurs when running the Java code is as follows:
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x00007f98b9fe8b6d, pid=19450, tid=19455
#
# JRE version: OpenJDK Runtime Environment (17.0+35) (build 17+35-2724)
# Java VM: OpenJDK 64-Bit Server VM (17+35-2724, mixed mode, sharing, tiered, compressed oops, compressed class ptrs, g1 gc, linux-amd64)
# Problematic frame:
# C [libcusolver.so.11+0x20fb6d] cusolverDnIRSInfosGetNiters+0xd
...
Any help on this would be greatly appreciated.
(NVIDIA-SMI 495.29.05 Driver Version: 495.29.05 CUDA Version: 11.5)
JCuda version from Maven Central: 11.4.1
SgelsTest.java
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.jcusolver.cusolverDnHandle;
import jcuda.jcusolver.cusolverStatus;
import java.util.Arrays;
import static jcuda.jcusolver.JCusolverDn.*;
import static jcuda.runtime.JCuda.*;
import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
/**
 * Minimal JCusolver test that solves an over-determined least-squares
 * problem {@code A * x = y} with {@code cusolverDnSSgels}.
 *
 * It follows the C implementation here:
 * https://stackoverflow.com/questions/67569389/trying-to-run-a-cusolverssgels-testcase-however-it-is-not-working
 *
 * Every CUDA / cuSOLVER status code is printed instead of thrown, so the
 * program keeps going after a failure and the full sequence of statuses is
 * visible in the output.
 */
public class SgelsTest {

    /**
     * Uploads A and y, queries the workspace size, runs the solver, downloads
     * x and prints it. All device buffers and the cuSOLVER handle are released
     * in a {@code finally} block.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // A is stored column by column with leading dimension lda
        // (first column 6,7,6 - second column 5,5,5).
        float[] A = {6f, 7f, 6f, 5f, 5f, 5f};
        float[] y = {9f, 3f, 10f};
        // A =
        //
        // 6 5
        // 7 5
        // 6 5
        //
        //
        // y =
        //
        // 9
        // 3
        // 10

        // params (names kept from the C example):
        // C is passed as the row count m, M as the column count n.
        final int C = 3;
        final int M = 2;
        final int lda = C;
        float[] x = new float[M];

        // Declared outside the try block so that the finally block can always
        // release them. A default-constructed Pointer is what the original
        // code handed to cudaFree for never-allocated buffers as well.
        Pointer dA = new Pointer();
        Pointer dy = new Pointer();
        Pointer dx = new Pointer();
        Pointer buffer = new Pointer();
        Pointer dWork = new Pointer();

        final cusolverDnHandle handle = new cusolverDnHandle();
        final int createStatus = cusolverDnCreate(handle);
        System.out.println("cusolver initialisation status = " + cusolverStatus.stringFor(createStatus));

        try {
            report("malloc A", cudaMalloc(dA, (long) A.length * Sizeof.FLOAT));
            report("malloc y", cudaMalloc(dy, (long) y.length * Sizeof.FLOAT));
            report("malloc x", cudaMalloc(dx, (long) x.length * Sizeof.FLOAT));

            report("memcpy A", cudaMemcpy(dA, Pointer.to(A), (long) A.length * Sizeof.FLOAT, cudaMemcpyHostToDevice));
            report("memcpy y", cudaMemcpy(dy, Pointer.to(y), (long) y.length * Sizeof.FLOAT, cudaMemcpyHostToDevice));

            // Placeholder workspace handed to the size query.
            long[] bufferSize = { 0L };
            report("malloc buffer", cudaMalloc(buffer, Sizeof.FLOAT));

            int status = cusolverDnSSgels_bufferSize(handle, C, M, 1, dA, lda, dy, C, dx, M, buffer, bufferSize);
            System.out.println("status of buffer size = " + status + " " + cusolverStatus.stringFor(status));
            System.out.println("buffer size = " + bufferSize[0]);

            // The size query reports the workspace in BYTES (the native
            // parameter is lwork_bytes), so it must not be multiplied by the
            // element size again.
            report("malloc work", cudaMalloc(dWork, bufferSize[0]));

            int[] niter = { 0 };
            int[] dinfo = { 0 };
            // NOTE(review): the native cusolverDnSSgels documents d_info as
            // living in device memory, while niter is a host value. Both are
            // passed as Java int[] here, exactly as in the original code -
            // confirm how the JCusolver version in use marshals them.
            status = cusolverDnSSgels(handle, C, M, 1, dA, lda, dy, C, dx, M, dWork, bufferSize[0], niter, dinfo);
            System.out.println("status of sgels = " + status + " " + cusolverStatus.stringFor(status));
            System.out.println("niter = " + niter[0] + ", info = " + dinfo[0]);

            status = cudaMemcpy(Pointer.to(x), dx, (long) x.length * Sizeof.FLOAT, cudaMemcpyDeviceToHost);
            System.out.println("memcpy x status = " + status + " " + cudaGetErrorName(status));
            System.out.println(Arrays.toString(x));
        } finally {
            cudaFree(dA);
            cudaFree(dy);
            cudaFree(dx);
            cudaFree(buffer);
            cudaFree(dWork);
            // Only destroy a handle that was actually created.
            if (createStatus == cusolverStatus.CUSOLVER_STATUS_SUCCESS) {
                cusolverDnDestroy(handle);
            }
        }
    }

    /** Prints the numeric code, name and description of a CUDA runtime status. */
    private static void report(String what, int status) {
        System.out.println(what + " status = " + status + " " + cudaGetErrorName(status) + " " + cudaGetErrorString(status));
    }
}