Hi,
I tried to reproduce it in a smaller Setup, but I wasnt able to do so.
Any tips on how I might be able to debug this?
I believe some calculation is breaking Cuda and then when I want to getVector or SetVector the Problem occurs.
If I could everytime I want to get or set first ask Cuda if it is still working, that would be great for finding the Error.
cheers
Noodles
*** Edit ***
Hi,
ok I was able to reproduce the Error in a “smaller Setup”
[SPOILER]```import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import Matrix.GPUOp;
import Matrix.Mat2;
public class Test implements Runnable{
private ExecutorService executor = Executors.newFixedThreadPool(50);
public static Test test = new Test();
public static boolean run = true;
public static int started = 0;
public static int finished = 0;
public static void main(String[] args) {
test.act();
}
public void act() {
GPUOp.init();
while (run) {
executor.execute(new Test());
started++;
System.out.println(started+","+finished);
synchronized (Test.class) {
while (finished<started-200) {
try {
wait();
} catch (Exception e) {}
}
}
}
GPUOp.close();
}
public void run() {
try {
Thread.sleep((long)(Math.random()*2000));
} catch (Exception e) {}
int numNeurons = (int)(Math.random()*100)+1;
int numInputs = (int)(Math.random()*800)+1;
int numOutputs = (int)(Math.random()*30)+1;
Mat2 result = new Mat2(numOutputs,1);
Mat2 outputWeights = new Mat2(numOutputs,numNeurons);
Mat2 externalNeuronValues = new Mat2(numNeurons,1);
Mat2 in1 = new Mat2(numNeurons,1);
Mat2 in2 = new Mat2(numNeurons,1);
Mat2 in3 = new Mat2(numNeurons,1);
Mat2 outGateWeights = new Mat2(numNeurons,numNeurons);
Mat2 changeGateWeights = new Mat2(numNeurons,numNeurons);
Mat2 inGateWeights = new Mat2(numNeurons,numNeurons);
Mat2 internalNeuronValues = new Mat2(numNeurons,1);
Mat2 calc1 = new Mat2(numNeurons,1);
Mat2 calc2 = new Mat2(numNeurons,1);
Mat2 calc3 = new Mat2(numNeurons,1);
Mat2 inputValues = new Mat2(numInputs,1);
Mat2 inputOutGateWeights = new Mat2(numNeurons,numInputs);
Mat2 inputInGateWeights = new Mat2(numNeurons,numInputs);
Mat2 inputChangeGateWeights = new Mat2(numNeurons,numInputs);
result.randomize(-1, 1);
outputWeights.randomize(-1, 1);
externalNeuronValues.randomize(-1, 1);
in1.randomize(-1, 1);
in2.randomize(-1, 1);
in3.randomize(-1, 1);
outGateWeights.randomize(-1, 1);
changeGateWeights.randomize(-1, 1);
inGateWeights.randomize(-1, 1);
internalNeuronValues.randomize(-1, 1);
calc1.randomize(-1, 1);
calc2.randomize(-1, 1);
calc3.randomize(-1, 1);
inputValues.randomize(-1, 1);
inputOutGateWeights.randomize(-1, 1);
inputInGateWeights.randomize(-1, 1);
inputChangeGateWeights.randomize(-1, 1);
result.GPUinit();
outputWeights.GPUinit();
externalNeuronValues.GPUinit();
in1.GPUinit();
in2.GPUinit();
in3.GPUinit();
outGateWeights.GPUinit();
changeGateWeights.GPUinit();
inGateWeights.GPUinit();
internalNeuronValues.GPUinit();
calc1.GPUinit();
calc2.GPUinit();
calc3.GPUinit();
inputValues.GPUinit();
inputOutGateWeights.GPUinit();
inputInGateWeights.GPUinit();
inputChangeGateWeights.GPUinit();
//GPUOp.doCalcIns(in1, in2, in3, inputValues, inputOutGateWeights, inputInGateWeights, inputChangeGateWeights);
GPUOp.doCalc(in1, in2, in3, outGateWeights, changeGateWeights, inGateWeights, internalNeuronValues, externalNeuronValues, calc1, calc2, calc3);
GPUOp.getOutput(result, outputWeights, externalNeuronValues);
result.GPUfree();
outputWeights.GPUfree();
externalNeuronValues.GPUfree();
in1.GPUfree();
in2.GPUfree();
in3.GPUfree();
outGateWeights.GPUfree();
changeGateWeights.GPUfree();
inGateWeights.GPUfree();
internalNeuronValues.GPUfree();
calc1.GPUfree();
calc2.GPUfree();
calc3.GPUfree();
inputValues.GPUfree();
inputOutGateWeights.GPUfree();
inputInGateWeights.GPUfree();
inputChangeGateWeights.GPUfree();
test.finished();
}
public synchronized void finished() {
finished++;
test.notifyAll();
}
}```[/SPOILER]
[SPOILER]```package Matrix;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.JCudaDriver;
import jcuda.jcublas.JCublas;
import jcuda.runtime.JCuda;
import jcuda.vec.VecFloat;
public class GPUOp {
public static void fNormalize(Mat2 d_Out,int n,Mat2 d_In) {
VecFloat.exp(n, d_Out.getDevice().get(), d_In.getDevice().get()); // x = e^x
VecFloat.scalarDiv(n, d_Out.getDevice().get(), 1f, d_Out.getDevice().get()); // x = e^(-x)
VecFloat.scalarAdd(n, d_Out.getDevice().get(), 1f, d_Out.getDevice().get()); // x = 1+e^(-x)
VecFloat.scalarDiv(n, d_Out.getDevice().get(), 1f, d_Out.getDevice().get()); // x = 1/(1+e^(-x))
}
public static void gNormalize(Mat2 d_Out,int n,Mat2 d_In) {
VecFloat.exp(n, d_Out.getDevice().get(), d_In.getDevice().get()); // x = e^x
VecFloat.scalarDiv(n, d_Out.getDevice().get(), 1f, d_Out.getDevice().get()); // x = e^(-x)
VecFloat.scalarAdd(n, d_Out.getDevice().get(), 1f, d_Out.getDevice().get()); // x = 1+e^(-x)
VecFloat.scalarDiv(n, d_Out.getDevice().get(), 2f, d_Out.getDevice().get()); // x = 2/(1+e^(-x))
VecFloat.subScalar(n, d_Out.getDevice().get(), d_Out.getDevice().get(), 1f); // x = (2/(1+e^(-x)))-1
}
public static void GateBulk(Mat2 d_Out,Mat2 env, Mat2 weights,int neurons) {
JCublas.cublasSgemm('n', 'n', neurons, 1, neurons, 1.0f, weights.getDevice().get(), neurons, env.getDevice().get(), neurons, 0.0f, d_Out.getDevice().get(), neurons);
fNormalize(d_Out,neurons,d_Out);
}
public static void GateBulk(Mat2 d_Out,Mat2 env, Mat2 weights,int neurons,int input) {
JCublas.cublasSgemm('n', 'n', neurons, 1, input, 1.0f, weights.getDevice().get(), neurons, env.getDevice().get(), input, 0.0f, d_Out.getDevice().get(), neurons);
fNormalize(d_Out,neurons,d_Out);
}
public static void doCalcIns(Mat2 in1,Mat2 in2,Mat2 in3,Mat2 inputValues,Mat2 inputOutGateWeights,Mat2 inputInGateWeights,Mat2 inputChangeGateWeights) {
int neurons = inputOutGateWeights.cols();
int input = inputValues.elements();
GateBulk(in1,inputValues,inputOutGateWeights,neurons,input);
GateBulk(in2,inputValues,inputInGateWeights,neurons,input);
GateBulk(in3,inputValues,inputChangeGateWeights,neurons,input);
}
public static void doCalc(Mat2 in1, Mat2 in2, Mat2 in3, Mat2 outGateWeights,
Mat2 changeGateWeights, Mat2 inGateWeights, Mat2 internalNeuronValues,Mat2 externalNeuronValues,Mat2 calc1,Mat2 calc2,Mat2 calc3) {
int neurons = internalNeuronValues.elements();
/*CUdeviceptr calc1 = new CUdeviceptr();
JCublas.cublasAlloc(neurons, Sizeof.FLOAT, calc1);
CUdeviceptr calc2 = new CUdeviceptr();
JCublas.cublasAlloc(neurons, Sizeof.FLOAT, calc2);
CUdeviceptr calc3 = new CUdeviceptr();
JCublas.cublasAlloc(neurons, Sizeof.FLOAT, calc3);
CUdeviceptr calc4 = new CUdeviceptr();
JCublas.cublasAlloc(neurons, Sizeof.FLOAT, calc4);*/
//
GateBulk(calc1,externalNeuronValues,outGateWeights,neurons);
//calc1 used d_yOut
GateBulk(calc2,externalNeuronValues,inGateWeights,neurons);
//calc2 used d_yIn
GateBulk(calc3,externalNeuronValues,changeGateWeights,neurons);
//calc3 used d_yCh
VecFloat.add(neurons,calc1.getDevice().get(),calc1.getDevice().get(),in1.getDevice().get());
VecFloat.add(neurons,calc2.getDevice().get(),calc2.getDevice().get(),in2.getDevice().get());
VecFloat.add(neurons,calc3.getDevice().get(),calc3.getDevice().get(),in3.getDevice().get());
//
//
VecFloat.mul(neurons, calc2.getDevice().get(), calc2.getDevice().get(), calc3.getDevice().get());
//calc3 free
//
//
VecFloat.add(neurons, internalNeuronValues.getDevice().get(), internalNeuronValues.getDevice().get(), calc2.getDevice().get());
//calc2 free
//
//
fNormalize(calc2,neurons,internalNeuronValues);
//calc2 used
/*float[] h_INV = new float[neurons];
JCublas.cublasGetVector(neurons, Sizeof.FLOAT, internalNeuronValues.getDevice().get(), 1, Pointer.to(h_INV), 1);
internalNeuronValues.set(h_INV);*/
//
//
VecFloat.mul(neurons,externalNeuronValues.getDevice().get(), calc1.getDevice().get(), calc2.getDevice().get());
//calc1 free
//calc2 free
//
/*//
CUdeviceptr d_Out = new CUdeviceptr();
JCublas.cublasAlloc(output, Sizeof.FLOAT, d_Out);
JCublas.cublasSgemm('n', 'n', output, 1, neurons, 1.0f, outputWeights.getDevice().get(), output, externalNeuronValues.getDevice().get(), neurons, 0.0f, d_Out, output);
//*/
//
/*float[] h_ENV = new float[neurons];
JCublas.cublasGetVector(neurons, Sizeof.FLOAT, externalNeuronValues.getDevice().get(), 1, Pointer.to(h_ENV), 1);
externalNeuronValues.set(h_ENV);*/
//
/*float[] h_Out = new float[output];
JCublas.cublasGetVector(output, Sizeof.FLOAT, d_Out, 1, Pointer.to(h_Out), 1);
result.set(h_Out);
JCudaDriver.cuMemFree(d_Out);*/
}
public static void getOutput(Mat2 result,Mat2 outputWeights,Mat2 externalNeuronValues) {
synchronized (GPUOp.class) {
long time = 0;
try {
time = System.currentTimeMillis();
int neurons = externalNeuronValues.elements();
int output = result.elements();
CUdeviceptr d_Out = new CUdeviceptr();
JCublas.cublasAlloc(output, Sizeof.FLOAT, d_Out);
JCublas.cublasSgemm('n', 'n', output, 1, neurons, 1.0f, outputWeights.getDevice().get(), output, externalNeuronValues.getDevice().get(), neurons, 0.0f, d_Out, output);
float[] h_Out = new float[output];
JCublas.cublasGetVector(output, Sizeof.FLOAT, d_Out, 1, Pointer.to(h_Out), 1);
result.set(h_Out);
JCublas.cublasFree(d_Out);
} catch (Exception e) {System.out.println(System.currentTimeMillis()-time);System.out.println(result.rows()+","+result.cols()+";"+outputWeights.rows()+","+outputWeights.cols()+";"+externalNeuronValues.rows()+","+externalNeuronValues.cols());result.print();outputWeights.print();externalNeuronValues.print();e.printStackTrace();}
}
}
public static void init() {
JCuda.setExceptionsEnabled(true);
JCudaDriver.setExceptionsEnabled(true);
JCublas.setExceptionsEnabled(true);
JCublas.cublasInit();
VecFloat.init();
}
public static void close() {
JCublas.cublasShutdown();
VecFloat.shutdown();
}
public static void print(CUdeviceptr ptr,int size) {
float[] h_ENV = new float[size];
JCublas.cublasGetVector(size, Sizeof.FLOAT, ptr, 1, Pointer.to(h_ENV), 1);
System.out.println("######################");
for (int i=0;i<h_ENV.length;i++) {
System.out.println(h_ENV**);
}
System.out.println("######################");
}
}```[/SPOILER]
[SPOILER]```package Matrix;
import Util.Random;
public class Mat2 {
private int rows;
private int cols;
private float[] data;
private int elements;
private boolean rowFirst = false;
private CuDeviceHolder ptr;
public Mat2(int rows,int cols) {
this.rows = rows;
this.cols = cols;
elements = this.rows*this.cols;
data = new float[elements];
}
public Mat2(Mat2 mat) {
this.rows = mat.rows;
this.cols = mat.cols;
elements = this.rows*this.cols;
data = new float[mat.elements];
for (int i=0;i<data.length;i++) {
data**=mat.data**;
}
this.rowFirst=mat.rowFirst;
}
public void set(float[] data) {
this.data = data;
}
public void set(int row,int col,float val) {
data[loc(row,col)] = val;
}
public float get(int row,int col) {
return data[loc(row,col)];
}
public int rows() {
return rows;
}
public int cols() {
return cols;
}
public float[] data() {
return data;
}
public int elements() {
return elements;
}
/*public Mat2 add(Mat2 second) {
Mat2 result = new Mat2(rows,cols);
if (!rowFirst) result.colFirst();
result.data = GPUOp.add(data, second.data);
return result;
}
public Mat2 mul(Mat2 second) {
Mat2 result = new Mat2(rows,second.cols);
if (!rowFirst) result.colFirst();
result.data = GPUOp.mul(data, second.data,rows,cols,second.rows,second.cols);
return result;
}*/
public Mat2 sk(float skalar) {
return null;
}
public Mat2 copy() {
Mat2 result = new Mat2(rows,cols);
if (!rowFirst) result.colFirst();
for (int r=0;r<rows;r++) {
for (int c=0;c<cols;c++) {
result.set(r, c, get(r,c));
}
}
return result;
}
public int loc(int row,int col) {
if (rowFirst) {
if (row>=rows) throw new RuntimeException("row "+row);
if (col>=cols) throw new RuntimeException("col "+col);
return (rows*row)+col;
} else {
if (row>=rows) throw new RuntimeException("row "+row);
if (col>=cols) throw new RuntimeException("col "+col);
return (rows*col)+row;
}
}
public boolean colFirst() {
if (!rowFirst) return false;
rowFirst = false;
return true;
}
public boolean rowFirst() {
if (rowFirst) return false;
rowFirst = true;
return true;
}
/*@Override
public String toString() {
String result = "";
for (int r=0;r<rows;r++) {
if (r!=0)result+="
“;
for (int c=0;c<cols;c++) {
if(c!=0) result+=”,";
result += get(r,c);
}
}
return result;
}*/
public void randomize(float min, float max) {
for (int i=0;i<data.length;i++) {
data**=Random.getRandom().getFloat(min, max);
}
}
public void id() {
for (int i=0;i<(rows<cols?rows:cols);i++) {
set(i,i,1f);
}
}
public void removeRow(int i) {
rows -=1;
elements = rows*cols;
float[] newData = new float[elements];
for (int row=0;row<rows+1;row++) {
for (int col=0;col<cols;col++) {
if (row<i) {
try {
newData[(rows*col)+row]=data[((rows+1)*col)+row];
}catch (Exception e) {e.printStackTrace();System.out.println(rows+","+cols+","+elements+","+row+","+col+","+i);}
}
if (row>i) {
newData[(rows*col)+row-1]=data[((rows+1)*col)+row];
}
}
}
data = newData;
}
public void removeCol(int i) {
cols -=1;
elements = rows*cols;
float[] newData = new float[elements];
for (int j=0;j<data.length;j++) {
if (j < rows*i){
newData[j]=data[j];
} else if (j >= rows+rows*i){
newData[j-rows]=data[j];
}
}
data = newData;
}
public void addRow(int i,float min,float max) {
rows+=1;
elements = rows*cols;
float[] newData = new float[elements];
for (int row=0;row<rows;row++) {
for (int col=0;col<cols;col++) {
if (row<i) {
try {
newData[(rows*col)+row]=data[((rows-1)*col)+row];
}catch (Exception e) {e.printStackTrace();System.out.println(rows+","+cols+","+elements+","+row+","+col+","+i);}
}
if (row==i) {
newData[(rows*col)+row]=Random.getRandom().getFloat(min, max);
}
if (row>i) {
newData[(rows*col)+row]=data[((rows-1)*col)+row-1];
}
}
}
data=newData;
}
public void addCol(int i,float min,float max) {
cols+=1;
elements = rows*cols;
float[] newData = new float[elements];
for (int j=0;j<newData.length;j++) {
if (j < rows*i){
newData[j]=data[j];
} else if (j >= rows+rows*i){
newData[j]=data[j-rows];
} else {
newData[j]=Random.getRandom().getFloat(min, max);
}
}
data = newData;
}
public void mutate(float min,float max) {
int r=Random.getRandom().getInt(0, elements-1);
data[r]=data[r]+Random.getRandom().getFloat(min, max);
/*if (data[r]>5) data[r]=5;
if (data[r]<-5) data[r]=-5;*/
}
public void print() {
System.out.println("#####################################");
for (int i=0;i<rows;i++) {
for (int j=0;j<cols;j++) {
if (j>0) System.out.print(",");
System.out.print(get(i,j));
}
System.out.println();
}
System.out.println("#####################################");
}
public CuDeviceHolder getDevice() {
return ptr;
}
public void GPUinit() {
if (ptr!=null) System.out.println("++++++++++++++++++++++++++++++++++++++");
ptr= new CuDeviceHolder(this);
}
public void GPUfree() {
ptr.free();
ptr=null;
}
}```[/SPOILER]
Now then in the First Class given Test, the line I have commented out produces the Problem, the way I have given the Example it actually works, but if I comment the line in then somewhere an Error is produced, its not easy to see where since theres alot of Threads starting and ending and such.
Must I try to simplify the Problem further?
cheers
Noodles