Hi, I am trying to cover matrix operations such as multiplication, addition, subtraction, and the matrix row sum average. I have successfully done the other three, but not the matrix row sum average. I am working with Visual C++, and my code is:
// Computes one value per matrix row: the sum of the row's elements divided by
// the number of non-zero elements in that row.
//
// Parameters:
//   InputMatrix   - matrix data indexed as InputMatrix[row * cols + col]
//   RowSumAverage - output, one float written per row (index = row)
//   rows, cols    - matrix dimensions
//
// Launch layout: 1D grid of 1D blocks; each thread with a global x index below
// `rows` handles exactly one row, so the launch must supply at least `rows`
// threads. The parallelism is across rows: the loop below is the sequential
// walk one thread performs over its own row.
//
// A row that contains no non-zero element yields 0.0f rather than the NaN a
// 0/0 division would produce.
__global__ void rowSums(float* InputMatrix, float* RowSumAverage, int rows, int cols)
{
    int row = threadIdx.x + blockIdx.x * blockDim.x;
    if (row >= rows)
    {
        return; // surplus threads in the last block have no row to process
    }

    // Widen before multiplying so row * cols cannot overflow int on large matrices.
    const float* rowData = InputMatrix + (size_t)row * cols;

    float sum = 0.0f;
    int nonZeroCount = 0;
    for (int col = 0; col < cols; col++)
    {
        float value = rowData[col];
        sum += value; // adding a zero element leaves the sum unchanged
        if (value != 0.0f)
        {
            nonZeroCount++;
        }
    }

    RowSumAverage[row] = (nonZeroCount > 0) ? (sum / nonZeroCount) : 0.0f;
}
This is also working fine, but where does the parallelization come from here, given that a loop still exists in the kernel? When matrix addition is done, we don't need to advance the index again and again.
The code is:
// Element-wise matrix addition: C[i][j] = A[i][j] + B[i][j].
//
// Each thread handles one element; threadIdx.x selects the row index i and
// threadIdx.y selects the column index j, so only a single 2D thread block is
// addressed (blockIdx is not used). N is the column count of every matrix and
// must be a compile-time constant defined outside this kernel.
//
// The number of rows is not visible here, so the launch must not supply more
// threads in x than the matrices have rows.
__global__ void MatAdd(int A[][N], int B[][N], int C[][N]){
    int i = threadIdx.x;
    int j = threadIdx.y;
    // Threads whose column index falls outside the matrix width do nothing.
    if (j < N)
    {
        C[i][j] = A[i][j] + B[i][j];
    }
}
I also tried this code to compute the matrix row sum, but it is not working:
// Accumulates the sum of each matrix row: Sum[i] += B[i][j] for every in-range
// element (i, j).
//
// Parameters:
//   B        - input matrix with N columns (N is a compile-time constant
//              defined outside this kernel)
//   Sum      - output totals, one per row; the kernel only accumulates, so the
//              caller must set the entries to zero before the launch, and the
//              array must hold at least *row entries
//   row, col - pointers to the row and column counts; they are dereferenced
//              inside the kernel, so they must point to device-accessible memory
//
// Launch layout: 2D grid of 2D blocks; the global x index is the row i and the
// global y index is the column j, one thread per matrix element.
//
// Every thread of a row adds into the same Sum[i], so the update uses
// atomicAdd; a plain += from concurrent threads would lose contributions.
__global__ void RowSum(int B[][N], int Sum[N], int *row, int *col)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int j = blockDim.y * blockIdx.y + threadIdx.y;

    // Read the dimensions once instead of dereferencing inside the condition.
    const int rowCount = *row;
    const int colCount = *col;

    if (i < rowCount && j < colCount)
    {
        atomicAdd(&Sum[i], B[i][j]);
    }
}
Is there any clue on how to do this?