[QUOTE=Marco13]Hello
What is the device_type constant that you are passing to clGetDeviceIDs? It should be CL_DEVICE_TYPE_GPU (or CL_DEVICE_TYPE_ALL to obtain also CPUs).
Can you run one of the minimal, basic examples from http://jocl.org/samples/samples.html on this PC?
bye
Marco[/QUOTE]
Hi Macro
I can run these basic examples on mu GPU.
This is a sample JOCL code which I tried to execute on GPU from my Hadoop mapreduce code.
You can find " final long deviceType = CL_DEVICE_TYPE_GPU;" in bold in the code snippet below.
I have an AMD GPU on my machine.
CL_DEVICE_NAME: Tahiti
CL_DEVICE_VENDOR: Advanced Micro Devices, Inc.
CL_DRIVER_VERSION: 1214.3 (VM)
CL_DEVICE_TYPE: CL_DEVICE_TYPE_GPU
And
If I change this to CPU instead of GPU (i.e final long deviceType = CL_DEVICE_TYPE_CPU) then the mapper runs to completion and job is successful.
Please find the code snippet(Mapper Class) below:
package com.testMR.jocl;
import static org.jocl.CL.CL_CONTEXT_PLATFORM;
import static org.jocl.CL.CL_DEVICE_TYPE_ALL;
import static org.jocl.CL.CL_DEVICE_TYPE_GPU;
import static org.jocl.CL.CL_DEVICE_TYPE_CPU;
import static org.jocl.CL.CL_MEM_COPY_HOST_PTR;
import static org.jocl.CL.CL_MEM_READ_ONLY;
import static org.jocl.CL.CL_MEM_READ_WRITE;
import static org.jocl.CL.CL_TRUE;
import static org.jocl.CL.clBuildProgram;
import static org.jocl.CL.clCreateBuffer;
import static org.jocl.CL.clCreateCommandQueue;
import static org.jocl.CL.clCreateContext;
import static org.jocl.CL.clCreateKernel;
import static org.jocl.CL.clCreateProgramWithSource;
import static org.jocl.CL.clEnqueueNDRangeKernel;
import static org.jocl.CL.clEnqueueReadBuffer;
import static org.jocl.CL.clGetDeviceIDs;
import static org.jocl.CL.clGetPlatformIDs;
import static org.jocl.CL.clReleaseCommandQueue;
import static org.jocl.CL.clReleaseContext;
import static org.jocl.CL.clReleaseKernel;
import static org.jocl.CL.clReleaseMemObject;
import static org.jocl.CL.clReleaseProgram;
import static org.jocl.CL.clSetKernelArg;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.jocl.CL;
import org.jocl.Pointer;
import org.jocl.Sizeof;
import org.jocl.cl_command_queue;
import org.jocl.cl_context;
import org.jocl.cl_context_properties;
import org.jocl.cl_device_id;
import org.jocl.cl_kernel;
import org.jocl.cl_mem;
import org.jocl.cl_platform_id;
import org.jocl.cl_program;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
private static String programSource =
“__kernel void “+
“sampleKernel(__global const float *a,”+
" __global const float *b,”+
" __global float *c)”+
“{”+
" int gid = get_global_id(0);"+
" c[gid] = a[gid] * b[gid];"+
“}”;
//hadoop supported data types
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
//context.write(arg0, arg1);
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException
{
// Create input- and output data
int n = 10;
float srcArrayA[] = new float[n];
float srcArrayB[] = new float[n];
float dstArray[] = new float[n];
for (int i=0; i<n; i++)
{
srcArrayA** = i;
srcArrayB** = i;
}
Pointer srcA = Pointer.to(srcArrayA);
Pointer srcB = Pointer.to(srcArrayB);
Pointer dst = Pointer.to(dstArray);
// The platform, device type and device number
// that will be used
final int platformIndex = 0;
** final long deviceType = CL_DEVICE_TYPE_GPU;**
final int deviceIndex = 0;
// Enable exceptions and subsequently omit error checks in this sample
CL.setExceptionsEnabled(true);
// Obtain the number of platforms
int numPlatformsArray[] = new int[1];
clGetPlatformIDs(0, null, numPlatformsArray);
int numPlatforms = numPlatformsArray[0];
// Obtain a platform ID
cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
clGetPlatformIDs(platforms.length, platforms, null);
cl_platform_id platform = platforms[platformIndex];
// Initialize the context properties
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
// Obtain the number of devices for the platform
int numDevicesArray[] = new int[1];
clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
int numDevices = numDevicesArray[0];
// Obtain a device ID
cl_device_id devices[] = new cl_device_id[numDevices];
clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
cl_device_id device = devices[deviceIndex];
// Create a context for the selected device
cl_context openCL_context = clCreateContext(
contextProperties, 1, new cl_device_id[]{device},
null, null, null);
// Create a command-queue for the selected device
cl_command_queue commandQueue =
clCreateCommandQueue(openCL_context, device, 0, null);
// Allocate the memory objects for the input- and output data
cl_mem memObjects[] = new cl_mem[3];
memObjects[0] = clCreateBuffer(openCL_context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcA, null);
memObjects[1] = clCreateBuffer(openCL_context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcB, null);
memObjects[2] = clCreateBuffer(openCL_context,
CL_MEM_READ_WRITE,
Sizeof.cl_float * n, null, null);
// Create the program from the source code
cl_program program = clCreateProgramWithSource(openCL_context,
1, new String[]{ programSource }, null, null);
// Build the program
clBuildProgram(program, 0, null, null, null, null);
// Create the kernel
cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
// Set the arguments for the kernel
clSetKernelArg(kernel, 0,
Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 1,
Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 2,
Sizeof.cl_mem, Pointer.to(memObjects[2]));
// Set the work-item dimensions
long global_work_size[] = new long[]{n};
long local_work_size[] = new long[]{1};
// Execute the kernel
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
global_work_size, local_work_size, 0, null, null);
// Read the output data
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
n * Sizeof.cl_float, dst, 0, null, null);
// Release kernel, program, and memory objects
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[2]);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commandQueue);
clReleaseContext(openCL_context);
// Verify the result
boolean passed = true;
final float epsilon = 1e-7f;
for (int i=0; i<n; i++)
{
float x = dstArray**;
float y = srcArrayA** * srcArrayB**;
boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x);
if (!epsilonEqual)
{
passed = false;
break;
}
}
//System.out.println("Test "+(passed?"PASSED":"FAILED"));
context.write(new Text("Passed"),new IntWritable(1));
if (n <= 10)
{
//System.out.println("Result: "+java.util.Arrays.toString(dstArray));
context.write(new Text(java.util.Arrays.toString(dstArray)),new IntWritable(2));
}
}
}
I have used identity reducer.