Added a 32b version of the SPIR test

Graphics / POCL - Pekka Jääskeläinen [tut.fi] - 5 February 2014 00:41 EST



###

diff --git a/configure.ac b/configure.ac
index a7a49d7..7ceb4db 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1261,7 +1261,8 @@ AC_CONFIG_FILES([Makefile
lib/poclu/Makefile
examples/Makefile
examples/example1/Makefile
- examples/example1-spir/Makefile
+ examples/example1-spir64/Makefile
+ examples/example1-spir32/Makefile
examples/example2/Makefile
examples/example2a/Makefile
examples/standalone/Makefile
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 375ffa5..d337661 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -22,6 +22,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

-SUBDIRS = example1 example1-spir example2 example2a trig scalarwave standalone \
- opencl-book-samples VexCL ViennaCL Rodinia Parboil AMD AMDSDK2.9 \
- EinsteinToolkit piglit
+SUBDIRS = example1 example1-spir32 example1-spir64 example2 example2a trig \
+ scalarwave standalone opencl-book-samples VexCL ViennaCL Rodinia Parboil \
+ AMD AMDSDK2.9 EinsteinToolkit piglit
diff --git a/examples/example1-spir/Makefile.am b/examples/example1-spir/Makefile.am
deleted file mode 100644
index 226c2eb..0000000
--- a/examples/example1-spir/Makefile.am
+++ /dev/null
@@ -1,31 +0,0 @@
-# Process this file with automake to produce Makefile.in (in this,
-# and all subdirectories).
-# Makefile.am for pocl/examples/example1.
-#
-# Copyright (c) 2011 Universidad Rey Juan Carlos
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-noinst_PROGRAMS = example1-spir
-
-example1_spir_SOURCES = example1.c example1_exec.c example1.cl example1.spir
-example1_spir_LDADD = @OPENCL_LIBS@ ../../lib/poclu/libpoclu.la
-example1_spir_CFLAGS = @OPENCL_CFLAGS@
-
-AM_CPPFLAGS = -I$(top_srcdir)/fix-include -I$(top_srcdir)/include -DSRCDIR='"$(abs_srcdir)"'
diff --git a/examples/example1-spir/example1.c b/examples/example1-spir/example1.c
deleted file mode 100644
index 7798027..0000000
--- a/examples/example1-spir/example1.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/* example1 - Simple example from OpenCL 1.0 specification, modified
-
-
- Copyright (c) 2011 Universidad Rey Juan Carlos
- 2014 Pekka Jääskeläinen
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-#include
-#include
-#include
-#include
-
-#define N 128
-
-extern int exec_dot_product_kernel
-(const char *program_source, size_t source_size,
- int n, void *srcA, void *srcB, void *dst);
-
-int
-main (void)
-{
- FILE *source_file;
- char *source;
- int source_size;
- cl_float4 *srcA, *srcB;
- cl_float *dst;
- int ierr;
- int i;
-
- source_file = fopen("example1.spir", "r");
- if (source_file == NULL)
- source_file = fopen (SRCDIR "/example1.spir", "r");
-
- assert(source_file != NULL && SRCDIR "example1.spir not found!!");
-
- fseek (source_file, 0, SEEK_END);
- source_size = ftell (source_file);
- fseek (source_file, 0, SEEK_SET);
-
- source = (char *) malloc (source_size +1 );
- assert (source != NULL);
-
- fread (source, source_size, 1, source_file);
-
- fclose (source_file);
-
- srcA = (cl_float4 *) malloc (N * sizeof (cl_float4));
- srcB = (cl_float4 *) malloc (N * sizeof (cl_float4));
- dst = (cl_float *) malloc (N * sizeof (cl_float));
-
- for (i = 0; i < N; ++i)
- {
- srcA[i].x = i;
- srcA[i].y = i;
- srcA[i].z = i;
- srcA[i].w = i;
- srcB[i].x = i;
- srcB[i].y = i;
- srcB[i].z = i;
- srcB[i].w = i;
- }
-
- ierr = exec_dot_product_kernel (source, source_size, N, srcA, srcB, dst);
- if (ierr) printf ("ERROR\n");
-
- for (i = 0; i < 4; ++i)
- {
- printf ("(%f, %f, %f, %f) . (%f, %f, %f, %f) = %f\n",
- srcA[i].x, srcA[i].y, srcA[i].z, srcA[i].w,
- srcB[i].x, srcB[i].y, srcB[i].z, srcB[i].w,
- dst[i]);
- if (srcA[i].x * srcB[i].x +
- srcA[i].y * srcB[i].y +
- srcA[i].z * srcB[i].z +
- srcA[i].w * srcB[i].w != dst[i])
- {
- printf ("FAIL\n");
- return -1;
- }
- }
-
- printf ("OK\n");
- return 0;
-}
diff --git a/examples/example1-spir/example1.cl b/examples/example1-spir/example1.cl
deleted file mode 100644
index a5d4498..0000000
--- a/examples/example1-spir/example1.cl
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-__kernel void
-dot_product (__global const float4 *a,
- __global const float4 *b, __global float *c)
-{
- int gid = get_global_id(0);
-
- /* This parallel region does not vectorize with the
- loop vectorizer because it accesses vector datatypes.
- Perhaps with SLP/BB vectorizer.*/
-
- float ax = a[gid].x;
- float ay = a[gid].y;
- float az = a[gid].z;
- float aw = a[gid].w;
-
- float bx = b[gid].x,
- by = b[gid].y,
- bz = b[gid].z,
- bw = b[gid].w;
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- /* This parallel region should vectorize. */
- c[gid] = ax * bx;
- c[gid] += ay * by;
- c[gid] += az * bz;
- c[gid] += aw * bw;
-}
diff --git a/examples/example1-spir/example1.spir b/examples/example1-spir/example1.spir
deleted file mode 100644
index 9362b7a..0000000
Binary files a/examples/example1-spir/example1.spir and /dev/null differ
diff --git a/examples/example1-spir/example1_exec.c b/examples/example1-spir/example1_exec.c
deleted file mode 100644
index 60a412b..0000000
--- a/examples/example1-spir/example1_exec.c
+++ /dev/null
@@ -1,186 +0,0 @@
-#include
-#include
-#include
-
-void
-delete_memobjs(cl_mem *memobjs, int n)
-{
- int i;
- for (i=0; i- clReleaseMemObject(memobjs[i]);
-}
-
-int
-exec_dot_product_kernel(const char *program_source, size_t source_size,
- int n, cl_float4 *srcA, cl_float4 *srcB, cl_float *dst)
-{
- cl_context context;
- cl_command_queue cmd_queue;
- cl_device_id *devices;
- cl_program program;
- cl_kernel kernel;
- cl_mem memobjs[3];
- size_t global_work_size[1];
- size_t local_work_size[1];
- size_t cb;
- cl_int err;
- int i;
- context = poclu_create_any_context();
- if (context == (cl_context)0)
- return -1;
-
- // get the list of GPU devices associated with context
- clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
- devices = (cl_device_id *) malloc(cb);
- clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
-
- // create a command-queue
- cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
- if (cmd_queue == (cl_command_queue)0)
- {
- clReleaseContext(context);
- free(devices);
- return -1;
- }
-
- for (i = 0; i < n; ++i)
- {
- poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
- poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
- }
-
-
- // allocate the buffer memory objects
- memobjs[0] = clCreateBuffer(context,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float4) * n, srcA, NULL);
- if (memobjs[0] == (cl_mem)0)
- {
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- memobjs[1] = clCreateBuffer(context,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float4) * n, srcB, NULL);
- if (memobjs[1] == (cl_mem)0)
- {
- delete_memobjs(memobjs, 1);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- memobjs[2] = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- sizeof(cl_float) * n, NULL, NULL);
- if (memobjs[2] == (cl_mem)0)
- {
- delete_memobjs(memobjs, 2);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // create the program
- program =
- clCreateProgramWithBinary
- (context, 1, devices, &source_size,
- (const unsigned char**)&program_source, NULL, NULL);
- if (program == (cl_program)0)
- {
- delete_memobjs(memobjs, 3);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // build the program
- err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- delete_memobjs(memobjs, 3);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // create the kernel
- kernel = clCreateKernel(program, "dot_product", NULL);
- if (kernel == (cl_kernel)0)
- {
- delete_memobjs(memobjs, 3);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // set the args values
- err = clSetKernelArg(kernel, 0,
- sizeof(cl_mem), (void *) &memobjs[0]);
- err |= clSetKernelArg(kernel, 1,
- sizeof(cl_mem), (void *) &memobjs[1]);
- err |= clSetKernelArg(kernel, 2,
- sizeof(cl_mem), (void *) &memobjs[2]);
-
- if (err != CL_SUCCESS)
- {
- delete_memobjs(memobjs, 3);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // set work-item dimensions
- global_work_size[0] = n;
- local_work_size[0]= 128;
-
- // execute kernel
- err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL,
- global_work_size, local_work_size,
- 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- delete_memobjs(memobjs, 3);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
-
- // read output image
- err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE,
- 0, n * sizeof(cl_float), dst,
- 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- delete_memobjs(memobjs, 3);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return -1;
- }
- for (i = 0; i < n; ++i)
- {
- poclu_bswap_cl_float_array(devices[0], (cl_float*)&dst[i], 1);
- poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
- poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
- }
- free(devices);
-
-
- // release kernel, program, and memory objects
- delete_memobjs(memobjs, 3);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(cmd_queue);
- clReleaseContext(context);
- return 0; // success...
-}
diff --git a/examples/example1-spir/generate_spir.sh b/examples/example1-spir/generate_spir.sh
deleted file mode 100755
index 28da381..0000000
--- a/examples/example1-spir/generate_spir.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# NOTE:
-# 1) Install the official SPIR generator version of Clang/LLVM:
-# https://github.com/KhronosGroup/SPIR
-#
-# 2) Download opencl_spir.h from
-# https://raw.github.com/KhronosGroup/SPIR-Tools/master/headers/opencl_spir.h
-# and add "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" in the beginning of
-# it to make it compile.
-clang -cc1 -emit-llvm-bc -triple spir64-unknown-unknown -include opencl_spir.h -o example1.spir example1.cl
diff --git a/examples/example1-spir32/Makefile.am b/examples/example1-spir32/Makefile.am
new file mode 100644
index 0000000..7ccca59
--- /dev/null
+++ b/examples/example1-spir32/Makefile.am
@@ -0,0 +1,31 @@
+# Process this file with automake to produce Makefile.in (in this,
+# and all subdirectories).
+# Makefile.am for pocl/examples/example1.
+#
+# Copyright (c) 2011 Universidad Rey Juan Carlos
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+noinst_PROGRAMS = example1-spir32
+
+example1_spir32_SOURCES = example1.c example1_exec.c example1.cl example1.spir
+example1_spir32_LDADD = @OPENCL_LIBS@ ../../lib/poclu/libpoclu.la
+example1_spir32_CFLAGS = @OPENCL_CFLAGS@
+
+AM_CPPFLAGS = -I$(top_srcdir)/fix-include -I$(top_srcdir)/include -DSRCDIR='"$(abs_srcdir)"'
diff --git a/examples/example1-spir32/example1.c b/examples/example1-spir32/example1.c
new file mode 100644
index 0000000..7798027
--- /dev/null
+++ b/examples/example1-spir32/example1.c
@@ -0,0 +1,102 @@
+/* example1 - Simple example from OpenCL 1.0 specification, modified
+
+
+ Copyright (c) 2011 Universidad Rey Juan Carlos
+ 2014 Pekka Jääskeläinen
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include
+#include
+#include
+#include
+
+#define N 128
+
+extern int exec_dot_product_kernel
+(const char *program_source, size_t source_size,
+ int n, void *srcA, void *srcB, void *dst);
+
+int
+main (void)
+{
+ FILE *source_file;
+ char *source;
+ int source_size;
+ cl_float4 *srcA, *srcB;
+ cl_float *dst;
+ int ierr;
+ int i;
+
+ source_file = fopen("example1.spir", "r");
+ if (source_file == NULL)
+ source_file = fopen (SRCDIR "/example1.spir", "r");
+
+ assert(source_file != NULL && SRCDIR "example1.spir not found!!");
+
+ fseek (source_file, 0, SEEK_END);
+ source_size = ftell (source_file);
+ fseek (source_file, 0, SEEK_SET);
+
+ source = (char *) malloc (source_size +1 );
+ assert (source != NULL);
+
+ fread (source, source_size, 1, source_file);
+
+ fclose (source_file);
+
+ srcA = (cl_float4 *) malloc (N * sizeof (cl_float4));
+ srcB = (cl_float4 *) malloc (N * sizeof (cl_float4));
+ dst = (cl_float *) malloc (N * sizeof (cl_float));
+
+ for (i = 0; i < N; ++i)
+ {
+ srcA[i].x = i;
+ srcA[i].y = i;
+ srcA[i].z = i;
+ srcA[i].w = i;
+ srcB[i].x = i;
+ srcB[i].y = i;
+ srcB[i].z = i;
+ srcB[i].w = i;
+ }
+
+ ierr = exec_dot_product_kernel (source, source_size, N, srcA, srcB, dst);
+ if (ierr) printf ("ERROR\n");
+
+ for (i = 0; i < 4; ++i)
+ {
+ printf ("(%f, %f, %f, %f) . (%f, %f, %f, %f) = %f\n",
+ srcA[i].x, srcA[i].y, srcA[i].z, srcA[i].w,
+ srcB[i].x, srcB[i].y, srcB[i].z, srcB[i].w,
+ dst[i]);
+ if (srcA[i].x * srcB[i].x +
+ srcA[i].y * srcB[i].y +
+ srcA[i].z * srcB[i].z +
+ srcA[i].w * srcB[i].w != dst[i])
+ {
+ printf ("FAIL\n");
+ return -1;
+ }
+ }
+
+ printf ("OK\n");
+ return 0;
+}
diff --git a/examples/example1-spir32/example1.cl b/examples/example1-spir32/example1.cl
new file mode 100644
index 0000000..a5d4498
--- /dev/null
+++ b/examples/example1-spir32/example1.cl
@@ -0,0 +1,30 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void
+dot_product (__global const float4 *a,
+ __global const float4 *b, __global float *c)
+{
+ int gid = get_global_id(0);
+
+ /* This parallel region does not vectorize with the
+ loop vectorizer because it accesses vector datatypes.
+ Perhaps with SLP/BB vectorizer.*/
+
+ float ax = a[gid].x;
+ float ay = a[gid].y;
+ float az = a[gid].z;
+ float aw = a[gid].w;
+
+ float bx = b[gid].x,
+ by = b[gid].y,
+ bz = b[gid].z,
+ bw = b[gid].w;
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* This parallel region should vectorize. */
+ c[gid] = ax * bx;
+ c[gid] += ay * by;
+ c[gid] += az * bz;
+ c[gid] += aw * bw;
+}
diff --git a/examples/example1-spir32/example1_exec.c b/examples/example1-spir32/example1_exec.c
new file mode 100644
index 0000000..60a412b
--- /dev/null
+++ b/examples/example1-spir32/example1_exec.c
@@ -0,0 +1,186 @@
+#include
+#include
+#include
+
+void
+delete_memobjs(cl_mem *memobjs, int n)
+{
+ int i;
+ for (i=0; i+ clReleaseMemObject(memobjs[i]);
+}
+
+int
+exec_dot_product_kernel(const char *program_source, size_t source_size,
+ int n, cl_float4 *srcA, cl_float4 *srcB, cl_float *dst)
+{
+ cl_context context;
+ cl_command_queue cmd_queue;
+ cl_device_id *devices;
+ cl_program program;
+ cl_kernel kernel;
+ cl_mem memobjs[3];
+ size_t global_work_size[1];
+ size_t local_work_size[1];
+ size_t cb;
+ cl_int err;
+ int i;
+ context = poclu_create_any_context();
+ if (context == (cl_context)0)
+ return -1;
+
+ // get the list of GPU devices associated with context
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
+ devices = (cl_device_id *) malloc(cb);
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
+
+ // create a command-queue
+ cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
+ if (cmd_queue == (cl_command_queue)0)
+ {
+ clReleaseContext(context);
+ free(devices);
+ return -1;
+ }
+
+ for (i = 0; i < n; ++i)
+ {
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
+ }
+
+
+ // allocate the buffer memory objects
+ memobjs[0] = clCreateBuffer(context,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float4) * n, srcA, NULL);
+ if (memobjs[0] == (cl_mem)0)
+ {
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ memobjs[1] = clCreateBuffer(context,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float4) * n, srcB, NULL);
+ if (memobjs[1] == (cl_mem)0)
+ {
+ delete_memobjs(memobjs, 1);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ memobjs[2] = clCreateBuffer(context,
+ CL_MEM_READ_WRITE,
+ sizeof(cl_float) * n, NULL, NULL);
+ if (memobjs[2] == (cl_mem)0)
+ {
+ delete_memobjs(memobjs, 2);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // create the program
+ program =
+ clCreateProgramWithBinary
+ (context, 1, devices, &source_size,
+ (const unsigned char**)&program_source, NULL, NULL);
+ if (program == (cl_program)0)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // build the program
+ err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // create the kernel
+ kernel = clCreateKernel(program, "dot_product", NULL);
+ if (kernel == (cl_kernel)0)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // set the args values
+ err = clSetKernelArg(kernel, 0,
+ sizeof(cl_mem), (void *) &memobjs[0]);
+ err |= clSetKernelArg(kernel, 1,
+ sizeof(cl_mem), (void *) &memobjs[1]);
+ err |= clSetKernelArg(kernel, 2,
+ sizeof(cl_mem), (void *) &memobjs[2]);
+
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // set work-item dimensions
+ global_work_size[0] = n;
+ local_work_size[0]= 128;
+
+ // execute kernel
+ err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL,
+ global_work_size, local_work_size,
+ 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // read output image
+ err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE,
+ 0, n * sizeof(cl_float), dst,
+ 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+ for (i = 0; i < n; ++i)
+ {
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&dst[i], 1);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
+ }
+ free(devices);
+
+
+ // release kernel, program, and memory objects
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return 0; // success...
+}
diff --git a/examples/example1-spir32/generate_spir32.sh b/examples/example1-spir32/generate_spir32.sh
new file mode 100755
index 0000000..1c7dd20
--- /dev/null
+++ b/examples/example1-spir32/generate_spir32.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# NOTE:
+# 1) Install the official SPIR generator version of Clang/LLVM:
+# https://github.com/KhronosGroup/SPIR
+#
+# 2) Download opencl_spir.h from
+# https://raw.github.com/KhronosGroup/SPIR-Tools/master/headers/opencl_spir.h
+# and add "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" in the beginning of
+# it to make it compile.
+clang -cc1 -emit-llvm-bc -triple spir-unknown-unknown -include opencl_spir.h -o example1.spir example1.cl
diff --git a/examples/example1-spir64/Makefile.am b/examples/example1-spir64/Makefile.am
new file mode 100644
index 0000000..226c2eb
--- /dev/null
+++ b/examples/example1-spir64/Makefile.am
@@ -0,0 +1,31 @@
+# Process this file with automake to produce Makefile.in (in this,
+# and all subdirectories).
+# Makefile.am for pocl/examples/example1.
+#
+# Copyright (c) 2011 Universidad Rey Juan Carlos
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+noinst_PROGRAMS = example1-spir
+
+example1_spir_SOURCES = example1.c example1_exec.c example1.cl example1.spir
+example1_spir_LDADD = @OPENCL_LIBS@ ../../lib/poclu/libpoclu.la
+example1_spir_CFLAGS = @OPENCL_CFLAGS@
+
+AM_CPPFLAGS = -I$(top_srcdir)/fix-include -I$(top_srcdir)/include -DSRCDIR='"$(abs_srcdir)"'
diff --git a/examples/example1-spir64/example1.c b/examples/example1-spir64/example1.c
new file mode 100644
index 0000000..7798027
--- /dev/null
+++ b/examples/example1-spir64/example1.c
@@ -0,0 +1,102 @@
+/* example1 - Simple example from OpenCL 1.0 specification, modified
+
+
+ Copyright (c) 2011 Universidad Rey Juan Carlos
+ 2014 Pekka Jääskeläinen
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include
+#include
+#include
+#include
+
+#define N 128
+
+extern int exec_dot_product_kernel
+(const char *program_source, size_t source_size,
+ int n, void *srcA, void *srcB, void *dst);
+
+int
+main (void)
+{
+ FILE *source_file;
+ char *source;
+ int source_size;
+ cl_float4 *srcA, *srcB;
+ cl_float *dst;
+ int ierr;
+ int i;
+
+ source_file = fopen("example1.spir", "r");
+ if (source_file == NULL)
+ source_file = fopen (SRCDIR "/example1.spir", "r");
+
+ assert(source_file != NULL && SRCDIR "example1.spir not found!!");
+
+ fseek (source_file, 0, SEEK_END);
+ source_size = ftell (source_file);
+ fseek (source_file, 0, SEEK_SET);
+
+ source = (char *) malloc (source_size +1 );
+ assert (source != NULL);
+
+ fread (source, source_size, 1, source_file);
+
+ fclose (source_file);
+
+ srcA = (cl_float4 *) malloc (N * sizeof (cl_float4));
+ srcB = (cl_float4 *) malloc (N * sizeof (cl_float4));
+ dst = (cl_float *) malloc (N * sizeof (cl_float));
+
+ for (i = 0; i < N; ++i)
+ {
+ srcA[i].x = i;
+ srcA[i].y = i;
+ srcA[i].z = i;
+ srcA[i].w = i;
+ srcB[i].x = i;
+ srcB[i].y = i;
+ srcB[i].z = i;
+ srcB[i].w = i;
+ }
+
+ ierr = exec_dot_product_kernel (source, source_size, N, srcA, srcB, dst);
+ if (ierr) printf ("ERROR\n");
+
+ for (i = 0; i < 4; ++i)
+ {
+ printf ("(%f, %f, %f, %f) . (%f, %f, %f, %f) = %f\n",
+ srcA[i].x, srcA[i].y, srcA[i].z, srcA[i].w,
+ srcB[i].x, srcB[i].y, srcB[i].z, srcB[i].w,
+ dst[i]);
+ if (srcA[i].x * srcB[i].x +
+ srcA[i].y * srcB[i].y +
+ srcA[i].z * srcB[i].z +
+ srcA[i].w * srcB[i].w != dst[i])
+ {
+ printf ("FAIL\n");
+ return -1;
+ }
+ }
+
+ printf ("OK\n");
+ return 0;
+}
diff --git a/examples/example1-spir64/example1.cl b/examples/example1-spir64/example1.cl
new file mode 100644
index 0000000..a5d4498
--- /dev/null
+++ b/examples/example1-spir64/example1.cl
@@ -0,0 +1,30 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void
+dot_product (__global const float4 *a,
+ __global const float4 *b, __global float *c)
+{
+ int gid = get_global_id(0);
+
+ /* This parallel region does not vectorize with the
+ loop vectorizer because it accesses vector datatypes.
+ Perhaps with SLP/BB vectorizer.*/
+
+ float ax = a[gid].x;
+ float ay = a[gid].y;
+ float az = a[gid].z;
+ float aw = a[gid].w;
+
+ float bx = b[gid].x,
+ by = b[gid].y,
+ bz = b[gid].z,
+ bw = b[gid].w;
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* This parallel region should vectorize. */
+ c[gid] = ax * bx;
+ c[gid] += ay * by;
+ c[gid] += az * bz;
+ c[gid] += aw * bw;
+}
diff --git a/examples/example1-spir64/example1.spir b/examples/example1-spir64/example1.spir
new file mode 100644
index 0000000..9362b7a
Binary files /dev/null and b/examples/example1-spir64/example1.spir differ
diff --git a/examples/example1-spir64/example1_exec.c b/examples/example1-spir64/example1_exec.c
new file mode 100644
index 0000000..60a412b
--- /dev/null
+++ b/examples/example1-spir64/example1_exec.c
@@ -0,0 +1,186 @@
+#include
+#include
+#include
+
+void
+delete_memobjs(cl_mem *memobjs, int n)
+{
+ int i;
+ for (i=0; i+ clReleaseMemObject(memobjs[i]);
+}
+
+int
+exec_dot_product_kernel(const char *program_source, size_t source_size,
+ int n, cl_float4 *srcA, cl_float4 *srcB, cl_float *dst)
+{
+ cl_context context;
+ cl_command_queue cmd_queue;
+ cl_device_id *devices;
+ cl_program program;
+ cl_kernel kernel;
+ cl_mem memobjs[3];
+ size_t global_work_size[1];
+ size_t local_work_size[1];
+ size_t cb;
+ cl_int err;
+ int i;
+ context = poclu_create_any_context();
+ if (context == (cl_context)0)
+ return -1;
+
+ // get the list of GPU devices associated with context
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
+ devices = (cl_device_id *) malloc(cb);
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
+
+ // create a command-queue
+ cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
+ if (cmd_queue == (cl_command_queue)0)
+ {
+ clReleaseContext(context);
+ free(devices);
+ return -1;
+ }
+
+ for (i = 0; i < n; ++i)
+ {
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
+ }
+
+
+ // allocate the buffer memory objects
+ memobjs[0] = clCreateBuffer(context,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float4) * n, srcA, NULL);
+ if (memobjs[0] == (cl_mem)0)
+ {
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ memobjs[1] = clCreateBuffer(context,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float4) * n, srcB, NULL);
+ if (memobjs[1] == (cl_mem)0)
+ {
+ delete_memobjs(memobjs, 1);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ memobjs[2] = clCreateBuffer(context,
+ CL_MEM_READ_WRITE,
+ sizeof(cl_float) * n, NULL, NULL);
+ if (memobjs[2] == (cl_mem)0)
+ {
+ delete_memobjs(memobjs, 2);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // create the program
+ program =
+ clCreateProgramWithBinary
+ (context, 1, devices, &source_size,
+ (const unsigned char**)&program_source, NULL, NULL);
+ if (program == (cl_program)0)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // build the program
+ err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // create the kernel
+ kernel = clCreateKernel(program, "dot_product", NULL);
+ if (kernel == (cl_kernel)0)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // set the args values
+ err = clSetKernelArg(kernel, 0,
+ sizeof(cl_mem), (void *) &memobjs[0]);
+ err |= clSetKernelArg(kernel, 1,
+ sizeof(cl_mem), (void *) &memobjs[1]);
+ err |= clSetKernelArg(kernel, 2,
+ sizeof(cl_mem), (void *) &memobjs[2]);
+
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // set work-item dimensions
+ global_work_size[0] = n;
+ local_work_size[0]= 128;
+
+ // execute kernel
+ err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL,
+ global_work_size, local_work_size,
+ 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+
+ // read output image
+ err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE,
+ 0, n * sizeof(cl_float), dst,
+ 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return -1;
+ }
+ for (i = 0; i < n; ++i)
+ {
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&dst[i], 1);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
+ poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
+ }
+ free(devices);
+
+
+ // release kernel, program, and memory objects
+ delete_memobjs(memobjs, 3);
+ clReleaseKernel(kernel);
+ clReleaseProgram(program);
+ clReleaseCommandQueue(cmd_queue);
+ clReleaseContext(context);
+ return 0; // success...
+}
diff --git a/examples/example1-spir64/generate_spir.sh b/examples/example1-spir64/generate_spir.sh
new file mode 100755
index 0000000..28da381
--- /dev/null
+++ b/examples/example1-spir64/generate_spir.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# NOTE:
+# 1) Install the official SPIR generator version of Clang/LLVM:
+# https://github.com/KhronosGroup/SPIR
+#
+# 2) Download opencl_spir.h from
+# https://raw.github.com/KhronosGroup/SPIR-Tools/master/headers/opencl_spir.h
+# and add "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" in the beginning of
+# it to make it compile.
+clang -cc1 -emit-llvm-bc -triple spir64-unknown-unknown -include opencl_spir.h -o example1.spir example1.cl
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 80db635..4c5bbf9 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -42,7 +42,22 @@ AT_CLEANUP
AT_SETUP([example1: dot product (SPIR64)])
# This SPIR example works because it does not use local memory nor
# call builtins that are mangled with address spaces.
-AT_SKIP_IF([grep HOST_CPU $abs_top_builddir/config.h | cut -d\" -f2 | grep -q powerpc])
+AT_SKIP_IF([grep HOST_CPU $abs_top_builddir/config.h | cut -d\" -f2 | egrep -q 'powerpc|armv7'])
+AT_KEYWORDS([spir])
+AT_DATA([expout],
+[(0.000000, 0.000000, 0.000000, 0.000000) . (0.000000, 0.000000, 0.000000, 0.000000) = 0.000000
+(1.000000, 1.000000, 1.000000, 1.000000) . (1.000000, 1.000000, 1.000000, 1.000000) = 4.000000
+(2.000000, 2.000000, 2.000000, 2.000000) . (2.000000, 2.000000, 2.000000, 2.000000) = 16.000000
+(3.000000, 3.000000, 3.000000, 3.000000) . (3.000000, 3.000000, 3.000000, 3.000000) = 36.000000
+OK
+])
+AT_CHECK([$abs_top_builddir/examples/example1-spir64/example1-spir], 0, expout, ignore)
+AT_CLEANUP
+
+AT_SETUP([example1: dot product (SPIR32)])
+# This SPIR example works because it does not use local memory nor
+# call builtins that are mangled with address spaces.
+AT_SKIP_IF([! grep HOST_CPU $abs_top_builddir/config.h | cut -d\" -f2 | egrep -q 'powerpc|armv7'])
AT_KEYWORDS([spir])
AT_DATA([expout],
[(0.000000, 0.000000, 0.000000, 0.000000) . (0.000000, 0.000000, 0.000000, 0.000000) = 0.000000
@@ -51,7 +66,7 @@ AT_DATA([expout],
(3.000000, 3.000000, 3.000000, 3.000000) . (3.000000, 3.000000, 3.000000, 3.000000) = 36.000000
OK
])
-AT_CHECK([$abs_top_builddir/examples/example1-spir/example1-spir], 0, expout, ignore)
+AT_CHECK([$abs_top_builddir/examples/example1-spir32/example1-spir], 0, expout, ignore)
AT_CLEANUP

AT_SETUP([example2: matrix transpose])

1ec3d7c Added a 32b version of the SPIR test.
configure.ac | 3 +-
examples/Makefile.am | 6 +-
examples/example1-spir/Makefile.am | 31 -----
examples/example1-spir/example1.c | 102 ---------------
examples/example1-spir/example1.cl | 30 -----
examples/example1-spir/example1.spir | Bin 1604 -> 0 bytes
examples/example1-spir/example1_exec.c | 186 ---------------------------
examples/example1-spir/generate_spir.sh | 10 --
examples/example1-spir32/Makefile.am | 31 +++++
examples/example1-spir32/example1.c | 102 +++++++++++++++
examples/example1-spir32/example1.cl | 30 +++++
examples/example1-spir32/example1_exec.c | 186 +++++++++++++++++++++++++++
examples/example1-spir32/generate_spir32.sh | 10 ++
examples/example1-spir64/Makefile.am | 31 +++++
examples/example1-spir64/example1.c | 102 +++++++++++++++
examples/example1-spir64/example1.cl | 30 +++++
examples/example1-spir64/example1.spir | Bin 0 -> 1604 bytes
examples/example1-spir64/example1_exec.c | 186 +++++++++++++++++++++++++++
examples/example1-spir64/generate_spir.sh | 10 ++
tests/testsuite.at | 19 ++-
20 files changed, 740 insertions(+), 365 deletions(-)

Upstream: github.com


  • Share