diff --git a/OpenWindow/OpenWindow.vcxproj b/OpenWindow/OpenWindow.vcxproj
index 92d4ff0..d6e46ec 100644
--- a/OpenWindow/OpenWindow.vcxproj
+++ b/OpenWindow/OpenWindow.vcxproj
@@ -78,9 +78,11 @@
true
$(ProjectDir)\include
4996;
+ _MBCS;%(PreprocessorDefinitions)
$(ProjectDir)\lib;%(AdditionalLibraryDirectories)
+ NotSet
@@ -91,9 +93,11 @@
true
$(ProjectDir)\include
4996;
+ _MBCS;%(PreprocessorDefinitions)
$(ProjectDir)\lib;%(AdditionalLibraryDirectories)
+ NotSet
@@ -106,11 +110,13 @@
true
$(ProjectDir)\include
4996;
+ _MBCS;%(PreprocessorDefinitions)
true
true
$(ProjectDir)\lib;%(AdditionalLibraryDirectories)
+ NotSet
@@ -123,11 +129,13 @@
true
$(ProjectDir)\include
4996;
+ _MBCS;%(PreprocessorDefinitions)
true
true
$(ProjectDir)\lib;%(AdditionalLibraryDirectories)
+ NotSet
diff --git a/OpenWindow/kernel_sources.cpp b/OpenWindow/kernel_sources.cpp
index dcda827..f1e480a 100644
--- a/OpenWindow/kernel_sources.cpp
+++ b/OpenWindow/kernel_sources.cpp
@@ -1,46 +1,16 @@
#include "kernels.h"
-const char* matrix_mul_kernel_source =
-"__kernel void mmul ( \n"
-" __global float* A, \n"
-" __global float* B, \n"
-" __global float* C) \n"
-"{ \n"
-" int k; \n"
-" int i = get_global_id(0); \n"
-" int j = get_global_id(1); \n"
-" float tmp; \n"
-" if( (i < 4) && (j < 4) ) \n"
-" { \n"
-" tmp = 0.0; \n"
-" for(k = 0; k < 4; k++) \n"
-" tmp += A[i*4+k] * B[k*4+j]; \n"
-" C[i*4+j] = tmp; \n"
-" } \n"
-"} \n" ;
-
const char* vertex_shader_kernel_source =
-"__kernel \n"
-"void vertex_shader( __global float* z_matrix, \n"
-" __global float* VertexBuffer, \n"
-" __global float* NewVertexBuffer) \n"
-"{ \n"
-" int index = 3 * get_global_id(0); \n"
-" float new_x = 0; \n"
-" float new_y = 0; \n"
-" float new_z = 0; \n"
-" float new_w = 0; \n"
-" for(int i = 0; i < 3; i++) { \n"
-" new_x = new_x + VertexBuffer[index+i]*z_matrix[0*4 + i]; \n"
-" new_y = new_y + VertexBuffer[index+i]*z_matrix[1*4 + i]; \n"
-" new_z = new_z + VertexBuffer[index+i]*z_matrix[2*4 + i]; \n"
-" new_w = new_w + VertexBuffer[index+i]*z_matrix[3*4 + i]; \n"
-" } \n"
-" new_x = new_x + z_matrix[0*4 +3]; \n"
-" new_y = new_y + z_matrix[1*4 +3]; \n"
-" new_z = new_z + z_matrix[2*4 +3]; \n"
-" new_w = new_w + z_matrix[3*4 +3]; \n"
-" NewVertexBuffer[index+0] = new_x / new_w; \n"
-" NewVertexBuffer[index+1] = new_y / new_w; \n"
-" NewVertexBuffer[index+2] = new_z / new_w; \n"
-"} \n";
+"__kernel \n"
+"void vertex_shader( __global float* m, \n"
+" __global float* VertexBuffer, \n"
+" __global float* NewVertexBuffer) \n"
+"{ \n"
+" int local_index = get_local_id(0); \n"
+" int global_index = get_group_id(0); \n"
+" NewVertexBuffer[4*global_index+local_index] = \n"
+" m[local_index*4]*VertexBuffer[3*global_index] \n"
+" + m[local_index*4 + 1]*VertexBuffer[3*global_index+1] \n"
+" + m[local_index*4 + 2]*VertexBuffer[3*global_index+2] \n"
+" + m[local_index*4 + 3]; \n"
+"} \n";
diff --git a/OpenWindow/kernels.cpp b/OpenWindow/kernels.cpp
index 1e4b9f8..c2e3f48 100644
--- a/OpenWindow/kernels.cpp
+++ b/OpenWindow/kernels.cpp
@@ -11,12 +11,7 @@ cl_context context;
// Matrix Multiplication Variables
-#define ORDER 4
#define MATRIX_SIZE 16
-int Mdim, Pdim, Ndim;
-cl_mem mat_a;
-cl_mem mat_b;
-cl_mem mat_c;
// vertex shader
cl_mem mat_z;
@@ -27,15 +22,10 @@ cl_mem new_vertices_mem;
cl_platform_id* platforms = NULL;
cl_device_id* devices = NULL;
-cl_program mat_mul_prog;
cl_program vertex_shader_prog;
-cl_kernel mat_mul_kernel;
cl_kernel vertex_shader_kernel;
cl_command_queue commands;
-size_t mat_mul_global[DIM];
-size_t mat_mul_local[DIM];
-
int err;
@@ -53,78 +43,43 @@ void init_kernels() {
commands = clCreateCommandQueue(context, devices[0], 0, &err);
- mat_a = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE, NULL, &err);
- mat_b = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE, NULL, &err);
- mat_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * MATRIX_SIZE, NULL, &err);
-
-
- mat_mul_prog = clCreateProgramWithSource(context, 1, (const char **)&matrix_mul_kernel_source, NULL, &err);
vertex_shader_prog = clCreateProgramWithSource(context, 1, (const char **)&vertex_shader_kernel_source, NULL, &err);
- err = clBuildProgram(mat_mul_prog, 1, devices, NULL, NULL, NULL);
err = clBuildProgram(vertex_shader_prog, 1, devices, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
- OutputDebugString("Error: Failed to build program executable!\n");
- clGetProgramBuildInfo(mat_mul_prog, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
- OutputDebugString(buffer);
+ printf("Error: Failed to build program executable!\n");
+ // Fetch the build log of the program that was just built; print it as data ("%s") and only when the query actually filled `buffer`.
+ if (clGetProgramBuildInfo(vertex_shader_prog, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len) == CL_SUCCESS) printf("%s\n", buffer);
return;
}
- mat_mul_kernel = clCreateKernel(mat_mul_prog, "mmul", &err);
vertex_shader_kernel = clCreateKernel(vertex_shader_prog, "vertex_shader", &err);
err = 0;
- err |= clSetKernelArg(mat_mul_kernel, 0, sizeof(cl_mem), &mat_a);
- err |= clSetKernelArg(mat_mul_kernel, 1, sizeof(cl_mem), &mat_b);
- err |= clSetKernelArg(mat_mul_kernel, 2, sizeof(cl_mem), &mat_c);
-
}
-void mat4_mul(float* A, float* B, float* C)
-{
- err = clEnqueueWriteBuffer(commands, mat_a, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, A, 0, NULL, NULL);
- err = clEnqueueWriteBuffer(commands, mat_b, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, B, 0, NULL, NULL);
-
- mat_mul_global[0] = ORDER;
- mat_mul_global[1] = ORDER;
-
- err = clEnqueueNDRangeKernel(commands, mat_mul_kernel, 2, NULL, mat_mul_global, NULL, 0, NULL, NULL);
-
- clFlush(commands);
- clFinish(commands);
-
- err = clEnqueueReadBuffer(commands, mat_c, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, C, 0, NULL, NULL);
-}
-
-void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matrix* ModelTransform, float* vertices, int vertex_count, float* new_vertices) {
-
- mat_z = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * MATRIX_SIZE, NULL, &err);
- vertices_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * vertex_count * 3, NULL, &err);
- new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * vertex_count * 3, NULL, &err);
+void vertex_shader(float* z, float* vertices, int vertex_count, float* new_vertices) {
+ mat_z = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE , NULL, &err);
+ vertices_mem = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * vertex_count * 3, NULL, &err);
+ new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * vertex_count * 4, NULL, &err);
err = clSetKernelArg(vertex_shader_kernel, 0, sizeof(cl_mem), &mat_z);
err = clSetKernelArg(vertex_shader_kernel, 1, sizeof(cl_mem), &vertices_mem);
err = clSetKernelArg(vertex_shader_kernel, 2, sizeof(cl_mem), &new_vertices_mem);
-
- Matrix intermediate_z;
- { // Calculate the Z to be multiplied by each vertex
- mat4_mul((float*)ViewPort , (float*)Projection , (float*) &intermediate_z);
- mat4_mul((float*)&intermediate_z, (float*)ModelView , (float*) &intermediate_z);
- mat4_mul((float*)&intermediate_z, (float*)ModelTransform, (float*) &intermediate_z);
- }
-
- err = clEnqueueWriteBuffer(commands, mat_z, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, (float*)&intermediate_z, 0, NULL, NULL);
+ err = clEnqueueWriteBuffer(commands, mat_z , CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, z, 0, NULL, NULL);
err = clEnqueueWriteBuffer(commands, vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 3, vertices, 0, NULL, NULL);
- size_t vertex_shader_global[] = { vertex_count };
- size_t vertex_shader_local[] = { 16 };
+ size_t vertex_shader_global[] = { vertex_count*4 };
+ size_t vertex_shader_local[] = { 4 };
- err = clEnqueueNDRangeKernel(commands, vertex_shader_kernel, 1, NULL, vertex_shader_global, NULL, 0, NULL, NULL);
+ err = clEnqueueNDRangeKernel(commands, vertex_shader_kernel, 1, NULL, vertex_shader_global, vertex_shader_local, 0, NULL, NULL);
- err = clEnqueueReadBuffer(commands, new_vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 3, new_vertices, 0, NULL, NULL);
+ clFlush(commands);
+ clFinish(commands);
+ err = clEnqueueReadBuffer(commands, new_vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 4, new_vertices, 0, NULL, NULL);
clReleaseMemObject(mat_z);
clReleaseMemObject(vertices_mem);
@@ -133,13 +88,8 @@ void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matr
void destroy_kernels()
{
- clReleaseProgram(mat_mul_prog);
clReleaseProgram(vertex_shader_prog);
- clReleaseKernel(mat_mul_kernel);
clReleaseKernel(vertex_shader_kernel);
- clReleaseMemObject(mat_a);
- clReleaseMemObject(mat_b);
- clReleaseMemObject(mat_c);
// clReleaseMemObject(mat_z);
// clReleaseMemObject(vertices_mem);
// clReleaseMemObject(new_vertices_mem);
diff --git a/OpenWindow/kernels.h b/OpenWindow/kernels.h
index f354680..da5d50d 100644
--- a/OpenWindow/kernels.h
+++ b/OpenWindow/kernels.h
@@ -39,7 +39,7 @@ extern const char* vertex_shader_kernel_source;
void init_kernels();
void mat4_mul(float* A, float* B, float* C);
-void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matrix* ModelTransform, float* vertices, int vertex_count, float* new_vertices);
+void vertex_shader(float* z, float* vertices, int vertex_count, float* new_vertices);
void destroy_kernels();
diff --git a/OpenWindow/main.cpp b/OpenWindow/main.cpp
index 66261ca..8f79185 100644
--- a/OpenWindow/main.cpp
+++ b/OpenWindow/main.cpp
@@ -21,6 +21,12 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine
hwnd = create_window(hInstance);
ShowCursor(false);
+
+ AllocConsole();
+ freopen("CONIN$", "r",stdin);
+ freopen("CONOUT$", "w",stdout);
+ freopen("CONOUT$", "w",stderr);
+
ShowWindow(hwnd, nCmdShow);
init_camera();
diff --git a/OpenWindow/renderer.cpp b/OpenWindow/renderer.cpp
index 6f4fa72..c59352b 100644
--- a/OpenWindow/renderer.cpp
+++ b/OpenWindow/renderer.cpp
@@ -5,6 +5,7 @@
#include "util_renderer.h"
#include "CL/cl.h"
#include "kernels.h"
+#include
#pragma comment (lib, "x86_64/opencl.lib")
@@ -34,6 +35,8 @@ Camera camera;
Vec3f light_dir = Vec3f(1, 1, 1).normalize();
+float* new_verts = (float*)malloc(4 * sizeof(float) * model->nverts());
+
void init_camera() {
camera.SetPosition(DEFAULT_CAMERA_POS);
camera.SetRotation(DEFAULT_CAMERA_ROT);
@@ -98,28 +101,20 @@ void render()
}
{
- model->rotate(Vec3f(0, 0, 90));
- model->ApplyTransform();
+ //model->rotate(Vec3f(0, 0, 90));
+ //model->ApplyTransform();
}
- //Matrix z = ViewPort * Projection * ModelView * model->Transform;
-
clear_zbuffer();
TextureShader shader;
shader.uniform_m = (Projection);
shader.uniform_mit = (Projection).invert_transpose();
- //Matrix intermediate_z;
- //mat4_mul((float*) &ViewPort, (float*) &Projection, (float*) &intermediate_z);
- //mat4_mul((float*)&intermediate_z, (float*)&ModelView, (float*) &intermediate_z);
- //mat4_mul((float*)&intermediate_z, (float*)&model->Transform, (float*) &intermediate_z);
- //shader.z = intermediate_z;
+ Matrix z = ViewPort * Projection * ModelView * model->Transform;
- //std::vector new_verts = model->verts_;
- Vec3f* new_verts = (Vec3f*)malloc(3 * sizeof(float) * model->nverts());
- vertex_shader(&ViewPort, &Projection, &ModelView, &model->Transform, (float*)&model->verts_, model->nverts(), (float*)new_verts);
-
+ // Vertex Shader: should be called per model. Pass the element storage via data() instead of reinterpreting the container object (assumes verts_ is a std::vector of tightly packed Vec3f - TODO confirm).
+ vertex_shader((float*)&z, (float*)model->verts_.data(), model->nverts(), new_verts);
#pragma omp parallel for
for (int i = 0; i < model->nfaces(); i++) {
@@ -127,9 +122,7 @@ void render()
bool out = true;
#pragma omp parallel for
for (int j = 0; j < 3; j++) {
- //screen_coords[j] = model->vert(i, j);
- //screen_coords[j] = shader.vertex(i, j);
- screen_coords[j] = new_verts[model->faces_[i][j][0]];
+ screen_coords[j] = ((Vec4f*)new_verts)[model->faces_[i][j][0]];
Vec3f screen3(screen_coords[j]);
shader.varying_uv_coords.set_col(j, model->uv(i, j));
@@ -138,7 +131,5 @@ void render()
if(!out)
triangle(screen_coords, shader);
}
-
- free(new_verts);
}