From e57702936e69914ef6544a04549fdeae5e2e2d60 Mon Sep 17 00:00:00 2001 From: mo7sener Date: Wed, 18 Dec 2019 19:09:00 +0200 Subject: [PATCH] Vertex Shader is now GPU-Accelerated --- OpenWindow/OpenWindow.vcxproj | 8 ++++ OpenWindow/kernel_sources.cpp | 56 ++++++------------------- OpenWindow/kernels.cpp | 78 +++++++---------------------------- OpenWindow/kernels.h | 2 +- OpenWindow/main.cpp | 6 +++ OpenWindow/renderer.cpp | 27 ++++-------- 6 files changed, 51 insertions(+), 126 deletions(-) diff --git a/OpenWindow/OpenWindow.vcxproj b/OpenWindow/OpenWindow.vcxproj index 92d4ff0..d6e46ec 100644 --- a/OpenWindow/OpenWindow.vcxproj +++ b/OpenWindow/OpenWindow.vcxproj @@ -78,9 +78,11 @@ true $(ProjectDir)\include 4996; + _MBCS;%(PreprocessorDefinitions) $(ProjectDir)\lib;%(AdditionalLibraryDirectories) + NotSet @@ -91,9 +93,11 @@ true $(ProjectDir)\include 4996; + _MBCS;%(PreprocessorDefinitions) $(ProjectDir)\lib;%(AdditionalLibraryDirectories) + NotSet @@ -106,11 +110,13 @@ true $(ProjectDir)\include 4996; + _MBCS;%(PreprocessorDefinitions) true true $(ProjectDir)\lib;%(AdditionalLibraryDirectories) + NotSet @@ -123,11 +129,13 @@ true $(ProjectDir)\include 4996; + _MBCS;%(PreprocessorDefinitions) true true $(ProjectDir)\lib;%(AdditionalLibraryDirectories) + NotSet diff --git a/OpenWindow/kernel_sources.cpp b/OpenWindow/kernel_sources.cpp index dcda827..f1e480a 100644 --- a/OpenWindow/kernel_sources.cpp +++ b/OpenWindow/kernel_sources.cpp @@ -1,46 +1,16 @@ #include "kernels.h" -const char* matrix_mul_kernel_source = -"__kernel void mmul ( \n" -" __global float* A, \n" -" __global float* B, \n" -" __global float* C) \n" -"{ \n" -" int k; \n" -" int i = get_global_id(0); \n" -" int j = get_global_id(1); \n" -" float tmp; \n" -" if( (i < 4) && (j < 4) ) \n" -" { \n" -" tmp = 0.0; \n" -" for(k = 0; k < 4; k++) \n" -" tmp += A[i*4+k] * B[k*4+j]; \n" -" C[i*4+j] = tmp; \n" -" } \n" -"} \n" ; - const char* vertex_shader_kernel_source = -"__kernel \n" -"void vertex_shader( 
__global float* z_matrix, \n" -" __global float* VertexBuffer, \n" -" __global float* NewVertexBuffer) \n" -"{ \n" -" int index = 3 * get_global_id(0); \n" -" float new_x = 0; \n" -" float new_y = 0; \n" -" float new_z = 0; \n" -" float new_w = 0; \n" -" for(int i = 0; i < 3; i++) { \n" -" new_x = new_x + VertexBuffer[index+i]*z_matrix[0*4 + i]; \n" -" new_y = new_y + VertexBuffer[index+i]*z_matrix[1*4 + i]; \n" -" new_z = new_z + VertexBuffer[index+i]*z_matrix[2*4 + i]; \n" -" new_w = new_w + VertexBuffer[index+i]*z_matrix[3*4 + i]; \n" -" } \n" -" new_x = new_x + z_matrix[0*4 +3]; \n" -" new_y = new_y + z_matrix[1*4 +3]; \n" -" new_z = new_z + z_matrix[2*4 +3]; \n" -" new_w = new_w + z_matrix[3*4 +3]; \n" -" NewVertexBuffer[index+0] = new_x / new_w; \n" -" NewVertexBuffer[index+1] = new_y / new_w; \n" -" NewVertexBuffer[index+2] = new_z / new_w; \n" -"} \n"; +"__kernel \n" +"void vertex_shader( __global float* m, \n" +" __global float* VertexBuffer, \n" +" __global float* NewVertexBuffer) \n" +"{ \n" +" int local_index = get_local_id(0); \n" +" int global_index = get_group_id(0); \n" +" NewVertexBuffer[4*global_index+local_index] = \n" +" m[local_index*4]*VertexBuffer[3*global_index] \n" +" + m[local_index*4 + 1]*VertexBuffer[3*global_index+1] \n" +" + m[local_index*4 + 2]*VertexBuffer[3*global_index+2] \n" +" + m[local_index*4 + 3]; \n" +"} \n"; diff --git a/OpenWindow/kernels.cpp b/OpenWindow/kernels.cpp index 1e4b9f8..c2e3f48 100644 --- a/OpenWindow/kernels.cpp +++ b/OpenWindow/kernels.cpp @@ -11,12 +11,7 @@ cl_context context; // Matrix Multiplication Variables -#define ORDER 4 #define MATRIX_SIZE 16 -int Mdim, Pdim, Ndim; -cl_mem mat_a; -cl_mem mat_b; -cl_mem mat_c; // vertex shader cl_mem mat_z; @@ -27,15 +22,10 @@ cl_mem new_vertices_mem; cl_platform_id* platforms = NULL; cl_device_id* devices = NULL; -cl_program mat_mul_prog; cl_program vertex_shader_prog; -cl_kernel mat_mul_kernel; cl_kernel vertex_shader_kernel; cl_command_queue commands; -size_t 
mat_mul_global[DIM]; -size_t mat_mul_local[DIM]; - int err; @@ -53,78 +43,43 @@ void init_kernels() { commands = clCreateCommandQueue(context, devices[0], 0, &err); - mat_a = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE, NULL, &err); - mat_b = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE, NULL, &err); - mat_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * MATRIX_SIZE, NULL, &err); - - - mat_mul_prog = clCreateProgramWithSource(context, 1, (const char **)&matrix_mul_kernel_source, NULL, &err); vertex_shader_prog = clCreateProgramWithSource(context, 1, (const char **)&vertex_shader_kernel_source, NULL, &err); - err = clBuildProgram(mat_mul_prog, 1, devices, NULL, NULL, NULL); err = clBuildProgram(vertex_shader_prog, 1, devices, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; - OutputDebugString("Error: Failed to build program executable!\n"); - clGetProgramBuildInfo(mat_mul_prog, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); - OutputDebugString(buffer); + printf("Error: Failed to build program executable!\n"); + if (clGetProgramBuildInfo(vertex_shader_prog, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len) != CL_SUCCESS) buffer[0] = '\0'; /* fetch the log of the program that was just built; fall back to an empty string if the query fails */ + printf("%s", buffer); /* print the log as data, never as a format string */ return; } - mat_mul_kernel = clCreateKernel(mat_mul_prog, "mmul", &err); vertex_shader_kernel = clCreateKernel(vertex_shader_prog, "vertex_shader", &err); err = 0; - err |= clSetKernelArg(mat_mul_kernel, 0, sizeof(cl_mem), &mat_a); - err |= clSetKernelArg(mat_mul_kernel, 1, sizeof(cl_mem), &mat_b); - err |= clSetKernelArg(mat_mul_kernel, 2, sizeof(cl_mem), &mat_c); - } -void mat4_mul(float* A, float* B, float* C) -{ - err = clEnqueueWriteBuffer(commands, mat_a, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, A, 0, NULL, NULL); - err = clEnqueueWriteBuffer(commands, mat_b, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, B, 0, NULL, NULL); - - mat_mul_global[0] = ORDER; - mat_mul_global[1] = ORDER; - - err = clEnqueueNDRangeKernel(commands, 
mat_mul_kernel, 2, NULL, mat_mul_global, NULL, 0, NULL, NULL); - - clFlush(commands); - clFinish(commands); - - err = clEnqueueReadBuffer(commands, mat_c, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, C, 0, NULL, NULL); -} - -void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matrix* ModelTransform, float* vertices, int vertex_count, float* new_vertices) { - - mat_z = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * MATRIX_SIZE, NULL, &err); - vertices_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * vertex_count * 3, NULL, &err); - new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * vertex_count * 3, NULL, &err); +void vertex_shader(float* z, float* vertices, int vertex_count, float* new_vertices) { + mat_z = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE , NULL, &err); + vertices_mem = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * vertex_count * 3, NULL, &err); + new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * vertex_count * 4, NULL, &err); err = clSetKernelArg(vertex_shader_kernel, 0, sizeof(cl_mem), &mat_z); err = clSetKernelArg(vertex_shader_kernel, 1, sizeof(cl_mem), &vertices_mem); err = clSetKernelArg(vertex_shader_kernel, 2, sizeof(cl_mem), &new_vertices_mem); - - Matrix intermediate_z; - { // Calculate the Z to be multiplied by each vertex - mat4_mul((float*)ViewPort , (float*)Projection , (float*) &intermediate_z); - mat4_mul((float*)&intermediate_z, (float*)ModelView , (float*) &intermediate_z); - mat4_mul((float*)&intermediate_z, (float*)ModelTransform, (float*) &intermediate_z); - } - - err = clEnqueueWriteBuffer(commands, mat_z, CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, (float*)&intermediate_z, 0, NULL, NULL); + err = clEnqueueWriteBuffer(commands, mat_z , CL_TRUE, 0, sizeof(float) * MATRIX_SIZE, z, 0, NULL, NULL); err = clEnqueueWriteBuffer(commands, vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 3, 
vertices, 0, NULL, NULL); - size_t vertex_shader_global[] = { vertex_count }; - size_t vertex_shader_local[] = { 16 }; + size_t vertex_shader_global[] = { (size_t)vertex_count*4 }; /* explicit cast: int -> size_t is a narrowing conversion inside braces */ + size_t vertex_shader_local[] = { 4 }; - err = clEnqueueNDRangeKernel(commands, vertex_shader_kernel, 1, NULL, vertex_shader_global, NULL, 0, NULL, NULL); + err = clEnqueueNDRangeKernel(commands, vertex_shader_kernel, 1, NULL, vertex_shader_global, vertex_shader_local, 0, NULL, NULL); - err = clEnqueueReadBuffer(commands, new_vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 3, new_vertices, 0, NULL, NULL); + clFlush(commands); + clFinish(commands); + err = clEnqueueReadBuffer(commands, new_vertices_mem, CL_TRUE, 0, sizeof(float) * vertex_count * 4, new_vertices, 0, NULL, NULL); clReleaseMemObject(mat_z); clReleaseMemObject(vertices_mem); @@ -133,13 +88,8 @@ void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matr void destroy_kernels() { - clReleaseProgram(mat_mul_prog); clReleaseProgram(vertex_shader_prog); - clReleaseKernel(mat_mul_kernel); clReleaseKernel(vertex_shader_kernel); - clReleaseMemObject(mat_a); - clReleaseMemObject(mat_b); - clReleaseMemObject(mat_c); // clReleaseMemObject(mat_z); // clReleaseMemObject(vertices_mem); // clReleaseMemObject(new_vertices_mem); diff --git a/OpenWindow/kernels.h b/OpenWindow/kernels.h index f354680..da5d50d 100644 --- a/OpenWindow/kernels.h +++ b/OpenWindow/kernels.h @@ -39,7 +39,7 @@ extern const char* vertex_shader_kernel_source; void init_kernels(); void mat4_mul(float* A, float* B, float* C); -void vertex_shader(Matrix* ViewPort, Matrix* Projection, Matrix* ModelView, Matrix* ModelTransform, float* vertices, int vertex_count, float* new_vertices); +void vertex_shader(float* z, float* vertices, int vertex_count, float* new_vertices); void destroy_kernels(); diff --git a/OpenWindow/main.cpp b/OpenWindow/main.cpp index 66261ca..8f79185 100644 --- a/OpenWindow/main.cpp +++ b/OpenWindow/main.cpp @@ -21,6 +21,12 @@ int 
WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine hwnd = create_window(hInstance); ShowCursor(false); + + AllocConsole(); + freopen("CONIN$", "r",stdin); + freopen("CONOUT$", "w",stdout); + freopen("CONOUT$", "w",stderr); + ShowWindow(hwnd, nCmdShow); init_camera(); diff --git a/OpenWindow/renderer.cpp b/OpenWindow/renderer.cpp index 6f4fa72..c59352b 100644 --- a/OpenWindow/renderer.cpp +++ b/OpenWindow/renderer.cpp @@ -5,6 +5,7 @@ #include "util_renderer.h" #include "CL/cl.h" #include "kernels.h" +#include #pragma comment (lib, "x86_64/opencl.lib") @@ -34,6 +35,8 @@ Camera camera; Vec3f light_dir = Vec3f(1, 1, 1).normalize(); +float* new_verts = (float*)malloc(4 * sizeof(float) * model->nverts()); + void init_camera() { camera.SetPosition(DEFAULT_CAMERA_POS); camera.SetRotation(DEFAULT_CAMERA_ROT); @@ -98,28 +101,20 @@ void render() } { - model->rotate(Vec3f(0, 0, 90)); - model->ApplyTransform(); + //model->rotate(Vec3f(0, 0, 90)); + //model->ApplyTransform(); } - //Matrix z = ViewPort * Projection * ModelView * model->Transform; - clear_zbuffer(); TextureShader shader; shader.uniform_m = (Projection); shader.uniform_mit = (Projection).invert_transpose(); - //Matrix intermediate_z; - //mat4_mul((float*) &ViewPort, (float*) &Projection, (float*) &intermediate_z); - //mat4_mul((float*)&intermediate_z, (float*)&ModelView, (float*) &intermediate_z); - //mat4_mul((float*)&intermediate_z, (float*)&model->Transform, (float*) &intermediate_z); - //shader.z = intermediate_z; + Matrix z = ViewPort * Projection * ModelView * model->Transform; - //std::vector new_verts = model->verts_; - Vec3f* new_verts = (Vec3f*)malloc(3 * sizeof(float) * model->nverts()); - vertex_shader(&ViewPort, &Projection, &ModelView, &model->Transform, (float*)&model->verts_, model->nverts(), (float*)new_verts); - + // Vertex Shader: Should be called per model + vertex_shader((float*)&z, (float*)model->verts_.data(), model->nverts(), new_verts); #pragma omp parallel 
for for (int i = 0; i < model->nfaces(); i++) { @@ -127,9 +122,7 @@ void render() bool out = true; #pragma omp parallel for for (int j = 0; j < 3; j++) { - //screen_coords[j] = model->vert(i, j); - //screen_coords[j] = shader.vertex(i, j); - screen_coords[j] = new_verts[model->faces_[i][j][0]]; + screen_coords[j] = ((Vec4f*)new_verts)[model->faces_[i][j][0]]; Vec3f screen3(screen_coords[j]); shader.varying_uv_coords.set_col(j, model->uv(i, j)); @@ -138,7 +131,5 @@ void render() if(!out) triangle(screen_coords, shader); } - - free(new_verts); }