diff --git a/OpenWindow/kernel_sources.cpp b/OpenWindow/kernel_sources.cpp index 494ffc3..ad68bc1 100644 --- a/OpenWindow/kernel_sources.cpp +++ b/OpenWindow/kernel_sources.cpp @@ -27,7 +27,7 @@ const char* fragment_shader_kernel_source = "} \n" " \n" "__kernel void fragment_shader ( \n" -" __global int* faces, \n" +" __global int3* faces, \n" " __global float* vertices, \n" " __global int* pixels, \n" " __global int* screen_width, \n" @@ -39,8 +39,9 @@ const char* fragment_shader_kernel_source = " __global float* norms_buff, \n" " __global uchar* diffuse_map \n" ") { \n" -" int index = get_global_id(0); \n" -" \n" +" int GROUP_ID = get_group_id(0); \n" +" int GROUP_SIZE = get_local_size(0); \n" +" int LOCAL_ID = get_local_id(0); \n" " \n" " bool out = true; \n" " float3 vertices3[3]; \n" @@ -48,18 +49,18 @@ const char* fragment_shader_kernel_source = " float3 norms[3]; \n" " \n" " for(int i = 0; i < 3; i++) { \n" -" float4 vertex = vertices [faces[(index * 9) + (3 * i) + (0)]]; \n" +" float4 vertex;// = vertices[faces[GROUP_ID * 3 + i ][0]]; \n" " for(int j = 0; j < 4; j ++) { \n" -" vertex[j] = vertices[4 * faces[(index*9) + (3 * i) + (0)] + j]; \n" +" vertex[j] = vertices[4 * faces[GROUP_ID*3 + i][0] + j]; \n" " } \n" " \n" " for(int j = 0; j < 2; j++) { \n" -" uv_coords[i][j] = uv_buffer [2 * faces[(index * 9) + (3 * i) + (1)] + j]; \n" +" uv_coords[i][j] = uv_buffer [2 * faces[GROUP_ID * 3 + i][1] + j]; \n" " } \n" " \n" " for( int j = 0; j < 3; j++ ) { \n" " vertices3[j][i] = (vertex[j]/vertex[3]); \n" -" norms[j][i] = norms_buff[3 * (faces[(index * 9) + (3 * i) + (2)]) + j]; \n" +" norms[j][i] = norms_buff[3 * (faces[GROUP_ID * 3 + i][2]) + j]; \n" " } \n" " \n" " if ( vertices3[0][i] > 0 && vertices3[0][i] < *screen_width \n" @@ -82,10 +83,21 @@ const char* fragment_shader_kernel_source = " } \n" " } \n" " \n" -" float3 point; \n" -" for(point[0] = bounding_box_min[0]; point[0] <= bounding_box_max[0]; point[0]++) { \n" -" for(point[1] = bounding_box_min[1]; point[1] <= bounding_box_max[1]; point[1]++) { \n" +" if(bounding_box_min[0] > *screen_width || bounding_box_max[0] < 0 || bounding_box_min[1] > *screen_width || bounding_box_max[1] < 0) return; \n" " \n" +" \n" +" int X_PER_ITEM = (int)(ceil((float)(bounding_box_max[0] - bounding_box_min[0]) / 16.f)); \n" +" int Y_PER_ITEM = (int)(ceil((float)(bounding_box_max[1] - bounding_box_min[1]) / 16.f)); \n" +" float STARTING_X = bounding_box_min[0] + X_PER_ITEM * (LOCAL_ID % 16); \n" +" float ENDING_X = STARTING_X + X_PER_ITEM; \n" +" float STARTING_Y = bounding_box_min[1] + Y_PER_ITEM * (LOCAL_ID / 16); \n" +" float ENDING_Y = STARTING_Y + Y_PER_ITEM; \n" +" \n" +" \n" +" float3 point; \n" +" for(point[0] = STARTING_X; point[0] <= ENDING_X; point[0]++) { \n" +" for(point[1] = STARTING_Y; point[1] <= ENDING_Y; point[1]++) { \n" +" if(point[1] >= *screen_width || point[1] >= *screen_width) break; \n" " float3 bc_coord = barycentric(vertices3, point); \n" " if (bc_coord[0] < 0 || bc_coord[1] < 0 || bc_coord[2] < 0) continue; \n" " \n" @@ -114,13 +126,18 @@ const char* fragment_shader_kernel_source = " float3 normalized_norm = normalize(normal); \n" " \n" " \n" -" float intensity = clamp((dot(normalized_norm , (float3){light_dir[0], light_dir[1], light_dir[2]})), 0.f, 1.f); \n" +" float intensity = clamp((dot(normalized_norm , (float3){light_dir[0], light_dir[1], light_dir[2]})), 0.f, 1.f) + 0.2; \n" " \n" " int color = 0; \n" " color |= ((int)fmin((float)(diffuse_map[col_index + 0]) * intensity, (float) 0xff)) << 16; \n" " color |= ((int)fmin((float)(diffuse_map[col_index + 1]) * intensity, (float) 0xff)) << 8; \n" " color |= ((int)fmin((float)(diffuse_map[col_index + 2]) * intensity, (float) 0xff)) << 0; \n" " \n" +" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 16; \n" +" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 8; \n" +" //color |= ((int)fmin((float)(0xff) * intensity, (float) 0xff)) << 0; \n" +" \n" +" \n" " z_buffer[(int) (point[0] + point[1] * *screen_width)] = point[2]; \n" " pixels [(int) (point[0] + point[1] * *screen_width)] = color;// & 0x00ffffff; \n" " } \n" diff --git a/OpenWindow/kernels.cpp b/OpenWindow/kernels.cpp index 5f31cf7..88e0a06 100644 --- a/OpenWindow/kernels.cpp +++ b/OpenWindow/kernels.cpp @@ -107,7 +107,7 @@ void vertex_shader(float* z, float* vertices, int vertex_count, float* new_verti { mat_z = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * MATRIX_SIZE , NULL, &err); vertices_mem = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * vertex_count * 3, NULL, &err); - new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * vertex_count * 4, NULL, &err); + new_vertices_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * vertex_count, NULL, &err); err = clSetKernelArg(vertex_shader_kernel, 0, sizeof(cl_mem), &mat_z); err = clSetKernelArg(vertex_shader_kernel, 1, sizeof(cl_mem), &vertices_mem); @@ -134,7 +134,7 @@ void clear(cl_mem* buffer, size_t size, const int pattern) { } void fragment_shader( - int* faces, + cl_int3* faces, int nfaces, float* uv, size_t uv_size, @@ -149,7 +149,7 @@ void fragment_shader( ) { if (!fragment_shader_buffers_initialized) { - faces_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(int) * 3 * 3 * nfaces , NULL, &err); + faces_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(cl_int3) * 3 * nfaces , NULL, &err); pixel_data_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * screen_height * screen_width , NULL, &err); screen_width_mem = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(int) , NULL, &err); z_buffer_mem = clCreateBuffer(context, CL_MEM_READ_ONLY , sizeof(float) * screen_height * screen_width, NULL, &err); @@ -174,7 +174,7 @@ void fragment_shader( err = clSetKernelArg(fragment_shader_kernel, 10, sizeof(cl_mem), &diffuse_map_buffer); err = clEnqueueWriteBuffer(commands, screen_width_mem , CL_FALSE, 0, sizeof(int) , &screen_width , 0, NULL, NULL); - err = clEnqueueWriteBuffer(commands, faces_buffer , CL_FALSE, 0, sizeof(int) * 3 * 3 * nfaces , faces , 0, NULL, NULL); + err = clEnqueueWriteBuffer(commands, faces_buffer , CL_FALSE, 0, sizeof(cl_int3) * 3 * nfaces , faces , 0, NULL, NULL); err = clEnqueueWriteBuffer(commands, nfaces_mem , CL_FALSE, 0, sizeof(int) , &nfaces , 0, NULL, NULL); err = clEnqueueWriteBuffer(commands, uv_buffer , CL_FALSE, 0, uv_size , uv , 0, NULL, NULL); err = clEnqueueWriteBuffer(commands, map_size_buffer , CL_FALSE, 0, sizeof(int) * 2 , map_size , 0, NULL, NULL); @@ -190,7 +190,8 @@ void fragment_shader( clear(&z_buffer_mem, sizeof(float) * screen_width * screen_height, 0); - size_t fragment_shader_global[] = { nfaces }; + size_t fragment_shader_global[] = { nfaces * 256 }; + size_t framgent_shader_local[] = { 256 }; err = clEnqueueNDRangeKernel(commands, fragment_shader_kernel, 1, NULL, fragment_shader_global, NULL, 0, NULL, NULL); diff --git a/OpenWindow/kernels.h b/OpenWindow/kernels.h index 16855dd..0465e3a 100644 --- a/OpenWindow/kernels.h +++ b/OpenWindow/kernels.h @@ -35,7 +35,7 @@ extern const char* fragment_shader_kernel_source; void init_kernels(); void vertex_shader(float* z, float* vertices, int vertex_count, float* new_vertices); void fragment_shader( - int* faces, + cl_int3* faces, int nfaces, float* uv, size_t uv_size, diff --git a/OpenWindow/main.cpp b/OpenWindow/main.cpp index 4a63c41..39e05ac 100644 --- a/OpenWindow/main.cpp +++ b/OpenWindow/main.cpp @@ -75,14 +75,14 @@ bool HandleButtonPressed() { void CALLBACK FixedUpdate(HWND hwnd, UINT message, UINT uInt, DWORD dWord) { - TIME += 0.167; + TIME += 0.03333333; HandleButtonPressed(); camera.ApplyChanges(); clock_t start = clock(); render(); + Update(); clock_t end = clock(); printf("FPS: %f\n", 1/((float)(end-start)/CLOCKS_PER_SEC)); - Update(); } LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) diff --git a/OpenWindow/renderer.cpp b/OpenWindow/renderer.cpp index 52aa4aa..2b376fa 100644 --- a/OpenWindow/renderer.cpp +++ b/OpenWindow/renderer.cpp @@ -14,9 +14,9 @@ #define VERTICAL_CAMERA_CLAMP_UP 90 #define VERTICAL_CAMERA_CLAMP_DOWN -90 #define NEAR_CLIP_PLANE 1.f -#define FAR_CLIP_PLANE 200.0f +#define FAR_CLIP_PLANE 2000.0f #define FOV 50 -#define CAMERA_MOVEMENT_SPEED .1f +#define CAMERA_MOVEMENT_SPEED .7f #define DEFAULT_CAMERA_POS Vec3f(0, 0, 5) #define DEFAULT_CAMERA_ROT Vec3f(0, 0, 0) #define LIGHT_INTENSITY 1.5 @@ -30,13 +30,13 @@ Matrix ViewPort = Matrix::identity(); Matrix ModelView = Matrix::identity(); Matrix Projection = Matrix::identity(); -Model* model = new Model("african_head.obj"); +Model* model = new Model("sakura.obj"); Camera camera; Vec3f light_dir = Vec3f(1, 1, 1).normalize(); float* new_verts = (float*)malloc(4 * sizeof(float) * model->nverts()); -int* faces = (int*)malloc(3 * 3 * sizeof(int) * model->nfaces()); +cl_int3* faces = (cl_int3*)malloc(3 * sizeof(cl_int3) * model->nfaces()); bool init_flag = false; @@ -96,10 +96,13 @@ void render() if (!init_flag) { //light_dir = camera.GetForward().normalize() * -1; viewport(0, 0, screen_width, screen_height, FAR_CLIP_PLANE, NEAR_CLIP_PLANE); - for (int i = 0; i < model->nfaces(); i++) - for (int j = 0; j < 3; j++) - for (int k = 0; k < 3; k++) - faces[i * 9 + j * 3 + k] = model->faces_[i][j][k]; + for (int i = 0; i < model->nfaces(); i++) { + for (int j = 0; j < 3; j++) { + faces[i * 3 + j].x = model->faces_[i][j][0]; + faces[i * 3 + j].y = model->faces_[i][j][1]; + faces[i * 3 + j].z = model->faces_[i][j][2]; + } + } init_flag = true; } diff --git a/OpenWindow/sakura_diffuse.tga b/OpenWindow/sakura_diffuse.tga new file mode 100644 index 0000000..9d7d13d Binary files /dev/null and b/OpenWindow/sakura_diffuse.tga differ