#include #include #include #include #include #include #ifndef _LANGUAGE_C #define _LANGUAGE_C #endif #include #include "config.h" #include "gfx_pc.h" #include "gfx_cc.h" #include "gfx_window_manager_api.h" #include "gfx_rendering_api.h" #include "gfx_screen_config.h" #define SUPPORT_CHECK(x) assert(x) // SCALE_M_N: upscale/downscale M-bit integer to N-bit #define SCALE_5_8(VAL_) (((VAL_) * 0xFF) / 0x1F) #define SCALE_8_5(VAL_) ((((VAL_) + 4) * 0x1F) / 0xFF) #define SCALE_4_8(VAL_) ((VAL_) * 0x11) #define SCALE_8_4(VAL_) ((VAL_) / 0x11) #define SCALE_3_8(VAL_) ((VAL_) * 0x24) #define SCALE_8_3(VAL_) ((VAL_) / 0x24) #define HALF_SCREEN_WIDTH (SCREEN_WIDTH / 2) #define HALF_SCREEN_HEIGHT (SCREEN_HEIGHT / 2) #define RATIO_X (gfx_current_dimensions.width / (2.0f * HALF_SCREEN_WIDTH)) #define RATIO_Y (gfx_current_dimensions.height / (2.0f * HALF_SCREEN_HEIGHT)) #define MAX_BUFFERED 256 #define MAX_LIGHTS 2 #define MAX_VERTICES 64 struct RGBA { uint8_t r, g, b, a; }; struct XYWidthHeight { uint16_t x, y, width, height; }; struct LoadedVertex { float x, y, z, w; float u, v; struct RGBA color; uint8_t clip_rej; }; struct TextureHashmapNode { struct TextureHashmapNode *next; const uint8_t *texture_addr; uint8_t fmt, siz; uint32_t texture_id; uint8_t cms, cmt; bool linear_filter; }; static struct { struct TextureHashmapNode *hashmap[1024]; struct TextureHashmapNode pool[512]; uint32_t pool_pos; } gfx_texture_cache; struct ColorCombiner { uint32_t cc_id; struct ShaderProgram *prg; uint8_t shader_input_mapping[2][4]; }; static struct ColorCombiner color_combiner_pool[64]; static uint8_t color_combiner_pool_size; static struct RSP { float modelview_matrix_stack[11][4][4]; uint8_t modelview_matrix_stack_size; float MP_matrix[4][4]; float P_matrix[4][4]; Light_t current_lights[MAX_LIGHTS + 1]; float current_lights_coeffs[MAX_LIGHTS][3]; float current_lookat_coeffs[2][3]; // lookat_x, lookat_y uint8_t current_num_lights; // includes ambient light bool lights_changed; uint32_t geometry_mode; int16_t fog_mul, fog_offset; struct { // U0.16 uint16_t s, t; } texture_scaling_factor; struct LoadedVertex loaded_vertices[MAX_VERTICES + 4]; } rsp; static struct RDP { const uint8_t *palette; struct { const uint8_t *addr; uint8_t siz; uint8_t tile_number; } texture_to_load; struct { const uint8_t *addr; uint32_t size_bytes; } loaded_texture[2]; struct { uint8_t fmt; uint8_t siz; uint8_t cms, cmt; uint16_t uls, ult, lrs, lrt; // U10.2 uint32_t line_size_bytes; } texture_tile; bool textures_changed[2]; uint32_t other_mode_l, other_mode_h; uint32_t combine_mode; struct RGBA env_color, prim_color, fog_color, fill_color; struct XYWidthHeight viewport, scissor; bool viewport_or_scissor_changed; void *z_buf_address; void *color_image_address; } rdp; static struct RenderingState { bool depth_test; bool depth_mask; bool decal_mode; bool alpha_blend; struct XYWidthHeight viewport, scissor; struct ShaderProgram *shader_program; struct TextureHashmapNode *textures[2]; } rendering_state; struct GfxDimensions gfx_current_dimensions; static bool dropped_frame; static float buf_vbo[MAX_BUFFERED * (26 * 3)]; // 3 vertices in a triangle and 26 floats per vtx static size_t buf_vbo_len; static size_t buf_vbo_num_tris; static struct GfxWindowManagerAPI *gfx_wapi; static struct GfxRenderingAPI *gfx_rapi; #include static unsigned long get_time(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (unsigned long)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; } static void gfx_flush(void) { if (buf_vbo_len > 0) { int num = buf_vbo_num_tris; unsigned long t0 = get_time(); gfx_rapi->draw_triangles(buf_vbo, buf_vbo_len, buf_vbo_num_tris); buf_vbo_len = 0; buf_vbo_num_tris = 0; unsigned long t1 = get_time(); /*if (t1 - t0 > 1000) { printf("f: %d %d\n", num, (int)(t1 - t0)); }*/ } } static struct ShaderProgram *gfx_lookup_or_create_shader_program(uint32_t shader_id) { struct ShaderProgram *prg = gfx_rapi->lookup_shader(shader_id); if (prg == NULL) { gfx_rapi->unload_shader(rendering_state.shader_program); prg = gfx_rapi->create_and_load_new_shader(shader_id); rendering_state.shader_program = prg; } return prg; } static void gfx_generate_cc(struct ColorCombiner *comb, uint32_t cc_id) { uint8_t c[2][4]; uint32_t shader_id = (cc_id >> 24) << 24; uint8_t shader_input_mapping[2][4] = {{0}}; for (int i = 0; i < 4; i++) { c[0][i] = (cc_id >> (i * 3)) & 7; c[1][i] = (cc_id >> (12 + i * 3)) & 7; } for (int i = 0; i < 2; i++) { if (c[i][0] == c[i][1] || c[i][2] == CC_0) { c[i][0] = c[i][1] = c[i][2] = 0; } uint8_t input_number[8] = {0}; int next_input_number = SHADER_INPUT_1; for (int j = 0; j < 4; j++) { int val = 0; switch (c[i][j]) { case CC_0: break; case CC_TEXEL0: val = SHADER_TEXEL0; break; case CC_TEXEL1: val = SHADER_TEXEL1; break; case CC_TEXEL0A: val = SHADER_TEXEL0A; break; case CC_PRIM: case CC_SHADE: case CC_ENV: case CC_LOD: if (input_number[c[i][j]] == 0) { shader_input_mapping[i][next_input_number - 1] = c[i][j]; input_number[c[i][j]] = next_input_number++; } val = input_number[c[i][j]]; break; } shader_id |= val << (i * 12 + j * 3); } } comb->cc_id = cc_id; comb->prg = gfx_lookup_or_create_shader_program(shader_id); memcpy(comb->shader_input_mapping, shader_input_mapping, sizeof(shader_input_mapping)); } static struct ColorCombiner *gfx_lookup_or_create_color_combiner(uint32_t cc_id) { static struct ColorCombiner *prev_combiner; if (prev_combiner != NULL && prev_combiner->cc_id == cc_id) { return prev_combiner; } for (size_t i = 0; i < color_combiner_pool_size; i++) { if (color_combiner_pool[i].cc_id == cc_id) { return prev_combiner = &color_combiner_pool[i]; } } gfx_flush(); struct ColorCombiner *comb = &color_combiner_pool[color_combiner_pool_size++]; gfx_generate_cc(comb, cc_id); return prev_combiner = comb; } static bool gfx_texture_cache_lookup(int tile, struct TextureHashmapNode **n, const uint8_t *orig_addr, uint32_t fmt, uint32_t siz) { size_t hash = (uintptr_t)orig_addr; hash = (hash >> 5) & 0x3ff; struct TextureHashmapNode **node = &gfx_texture_cache.hashmap[hash]; while (*node != NULL && *node - gfx_texture_cache.pool < gfx_texture_cache.pool_pos) { if ((*node)->texture_addr == orig_addr && (*node)->fmt == fmt && (*node)->siz == siz) { gfx_rapi->select_texture(tile, (*node)->texture_id); *n = *node; return true; } node = &(*node)->next; } if (gfx_texture_cache.pool_pos == sizeof(gfx_texture_cache.pool) / sizeof(struct TextureHashmapNode)) { // Pool is full. We just invalidate everything and start over. gfx_texture_cache.pool_pos = 0; node = &gfx_texture_cache.hashmap[hash]; //puts("Clearing texture cache"); } *node = &gfx_texture_cache.pool[gfx_texture_cache.pool_pos++]; if ((*node)->texture_addr == NULL) { (*node)->texture_id = gfx_rapi->new_texture(); } gfx_rapi->select_texture(tile, (*node)->texture_id); gfx_rapi->set_sampler_parameters(tile, false, 0, 0); (*node)->cms = 0; (*node)->cmt = 0; (*node)->linear_filter = false; (*node)->next = NULL; (*node)->texture_addr = orig_addr; (*node)->fmt = fmt; (*node)->siz = siz; *n = *node; return false; } static void import_texture_rgba16(int tile) { uint8_t rgba32_buf[8192]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes / 2; i++) { uint16_t col16 = (rdp.loaded_texture[tile].addr[2 * i] << 8) | rdp.loaded_texture[tile].addr[2 * i + 1]; uint8_t a = col16 & 1; uint8_t r = col16 >> 11; uint8_t g = (col16 >> 6) & 0x1f; uint8_t b = (col16 >> 1) & 0x1f; rgba32_buf[4*i + 0] = SCALE_5_8(r); rgba32_buf[4*i + 1] = SCALE_5_8(g); rgba32_buf[4*i + 2] = SCALE_5_8(b); rgba32_buf[4*i + 3] = a ? 255 : 0; } uint32_t width = rdp.texture_tile.line_size_bytes / 2; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture_ia4(int tile) { uint8_t rgba32_buf[32768]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes * 2; i++) { uint8_t byte = rdp.loaded_texture[tile].addr[i / 2]; uint8_t part = (byte >> (4 - (i % 2) * 4)) & 0xf; uint8_t intensity = part >> 1; uint8_t alpha = part & 1; uint8_t r = intensity; uint8_t g = intensity; uint8_t b = intensity; rgba32_buf[4*i + 0] = SCALE_3_8(r); rgba32_buf[4*i + 1] = SCALE_3_8(g); rgba32_buf[4*i + 2] = SCALE_3_8(b); rgba32_buf[4*i + 3] = alpha ? 255 : 0; } uint32_t width = rdp.texture_tile.line_size_bytes * 2; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture_ia8(int tile) { uint8_t rgba32_buf[16384]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes; i++) { uint8_t intensity = rdp.loaded_texture[tile].addr[i] >> 4; uint8_t alpha = rdp.loaded_texture[tile].addr[i] & 0xf; uint8_t r = intensity; uint8_t g = intensity; uint8_t b = intensity; rgba32_buf[4*i + 0] = SCALE_4_8(r); rgba32_buf[4*i + 1] = SCALE_4_8(g); rgba32_buf[4*i + 2] = SCALE_4_8(b); rgba32_buf[4*i + 3] = SCALE_4_8(alpha); } uint32_t width = rdp.texture_tile.line_size_bytes; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture_ia16(int tile) { uint8_t rgba32_buf[8192]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes / 2; i++) { uint8_t intensity = rdp.loaded_texture[tile].addr[2 * i]; uint8_t alpha = rdp.loaded_texture[tile].addr[2 * i + 1]; uint8_t r = intensity; uint8_t g = intensity; uint8_t b = intensity; rgba32_buf[4*i + 0] = r; rgba32_buf[4*i + 1] = g; rgba32_buf[4*i + 2] = b; rgba32_buf[4*i + 3] = alpha; } uint32_t width = rdp.texture_tile.line_size_bytes / 2; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture_ci4(int tile) { uint8_t rgba32_buf[32768]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes * 2; i++) { uint8_t byte = rdp.loaded_texture[tile].addr[i / 2]; uint8_t idx = (byte >> (4 - (i % 2) * 4)) & 0xf; uint16_t col16 = (rdp.palette[idx * 2] << 8) | rdp.palette[idx * 2 + 1]; // Big endian load uint8_t a = col16 & 1; uint8_t r = col16 >> 11; uint8_t g = (col16 >> 6) & 0x1f; uint8_t b = (col16 >> 1) & 0x1f; rgba32_buf[4*i + 0] = SCALE_5_8(r); rgba32_buf[4*i + 1] = SCALE_5_8(g); rgba32_buf[4*i + 2] = SCALE_5_8(b); rgba32_buf[4*i + 3] = a ? 255 : 0; } uint32_t width = rdp.texture_tile.line_size_bytes * 2; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture_ci8(int tile) { uint8_t rgba32_buf[16384]; for (uint32_t i = 0; i < rdp.loaded_texture[tile].size_bytes; i++) { uint8_t idx = rdp.loaded_texture[tile].addr[i]; uint16_t col16 = (rdp.palette[idx * 2] << 8) | rdp.palette[idx * 2 + 1]; // Big endian load uint8_t a = col16 & 1; uint8_t r = col16 >> 11; uint8_t g = (col16 >> 6) & 0x1f; uint8_t b = (col16 >> 1) & 0x1f; rgba32_buf[4*i + 0] = SCALE_5_8(r); rgba32_buf[4*i + 1] = SCALE_5_8(g); rgba32_buf[4*i + 2] = SCALE_5_8(b); rgba32_buf[4*i + 3] = a ? 255 : 0; } uint32_t width = rdp.texture_tile.line_size_bytes; uint32_t height = rdp.loaded_texture[tile].size_bytes / rdp.texture_tile.line_size_bytes; gfx_rapi->upload_texture(rgba32_buf, width, height); } static void import_texture(int tile) { uint8_t fmt = rdp.texture_tile.fmt; uint8_t siz = rdp.texture_tile.siz; if (gfx_texture_cache_lookup(tile, &rendering_state.textures[tile], rdp.loaded_texture[tile].addr, fmt, siz)) { return; } int t0 = get_time(); if (fmt == G_IM_FMT_RGBA) { if (siz == G_IM_SIZ_16b) { import_texture_rgba16(tile); } else { abort(); } } else if (fmt == G_IM_FMT_IA) { if (siz == G_IM_SIZ_4b) { import_texture_ia4(tile); } else if (siz == G_IM_SIZ_8b) { import_texture_ia8(tile); } else if (siz == G_IM_SIZ_16b) { import_texture_ia16(tile); } else { abort(); } } else if (fmt == G_IM_FMT_CI) { if (siz == G_IM_SIZ_4b) { import_texture_ci4(tile); } else if (siz == G_IM_SIZ_8b) { import_texture_ci8(tile); } else { abort(); } } else { abort(); } int t1 = get_time(); //printf("Time diff: %d\n", t1 - t0); } static void gfx_normalize_vector(float v[3]) { float s = sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); v[0] /= s; v[1] /= s; v[2] /= s; } static void gfx_transposed_matrix_mul(float res[3], const float a[3], const float b[4][4]) { res[0] = a[0] * b[0][0] + a[1] * b[0][1] + a[2] * b[0][2]; res[1] = a[0] * b[1][0] + a[1] * b[1][1] + a[2] * b[1][2]; res[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2] * b[2][2]; } static void calculate_normal_dir(const Light_t *light, float coeffs[3]) { float light_dir[3] = { light->dir[0] / 127.0f, light->dir[1] / 127.0f, light->dir[2] / 127.0f }; gfx_transposed_matrix_mul(coeffs, light_dir, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1]); gfx_normalize_vector(coeffs); } static void gfx_matrix_mul(float res[4][4], const float a[4][4], const float b[4][4]) { float tmp[4][4]; for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { tmp[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j] + a[i][3] * b[3][j]; } } memcpy(res, tmp, sizeof(tmp)); } static void gfx_sp_matrix(uint8_t parameters, const int32_t *addr) { float matrix[4][4]; #if 0 // Original code when fixed point matrices were used for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j += 2) { int32_t int_part = addr[i * 2 + j / 2]; uint32_t frac_part = addr[8 + i * 2 + j / 2]; matrix[i][j] = (int32_t)((int_part & 0xffff0000) | (frac_part >> 16)) / 65536.0f; matrix[i][j + 1] = (int32_t)((int_part << 16) | (frac_part & 0xffff)) / 65536.0f; } } #else memcpy(matrix, addr, sizeof(matrix)); #endif if (parameters & G_MTX_PROJECTION) { if (parameters & G_MTX_LOAD) { memcpy(rsp.P_matrix, matrix, sizeof(matrix)); } else { gfx_matrix_mul(rsp.P_matrix, matrix, rsp.P_matrix); } } else { // G_MTX_MODELVIEW if ((parameters & G_MTX_PUSH) && rsp.modelview_matrix_stack_size < 11) { ++rsp.modelview_matrix_stack_size; memcpy(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 2], sizeof(matrix)); } if (parameters & G_MTX_LOAD) { memcpy(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], matrix, sizeof(matrix)); } else { gfx_matrix_mul(rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1]); } rsp.lights_changed = 1; } gfx_matrix_mul(rsp.MP_matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.P_matrix); } static void gfx_sp_pop_matrix(uint32_t count) { while (count--) { if (rsp.modelview_matrix_stack_size > 0) { --rsp.modelview_matrix_stack_size; if (rsp.modelview_matrix_stack_size > 0) { gfx_matrix_mul(rsp.MP_matrix, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1], rsp.P_matrix); } } } } static float gfx_adjust_x_for_aspect_ratio(float x) { return x * (4.0f / 3.0f) / ((float)gfx_current_dimensions.width / (float)gfx_current_dimensions.height); } static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *vertices) { for (size_t i = 0; i < n_vertices; i++, dest_index++) { const Vtx_t *v = &vertices[i].v; const Vtx_tn *vn = &vertices[i].n; struct LoadedVertex *d = &rsp.loaded_vertices[dest_index]; float x = v->ob[0] * rsp.MP_matrix[0][0] + v->ob[1] * rsp.MP_matrix[1][0] + v->ob[2] * rsp.MP_matrix[2][0] + rsp.MP_matrix[3][0]; float y = v->ob[0] * rsp.MP_matrix[0][1] + v->ob[1] * rsp.MP_matrix[1][1] + v->ob[2] * rsp.MP_matrix[2][1] + rsp.MP_matrix[3][1]; float z = v->ob[0] * rsp.MP_matrix[0][2] + v->ob[1] * rsp.MP_matrix[1][2] + v->ob[2] * rsp.MP_matrix[2][2] + rsp.MP_matrix[3][2]; float w = v->ob[0] * rsp.MP_matrix[0][3] + v->ob[1] * rsp.MP_matrix[1][3] + v->ob[2] * rsp.MP_matrix[2][3] + rsp.MP_matrix[3][3]; x = gfx_adjust_x_for_aspect_ratio(x); short U = v->tc[0] * rsp.texture_scaling_factor.s >> 16; short V = v->tc[1] * rsp.texture_scaling_factor.t >> 16; if (rsp.geometry_mode & G_LIGHTING) { if (rsp.lights_changed) { for (int i = 0; i < rsp.current_num_lights - 1; i++) { calculate_normal_dir(&rsp.current_lights[i], rsp.current_lights_coeffs[i]); } static const Light_t lookat_x = {{0, 0, 0}, 0, {0, 0, 0}, 0, {127, 0, 0}, 0}; static const Light_t lookat_y = {{0, 0, 0}, 0, {0, 0, 0}, 0, {0, 127, 0}, 0}; calculate_normal_dir(&lookat_x, rsp.current_lookat_coeffs[0]); calculate_normal_dir(&lookat_y, rsp.current_lookat_coeffs[1]); rsp.lights_changed = false; } int r = rsp.current_lights[rsp.current_num_lights - 1].col[0]; int g = rsp.current_lights[rsp.current_num_lights - 1].col[1]; int b = rsp.current_lights[rsp.current_num_lights - 1].col[2]; for (int i = 0; i < rsp.current_num_lights - 1; i++) { float intensity = 0; intensity += vn->n[0] * rsp.current_lights_coeffs[i][0]; intensity += vn->n[1] * rsp.current_lights_coeffs[i][1]; intensity += vn->n[2] * rsp.current_lights_coeffs[i][2]; intensity /= 127.0f; if (intensity > 0.0f) { r += intensity * rsp.current_lights[i].col[0]; g += intensity * rsp.current_lights[i].col[1]; b += intensity * rsp.current_lights[i].col[2]; } } d->color.r = r > 255 ? 255 : r; d->color.g = g > 255 ? 255 : g; d->color.b = b > 255 ? 255 : b; if (rsp.geometry_mode & G_TEXTURE_GEN) { float dotx = 0, doty = 0; dotx += vn->n[0] * rsp.current_lookat_coeffs[0][0]; dotx += vn->n[1] * rsp.current_lookat_coeffs[0][1]; dotx += vn->n[2] * rsp.current_lookat_coeffs[0][2]; doty += vn->n[0] * rsp.current_lookat_coeffs[1][0]; doty += vn->n[1] * rsp.current_lookat_coeffs[1][1]; doty += vn->n[2] * rsp.current_lookat_coeffs[1][2]; U = (int32_t)((dotx / 127.0f + 1.0f) / 4.0f * rsp.texture_scaling_factor.s); V = (int32_t)((doty / 127.0f + 1.0f) / 4.0f * rsp.texture_scaling_factor.t); } } else { d->color.r = v->cn[0]; d->color.g = v->cn[1]; d->color.b = v->cn[2]; } d->u = U; d->v = V; // trivial clip rejection d->clip_rej = 0; if (x < -w) d->clip_rej |= 1; if (x > w) d->clip_rej |= 2; if (y < -w) d->clip_rej |= 4; if (y > w) d->clip_rej |= 8; if (z < -w) d->clip_rej |= 16; if (z > w) d->clip_rej |= 32; d->x = x; d->y = y; d->z = z; d->w = w; if (rsp.geometry_mode & G_FOG) { if (fabsf(w) < 0.001f) { // To avoid division by zero w = 0.001f; } float winv = 1.0f / w; if (winv < 0.0f) { winv = 32767.0f; } float fog_z = z * winv * rsp.fog_mul + rsp.fog_offset; if (fog_z < 0) fog_z = 0; if (fog_z > 255) fog_z = 255; d->color.a = fog_z; // Use alpha variable to store fog factor } else { d->color.a = v->cn[3]; } } } static void gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t vtx3_idx) { struct LoadedVertex *v1 = &rsp.loaded_vertices[vtx1_idx]; struct LoadedVertex *v2 = &rsp.loaded_vertices[vtx2_idx]; struct LoadedVertex *v3 = &rsp.loaded_vertices[vtx3_idx]; struct LoadedVertex *v_arr[3] = {v1, v2, v3}; //if (rand()%2) return; if (v1->clip_rej & v2->clip_rej & v3->clip_rej) { // The whole triangle lies outside the visible area return; } if ((rsp.geometry_mode & G_CULL_BOTH) != 0) { float dx1 = v1->x / (v1->w) - v2->x / (v2->w); float dy1 = v1->y / (v1->w) - v2->y / (v2->w); float dx2 = v3->x / (v3->w) - v2->x / (v2->w); float dy2 = v3->y / (v3->w) - v2->y / (v2->w); float cross = dx1 * dy2 - dy1 * dx2; if ((v1->w < 0) ^ (v2->w < 0) ^ (v3->w < 0)) { // If one vertex lies behind the eye, negating cross will give the correct result. // If all vertices lie behind the eye, the triangle will be rejected anyway. cross = -cross; } switch (rsp.geometry_mode & G_CULL_BOTH) { case G_CULL_FRONT: if (cross <= 0) return; break; case G_CULL_BACK: if (cross >= 0) return; break; case G_CULL_BOTH: // Why is this even an option? return; } } bool depth_test = (rsp.geometry_mode & G_ZBUFFER) == G_ZBUFFER; if (depth_test != rendering_state.depth_test) { gfx_flush(); gfx_rapi->set_depth_test(depth_test); rendering_state.depth_test = depth_test; } bool z_upd = (rdp.other_mode_l & Z_UPD) == Z_UPD; if (z_upd != rendering_state.depth_mask) { gfx_flush(); gfx_rapi->set_depth_mask(z_upd); rendering_state.depth_mask = z_upd; } bool zmode_decal = (rdp.other_mode_l & ZMODE_DEC) == ZMODE_DEC; if (zmode_decal != rendering_state.decal_mode) { gfx_flush(); gfx_rapi->set_zmode_decal(zmode_decal); rendering_state.decal_mode = zmode_decal; } if (rdp.viewport_or_scissor_changed) { if (memcmp(&rdp.viewport, &rendering_state.viewport, sizeof(rdp.viewport)) != 0) { gfx_flush(); gfx_rapi->set_viewport(rdp.viewport.x, rdp.viewport.y, rdp.viewport.width, rdp.viewport.height); rendering_state.viewport = rdp.viewport; } if (memcmp(&rdp.scissor, &rendering_state.scissor, sizeof(rdp.scissor)) != 0) { gfx_flush(); gfx_rapi->set_scissor(rdp.scissor.x, rdp.scissor.y, rdp.scissor.width, rdp.scissor.height); rendering_state.scissor = rdp.scissor; } rdp.viewport_or_scissor_changed = false; } uint32_t cc_id = rdp.combine_mode; bool use_alpha = (rdp.other_mode_l & (G_BL_A_MEM << 18)) == 0; bool use_fog = (rdp.other_mode_l >> 30) == G_BL_CLR_FOG; bool texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA; if (texture_edge) { use_alpha = true; } if (use_alpha) cc_id |= SHADER_OPT_ALPHA; if (use_fog) cc_id |= SHADER_OPT_FOG; if (texture_edge) cc_id |= SHADER_OPT_TEXTURE_EDGE; if (!use_alpha) { cc_id &= ~0xfff000; } struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cc_id); struct ShaderProgram *prg = comb->prg; if (prg != rendering_state.shader_program) { gfx_flush(); gfx_rapi->unload_shader(rendering_state.shader_program); gfx_rapi->load_shader(prg); rendering_state.shader_program = prg; } if (use_alpha != rendering_state.alpha_blend) { gfx_flush(); gfx_rapi->set_use_alpha(use_alpha); rendering_state.alpha_blend = use_alpha; } uint8_t num_inputs; bool used_textures[2]; gfx_rapi->shader_get_info(prg, &num_inputs, used_textures); for (int i = 0; i < 2; i++) { if (used_textures[i]) { if (rdp.textures_changed[i]) { gfx_flush(); import_texture(i); rdp.textures_changed[i] = false; } bool linear_filter = (rdp.other_mode_h & (3U << G_MDSFT_TEXTFILT)) != G_TF_POINT; if (linear_filter != rendering_state.textures[i]->linear_filter || rdp.texture_tile.cms != rendering_state.textures[i]->cms || rdp.texture_tile.cmt != rendering_state.textures[i]->cmt) { gfx_flush(); gfx_rapi->set_sampler_parameters(i, linear_filter, rdp.texture_tile.cms, rdp.texture_tile.cmt); rendering_state.textures[i]->linear_filter = linear_filter; rendering_state.textures[i]->cms = rdp.texture_tile.cms; rendering_state.textures[i]->cmt = rdp.texture_tile.cmt; } } } bool use_texture = used_textures[0] || used_textures[1]; uint32_t tex_width = (rdp.texture_tile.lrs - rdp.texture_tile.uls + 4) / 4; uint32_t tex_height = (rdp.texture_tile.lrt - rdp.texture_tile.ult + 4) / 4; bool z_is_from_0_to_1 = gfx_rapi->z_is_from_0_to_1(); for (int i = 0; i < 3; i++) { float z = v_arr[i]->z, w = v_arr[i]->w; if (z_is_from_0_to_1) { z = (z + w) / 2.0f; } buf_vbo[buf_vbo_len++] = v_arr[i]->x; buf_vbo[buf_vbo_len++] = v_arr[i]->y; buf_vbo[buf_vbo_len++] = z; buf_vbo[buf_vbo_len++] = w; if (use_texture) { float u = (v_arr[i]->u - rdp.texture_tile.uls * 8) / 32.0f; float v = (v_arr[i]->v - rdp.texture_tile.ult * 8) / 32.0f; if ((rdp.other_mode_h & (3U << G_MDSFT_TEXTFILT)) != G_TF_POINT) { // Linear filter adds 0.5f to the coordinates u += 0.5f; v += 0.5f; } buf_vbo[buf_vbo_len++] = u / tex_width; buf_vbo[buf_vbo_len++] = v / tex_height; } if (use_fog) { buf_vbo[buf_vbo_len++] = rdp.fog_color.r / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.g / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.b / 255.0f; buf_vbo[buf_vbo_len++] = v_arr[i]->color.a / 255.0f; // fog factor (not alpha) } for (int j = 0; j < num_inputs; j++) { struct RGBA *color; struct RGBA tmp; for (int k = 0; k < 1 + (use_alpha ? 1 : 0); k++) { switch (comb->shader_input_mapping[k][j]) { case CC_PRIM: color = &rdp.prim_color; break; case CC_SHADE: color = &v_arr[i]->color; break; case CC_ENV: color = &rdp.env_color; break; case CC_LOD: { float distance_frac = (v1->w - 3000.0f) / 3000.0f; if (distance_frac < 0.0f) distance_frac = 0.0f; if (distance_frac > 1.0f) distance_frac = 1.0f; tmp.r = tmp.g = tmp.b = tmp.a = distance_frac * 255.0f; color = &tmp; break; } default: memset(&tmp, 0, sizeof(tmp)); color = &tmp; break; } if (k == 0) { buf_vbo[buf_vbo_len++] = color->r / 255.0f; buf_vbo[buf_vbo_len++] = color->g / 255.0f; buf_vbo[buf_vbo_len++] = color->b / 255.0f; } else { if (use_fog && color == &v_arr[i]->color) { // Shade alpha is 100% for fog buf_vbo[buf_vbo_len++] = 1.0f; } else { buf_vbo[buf_vbo_len++] = color->a / 255.0f; } } } } /*struct RGBA *color = &v_arr[i]->color; buf_vbo[buf_vbo_len++] = color->r / 255.0f; buf_vbo[buf_vbo_len++] = color->g / 255.0f; buf_vbo[buf_vbo_len++] = color->b / 255.0f; buf_vbo[buf_vbo_len++] = color->a / 255.0f;*/ } if (++buf_vbo_num_tris == MAX_BUFFERED) { gfx_flush(); } } static void gfx_sp_geometry_mode(uint32_t clear, uint32_t set) { rsp.geometry_mode &= ~clear; rsp.geometry_mode |= set; } static void gfx_calc_and_set_viewport(const Vp_t *viewport) { // 2 bits fraction float width = 2.0f * viewport->vscale[0] / 4.0f; float height = 2.0f * viewport->vscale[1] / 4.0f; float x = (viewport->vtrans[0] / 4.0f) - width / 2.0f; float y = SCREEN_HEIGHT - ((viewport->vtrans[1] / 4.0f) + height / 2.0f); width *= RATIO_X; height *= RATIO_Y; x *= RATIO_X; y *= RATIO_Y; rdp.viewport.x = x; rdp.viewport.y = y; rdp.viewport.width = width; rdp.viewport.height = height; rdp.viewport_or_scissor_changed = true; } static void gfx_sp_movemem(uint8_t index, uint8_t offset, const void* data) { switch (index) { case G_MV_VIEWPORT: gfx_calc_and_set_viewport((const Vp_t *) data); break; #if 0 case G_MV_LOOKATY: case G_MV_LOOKATX: memcpy(rsp.current_lookat + (index - G_MV_LOOKATY) / 2, data, sizeof(Light_t)); //rsp.lights_changed = 1; break; #endif #ifdef F3DEX_GBI_2 case G_MV_LIGHT: { int lightidx = offset / 24 - 2; if (lightidx >= 0 && lightidx <= MAX_LIGHTS) { // skip lookat // NOTE: reads out of bounds if it is an ambient light memcpy(rsp.current_lights + lightidx, data, sizeof(Light_t)); } break; } #else case G_MV_L0: case G_MV_L1: case G_MV_L2: // NOTE: reads out of bounds if it is an ambient light memcpy(rsp.current_lights + (index - G_MV_L0) / 2, data, sizeof(Light_t)); break; #endif } } static void gfx_sp_moveword(uint8_t index, uint16_t offset, uint32_t data) { switch (index) { case G_MW_NUMLIGHT: #ifdef F3DEX_GBI_2 rsp.current_num_lights = data / 24 + 1; // add ambient light #else // Ambient light is included // The 31th bit is a flag that lights should be recalculated rsp.current_num_lights = (data - 0x80000000U) / 32; #endif rsp.lights_changed = 1; break; case G_MW_FOG: rsp.fog_mul = (int16_t)(data >> 16); rsp.fog_offset = (int16_t)data; break; } } static void gfx_sp_texture(uint16_t sc, uint16_t tc, uint8_t level, uint8_t tile, uint8_t on) { rsp.texture_scaling_factor.s = sc; rsp.texture_scaling_factor.t = tc; } static void gfx_dp_set_scissor(uint32_t mode, uint32_t ulx, uint32_t uly, uint32_t lrx, uint32_t lry) { float x = ulx / 4.0f * RATIO_X; float y = (SCREEN_HEIGHT - lry / 4.0f) * RATIO_Y; float width = (lrx - ulx) / 4.0f * RATIO_X; float height = (lry - uly) / 4.0f * RATIO_Y; rdp.scissor.x = x; rdp.scissor.y = y; rdp.scissor.width = width; rdp.scissor.height = height; rdp.viewport_or_scissor_changed = true; } static void gfx_dp_set_texture_image(uint32_t format, uint32_t size, uint32_t width, const void* addr) { rdp.texture_to_load.addr = addr; rdp.texture_to_load.siz = size; } static void gfx_dp_set_tile(uint8_t fmt, uint32_t siz, uint32_t line, uint32_t tmem, uint8_t tile, uint32_t palette, uint32_t cmt, uint32_t maskt, uint32_t shiftt, uint32_t cms, uint32_t masks, uint32_t shifts) { SUPPORT_CHECK(siz != G_IM_SIZ_32b); if (tile == G_TX_RENDERTILE) { SUPPORT_CHECK(palette == 0); // palette should set upper 4 bits of color index in 4b mode rdp.texture_tile.fmt = fmt; rdp.texture_tile.siz = siz; rdp.texture_tile.cms = cms; rdp.texture_tile.cmt = cmt; rdp.texture_tile.line_size_bytes = line * 8; rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } if (tile == G_TX_LOADTILE) { rdp.texture_to_load.tile_number = tmem / 256; } } static void gfx_dp_set_tile_size(uint8_t tile, uint16_t uls, uint16_t ult, uint16_t lrs, uint16_t lrt) { if (tile == G_TX_RENDERTILE) { rdp.texture_tile.uls = uls; rdp.texture_tile.ult = ult; rdp.texture_tile.lrs = lrs; rdp.texture_tile.lrt = lrt; rdp.textures_changed[0] = true; rdp.textures_changed[1] = true; } } static void gfx_dp_load_tlut(uint8_t tile, uint32_t high_index) { SUPPORT_CHECK(tile == G_TX_LOADTILE); SUPPORT_CHECK(rdp.texture_to_load.siz == G_IM_SIZ_16b); rdp.palette = rdp.texture_to_load.addr; } static void gfx_dp_load_block(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t lrs, uint32_t dxt) { if (tile == 1) return; SUPPORT_CHECK(tile == G_TX_LOADTILE); SUPPORT_CHECK(uls == 0); SUPPORT_CHECK(ult == 0); // The lrs field rather seems to be number of pixels to load uint32_t word_size_shift; switch (rdp.texture_to_load.siz) { case G_IM_SIZ_4b: word_size_shift = 0; // Or -1? It's unused in SM64 anyway. break; case G_IM_SIZ_8b: word_size_shift = 0; break; case G_IM_SIZ_16b: word_size_shift = 1; break; case G_IM_SIZ_32b: word_size_shift = 2; break; } uint32_t size_bytes = (lrs + 1) << word_size_shift; rdp.loaded_texture[rdp.texture_to_load.tile_number].size_bytes = size_bytes; assert(size_bytes <= 4096 && "bug: too big texture"); rdp.loaded_texture[rdp.texture_to_load.tile_number].addr = rdp.texture_to_load.addr; rdp.textures_changed[rdp.texture_to_load.tile_number] = true; } static uint8_t color_comb_component(uint32_t v) { switch (v) { case G_CCMUX_TEXEL0: return CC_TEXEL0; case G_CCMUX_TEXEL1: return CC_TEXEL1; case G_CCMUX_PRIMITIVE: return CC_PRIM; case G_CCMUX_SHADE: return CC_SHADE; case G_CCMUX_ENVIRONMENT: return CC_ENV; case G_CCMUX_TEXEL0_ALPHA: return CC_TEXEL0A; case G_CCMUX_LOD_FRACTION: return CC_LOD; default: return CC_0; } } static inline uint32_t color_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { return color_comb_component(a) | (color_comb_component(b) << 3) | (color_comb_component(c) << 6) | (color_comb_component(d) << 9); } static void gfx_dp_set_combine_mode(uint32_t rgb, uint32_t alpha) { rdp.combine_mode = rgb | (alpha << 12); } static void gfx_dp_set_env_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.env_color.r = r; rdp.env_color.g = g; rdp.env_color.b = b; rdp.env_color.a = a; } static void gfx_dp_set_prim_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.prim_color.r = r; rdp.prim_color.g = g; rdp.prim_color.b = b; rdp.prim_color.a = a; } static void gfx_dp_set_fog_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { rdp.fog_color.r = r; rdp.fog_color.g = g; rdp.fog_color.b = b; rdp.fog_color.a = a; } static void gfx_dp_set_fill_color(uint32_t packed_color) { uint16_t col16 = (uint16_t)packed_color; uint32_t r = col16 >> 11; uint32_t g = (col16 >> 6) & 0x1f; uint32_t b = (col16 >> 1) & 0x1f; uint32_t a = col16 & 1; rdp.fill_color.r = SCALE_5_8(r); rdp.fill_color.g = SCALE_5_8(g); rdp.fill_color.b = SCALE_5_8(b); rdp.fill_color.a = a * 255; } static void gfx_draw_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) { uint32_t saved_other_mode_h = rdp.other_mode_h; uint32_t cycle_type = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)); if (cycle_type == G_CYC_COPY) { rdp.other_mode_h = (rdp.other_mode_h & ~(3U << G_MDSFT_TEXTFILT)) | G_TF_POINT; } // U10.2 coordinates float ulxf = ulx; float ulyf = uly; float lrxf = lrx; float lryf = lry; ulxf = ulxf / (4.0f * HALF_SCREEN_WIDTH) - 1.0f; ulyf = -(ulyf / (4.0f * HALF_SCREEN_HEIGHT)) + 1.0f; lrxf = lrxf / (4.0f * HALF_SCREEN_WIDTH) - 1.0f; lryf = -(lryf / (4.0f * HALF_SCREEN_HEIGHT)) + 1.0f; ulxf = gfx_adjust_x_for_aspect_ratio(ulxf); lrxf = gfx_adjust_x_for_aspect_ratio(lrxf); struct LoadedVertex* ul = &rsp.loaded_vertices[MAX_VERTICES + 0]; struct LoadedVertex* ll = &rsp.loaded_vertices[MAX_VERTICES + 1]; struct LoadedVertex* lr = &rsp.loaded_vertices[MAX_VERTICES + 2]; struct LoadedVertex* ur = &rsp.loaded_vertices[MAX_VERTICES + 3]; ul->x = ulxf; ul->y = ulyf; ul->z = -1.0f; ul->w = 1.0f; ll->x = ulxf; ll->y = lryf; ll->z = -1.0f; ll->w = 1.0f; lr->x = lrxf; lr->y = lryf; lr->z = -1.0f; lr->w = 1.0f; ur->x = lrxf; ur->y = ulyf; ur->z = -1.0f; ur->w = 1.0f; // The coordinates for texture rectangle shall bypass the viewport setting struct XYWidthHeight default_viewport = {0, 0, gfx_current_dimensions.width, gfx_current_dimensions.height}; struct XYWidthHeight viewport_saved = rdp.viewport; uint32_t geometry_mode_saved = rsp.geometry_mode; rdp.viewport = default_viewport; rdp.viewport_or_scissor_changed = true; rsp.geometry_mode = 0; gfx_sp_tri1(MAX_VERTICES + 0, MAX_VERTICES + 1, MAX_VERTICES + 3); gfx_sp_tri1(MAX_VERTICES + 1, MAX_VERTICES + 2, MAX_VERTICES + 3); rsp.geometry_mode = geometry_mode_saved; rdp.viewport = viewport_saved; rdp.viewport_or_scissor_changed = true; if (cycle_type == G_CYC_COPY) { rdp.other_mode_h = saved_other_mode_h; } } static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry, uint8_t tile, int16_t uls, int16_t ult, int16_t dsdx, int16_t dtdy, bool flip) { uint32_t saved_combine_mode = rdp.combine_mode; if ((rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_COPY) { // Per RDP Command Summary Set Tile's shift s and this dsdx should be set to 4 texels // Divide by 4 to get 1 instead dsdx >>= 2; // Color combiner is turned off in copy mode gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_TEXEL0), color_comb(0, 0, 0, G_ACMUX_TEXEL0)); // Per documentation one extra pixel is added in this modes to each edge lrx += 1 << 2; lry += 1 << 2; } // uls and ult are S10.5 // dsdx and dtdy are S5.10 // lrx, lry, ulx, uly are U10.2 // lrs, lrt are S10.5 if (flip) { dsdx = -dsdx; dtdy = -dtdy; } int16_t width = !flip ? lrx - ulx : lry - uly; int16_t height = !flip ? lry - uly : lrx - ulx; float lrs = ((uls << 7) + dsdx * width) >> 7; float lrt = ((ult << 7) + dtdy * height) >> 7; struct LoadedVertex* ul = &rsp.loaded_vertices[MAX_VERTICES + 0]; struct LoadedVertex* ll = &rsp.loaded_vertices[MAX_VERTICES + 1]; struct LoadedVertex* lr = &rsp.loaded_vertices[MAX_VERTICES + 2]; struct LoadedVertex* ur = &rsp.loaded_vertices[MAX_VERTICES + 3]; ul->u = uls; ul->v = ult; lr->u = lrs; lr->v = lrt; if (!flip) { ll->u = uls; ll->v = lrt; ur->u = lrs; ur->v = ult; } else { ll->u = lrs; ll->v = ult; ur->u = uls; ur->v = lrt; } gfx_draw_rectangle(ulx, uly, lrx, lry); rdp.combine_mode = saved_combine_mode; } static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) { if (rdp.color_image_address == rdp.z_buf_address) { // Don't clear Z buffer here since we already did it with glClear return; } uint32_t mode = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)); if (mode == G_CYC_COPY || mode == G_CYC_FILL) { // Per documentation one extra pixel is added in this modes to each edge lrx += 1 << 2; lry += 1 << 2; } for (int i = MAX_VERTICES; i < MAX_VERTICES + 4; i++) { struct LoadedVertex* v = &rsp.loaded_vertices[i]; v->color = rdp.fill_color; } uint32_t saved_combine_mode = rdp.combine_mode; gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_SHADE), color_comb(0, 0, 0, G_ACMUX_SHADE)); gfx_draw_rectangle(ulx, uly, lrx, lry); rdp.combine_mode = saved_combine_mode; } static void gfx_dp_set_z_image(void *z_buf_address) { rdp.z_buf_address = z_buf_address; } static void gfx_dp_set_color_image(uint32_t format, uint32_t size, uint32_t width, void* address) { rdp.color_image_address = address; } static void gfx_sp_set_other_mode(uint32_t shift, uint32_t num_bits, uint64_t mode) { uint64_t mask = (((uint64_t)1 << num_bits) - 1) << shift; uint64_t om = rdp.other_mode_l | ((uint64_t)rdp.other_mode_h << 32); om = (om & ~mask) | mode; rdp.other_mode_l = (uint32_t)om; rdp.other_mode_h = (uint32_t)(om >> 32); } static inline void *seg_addr(uintptr_t w1) { return (void *) w1; } #define C0(pos, width) ((cmd->words.w0 >> (pos)) & ((1U << width) - 1)) #define C1(pos, width) ((cmd->words.w1 >> (pos)) & ((1U << width) - 1)) static void gfx_run_dl(Gfx* cmd) { int dummy = 0; for (;;) { uint32_t opcode = cmd->words.w0 >> 24; switch (opcode) { // RSP commands: case G_MTX: #ifdef F3DEX_GBI_2 gfx_sp_matrix(C0(0, 8) ^ G_MTX_PUSH, (const int32_t *) seg_addr(cmd->words.w1)); #else gfx_sp_matrix(C0(16, 8), (const int32_t *) seg_addr(cmd->words.w1)); #endif break; case (uint8_t)G_POPMTX: #ifdef F3DEX_GBI_2 gfx_sp_pop_matrix(cmd->words.w1 / 64); #else gfx_sp_pop_matrix(1); #endif break; case G_MOVEMEM: #ifdef F3DEX_GBI_2 gfx_sp_movemem(C0(0, 8), C0(8, 8) * 8, seg_addr(cmd->words.w1)); #else gfx_sp_movemem(C0(16, 8), 0, seg_addr(cmd->words.w1)); #endif break; case (uint8_t)G_MOVEWORD: #ifdef F3DEX_GBI_2 gfx_sp_moveword(C0(16, 8), C0(0, 16), cmd->words.w1); #else gfx_sp_moveword(C0(0, 8), C0(8, 16), cmd->words.w1); #endif break; case (uint8_t)G_TEXTURE: #ifdef F3DEX_GBI_2 gfx_sp_texture(C1(16, 16), C1(0, 16), C0(11, 3), C0(8, 3), C0(1, 7)); #else gfx_sp_texture(C1(16, 16), C1(0, 16), C0(11, 3), C0(8, 3), C0(0, 8)); #endif break; case G_VTX: #ifdef F3DEX_GBI_2 gfx_sp_vertex(C0(12, 8), C0(1, 7) - C0(12, 8), seg_addr(cmd->words.w1)); #elif defined(F3DEX_GBI) || defined(F3DLP_GBI) gfx_sp_vertex(C0(10, 6), C0(16, 8) / 2, seg_addr(cmd->words.w1)); #else gfx_sp_vertex((C0(0, 16)) / sizeof(Vtx), C0(16, 4), seg_addr(cmd->words.w1)); #endif break; case G_DL: if (C0(16, 1) == 0) { // Push return address gfx_run_dl((Gfx *)seg_addr(cmd->words.w1)); } else { cmd = (Gfx *)seg_addr(cmd->words.w1); --cmd; // increase after break } break; case (uint8_t)G_ENDDL: return; #ifdef F3DEX_GBI_2 case G_GEOMETRYMODE: gfx_sp_geometry_mode(~C0(0, 24), cmd->words.w1); break; #else case (uint8_t)G_SETGEOMETRYMODE: gfx_sp_geometry_mode(0, cmd->words.w1); break; case (uint8_t)G_CLEARGEOMETRYMODE: gfx_sp_geometry_mode(cmd->words.w1, 0); break; #endif case (uint8_t)G_TRI1: #ifdef F3DEX_GBI_2 gfx_sp_tri1(C0(16, 8) / 2, C0(8, 8) / 2, C0(0, 8) / 2); #elif defined(F3DEX_GBI) || defined(F3DLP_GBI) gfx_sp_tri1(C1(16, 8) / 2, C1(8, 8) / 2, C1(0, 8) / 2); #else gfx_sp_tri1(C1(16, 8) / 10, C1(8, 8) / 10, C1(0, 8) / 10); #endif break; #if defined(F3DEX_GBI) || defined(F3DLP_GBI) case (uint8_t)G_TRI2: gfx_sp_tri1(C0(16, 8) / 2, C0(8, 8) / 2, C0(0, 8) / 2); gfx_sp_tri1(C1(16, 8) / 2, C1(8, 8) / 2, C1(0, 8) / 2); break; #endif case (uint8_t)G_SETOTHERMODE_L: #ifdef F3DEX_GBI_2 gfx_sp_set_other_mode(31 - C0(8, 8) - C0(0, 8), C0(0, 8) + 1, cmd->words.w1); #else gfx_sp_set_other_mode(C0(8, 8), C0(0, 8), cmd->words.w1); #endif break; case (uint8_t)G_SETOTHERMODE_H: #ifdef F3DEX_GBI_2 gfx_sp_set_other_mode(63 - C0(8, 8) - C0(0, 8), C0(0, 8) + 1, (uint64_t) cmd->words.w1 << 32); #else gfx_sp_set_other_mode(C0(8, 8) + 32, C0(0, 8), (uint64_t) cmd->words.w1 << 32); #endif break; // RDP Commands: case G_SETTIMG: gfx_dp_set_texture_image(C0(21, 3), C0(19, 2), C0(0, 10), seg_addr(cmd->words.w1)); break; case G_LOADBLOCK: gfx_dp_load_block(C1(24, 3), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_SETTILE: gfx_dp_set_tile(C0(21, 3), C0(19, 2), C0(9, 9), C0(0, 9), C1(24, 3), C1(20, 4), C1(18, 2), C1(14, 4), C1(10, 4), C1(8, 2), C1(4, 4), C1(0, 4)); break; case G_SETTILESIZE: gfx_dp_set_tile_size(C1(24, 3), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_LOADTLUT: gfx_dp_load_tlut(C1(24, 3), C1(14, 10)); break; case G_SETENVCOLOR: gfx_dp_set_env_color(C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETPRIMCOLOR: gfx_dp_set_prim_color(C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETFOGCOLOR: gfx_dp_set_fog_color(C1(24, 8), C1(16, 8), C1(8, 8), C1(0, 8)); break; case G_SETFILLCOLOR: gfx_dp_set_fill_color(cmd->words.w1); break; case G_SETCOMBINE: gfx_dp_set_combine_mode( color_comb(C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3)), color_comb(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3))); /*color_comb(C0(5, 4), C1(24, 4), C0(0, 5), C1(6, 3)), color_comb(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3)));*/ break; // G_SETPRIMCOLOR, G_CCMUX_PRIMITIVE, G_ACMUX_PRIMITIVE, is used by Goddard // G_CCMUX_TEXEL1, LOD_FRACTION is used in Bowser room 1 case G_TEXRECT: case G_TEXRECTFLIP: { int32_t lrx, lry, tile, ulx, uly; uint32_t uls, ult, dsdx, dtdy; #ifdef F3DEX_GBI_2E lrx = (int32_t)(C0(0, 24) << 8) >> 8; lry = (int32_t)(C1(0, 24) << 8) >> 8; ++cmd; ulx = (int32_t)(C0(0, 24) << 8) >> 8; uly = (int32_t)(C1(0, 24) << 8) >> 8; ++cmd; uls = C0(16, 16); ult = C0(0, 16); dsdx = C1(16, 16); dtdy = C1(0, 16); #else lrx = C0(12, 12); lry = C0(0, 12); tile = C1(24, 3); ulx = C1(12, 12); uly = C1(0, 12); ++cmd; uls = C1(16, 16); ult = C1(0, 16); ++cmd; dsdx = C1(16, 16); dtdy = C1(0, 16); #endif gfx_dp_texture_rectangle(ulx, uly, lrx, lry, tile, uls, ult, dsdx, dtdy, opcode == G_TEXRECTFLIP); break; } case G_FILLRECT: #ifdef F3DEX_GBI_2E { int32_t lrx, lry, ulx, uly; lrx = (int32_t)(C0(0, 24) << 8) >> 8; lry = (int32_t)(C1(0, 24) << 8) >> 8; ++cmd; ulx = (int32_t)(C0(0, 24) << 8) >> 8; uly = (int32_t)(C1(0, 24) << 8) >> 8; gfx_dp_fill_rectangle(ulx, uly, lrx, lry); break; } #else gfx_dp_fill_rectangle(C1(12, 12), C1(0, 12), C0(12, 12), C0(0, 12)); break; #endif case G_SETSCISSOR: gfx_dp_set_scissor(C1(24, 2), C0(12, 12), C0(0, 12), C1(12, 12), C1(0, 12)); break; case G_SETZIMG: gfx_dp_set_z_image(seg_addr(cmd->words.w1)); break; case G_SETCIMG: gfx_dp_set_color_image(C0(21, 3), C0(19, 2), C0(0, 11), seg_addr(cmd->words.w1)); break; } ++cmd; } } static void gfx_sp_reset() { rsp.modelview_matrix_stack_size = 1; rsp.current_num_lights = 2; rsp.lights_changed = true; } void gfx_get_dimensions(uint32_t *width, uint32_t *height) { gfx_wapi->get_dimensions(width, height); } void gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi) { gfx_wapi = wapi; gfx_rapi = rapi; gfx_wapi->init(); gfx_rapi->init(); // Used in the 120 star TAS static uint32_t precomp_shaders[] = { 0x01200200, 0x00000045, 0x00000200, 0x01200a00, 0x00000a00, 0x01a00045, 0x00000551, 0x01045045, 0x05a00a00, 0x01200045, 0x05045045, 0x01045a00, 0x01a00a00, 0x0000038d, 0x01081081, 0x0120038d, 0x03200045, 0x03200a00, 0x01a00a6f, 0x01141045, 0x07a00a00, 0x05200200, 0x03200200 }; for (size_t i = 0; i < sizeof(precomp_shaders) / sizeof(uint32_t); i++) { gfx_lookup_or_create_shader_program(precomp_shaders[i]); } } void gfx_start_frame(void) { gfx_wapi->handle_events(); gfx_wapi->get_dimensions(&gfx_current_dimensions.width, &gfx_current_dimensions.height); if (gfx_current_dimensions.height == 0) { // Avoid division by zero gfx_current_dimensions.height = 1; } gfx_current_dimensions.aspect_ratio = (float)gfx_current_dimensions.width / (float)gfx_current_dimensions.height; } void gfx_run(Gfx *commands) { gfx_sp_reset(); //puts("New frame"); if (!gfx_wapi->start_frame()) { dropped_frame = true; return; } dropped_frame = false; double t0 = gfx_wapi->get_time(); gfx_rapi->start_frame(); gfx_run_dl(commands); gfx_flush(); double t1 = gfx_wapi->get_time(); //printf("Process %f %f\n", t1, t1 - t0); gfx_wapi->swap_buffers_begin(); } void gfx_end_frame(void) { if (!dropped_frame) { gfx_wapi->swap_buffers_end(); } }