From 7d3c139d0c297cd767585bd11fe85078e2959941 Mon Sep 17 00:00:00 2001 From: angrylion Date: Fri, 29 Oct 2021 12:42:41 +0000 Subject: [PATCH] rdp: much better support for writes to hidden bits with 8-bit frame buffers; basic emulation of the VI nearest-neighbor filtering glitch. git-svn-id: https://svn.code.sf.net/p/angrylions-stuff/code/trunk@114 74b3176d-45c4-4b80-9022-e50c74be2ad4 --- mylittle-nocomment/n64video.cpp | 798 ++++++++++++++++++++++++++------ 1 file changed, 651 insertions(+), 147 deletions(-) diff --git a/mylittle-nocomment/n64video.cpp b/mylittle-nocomment/n64video.cpp index 6f9f1a8..929af89 100644 --- a/mylittle-nocomment/n64video.cpp +++ b/mylittle-nocomment/n64video.cpp @@ -408,6 +408,7 @@ STRICTINLINE void combiner_2cycle_cycle0(int adseed, UINT32 cvg, INT32* acalpha) STRICTINLINE void combiner_2cycle_cycle1(int adseed, UINT32* curpixel_cvg); STRICTINLINE int blender_1cycle(UINT32* fr, UINT32* fg, UINT32* fb, int dith, UINT32 blend_en, UINT32 prewrap, UINT32 curpixel_cvg, UINT32 curpixel_cvbit); STRICTINLINE int blender_2cycle_cycle0(UINT32 curpixel_cvg, UINT32 curpixel_cvbit); +STRICTINLINE void blender_2cycle_cycle0_gval(UINT32 curpixel); STRICTINLINE void blender_2cycle_cycle1(UINT32* fr, UINT32* fg, UINT32* fb, int dith, UINT32 blend_en, UINT32 prewrap); STRICTINLINE void texture_pipeline_cycle(COLOR* TEX, COLOR* prev, INT32 SSS, INT32 SST, UINT32 tilenum, UINT32 cycle); STRICTINLINE void tc_pipeline_copy(INT32* sss0, INT32* sss1, INT32* sss2, INT32* sss3, INT32* sst, int tilenum); @@ -425,21 +426,25 @@ STRICTINLINE int alpha_compare(INT32 comb_alpha); STRICTINLINE INT32 color_combiner_equation(INT32 a, INT32 b, INT32 c, INT32 d); STRICTINLINE INT32 alpha_combiner_equation(INT32 a, INT32 b, INT32 c, INT32 d); STRICTINLINE void blender_equation_cycle0(int* r, int* g, int* b); +STRICTINLINE void blender_equation_cycle0_gval(int* g); STRICTINLINE void blender_equation_cycle0_2(int* r, int* g, int* b); +STRICTINLINE void blender_equation_cycle0_2_gval(int* g); STRICTINLINE void blender_equation_cycle1(int* r, int* g, int* b); +STRICTINLINE void blender_equation_cycle1_gval(int* g); STRICTINLINE UINT32 rightcvghex(UINT32 x, UINT32 fmask); STRICTINLINE UINT32 leftcvghex(UINT32 x, UINT32 fmask); STRICTINLINE INT32 chroma_key_min(COLOR* col); +void complete_delayed_hbwrites(int delayedhbwidx); STRICTINLINE void compute_cvg_noflip(INT32 scanline); STRICTINLINE void compute_cvg_flip(INT32 scanline); -INLINE void fbwrite_4(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg); -INLINE void fbwrite_8(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg); -INLINE void fbwrite_16(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg); -INLINE void fbwrite_32(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg); -INLINE void fbfill_4(UINT32 curpixel); -INLINE void fbfill_8(UINT32 curpixel); -INLINE void fbfill_16(UINT32 curpixel); -INLINE void fbfill_32(UINT32 curpixel); +INLINE void fbwrite_4(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); +INLINE void fbwrite_8(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); +INLINE void fbwrite_16(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); +INLINE void fbwrite_32(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); +INLINE void fbfill_4(UINT32 curpixel, int flip, int* delayedhbwidx); +INLINE void fbfill_8(UINT32 curpixel, int flip, int* delayedhbwidx); +INLINE void fbfill_16(UINT32 curpixel, int flip, int* delayedhbwidx); +INLINE void fbfill_32(UINT32 curpixel, int flip, int* delayedhbwidx); INLINE void fbread_4(UINT32 num, UINT32* curpixel_memcvg); INLINE void fbread_8(UINT32 num, UINT32* curpixel_memcvg); INLINE void fbread_16(UINT32 num, UINT32* curpixel_memcvg); @@ -448,6 +453,11 @@ INLINE void fbread2_4(UINT32 num, UINT32* curpixel_memcvg); INLINE void fbread2_8(UINT32 num, UINT32* curpixel_memcvg); INLINE void fbread2_16(UINT32 num, UINT32* curpixel_memcvg); INLINE void fbread2_32(UINT32 num, UINT32* curpixel_memcvg); +STRICTINLINE void pairwrite8(UINT32 in, UINT32 rval, int flip, int* delayedhbwidx); +STRICTINLINE void pairwrite16(UINT32 in, UINT16 rval, UINT8 hval, int iscolor); +STRICTINLINE void pairwrite32(UINT32 in, INT32 cval, UINT8 hval0, UINT8 hval1); +void rejected_hbwrite_1cycle(int cdith, UINT32 blend_en, UINT32 prewrap, UINT32 curpixel, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); +void rejected_hbwrite_2cycle(int cdith, UINT32 blend_en, UINT32 prewrap, UINT32 curpixel, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx); STRICTINLINE UINT32 z_decompress(UINT32 rawz); STRICTINLINE UINT32 dz_decompress(UINT32 compresseddz); STRICTINLINE UINT32 dz_compress(UINT32 value); @@ -486,6 +496,7 @@ STRICTINLINE void video_max_optimized(UINT32* Pixels, UINT32* penumin, UINT32* p INLINE void calculate_clamp_diffs(UINT32 tile); INLINE void calculate_tile_derivs(UINT32 tile); STRICTINLINE void rgb_dither(int* r, int* g, int* b, int dith); +STRICTINLINE void rgb_dither_gval(int* g, int dith); STRICTINLINE void get_dither_noise(int x, int y, int* cdith, int* adith); STRICTINLINE void vi_vl_lerp(CCVG* up, CCVG down, UINT32 frac); STRICTINLINE void rgba_correct(int offx, int offy, int r, int g, int b, int a, UINT32 cvg); @@ -543,11 +554,16 @@ static void (*fbread2_func[4])(UINT32, UINT32*) = fbread2_4, fbread2_8, fbread2_16, fbread2_32 }; -static void (*fbwrite_func[4])(UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, UINT32) = +static void (*fbwrite_func[4])(UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, int, int*) = { fbwrite_4, fbwrite_8, fbwrite_16, fbwrite_32 }; +static void (*fbfill_func[4])(UINT32, int, int*) = +{ + fbfill_4, fbfill_8, fbfill_16, fbfill_32 +}; + static void (*tcdiv_func[2])(INT32, INT32, INT32, INT32*, INT32*) = { tcdiv_nopersp, tcdiv_persp @@ -555,7 +571,8 @@ static void (*tcdiv_func[2])(INT32, INT32, INT32, INT32*, INT32*) = void (*fbread1_ptr)(UINT32, UINT32*) = fbread_func[0]; void (*fbread2_ptr)(UINT32, UINT32*) = fbread2_func[0]; -void (*fbwrite_ptr)(UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, UINT32) = fbwrite_func[0]; +void (*fbwrite_ptr)(UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, UINT32, int, int*) = fbwrite_func[0]; +void (*fbfill_ptr)(UINT32, int, int*) = fbfill_func[0]; void (*tcdiv_ptr)(INT32, INT32, INT32, INT32*, INT32*) = tcdiv_func[0]; @@ -585,6 +602,8 @@ UINT16 deltaz_comparator_lut[0x10000]; INT32 clamp_t_diff[8]; INT32 clamp_s_diff[8]; CVtcmaskDERIVATIVE cvarray[0x100]; +UINT8 oldhb[8]; +int last_overwriting_scanline; #define RDRAM_MASK 0x00ffffff @@ -603,27 +622,9 @@ CVtcmaskDERIVATIVE cvarray[0x100]; else {(rdst) = (hdst) = 0;} \ } -#define PAIRWRITE16(in, rval, hval) \ -{ \ - (in) &= (RDRAM_MASK >> 1); \ - if ((in) <= idxlim16) {rdram_16[(in) ^ WORD_ADDR_XOR] = (rval); hidden_bits[(in)] = (hval);} \ -} - -#define PAIRWRITE32(in, rval, hval0, hval1) \ -{ \ - (in) &= (RDRAM_MASK >> 2); \ - if ((in) <= idxlim32) {rdram[(in)] = (rval); hidden_bits[(in) << 1] = (hval0); hidden_bits[((in) << 1) + 1] = (hval1);} \ -} - -#define PAIRWRITE8(in, rval) \ -{ \ - (in) &= RDRAM_MASK; \ - if ((in) <= plim) {if ((in) & 1) hidden_bits[(in) >> 1] = ((rval) & 1) ? 3 : 0; rdram_8[(in) ^ BYTE_ADDR_XOR] = (rval); } \ -} - struct onetime { - int nolerp, copymstrangecrashes, fillmcrashes, fillmbitcrashes, syncfullcrash, vbusclock; + int copymstrangecrashes, fillmcrashes, fillmbitcrashes, syncfullcrash, vbusclock; } onetimewarnings; extern INT32 pitchindwords; @@ -968,6 +969,7 @@ int rdp_init() memset(&memory_color, 0, sizeof(COLOR)); memset(&pre_memory_color, 0, sizeof(COLOR)); + memset(&oldhb, 0, sizeof(oldhb)); rdp_pipeline_crashed = 0; memset(&onetimewarnings, 0, sizeof(onetimewarnings)); @@ -995,6 +997,7 @@ int rdp_init() rdram_8 = (UINT8*)rdram; rdram_16 = (UINT16*)rdram; + return 0; } @@ -1048,11 +1051,10 @@ int rdp_update() UINT32 x_add = vi_x_scale & 0xfff; - if (!lerp_en && vitype == 2 && !onetimewarnings.nolerp && h_start < 0x80 && x_add <= 0x200) - { - popmessage("Disabling VI interpolation in 16-bit color modes causes glitches on hardware if h_start is less than 128 pixels and x_scale is less or equal to 0x200."); - onetimewarnings.nolerp = 1; - } + int vinnglitch = 0; + + if (!lerp_en && (vitype & 2) && h_start < (vitype == 2 ? 0x80 : 0x40) && x_add <= 0x200) + vinnglitch = vitype == 2 ? 0x40 : 0x20; h_start -= (ispal ? 128 : 108); @@ -1098,11 +1100,12 @@ int rdp_update() { prevserrate = 1; prevvicurrent = vi_v_current_line & 1; - oldvstart = v_start; } else prevserrate = 0; + oldvstart = v_start; + int linecount = serration_pulses ? (pitchindwords << 1) : pitchindwords; int lineshifter = serration_pulses ? 0 : 1; int twolines = serration_pulses ? 1 : 0; @@ -1487,6 +1490,27 @@ int rdp_update() vi_vl_lerp(&nextcolor, scannextcolor, yfrac); vi_vl_lerp(&color, nextcolor, xfrac); } + else if (vinnglitch) + { + if (prev_line_x & vinnglitch) + color.r = color.g = color.b = 0; + else + { + + cur_x = pixels + (prev_line_x & (vinnglitch - 1)); + vi_fetch_filter_ptr(&color, frame_buffer, cur_x, fsaa, dither_filter, vres, 0); + + if (divot) + { + CCVG prevcol, nextcol; + prev_x = pixels + ((prev_line_x - 1) & (vinnglitch - 1)); + next_x = pixels + (line_x & (vinnglitch - 1)); + vi_fetch_filter_ptr(&prevcol, frame_buffer, prev_x, fsaa, dither_filter, vres, 0); + vi_fetch_filter_ptr(&nextcol, frame_buffer, next_x, fsaa, dither_filter, vres, 0); + divot_filter(&color, color, prevcol, nextcol); + } + } + } r = color.r; g = color.g; @@ -2283,11 +2307,33 @@ STRICTINLINE int blender_2cycle_cycle0(UINT32 curpixel_cvg, UINT32 curpixel_cvbi blended_pixel_color.r = r; blended_pixel_color.g = g; blended_pixel_color.b = b; + } + + return wen; +} + +STRICTINLINE void blender_2cycle_cycle0_gval(UINT32 curpixel) +{ + int g, fbsel; + UINT32 fb; + + fbsel = fb_size; + + if (fb_size == PIXEL_SIZE_8BIT) + { + fb = fb_address + curpixel; + if (!(fb & 1)) + fbsel--; } - memory_color = pre_memory_color; + if (fbsel & 1) + { + inv_pixel_color.a = (~(*blender1b_a[0])) & 0xff; + + blender_equation_cycle0_2_gval(&g); - return wen; + blended_pixel_color.g = g; + } } STRICTINLINE void blender_2cycle_cycle1(UINT32* fr, UINT32* fg, UINT32* fb, int dith, UINT32 blend_en, UINT32 prewrap) @@ -4826,7 +4872,9 @@ void render_spans_1cycle_complete(int start, int end, int tilenum, int flip) int curpixel = 0; int x, length, scdiff, lodlength; UINT32 fir, fig, fib; - + int delayedhbwidx = -1; + int wen; + for (i = start; i <= end; i++) { if (span[i].validline) @@ -4935,15 +4983,19 @@ void render_spans_1cycle_complete(int start, int end, int tilenum, int flip) fbread1_ptr(curpixel, &curpixel_memcvg); - if (z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg)) + wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); + + if (wen) + wen = blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit); + + if (wen) { - if (blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit)) - { - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_1cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); r += drinc; g += dginc; @@ -4957,6 +5009,10 @@ void render_spans_1cycle_complete(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); + } void render_spans_1cycle_notexel1(int start, int end, int tilenum, int flip) @@ -5019,6 +5075,8 @@ void render_spans_1cycle_notexel1(int start, int end, int tilenum, int flip) int curpixel = 0; int x, length, scdiff, lodlength; UINT32 fir, fig, fib; + int delayedhbwidx = -1; + int wen; for (i = start; i <= end; i++) { @@ -5101,20 +5159,22 @@ void render_spans_1cycle_notexel1(int start, int end, int tilenum, int flip) get_dither_noise(x, i, &cdith, &adith); combiner_1cycle(adith, &curpixel_cvg); - + fbread1_ptr(curpixel, &curpixel_memcvg); - if (z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg)) - { - - if (blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit)) - { - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); + wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); + + if (wen) + wen = blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit); - } + if (wen) + { + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_1cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); s += dsinc; t += dtinc; @@ -5131,6 +5191,9 @@ void render_spans_1cycle_notexel1(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } void render_spans_1cycle_notex(int start, int end, int tilenum, int flip) @@ -5183,6 +5246,8 @@ void render_spans_1cycle_notex(int start, int end, int tilenum, int flip) int curpixel = 0; int x, length, scdiff; UINT32 fir, fig, fib; + int delayedhbwidx = -1; + int wen; for (i = start; i <= end; i++) { @@ -5244,15 +5309,21 @@ void render_spans_1cycle_notex(int start, int end, int tilenum, int flip) combiner_1cycle(adith, &curpixel_cvg); fbread1_ptr(curpixel, &curpixel_memcvg); - if (z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg)) + + wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); + + if (wen) + wen = blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit); + + if (wen) { - if (blender_1cycle(&fir, &fig, &fib, cdith, blend_en, prewrap, curpixel_cvg, curpixel_cvbit)) - { - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_1cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + r += drinc; g += dginc; b += dbinc; @@ -5265,6 +5336,9 @@ void render_spans_1cycle_notex(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } void render_spans_2cycle_complete(int start, int end, int tilenum, int flip) @@ -5335,6 +5409,7 @@ void render_spans_2cycle_complete(int start, int end, int tilenum, int flip) int x, length, scdiff, lodlength; UINT32 fir, fig, fib; + int delayedhbwidx = -1; for (i = start; i <= end; i++) { @@ -5459,10 +5534,13 @@ void render_spans_2cycle_complete(int start, int end, int tilenum, int flip) wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); if (wen) - wen &= blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); - else - memory_color = pre_memory_color; + wen = blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); + if (!wen && i >= last_overwriting_scanline) + blender_2cycle_cycle0_gval(curpixel); + + memory_color = pre_memory_color; + x += xinc; r += drinc; @@ -5486,17 +5564,17 @@ void render_spans_2cycle_complete(int start, int end, int tilenum, int flip) combiner_2cycle_cycle0(adith, nextpixel_cvg, &acalpha); if (wen) - { - wen &= alpha_compare(acalpha); + wen = alpha_compare(acalpha); - if (wen) - { - blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + if (wen) + { + blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_2cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); if (other_modes.f.getditherlevel < 2) get_dither_noise(x, i, &cdith, &adith); @@ -5510,6 +5588,9 @@ void render_spans_2cycle_complete(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } static void render_spans_2cycle_notexelnext(int start, int end, int tilenum, int flip) @@ -5577,6 +5658,7 @@ static void render_spans_2cycle_notexelnext(int start, int end, int tilenum, int int x, length, scdiff; UINT32 fir, fig, fib; + int delayedhbwidx = -1; for (i = start; i <= end; i++) { @@ -5665,9 +5747,12 @@ static void render_spans_2cycle_notexelnext(int start, int end, int tilenum, int wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); if (wen) - wen &= blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); - else - memory_color = pre_memory_color; + wen = blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); + + if (!wen && i >= last_overwriting_scanline) + blender_2cycle_cycle0_gval(curpixel); + + memory_color = pre_memory_color; x += xinc; @@ -5700,18 +5785,18 @@ static void render_spans_2cycle_notexelnext(int start, int end, int tilenum, int combiner_2cycle_cycle0(adith, nextpixel_cvg, &acalpha); - if (wen) - { - wen &= alpha_compare(acalpha); + if (wen) + wen = alpha_compare(acalpha); - if (wen) - { - blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + if (wen) + { + blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_2cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); if (other_modes.f.getditherlevel < 2) get_dither_noise(x, i, &cdith, &adith); @@ -5725,6 +5810,9 @@ static void render_spans_2cycle_notexelnext(int start, int end, int tilenum, int } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } static void render_spans_2cycle_notexel1(int start, int end, int tilenum, int flip) @@ -5791,6 +5879,7 @@ static void render_spans_2cycle_notexel1(int start, int end, int tilenum, int fl int x, length, scdiff; UINT32 fir, fig, fib; + int delayedhbwidx = -1; for (i = start; i <= end; i++) { @@ -5878,9 +5967,12 @@ static void render_spans_2cycle_notexel1(int start, int end, int tilenum, int fl wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); if (wen) - wen &= blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); - else - memory_color = pre_memory_color; + wen = blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); + + if (!wen && i >= last_overwriting_scanline) + blender_2cycle_cycle0_gval(curpixel); + + memory_color = pre_memory_color; x += xinc; @@ -5912,18 +6004,18 @@ static void render_spans_2cycle_notexel1(int start, int end, int tilenum, int fl combiner_2cycle_cycle0(adith, nextpixel_cvg, &acalpha); - if (wen) - { - wen &= alpha_compare(acalpha); + if (wen) + wen = alpha_compare(acalpha); - if (wen) - { - blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + if (wen) + { + blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_2cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); if (other_modes.f.getditherlevel < 2) get_dither_noise(x, i, &cdith, &adith); @@ -5937,6 +6029,9 @@ static void render_spans_2cycle_notexel1(int start, int end, int tilenum, int fl } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } static void render_spans_2cycle_notex(int start, int end, int tilenum, int flip) @@ -5993,6 +6088,7 @@ static void render_spans_2cycle_notex(int start, int end, int tilenum, int flip) int x, length, scdiff; UINT32 fir, fig, fib; + int delayedhbwidx = -1; for (i = start; i <= end; i++) { @@ -6065,9 +6161,12 @@ static void render_spans_2cycle_notex(int start, int end, int tilenum, int flip) wen = z_compare(zbcur, sz, dzpix, dzpixenc, &blend_en, &prewrap, &curpixel_cvg, curpixel_memcvg); if (wen) - wen &= blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); - else - memory_color = pre_memory_color; + wen = blender_2cycle_cycle0(curpixel_cvg, curpixel_cvbit); + + if (!wen && i >= last_overwriting_scanline) + blender_2cycle_cycle0_gval(curpixel); + + memory_color = pre_memory_color; x += xinc; @@ -6087,18 +6186,18 @@ static void render_spans_2cycle_notex(int start, int end, int tilenum, int flip) combiner_2cycle_cycle0(adith, nextpixel_cvg, &acalpha); - if (wen) - { - wen &= alpha_compare(acalpha); + if (wen) + wen = alpha_compare(acalpha); - if (wen) - { - blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); - fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg); - if (other_modes.z_update_en) - z_store(zbcur, sz, dzpixenc); - } + if (wen) + { + blender_2cycle_cycle1(&fir, &fig, &fib, cdith, blend_en, prewrap); + fbwrite_ptr(curpixel, fir, fig, fib, blend_en, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); + if (other_modes.z_update_en) + z_store(zbcur, sz, dzpixenc); } + else if (i >= last_overwriting_scanline) + rejected_hbwrite_2cycle(cdith, blend_en, prewrap, curpixel, curpixel_cvg, curpixel_memcvg, flip, &delayedhbwidx); if (other_modes.f.getditherlevel < 2) get_dither_noise(x, i, &cdith, &adith); @@ -6112,6 +6211,9 @@ static void render_spans_2cycle_notex(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } void render_spans_fill(int start, int end, int flip) @@ -6133,6 +6235,7 @@ void render_spans_fill(int start, int end, int flip) int prevxstart; int curpixel = 0; int x, length; + int delayedhbwidx = -1; for (i = start; i <= end; i++) { @@ -6159,22 +6262,7 @@ void render_spans_fill(int start, int end, int flip) for (j = 0; j <= length; j++) { - switch(fb_size) - { - case 0: - fbfill_4(curpixel); - break; - case 1: - fbfill_8(curpixel); - break; - case 2: - fbfill_16(curpixel); - break; - case 3: - default: - fbfill_32(curpixel); - break; - } + fbfill_ptr(curpixel, flip, &delayedhbwidx); x += xinc; curpixel += xinc; @@ -6191,6 +6279,9 @@ void render_spans_fill(int start, int end, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } void render_spans_copy(int start, int end, int tilenum, int flip) @@ -6239,6 +6330,7 @@ void render_spans_copy(int start, int end, int tilenum, int flip) int bytesperpixel = (fb_size == PIXEL_SIZE_4BIT) ? 1 : (1 << (fb_size - 1)); UINT32 fbendptr = 0; INT32 threshold, currthreshold; + int delayedhbwidx = -1; #define PIXELS_TO_BYTES_SPECIAL4(pix, siz) ((siz) ? PIXELS_TO_BYTES(pix, siz) : (pix)) @@ -6322,9 +6414,8 @@ void render_spans_copy(int start, int end, int tilenum, int flip) { tempbyte = (UINT32)((copyqword >> (k << 3)) & 0xff); if (alphamask & (1 << k)) - { - PAIRWRITE8(tempdword, tempbyte); - } + pairwrite8(tempdword, tempbyte, flip, &delayedhbwidx); + k--; tempdword += xinc; copywmask--; @@ -6337,6 +6428,9 @@ void render_spans_copy(int start, int end, int tilenum, int flip) } } } + + if (delayedhbwidx >= 0 && flip && fb_size == PIXEL_SIZE_8BIT) + complete_delayed_hbwrites(delayedhbwidx); } void loading_pipeline(int start, int end, int tilenum, int coord_quad, int ltlut) @@ -6600,6 +6694,15 @@ static void edgewalker_for_prims(INT32* ewdata) other_modes.f.stalederivs = 0; } + if (fb_size == PIXEL_SIZE_8BIT) + { + oldhb[0] &= ~2; + oldhb[4] &= ~2; + } + + int oldhb_diff = fb_size == PIXEL_SIZE_16BIT ? 7 : 3; + last_overwriting_scanline = -1; + flip = (ewdata[0] & 0x800000) ? 1 : 0; max_level = (ewdata[0] >> 19) & 7; tilenum = (ewdata[0] >> 16) & 7; @@ -6896,6 +6999,9 @@ static void edgewalker_for_prims(INT32* ewdata) span[j].rx = minxhx; span[j].validline = !allinval && !allover && !allunder && (!scfield || (scfield && !(sckeepodd ^ (j & 1)))); + if (span[j].validline && fb_size > PIXEL_SIZE_8BIT) + if ((span[j].lx - span[j].rx) >= oldhb_diff) + last_overwriting_scanline = j; } } @@ -6979,6 +7085,10 @@ static void edgewalker_for_prims(INT32* ewdata) span[j].lx = minxmx; span[j].rx = maxxhx; span[j].validline = !allinval && !allover && !allunder && (!scfield || (scfield && !(sckeepodd ^ (j & 1)))); + + if (span[j].validline && fb_size > PIXEL_SIZE_8BIT) + if ((span[j].rx - span[j].lx) >= oldhb_diff) + last_overwriting_scanline = j; } } @@ -6997,14 +7107,17 @@ static void edgewalker_for_prims(INT32* ewdata) switch(other_modes.cycle_type) { case CYCLE_TYPE_1: + switch (other_modes.f.textureuselevel0) { case 0: render_spans_1cycle_complete(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; case 1: render_spans_1cycle_notexel1(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; case 2: default: render_spans_1cycle_notex(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; } + break; case CYCLE_TYPE_2: + switch (other_modes.f.textureuselevel1) { case 0: render_spans_2cycle_complete(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; @@ -7012,6 +7125,7 @@ static void edgewalker_for_prims(INT32* ewdata) case 2: render_spans_2cycle_notexel1(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; case 3: default: render_spans_2cycle_notex(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; } + break; case CYCLE_TYPE_COPY: render_spans_copy(yhlimit >> 2, yllimit >> 2, tilenum, flip); break; case CYCLE_TYPE_FILL: render_spans_fill(yhlimit >> 2, yllimit >> 2, flip); break; @@ -8201,6 +8315,7 @@ static void rdp_set_color_image(UINT32 w1, UINT32 w2) fbread1_ptr = fbread_func[fb_size]; fbread2_ptr = fbread2_func[fb_size]; fbwrite_ptr = fbwrite_func[fb_size]; + fbfill_ptr = fbfill_func[fb_size]; } static void (*const rdp_command_table[64])(UINT32 w1, UINT32 w2) = @@ -8399,6 +8514,33 @@ STRICTINLINE void blender_equation_cycle0(int* r, int* g, int* b) } } +STRICTINLINE void blender_equation_cycle0_gval(int* g) +{ + int blend1a, blend2a; + int blg, sum; + blend1a = *blender1b_a[0] >> 3; + blend2a = *blender2b_a[0] >> 3; + + int mulb; + if (blender2b_a[0] == &memory_color.a) + { + blend1a = (blend1a >> blshifta) & 0x3C; + blend2a = (blend2a >> blshiftb) | 3; + } + + mulb = blend2a + 1; + + blg = (*blender1a_g[0]) * blend1a + (*blender2a_g[0]) * mulb; + + if (!other_modes.force_blend) + { + sum = ((blend1a & ~3) + (blend2a & ~3) + 4) << 9; + *g = bldiv_hwaccurate_table[sum | ((blg >> 2) & 0x7ff)]; + } + else + *g = (blg >> 5) & 0xff; +} + STRICTINLINE void blender_equation_cycle0_2(int* r, int* g, int* b) { int blend1a, blend2a; @@ -8417,6 +8559,22 @@ STRICTINLINE void blender_equation_cycle0_2(int* r, int* g, int* b) *b = (((*blender1a_b[0]) * blend1a + (*blender2a_b[0]) * blend2a) >> 5) & 0xff; } +STRICTINLINE void blender_equation_cycle0_2_gval(int* g) +{ + int blend1a, blend2a; + blend1a = *blender1b_a[0] >> 3; + blend2a = *blender2b_a[0] >> 3; + + if (blender2b_a[0] == &memory_color.a) + { + blend1a = (blend1a >> pastblshifta) & 0x3C; + blend2a = (blend2a >> pastblshiftb) | 3; + } + + blend2a += 1; + *g = (((*blender1a_g[0]) * blend1a + (*blender2a_g[0]) * blend2a) >> 5) & 0xff; +} + STRICTINLINE void blender_equation_cycle1(int* r, int* g, int* b) { int blend1a, blend2a; @@ -8451,6 +8609,32 @@ STRICTINLINE void blender_equation_cycle1(int* r, int* g, int* b) } } +STRICTINLINE void blender_equation_cycle1_gval(int* g) +{ + int blend1a, blend2a; + int blg, sum; + blend1a = *blender1b_a[1] >> 3; + blend2a = *blender2b_a[1] >> 3; + + int mulb; + if (blender2b_a[1] == &memory_color.a) + { + blend1a = (blend1a >> blshifta) & 0x3C; + blend2a = (blend2a >> blshiftb) | 3; + } + + mulb = blend2a + 1; + blg = (*blender1a_g[1]) * blend1a + (*blender2a_g[1]) * mulb; + + if (!other_modes.force_blend) + { + sum = ((blend1a & ~3) + (blend2a & ~3) + 4) << 9; + *g = bldiv_hwaccurate_table[sum | ((blg >> 2) & 0x7ff)]; + } + else + *g = (blg >> 5) & 0xff; +} + STRICTINLINE UINT32 rightcvghex(UINT32 x, UINT32 fmask) { UINT32 covered = ((x & 7) + 1) >> 1; @@ -8606,21 +8790,21 @@ int rdp_close() return 0; } -INLINE void fbwrite_4(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg) +INLINE void fbwrite_4(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) { UINT32 fb = fb_address + curpixel; RWRITEADDR8(fb, 0); } -INLINE void fbwrite_8(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg) +INLINE void fbwrite_8(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) { UINT32 fb = fb_address + curpixel; - PAIRWRITE8(fb, (fb & 1) ? (g & 0xff) : (r & 0xff)); + pairwrite8(fb, (fb & 1) ? (g & 0xff) : (r & 0xff), flip, delayedhbwidx); } -INLINE void fbwrite_16(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg) +INLINE void fbwrite_16(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) { #undef CVG_DRAW #ifdef CVG_DRAW @@ -8648,10 +8832,11 @@ INLINE void fbwrite_16(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 ble rval = finalcolor|(finalcvg >> 2); hval = finalcvg & 3; - PAIRWRITE16(fb, rval, hval); + + pairwrite16(fb, rval, hval, 1); } -INLINE void fbwrite_32(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg) +INLINE void fbwrite_32(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 blend_en, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) { UINT32 fb = (fb_address >> 2) + curpixel; @@ -8661,23 +8846,23 @@ INLINE void fbwrite_32(UINT32 curpixel, UINT32 r, UINT32 g, UINT32 b, UINT32 ble finalcolor = (r << 24) | (g << 16) | (b << 8); finalcolor |= (finalcvg << 5); - PAIRWRITE32(fb, finalcolor, (g & 1) ? 3 : 0, 0); + pairwrite32(fb, finalcolor, (g & 1) ? 3 : 0, 0); } -INLINE void fbfill_4(UINT32 curpixel) +INLINE void fbfill_4(UINT32 curpixel, int flip, int* delayedhbwidx) { rdp_pipeline_crashed = 1; } -INLINE void fbfill_8(UINT32 curpixel) +INLINE void fbfill_8(UINT32 curpixel, int flip, int* delayedhbwidx) { UINT32 fb = fb_address + curpixel; UINT32 val = (fill_color >> (((fb & 3) ^ 3) << 3)) & 0xff; - PAIRWRITE8(fb, val); + pairwrite8(fb, val, flip, delayedhbwidx); } -INLINE void fbfill_16(UINT32 curpixel) +INLINE void fbfill_16(UINT32 curpixel, int flip, int* delayedhbwidx) { UINT16 val; UINT8 hval; @@ -8687,13 +8872,14 @@ INLINE void fbfill_16(UINT32 curpixel) else val = (fill_color >> 16) & 0xffff; hval = ((val & 1) << 1) | (val & 1); - PAIRWRITE16(fb, val, hval); + + pairwrite16(fb, val, hval, 1); } -INLINE void fbfill_32(UINT32 curpixel) +INLINE void fbfill_32(UINT32 curpixel, int flip, int* delayedhbwidx) { UINT32 fb = (fb_address >> 2) + curpixel; - PAIRWRITE32(fb, fill_color, (fill_color & 0x10000) ? 3 : 0, (fill_color & 0x1) ? 3 : 0); + pairwrite32(fb, fill_color, (fill_color & 0x10000) ? 3 : 0, (fill_color & 0x1) ? 3 : 0); } INLINE void fbread_4(UINT32 curpixel, UINT32* curpixel_memcvg) @@ -9073,7 +9259,7 @@ STRICTINLINE void z_store(UINT32 zcurpixel, UINT32 z, int dzpixenc) UINT16 zval = z_com_table[z & 0x3ffff]|(dzpixenc >> 2); UINT8 hval = dzpixenc & 3; - PAIRWRITE16(zcurpixel, zval, hval); + pairwrite16(zcurpixel, zval, hval, 0); } STRICTINLINE UINT32 dz_decompress(UINT32 dz_compressed) @@ -9291,6 +9477,290 @@ STRICTINLINE INT32 normalize_dzpix(INT32 sum) return 0; } +STRICTINLINE void pairwrite8(UINT32 in, UINT32 rval, int flip, int* delayedhbwidx) +{ + in &= RDRAM_MASK; + if (!flip) + { + if (in <= plim) + { + int hdst8 = hidden_bits[in >> 1]; + + if (!(in & 1)) + { + if (hdst8 & HB_CLEAN) + hidden_bits[in >> 1] = (rdram_16[(in >> 1) ^ WORD_ADDR_XOR] & 1) ? 1 : 0; + else + hidden_bits[in >> 1] &= ~2; + + hidden_bits[in >> 1] |= oldhb[(in >> 1) & 7] & 2; + + } + else + { + if (hdst8 & HB_CLEAN) + hidden_bits[in >> 1] = (rdram_16[(in >> 1) ^ WORD_ADDR_XOR] & 1) ? 2 : 0; + else + hidden_bits[in >> 1] &= ~1; + + hidden_bits[in >> 1] |= rval & 1; + } + rdram_8[in ^ BYTE_ADDR_XOR] = rval; + } + + if (in & 1) + { + oldhb[(in >> 1) & 7] = (rval & 1) ? 3 : 0; + + } + } + else + { + + if (*delayedhbwidx >= 0 && (UINT32)*delayedhbwidx < in) + { + if ((UINT32)*delayedhbwidx <= plim) + { + + int oldhbidx = *delayedhbwidx >> 1; + + hidden_bits[oldhbidx] &= ~2; + hidden_bits[oldhbidx] |= oldhb[oldhbidx & 7] & 2; + } + + *delayedhbwidx = -1; + } + + if (in & 1) + { + if (in <= plim) + { + + if (*delayedhbwidx >= 0) + hidden_bits[in >> 1] = (rval & 1) ? 3 : 0; + else + { + int hdst8 = hidden_bits[in >> 1]; + if (hdst8 & HB_CLEAN) + hidden_bits[in >> 1] = (rdram_16[(in >> 1) ^ WORD_ADDR_XOR] & 1) ? 2 : 0; + else + hidden_bits[in >> 1] &= ~1; + + hidden_bits[in >> 1] |= rval & 1; + } + + rdram_8[in ^ BYTE_ADDR_XOR] = rval; + } + + oldhb[(in >> 1) & 7] = (rval & 1) ? 3 : 0; + *delayedhbwidx = -1; + + } + else + { + if (in <= plim) + { + int hdst8 = hidden_bits[in >> 1]; + if (hdst8 & HB_CLEAN) + hidden_bits[in >> 1] = (rdram_16[(in >> 1) ^ WORD_ADDR_XOR] & 1) ? 3 : 0; + + rdram_8[in ^ BYTE_ADDR_XOR] = rval; + } + + *delayedhbwidx = in + 1; + } + } +} + +STRICTINLINE void pairwrite16(UINT32 in, UINT16 rval, UINT8 hval, int iscolor) +{ + in &= (RDRAM_MASK >> 1); + if (in <= idxlim16) + { + rdram_16[in ^ WORD_ADDR_XOR] = rval; + hidden_bits[in] = hval; + } + + if (iscolor) + { + oldhb[in & 7] = hval; + + } +} + +STRICTINLINE void pairwrite32(UINT32 in, INT32 cval, UINT8 hval0, UINT8 hval1) +{ + in &= (RDRAM_MASK >> 2); + if (in <= idxlim32) + { + rdram[in] = cval; + hidden_bits[in << 1] = hval0; + hidden_bits[(in << 1) + 1] = hval1; + } + + oldhb[(in << 1) & 7] = hval0; + oldhb[((in << 1) + 1) & 7] = hval1; +} + +void rejected_hbwrite_1cycle(int cdith, UINT32 blend_en, UINT32 prewrap, UINT32 curpixel, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) +{ + int g, dontblend, gval; + UINT32 fb; + INT32 hval = 0; + int fbsel = fb_size; + + if (fb_size == PIXEL_SIZE_8BIT) + { + fb = fb_address + curpixel; + if (!(fb & 1)) + fbsel--; + } + + if (fbsel & 1) + { + if (!other_modes.color_on_cvg || prewrap) + { + dontblend = (other_modes.f.partialreject_1cycle && pixel_color.a >= 0xff); + if (!blend_en || dontblend) + g = *blender1a_g[0]; + else + { + inv_pixel_color.a = (~(*blender1b_a[0])) & 0xff; + + blender_equation_cycle0_gval(&g); + } + } + else + g = *blender2a_g[0]; + + if (other_modes.rgb_dither_sel != 3) + rgb_dither_gval(&g, cdith); + + gval = (g & 1) ? 3 : 0; + } + + switch (fbsel) + { + case PIXEL_SIZE_4BIT: + break; + case PIXEL_SIZE_8BIT: + if (flip && *delayedhbwidx >= 0) + { + if ((UINT32)*delayedhbwidx < fb) + { + if ((UINT32)*delayedhbwidx <= plim) + { + int oldhbidx = *delayedhbwidx >> 1; + hidden_bits[oldhbidx] &= ~2; + hidden_bits[oldhbidx] |= oldhb[oldhbidx & 7] & 2; + } + } + else if (fb <= plim) + { + + hidden_bits[fb >> 1] &= ~2; + hidden_bits[fb >> 1] |= gval & 2; + } + + *delayedhbwidx = -1; + } + + oldhb[(fb >> 1) & 7] = gval; + + break; + case PIXEL_SIZE_16BIT: + fb = (fb_address >> 1) + curpixel; + if (fb_format == FORMAT_RGBA) + hval = finalize_spanalpha(blend_en, curpixel_cvg, curpixel_memcvg) & 3; + oldhb[fb & 7] = hval; + + break; + case PIXEL_SIZE_32BIT: + fb = (fb_address >> 2) + curpixel; + oldhb[(fb << 1) & 7] = gval; + oldhb[((fb << 1) + 1) & 7] = 0; + break; + } +} + +void rejected_hbwrite_2cycle(int cdith, UINT32 blend_en, UINT32 prewrap, UINT32 curpixel, UINT32 curpixel_cvg, UINT32 curpixel_memcvg, int flip, int* delayedhbwidx) +{ + int g, dontblend, gval; + UINT32 fb; + INT32 hval = 0; + int fbsel = fb_size; + + if (fb_size == PIXEL_SIZE_8BIT) + { + fb = fb_address + curpixel; + if (!(fb & 1)) + fbsel--; + } + + if (fbsel & 1) + { + if (!other_modes.color_on_cvg || prewrap) + { + dontblend = (other_modes.f.partialreject_2cycle && pixel_color.a >= 0xff); + if (!blend_en || dontblend) + g = *blender1a_g[1]; + else + { + inv_pixel_color.a = (~(*blender1b_a[1])) & 0xff; + + blender_equation_cycle1_gval(&g); + } + } + else + g = *blender2a_g[1]; + + if (other_modes.rgb_dither_sel != 3) + rgb_dither_gval(&g, cdith); + + gval = (g & 1) ? 3 : 0; + } + + switch (fbsel) + { + case PIXEL_SIZE_4BIT: + break; + case PIXEL_SIZE_8BIT: + if (flip && *delayedhbwidx >= 0) + { + if ((UINT32)*delayedhbwidx < fb) + { + if ((UINT32)*delayedhbwidx <= plim) + { + int oldhbidx = *delayedhbwidx >> 1; + hidden_bits[oldhbidx] &= ~2; + hidden_bits[oldhbidx] |= oldhb[oldhbidx & 7] & 2; + } + } + else if (fb <= plim) + { + hidden_bits[fb >> 1] &= ~2; + hidden_bits[fb >> 1] |= gval & 2; + } + + *delayedhbwidx = -1; + } + + oldhb[(fb >> 1) & 7] = gval; + break; + case PIXEL_SIZE_16BIT: + fb = (fb_address >> 1) + curpixel; + if (fb_format == FORMAT_RGBA) + hval = finalize_spanalpha(blend_en, curpixel_cvg, curpixel_memcvg) & 3; + oldhb[fb & 7] = hval; + break; + case PIXEL_SIZE_32BIT: + fb = (fb_address >> 2) + curpixel; + oldhb[(fb << 1) & 7] = gval; + oldhb[((fb << 1) + 1) & 7] = 0; + break; + } +} + STRICTINLINE INT32 CLIP(INT32 value, INT32 min, INT32 max) { if (value < min) @@ -9848,6 +10318,26 @@ STRICTINLINE void rgb_dither(int* r, int* g, int* b, int dith) *b = *b + (ditherdiff & replacesign); } +STRICTINLINE void rgb_dither_gval(int* g, int dith) +{ + INT32 newg = *g; + INT32 gcomp; + + if (newg > 247) + newg = 255; + else + newg = (newg & 0xf8) + 8; + + if (other_modes.rgb_dither_sel != 2) + gcomp = dith; + else + gcomp = (dith >> 3) & 7; + + INT32 replacesign = (gcomp - (*g & 7)) >> 31; + INT32 ditherdiff = newg - *g; + *g = *g + (ditherdiff & replacesign); +} + STRICTINLINE void get_dither_noise(int x, int y, int* cdith, int* adith) { @@ -10547,6 +11037,7 @@ STRICTINLINE void tclod_1cycle_next(INT32* sss, INT32* sst, INT32 s, INT32 t, IN nextt = (span[nextscan].t + dtinc) >> 16; nexts = (span[nextscan].s + dsinc) >> 16; nextsw = (span[nextscan].w + dwinc) >> 16; + fart = (span[nextscan].t + (dtinc << 1)) >> 16; fars = (span[nextscan].s + (dsinc << 1)) >> 16; farsw = (span[nextscan].w + (dwinc << 1)) >> 16; @@ -10556,12 +11047,14 @@ STRICTINLINE void tclod_1cycle_next(INT32* sss, INT32* sst, INT32 s, INT32 t, IN nextt = span[nextscan].t >> 16; nexts = span[nextscan].s >> 16; nextsw = span[nextscan].w >> 16; + fart = (span[nextscan].t + dtinc) >> 16; fars = (span[nextscan].s + dsinc) >> 16; farsw = (span[nextscan].w + dwinc) >> 16; } else if (sigs->onelessthanmid) { + nextsw = (w + dwinc) >> 16; nexts = (s + dsinc) >> 16; nextt = (t + dtinc) >> 16; @@ -11263,6 +11756,17 @@ UINT32 compare_tri_command(UINT32 w0, UINT32 w1, UINT32 w2) return 0; } +void complete_delayed_hbwrites(int delayedhbwidx) +{ + if ((UINT32)delayedhbwidx <= plim) + { + int oldhbidx = delayedhbwidx >> 1; + + hidden_bits[oldhbidx] &= ~2; + hidden_bits[oldhbidx] |= oldhb[oldhbidx & 7] & 2; + } +} + void show_color(COLOR* col) { popmessage("R: 0x%x, G: 0x%x, B: 0x%x, A: 0x%x", col->r, col->g, col->b, col->a);