;-----------------------------------------------------------------------------
;
;   Monolith 5. Non-Perspective Correct Nearest Gouraud Modulated
;               Z buffer (LE or GT) 565.
;
;   Globals (ATTENTION)
;
;   StackPos - stack pos holder
;   uSpans - Number of spans to process
;   iSurfaceStep - what to add to screen pointer
;   iZStep - what to add to Z buffer pointer
;   uPix - Pixel Count
;
;   Register usage
;
;   mm6 - Contains Gouraud color at all times.
;
;   Changes from general MMX code.
;   1)  Convert directly from 565 to internal format to remove
;       extra unpack.  Remove alpha set.
;   2)  Keep uB uG uR uA in MM6.
;   3)  Didn't need to save texture color or blended color, so these
;       are kept in registers.
;   4)  All calls and jumps were removed.
;   5)  Removed alpha masking in the modulate code.
;
;-----------------------------------------------------------------------------

INCLUDE iammx.inc
INCLUDE offs_acp.inc



; TBD: check to see if this value is correct.
; COLOR_SHIFT is the per-word logical right shift (psrlw) applied to a copy
; of the Gouraud color held in mm6 before it is multiplied with the texel
; in the modulate step of _MMXMLRast_5.
COLOR_SHIFT equ 8


.586
.model flat


; Big separating lines separate the code into span code
; and loop code.  If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.


.data

; Externs for all of the variables that are needed by the various beads.
; Of the symbols declared below, the routine in this file references only
; Val0x000a000a, Val0xffff, Makelow16one, Zero, MaskGreen565to888,
; opt_MaskRed565to888 (defined locally), u888to565RedBlueMask,
; u888to565GreenMask and u888to565Multiplier.  The remaining declarations
; are not referenced by the code visible in this file.

  EXTERN IncHighandLow16:MMWORD
  EXTERN UFracVFracMask:MMWORD
  EXTERN UV32to15Mask:MMWORD
  EXTERN Makelow16one:MMWORD
  EXTERN MaskKeepUValues:MMWORD
  EXTERN MaskKeepVValues:MMWORD
  EXTERN UFrac:MMWORD
  EXTERN VFrac:MMWORD
  EXTERN Zero:MMWORD
  EXTERN memD3DTFG_POINT:MMWORD
  EXTERN GiveUp:MMWORD
  EXTERN LastW:MMWORD
  EXTERN Val0x000a000a:MMWORD
  EXTERN Val0xffff:MMWORD
  EXTERN Val0x0000002000000020:MMWORD
  EXTERN Val0x0000ffff0000ffff:MMWORD


; Local 64-bit mask with only bits 11..15 set (0F800h), i.e. the red field
; of a 565 texel.  Used by the 565 -> internal format conversion in place
; of the external MaskRed565to888.
opt_MaskRed565to888 MMWORD 000000000000F800H

EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD

EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD

EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD

; TBD: It may be preferable to use 0xffff instead of 0xff here.  This
; will have to be checked.  There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD

; TODO: These equates are identical to the ones in texread.mas.  Maybe they
; should be in a common .inc file.
; NOTE: the conversion code in this file uses literal shift counts
; (24, 13 and 3) rather than these equates.
RedShift565to888     equ 8
GreenShift565to888   equ 5
BlueShift565to888    equ 3

RedShift555to888     equ 9
GreenShift555to888   equ 6
BlueShift555to888    equ 3

AlphaShift1555to8888 equ 16
RedShift1555to8888   equ 9
GreenShift1555to8888 equ 6
BlueShift1555to8888  equ 3

; NOTE(review): Zero is already declared EXTERN earlier in this section.
EXTERN Zero:MMWORD


EXTERN DW_One_One:MMWORD


EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD


    ; Constants for converting the internal color format back to 565 / 555.
    ; NOTE(review): SetAlphato0xff is already declared EXTERN earlier in
    ; this section.
    EXTERN  g_uDitherValue:MMWORD
    EXTERN  SetAlphato0xff:MMWORD
    EXTERN  u888to565RedBlueMask:MMWORD
    EXTERN  u888to565GreenMask:MMWORD
    EXTERN  u888to565Multiplier:MMWORD
    EXTERN  uVal0x000007ff03ff07ff:MMWORD
    EXTERN  uVal0x0000078003c00780:MMWORD
    EXTERN  u888to555RedBlueMask:MMWORD
    EXTERN  u888to555GreenMask:MMWORD
    EXTERN  u888to555Multiplier:MMWORD
    EXTERN  uVal0x000007ff07ff07ff:MMWORD
    EXTERN  uVal0x0000078007800780:MMWORD



;-----------------------------------------------------------------------------
; Span Variables
;
; These are file-level statics written by _MMXMLRast_5 on every call, so the
; routine keeps its working state in shared memory rather than on the stack.
;
; StackPos - value of esp captured right after the entry `push ebp`;
;            reloaded into esp in the epilogue before `pop ebp`.
; uSpans   - spans still to be processed for the current primitive;
;            decremented at the top of every SpanLoop pass.
StackPos    dd  ?
uSpans      dd  ?
;-----------------------------------------------------------------------------

;-----------------------------------------------------------------------------
; Loop Variables
;
; iSurfaceStep - added to pS->pSurface after each pixel; copied from
;                pCtx->iSurfaceStep and negated when the primitive has
;                D3DI_RASTPRIM_X_DEC set.
; iZStep       - added to pS->pZ after each pixel; copied from
;                pCtx->iZStep and negated under the same condition.
; uPix         - pixels still to be processed in the current span,
;                loaded from the 16-bit pS->uPix.

iSurfaceStep    dd  ?
iZStep          dd  ?
uPix            dd  ?

;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------

.code



;-----------------------------------------------------------------------------
; _MMXMLRast_5
;
; Walks the primitive list starting at pCtx->pPrim.  For every span of every
; primitive it runs the pixel loop: 16-bit Z test (sense selected by
; pCtx->iZXorMask), nearest texel fetch from a 16-bit texture, modulate by
; the Gouraud color carried in mm6, convert to 565 and store to the surface.
;
; In:     [esp+4] = pCtx (single stack argument; the routine returns with a
;                   plain `ret`, so the caller removes it)
; Out:    eax = 0
; Saved:  ebx, esi, edi, ebp
; Clobb:  eax, ecx, edx, flags, mm0-mm7 (emms is executed before returning)
; State:  uses the file-level variables StackPos, uSpans, iSurfaceStep,
;         iZStep and uPix, so the routine is not reentrant.
;
; Register roles inside the loops:
;   ebx = pCtx          ecx = pP (current primitive)
;   ebp = pS (current span)
;   esi = Z buffer pointer, then pCtx->pTexture
;   edi = texture bits, then destination surface pointer
;   mm6 = Gouraud color, stepped once per pixel
;-----------------------------------------------------------------------------
PUBLIC _MMXMLRast_5
_MMXMLRast_5:
    push    ebp
    mov     StackPos, esp           ; epilogue restores esp from here
    mov     eax, esp                ; eax -> saved ebp; [eax+4] = return address
    sub     esp, 0Ch                ; This will need to change if stack frame size changes.
    push    ebx
    push    esi
    push    edi

    ; Put pCtx into ebx
    mov     ebx, [eax+8]

    ;PD3DI_RASTPRIM pP = pCtx->pPrim;
    mov     ecx, [ebx+RASTCTX_pPrim]

    ;while (pP)
    ;{
PrimLoop:
    test    ecx, ecx
    jz      ExitPrimLoop

    ;UINT16 uSpans = pP->uSpans;
    movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
    mov     uSpans, eax

    ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
    mov     ebp, ecx
    add     ebp, SIZEOF_RASTPRIM

    ;while (uSpans-- > 0)
    ;{
SpanLoop:
    mov     edx, uSpans             ; edx = count before the decrement
    mov     eax, edx
    dec     eax
    mov     uSpans, eax
    test    edx, edx
    jle     ExitSpanLoop

    ;pCtx->pfnBegin(pCtx, pP, pS);

;-----------------------------------------------------------------------------
;  LoopAny code inserted here.  This is to get rid of an extra
;  jump.
;-----------------------------------------------------------------------------

; Setup Code begins
    movzx   eax, word ptr [ebp+RASTSPAN_uPix]
    mov     uPix, eax

    ; One 64-bit load/shift/store starting at iUoW1 / SI.iU1.
    ; NOTE(review): presumably this covers the adjacent V value as well,
    ; since SPANITER_iV1 is read later without a separate store - confirm
    ; against the structure layout.
    movq    mm5, [ebp+RASTSPAN_iUoW1]
    psrad   mm5, TEX_TO_FINAL_SHIFT
    movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

    ;pCtx->SI.iU2 = pS->iUoW2>>TEX_TO_FINAL_SHIFT;
    ;pCtx->SI.iV2 = pS->iVoW2>>TEX_TO_FINAL_SHIFT;

    ;movq    mm5, [ebp+RASTSPAN_iUoW2]

    ;pCtx->SI.iDW = 0x0;
    mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0

    ;pCtx->SI.iSpecialW = 0;
    mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

    ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
    ;{
    mov     eax, [ecx+RASTPRIM_uFlags]
    test    eax, D3DI_RASTPRIM_X_DEC    ; ZF set when the flag is clear
    jz      LeftToRightSpan
    ;iZStep = -pCtx->iZStep;
    mov     eax, [ebx+RASTCTX_iZStep]
    neg     eax
    mov     iZStep, eax
    ;iSurfaceStep = -pCtx->iSurfaceStep;
    mov     eax, [ebx+RASTCTX_iSurfaceStep]
    neg     eax
    mov     iSurfaceStep, eax
    ;}
    jmp     DoneSpanDirif
    ;else
    ;{
LeftToRightSpan:
    ;iZStep = pCtx->iZStep;
    mov     eax, [ebx+RASTCTX_iZStep]
    mov     iZStep, eax
    ;iSurfaceStep = pCtx->iSurfaceStep;
    mov     eax, [ebx+RASTCTX_iSurfaceStep]
    mov     iSurfaceStep, eax
    ;}
DoneSpanDirif:

    ; mm6 carries the span color for the whole pixel loop (64 bits at uB).
    movq    mm6, [ebp+RASTSPAN_uB]
; Setup Code Ends
; ----------------------------------------------------------------------------------------------------------------
; Loop Code Begins

PixelLoop:
    ; Ztestcode
    ; edx is uZ
    ; eax is uZB
    ; 16 bit unsigned format
    ;UINT16 uZ = (UINT16)(pS->uZ>>15);
    ;UINT16 uZB = *((UINT16*)pS->pZ);
    mov     edx, [ebp+RASTSPAN_uZ]
    movd    mm4, edx
    mov     esi, [ebp+RASTSPAN_pZ]
    shr     edx, 15
    movzx   eax, word ptr [esi]

    ;pS->uZ += pP->iDZDX;
    ;if ((pCtx->iZXorMask)^(uZ > uZB))
    ; !(uZ > uZB) <==>
    ; (uZ <= uZB) <==>
    ; (uZ < uZB+1) <==>
    ;
    sub     eax, edx                ; eax = uZB - uZ
    paddd   mm4, [ecx+RASTPRIM_iDZDX]
    movd    [ebp+RASTSPAN_uZ], mm4  ; Z is stepped whether or not the test passes
    xor     eax, [ebx+RASTCTX_iZXorMask]    ; xor sets SF from the result,
    js      FailLabel                       ; so no separate test is needed

    mov     word ptr [esi], dx      ; Z test passed: write the new depth

    ; texturecode

    mov     esi, [ebx+RASTCTX_pTexture]

    ; Build the per-axis right shifts: word-wise (constant - iShift).
    ; Low word ends up in mm5 (used for U), next word in mm4 (used for V).
    movq    mm5, MMWORD PTR Val0x000a000a  ; This is TEX_FINAL_SHIFT - 6 = 10.

    movd    mm4, [esi+SPANTEX_iShiftU]
    psubw   mm5, mm4
    movq    mm4, mm5
    pand    mm5, MMWORD PTR Val0xffff

    psrld   mm4, 16

    movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
    psrad   mm1, mm5
    movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
    psrad   mm2, mm4

    punpckldq   mm1, mm2            ; mm1 = V (high dword) : U (low dword)

    ; mm5 = multiplier pair for pmaddwd, built from Makelow16one shifted left
    ; by (iShiftPitch + 16) and OR-ed with the unshifted constant.
    ; NOTE(review): presumably Makelow16one == 1, which makes the later
    ; pmaddwd compute U + (V << iShiftPitch) - confirm.
    movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
    add     edx, 16
    movd    mm2, edx
    movq    mm5, MMWORD ptr Makelow16one
    pslld   mm5, mm2
    por     mm5, MMWORD ptr Makelow16one
    psrad   mm1, 6
    packssdw    mm1, mm1            ; Value needs to be packed since all wrap/mirror
    movd    mm0, [esi+SPANTEX_uMaskU]       ; Load U and V mask
    movq    mm7, mm1
    movd    mm4, [esi+SPANTEX_iFlipMaskU]
    pand    mm7, mm4
    pcmpeqw mm7, MMWORD PTR Zero    ; word = FFFFh where (coord & flip mask) == 0
    pandn   mm7, mm0                ; word = address mask where the flip bit is set
    pand    mm1, mm0                ; wrap the coordinate
    pxor    mm1, mm7                ; and invert it within the mask when flipped
    movq    mm4, mm1

    pmaddwd mm4, mm5                ; Throw in first address calculation.
    mov     edi, [esi+SPANTEX_pBits]
    movd    eax, mm4                ; eax = texel index

    ; Read in texture color
    movzx   eax, word ptr [edi+2*eax]

    ; Convert from 565 to internal format: one 8-bit channel in the low byte
    ; of each 16-bit word (word 0 = blue, word 1 = green, word 2 = red).
    movd    mm1, eax                ; Make two more copies of input color
    movq    mm2, mm1
    pand    mm1, dword ptr MaskGreen565to888  ; MaskGreen565to888 is in memory
    pand    mm2, dword ptr opt_MaskRed565to888    ; MaskRed565to888 in memory.
    psllq   mm2, 24                 ; red bits 11..15   -> bits 35..39
    psllq   mm1, 13                 ; green field moved up into word 1
    shl     eax, 3                  ; blue bits 0..4    -> bits 3..7
    por     mm1, mm2
    and     eax, 0FFH               ;dword ptr MaskBlue565to888
    movd    mm2, eax
    por     mm2, mm1                ; mm2 = texel in internal format

    ;modulate

    movq    mm1, mm6
    psrlw   mm1, COLOR_SHIFT        ; COLOR_SHIFT is set to 8.
    pmullw  mm1, mm2                ; per-channel texel * color

    ; convert back to 565

    mov     edi, [ebp+RASTSPAN_pSurface]

    psrlw   mm1, 8                  ; Convert color1 from 8.8 to 0.8
    packuswb    mm1, mm7            ; pack one color; the upper dword taken from
                                    ; mm7 is never used (only movd below)
    movq    mm3, mm1
    pand    mm1, MMWORD PTR u888to565RedBlueMask
    pmaddwd mm1, MMWORD PTR u888to565Multiplier
    pand    mm3, MMWORD PTR u888to565GreenMask
    por     mm1, mm3
    psrld   mm1, 5

    movd    edx, mm1

    ; write pixel to screen.
    mov     [edi], dx

FailLabel:

    ; The count is decremented after the pixel has been processed, so a span
    ; whose uPix is zero still runs one iteration before exiting here.
    dec     uPix                    ;// BUG BUG?? uPix should never start as zero should it?

    jle     ExitPixelLoop

    ; Keeping uB in MM6 so that dont need to store back and forth from memory.
    paddw   mm6, [ecx+RASTPRIM_iDBDX]

    ; Step the texture coordinates and refresh the shifted copy in pCtx->SI.
    movq    mm5, [ebp+RASTSPAN_iUoW1]
    paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
    movq    [ebp+RASTSPAN_iUoW1], mm5

    psrad   mm5, TEX_TO_FINAL_SHIFT
    movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

    ; Advance the Z buffer and surface pointers by the (signed) steps.
    mov     eax, dword ptr [ebp+RASTSPAN_pZ]
    mov     edx, dword ptr [ebp+RASTSPAN_pSurface]

    add     eax, iZStep
    add     edx, iSurfaceStep

    mov     dword ptr [ebp+RASTSPAN_pZ], eax
    mov     dword ptr [ebp+RASTSPAN_pSurface], edx

    jmp     PixelLoop

ExitPixelLoop:
; Loop code ends

;-----------------------------------------------------------------------------
;  LoopAny code ends here
;-----------------------------------------------------------------------------

    ; pS++
    add     ebp, SIZEOF_RASTSPAN

    jmp     SpanLoop
ExitSpanLoop:

    ; pP = pP->pNext
    mov     ecx, [ecx+RASTPRIM_pNext]

    jmp     PrimLoop

ExitPrimLoop:

    emms                            ; leave MMX state so x87 code can run again
    xor     eax, eax                ; return 0

    pop     edi
    pop     esi
    pop     ebx
    mov     esp, StackPos           ; discards the 0Ch bytes reserved at entry
    pop     ebp
    ret

END