2025-04-27 07:49:33 -04:00

1282 lines
44 KiB
NASM
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

page ,132
;------------------------------Module-Header----------------------------;
; Module Name: line.asm
;
; Transparent and Opaque output for 1Bpp to 1Bpp drawing
;
; Created: 24-Aug-1994
; Author: Mark Enstrom
;
; Copyright (c) 1994-1999 Microsoft Corporation
;-----------------------------------------------------------------------;
.386
.model small,c
assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
assume fs:nothing,gs:nothing
.xlist
include callconv.inc
include gdii386.inc
include line.inc
spt_lEndOffset equ 000H
spt_ulEndMask equ 004H
spt_ulStartMask equ 008H
spt_ebp equ 00CH
spt_esi equ 010H
spt_edi equ 014H
spt_ebx equ 018H
spt_ulRetAddr equ 01CH
spt_pjSrcIn equ 020H
spt_SrcLeft equ 024H
spt_DeltaSrcIn equ 028H
spt_pjDstIn equ 02CH
spt_DstLeft equ 030H
spt_DstRight equ 034H
spt_DeltaDstIn equ 038H
spt_cy equ 03CH
spt_uF equ 040H
spt_uB equ 044H
spt_pS equ 048H
.list
;------------------------------------------------------------------------------;
.code
_TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
;------------------------------------------------------------------------------;
;------------------------------------------------------------------------------;
; vSrcTranCopyS1D1
;
; Transparent copy from 1Bpp to 1Bpp (DWORD ALIGNED)
;
; Entry:
;
; pjSrcIn
; SrcLeft
; DeltaSrcIn
; pjDstIn
; DstLeft
; DstRight
; DeltaDstIn
; cy
; uF
; uB
; pS
;
;
; Returns:
; none
; Registers Destroyed:
; all
; Calls:
; None
; History:
;
; WARNING: If call parameters are changed, the ret statement must be fixed
;
;------------------------------------------------------------------------------;
public vSrcTranCopyS1D1@44
vSrcTranCopyS1D1@44 proc near
;
; use ebp as general register, use esp for parameter and local access
; save ebp,ebx,esi,edi
push ebx
push edi
push esi
push ebp
sub esp,3 * 4
;
; Start and end cases and masks. Start case is the first partial
; DWORD (if needed) and end case is the last partial DWORD (if needed)
;
mov ecx,spt_DstLeft[esp] ; Left (startint Dst Location)
sub edi,edi ; zero
not edi ; flag = 0xffffff
mov esi,edi ; flag = 0xffffff
and ecx,01Fh ; DstLeftAln,if 0 then don't make mask
jz @F
;
; ulStartMask >>= lStartCase, shift ulStartMask right by starting alignment,
; then BSWAP this back to little endian
;
shr edi,cl ; ulStartMask >> lStartCase
ror di,8 ; 0 1 2 3 -> 0 1 3 2
ror edi,16 ; 0 1 3 2 -> 3 2 0 1
ror di,8 ; 3 2 0 1 -> 3 2 1 0
@@:
mov eax,spt_DstRight[esp] ; right (last pixel)
and eax,01Fh ; DstRightAln = DstRight & 0x1F
jz @F ; if 0, mask = 0xffffffff
;
; ulEndMask = 0xFFFFFF << ( 32 - lEndCase). Shift big endian 0xFFFFFF left
; by lEndCase, then BSWAP
;
mov ecx,32 ; shift 32 - lEndCase
sub ecx,eax ; 32 - lEndCase
shl esi,cl ; ulEndMask << lEndCase
ror si,8 ; 0 1 2 3 -> 0 1 3 2
ror esi,16 ; 0 1 3 2 -> 3 2 0 1
ror si,8 ; 3 2 0 1 -> 3 2 1 0
@@:
mov spt_ulStartMask[esp],edi ; save
mov spt_ulEndMask[esp],esi ; save
;
; check for transparent 1 or transparent 0 foreground color
;
test spt_uF[esp],1
jz Trans1to1Invert
;
; calculate the number of full dwords to copy
;
; Full DWORDS = (DstRight >> 5) - ((DstLeft + 31) >> 5)
;
mov eax,spt_DstLeft[esp] ; Left (startint Dst Location)
mov ebx,spt_DstRight[esp] ; right (last pixel)
add eax,31 ; DstLeft + 31
shr ebx,5 ; DstRight >> 5
shr eax,5 ; (DstLeft+31) >> 5
;
; if <= 0, then no full DWORDS need to be copied, go to end cases
;
sub ebx,eax ; RightDW - LeftDw
jle Tran1to1Partial
mov spt_lEndOffset[esp],ebx ; save
;
; calc starting Dst Address = pjDstIn + (((DstLeft+31) >> 5) << 2)
; calc starting Src Address = pjSrcIn + (((SrcLeft + 31) >> 5) << 2)
;
mov edi,spt_pjDstIn[esp]
mov esi,spt_pjSrcIn[esp]
mov ebx,spt_SrcLeft[esp] ; SrcLeft
add ebx,31 ; SrcLeft + 31
shr ebx,5 ; (SrcLeft + 31) >> 5
lea edi,[edi + 4 * eax] ; pjDstIn + (((DstLeft+31) >> 5) * 4
lea esi,[esi + 4 * ebx] ; pjSrcIn + ((SrcLeft + 31) >> 5) * 4
;
; save scan line addresses
;
mov edx,spt_cy[esp] ; keep track of # of scan lines
Tran1to1DwordLoop:
;
; DWORD loop
;
mov eax,spt_lEndOffset[esp] ; get inner loop count
@@:
;
; read and write 1 DWORD
;
mov ebx,[4 * eax - 4 + esi] ; load dw 0
or [4 * eax - 4 + edi],ebx ; store dw 0
dec eax ; dec dword count
jnz @B ; loop until done
Tran1to1DwordLoopComplete:
;
; done with scan line, add strides
;
add edi,spt_DeltaDstIn[esp] ; pjDst += DeltaDst
add esi,spt_DeltaSrcIn[esp] ; pjSrc += DeltaSrc
;
; dec and check cy
;
dec edx ; cy--
jne Tran1to1DwordLoop
Tran1to1Partial:
;
; handle partial DWORD blts. first the start case, which may be the
; left edge or it may be a combined DWORD left and right, in which
; case the start and end masks are just anded together
;
mov eax,spt_DstLeft[esp] ; left position
and eax,01Fh ; left edge pixels
je Tran1to1RightEdge ; if zero, no left edge
;
; There is a left edge, are right and left DWORDS the same
;
mov ebx,spt_DstLeft[esp] ; left position
mov edx,spt_DstRight[esp] ; right edge
mov ecx,-32 ; ~0x1F - dword address alignment
and ebx,ecx ; DstLeft & ~0x1F
and edx,ecx ; DstRight & ~0x1F
mov ecx,spt_ulStartMask[esp] ; get start start mask
cmp edx,ebx ; if equal, start and stop
jne @F ; in same DWORD
;
; left and right are same src DWORD, combine masks, then
; set right edge to 0 so Tran1to1RightEdge won't run
;
and ecx,spt_ulEndMask[esp] ; combine with start mask
mov spt_DstRight[esp],0 ; save 0
@@:
;
; calc left edge starting addresses
;
; pjDst = pjDstIn + ((DstLeft >> 5) << 2)
; pjSrc = pjSrcIn + ((SrcLeft >> 5) << 2)
;
mov eax,spt_DstLeft[esp] ; load
mov ebx,spt_SrcLeft[esp] ; load
shr eax,5 ; DstLeft >> 3
shr ebx,5 ; SrcLeft >> 3
mov edi,spt_pjDstIn[esp] ; load dst addr
mov esi,spt_pjSrcIn[esp] ; load src addr
lea edi,[edi + 4*eax] ; dest address
lea esi,[esi + 4*ebx] ; src address
mov ebp,spt_DeltaSrcIn[esp] ; scr scan line stride
mov ebx,spt_DeltaDstIn[esp] ; dst scan line stride
mov edx,spt_cy[esp] ; loop count
;
; unroll loop 2 times, this requires address fix-up
;
sub edi,ebx ; fix-up for odd cycle
sub esi,ebp ; fix-up for odd cycle
add edx,1 ; offset for unrolling
shr edx,1 ; 2 times through loop
jnc Tran1to1LeftEdgeOdd ; do first "odd" dword
add edi,ebx ; un-fix-up for even cycle
add esi,ebp ; un-fix-up for even cycle
@@:
mov eax,[esi] ; load src
and eax,ecx ; mask
or [edi],eax ; store
Tran1to1LeftEdgeOdd:
mov eax,[esi + ebp] ; load src
and eax,ecx ; mask
or [edi + ebx],eax ; store
lea esi,[esi][ebp*2] ; src += 2 * DeltaSrc
lea edi,[edi][ebx*2] ; dst += 2 * DeltaDst
dec edx ; loop count
jnz @b ;
;
; done with left edge
;
Tran1to1RightEdge:
;
; right edge, check DstRight[4:0] for pixels on the right edge
;
mov eax,spt_DstRight[esp] ; load
and eax,01Fh ; are there right edge pixels
jz EndTran1to1
;
; there are right edge pixels to draw, calc pjSrc and pjDst
;
; pjDst = pjDstIn + ((DstRight >> 5) << 2)
; pjSrc = pjSrcIn + (((SrcLeft + (DstRight - DstLeft)) >> 5) << 2)
;
mov ebx,spt_DstRight[esp] ; DstRight
mov ecx,spt_DstLeft[esp] ; load DstLeft
mov eax,spt_SrcLeft[esp] ; load SrcLeft
mov edx,ebx ; edi = DstRight
sub ebx,ecx ; (DstRight - DstLeft)
add eax,ebx ; SrcLeft + (DstRight - DstLeft)
shr edx,5 ; DstRight >> 5
shr eax,5 ; Src Offset >> 5
mov edi,spt_pjDstIn[esp] ;
mov esi,spt_pjSrcIn[esp] ;
lea edi,[edi + 4*edx] ; pjDstIn + DstOffset
lea esi,[esi + 4*eax] ; pjSrcIn + SrcOffset
mov ecx,spt_ulEndMask[esp] ; load end mask
mov ebx,spt_DeltaSrcIn[esp] ; src scan line inc
mov ebp,spt_DeltaDstIn[esp] ; dst scan line inc
mov edx,spt_cy[esp] ; loop count
;
; unroll loop 2 times, fix up address for odd cycle
;
sub edi,ebp ; fix-up for odd cycle
sub esi,ebx ; fix-up for odd cycle
add edx,1 ; offset for 2 times unrolling
shr edx,1 ; LoopCount / 2
jnc Tran1to1RightEdgeOdd ; do single "odd" DWORD
add edi,ebp ; un-fix-up for odd cycle
add esi,ebx ; un-fix-up for odd cycle
@@:
mov eax,[esi] ; load src dword
and eax,ecx ; mask
or [edi],eax ; or in src
Tran1to1RightEdgeOdd:
mov eax,[esi+ebx] ; load src dword
and eax,ecx ; mask
or [edi+ebp],eax ; or in src
lea esi,[esi][ebx*2] ; src += 2 * DeltaSrc
lea edi,[edi][ebp*2] ; dst += 2 * DeltaDst
dec edx ; dec loop count
jnz @B ; and loop until zero
jmp EndTran1to1
;
; transparent invert (uF = 0)
;
Trans1to1Invert:
;
; Transparent text where the foreground color is 0, must invert the
; src DWORD and then and it onto the dest.
;
;
; calculate the number of full dwords to copy
;
; Full DWORDS = (DstRight >> 5) - ((DstLeft + 31) >> 5)
;
mov eax,spt_DstLeft[esp] ; Left (startint Dst Location)
mov ebx,spt_DstRight[esp] ; right (last pixel)
add eax,31 ; DstLeft + 31
shr ebx,5 ; DstRight >> 5
shr eax,5 ; (DstLeft+31) >> 5
;
; if <= 0, then no full DWORDS need to be copied, go to end cases
;
sub ebx,eax ; RightDW - LeftDw
jle Tran1to1PartialInvert
mov spt_lEndOffset[esp],ebx ; save
;
; calc starting Dst Address = pjDstIn + (((DstLeft+31) >> 5) << 2)
; calc starting Src Address = pjSrcIn + (((SrcLeft + 31) >> 5) << 2)
;
mov edi,spt_pjDstIn[esp]
mov esi,spt_pjSrcIn[esp]
mov ebx,spt_SrcLeft[esp] ; SrcLeft
add ebx,31 ; SrcLeft + 31
shr ebx,5 ; (SrcLeft + 31) >> 5
lea edi,[edi + 4 * eax] ; pjDstIn + (((DstLeft+31) >> 5) * 4
lea esi,[esi + 4 * ebx] ; pjSrcIn + ((SrcLeft + 31) >> 5) * 4
;
; save scan line addresses
;
mov edx,spt_cy[esp] ; keep track of # of scan lines
Tran1to1DwordLoopInvert:
;
; DWORD loop
;
mov eax,spt_lEndOffset[esp] ; get inner loop count
@@:
;
; read and write 1 DWORD
;
mov ebx,[4 * eax - 4 + esi] ; load dw 0
not ebx ; invert
and [4 * eax - 4 + edi],ebx ; store dw 0
dec eax ; dec dword count
jnz @B ; loop until done
Tran1to1DwordLoopCompleteInvert:
;
; done with scan line, add strides
;
add edi,spt_DeltaDstIn[esp] ; pjDst += DeltaDst
add esi,spt_DeltaSrcIn[esp] ; pjSrc += DeltaSrc
;
; dec and check cy
;
dec edx ; cy--
jne Tran1to1DwordLoopInvert
Tran1to1PartialInvert:
;
; handle partial DWORD blts. first the start case, which may be the
; left edge or it may be a combined DWORD left and right, in which
; case the start and end masks are just anded together
;
mov eax,spt_DstLeft[esp] ; left position
and eax,01Fh ; left edge pixels
je Tran1to1RightEdgeInvert ; if zero, no left edge
;
; There is a left edge, are right and left DWORDS the same
;
mov ebx,spt_DstLeft[esp] ; left position
mov edx,spt_DstRight[esp] ; right edge
mov ecx,-32 ; ~0x1F - dword address alignment
and ebx,ecx ; DstLeft & ~0x1F
and edx,ecx ; DstRight & ~0x1F
mov ecx,spt_ulStartMask[esp] ; get start start mask
cmp edx,ebx ; if equal then start and end
jne @F ; in same DWORD
;
; left and right are same src DWORD, combine masks, then
; set right edge to 0 so Tran1to1RightEdge won't run
;
and ecx,spt_ulEndMask[esp] ; combine with start mask
mov spt_DstRight[esp],0 ; save 0
@@:
;
; calc left edge starting addresses
;
; pjDst = pjDstIn + ((DstLeft >> 5) << 2)
; pjSrc = pjSrcIn + ((SrcLeft >> 5) << 2)
;
mov eax,spt_DstLeft[esp] ; load
mov ebx,spt_SrcLeft[esp] ; load
shr eax,5 ; DstLeft >> 3
shr ebx,5 ; SrcLeft >> 3
mov edi,spt_pjDstIn[esp] ; load dst addr
mov esi,spt_pjSrcIn[esp] ; load src addr
lea edi,[edi + 4*eax] ; dest address
lea esi,[esi + 4*ebx] ; src address
mov ebp,spt_DeltaSrcIn[esp] ; scr scan line stride
mov ebx,spt_DeltaDstIn[esp] ; dst scan line stride
mov edx,spt_cy[esp] ; loop count
;
; unroll left edge loop 2 times, this requires address fix-up
;
sub edi,ebx ; fix-up for odd cycle
sub esi,ebp ; fix-up for odd cycle
add edx,1 ; offset for unrolling
shr edx,1 ; 2 times through loop
jnc Tran1to1LeftEdgeOddInvert ; do first "odd" dword
add edi,ebx ; un-fix-up for even cycle
add esi,ebp ; un-fix-up for even cycle
@@:
mov eax,[esi] ; load src
and eax,ecx ; mask
not eax ; invert
and [edi],eax ; store
Tran1to1LeftEdgeOddInvert:
mov eax,[esi + ebp] ; load src
and eax,ecx ; mask
not eax ; invert
and [edi + ebx],eax ; store
lea esi,[esi][ebp*2] ; src += 2 * DeltaSrc
lea edi,[edi][ebx*2] ; dst += 2 * DeltaDst
dec edx ; loop count
jnz @b ;
;
; done with left edge
;
Tran1to1RightEdgeInvert:
;
; right edge, check DstRight[4:0] for pixels on the right edge
;
mov eax,spt_DstRight[esp] ; load
and eax,01Fh ; are there right edge pixels
jz EndTran1to1
;
; there are right edge pixels to draw, calc pjSrc and pjDst
;
; pjDst = pjDstIn + ((DstRight >> 5) << 2)
; pjSrc = pjSrcIn + (((SrcLeft + (DstRight - DstLeft)) >> 5) << 2)
;
mov ebx,spt_DstRight[esp] ; DstRight
mov ecx,spt_DstLeft[esp] ; load DstLeft
mov eax,spt_SrcLeft[esp] ; load SrcLeft
mov edx,ebx ; edi = DstRight
sub ebx,ecx ; (DstRight - DstLeft)
add eax,ebx ; SrcLeft + (DstRight - DstLeft)
shr edx,5 ; DstRight >> 5
shr eax,5 ; Src Offset >> 5
mov edi,spt_pjDstIn[esp] ;
mov esi,spt_pjSrcIn[esp] ;
lea edi,[edi + 4*edx] ; pjDstIn + DstOffset
lea esi,[esi + 4*eax] ; pjSrcIn + SrcOffset
mov ecx,spt_ulEndMask[esp] ; load end mask
mov ebx,spt_DeltaSrcIn[esp] ; src scan line inc
mov ebp,spt_DeltaDstIn[esp] ; dst scan line inc
mov edx,spt_cy[esp] ; loop count
;
; unroll right edge loop 2 times. This requires address fix-up
;
sub esi,ebx ; fix-up address for odd cycle
sub edi,ebp ; fix-up address for odd cycle
add edx,1 ; offset for 2 times unrolling
shr edx,1 ; LoopCount / 2
jnc Tran1to1RightEdgeOddInvert ; do single "odd" DWORD
add esi,ebx ; fix-up address for odd cycle
add edi,ebp ; fix-up address for odd cycle
@@:
mov eax,[esi] ; load src dword
and eax,ecx ; mask
not eax ; invert
and [edi],eax ; or in src
Tran1to1RightEdgeOddInvert:
mov eax,[esi+ebx] ; load src dword
and eax,ecx ; mask
not eax ; invert
and [edi+ebp],eax ; or in src
lea esi,[esi][ebx*2] ; src += 2 * DeltaSrc
lea edi,[edi][ebp*2] ; dst += 2 * DeltaDst
dec edx ; dec loop count
jnz @B ; and loop until zero
;
; done!
;
EndTran1to1:
add esp,3*4
pop ebp
pop esi
pop edi
pop ebx
ret 44
vSrcTranCopyS1D1@44 endp
spo_tMask equ 000H
spo_lXorMask equ 004H
spo_lEndOffset equ 008H
spo_ulEndMask equ 00CH
spo_ulStartMask equ 010H
spo_ebp equ 014H
spo_esi equ 018H
spo_edi equ 01CH
spo_ebx equ 020H
spo_ulRetAddr equ 024H
spo_pjSrcIn equ 028H
spo_SrcLeft equ 02CH
spo_DeltaSrcIn equ 030H
spo_pjDstIn equ 034H
spo_DstLeft equ 038H
spo_DstRight equ 03CH
spo_DeltaDstIn equ 040H
spo_cy equ 044H
spo_uF equ 048H
spo_uB equ 04CH
spo_pS equ 050H
;
;---------------------------Private-Routine-----------------------------;
; vSrcOpaqCopyS1D1
;
; Opaque copy from 1Bpp to 1Bpp (DWORD ALIGNED)
;
; Entry:
;
; pjSrcIn
; SrcLeft
; DeltaSrcIn
; pjDstIn
; DstLeft
; DstRight
; DeltaDstIn
; cy
; uF
; uB
; pS
;
;
;
; Returns:
; none
; Registers Destroyed:
; all
; Calls:
; None
; History:
;
; WARNING: If call parameters are changed, the ret statement must be fixed
;
;-----------------------------------------------------------------------;
public vSrcOpaqCopyS1D1@44
vSrcOpaqCopyS1D1@44 proc near
;
; opaque expansion requires a check for masking, if
; uF and uB are the same color, then the and mask is set to
; 0, this really means the src is ignored. The and mask is
; set to 0xFFFFFFFF if the uF = 0, this causes an inversion
; from src to dst.
;
; This routine is split into 2 sections, section 1 has an and
; mask value of 0xFFFFFFFF, so it is ignored. Section 2 has an
; and mask value if 0, so no src needs to be read.
;
;
push ebx
push edi
push esi
push ebp
sub esp,5*4
mov spo_lXorMask[esp],0
test spo_uF[esp],1
jnz @F
mov spo_lXorMask[esp],0FFFFFFFFh
@@:
;
; Start and end cases and masks. Start case is the first partial
; DWORD (if needed) and end case is the last partial DWORD (if needed)
;
mov ecx,spo_DstLeft[esp] ; Left (startint Dst Location)
sub edi,edi ; zero
not edi ; flag = 0xffffff
mov esi,edi ; flag = 0xffffff
and ecx,01Fh ; DstLeftAln,if 0 then don't make mask
jz @F
;
; ulStartMask >>= lStartCase, shift ulStartMask right by starting alignment,
; then BSWAP this back to little endian
;
shr edi,cl ; ulStartMask >> lStartCase
ror di,8 ; 0 1 2 3 -> 0 1 3 2
ror edi,16 ; 0 1 3 2 -> 3 2 0 1
ror di,8 ; 3 2 0 1 -> 3 2 1 0
@@:
mov eax,spo_DstRight[esp] ; right (last pixel)
and eax,01Fh ; DstRightAln = DstRight & 0x1F
jz @F ; if 0, mask = 0xffffffff
;
; ulEndMask = 0xFFFFFF << ( 32 - lEndCase). Shift big endian 0xFFFFFF left
; by lEndCase, then BSWAP
;
mov ecx,32 ; shift 32 - lEndCase
sub ecx,eax ; 32 - lEndCase
shl esi,cl ; ulEndMask << lEndCase
ror si,8 ; 0 1 2 3 -> 0 1 3 2
ror esi,16 ; 0 1 3 2 -> 3 2 0 1
ror si,8 ; 3 2 0 1 -> 3 2 1 0
@@:
mov spo_ulStartMask[esp],edi ; save
mov spo_ulEndMask[esp],esi ; save
;
; check for uF == uB, if so then branch to second section of routine
;
mov eax,spo_uF[esp]
cmp eax,spo_uB[esp]
jz Opaq1to1NoSrc
;
; calculate the number of full dwords to copy
;
; Full DWORDS = (DstRight >> 5) - ((DstLeft + 31) >> 5)
;
mov eax,spo_DstLeft[esp] ; Left (startint Dst Location)
mov ebx,spo_DstRight[esp] ; right (last pixel)
add eax,31 ; DstLeft + 31
shr ebx,5 ; DstRight >> 5
shr eax,5 ; (DstLeft+31) >> 5
;
; if <= 0, then no full DWORDS need to be copied, go to end cases
;
sub ebx,eax ; RightDW - LeftDw
jle Opaq1to1Partial
mov spo_lEndOffset[esp],ebx ; save
;
; calc starting Dst Address = pjDstIn + (((DstLeft+31) >> 5) << 2)
; calc starting Src Address = pjSrcIn + (((SrcLeft + 31) >> 5) << 2)
;
mov edi,spo_pjDstIn[esp]
mov esi,spo_pjSrcIn[esp]
mov ebx,spo_SrcLeft[esp] ; SrcLeft
add ebx,31 ; SrcLeft + 31
shr ebx,5 ; (SrcLeft + 31) >> 5
lea edi,[edi + 4 * eax] ; pjDstIn + (((DstLeft+31) >> 5) * 4
lea esi,[esi + 4 * ebx] ; pjSrcIn + ((SrcLeft + 31) >> 5) * 4
;
; save scan line addresses
;
mov edx,spo_cy[esp] ; keep track of # of scan lines
mov ecx,spo_lXorMask[esp] ; load xor mask
mov ebp,spo_DeltaSrcIn[esp]
Opaq1to1DwordLoop:
mov eax,spo_lEndOffset[esp] ; load loop x count
@@:
;
; read and write 1 DWORD
;
mov ebx,[4 * eax -4 + esi] ; load dw 0
xor ebx,ecx ; xor mask
mov [4 * eax -4 + edi],ebx ; store dw 0
dec eax ; dec dword count
jnz @B ; loop until done
;
; done with scan line, add strides
;
add edi,spo_DeltaDstIn[esp] ; pjDst += DeltaDst
add esi,ebp ; pjSrc += DeltaSrc
;
; dec and check cy
;
dec edx ; cy--
jne Opaq1to1DwordLoop
Opaq1to1Partial:
;
; handle partial DWORD blts. first the start case, which may be the
; left edge or it may be a combined DWORD left and right, in which
; case the start and end masks are just anded together
;
mov eax,spo_DstLeft[esp] ; left position
and eax,01Fh ; left edge pixels
je Opaq1to1RightEdge ; if zero, no left edge
;
; There is a left edge, are right and left DWORDS the same
;
mov ebx,spo_DstRight[esp] ; right edge
mov edx,-32 ; ~0x1F - dword address alignment
and ebx,edx ; DstRight & ~0x1F
and edx,spo_DstLeft[esp] ; DstLeft & ~0x1F
mov ecx,spo_ulStartMask[esp] ; get start start mask
cmp edx,ebx ; if equal, start and stop
jne @F ; in same DWORD
;
; left and right are same src DWORD, combine masks, then
; set right edge to 0 so Opaq1to1RightEdge won't run
;
and ecx,spo_ulEndMask[esp] ; combine with start mask
mov spo_DstRight[esp],0 ; save 0
@@:
mov spo_ulStartMask[esp],ecx ; Src Mask
not ecx
mov spo_tMask[esp],ecx ; Dst Mask = ~SrcMask
;
; calc left edge starting addresses
;
; pjDst = pjDstIn + ((DstLeft >> 5) << 2)
; pjSrc = pjSrcIn + ((SrcLeft >> 5) << 2)
;
mov eax,spo_DstLeft[esp] ; load
mov ebx,spo_SrcLeft[esp] ; load
shr eax,5 ; DstLeft >> 3
shr ebx,5 ; SrcLeft >> 3
mov edi,spo_pjDstIn[esp] ; load dst addr
mov esi,spo_pjSrcIn[esp] ; load src addr
lea edi,[edi + 4*eax] ; dest address
lea esi,[esi + 4*ebx] ; src address
mov ecx,spo_DeltaDstIn[esp]
mov ebp,spo_DeltaSrcIn[esp]
mov edx,spo_cy[esp]
;
; unroll left edge loop 2 times. Address fix-up is needed
;
sub esi,ebp ; fix-up address for odd cycle
sub edi,ecx ; fix-up address for odd cycle
add edx,1
shr edx,1
jnc Opaq1to1PartialLeftOdd
add esi,ebp ; un-fix-up address for even cycle
add edi,ecx ; un-fix-up address for even cycle
@@:
mov eax,[esi] ; load src
mov ebx,[edi] ; load dst
xor eax,spo_lXorMask[esp] ; xor
and ebx,spo_tMask[esp]
and eax,spo_ulStartMask[esp]
or eax,ebx ; mask src with lStartMask
mov [edi],eax
Opaq1to1PartialLeftOdd:
mov eax,[esi + ebp] ; load src
mov ebx,[edi + ecx] ; load dst
xor eax,spo_lXorMask[esp] ; xor
and ebx,spo_tMask[esp]
and eax,spo_ulStartMask[esp]
or eax,ebx ; mask src with lStartMask
mov [edi + ecx],eax
lea esi,[esi + 2*ebp]
lea edi,[edi + 2*ecx]
dec edx
jnz @B
;
; done with left edge
;
Opaq1to1RightEdge:
;
; right edge, check DstRight[4:0] for pixels on the right edge
;
mov eax,spo_DstRight[esp] ; load
and eax,01Fh ; are there right edge pixels
jz EndOpaq1to1
;
; there are right edge pixels to draw, calc pjSrc and pjDst
;
; pjDst = pjDstIn + ((DstRight >> 5) << 2)
; pjSrc = pjSrcIn + (((SrcLeft + (DstRight - DstLeft)) >> 5) << 2)
;
mov ebx,spo_DstRight[esp] ; DstRight
mov ecx,spo_DstLeft[esp] ; load DstLeft
mov eax,spo_SrcLeft[esp] ; load SrcLeft
mov edx,ebx ; edi = DstRight
sub ebx,ecx ; (DstRight - DstLeft)
add eax,ebx ; SrcLeft + (DstRight - DstLeft)
shr edx,5 ; DstRight >> 5
shr eax,5 ; Src Offset >> 5
mov edi,spo_pjDstIn[esp] ;
mov esi,spo_pjSrcIn[esp] ;
lea edi,[edi + 4*edx] ; pjDstIn + DstOffset
lea esi,[esi + 4*eax] ; pjSrcIn + SrcOffset
mov ecx,spo_DeltaDstIn[esp]
mov ebp,spo_DeltaSrcIn[esp]
mov edx,spo_cy[esp]
mov eax,spo_ulEndMask[esp] ; load end mask
not eax ; invert for dst mask
mov spo_ulStartMask[esp],eax ; not mask for dst
;
; unroll right edge 2 times. Address fix-up is needed.
;
sub esi,ebp ; fix up address for odd cycle
sub edi,ecx ; fix up address for odd cycle
inc edx ; add 1 to loop count
shr edx,1 ; device loop count by 2
jnc Opaq1to1PartialRightOdd ; if carry, loop was initially even
add esi,ebp ; fix up address for odd cycle
add edi,ecx ; fix up address for odd cycle
@@:
mov eax,[esi] ; load src dw 0
mov ebx,[edi] ; load dst dw 0
xor eax,spo_lXorMask[esp] ; xor src dw
and ebx,spo_ulStartMask[esp] ; mask dst dw
and eax,spo_ulEndMask[esp] ; mask src dw
or eax,ebx ; combine
mov [edi],eax ; store
Opaq1to1PartialRightOdd:
mov eax,[esi + ebp] ; load src dw 1
mov ebx,[edi + ecx] ; load dst dw 1
xor eax,spo_lXorMask[esp] ; xor src dw
and ebx,spo_ulStartMask[esp] ; mask dst dw
and eax,spo_ulEndMask[esp] ; mask src dw
or eax,ebx ; combine
mov [edi + ecx],eax ; store
lea esi,[esi + 2 * ebp] ; 2nd next scan line
lea edi,[edi + 2 * ecx] ; 2nd next scan line
dec edx ; dec loop count
jnz @B ; repeat
jmp EndOpaq1to1
;
; Opaque text expansion, no src required. lXorMask is masked and written to dest
;
Opaq1to1NoSrc:
;
; calculate the number of full dwords to copy
;
; Full DWORDS = (DstRight >> 5) - ((DstLeft + 31) >> 5)
;
mov eax,spo_DstLeft[esp] ; Left (startint Dst Location)
mov ebx,spo_DstRight[esp] ; right (last pixel)
add eax,31 ; DstLeft + 31
shr ebx,5 ; DstRight >> 5
shr eax,5 ; (DstLeft+31) >> 5
;
; if <= 0, then no full DWORDS need to be copied, go to end cases
;
sub ebx,eax ; RightDW - LeftDw
jle Opaq1to1PartialNoSrc
mov spo_lEndOffset[esp],ebx ; save
;
; calc starting Dst Address = pjDstIn + (((DstLeft+31) >> 5) << 2)
;
mov edi,spo_pjDstIn[esp]
lea edi,[edi + 4 * eax] ; pjDstIn + (((DstLeft+31) >> 5) * 4
;
; save scan line addresses
;
mov edx,spo_cy[esp] ; keep track of # of scan lines
mov ebx,spo_lXorMask[esp] ; xor mask
not ebx ; not xor mask
mov esi,spo_DeltaDstIn[esp] ; dest scan line inc
Opaq1to1DwordLoopNoSrc:
;
; DWORD loop, unrolled 2 times. lEndOffset has been pre-incremented so
; a loop count of 1 is incremented to 2, then shifted to make a loop
; count of 1 with a carry of zero. ( no carry represents a odd element)
; Address fix-up is required.
;
mov eax,spo_lEndOffset[esp] ; get inner loop count
@@:
;
; write 1 DWORD
;
mov [4 * eax - 4 + edi],ebx ; store dw 0
dec eax ; dec dword count
jnz @B ; loop until done
;
; done with scan line, add strides
;
add edi,esi ; pjDst += DeltaDst
;
; dec and check cy
;
dec edx ; cy--
jne Opaq1to1DwordLoopNoSrc
Opaq1to1PartialNoSrc:
;
; handle partial DWORD blts. first the start case, which may be the
; left edge or it may be a combined DWORD left and right, in which
; case the start and end masks are just anded together
;
mov eax,spo_DstLeft[esp] ; left position
and eax,01Fh ; left edge pixels
je Opaq1to1RightEdgeNoSrc ; if zero, no left edge
;
; There is a left edge, are right and left DWORDS the same
;
mov edx,spo_DstRight[esp] ; right edge
mov ecx,-32 ; ~0x1F - dword address alignment
and ebx,ecx ; DstLeft & ~0x1F
and edx,ecx ; DstRight & ~0x1F
mov ecx,spo_ulStartMask[esp] ; get start start mask
cmp edx,ebx ; if equal then start and end
jne @F ; in same DWORD
;
; left and right are same src DWORD, combine masks, then
; set right edge to 0 so Opaq1to1RightEdge won't run
;
and ecx,spo_ulEndMask[esp] ; combine with start mask
mov spo_DstRight[esp],0 ; save 0
@@:
;
; calc left edge starting addresses
;
; pjDst = pjDstIn + ((DstLeft >> 5) << 2)
;
mov eax,spo_DstLeft[esp] ; load
shr eax,5 ; DstLeft >> 3
mov edi,spo_pjDstIn[esp] ; load dst addr
lea edi,[edi + 4*eax] ; dest address
mov ebx,spo_DeltaDstIn[esp] ; dst scan line stride
mov esi,spo_lXorMask[esp] ; load xor mask
not esi ; invert
and esi,ecx ; masked constant src
not ecx ; ~lStartMask
mov edx,spo_cy[esp] ; loop count
;
; unroll left edge loop 2 times. Address fix-up is needed
;
sub edi,ebx ; fix-up address for odd cycle
add edx,1 ; offset for unrolling
shr edx,1 ; 2 times through loop
jnc Opaq1to1LeftEdgeOddNoSrc ; do first "odd" dword
add edi,ebx ; un-fix-up address for even cycle
@@:
mov eax,[edi] ; load src
and eax,ecx ; mask
or eax,esi ; invert
mov [edi],eax ; store
Opaq1to1LeftEdgeOddNoSrc:
mov eax,[edi + ebx] ; load src
and eax,ecx ; mask
or eax,esi ; invert
mov [edi + ebx],eax ; store
lea edi,[edi][ebx*2] ; dst += 2 * DeltaDst
dec edx ; loop count
jnz @b ;
;
; done with left edge
;
Opaq1to1RightEdgeNoSrc:
;
; right edge, check DstRight[4:0] for pixels on the right edge
;
mov eax,spo_DstRight[esp] ; load
and eax,01Fh ; are there right edge pixels
jz EndOpaq1to1
;
; there are right edge pixels to draw, calc pjSrc and pjDst
;
; pjDst = pjDstIn + ((DstRight >> 5) << 2)
;
mov ebx,spo_DstRight[esp] ; DstRight
shr ebx,5 ; DstRight >> 5
mov edi,spo_pjDstIn[esp] ;
lea edi,[edi + 4*ebx] ; pjDstIn + DstOffset
mov ecx,spo_ulEndMask[esp] ; load end mask
mov esi,spo_lXorMask[esp] ; load xor mask
not esi ; invert
and esi,ecx ; mask with endmask
not ecx ; invert end mask
mov ebx,spo_DeltaDstIn[esp] ; dst scan line inc
mov edx,spo_cy[esp] ; loop count
;
; unroll right edge 2 times, address fix-up is needed
;
sub edi,ebx ; fix-up address for odd cycle
add edx,1 ; offset for 2 times unrolling
shr edx,1 ; LoopCount / 2
jnc Opaq1to1RightEdgeOddNoSrc ; do single "odd" DWORD
add edi,ebx ; un-fix-up address for even cycle
@@:
mov eax,[edi] ; load src dword
and eax,ecx ; mask
or eax,esi ; combine
mov [edi],eax ; store
Opaq1to1RightEdgeOddNoSrc:
mov eax,[edi+ebx] ; load src dword
and eax,ecx ; mask
or eax,esi ; combine
mov [edi+ebx],eax ; store
lea edi,[edi][ebx*2] ; dst += 2 * DeltaDst
dec edx ; dec loop count
jnz @B ; and loop until zero
;
; done!
;
EndOpaq1to1:
add esp,5*4
pop ebp
pop esi
pop edi
pop ebx
ret 44
vSrcOpaqCopyS1D1@44 endp
_TEXT$01 ends
end