1349 lines
53 KiB
NASM
1349 lines
53 KiB
NASM
page ,132
|
||
title BitBLT
|
||
;---------------------------Module-Header------------------------------;
|
||
; Module Name: cblt.asm
|
||
;
|
||
; Copyright (c) 1992 Microsoft Corporation
|
||
;-----------------------------------------------------------------------;
|
||
.386
|
||
|
||
;!!! All the code to convert from color to mono in this file needs to
|
||
;!!! be deleted. We don't need to do it anymore.
|
||
|
||
|
||
|
||
|
||
ifndef DOS_PLATFORM
|
||
.model small,c
|
||
else
|
||
ifdef STD_CALL
|
||
.model small,c
|
||
else
|
||
.model small,pascal
|
||
endif; STD_CALL
|
||
endif; DOS_PLATFORM
|
||
|
||
assume ds:FLAT,es:FLAT,ss:FLAT
|
||
assume fs:nothing,gs:nothing
|
||
|
||
.code
|
||
|
||
_TEXT$01 SEGMENT DWORD USE32 PUBLIC 'CODE'
|
||
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
||
|
||
.xlist
|
||
include stdcall.inc ; calling convention cmacros
|
||
|
||
include i386\cmacFLAT.inc ; FLATland cmacros
|
||
include i386\display.inc ; Display specific structures
|
||
include i386\ppc.inc ; Pack pel conversion structure
|
||
include i386\bitblt.inc ; General definitions
|
||
include i386\ropdefs.inc ; Rop definitions
|
||
include i386\egavga.inc ; EGA register definitions
|
||
include i386\devdata.inc
|
||
.list
|
||
|
||
extrn roptable:byte
|
||
;-----------------------------Public-Routine----------------------------;
|
||
; CBLT
|
||
;
|
||
; Compile a BLT onto the stack.
|
||
;
|
||
; Entry:
|
||
; EDI --> memory on stack to receive BLT program
|
||
; EBP --> fr structure
|
||
; Returns:
|
||
; Nothing
|
||
;-----------------------------------------------------------------------;
|
||
|
||
fr equ [ebp] ;Alias for the EBP-based fr structure; kept for consistency with other sources
|
||
|
||
cProc cblt
|
||
|
||
subttl Compile - Outer Loop
|
||
page
|
||
|
||
; If converting a packed pel format to planar format, add the code
|
||
; to convert one source scan into planar format
|
||
|
||
test fr.ppcBlt.fb,PPC_NEEDED
|
||
jz no_pack_pel_conversion
|
||
mov al,I_MOV_EBP_DWORD_I ;Give conversion routine access
|
||
stosb ; to conversion data
|
||
lea eax,fr.ppcBlt
|
||
stosd
|
||
mov al,I_CALL_DISP32 ;Call the static conversion code
|
||
stosb
|
||
mov eax,fr.ppcBlt.pfnConvert
|
||
sub eax,edi
|
||
sub eax,4 ;4 for length of displacement
|
||
stosd
|
||
no_pack_pel_conversion:
|
||
|
||
; Initialize plane indicator.
|
||
|
||
mov ax,(PLANE_1*256)+I_MOV_BL_BYTE_I
|
||
stosw
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Create the outerloop code. The first part of this code will save
|
||
; the scan line count register, destination pointer, and the source
|
||
; pointer (if there is a source).
|
||
;
|
||
; The generated code should look like:
|
||
;
|
||
; push ecx ;Save scan line count
|
||
; push edi ;Save destination pointer
|
||
; < push esi > ;Save source pointer
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
mov fr.pNextPlane,edi ;Save address of next plane code
|
||
mov bl,fr.the_flags
|
||
mov ax,I_PUSH_ECX_PUSH_EDI ;Save scan line count, destination ptr
|
||
stosw
|
||
test bl,F0_SRC_PRESENT ;Is a source needed?
|
||
jz cblt_2020 ; No
|
||
mov al,I_PUSH_ESI ; Yes, save source pointer
|
||
stosb
|
||
cblt_2020:
|
||
|
||
subttl Compile - Plane Selection
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; If the destination device is color and the display is involved in
|
||
; the blt, then the color plane selection logic must be added in.
|
||
; If the destination is monochrome, then no plane logic is needed.
|
||
; Two color memory bitmaps will not cause the plane selection logic
|
||
; to be copied.
|
||
;
|
||
; The generated code should look like:
|
||
;
|
||
; < push ebx > ;Save plane index
|
||
; < plane selection > ;Select plane
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
test bl,F0_DEST_IS_COLOR ;Is the destination color?
|
||
jz cblt_pattern_fetch ; No
|
||
mov al,I_PUSH_EBX ;Save plane index
|
||
stosb
|
||
test bl,F0_DEST_IS_DEV+F0_SRC_IS_DEV ;Is the device involved?
|
||
jz cblt_pattern_fetch ; No
|
||
|
||
; The device is involved for a color blt. Copy the logic for selecting
|
||
; the read/write plane
|
||
|
||
mov esi,offset FLAT:cps ;--> plane select logic
|
||
mov ecx,LENGTH_CPS
|
||
rep movsb
|
||
|
||
subttl Compile - Pattern Fetch
|
||
page
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Set up any pattern fetch code that might be needed.
|
||
; The pattern code has many fixups, so it isn't taken from a
|
||
; template. It is just stuffed as it is created.
|
||
;
|
||
; Entry: None
|
||
;
|
||
; Exit: DH = pattern
|
||
;
|
||
; Uses: AX,BX,CX,DH,flags
|
||
;
|
||
; For solid color brushes:
|
||
;
|
||
; mov dh,color
|
||
;
|
||
; For monochrome brushes:
|
||
;
|
||
; mov ebx,12345678h ;Load address of the brush
|
||
; mov dh,7[ebx] ;Get next brush byte
|
||
; mov al,[12345678h] ;Get brush index
|
||
; add al,direction ;Add displacement to next byte (+1/-1)
|
||
; and al,00000111b ;Keep it in range
|
||
; mov [12345678h],al ;Store displacement to next plane's bits
|
||
;
|
||
; For color brushes:
|
||
;
|
||
; mov ebx,12345678h ;Load address of the brush
|
||
; mov dh,7[ebx] ;Get next brush byte
|
||
; mov al,[12345678h] ;Get brush index
|
||
; add al,SIZE Pattern ;Add displacement to next plane's bits
|
||
; and al,00011111b ;Keep it within the brush
|
||
; mov [12345678h],al ;Store displacement to next plane's bits
|
||
;
|
||
; The address of the increment for the brush is saved for
|
||
; the plane looping logic if the destination is a three plane
|
||
; color device. For a four plane color device, the AND
|
||
; automatically handles the wrap and no fixup is needed at
|
||
; the end of the plane loop.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_pattern_fetch:
|
||
test bl,F0_PAT_PRESENT ;Is a pattern needed?
|
||
jz cblt_initial_byte_fetch ; No, skip pattern code
|
||
mov al,fr.brush_accel ;Solid color needs no fetch logic
|
||
test al,SOLID_BRUSH
|
||
jz cblt_nonsolid_brush
|
||
and al,MM_ALL
|
||
shl eax,16
|
||
mov ax,I_TEST_BL_BYTE_I
|
||
stosd
|
||
dec edi ;Was only a three byte instruction
|
||
mov eax,I_SETNZ_DH
|
||
stosd
|
||
dec edi ;Was only a three byte instruction
|
||
mov ax,I_NEG_DH
|
||
stosw
|
||
jmp short cblt_initial_byte_fetch
|
||
|
||
cblt_nonsolid_brush:
|
||
mov al,I_MOV_EBX_DWORD_I ;mov ebx,lpPBrush
|
||
stosb
|
||
mov eax,fr.lpPBrush
|
||
stosd
|
||
mov ax,I_MOV_DH_EBX_DISP8 ;mov dh,pat_row[ebx]
|
||
stosw
|
||
mov edx,edi ;Save address of the brush index
|
||
mov al,fr.pat_row ;Set initial pattern row
|
||
mov bh,00000111b ;Set brush index mask
|
||
and al,bh ;Make sure it's legal at start
|
||
stosb
|
||
mov al,I_MOV_AL_MEM
|
||
stosb ;mov al,[xxxxxxxx]
|
||
mov eax,edx
|
||
stosd
|
||
mov al,I_ADD_AL_BYTE_I
|
||
mov ah,direction ;Set brush index
|
||
errnz INCREASE-1 ;Must be a 1
|
||
errnz DECREASE+1 ;Must be a -1
|
||
|
||
test bl,F0_COLOR_PAT ;Color pattern required?
|
||
jz cblt_2060 ; No
|
||
mov fr.addr_brush_index,edx ;Save address of brush index
|
||
mov ah,SIZE_PATTERN ;Set increment to next plane
|
||
mov bh,00011111b ;Set brush index mask
|
||
|
||
cblt_2060:
|
||
stosw
|
||
mov ah,bh ;and al,BrushIndexMask
|
||
mov al,I_AND_AL_BYTE_I
|
||
stosw
|
||
mov al,I_MOV_MEM_AL
|
||
stosb ;mov [xxxxxxxx],al
|
||
mov eax,edx
|
||
stosd
|
||
|
||
|
||
subttl Compile - Initial Byte Fetch
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Create the initial byte code. This may consist of one or two
|
||
; initial fetches (if there is a source), followed by the required
|
||
; logic action. The code should look something like:
|
||
;
|
||
; BLTouterloop:
|
||
; < mov bp,mask_p > ;Load phase mask for entire loop
|
||
; < xor bh,bh > ;Clear previous unused bits
|
||
;
|
||
; ; Perform first byte fetch
|
||
;
|
||
; < lodsb > ;Get source byte
|
||
; < color<==>mono munge > ;Color <==> mono conversion
|
||
; < phase alignment > ;Align bits as needed
|
||
;
|
||
; ; If an optional second fetch is needed, perform one
|
||
;
|
||
; < lodsb > ;Get source byte
|
||
; < color to mono munge > ;Color to mono munging
|
||
; < phase alignment > ;Align bits as needed
|
||
;
|
||
; logical action ;Perform logical action required
|
||
;
|
||
; mov ah,[edi] ;Get destination
|
||
; and ax,cx ;Saved unaltered bits
|
||
; or al,ah ; and mask in altered bits
|
||
; stosb ;Save the result
|
||
;
|
||
; The starting address of the first fetch/logical combination will be
|
||
; saved so that the code can be copied later instead of recreating it
|
||
; (if there are two fetches, the first fetch will not be copied)
|
||
;
|
||
; The length of the code up to the masking for altered/unaltered bits
|
||
; will be saved so the code can be copied into the inner loop.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_initial_byte_fetch:
|
||
xor dx,dx
|
||
or dh,fr.phase_h ;Is the phase 0? (also get the phase)
|
||
jz cblt_3020 ; Yes, so no phase alignment needed
|
||
mov al,I_SIZE_OVERRIDE
|
||
stosb
|
||
mov al,I_MOV_BP_WORD_I ;Set up the phase mask
|
||
stosb
|
||
mov ax,fr.mask_p ;Place the mask into the instruction
|
||
stosw
|
||
mov ax,I_XOR_BH_BH ;Clear previous unused bits
|
||
stosw
|
||
|
||
cblt_3020:
|
||
mov fr.start_fl,edi ;Save starting address of action
|
||
test fr.the_flags,F0_SRC_PRESENT ;Is there a source?
|
||
jz cblt_4000 ; No, don't generate fetch code
|
||
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Generate the required sequence of instructions for a fetch
|
||
; sequence. Only the minimum code required is generated.
|
||
;
|
||
; The code generated will look something like the following:
|
||
;
|
||
; BLTfetch:
|
||
; < lodsb > ;Get the next byte
|
||
; < color munging > ;Mono <==> color munging
|
||
;
|
||
; ; If the phase alignment isn't zero, then generate the minimum
|
||
; ; phase alignment needed. RORs or ROLs will be generated,
|
||
; ; depending on the fastest sequence. If the phase alignment
|
||
; ; is zero, then no phase alignment code will be generated.
|
||
;
|
||
; < ror al,n > ;Rotate as needed
|
||
; < mov ah,al > ;Mask used, unused bits
|
||
; < and ax,bp > ;(BP) = phase mask
|
||
; < or al,bh > ;Mask in old unused bits
|
||
; < mov bh,ah > ;Save new unused bits
|
||
;
|
||
;
|
||
; The nice thing about the above is it is possible for the fetch to
|
||
; degenerate into a simple LODSB instruction.
|
||
;
|
||
; Currently: BL = the_flags
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_3040:
|
||
mov fr.moore_flags,0 ;Assume REP cannot be used
|
||
shl bl,1 ;Color conversion?
|
||
jnc cblt_3180 ; No, we were lucky this time
|
||
errnz F0_GAG_CHOKE-10000000b
|
||
js cblt_3100 ;Mono ==> color
|
||
errnz F0_COLOR_PAT-01000000b
|
||
|
||
subttl Compile - Initial Byte Fetch, Color ==> Mono
|
||
page
|
||
|
||
; !!! Color to mono should not be needed anymore since the Engine will
|
||
; !!! not be calling me to do it! Let's remove this code!
|
||
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Generate the code to go from color to mono. Color to mono
|
||
; should map all colors that are background to 1's (white), and
|
||
; all colors which aren't background to 0's (black). If the source
|
||
; is the display, then the color compare register will be used.
|
||
; If the source is a memory bitmap, each byte of the plane will be
|
||
; XORed with the color from that plane, with the results all ORed
|
||
; together. The final result will then be complemented, giving
|
||
; the desired result.
|
||
;
|
||
; The generated code for bitmaps should look something like:
|
||
;
|
||
; mov al,next_plane[esi] ;Get C1 byte of source
|
||
; mov ah,2*next_plane[esi] ;Get C2 byte of source
|
||
; xor ax,C1BkColor+(C2BkColor*256) ;XOR with plane's color
|
||
; or ah,al ;OR the result
|
||
; mov al,3*next_plane[esi] ;Get C3 byte of source
|
||
; xor al,C3BkColor
|
||
; or ah,al
|
||
; lodsb ;Get C0 source
|
||
; xor al,C0BkColor ;XOR with C0BkColor
|
||
; or al,ah ;OR with previous result
|
||
; not al ;NOT to give 1's where background
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_3070:
|
||
test bl,F0_SRC_IS_DEV SHL 1 ;If device, use color compare register
|
||
jz cblt_3080 ;It's a memory bitmap
|
||
|
||
; We're in luck, the color compare register can be used. Set up
|
||
; for a color read, and use the normal mono fetch code. Show the
|
||
; innerloop code that the REP instruction can be used if this is
|
||
; a source copy.
|
||
|
||
mov fr.moore_flags,F1_REP_OK
|
||
mov ecx,edx ;Save dx
|
||
mov ah,fr.bkColor.SPECIAL ;Get SPECIAL byte of color
|
||
and ah,MM_ALL
|
||
mov al,GRAF_COL_COMP ;Stuff color into compare register
|
||
mov dx,EGA_BASE+GRAF_ADDR
|
||
out dx,ax
|
||
mov ax,GRAF_CDC ;Set Color Don't Care register
|
||
out dx,ax
|
||
mov ax,M_COLOR_READ SHL 8 + GRAF_MODE
|
||
out dx,ax
|
||
mov edx,ecx
|
||
jmp cblt_3180 ;Go generate mono fetch code
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; The source is a memory bitmap. Generate the code to compute
|
||
; the result of the four planes:
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_3080:
|
||
mov ax,I_MOV_AL_ESI_DISP32
|
||
stosw
|
||
mov eax,fr.src.next_plane
|
||
stosd
|
||
mov ebx,eax ;Save plane width
|
||
mov ax,I_MOV_AH_ESI_DISP32
|
||
stosw
|
||
lea eax,[ebx*2]
|
||
stosd
|
||
mov al,I_SIZE_OVERRIDE
|
||
stosb
|
||
mov al,I_XOR_AX_WORD_I
|
||
stosb
|
||
mov al,fr.bkColor.SPECIAL ;get the color index byte
|
||
mov ah,al ;have the same in AH
|
||
and ax,(C2_BIT shl 8) or C1_BIT
|
||
neg al
|
||
sbb al,al ;al will be 0ffh if plane bit is 1
|
||
neg ah
|
||
sbb ah,ah ;ah wil be 0ffh if plane bit is 1
|
||
stosw
|
||
mov ax,I_OR_AH_AL
|
||
stosw
|
||
|
||
mov ax,I_MOV_AL_ESI_DISP32
|
||
stosw
|
||
lea eax,[ebx*2][ebx]
|
||
stosd
|
||
mov al,I_XOR_AL_BYTE_I
|
||
mov ah,fr.bkColor.SPECIAL
|
||
and ah,C3_BIT
|
||
neg ah
|
||
sbb ah,ah
|
||
stosw
|
||
mov ax,I_OR_AH_AL
|
||
stosw
|
||
|
||
mov ax,I_LODSB+(I_XOR_AL_BYTE_I*256)
|
||
stosw
|
||
mov al,fr.bkColor.SPECIAL
|
||
shr al,1 ;get C0_BIT into carry
|
||
sbb al,al ;make it 0ffh if bit was set
|
||
.errnz C0_BIT - 00000001b
|
||
stosb ;save the modified value
|
||
errnz pcol_C0
|
||
mov ax,I_OR_AL_AH
|
||
stosw
|
||
mov ax,I_NOT_AL
|
||
stosw
|
||
jmp cblt_3240 ;Go create logic code
|
||
|
||
subttl Compile - Initial Byte Fetch, Mono ==> Color
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; The conversion is mono to color. Generate the code to
|
||
; do the conversion, and generate the table which will
|
||
; have the conversion values in it.
|
||
;
|
||
; When going from mono to color, 1 bits are considered to be
|
||
; the background color, and 0 bits are considered to be the
|
||
; foreground color.
|
||
;
|
||
; For each plane:
|
||
;
|
||
; If the foreground=background=1, then 1 can be used in
|
||
; place of the source.
|
||
;
|
||
; If the foreground=background=0, then 0 can be used in
|
||
; place of the source.
|
||
;
|
||
; If the foreground=0 and background=1, then the source
|
||
; can be used as is.
|
||
;
|
||
; If the foreground=1 and background=0, then the source
|
||
; must be complemented before using.
|
||
;
|
||
; Looks like a boolean function to me.
|
||
;
|
||
; An AND mask and an XOR mask will be computed for each plane,
|
||
; based on the above. The source will then be processed against
|
||
; the table. The generated code should look like
|
||
;
|
||
; lodsb
|
||
; and al,[xxxx]
|
||
; xor al,[xxxx+1]
|
||
;
|
||
; The table for munging the colors as stated above should look like:
|
||
;
|
||
; BackGnd ForeGnd Result AND XOR
|
||
; 1 1 1 00 FF
|
||
; 0 0 0 00 00
|
||
; 1 0 S FF 00
|
||
; 0 1 not S FF FF
|
||
;
|
||
; From this, it can be seen that the XOR mask is the same as the
|
||
; foreground color. The AND mask is the XOR of the foreground
|
||
; and the background color. Not too hard to compute
|
||
;
|
||
; It can also be seen that if the background color is white and the
|
||
; foreground (text) color is black, then the conversion needn't be
|
||
; generated (it just gives the source). This is advantageous since
|
||
; it will allow phased aligned source copies to use REP MOVSW.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
; Check to see if the background color is white, and the
|
||
; foreground color is black. This can be determined by
|
||
; looking at the accelerator flags in the physical color.
|
||
|
||
cblt_3100:
|
||
mov ah,fr.TextColor.SPECIAL
|
||
xor ah,MONO_BIT ;Map black to white
|
||
and ah,fr.bkColor.SPECIAL ;AND in background color
|
||
cmp ah,MONO_BIT+ONES_OR_ZEROS
|
||
jne cblt_3110 ;Not black
|
||
mov fr.moore_flags,F1_REP_OK+F1_NO_MUNGE ;Show reps as ok, no color munge table
|
||
jmp short cblt_3180 ;Normal fetch required
|
||
|
||
; No way around it. The color conversion table and code
|
||
; must be generated.
|
||
|
||
cblt_3110:
|
||
mov cl,fr.bkColor.SPECIAL ;Get BackGround Colors
|
||
mov ch,fr.TextColor.SPECIAL ;Get ForeGround Colors
|
||
xor cl,ch
|
||
shr cl,1
|
||
sbb al,al
|
||
shr ch,1
|
||
sbb ah,ah
|
||
mov word ptr fr.ajM2C.(pcol_C0 * 2),ax
|
||
shr cl,1
|
||
sbb al,al
|
||
shr ch,1
|
||
sbb ah,ah
|
||
mov word ptr fr.ajM2C.(pcol_C1 * 2),ax
|
||
shr cl,1
|
||
sbb al,al
|
||
shr ch,1
|
||
sbb ah,ah
|
||
mov word ptr fr.ajM2C.(pcol_C2 * 2),ax
|
||
shr cl,1
|
||
sbb al,al
|
||
shr ch,1
|
||
sbb ah,ah
|
||
mov word ptr fr.ajM2C.(pcol_C3 * 2),ax
|
||
errnz <TextColor - bkColor - 4>
|
||
|
||
; Generate the code for munging the color as stated above.
|
||
|
||
mov ax,I_LODSB
|
||
stosb ;lodsb
|
||
mov ax,I_AND_AL_MEM ;and al,[xxxx]
|
||
stosw
|
||
lea eax,fr.ajM2C ; Set address of color munge
|
||
stosd
|
||
mov ebx,eax ; Save address
|
||
mov ax,I_XOR_AL_MEM ;xor al,[xxxx]
|
||
stosw
|
||
lea eax,1[ebx] ; Set address of XOR mask
|
||
stosd
|
||
jmp short cblt_3240
|
||
|
||
; Just need to generate the normal fetch sequence (lodsb)
|
||
|
||
cblt_3180:
|
||
mov al,I_LODSB ;Generate source fetch
|
||
stosb
|
||
|
||
subttl Compile - Phase Alignment
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Generate the phase alignment if any.
|
||
;
|
||
; It is assumed that AL contains the source byte
|
||
;
|
||
; Currently:
|
||
; DH = phase alignment
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_3240:
|
||
mov ecx,edi ;end of fetch code
|
||
sub ecx,fr.start_fl ;start of fetch code
|
||
mov fr.cFetchCode,ecx ;save size of fetch code
|
||
xor ecx,ecx ;Might have garbage in it
|
||
or dh,dh ;Any phase alignment?
|
||
jz cblt_3280 ; No, so skip alignment
|
||
mov cl,dh ;Get horizontal phase for rotating
|
||
mov ax,I_ROL_AL_N ;Assume rotate left n times
|
||
cmp cl,5 ;4 or less rotates?
|
||
jc cblt_3260 ; Yes
|
||
neg cl ; No, compute ROR count
|
||
add cl,8
|
||
mov ah,HIGH I_ROR_AL_N
|
||
errnz <(LOW I_ROL_AL_N)-(LOW I_ROR_AL_N)>
|
||
|
||
cblt_3260:
|
||
stosw ;Stuff the phase alignment rotates
|
||
mov al,cl ; then the phase alignment code
|
||
stosb
|
||
|
||
; Do not generate phase masking if there is only 1 src And only 1 dest byte.
|
||
; This is not just an optimization, see comments where these flags are set.
|
||
|
||
xor ch,ch
|
||
mov al,fr.first_fetch
|
||
and al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
|
||
xor al,FF_ONLY_1_SRC_BYTE or FF_ONLY_1_DEST_BYTE
|
||
jz cblt_3280
|
||
mov esi,offset FLAT:phase_align
|
||
mov ecx,PHASE_ALIGN_LEN
|
||
rep movsb
|
||
|
||
cblt_3280:
|
||
test fr.first_fetch,FF_TWO_INIT_FETCHES ;Generate another fetch?
|
||
jz cblt_4000 ; No
|
||
|
||
; A second fetch needs to be stuffed. Copy the one just created.
|
||
|
||
mov esi,fr.start_fl ;Set new start, get old
|
||
mov fr.start_fl,edi
|
||
mov ecx,edi ;Compute how long fetch is
|
||
sub ecx,esi ; and move the bytes
|
||
rep movsb
|
||
|
||
subttl Compile - ROP Generation
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Create the logic action code
|
||
;
|
||
; The given ROP will be converted into the actual code that
|
||
; performs the ROP.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
; Copy the ROP template into the BLT
|
||
|
||
cblt_4000:
|
||
mov ax,fr.operands ;Get back rop data
|
||
mov bl,ah ;Get count of number of bits to move
|
||
and ebx,HIGH ROPLength
|
||
shr ebx,2
|
||
movzx ecx,roptable+256[ebx] ;Get length into cx
|
||
errnz ROPLength-0001110000000000b
|
||
|
||
mov ebx,eax ;Get offset of the template
|
||
and ebx,ROPOffset
|
||
lea esi,roptable[ebx] ;--> the template
|
||
rep movsb ;Move the template
|
||
|
||
cblt_4020:
|
||
mov bx,ax ;Keep rop around
|
||
or ah,ah ;Generate a negate?
|
||
jns cblt_4040 ; No
|
||
mov ax,I_NOT_AL
|
||
stosw
|
||
|
||
public cblt_4040
|
||
cblt_4040::
|
||
mov fr.end_fl,edi ;Save end of fetch/logic operation
|
||
|
||
subttl Compile - Mask And Save
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Generate code to mask and save the result. If the destination
|
||
; isn't in a register, it will be loaded from ES:[DI] first. The
|
||
; mask operation will then be performed, and the result stored.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
mov ax,I_MOV_AH_DEST ; ah,[edi]
|
||
stosw
|
||
|
||
mov esi,offset FLAT:masked_store;Move rest of masked store template
|
||
movsb ;Move size override
|
||
movsd
|
||
movsw
|
||
errnz MASKED_STORE_LEN-7 ;Must be seven bytes long
|
||
mov ax,fr.start_mask ;Stuff start mask into
|
||
xchg ah,al ; the template
|
||
|
||
mov [edi][MASKED_STORE_MASK],ax
|
||
|
||
mov fr.end_fls,edi ;Save end of fetch/logic/store operation
|
||
|
||
subttl Compile - Inner Loop Generation
|
||
page
|
||
;-----------------------------------------------------------------------;
|
||
; Now for the hard stuff; The inner loop (said with a "gasp!").
|
||
;
|
||
; If there is no innerloop, then no code will be generated
|
||
; (now that's fast!).
|
||
;-----------------------------------------------------------------------;
|
||
|
||
cblt_5000:
|
||
mov edx,fr.inner_loop_count ;Get the loop count
|
||
or dx,dx ;If the count is null
|
||
jz cblt_6000 ; don't generate any code
|
||
|
||
;!!! Since we no longer pass in the old style rops, we can't enable this code
|
||
;!!! and should remove/alter it someday. Besides, most of it is in special.asm
|
||
if 0 ;!!!
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; We have something for a loop count. If this just happens to be
|
||
; a source copy (S) with a phase of zero, then the innerloop degenerates
|
||
; to a repeated MOVSB instruction. This little special case is
|
||
; worth checking for and handling!
|
||
;
|
||
; Also, if this is one of the special cases {P, Pn, DDx, DDxn}, then it
|
||
; will also be special cased since these are all pattern fills (pattern,
|
||
; not pattern, 0, 1).
|
||
;
|
||
; The same code can be shared for these routines, with the exception
|
||
; that patterns use a STOSx instruction instead of a MOVSx instruction
|
||
; and need a value loaded in AX
|
||
;
|
||
; So we lied a little. If a color conversion is going on, then the
|
||
; REP MOVSB might not be usable. If the F1_REP_OK flag has been set, then
|
||
; we can use it. The F1_REP_OK flag will be set for a mono ==> color
|
||
; conversion where the background color is white and the foreground
|
||
; color is black, or for a color ==> mono conversion with the screen
|
||
; as the source (the color compare register will be used).
|
||
;
|
||
; For the special cases {P, Pn, DDx, DDxn}, color conversion is
|
||
; not possible, so ignore it for them.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
mov bl,byte ptr fr.Rop ;Get the raster op
|
||
test bl,EPS_INDEX ;Can this be special cased?
|
||
jnz cblt_5500 ; No
|
||
errnz <HIGH EPS_INDEX>
|
||
errnz SPEC_PARSE_STR_INDEX ;The special case index must be 0
|
||
|
||
test bl,EPS_OFF ;Is this a source copy
|
||
jz cblt_5040 ; Yes
|
||
errnz <SOURCE_COPY AND 11b> ;Offset for source copy must be 0
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; We should have one of the following fill operations:
|
||
;
|
||
; P - Pattern
|
||
; Pn - NOT pattern
|
||
; DDx - 0 fill
|
||
; DDxn - 1 fill
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
mov ax,I_MOV_AL_0FFH ;Assume this is a 0 or 1 fill
|
||
test bl,01h ;Is it 0 or 1 fill?
|
||
jz cblt_5020 ; Yes, initialize AX with 0FFh
|
||
mov ax,I_MOV_AL_DH ; No, initialize AX with pattern
|
||
|
||
errnz PAT_COPY-0000000000100001b
|
||
errnz NOTPAT_COPY-0000000000000001b
|
||
errnz FILL_BLACK-0000000001000010b
|
||
errnz FILL_WHITE-0000000001100010b
|
||
|
||
cblt_5020:
|
||
stosw
|
||
mov ax,I_MOV_AH_AL
|
||
stosw
|
||
mov si,I_STOSB ;Set up for repeated code processor
|
||
test bl,LogPar ;If Pn or 0, then complement pattern
|
||
jnz cblt_5060 ; Is just P or 1
|
||
errnz <HIGH LogPar>
|
||
mov al,I_SIZE_OVERRIDE
|
||
stosb
|
||
mov ax,I_NOT_AX ; Is Pn or 0, complement AX
|
||
stosw
|
||
jmp short cblt_5060
|
||
|
||
errnz PAT_COPY-00100001b
|
||
errnz NOTPAT_COPY-00000001b
|
||
errnz FILL_BLACK-01000010b
|
||
errnz FILL_WHITE-01100010b
|
||
|
||
|
||
; This is a source copy. The phase must be zero for a source copy
|
||
; to be condensed into a REP MOVSx.
|
||
|
||
cblt_5040:
|
||
test fr.phase_h,0FFh ;Is horizontal phase zero?
|
||
jnz cblt_5500 ; No, can't condense source copy
|
||
mov si,I_MOVSB ;Set register for moving bytes
|
||
|
||
; For a color conversion, F1_REP_OK must be set.
|
||
|
||
test fr.the_flags,F0_GAG_CHOKE ;Color conversion?
|
||
jz cblt_5060 ; No, rep is OK to use
|
||
test fr.moore_flags,F1_REP_OK ; Yes, can we rep it?
|
||
jz cblt_5500 ; No, do it the hard way
|
||
|
||
|
||
;-----------------------------------------------------------------------;
|
||
; This is a source copy or pattern fill. Process an odd byte with
|
||
; a MOVSB or STOSB, then process the rest of the bytes with a REP
|
||
; MOVSW or a REP STOSW. If the REP isn't needed, leave it out.
|
||
;
|
||
; Don't get caught on this like I did! If the direction of the
|
||
; BLT is from right to left (decrementing addresses), then both
|
||
; the source and destination pointers must be decremented by one
|
||
; so that the next two bytes are processed, not the next byte and
|
||
; the byte just processed. Also, after all words have been processed,
|
||
; the source and destination pointers must be incremented by one to
|
||
; point to the last byte (since the last MOVSW or STOSW would have
|
||
; decremented both pointers by 2).
|
||
;
|
||
; If the target machine is an 8086, then it would be well worth the
|
||
; extra logic to align the fields on word boundaries before the MOVSxs
|
||
; if at all possible.
|
||
;
|
||
; The generated code should look something like:
|
||
;
|
||
; WARP8: ;This code for moving left to right
|
||
; movsb ;Process an odd byte
|
||
; mov ecx,gl_inner_loop_count/2 ;Set word count
|
||
; rep ;If a count, then repeat is needed
|
||
; movsw ;Move words until done
|
||
;
|
||
;
|
||
; WARP8: ;This code for moving right to left
|
||
; movsb ;Process an odd byte
|
||
; dec si ;adjust pointer for moving words
|
||
; dec di
|
||
; mov ecx,gl_inner_loop_count/2 ;Set word count
|
||
; rep ;If a count, then repeat is needed
|
||
; movsw ;Move words until done
|
||
; inc si ;adjust since words were moved
|
||
; inc di
|
||
;
|
||
;
|
||
; Of course, if any part of the above routine isn't needed, it isn't
|
||
; generated (i.e. the generated code might just be a single MOVSB)
|
||
;-----------------------------------------------------------------------;
|
||
|
||
cblt_5060:
|
||
shr edx,1 ;Byte count / 2 for words
|
||
jnc cblt_5080 ; No odd byte to move
|
||
mov ax,si ; Odd byte, move it
|
||
stosb
|
||
|
||
cblt_5080:
|
||
jz cblt_5140 ;No more bytes to move
|
||
xor bx,bx ;Flag as stepping from left to right
|
||
cmp bl,fr.step_direction ;Moving from the right to the left?
|
||
errnz STEPLEFT ; (left direction must be zero)
|
||
jnz cblt_5100 ; No
|
||
mov ax,I_DEC_ESI_DEC_EDI ; Yes, decrement both pointers
|
||
stosw
|
||
mov bx,I_INC_ESI_INC_EDI ;Set up to increment the pointers later
|
||
|
||
0cblt_5100:
|
||
cmp edx,1 ;Move one word or many words?
|
||
jz cblt_5120 ; Only one word
|
||
mov al,I_MOV_ECX_DWORD_I ; Many words, load count
|
||
stosb
|
||
mov eax,edx
|
||
stosd
|
||
mov al,I_REP ;a repeat instruction
|
||
stosb
|
||
|
||
cblt_5120:
|
||
mov al,I_SIZE_OVERRIDE
|
||
stosb
|
||
mov ax,si ;Set the word instruction
|
||
inc ax
|
||
stosb
|
||
errnz I_MOVSW-I_MOVSB-1 ;The word form of the instruction
|
||
errnz I_STOSW-I_STOSB-1 ; must be the byte form + 1
|
||
|
||
or bx,bx ;Need to increment the pointers?
|
||
jz cblt_5140 ; No
|
||
mov ax,bx ; Yes, increment both pointers
|
||
stosw
|
||
|
||
cblt_5140:
|
||
jmp cblt_6000 ;Done setting up the innerloop
|
||
page
|
||
|
||
endif
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; There is some count for the innerloop of the BLT. Generate the
|
||
; required BLT. Two or four copies of the BLT will be placed on the
|
||
; stack. This allows the LOOP instruction at the end to be distributed
|
||
; over two or four bytes instead of 1, saving 11 or 12 clocks for each
|
||
; byte (for 4). Multiply 12 clocks by ~ 16K and you save a lot of
|
||
; clocks!
|
||
;
|
||
; If there are less than four (two) bytes to be BLTed, then no looping
|
||
; instructions will be generated. If there are more than four (two)
|
||
; bytes, then there is the possibility of an initial jump instruction
|
||
; to enter the loop to handle the modulo n result of the loop count.
|
||
;
|
||
; The innerloop code will look something like:
|
||
;
|
||
; < mov cx,loopcount/n> ;load count if >n innerloop bytes
|
||
; < jmp short ??? > ;If a first jump is needed, do one
|
||
;
|
||
; BLTloop:
|
||
; replicate initial byte BLT code up to n times
|
||
;
|
||
; < loop BLTloop > ;Loop until all bytes processed
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_5500:
|
||
mov ebx,fr.end_fl ;Compute size of the fetch code
|
||
sub ebx,fr.start_fl
|
||
inc ebx ;A stosb will be appended
|
||
mov esi,4 ;Assume replication 4 times
|
||
mov cl,2 ; (shift count two bits left)
|
||
cmp ebx,32 ;Small enough for 4 times?
|
||
jc cblt_5520 ; Yes, replicate 4 times
|
||
shr esi,1 ; No, replicate 2 times
|
||
dec ecx
|
||
|
||
cblt_5520:
|
||
cmp edx,esi ;Generate a loop? (edx = loopcount)
|
||
jle cblt_5540 ; No, just copy code
|
||
mov al,I_MOV_ECX_DWORD_I
|
||
stosb ;mov cx,loopcount/n
|
||
mov eax,edx ;Compute loop count
|
||
shr eax,cl
|
||
stosd
|
||
shl eax,cl ;See if loopcount MOD n is 0
|
||
sub eax,edx
|
||
jz cblt_5540 ;Zero, no odd count to handle
|
||
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; There is an odd portion of bytes to be processed. Increment
|
||
; the loop counter for the odd pass through the loop and then
|
||
; compute the displacement for entering the loop.
|
||
;
|
||
; To compute the displacement, subtract the number of odd bytes
|
||
; from the modulus being used (i.e. 4-3=1). This gives the
|
||
; number of bytes to skip over the first time through the loop.
|
||
;
|
||
; Multiply this by the number of bytes for a logic sequence,
|
||
; and the result will be the displacement for the jump.
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
inc dword ptr [edi][-4] ;Not zero, adjust for partial loop
|
||
add eax,esi ;Compute where to enter the loop at
|
||
push edx
|
||
mul ebx
|
||
pop edx
|
||
mov ecx,eax
|
||
mov al,I_JMP_DISP32 ;Stuff jump instruction
|
||
stosb
|
||
mov eax,ecx ;Stuff displacement for jump
|
||
stosd
|
||
|
||
;-----------------------------------------------------------------------;
|
||
; Currently: EDX = loop count
|
||
; ESI = loop modulus
|
||
; EBX = size of one logic operation
|
||
; EDI --> next location in the loop
|
||
;-----------------------------------------------------------------------;
|
||
|
||
cblt_5540:
|
||
mov ecx,ebx ;Set move count
|
||
mov ebx,edx ;Set maximum for move
|
||
cmp ebx,esi ;Is the max > what's left?
|
||
jle cblt_5560 ; No, just use what's left
|
||
mov ebx,esi ; Yes, copy the max
|
||
|
||
cblt_5560:
|
||
sub edx,esi ;If dx > 0, then loop logic needed
|
||
mov esi,fr.start_fl ;--> fetch code to copy
|
||
mov eax,ecx ;Save a copy of fetch length
|
||
rep movsb ;Move fetch code and stuff stosb
|
||
mov esi,edi ;--> new source (and top of loop)
|
||
sub esi,eax
|
||
mov byte ptr [edi][-1],I_STOSB
|
||
dec ebx ;One copy has been made
|
||
push edx
|
||
mul ebx ;Compute # bytes left to move
|
||
pop edx
|
||
mov ecx,eax ;Set move count
|
||
rep movsb ;Move the fetches
|
||
sub esi,eax ;Restore pointer to start of loop
|
||
|
||
page
|
||
|
||
; The innermost BLT code has been created and needs the looping
|
||
; logic added to it. If there is any looping to be done, then
|
||
; generate the loop code. The code within the innerloop may be
|
||
; greater than 126 bytes, so a LOOP instruction may not be used
|
||
; in this case.
|
||
|
||
cblt_5580:
|
||
or edx,edx ;Need a loop?
|
||
jle cblt_6000 ; No, don't generate one
|
||
mov al,I_DEC_ECX
|
||
stosb
|
||
mov ax,I_JNZ_DISP32
|
||
stosw
|
||
mov eax,esi ;Compute offset of loop
|
||
sub eax,edi
|
||
sub eax,4 ;Bias by DISP32
|
||
stosd
|
||
|
||
|
||
subttl Compile - Last Byte Processing
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; All the innerloop stuff has been processed. Now generate the code for
|
||
; the final byte if there is one. This code is almost identical to the
|
||
; code for the first byte except there will only be one fetch (if a
|
||
; fetch is needed at all).
|
||
;
|
||
; The code generated will look something like:
|
||
;
|
||
; < fetch > ;Get source byte
|
||
; < align > ;Align source if needed
|
||
; action ;Perform desired action
|
||
; mask and store
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_6000:
|
||
mov dx,fr.last_mask ;Get last byte mask
|
||
or dh,dh ;Is there a last byte to be processed?
|
||
jz cblt_6100 ; No.
|
||
|
||
mov ecx,fr.end_fls ;Get end of fetch/logic/store operation
|
||
mov esi,fr.start_fl ;Get start of fetch/logic sequence
|
||
sub ecx,esi ;Compute length of the code
|
||
test fr.first_fetch,FF_NO_LAST_FETCH
|
||
jz cblt_include_fetch
|
||
test fr.the_flags,F0_SRC_PRESENT ; was there a fetch?
|
||
jz cblt_was_no_fetch
|
||
cmp fr.phase_h,0 ; Phase zero case is not combined
|
||
; into innerloop as it should be.
|
||
; If the final byte is full then we
|
||
; better not remove the lodsb ( i.e.
|
||
je cblt_include_fetch ; 0 - 0 = 0 would make us think we could)
|
||
|
||
mov eax,fr.cFetchCode ; don't copy the fetch (lodsb)
|
||
add esi,eax
|
||
sub ecx,eax
|
||
|
||
cblt_was_no_fetch:
|
||
cblt_include_fetch:
|
||
|
||
rep movsb ;Copy the fetch/action/store code
|
||
xchg dh,dl
|
||
mov [edi][MASKED_STORE_MASK],dx ;Stuff last byte mask into the code
|
||
skip_save:
|
||
subttl Compile - Looping Logic
|
||
page
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Looping logic.
|
||
;
|
||
; The looping logic must handle monochrome bitmaps, color bitmaps,
|
||
; huge bitmaps, the device, the presence or absence of a source
|
||
; or pattern, and mono <==> color interactions.
|
||
;
|
||
; The type of looping logic is always based on the destination.
|
||
;
|
||
; Plane Update Facts:
|
||
;
|
||
; 1) If the destination device is color, then there will be
|
||
; logic for plane selection. Plane selection is performed
|
||
; at the start of the loop for the display. Plane selection
|
||
; for bitmaps is performed at the end of the loop in anticipation
|
||
; of the next plane.
|
||
;
|
||
; The following applies when the destination is color:
|
||
;
|
||
; a) The destination update consists of:
|
||
;
|
||
; 1) If the destination is the display, the next plane will
|
||
; be selected by the plane selection code at the start
|
||
; of the scan line loop.
|
||
;
|
||
; 2) If not the display, then the PDevice must be a bitmap.
|
||
; The next plane will be selected by updating the
|
||
; destination offset by the next_plane value.
|
||
;
|
||
;
|
||
; b) If F0_GAG_CHOKE isn't specified, then there may be a source.
|
||
; If there is a source, it must be color, and the update
|
||
; consists of:
|
||
;
|
||
; 1) If the source is the display, the next plane will be
|
||
; selected by the plane selection code at the start of
|
||
; the loop.
|
||
;
|
||
; 2) If not the display, then the PDevice must be a bitmap.
|
||
; The next plane will be selected by updating the
|
||
; source offset by the next_plane value.
|
||
;
|
||
;
|
||
; c) If F0_GAG_CHOKE is specified, then the source must be a
|
||
; monochrome bitmap which is undergoing mono to color
|
||
; conversion. The AND & XOR mask table which is used
|
||
; for the conversion will have to be updated, unless
|
||
; the F1_NO_MUNGE flag is set indicating that the color
|
||
; conversion really wasn't needed.
|
||
;
|
||
; The source's pointer will not be updated. It will
|
||
; remain pointing to the same scan of the source until
|
||
; all planes of the destination have been processed.
|
||
;
|
||
;
|
||
; d) In all cases, the plane mask rotation code will be
|
||
; generated. If the plane indicator doesn't overflow,
|
||
; then start at the top of the scan line loop for the
|
||
; next plane.
|
||
;
|
||
; If the plane indicator overflows, then:
|
||
;
|
||
; 1) If there is a pattern present, it's a color
|
||
; pattern fetch. The index of which scan of
|
||
; the brush to use will have to be updated.
|
||
;
|
||
; 2) Enter the scan line update routine
|
||
;
|
||
;
|
||
; 2) If the destination is monochrome, then there will be no
|
||
; plane selection logic.
|
||
;
|
||
; If F0_GAG_CHOKE is specified, then color ==> mono conversion
|
||
; is taking place. Any plane selection logic is internal
|
||
; to the ROP byte fetch code. Any color brush was pre-
|
||
; processed into a monochrome brush, so no brush updating
|
||
; need be done
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
subttl Looping Logic - Plane Selection
|
||
page
|
||
|
||
; Get saved parameters off of the stack.
|
||
;
|
||
; < pop ebx > ;Get plane indicator
|
||
; < pop esi > ;Get source pointer
|
||
; pop edi ;Get destination pointer
|
||
; pop ecx ;Get loop count
|
||
|
||
cblt_6100:
|
||
mov bh,fr.the_flags ;These flags will be used a lot
|
||
test bh,F0_DEST_IS_COLOR ;Is the destination color?
|
||
jz cblt_6120 ; No
|
||
mov al,I_POP_EBX ;Restore plane index
|
||
stosb
|
||
|
||
cblt_6120:
|
||
test bh,F0_SRC_PRESENT ;Is a source needed?
|
||
jz cblt_6140 ; No
|
||
mov al,I_POP_ESI ; Yes, get source pointer
|
||
stosb
|
||
|
||
cblt_6140:
|
||
mov ax,I_POP_EDI_POP_ECX ;Get destination pointer
|
||
stosw ;Get loop count
|
||
test bh,F0_DEST_IS_COLOR ;Color scanline update?
|
||
jz cblt_6300 ; No, just do the mono scanline update
|
||
|
||
; The scanline update is for color. Generate the logic to update
|
||
; a brush, perform plane selection, process mono ==> color conversion,
|
||
; and test for plane overflow.
|
||
|
||
cblt_6160:
|
||
or bh,bh ;Color conversion?
|
||
jns cblt_6180 ; No
|
||
errnz F0_GAG_CHOKE-10000000b
|
||
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; The source is monochrome. Handle mono ==> color conversion.
|
||
; The AND & XOR mask table will need to be rotated for the next
|
||
; pass over the source.
|
||
;
|
||
; The source scanline pointer will not be updated until all planes
|
||
; have been processed for the current scan.
|
||
;
|
||
; If F1_NO_MUNGE has been specified, then the color conversion table
|
||
; and the color conversion code was not generated, and no update
|
||
; code will be needed.
|
||
;
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
test fr.moore_flags,F1_NO_MUNGE ;Is there really a conversion table?
|
||
jnz short cblt_6200 ; No, so skip the code
|
||
|
||
mov al,I_MOV_EBP_DWORD_I ;lea ebp,fr.ajM2C
|
||
stosb
|
||
lea eax,fr.ajM2c ;Get address of table
|
||
stosd
|
||
mov esi,offset FLAT:rot_and_xor ;--> rotate code
|
||
mov cx,LEN_ROT_AND_XOR
|
||
rep movsb
|
||
jmp short cblt_6200
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; If there is a source, it must be color. If it is a memory
|
||
; bitmap, then the next plane must be selected, else it is
|
||
; the display and the next plane will be selected through
|
||
; the hardware registers.
|
||
;
|
||
; < add esi,next_plane>
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_6180:
|
||
test bh,F0_SRC_PRESENT ;Is there really a source?
|
||
jz cblt_6200 ;No source.
|
||
test bh,F0_SRC_IS_DEV ;Is the source the display?
|
||
jnz cblt_6200 ; Yes, use hardware plane selection
|
||
mov ax,I_ADD_ESI_DWORD_I ; No, generate plane update
|
||
stosw ;Add si,next_plane
|
||
mov eax,fr.src.next_plane
|
||
stosd
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; If the destination isn't the device, then it must be a color
|
||
; memory bitmap, and its pointer will have to be updated by
|
||
; bmWidthPlanes. If it is the display, then the next plane
|
||
; will be selected through the hardware registers.
|
||
;
|
||
; < add edi,next_plane>
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_6200:
|
||
test bh,F0_DEST_IS_DEV ;Is the destination the display
|
||
jnz cblt_6220 ; Yes, don't generate update code
|
||
mov ax,I_ADD_EDI_DWORD_I ; No, update bitmap to the next plane
|
||
stosw
|
||
mov eax,fr.dest.next_plane
|
||
stosd
|
||
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; The source and destination pointers have been updated.
|
||
; Now generate the plane looping logic.
|
||
;
|
||
; < shl bl,1 > ;Select next plane
|
||
; < jnc StartOfLoop > ; Yes, go process next
|
||
; < mov bl,PLANE_1 > ;Reset plane indicator
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
cblt_6220:
|
||
mov ax,I_SHL_BL_1 ;Stuff plane looping logic
|
||
stosw
|
||
|
||
mov edx,fr.pNextPlane ;Compute relative offset of
|
||
sub edx,edi ; start of loop
|
||
sub edx,6 ;Bias offset by length of jnc inst.
|
||
mov ax,I_JNC_DISP32
|
||
stosw ;jnc StartOfLoop
|
||
mov eax,edx
|
||
stosd
|
||
|
||
subttl Looping Logic - Color Brush Update
|
||
page
|
||
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; The plane update logic has been copied. If a pattern was
|
||
; involved for a color BLT, then the pattern index will need
|
||
; to be updated to the next scanline for three plane mode.
|
||
;
|
||
; This will involve subtracting off 3*SIZE_PATTERN (MonoPlane),
|
||
; and adding in the increment. The result must be masked with
|
||
; 00000111b to select the correct source. Note that the update
|
||
; can be done with an add instruction and a mask operation.
|
||
;
|
||
; inc index+MonoPlane inc-MonoPlane result AND 07h
|
||
;
|
||
; 1 0+32 = 32 1-32 = -31 1 1
|
||
; 1 7+32 = 39 1-32 = -31 8 0
|
||
; -1 0+32 = 32 -1-32 = -33 FF 7
|
||
; -1 7+32 = 39 -1-32 = -33 6 6
|
||
;
|
||
; < mov al,[12345678] > ;Get brush index
|
||
; < add al,n > ;Add displacement to next byte
|
||
; < and al,00000111b > ;Keep it in range
|
||
; < mov [12345678],al > ;Store displacement to next byte
|
||
;
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
test bh,F0_PAT_PRESENT ;Is a pattern involved?
|
||
jz cblt_6300 ; No
|
||
test fr.brush_accel,SOLID_BRUSH
|
||
jnz cblt_6300 ;Solid color fetch needs no updating
|
||
mov al,I_MOV_AL_MEM
|
||
stosb ;mov al,[xxxxxxxx]
|
||
mov edx,fr.addr_brush_index
|
||
mov eax,edx
|
||
stosd
|
||
mov al,I_ADD_AL_BYTE_I
|
||
mov ah,fr.direction ;add al,bais
|
||
sub ah,oem_brush_mono ;Anybody ever fly one of these things?
|
||
errnz INCREASE-1 ;Must be a 1
|
||
errnz DECREASE+1 ;Must be a -1
|
||
stosw
|
||
mov ax,0700h+I_AND_AL_BYTE_I ;and al,00000111b
|
||
stosw
|
||
mov al,I_MOV_MEM_AL
|
||
stosb ;mov [xxxxxxxx],al
|
||
mov eax,edx
|
||
stosd
|
||
|
||
subttl Looping Logic - Scan Line Update
|
||
page
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
; Generate the next scanline code. The next scan line code must
|
||
; handle monochrome bitmaps, the device, the presence or absence
|
||
; of a source.
|
||
;
|
||
; Also color bitmaps, and mono <==> color interactions.
|
||
;
|
||
; < add esi,gl_src.next_scan> ;Normal source scan line update
|
||
; add edi,gl_dest.next_scan ;Normal destination scan line update
|
||
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ;
|
||
|
||
;!!! We have the problem in that this code assumes that cPlanes*cjBytesScan
|
||
;!!! is the same as next_scan. This might not always be the case, and we
|
||
;!!! should do something about fixing this. This would require pushing an
|
||
;!!! extra copy of pScan_n_Plane0 and then adding next-scan to this when we
|
||
;!!! have exhausted the planes for scan n
|
||
|
||
cblt_6300:
|
||
test bh,F0_SRC_PRESENT ;Is there a source?
|
||
jz cblt_6340 ; No, skip source processing
|
||
mov ax,I_ADD_ESI_DWORD_I ;add esi,increment
|
||
stosw
|
||
mov eax,fr.src.next_scan
|
||
stosd
|
||
|
||
cblt_6340:
|
||
mov ax,I_ADD_EDI_DWORD_I ;add edi,increment
|
||
stosw
|
||
mov eax,fr.dest.next_scan
|
||
stosd
|
||
|
||
; Compile the scan line loop. The code simply jumps to the start
|
||
; of the outer loop if more scans exist to be processed.
|
||
|
||
cblt_6380:
|
||
mov al,I_DEC_ECX
|
||
stosb
|
||
mov ax,I_JNZ_DISP32
|
||
stosw
|
||
mov eax,fr.blt_addr ;Compute relative offset of
|
||
sub eax,edi ; start of loop
|
||
sub eax,4 ;Adjust jump bias for DISP32
|
||
stosd ; and store it into jump
|
||
|
||
cblt_6420:
|
||
mov al,I_RET ;Stuff the far return instruction
|
||
stosb
|
||
|
||
cRet cblt
|
||
endProc cblt
|
||
|
||
_TEXT$01 ends
|
||
|
||
end
|
||
|
||
|