3201 lines
130 KiB
NASM
3201 lines
130 KiB
NASM
;******************************Module*Header*******************************\
|
|
; Module Name: nalgnblt.asm
|
|
;
|
|
; driver prototypes
|
|
;
|
|
; Copyright (c) 1992 Microsoft Corporation
|
|
;**************************************************************************/
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; VOID vNonAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
|
|
; INT icopydir);
|
|
; Input:
|
|
; pdsurf - surface on which to copy
|
|
; prcldest - pointer to destination rectangle
|
|
; pptlsrc - pointer to source upper left corner
|
|
; icopydir - direction in which copy must proceed to avoid overlap problems
|
|
; and synchronize with the clip enumeration visually, according to
|
|
; constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and CD_LEFTUP in
|
|
; WINDDI.H
|
|
;
|
|
; Performs accelerated non-aligned SRCCOPY VGA-to-VGA blts.
|
|
;
|
|
;-----------------------------------------------------------------------;
|
|
;
|
|
; Note: The source and dest *must* be non-aligned (not have the same
|
|
; left-edge intrabyte pixel alignment). Will not work properly if they are
|
|
; in fact aligned.
|
|
;
|
|
; Note: Assumes all rectangles have positive heights and widths. Will not
|
|
; work properly if this is not the case.
|
|
;
|
|
;-----------------------------------------------------------------------;
|
|
|
|
comment $
|
|
|
|
The overall approach of this module for each rectangle to copy is:
|
|
|
|
1) Precalculate the masks and whole byte widths, and determine which of
|
|
partial left edge, partial right edge, and whole middle bytes are required
|
|
for this copy.
|
|
|
|
2) Set up the starting pointers for each of the areas (left, whole middle,
|
|
right), the start and stop scan lines, the copying direction (left-to-right
|
|
or right-to-left, and top-to-bottom or bottom-to-top), the threading
|
|
(sequence of calls required to do the left/whole/right components in the
|
|
proper sequence), based on the passed-in copy direction, which in turn is
|
|
dictated by the nature of the overlap between the source and destination.
|
|
|
|
3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
|
|
1 R/W window, unbanked), that sequences through the intersection of each
|
|
bank with the source and destination rectangles in the proper direction
|
|
(top-to-bottom or bottom-to-top, based on the passed-in copy direction),
|
|
and performs the copy in each such rectangle. The threading vector is used
|
|
to call the required routines (copy left/whole/right bytes). For 1 R/W and
|
|
1R/1W adapters, there is a second threading vector that is called when the
|
|
source and the destination are both adequately (for the copy purposes)
|
|
addressable simultaneously (because they're in the same bank), so there's
|
|
no need to copy through a temp buffer. We want to avoid the temp
|
|
buffer whenever we can, because it's slower.
|
|
|
|
Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
|
|
each plane's bytes are not stored in the corresponding plane's temp buffer, but
|
|
rather consecutively in the plane 0 temp buffer. This is to reduce page
|
|
faulting, and also so that 1R/1W adapters only need a temp buffer large enough
|
|
to hold 4*tallest bank words (4K will do). 1 R/W adapters still copy whole
|
|
bytes through the full temp buffer, using all four planes' temp buffers, so
|
|
they require a temp buffer big enough to hold a full bank (256K will do).
|
|
|
|
Note: The VGA's rotator is used to perform all rotation in this module. The
|
|
two source bytes relevant to this operation are masked to preserve the desired
|
|
bits, then combined and fed to the VGA's rotator, which performs the rotation.
|
|
This is better than letting the 386/486 do the rotation because even with the
|
|
barrel shifter, those processors take 3 cycles per rotate, where the masking
|
|
and combining take only 2 cycles (or no cycles, for edges with 1-wide
|
|
sources). We also get to avoid 16-bit instructions like ROL AX,CL; the 16-bit
|
|
size prefix costs a cycle on a 486.
|
|
|
|
comment $
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; This is no longer used, but is needed by unroll.inc.
|
|
|
|
LOOP_UNROLL_SHIFT equ 1 ;not used by this module; defined only because unroll.inc needs it
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Maximum # of edge bytes to process before switching to next plane. Larger
|
|
; means faster, but there's more potential for flicker, since the raster scan
|
|
; has a better chance of catching bytes that have changed in some planes but
|
|
; not all planes. Larger also means bigger.
|
|
|
|
EDGE_CHUNK_SIZE equ 4 ;max # of edge bytes processed in one plane before switching to the next plane
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Macro to push the current threading sequence (string of routine calls) on the
|
|
; stack, then jump to the first threading entry. The threading pointer can be
|
|
; specified, or defaults to pCurrentThread. The return address can be
|
|
; immediately after the JMP, or can be specified.
|
|
|
|
THREAD_AND_START macro THREADING,RETURN_ADDR
        local   jump_to_first, after_thread

; Stack the address that the last threaded routine will return to: the
; caller-supplied label if one was given, else the end of this expansion.

ifnb <&RETURN_ADDR&>
        push    offset &RETURN_ADDR&    ;return to the supplied label
else
        push    offset after_thread     ;return just past this expansion
endif

; EAX -> thread descriptor (DWORD count at +0, routine addresses at +4, +8
; and +12): the supplied operand if one was given, else pCurrentThread.

ifnb <&THREADING&>
        mov     eax,&THREADING&
else
        mov     eax,pCurrentThread
endif

; Enter the push sequence below part-way through, so that exactly
; (count - 1) pushes execute before the jump to the first routine. Each push
; is encoded in 3 bytes, so the entry point is jump_to_first - 3*(count - 1).
; Clobbers EAX, ECX and EDX.

        mov     ecx,[eax]               ;ECX = # of routines (at least 1)
        lea     ecx,[ecx+ecx*2]         ;ECX = 3 * count
        mov     edx,offset jump_to_first+3
        sub     edx,ecx                 ;EDX = jump_to_first - 3*(count - 1)
        jmp     edx                     ;branch to a push or to the jmp below

; Later routines are pushed first, so the RET of each threaded routine starts
; the next one in sequence, and the last RET lands on the return address
; pushed above.

        push    dword ptr [eax+12]      ;third routine (3-byte instruction)
        push    dword ptr [eax+8]       ;second routine (3-byte instruction)
jump_to_first:
        jmp     dword ptr [eax+4]       ;start the first threaded routine

after_thread:
        endm
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
.386
|
|
|
|
ifndef DOS_PLATFORM
|
|
.model small,c
|
|
else
|
|
ifdef STD_CALL
|
|
.model small,c
|
|
else
|
|
.model small,pascal
|
|
endif; STD_CALL
|
|
endif; DOS_PLATFORM
|
|
|
|
assume ds:FLAT,es:FLAT,ss:FLAT
|
|
assume fs:nothing,gs:nothing
|
|
|
|
.xlist
|
|
include stdcall.inc ;calling convention cmacros
|
|
include i386\egavga.inc
|
|
include i386\strucs.inc
|
|
include i386\unroll.inc
|
|
include i386\ropdefs.inc
|
|
|
|
.list
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
.data
|
|
|
|
; Threads for stringing together left, whole byte, and right operations
|
|
; in various orders, both using a temp buffer and not. Data format is:
|
|
;
|
|
; DWORD +0 = # of calls in thread (1, 2, or 3)
|
|
; +4 = first call (required)
|
|
; +8 = second call (optional)
|
|
; +12 = third call (optional)
|
|
|
|
align 4
|
|
|
|
; Copies not involving the temp buffer.
|
|
|
|
; Direct (screen-to-screen) thread descriptors. Each is a DWORD call count
; followed by that many routine addresses, in the order they are to run.

Thread_L        dd      1, copy_left_edge

Thread_W        dd      1, copy_whole_bytes

Thread_R        dd      1, copy_right_edge

Thread_LR       dd      2, copy_left_edge, copy_right_edge

Thread_RL       dd      2, copy_right_edge, copy_left_edge

Thread_LW       dd      2, copy_left_edge, copy_whole_bytes

Thread_WL       dd      2, copy_whole_bytes, copy_left_edge

Thread_WR       dd      2, copy_whole_bytes, copy_right_edge

Thread_RW       dd      2, copy_right_edge, copy_whole_bytes

Thread_LWR      dd      3, copy_left_edge, copy_whole_bytes, copy_right_edge

Thread_RWL      dd      3, copy_right_edge, copy_whole_bytes, copy_left_edge
|
|
|
|
; Copies involving the temp buffer.
|
|
|
|
; Thread descriptors in which at least one component goes through the temp
; buffer (a "b" suffix on L, W or R in the name marks the buffered
; components). Same layout as the direct descriptors: DWORD call count, then
; that many routine addresses in execution order.

Thread_Lb       dd      1, copy_left_edge_via_buffer

Thread_Wb       dd      1, copy_whole_bytes_via_buffer

Thread_Rb       dd      1, copy_right_edge_via_buffer

Thread_LbRb     dd      2, copy_left_edge_via_buffer, copy_right_edge_via_buffer

Thread_RbLb     dd      2, copy_right_edge_via_buffer, copy_left_edge_via_buffer

Thread_LbW      dd      2, copy_left_edge_via_buffer, copy_whole_bytes

Thread_LbWb     dd      2, copy_left_edge_via_buffer, copy_whole_bytes_via_buffer

Thread_WLb      dd      2, copy_whole_bytes, copy_left_edge_via_buffer

Thread_WbLb     dd      2, copy_whole_bytes_via_buffer, copy_left_edge_via_buffer

Thread_WRb      dd      2, copy_whole_bytes, copy_right_edge_via_buffer

Thread_WbRb     dd      2, copy_whole_bytes_via_buffer, copy_right_edge_via_buffer

Thread_RbW      dd      2, copy_right_edge_via_buffer, copy_whole_bytes

Thread_RbWb     dd      2, copy_right_edge_via_buffer, copy_whole_bytes_via_buffer

Thread_LbWRb    dd      3, copy_left_edge_via_buffer, copy_whole_bytes
                dd         copy_right_edge_via_buffer

Thread_LbWbRb   dd      3, copy_left_edge_via_buffer, copy_whole_bytes_via_buffer
                dd         copy_right_edge_via_buffer

Thread_RbWLb    dd      3, copy_right_edge_via_buffer, copy_whole_bytes
                dd         copy_left_edge_via_buffer

Thread_RbWbLb   dd      3, copy_right_edge_via_buffer, copy_whole_bytes_via_buffer
                dd         copy_left_edge_via_buffer
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of thread selection for various horizontal copy directions, with
|
|
; the look-up index a 4-bit field as follows:
|
|
;
|
|
; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
|
|
; Bit 2 = 1 if left edge must be copied
|
|
; Bit 1 = 1 if whole bytes must be copied
|
|
; Bit 0 = 1 if right edge must be copied
|
|
;
|
|
; This is used for all cases where both the source and destination are
|
|
; simultaneously addressable for our purposes, so there's no need to go
|
|
; through the temp buffer (unbanked, 2 R/W, and sometimes for 1 R/W and 1R/1W).
|
|
|
|
MasterThreadTable label dword

; Index bit 3 = 0: right-to-left copies (right edge first, left edge last).
; The binary value in each comment is the 4-bit table index.

        dd      0                       ;0000  <not used>
        dd      Thread_R                ;0001  R
        dd      Thread_W                ;0010  W
        dd      Thread_RW               ;0011  RW
        dd      Thread_L                ;0100  L
        dd      Thread_RL               ;0101  RL
        dd      Thread_WL               ;0110  WL
        dd      Thread_RWL              ;0111  RWL

; Index bit 3 = 1: left-to-right copies (left edge first, right edge last).

        dd      0                       ;1000  <not used>
        dd      Thread_R                ;1001  R
        dd      Thread_W                ;1010  W
        dd      Thread_WR               ;1011  WR
        dd      Thread_L                ;1100  L
        dd      Thread_LR               ;1101  LR
        dd      Thread_LW               ;1110  LW
        dd      Thread_LWR              ;1111  LWR
|
|
|
|
|
|
; Table of thread selection for various adapter types and horizontal
|
|
; copy directions, with the look-up index a 6-bit field as follows:
|
|
;
|
|
; Bit 5 = adapter type high bit
|
|
; Bit 4 = adapter type low bit
|
|
; Bit 3 = 1 if left-to-right copy, 0 if right-to-left
|
|
; Bit 2 = 1 if left edge must be copied
|
|
; Bit 1 = 1 if whole bytes must be copied
|
|
; Bit 0 = 1 if right edge must be copied
|
|
;
|
|
; This is used for all cases where the source and destination are not both
|
|
; simultaneously addressable for our purposes, so we need to go through the
|
|
; temp buffer (only for 1 R/W and 1R/1W, and only sometimes).
|
|
|
|
MasterThreadTableViaBuffer label dword

; The binary value in each comment is the 6-bit table index: adapter type in
; bits 5-4, left-to-right flag in bit 3, left/whole/right flags in bits 2-0.

; Adapter type 00: unbanked (no need for buffer).
;  right-to-left

        dd      0                       ;000000  <not used>
        dd      Thread_R                ;000001  R
        dd      Thread_W                ;000010  W
        dd      Thread_RW               ;000011  RW
        dd      Thread_L                ;000100  L
        dd      Thread_RL               ;000101  RL
        dd      Thread_WL               ;000110  WL
        dd      Thread_RWL              ;000111  RWL

;  left-to-right

        dd      0                       ;001000  <not used>
        dd      Thread_R                ;001001  R
        dd      Thread_W                ;001010  W
        dd      Thread_WR               ;001011  WR
        dd      Thread_L                ;001100  L
        dd      Thread_LR               ;001101  LR
        dd      Thread_LW               ;001110  LW
        dd      Thread_LWR              ;001111  LWR

; Adapter type 01: 1 R/W banking window (everything goes through buffer).
;  right-to-left

        dd      0                       ;010000  <not used>
        dd      Thread_Rb               ;010001  R
        dd      Thread_Wb               ;010010  W
        dd      Thread_RbWb             ;010011  RW
        dd      Thread_Lb               ;010100  L
        dd      Thread_RbLb             ;010101  RL
        dd      Thread_WbLb             ;010110  WL
        dd      Thread_RbWbLb           ;010111  RWL

;  left-to-right

        dd      0                       ;011000  <not used>
        dd      Thread_Rb               ;011001  R
        dd      Thread_Wb               ;011010  W
        dd      Thread_WbRb             ;011011  WR
        dd      Thread_Lb               ;011100  L
        dd      Thread_LbRb             ;011101  LR
        dd      Thread_LbWb             ;011110  LW
        dd      Thread_LbWbRb           ;011111  LWR

; Adapter type 10: 1R/1W banking window (edges go through buffer, whole
; bytes are copied directly).
;  right-to-left

        dd      0                       ;100000  <not used>
        dd      Thread_Rb               ;100001  R
        dd      Thread_W                ;100010  W
        dd      Thread_RbW              ;100011  RW
        dd      Thread_Lb               ;100100  L
        dd      Thread_RbLb             ;100101  RL
        dd      Thread_WLb              ;100110  WL
        dd      Thread_RbWLb            ;100111  RWL

;  left-to-right

        dd      0                       ;101000  <not used>
        dd      Thread_Rb               ;101001  R
        dd      Thread_W                ;101010  W
        dd      Thread_WRb              ;101011  WR
        dd      Thread_Lb               ;101100  L
        dd      Thread_LbRb             ;101101  LR
        dd      Thread_LbW              ;101110  LW
        dd      Thread_LbWRb            ;101111  LWR

; Adapter type 11: 2 R/W banking window (no need for buffer).
;  right-to-left

        dd      0                       ;110000  <not used>
        dd      Thread_R                ;110001  R
        dd      Thread_W                ;110010  W
        dd      Thread_RW               ;110011  RW
        dd      Thread_L                ;110100  L
        dd      Thread_RL               ;110101  RL
        dd      Thread_WL               ;110110  WL
        dd      Thread_RWL              ;110111  RWL

;  left-to-right

        dd      0                       ;111000  <not used>
        dd      Thread_R                ;111001  R
        dd      Thread_W                ;111010  W
        dd      Thread_WR               ;111011  WR
        dd      Thread_L                ;111100  L
        dd      Thread_LR               ;111101  LR
        dd      Thread_LW               ;111110  LW
        dd      Thread_LWR              ;111111  LWR
|
|
|
|
|
|
; Amount to shift adapter type field left for use in MasterThreadTableViaBuffer.
|
|
|
|
ADAPTER_FIELD_SHIFT equ 4 ;adapter type lands in index bits 5-4, above the 4-bit direction/left/whole/right field
|
|
|
|
; Mask for setting left-to-right bit to "left-to-right true" for use in both
|
|
; MasterThread tables.
|
|
|
|
LEFT_TO_RIGHT_FIELD_SET equ 1000b ;bit 3 of the table index: 1 = left-to-right copy
|
|
|
|
|
|
; Table of top-to-bottom loops for adapter types.
|
|
|
|
align 4
|
|
; One bank-enumeration loop entry point per adapter type, in the order
; unbanked, 1 R/W, 1R/1W, 2 R/W. Unbanked shares the 2 R/W loop.

TopToBottomLoopTable label dword
        dd      top_to_bottom_2RW, top_to_bottom_1RW
        dd      top_to_bottom_1R1W, top_to_bottom_2RW
|
|
|
|
|
|
; Table of bottom-to-top loops for adapter types.
|
|
|
|
align 4
|
|
; One bank-enumeration loop entry point per adapter type, in the order
; unbanked, 1 R/W, 1R/1W, 2 R/W. Unbanked shares the 2 R/W loop.

BottomToTopLoopTable label dword
        dd      bottom_to_top_2RW, bottom_to_top_1RW
        dd      bottom_to_top_1R1W, bottom_to_top_2RW
|
|
|
|
|
|
; Table of routines for setting up to copy in various directions.
|
|
|
|
align 4
|
|
; Set-up entry points, one per copy direction, in the order CD_RIGHTDOWN,
; CD_LEFTDOWN, CD_RIGHTUP, CD_LEFTUP.

SetUpForCopyDirection label dword
        dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
        dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
        dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
        dd      right_to_left_bottom_to_top     ;CD_LEFTUP
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Left edge clip masks for intrabyte start addresses 0 through 7.
|
|
; Whole byte cases are flagged as 0ffh.
|
|
|
|
jLeftMaskTable label byte
        db      11111111b               ;start at bit 0: whole byte
        db      01111111b               ;start at bit 1
        db      00111111b               ;start at bit 2
        db      00011111b               ;start at bit 3
        db      00001111b               ;start at bit 4
        db      00000111b               ;start at bit 5
        db      00000011b               ;start at bit 6
        db      00000001b               ;start at bit 7
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Right edge clip masks for intrabyte end addresses (non-inclusive)
|
|
; 0 through 7. Whole byte cases are flagged as 0ffh.
|
|
|
|
jRightMaskTable label byte
        db      11111111b               ;end at bit 0 (non-inclusive): whole byte
        db      10000000b               ;end at bit 1
        db      11000000b               ;end at bit 2
        db      11100000b               ;end at bit 3
        db      11110000b               ;end at bit 4
        db      11111000b               ;end at bit 5
        db      11111100b               ;end at bit 6
        db      11111110b               ;end at bit 7
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of width-based source-edge-to-buffer copy routines.
|
|
|
|
align 4
|
|
; Entry 0 is the routine for a 1-wide source edge, entry 1 the routine for a
; 2-wide source edge (presumably indexed by source edge width minus 1;
; confirm at the use sites).

copy_edge_from_screen_to_buffer label dword
        dd      copy_screen_to_buffered_edge_1ws, copy_screen_to_buffered_edge_2ws
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of width-based buffer-to-dest-edge copy routines.
|
|
|
|
align 4
|
|
; Entry 0 is the routine for a 1-wide source edge, entry 1 the routine for a
; 2-wide source edge (presumably indexed by source edge width minus 1;
; confirm at the use sites).

copy_edge_from_buffer_to_screen label dword
        dd      copy_buffered_edge_to_screen_1ws, copy_buffered_edge_to_screen_2ws
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of width-based edge copy routines (no intermediate buffer).
|
|
|
|
align 4
|
|
; Entry 0 is the routine for a 1-wide source edge, entry 1 the routine for a
; 2-wide source edge (presumably indexed by source edge width minus 1;
; confirm at the use sites).

copy_edge_table label dword
        dd      copy_edge_1ws, copy_edge_2ws
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
.code
|
|
|
|
_TEXT$04 SEGMENT DWORD USE32 PUBLIC 'CODE'
|
|
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
cProc vNonAlignedSrcCopy,16,< \
|
|
uses esi edi ebx, \
|
|
pdsurf: ptr DEVSURF, \
|
|
prcldest : ptr RECTL, \
|
|
pptlsrc : ptr POINTL, \
|
|
icopydir : dword
|
|
|
|
local culWholeBytesWidth : dword ;# of bytes to copy across each scan
|
|
local ulBlockHeight : dword ;# of scans to copy per bank block
|
|
local ulWholeScanDelta : dword;offset from end of one whole bytes
|
|
; scan to start of next
|
|
local ulWholeBytesSrc : dword ;offset in bitmap of first source whole
|
|
; byte to copy from
|
|
local ulWholeBytesDest : dword;offset in bitmap of first dest whole
|
|
; byte to copy to
|
|
local ulLeftEdgeSrc : dword ;offset in bitmap of first source left
|
|
; edge byte to copy from
|
|
local ulLeftEdgeDest : dword ;offset in bitmap of first dest left
|
|
; edge byte to copy to
|
|
local ulRightEdgeSrc : dword ;offset in bitmap of first source right
|
|
; edge byte to copy from
|
|
local ulRightEdgeDest : dword ;offset in bitmap of first dest right
|
|
; edge byte to copy to
|
|
local ulNextScan : dword ;width of scan, in bytes
|
|
local jLeftMask : dword ;left edge clip mask
|
|
local jRightMask : dword ;right edge clip mask
|
|
local culTempCount : dword ;handy temporary counter
|
|
local pTempEntry : dword ;temporary storage for vector into
|
|
; unrolled loop
|
|
local pTempPlane : dword ;pointer to storage in temp buffer for
|
|
; edge bytes (which are stored
|
|
; consecutively, not in each plane's
|
|
; temp buffer, to reduce possible page
|
|
; faulting
|
|
local ppTempPlane0 : dword ;pointer to pointer to storage in temp
|
|
; buffer for plane 0, immediately
|
|
; preceded by storage for planes 1, 2,
|
|
; and 3
|
|
local ppTempPlane3 : dword ;like above, but for plane 3
|
|
local ulOffsetInBank : dword ;offset relative to bank start
|
|
local pSrcAddr : dword ;working pointer to first source
|
|
; byte to copy from
|
|
local pDestAddr : dword ;working pointer to first dest
|
|
; byte to copy to
|
|
local ulCurrentJustification:dword ;justification used to map in
|
|
; banks; top for top to bottom
|
|
; copies, bottom for bottom to top
|
|
local ulCurrentSrcScan :dword ;scan line used to map in current
|
|
; source bank
|
|
local ulCurrentDestScan:dword ;scan line used to map in current dest
|
|
; bank
|
|
local ulLastDestScan :dword ;scan in target rect at which we stop
|
|
; advancing through banks
|
|
local pCurrentThread : dword ;pointer to data describing the
|
|
; threaded calls to be performed to
|
|
; perform the current copy
|
|
local pCurrentThreadViaBuffer:dword
|
|
;pointer to data describing the
|
|
; threaded calls to be performed to
|
|
; perform the current copy in the case
|
|
; where the source and destination are
|
|
; not simultaneously adequately
|
|
; accessible, so the copy has to go
|
|
; through a temp buffer (used only for
|
|
; 1 R/W and 1R/1W banking)
|
|
local ulAdapterType : dword ;adapter type code, per VIDEO_BANK_TYPE
|
|
local ulLWRType : dword ;whether left edge, whole bytes, and
|
|
; right edge are involved in the
|
|
; current operation;
|
|
; bit 2 = 1 if left edge involved
|
|
; bit 1 = 1 if whole bytes involved
|
|
; bit 0 = 1 if right edge involved
|
|
local ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
|
|
; address past the left edge when the
|
|
; left edge is partial
|
|
local ulCombineMask : dword ;mask for combining desired portions
|
|
; of AL and AH before ORing to make a
|
|
; single byte; used to combine before
|
|
; letting VGA rotate byte as it's
|
|
; written. Used for all cases except
|
|
; whole bytes copied left-to-right
|
|
local ulCombineMaskWhole : dword
|
|
;mask for combining desired portions of
|
|
; AL and AH when copying whole bytes
|
|
; (different from ulCombineMask in the
|
|
; case of whole bytes left-to-right
|
|
; copies, because then AH is the lsb
|
|
; and AL is the MSB; then, this is
|
|
; ulCombineMask with the bytes swapped.
|
|
; For right-to-left whole byte copies,
|
|
; this is the same as ulCombineMask)
|
|
local ulTempScanCount : dword ;temp scan line countdown variable
|
|
local ulWholeScanSrcDelta : dword
|
|
;offset from end of one source whole
|
|
; bytes scan line to start of next.
|
|
; Differs from ulWholeScanDelta because
|
|
; of source rotation pipeline priming
|
|
local ulLeftSrcWidthMinus1 : dword ;# of bytes in left src edge minus
|
|
; one (0 or 1)
|
|
local ulRightSrcWidthMinus1 : dword ;# of bytes in right src edge
|
|
; minus one (0 or 1)
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
|
|
; adapters), and other rectangle-independent variables.
|
|
|
|
mov esi,pdsurf
|
|
mov eax,[esi].dsurf_pvBankBufferPlane0
|
|
mov pTempPlane,eax
|
|
lea eax,[esi].dsurf_pvBankBufferPlane0
|
|
mov ppTempPlane0,eax
|
|
lea eax,[esi].dsurf_pvBankBufferPlane3
|
|
mov ppTempPlane3,eax
|
|
|
|
mov eax,[esi].dsurf_vbtBankingType
|
|
mov ulAdapterType,eax
|
|
|
|
; Copy the rectangle.
|
|
|
|
call copy_rect
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Set the VGA registers back to their default state.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
mov edx,VGA_BASE + GRAF_ADDR
|
|
mov eax,(0ffh shl 8) + GRAF_BIT_MASK
|
|
out dx,ax ;enable bit mask for all bits
|
|
|
|
mov eax,(DR_SET shl 8) + GRAF_DATA_ROT
|
|
out dx,ax ;restore default of no rotation
|
|
|
|
mov dl,SEQ_DATA
|
|
mov al,MM_ALL
|
|
out dx,al ;enable writes to all planes
|
|
|
|
cld ;restore default direction flag
|
|
|
|
cRet vNonAlignedSrcCopy ;done
|
|
|
|
|
|
;***********************************************************************;
|
|
;
|
|
; Copies the specified rectangle.
|
|
;
|
|
;***********************************************************************;
|
|
|
|
copy_rect:
|
|
|
|
; Calculate the rotation, set up the VGA's rotator, and set the byte-combining
|
|
; masks.
|
|
|
|
mov edi,prcldest ;left edge of destination
|
|
mov esi,pptlsrc
|
|
mov ah,byte ptr [edi].xLeft ;left edge of source
|
|
sub ah,byte ptr [esi].ptl_x
|
|
and ah,07h ;rotation = (dest - source) % 8
|
|
mov edx,VGA_BASE + GRAF_ADDR
|
|
mov al,GRAF_DATA_ROT
|
|
out dx,ax ;set the VGA's rotator for the rotation
|
|
|
|
; Set up byte-combining mask, in preparation for ORing and letting the VGA's
|
|
; rotator rotate, assuming the left-hand source byte is in AL and the
|
|
; right-hand source byte is in AH (true for all cases except left-to-right
|
|
; whole bytes).
|
|
|
|
mov cl,ah
|
|
mov eax,0000ff00h
|
|
rol ax,cl
|
|
mov ulCombineMask,eax
|
|
|
|
; Calculate source edge widths (1 or 2 bytes).
|
|
|
|
sub edx,edx ;assume left source width is 1
|
|
mov ebx,[edi].xLeft
|
|
mov ecx,[edi].xRight ;dest right edge (non-inclusive)
|
|
dec ecx ;make it inclusive
|
|
sub ecx,ebx ;dest width = dest right - dest left
|
|
mov eax,[esi].ptl_x
|
|
add ecx,eax ;ECX = right edge of source
|
|
xor eax,ecx
|
|
and eax,not 07h ;do the src start and end differ in byte
|
|
; address bits? (as opposed to intrabyte)
|
|
jz short @F ;no, force 1-wide source
|
|
|
|
mov al,byte ptr [edi].xLeft
|
|
mov ah,byte ptr [esi].ptl_x
|
|
and eax,00000707h
|
|
cmp ah,al
|
|
jb short @F
|
|
inc edx ;left source width is 2
|
|
@@:
|
|
mov ulLeftSrcWidthMinus1,edx
|
|
|
|
sub edx,edx ;assume right source width is 1
|
|
mov eax,[edi].xRight ;dest right edge (non-inclusive)
|
|
dec eax ;make it inclusive
|
|
and cl,07h ;intrabyte source address
|
|
and al,07h ;intrabyte dest address
|
|
cmp cl,al
|
|
ja short @F
|
|
inc edx ;right source width is 2
|
|
@@:
|
|
mov ulRightSrcWidthMinus1,edx
|
|
|
|
; Set up masks and whole bytes count, and build left/whole/right index
|
|
; indicating which of those parts are involved in the copy.
|
|
|
|
mov ebx,[edi].xRight ;right edge of fill (non-inclusive)
|
|
mov ecx,ebx
|
|
and ecx,0111b ;intrabyte address of right edge
|
|
mov ah,jRightMaskTable[ecx] ;right edge mask
|
|
|
|
mov esi,[edi].xLeft ;left edge of fill (inclusive)
|
|
mov ecx,esi
|
|
shr ecx,3 ;/8 for start offset from left edge
|
|
; of scan line
|
|
sub ebx,esi ;width in pixels of fill
|
|
|
|
and esi,0111b ;intrabyte address of left edge
|
|
mov al,jLeftMaskTable[esi] ;left edge mask
|
|
|
|
dec ebx ;make inclusive on right
|
|
add ebx,esi ;inclusive width, starting counting at
|
|
; the beginning of the left edge byte
|
|
shr ebx,3 ;width of fill in bytes touched - 1
|
|
jnz short more_than_1_byte ;more than 1 byte is involved
|
|
|
|
; Only one byte will be affected. Combine first/last masks.
|
|
|
|
and al,ah ;we'll use first byte mask only
|
|
xor ah,ah ;want last byte mask to be 0 to
|
|
; indicate right edge not involved
|
|
inc ebx ;so there's one count to subtract below
|
|
; if this isn't a whole edge byte
|
|
more_than_1_byte:
|
|
|
|
; If all pixels in the left edge are altered, combine the first byte into the
|
|
; whole byte count, because we can handle solid edge bytes faster as part of
|
|
; the whole bytes. Ditto for the right edge.
|
|
|
|
sub ecx,ecx ;edge whole-status accumulator
|
|
cmp al,-1 ;is left edge a whole byte or partial?
|
|
adc ecx,ecx ;ECX=1 if left edge partial, 0 if whole
|
|
sub ebx,ecx ;if left edge partial, deduct it from
|
|
; the whole bytes count
|
|
mov ulLeftEdgeAdjust,ecx ;for skipping over the left edge if
|
|
; it's partial when pointing to the
|
|
; whole bytes
|
|
and ah,ah ;is right edge mask 0, meaning this
|
|
; fill is only 1 byte wide?
|
|
jz short save_masks ;yes, no need to do anything
|
|
or ecx,40h ;assume there's a partial right edge
|
|
cmp ah,-1 ;is right edge a whole byte or partial?
|
|
jnz short save_masks ;partial
|
|
;bit 1=0 if left edge partial, 1 whole
|
|
inc ebx ;if right edge whole, include it in the
|
|
; whole bytes count
|
|
and ecx,not 40h ;there's no partial right edge
|
|
save_masks:
|
|
cmp ebx,1 ;do we have any whole bytes?
|
|
cmc ;CF set if whole byte count > 0
|
|
adc ecx,ecx ;if any whole bytes, set whole bytes
|
|
; bit in left/whole/right accumulator
|
|
rol cl,1 ;align the left/whole/right bits
|
|
mov ulLWRType,ecx ;save left/whole/right status
|
|
|
|
mov byte ptr jLeftMask,al ;save left and right clip masks
|
|
mov byte ptr jRightMask,ah
|
|
mov culWholeBytesWidth,ebx ;save # of whole bytes
|
|
|
|
; Copy the rectangle in the specified direction.
|
|
|
|
mov eax,icopydir
|
|
jmp SetUpForCopyDirection[eax*4]
|
|
|
|
|
|
;***********************************************************************;
|
|
;
|
|
; The following routines set up to handle the four possible copy
|
|
; directions.
|
|
;
|
|
;***********************************************************************;
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Set-up code for left-to-right, top-to-bottom copies.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
left_to_right_top_to_bottom::
|
|
|
|
cld ;we'll copy left to right
|
|
|
|
; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
|
|
; rotate, assuming the left-hand source byte is in AH and the right-hand source
|
|
; byte is in AL (true only for left-to-right whole bytes).
|
|
|
|
mov eax,ulCombineMask
|
|
not eax
|
|
mov ulCombineMaskWhole,eax
|
|
|
|
mov esi,pdsurf
|
|
mov eax,[esi].dsurf_lNextScan
|
|
mov ulNextScan,eax ;copy top to bottom
|
|
sub eax,culWholeBytesWidth ;offset from end of one dest whole byte
|
|
mov ulWholeScanDelta,eax ; scan to start of next
|
|
dec eax ;offset from end of one src whole byte
|
|
mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
|
|
; leading byte used to prime the
|
|
; rotation pipeline
|
|
|
|
mov esi,ulLWRType ;3-bit flag field for left, whole, and
|
|
; right involvement in operation
|
|
or esi,LEFT_TO_RIGHT_FIELD_SET ;add left-to-right into the index
|
|
mov eax,MasterThreadTable[esi*4]
|
|
mov pCurrentThread,eax ;threading when no buffering is needed
|
|
mov edx,ulAdapterType
|
|
shl edx,ADAPTER_FIELD_SHIFT
|
|
or esi,edx ;factor adapter type into the index
|
|
mov eax,MasterThreadTableViaBuffer[esi*4]
|
|
mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
|
|
|
|
mov ulCurrentJustification,JustifyTop ;copy top to bottom
|
|
|
|
mov esi,prcldest
|
|
mov eax,[esi].yBottom
|
|
mov ulLastDestScan,eax ;end at bottom of dest copy rect
|
|
mov eax,[esi].yTop
|
|
mov ulCurrentDestScan,eax ;start at top of dest copy rect
|
|
mul ulNextScan ;offset in bitmap of top dest rect scan
|
|
mov edx,[esi].xLeft
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first dest byte
|
|
mov ulLeftEdgeDest,eax ;that's where the left dest edge is
|
|
add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
|
|
; byte, unless the left edge is a whole
|
|
; byte and is thus part of the whole
|
|
; bytes already
|
|
mov ulWholeBytesDest,eax ;where the whole dest bytes start
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeDest,eax ;where the right dest edge starts
|
|
|
|
mov esi,pptlsrc
|
|
mov eax,[esi].ptl_y
|
|
mov ulCurrentSrcScan,eax ;start at top of source copy rect
|
|
mul ulNextScan ;offset in bitmap of top source rect scan
|
|
mov edx,[esi].ptl_x
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first source byte
|
|
mov ulLeftEdgeSrc,eax ;that's where the left src edge is
|
|
add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
|
|
dec eax ; last (leftmost) left edge byte, so
|
|
add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
|
|
; wide, except when the left dest byte
|
|
; is solid so the left edge is part of
|
|
; the whole bytes
|
|
mov ulWholeBytesSrc,eax ;where the src whole bytes start
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeSrc,eax ;where the right src edge starts,
|
|
; because the whole bytes and the right
|
|
; source edge share a byte, and we
|
|
; always point to the leftmost byte in
|
|
; the right source edge
|
|
|
|
; Branch to the appropriate top-to-bottom bank enumeration loop.
|
|
|
|
mov eax,ulAdapterType
|
|
jmp TopToBottomLoopTable[eax*4]
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Set-up code for right-to-left, top-to-bottom copies.
;
; Reads ulCombineMask, culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust,
; ulLeftSrcWidthMinus1, ulAdapterType, pdsurf, prcldest and pptlsrc.
; Sets the direction flag, ulCombineMaskWhole, ulNextScan, the whole-byte
; scan deltas, both thread pointers, ulCurrentJustification, the first and
; last dest scans, the first source scan, and the left/whole/right start
; offsets for dest and source. Leaves by jumping through
; TopToBottomLoopTable, indexed by ulAdapterType.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
right_to_left_top_to_bottom::
|
|
|
|
std ;we'll copy right to left
|
|
|
|
; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
|
|
; rotate, assuming the left-hand source byte is in AL and the right-hand source
|
|
; byte is in AH (always true except for left-to-right whole bytes).
; The mask is used as-is here (no inversion).
|
|
|
|
mov eax,ulCombineMask
|
|
mov ulCombineMaskWhole,eax
|
|
|
|
mov esi,pdsurf
|
|
mov eax,[esi].dsurf_lNextScan
|
|
mov ulNextScan,eax ;copy top to bottom
|
|
add eax,culWholeBytesWidth ;offset from end of one whole byte scan
|
|
mov ulWholeScanDelta,eax ; to start of next, given that we're
|
|
; copying one way and going scan-to-
|
|
; scan the other way
|
|
inc eax ;offset from end of one src whole byte
|
|
mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
|
|
; leading byte used to prime the
|
|
; rotation pipeline
|
|
|
|
mov esi,ulLWRType ;3-bit flag field for left, whole, and
|
|
; right involvement in operation
|
|
;leave left-to-right field cleared, so
|
|
; we look up right-to-left entries
|
|
mov eax,MasterThreadTable[esi*4]
|
|
mov pCurrentThread,eax ;threading when no buffering is needed
|
|
mov edx,ulAdapterType
|
|
shl edx,ADAPTER_FIELD_SHIFT
|
|
or esi,edx ;factor adapter type into the index
|
|
mov eax,MasterThreadTableViaBuffer[esi*4]
|
|
mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
|
|
|
|
mov ulCurrentJustification,JustifyTop ;copy top to bottom
|
|
|
|
; Destination: first/last scans and the left, whole-byte, and right offsets.
mov esi,prcldest
|
|
mov eax,[esi].yBottom
|
|
mov ulLastDestScan,eax ;end at bottom of dest copy rect
|
|
mov eax,[esi].yTop
|
|
mov ulCurrentDestScan,eax ;start at top of dest copy rect
|
|
mul ulNextScan ;offset in bitmap of top dest rect scan
; (MUL also overwrites EDX; EDX is
; reloaded on the next line)
|
|
mov edx,[esi].xLeft
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first dest byte
|
|
mov ulLeftEdgeDest,eax ;that's where the left dest edge is
|
|
add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
|
|
; byte, unless the left edge is a whole
|
|
; byte and is thus part of the whole
|
|
; bytes already
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeDest,eax ;where the right dest edge starts
|
|
dec eax ;back up to the last whole byte
|
|
mov ulWholeBytesDest,eax ;where the whole dest bytes start
; (rightmost whole byte, since we copy
; right to left)
|
|
|
|
; Source: first scan and the left, whole-byte, and right offsets.
mov esi,pptlsrc
|
|
mov eax,[esi].ptl_y
|
|
mov ulCurrentSrcScan,eax ;start at top of source copy rect
|
|
mul ulNextScan ;offset in bitmap of top source rect scan
|
|
mov edx,[esi].ptl_x
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first source byte
|
|
mov ulLeftEdgeSrc,eax ;that's where the left src edge is
|
|
add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
|
|
dec eax ; last (leftmost) left edge byte, so
|
|
add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
|
|
; wide, except when the left dest byte
|
|
; is solid so the left edge is part of
|
|
; the whole bytes
|
|
add eax,culWholeBytesWidth ;point to the right edge of the whole
|
|
; src bytes, accounting for the extra
|
|
; source byte needed to prime the
|
|
; rotation pipeline
|
|
mov ulWholeBytesSrc,eax ;where the src whole bytes start
|
|
mov ulRightEdgeSrc,eax ;that's also where the right src edge
|
|
; starts, because the whole bytes and
|
|
; the right source edge share a byte,
|
|
; and we always point to the leftmost
|
|
; byte in the right source edge
|
|
|
|
; Branch to the appropriate top-to-bottom bank enumeration loop.
|
|
|
|
mov eax,ulAdapterType
|
|
jmp TopToBottomLoopTable[eax*4]
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Set-up code for left-to-right, bottom-to-top copies.
;
; Reads ulCombineMask, culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust,
; ulLeftSrcWidthMinus1, ulAdapterType, pdsurf, prcldest and pptlsrc.
; Clears the direction flag, stores the inverted combine mask, a negated
; ulNextScan, the whole-byte scan deltas, both thread pointers,
; ulCurrentJustification, the first and last dest scans, the first source
; scan, and the left/whole/right start offsets for dest and source. Leaves
; by jumping through BottomToTopLoopTable, indexed by ulAdapterType.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
left_to_right_bottom_to_top::
|
|
|
|
cld ;we'll copy left to right
|
|
|
|
; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
|
|
; rotate, assuming the left-hand source byte is in AH and the right-hand source
|
|
; byte is in AL (true only for left-to-right whole bytes).
|
|
|
|
mov eax,ulCombineMask
|
|
not eax
|
|
mov ulCombineMaskWhole,eax
|
|
|
|
mov edi,pdsurf
|
|
mov eax,[edi].dsurf_lNextScan
|
|
neg eax
|
|
mov ulNextScan,eax ;copy bottom to top
|
|
sub eax,culWholeBytesWidth ;offset from end of one whole byte scan
|
|
mov ulWholeScanDelta,eax ; to start of next, given that we're
|
|
; copying one way and going scan-to-
|
|
; scan the other way
|
|
dec eax ;offset from end of one src whole byte
|
|
mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
|
|
; leading byte used to prime the
|
|
; rotation pipeline
|
|
|
|
mov esi,ulLWRType ;3-bit flag field for left, whole, and
|
|
; right involvement in operation
|
|
or esi,LEFT_TO_RIGHT_FIELD_SET ;add left-to-right into the index
|
|
mov eax,MasterThreadTable[esi*4]
|
|
mov pCurrentThread,eax ;threading when no buffering is needed
|
|
mov edx,ulAdapterType
|
|
shl edx,ADAPTER_FIELD_SHIFT
|
|
or esi,edx ;factor adapter type into the index
|
|
mov eax,MasterThreadTableViaBuffer[esi*4]
|
|
mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
|
|
|
|
mov ulCurrentJustification,JustifyBottom ;copy bottom to top
|
|
|
|
; Destination: first/last scans and the left, whole-byte, and right offsets.
mov esi,prcldest
|
|
mov edx,[esi].yTop
|
|
mov ulLastDestScan,edx ;end at top of dest copy rect
|
|
mov eax,[esi].yBottom
|
|
dec eax ;rectangle definition is non-inclusive,
|
|
; so advance to first scan we'll copy
|
|
sub edx,eax ;-(offset from rect top to bottom)
|
|
push edx ;remember for use with source
; (the MUL below overwrites EDX)
|
|
mov ulCurrentDestScan,eax ;start at bottom of dest copy rect
|
|
mul [edi].dsurf_lNextScan ;offset in bitmap of bottom dest rect
|
|
; scan (first scan to which to copy)
; The surface's scan width is used here
; rather than ulNextScan, which was
; negated above.
|
|
mov edx,[esi].xLeft
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first dest byte
|
|
mov ulLeftEdgeDest,eax ;that's where the left dest edge is
|
|
add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
|
|
; byte, unless the left edge is a whole
|
|
; byte and is thus part of the whole
|
|
; bytes already
|
|
mov ulWholeBytesDest,eax ;where the whole dest bytes start
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeDest,eax ;where the right dest edge starts
|
|
|
|
; Source: first scan and the left, whole-byte, and right offsets.
mov esi,pptlsrc
|
|
mov eax,[esi].ptl_y
|
|
pop edx ;retrieve -(offset from top to bottom)
|
|
sub eax,edx ;advance to bottom of source rect
|
|
; (inclusive; this is first scan from
|
|
; which to copy)
|
|
mov ulCurrentSrcScan,eax ;start at bottom of source copy rect
|
|
mul [edi].dsurf_lNextScan ;offset in bitmap of bottom source rect
|
|
; scan
|
|
mov edx,[esi].ptl_x
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first source byte
|
|
mov ulLeftEdgeSrc,eax ;that's where the left src edge is
|
|
add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
|
|
dec eax ; last (leftmost) left edge byte, so
|
|
add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
|
|
; wide, except when the left dest byte
|
|
; is solid so the left edge is part of
|
|
; the whole bytes
|
|
mov ulWholeBytesSrc,eax ;where the src whole bytes start
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeSrc,eax ;where the right src edge starts,
|
|
; because the whole bytes and the right
|
|
; source edge share a byte, and we
|
|
; always point to the leftmost byte in
|
|
; the right source edge
|
|
|
|
; Branch to the appropriate bottom-to-top bank enumeration loop.
|
|
|
|
mov eax,ulAdapterType
|
|
jmp BottomToTopLoopTable[eax*4]
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Set-up code for right-to-left, bottom-to-top copies.
;
; Reads ulCombineMask, culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust,
; ulLeftSrcWidthMinus1, ulAdapterType, pdsurf, prcldest and pptlsrc.
; Sets the direction flag, ulCombineMaskWhole, a negated ulNextScan, the
; whole-byte scan deltas, both thread pointers, ulCurrentJustification, the
; first and last dest scans, the first source scan, and the
; left/whole/right start offsets for dest and source. Leaves by jumping
; through BottomToTopLoopTable, indexed by ulAdapterType.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
right_to_left_bottom_to_top::
|
|
|
|
std ;we'll copy right to left
|
|
|
|
; Byte-combining mask, in preparation for ORing and letting the VGA's rotator
|
|
; rotate, assuming the left-hand source byte is in AL and the right-hand source
|
|
; byte is in AH (always true except for left-to-right whole bytes).
|
|
|
|
mov eax,ulCombineMask
|
|
mov ulCombineMaskWhole,eax
|
|
|
|
mov edi,pdsurf
|
|
mov eax,[edi].dsurf_lNextScan
|
|
neg eax
|
|
mov ulNextScan,eax ;copy bottom to top
|
|
add eax,culWholeBytesWidth ;offset from end of one whole byte scan
|
|
mov ulWholeScanDelta,eax ; to start of next
|
|
inc eax ;offset from end of one src whole byte
|
|
mov ulWholeScanSrcDelta,eax ; scan to start of next, accounting for
|
|
; leading byte used to prime the
|
|
; rotation pipeline
|
|
|
|
mov esi,ulLWRType ;3-bit flag field for left, whole, and
|
|
; right involvement in operation
|
|
;leave left-to-right field cleared, so
|
|
; we look up right-to-left entries
|
|
mov eax,MasterThreadTable[esi*4]
|
|
mov pCurrentThread,eax ;threading when no buffering is needed
|
|
mov edx,ulAdapterType
|
|
shl edx,ADAPTER_FIELD_SHIFT
|
|
or esi,edx ;factor adapter type into the index
|
|
mov eax,MasterThreadTableViaBuffer[esi*4]
|
|
mov pCurrentThreadViaBuffer,eax ;threading when buffering is needed
|
|
|
|
mov ulCurrentJustification,JustifyBottom ;copy bottom to top
|
|
|
|
; Destination: first/last scans and the left, whole-byte, and right offsets.
mov esi,prcldest
|
|
mov edx,[esi].yTop
|
|
mov ulLastDestScan,edx ;end at top of dest copy rect
|
|
mov eax,[esi].yBottom
|
|
dec eax ;rectangle definition is non-inclusive,
|
|
; so advance to first scan we'll copy
|
|
sub edx,eax ;-(offset from rect top to bottom)
|
|
push edx ;remember for use with source
; (the MUL below overwrites EDX)
|
|
mov ulCurrentDestScan,eax ;start at bottom of dest copy rect
|
|
mul [edi].dsurf_lNextScan ;offset in bitmap of bottom dest rect
|
|
; scan (first scan to which to copy)
|
|
mov edx,[esi].xLeft
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first dest byte
|
|
mov ulLeftEdgeDest,eax ;that's where the left dest edge is
|
|
add eax,ulLeftEdgeAdjust ;the whole bytes start at the next
|
|
; byte, unless the left edge is a whole
|
|
; byte and is thus part of the whole
|
|
; bytes already
|
|
add eax,culWholeBytesWidth ;point to the right edge
|
|
mov ulRightEdgeDest,eax ;where the right dest edge starts
|
|
dec eax ;back up to the last whole byte
|
|
mov ulWholeBytesDest,eax ;where the whole dest bytes start
|
|
|
|
; Source: first scan and the left, whole-byte, and right offsets.
mov esi,pptlsrc
|
|
mov eax,[esi].ptl_y
|
|
pop edx ;retrieve -(offset from top to bottom)
|
|
sub eax,edx ;advance to bottom of source rect
|
|
; (inclusive; this is first scan from
|
|
; which to copy)
|
|
mov ulCurrentSrcScan,eax ;start at bottom of source copy rect
|
|
mul [edi].dsurf_lNextScan ;offset in bitmap of bottom source rect
|
|
; scan
|
|
mov edx,[esi].ptl_x
|
|
shr edx,3 ;byte X address
|
|
add eax,edx ;offset in bitmap of first source byte
|
|
mov ulLeftEdgeSrc,eax ;that's where the left src edge is
|
|
add eax,ulLeftSrcWidthMinus1 ;the first whole byte includes the
|
|
dec eax ; last (leftmost) left edge byte, so
|
|
add eax,ulLeftEdgeAdjust ; add a byte if the left edge is 2
|
|
; wide, except when the left dest byte
|
|
; is solid so the left edge is part of
|
|
; the whole bytes
|
|
add eax,culWholeBytesWidth ;point to the right edge of the whole
|
|
; src bytes, accounting for the extra
|
|
; source byte needed to prime the
|
|
; rotation pipeline
|
|
mov ulWholeBytesSrc,eax ;where the src whole bytes start
|
|
mov ulRightEdgeSrc,eax ;that's also where the right src edge
|
|
; starts, because the whole bytes and
|
|
; the right source edge share a byte,
|
|
; and we always point to the leftmost
|
|
; byte in the right source edge
|
|
|
|
; Branch to the appropriate bottom-to-top bank enumeration loop.
|
|
|
|
mov eax,ulAdapterType
|
|
jmp BottomToTopLoopTable[eax*4]
|
|
|
|
|
|
;***********************************************************************;
|
|
;
|
|
; The following routines are the banking loops.
|
|
;
|
|
;***********************************************************************;
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Banking for 2 R/W and unbanked adapters, top to bottom.
;
; Maps the banks holding ulCurrentSrcScan and ulCurrentDestScan if those
; scans lie outside the current source/dest window clip rectangles, then
; loops: each pass sets ulBlockHeight to the smaller of the scans left in
; the dest (to ulLastDestScan or the dest bank bottom) and the scans left
; in the source bank, runs THREAD_AND_START, advances both current scans
; by ulBlockHeight, and remaps whichever bank was exhausted. Ends with
; PLAIN_RET once the dest scan reaches ulLastDestScan.
|
|
;-----------------------------------------------------------------------;
|
|
top_to_bottom_2RW::
|
|
|
|
; We're going top to bottom. Map in the source and dest, top-justified.
|
|
|
|
mov ebx,pdsurf
|
|
mov edx,ulCurrentSrcScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
|
|
; current source bank?
|
|
jl short top_2RW_map_init_src_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
|
|
; current source bank?
|
|
jl short top_2RW_init_src_bank_mapped
|
|
;no, proper bank already mapped
|
|
top_2RW_map_init_src_bank:
|
|
|
|
; Map bank containing the top source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyTop,MapSourceBank>
|
|
|
|
top_2RW_init_src_bank_mapped:
|
|
|
|
mov edx,ulCurrentDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
|
|
; current dest bank?
|
|
jl short top_2RW_map_init_dest_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
|
|
; current dest bank?
|
|
jl short top_2RW_init_dest_bank_mapped
|
|
;no, proper bank already mapped
|
|
top_2RW_map_init_dest_bank:
|
|
|
|
; Map bank containing the top dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyTop,MapDestBank>
|
|
|
|
top_2RW_init_dest_bank_mapped:
|
|
|
|
; Bank-by-bank top-to-bottom copy loop.
|
|
|
|
top_2RW_bank_loop:
|
|
|
|
; Decide how far we can go before we run out of bank or rectangle to copy.
|
|
|
|
mov edx,ulLastDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
|
|
jl short @F ;copy rectangle bottom is in this bank
|
|
mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
|
|
; of bank, at least
|
|
@@:
|
|
sub edx,ulCurrentDestScan ;# of scans we can and want to do in
|
|
; the dest bank
|
|
mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
|
|
sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
|
|
|
|
; Take the smaller of the two counts (unsigned compare).
cmp edx,eax
|
|
jb short @F ;source bank isn't limiting
|
|
mov edx,eax ;source bank is limiting
|
|
@@:
|
|
mov ulBlockHeight,edx ;# of scans we'll do in this bank
|
|
|
|
; We're ready to copy this block.
|
|
|
|
THREAD_AND_START
|
|
|
|
; Any more scans to copy?
|
|
|
|
mov eax,ulCurrentDestScan
|
|
mov esi,ulBlockHeight
|
|
add eax,esi ;we've copied to dest up to here
|
|
cmp ulLastDestScan,eax ;are we at the dest rect bottom?
|
|
jz short top_2RW_done ;yes, we're done
|
|
mov ulCurrentDestScan,eax
|
|
|
|
; Now advance either or both banks, as needed.
|
|
|
|
mov ebx,pdsurf ;reload surface pointer after the copy
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
|
|
; current dest bank?
|
|
jl short top_2RW_dest_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyTop,MapDestBank>
|
|
|
|
top_2RW_dest_bank_mapped:
|
|
|
|
; ESI still holds ulBlockHeight here.
add esi,ulCurrentSrcScan ;we've copied from source up to here
|
|
mov ulCurrentSrcScan,esi
|
|
|
|
cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
|
|
; current src bank?
|
|
jl short top_2RW_src_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,esi,JustifyTop,MapSourceBank>
|
|
|
|
top_2RW_src_bank_mapped:
|
|
|
|
jmp top_2RW_bank_loop
|
|
|
|
top_2RW_done:
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Banking for 2 R/W and unbanked adapters, bottom to top.
;
; Mirror of the top-to-bottom 2 R/W loop, working upward. Scan numbers are
; inclusive here, so each scan count is (current scan - limit + 1), and
; the loop ends with PLAIN_RET once the next dest scan would fall above
; ulLastDestScan. Banks are mapped with JustifyBottom.
|
|
;-----------------------------------------------------------------------;
|
|
bottom_to_top_2RW::
|
|
|
|
; We're going bottom to top. Map in the source and dest, bottom-justified.
|
|
|
|
mov ebx,pdsurf
|
|
mov edx,ulCurrentSrcScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
|
|
; current source bank?
|
|
jl short bot_2RW_map_init_src_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
|
|
; than current src bank?
|
|
jl short bot_2RW_init_src_bank_mapped
|
|
;no, proper bank already mapped
|
|
bot_2RW_map_init_src_bank:
|
|
|
|
; Map bank containing the bottom source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyBottom,MapSourceBank>
|
|
|
|
bot_2RW_init_src_bank_mapped:
|
|
|
|
mov edx,ulCurrentDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
|
|
; current dest bank?
|
|
jl short bot_2RW_map_init_dest_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
|
|
; than current dst bank?
|
|
jl short bot_2RW_init_dest_bank_mapped
|
|
;no, proper bank already mapped
|
|
bot_2RW_map_init_dest_bank:
|
|
|
|
; Map bank containing the bottom dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyBottom,MapDestBank>
|
|
|
|
bot_2RW_init_dest_bank_mapped:
|
|
|
|
; Bank-by-bank bottom-to-top copy loop.
|
|
|
|
bot_2RW_bank_loop:
|
|
|
|
; Decide how far we can go before we run out of bank or rectangle to copy.
|
|
|
|
mov edx,ulLastDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
|
|
jg short @F ;copy rectangle top is in this bank
|
|
mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
|
|
; of bank, at least
|
|
@@:
|
|
; EDX = ulCurrentDestScan - limit + 1
neg edx
|
|
add edx,ulCurrentDestScan ;# of scans we can and want to do in
|
|
inc edx ; the dest bank
|
|
|
|
mov eax,ulCurrentSrcScan
|
|
sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
|
|
inc eax ;# of scans we can do in the src bank
|
|
|
|
; Take the smaller of the two counts (unsigned compare).
cmp edx,eax
|
|
jb short @F ;source bank isn't limiting
|
|
mov edx,eax ;source bank is limiting
|
|
@@:
|
|
mov ulBlockHeight,edx ;# of scans we'll do in this bank
|
|
|
|
; We're ready to copy this block.
|
|
|
|
THREAD_AND_START
|
|
|
|
; Any more scans to copy?
|
|
|
|
mov eax,ulCurrentDestScan
|
|
mov esi,ulBlockHeight
|
|
sub eax,esi ;we've copied to dest up to here
|
|
cmp ulLastDestScan,eax ;are we past the dest rect top?
|
|
jg short bot_2RW_done ;yes, we're done
|
|
mov ulCurrentDestScan,eax
|
|
|
|
; Now advance either or both banks, as needed.
|
|
|
|
mov ebx,pdsurf ;reload surface pointer after the copy
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
|
|
; current dest bank?
|
|
jge short bot_2RW_dest_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyBottom,MapDestBank>
|
|
|
|
bot_2RW_dest_bank_mapped:
|
|
|
|
; ESI still holds ulBlockHeight here.
mov eax,ulCurrentSrcScan
|
|
sub eax,esi ;we've copied from source up to here
|
|
mov ulCurrentSrcScan,eax
|
|
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
|
|
; current src bank?
|
|
jge short bot_2RW_src_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyBottom,MapSourceBank>
|
|
|
|
bot_2RW_src_bank_mapped:
|
|
|
|
jmp bot_2RW_bank_loop
|
|
|
|
bot_2RW_done:
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Banking for 1R/1W adapters, top to bottom.
;
; Same bank enumeration as the 2 R/W top-to-bottom loop, with one extra
; step per block: the two dsurf_ulWindowBank entries are compared. If they
; are equal the block is copied with the default THREAD_AND_START;
; otherwise it is copied with pCurrentThreadViaBuffer (through the temp
; buffer).
|
|
;-----------------------------------------------------------------------;
|
|
top_to_bottom_1R1W::
|
|
|
|
; We're going top to bottom. Map in the source and dest, top-justified.
|
|
|
|
mov ebx,pdsurf
|
|
mov edx,ulCurrentSrcScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
|
|
; current source bank?
|
|
jl short top_1R1W_map_init_src_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
|
|
; current source bank?
|
|
jl short top_1R1W_init_src_bank_mapped
|
|
;no, proper bank already mapped
|
|
top_1R1W_map_init_src_bank:
|
|
|
|
; Map bank containing the top source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyTop,MapSourceBank>
|
|
|
|
top_1R1W_init_src_bank_mapped:
|
|
|
|
mov edx,ulCurrentDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
|
|
; current dest bank?
|
|
jl short top_1R1W_map_init_dest_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
|
|
; current dest bank?
|
|
jl short top_1R1W_init_dest_bank_mapped
|
|
;no, proper bank already mapped
|
|
top_1R1W_map_init_dest_bank:
|
|
|
|
; Map bank containing the top dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyTop,MapDestBank>
|
|
|
|
top_1R1W_init_dest_bank_mapped:
|
|
|
|
; Bank-by-bank top-to-bottom copy loop.
|
|
|
|
top_1R1W_bank_loop:
|
|
|
|
; Decide how far we can go before we run out of bank or rectangle to copy.
|
|
|
|
mov edx,ulLastDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom
|
|
jl short @F ;copy rectangle bottom is in this bank
|
|
mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
|
|
; of bank, at least
|
|
@@:
|
|
sub edx,ulCurrentDestScan ;# of scans we can and want to do in
|
|
; the dest bank
|
|
mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom
|
|
sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank
|
|
|
|
; Take the smaller of the two counts (unsigned compare).
cmp edx,eax
|
|
jb short @F ;source bank isn't limiting
|
|
mov edx,eax ;source bank is limiting
|
|
@@:
|
|
mov ulBlockHeight,edx ;# of scans we'll do in this bank
|
|
|
|
; We're ready to copy this block.
|
|
; Select different threading, depending on whether the source and destination
|
|
; are currently in the same bank; we can do edges faster if they are.
|
|
|
|
; Compare the two window-bank entries as full dwords.
mov eax,[ebx].dsurf_ulWindowBank
|
|
cmp eax,[ebx].dsurf_ulWindowBank[4]
|
|
jz short top_1R1W_copy_same_bank
|
|
|
|
; Source and dest are currently in different banks, must go through temp buffer.
|
|
|
|
THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans
|
|
|
|
; Source and dest are currently in the same bank.
|
|
|
|
top_1R1W_copy_same_bank:
|
|
THREAD_AND_START
|
|
|
|
; Any more scans to copy?
|
|
|
|
top_1R1W_check_more_scans:
|
|
|
|
mov eax,ulCurrentDestScan
|
|
mov esi,ulBlockHeight
|
|
add eax,esi ;we've copied to dest up to here
|
|
cmp ulLastDestScan,eax ;are we at the dest rect bottom?
|
|
jz short top_1R1W_done ;yes, we're done
|
|
mov ulCurrentDestScan,eax
|
|
|
|
; Now advance either or both banks, as needed.
|
|
|
|
mov ebx,pdsurf ;reload surface pointer after the copy
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
|
|
; current dest bank?
|
|
jl short top_1R1W_dest_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyTop,MapDestBank>
|
|
|
|
top_1R1W_dest_bank_mapped:
|
|
|
|
; ESI still holds ulBlockHeight here.
add esi,ulCurrentSrcScan ;we've copied from source up to here
|
|
mov ulCurrentSrcScan,esi
|
|
|
|
cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
|
|
; current src bank?
|
|
jl short top_1R1W_src_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,esi,JustifyTop,MapSourceBank>
|
|
|
|
top_1R1W_src_bank_mapped:
|
|
|
|
jmp top_1R1W_bank_loop
|
|
|
|
top_1R1W_done:
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Banking for 1R/1W adapters, bottom to top.
;
; Maps the banks holding ulCurrentSrcScan and ulCurrentDestScan
; (bottom-justified) if those scans lie outside the current source/dest
; window clip rectangles, then loops: each pass sets ulBlockHeight to the
; smaller of the scans left in the dest (up to ulLastDestScan or the dest
; bank top) and the scans left in the source bank, copies the block, moves
; both current scans up by ulBlockHeight, and remaps whichever bank was
; exhausted. The block is copied with the default THREAD_AND_START when the
; two dsurf_ulWindowBank entries are equal, and with
; pCurrentThreadViaBuffer (through the temp buffer) otherwise. Ends with
; PLAIN_RET once the next dest scan would fall above ulLastDestScan.
|
|
;-----------------------------------------------------------------------;
|
|
bottom_to_top_1R1W::
|
|
|
|
; We're going bottom to top. Map in the source and dest, bottom-justified.
|
|
|
|
mov ebx,pdsurf
|
|
mov edx,ulCurrentSrcScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
|
|
; current source bank?
|
|
jl short bot_1R1W_map_init_src_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
|
|
; than current src bank?
|
|
jl short bot_1R1W_init_src_bank_mapped
|
|
;no, proper bank already mapped
|
|
bot_1R1W_map_init_src_bank:
|
|
|
|
; Map bank containing the bottom source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyBottom,MapSourceBank>
|
|
|
|
bot_1R1W_init_src_bank_mapped:
|
|
|
|
mov edx,ulCurrentDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than
|
|
; current dest bank?
|
|
jl short bot_1R1W_map_init_dest_bank ;yes, map in proper bank
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater
|
|
; than current dst bank?
|
|
jl short bot_1R1W_init_dest_bank_mapped
|
|
;no, proper bank already mapped
|
|
bot_1R1W_map_init_dest_bank:
|
|
|
|
; Map bank containing the bottom dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,edx,JustifyBottom,MapDestBank>
|
|
|
|
bot_1R1W_init_dest_bank_mapped:
|
|
|
|
; Bank-by-bank bottom-to-top copy loop.
|
|
|
|
bot_1R1W_bank_loop:
|
|
|
|
; Decide how far we can go before we run out of bank or rectangle to copy.
|
|
|
|
mov edx,ulLastDestScan
|
|
cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop
|
|
jg short @F ;copy rectangle top is in this bank
|
|
mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end
|
|
; of bank, at least
|
|
@@:
|
|
; EDX = ulCurrentDestScan - limit + 1
neg edx
|
|
add edx,ulCurrentDestScan ;# of scans we can and want to do in
|
|
inc edx ; the dest bank
|
|
|
|
mov eax,ulCurrentSrcScan
|
|
sub eax,[ebx].dsurf_rcl2WindowClipS.yTop
|
|
inc eax ;# of scans we can do in the src bank
|
|
|
|
; Take the smaller of the two counts (unsigned compare).
cmp edx,eax
|
|
jb short @F ;source bank isn't limiting
|
|
mov edx,eax ;source bank is limiting
|
|
@@:
|
|
mov ulBlockHeight,edx ;# of scans we'll do in this bank
|
|
|
|
; We're ready to copy this block.
|
|
; Select different threading, depending on whether the source and destination
|
|
; are currently in the same bank; we can do edges faster if they are.
|
|
|
|
; Compare the two window-bank entries as full dwords, exactly as the
; top-to-bottom 1R/1W loop does. Comparing only the low bytes would report
; "same bank" for two entries that differ above bit 7.
mov eax,[ebx].dsurf_ulWindowBank
|
|
cmp eax,[ebx].dsurf_ulWindowBank[4]
|
|
jz short bot_1R1W_copy_same_bank
|
|
|
|
; Source and dest are currently in different banks, must go through temp buffer.
|
|
|
|
THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans
|
|
|
|
; Source and dest are currently in the same bank.
|
|
|
|
bot_1R1W_copy_same_bank:
|
|
THREAD_AND_START
|
|
|
|
; Any more scans to copy?
|
|
|
|
bot_1R1W_check_more_scans:
|
|
|
|
mov eax,ulCurrentDestScan
|
|
mov esi,ulBlockHeight
|
|
sub eax,esi ;we've copied to dest up to here
|
|
cmp ulLastDestScan,eax ;are we past the dest rect top?
|
|
jg short bot_1R1W_done ;yes, we're done
|
|
mov ulCurrentDestScan,eax
|
|
|
|
; Now advance either or both banks, as needed.
|
|
|
|
mov ebx,pdsurf ;reload surface pointer after the copy
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than
|
|
; current dest bank?
|
|
jge short bot_1R1W_dest_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current dest scan line into dest window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyBottom,MapDestBank>
|
|
|
|
bot_1R1W_dest_bank_mapped:
|
|
|
|
; ESI still holds ulBlockHeight here.
mov eax,ulCurrentSrcScan
|
|
sub eax,esi ;we've copied from source up to here
|
|
mov ulCurrentSrcScan,eax
|
|
|
|
cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than
|
|
; current src bank?
|
|
jge short bot_1R1W_src_bank_mapped ;no, proper bank still mapped
|
|
|
|
; Map bank containing the current source scan line into source window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
|
|
<ebx,eax,JustifyBottom,MapSourceBank>
|
|
|
|
bot_1R1W_src_bank_mapped:
|
|
|
|
jmp bot_1R1W_bank_loop
|
|
|
|
bot_1R1W_done:
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Banking for 1 R/W adapters, top to bottom.
;
; Only one window clip rectangle (dsurf_rcl1WindowClip) is consulted here.
; The loop first makes sure the dest bank is mapped and computes how many
; scans can be done in it. It then checks the source scan against the same
; clip rectangle: if the source lies outside, its bank is mapped in and EDX
; is set to 1 so the block is copied with pCurrentThreadViaBuffer;
; otherwise EDX stays 0 and the default THREAD_AND_START is used. The dest
; bank is re-checked (and remapped if needed) before each further pass.
; Ends with PLAIN_RET once the dest scan reaches ulLastDestScan.
|
|
;-----------------------------------------------------------------------;
|
|
top_to_bottom_1RW::
|
|
|
|
; We're going top to bottom. Map in the dest, top-justified.
|
|
|
|
mov ebx,pdsurf
|
|
mov esi,ulCurrentDestScan
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest top less than
|
|
; current bank?
|
|
jl short top_1RW_map_init_dest_bank ;yes, map in proper bank
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than
|
|
; current bank?
|
|
jl short top_1RW_init_dest_bank_mapped
|
|
;no, proper bank already mapped
|
|
top_1RW_map_init_dest_bank:
|
|
|
|
; Map bank containing the top dest scan line into the window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
|
|
|
|
top_1RW_init_dest_bank_mapped:
|
|
|
|
; Bank-by-bank top-to-bottom copy loop.
; On every entry ESI = ulCurrentDestScan and EBX = pdsurf.
|
|
|
|
top_1RW_bank_loop:
|
|
|
|
; Decide how far we can go before we run out of bank or rectangle to copy.
|
|
|
|
mov edi,ulLastDestScan
|
|
cmp edi,[ebx].dsurf_rcl1WindowClip.yBottom
|
|
jl short @F ;copy rectangle bottom is in this bank
|
|
mov edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end
|
|
; of bank, at least
|
|
@@:
|
|
sub edi,esi ;# of scans we can and want to do in the dest bank
|
|
|
|
; Now make sure source is mapped in. This is the condition the copying routines
|
|
; expect, and we need to figure out how far we can go in the source.
|
|
|
|
sub edx,edx ;assume source and dest are in the same
|
|
; bank
|
|
mov esi,ulCurrentSrcScan
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than
|
|
; current bank?
|
|
jl short top_1RW_map_src_Bank ;yes, must map in
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than
|
|
; current bank?
|
|
jl short top_1RW_src_bank_mapped ;no, proper bank still mapped
|
|
|
|
top_1RW_map_src_Bank:
|
|
|
|
; Map bank containing the current source scan line into the window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
|
|
|
|
; EDX is set only after the call returns.
mov edx,1 ;mark that source and dest are not in
|
|
; the same bank
|
|
top_1RW_src_bank_mapped:
|
|
|
|
mov eax,[ebx].dsurf_rcl1WindowClip.yBottom
|
|
sub eax,esi ;# of scans we can do in the src bank
|
|
|
|
; Take the smaller of the two counts (unsigned compare).
cmp edi,eax
|
|
jb short @F ;source bank isn't limiting
|
|
mov edi,eax ;source bank is limiting
|
|
@@:
|
|
mov ulBlockHeight,edi ;# of scans we'll do in this bank
|
|
|
|
; We're ready to copy this block.
|
|
; Select different threading, depending on whether the source and destination
|
|
; are currently in the same bank; we can do edges faster if they are.
|
|
|
|
and edx,edx ;sets ZF when EDX is 0 (same bank)
|
|
jz short top_1RW_copy_same_bank
|
|
|
|
; Source and dest are currently in different banks, must go through temp buffer.
|
|
|
|
THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans
|
|
|
|
; Source and dest are currently in the same bank.
|
|
|
|
top_1RW_copy_same_bank:
|
|
THREAD_AND_START
|
|
|
|
; Any more scans to copy?
|
|
|
|
top_1RW_check_more_scans:
|
|
|
|
mov esi,ulCurrentDestScan
|
|
mov edi,ulBlockHeight
|
|
add esi,edi ;we've copied to dest up to here
|
|
cmp ulLastDestScan,esi ;are we at the dest rect bottom?
|
|
jz short top_1RW_done ;yes, we're done
|
|
mov ulCurrentDestScan,esi
|
|
|
|
; Now make sure the dest bank is mapped in.
|
|
|
|
mov ebx,pdsurf ;reload surface pointer after the copy
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than
|
|
; current bank?
|
|
jl short top_1RW_map_dest_bank ;yes, map in dest bank
|
|
cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than
|
|
; current bank?
|
|
jl short top_1RW_dest_bank_mapped ;no, proper bank mapped
|
|
|
|
top_1RW_map_dest_bank:
|
|
|
|
; Map bank containing the current dest scan line into the window.
|
|
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
|
|
|
|
ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyTop>
|
|
|
|
top_1RW_dest_bank_mapped:
|
|
|
|
; EDI still holds ulBlockHeight here.
add ulCurrentSrcScan,edi ;we've copied from source up to here
|
|
|
|
jmp top_1RW_bank_loop
|
|
|
|
top_1RW_done:
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; Banking for 1 R/W adapters, bottom to top.
;
; Copies the rectangle one bank-limited block at a time, working upward
; from ulCurrentDestScan toward ulLastDestScan. For each block the dest
; bank is mapped (bottom-justified) to size the block, and then the
; source bank is mapped, because that is the state the copying routines
; expect on entry.
;
; Reads:  pdsurf, ulCurrentDestScan, ulLastDestScan, ulCurrentSrcScan
; Writes: ulBlockHeight, ulCurrentDestScan, ulCurrentSrcScan
;
; A scan is treated as inside the current bank when
;       rcl1WindowClip.yTop <= scan < rcl1WindowClip.yBottom
;-----------------------------------------------------------------------;

bottom_to_top_1RW::

; Make sure the bank holding the bottom dest scan is the one mapped in.

        mov     ebx,pdsurf
        mov     esi,ulCurrentDestScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop
        jl      short bot_1RW_map_init_dest_bank    ;scan precedes bank top
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom
        jl      short bot_1RW_init_dest_bank_mapped ;scan already in bank

bot_1RW_map_init_dest_bank:

; Map the bank containing the bottom dest scan into the window.
; EBX, ESI, and EDI survive the call (C calling conventions).

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

bot_1RW_init_dest_bank_mapped:

;-----------------------------------------------------------------------;
; Per-bank loop. On entry: EBX = pdsurf, ESI = current dest scan, and the
; dest bank is mapped in.
;-----------------------------------------------------------------------;

bot_1RW_bank_loop:

; EDI = # of dest scans we can and want to do in this bank
;     = ESI - max(ulLastDestScan, bank yTop) + 1

        mov     edi,ulLastDestScan
        cmp     edi,[ebx].dsurf_rcl1WindowClip.yTop
        jg      short @F                ;rectangle top lies in this bank
        mov     edi,[ebx].dsurf_rcl1WindowClip.yTop
                                        ;rectangle reaches the bank top,
                                        ; at least
@@:
        neg     edi
        lea     edi,[edi+esi+1]

; Now get the source bank mapped in, and work out how many scans the
; source bank can supply.

        xor     edx,edx                 ;EDX = 0: assume source and dest
                                        ; share a bank
        mov     esi,ulCurrentSrcScan
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop
        jl      short bot_1RW_map_src_Bank      ;scan precedes bank top
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom
        jl      short bot_1RW_src_bank_mapped   ;scan already in bank

bot_1RW_map_src_Bank:

; Map the bank containing the current source scan into the window.
; EBX, ESI, and EDI survive the call (C calling conventions).

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

        mov     edx,1                   ;EDX = 1: source and dest are in
                                        ; different banks

bot_1RW_src_bank_mapped:

        sub     esi,[ebx].dsurf_rcl1WindowClip.yTop
        inc     esi                     ;ESI = # of scans available in the
                                        ; source bank

; Block height is the smaller of the dest and source counts.

        cmp     esi,edi
        ja      short @F                ;dest count is the limit
        mov     edi,esi                 ;source bank is the limit
@@:
        mov     ulBlockHeight,edi       ;# of scans to do this pass

; Ready to copy the block. The threading differs depending on whether
; source and dest currently share a bank; copying is much faster when
; they do.

        test    edx,edx
        jz      short bot_1RW_copy_same_bank

; Different banks: the copy has to go through the temp buffer.

        THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans

; Same bank.

bot_1RW_copy_same_bank:
        THREAD_AND_START

; Anything left to copy?

bot_1RW_check_more_scans:

        mov     edi,ulBlockHeight
        mov     esi,ulCurrentDestScan
        sub     esi,edi                 ;dest is done down to this scan
        cmp     ulLastDestScan,esi      ;gone past the dest rect top?
        jg      short bot_1RW_done      ;yes, finished
        mov     ulCurrentDestScan,esi

; Re-establish the dest bank for the next pass.

        mov     ebx,pdsurf
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yTop
        jl      short bot_1RW_map_dest_bank     ;scan precedes bank top
        cmp     esi,[ebx].dsurf_rcl1WindowClip.yBottom
        jl      short bot_1RW_dest_bank_mapped  ;scan already in bank

bot_1RW_map_dest_bank:

; Map the bank containing the current dest scan into the window.
; EBX, ESI, and EDI survive the call (C calling conventions).

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>,<ebx,esi,JustifyBottom>

bot_1RW_dest_bank_mapped:

        sub     ulCurrentSrcScan,edi    ;source is done down to this scan

        jmp     bot_1RW_bank_loop

bot_1RW_done:
        PLAIN_RET
|
|
|
|
|
|
;***********************************************************************;
|
|
;
|
|
; The following routines are the low-level copying routines. They know
|
|
; almost nothing about banks (the routines that copy through a temp
|
|
; buffer know how to switch banks after filling the temp buffer, but
|
|
; that's it). Banking should be taken care of at a higher level.
|
|
;
|
|
;***********************************************************************;
|
|
|
|
;-----------------------------------------------------------------------;
; copy_whole_bytes
;
; Copies a block of solid (whole) bytes directly from the source to the
; destination, with no temp buffer. The latches can't be used, because
; this is a rotated copy: every dest byte is assembled from two
; neighboring source bytes. Usable with 2 R/W or 1R/1W window banking,
; with unbanked modes, or with 1 R/W adapters when the source and dest
; are in the same bank. 1 R/W adapters must go through an intermediate
; local buffer when the source and dest are in different banks.
;
; Input:
;       Direction Flag set for desired direction of copy
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance to start of next dest scan from end
;               of current
;       ulWholeScanSrcDelta = distance to start of next source scan from
;               end of current
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;       ulCombineMaskWhole = masking applied before ORing the two source
;               bytes together, keeping only the data needed ahead of
;               the VGA rotator doing its stuff
;       NOTE(review): only SEQ_DATA is written for the Map Mask, so the
;               Sequencer Index is presumably already pointing at the
;               Map Mask on entry -- confirm against the caller.
;
; Output:
;       Advances ulWholeBytesSrc and ulWholeBytesDest to the scan after
;       the last scan processed
;
; Uses: EAX, EBX, ECX, EDX, ESI, EDI, pSrcAddr, pDestAddr,
;       ulTempScanCount
;-----------------------------------------------------------------------;

copy_whole_bytes::

; Turn the bitmap-relative offsets into addresses in the dest and source
; windows.

        mov     ecx,pdsurf
        mov     eax,ulWholeBytesDest
        add     eax,[ecx].dsurf_pvBitmapStart2WindowD
        mov     pDestAddr,eax
        mov     eax,ulWholeBytesSrc
        add     eax,[ecx].dsurf_pvBitmapStart2WindowS
        mov     pSrcAddr,eax

; Bit Mask = 0FFh, so every bit of each written byte goes through.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax

; Leave the GC Index pointing to the Read Map register.

        mov     al,GRAF_READ_MAP
        out     dx,al

; Scan countdown for the block.

        mov     eax,ulBlockHeight
        mov     ulTempScanCount,eax

;-----------------------------------------------------------------------;
; One pass per scan line; all four planes are done for a scan before
; moving to the next scan.
;-----------------------------------------------------------------------;

copy_whole_scan_loop:

        mov     cl,MM_C3                ;Map Mask bit for plane 3; planes
                                        ; are done 3, 2, 1, 0

copy_whole_plane_loop:

; Map Mask: enable writes to the plane being copied.

        mov     al,cl
        mov     edx,VGA_BASE + SEQ_DATA
        out     dx,al

; Read Map: read from the same plane. Converts the plane mask
; (8/4/2/1) into a plane index (3/2/1/0).

        mov     dl,GRAF_DATA
        shr     al,1                    ;8/4/2/1 -> 4/2/1/0
        cmp     al,100b                 ;Carry set unless this is plane 3
        adc     al,-1                   ;4 -> 3; 2, 1, 0 stay as they are
        out     dx,al

; Every plane restarts from this scan's source and dest addresses.

        mov     edi,pDestAddr           ;dest start for this scan
        mov     esi,pSrcAddr            ;source start for this scan

        lodsb                           ;prime the rotation pipeline
        mov     ah,al                   ;held for combining with next byte

        mov     ebx,culWholeBytesWidth
        mov     edx,ulCombineMaskWhole

; AH  = rotation pipeline-priming byte
; EBX = # of dest bytes left on this scan
; EDX = mask keeping the wanted portions of AH and AL before combining
; ESI = source address to copy from
; EDI = target address to copy to
; Map Mask enables the desired plane for write; Bit Mask enables all bits

copy_whole_loop:
        lodsb                           ;next source byte
        mov     ch,al                   ;keep an unmasked copy for the
                                        ; next iteration
        and     eax,edx                 ;mask both bytes ahead of combining
                                        ; and rotating them
        or      al,ah                   ;combine them
        stosb                           ;write the composite byte; the VGA
                                        ; rotates during the write
        mov     ah,ch                   ;this byte pairs with the next one

        dec     ebx
        jnz     copy_whole_loop

; Next plane, if any.

        shr     cl,1
        jnz     copy_whole_plane_loop

; Step both pointers from the end of this scan to the start of the next.

        add     esi,ulWholeScanSrcDelta ;next source scan
        mov     pSrcAddr,esi
        add     edi,ulWholeScanDelta    ;next dest scan
        mov     pDestAddr,edi

; Count down scan lines.

        dec     ulTempScanCount
        jnz     copy_whole_scan_loop

; Save where we stopped, as bitmap-relative offsets, for next time.

        mov     ecx,pdsurf
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulWholeBytesDest,edi
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulWholeBytesSrc,esi

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_whole_bytes_via_buffer
;
; Copies a block of solid (whole) bytes from the source to the
; destination by way of the temp buffer. This should only be used by
; 1 R/W adapters, and then only when the source and dest are in
; different banks.
;
; Phase 1 copies all relevant bytes, plane by plane, from the source
; into a temp buffer that is laid out as an image of the source bank.
; Phase 2 maps the dest bank and, scan by scan, writes all four planes
; of one scan line from the temp buffer to the screen before moving on
; to the next scan. See ALIGNBLT.ASM for comments about why this is
; done.
;
; Input:
;       Direction Flag set for desired direction of copy
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance to start of next dest scan from end
;               of current
;       ulWholeScanSrcDelta = distance to start of next source scan from
;               end of current (also used to step through the buffer)
;       ulNextScan = width of a scan line
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;       ppTempPlane0 = pointer to pointer to plane 0 storage in temp
;               buffer
;       ppTempPlane3 = pointer to pointer to plane 3 storage in temp
;               buffer
;       ulCombineMaskWhole = masking applied before ORing the two source
;               bytes together, keeping only the data needed ahead of
;               the VGA rotator doing its stuff
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in banks
;       Expects the source bank to be mapped in; the source bank is
;       mapped in again on exit
;       NOTE(review): only SEQ_DATA is written for the Map Mask, so the
;               Sequencer Index is presumably already pointing at the
;               Map Mask on entry -- confirm against the caller.
;
; Output:
;       Advances ulWholeBytesSrc and ulWholeBytesDest to the scan after
;       the last scan processed
;
; Uses: EAX, EBX, ECX, EDX, ESI, EDI, pSrcAddr, pDestAddr,
;       ulOffsetInBank, ulTempScanCount
;-----------------------------------------------------------------------;

copy_whole_bytes_via_buffer::

; Source address from the bitmap start and the offset within the bitmap,
; plus the matching offset from the start of the bank.

        mov     ecx,pdsurf
        mov     eax,ulWholeBytesSrc
        add     eax,[ecx].dsurf_pvBitmapStart
        mov     pSrcAddr,eax
        sub     eax,[ecx].dsurf_pvStart
        mov     ulOffsetInBank,eax      ;handy because the temp buffer is
                                        ; treated as an image of the
                                        ; current bank

;-----------------------------------------------------------------------;
; Phase 1: screen -> temp buffer, one plane at a time.
;-----------------------------------------------------------------------;

; Leave the GC Index pointing to the Read Map.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     eax,3                   ;plane index; planes go 3, 2, 1, 0

copy_whole_to_buffer_plane_loop:
        mov     esi,pSrcAddr            ;source start on screen
        mov     ebx,ulBlockHeight       ;# of scans to copy
        mov     edi,ppTempPlane0
        mov     edi,[edi+eax*4]         ;this plane's storage in temp buffer
        add     edi,ulOffsetInBank      ;same position as in the bank

        mov     edx,VGA_BASE + GRAF_DATA
        out     dx,al                   ;Read Map = plane being copied

        push    eax                     ;plane index is needed again later
        mov     edx,culWholeBytesWidth  ;# of dest bytes per scan
        inc     edx                     ;source is always one byte wider
                                        ; than dest
        mov     eax,ulWholeScanSrcDelta ;end of scan -> start of next scan

copy_whole_to_buffer_scan_loop:
        mov     ecx,edx                 ;# of bytes to move for this scan
        rep     movsb                   ;scan line -> temp buffer
        add     esi,eax                 ;next source scan
        add     edi,eax                 ;next temp buffer scan

        dec     ebx                     ;count down scan lines
        jnz     copy_whole_to_buffer_scan_loop

        pop     eax                     ;plane index back
        dec     eax                     ;count down planes
        jns     copy_whole_to_buffer_plane_loop

; Save where the source stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesSrc,esi

;-----------------------------------------------------------------------;
; Phase 2: temp buffer -> screen.
;-----------------------------------------------------------------------;

; Map in the destination bank, so we can read/write to it and let the
; Bit Mask work.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification>

; The dest address can only be calculated now: on a 1 R/W adapter the
; dest bank wasn't mapped any earlier.

        mov     eax,ulWholeBytesDest
        add     eax,[ebx].dsurf_pvBitmapStart
        mov     pDestAddr,eax

; Bit Mask = 0FFh, so every bit of each written byte goes through.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax

; Scan countdown for the block.

        mov     eax,ulBlockHeight
        mov     ulTempScanCount,eax

copy_whole_from_buffer_scan_loop:

        mov     cl,MM_C3                ;Map Mask bit for plane 3; planes
                                        ; are done 3, 2, 1, 0
        mov     ebx,ppTempPlane3        ;-> plane 3's temp buffer pointer

copy_whole_from_buffer_plane_loop:

; Map Mask: enable writes to the plane being copied.

        mov     al,cl
        mov     edx,VGA_BASE + SEQ_DATA
        out     dx,al

; Pick up this plane's data in the temp buffer, and step the plane
; pointer down to the next lower plane.

        mov     esi,[ebx]               ;plane start in temp buffer
        add     esi,ulOffsetInBank      ;current scan start in temp buffer
        sub     ebx,4                   ;-> next plane's pointer
        push    ebx                     ;EBX is reused as the byte counter

        mov     edi,pDestAddr           ;dest start for this scan

        lodsb                           ;prime the rotation pipeline
        mov     ah,al                   ;held for combining with next byte

        mov     ebx,culWholeBytesWidth
        mov     edx,ulCombineMaskWhole

; AH  = rotation pipeline-priming byte
; EBX = # of dest bytes left on this scan
; EDX = mask keeping the wanted portions of AH and AL before combining
; ESI = source address to copy from (temp buffer)
; EDI = target address to copy to (screen)
; Map Mask enables the desired plane for write; Bit Mask enables all bits

copy_whole_from_buffer_loop:
        lodsb                           ;next source byte
        mov     ch,al                   ;keep an unmasked copy for the
                                        ; next iteration
        and     eax,edx                 ;mask both bytes ahead of combining
                                        ; and rotating them
        or      al,ah                   ;combine them
        stosb                           ;write the composite byte; the VGA
                                        ; rotates during the write
        mov     ah,ch                   ;this byte pairs with the next one

        dec     ebx
        jnz     copy_whole_from_buffer_loop

; Next plane, if any.

        pop     ebx                     ;pointer to plane pointer back
        shr     cl,1
        jnz     copy_whole_from_buffer_plane_loop

; Step to the next scan in both the dest and the temp buffer.

        add     edi,ulWholeScanDelta    ;next dest scan
        mov     pDestAddr,edi
        mov     eax,ulNextScan
        add     ulOffsetInBank,eax      ;next scan's start in temp buffer,
                                        ; relative to start of plane's
                                        ; storage

; Count down scan lines.

        dec     ulTempScanCount
        jnz     copy_whole_from_buffer_scan_loop

; Save where the dest stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesDest,edi

; Put back the original source bank.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification>

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_left_edge
;
; Copies a strip of left edge bytes from the source to the destination,
; assuming the source and the destination are both readable and
; writable. Can only be used by 2 R/W window banking, or by unbanked
; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
; buffer when the source and dest are in different banks. Processes up
; to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; The actual copying is done by the routine selected from
; copy_edge_table by ulLeftSrcWidthMinus1.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
;       jLeftMask = left edge clip mask
;
; Output:
;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to the scan after the
;       last scan processed
;-----------------------------------------------------------------------;

copy_left_edge::

; Turn the bitmap-relative offsets into addresses in the dest and source
; windows.

        mov     ecx,pdsurf
        mov     edi,ulLeftEdgeDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     esi,ulLeftEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge. The callee takes AH = clip mask, ESI = source address,
; EDI = dest address, and hands back ESI/EDI advanced past the strip.

        mov     ebx,ulLeftSrcWidthMinus1    ;selects the 1- or 2-wide copier
        mov     ah,byte ptr jLeftMask       ;clip mask for this edge
        call    copy_edge_table[ebx*4]

; Save where we stopped, as bitmap-relative offsets, for next time.

        mov     ecx,pdsurf
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeDest,edi
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulLeftEdgeSrc,esi

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_right_edge
;
; Copies a strip of right edge bytes from the source to the destination,
; assuming the source and the destination are both readable and
; writable. Can only be used by 2 R/W window banking, or by unbanked
; modes. 1 R/W and 1R/1W adapters must go through an intermediate local
; buffer when the source and dest are in different banks. Processes up
; to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; The actual copying is done by the routine selected from
; copy_edge_table by ulRightSrcWidthMinus1.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       ulRightSrcWidthMinus1 = width of right source edge minus 1
;               (0 or 1)
;       jRightMask = right edge clip mask
;
; Output:
;       Advances ulRightEdgeSrc and ulRightEdgeDest to the scan after
;       the last scan processed
;-----------------------------------------------------------------------;

copy_right_edge::

; Turn the bitmap-relative offsets into addresses in the dest and source
; windows.

        mov     ecx,pdsurf
        mov     edi,ulRightEdgeDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     esi,ulRightEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge. The callee takes AH = clip mask, ESI = source address,
; EDI = dest address, and hands back ESI/EDI advanced past the strip.

        mov     ebx,ulRightSrcWidthMinus1   ;selects the 1- or 2-wide copier
        mov     ah,byte ptr jRightMask      ;clip mask for this edge
        call    copy_edge_table[ebx*4]

; Save where we stopped, as bitmap-relative offsets, for next time.

        mov     ecx,pdsurf
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeDest,edi
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulRightEdgeSrc,esi

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_edge_1ws
;
; Copies an edge from a 1-wide source to the destination on the screen.
; The strip is handled in chunks of at most EDGE_CHUNK_SIZE scans; within
; a chunk, all four planes are copied (3, 2, 1, 0) before the next chunk
; is started.
;
; Entry:
;       AH = bit mask setting for edge
;       ESI = source address
;       EDI = destination address
;       ulBlockHeight = # of bytes to copy per plane (one per scan);
;               must be at least 1, since the partial-chunk entry is
;               looked up at table index (count - 1)
;       ulNextScan = scan width
;       Source readable, and destination readable and writable
;       NOTE(review): only SEQ_DATA is written for the Map Mask, so the
;               Sequencer Index is presumably already pointing at the
;               Map Mask on entry -- confirm against the caller.
; Exit:
;       ESI = next source address
;       EDI = next destination address
;
; Preserved: EBP
; Uses: EAX, EBX, ECX, EDX, pSrcAddr, pDestAddr
;-----------------------------------------------------------------------;

copy_edge_1ws::
        mov     pDestAddr,edi
        mov     pSrcAddr,esi

; Bit Mask = the clip mask for this edge (index in AL, mask in AH).

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

; Leave the GC Index pointing to the Read Map.

        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     ebx,ulBlockHeight       ;scans still to be copied
        mov     ecx,offset copy_edge_rw_1ws_full_chunk
                                        ;unrolled-loop entry for a full
                                        ; chunk; replaced below when the
                                        ; last chunk is a partial one

; Copy the edge in a series of chunks.

copy_edge_chunk_loop_1ws:

        sub     ebx,EDGE_CHUNK_SIZE     ;scans left after a full chunk
        jge     short @F                ;at least a full chunk remains
        add     ebx,EDGE_CHUNK_SIZE     ;partial chunk: EBX = scans left
        mov     ecx,pfnCopyEdgeRWEntry_1ws[-4][ebx*4]
                                        ;unrolled-loop entry that copies
                                        ; exactly EBX scans
        xor     ebx,ebx                 ;nothing remains after this chunk
@@:
        push    ebx                     ;remaining scan count

        mov     ebx,ulNextScan          ;scan-to-scan stride for the copy
        mov     ah,MM_C3                ;Map Mask bit for plane 3

copy_edge_plane_loop_1ws::

; Map Mask: enable writes to the plane being copied. Only DL changes
; between ports; DH is left over from the VGA_BASE + GRAF_ADDR load.

        mov     dl,SEQ_DATA
        mov     al,ah
        out     dx,al

; Read Map: read from the same plane. Converts the plane mask
; (8/4/2/1) into a plane index (3/2/1/0).

        mov     dl,GRAF_DATA
        shr     al,1                    ;8/4/2/1 -> 4/2/1/0
        cmp     al,100b                 ;Carry set unless this is plane 3
        adc     al,-1                   ;4 -> 3; 2, 1, 0 stay as they are
        out     dx,al

; Every plane restarts from this chunk's source and dest addresses.

        mov     edi,pDestAddr
        mov     esi,pSrcAddr

        jmp     ecx                     ;enter the unrolled loop and copy
                                        ; this plane's part of the edge


;-----------------------------------------------------------------------;
; Table of unrolled edge loop entry points. First entry point is to copy
; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
;-----------------------------------------------------------------------;

pfnCopyEdgeRWEntry_1ws  label   dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD       EDGE_RW_1WS,%INDEX
INDEX = INDEX+1
        endm


;-----------------------------------------------------------------------;
; Unrolled loop for copying a strip of edge bytes, with 1-wide source and
; destination both readable and writable. Each expansion copies one byte
; and steps both pointers one scan.
;-----------------------------------------------------------------------;

COPY_EDGE_RW_1WS macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     al,[esi]                ;byte to copy
        mov     dl,[edi]                ;dest read loads the latches; the
                                        ; value itself is not used
        mov     [edi],al                ;write, clipped by the Bit Mask;
                                        ; the VGA rotates during the write
        add     esi,ebx                 ;next source scan
        add     edi,ebx                 ;next dest scan
        endm    ;-----------------------------------;

; EBX = scan line width
; ESI = source address to copy from
; EDI = target address to copy to
; Bit Mask set to desired clipping
; Read Map and Map Mask set to enable the desired plane for read and write

copy_edge_rw_1ws_full_chunk:
        UNROLL_LOOP     COPY_EDGE_RW_1WS,EDGE_RW_1WS,EDGE_CHUNK_SIZE

; Next plane within this chunk, if any.

        shr     ah,1
        jnz     copy_edge_plane_loop_1ws

; All planes done; the next chunk starts where this one stopped.

        mov     pDestAddr,edi
        mov     pSrcAddr,esi

; Next chunk within this bank block, if any.

        pop     ebx                     ;remaining scan count
        test    ebx,ebx
        jnz     copy_edge_chunk_loop_1ws        ;more scans to do

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_left_edge_via_buffer
;
; Copies a strip of left edge bytes from the source to the destination
; through an intermediate RAM buffer. This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different
; banks. Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more
; bytes might cause flicker.
;
; The screen-to-buffer and buffer-to-screen copies are done by routines
; selected from copy_edge_from_screen_to_buffer and
; copy_edge_from_buffer_to_screen by ulLeftSrcWidthMinus1.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       jLeftMask = left edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current
;               bank
;       ulLeftSrcWidthMinus1 = width of left source edge minus 1 (0 or 1)
;       For 1 R/W adapters, expects the source bank to be mapped in;
;       banking is the same at exit as it was at entry
;
; Output:
;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to the scan after the
;       last scan processed
;
; Note that this should never be called for an unbanked or 2 R/W
; adapter, because the source and dest are always both addressable
; simultaneously then.
;-----------------------------------------------------------------------;

copy_left_edge_via_buffer::

;-----------------------------------------------------------------------;
; Step 1: screen -> temp buffer.
;-----------------------------------------------------------------------;

; Source address from the source window's bitmap start and the offset
; within the bitmap. (The dest address has to wait until the dest bank
; is mapped.)

        mov     ecx,pdsurf
        mov     esi,ulLeftEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        mov     eax,ulLeftSrcWidthMinus1    ;selects the 1- or 2-wide copier
        call    copy_edge_from_screen_to_buffer[eax*4]

; Save where the source stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulLeftEdgeSrc,esi

;-----------------------------------------------------------------------;
; Step 2: temp buffer -> screen.
;-----------------------------------------------------------------------;

; Map in the source bank to match the destination, so we can read/write
; to it and let the Bit Mask work. On a 1 R/W adapter this call maps
; both banks, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>

; The dest address can only be calculated now: on a 1 R/W adapter the
; dest bank wasn't mapped any earlier.

        mov     edi,ulLeftEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD

; Do the copy. The callee takes AH = clip mask and EDI = dest address,
; and hands back EDI advanced past the strip.

        mov     ebx,ulLeftSrcWidthMinus1    ;selects the 1- or 2-wide copier
        mov     ah,byte ptr jLeftMask       ;clip mask for this edge
        call    copy_edge_from_buffer_to_screen[ebx*4]

; Save where the dest stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeDest,edi

; Put back the original source bank. On a 1 R/W adapter this call maps
; both banks, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; copy_right_edge_via_buffer
;
; Copies a strip of right edge bytes from the source to the destination
; through an intermediate RAM buffer. This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different
; banks. Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more
; bytes might cause flicker.
;
; The screen-to-buffer and buffer-to-screen copies are done by routines
; selected from copy_edge_from_screen_to_buffer and
; copy_edge_from_buffer_to_screen by ulRightSrcWidthMinus1.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       jRightMask = right edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current
;               bank
;       ulRightSrcWidthMinus1 = width of right source edge minus 1
;               (0 or 1)
;       For 1 R/W adapters, expects the source bank to be mapped in;
;       banking is the same at exit as it was at entry
;
; Output:
;       Advances ulRightEdgeSrc and ulRightEdgeDest to the scan after
;       the last scan processed
;
; Note that this should never be called for an unbanked or 2 R/W
; adapter, because the source and dest are always both addressable
; simultaneously then.
;-----------------------------------------------------------------------;

copy_right_edge_via_buffer::

;-----------------------------------------------------------------------;
; Step 1: screen -> temp buffer.
;-----------------------------------------------------------------------;

; Source address from the source window's bitmap start and the offset
; within the bitmap. (The dest address has to wait until the dest bank
; is mapped.)

        mov     ecx,pdsurf
        mov     esi,ulRightEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        mov     eax,ulRightSrcWidthMinus1   ;selects the 1- or 2-wide copier
        call    copy_edge_from_screen_to_buffer[eax*4]

; Save where the source stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulRightEdgeSrc,esi

;-----------------------------------------------------------------------;
; Step 2: temp buffer -> screen.
;-----------------------------------------------------------------------;

; Map in the source bank to match the destination, so we can read/write
; to it and let the Bit Mask work. On a 1 R/W adapter this call maps
; both banks, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentDestScan,ulCurrentJustification,MapSourceBank>

; The dest address can only be calculated now: on a 1 R/W adapter the
; dest bank wasn't mapped any earlier.

        mov     edi,ulRightEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD

; Do the copy. The callee takes AH = clip mask and EDI = dest address,
; and hands back EDI advanced past the strip.

        mov     ebx,ulRightSrcWidthMinus1   ;selects the 1- or 2-wide copier
        mov     ah,byte ptr jRightMask      ;clip mask for this edge
        call    copy_edge_from_buffer_to_screen[ebx*4]

; Save where the dest stopped, as a bitmap-relative offset.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeDest,edi

; Put back the original source bank. On a 1 R/W adapter this call maps
; both banks, which is fine.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,ulCurrentSrcScan,ulCurrentJustification,MapSourceBank>

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Copies an edge from the temp buffer (1 wide) to the screen.
; The edge is written in chunks of at most EDGE_CHUNK_SIZE scans; within
; each chunk every plane is written in turn, starting from the MM_C3 Map
; Mask value and shifting the mask right until it reaches zero.
|
|
; Entry:
|
|
; AH = bit mask setting for edge
|
|
; EDI = destination address
|
|
; pTempPlane = temp buffer from which to copy; each plane's bytes
; start ulBlockHeight bytes after the previous plane's
|
|
; ulBlockHeight = # of bytes to copy per plane
|
|
; ulNextScan = scan width
|
|
; Source and dest banks both pointing to destination
; NOTE(review): the Map Mask value is written straight to SEQ_DATA, so
; the Sequencer Index is presumably already selecting the Map Mask
; on entry - confirm against the callers.
|
|
; Exit:
|
|
; EDI = next destination address
; pDestAddr and pTempEntry are overwritten; EAX, EBX, ECX, EDX and
; ESI are modified.
|
|
;
|
|
; Preserved: EBP
|
|
;-----------------------------------------------------------------------;
|
|
|
|
copy_buffered_edge_to_screen_1ws::
|
|
|
|
mov pDestAddr,edi
|
|
|
|
; Program the Bit Mask: AL = register index, AH = mask value, written to
; the GC Index/Data pair with a single word OUT.
mov edx,VGA_BASE + GRAF_ADDR
|
|
mov al,GRAF_BIT_MASK
|
|
out dx,ax
|
|
|
|
mov pTempEntry,offset copy_edge_from_buf_full_chunk_1ws
|
|
;entry point into unrolled loop to copy first
|
|
; chunk, assuming it's a full chunk
|
|
mov ecx,pTempPlane ;temp buffer start (copy from here)
|
|
mov ebx,ulBlockHeight ;total # of scans to copy
|
|
|
|
; Copy the edge in a series of chunks, to avoid flicker.
|
|
|
|
copy_from_buffer_chunk_loop_1ws:
|
|
|
|
sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
|
|
; a full chunk
|
|
jge short @F ;do a full chunk
|
|
add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
|
|
; scans
|
|
; The table's first dword is the 1-scan entry, hence the -4 bias on the
; count-scaled index.
mov ebx,pfnCopyEdgesFromBufferEntry_1ws[-4][ebx*4]
|
|
mov pTempEntry,ebx ;entry point into unrolled loop to copy desired
|
|
; chunk size
|
|
sub ebx,ebx ;no scans after this
|
|
@@:
|
|
push ebx ;remember remaining scan count
|
|
|
|
mov al,MM_C3 ;start by copying plane 3
|
|
mov ebx,ulNextScan ;EBX = dest scan-to-scan step for the unrolled loop
|
|
|
|
push ecx ;remember current temp buffer start
|
|
|
|
copy_from_buffer_plane_loop_1ws:
|
|
|
|
; Set Map Mask to enable writes to plane we're copying.
|
|
|
|
mov dl,SEQ_DATA ;leave DX pointing to the Sequencer Data reg
|
|
out dx,al
|
|
|
|
mov esi,ecx ;point to current plane's source byte
|
|
add ecx,ulBlockHeight ;point to next plane's source byte
|
|
|
|
mov edi,pDestAddr ;every plane starts at this chunk's first dest scan
|
|
|
|
jmp pTempEntry ;enter the unrolled copy at the entry point
; selected for this chunk's scan count
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of unrolled edge copy-from-buffer loop entry points. First entry
|
|
; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
|
|
; bytes.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
pfnCopyEdgesFromBufferEntry_1ws label dword
|
|
INDEX = 1
|
|
rept EDGE_CHUNK_SIZE
|
|
DEFINE_DD EDGE_FROM_BUFFER_1WS,%INDEX
|
|
INDEX = INDEX+1
|
|
endm
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Unrolled loop for copying a strip of edge bytes (1 wide) from the temp
|
|
; buffer. Each expansion copies one byte and steps the destination by
; one scan.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
COPY_EDGE_FROM_BUFFER_1WS macro ENTRY_LABEL,ENTRY_INDEX
|
|
&ENTRY_LABEL&ENTRY_INDEX&:
|
|
mov ah,[esi] ;get byte to copy
|
|
inc esi ;point to next source (temp buffer) byte
|
|
mov dl,[edi] ;read to load latches (value doesn't matter; DL is reloaded per plane)
|
|
mov [edi],ah ;write, with the Bit Mask clipping
|
|
; VGA rotates during write
|
|
add edi,ebx ;point to next dest (screen) scan
|
|
endm ;-----------------------------------;
|
|
|
|
; Register and VGA state expected on entry to the unrolled loop:
; EBX = scan line width
|
|
; ESI = source address to copy from (temp buffer)
|
|
; EDI = target address to copy to (screen)
|
|
; Bit Mask set to desired clipping
|
|
; Map Mask set to enable the desired plane for write
|
|
|
|
copy_edge_from_buf_full_chunk_1ws:
|
|
UNROLL_LOOP COPY_EDGE_FROM_BUFFER_1WS, \
|
|
EDGE_FROM_BUFFER_1WS,EDGE_CHUNK_SIZE
|
|
|
|
; Do next plane within this chunk, if any.
|
|
|
|
shr al,1 ;advance to next plane
|
|
jnz copy_from_buffer_plane_loop_1ws ;mask shifted out to zero = all planes done
|
|
|
|
; Remember where we left off, for next chunk.
|
|
|
|
mov pDestAddr,edi
|
|
pop ecx ;get back current temp buffer start
|
|
add ecx,EDGE_CHUNK_SIZE ;point to next chunk's start
|
|
|
|
; Do next chunk within this bank block, if any.
|
|
|
|
pop ebx ;retrieve remaining scan count
|
|
and ebx,ebx ;any scans left?
|
|
jnz copy_from_buffer_chunk_loop_1ws ;more scans to do
|
|
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; Saves a 1-byte-wide edge from the screen into the temp buffer, one
; plane at a time.
;
; On entry:
;       ESI           = screen address of the first edge byte
;       pTempPlane    = temp buffer that receives the edge bytes
;       ulBlockHeight = # of bytes to save per plane (one per scan)
;       ulNextScan    = scan width
;       Source bank pointing to source
;
; On exit:
;       DH  = VGA_BASE SHR 8
;       ESI = next source address
;
; Temp buffer layout: ulBlockHeight bytes for plane 3 come first, then
; the same number for planes 2, 1 and 0, back to back.
;
; Preserved: EBP
;-----------------------------------------------------------------------;

copy_screen_to_buffered_edge_1ws::

        mov     pSrcAddr,esi            ;each plane pass restarts from here

; Aim the GC Index at the Read Map register. After this only the GC Data
; port has to be written to switch planes.

        mov     al,GRAF_READ_MAP
        mov     edx,VGA_BASE + GRAF_ADDR
        out     dx,al
        mov     dl,GRAF_DATA            ;DX -> GC Data reg from here on

        mov     edi,pTempPlane          ;EDI walks the whole temp buffer,
                                        ; plane after plane
        mov     ecx,ulNextScan          ;ECX = step from one scan to the next
        mov     al,3                    ;plane 3 is saved first

copy_edge_to_buffer_plane_loop_1ws:

        out     dx,al                   ;Read Map = plane being saved
        mov     ebx,ulBlockHeight       ;EBX = scans left in this plane
        mov     esi,pSrcAddr            ;back to the top of the edge

; Inner loop:
;       AH  = byte in flight (AL keeps the plane number)
;       EBX = remaining scan count
;       ECX = scan width
;       ESI = screen source, EDI = temp buffer destination

edge_to_buffer_loop_1ws:
        mov     ah,[esi]                ;fetch the edge byte from the screen
        mov     [edi],ah                ;store it in the temp buffer
        add     esi,ecx                 ;down one scan on the screen
        inc     edi                     ;next temp buffer byte

        dec     ebx
        jnz     edge_to_buffer_loop_1ws

        dec     al                      ;next lower plane; goes negative
        jns     copy_edge_to_buffer_plane_loop_1ws ; once plane 0 is done

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Copies an edge from a 2-wide source to the destination on the screen.
; The edge is processed in chunks of at most EDGE_CHUNK_SIZE scans; for
; each chunk the planes are done in turn, with AL counting the Read Map
; plane number down from 3 and AH carrying the matching Map Mask bit,
; starting at MM_C3 and shifted right once per plane.
|
|
; Entry:
|
|
; AH = bit mask setting for edge
|
|
; ESI = source address
|
|
; EDI = destination address
|
|
; ulBlockHeight = # of bytes to copy per plane
|
|
; ulNextScan = scan width
|
|
; ulCombineMask = masking to be applied before ORing the two source
|
|
; bytes together, to keep only the data needed in preparation
|
|
; for the VGA rotator doing its stuff
|
|
; Source readable, and destination readable and writable
; NOTE(review): the Map Mask value is written straight to SEQ_DATA, so
; the Sequencer Index is presumably already selecting the Map Mask
; on entry - confirm against the callers.
|
|
; Exit:
|
|
; ESI = next source address
|
|
; EDI = next destination address
; pSrcAddr and pDestAddr are overwritten; EAX, EBX, ECX and EDX are
; modified.
|
|
;
|
|
; Preserved: EBP
|
|
;-----------------------------------------------------------------------;
|
|
|
|
copy_edge_2ws::
|
|
mov pSrcAddr,esi
|
|
mov pDestAddr,edi
|
|
|
|
; Set the clip mask for this edge.
; (AL = register index, AH = mask value, written with one word OUT.)
|
|
|
|
mov edx,VGA_BASE + GRAF_ADDR
|
|
mov al,GRAF_BIT_MASK
|
|
out dx,ax
|
|
|
|
; Leave the GC Index pointing to the Read Map.
|
|
|
|
mov al,GRAF_READ_MAP
|
|
out dx,al
|
|
|
|
mov ebx,ulBlockHeight ;total # of scans to copy
|
|
|
|
mov ecx,offset copy_edge_rw_2ws_full_chunk
|
|
;entry point into unrolled loop assuming we do
|
|
; a full chunk the first time
|
|
|
|
; Copy the edge in a series of chunks.
|
|
|
|
copy_edge_chunk_loop_2ws:
|
|
|
|
sub ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
|
|
; a full chunk
|
|
jge short @F ;do a full chunk
|
|
add ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
|
|
; scans
|
|
; The table's first dword is the 1-scan entry, hence the -4 bias on the
; count-scaled index.
mov ecx,pfnCopyEdgeRWEntry_2ws[-4][ebx*4]
|
|
;entry point into unrolled loop to copy desired
|
|
; chunk size
|
|
sub ebx,ebx ;no scans after this
|
|
@@:
|
|
push ebx ;remember remaining scan count
|
|
|
|
mov eax,(MM_C3 SHL 8) + 3 ;start by copying plane 3
; (AH = Map Mask bit, AL = Read Map plane #)
|
|
mov ebx,ulNextScan ;EBX = scan-to-scan step for the unrolled loop
|
|
|
|
copy_edge_plane_loop_2ws:
|
|
|
|
push eax ;preserve plane info
; (the unrolled loop uses AX as scratch)
|
|
|
|
; Set Read Map to enable reads from plane we're copying from.
|
|
|
|
mov edx,VGA_BASE + GRAF_DATA
|
|
out dx,al
|
|
|
|
; Set Map Mask to enable writes to plane we're copying.
|
|
|
|
mov dl,SEQ_DATA
|
|
mov al,ah
|
|
out dx,al
|
|
|
|
; Every plane restarts at this chunk's first source and dest scans.
mov esi,pSrcAddr
|
|
mov edi,pDestAddr
|
|
mov edx,ulCombineMask
|
|
|
|
jmp ecx ;enter the unrolled copy at the entry point
; selected for this chunk's scan count
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Table of unrolled edge loop entry points. First entry point is to copy
|
|
; 1 scan (one destination byte), last entry point is to copy
; EDGE_CHUNK_SIZE scans.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
pfnCopyEdgeRWEntry_2ws label dword
|
|
INDEX = 1
|
|
rept EDGE_CHUNK_SIZE
|
|
DEFINE_DD EDGE_RW_2WS,%INDEX
|
|
INDEX = INDEX+1
|
|
endm
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
; Unrolled loop for copying a strip of edge bytes, with 2-wide source and
|
|
; destination both readable and writable.
; NOTE(review): the destination read is the last read before the write,
; presumably so the latches hold destination rather than source
; data - keep that order if this is ever rescheduled.
|
|
;-----------------------------------------------------------------------;
|
|
|
|
COPY_EDGE_RW_2WS macro ENTRY_LABEL,ENTRY_INDEX
|
|
&ENTRY_LABEL&ENTRY_INDEX&:
|
|
mov ax,[esi] ;get word to copy
|
|
add esi,ebx ;point to next source scan
|
|
and eax,edx ;mask in preparation for combining bytes
|
|
or al,ah ;combine the desired parts of the bytes
|
|
mov ah,[edi] ;read to load latches (value doesn't matter)
|
|
mov [edi],al ;write, with the Bit Mask clipping
|
|
; VGA rotates during write
|
|
add edi,ebx ;point to next dest scan
|
|
endm ;-----------------------------------;
|
|
|
|
; Register and VGA state expected on entry to the unrolled loop:
; EBX = scan line width
|
|
; EDX = mask to preserve desired portions of AH and AL before combining
|
|
; ESI = source address to copy from
|
|
; EDI = target address to copy to
|
|
; Bit Mask set to desired clipping
|
|
; Read Map and Map Mask set to enable the desired plane for read and write
|
|
|
|
copy_edge_rw_2ws_full_chunk:
|
|
UNROLL_LOOP COPY_EDGE_RW_2WS,EDGE_RW_2WS,EDGE_CHUNK_SIZE
|
|
|
|
; Do next plane within this chunk, if any.
|
|
|
|
pop eax ;retrieve plane info
|
|
|
|
shr ah,1 ;advance to next plane
|
|
dec eax ;count down planes
|
|
jns copy_edge_plane_loop_2ws ;EAX goes negative once the count passes 0
|
|
|
|
; Remember where we left off, for the next chunk.
|
|
|
|
mov pSrcAddr,esi
|
|
mov pDestAddr,edi
|
|
|
|
; Do next chunk within this bank block, if any.
|
|
|
|
pop ebx ;retrieve remaining scan count
|
|
and ebx,ebx ;any scans left?
|
|
jnz copy_edge_chunk_loop_2ws ;more scans to do
|
|
|
|
PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; Copies an edge from the temp buffer (2 wide) to the screen.
; The edge is written in chunks of at most EDGE_CHUNK_SIZE scans, to
; avoid flicker; within each chunk every plane is written in turn,
; starting from the MM_C3 Map Mask value and shifting the mask right
; until it reaches zero.
;
; Entry:
;       AH = bit mask setting for edge
;       EDI = destination address
;       pTempPlane = temp buffer from which to copy; each plane's words
;               start ulBlockHeight*2 bytes after the previous plane's
;       ulBlockHeight = # of words to copy per plane (one per scan)
;       ulNextScan = scan width
;       Source and dest banks both pointing to destination
;       ulCombineMask = masking to be applied before ORing the two source
;               bytes together, to keep only the data needed in preparation
;               for the VGA rotator doing its stuff
;       NOTE(review): the Map Mask value is written straight to SEQ_DATA,
;               so the Sequencer Index is presumably already selecting the
;               Map Mask on entry - confirm against the callers.
; Exit:
;       EDI = next destination address
;       pDestAddr and pTempEntry are overwritten; EAX, EBX, ECX, EDX and
;       ESI are modified.
;
; Preserved: EBP
;-----------------------------------------------------------------------;

copy_buffered_edge_to_screen_2ws::

        mov     pDestAddr,edi

; Program the Bit Mask: AL = register index, AH = mask value, written to
; the GC Index/Data pair with a single word OUT.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

        mov     pTempEntry,offset copy_edge_from_buf_full_chunk_2ws
                                ;entry point into unrolled loop, assuming the
                                ; first chunk is full size
        mov     ecx,pTempPlane  ;temp buffer start (copy from here)
        mov     ebx,ulBlockHeight ;total # of scans to copy

; Copy the edge in a series of chunks, to avoid flicker.

copy_from_buffer_chunk_loop_2ws:

        sub     ebx,EDGE_CHUNK_SIZE ;scans remaining after this chunk, assuming
                                ; a full chunk
        jge     short @F        ;do a full chunk
        add     ebx,EDGE_CHUNK_SIZE ;not a full chunk; process all remaining
                                ; scans

; The table's first dword is the 1-scan entry, hence the -4 bias on the
; count-scaled index.

        mov     ebx,pfnCopyEdgesFromBufferEntry_2ws[-4][ebx*4]
        mov     pTempEntry,ebx  ;entry point into unrolled loop to copy final
                                ; chunk size
        sub     ebx,ebx         ;no scans after this
@@:
        push    ebx             ;remember remaining scan count

        mov     al,MM_C3        ;start by copying plane 3
        mov     ebx,ulNextScan  ;EBX = dest scan-to-scan step for the
                                ; unrolled loop

        push    ecx             ;remember current temp buffer start

copy_from_buffer_plane_loop_2ws:

; Set Map Mask to enable writes to plane we're copying.

        mov     edx,VGA_BASE + SEQ_DATA
        out     dx,al

        push    eax             ;preserve plane info (the unrolled loop uses
                                ; AX as scratch)

        mov     esi,ecx         ;point to current plane's source word
        mov     eax,ulBlockHeight
        lea     ecx,[ecx+eax*2] ;point to next plane's source word

        mov     edi,pDestAddr   ;every plane starts at this chunk's first
                                ; dest scan
        mov     edx,ulCombineMask

        jmp     pTempEntry      ;enter the unrolled copy at the entry point
                                ; selected for this chunk's scan count


;-----------------------------------------------------------------------;
; Table of unrolled edge copy-from-buffer loop entry points. First entry
; point is to copy 1 scan (one source word), last entry point is to copy
; EDGE_CHUNK_SIZE scans.
;
; The label is spelled with a lowercase "ws" so that it matches the
; reference in the chunk loop above and the naming of the other entry
; tables in this procedure.
;-----------------------------------------------------------------------;

pfnCopyEdgesFromBufferEntry_2ws label dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD EDGE_FROM_BUFFER_2WS,%INDEX
INDEX = INDEX+1
        endm


;-----------------------------------------------------------------------;
; Unrolled loop for copying a strip of edge words (2 wide) from the temp
; buffer. Each expansion combines one source word into a single byte and
; writes it to one destination scan.
;-----------------------------------------------------------------------;

COPY_EDGE_FROM_BUFFER_2WS macro ENTRY_LABEL,ENTRY_INDEX
&ENTRY_LABEL&ENTRY_INDEX&:
        mov     ax,[esi]        ;get word to copy
        add     esi,2           ;point to next source (temp buffer) word
        and     eax,edx         ;mask in preparation for combining bytes
        or      al,ah           ;combine the desired parts of the bytes
        mov     ah,[edi]        ;latch the destination (value doesn't matter)
        mov     [edi],al        ;write, with the Bit Mask clipping
                                ; VGA rotates during write
        add     edi,ebx         ;point to next dest (screen) scan
        endm    ;-----------------------------------;

; Register and VGA state expected on entry to the unrolled loop:
;       EBX = scan line width
;       EDX = mask to preserve desired portions of AH and AL before combining
;       ESI = source address to copy from (temp buffer)
;       EDI = target address to copy to (screen)
;       Bit Mask set to desired clipping
;       Map Mask set to enable the desired plane for write

copy_edge_from_buf_full_chunk_2ws:
        UNROLL_LOOP COPY_EDGE_FROM_BUFFER_2WS,EDGE_FROM_BUFFER_2WS,EDGE_CHUNK_SIZE

; Do next plane within this chunk, if any.

        pop     eax             ;retrieve plane info
        shr     al,1            ;advance to next plane
        jnz     copy_from_buffer_plane_loop_2ws ;mask shifted out to zero =
                                ; all planes done

; Remember where we left off, for next chunk.

        mov     pDestAddr,edi
        pop     ecx             ;get back current temp buffer start
        add     ecx,EDGE_CHUNK_SIZE*2 ;point to next chunk's start word

; Do next chunk within this bank block, if any.

        pop     ebx             ;retrieve remaining scan count
        and     ebx,ebx         ;any scans left?
        jnz     copy_from_buffer_chunk_loop_2ws ;more scans to do

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
; Saves a 2-byte-wide edge from the screen into the temp buffer, one
; plane at a time.
;
; On entry:
;       ESI           = screen address of the first edge word
;       pTempPlane    = temp buffer that receives the edge words
;       ulBlockHeight = # of words to save per plane (one per scan)
;       ulNextScan    = scan width
;       Source bank pointing to source
;
; On exit:
;       ESI = next source address
;
; Temp buffer layout: ulBlockHeight words for plane 3 come first, then
; the same number for planes 2, 1 and 0, back to back.
;
; Preserved: EBP
;-----------------------------------------------------------------------;

copy_screen_to_buffered_edge_2ws::

        mov     pSrcAddr,esi            ;each plane pass restarts from here

; Aim the GC Index at the Read Map register. After this only the GC Data
; port has to be written to switch planes.

        mov     al,GRAF_READ_MAP
        mov     edx,VGA_BASE + GRAF_ADDR
        out     dx,al

        mov     edi,pTempPlane          ;EDI walks the whole temp buffer,
                                        ; plane after plane
        mov     ecx,ulNextScan          ;ECX = step from one scan to the next
        mov     eax,3                   ;plane 3 is saved first

copy_edge_to_buf_pl_loop_2ws:

; DX doubles as the data register in the inner loop, so the port address
; has to be reloaded for every plane.

        mov     edx,VGA_BASE + GRAF_DATA
        out     dx,al                   ;Read Map = plane being saved

        mov     ebx,ulBlockHeight       ;EBX = scans left in this plane
        mov     esi,pSrcAddr            ;back to the top of the edge

; Inner loop:
;       DX  = word in flight
;       EBX = remaining scan count
;       ECX = scan width
;       ESI = screen source, EDI = temp buffer destination

edge_to_buffer_loop_2ws:
        mov     dx,[esi]                ;fetch the edge word from the screen
        mov     [edi],dx                ;store it in the temp buffer
        add     esi,ecx                 ;down one scan on the screen
        add     edi,2                   ;next temp buffer word

        dec     ebx
        jnz     edge_to_buffer_loop_2ws

        dec     eax                     ;next lower plane; goes negative
        jns     copy_edge_to_buf_pl_loop_2ws ; once plane 0 is done

        PLAIN_RET
|
|
|
|
|
|
;-----------------------------------------------------------------------;
|
|
|
|
endProc vNonAlignedSrcCopy
|
|
|
|
_TEXT$04 ends
|
|
|
|
end
|