;---------------------------Module-Header------------------------------;
; Module Name: alignblt.asm
;
; Copyright (c) 1992 Microsoft Corporation
;-----------------------------------------------------------------------;
;-----------------------------------------------------------------------;
; VOID vAlignedSrcCopy(PDEVSURF pdsurf, RECTL * prcldest, POINTL * pptlsrc,
;                      INT icopydir);
; Input:
;  pdsurf   - surface on which to copy
;  prcldest - pointer to destination rectangle
;  pptlsrc  - pointer to source upper left corner
;  icopydir - direction in which copy must proceed to avoid overlap problems
;             and synchronize with the clip enumeration visually, according
;             to constants CD_RIGHTDOWN, CD_LEFTDOWN, CD_RIGHTUP, and
;             CD_LEFTUP in WINDDI.H
;
; Performs accelerated aligned SRCCOPY VGA-to-VGA blts.
;
;-----------------------------------------------------------------------;
;
; Note: Assumes all rectangles have positive heights and widths. Will not
; work properly if this is not the case.
;
;-----------------------------------------------------------------------;

        comment $

The overall approach of this module for each rectangle to copy is:

1) Precalculate the masks and whole byte widths, and determine which of
   partial left edge, partial right edge, and whole middle bytes are
   required for this copy.

2) Set up the starting pointers for each of the areas (left, whole middle,
   right), the start and stop scan lines, the copying direction
   (left-to-right or right-to-left, and top-to-bottom or bottom-to-top),
   and the threading (sequence of calls required to do the left/whole/right
   components in the proper sequence), based on the passed-in copy
   direction, which in turn is dictated by the nature of the overlap
   between the source and destination.

3) Execute a loop, based on adapter type (2 R/W windows, 1R/1W window,
   1 R/W window, unbanked), that sequences through the intersection of each
   bank with the source and destination rectangles in the proper direction
   (top-to-bottom or bottom-to-top, based on the passed-in copy direction),
   and performs the copy in each such rectangle. The threading vector is
   used to call the required routines (copy left/whole/right bytes). For
   1 R/W and 1R/1W adapters, there is a second threading vector that is
   called when the source and the destination are both adequately (for the
   copy purposes) addressable simultaneously (because they're in the same
   bank), so there's no need to copy through a temp buffer. Obviously, we
   want to avoid the temp buffer whenever we can, because it's much slower
   and doesn't let us take advantage of the VGA's hardware.

Note: 1 R/W and 1R/1W edges are copied through a temporary buffer. However,
each plane's bytes are not stored in the corresponding plane's temp buffer,
but rather consecutively in the plane 0 temp buffer. This is to reduce page
faulting, and also so that 1R/1W adapters only need a temp buffer large
enough to hold 4*tallest bank bytes (2K will do here, but nalgnblt.asm
needs 4K). 1 R/W adapters still copy whole bytes through the full temp
buffer, using all four planes' temp buffers, so they require a temp buffer
big enough to hold a full bank (256K will do).

        commend $

;-----------------------------------------------------------------------;

; This is no longer used, but is needed by unroll.inc.

LOOP_UNROLL_SHIFT equ 1

;-----------------------------------------------------------------------;

; Maximum # of edge bytes to process before switching to next plane. Larger
; means faster, but there's more potential for flicker, since the raster scan
; has a better chance of catching bytes that have changed in some planes but
; not all planes. Larger also means bigger.

EDGE_CHUNK_SIZE equ 4

;-----------------------------------------------------------------------;
; THREAD_AND_START [THREADING] [,RETURN_ADDR]
;
; Runs a "thread": a short list of routines that are chained through their
; own returns. The final return address is pushed first, then the second
; and third routine addresses (if any), and finally control jumps to the
; first routine, so each routine's RET starts the next one.
;
;  THREADING   - pointer to a thread descriptor (dword count, followed by
;                1 to 3 routine addresses); defaults to pCurrentThread
;  RETURN_ADDR - where to resume after the last routine; defaults to the
;                instruction following the macro
;
; Uses EAX, ECX and EDX, and alters the flags.

THREAD_AND_START macro THREADING,RETURN_ADDR
        local   thread_entry, thread_done

ifnb <&RETURN_ADDR&>
        push    offset &RETURN_ADDR&    ;resume at the caller-supplied label
else
        push    offset thread_done      ;resume right after the macro
endif

ifnb <&THREADING&>
        mov     eax,&THREADING&         ;caller-supplied thread descriptor
else
        mov     eax,pCurrentThread      ;default thread descriptor
endif
        mov     ecx,[eax]               ;ECX = # of routines (1, 2, or 3)
        lea     ecx,[ecx*2+ecx]         ;ECX = 3 * count; each PUSH below
                                        ; encodes in exactly 3 bytes
        mov     edx,offset thread_entry+3
        sub     edx,ecx                 ;back up over (count - 1) PUSHes
        jmp     edx                     ;enter the PUSH ladder at that point

; The ladder: later routines are pushed first so they run last. Do not
; change these two instructions without revisiting the 3-byte arithmetic
; above.

        push    dword ptr [eax+12]      ;third routine (count = 3 only)
        push    dword ptr [eax+8]       ;second routine (count >= 2)
thread_entry:
        jmp     dword ptr [eax+4]       ;start the first routine
thread_done:
        endm

;-----------------------------------------------------------------------;

        .386

ifndef  DOS_PLATFORM
        .model  small,c
else
ifdef   STD_CALL
        .model  small,c
else
        .model  small,pascal
endif   ; STD_CALL
endif   ; DOS_PLATFORM

        assume ds:FLAT,es:FLAT,ss:FLAT
        assume fs:nothing,gs:nothing

        .xlist
        include stdcall.inc             ;calling convention cmacros
        include i386\egavga.inc
        include i386\strucs.inc
        include i386\unroll.inc
        include i386\ropdefs.inc
        .list

;-----------------------------------------------------------------------;

        .data

; Thread descriptors that string together the left edge, whole byte, and
; right edge routines in the various orders needed, both with and without
; the temp buffer. Each descriptor is laid out as:
;
;  DWORD +0  = # of calls in thread (1, 2, or 3)
;        +4  = first call (required)
;        +8  = second call (optional)
;        +12 = third call (optional)

        align   4

; Copies not involving the temp buffer.

; Each descriptor is "count, routine[, routine[, routine]]".

Thread_L        dd      1, copy_left_edge
Thread_W        dd      1, copy_whole_bytes
Thread_R        dd      1, copy_right_edge
Thread_LR       dd      2, copy_left_edge, copy_right_edge
Thread_RL       dd      2, copy_right_edge, copy_left_edge
Thread_LW       dd      2, copy_left_edge, copy_whole_bytes
Thread_WL       dd      2, copy_whole_bytes, copy_left_edge
Thread_WR       dd      2, copy_whole_bytes, copy_right_edge
Thread_RW       dd      2, copy_right_edge, copy_whole_bytes
Thread_LWR      dd      3, copy_left_edge, copy_whole_bytes, copy_right_edge
Thread_RWL      dd      3, copy_right_edge, copy_whole_bytes, copy_left_edge

; Copies involving the temp buffer.

Thread_Lb       dd      1, copy_left_edge_via_buffer
Thread_Wb       dd      1, copy_whole_bytes_via_buffer
Thread_Rb       dd      1, copy_right_edge_via_buffer
Thread_LbRb     dd      2, copy_left_edge_via_buffer, copy_right_edge_via_buffer
Thread_RbLb     dd      2, copy_right_edge_via_buffer, copy_left_edge_via_buffer
Thread_LbW      dd      2, copy_left_edge_via_buffer, copy_whole_bytes
Thread_LbWb     dd      2, copy_left_edge_via_buffer, copy_whole_bytes_via_buffer
Thread_WLb      dd      2, copy_whole_bytes, copy_left_edge_via_buffer
Thread_WbLb     dd      2, copy_whole_bytes_via_buffer, copy_left_edge_via_buffer
Thread_WRb      dd      2, copy_whole_bytes, copy_right_edge_via_buffer
Thread_WbRb     dd      2, copy_whole_bytes_via_buffer, copy_right_edge_via_buffer
Thread_RbW      dd      2, copy_right_edge_via_buffer, copy_whole_bytes
Thread_RbWb     dd      2, copy_right_edge_via_buffer, copy_whole_bytes_via_buffer
Thread_LbWRb    dd      3, copy_left_edge_via_buffer
                dd      copy_whole_bytes, copy_right_edge_via_buffer
Thread_LbWbRb   dd      3, copy_left_edge_via_buffer
                dd      copy_whole_bytes_via_buffer, copy_right_edge_via_buffer
Thread_RbWLb    dd      3, copy_right_edge_via_buffer
                dd      copy_whole_bytes, copy_left_edge_via_buffer
Thread_RbWbLb   dd      3, copy_right_edge_via_buffer
                dd      copy_whole_bytes_via_buffer, copy_left_edge_via_buffer

;-----------------------------------------------------------------------;
; Thread selection by horizontal copy direction. The look-up index is a
; 4-bit field:
;
;  Bit 3 = 1 if left-to-right copy, 0 if right-to-left
;  Bit 2 = 1 if left edge must be copied
;  Bit 1 = 1 if whole bytes must be copied
;  Bit 0 = 1 if right edge must be copied
;
; This table serves every case where the source and destination are
; simultaneously addressable for our purposes, so the temp buffer is not
; needed (unbanked, 2 R/W, and sometimes 1 R/W and 1R/1W).
;
; Entries with none of the L/W/R bits set (indices 0 and 8) are 0.

MasterThreadTable label dword
                                        ;right-to-left (bit 3 = 0)
;                       -               R               W               RW
        dd              0,              Thread_R,       Thread_W,       Thread_RW
;                       L               RL              WL              RWL
        dd              Thread_L,       Thread_RL,      Thread_WL,      Thread_RWL
                                        ;left-to-right (bit 3 = 1)
;                       -               R               W               WR
        dd              0,              Thread_R,       Thread_W,       Thread_WR
;                       L               LR              LW              LWR
        dd              Thread_L,       Thread_LR,      Thread_LW,      Thread_LWR

; Thread selection by adapter type and horizontal copy direction. The
; look-up index is a 6-bit field:
;
;  Bit 5 = adapter type high bit
;  Bit 4 = adapter type low bit
;  Bit 3 = 1 if left-to-right copy, 0 if right-to-left
;  Bit 2 = 1 if left edge must be copied
;  Bit 1 = 1 if whole bytes must be copied
;  Bit 0 = 1 if right edge must be copied
;
; This table serves the cases where the source and destination are not both
; simultaneously addressable for our purposes, so the copy has to go through
; the temp buffer (only for 1 R/W and 1R/1W, and only sometimes).

MasterThreadTableViaBuffer label dword

; Four groups of 16 entries, one group per adapter type, in adapter type
; order. Within a group the first 8 entries are right-to-left and the last
; 8 are left-to-right; within each run of 8 the index is the L/W/R bit
; field (L = 4, W = 2, R = 1). A "b" suffix in a thread name means that
; part goes through the temp buffer.

                                        ;--- unbanked (no need for buffer)
                                        ;right-to-left
        dd      0,              Thread_R,       Thread_W,       Thread_RW
        dd      Thread_L,       Thread_RL,      Thread_WL,      Thread_RWL
                                        ;left-to-right
        dd      0,              Thread_R,       Thread_W,       Thread_WR
        dd      Thread_L,       Thread_LR,      Thread_LW,      Thread_LWR

                                        ;--- 1 R/W banking window
                                        ; (everything goes through buffer)
                                        ;right-to-left
        dd      0,              Thread_Rb,      Thread_Wb,      Thread_RbWb
        dd      Thread_Lb,      Thread_RbLb,    Thread_WbLb,    Thread_RbWbLb
                                        ;left-to-right
        dd      0,              Thread_Rb,      Thread_Wb,      Thread_WbRb
        dd      Thread_Lb,      Thread_LbRb,    Thread_LbWb,    Thread_LbWbRb

                                        ;--- 1R/1W banking window
                                        ; (edges go through buffer)
                                        ;right-to-left
        dd      0,              Thread_Rb,      Thread_W,       Thread_RbW
        dd      Thread_Lb,      Thread_RbLb,    Thread_WLb,     Thread_RbWLb
                                        ;left-to-right
        dd      0,              Thread_Rb,      Thread_W,       Thread_WRb
        dd      Thread_Lb,      Thread_LbRb,    Thread_LbW,     Thread_LbWRb

                                        ;--- 2 R/W banking window
                                        ; (no need for buffer)
                                        ;right-to-left
        dd      0,              Thread_R,       Thread_W,       Thread_RW
        dd      Thread_L,       Thread_RL,      Thread_WL,      Thread_RWL
                                        ;left-to-right
        dd      0,              Thread_R,       Thread_W,       Thread_WR
        dd      Thread_L,       Thread_LR,      Thread_LW,      Thread_LWR

; Left shift that positions the adapter type above the 4-bit direction/LWR
; field when indexing MasterThreadTableViaBuffer.

ADAPTER_FIELD_SHIFT     equ     4

; OR this into an index to select the left-to-right half of either
; MasterThread table.

LEFT_TO_RIGHT_FIELD_SET equ     1000b

; Table of top-to-bottom loops for adapter types.

; (Indexed by adapter type; see the matching adapter order in
; MasterThreadTableViaBuffer.)

        align   4
TopToBottomLoopTable label dword
        dd      top_to_bottom_2RW       ;0: unbanked is same as 2RW
        dd      top_to_bottom_1RW       ;1: 1 R/W window
        dd      top_to_bottom_1R1W      ;2: 1R/1W windows
        dd      top_to_bottom_2RW       ;3: 2 R/W windows

; Table of bottom-to-top loops for adapter types, indexed the same way.

        align   4
BottomToTopLoopTable label dword
        dd      bottom_to_top_2RW       ;0: unbanked is same as 2RW
        dd      bottom_to_top_1RW       ;1: 1 R/W window
        dd      bottom_to_top_1R1W      ;2: 1R/1W windows
        dd      bottom_to_top_2RW       ;3: 2 R/W windows

; Set-up routine for each copy direction, indexed by icopydir.

        align   4
SetUpForCopyDirection label dword
        dd      left_to_right_top_to_bottom     ;CD_RIGHTDOWN
        dd      right_to_left_top_to_bottom     ;CD_LEFTDOWN
        dd      left_to_right_bottom_to_top     ;CD_RIGHTUP
        dd      right_to_left_bottom_to_top     ;CD_LEFTUP

;-----------------------------------------------------------------------;

; Left edge clip masks, indexed by the intrabyte start address (0 through
; 7). An all-ones mask (index 0) flags a whole byte.

jLeftMaskTable  label   byte
        db      11111111b,01111111b,00111111b,00011111b
        db      00001111b,00000111b,00000011b,00000001b

;-----------------------------------------------------------------------;

; Right edge clip masks for intrabyte end addresses (non-inclusive)
; 0 through 7. Whole byte cases are flagged as 0ffh.

jRightMaskTable label   byte
        db      0ffh,080h,0c0h,0e0h,0f0h,0f8h,0fch,0feh

;-----------------------------------------------------------------------;

                .code

_TEXT$03   SEGMENT DWORD USE32 PUBLIC 'CODE'
           ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING

;-----------------------------------------------------------------------;
; vAlignedSrcCopy entry point.
;
; Caches the temp-buffer pointers and the banking type from the surface,
; then calls copy_rect to do the copy. ESI, EDI and EBX are saved and
; restored through the "uses" list.
;
; NOTE(review): the closing ">" of the cProc argument list was missing from
; the source as received and has been restored here.

cProc vAlignedSrcCopy,16,<      \
        uses esi edi ebx,       \
        pdsurf: ptr DEVSURF,    \
        prcldest : ptr RECTL,   \
        pptlsrc : ptr POINTL,   \
        icopydir : dword        >

        local   culWholeBytesWidth : dword ;# of bytes to copy across each scan
        local   ulBlockHeight : dword   ;# of scans to copy per bank block
        local   ulWholeScanDelta : dword ;offset from end of one whole bytes
                                        ; scan to start of next
        local   ulWholeBytesSrc : dword ;offset in bitmap of first source whole
                                        ; byte to copy from
        local   ulWholeBytesDest : dword ;offset in bitmap of first dest whole
                                        ; byte to copy to
        local   ulLeftEdgeSrc : dword   ;offset in bitmap of first source left
                                        ; edge byte to copy from
        local   ulLeftEdgeDest : dword  ;offset in bitmap of first dest left
                                        ; edge byte to copy to
        local   ulRightEdgeSrc : dword  ;offset in bitmap of first source right
                                        ; edge byte to copy from
        local   ulRightEdgeDest : dword ;offset in bitmap of first dest right
                                        ; edge byte to copy to
        local   ulNextScan : dword      ;width of scan, in bytes (the set-up
                                        ; code stores it negated for
                                        ; bottom-to-top copies)
        local   jLeftMask : dword       ;left edge clip mask (only the low
                                        ; byte is written by copy_rect)
        local   jRightMask : dword      ;right edge clip mask (only the low
                                        ; byte is written by copy_rect)
        local   culTempCount : dword    ;handy temporary counter
        local   pTempEntry : dword      ;temporary storage for vector into
                                        ; unrolled loop
        local   pTempPlane : dword      ;pointer to storage in temp buffer for
                                        ; edge bytes (which are stored
                                        ; consecutively, not in each plane's
                                        ; temp buffer, to reduce possible page
                                        ; faulting)
        local   ppTempPlane0 : dword    ;pointer to pointer to storage in temp
                                        ; buffer for plane 0, immediately
                                        ; preceded by storage for planes 1, 2,
                                        ; and 3
        local   ppTempPlane3 : dword    ;like above, but for plane 3
        local   ulOffsetInBank : dword  ;offset relative to bank start
        local   pSrcAddr : dword        ;working pointer to first source
                                        ; byte to copy from
        local   pDestAddr : dword       ;working pointer to first dest
                                        ; byte to copy to
        local   ulCurrentJustification:dword ;justification used to map in
                                        ; banks; top for top to bottom
                                        ; copies, bottom for bottom to top
        local   ulCurrentSrcScan :dword ;scan line used to map in current
                                        ; source bank
        local   ulCurrentDestScan:dword ;scan line used to map in current dest
                                        ; bank
        local   ulLastDestScan :dword   ;scan in target rect at which we stop
                                        ; advancing through banks
        local   pCurrentThread : dword  ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy
        local   pCurrentThreadViaBuffer:dword ;pointer to data describing the
                                        ; threaded calls to be performed to
                                        ; perform the current copy in the case
                                        ; where the source and destination are
                                        ; not simultaneously adequately
                                        ; accessible, so the copy has to go
                                        ; through a temp buffer (used only for
                                        ; 1 R/W and 1R/1W banking)
        local   ulAdapterType : dword   ;adapter type code, per VIDEO_BANK_TYPE
        local   ulLWRType : dword       ;whether left edge, whole bytes, and
                                        ; right edge are involved in the
                                        ; current operation;
                                        ; bit 2 = 1 if left edge involved
                                        ; bit 1 = 1 if whole bytes involved
                                        ; bit 0 = 1 if right edge involved
        local   ulLeftEdgeAdjust :dword ;used to bump the whole bytes start
                                        ; address past the left edge when the
                                        ; left edge is partial

;-----------------------------------------------------------------------;

; Set pointers to temp buffer plane pointers (used only by 1 R/W and 1R/1W
; adapters), and other rectangle-independent variables.

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_pvBankBufferPlane0
        mov     pTempPlane,eax          ;edge bytes are staged in plane 0's
                                        ; temp buffer
        lea     eax,[esi].dsurf_pvBankBufferPlane0
        mov     ppTempPlane0,eax        ;address of the plane 0 pointer field
        lea     eax,[esi].dsurf_pvBankBufferPlane3
        mov     ppTempPlane3,eax        ;address of the plane 3 pointer field
        mov     eax,[esi].dsurf_vbtBankingType
        mov     ulAdapterType,eax       ;later used to index the loop and
                                        ; thread tables

; Copy the rectangle.

        call    copy_rect

;-----------------------------------------------------------------------;
; Set the VGA registers back to their default state.
;-----------------------------------------------------------------------;

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK ;AL = register index,
                                        ; AH = 0ffh data
        out     dx,ax                   ;enable bit mask for all bits
        mov     dl,SEQ_DATA             ;only DL changes; presumably the
                                        ; sequencer port shares the upper
                                        ; port-address byte - confirm
        mov     al,MM_ALL
        out     dx,al                   ;enable writes to all planes
                                        ; NOTE(review): assumes the sequencer
                                        ; index already selects the map mask

        cld                             ;restore default direction flag

        cRet    vAlignedSrcCopy         ;done

;***********************************************************************;
;
; Copies the specified rectangle.
;
; Reads prcldest and icopydir; computes the left/right clip masks, the
; whole-byte count and the left/whole/right involvement flags, stores them
; in jLeftMask, jRightMask, culWholeBytesWidth, ulLWRType and
; ulLeftEdgeAdjust, and then jumps to the set-up routine for icopydir.
;
; icopydir is used unchecked as an index into SetUpForCopyDirection, which
; has four entries, so callers must pass 0 through 3.
;
;***********************************************************************;

copy_rect:

; Set up masks and whole bytes count, and build left/whole/right index
; indicating which of those parts are involved in the copy.

        mov     edi,prcldest            ;point to rectangle to copy
        mov     ebx,[edi].xRight        ;right edge of copy (non-inclusive)
        mov     ecx,ebx
        and     ecx,0111b               ;intrabyte address of right edge
        mov     ah,jRightMaskTable[ecx] ;right edge mask

        mov     esi,[edi].xLeft         ;left edge of copy (inclusive)
        sub     ebx,esi                 ;width in pixels of copy
        and     esi,0111b               ;intrabyte address of left edge
        mov     al,jLeftMaskTable[esi]  ;left edge mask

        dec     ebx                     ;make inclusive on right
        add     ebx,esi                 ;inclusive width, starting counting at
                                        ; the beginning of the left edge byte
        shr     ebx,3                   ;# of bytes touched - 1
        jnz     short more_than_1_byte  ;more than 1 byte is involved

; Only one byte will be affected. Combine first/last masks.

        and     al,ah                   ;we'll use first byte mask only
        xor     ah,ah                   ;want last byte mask to be 0 to
                                        ; indicate right edge not involved
        inc     ebx                     ;so there's one count to subtract below
                                        ; if this isn't a whole edge byte

more_than_1_byte:

; If all pixels in the left edge are altered, combine the first byte into the
; whole byte count, because we can handle solid edge bytes faster as part of
; the whole bytes. Ditto for the right edge.
;
; ECX accumulates the involvement flags. Bit 0 is "left edge partial" and
; bit 6 (40h) is "right edge partial"; the ADC/ROL pair at save_masks then
; shifts in the "whole bytes" flag and rotates everything into the final
; layout: bit 2 = left, bit 1 = whole, bit 0 = right.

        sub     ecx,ecx                 ;edge whole-status accumulator (this
                                        ; also clears CF)
        cmp     al,-1                   ;is left edge a whole byte or partial?
        adc     ecx,ecx                 ;ECX=1 if left edge partial, 0 if whole
        sub     ebx,ecx                 ;if left edge partial, deduct it from
                                        ; the whole bytes count
        mov     ulLeftEdgeAdjust,ecx    ;for skipping over the left edge if
                                        ; it's partial when pointing to the
                                        ; whole bytes
        and     ah,ah                   ;is right edge mask 0, meaning this
                                        ; copy is only 1 byte wide?
        jz      short save_masks        ;yes, no need to do anything
        or      ecx,40h                 ;assume there's a partial right edge
        cmp     ah,-1                   ;is right edge a whole byte or partial?
        jnz     short save_masks        ;partial
        inc     ebx                     ;if right edge whole, include it in the
                                        ; whole bytes count
        and     ecx,not 40h             ;there's no partial right edge

save_masks:
        cmp     ebx,1                   ;do we have any whole bytes?
        cmc                             ;CF set if whole byte count > 0
        adc     ecx,ecx                 ;ECX = ECX*2 + (any whole bytes):
                                        ; left -> bit 1, right -> bit 7,
                                        ; whole -> bit 0
        rol     cl,1                    ;left -> bit 2, whole -> bit 1,
                                        ; right wraps around to bit 0
        mov     ulLWRType,ecx           ;save left/whole/right status

        mov     byte ptr jLeftMask,al   ;save left and right clip masks
        mov     byte ptr jRightMask,ah
        mov     culWholeBytesWidth,ebx  ;save # of whole bytes

; Copy the rectangle in the specified direction.

        mov     eax,icopydir
        jmp     SetUpForCopyDirection[eax*4]


;***********************************************************************;
;
; The following routines set up to handle the four possible copy
; directions.
;
;***********************************************************************;

;-----------------------------------------------------------------------;
; Set-up code for left-to-right, top-to-bottom copies.
;-----------------------------------------------------------------------;
; Uses culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust and ulAdapterType;
; leaves through TopToBottomLoopTable.

left_to_right_top_to_bottom::

        cld                             ;string instructions go left to right

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;positive stride: top to bottom
        sub     eax,culWholeBytesWidth  ;distance from the end of one run of
        mov     ulWholeScanDelta,eax    ; whole bytes to the start of the next

; Pick the threads for this left/whole/right combination.

        mov     esi,ulLWRType           ;3-bit left/whole/right flag field
        or      esi,LEFT_TO_RIGHT_FIELD_SET ;select the left-to-right entries
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed

        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add the adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the buffer is
                                            ; needed

        mov     ulCurrentJustification,JustifyTop ;banks map top-justified

; Destination scan range and byte offsets.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     ulLastDestScan,eax      ;stop at the bottom of the dest rect
        mov     eax,[esi].yTop
        mov     ulCurrentDestScan,eax   ;start at the top of the dest rect
        mul     ulNextScan              ;offset in bitmap of the top dest scan
                                        ; (MUL overwrites EDX)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge byte
        add     eax,ulLeftEdgeAdjust    ;step past the left edge byte when it
                                        ; is partial; when it is whole it
                                        ; already counts as a whole byte
        mov     ulWholeBytesDest,eax    ;first whole dest byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;right dest edge byte

; Source scan and byte offsets, derived the same way.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;start at the top of the source rect
        mul     ulNextScan              ;offset in bitmap of the top source
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left source edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        mov     ulWholeBytesSrc,eax     ;first whole source byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;right source edge byte

; Branch to the appropriate top-to-bottom bank enumeration loop.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]


;-----------------------------------------------------------------------;
; Set-up code for right-to-left, top-to-bottom copies.
;
; Same as the left-to-right case except that the direction flag is set, the
; right-to-left table entries are used, and the whole-byte pointers start
; at the LAST whole byte of the run.
;-----------------------------------------------------------------------;

right_to_left_top_to_bottom::

        std                             ;string instructions go right to left

        mov     esi,pdsurf
        mov     eax,[esi].dsurf_lNextScan
        mov     ulNextScan,eax          ;positive stride: top to bottom
        add     eax,culWholeBytesWidth  ;bytes are walked leftward but scans
        mov     ulWholeScanDelta,eax    ; advance downward, so the run width
                                        ; is added rather than subtracted

; Pick the threads for this left/whole/right combination.

        mov     esi,ulLWRType           ;3-bit left/whole/right flag field;
                                        ; the left-to-right bit stays clear so
                                        ; the right-to-left entries are used
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed

        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add the adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the buffer is
                                            ; needed

        mov     ulCurrentJustification,JustifyTop ;banks map top-justified

; Destination scan range and byte offsets.

        mov     esi,prcldest
        mov     eax,[esi].yBottom
        mov     ulLastDestScan,eax      ;stop at the bottom of the dest rect
        mov     eax,[esi].yTop
        mov     ulCurrentDestScan,eax   ;start at the top of the dest rect
        mul     ulNextScan              ;offset in bitmap of the top dest scan
                                        ; (MUL overwrites EDX)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;right dest edge byte
        dec     eax                     ;back up to the last whole byte
        mov     ulWholeBytesDest,eax    ;whole dest bytes start here and run
                                        ; leftward

; Source scan and byte offsets, derived the same way.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        mov     ulCurrentSrcScan,eax    ;start at the top of the source rect
        mul     ulNextScan              ;offset in bitmap of the top source
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left source edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;right source edge byte
        dec     eax                     ;back up to the last whole byte
        mov     ulWholeBytesSrc,eax     ;whole source bytes start here and run
                                        ; leftward

; Branch to the appropriate top-to-bottom bank enumeration loop.

        mov     eax,ulAdapterType
        jmp     TopToBottomLoopTable[eax*4]


;-----------------------------------------------------------------------;
; Set-up code for left-to-right, bottom-to-top copies.
;-----------------------------------------------------------------------;
; Uses culWholeBytesWidth, ulLWRType, ulLeftEdgeAdjust and ulAdapterType;
; leaves through BottomToTopLoopTable. EDI is reloaded with pdsurf here and
; one dword is pushed and popped around the dest offset calculation.

left_to_right_bottom_to_top::

        cld                             ;string instructions go left to right

        mov     edi,pdsurf
        mov     eax,[edi].dsurf_lNextScan
        neg     eax
        mov     ulNextScan,eax          ;negative stride: bottom to top
        sub     eax,culWholeBytesWidth  ;bytes are walked rightward but scans
        mov     ulWholeScanDelta,eax    ; advance upward

; Pick the threads for this left/whole/right combination.

        mov     esi,ulLWRType           ;3-bit left/whole/right flag field
        or      esi,LEFT_TO_RIGHT_FIELD_SET ;select the left-to-right entries
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed

        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add the adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the buffer is
                                            ; needed

        mov     ulCurrentJustification,JustifyBottom ;banks map
                                                     ; bottom-justified

; Destination scan range and byte offsets, starting from the bottom scan.

        mov     esi,prcldest
        mov     edx,[esi].yTop
        mov     ulLastDestScan,edx      ;stop at the top of the dest rect
        mov     eax,[esi].yBottom
        dec     eax                     ;yBottom is non-inclusive; this is the
                                        ; first scan we'll copy
        sub     edx,eax                 ;EDX = -(rect height - 1)
        push    edx                     ;keep it for the source calculation,
                                        ; because MUL overwrites EDX
        mov     ulCurrentDestScan,eax   ;start at the bottom of the dest rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of the bottom dest
                                        ; scan (uses the positive stride)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        mov     ulWholeBytesDest,eax    ;first whole dest byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;right dest edge byte

; Source scan and byte offsets.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        pop     edx                     ;EDX = -(rect height - 1)
        sub     eax,edx                 ;bottom scan of the source rect
                                        ; (inclusive; first scan to copy from)
        mov     ulCurrentSrcScan,eax    ;start at the bottom of the source rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of the bottom source
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left source edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        mov     ulWholeBytesSrc,eax     ;first whole source byte
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;right source edge byte

; Branch to the appropriate bottom-to-top bank enumeration loop.

        mov     eax,ulAdapterType
        jmp     BottomToTopLoopTable[eax*4]


;-----------------------------------------------------------------------;
; Set-up code for right-to-left, bottom-to-top copies.
;
; Same as the left-to-right case except that the direction flag is set, the
; right-to-left table entries are used, and the whole-byte pointers start
; at the LAST whole byte of the run.
;-----------------------------------------------------------------------;

right_to_left_bottom_to_top::

        std                             ;string instructions go right to left

        mov     edi,pdsurf
        mov     eax,[edi].dsurf_lNextScan
        neg     eax
        mov     ulNextScan,eax          ;negative stride: bottom to top
        add     eax,culWholeBytesWidth  ;distance from the end of one run of
        mov     ulWholeScanDelta,eax    ; whole bytes to the start of the next

; Pick the threads for this left/whole/right combination.

        mov     esi,ulLWRType           ;3-bit left/whole/right flag field;
                                        ; the left-to-right bit stays clear so
                                        ; the right-to-left entries are used
        mov     eax,MasterThreadTable[esi*4]
        mov     pCurrentThread,eax      ;thread used when no buffer is needed

        mov     edx,ulAdapterType
        shl     edx,ADAPTER_FIELD_SHIFT
        or      esi,edx                 ;add the adapter type to the index
        mov     eax,MasterThreadTableViaBuffer[esi*4]
        mov     pCurrentThreadViaBuffer,eax ;thread used when the buffer is
                                            ; needed

        mov     ulCurrentJustification,JustifyBottom ;banks map
                                                     ; bottom-justified

; Destination scan range and byte offsets, starting from the bottom scan.

        mov     esi,prcldest
        mov     edx,[esi].yTop
        mov     ulLastDestScan,edx      ;stop at the top of the dest rect
        mov     eax,[esi].yBottom
        dec     eax                     ;yBottom is non-inclusive; this is the
                                        ; first scan we'll copy
        sub     edx,eax                 ;EDX = -(rect height - 1)
        push    edx                     ;keep it for the source calculation,
                                        ; because MUL overwrites EDX
        mov     ulCurrentDestScan,eax   ;start at the bottom of the dest rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of the bottom dest
                                        ; scan (uses the positive stride)
        mov     edx,[esi].xLeft
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first dest byte
        mov     ulLeftEdgeDest,eax      ;left dest edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeDest,eax     ;right dest edge byte
        dec     eax                     ;back up to the last whole byte
        mov     ulWholeBytesDest,eax    ;whole dest bytes start here and run
                                        ; leftward

; Source scan and byte offsets.

        mov     esi,pptlsrc
        mov     eax,[esi].ptl_y
        pop     edx                     ;EDX = -(rect height - 1)
        sub     eax,edx                 ;bottom scan of the source rect
                                        ; (inclusive; first scan to copy from)
        mov     ulCurrentSrcScan,eax    ;start at the bottom of the source rect
        mul     [edi].dsurf_lNextScan   ;offset in bitmap of the bottom source
                                        ; scan
        mov     edx,[esi].ptl_x
        shr     edx,3                   ;byte X address
        add     eax,edx                 ;offset in bitmap of first source byte
        mov     ulLeftEdgeSrc,eax       ;left source edge byte
        add     eax,ulLeftEdgeAdjust    ;step past a partial left edge
        add     eax,culWholeBytesWidth
        mov     ulRightEdgeSrc,eax      ;right source edge byte
        dec     eax                     ;back up to the last whole byte
        mov     ulWholeBytesSrc,eax     ;whole source bytes start here and run
                                        ; leftward

; Branch to the appropriate bottom-to-top bank enumeration loop.

        mov     eax,ulAdapterType
        jmp     BottomToTopLoopTable[eax*4]


;***********************************************************************;
;
; The following routines are the banking loops.
;
;***********************************************************************;

;-----------------------------------------------------------------------;
; Banking for 2 R/W and unbanked adapters, top to bottom.
;-----------------------------------------------------------------------;
; Walks the copy from ulCurrentDestScan down to ulLastDestScan. Each pass
; copies as many scans as fit in both the currently mapped source bank and
; the currently mapped dest bank (ulBlockHeight), runs the pCurrentThread
; thread for that block, then remaps whichever bank has been exhausted.
;
; NOTE(review): every ptrCall in this routine had lost its operand lists in
; the source as received (only "ptrCall , \" remained). They are
; reconstructed below from the surrounding code: EBX holds pdsurf, the scan
; line to map is in EDX, EAX or ESI at each call site, and top-to-bottom
; copies use JustifyTop. The names dsurf_pfnBankControl2Window,
; MapSourceBank and MapDestBank do not appear elsewhere in the visible
; source and are presumed - confirm them against strucs.inc and the driver
; headers before building.

top_to_bottom_2RW::

; We're going top to bottom. Map in the source and dest, top-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than
                                                     ; current source bank?
        jl      short top_2RW_map_init_src_bank ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than
                                                        ; current source bank?
        jl      short top_2RW_init_src_bank_mapped
                                        ;no, proper bank already mapped
top_2RW_map_init_src_bank:

; Map bank containing the top source scan line into source window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapSourceBank>

top_2RW_init_src_bank_mapped:

        mov     edx,ulCurrentDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than
                                                     ; current dest bank?
        jl      short top_2RW_map_init_dest_bank ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than
                                                        ; current dest bank?
        jl      short top_2RW_init_dest_bank_mapped
                                        ;no, proper bank already mapped
top_2RW_map_init_dest_bank:

; Map bank containing the top dest scan line into dest window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,edx,JustifyTop,MapDestBank>

top_2RW_init_dest_bank_mapped:

; Bank-by-bank top-to-bottom copy loop. EBX = pdsurf on every entry.

top_2RW_bank_loop:

; Decide how far we can go before we run out of bank or rectangle to copy.

        mov     edx,ulLastDestScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipD.yBottom
        jl      short @F                ;copy rectangle bottom is in this bank
        mov     edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end
                                                        ; of bank, at least
@@:
        sub     edx,ulCurrentDestScan   ;# of scans we can and want to do in
                                        ; the dest bank
        mov     eax,[ebx].dsurf_rcl2WindowClipS.yBottom
        sub     eax,ulCurrentSrcScan    ;# of scans we can do in the src bank
        cmp     edx,eax
        jb      short @F                ;source bank isn't limiting
        mov     edx,eax                 ;source bank is limiting
@@:
        mov     ulBlockHeight,edx       ;# of scans we'll do in this bank

; We're ready to copy this block.

        THREAD_AND_START

; Any more scans to copy?

        mov     eax,ulCurrentDestScan
        mov     esi,ulBlockHeight
        add     eax,esi                 ;we've copied to dest up to here
        cmp     ulLastDestScan,eax      ;are we at the dest rect bottom?
        jz      short top_2RW_done      ;yes, we're done
        mov     ulCurrentDestScan,eax

; Now advance either or both banks, as needed.

        mov     ebx,pdsurf

        cmp     eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than
                                                        ; current dest bank?
        jl      short top_2RW_dest_bank_mapped ;no, proper bank still mapped

; Map bank containing the current dest scan line into dest window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
; (ESI still holds the block height and is needed after the call.)

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,eax,JustifyTop,MapDestBank>

top_2RW_dest_bank_mapped:

        add     esi,ulCurrentSrcScan    ;we've copied from source up to here
        mov     ulCurrentSrcScan,esi
        cmp     esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than
                                                        ; current src bank?
        jl      short top_2RW_src_bank_mapped ;no, proper bank still mapped

; Map bank containing the current source scan line into source window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.

        ptrCall <dword ptr [ebx].dsurf_pfnBankControl2Window>, \
                <ebx,esi,JustifyTop,MapSourceBank>

top_2RW_src_bank_mapped:

        jmp     top_2RW_bank_loop

top_2RW_done:
        PLAIN_RET


;-----------------------------------------------------------------------;
; Banking for 2 R/W and unbanked adapters, bottom to top.
;-----------------------------------------------------------------------;

bottom_to_top_2RW::

; We're going bottom to top. Map in the source and dest, bottom-justified.

        mov     ebx,pdsurf
        mov     edx,ulCurrentSrcScan
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than
                                                     ; current source bank?
        jl      short bot_2RW_map_init_src_bank ;yes, map in proper bank
        cmp     edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater
                                                        ; than current src bank?
        jl      short bot_2RW_init_src_bank_mapped
                                        ;no, proper bank already mapped
bot_2RW_map_init_src_bank:

; Map bank containing the bottom source scan line into source window.
; Note: EBX, ESI, and EDI preserved, according to C calling conventions.
ptrCall , \ bot_2RW_init_src_bank_mapped: mov edx,ulCurrentDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than ; current dest bank? jl short bot_2RW_map_init_dest_bank ;yes, map in proper bank cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater ; than current dst bank? jl short bot_2RW_init_dest_bank_mapped ;no, proper bank already mapped bot_2RW_map_init_dest_bank: ; Map bank containing the bottom dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_2RW_init_dest_bank_mapped: ; Bank-by-bank bottom-to-top copy loop. bot_2RW_bank_loop: ; Decide how far we can go before we run out of bank or rectangle to copy. mov edx,ulLastDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop jg short @F ;copy rectangle top is in this bank mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end ; of bank, at least @@: neg edx add edx,ulCurrentDestScan ;# of scans we can and want to do in inc edx ; the dest bank mov eax,ulCurrentSrcScan sub eax,[ebx].dsurf_rcl2WindowClipS.yTop inc eax ;# of scans we can do in the src bank cmp edx,eax jb short @F ;source bank isn't limiting mov edx,eax ;source bank is limiting @@: mov ulBlockHeight,edx ;# of scans we'll do in this bank ; We're ready to copy this block. THREAD_AND_START ; Any more scans to copy? mov eax,ulCurrentDestScan mov esi,ulBlockHeight sub eax,esi ;we've copied to dest up to here cmp ulLastDestScan,eax ;are we past the dest rect top? jg short bot_2RW_done ;yes, we're done mov ulCurrentDestScan,eax ; Now advance either or both banks, as needed. mov ebx,pdsurf cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than ; current dest bank? jge short bot_2RW_dest_bank_mapped ;no, proper bank still mapped ; Map bank containing the current dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. 
ptrCall , \ bot_2RW_dest_bank_mapped: mov eax,ulCurrentSrcScan sub eax,esi ;we've copied from source up to here mov ulCurrentSrcScan,eax cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than ; current src bank? jge short bot_2RW_src_bank_mapped ;no, proper bank still mapped ; Map bank containing the current source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_2RW_src_bank_mapped: jmp bot_2RW_bank_loop bot_2RW_done: PLAIN_RET ;-----------------------------------------------------------------------; ; Banking for 1R/1W adapters, top to bottom. ;-----------------------------------------------------------------------; top_to_bottom_1R1W:: ; We're going top to bottom. Map in the source and dest, top-justified. mov ebx,pdsurf mov edx,ulCurrentSrcScan cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source top less than ; current source bank? jl short top_1R1W_map_init_src_bank ;yes, map in proper bank cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source top greater than ; current source bank? jl short top_1R1W_init_src_bank_mapped ;no, proper bank already mapped top_1R1W_map_init_src_bank: ; Map bank containing the top source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ top_1R1W_init_src_bank_mapped: mov edx,ulCurrentDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest top less than ; current dest bank? jl short top_1R1W_map_init_dest_bank ;yes, map in proper bank cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest top greater than ; current dest bank? jl short top_1R1W_init_dest_bank_mapped ;no, proper bank already mapped top_1R1W_map_init_dest_bank: ; Map bank containing the top dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ top_1R1W_init_dest_bank_mapped: ; Bank-by-bank top-to-bottom copy loop. 
top_1R1W_bank_loop: ; Decide how far we can go before we run out of bank or rectangle to copy. mov edx,ulLastDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom jl short @F ;copy rectangle bottom is in this bank mov edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest extends to end ; of bank, at least @@: sub edx,ulCurrentDestScan ;# of scans we can and want to do in ; the dest bank mov eax,[ebx].dsurf_rcl2WindowClipS.yBottom sub eax,ulCurrentSrcScan ;# of scans we can do in the src bank cmp edx,eax jb short @F ;source bank isn't limiting mov edx,eax ;source bank is limiting @@: mov ulBlockHeight,edx ;# of scans we'll do in this bank ; We're ready to copy this block. ; Select different threading, depending on whether the source and destination ; are currently in the same bank; we can do edges faster if they are. mov eax,[ebx].dsurf_ulWindowBank cmp eax,[ebx].dsurf_ulWindowBank[4] jz short top_1R1W_copy_same_bank ; Source and dest are currently in different banks, must go through temp buffer. THREAD_AND_START pCurrentThreadViaBuffer,top_1R1W_check_more_scans ; Source and dest are currently in the same bank. top_1R1W_copy_same_bank: THREAD_AND_START ; Any more scans to copy? top_1R1W_check_more_scans: mov eax,ulCurrentDestScan mov esi,ulBlockHeight add eax,esi ;we've copied to dest up to here cmp ulLastDestScan,eax ;are we at the dest rect bottom? jz short top_1R1W_done ;yes, we're done mov ulCurrentDestScan,eax ; Now advance either or both banks, as needed. mov ebx,pdsurf cmp eax,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest scan greater than ; current dest bank? jl short top_1R1W_dest_bank_mapped ;no, proper bank still mapped ; Map bank containing the current dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ top_1R1W_dest_bank_mapped: add esi,ulCurrentSrcScan ;we've copied from source up to here mov ulCurrentSrcScan,esi cmp esi,[ebx].dsurf_rcl2WindowClipS.yBottom ;src scan greater than ; current src bank? 
jl short top_1R1W_src_bank_mapped ;no, proper bank still mapped ; Map bank containing the current source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ top_1R1W_src_bank_mapped: jmp top_1R1W_bank_loop top_1R1W_done: PLAIN_RET ;-----------------------------------------------------------------------; ; Banking for 1R/1W adapters, bottom to top. ;-----------------------------------------------------------------------; bottom_to_top_1R1W:: ; We're going bottom to top. Map in the source and dest, bottom-justified. mov ebx,pdsurf mov edx,ulCurrentSrcScan cmp edx,[ebx].dsurf_rcl2WindowClipS.yTop ;is source bottom less than ; current source bank? jl short bot_1R1W_map_init_src_bank ;yes, map in proper bank cmp edx,[ebx].dsurf_rcl2WindowClipS.yBottom ;source bottom greater ; than current src bank? jl short bot_1R1W_init_src_bank_mapped ;no, proper bank already mapped bot_1R1W_map_init_src_bank: ; Map bank containing the bottom source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_1R1W_init_src_bank_mapped: mov edx,ulCurrentDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop ;is dest bottom less than ; current dest bank? jl short bot_1R1W_map_init_dest_bank ;yes, map in proper bank cmp edx,[ebx].dsurf_rcl2WindowClipD.yBottom ;dest bottom greater ; than current dst bank? jl short bot_1R1W_init_dest_bank_mapped ;no, proper bank already mapped bot_1R1W_map_init_dest_bank: ; Map bank containing the bottom dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_1R1W_init_dest_bank_mapped: ; Bank-by-bank bottom-to-top copy loop. bot_1R1W_bank_loop: ; Decide how far we can go before we run out of bank or rectangle to copy. 
mov edx,ulLastDestScan cmp edx,[ebx].dsurf_rcl2WindowClipD.yTop jg short @F ;copy rectangle top is in this bank mov edx,[ebx].dsurf_rcl2WindowClipD.yTop ;dest extends to end ; of bank, at least @@: neg edx add edx,ulCurrentDestScan ;# of scans we can and want to do in inc edx ; the dest bank mov eax,ulCurrentSrcScan sub eax,[ebx].dsurf_rcl2WindowClipS.yTop inc eax ;# of scans we can do in the src bank cmp edx,eax jb short @F ;source bank isn't limiting mov edx,eax ;source bank is limiting @@: mov ulBlockHeight,edx ;# of scans we'll do in this bank ; We're ready to copy this block. ; Select different threading, depending on whether the source and destination ; are currently in the same bank; we can do edges faster if they are. mov al,byte ptr [ebx].dsurf_ulWindowBank cmp al,byte ptr [ebx].dsurf_ulWindowBank[4] jz short bot_1R1W_copy_same_bank ; Source and dest are currently in different banks, must go through temp buffer. THREAD_AND_START pCurrentThreadViaBuffer,bot_1R1W_check_more_scans ; Source and dest are currently in the same bank. bot_1R1W_copy_same_bank: THREAD_AND_START ; Any more scans to copy? bot_1R1W_check_more_scans: mov eax,ulCurrentDestScan mov esi,ulBlockHeight sub eax,esi ;we've copied to dest up to here cmp ulLastDestScan,eax ;are we past the dest rect top? jg short bot_1R1W_done ;yes, we're done mov ulCurrentDestScan,eax ; Now advance either or both banks, as needed. mov ebx,pdsurf cmp eax,[ebx].dsurf_rcl2WindowClipD.yTop ;dest scan less than ; current dest bank? jge short bot_1R1W_dest_bank_mapped ;no, proper bank still mapped ; Map bank containing the current dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_1R1W_dest_bank_mapped: mov eax,ulCurrentSrcScan sub eax,esi ;we've copied from source up to here mov ulCurrentSrcScan,eax cmp eax,[ebx].dsurf_rcl2WindowClipS.yTop ;src scan less than ; current src bank? 
jge short bot_1R1W_src_bank_mapped ;no, proper bank still mapped ; Map bank containing the current source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , \ bot_1R1W_src_bank_mapped: jmp bot_1R1W_bank_loop bot_1R1W_done: PLAIN_RET ;-----------------------------------------------------------------------; ; Banking for 1 R/W adapters, top to bottom. ;-----------------------------------------------------------------------; top_to_bottom_1RW:: ; We're going top to bottom. Map in the dest, top-justified. mov ebx,pdsurf mov esi,ulCurrentDestScan cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest top less than ; current bank? jl short top_1RW_map_init_dest_bank ;yes, map in proper bank cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest top greater than ; current bank? jl short top_1RW_init_dest_bank_mapped ;no, proper bank already mapped top_1RW_map_init_dest_bank: ; Map bank containing the top dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , top_1RW_init_dest_bank_mapped: ; Bank-by-bank top-to-bottom copy loop. top_1RW_bank_loop: ; Decide how far we can go before we run out of bank or rectangle to copy. mov edi,ulLastDestScan cmp edi,[ebx].dsurf_rcl1WindowClip.yBottom jl short @F ;copy rectangle bottom is in this bank mov edi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest extends to end ; of bank, at least @@: sub edi,esi ;# of scans we can and want to do in the dest bank ; Now make sure source is mapped in. This is the condition the copying routines ; expect, and we need to figure out how far we can go in the source. sub edx,edx ;assume source and dest are in the same ; bank mov esi,ulCurrentSrcScan cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than ; current bank? jl short top_1RW_map_src_Bank ;yes, must map in cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than ; current bank? 
jl short top_1RW_src_bank_mapped ;no, proper bank still mapped top_1RW_map_src_Bank: ; Map bank containing the current source scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , mov edx,1 ;mark that source and dest are not in ; the same bank top_1RW_src_bank_mapped: mov eax,[ebx].dsurf_rcl1WindowClip.yBottom sub eax,esi ;# of scans we can do in the src bank cmp edi,eax jb short @F ;source bank isn't limiting mov edi,eax ;source bank is limiting @@: mov ulBlockHeight,edi ;# of scans we'll do in this bank ; We're ready to copy this block. ; Select different threading, depending on whether the source and destination ; are currently in the same bank; we can do edges faster if they are. and edx,edx jz short top_1RW_copy_same_bank ; Source and dest are currently in different banks, must go through temp buffer. THREAD_AND_START pCurrentThreadViaBuffer,top_1RW_check_more_scans ; Source and dest are currently in the same bank. top_1RW_copy_same_bank: THREAD_AND_START ; Any more scans to copy? top_1RW_check_more_scans: mov esi,ulCurrentDestScan mov edi,ulBlockHeight add esi,edi ;we've copied to dest up to here cmp ulLastDestScan,esi ;are we at the dest rect bottom? jz short top_1RW_done ;yes, we're done mov ulCurrentDestScan,esi ; Now make sure the dest bank is mapped in. mov ebx,pdsurf cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than ; current bank? jl short top_1RW_map_dest_bank ;yes, map in dest bank cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than ; current bank? jl short top_1RW_dest_bank_mapped ;no, proper bank mapped top_1RW_map_dest_bank: ; Map bank containing the current dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. 
ptrCall , top_1RW_dest_bank_mapped: add ulCurrentSrcScan,edi ;we've copied from source up to here jmp top_1RW_bank_loop top_1RW_done: PLAIN_RET ;-----------------------------------------------------------------------; ; Banking for 1 R/W adapters, bottom to top. ;-----------------------------------------------------------------------; bottom_to_top_1RW:: ; We're going bottom to top. Map in the dest, bottom-justified. mov ebx,pdsurf mov esi,ulCurrentDestScan cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;is dest bottom less than ; current dest bank? jl short bot_1RW_map_init_dest_bank ;yes, map in proper bank cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest bottom greater ; than current dst bank? jl short bot_1RW_init_dest_bank_mapped ;no, proper bank already mapped bot_1RW_map_init_dest_bank: ; Map bank containing the bottom dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , bot_1RW_init_dest_bank_mapped: ; Bank-by-bank bottom-to-top copy loop. bot_1RW_bank_loop: ; Decide how far we can go before we run out of bank or rectangle to copy. mov edi,ulLastDestScan cmp edi,[ebx].dsurf_rcl1WindowClip.yTop jg short @F ;copy rectangle top is in this bank mov edi,[ebx].dsurf_rcl1WindowClip.yTop ;dest extends to end ; of bank, at least @@: neg edi add edi,esi ;# of scans we can and want to do in inc edi ; the dest bank ; Now make sure source is mapped in. This is the condition the copying routines ; expect, and we need to figure out how far we can go in the source. sub edx,edx ;assume source and dest are in the same ; bank mov esi,ulCurrentSrcScan cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;src scan less than ; current bank? jl short bot_1RW_map_src_Bank ;yes, must map in cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;src scan greater than ; current bank? jl short bot_1RW_src_bank_mapped ;no, proper bank still mapped bot_1RW_map_src_Bank: ; Map bank containing the current source scan line into source window. 
; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , mov edx,1 ;mark that source and dest are not in ; the same bank bot_1RW_src_bank_mapped: sub esi,[ebx].dsurf_rcl1WindowClip.yTop inc esi ;# of scans we can do in the src bank cmp edi,esi jb short @F ;source bank isn't limiting mov edi,esi ;source bank is limiting @@: mov ulBlockHeight,edi ;# of scans we'll do in this bank ; We're ready to copy this block. ; Select different threading, depending on whether the source and destination ; are currently in the same bank; we can copy much faster if they are. and edx,edx jz short bot_1RW_copy_same_bank ; Source and dest are currently in different banks, must go through temp buffer. THREAD_AND_START pCurrentThreadViaBuffer,bot_1RW_check_more_scans ; Source and dest are currently in the same bank. bot_1RW_copy_same_bank: THREAD_AND_START ; Any more scans to copy? bot_1RW_check_more_scans: mov esi,ulCurrentDestScan mov edi,ulBlockHeight sub esi,edi ;we've copied to dest up to here cmp ulLastDestScan,esi ;are we past the dest rect top? jg short bot_1RW_done ;yes, we're done mov ulCurrentDestScan,esi ; Now make sure the dest bank is mapped in. mov ebx,pdsurf cmp esi,[ebx].dsurf_rcl1WindowClip.yTop ;dest scan less than ; current bank? jl short bot_1RW_map_dest_bank ;yes, map in dest bank cmp esi,[ebx].dsurf_rcl1WindowClip.yBottom ;dest scan greater than ; current bank? jl short bot_1RW_dest_bank_mapped ;no, proper bank mapped bot_1RW_map_dest_bank: ; Map bank containing the current dest scan line into source window. ; Note: EBX, ESI, and EDI preserved, according to C calling conventions. ptrCall , bot_1RW_dest_bank_mapped: sub ulCurrentSrcScan,edi ;we've copied from source up to here jmp bot_1RW_bank_loop bot_1RW_done: PLAIN_RET ;***********************************************************************; ; ; The following routines are the low-level copying routines. 
; They know almost nothing about banks (the routines that copy through a
; temp buffer know how to switch banks after filling the temp buffer, but
; that's it). Banking should be taken care of at a higher level.
;
;***********************************************************************;

;-----------------------------------------------------------------------;
; copy_whole_bytes
;
; Copies a block of whole (unmasked) bytes from source to destination
; through the VGA latches.  Usable with 2 R/W or 1R/1W window banking, in
; unbanked modes, and on 1 R/W adapters when source and dest share a bank.
; A 1 R/W adapter with source and dest in different banks has to go through
; an intermediate local buffer instead.
;
; Input:
;       Direction Flag set for the desired copy direction
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance from end of one scan's bytes to the
;                               start of the next scan's
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;
; Output:
;       ulWholeBytesSrc and ulWholeBytesDest advanced to the scan after the
;       last scan processed
;-----------------------------------------------------------------------;

copy_whole_bytes::

; Bit Mask = 0: no CPU bits get through, so every write stores the latches.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(000h shl 8) + GRAF_BIT_MASK
        out     dx,ax

; Map Mask = all planes writable.

        mov     dl,SEQ_DATA
        mov     al,MM_ALL
        out     dx,al

; Load the loop parameters.

        mov     eax,culWholeBytesWidth
        mov     ebx,ulBlockHeight
        mov     edx,ulWholeScanDelta

; Turn the bitmap offsets into addresses within the source and dest windows.

        mov     ecx,pdsurf
        mov     esi,ulWholeBytesSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     edi,ulWholeBytesDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD

; EAX = # of bytes to copy per scan
; EBX = # of scans left to copy
; EDX = offset from end of one scan's bytes to start of next
; ESI = source address
; EDI = dest address

whole_latches_loop:
        mov     ecx,eax                 ;bytes on this scan
        rep     movsb                   ;each read loads the latches, each
                                        ; write stores them
        add     esi,edx                 ;next source scan
        add     edi,edx                 ;next dest scan
        dec     ebx
        jnz     whole_latches_loop

; Convert back to bitmap offsets and save them for the next call.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulWholeBytesSrc,esi
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulWholeBytesDest,edi

        PLAIN_RET

;-----------------------------------------------------------------------;
; copy_whole_bytes_via_buffer
;
; Copies a block of whole bytes from source to destination by way of the
; temp buffer.  Meant only for 1 R/W adapters, and only when source and
; dest are in different banks.
;
; Pass 1 reads every relevant byte of the source, plane by plane, into a
; temp buffer laid out as an image of the source bank.  Pass 2 writes the
; temp buffer to the screen one scan line at a time, doing all four planes
; of a scan before moving to the next scan.
;
; Doing all scans of one plane, then all of the next, would be faster, but
; pixels changed in some planes and not yet in others would show as wrong
; colors.  A middle road -- several scans per plane at a time, as the edge
; code does -- was considered: with up to 128 (or more) bytes across a
; whole-bytes scan and 16 scans per chunk, that is up to 128*4*16 accesses
; per chunk; at an assumed 1 microsecond each, about 8 milliseconds per
; chunk, or roughly half a frame time, which would very likely show flicker
; or sparkle.  Adapting the chunk height to the copy width (taller chunks
; for narrow copies) is another possibility, but for all but very narrow
; copies the copy time should dwarf the cost of the plane-switching OUTs,
; so the gain would not justify the extra complexity.
;
; Copying a word or dword at a time would be nice, but handling the
; fractional words/dwords, especially right to left, is complex and is
; left for LATER.  The loops are deliberately not unrolled until that
; optimization has been settled.
;
; Input:
;       Direction Flag set for the desired copy direction
;       culWholeBytesWidth = # of bytes to copy across each scan line
;       ulWholeScanDelta = distance from end of one scan's bytes to the
;                               start of the next scan's
;       ulBlockHeight = # of scans to copy
;       ulWholeBytesSrc = start source offset in bitmap
;       ulWholeBytesDest = start dest offset in bitmap
;       ppTempPlane0 = pointer to pointer to plane 0 storage in temp buffer
;       ppTempPlane3 = pointer to pointer to plane 3 storage in temp buffer
;       Source bank must be mapped in on entry; it is mapped in again on exit
;
; Output:
;       ulWholeBytesSrc and ulWholeBytesDest advanced to the scan after the
;       last scan processed
;-----------------------------------------------------------------------;

copy_whole_bytes_via_buffer::

; Source address = bitmap start + offset within bitmap.  The offset of that
; address within the bank is kept too, because the temp buffer is treated
; as an image of the current bank.

        mov     ecx,pdsurf
        mov     eax,ulWholeBytesSrc
        add     eax,[ecx].dsurf_pvBitmapStart
        mov     pSrcAddr,eax
        sub     eax,[ecx].dsurf_pvStart
        mov     ulOffsetInBank,eax

; Pass 1: screen -> temp buffer, planes 3 down to 0.
; Point the GC Index at the Read Map register and leave it there.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     eax,3                   ;first plane to read

copy_whole_to_buffer_plane_loop:
        mov     ebx,ulBlockHeight       ;scans to copy
        mov     esi,pSrcAddr            ;source address on screen
        mov     edi,ppTempPlane0
        mov     edi,[edi+eax*4]         ;this plane's storage in temp buffer
        add     edi,ulOffsetInBank      ;matching position in that storage
        mov     edx,VGA_BASE + GRAF_DATA
        out     dx,al                   ;Read Map = plane being read
        push    eax                     ;save plane index
        mov     eax,ulWholeScanDelta    ;offset to next scan
        mov     edx,culWholeBytesWidth  ;bytes per scan

copy_whole_to_buffer_scan_loop:
        mov     ecx,edx                 ;bytes on this scan
        rep     movsb                   ;scan line -> temp buffer
        add     esi,eax                 ;next source scan
        add     edi,eax                 ;next temp buffer scan
        dec     ebx
        jnz     copy_whole_to_buffer_scan_loop

        pop     eax                     ;restore plane index
        dec     eax                     ;next lower plane
        jns     copy_whole_to_buffer_plane_loop
                                        ;loop until the index goes negative

; Save the advanced source position as a bitmap offset.

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesSrc,esi

; Pass 2: temp buffer -> screen.
; Map in the destination bank, so it can be read and written and the Bit
; Mask can work.
; NOTE(review): the ptrCall below appears with empty operands; the macro
; arguments look like they were lost from this copy of the file.  It is
; reproduced exactly as found -- restore from a pristine copy.

        ptrCall , \

; Dest address can only be formed now (on a 1 R/W adapter the dest bank was
; not mapped earlier).

        mov     eax,ulWholeBytesDest
        add     eax,[ebx].dsurf_pvBitmapStart
        mov     pDestAddr,eax

; Bit Mask = 0FFh: all CPU bits get through.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     eax,(0ffh shl 8) + GRAF_BIT_MASK
        out     dx,ax

        mov     dl,SEQ_DATA             ;DX stays on the SC Data reg from here

        mov     eax,ulBlockHeight       ;scans to copy
        mov     culTempCount,eax

copy_whole_from_buffer_scan_loop:
        mov     ebx,ppTempPlane3        ;start at plane 3's buffer pointer
        mov     al,MM_C3                ;and plane 3's Map Mask bit

copy_whole_from_buffer_plane_loop:

        out     dx,al                   ;Map Mask = plane being written

        mov     esi,[ebx]               ;this plane's storage in temp buffer
        add     esi,ulOffsetInBank      ;current scan within that storage
        mov     edi,pDestAddr           ;dest start for this scan
        mov     ecx,culWholeBytesWidth  ;bytes on this scan
        rep     movsb                   ;temp buffer -> screen

        sub     ebx,4                   ;previous plane's buffer pointer
        shr     al,1                    ;previous plane's Map Mask bit
        jnz     copy_whole_from_buffer_plane_loop
                                        ;until the bit shifts out

; All four planes done for this scan; set up for the next scan.

        add     edi,ulWholeScanDelta    ;next dest scan
        mov     pDestAddr,edi
        mov     eax,ulNextScan
        add     ulOffsetInBank,eax      ;next scan within each plane's storage

        dec     culTempCount
        jnz     copy_whole_from_buffer_scan_loop

; Save the advanced dest position as a bitmap offset.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart
        mov     ulWholeBytesDest,edi

; Put back the original source bank.
; NOTE(review): operands missing from this ptrCall as well; reproduced as
; found.

        ptrCall , \

        PLAIN_RET

;-----------------------------------------------------------------------;
; copy_left_edge
;
; Copies a strip of left edge bytes from source to destination, assuming
; the source and destination are both readable and writable at once.  Only
; for 2 R/W window banking or unbanked modes; 1 R/W and 1R/1W adapters must
; go through an intermediate local buffer when source and dest are in
; different banks.  Handles up to EDGE_CHUNK_SIZE bytes per plane at a
; time; more could cause flicker.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       jLeftMask = left edge clip mask
;
; Output:
;       ulLeftEdgeSrc and ulLeftEdgeDest advanced to the scan after the last
;       scan processed
;-----------------------------------------------------------------------;

copy_left_edge::

; Turn the bitmap offsets into addresses within the source and dest windows.

        mov     ecx,pdsurf
        mov     esi,ulLeftEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     edi,ulLeftEdgeDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD

; Copy the strip.

        mov     ah,byte ptr jLeftMask   ;Bit Mask value for this edge
        call    copy_edge

; Convert back to bitmap offsets and save them for the next call.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulLeftEdgeSrc,esi
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeDest,edi

        PLAIN_RET

;-----------------------------------------------------------------------;
; copy_right_edge
;
; Copies a strip of right edge bytes from source to destination, assuming
; the source and destination are both readable and writable at once.  Only
; for 2 R/W window banking or unbanked modes; 1 R/W and 1R/1W adapters must
; go through an intermediate local buffer when source and dest are in
; different banks.  Handles up to EDGE_CHUNK_SIZE bytes per plane at a
; time; more could cause flicker.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       jRightMask = right edge clip mask
;
; Output:
;       ulRightEdgeSrc and ulRightEdgeDest advanced to the scan after the
;       last scan processed
;-----------------------------------------------------------------------;

copy_right_edge::

; Turn the bitmap offsets into addresses within the source and dest windows.

        mov     ecx,pdsurf
        mov     esi,ulRightEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     edi,ulRightEdgeDest
        add     edi,[ecx].dsurf_pvBitmapStart2WindowD

; Copy the strip.

        mov     ah,byte ptr jRightMask  ;Bit Mask value for this edge
        call    copy_edge

; Convert back to bitmap offsets and save them for the next call.

        mov     ecx,pdsurf
        sub     esi,[ecx].dsurf_pvBitmapStart2WindowS
        mov     ulRightEdgeSrc,esi
        sub     edi,[ecx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeDest,edi

        PLAIN_RET

;-----------------------------------------------------------------------;
; copy_edge
;
; Copies one edge strip (one byte per scan) from source to destination on
; the screen, in chunks of at most EDGE_CHUNK_SIZE scans, doing all four
; planes of a chunk before moving on to the next chunk.
;
; Entry:
;       AH = Bit Mask setting for the edge
;       ESI = source address
;       EDI = destination address
;       ulBlockHeight = # of bytes to copy per plane
;       ulNextScan = scan width
;       Source readable, and destination readable and writable
; Exit:
;       ESI = next source address
;       EDI = next destination address
;
; Preserved: EBP
;-----------------------------------------------------------------------;

copy_edge:
        mov     pSrcAddr,esi
        mov     pDestAddr,edi

; Program the Bit Mask with the edge's clip mask (index in AL, data in AH).

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

; Point the GC Index at the Read Map register and leave it there.

        mov     al,GRAF_READ_MAP
        out     dx,al

        mov     ecx,offset copy_edge_rw_full_chunk
                                        ;unrolled-loop entry for a full chunk;
                                        ; replaced below for a short chunk
        mov     ebx,ulBlockHeight

; One iteration per chunk.  EBX = scans still to do.

copy_edge_chunk_loop:
        sub     ebx,EDGE_CHUNK_SIZE     ;scans left after a full chunk
        jge     short @F                ;enough for a full chunk
        add     ebx,EDGE_CHUNK_SIZE     ;short chunk: do everything that's left
        mov     ecx,pfnCopyEdgeRWEntry[-4][ebx*4]
                                        ;entry that copies exactly EBX bytes
                                        ; (table entry 0 is the 1-byte entry,
                                        ; hence the -4)
        sub     ebx,ebx                 ;nothing left after this chunk
@@:
        push    ebx                     ;save remaining scan count
        mov     ah,MM_C3                ;start with plane 3
        mov     ebx,ulNextScan

; One iteration per plane.  AH = Map Mask bit of the plane.

copy_edge_plane_loop:

; Map Mask = this plane.

        mov     al,ah
        mov     dl,SEQ_DATA
        out     dx,al

; Read Map = same plane.  The Map Mask bit (8,4,2,1) becomes the plane
; number (3,2,1,0): halving gives 4,2,1,0, and the cmp/adc pair subtracts
; one only from the 4.

        shr     al,1
        cmp     al,100b                 ;Carry set unless this is plane 3
        adc     al,-1                   ;subtracts 1 only for plane 3
        mov     dl,GRAF_DATA
        out     dx,al

        mov     esi,pSrcAddr
        mov     edi,pDestAddr
        jmp     ecx                     ;into the unrolled copy loop

;-----------------------------------------------------------------------;
; Table of unrolled edge loop entry points. First entry point is to copy
; 1 byte, last entry point is to copy EDGE_CHUNK_SIZE bytes.
;-----------------------------------------------------------------------;

pfnCopyEdgeRWEntry label dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD EDGE_RW,%INDEX
INDEX = INDEX+1
        endm

;-----------------------------------------------------------------------;
; Body of the unrolled edge loop: copies one edge byte, with source and
; destination both readable and writable, then steps both pointers to the
; next scan.  Note that DL is used as a scratch register by the latch-
; loading read.
;-----------------------------------------------------------------------;

COPY_EDGE_RW macro ENTRY_LABEL,ENTRY_INDEX
ENTRY_LABEL&ENTRY_INDEX:
        mov     al,[esi]                ;fetch the source byte
        add     esi,ebx                 ;next source scan
        mov     dl,[edi]                ;dummy read to load the latches
        mov     [edi],al                ;write; the Bit Mask does the clipping
        add     edi,ebx                 ;next dest scan
        endm    ;-----------------------------------;

; EBX = scan line width
; ESI = source address to copy from
; EDI = target address to copy to
; Bit Mask set to desired clipping
; Read Map and Map Mask set to enable the desired plane for read and write

copy_edge_rw_full_chunk:
        UNROLL_LOOP COPY_EDGE_RW,EDGE_RW,EDGE_CHUNK_SIZE

; Next plane of this chunk, if any.

        shr     ah,1                    ;next lower plane's Map Mask bit
        jnz     copy_edge_plane_loop    ;until the bit shifts out

; Chunk finished in all planes; the next chunk starts where this one ended.

        mov     pSrcAddr,esi
        mov     pDestAddr,edi

; Next chunk of this bank block, if any.

        pop     ebx                     ;remaining scan count
        and     ebx,ebx
        jnz     copy_edge_chunk_loop    ;more scans to do

        PLAIN_RET

;-----------------------------------------------------------------------;
; Copies a strip of left edge bytes from the source to the destination
; through an intermediate RAM buffer.
; This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulLeftEdgeSrc = start source offset in bitmap
;       ulLeftEdgeDest = start dest offset in bitmap
;       jLeftMask = left edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current bank
;       For 1 R/W adapters, expects the source bank to be mapped in; banking
;        is the same at exit as it was at entry
;
; Output:
;       Advances ulLeftEdgeSrc and ulLeftEdgeDest to scan after last
;       scan processed
;
; Note that this should never be called for an unbanked or 2 R/W adapter,
; because the source and dest are always both addressable simultaneously then.
;-----------------------------------------------------------------------;

copy_left_edge_via_buffer::

; First, copy all the bytes into the temporary buffer.

; Calculate start source address from bitmap start address and
; offset within bitmap. (The dest address is calculated later, once the
; dest bank has been mapped in.)

        mov     ecx,pdsurf
        mov     esi,ulLeftEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        call    copy_screen_to_buffered_edge

; Remember where we left off, for next time

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulLeftEdgeSrc,esi

; Now copy the temp buffer to the screen.

; Map in the source bank to match the destination, so we can read/write to it
; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
; mapped by this call, which is fine.
;
; NOTE(review): the ptrCall operands are not visible in this copy of the
; source (the macro argument lists appear to have been lost); restore them
; from the original file. The line is left exactly as found. The code after
; the call uses EBX as pdsurf, so the call is presumably expected to
; preserve EBX -- confirm.

        ptrCall , \

; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
; until now to calculate this, because the dest bank wasn't mapped earlier).

        mov     edi,ulLeftEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ah,byte ptr jLeftMask   ;clip mask for this edge
        call    copy_buffered_edge_to_screen ;do the copy

; Remember where we left off, for next time.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulLeftEdgeDest,edi

; Put back the original source bank. Note that on a 1 R/W adapter, both banks
; will be mapped by this call, which is fine.
;
; NOTE(review): ptrCall operands missing from this copy of the source, as
; above; line left exactly as found.

        ptrCall , \

        PLAIN_RET

;-----------------------------------------------------------------------;
; Copies a strip of right edge bytes from the source to the destination
; through an intermediate RAM buffer. This is the approach required by
; 1 R/W and 1R/1W adapters when the source and dest are in different banks.
; Writes up to EDGE_CHUNK_SIZE bytes in each plane at a pop; more bytes might
; cause flicker.
;
; Input:
;       ulNextScan = width of scan, in bytes
;       ulBlockHeight = # of scans to copy
;       ulRightEdgeSrc = start source offset in bitmap
;       ulRightEdgeDest = start dest offset in bitmap
;       jRightMask = right edge clip mask
;       pTempPlane = pointer to temp storage buffer
;       ulCurrentSrcScan = scan used to map in source bank
;       ulCurrentDestScan = scan used to map in dest bank
;       ulCurrentJustification = justification used to map in current bank
;       For 1 R/W adapters, expects the source bank to be mapped in; banking
;        is the same at exit as it was at entry
;
; Output:
;       Advances ulRightEdgeSrc and ulRightEdgeDest to scan after last
;       scan processed
;
; Note that this should never be called for an unbanked or 2 R/W adapter,
; because the source and dest are always both addressable simultaneously then.
;-----------------------------------------------------------------------;

copy_right_edge_via_buffer::

; First, copy all the bytes into the temporary buffer.

; Calculate start source address from bitmap start addresses and
; offsets within bitmap.

        mov     ecx,pdsurf
        mov     esi,ulRightEdgeSrc
        add     esi,[ecx].dsurf_pvBitmapStart2WindowS

; Copy the edge from the source to the temp buffer.

        call    copy_screen_to_buffered_edge

; Remember where we left off, for next time

        mov     ebx,pdsurf
        sub     esi,[ebx].dsurf_pvBitmapStart2WindowS
        mov     ulRightEdgeSrc,esi

; Now copy the temp buffer to the screen.

; Map in the source bank to match the destination, so we can read/write to it
; and let the Bit Mask work. Note that on a 1 R/W adapter, both banks will be
; mapped by this call, which is correct.
;
; NOTE(review): the ptrCall operands are not visible in this copy of the
; source (the macro argument lists appear to have been lost); restore them
; from the original file. The line is left exactly as found. The code after
; the call uses EBX as pdsurf, so the call is presumably expected to
; preserve EBX -- confirm.

        ptrCall , \

; Calculate dest start address (if this is a 1 R/W adapter, we had to wait
; until now to calculate this, because the dest bank wasn't mapped earlier).

        mov     edi,ulRightEdgeDest
        add     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ah,byte ptr jRightMask  ;clip mask for this edge
        call    copy_buffered_edge_to_screen ;do the copy

; Remember where we left off, for next time.

        mov     ebx,pdsurf
        sub     edi,[ebx].dsurf_pvBitmapStart2WindowD
        mov     ulRightEdgeDest,edi

; Put back the original source bank. Note that on a 1 R/W adapter, both banks
; will be mapped by this call, which is fine.
;
; NOTE(review): ptrCall operands missing from this copy of the source, as
; above; line left exactly as found.

        ptrCall , \

        PLAIN_RET

;-----------------------------------------------------------------------;
; Copies an edge from the temp buffer to the screen.
; Entry:
;       AH = bit mask setting for edge
;       EDI = destination address
;       pTempPlane = temp buffer from which to copy
;       ulBlockHeight = # of bytes to copy per plane
;       ulNextScan = scan width
;       Source and dest banks both pointing to destination
; Exit:
;       EDI = next destination address
;
; Temp buffer layout read here: plane 3's ulBlockHeight bytes first, then
; plane 2's, plane 1's and plane 0's, each run ulBlockHeight bytes after
; the previous one.
;
; Preserved: EBP
; Written:   EAX, EBX, ECX, EDX, ESI, EDI, pDestAddr, pTempEntry
;-----------------------------------------------------------------------;

copy_buffered_edge_to_screen:
        mov     pDestAddr,edi           ;each plane pass restarts from here

; Set the clip mask for this edge (index in AL, mask value in AH).

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_BIT_MASK
        out     dx,ax

        mov     pTempEntry,offset copy_edge_from_buf_full_chunk
                                        ;entry point into unrolled loop to copy first
                                        ; chunk, assuming it's a full chunk
        mov     ecx,pTempPlane          ;temp buffer start (copy from here)
        mov     ebx,ulBlockHeight       ;total # of scans to copy

; Copy the edge in a series of chunks, to avoid flicker.

copy_from_buffer_chunk_loop:
        sub     ebx,EDGE_CHUNK_SIZE     ;scans remaining after this chunk, assuming
                                        ; a full chunk
        jge     short @F                ;do a full chunk
        add     ebx,EDGE_CHUNK_SIZE     ;not a full chunk; process all remaining
                                        ; scans
        mov     ebx,pfnCopyEdgesFromBufferEntry[-4][ebx*4]
                                        ;table is 1-based, hence the -4
        mov     pTempEntry,ebx          ;entry point into unrolled loop to copy desired
                                        ; chunk size
        sub     ebx,ebx                 ;no scans after this
@@:
        push    ebx                     ;remember remaining scan count

        mov     al,MM_C3                ;start by copying plane 3
        mov     ebx,ulNextScan          ;EBX is the dest scan stride inside
                                        ; the unrolled loop
        push    ecx                     ;remember current temp buffer start

copy_from_buffer_plane_loop:

; Set Map Mask to enable writes to plane we're copying. DL must be reloaded
; on every pass because the unrolled loop's latch-loading read overwrites it.

        mov     dl,SEQ_DATA             ;leave DX pointing to Sequencer Data reg
        out     dx,al

; Select this plane's bytes in the temp buffer. No Read Map value is set up
; here: the bytes to copy come from the temp buffer, and the screen read in
; the unrolled loop is only there to load the latches. The next plane's
; bytes for this chunk start ulBlockHeight bytes further on.

        mov     esi,ecx                 ;point to current plane's source byte
        add     ecx,ulBlockHeight       ;point to next plane's source byte

        mov     edi,pDestAddr

        jmp     pTempEntry              ;enter the unrolled copy loop at the
                                        ; chosen entry point (serves either edge)

;-----------------------------------------------------------------------;
; Table of unrolled edge copy-from-buffer loop entry points. First entry
; point is to copy 1 byte, last entry point is to copy EDGE_CHUNK_SIZE
; bytes.
;-----------------------------------------------------------------------;

pfnCopyEdgesFromBufferEntry     label   dword
INDEX = 1
        rept    EDGE_CHUNK_SIZE
        DEFINE_DD       EDGE_FROM_BUFFER,%INDEX
INDEX = INDEX+1
        endm

;-----------------------------------------------------------------------;
; Unrolled loop for copying a strip of edge bytes from the temp buffer.
;
; AH carries the data byte here because AL holds the current plane's Map
; Mask bit, which the plane loop shifts after each pass.
;-----------------------------------------------------------------------;

COPY_EDGE_FROM_BUFFER   macro   ENTRY_LABEL,ENTRY_INDEX
ENTRY_LABEL&ENTRY_INDEX:
        mov     ah,[esi]                ;get byte to copy
        inc     esi                     ;point to next source (temp buffer) byte
        mov     dl,[edi]                ;latch the destination (value doesn't matter)
        mov     [edi],ah                ;write, with the Bit Mask clipping
        add     edi,ebx                 ;point to next dest (screen) scan
        endm    ;-----------------------------------;

; EBX = scan line width
; ESI = source address to copy from (temp buffer)
; EDI = target address to copy to (screen)
; Bit Mask set to desired clipping
; Map Mask set to enable the desired plane for write

copy_edge_from_buf_full_chunk:
        UNROLL_LOOP     COPY_EDGE_FROM_BUFFER,EDGE_FROM_BUFFER,EDGE_CHUNK_SIZE

; Do next plane within this chunk, if any.

        shr     al,1                    ;advance to next plane
        jnz     copy_from_buffer_plane_loop

; Remember where we left off, for next chunk.

        mov     pDestAddr,edi
        pop     ecx                     ;get back current temp buffer start
        add     ecx,EDGE_CHUNK_SIZE     ;point to next chunk's start

; Do next chunk within this bank block, if any.

        pop     ebx                     ;retrieve remaining scan count
        and     ebx,ebx                 ;any scans left?
        jnz     copy_from_buffer_chunk_loop ;more scans to do

        PLAIN_RET

;-----------------------------------------------------------------------;
; Copies an edge from the screen to the temp buffer.
; Entry:
;       ESI = source address (screen)
;       pTempPlane = temp buffer that receives the copied bytes
;       ulBlockHeight = # of bytes to copy per plane
;       ulNextScan = scan width
;       Source bank pointing to source
; Exit:
;       DH = VGA_BASE SHR 8
;       ESI = next source address
;
; The temp buffer is filled plane by plane: ulBlockHeight bytes of plane 3
; first, followed immediately by planes 2, 1 and 0.
;
; Preserved: EBP
;-----------------------------------------------------------------------;

copy_screen_to_buffered_edge:

; Every plane pass walks the same scans, so keep the start address around.

        mov     pSrcAddr,esi

; Loop invariants: EDI runs straight through the temp buffer across all four
; planes, and ECX is the distance from one source scan to the next.

        mov     edi,pTempPlane
        mov     ecx,ulNextScan

; Select the Read Map register in the GC Index, then leave DX on the GC Data
; port so each pass only has to write the plane number.

        mov     edx,VGA_BASE + GRAF_ADDR
        mov     al,GRAF_READ_MAP
        out     dx,al
        mov     dl,GRAF_DATA
        mov     al,3                    ;first plane to read is plane 3

copy_edge_to_buffer_plane_loop:
        out     dx,al                   ;Read Map = plane in AL
        mov     ebx,ulBlockHeight       ;bytes to fetch for this plane
        mov     esi,pSrcAddr            ;back to the first scan of the edge

; AL = plane number being read (also the outer loop counter)
; EBX = bytes still to fetch for this plane
; ECX = scan width
; ESI = screen address to read
; EDI = temp buffer address to write

edge_to_buffer_loop:
        mov     ah,[esi]                ;fetch the edge byte from the screen
        mov     [edi],ah                ;store it in the temp buffer
        add     esi,ecx                 ;down one source scan
        inc     edi                     ;next temp buffer slot
        dec     ebx
        jnz     edge_to_buffer_loop

        dec     al                      ;next lower plane
        jns     copy_edge_to_buffer_plane_loop  ;stop once AL goes below 0

        PLAIN_RET

;-----------------------------------------------------------------------;

endProc vAlignedSrcCopy

_TEXT$03 ends

        end