;***************************************************************************
; unit:    raster_BCC.asm      release 0.36                                *
; purpose: general manipulation of n-dimensional matrices, n = 1, 2 and 3. *
;          Use this file or rasterc.c. You cannot link both files together *
; license:     GPL or LGPL                                                 *
; Copyright: (c) 1998-2025 Jaroslav Fojtik                                 *
;***************************************************************************

.486              ;Target processor.  Use instructions for Pentium class machines
.MODEL FLAT, C    ;Use the flat memory model. Use C calling conventions

.CODE             ;Indicates the start of a code segment.

USE8087	EQU	1

LOCALS @@


	extern	swap_bits_xlat:BYTE
	extern	swap_bits2_xlat:BYTE


;void Conv1_4(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 4-bit pixels.
; A set source bit yields nibble 0Fh, a clear bit yields nibble 0; the first
; pixel of every pair lands in the high nibble of the destination byte.
; count is the number of pixels. For an odd count the final byte carries the
; last pixel in its high nibble and a zero low nibble.
        public  Conv1_4
Conv1_4 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels still to convert
        mov     esi,[Src]      ; esi = source pointer (packed bits)
        mov     edi,[Dest]     ; edi = destination pointer (packed nibbles)
	or	ecx,ecx
        jz	@@ToEnd     ; zero pixels: nothing to convert

@@octet:  mov     al,[esi]	; fetch the next 8 source pixels

	cbw			; AH = 0FFh when bit 7 of AL is set, else 0

	mov	dl,ah
        and	dl,0F0h		; DL = first pixel of the pair in the high nibble
        
        rol     al,1		; 40h: original bit 6 is now bit 7
        cbw			; AH = 0FFh/0 from original bit 6
        and	ah,0Fh		; AH = second pixel of the pair in the low nibble
        
        sub	ecx,2
        jbe	@@ToEnd2		; count was 2 (ZF) or 1 (CY): finish in the tail
        
        or	ah,dl
	mov	[edi],ah	; store both pixels of the pair
	inc	edi      
        
        rol     al,1		; 20h
        cbw			; AH = 0FFh/0 from original bit 5

	mov	dl,ah
        and	dl,0F0h		; first pixel of the pair -> high nibble
        
        rol     al,1		; 10h
        cbw			; AH = 0FFh/0 from original bit 4
        and	ah,0Fh		; second pixel of the pair -> low nibble
        
        sub	ecx,2
        jbe	@@ToEnd2		; 0 or -1 pixels left after this pair
        
        or	ah,dl	
	mov	[edi],ah	; store both pixels of the pair
	inc	edi   

        rol     al,1		; 08h
        cbw			; AH = 0FFh/0 from original bit 3

	mov	dl,ah
        and	dl,0F0h		; first pixel of the pair -> high nibble
        
        rol     al,1		; 04h
	cbw			; AH = 0FFh/0 from original bit 2
        and	ah,0Fh		; second pixel of the pair -> low nibble
        
        sub	ecx,2
        jbe	@@ToEnd2		; 0 or -1 pixels left after this pair
        
        or	ah,dl	
	mov	[edi],ah	; store both pixels of the pair
	inc	edi        
        
        rol     al,1		; 02h
        cbw			; AH = 0FFh/0 from original bit 1

	mov	dl,ah
        and	dl,0F0h		; first pixel of the pair -> high nibble
        
        rol     al,1		; 01h
        cbw			; AH = 0FFh/0 from original bit 0
	and	ah,0Fh		; second pixel of the pair -> low nibble
	
	sub	ecx,2
        jbe	@@ToEnd2		; 0 or -1 pixels left after this pair
	
        or	ah,dl	        
	mov	[edi],ah	; store both pixels of the pair
	
	inc	esi		; source byte fully consumed
	inc	edi
	jmp	@@octet
        
	
				; Flags here still come from the SUB ECX,2 above.
@@ToEnd2:	jnz	@@ToEnd3		; NZ (count was odd): store only the first nibble
	or	dl,ah		; ZF (count was even): merge the second nibble
@@ToEnd3:	mov	[edi],dl
@@ToEnd:  ret                     ; _cdecl return
        
Conv1_4 endp


;*************************************************************************************


;void Conv1_8(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 8-bit pixels:
; a set bit becomes 0FFh, a clear bit becomes 0. count is the pixel count.
; Odd pixels of each pair are computed before the count check and stored
; only when the count allows it, so no byte past Dest[count-1] is written.
        public  Conv1_8
Conv1_8 proc \
        uses esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels still to convert

        mov     esi,[Src]      ; esi = source pointer (packed bits)
        mov     edx,[Dest]     ; edx = destination pointer (one byte per pixel)
        cld
        jecxz	@@ToEnd		; zero pixels: nothing to convert

@@octet:  lodsb			; fetch the next 8 source pixels

	cbw			; AH = 0FFh/0 from bit 7
	mov	[edx],ah	; pixel 0
        
        rol     al,1		; 40h
        cbw			; AH = 0FFh/0 from original bit 6
        
        sub	ecx,2
        jbe	StorQ2		; count was 2 (store pixel 1) or 1 (store nothing)
        
	mov	[edx+1],ah	; pixel 1

        
        rol     al,1		; 20h
        cbw			; AH = 0FFh/0 from original bit 5
	mov	[edx+2],ah	; pixel 2
        
        rol     al,1		; 10h
        cbw			; AH = 0FFh/0 from original bit 4
        
        sub	ecx,2
        jbe	StorQ4		; 0 or -1 pixels left after this pair
        
	mov	[edx+3],ah	; pixel 3

        rol     al,1		; 08h
        cbw			; AH = 0FFh/0 from original bit 3
	mov	[edx+4],ah	; pixel 4
        
        rol     al,1		; 04h
	cbw			; AH = 0FFh/0 from original bit 2
	
	sub	ecx,2
	jbe	StorQ6		; 0 or -1 pixels left after this pair
	
	mov	[edx+5],ah	; pixel 5
        
        rol     al,1		; 02h
        cbw			; AH = 0FFh/0 from original bit 1
	mov	[edx+6],ah	; pixel 6 (ECX >= 1 is guaranteed here)

	dec	ecx
        jz      @@ToEnd
        
        rol     al,1		; 01h
        cbw			; AH = 0FFh/0 from original bit 0
	mov	[edx+7],ah	; pixel 7

	add	edx,8

	dec	ecx
	jnz     @@octet

@@ToEnd:	ret

				; Tails: flags still come from the SUB ECX,2 that branched here.
StorQ2: jnz	@@ToEnd		; NZ: count was odd, the 2nd pixel does not exist
	mov	[edx+1],ah
	jmp	@@ToEnd
StorQ4: jnz	@@ToEnd		; NZ: count was odd, the 4th pixel does not exist
	mov	[edx+3],ah
	jmp	@@ToEnd
StorQ6: jnz	@@ToEnd		; NZ: count was odd, the 6th pixel does not exist
	mov	[edx+5],ah
	jmp	@@ToEnd
        
        
Conv1_8 endp


;*************************************************************************************

        public  Conv1_16
;void Conv1_16(WORD *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 16-bit pixels:
; a set bit becomes 0FFFFh, a clear bit becomes 0. count = pixel count.
Conv1_16 proc \
        uses esi edi, \
        Dest:ptr word, \
        Src:ptr byte, \
        count:DWORD

        mov     esi,[Src]               ; esi -> packed 1-bit source pixels
        mov     edi,[Dest]              ; edi -> 16-bit destination pixels
        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

@@NextByte:
        mov     ah,[esi]                ; eight pixels; the first sits in bit 15 of AX

        cwd                             ; DX = 0FFFFh when bit 15 of AX is set, else 0
        mov     [edi],dx                ; pixel 0
        dec     ecx
        jz      @@Done

        shl     ax,1                    ; next source bit moves into bit 15
        cwd
        mov     [edi+2],dx              ; pixel 1
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+4],dx              ; pixel 2
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+6],dx              ; pixel 3
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+8],dx              ; pixel 4
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+10],dx             ; pixel 5
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+12],dx             ; pixel 6
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+14],dx             ; pixel 7

        inc     esi                     ; source byte consumed
        add     edi,16                  ; eight words written
        dec     ecx
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv1_16 endp



;void Conv1_24(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 24-bit pixels:
; a set bit becomes three 0FFh bytes, a clear bit three zero bytes.
        public  Conv1_24
Conv1_24 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed 1-bit source pixels
        mov     edi,[Dest]              ; edi -> 3-byte destination pixels

@@NextByte:
        mov     ah,[esi]                ; eight pixels; the first sits in bit 15 of AX

        cwd                             ; DX = 0FFFFh / 0 from bit 15, so DL = DH
        mov     [edi],dx                ; pixel 0, bytes 0..1
        mov     [edi+2],dl              ; pixel 0, byte 2
        dec     ecx
        jz      @@Done

        shl     ax,1                    ; next source bit moves into bit 15
        cwd
        mov     [edi+3],dx              ; pixel 1
        mov     [edi+5],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+6],dx              ; pixel 2
        mov     [edi+8],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+9],dx              ; pixel 3
        mov     [edi+11],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+12],dx             ; pixel 4
        mov     [edi+14],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+15],dx             ; pixel 5
        mov     [edi+17],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+18],dx             ; pixel 6
        mov     [edi+20],dl
        dec     ecx
        jz      @@Done

        shl     ax,1
        cwd
        mov     [edi+21],dx             ; pixel 7
        mov     [edi+23],dl

        inc     esi                     ; source byte consumed
        add     edi,24                  ; eight 3-byte pixels written
        dec     ecx
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv1_24 endp


;*************************************************************************************

        public  Conv1_32
;void Conv1_32(DWORD *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 32-bit pixels:
; a set bit becomes 0FFFFFFFFh, a clear bit becomes 0.
Conv1_32 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     esi,[Src]               ; esi -> packed 1-bit source pixels
        mov     edi,[Dest]              ; edi -> 32-bit destination pixels
        mov     ecx,[count]             ; ecx = pixels left to write
        cld                             ; LODSB must walk forward
        test    ecx,ecx
        jz      @@Done                  ; empty row

@@NextByte:
        lodsb                           ; AL = eight pixels
        shl     eax,24                  ; first pixel now in bit 31, low 24 bits clear

        cdq                             ; EDX = 0FFFFFFFFh when bit 31 is set, else 0
        mov     [edi],edx               ; pixel 0
        dec     ecx
        jz      @@Done

        shl     eax,1                   ; next source bit moves into bit 31
        cdq
        mov     [edi+4],edx             ; pixel 1
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+8],edx             ; pixel 2
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+12],edx            ; pixel 3
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+16],edx            ; pixel 4
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+20],edx            ; pixel 5
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+24],edx            ; pixel 6
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+28],edx            ; pixel 7

        add     edi,32                  ; eight dwords written
        dec     ecx
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv1_32 endp


;*************************************************************************************


        public  Conv1_64
;void Conv1_64(QWORD *Dest, const BYTE *Src, unsigned Size1D)
; Expands 1-bit pixels (most significant bit first) into 64-bit pixels:
; a set bit becomes eight 0FFh bytes, a clear bit eight zero bytes.
Conv1_64 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed 1-bit source pixels
        mov     edi,[Dest]              ; edi -> 64-bit destination pixels

@@NextByte:
        movzx   eax,byte ptr [esi]      ; eight pixels
        shl     eax,24                  ; first pixel now in bit 31

        cdq                             ; EDX = 0FFFFFFFFh when bit 31 is set, else 0
        mov     [edi],edx               ; pixel 0, low dword
        mov     [edi+4],edx             ; pixel 0, high dword
        dec     ecx
        jz      @@Done

        shl     eax,1                   ; next source bit moves into bit 31
        cdq
        mov     [edi+8],edx             ; pixel 1
        mov     [edi+12],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+16],edx            ; pixel 2
        mov     [edi+20],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+24],edx            ; pixel 3
        mov     [edi+28],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+32],edx            ; pixel 4
        mov     [edi+36],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+40],edx            ; pixel 5
        mov     [edi+44],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+48],edx            ; pixel 6
        mov     [edi+52],edx
        dec     ecx
        jz      @@Done

        shl     eax,1
        cdq
        mov     [edi+56],edx            ; pixel 7
        mov     [edi+60],edx

        inc     esi                     ; source byte consumed
        add     edi,64                  ; eight qwords written
        dec     ecx
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv1_64 endp


;*************************************************************************************
;*************************************************************************************


;void Conv4_1(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Packs 4-bit pixels into 1-bit pixels by keeping the top bit of every nibble
; (bit 7 for the first pixel of a source byte, bit 3 for the second).
; Output bits are filled most significant bit first; a final partial byte is
; padded with zero bits on the right. Does nothing when Dest or Src is NULL.
        public  Conv4_1
Conv4_1 proc \
        uses esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     edx,[Dest]     ; edx = destination pointer
        or	edx,edx
        jz	@@ToEnd		; NULL destination
        mov     esi,[Src]      ; esi = source pointer
        or	esi,esi
        jz	@@ToEnd		; NULL source
        
        mov     ecx,[count]     ; ecx = number of pixels
	sub	ecx,8		; fewer than 8 pixels (including zero)?
        jb	@@LastOct	; yes: only the partial-byte tail remains

				; Fast path: 4 source bytes (8 pixels) -> 1 output byte.
@@NextOct:
        mov	al,[esi]
        shl	ax,1		; bit 7 (first pixel) -> AH bit 0
        shl	al,3		; original bit 3 (second pixel) -> AL bit 7
        shl	ax,1		; ... -> AH bit 0
	mov	al,[esi+1]
        shl	ax,1
        shl	al,3
        shl	ax,1        
        mov	al,[esi+2]
        shl	ax,1
        shl	al,3
        shl	ax,1        
        mov	al,[esi+3]
        shl	ax,1
        shl	al,3        
        shl	ax,1

	mov	[edx],ah	; AH now holds 8 freshly collected bits
	inc	edx
	add	esi,4
	sub	ecx,8
        jae	@@NextOct
        
@@LastOct:add	ecx,8		; undo the bias: ecx = 0..7 remaining pixels
	jz	@@ToEnd
        
        cld        
        mov	ah,1		; marker bit: leaves AH after 8 shifts
@@PIXEL:	lodsb
	rol	al,1		; top bit of the first nibble -> CY
	rcl	ah,1		; shift CY into the collected bits
	
	dec	ecx		; was this the last pixel? (DEC leaves CY alone)
	jz	@@First1
	rol	al,4		; original bit 3 (second nibble) -> CY
	rcl	ah,1		; shift CY into the collected bits
			
	jnc	No@@octet	; marker still inside AH: byte not complete
				; NOTE(review): the tail holds at most 7 pixels, so the
				; marker cannot reach CY here; this path looks unreachable.
	mov	[edx],ah	; Full 8 bits finished, 1 travelled to CY.
	inc	edx
	mov	ah,1		; restart with a fresh marker
	loop	@@PIXEL
	jmp	@@ToEnd		; all done here
	
No@@octet:loop	@@PIXEL

@@First1:	sal	ah,1		; left-align the partial byte: shift until the
	jnc	@@First1	; marker drops out, zero-filling on the right
	mov	[edx],ah	; store last incomplete byte

@@ToEnd:
        ret                     ; _cdecl return
                
Conv4_1 endp


;*************************************************************************************


;void Conv4_8(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Widens 4-bit pixels to 8 bits by duplicating the nibble (n -> n*11h).
; The high nibble of each source byte is the first pixel of the pair.
        public  Conv4_8
Conv4_8 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed nibbles
        mov     edi,[Dest]              ; edi -> 8-bit destination pixels
        cld

@@NextByte:
        lodsb                           ; AL = h:l (two pixels)
        mov     ah,al                   ; AX = h l h l
        mov     dx,ax
        and     dx,00FF0h               ; DX = 0 l h 0
        rol     ax,4                    ; AX = l h l h
        and     ax,0F00Fh               ; AX = l 0 0 h
        or      ax,dx                   ; AX = l l h h : AL = pixel 0, AH = pixel 1
        sub     ecx,2
        jb      @@OddTail               ; only one pixel was left
        stosw                           ; STOSW leaves the flags of SUB intact
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

@@OddTail:
        stosb                           ; store the single remaining pixel
        ret

Conv4_8 endp


;*************************************************************************************


;void Conv4_16(WORD *Dest, const BYTE *Src, unsigned Size1D)
; Widens 4-bit pixels to 16 bits by repeating the nibble four times
; (n -> n*1111h). The high nibble of each source byte is the first pixel.
        public  Conv4_16
Conv4_16 proc \
        uses edi esi, \
        Dest:ptr word, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed nibbles
        mov     edi,[Dest]              ; edi -> 16-bit destination pixels
        cld                             ; STOSx must walk forward

@@NextByte:
        movzx   eax,byte ptr [esi]      ; two pixels: h:l
        inc     esi
        mov     edx,eax
        shr     eax,4                   ; EAX = h (first pixel)
        and     edx,0Fh                 ; EDX = l (second pixel)
        imul    eax,eax,00001111h       ; EAX = 0000 hhhh
        imul    edx,edx,11110000h       ; EDX = llll 0000
        or      eax,edx                 ; low word = pixel 0, high word = pixel 1
        sub     ecx,2
        jb      @@OddTail               ; only one pixel was left
        stosd                           ; STOSD leaves the flags of SUB intact
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

@@OddTail:
        stosw                           ; store the single remaining pixel
        ret                             ; _cdecl return

Conv4_16 endp


;*************************************************************************************


;void Conv4_24(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Widens 4-bit pixels to 24 bits: every output byte of a pixel is the nibble
; duplicated (n -> n*11h), three bytes per pixel. The high nibble of each
; source byte is the first pixel of the pair. count = number of pixels.
        public  Conv4_24
Conv4_24 proc \
        uses edi esi, \
        Dest:ptr word, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels still to convert
        jecxz	@@ToEnd		; array has zero size

        mov     edi,[Dest]     ; edi = destination pointer
        mov     esi,[Src]      ; esi = source pointer
        
        cld        
        
@@PIXEL:	lodsb			; AL = h:l (two pixels)
	mov	ah,al
	mov	dx,ax
	sal	eax,16
	mov	ax,dx
	mov	edx,eax		; hl hl hl hl
	
	rol	eax,4		; lh lh lh lh
	and	edx,00F0FF0F0h	; 0l 0l h0 h0
	and	eax,0F0F00F0Fh	; l0 l0 0h 0h
	or	eax,edx		; ll ll hh hh
	sub	ecx,2
	jb	@@ToEndStor1	; only 1 pixel is remaining
				; STOSW and ROR leave ZF from the SUB untouched.
	stosw			; hh hh          (first pixel, bytes 0..1)
	ror	eax,8
	stosw			; hh ll          (first pixel byte 2, second pixel byte 0)
	ror	eax,8
	stosw			; ll ll          (second pixel, bytes 1..2)
	jnz	@@PIXEL

@@ToEnd:
        ret                     ; _cdecl return
        
@@ToEndStor1:			; odd count: store the three bytes of the last pixel
	stosw
	stosb
        ret                     ; _cdecl return        
                
Conv4_24 endp


;*************************************************************************************


;void Conv4_32(DWORD *Dest, const BYTE *Src, unsigned Size1D)
; Widens 4-bit pixels to 32 bits by repeating the nibble eight times
; (n -> n*11111111h). The high nibble of each source byte is the first pixel.
        public  Conv4_32
Conv4_32 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed nibbles
        mov     edi,[Dest]              ; edi -> 32-bit destination pixels
        cld                             ; STOSD must walk forward

@@NextByte:
        movzx   eax,byte ptr [esi]      ; two pixels: h:l
        inc     esi
        mov     edx,eax
        shr     eax,4                   ; EAX = h (first pixel)
        and     edx,0Fh                 ; EDX = l (second pixel)
        imul    eax,eax,11111111h       ; first pixel, nibble repeated 8 times
        imul    edx,edx,11111111h       ; second pixel, nibble repeated 8 times
        stosd                           ; the first pixel always exists here

        mov     eax,edx
        sub     ecx,2
        jb      @@Done                  ; count was odd: no second pixel
        stosd                           ; STOSD leaves the flags of SUB intact
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv4_32 endp



;*************************************************************************************


;void Conv4_64(DWORD *Dest, const BYTE *Src, unsigned Size1D)
; Widens 4-bit pixels to 64 bits by repeating the nibble sixteen times;
; each pixel is written as two identical dwords. The high nibble of each
; source byte is the first pixel of the pair.
        public  Conv4_64
Conv4_64 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to write
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> packed nibbles
        mov     edi,[Dest]              ; edi -> 64-bit destination pixels
        cld                             ; STOSD must walk forward

@@NextByte:
        movzx   eax,byte ptr [esi]      ; two pixels: h:l
        inc     esi
        mov     edx,eax
        shr     eax,4                   ; EAX = h (first pixel)
        and     edx,0Fh                 ; EDX = l (second pixel)
        imul    eax,eax,11111111h       ; first pixel, nibble repeated 8 times
        imul    edx,edx,11111111h       ; second pixel, nibble repeated 8 times
        stosd                           ; first pixel, low dword
        stosd                           ; first pixel, high dword

        mov     eax,edx
        sub     ecx,2
        jb      @@Done                  ; count was odd: no second pixel
        stosd                           ; STOSD leaves the flags of SUB intact
        stosd
        jnz     @@NextByte

@@Done:
        ret                             ; _cdecl return

Conv4_64 endp




;*************************************************************************************
;*************************************************************************************

        public  Conv8_1
;void Conv8_1(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Packs 8-bit pixels into 1-bit pixels by keeping bit 7 of every source byte.
; Output bits are filled most significant bit first; a final partial byte is
; padded with zero bits on the right. Does nothing when Dest or Src is NULL.
Conv8_1 proc \
        uses esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     edx,[Dest]     ; edx = destination pointer
        or	edx,edx
        jz	@@ToEnd		; NULL destination
        mov     esi,[Src]      ; esi = source pointer
        or	esi,esi
	jz	@@ToEnd		; NULL source
        mov     ecx,[count]     ; ecx = number of pixels
        
	sub	ecx,8		; fewer than 8 pixels (including zero)?
        jb	@@LastOct	; yes: only the partial-byte tail remains
				; Fast path: 8 source bytes -> 1 output byte.
@@NextOct:mov	al,[esi]
        shl	ax,1		; bit 7 of AL moves into AH bit 0
        mov	al,[esi+1]
        shl	ax,1
	mov	al,[esi+2]
        shl	ax,1
        mov	al,[esi+3]
        shl	ax,1
        mov	al,[esi+4]
        shl	ax,1
        mov	al,[esi+5]
        shl	ax,1
	mov	al,[esi+6]
        shl	ax,1
        mov	al,[esi+7]
        shl	ax,1

	mov	[edx],ah	; AH now holds 8 freshly collected bits
	inc	edx
	add	esi,8
	sub	ecx,8        
        jae	@@NextOct
        
@@LastOct:add	ecx,8		; undo the bias: ecx = 0..7 remaining pixels
	jz	@@ToEnd

@@PIXEL1: mov	ah,1		; marker bit: leaves AH after 8 shifts
@@PIXEL:	mov	al,[esi]
	inc	esi
	rcl	ax,1		; AL bit 7 -> AH bit 0; old AH bit 7 -> CY.
				; The incoming CY lands in AL bit 0, which the
				; next load overwrites.
	jc	@@octet		; marker left AH: a whole byte is collected
				; NOTE(review): the tail holds at most 7 pixels, so
				; this branch looks unreachable; confirm.
	dec	ecx
	jnz	@@PIXEL

@@First1:	sal	ah,1		; left-align the partial byte: shift until the
	jnc	@@First1	; marker drops out, feeding zeros on the right
	mov	[edx],ah	; store last incomplete byte
	jmp	@@ToEnd		; all done here
	
@@octet:	mov	[edx],ah	; store a completed byte and restart the marker
	inc	edx
	loop	@@PIXEL1	
@@ToEnd:  ret                     ; _cdecl return		
                
Conv8_1 endp


;*************************************************************************************


        public  Conv8_4
;void Conv8_4(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Narrows 8-bit pixels to 4 bits by keeping the upper nibble. Two pixels are
; packed per output byte, first pixel in the high nibble. For an odd count
; the last byte has a zero low nibble.
Conv8_4 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to read
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 8-bit source pixels
        mov     edi,[Dest]              ; edi -> packed nibbles
        cld

@@Pair:
        lodsb                           ; first pixel of the pair
        and     al,0F0h                 ; keep its upper nibble in place
        dec     ecx
        jz      @@OddTail               ; no partner pixel

        mov     ah,al
        lodsb                           ; second pixel of the pair
        shr     al,4                    ; upper nibble -> low nibble
        or      al,ah
        stosb                           ; both nibbles
        dec     ecx
        jnz     @@Pair

@@Done:
        ret                             ; _cdecl return

@@OddTail:
        stosb                           ; high nibble only, low nibble zero
        ret

Conv8_4 endp



;*************************************************************************************


        public  Conv8_16
;void Conv8_16(WORD *Dest, const BYTE *Src, unsigned Size1D)
; Widens 8-bit pixels to 16 bits by duplicating the byte (v -> v*0101h).
; Pixels are processed four at a time while at least four remain; the
; remaining 0..3 pixels go through a one-pixel loop.
Conv8_16 proc \
        uses edi esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels (unsigned)

        mov     edi,[Dest]     ; edi = destination pointer
        mov     esi,[Src]      ; esi = source pointer
        
        cld
	sub	ecx,4
        jb	@@PIXEL1	; fewer than 4 pixels (unsigned test, matches JAE below)
       
@@PIXEL4:	lodsd			; pixels 1,2,3,4 (pixel 1 in AL)
        mov	edx,eax
        
        mov	al,dh		; AH already equals pixel 2, so AX = p2:p2
        sal	eax,16
        mov	ah,dl
        mov	al,dl		; EAX = p2 p2 p1 p1
        stosd			; converted pixel 1 & 2
        
        shr	edx,16		; DL = pixel 3, DH = pixel 4
        mov	al,dh
        mov	ah,dh
        sal	eax,16
        mov	ah,dl
        mov	al,dl		; EAX = p4 p4 p3 p3
        stosd			; converted pixel 3 & 4

	sub	ecx,4
        jae	@@PIXEL4

@@PIXEL1: add	ecx,4		; undo the bias: ecx = 0..3 remaining pixels
        jz	@@ToEnd		; nothing left (or the row was empty)
@@PIXEL:	lodsb
	mov	ah,al		; duplicate the byte into both halves
	stosw
	loop	@@PIXEL
        
@@ToEnd:	ret			; _cdecl return
                
Conv8_16 endp


;*************************************************************************************

;void Conv8_24(BYTE *Dest, const BYTE *Src, unsigned Size1D);
; Widens 8-bit pixels to 24 bits by writing each source byte three times.
        public  Conv8_24
Conv8_24 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to convert
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 8-bit source pixels
        mov     edi,[Dest]              ; edi -> 3-byte destination pixels
        cld

@@NextPixel:
        lodsb
        mov     ah,al                   ; AX = v:v
        stosw                           ; bytes 0 and 1
        stosb                           ; byte 2
        dec     ecx
        jnz     @@NextPixel

@@Done:
        ret                             ; _cdecl return

Conv8_24 endp


;*************************************************************************************

        public  Conv8_32
;void Conv8_32(DWORD *Dest, const BYTE *Src, unsigned Size1D)
; Widens 8-bit pixels to 32 bits by repeating the byte four times
; (v -> v*01010101h). Pixels are processed four at a time while at least
; four remain; the remaining 0..3 pixels go through a one-pixel loop.
Conv8_32 proc \
        uses edi esi ebx, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels (unsigned)
        mov     edi,[Dest]     ; edi = destination pointer
        mov     esi,[Src]      ; esi = source pointer
       
        mov	ebx, 01010101h 	; multiplier that replicates a byte 4 times
        sub	ecx,4
        jb	@@PIXEL1	; fewer than 4 pixels (unsigned test, matches JAE below)
        
        push	ebp		; all arguments are already in registers, so
        mov	ebp,ebx		; EBP can hold 01010101h while EBX carries data
@@PIXEL4:	mov	ebx,dword ptr [esi]	; four source pixels
	add	esi,4
	
	movzx	eax,bl
	mul	ebp		; EAX = pixel replicated (MUL also clears EDX)
	shr	ebx,8
	mov	dword ptr [edi],eax
	
	movzx	eax,bl
	mul	ebp
	shr	ebx,8
	mov	dword ptr [edi+4],eax
	
	movzx	eax,bl
	mul	ebp
	shr	ebx,8
	mov	dword ptr [edi+8],eax
	
	movzx	eax,bl
	mul	ebp
	shr	ebx,8
	mov	dword ptr [edi+12],eax        

	add	edi,16
	sub	ecx,4
        jae	@@PIXEL4
        
        mov	ebx,ebp		; multiplier back into EBX for the tail loop
        pop	ebp        	; frame pointer restored before any RET
        
@@PIXEL1: add	ecx,4		; undo the bias: ecx = 0..3 remaining pixels
        jz	@@ToEnd		; array has zero size or transfer done
        cld
@@PIXEL:	movzx	eax,byte ptr[esi]
	inc	esi
        mul	ebx
        stosd
        loop	@@PIXEL
        
@@ToEnd:	ret                     ; _cdecl return
                
Conv8_32 endp


;*************************************************************************************

;void Conv8_64(QWORD *Dest, const BYTE *Src, unsigned Size1D);
; Widens 8-bit pixels to 64 bits by repeating the byte eight times; each
; pixel is written as two identical dwords.
        public  Conv8_64
Conv8_64 proc \
        uses edi esi ebx, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to convert
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 8-bit source pixels
        mov     edi,[Dest]              ; edi -> 64-bit destination pixels
        mov     ebx,01010101h           ; multiplier that replicates a byte 4 times
        cld                             ; STOSD must walk forward

@@NextPixel:
        movzx   eax,byte ptr [esi]
        mul     ebx                     ; EAX = v v v v; product fits, so EDX = 0
        inc     esi
        stosd                           ; low dword
        stosd                           ; high dword
        dec     ecx
        jnz     @@NextPixel

@@Done:
        ret                             ; _cdecl return

Conv8_64 endp


;*************************************************************************************
;*************************************************************************************

        public  Conv16_1
;void Conv16_1(BYTE *Dest, const WORD *Src, unsigned Size1D)
; Packs 16-bit pixels into 1-bit pixels by keeping bit 7 of the byte at
; offset 1 of every pixel (the high byte of a little-endian word).
; Output bits are filled most significant bit first; a final partial byte is
; padded with zero bits on the right. Does nothing when Dest or Src is NULL.
Conv16_1 proc \
        uses esi, \
        Dest:ptr byte, \
        Src:ptr word, \
        count:DWORD

        mov     edx,[Dest]     ; edx = destination pointer
        or	edx,edx
        jz	@@ToEnd		; NULL destination
        mov     esi,[Src]      ; esi = source pointer
        or	esi,esi
        jz	@@ToEnd		; NULL source
        inc	esi		; address the byte at offset 1 of each pixel
	mov     ecx,[count]     ; ecx = number of pixels
	
	sub	ecx,8		; fewer than 8 pixels (including zero)?
        jb	@@LastOct	; yes: only the partial-byte tail remains
				; Fast path: 8 pixels -> 1 output byte.
@@NextOct:mov	al,[esi]
        rcl	ax,1		; AL bit 7 -> AH bit 0; the incoming CY only
				; reaches AL bit 0, which is reloaded next
        mov	al,[esi+2]
        rcl	ax,1
	mov	al,[esi+4]
        rcl	ax,1
        mov	al,[esi+6]
        rcl	ax,1
        mov	al,[esi+8]
        rcl	ax,1
        mov	al,[esi+10]
        rcl	ax,1
	mov	al,[esi+12]
        rcl	ax,1
        mov	al,[esi+14]
        rcl	ax,1
	mov	[edx],ah	; AH now holds 8 freshly collected bits
	inc	edx
	add	esi,16
	sub	ecx,8
        jae	@@NextOct
        
@@LastOct:add	ecx,8		; undo the bias: ecx = 0..7 remaining pixels
	jz	@@ToEnd

@@PIXEL1: mov	ah,1		; marker bit: leaves AH after 8 shifts
@@PIXEL:	mov	al,[esi]
	add	esi,2
	rcl	ax,1		; copy the highest bit to AH; transfer stop bit to CY
	jc	@@octet		; marker left AH: a whole byte is collected
				; NOTE(review): the tail holds at most 7 pixels, so
				; this branch looks unreachable; confirm.
	dec	ecx
	jnz	@@PIXEL
				; Loop finished
@@First1:	sal	ah,1		; left-align the partial byte: shift until the
	jnc	@@First1	; marker drops out, feeding zeros on the right
	mov	[edx],ah	; store last incomplete byte
	jmp	@@ToEnd
	
@@octet:	mov	[edx],ah	; store a completed byte and restart the marker
	inc	edx
	loop	@@PIXEL1
@@ToEnd:
        ret                     ; _cdecl return
                
Conv16_1 endp



;*************************************************************************************


        public  Conv16_4
;void Conv16_4(BYTE *Dest, const WORD *Src, unsigned Size1D)
; Narrows 16-bit pixels to 4 bits by keeping the upper nibble of the byte at
; offset 1 of each pixel. Two pixels are packed per output byte, first pixel
; in the high nibble; for an odd count the last low nibble is zero.
Conv16_4 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr word, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to read
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 16-bit source pixels
        mov     edi,[Dest]              ; edi -> packed nibbles
        cld                             ; STOSB must walk forward

@@Pair:
        mov     al,[esi+1]              ; upper byte of the first pixel
        and     al,0F0h                 ; keep its upper nibble in place
        dec     ecx
        jz      @@OddTail               ; no partner pixel

        mov     ah,al
        mov     al,[esi+3]              ; upper byte of the second pixel
        add     esi,4                   ; two pixels consumed
        shr     al,4                    ; upper nibble -> low nibble
        or      al,ah
        stosb                           ; both nibbles
        dec     ecx
        jnz     @@Pair

@@Done:
        ret                             ; _cdecl return

@@OddTail:
        stosb                           ; high nibble only, low nibble zero
        ret

Conv16_4 endp



;*************************************************************************************


        public  Conv16_8
;void Conv16_8(BYTE *Dest, const WORD *Src, unsigned Size1D)
; Narrows 16-bit pixels to 8 bits by keeping the byte at offset 1 of each
; pixel (the high byte of a little-endian word). Pixels are processed four
; at a time while at least four remain; the remaining 0..3 pixels go through
; a one-pixel loop.
Conv16_8 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr word, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels (unsigned)
        jecxz	@@ToEnd		; array has zero size

        mov     edi,[Dest]     ; edi = destination pointer
        mov     esi,[Src]      ; esi = source pointer
        
        cld
        
	sub	ecx,4
        jb	CLASSIC		; fewer than 4 pixels (unsigned test, matches JAE below)
        
				; Byte maps below list EAX/EDX from the top byte down;
				; digits are the wanted high bytes of pixels 1..4.
@@PIXEL4: mov	eax,[esi]	;		[2-1-]
	mov	edx,[esi+4]	;		[4-3-]
	add	esi,8
	mov	al,dh		; pixel 3	[2-13]
	mov	dl,ah		; pixel 1	[4-31]
	
	rol	edx,8		;		[-314]
	mov	ah,dl		; pixel 4	[2-43]
	
	ror	eax,16		;		[432-]
	mov	al,dh		; pixel 1	[4321]
	stosd	
        
        sub	ecx,4
        jae	@@PIXEL4        
        
CLASSIC:add	ecx,4		; undo the bias: ecx = 0..3 remaining pixels
	jz	@@ToEnd

@@PIXEL:	lodsw
	mov	al,ah		; keep the high byte
	stosb
	loop	@@PIXEL
        
@@ToEnd:
        ret                     ; _cdecl return
                
Conv16_8 endp


;*************************************************************************************


        public  Conv16_24
;void Conv16_24(BYTE *Dest, const WORD *Src, unsigned Size1D)
; Widens 16-bit pixels to 24 bits. For a source word hi:lo the three output
; bytes are hi, lo, hi in ascending address order.
Conv16_24 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr WORD, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to convert
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 16-bit source pixels
        mov     edi,[Dest]              ; edi -> 3-byte destination pixels
        cld                             ; LODSW must walk forward

@@NextPixel:
        lodsw                           ; AH = high byte, AL = low byte
        mov     [edi],ah                ; byte 0 = high byte
        mov     [edi+1],al              ; byte 1 = low byte
        mov     [edi+2],ah              ; byte 2 = high byte again
        add     edi,3
        dec     ecx
        jnz     @@NextPixel

@@Done:
        ret                             ; _cdecl return

Conv16_24 endp


;*************************************************************************************

        public  Conv16_32
;void Conv16_32(DWORD *Dest, const WORD *Src, unsigned Size1D)
; Widens 16-bit pixels to 32 bits by placing the source word in both halves
; of the destination dword (v -> v*00010001h).
Conv16_32 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to convert
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 16-bit source pixels
        mov     edi,[Dest]              ; edi -> 32-bit destination pixels
        cld                             ; STOSD must walk forward

@@NextPixel:
        movzx   eax,word ptr [esi]      ; EAX = 0000:v
        add     esi,2
        mov     edx,eax
        shl     eax,16                  ; EAX = v:0000
        or      eax,edx                 ; EAX = v:v
        stosd
        dec     ecx
        jnz     @@NextPixel

@@Done:
        ret                             ; _cdecl return

Conv16_32 endp

;*************************************************************************************

        public  Conv16_64
;void Conv16_64(QWORD *Dest, const WORD *Src, unsigned Size1D)
; Widens 16-bit pixels to 64 bits by repeating the source word four times;
; each pixel is written as two identical dwords.
Conv16_64 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to convert
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 16-bit source pixels
        mov     edi,[Dest]              ; edi -> 64-bit destination pixels
        cld                             ; STOSD must walk forward

@@NextPixel:
        movzx   eax,word ptr [esi]      ; EAX = 0000:v
        add     esi,2
        mov     edx,eax
        shl     eax,16                  ; EAX = v:0000
        or      eax,edx                 ; EAX = v:v
        stosd                           ; low dword
        stosd                           ; high dword
        dec     ecx
        jnz     @@NextPixel

@@Done:
        ret                             ; _cdecl return

Conv16_64 endp


;*************************************************************************************
;*************************************************************************************


        public  Conv24_1
;void Conv24_1(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Packs 3-byte pixels into 1-bit pixels by keeping bit 7 of the byte at
; offset 2 of every pixel (presumably the most significant byte; confirm
; against the 24-bit pixel layout used by the callers).
; Output bits are filled most significant bit first; a final partial byte is
; padded with zero bits on the right. Does nothing when Dest or Src is NULL.
Conv24_1 proc \
        uses esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = number of pixels
        mov     edx,[Dest]     ; edx = destination pointer
        or	edx,edx
        jz	@@ToEnd        	; NULL destination
        mov     esi,[Src]      ; esi = source pointer
        or	esi,esi
        jz	@@ToEnd		; NULL source
        add	esi,2		; address the byte at offset 2 of each pixel
        
	sub	ecx,8		; fewer than 8 pixels (including zero)?
        jb	@@LastOct	; yes: only the partial-byte tail remains
				; Fast path: 8 pixels (24 bytes) -> 1 output byte.
@@NextOct:mov	al,[esi]
        rcl	ax,1		; AL bit 7 -> AH bit 0; the incoming CY only
				; reaches AL bit 0, which is reloaded next
        mov	al,[esi+3]
        rcl	ax,1
	mov	al,[esi+6]
        rcl	ax,1
        mov	al,[esi+9]
        rcl	ax,1
        mov	al,[esi+12]
        rcl	ax,1
        mov	al,[esi+15]
        rcl	ax,1
	mov	al,[esi+18]
        rcl	ax,1
        mov	al,[esi+21]
        rcl	ax,1
	mov	[edx],ah	; AH now holds 8 freshly collected bits
	inc	edx
	add	esi,24
	sub	ecx,8
        jae	@@NextOct
        
@@LastOct:add	ecx,8		; undo the bias: ecx = 0..7 remaining pixels
	jz	@@ToEnd        

@@PIXEL1: mov	ah,1		; marker bit: leaves AH after 8 shifts
@@PIXEL:	mov	al,[esi]
	add	esi,3
	rcl	ax,1		; AL bit 7 -> AH bit 0; old AH bit 7 -> CY
	jc	@@octet		; marker left AH: a whole byte is collected
				; NOTE(review): the tail holds at most 7 pixels, so
				; this branch looks unreachable; confirm.
	dec	ecx
	jnz	@@PIXEL

@@First1:	sal	ah,1		; left-align the partial byte: shift until the
	jnc	@@First1	; marker drops out, feeding zeros on the right
	mov	[edx],ah	; store last incomplete byte
	jmp	@@ToEnd		; all done here
	
@@octet:	mov	[edx],ah	; store a completed byte and restart the marker
	inc	edx
	loop	@@PIXEL1	
@@ToEnd:  ret                     ; _cdecl return		
                
Conv24_1 endp


;*************************************************************************************


        public  Conv24_4
;void Conv24_4(BYTE *Dest, const BYTE *Src, unsigned Size1D)
; Narrows 3-byte pixels to 4 bits by keeping the upper nibble of the byte at
; offset 2 of each pixel. Two pixels are packed per output byte, first pixel
; in the high nibble; for an odd count the last low nibble is zero.
Conv24_4 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]             ; ecx = pixels left to read
        test    ecx,ecx
        jz      @@Done                  ; empty row

        mov     esi,[Src]               ; esi -> 3-byte source pixels
        mov     edi,[Dest]              ; edi -> packed nibbles
        cld                             ; STOSB must walk forward

@@Pair:
        mov     al,[esi+2]              ; byte 2 of the first pixel
        and     al,0F0h                 ; keep its upper nibble in place
        dec     ecx
        jz      @@OddTail               ; no partner pixel

        mov     ah,al
        mov     al,[esi+5]              ; byte 2 of the second pixel
        add     esi,6                   ; two pixels consumed
        shr     al,4                    ; upper nibble -> low nibble
        or      al,ah
        stosb                           ; both nibbles
        dec     ecx
        jnz     @@Pair

@@Done:
        ret                             ; _cdecl return

@@OddTail:
        stosb                           ; high nibble only, low nibble zero
        ret

Conv24_4 endp


;*************************************************************************************

        public  Conv24_8
;-----------------------------------------------------------------------
; Conv24_8 - reduce a row of 3-byte pixels to one byte per pixel.
;   Dest  : output row, 1 byte per pixel
;   Src   : input row, 3 bytes per pixel; the byte at offset 2 is copied
;   count : number of pixels; 0 writes nothing
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv24_8 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; lodsb/stosb walk upwards
        add     esi,2           ; esi -> byte 2 of the first pixel

@@Next:
        lodsb                   ; al = byte 2 of the pixel, esi += 1
        add     esi,2           ; esi -> byte 2 of the following pixel
        stosb
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv24_8 endp


;*************************************************************************************

        public  Conv24_16
;-----------------------------------------------------------------------
; Conv24_16 - reduce a row of 3-byte pixels to 16-bit pixels.
;   Dest  : output row, one word per pixel
;   Src   : input row, 3 bytes per pixel; bytes at offsets 1 and 2 form
;           the low and high byte of the output word, byte 0 is dropped
;   count : number of pixels; 0 writes nothing
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv24_16 proc \
        uses edi esi, \
        Dest:ptr word, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosw walks upwards

@@Next:
        mov     ax,word ptr [esi+1]     ; al = byte 1, ah = byte 2
        add     esi,3           ; next source pixel
        stosw
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv24_16 endp


;*************************************************************************************

        public  Conv24_32
;-----------------------------------------------------------------------
; Conv24_32 - widen a row of 3-byte pixels to 32-bit pixels.
;   Dest  : output row, one dword per pixel
;   Src   : input row, 3 bytes per pixel (B1 at offset 0, B2, B3)
;   count : number of pixels; 0 writes nothing
; Every output dword is  B3 B2 B1 B3  (most significant byte first): the
; 24-bit value is moved to the top and its high byte is repeated in the
; lowest byte.
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv24_32 proc \
        uses edi esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosd walks upwards

@@Next:
        mov     al,[esi]        ;  x  x  x B1
        mov     ah,[esi+1]      ;  x  x B2 B1
        shl     eax,16          ; B2 B1  0  0
        mov     al,[esi+2]      ; B2 B1  0 B3
        mov     ah,al           ; B2 B1 B3 B3
        add     esi,3           ; next source pixel
        ror     eax,8           ; B3 B2 B1 B3
        stosd
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv24_32 endp



        public  Conv24_64
;-----------------------------------------------------------------------
; Conv24_64 - widen a row of 3-byte pixels to 64-bit pixels.
;   Dest  : output row, one qword per pixel
;   Src   : input row, 3 bytes per pixel (B1 at offset 0, B2, B3)
;   count : number of pixels; 0 writes nothing
; Every output qword is, most significant byte first,
;       B3 B2 B1 B3 | B2 B1 B3 B2
; i.e. the 24-bit value repeated from the top downwards.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv24_64 proc \
        uses edi esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosd walks upwards

@@Next:
        mov     dl,[esi+2]      ; dl = B3
        mov     ax,word ptr [esi]       ;  x  x B2 B1
        add     esi,3           ; next source pixel
        mov     dh,ah           ; dh = B2
        shl     eax,16          ; B2 B1  0  0
        mov     ah,dl           ; B2 B1 B3  0
        mov     al,dh           ; B2 B1 B3 B2   low dword
        stosd

        rol     eax,16          ; B3 B2 B2 B1
        mov     ah,al           ; B3 B2 B1 B1
        mov     al,dl           ; B3 B2 B1 B3   high dword
        stosd

        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv24_64 endp


;*************************************************************************************
;*************************************************************************************


        public  Conv32_1
;-----------------------------------------------------------------------
; Conv32_1 - threshold a row of 4-byte pixels down to 1 bit per pixel.
;   Dest  : output row, 8 pixels per byte, first pixel in bit 7
;   Src   : input row, 4 bytes per pixel; bit 7 of the byte at offset 3
;           becomes the output bit
;   count : number of pixels
; A NULL Dest or Src, or count = 0, returns without writing.  A final
; group of fewer than 8 pixels is left-aligned and padded with zero bits.
; Scratch: EAX, ECX, EDX.  ESI is preserved through USES.
;-----------------------------------------------------------------------
Conv32_1 proc \
        uses esi, \
        Dest:ptr dword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        mov     edx,[Dest]      ; edx = output cursor
        test    edx,edx
        jz      @@Done          ; no destination buffer
        mov     esi,[Src]
        test    esi,esi
        jz      @@Done          ; no source buffer
        add     esi,3           ; esi -> byte 3 of the first pixel

        sub     ecx,8
        jb      @@Tail          ; fewer than eight pixels altogether

        ; Eight pixels per pass: each SHL moves bit 7 of AL into bit 0 of AH.
@@Octet:
        mov     al,[esi]
        shl     ax,1
        mov     al,[esi+4]
        shl     ax,1
        mov     al,[esi+8]
        shl     ax,1
        mov     al,[esi+12]
        shl     ax,1
        mov     al,[esi+16]
        shl     ax,1
        mov     al,[esi+20]
        shl     ax,1
        mov     al,[esi+24]
        shl     ax,1
        mov     al,[esi+28]
        shl     ax,1
        mov     [edx],ah        ; eight collected bits
        inc     edx
        add     esi,32
        sub     ecx,8
        jae     @@Octet

@@Tail:
        add     ecx,8           ; undo the last subtraction: 0..7 pixels remain
        jz      @@Done

@@NewByte:
        mov     ah,1            ; marker bit; it drops out of AH after 8 shifts
@@Bit:
        mov     al,[esi]
        add     esi,4
        shl     ax,1            ; collect one bit, carry = marker fell out
        jc      @@Full
        dec     ecx
        jnz     @@Bit

@@Align:
        shl     ah,1            ; left-align the partial byte, zero fill
        jnc     @@Align         ; stop once the marker has been shifted out
        mov     [edx],ah        ; store the last, incomplete byte
        ret                     ; _cdecl return

        ; NOTE(review): with at most 7 pixels left the marker cannot reach
        ; the carry, so this branch looks unreachable; kept as in the original.
@@Full:
        mov     [edx],ah
        inc     edx
        dec     ecx
        jnz     @@NewByte

@@Done:
        ret                     ; _cdecl return

Conv32_1 endp


;*************************************************************************************


        public  Conv32_4
;-----------------------------------------------------------------------
; Conv32_4 - pack a row of 4-byte pixels into 4-bit pixels.
;   Dest  : output row, two pixels per byte (first pixel in the high nibble)
;   Src   : input row, 4 bytes per pixel; only the upper nibble of the byte
;           at offset 3 of every pixel is used
;   count : number of pixels; 0 writes nothing
; An odd count leaves the low nibble of the last output byte zero.
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv32_4 proc \
        uses edi esi, \
        Dest:ptr byte, \
        Src:ptr dword, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosb walks upwards
        add     esi,3           ; esi -> byte 3 of the first pixel

@@Pair:
        mov     ah,[esi]        ; top byte of the first pixel of the pair
        and     ah,0F0h         ; keep its upper nibble
        dec     ecx
        jz      @@Single        ; that was the very last pixel

        mov     al,[esi+4]      ; top byte of the second pixel
        add     esi,8           ; step over both source pixels
        shr     al,4            ; upper nibble -> lower nibble
        or      al,ah
        stosb                   ; emit the packed pair
        dec     ecx
        jnz     @@Pair
@@Done:
        ret                     ; _cdecl return

@@Single:
        mov     al,ah           ; lone pixel: low nibble stays zero
        stosb
        ret                     ; _cdecl return

Conv32_4 endp



;*************************************************************************************


        public  Conv32_8
;-----------------------------------------------------------------------
; Conv32_8 - reduce a row of 4-byte pixels to one byte per pixel.
;   Dest  : output row, 1 byte per pixel
;   Src   : input row, 4 bytes per pixel; the byte at offset 3 is copied
;   count : number of pixels; 0 writes nothing
; Groups of four pixels are gathered into one dword store, the remaining
; 0..3 pixels are copied byte by byte.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv32_8 proc \
        uses edi esi, \
        Dest:ptr qword, \
        Src:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixel count
        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosd/stosb walk upwards

        mov     edx,ecx
        shr     edx,2           ; edx = number of complete 4-pixel groups
        jz      @@Tail

@@Quad:
        mov     al,[esi+11]     ; pixel 2
        mov     ah,[esi+15]     ; pixel 3
        shl     eax,16          ; move them to the upper half
        mov     al,[esi+3]      ; pixel 0
        mov     ah,[esi+7]      ; pixel 1
        add     esi,16
        stosd                   ; four output bytes at once
        dec     edx
        jnz     @@Quad

@@Tail:
        and     ecx,3           ; 0..3 pixels remain
        jz      @@Done

@@One:
        mov     al,[esi+3]
        add     esi,4
        stosb
        dec     ecx
        jnz     @@One

@@Done:
        ret                     ; _cdecl return

Conv32_8 endp


;*************************************************************************************


        public  Conv32_16
;-----------------------------------------------------------------------
; Conv32_16 - reduce a row of 32-bit pixels to 16-bit pixels.
;   Dest  : output row, one word per pixel
;   Src   : input row, one dword per pixel; its upper word (bytes at
;           offsets 2 and 3) is copied, the lower word is dropped
;   count : number of pixels; 0 writes nothing
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv32_16 proc \
        uses edi esi, \
        Dest:ptr word, \
        Src:ptr dword, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosw walks upwards

@@Next:
        mov     ax,word ptr [esi+2]     ; upper half of the source pixel
        add     esi,4
        stosw
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv32_16 endp


;*************************************************************************************


        public  Conv32_24
;-----------------------------------------------------------------------
; Conv32_24 - reduce a row of 32-bit pixels to 3-byte pixels.
;   Dest  : output row, 3 bytes per pixel
;   Src   : input row, one dword per pixel; the bytes at offsets 1, 2 and 3
;           are copied in that order, the lowest byte is dropped
;   count : number of pixels; 0 writes nothing
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv32_24 proc \
        uses edi esi, \
        Dest:ptr word, \
        Src:ptr dword, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     edi,[Dest]
        mov     esi,[Src]
        cld                     ; lodsd/stosw/stosb walk upwards

@@Next:
        lodsd                   ; B4 B3 B2 B1
        shr     eax,8           ;  0 B4 B3 B2
        stosw                   ; writes B2, B3
        shr     eax,16          ;  0  0  0 B4
        stosb                   ; writes B4
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv32_24 endp


;*************************************************************************************


        public  Conv32_64
;-----------------------------------------------------------------------
; Conv32_64 - widen a row of 32-bit pixels to 64-bit pixels.
;   Dest  : output row, one qword per pixel
;   Src   : input row, one dword per pixel
;   count : number of pixels; 0 writes nothing
; The source dword is written twice, so both halves of every output
; qword hold the same value.
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv32_64 proc \
        uses edi esi, \
        Dest:ptr qword, \
        Src:ptr dword, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; lodsd walks upwards

@@Next:
        lodsd                   ; fetch one source pixel
        mov     [edi],eax       ; low half
        mov     [edi+4],eax     ; high half, same value
        add     edi,8
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv32_64 endp


;*************************************************************************************


        public  Conv64_32
;-----------------------------------------------------------------------
; Conv64_32 - reduce a row of 64-bit pixels to 32-bit pixels.
;   Dest  : output row, one dword per pixel
;   Src   : input row, 8 bytes per pixel; the upper dword (offset 4) of
;           every pixel is copied, the lower dword is dropped
;   count : number of pixels; 0 writes nothing
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Conv64_32 proc \
        uses edi esi, \
        Dest:ptr qword, \
        Src:ptr dword, \
        count:DWORD

        mov     ecx,[count]     ; ecx = pixels left
        test    ecx,ecx
        jz      @@Done          ; empty row

        mov     esi,[Src]
        mov     edi,[Dest]
        cld                     ; stosd walks upwards

@@Next:
        mov     eax,[esi+4]     ; upper half of the source pixel
        add     esi,8
        stosd
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Conv64_32 endp



;########################################################################################
;########################################################################################
;########################################################################################

	public  Flip1
;-----------------------------------------------------------------------
; Flip1 - mirror a row of 1-bit pixels in place.
;   Data  : pointer to the packed row (8 pixels per byte)
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Pass 1 swaps the bytes of the row end for end and translates every
;        byte through swap_bits_xlat (external 256-byte table, presumably
;        a bit-reversal table - confirm at its definition).
; Pass 2 runs only when count is not a multiple of 8: the whole row is
;        shifted left by 8-(count AND 7) bits, so the unused bits of the
;        last byte, which pass 1 moved to the front, return to the low
;        end of the last byte (filled with zeros).
;
; The two cursors are addresses, so they are compared unsigned (JB); a
; signed compare would end the loop early for a row that crosses the
; 2 GB address boundary.
; Scratch: EAX, ECX, EDX.  ESI is preserved through USES.
;-----------------------------------------------------------------------
Flip1	proc \
        uses esi, \
        Data:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        mov     esi,[Data]	; esi = front cursor

        cmp	ecx,1
        jle	@@ToEnd		; ignore values 0 and 1

	xor	eax,eax		; upper part of EAX stays 0: EAX is a table index
	dec	ecx
        shr	ecx,3		; ecx = index of the last byte of the row
        jz	@@LastByte	; the row fits into one byte

	add	ecx,esi		; ecx = back cursor (address of the last byte)

	xor	edx,edx		; upper part of EDX stays 0: EDX is a table index
@@PIXEL:	mov	al,[ecx]
	mov	dl,[esi]
	mov	al,[swap_bits_xlat+EAX]
	mov	dl,[swap_bits_xlat+EDX]
	mov	[ecx],dl	; exchange the translated bytes
	mov	[esi],al
	dec	ecx
	inc	esi
	cmp	esi,ecx
	jb	@@PIXEL		; unsigned: cursors have not met yet
	jne	@@ToEnd8	; cursors crossed: no middle byte left
@@LastByte:mov	al,[esi]	; the middle (or only) byte is translated in place
	mov	al,[swap_bits_xlat+EAX]
	mov	[esi],al

@@ToEnd8:	mov     ecx,[count]
	mov	edx,ecx
	mov	esi,[Data]
	and	cl,7		; pixels used in the last byte
	jz	@@ToEnd		; row ends on a byte boundary, no shift needed
	xor	cl,7
	inc	cl		; cl = 8 - (count AND 7) = bits to shift left

	mov	ch,[esi]	; ch = current byte, not yet shifted
	shr	edx,3		; edx = number of bytes that have a successor
	jz	@@LastShift

@@LoopShift:
	mov	al,ch		; al = current byte
	mov	ah,[esi+1]	; ah = next byte
	mov	ch,ah		; remember the unshifted next byte
	rol	ax,cl		; al = (current << cl) | (next >> (8-cl))
	mov	[esi],al
	inc	esi
	dec	edx
	jnz	@@LoopShift

@@LastShift: shl	ch,cl	; last byte: zeros enter from the right
	mov	[esi],ch

@@ToEnd:
	ret			; _cdecl return
Flip1	endp


;########################################################################################


	public  Flip2
;-----------------------------------------------------------------------
; Flip2 - mirror a row of 2-bit pixels in place.
;   Data  : pointer to the packed row (4 pixels per byte)
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Pass 1 swaps the bytes of the row end for end and translates every
;        byte through swap_bits2_xlat (external 256-byte table, presumably
;        reversing the order of the four 2-bit fields - confirm at its
;        definition).
; Pass 2 runs only when 2*count is not a multiple of 8: the whole row is
;        shifted left by 8-((2*count) AND 7) bits, so the unused bits of
;        the last byte, which pass 1 moved to the front, return to the
;        low end of the last byte (filled with zeros).
;
; The two cursors are addresses, so they are compared unsigned (JB); a
; signed compare would end the loop early for a row that crosses the
; 2 GB address boundary.
; Scratch: EAX, ECX.  ESI/EBX are preserved through USES.
;-----------------------------------------------------------------------
Flip2	proc \
        uses esi ebx, \
        Data:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        mov     esi,[Data]	; esi = front cursor

        cmp	ecx,1
        jle	@@ToEnd		; ignore values 0 and 1

	xor	eax,eax		; upper part of EAX stays 0: EAX is a table index
	dec	ecx
        shr	ecx,2		; ecx = index of the last byte of the row
        jz	@@LastByte	; the row fits into one byte

	add	ecx,esi		; ecx = back cursor (address of the last byte)

	xor	ebx,ebx		; upper part of EBX stays 0: EBX is a table index
@@PIXEL:	mov	al,[ecx]
	mov	bl,[esi]
	mov	al,[swap_bits2_xlat+EAX]
	mov	bl,[swap_bits2_xlat+EBX]
	mov	[ecx],bl	; exchange the translated bytes
	mov	[esi],al
	dec	ecx
	inc	esi
	cmp	esi,ecx
	jb	@@PIXEL		; unsigned: cursors have not met yet
	jne	@@ToEnd8	; cursors crossed: no middle byte left
@@LastByte:mov	al,[esi]	; the middle (or only) byte is translated in place
	mov	al,[swap_bits2_xlat+EAX]
	mov	[esi],al

@@ToEnd8:	mov     ecx,[count]
	mov	ebx,ecx
	mov	esi,[Data]
	sal	cl,1		; two bits per pixel
	and	cl,7		; bits used in the last byte
	jz	@@ToEnd		; row ends on a byte boundary, no shift needed
	xor	cl,7
	inc	cl		; cl = 8 - used bits = bits to shift left

	mov	ch,[esi]	; ch = current byte, not yet shifted
	shr	ebx,2		; ebx = number of bytes that have a successor
	jz	@@LastShift

@@LoopShift:
	mov	al,ch		; al = current byte
	mov	ah,[esi+1]	; ah = next byte
	mov	ch,ah		; remember the unshifted next byte
	rol	ax,cl		; al = (current << cl) | (next >> (8-cl))
	mov	[esi],al
	inc	esi
	dec	ebx
	jnz	@@LoopShift

@@LastShift: shl	ch,cl	; last byte: zeros enter from the right
	mov	[esi],ch

@@ToEnd:
	ret			; _cdecl return
Flip2	endp



;*************************************************************************************


	public  Flip4
;-----------------------------------------------------------------------
; Flip4 - mirror a row of 4-bit pixels in place.
;   Data  : pointer to the packed row; the high nibble of a byte is the
;           earlier pixel of the pair (the odd-count path keeps the low
;           nibble of the last byte untouched as padding)
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Even count: bytes are exchanged end for end and ROL 4 swaps the two
;             nibbles inside each byte.
; Odd count : the last byte holds only one pixel, so nibbles have to be
;             exchanged across byte boundaries.  Each loop pass swaps the
;             high nibble at EDI with the high nibble at ESI, then the low
;             nibble at EDI with the low nibble at ESI-1.  One of the two
;             stores is delayed ("lazy") and carried in AL or DL.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Flip4	proc \
        uses edi esi, \
        Data:ptr byte, \        
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        cmp	ecx,1		; 1 or less pixels makes no sense to flip
        jle	@@ToEnd		; nothing to do for 0 or 1 pixel (signed test)

        mov     edi,[Data]	; edi = front cursor
        mov     esi,edi	; esi becomes the back cursor below
        
        shr	ecx,1		; ecx = count / 2, carry = count is odd
        jc	PixOdd
        
	add	esi,ecx		; pixel count is even
        dec	esi		; esi -> last byte of the row
        
        cmp	edi,esi
        je	LastNibble	; This can occur for size=2.
LoopEven:mov	al,[edi]	; Process first byte with nibbles
	rol	al,4		; this shift flips nibbles	

	mov	dl,[esi]	; Process second byte with nibbles
	rol	dl,4

	mov	[esi],al
	mov	[edi],dl

	dec	esi
	inc	edi
	cmp	edi,esi
	jb	LoopEven	; continue while edi < esi (unsigned)
	jne	@@ToEnd		; cursors crossed: no middle byte is left
LastNibble:
	mov	al,[edi]
	rol	al,4
	mov	[edi],al	; Middle byte only needs its nibbles exchanged.
	jmp	@@ToEnd

                
PixOdd:	add	esi,ecx		; pixel count is odd i.e. >=3; esi -> last byte
	mov	dl,[esi]	; dl = pending content of the back byte
LoopOddD:mov	dh,dl		; dx = back byte in both halves
	
	mov	al,[edi]
	mov	ah,al		; ax = front byte in both halves
		
	and	ax,0F00Fh	; nibbleA1    0       0      nibbleA0
	and	dx,0FF0h	;    0     nibbleB0 nibbleB1    0
		
	or	ax,dx		; nibbleA1 nibbleB0 nibbleB1 nibbleA0
	;mov	[edi],al	; nibble 1 flipped with nibble n - no need to store here	
	mov	[esi],ah	; nibble n flipped with nibble 1

	dec	esi
	cmp	esi,edi
	je	@@ToEndStore	; cursors met: only the delayed front byte is left
	
	mov	ah,al		; al still holds the new content of [edi]
	and	ax,0F00Fh	
	
	mov	dl,[esi]
	mov	dh,dl
	and	dx,0FF0h
	
	or	dx,ax
	mov	[edi],dh	; nibble 2 flipped with nibble n-1		
	;mov	[esi],dl	; nibble n-1 flipped with nibble 2; no need to store here.

	inc	edi	
	cmp	edi,esi
	jb	LoopOddD	; continue while edi < esi (unsigned); dl is still pending
	mov	[esi],dl	; after loop exit perform the delayed store.
@@ToEnd:
        ret                     ; _cdecl return
        
@@ToEndStore:
	mov	[edi],al	; delayed store of the front byte
        ret                     ; _cdecl return
                
Flip4 endp


;*************************************************************************************


	public  Flip8
;-----------------------------------------------------------------------
; Flip8 - mirror a row of 8-bit pixels in place.
;   Data  : pointer to the row
;   count : number of pixels; values < 2 (signed test) are ignored
;
; Three variants are selected by the pixel count:
;   odd count            - plain byte-by-byte exchange
;   count = 4k+2         - word exchange, bytes swapped inside each word
;   count = 4k (k >= 1)  - dword exchange with BSWAP
; In the word and dword variants a middle element can remain; it is
; reversed in place.
;
; The two cursors are addresses, so they are compared unsigned (JB); a
; signed compare would end the loops early for a row that crosses the
; 2 GB address boundary.
; Scratch: EAX, ECX, EDX.  ESI is preserved through USES.
;-----------------------------------------------------------------------
Flip8	proc \
        uses esi, \
        Data:ptr byte, \
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels

        mov     esi,[Data]	; esi = front cursor

        cmp	ecx,2
        jle	PxSize2		; count <= 2: PxSize2 reuses these flags (ZF set only for count = 2)

        test	ecx,1
	jz	PxOp16s		; Test for even value.

			; Classic byte exchange, used for odd counts.
	add	ecx,esi
	dec	ecx		; ecx = back cursor (last byte)
@@PIXEL:	mov	al,[ecx]
	mov	ah,[esi]
	mov	[ecx],ah
	mov	[esi],al
	dec	ecx
	inc	esi
	cmp	esi,ecx
	jb	@@PIXEL		; unsigned: cursors have not met yet
@@ToEnd:
	ret			; _cdecl return

PxOp16s:test	ecx,2
	jz	PxOp32s		; count is a multiple of 4
	add	ecx,esi
	sub	ecx,2		; ecx = back cursor (last word)
			; Word loop for count = 4k+2.
PxOp16L:mov	dx,[esi]
	mov	ax,[ecx]
	xchg	dl,dh		; reverse the two bytes inside each word
	xchg	al,ah
	mov	[ecx],dx
	mov	[esi],ax
	add	esi,2
	sub	ecx,2
	cmp	esi,ecx
	jb	PxOp16L		; unsigned: cursors have not met yet
PxSize2:jnz	@@ToEnd2	; No middle WORD (or count < 2 on entry), bail out.

	mov	ax,[esi]	; Middle WORD must be also flipped.
	xchg	al,ah
	mov	[esi],ax
@@ToEnd2:	ret


PxOp32s:sub	ecx,4
	jz	PxSize4		; exactly four pixels: one BSWAP does it
	add	ecx,esi		; ecx = back cursor (last dword)
PxOp32L:mov	edx,[esi]
	mov	eax,[ecx]
	bswap	edx		; reverse the four bytes inside each dword
	bswap	eax
	mov	[ecx],edx
	mov	[esi],eax
	add	esi,4
	sub	ecx,4
	cmp	esi,ecx
	jb	PxOp32L		; unsigned: cursors have not met yet
	jnz	@@ToEnd3	; No middle DWORD, bail out.

PxSize4:mov	eax,[esi]	; Middle DWORD must be also flipped.
	bswap	eax
	mov	[esi],eax
@@ToEnd3:	ret

Flip8 endp


;*************************************************************************************


	public  Flip16
;-----------------------------------------------------------------------
; Flip16 - mirror a row of 16-bit pixels in place.
;   Data  : pointer to the row
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Words are exchanged from both ends towards the middle.  The cursors are
; addresses, so they are compared unsigned (JB); a signed compare would
; end the loop early for a row that crosses the 2 GB address boundary.
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Flip16	proc \
        uses edi esi, \
        Data:ptr byte, \        
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        sub	ecx,1		; ecx = count - 1, flags as for CMP count,1
        jle	@@ToEnd		; ignore values 0 and 1

        mov     edi,[Data]	; edi = front cursor
        mov     esi,edi

        add	esi,ecx
        add	esi,ecx		; esi = back cursor = Data + 2*(count-1)

@@PIXEL:	mov	ax,[edi]
	mov	cx,[esi]	; ECX is free now and serves as scratch
	mov	[edi],cx
	mov	[esi],ax
	add	edi,2
	sub	esi,2
	cmp	edi,esi
	jb	@@PIXEL		; unsigned: cursors have not met yet

@@ToEnd:
        ret                     ; _cdecl return

Flip16 endp


;*************************************************************************************


	public  Flip24
;-----------------------------------------------------------------------
; Flip24 - mirror a row of 3-byte pixels in place.
;   Data  : pointer to the row
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Pixels are exchanged from both ends towards the middle, one byte at a
; time, so the byte order inside every pixel is kept.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Flip24	proc \
        uses edi esi, \
        Data:ptr byte, \        
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        cmp     ecx,1
        jle     @@Done          ; fewer than two pixels: nothing to exchange

        mov     edi,[Data]      ; edi -> first pixel
        lea     esi,[ecx+ecx*2-3]
        add     esi,edi         ; esi -> last pixel = Data + 3*(count-1)

@@Swap:
        mov     al,[edi]        ; byte 0
        mov     dl,[esi]
        mov     [edi],dl
        mov     [esi],al

        mov     al,[edi+1]      ; byte 1
        mov     dl,[esi+1]
        mov     [edi+1],dl
        mov     [esi+1],al

        mov     al,[edi+2]      ; byte 2
        mov     dl,[esi+2]
        mov     [edi+2],dl
        mov     [esi+2],al

        add     edi,3           ; front cursor one pixel forward
        sub     esi,3           ; back cursor one pixel backward
        cmp     edi,esi
        jb      @@Swap          ; unsigned address comparison

@@Done:
        ret                     ; _cdecl return

Flip24 endp


;*************************************************************************************


	public  Flip32
;-----------------------------------------------------------------------
; Flip32 - mirror a row of 32-bit pixels in place.
;   Data  : pointer to the row
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Dwords are exchanged from both ends towards the middle.  The cursors are
; addresses, so they are compared unsigned (JB); a signed compare would
; end the loop early for a row that crosses the 2 GB address boundary.
; Scratch: EAX, ECX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Flip32	proc \
        uses edi esi, \
        Data:ptr byte, \        
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        sub	ecx,1		; count - 1, set flags
        jle	@@ToEnd		; ignore values 0 and 1

        mov     edi,[Data]	; edi = front cursor
        mov     esi,edi

	shl	ecx, 2		; count*4 - 4
        add	esi,ecx		; esi = back cursor (last pixel)

@@PIXEL:	mov	eax,[edi]
	mov	ecx,[esi]	; ECX is free now and serves as scratch
	mov	[edi],ecx
	mov	[esi],eax
	add	edi,4
	sub	esi,4
	cmp	edi,esi
	jb	@@PIXEL		; unsigned: cursors have not met yet

@@ToEnd:
        ret                     ; _cdecl return

Flip32	endp


;*************************************************************************************


	public  Flip64
;-----------------------------------------------------------------------
; Flip64 - mirror a row of 64-bit pixels in place.
;   Data  : pointer to the row
;   count : number of pixels; values <= 1 (signed test) are ignored
;
; Qwords are exchanged from both ends towards the middle.  With USE8087
; non-zero each qword travels through the x87 stack as a 64-bit integer
; (FILD/FISTP, two stack slots needed); otherwise two dword exchanges are
; done per pixel.
; The cursors are addresses, so they are compared unsigned (JB); a signed
; compare would end the loop early for a row that crosses the 2 GB
; address boundary.
; Scratch: EAX, ECX (and the x87 stack).  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Flip64	proc \
        uses edi esi, \
        Data:ptr byte, \        
        count:DWORD

        mov     ecx,[count]     ; ecx = amount of pixels
        sub	ecx, 1		; count - 1, set flags
        jle	@@ToEnd		; ignore values 0 and 1

        mov     edi,[Data]	; edi = front cursor
        mov     esi, edi

	shl	ecx, 3		; count*8 - 8
        add	esi,ecx		; esi = back cursor (last pixel)

@@PIXEL:

if USE8087

	fild	QWORD ptr [edi]		; stack ->P0
	fild	QWORD ptr [esi]		; stack ->Pn; P0
	fistp	QWORD ptr [edi]		; stack <-Pn; P0
	fistp	QWORD ptr [esi]		; stack <-P0
	add	edi,8
	sub	esi,8

else
	mov	eax,[edi]		; low dwords
	mov	ecx,[esi]
	mov	[edi],ecx
	mov	[esi],eax
	add	edi,4
	add	esi,4

	mov	eax,[edi]		; high dwords
	mov	ecx,[esi]
	mov	[edi],ecx
	mov	[esi],eax
	add	edi,4

	sub	esi,12		; +4 needs to shift -8 i.e. -12
endif

	cmp	edi,esi
	jb	@@PIXEL		; unsigned: cursors have not met yet
@@ToEnd:
        ret                     ; _cdecl return

Flip64	endp


;*************************************************************************************


;void Join1BitNStep(const uint8_t *Buffer1Bit, uint8_t *Buffer, unsigned count, uint16_t PlaneStep)
	public  Join1BitNStep
;-----------------------------------------------------------------------
; Join1BitNStep - write a 1-bit pixel stream into one bit plane of a
; strided buffer.
;   Buffer1Bit : packed source bits, consumed from bit 7 down to bit 0
;   Buffer     : destination; one byte per pixel is modified
;   count      : number of pixels
;   PlaneStep  : low byte  = number of the bit to set/clear in each
;                            destination byte
;                high byte = distance in bytes between destination pixels
; A NULL pointer or count = 0 returns without writing.
; Scratch: EAX, ECX, EDX.  EDI/ESI/EBX are preserved through USES.
;-----------------------------------------------------------------------
Join1BitNStep proc \
        uses edi esi ebx, \
        Buffer1Bit: ptr byte, \
        Buffer: ptr byte, \
        count:DWORD, \
        PlaneStep: WORD
        
	mov	edi, [Buffer1Bit]	; edi = source bit stream
	or	edi,edi
	jz	@@ToEnd
	mov	esi, [Buffer]		; esi = destination cursor
	or	esi,esi
	jz	@@ToEnd
	mov	edx,[count]		; edx = pixels left
	or	edx,edx
	jz	@@ToEnd
	mov	bx,[PlaneStep]
	mov	cl,bl		; nth bit
	mov	bl,bh
	and	ebx,0FFh	; byte increment

	mov	ch,1
	shl	ch,cl		; OR mask
	mov	cl,ch
	not	cl		; AND mask

	mov	al,[edi]	; 1 bit datastream
	stc
	rcl	al,1		; CY = bit 7 of the source byte; a marker 1 enters at bit 0.
	jmp	@@BitLoop2

@@BitLoop:shl	al,1		; CY = next source bit
@@BitLoop2:mov	ah,[esi]	; MOV leaves CY untouched
	jc	SetBit
	and	ah,cl		; source bit is 0: clear the plane bit
        jmp	StorByte	
	
SetBit:	or	ah,ch		; source bit is 1: set the plane bit
StorByte:mov	[esi],ah
	add	esi,ebx		; next destination pixel

	cmp	al,80h		; only the marker is left: all 8 bits were used
	je	Inc1Bit
	dec	edx
	jnz	@@BitLoop
@@ToEnd:
        ret                     ; _cdecl return		

Inc1Bit:dec	edx
	jz	@@ToEnd

	inc	edi
	mov	al,[edi]	; Get a new byte from 1 bit datastream
	stc
	rcl	al,1		; CY = bit 7, marker 1 enters at bit 0
	jmp	@@BitLoop2


Join1BitNStep	endp



;void Peel8BitNStep(uint8_t *Buffer8Bit, const uint8_t *BufferSrc, unsigned count, uint8_t ByteStep)
	public  Peel8BitNStep
;-----------------------------------------------------------------------
; Peel8BitNStep - gather every ByteStep-th byte of a buffer into a
; contiguous byte row.
;   Data8Bit : destination, written contiguously
;   DataSrc  : source, read with a stride of ByteStep bytes
;   count    : number of bytes to gather
;   ByteStep : distance in bytes between two source elements
; A NULL pointer or count = 0 returns without writing.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Peel8BitNStep proc \
        uses edi esi, \
        Data8Bit: ptr byte, \
        DataSrc: ptr byte, \
        count:DWORD, \
        ByteStep: BYTE

        mov     edi,[Data8Bit]
        test    edi,edi
        jz      @@Done          ; no destination buffer
        mov     esi,[DataSrc]
        test    esi,esi
        jz      @@Done          ; no source buffer
        mov     ecx,[count]     ; ecx = bytes left
        test    ecx,ecx
        jz      @@Done          ; nothing requested
        movzx   edx,byte ptr [ByteStep] ; edx = source stride

        cld                     ; stosb walks upwards
@@Next:
        mov     al,[esi]
        add     esi,edx
        stosb
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Peel8BitNStep endp



;void Peel1BitNStep(uint8_t *Buffer1Bit, const uint8_t *BufferSrc, unsigned count, uint16_t PlaneStep)
	public  Peel1BitNStep
;-----------------------------------------------------------------------
; Peel1BitNStep - extract one bit plane of a strided buffer into a packed
; 1-bit pixel stream.
;   @@Buffer1Bit : destination, 8 pixels per byte, first pixel in bit 7
;   _BufferSrc   : source; one byte per pixel is inspected
;   count        : number of pixels
;   PlaneStep    : low byte  = number of the bit (0..7) taken from each
;                              source byte
;                  high byte = distance in bytes between source pixels
; A NULL pointer or count = 0 returns without writing.  A final group of
; fewer than 8 pixels is left-aligned and padded with zero bits.
;
; The wanted bit is first shifted down to bit 0 and then, by a separate
; shift of one, into the carry.  A single shift by bit+1 would be a shift
; by 8 for bit 7, and the carry is architecturally undefined when the
; count reaches the operand size.
; Scratch: EAX, ECX, EDX.  EDI/ESI/EBX are preserved through USES.
;-----------------------------------------------------------------------
Peel1BitNStep proc \
        uses edi esi ebx, \
        @@Buffer1Bit: ptr byte, \
        _BufferSrc: ptr byte, \
        count:DWORD, \
        PlaneStep: WORD

	mov	edi, [@@Buffer1Bit]	; edi = output cursor
	or	edi,edi
	jz	@@ToEnd
	mov	esi, [_BufferSrc]	; esi = source cursor
	or	esi,esi
	jz	@@ToEnd
	mov	edx,[count]		; edx = pixels left
	or	edx,edx
	jz	@@ToEnd
	mov	bx,[PlaneStep]
	mov	cl,bl		; nth bit
	mov	bl,bh
	and	ebx,0FFh	; byte increment

	cld

@@BitLoop1:mov	al,1		; marker bit; it drops out of AL after 8 bits
@@BitLoop:mov	ah,[esi]
	add	esi,ebx		; next source pixel

	shr	ah,cl		; wanted bit goes to bit 0 (count is 0..7)
	shr	ah,1		; bit 0 goes to CY, always well defined
	rcl	al,1		; collect the bit; CY = marker fell out
	jc	StoreBy
	dec	edx
	jnz	@@BitLoop

ShiftAll:sal	al,1		; left-align the incomplete byte, zero fill
	jnc	ShiftAll	; stop once the marker has been shifted out
	mov	[edi],al
	jmp	@@ToEnd

StoreBy:stosb			; store 8 bits
	dec	edx
	jnz	@@BitLoop1

@@ToEnd:
        ret                     ; _cdecl return

Peel1BitNStep	endp


;void Join8BitNStep(const uint8_t *Buffer8Bit, uint8_t *Buffer, unsigned count, uint8_t ByteStep)
	public  Join8BitNStep
;-----------------------------------------------------------------------
; Join8BitNStep - scatter a contiguous byte row into every ByteStep-th
; byte of a buffer.
;   @@Buffer8Bit : source, read contiguously
;   Buffer       : destination, written with a stride of ByteStep bytes
;   count        : number of bytes to scatter
;   ByteStep     : distance in bytes between two destination elements
; A NULL pointer or count = 0 returns without writing.
; Scratch: EAX, ECX, EDX.  EDI/ESI are preserved through USES.
;-----------------------------------------------------------------------
Join8BitNStep proc \
        uses edi esi, \
        @@Buffer8Bit: ptr byte, \
        Buffer: ptr byte, \
        count:DWORD, \
        ByteStep: BYTE

        mov     edi,[Buffer]
        test    edi,edi
        jz      @@Done          ; no destination buffer
        mov     esi,[@@Buffer8Bit]
        test    esi,esi
        jz      @@Done          ; no source buffer
        mov     ecx,[count]     ; ecx = bytes left
        test    ecx,ecx
        jz      @@Done          ; nothing requested
        movzx   edx,byte ptr [ByteStep] ; edx = destination stride

        cld                     ; lodsb walks upwards
@@Next:
        lodsb
        mov     [edi],al
        add     edi,edx
        dec     ecx
        jnz     @@Next

@@Done:
        ret                     ; _cdecl return

Join8BitNStep endp



        end
