extern _dc_colourmap
extern _dc_x
extern _dc_yl
extern _dc_yh
extern _dc_yfrac
extern _dc_ystep
extern _dc_source
extern _vb_depth
extern _ylookup
extern _columnofs
extern _loopcount
extern _pixelcount

section .data
; Scratch dword used by _R_DrawColumn8_mmx_k6: that routine stores here the
; number of bytes it subtracted from esp and adds the same value back to esp
; before returning.
global _mmxcomm
_mmxcomm: dd 0

; The section that receives the code can be chosen by defining CODE_SECTION
; before this point; when it is not defined, .text is used.
; NOTE(review): several routines below write into their own instruction
; stream, so the chosen section presumably has to be writable - confirm
; against the build configuration.
%ifndef CODE_SECTION
%define CODE_SECTION .text
%endif
SECTION CODE_SECTION

;
; id's code. Probably optimal for 486.
; By id software, ported by ES 1998-08-04
;
; -ES- 1998/08/25 Fixed alignment

;-----------------------------------------------------------------------
; _R_DrawColumn8_id
; Draws one vertical run of 8-bit pixels, two pixels per loop pass.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Writes globals:  _pixelcount, _loopcount (scratch counters)
; Registers:       ebp, esi, edi, ebx are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; Self-modifying: on every call (_dc_ystep << 9) is stored into the 32-bit
; immediates that end at _rdc8ipatch1 and _rdc8ipatch2.
; The 0xDeadBeef values ending at _rdc8iwidth1 / _rdc8iwidth2 are NOT
; written by this routine - presumably external set-up code patches them
; with the row pitch and twice the row pitch (TODO confirm).
; The _rdc8ioffs* labels sit right after near jumps and the _rdc8ipatcher*
; labels right after the patching stores - presumably so that external code
; can fix up those operands if the routine is copied elsewhere (TODO confirm).
;
; The colour lookup replaces only the low byte of the _dc_colourmap
; pointer with the texel, so it equals colourmap[texel] only when the
; table is 256-byte aligned.
;-----------------------------------------------------------------------
align 16
global _R_DrawColumn8_id
_R_DrawColumn8_id:
   push ebp
   push esi
   push edi
   push ebx
   mov ebp, [_dc_yl]               ; ebp = top row
   mov ebx, ebp
   mov edi, [_ylookup]
   mov edi, [edi + ebx * 4]        ; edi = ylookup[dc_yl]
   mov ebx, [_dc_x]
   mov eax, [_columnofs]
   add edi, [eax + ebx * 4]        ; edi = address of the first pixel
   mov eax, [_dc_yh]
   inc eax
   sub eax, ebp                    ; eax = dc_yh + 1 - dc_yl = pixel count
   mov [_pixelcount], eax          ; (mov keeps the flags set by sub)
   js near _rdc8idone              ; negative count: nothing to draw
global _rdc8ioffs1
_rdc8ioffs1:
   shr eax, 1
   mov [_loopcount], eax           ; number of two-pixel passes
        
   mov ebp, [_dc_yfrac]
   shl ebp, 9                      ; top 7 bits of ebp = texel index

   mov esi, [_dc_source]           ; esi = texture column
        
   mov ebx, [_dc_ystep]
   shl ebx, 9                      ; step in the same shifted format
   mov [_rdc8ipatch1-4],ebx        ; patch immediate of first "add ebp" in the loop
global _rdc8ipatcher1
_rdc8ipatcher1:
   mov [_rdc8ipatch2-4],ebx        ; patch immediate of second "add ebp" in the loop
global _rdc8ipatcher2
_rdc8ipatcher2:
        
   ; Pre-compute the first two pixels: al = pixel 0, bl = pixel 1.
   mov ecx, ebp
   add ebp, ebx
   shr ecx, 25                     ; ecx = texel index of pixel 0
   mov edx, ebp
   add ebp, ebx
   shr edx, 25                     ; edx = texel index of pixel 1
   mov eax, [_dc_colourmap]
   mov ebx, eax                    ; eax and ebx both hold the colourmap base
   mov al, [esi + ecx]             ; low byte of pointer := texel
   mov bl, [esi + edx]
   mov al, [eax]                   ; al = mapped colour
   mov bl, [ebx]
        
   test dword [_pixelcount], 0xfffffffe
   jnz near _rdc8iloop             ; at least two pixels: enter the loop
global _rdc8ioffs2
_rdc8ioffs2:
   jmp near _rdc8ichecklast        ; zero or one pixel
global _rdc8ioffs3
_rdc8ioffs3:

; Each pass stores the two colours computed earlier and computes the next two.
global _rdc8iloop
_rdc8iloop:
   mov ecx, ebp
   add ebp, 0xDeadBeef             ; immediate overwritten with the step (see above)
global _rdc8ipatch1
_rdc8ipatch1:
   mov [edi], al
   shr ecx, 25
   mov edx, ebp
   add ebp, 0xDeadBeef             ; immediate overwritten with the step (see above)
global _rdc8ipatch2
_rdc8ipatch2:
   mov [0xDeadBeef + edi], bl      ; displacement ends at _rdc8iwidth1 (not patched here)
global _rdc8iwidth1
_rdc8iwidth1:
   shr edx, 25
   mov al, [esi + ecx]
   add edi, 0xDeadBeef             ; immediate ends at _rdc8iwidth2 (not patched here)
global _rdc8iwidth2
_rdc8iwidth2:
   mov bl, [esi + edx]
   dec dword [_loopcount]          ; sets ZF for the jnz below; the movs keep it
   mov al, [eax]
   mov bl, [ebx]
   jnz _rdc8iloop
        
global _rdc8ichecklast
_rdc8ichecklast:
   test dword [_pixelcount], 1
   jz _rdc8idone
   mov [edi], al                   ; odd count: store the final pixel
global _rdc8idone
_rdc8idone:
   pop ebx
   pop edi
   pop esi
   pop ebp
   ret

; End marker followed by 31 spare zero bytes.
global _R_DrawColumn8_id_end
_R_DrawColumn8_id_end:
times 31 db 0


;
; id's code, further optimised by Erik. (-ES- 1998-08-04)
;

;-----------------------------------------------------------------------
; _R_DrawColumn8_id_erik
; Draws one vertical run of 8-bit pixels, two pixels per loop pass.
; Same scheme as _R_DrawColumn8_id, but the pass counter lives in esi and
; the texture pointer is patched into the loop instead of held in a register.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Writes globals:  _pixelcount, _loopcount
; Registers:       ebp, esi, edi, ebx are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; Self-modifying, rewritten on every call:
;   _rdc8epatch1-4 := step       (_dc_ystep << 9)
;   _rdc8epatch2-4 := 2 * step
;   _rdc8epatch3-4 := _dc_source
;   _rdc8epatch4-4 := _dc_source
; The 0xDeadBeef values ending at _rdc8ewidth1 / _rdc8ewidth2 are not
; written here - presumably patched externally with the row pitch and
; twice the row pitch (TODO confirm).
;
; The colour lookup replaces only the low byte of the _dc_colourmap
; pointer with the texel, so it equals colourmap[texel] only when the
; table is 256-byte aligned.
;-----------------------------------------------------------------------
ALIGN 16
GLOBAL _R_DrawColumn8_id_erik
_R_DrawColumn8_id_erik:
   push  ebp
   push  esi
   push  edi
   push  ebx
        mov  ebp,[_dc_yl]
        mov  ebx,ebp
        mov  edi,[_ylookup]
        mov  edi, [edi+ebx*4]
        mov  ebx,[_dc_x]
        mov  eax,[_columnofs]
        add  edi, [eax+ebx*4]           ; edi = address of the first pixel

        mov  eax,[_dc_yh]
        inc  eax
        sub  eax,ebp                    ; eax = pixel count
        mov [_pixelcount], eax          ; (mov keeps the flags set by sub)
        js near _rdc8edone              ; negative count: nothing to draw
GLOBAL _rdc8eoffs1
_rdc8eoffs1:
        shr  eax,1
        mov [_loopcount], eax           ; number of two-pixel passes

        mov  ebp,[_dc_yfrac]
        shl  ebp,9                      ; top 7 bits of ebp = texel index

        mov  esi,[_dc_source]

        mov  ebx,[_dc_ystep]
        shl  ebx,9
        mov  [_rdc8epatch1-4], ebx      ; lea displacement := step
GLOBAL _rdc8epatcher1
_rdc8epatcher1:
        mov  ecx,ebx
        add  ecx,ecx
        mov [_rdc8epatch2-4], ecx       ; add immediate := 2 * step
GLOBAL _rdc8epatcher2
_rdc8epatcher2:
        mov [_rdc8epatch3-4], esi       ; texel load displacement := source
GLOBAL _rdc8epatcher3
_rdc8epatcher3:
        mov [_rdc8epatch4-4], esi       ; texel load displacement := source
GLOBAL _rdc8epatcher4
_rdc8epatcher4:

        ; Pre-compute the first two pixels: al = pixel 0, bl = pixel 1.
        mov  ecx,ebp
        add  ebp,ebx
        shr  ecx,25
        mov  edx,ebp
        add  ebp,ebx
        shr  edx,25
        mov  eax,[_dc_colourmap]
        mov  ebx,eax
        mov  al, [esi + ecx]
        mov  bl, [esi + edx]
        mov  al, [eax]
        mov  bl, [ebx]
        mov  esi,[_loopcount]           ; esi = pass counter from here on
        mov  ecx,ebp

        test  dword [_pixelcount], 0xfffffffe
        jnz near _rdc8eloop             ; at least two pixels: enter the loop
GLOBAL _rdc8eoffs2
_rdc8eoffs2:
        jmp near _rdc8echecklast        ; zero or one pixel
GLOBAL _rdc8eoffs3
_rdc8eoffs3:

; Loop invariant on entry: al/bl = colours to store, ecx = ebp = current frac.
GLOBAL _rdc8eloop
_rdc8eloop:
        mov  [edi],al
        lea  edx, [ebp+0xDeadBeef]      ; edx = frac + step (patched)
GLOBAL _rdc8epatch1
_rdc8epatch1:
        shr  ecx,25
        add  ebp,0xDeadBeef             ; frac += 2 * step (patched)
GLOBAL _rdc8epatch2
_rdc8epatch2:
        mov  [edi+0xDeadBeef],bl        ; displacement ends at _rdc8ewidth1
GLOBAL _rdc8ewidth1
_rdc8ewidth1:
        shr  edx,25
        add  edi,0xDeadBeef             ; immediate ends at _rdc8ewidth2
GLOBAL _rdc8ewidth2
_rdc8ewidth2:
        mov  al, [ecx+0xDeadBeef]       ; source[ecx] (base patched)
GLOBAL _rdc8epatch3
_rdc8epatch3:
        mov  bl, [edx+0xDeadBeef]       ; source[edx] (base patched)
GLOBAL _rdc8epatch4
_rdc8epatch4:
        mov  ecx,ebp
        dec  esi                        ; sets ZF for the jnz below; the movs keep it
        mov  al, [eax]
        mov  bl, [ebx]
        jnz _rdc8eloop

GLOBAL _rdc8echecklast
_rdc8echecklast:
        test  dword [_pixelcount], 1
        jz _rdc8edone
        mov  [edi],al                   ; odd count: store the final pixel
GLOBAL _rdc8edone
_rdc8edone:

   pop  ebx
   pop  edi
   pop  esi
   pop  ebp
   ret

; End marker followed by 31 spare zero bytes.
GLOBAL _R_DrawColumn8_id_erik_end
_R_DrawColumn8_id_erik_end:
times 31 db 0

;
; Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
; By ES 1998/08/01
;

;-----------------------------------------------------------------------
; _R_DrawColumn8_pentium
; Draws one vertical run of 8-bit pixels, two pixels per loop pass, with a
; separate path for a single pixel.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Registers:       ebp, ebx, esi, edi are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; The 0xDeadBeef values ending at the _rdc8pwidth1.._rdc8pwidth6 labels are
; not written by this routine; the comment on each label states the value it
; is meant to hold (presumably patched by external set-up code - TODO confirm).
;
; The colour lookup replaces only the low byte of the _dc_colourmap pointer
; with the texel, so it equals colourmap[texel] only when the table is
; 256-byte aligned.
; In the multi-pixel path dl doubles as the pending colour, so the low byte
; of the step in edx is overwritten; each later add of edx can therefore add
; up to 255 units at the bottom of the 25-bit fraction.
;-----------------------------------------------------------------------
GLOBAL _R_DrawColumn8_pentium
_R_DrawColumn8_pentium:
        push  ebp
        push  ebx
        push  esi
        push  edi
        mov  eax,[_dc_yl]         ; Top pixel
        mov  ebx,[_dc_yh]         ; Bottom pixel
        mov  edi,[_ylookup]
        mov  ecx, [edi+ebx*4]     ; ecx = ylookup[dc_yh] (row of the bottom pixel)
        sub  ebx,eax            ; ebx=number of pixels-1
        jl near rdc8pdone            ; no pixel to draw, done
        jnz rdc8pmany
        mov  edx,[_dc_x]          ; Special case: only one pixel
        mov  edi,[_columnofs]
        add  ecx, [edi+edx*4]   ; dest pixel at (%ecx)
        mov  esi,[_dc_ystep]    ; (loaded but not used on this path)
        mov  edi,[_dc_yfrac]  ; texture index in edi
        mov  edx,[_dc_colourmap]
        shr  edi,16
        mov  ebp,[_dc_source]
        and  edi,127
        mov dl, [edi + ebp]     ; low byte of colourmap pointer := texel
        mov  al, [edx]          ; lookup for light
        mov  [ecx+0],al         ; write it
        jmp rdc8pdone           ; done!
ALIGN 4
rdc8pmany:                       ; draw >1 pixel
        mov  edx,[_dc_x]
        mov  edi,[_columnofs]
        mov  edx, [edi+edx*4]
        lea  edi, [edx + ecx + 0xDeadBeef]
GLOBAL _rdc8pwidth5
_rdc8pwidth5:   ; DeadBeef = -2*SCREENWIDTH
        mov  edx,[_dc_ystep]     ; edx = fracstep
        shl  edx,9              ; fixme: Should get 7.25 fix as input
        mov  ecx,[_dc_yfrac]  ; ecx = frac
        mov  eax,[_dc_colourmap]   ; eax = lighting/special effects LUT
        shl  ecx,9
        mov  esi,[_dc_source]     ; esi = source ptr

        imul  ebx,0DeadBeefh    ; ebx = negative offset to pixel
GLOBAL _rdc8pwidth6
_rdc8pwidth6:   ; DeadBeef = -SCREENWIDTH

; Begin the calculation of the two first pixels
        lea  ebp, [ecx + edx]   ; ebp = frac of pixel 1
        shr  ecx,25
        mov  al, [esi + ecx]    ; texel of pixel 0 into low byte of LUT pointer
        lea  ecx, [edx + ebp]   ; ecx = frac of pixel 2
        shr  ebp,25
        mov  dl, [eax]          ; dl = colour of pixel 0

; The main loop
; On entry: dl = colour ready to store, ebp = texel index of the next pixel,
; ecx = frac of the pixel after that, ebx = negative offset counting up.
rdc8ploop: 
        mov al, [esi + ebp]
        lea ebp, [ecx + edx]

        shr  ecx,25                     ; shift frac 2
        mov  [edi + ebx + 0xDeadBeef], dl
GLOBAL _rdc8pwidth1
_rdc8pwidth1:   ; DeadBeef = 2*SCREENWIDTH

        mov  al, [eax]                  ; lookup 1

        mov  [edi + ebx + 0xDeadBeef], al
GLOBAL _rdc8pwidth2
_rdc8pwidth2:   ; DeadBeef = 3*SCREENWIDTH
        mov al, [esi + ecx]

        lea ecx, [ebp + edx]

        shr  ebp,25                     ; shift frac 3
        mov  dl, [eax]                  ; lookup 2

        add  ebx,0DeadBeefh             ; counter
GLOBAL _rdc8pwidth3
_rdc8pwidth3:   ; DeadBeef = 2*SCREENWIDTH
        jl rdc8ploop                    ; loop

; End of loop. Write extra pixel or just exit.
; ebx == 0 here means an odd pixel count: one colour (dl) is still pending.
        jnz rdc8pdone
        mov [edi + ebx + 0xDeadBeef], dl
GLOBAL _rdc8pwidth4
_rdc8pwidth4:   ; DeadBeef = 2*SCREENWIDTH

rdc8pdone: 

        pop  edi
        pop  esi
        pop  ebx
        pop  ebp
        ret

;
; MMX asm version, optimised for K6
; By ES 1998/07/05
;

;-----------------------------------------------------------------------
; _R_DrawColumn8_mmx_k6
; Draws one vertical run of 8-bit pixels, two pixels per loop pass, keeping
; the two running texture fractions in MMX register mm0 and twice the step
; in both halves of mm1. There is a separate path for a single pixel.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Writes globals:  _mmxcomm (size of the temporary stack area)
; Registers:       ebp, ebx, esi, edi are pushed and restored;
;                  eax, ecx, edx, mm0-mm2 are overwritten; emms is executed
;                  on every exit path.
; Stack:           reserves 8 or 12 bytes so that [esp] is 8-byte aligned
;                  for the movq transfers.
;
; The 0xDeadBeef values ending at the _rdc8mwidth1.._rdc8mwidth5 labels are
; not written by this routine; the comment on each label states the value it
; is meant to hold (presumably patched by external set-up code - TODO confirm).
; The _rdc8moffs* labels sit right after near jumps - presumably so that
; external code can fix them up when the routine is copied (TODO confirm).
;
; The single-pixel path replaces the low byte of the _dc_colourmap pointer
; with the texel, so there the lookup equals colourmap[texel] only when the
; table is 256-byte aligned. The main loop indexes [edx + texel] instead.
;-----------------------------------------------------------------------
GLOBAL _R_DrawColumn8_mmx_k6
_R_DrawColumn8_mmx_k6:
        push  ebp
        push  ebx
        push  esi
        push  edi

        mov  eax,esp    ; Push 8 or 12, so that (%esp) gets aligned by 8
        and  eax,7
        add  eax,8
        mov  [_mmxcomm],eax   ; Temp storage in mmxcomm: (%esp) is used instead
        sub  esp,eax

        mov  edx,[_dc_yl]         ; Top pixel
        mov  ebx,[_dc_yh]         ; Bottom pixel
        mov  edi,[_ylookup]
        mov  ecx, [edi+ebx*4]     ; ecx = ylookup[dc_yh] (row of the bottom pixel)
        sub  ebx,edx           ; ebx=number of pixels-1
        jl near _rdc8mdone            ; no pixel to draw, done
GLOBAL _rdc8moffs1
_rdc8moffs1:
        jnz rdc8mmany
        mov  eax,[_dc_x]          ; Special case: only one pixel
        mov  edi,[_columnofs]
        add  ecx, [edi+eax*4]    ; dest pixel at (%ecx)
        mov  esi,[_dc_ystep]     ; (loaded but not used on this path)
        mov  edi,[_dc_yfrac]  ; texture index in edi
        mov  edx,[_dc_colourmap]
        shr  edi,16
        mov  ebp,[_dc_source]
        and  edi,127
        mov dl, [edi + ebp]      ; low byte of colourmap pointer := texel
        mov  al, [edx]   ; lookup for light
        mov  [ecx+0],al          ; write it
        jmp near _rdc8mdone           ; done!
GLOBAL _rdc8moffs2
_rdc8moffs2:
ALIGN 4, db 090h
rdc8mmany:                        ; draw >1 pixel
        mov  eax,[_dc_x]
        mov  edi,[_columnofs]
        mov  eax, [edi+eax*4]
        lea esi, [eax + ecx + 0xDeadBeef]
GLOBAL _rdc8mwidth3
_rdc8mwidth3:   ; DeadBeef = -2*SCREENWIDTH
        mov  ecx,[_dc_ystep]      ; ecx = fracstep
        shl  ecx,9              ; fixme: Should get 7.25 fix as input
        mov  eax,[_dc_yfrac]  ; eax = frac
        mov  edx,[_dc_colourmap]   ; edx = lighting/special effects LUT
        shl  eax,9
        lea  edi, [ecx + ecx]   ; edi = 2 * fracstep
        mov  ebp,[_dc_source]     ; ebp = source ptr
        mov  [esp+0],edi       ; Start moving frac and fracstep to MMX regs

        imul  ebx,0DeadBeefh     ; ebx = negative offset to pixel
GLOBAL _rdc8mwidth5
_rdc8mwidth5:   ; DeadBeef = -SCREENWIDTH

        mov  [esp+4],edi
        lea edi, [eax + ecx]   ; edi = frac of pixel 1
        movq mm1,[esp+0]       ; fracstep:fracstep in mm1
        mov  [esp+0],eax
        shr  eax,25
        mov  [esp+4],edi
        movzx  eax, byte [ebp+eax]   ; eax = texel of pixel 0
        movq mm0,[esp+0]       ; frac:frac in mm0

        paddd mm0,mm1          ; fracs of pixels 2 and 3
        shr  edi,25            ; edi = texel index of pixel 1
        movq mm2,mm0
        psrld mm2,25            ; texture index in mm2
        paddd mm0,mm1          ; fracs of pixels 4 and 5
        movq [esp+0],mm2       ; texel indices 2 and 3 parked on the stack

; On entry to each pass: eax = texel of the first pixel to store,
; edi = texel index of the second, [esp] = indices of the following pair.
GLOBAL _rdc8mloop
_rdc8mloop:                                       ; The main loop
        movq mm2,mm0                       ; move 4-5 to temp reg
        movzx  edi, byte [ebp+edi]                   ; read 1

        psrld mm2,25                            ; shift 4-5
        mov cl, [edx + eax]

        mov  eax, [esp+0]                       ; load 2
        add  ebx,0DeadBeefh             ; counter
GLOBAL _rdc8mwidth2
_rdc8mwidth2:   ; DeadBeef = 2*SCREENWIDTH

        mov  [esi + ebx], cl
        mov  ch, [edx + edi]

        mov  [esi + ebx + 0xDeadBeef], ch
GLOBAL _rdc8mwidth1
_rdc8mwidth1:   ; DeadBeef = SCREENWIDTH
        mov  edi, [esp+4]                       ; load 3

        paddd mm0,mm1                           ; frac 6-7
        movzx  eax, byte [ebp+eax]                   ; lookup 2

        movq [esp+0],mm2                     ; store texture index 4-5
        jl _rdc8mloop                   ; flags still come from "add ebx" above

        jnz rdc8mno_odd                 ; ebx != 0: even count, nothing pending
        mov  cl, [edx + eax]
        mov  [esi+0DeadBeefh],cl
GLOBAL _rdc8mwidth4
_rdc8mwidth4:   ; DeadBeef = 2*SCREENWIDTH
rdc8mno_odd: 

GLOBAL _rdc8mdone
_rdc8mdone:
        emms

        add  esp,[_mmxcomm]             ; release the temporary stack area
        pop  edi
        pop  esi
        pop  ebx
        pop  ebp
        ret

; Need some extra space to align run-time
GLOBAL _R_DrawColumn8_mmx_k6_end
_R_DrawColumn8_mmx_k6_end:
times 31 db 0

;
; DOSDoom's original assembler, by Chi Hoang
;

;-----------------------------------------------------------------------
; _R_DrawColumn8_chi
; Draws one vertical run of 8-bit pixels, one pixel per loop pass.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Registers:       ebp, esi, edi, ebx are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; The texel index is (frac >> 16) & 127, with frac advanced by _dc_ystep
; after each pixel. The colour is the byte at _dc_colourmap + texel.
; The two 0xDeadBeef immediates (ending at _rdc8nwidth1 / _rdc8nwidth2) are
; not written by this routine - presumably both are patched externally with
; the row pitch (TODO confirm). The first is subtracted once before the loop
; and the second added at the start of every pass.
;-----------------------------------------------------------------------
ALIGN 16
GLOBAL _R_DrawColumn8_chi
_R_DrawColumn8_chi:
   push  ebp
   push  esi
   push  edi
   push  ebx
        mov  edx,[_dc_yl]
        mov  eax,[_dc_yh]
        sub  eax,edx
        lea  ebx, [eax+1]               ; ebx = pixel count
        test  ebx,ebx
        jle rdc8ndone                   ; count <= 0: nothing to draw
        mov  eax,[_dc_x]
        mov  edi,[_ylookup]
        mov  esi, [edi+edx*4]
        mov  edi,[_columnofs]
        add  esi, [edi+eax*4]           ; esi = address of the first pixel
        mov  edi,[_dc_ystep]            ; edi = step
        mov  ecx,[_dc_yfrac]            ; ecx = frac

        mov  ebp,[_dc_source]           ; ebp = texture column
   xor  edx,edx                         ; upper bits of edx stay 0; dl holds the texel
   sub  esi,0xDeadBeef                  ; pre-bias: the loop adds before it stores
GLOBAL _rdc8nwidth1
_rdc8nwidth1:
ALIGN 4, db 90h
rdc8nloop:
        mov  eax,ecx
        shr  eax,16
        add  ecx,edi                    ; frac += step
        and  eax,127                    ; eax = texel index
        add  esi,0xDeadBeef             ; advance destination
GLOBAL _rdc8nwidth2
_rdc8nwidth2:
        mov dl, [eax + ebp]             ; edx = texel
        mov  eax,[_dc_colourmap]
        mov  al, [eax + edx]            ; al = colourmap[texel]
        mov  [esi],al
        dec  ebx
        jne rdc8nloop
rdc8ndone: 
   pop  ebx
   pop  edi
   pop  esi
   pop  ebp
   ret

        ALIGN 4
;-----------------------------------------------------------------------
; _R_DrawColumn8_rasem
; Draws one vertical run of 8-bit pixels, one pixel per pass, with no
; patched immediates: the destination advances by [_vb_depth] per pixel.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs, _vb_depth
; Registers:       ebp, edi, esi, ebx are pushed and restored;
;                  eax, ecx, edx are overwritten.
;-----------------------------------------------------------------------
GLOBAL _R_DrawColumn8_rasem
_R_DrawColumn8_rasem:
        push    ebp
        push    edi
        push    esi
        push    ebx

        ; ecx = dc_yh - dc_yl + 1; leave when there is nothing to draw.
        mov     eax, [_dc_yh]
        mov     edi, [_dc_yl]
        sub     eax, edi
        lea     ecx, [eax + 1]
        mov     ebx, [_ylookup]
        mov     eax, [_columnofs]
        test    ecx, ecx
        jle     .finished

        ; esi = ylookup[dc_yl] + columnofs[dc_x]
        mov     esi, [ebx + edi*4]
        mov     edx, [_dc_x]
        add     esi, [eax + edx*4]

        mov     edi, [_dc_yfrac]        ; edi = running texture fraction
        mov     ebp, [_dc_ystep]        ; ebp = per-pixel increment
ALIGN 2, db 90h
.next_pixel:
        ; ebx = (frac >> 16) & 127, then advance frac
        mov     ebx, edi
        sar     ebx, 16
        and     ebx, 127
        add     edi, ebp

        mov     edx, [_dc_source]
        movzx   eax, byte [ebx + edx]   ; eax = texel, zero-extended
        mov     ebx, [_dc_colourmap]
        mov     dl, [eax + ebx]         ; dl = colourmap[texel]
        mov     [esi], dl

        add     esi, [_vb_depth]        ; move to the next row
        dec     ecx
        jnz     .next_pixel

.finished:
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret

ALIGN 2, db 90h
;
; DOSDoom's original asm version (by Chi Hoang, I guess)
; Moved here by -ES- 1998/08/05
;
;-----------------------------------------------------------------------
; _R_DrawColumn16_chi
; Draws one vertical run of 16-bit pixels, one pixel per loop pass.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs
; Registers:       ebp, ebx, esi, edi are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; The texel index is (frac >> 16) & 127. The pixel is the 16-bit word at
; _dc_colourmap + texel * 2.
; The two 0xDeadBeef immediates (ending at _rdc16owidth1 / _rdc16owidth2)
; are not written by this routine - presumably both are patched externally
; with the row pitch in bytes (TODO confirm).
;-----------------------------------------------------------------------
GLOBAL _R_DrawColumn16_chi
_R_DrawColumn16_chi:
    push  ebp
    push  ebx
    push  esi
    push  edi
        mov  edx,[_dc_yl]
        mov  ebx,[_dc_yh]
        sub  ebx,edx
        inc  ebx                        ; ebx = pixel count
        jle rdc16odone                  ; count <= 0: nothing to draw
        mov  esi,[_ylookup]
        mov  eax,[_dc_x]
        mov  edi,[_columnofs]
        mov  esi, [esi+edx*4]
        add  esi, [edi+eax*4]           ; esi = address of the first pixel
        mov  edi,[_dc_ystep]            ; edi = step
        mov  ecx,[_dc_yfrac]            ; ecx = frac

        mov  ebp,[_dc_source]           ; ebp = texture column
        sub  esi,0xDeadBeef             ; pre-bias: the loop adds before it stores
GLOBAL _rdc16owidth1
_rdc16owidth1:
        xor  edx,edx                    ; upper bits of edx stay 0; dl holds the texel

        ALIGN 4, db 90h
rdc16oloop: 
        mov  eax,ecx
        shr  eax,16
        add  ecx,edi                    ; frac += step
        and  eax,127                    ; eax = texel index
        add  esi,0xDeadBeef             ; advance destination
GLOBAL _rdc16owidth2
_rdc16owidth2:
        mov  dl, [eax + ebp]            ; edx = texel
        mov  eax,[_dc_colourmap]
        mov  ax, [eax+edx*2]            ; ax = 16-bit table entry for the texel
        mov  [esi],ax
        dec  ebx
        jne rdc16oloop
rdc16odone: 
    pop  edi
    pop  esi
    pop  ebx
    pop  ebp
    ret

;-----------------------------------------------------------------------
; _R_DrawColumn16_rasem
; Draws one vertical run of 16-bit pixels, one pixel per pass, with no
; patched immediates: the destination advances by [_vb_depth] per pixel.
;
; Reads globals:   _dc_yl, _dc_yh, _dc_x, _dc_yfrac, _dc_ystep,
;                  _dc_source, _dc_colourmap, _ylookup, _columnofs, _vb_depth
; Registers:       ebp, ebx, esi, edi are pushed and restored;
;                  eax, ecx, edx are overwritten.
;
; The texel index is (frac >> 16) & 127. The pixel is the 16-bit word at
; _dc_colourmap + texel * 2.
;-----------------------------------------------------------------------
GLOBAL _R_DrawColumn16_rasem
_R_DrawColumn16_rasem:
        push  ebp
        push  ebx
        push  esi
        push  edi
        mov  ebx,[_dc_yl]
        mov  eax,[_dc_yh]
        sub  eax,ebx
        mov  ecx,[_ylookup]
        lea  edi, [eax+1]               ; edi = pixel count
        mov  edx,[_dc_x]
        test  edi,edi
        jle L7                          ; count <= 0: nothing to draw
        mov  eax,[_columnofs]
        mov  esi, [ecx+ebx*4]
        add  esi, [eax+edx*4]           ; esi = address of the first pixel
        mov  ebp,[_dc_ystep]            ; ebp = step
        mov  eax,[_dc_colourmap]        ; eax = table base for the whole loop
        ; (A dead "imul ebx,ebp" used to sit here: ebx is reloaded below
        ; before any use and the flags were never read, so it was removed.)
        ; The next two lines encode a CS-prefixed "mov esi,esi", which leaves
        ; esi unchanged - presumably compiler-emitted filler kept from the
        ; original listing (TODO confirm); it has no effect on the result.
        db 2eh
        mov  esi,esi
        mov  ebx,[_dc_yfrac]            ; ebx = frac

L9: 

        mov  ecx,ebx
        add  ebx,ebp                    ; frac += step
        sar  ecx,16
        mov  edx,[_dc_source]
        and  ecx,127                    ; ecx = texel index
        mov  dl, [ecx + edx]            ; low byte of edx := texel
        mov  ecx,[_vb_depth]            ; ecx = destination advance per pixel
        and  edx,255                    ; edx = texel, upper bits cleared
        mov  dx, [eax+edx*2]            ; dx = 16-bit table entry for the texel
        mov  [esi],dx
        add  esi,ecx
        dec  edi
        jne L9
L7: 
        pop  edi
        pop  esi
        pop  ebx
        pop  ebp
        ret

