// vmap column de Fab dernier
// hmap original

#include "asm_defs.inc" 	// structures, must match the C structures!


// Reminder: only EAX, ECX and EDX may be clobbered freely.
//	   ESI, EDI, EBX, EBP and the segment registers (cs...gs) must be preserved.

/* Attention aux comparaisons!						    */
/*									    */
/*	Intel_compare:							    */
/*									    */
/*		cmp	A,B			// A-B , set flags	    */
/*		jg	A_greater_than_B				    */
/*									    */
/*	AT&T_compare:							    */
/*									    */
/*		cmp	A,B			// B-A , set flags	    */
/*		jg	B_greater_than_A				    */
/*									    */
/*	  (subtracts the source operand FROM the destination operand,	    */
/*	   as on Motorola!)						    */


// RAPPEL: Intel
//	   SECTION:[BASE+INDEX*SCALE+DISP]
// devient SECTION:DISP(BASE,INDEX,SCALE)

//----------------------------------------------------------------------
//
// R_DrawColumn
//
//   New optimised version 10-01-1998 by D.Fabrice and P.Boris
//   TO DO: optimise it much farther... should take at most 3 cycles/pix
//	    once it's fixed, add code to patch the offsets so that it
//	    works in every screen width.
//
//----------------------------------------------------------------------

    .data

    .align 4
// Scratch variables shared by the drawing routines of this file.
C(loopcount):	.long	0	// R_DrawSpan: number of pixel pairs (pixelcount >> 1)
C(pixelcount):	.long	0	// pixel count of the column/span currently being drawn
C(tystep):	.long	0	// R_DrawFuzzColumn: fractional y step spilled from ebp

C(rowbytes):	.long	0	// value last given to ASM_PatchRowBytes; it is added
				// to the destination pointer to go down one pixel.
				// Used outside the inner loops, so those places
				// need no code patching.

#ifdef USEASM

    .text
.globl C(ASM_PatchRowBytes)
//
// ASM_PatchRowBytes (rowbytes)
//
//   Stores its first stack argument in the rowbytes variable and writes
//   1x, 2x, 3x and 4x that value into the 32-bit operand fields of the
//   instructions labelled p1..p9 in R_DrawColumn and R_DrawFuzzColumn.
//   Every patched instruction has a two-byte opcode/ModRM prefix, hence
//   the "+2" on each label.
//
//   The patch targets are placeholder values (multiples of 0x12345678), so
//   this routine has to run before either drawer is used. The code section
//   must be writable for the stores to succeed.
//
//   Clobbers ecx, edx and the flags.
//
C(ASM_PatchRowBytes):
    pushl   %ebp
    movl    %esp,%ebp		// frame pointer; ARG1 comes from asm_defs.inc
				// and is presumably ebp-relative

    movl    ARG1,%ecx		// ecx = rowbytes, kept as the stride
    movl    %ecx,C(rowbytes)

    // 1 * rowbytes
    movl    %ecx,p1+2		// R_DrawColumn	    : store of 2nd quad pixel
    movl    %ecx,p5+2		// R_DrawFuzzColumn : first dest fetch of the quad loop

    // 2 * rowbytes
    leal    (%ecx,%ecx),%edx
    movl    %edx,p2+2		// R_DrawColumn	    : store of 3rd quad pixel
    movl    %edx,p6+2		// R_DrawFuzzColumn : pointer steps
    movl    %edx,p7+2
    movl    %edx,p8+2
    movl    %edx,p9+2

    // 3 * rowbytes
    addl    %ecx,%edx
    movl    %edx,p3+2		// R_DrawColumn	    : store of 4th quad pixel

    // 4 * rowbytes
    addl    %ecx,%edx
    movl    %edx,p4+2		// R_DrawColumn	    : dest advance per quad

    popl    %ebp
    ret


    .text

    .align 5
.globl C(R_DrawColumn)
//
// R_DrawColumn: draws one vertical, colormapped texture column.
//
// Takes no stack arguments. Reads the globals dc_yl, dc_yh, dc_x,
// dc_iscale, dc_texturemid, centery, dc_source, dc_colormap, ylookup and
// columnofs, and the rowbytes variable of this file.
// Returns without drawing when dc_yh - dc_yl + 1 <= 0.
// Saves and restores ebp, esi, edi and ebx; eax, ecx and edx are clobbered.
//
// The operands at p1..p4 are placeholders that ASM_PatchRowBytes overwrites
// with 1..4 * rowbytes, so that routine has to run before this one.
//
// The texture index is masked with 0x7f after every step, so the source
// column is addressed as 128 texels that wrap around.
// The texel replaces the low byte of the colormap address to form the
// lookup pointer, so dc_colormap is presumably 256-byte aligned -- confirm
// against the C side.
//
C(R_DrawColumn):
    pushl   %ebp		// preserve caller's stack frame pointer
    pushl   %esi		// preserve register variables
    pushl   %edi
    pushl   %ebx

//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl     C(dc_yl),%ebp
    movl     %ebp,%ebx
    movl     C(ylookup)(,%ebx,4),%edi
    movl     C(dc_x),%ebx
    addl     C(columnofs)(,%ebx,4),%edi  // edi = dest

//
// pixelcount = yh - yl + 1
//
    movl     C(dc_yh),%eax
    incl     %eax
    subl     %ebp,%eax			 // pixel count
    movl     %eax,C(pixelcount) 	 // save it (movl leaves the flags of subl intact)
    jle      vdone			 // count <= 0 : nothing to draw

//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl     C(dc_iscale),%ecx		 // fracstep
    movl     C(centery),%eax
    subl     %ebp,%eax
    imul     %ecx,%eax
    movl     C(dc_texturemid),%edx
    subl     %eax,%edx
     movl     %edx,%ebx
     shrl     $16,%ebx		// integer part of frac
     andl     $0x0000007f,%ebx	// ebx = texture index, 0-127
     shll     $16,%edx		// fraction of frac moved to the top 16 bits of edx

     movl     %ecx,%ebp
     shll     $16,%ebp		// fraction of fracstep in the top 16 bits, low 16 bits = 0
     shrl     $16,%ecx		// integer part of fracstep -> cl
     andb     $0x7f,%cl

    movl     C(dc_source),%esi

//
// lets rock :) !
//
// dl and dh are free for other uses from here on: adding ebp to edx cannot
// change the low 16 bits of edx because the low 16 bits of ebp are zero.
//
    movl    C(pixelcount),%eax
    movb    %al,%dh		// dh = low byte of the count, bits 0-1 select the lead-in pixels
    shrl    $2,%eax
    movb    %al,%ch		// quad count (only the low 8 bits are kept)
    movl    C(dc_colormap),%eax
    testb   $3,%dh
    jz	    v4quadloop		// count is a multiple of 4 : quads only

//
//  do un-even pixel
//
    testb   $1,%dh
    jz	    2f

    movb    (%esi,%ebx),%al	// fetch source texel into the low byte of the colormap pointer
     addl    %ebp,%edx		  // ypos f += ystep f
    adcb    %cl,%bl		 // ypos i += ystep i (+ carry from the fraction)
     movb    (%eax),%dl 	  // colormap texel
    andb    $0x7f,%bl		 // mask 0-127 texture index
     movb    %dl,(%edi) 	  // output pixel
    addl    C(rowbytes),%edi	// next row

//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz	    3f

    movb    (%esi,%ebx),%al	 // fetch source texel
     addl    %ebp,%edx		  // ypos f += ystep f
    adcb    %cl,%bl		 // ypos i += ystep i
     movb    (%eax),%dl 	  // colormap texel
    andb    $0x7f,%bl		 // mask 0-127 texture index
     movb    %dl,(%edi) 	  // output pixel

    movb    (%esi,%ebx),%al	 // fetch source texel
     addl    %ebp,%edx		  // ypos f += ystep f
    adcb    %cl,%bl		 // ypos i += ystep i
     movb    (%eax),%dl 	  // colormap texel
    andb    $0x7f,%bl		 // mask 0-127 texture index
    addl    C(rowbytes),%edi	// move to the second pixel of the pair (after adcb, so the carry is not disturbed)
     movb    %dl,(%edi) 	  // output pixel

    addl    C(rowbytes),%edi	// next row

//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch		// test quad count
    jz	    vdone

//
// ebp : ystep frac. in the upper 16 bits
// edx : y     frac. in the upper 16 bits, dl = scratch pixel
// ebx : y     i.    lower 7 bits,  masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap pointer, al = current texel
// esi : source texture column
// edi : dest screen
//
v4quadloop:
    movb    $0x7f,%dh		// prep mask (not read below: the loop masks with an immediate)
//    .align  4
vquadloop:
    movb    (%esi,%ebx),%al	// prep loop
     addl    %ebp,%edx		  // ypos f += ystep f
    adcb    %cl,%bl		 // ypos i += ystep i
     movb    (%eax),%dl 	  // colormap texel
    movb    %dl,(%edi)		 // output pixel
     andb    $0x7f,%bl		  // mask 0-127 texture index

    movb    (%esi,%ebx),%al	 // fetch source texel
     addl    %ebp,%edx
    adcb    %cl,%bl
     movb    (%eax),%dl
p1:    movb    %dl,0x12345678(%edi)	// displacement patched to 1 * rowbytes
     andb    $0x7f,%bl

    movb    (%esi,%ebx),%al	 // fetch source texel
     addl    %ebp,%edx
    adcb    %cl,%bl
     movb    (%eax),%dl
p2:    movb    %dl,2*0x12345678(%edi)	// displacement patched to 2 * rowbytes
     andb    $0x7f,%bl

    movb    (%esi,%ebx),%al	 // fetch source texel
     addl    %ebp,%edx
    adcb    %cl,%bl
     movb    (%eax),%dl
p3:    movb    %dl,3*0x12345678(%edi)	// displacement patched to 3 * rowbytes
     andb    $0x7f,%bl

p4:    addl    $4*0x12345678,%edi	// immediate patched to 4 * rowbytes

    decb   %ch
     jnz    vquadloop

vdone:
    popl    %ebx		// restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp		// restore caller's stack frame pointer
    ret



//----------------------------------------------------------------------
//
// R_DrawSpan
//
// Horizontal texture mapping
//
//----------------------------------------------------------------------

    .data

// None of these four variables is referenced by the code visible in this
// file section.
advancetable:	.long	0, 0
ystep:		.long	0
xstep:		.long	0
C(texwidth):	.long	64	// texture width

    .text

    .align 4
.globl C(R_DrawSpan)
//
// R_DrawSpan: draws one horizontal, colormapped texture span, two pixels
// per loop pass.
//
// Takes no stack arguments. Reads the globals ds_x1, ds_x2, ds_y, ds_xfrac,
// ds_yfrac, ds_xstep, ds_ystep, ds_source, ds_colormap, ylookup and
// columnofs. Returns without drawing when ds_x2 - ds_x1 + 1 is negative.
// Saves and restores ebp, esi, edi and ebx; eax, ecx and edx are clobbered.
//
// Position and step are each packed into one register: the x part in the
// upper 16 bits, the y part in the lower 16 bits. The top 6 bits of each
// half are combined into a 12-bit texel offset (y << 6) | x, i.e. a
// 64 x 64 texel source.
//
// Self-modifying: on every call the composite step is written into the
// immediates at hpatch1/hpatch2 and ds_source into the displacements at
// hpatch3/hpatch4, so the code section must be writable.
//
// The texel replaces the low byte of the colormap address to form the
// lookup pointer, so ds_colormap is presumably 256-byte aligned -- confirm
// against the C side.
//
C(R_DrawSpan):
    pushl   %ebp		// preserve caller's stack frame pointer
    pushl   %esi		// preserve register variables
    pushl   %edi
    pushl   %ebx


//
// find loop count
//
    movl    C(ds_x2),%eax
    incl    %eax
    subl    C(ds_x1),%eax		// pixel count
    movl    %eax,C(pixelcount)		// save for final pixel (movl keeps the flags of subl)
    js	    hdone			// negative count : nothing to draw
    shrl    $1,%eax			// number of pixel pairs
    movl    %eax,C(loopcount)

//
// build composite position
//
    movl    C(ds_xfrac),%ebp
    shll    $10,%ebp
    andl    $0x0ffff0000,%ebp		// x position in the upper 16 bits
    movl    C(ds_yfrac),%eax
    shrl    $6,%eax
    andl    $0x0ffff,%eax		// y position in the lower 16 bits
    movl    C(ds_y),%edi
    orl     %eax,%ebp

    movl    C(ds_source),%esi

//
// calculate screen dest
//

    movl    C(ylookup)(,%edi,4),%edi
    movl    C(ds_x1),%eax
    addl    C(columnofs)(,%eax,4),%edi	// edi = ylookup[ds_y] + columnofs[ds_x1]

//
// build composite step
//
    movl    C(ds_xstep),%ebx
    shll    $10,%ebx
    andl    $0x0ffff0000,%ebx
    movl    C(ds_ystep),%eax
    shrl    $6,%eax
    andl    $0x0ffff,%eax
    orl     %eax,%ebx

    // patch the loop: composite step into both frac advances, source
    // address into both texel fetches (operand field starts at label + 2)
    //movl	  %eax,OFFSET hpatch1+2        // convice tasm to modify code...
    movl    %ebx,hpatch1+2
    //movl	  %eax,OFFSET hpatch2+2        // convice tasm to modify code...
    movl    %ebx,hpatch2+2
    movl    %esi,hpatch3+2
    movl    %esi,hpatch4+2
// %eax      aligned colormap
// %ebx      aligned colormap
// %ecx,%edx  scratch
// %esi      virtual source
// %edi      moving destination pointer
// %ebp      frac
    movl    C(ds_colormap),%eax
//    shld    $22,%ebp,%ecx	      // begin calculating third pixel (y units)
//    shld    $6,%ebp,%ecx	      // begin calculating third pixel (x units)
     movl    %ebp,%ecx
    addl    %ebx,%ebp		    // advance frac pointer
     shrw    $10,%cx		    // cx = top 6 bits of the y half
     roll    $6,%ecx		    // shift y up by 6, rotate the top 6 x bits into bits 0-5
    andl    $4095,%ecx		    // ecx = texel offset of the first pixel
//    shld    $22,%ebp,%edx	      // begin calculating fourth pixel (y units)
//    shld    $6,%ebp,%edx	      // begin calculating fourth pixel (x units)
     movl    %ebp,%edx
     shrw    $10,%dx
     roll    $6,%edx
    addl    %ebx,%ebp		    // advance frac pointer
    andl    $4095,%edx		    // edx = texel offset of the second pixel
    movl    %eax,%ebx		    // ebx = second colormap pointer (step is now in the patched code)
    movb    (%esi,%ecx),%al	    // get first pixel
    movb    (%esi,%edx),%bl	    // get second pixel
    testl   $0x0fffffffe,C(pixelcount)	// fewer than two pixels? (tested by the jz below)
    movb    (%eax),%dl		   // color translate first pixel (does not touch the flags)

//    jnz hdoubleloop		  // at least two pixels to map
//    jmp hchecklast

//    movw $0xf0f0,%dx //see visplanes start

    jz	    hchecklast
    movb    (%ebx),%dh		    // color translate second pixel
    movl    C(loopcount),%esi	    // esi = pair counter (source address is now in the patched code)
//    .align  4
hdoubleloop:
//    shld    $22,%ebp,%ecx	   // begin calculating third pixel (y units)
//    shld    $6,%ebp,%ecx	   // begin calculating third pixel (x units)
    movl    %ebp,%ecx
    shrw    $10,%cx
    roll    $6,%ecx
hpatch1:
    addl    $0x012345678,%ebp	 // advance frac pointer (immediate patched with the step)
    movw    %dx,(%edi)		 // write the pixel pair : dl = first, dh = second
    andl    $4095,%ecx		 // finish calculation for third pixel
//    shld    $22,%ebp,%edx	   // begin calculating fourth pixel (y units)
//    shld    $6,%ebp,%edx	   // begin calculating fourth pixel (x units)
    movl    %ebp,%edx
    shrw    $10,%dx
    roll    $6,%edx
hpatch3:
    movb    0x012345678(%ecx),%al      // get third pixel (displacement patched with ds_source)
//    movb    %bl,1(%edi)	   // write second pixel
    andl    $4095,%edx		 // finish calculation for fourth pixel
hpatch2:
    addl    $0x012345678,%ebp	 // advance frac pointer (immediate patched with the step)
hpatch4:
    movb    0x012345678(%edx),%bl      // get fourth pixel (displacement patched with ds_source)
    movb    (%eax),%dl		 // color translate third pixel
    addl    $2,%edi		 // advance to third pixel destination
    decl    %esi		 // done with loop?
    movb    (%ebx),%dh		 // color translate fourth pixel (keeps the flags of decl)
    jnz hdoubleloop

// check for final pixel
hchecklast:
    testl   $1,C(pixelcount)
    jz	    hdone
    movb    %dl,(%edi)		 // odd count : dl already holds the translated last pixel

hdone:
    popl    %ebx		 // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp		 // restore caller's stack frame pointer
    ret


//.endif


//----------------------------------------------------------------------
//
// FixedDiv and FixedMul, TO DO: inline asm.
//
//    The corresponding C code is really TOO slow, the more complex the
//    levels, more time is spent in these routines.
//
//----------------------------------------------------------------------
	.text

	.align 4
.globl C(FixedMul)
//
// FixedMul (a, b)
//
//   Signed multiply of the two 32-bit stack arguments; returns bits 16..47
//   of the 64-bit product in eax, i.e. (a * b) >> 16.
//   Clobbers edx and the flags.
//
C(FixedMul):
	movl  8(%esp),%eax	// eax = b
	imull 4(%esp)		// edx:eax = b * a, signed
	shrdl $16,%edx,%eax	// eax = bits 16..47 of the product
	ret


	.align 4
.globl C(FixedDiv2)
//
// FixedDiv2 (a, b)
//
//   Signed division of the 64-bit value (a << 16) by b, both taken from the
//   stack; the quotient is returned in eax.
//   There is no check here for b == 0 or for a quotient that does not fit
//   in 32 bits: idivl faults in both cases, so the caller has to rule
//   them out.
//   Clobbers edx and the flags.
//
C(FixedDiv2):
	movl  4(%esp),%edx	// edx = a
	movl  %edx,%eax
	sarl  $16,%edx		// edx = upper 32 bits of the sign-extended a << 16
	sall  $16,%eax		// eax = lower 32 bits of a << 16
	idivl 8(%esp)		// eax = edx:eax / b
	ret


//----------------------------------------------------------------------
//
// R_DrawFuzzColumn
//
// Vertical column texture drawer, with transparency. Replaces Doom2's
// 'fuzz' effect, which was not so beautiful.
// Transparency is always impressive in some way, don't know why...
//
// TO DO: separate this as a new function for colfunc(), and add a new
//	  bit MF_xxx flag to enable transparency on selected things.
//
//----------------------------------------------------------------------

    .text

    .align 5
.globl C(R_DrawFuzzColumn)
//
// R_DrawFuzzColumn: draws one vertical texture column blended with the
// pixels that are already on screen.
//
// Takes no stack arguments. Reads the globals dc_yl, dc_yh, dc_x,
// dc_iscale, dc_texturemid, centery, dc_source, tinttabtables, ylookup and
// columnofs, and the rowbytes variable of this file; uses tystep as a
// spill slot.
// Returns without drawing when dc_yh - dc_yl + 1 <= 0.
// Saves and restores ebp, esi, edi and ebx; eax, ecx and edx are clobbered.
//
// Each output pixel is the byte at tinttabtables + (texel << 8) + dest
// pixel. The texel goes into ah and the screen pixel into al, so the table
// address is presumably 64 KB aligned -- confirm against the C side.
//
// The operands at p5..p9 are placeholders that ASM_PatchRowBytes
// overwrites with 1 or 2 * rowbytes, so that routine has to run first.
//
// The texture index is masked with 0x7f after every step, so the source
// column is addressed as 128 texels that wrap around.
//
// Fixes in the quad loop compared to the previous revision:
//   - the screen pixel blended into the first pixel of the quad run was
//     read one row too low;
//   - the last pixel fetched by the pipelined loop was never stored, so the
//     bottom pixel of the column was missing.
//
C(R_DrawFuzzColumn):
    pushl   %ebp		// preserve caller's stack frame pointer
    pushl   %esi		// preserve register variables
    pushl   %edi
    pushl   %ebx

//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl     C(dc_yl),%ebp
    movl     %ebp,%ebx
    movl     C(ylookup)(,%ebx,4),%edi
    movl     C(dc_x),%ebx
    addl     C(columnofs)(,%ebx,4),%edi  // edi = dest

//
// pixelcount = yh - yl + 1
//
    movl     C(dc_yh),%eax
    incl     %eax
    subl     %ebp,%eax			 // pixel count
    movl     %eax,C(pixelcount) 	 // save it (movl leaves the flags of subl intact)
    jle      vtdone			 // count <= 0 : nothing to draw

//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl     C(dc_iscale),%ecx		 // fracstep
    movl     C(centery),%eax
    subl     %ebp,%eax
    imul     %ecx,%eax
    movl     C(dc_texturemid),%edx
    subl     %eax,%edx
     movl     %edx,%ebx
     shrl     $16,%ebx		// integer part of frac
     andl     $0x0000007f,%ebx	// ebx = texture index, 0-127
     shll     $16,%edx		// fraction of frac moved to the top 16 bits of edx

     movl     %ecx,%ebp
     shll     $16,%ebp		// fraction of fracstep in the top 16 bits, low 16 bits = 0
     shrl     $16,%ecx		// integer part of fracstep -> cl
     andb     $0x7f,%cl

    movl     C(dc_source),%esi

//
// split the count into lead-in pixels (count & 3) and quads (count >> 2)
//
// dl and dh are free for other uses from here on: the step added to edx
// has its low 16 bits clear, so it cannot change them.
//
    movl    C(pixelcount),%eax
    movb    %al,%dh		// dh = low byte of the count
    shrl    $2,%eax
    movb    %al,%ch		// quad count (only the low 8 bits are kept)
    movl    C(tinttabtables),%eax
    testb   $0x03,%dh
    jz	    vt4quadloop		// count is a multiple of 4 : quads only

//
//  do un-even pixel
//
    testb   $1,%dh
    jz	    2f

    movb    (%esi,%ebx),%ah	 // fetch texel : table row
     addl    %ebp,%edx		  // ypos f += ystep f
    adcb    %cl,%bl		 // ypos i += ystep i (+ carry from the fraction)
     movb    (%edi),%al 	  // fetch dest  : table column
    andb    $0x7f,%bl		 // mask 0-127 texture index
     movb    (%eax),%dl 	  // blended pixel
    movb    %dl,(%edi)
     addl    C(rowbytes),%edi	  // next row

//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz	    3f

    movb    (%esi,%ebx),%ah	 // fetch texel : table row
     addl    %ebp,%edx
    adcb    %cl,%bl
     movb    (%edi),%al 	  // fetch dest  : table column
    andb    $0x7f,%bl
     movb    (%eax),%dl
    movb    %dl,(%edi)
     addl    C(rowbytes),%edi

    movb    (%esi,%ebx),%ah	 // fetch texel : table row
     addl    %ebp,%edx
    adcb    %cl,%bl
     movb    (%edi),%al 	  // fetch dest  : table column
    andb    $0x7f,%bl
     movb    (%eax),%dl
    movb    %dl,(%edi)
     addl    C(rowbytes),%edi

//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch		// test quad count
    jz	    vtdone

//
// Quad loop. It is software pipelined: every step translates and stores
// the pixel whose texel (ah) and screen byte (al) were fetched by the
// previous step, then fetches the next pair. ebp and edi leapfrog each
// other down the screen, two rows at a time.
//
// tystep : ystep frac. in the upper 16 bits (spilled from ebp)
// edx : y     frac. in the upper 16 bits, dh = index mask, dl = scratch
// ebx : y     i.    lower 7 bits,  masked for index
// ecx : ch = counter, cl = y step i.
// eax : blend table pointer, ah = texel, al = screen pixel
// esi : source texture column
// ebp : dest of even pixels
// edi : dest of odd pixels
//
vt4quadloop:
    movb    $0x7f,%dh		// prep mask

    movb    (%esi,%ebx),%ah	 // fetch texel of pixel 0
    movl    %ebp,C(tystep)	 // free ebp for use as second dest pointer
    movl    %edi,%ebp		 // ebp = dest of pixel 0
    subl    C(rowbytes),%edi	 // edi = one row above, p7 brings it to pixel 1
p5:    movb    0x12345678(%edi),%al	// displacement patched to rowbytes :
					// reads the screen byte of pixel 0
    jmp inloop
//    .align  4
vtquadloop:
    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
p6:    addl    $2*0x12345678,%ebp	// immediate patched to 2 * rowbytes
    movb    (%eax),%dl		 // blend 4th pixel of the previous quad
    movb    (%esi,%ebx),%ah	 // fetch texel : table row
    movb    %dl,(%edi)
    movb    (%ebp),%al		 // fetch dest	: table column
inloop:
    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
p7:    addl    $2*0x12345678,%edi	// immediate patched to 2 * rowbytes
    movb    (%eax),%dl		 // blend 1st pixel of the quad
    movb    (%esi,%ebx),%ah	 // fetch texel : table row
    movb    %dl,(%ebp)
    movb    (%edi),%al		 // fetch dest	: table column

    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
p8:    addl    $2*0x12345678,%ebp	// immediate patched to 2 * rowbytes
    movb    (%eax),%dl		 // blend 2nd pixel of the quad
    movb    (%esi,%ebx),%ah	 // fetch texel : table row
    movb    %dl,(%edi)
    movb    (%ebp),%al		 // fetch dest	: table column

    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
p9:    addl    $2*0x12345678,%edi	// immediate patched to 2 * rowbytes
    movb    (%eax),%dl		 // blend 3rd pixel of the quad
    movb    (%esi,%ebx),%ah	 // fetch texel : table row
    movb    %dl,(%ebp)
    movb    (%edi),%al		 // fetch dest	: table column

    decb   %ch
     jnz    vtquadloop

    // drain the pipeline: ah/al hold the 4th pixel of the last quad and
    // edi points at it
    movb    (%eax),%dl
    movb    %dl,(%edi)

vtdone:
    popl    %ebx		// restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp		// restore caller's stack frame pointer
    ret

#endif // ifdef USEASM
