//----------------------------------------------------------------------------
//  EDGE Interface to i386 assembler code
//----------------------------------------------------------------------------
// 
//  Copyright (c) 1999-2000  The EDGE Team.
// 
//  This program is free software; you can redistribute it and/or
//  modify it under the terms of the GNU General Public License
//  as published by the Free Software Foundation; either version 2
//  of the License, or (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//----------------------------------------------------------------------------

#include "../i_defs.h"
#include "../i_system.h"

#include "../con_cvar.h"
#include "../m_argv.h"
#include "../r_draw1.h"
#include "../r_draw2.h"
#include "../r_vbinit.h"
#include "../r_state.h"

// -ES- 2000/06/21 Added the possibility to disable all self modifying code
//
// Both switches are only turned on for DJGPP builds:
//   ALLOW_SELF_MODIFY  compiles in the registration of, and the run-time
//                      patching for, the routines guarded by it in
//                      I_RegisterAssembler and I_PrepareAssembler.
//   ALIGN_MANUALLY     compiles in AlignFunctions and its helper macros.
#ifdef DJGPP
#define ALLOW_SELF_MODIFY
#define ALIGN_MANUALLY
#endif

// needed by id routines
// NOTE(review): neither variable is referenced from C code in this file;
// presumably they are accessed by name from the assembler sources - confirm
// before renaming or making them static.
int loopcount;
int pixelcount;

// Drawing routines implemented outside this file. Each one is handed to
// CON_AddFunctionToList in I_RegisterAssembler under a short name.

// 8-bit span, column and 2x2 enlarge routines
extern void R_DrawSpan8_id(void);
extern void R_DrawSpan8_id_erik(void);
extern void R_DrawSpan8_mmx(void);
extern void R_DrawSpan8_rasem(void);
extern void R_DrawColumn8_id(void);
extern void R_DrawColumn8_id_erik(void);
extern void R_DrawColumn8_chi(void);
extern void R_DrawColumn8_rasem(void);
extern void R_DrawColumn8_pentium(void);
extern void R_DrawColumn8_mmx_k6(void);
extern void R_EnlargeView8_2_2_base(void);
extern void R_EnlargeView8_2_2_mmx(void);

// 16-bit routines, only declared when hicolour support is compiled in
#ifndef NOHICOLOUR
extern void R_DrawSpan16_rasem(void);
extern void R_DrawColumn16_chi(void);
extern void R_DrawColumn16_rasem(void);
#endif

// Describes one CPU model: a display name plus the names of the routines
// that should be selected by default for it. A NULL routine name means
// "no choice made here"; SetDefaultRoutines then keeps whatever the base
// chain selected.
typedef struct cpumodel_s
{
  // Name shown in the "CPU Detected" message.
  const char *name;
  // Model whose defaults are applied before this one's (may be NULL).
  const struct cpumodel_s *base;
  // Routine names for the 8/16-bit column drawers.
  const char *RDC8;
  const char *RDC16;
  // Routine names for the 8/16-bit span drawers.
  const char *RDS8;
  const char *RDS16;
  // Routine names for the 8/16-bit 2x2 view enlargers.
  const char *Enlarge8;
  const char *Enlarge16;
} cpumodel_t;

// Result of the CPU detection. I_CheckCPU resets the flags and picks the
// model; DetectCPU fills in the capability flags, family/model and vendor.
static struct
{
  cpumodel_t *model;  // The CPU model: contains default routines, and name

  boolean_t fpu;
  boolean_t mmx;
  // the cpuid family/model. These are small numbers (family 3..6 and up,
  // model 0..15) that get switched on, not flags, so they are plain ints.
  int id_family;
  int id_model;
  // 3dnow, identifier names may not start with '3'.
  boolean_t now;
  // Streaming SIMD Extensions (Pentium III)
  boolean_t sse;
  // true when the CPUID instruction is available (vendor[] is then valid)
  boolean_t cpuid;
  // 12-character CPUID vendor string plus terminating NUL
  char vendor[13];
}
cpu;

//
// AlignFunctions
//
// DJGPP can't align to 16 or 32 properly, so for routines where alignment
// is essential for speed, the alignment must be done manually. Apart from
// moving around code, we also have to modify all relative jumps before the
// loop that go to somewhere within or after the loop.
// -ES- 1999/10/30 Reworked system, no more inline asm.
//
// ALIGN_LOOP: Helper macro for use in AlignFunctions ONLY. Parameters:
// loop_lbl: Loop's label name.
// end_lbl: Label of an unused function occupying at least 31 bytes, placed
// immediately after the function containing the loop.
// align_offs: Loop will start at align_offs mod 32.
// distvar: Variable to which we store the distance we move the routine.

#ifdef ALIGN_MANUALLY
// Distance in bytes by which AlignFunctions moved the "k6", "id" and "id2"
// 8-bit column loops (rdc8mloop, rdc8iloop and rdc8eloop respectively).
// I_PrepareAssembler adds these to label addresses when it patches values
// into the moved code.
static long rdc8_k6_align = 0;
static long rdc8_id_align = 0;
static long rdc8_id2_align = 0;

// Moves the bytes from loop_lbl up to (not including) end_lbl forward so
// that they start at the next address that is align_offs modulo 32.
//
// Relies on these locals of the enclosing function: start, new_start,
// end_func (char *), dist (long) and i (int). dist is left set afterwards,
// and SET_JMPDEST / SET_ADDRESS read it.
//
// The copy runs from the last byte down to the first because source and
// destination overlap and the destination is the higher address. The gap
// left at the old loop start is bridged with a nop (dist == 1) or a short
// jump over the gap (dist > 1).
#define ALIGN_LOOP(loop_lbl,end_lbl,align_offs, distvar)\
{\
  extern void loop_lbl(), end_lbl();\
  start = (char *)loop_lbl;\
  L_WriteDebug("start=%p ",start);\
  new_start=(char*)((((long)start+31-align_offs) & ~31) + align_offs);\
  L_WriteDebug("new_start=%p ",new_start);\
  end_func = (char *)end_lbl;\
  L_WriteDebug("end_func=%p ",end_func);\
  for (i=end_func-start-1; i >= 0; i--)\
    new_start[i] = start[i];\
  dist=new_start-start;\
  L_WriteDebug("dist=%ld",dist);\
  if (dist==1)\
    *start=0x90;/*only one byte to insert, nop is enough*/\
  if (dist>1)\
  {/*more than one byte to insert, use unconditional short jump instead*/\
    *(start)=0xEB;/*jmp rel8*/\
    *(start+1)=(char)dist-2;/*distance to jump*/\
  }/*dist<1 means dist==0, nothing to insert*/\
  distvar = dist;\
}


//
// Jumps that cross the loop label won't work after the alignment. This macro
// makes such jumps work. from_lbl should be an asm label pointing to the
// instruction RIGHT AFTER the jump, and to_lbl should be the jump destination.
// The jump must be relative, to 0xDEAD0FF5 (write this "jmp 0xDEAD0FF5").
// It may be conditional or unconditional.
//
// This macro only works with the latest inner loop aligned with ALIGN_LOOP,
// because it reads the `dist` local that ALIGN_LOOP last assigned.
//
// The 4 bytes immediately before from_lbl are overwritten with
// dist + to_lbl - from_lbl, i.e. the displacement to the destination's new
// position. Uses the `to` and `from` locals of the enclosing function.
//
#define SET_JMPDEST(from_lbl,to_lbl)\
{\
extern void from_lbl(), to_lbl();\
to = (char *)to_lbl;\
from = (char *)from_lbl;\
L_WriteDebug("  to %p from %p", to, from);\
*(long*)(from - 4) = (long)(dist+to-from);\
}
//
// Like SET_JMPDEST, but to_lbl is an absolute, rather than relative, address.
// offs is also added to to_lbl.
//
// The 4 bytes immediately before from_lbl are overwritten with
// dist + to_lbl + offs, where dist is the value left by the most recent
// ALIGN_LOOP. Unlike SET_JMPDEST this also terminates the debug line.
//
#define SET_ADDRESS(from_lbl,to_lbl,offs)\
{\
extern void from_lbl(), to_lbl();\
to = ((char *)to_lbl) + (offs);\
from = (char *)from_lbl;\
L_WriteDebug("  to %p from %p", to, from);\
*(long*)(from - 4) = (long)(dist+to);\
L_WriteDebug(", done\n");\
}

// -ES- 1999/06/10 Changed this routine to be called once only.
//
// Moves the inner loop of each listed assembler routine to its required
// alignment and then repairs the jumps and absolute addresses that refer
// into the moved code. Every ALIGN_LOOP must be followed directly by the
// SET_JMPDEST / SET_ADDRESS fixups for that same routine, because those
// macros use the `dist` value the preceding ALIGN_LOOP computed.
//
// All locals below are scratch variables used by name inside the macros.
static void AlignFunctions(void)
{
  char *start;
  long dist;
  // sink for the distance of routines whose offset is not needed later
  long none;
  char *new_start;
  char *end_func;
  char *to;
  char *from;
  int i;

  // 8-bit column, "k6" version; distance kept for I_PrepareAssembler
  ALIGN_LOOP(rdc8mloop, R_DrawColumn8_mmx_k6_end, 0, rdc8_k6_align);
  SET_JMPDEST(rdc8moffs1, rdc8mdone);
  SET_JMPDEST(rdc8moffs2, rdc8mdone);

  // 8-bit column, "id2" version; distance kept for I_PrepareAssembler
  ALIGN_LOOP(rdc8eloop, R_DrawColumn8_id_erik_end, 0, rdc8_id2_align);
  SET_JMPDEST(rdc8eoffs1, rdc8edone);
  SET_JMPDEST(rdc8eoffs2, rdc8eloop);
  SET_JMPDEST(rdc8eoffs3, rdc8echecklast);
  SET_ADDRESS(rdc8epatcher1, rdc8epatch1, -4);
  SET_ADDRESS(rdc8epatcher2, rdc8epatch2, -4);
  SET_ADDRESS(rdc8epatcher3, rdc8epatch3, -4);
  SET_ADDRESS(rdc8epatcher4, rdc8epatch4, -4);

  // -ES- 1998/08/20 Fixed id alignment
  // 8-bit column, "id" version; distance kept for I_PrepareAssembler
  ALIGN_LOOP(rdc8iloop, R_DrawColumn8_id_end, 0, rdc8_id_align);
  SET_JMPDEST(rdc8ioffs1, rdc8idone);
  SET_JMPDEST(rdc8ioffs2, rdc8iloop);
  SET_JMPDEST(rdc8ioffs3, rdc8ichecklast);
  SET_ADDRESS(rdc8ipatcher1, rdc8ipatch1, -4);
  SET_ADDRESS(rdc8ipatcher2, rdc8ipatch2, -4);

  // 8-bit span, "mmx" version; the only loop placed at 16 mod 32
  ALIGN_LOOP(rds8mloop, R_DrawSpan8_mmx_end, 16, none);
  SET_JMPDEST(rds8moffs1, rds8mdone);

  // 8-bit span, "id2" version
  ALIGN_LOOP(rds8eloop, R_DrawSpan8_id_erik_end, 0, none);
  SET_JMPDEST(rds8eoffs1, rds8edone);
  SET_JMPDEST(rds8eoffs2, rds8eloop);
  SET_JMPDEST(rds8eoffs3, rds8echecklast);
  SET_ADDRESS(rds8epatcher1, rds8epatch1, -4);
  SET_ADDRESS(rds8epatcher2, rds8epatch2, -4);
  SET_ADDRESS(rds8epatcher3, rds8epatch3, -4);
  SET_ADDRESS(rds8epatcher4, rds8epatch4, -4);

  // 8-bit span, "id" version
  ALIGN_LOOP(rds8iloop, R_DrawSpan8_id_end, 0, none);
  SET_JMPDEST(rds8ioffs1, rds8idone);
  SET_JMPDEST(rds8ioffs2, rds8iloop);
  SET_JMPDEST(rds8ioffs3, rds8ichecklast);
  SET_ADDRESS(rds8ipatcher1, rds8ipatch1, -4);
  SET_ADDRESS(rds8ipatcher2, rds8ipatch2, -4);
}
#endif // ALIGN_MANUALLY

//
// SetDefaultRoutines
//
// Selects the default routines named by a CPU model. The model's base
// chain is applied first, so a non-NULL name in `type` is chosen after
// whatever its bases selected; a NULL name leaves the earlier choice alone.
//
static void SetDefaultRoutines(const cpumodel_t *type)
{
  const cpumodel_t *parent = type->base;

  if (parent)
  {
    SetDefaultRoutines(parent);
  }

  // 8-bit routines
  if (type->RDC8)
  {
    CON_ChooseFunctionFromList(&drawcol8_funcs, type->RDC8);
  }
  if (type->RDS8)
  {
    CON_ChooseFunctionFromList(&drawspan8_funcs, type->RDS8);
  }
  if (type->Enlarge8)
  {
    CON_ChooseFunctionFromList(&enlarge8_2_2_funcs, type->Enlarge8);
  }

#ifndef NOHICOLOUR
  // 16-bit routines
  if (type->RDC16)
  {
    CON_ChooseFunctionFromList(&drawcol16_funcs, type->RDC16);
  }
  if (type->RDS16)
  {
    CON_ChooseFunctionFromList(&drawspan16_funcs, type->RDS16);
  }
  if (type->Enlarge16)
  {
    CON_ChooseFunctionFromList(&enlarge16_2_2_funcs, type->Enlarge16);
  }
#endif
}

//
// I_RegisterAssembler
//
// Runs the manual alignment pass (when compiled in) and then adds every
// assembler routine to the matching function list under a short name and
// a description. Routines inside ALLOW_SELF_MODIFY sections are only
// registered when self-modifying code is allowed.
//
void I_RegisterAssembler(void)
{
  // Must happen before the routines can be used: it relocates their loops.
#ifdef ALIGN_MANUALLY
  AlignFunctions();
#endif

  // 8-bit span drawers
#ifdef ALLOW_SELF_MODIFY
  CON_AddFunctionToList(&drawspan8_funcs, "id", "id's original assembler", R_DrawSpan8_id, NULL);
  CON_AddFunctionToList(&drawspan8_funcs, "id2", "id's assembler, further 486-optimised", R_DrawSpan8_id_erik, NULL);
#endif
  CON_AddFunctionToList(&drawspan8_funcs, "rasem", "Rasem's assembler", R_DrawSpan8_rasem, NULL);
  CON_AddFunctionToList(&drawspan8_funcs, "mmx", "K6-optimised MMX Version", R_DrawSpan8_mmx, NULL);

  // 8-bit column drawers
#ifdef ALLOW_SELF_MODIFY
  CON_AddFunctionToList(&drawcol8_funcs, "id", "id's original assembler", R_DrawColumn8_id, NULL);
  CON_AddFunctionToList(&drawcol8_funcs, "id2", "id's assembler, further 486-optimised", R_DrawColumn8_id_erik, NULL);
  CON_AddFunctionToList(&drawcol8_funcs, "pentium", "Pentium Optimised", R_DrawColumn8_pentium, NULL);
  CON_AddFunctionToList(&drawcol8_funcs, "k6", "MMX Version", R_DrawColumn8_mmx_k6, NULL);
  CON_AddFunctionToList(&drawcol8_funcs, "chi", "Chi Hoang's assembler", R_DrawColumn8_chi, NULL);
#endif
  CON_AddFunctionToList(&drawcol8_funcs, "rasem", "Rasem's assembler", R_DrawColumn8_rasem, NULL);

  // 8-bit 2x2 view enlargers
  CON_AddFunctionToList(&enlarge8_2_2_funcs, "asm", "Assembler Version", R_EnlargeView8_2_2_base, NULL);
  CON_AddFunctionToList(&enlarge8_2_2_funcs, "mmx", "MMX Version", R_EnlargeView8_2_2_mmx, NULL);
  // 16-bit span and column drawers
#ifndef NOHICOLOUR
  CON_AddFunctionToList(&drawspan16_funcs, "rasem", "Rasem's assembler", R_DrawSpan16_rasem, NULL);
#ifdef ALLOW_SELF_MODIFY
  CON_AddFunctionToList(&drawcol16_funcs, "chi", "Chi Hoang's assembler", R_DrawColumn16_chi, NULL);
#endif
  CON_AddFunctionToList(&drawcol16_funcs, "rasem", "Rasem's assembler", R_DrawColumn16_rasem, NULL);
#endif
}

//
// I_PrepareAssembler
//
// This is called whenever the viewbitmap depth changes.
//
// Writes multiples of vb_depth directly into the code of the self-modifying
// routines. Does nothing unless ALLOW_SELF_MODIFY is defined.
//
// NOTE(review): the rdc8_*_align offsets used below are only defined under
// ALIGN_MANUALLY; this compiles because both macros are currently defined
// together (under DJGPP).
void I_PrepareAssembler(void)
{
#ifdef ALLOW_SELF_MODIFY
// Stores val as a long in the 4 bytes that end at label lbl, after shifting
// the label address by offset bytes. offset is the distance AlignFunctions
// moved the code containing the label, or 0 when that code was not moved.
#define SET_VARIABLE(val, lbl, offset)\
{\
extern void lbl(void);\
char *dest = (char *)lbl;\
*(long *)(dest - 4 + offset) = val;\
}
  // "id" column routine
  SET_VARIABLE(vb_depth, rdc8iwidth1, rdc8_id_align);
  SET_VARIABLE(2 * vb_depth, rdc8iwidth2, rdc8_id_align);

  // "id2" column routine
  SET_VARIABLE(vb_depth, rdc8ewidth1, rdc8_id2_align);
  SET_VARIABLE(2 * vb_depth, rdc8ewidth2, rdc8_id2_align);

  SET_VARIABLE(vb_depth, rdc8nwidth1, 0);
  SET_VARIABLE(vb_depth, rdc8nwidth2, 0);

  // "k6" column routine
  // two of the labels are before the aligned loop
  SET_VARIABLE(vb_depth, rdc8mwidth1, rdc8_k6_align);
  SET_VARIABLE(2 * vb_depth, rdc8mwidth2, rdc8_k6_align);
  SET_VARIABLE(-2 * vb_depth, rdc8mwidth3, 0);
  SET_VARIABLE(2 * vb_depth, rdc8mwidth4, rdc8_k6_align);
  SET_VARIABLE(-vb_depth, rdc8mwidth5, 0);

  SET_VARIABLE(2 * vb_depth, rdc8pwidth1, 0);
  SET_VARIABLE(3 * vb_depth, rdc8pwidth2, 0);
  SET_VARIABLE(2 * vb_depth, rdc8pwidth3, 0);
  SET_VARIABLE(2 * vb_depth, rdc8pwidth4, 0);
  SET_VARIABLE(-2 * vb_depth, rdc8pwidth5, 0);
  SET_VARIABLE(-vb_depth, rdc8pwidth6, 0);


  // NOTE(review): these rdc16 labels are patched even when NOHICOLOUR is
  // defined - confirm the labels still exist in that configuration.
  SET_VARIABLE(vb_depth, rdc16owidth1, 0);
  SET_VARIABLE(vb_depth, rdc16owidth2, 0);

#undef SET_VARIABLE
#endif // ALLOW_SELF_MODIFY
}

//
// I_CheckCPU
// Uses the allegro routine to gain cpu info
//
// -ACB- 1998/07/17 New CPU Checking Code
//
// -ES- 1998/08/05 Newer CPU Checking Code
// -ES- 1998/08/13 Some minor changes
// -ES- 1998/12/18 Added some new routines
// -ES- 1999/05/16 Added smoothing
// -ES- 1999/05/31 Changed struct: added flags in RDC/RDS.
//
// -ACB- 1999/09/19 Disable for the short term - Portability.
//                  NOHICOLOUR defines.
//                  C-Routines Only.
// -ES- 1999/10/29 Major rework. Re-enabled assembler.
// -ES- 1998/08/05 Added cpumodel struct
// -ES- 1999/12/26 Total Rewrite.

// RDP stands for Rendering Drawing Primitive.
// Every flag is a distinct single bit.
typedef enum
{
  // uses assembler (ie only works on the x86 ports)
  RDP_ASM = 1 << 0,
  // requires floating point coprocessor (486DX or above)
  RDP_FPU = 1 << 1,
  // requires MMX
  RDP_MMX = 1 << 2,
  // requires the 3DNow! instruction set (used in K6-2)
  RDP_3DNOW = 1 << 3,
  // requires the SSE (Streaming SIMD Extensions) instruction set,
  // used in Pentium III
  RDP_SSE = 1 << 4,
  // interpolates in some way between pixels
  RDP_SMOOTH = 1 << 5,
  // Self-Modifying Code. The routine modifies itself when called.
  RDP_SMC = 1 << 6,
  // The function needs to be modified with r_align_draw when the
  // resolution changes.
  RDP_ALIGN = 1 << 7
}
rdp_flags_t;

// CPU model table. Initialiser order follows cpumodel_t:
//   name, base, RDC8, RDC16, RDS8, RDS16, Enlarge8, Enlarge16
// A NULL routine name keeps whatever the base chain selected, since
// SetDefaultRoutines applies the base first and skips NULL entries.
// I_CheckCPU picks one of these according to the detected family/model.
//
// NOTE(review): the enlarge entries use the name "base", while
// I_RegisterAssembler registers the assembler enlargers as "asm" and "mmx".
// Confirm that "base" is registered elsewhere.

// Used when detection is skipped or the CPU family is not recognised.
cpumodel_t cpu_c = {"Default CPU", NULL, "default", "default", "default", "default", "default", "default"};

// Without self-modifying code the "id", "id2" and "pentium" routines are
// not registered, so "rasem" is used in their place.
#ifdef ALLOW_SELF_MODIFY
cpumodel_t cpu_386     = {"386", NULL, "id", "rasem", "id", "rasem", "base", "base"};
cpumodel_t cpu_486     = {"486", &cpu_386, "id2", NULL, "id2", NULL, NULL, NULL};
cpumodel_t cpu_pentium = {"Pentium", &cpu_386, "pentium", NULL, "rasem", NULL, NULL, NULL};
#else
cpumodel_t cpu_386     = {"386", NULL, "rasem", "rasem", "rasem", "rasem", "base", "base"};
cpumodel_t cpu_486     = {"486", &cpu_386, "rasem", NULL, "rasem", NULL, NULL, NULL};
cpumodel_t cpu_pentium = {"Pentium", &cpu_386, "rasem", NULL, "rasem", NULL, NULL, NULL};
#endif
cpumodel_t cpu_6x86    = {"6x86", &cpu_pentium, "rasem", NULL, NULL, NULL, NULL, NULL};
cpumodel_t cpu_PMMX    = {"Pentium MMX", &cpu_pentium, NULL, NULL, "mmx", NULL, "mmx", NULL};
// The "k6" column routine is only registered with self-modifying code.
#ifdef ALLOW_SELF_MODIFY
cpumodel_t cpu_6x86MX  = {"6x86MX", &cpu_6x86, "k6", NULL, "mmx", NULL, "mmx", NULL};
#else
cpumodel_t cpu_6x86MX  = {"6x86MX", &cpu_6x86, NULL, NULL, "mmx", NULL, "mmx", NULL};
#endif
// Both K6 variants currently just inherit the 6x86MX choices.
cpumodel_t cpu_K6      = {"K6", &cpu_6x86MX, NULL, NULL, NULL, NULL, NULL, NULL};
cpumodel_t cpu_K6_2    = {"K6-2 or K6-3", &cpu_6x86MX, NULL, NULL, NULL, NULL, NULL, NULL};
cpumodel_t cpu_PPro    = {"Pentium Pro", NULL, "rasem", "rasem", "rasem", "rasem", "base", "base"};
#ifdef ALLOW_SELF_MODIFY
cpumodel_t cpu_PII     = {"Pentium II", &cpu_PPro, "k6", NULL, "mmx", NULL, "mmx", NULL};
#else
cpumodel_t cpu_PII     = {"Pentium II", &cpu_PPro, NULL, NULL, "mmx", NULL, "mmx", NULL};
#endif
// Fallbacks for families above 6, with and without MMX.
cpumodel_t cpu_newmmx  = {"Unknown MMX CPU", &cpu_PII, NULL, NULL, NULL, NULL, NULL, NULL};
cpumodel_t cpu_new     = {"Unknown new CPU", &cpu_PPro, NULL, NULL, NULL, NULL, NULL, NULL};



//
// CheckCPU has been copied from Allegro.
//

// helpers from i_cpu.asm
//
// The helpers exchange values with C through these globals. DetectCPU sets
// asmarg1 to the CPUID level and asmarg2 to the address of a long[4] result
// buffer before calling I_386GetCpuidInfo, and reads asmret after calling
// I_386IsCpuidSupported and I_386IsCyrix.
long asmret, asmarg1, asmarg2;
// DetectCPU uses the return values of these two directly.
extern int I_386Is486(void);
extern int I_386IsFPU(void);
// NOTE(review): declared as returning int, but DetectCPU ignores the return
// value and tests asmret instead - confirm which one the asm really sets.
extern int I_386IsCpuidSupported(void);
extern void I_386GetCpuidInfo(void);//long cpuid_levels, long *reg);
extern void I_386IsCyrix(void);

//
// DetectCPU
//
// Fills in the file-scope `cpu` record: cpuid availability, vendor string,
// family/model numbers and the fpu/mmx/sse/3dnow capability flags.
//
// With CPUID available the values come from CPUID levels 0, 1 and, when
// present, 0x80000001. Without it the helpers from i_cpu.asm are used to
// tell a 386 from a 486 and to spot a Cyrix.
//
// reg[] receives the four result registers of a CPUID query. The code
// treats them as EAX, EBX, ECX, EDX in that order (the vendor string is
// assembled as reg[1], reg[3], reg[2], matching CPUID's EBX, EDX, ECX).
// NOTE(review): the helper itself lives in i_cpu.asm - confirm the order.
//
static void DetectCPU()
{
  long cpuid_levels;
  long vendor_temp[4];
  long reg[4];

  I_386IsCpuidSupported();
  if (asmret)
  {
    cpu.cpuid = true;

    // Level 0: highest basic level and the 12-byte vendor string.
    asmarg1 = 0;
    asmarg2 = (long)&reg[0];
    I_386GetCpuidInfo();
    cpuid_levels = reg[0];
    vendor_temp[0] = reg[1];
    vendor_temp[1] = reg[3];
    vendor_temp[2] = reg[2];
    vendor_temp[3] = 0;  // supplies the terminating NUL
    memcpy(cpu.vendor, vendor_temp, 13);

    if (cpuid_levels > 0)
    {
      // Level 1: family/model signature and the standard feature flags.
      reg[0] = reg[1] = reg[2] = reg[3] = 0;
      asmarg1 = 1;
      asmarg2 = (long)&reg[0];
      I_386GetCpuidInfo();
      cpu.id_family = (reg[0] & 0xF00) >> 8;
      cpu.id_model = (reg[0] & 0xF0) >> 4;
      cpu.fpu = (reg[3] & 1 ? true : false);          // EDX bit 0
      cpu.mmx = (reg[3] & 0x800000 ? true : false);   // EDX bit 23
      cpu.sse = (reg[3] & 0x2000000 ? true : false);  // EDX bit 25
    }

    // Level 0x80000000 reports the highest extended level in reg[0].
    asmarg1 = 0x80000000;
    asmarg2 = (long)&reg[0];
    I_386GetCpuidInfo();
    if ((unsigned long)reg[0] > 0x80000000)
    {
      // The extended feature flags, including 3DNow! (EDX bit 31), are
      // returned by level 0x80000001. Asking for 0x80000000 again would
      // only return the maximum level and vendor string a second time.
      asmarg1 = 0x80000001;
      asmarg2 = (long)&reg[0];
      I_386GetCpuidInfo();
      cpu.now = (reg[3] & 0x80000000 ? true : false);
    }
  }
  else
  {
    // No CPUID: this is a 386 or an early 486-class chip.
    cpu.fpu = I_386IsFPU();
    if (!I_386Is486())
    {
      cpu.id_family = 3;
    }
    else
    {
      I_386IsCyrix();
      if (asmret)
      {
        cpu.id_model = 14;
        // I'm assuming that most cyrixes without cpuid are 486-class
        cpu.id_family = 4;
      }
      else
      {
        cpu.id_family = 4;
        cpu.id_model = 15;
      }
    }
  }
}

// Resets the cpu record to the portable defaults, detects the CPU unless
// M_CheckParm reports "-UseC", prints what was found and finally selects
// the default routines for the chosen model.
void I_CheckCPU()
{
  // Default: C versions of everything
  cpu.model = &cpu_c;

  // I assuming we all have a 486DX at least.
  cpu.fpu = true;
  cpu.mmx = false;
  cpu.now = false;
  cpu.sse = false;
  cpu.cpuid = false;

  // With -UseC no detection is done and nothing is printed.
  if (M_CheckParm("-UseC"))
  {
    SetDefaultRoutines(cpu.model);
    return;
  }

  DetectCPU();

  if (cpu.id_family == 3)
  {
    // 386 detected
    cpu.model = &cpu_386;
  }
  else if (cpu.id_family == 4)
  {
    // 486 or 5x86 detected
    cpu.model = &cpu_486;
  }
  else if (cpu.id_family == 5)
  {
    // Pentium-compatible detected
    if (cpu.id_model == 4)
    {
      // Pentium MMX detected
      cpu.model = &cpu_PMMX;
    }
    else if (cpu.id_model == 6 || cpu.id_model == 7)
    {
      // AMD-K6 detected
      cpu.model = &cpu_K6;
    }
    else if (cpu.id_model == 8 || cpu.id_model == 9)
    {
      // AMD-K6-2 or K6-3 detected.
      cpu.model = &cpu_K6_2;
    }
    else if (cpu.id_model == 14)
    {
      // Cyrix detected; MMX decides between the two variants
      cpu.model = cpu.mmx ? &cpu_6x86MX : &cpu_6x86;
    }
    else
    {
      // Pentium as default
      cpu.model = &cpu_pentium;
    }
  }
  else if (cpu.id_family == 6)
  {
    cpu.model = cpu.mmx ? &cpu_PII : &cpu_PPro;
  }
  else if (cpu.id_family > 6)
  {
    cpu.model = cpu.mmx ? &cpu_newmmx : &cpu_new;
  }
  // Any other family keeps the C defaults chosen above.

  I_Printf("CPU Detected: %s ", cpu.model->name);

  if (cpu.mmx || cpu.fpu || cpu.now || cpu.sse)
  {
    I_Printf("with ");

    if (cpu.mmx)
    {
      I_Printf("MMX,");
    }
    if (cpu.now)
    {
      I_Printf("3dNow!,");
    }
    if (cpu.sse)
    {
      I_Printf("SSE,");
    }
    if (cpu.fpu)
    {
      I_Printf("FPU");
    }

    I_Printf(" Present");
  }

  if (cpu.cpuid)
  {
    I_Printf(" - CPUID: %s", cpu.vendor);
  }

  I_Printf("\n");

  SetDefaultRoutines(cpu.model);
}



