//----------------------------------------------------------------------------
//  EDGE Column/Span Drawing for 16-bit Colour Code
//----------------------------------------------------------------------------
// 
//  Copyright (c) 1999-2001  The EDGE Team.
// 
//  This program is free software; you can redistribute it and/or
//  modify it under the terms of the GNU General Public License
//  as published by the Free Software Foundation; either version 2
//  of the License, or (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//----------------------------------------------------------------------------
//
//  Based on the DOOM source code, released by Id Software under the
//  following copyright:
//
//    Copyright (C) 1993-1996 by id Software, Inc.
//
//----------------------------------------------------------------------------
//
// Note: The best place for optimisation!
//
// -ACB- 1998/09/10 Cleaned up.
//

#include "i_defs.h"

#ifndef NOHICOLOUR

#include "r_draw2.h"

#include "con_defs.h"
#include "dm_defs.h"
#include "dm_state.h"
#include "r_local.h"
#include "v_res.h"
#include "v_colour.h"
#include "w_image.h"
#include "w_wad.h"
#include "z_zone.h"

// Function lists for the 16-bit column and span drawers.  Both are set up
// and filled by R_InitFunctions_Draw2().
funclist_t drawcol16_funcs;
funclist_t drawspan16_funcs;

// Blend level given to the fuzz colour in R_DrawFuzzColumn16(); the existing
// framebuffer pixel gets the remaining (64 - FUZZ_SHADE).
#define FUZZ_SHADE  10  // out of 64

//
// All drawing to the view buffer is accomplished in this file.
//
// The other refresh files only know about coordinates, not the
// architecture of the frame buffer.
//
// Conveniently, the frame buffer is a linear one, and we need
// only the base address, and the total size.
//

/*
   Here are the necessary changes, written for 0.60 (not 0.64 as I told before).
   If you find my code ugly, than it's not miracle coz I'm Delphi/Pascal/Asm
   programmer and this routine was my first and last experiment with AT&T syntax,
   thanx god for FPC and NASM, I don't have to use it anymore, even in the Linux
   world. The routine is based on look-up table ;-), The BLF_Init routine must
   be called during the start-up. If you think it is not a complete nonsense,
   check it w/ your column routine, it should look great. The precision
   of the filtration can be raised by enlarging the table, but it really slows 
   things down, I plan to check it's behavior using 3Dnow PREFETCH instruction,
   it should help. - Vitek Kavan
 */

// -ES- 1999/01/10 Improved the algorithm a bit: Improved the accuracy
// and decreased the table size from 4 M to 52 K.

#define BLFshift 2  // Detail level. High detail levels look better, but
                   // consume more memory. For each detail increase by one,
                   // the BLF table needs approx. four times as much memory.
// Number of sub-texel steps per axis, and the largest sub-texel index.
#define BLFsz (1<<BLFshift)
#define BLFmax (BLFsz-1)

// One lookup table: 256 longs, indexed by one byte of a 16-bit pixel, holding
// translucency-table style fixed-point RGB values.
typedef unsigned long BLF16LUT[256];

// BLFTab[x][y] points at a pair of BLF16LUTs inside the buffer built by
// BLF_Init16(): element [0] is indexed by the low byte of a pixel, element
// [1] by the high byte.  Entries whose weights are equal share one pair.
static BLF16LUT *BLFTab[BLFsz][BLFsz];

// Function-list callback (see R_InitFunctions_Draw2): sets the
// dc_usesmoothing flag.  The list argument is not used.
static void ColSmoothingOn(funclist_t *fl GCCATTR(unused))
{
  dc_usesmoothing = true;
}
// Function-list callback (see R_InitFunctions_Draw2): clears the
// dc_usesmoothing flag.  The list argument is not used.
static void ColSmoothingOff(funclist_t *fl GCCATTR(unused))
{
  dc_usesmoothing = false;
}
// Function-list callback (see R_InitFunctions_Draw2): sets the
// ds_usesmoothing flag.  The list argument is not used.
static void SpanSmoothingOn(funclist_t *fl GCCATTR(unused))
{
  ds_usesmoothing = true;
}
// Function-list callback (see R_InitFunctions_Draw2): clears the
// ds_usesmoothing flag.  The list argument is not used.
static void SpanSmoothingOff(funclist_t *fl GCCATTR(unused))
{
  ds_usesmoothing = false;
}

//
// BLF_Init16
//
// Function-list callback for the "BLF" column and span drawers.
//
// Every call turns on column or span smoothing, depending on which function
// pointer fl->dest refers to.  The first call additionally builds the
// bilinear filtering tables that BLFTab points into; later calls return
// right after setting the smoothing flag.
//
// Each table pair holds, for one blending weight, the pre-scaled fixed-point
// contribution of the low byte ([0]) and high byte ([1]) of a 16-bit pixel.
// The weight is xy / (4 * BLFsz * BLFsz), where xy is the product of two odd
// numbers below 2 * BLFsz.
//
static void BLF_Init16(funclist_t *fl)
{
  static boolean_t firsttime = true;

  unsigned int i;
  unsigned int r, g, b, x, y, xy;
  BLF16LUT *BLFBuf;  // Array of all the used BLF16LUTs

  int BLFCheck[4 * BLFsz * BLFsz];  // Stores which index to BLFBuf a certain x*y should use (x and y are 31.1 fix point)

  int count = 0;  // number of distinct weights, i.e. of table pairs needed

  if (fl->dest == &R_DrawColumn)
    ColSmoothingOn(fl);
  else if (fl->dest == &R_DrawSpan)
    SpanSmoothingOn(fl);
  // The tables only need to be built once.
  if (!firsttime)
    return;
  firsttime = false;

  I_Printf("BLF_Init: Init Bilinear Filtering");

  // Init BLFCheck: -1 marks a product that never occurs; every product of
  // two odd half-steps gets the next free table index.
  for (x = 0; x < 4 * BLFsz * BLFsz; x++)
    BLFCheck[x] = -1;
  for (x = 1; x < 2 * BLFsz; x += 2)
    for (y = 1; y < 2 * BLFsz; y += 2)
      if (BLFCheck[x * y] == -1)
      {
        BLFCheck[x * y] = count;
        count++;
      }

  // Allocate the memory if it isn't already allocated. Use 32-byte alignment.
  BLFBuf = BLFTab[0][0];
  if (!BLFBuf)
  {
    // allocate memory: two LUTs per weight, plus slack for the alignment
    BLFBuf = (BLF16LUT *) Z_Malloc(count * 2 * sizeof(BLF16LUT) + 31);
    // align
    // NOTE(review): the pointer is round-tripped through 'long', which is
    // narrower than a pointer on some 64-bit ABIs; the unaligned pointer
    // returned by Z_Malloc is also not kept -- confirm both are acceptable.
    BLFBuf = (BLF16LUT *) (((long)BLFBuf + 31) & ~31);
  }

  // Point every (x, y) sub-texel position at the table pair for its weight.
  for (x = 0; x < BLFsz; x++)
    for (y = 0; y < BLFsz; y++)
      BLFTab[x][y] = &BLFBuf[2 * BLFCheck[(2 * x + 1) * (2 * y + 1)]];

  // Fill in the table pair of every weight that is in use.
  for (xy = 0; xy < 4 * BLFsz * BLFsz; xy++)
    if (BLFCheck[xy] != -1)
    {
      for (i = 0; i < 256; i++)
      {
        // Low byte of RGB triplet
        g = (i & 0xE0) >> 5;
        b = i & 0x1F;

        // Scale by the weight, keeping 5 fractional bits.
        g = ((g * xy) << 5) >> (2 + 2 * BLFshift);
        b = ((b * xy) << 5) >> (2 + 2 * BLFshift);
        (&BLFBuf[2 * BLFCheck[xy]])[0][i] = g | (b << 11);

        // High byte of RGB triplet
        r = ((i << 8) & 0xF800) >> 11;
        g = ((i << 8) & 0x0700) >> 5;

        // Red keeps 6 fractional bits, green 5.
        r = ((r * xy) << 6) >> (2 + 2 * BLFshift);
        g = ((g * xy) << 5) >> (2 + 2 * BLFshift);
        (&BLFBuf[2 * BLFCheck[xy]])[1][i] = g | (r << 21);
      }
    }
  I_Printf("\n");
}

//-----------------------------------------------------------

// Currently a no-op: the body is empty.
void resinit_r_draw_c16(void)
{
}

//
// R_DrawColumn16_CVersion
//
// A column is a vertical slice/span from a wall texture that, given the
// DOOM style restrictions on the view orientation, will always have
// constant z depth.
//
// Thus a special case loop for very fast rendering can
// be used. It has also been used with Wolfenstein 3D.
// 
void R_DrawColumn16_CVersion(void)
{
  int count;
  byte *dest;
  short *tempcolourmap;
  fixed_t frac;
  fixed_t fracstep;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn16_CVersion: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  fracstep = dc_ystep;
  frac = dc_yfrac;

  // Inner loop that does the actual texture mapping, e.g. a DDA-lile scaling.
  tempcolourmap = (short *)(dc_colourmap);

  do
  {
    // Re-map colour indices from wall texture column
    // using a lighting/special effects LUT.
    *(short *)dest = tempcolourmap[dc_source[(frac >> FRACBITS) & 127]];

    dest += vb_pitch;
    frac += fracstep;
  }
  while (--count);
}

//
// R_DrawColumn16_MIP
//
// Like R_DrawColumn16_CVersion, but the texture row is wrapped using
// dc_height (through the mask (dc_height - 1) << FRACBITS) instead of a
// fixed 128.  Draws rows dc_yl..dc_yh of screen column dc_x, mapping every
// texel through dc_colourmap.
//
void R_DrawColumn16_MIP(void)
{
  int count;
  byte *dest;
  const short *tempcolourmap;

  fixed_t yfrac;
  fixed_t ystep;
  fixed_t ymask;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  // The message names this function, so a failure can be traced to it.
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn16_MIP: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  yfrac = dc_yfrac;
  ystep = dc_ystep;
  ymask = (dc_height - 1) << FRACBITS;

  tempcolourmap = (short *)(dc_colourmap);

  // Inner loop that does the actual texture mapping (DDA-like scaling).
  do
  {
    // Re-map colour indices from wall texture column
    // using a lighting/special effects LUT.
    *(short *)dest = tempcolourmap[dc_source[(yfrac & ymask) >> FRACBITS]];

    dest += vb_pitch;
    yfrac += ystep;
  }
  while (--count);
}

//
// R_DrawTranslucentColumn16_MIP
//
// Translucent variant of R_DrawColumn16_MIP: every colourmapped texel is
// blended with the pixel already in the framebuffer.  The foreground level
// (0..64 for dc_translucency in 0..65536) is derived from dc_translucency
// and the background gets the rest.  Blending goes through the col2rgb16
// tables, one lookup per byte of each 16-bit pixel.
//
void R_DrawTranslucentColumn16_MIP(void)
{
  int count;
  byte *dest;
  const unsigned short *tempcolourmap;

  fixed_t fglevel, bglevel;
  unsigned long c;  // current colour

  fixed_t yfrac;
  fixed_t ystep;
  fixed_t ymask;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  // The message names this function, so a failure can be traced to it.
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawTranslucentColumn16_MIP: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  fglevel = (dc_translucency + 1023) / 1040;
  bglevel = 64 - fglevel;

  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  yfrac = dc_yfrac;
  ystep = dc_ystep;
  ymask = (dc_height - 1) << FRACBITS;

  tempcolourmap = (unsigned short *)(dc_colourmap);

  // Inner loop that does the actual texture mapping (DDA-like scaling).
  do
  {
    // Re-map colour indices from wall texture column
    // using a lighting/special effects LUT.
    c = tempcolourmap[dc_source[(yfrac & ymask) >> FRACBITS]];

    // Sum the weighted foreground and background, byte by byte.
    c = col2rgb16[fglevel][(unsigned char)c][0] +
        col2rgb16[fglevel][c >> 8][1] +
        col2rgb16[bglevel][dest[0]][0] +
        col2rgb16[bglevel][dest[1]][1];

    // Fold the fixed-point sum back into a 16-bit pixel.
    c |= hicolourtransmask;
    *(short *)dest = (short)(c & (c >> 16));

    dest += vb_pitch;
    yfrac += ystep;
  }
  while (--count);
}

//
// R_DrawColumn16_KM
//
// Smoothing column drawer (registered as "Kester's Smoothing").
//
// When built with NOSMOOTHING it simply calls R_DrawColumn16_CVersion.
// Otherwise, columns with a step above 0x20000 are drawn unfiltered, and
// all others blend four texels per pixel: two vertically adjacent rows from
// dc_source and the same two rows from dc_source2.  The four weights are
// built from the fractional part of the row position and from dc_xfrac,
// reduced to 0..63 and applied through the col2rgb16 tables.
//
// Side effect: dc_xfrac is masked to its low 16 bits.
//
void R_DrawColumn16_KM(void)
{
#ifdef NOSMOOTHING
  R_DrawColumn16_CVersion();
#else
  int count;
  byte *dest;
  const unsigned short *tempcolourmap;
  unsigned long frac;
  unsigned long fracstep;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  // The message names this function, so a failure can be traced to it.
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn16_KM: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  fracstep = dc_ystep;
  frac = dc_yfrac;

  tempcolourmap = (unsigned short *)(dc_colourmap);

  if (fracstep > 0x20000)
  {
    // Large step: draw the column without any filtering.
    do
    {
      // Re-map colour indices from wall texture column
      // using a lighting/special effects LUT.
      *(short *)dest = tempcolourmap[dc_source[(frac >> FRACBITS) & 127]];

      dest += vb_pitch;
      frac += fracstep;
    }
    while (--count);
  }
  else
  {
    unsigned long spot[4];   // the four colourmapped texels
    unsigned long level[4];  // their weights
    unsigned long c;
    int i;

    // Shift by half a texel before splitting into row and fraction.
    frac -= FRACUNIT / 2;
    dc_xfrac &= 0xffff;
    do
    {
      level[3] = (frac & 0xffff) * dc_xfrac;
      level[2] = (FRACUNIT - (frac & 0xffff) - 1) * dc_xfrac;
      level[1] = (frac & 0xffff) * (FRACUNIT - dc_xfrac - 1);
      level[0] = (FRACUNIT - (frac & 0xffff) - 1) * (FRACUNIT - dc_xfrac - 1);

      spot[0] = tempcolourmap[dc_source[(frac >> FRACBITS) & 127]];
      spot[1] = tempcolourmap[dc_source[((frac >> FRACBITS) + 1) & 127]];
      spot[2] = tempcolourmap[dc_source2[(frac >> FRACBITS) & 127]];
      spot[3] = tempcolourmap[dc_source2[((frac >> FRACBITS) + 1) & 127]];

      for (i = 0, c = 0; i < 4; i++)
      {
        // Keep only the top bits of the product as the table level.
        level[i] >>= 26;
        c += col2rgb16[level[i]][spot[i] & 0xff][0] + col2rgb16[level[i]][(spot[i] >> 8) & 0xff][1];
      }

      // Fold the fixed-point sum back into a 16-bit pixel.
      c |= hicolourtransmask;
      c &= c >> 16;
      *(short *)dest = (short)c;

      dest += vb_pitch;
      frac += fracstep;
    }
    while (--count);
  }
#endif
}

// -ES- 1999/03/29 Added This
//
// R_DrawColumn16_BLF
//
// Bilinear filtering column drawer, using the tables built by BLF_Init16.
//
// When built with NOSMOOTHING it simply calls R_DrawColumn16_CVersion.
// Otherwise, columns with a step above 0x20000 are handed to
// R_DrawColumn16_CVersion and nothing else is drawn.  All other columns
// blend four texels per pixel: two adjacent rows from dc_source and the same
// two rows from dc_source2.  The weights come from BLFTab, indexed by the
// sub-texel position taken from dc_xfrac and from the row position.
//
// Side effect: dc_xfrac is masked to its low 16 bits.
//
void R_DrawColumn16_BLF(void)
{
#ifdef NOSMOOTHING
  R_DrawColumn16_CVersion();
#else
  int count;
  byte *dest;
  const unsigned short *tempcolourmap;
  fixed_t yfrac;
  fixed_t ystep;
  unsigned long col1, col2, col3, col4;
  unsigned long x1, x2, y1, y2;

  // Too steep to be worth filtering: draw it unfiltered and stop here.
  // Without the return the column would be drawn a second time below,
  // overwriting the unfiltered result.
  if (dc_ystep > 0x20000)
  {
    R_DrawColumn16_CVersion();
    return;
  }

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  // The message names this function, so a failure can be traced to it.
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn16_BLF: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  // The position is shifted by half a texel before it is split up.
  ystep = dc_ystep;
  yfrac = dc_yfrac - FRACUNIT / 2;

  tempcolourmap = (unsigned short *)(dc_colourmap);

  // Horizontal sub-texel position; it is the same for the whole column.
  dc_xfrac &= 0xffff;
  x1 = (dc_xfrac >> (16 - BLFshift));
  x2 = BLFmax - x1;

  // Inner loop that does the actual texture mapping (DDA-like scaling).
  do
  {
    col1 = tempcolourmap[dc_source[(yfrac >> FRACBITS) & 127]];
    col2 = tempcolourmap[dc_source2[(yfrac >> FRACBITS) & 127]];
    col3 = tempcolourmap[dc_source[((yfrac >> FRACBITS) + 1) & 127]];
    col4 = tempcolourmap[dc_source2[((yfrac >> FRACBITS) + 1) & 127]];

    // Get the texture sub-coordinates
    y1 = (yfrac >> (16 - BLFshift)) & BLFmax;
    y2 = BLFmax - y1;

    // Sum the weighted contribution of both bytes of all four texels.
    col1 = BLFTab[x2][y2][0][col1 & 0xff]
        + BLFTab[x2][y2][1][col1 >> 8]
        + BLFTab[x1][y2][0][col2 & 0xff]
        + BLFTab[x1][y2][1][col2 >> 8]
        + BLFTab[x2][y1][0][col3 & 0xff]
        + BLFTab[x2][y1][1][col3 >> 8]
        + BLFTab[x1][y1][0][col4 & 0xff]
        + BLFTab[x1][y1][1][col4 >> 8];

    // Convert to usable RGB
    col1 |= 0x07E0F81F;
    col1 &= col1 >> 16;

    // Store pixel
    *(short *)dest = (short)col1;

    // Next step
    dest += vb_pitch;
    yfrac += ystep;
  }
  while (--count);

#endif
}

//
// R_DrawFuzzColumn16
//
// Creates a fuzzy image by blending every pixel of the column with a pixel
// taken from zero to seven rows above it in the framebuffer, plus a small
// amount (FUZZ_SHADE out of 64) of the first colour of dc_colourmap.
//
// Used with an all black colourmap, this could create the FUZZY effect,
// i.e. spectres and invisible players.
//
// Side effect: dc_yl is raised so that the highest row read is never above
// row 0.
//
void R_DrawFuzzColumn16(void)
{
  enum { FUZZ_REACH = 7 };  // largest row offset read; also the wrap mask

  int count;
  byte *dest;
  const byte *above;
  unsigned long c;
  unsigned short fuzzcol = ((short *)dc_colourmap)[0];
  int fuzzpos = framecount;

  // Adjust the top border: the loop reads up to FUZZ_REACH rows above the
  // pixel being drawn, so no pixel above row FUZZ_REACH may be drawn.
  // (A limit of 6 would let an offset of 7 read one row above row 0.)
  if (dc_yl < FUZZ_REACH)
    dc_yl = FUZZ_REACH;

  count = dc_yh - dc_yl;

  // Zero length.
  if (count < 0)
    return;

#ifdef DEVELOPERS
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawFuzzColumn16: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  dest = ylookup[dc_yl] + columnofs[dc_x];

  // count is one less than the number of rows, hence the post-decrement test.
  do
  {
    fuzzpos = (fuzzpos + 1) & FUZZ_REACH;

    // Lookup framebuffer, and retrieve a pixel that is zero to seven
    // pixels above the current one.
    above = dest - fuzzpos * vb_pitch;

    c = col2rgb16[64-FUZZ_SHADE][above[0]][0] +
        col2rgb16[64-FUZZ_SHADE][above[1]][1] +
        col2rgb16[FUZZ_SHADE][fuzzcol & 0xFF][0] +
        col2rgb16[FUZZ_SHADE][fuzzcol >> 8][1];

    // Fold the fixed-point sum back into a 16-bit pixel.
    c |= hicolourtransmask;
    *(short *)dest = (short)(c & (c >> 16));

    dest += vb_pitch;
  }
  while (count--);
}

// -ES- 1998/11/08 New tranlsucency. It's slow, but it works.
// -KM- 1998/11/25 Modified for use with new trans system. (dc_translucency)
// -ES- 1998/11/29 Improved the translucency algorithm.
void R_DrawTranslucentColumn16()
{
  int count;
  byte *dest;
  fixed_t frac;
  fixed_t fracstep;

  fixed_t fglevel, bglevel;
  unsigned long c;  // current colour

#if FADER
  if (dc_translucency == 0x8000)
    fglevel = abs(256 - (leveltime & 0x1ff)) << 8;
#endif

/*  fglevel = fglevel&~0x3ff;
   bglevel = FRACUNIT-fglevel;
   fglevel >>= 10;
   bglevel >>= 10;
 */
  fglevel = (dc_translucency + 1023) / 1040;
  bglevel = 64 - fglevel;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  //
  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  //
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  fracstep = dc_ystep;
  frac = dc_yfrac;

  //
  // Inner loop that does the actual texture mapping,
  // e.g. a DDA-lile scaling. This is as fast as it gets.
  //
  do
  {
    c = ((unsigned short *)dc_colourmap)[dc_source[(frac >> FRACBITS) & 127]];
    c = col2rgb16[fglevel][(unsigned char)c][0] +
        col2rgb16[fglevel][c >> 8][1] +
        col2rgb16[bglevel][dest[0]][0] +
        col2rgb16[bglevel][dest[1]][1];
    c |= hicolourtransmask;
    *(short *)dest = (short)(c & (c >> 16));
    dest += vb_pitch;
    frac += fracstep;
  }
  while (--count);
}

//
// R_DrawTranslatedColumn16
//
// Uses the translation tables to remap one set of palette colours to
// another. One prime example is the player greens to the other player
// colours.
//
// Could be used with different translation tables, e.g. the lighter coloured
// version of the BaronOfHell, the HellKnight, uses identical sprites,
// kinda brightened up.
//
// 16-Bit Version.
//
// -ES- 1998/11/29 Improved the translucency algorithm.

void R_DrawTranslatedColumn16(void)
{
  int count;
  byte *dest;
  fixed_t frac;
  fixed_t fracstep;
  short *tempcolourmap;

  count = dc_yh - dc_yl;

  if (count < 0)
    return;

#ifdef DEVELOPERS
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawTranslatedColumn16: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Looks familiar.
  fracstep = dc_ystep;
  frac = dc_yfrac;

  // Here we do an additional index re-mapping.
  tempcolourmap = (short *)dc_colourmap;

  do
  {
    *(short *)dest = tempcolourmap[dc_translation[dc_source[frac >> FRACBITS]]];
    dest += vb_pitch;

    frac += fracstep;
  }
  while (count--);
}

void R_DrawTranslucentTranslatedColumn16()
{
  int count;
  byte *dest;
  fixed_t frac;
  fixed_t fracstep;

  fixed_t fglevel, bglevel;
  unsigned long c;  // current colour

  fglevel = dc_translucency;
#if FADER
  if (dc_translucency == 0x8000)
    fglevel = abs(256 - (leveltime & 0x1ff)) << 8;
#endif
  fglevel = fglevel & ~0x3ff;
  bglevel = FRACUNIT - fglevel;
  fglevel >>= 10;
  bglevel >>= 10;

  count = dc_yh - dc_yl + 1;

  // Zero length, column does not exceed a pixel.
  if (count <= 0)
    return;

#ifdef DEVELOPERS
  if ((unsigned int)dc_x >= (unsigned int)SCREENWIDTH || dc_yl < 0 || dc_yh >= SCREENHEIGHT)
    I_Error("R_DrawColumn: %i to %i at %i", dc_yl, dc_yh, dc_x);
#endif

  //
  // Framebuffer destination address.
  // Use ylookup LUT to avoid multiply with ScreenWidth.
  //
  dest = ylookup[dc_yl] + columnofs[dc_x];

  // Determine scaling, which is the only mapping to be done.
  fracstep = dc_ystep;
  frac = dc_yfrac;

  //
  // Inner loop that does the actual texture mapping,
  // e.g. a DDA-lile scaling. This is as fast as it gets.
  //
  do
  {
    c = ((unsigned short *)dc_colourmap)[dc_translation[dc_source[(frac >> FRACBITS) & 127]]];
    c = col2rgb16[fglevel][(unsigned char)c][0] +
        col2rgb16[fglevel][c >> 8][1] +
        col2rgb16[bglevel][dest[0]][0] +
        col2rgb16[bglevel][dest[1]][1];
    c |= hicolourtransmask;
    *(short *)dest = (short)(c & (c >> 16));
    dest += vb_pitch;
    frac += fracstep;
  }
  while (--count);
}

//
// R_DrawSpan16_CVersion
//
// With DOOM style restrictions on view orientation,
// the floors and ceilings consist of horizontal slices
// or spans with constant z depth.
//
// However, rotation around the world z axis is possible,
// thus this mapping, while simpler and faster than
// perspective correct texture mapping, has to traverse
// the texture at an angle in all but a few cases.
//
// In consequence, flats are not stored by column (like walls),
// and the inner loop has to step in texture space u and v.
//
// Draws pixels ds_x1..ds_x2 of screen row ds_y from a 64x64 flat
// (ds_source), mapping every texel through ds_colourmap.  An empty span
// (ds_x2 < ds_x1) draws nothing.
//
void R_DrawSpan16_CVersion(void)
{
  fixed_t xfrac;
  fixed_t yfrac;
  short *dest;
  int count;
  int spot;
  const short *tempcolourmap;

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawSpan16_CVersion: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  xfrac = ds_xfrac;
  yfrac = ds_yfrac;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  tempcolourmap = (short *)ds_colourmap;

  do
  {
    // Current texture index in u,v.
    spot = ((yfrac >> (FRACBITS - 6)) & (63 * 64)) + ((xfrac >> FRACBITS) & 63);

    // Lookup pixel from flat texture tile, re-index using light/colourmap.
    *dest++ = tempcolourmap[ds_source[spot]];

    // Next step in u,v.
    xfrac += ds_xstep;
    yfrac += ds_ystep;
  }
  while (--count);
}

//
// R_DrawSpan16_MIP
//
// Span drawer for flats whose size is given by ds_width and ds_height
// instead of being fixed at 64x64.  Draws pixels ds_x1..ds_x2 of screen row
// ds_y, mapping every texel through ds_colourmap.  An empty span
// (ds_x2 < ds_x1) draws nothing.
//
// NOTE(review): the texel index computed here is x * ds_height + y, whereas
// R_DrawTranslucentSpan16_MIP and R_DrawHoleySpan16_MIP use
// y * ds_width + x -- confirm which layout ds_source really has.
//
void R_DrawSpan16_MIP(void)
{
  fixed_t xfrac, yfrac;
  fixed_t xmask, ymask;
  fixed_t xstep, ystep;

  short *dest;
  int count;
  int spot;
  const short *tempcolourmap;

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawSpan16_MIP: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  // The x coordinate is pre-multiplied by the height, so that the masked
  // sum below directly yields the texel index.
  xfrac = ds_xfrac * ds_height;
  xstep = ds_xstep * ds_height;
  xmask = ((ds_width-1) << FRACBITS) * ds_height;

  yfrac = ds_yfrac;
  ystep = ds_ystep;
  ymask = (ds_height-1) << FRACBITS;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  tempcolourmap = (short *)ds_colourmap;

  do
  {
    // Current texture index in u,v.  Maximum texture size is 128x128.
    spot = ((yfrac & ymask) + (xfrac & xmask)) >> FRACBITS;

    // Lookup pixel from flat texture tile, re-index using light/colourmap.
    *dest++ = tempcolourmap[ds_source[spot]];

    // Next step in u,v.
    xfrac += xstep;
    yfrac += ystep;
  }
  while (--count);
}

//
// R_DrawTranslucentSpan16_MIP
//
// Translucent span drawer for flats whose size is given by ds_width and
// ds_height.  Draws pixels ds_x1..ds_x2 of screen row ds_y, blending every
// colourmapped texel with the pixel already in the framebuffer.  An empty
// span (ds_x2 < ds_x1) draws nothing.
//
// NOTE(review): the blend level is taken from dc_translucency (the column
// variable) -- confirm that this is the intended source for spans.
//
void R_DrawTranslucentSpan16_MIP(void)
{
  fixed_t xfrac, yfrac;
  fixed_t xmask, ymask;
  fixed_t xstep, ystep;

  short *dest;
  int count;
  int spot;
  const unsigned short *tempcolourmap;

  fixed_t fglevel, bglevel;
  unsigned long c;  // current colour

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.  The message
  // names this function, so a failure can be traced to it.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawTranslucentSpan16_MIP: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  fglevel = (dc_translucency + 1023) / 1040;
  bglevel = 64 - fglevel;

  // The y coordinate is pre-multiplied by the width, so that the masked
  // sum below directly yields the texel index.
  xfrac = ds_xfrac;
  yfrac = ds_yfrac * ds_width;

  xmask = (ds_width-1) << FRACBITS;
  ymask = ((ds_height-1) * ds_width) << FRACBITS;

  xstep = ds_xstep;
  ystep = ds_ystep * ds_width;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  tempcolourmap = (unsigned short *)ds_colourmap;

  do
  {
    // Current texture index in u,v.  Maximum texture size is 128x128.
    spot = ((yfrac & ymask) + (xfrac & xmask)) >> FRACBITS;

    c = tempcolourmap[ds_source[spot]];

    // Sum the weighted foreground and background, byte by byte.
    c = col2rgb16[fglevel][(unsigned char)c][0] +
        col2rgb16[fglevel][c >> 8][1] +
        col2rgb16[bglevel][((byte *)dest)[0]][0] +
        col2rgb16[bglevel][((byte *)dest)[1]][1];

    // Fold the fixed-point sum back into a 16-bit pixel.
    c |= hicolourtransmask;
    *dest++ = (short)(c & (c >> 16));

    // Next step in u,v.
    xfrac += xstep;
    yfrac += ystep;
  }
  while (--count);
}

//
// R_DrawHoleySpan16_MIP
//
// Span drawer for flats with holes, whose size is given by ds_width and
// ds_height.  Draws pixels ds_x1..ds_x2 of screen row ds_y; texels equal to
// TRANS_PIXEL are skipped, so the framebuffer shows through there.  An
// empty span (ds_x2 < ds_x1) draws nothing.
//
void R_DrawHoleySpan16_MIP(void)
{
  fixed_t xfrac, yfrac;
  fixed_t xmask, ymask;
  fixed_t xstep, ystep;

  short *dest;
  int count;
  int spot;
  const short *tempcolourmap;

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawHoleySpan16_MIP: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  // The y coordinate is pre-multiplied by the width, so that the masked
  // sum below directly yields the texel index.
  xfrac = ds_xfrac;
  yfrac = ds_yfrac * ds_width;

  xmask = (ds_width-1) << FRACBITS;
  ymask = ((ds_height-1) * ds_width) << FRACBITS;

  xstep = ds_xstep;
  ystep = ds_ystep * ds_width;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  tempcolourmap = (short *)ds_colourmap;

  do
  {
    // Current texture index in u,v.  Maximum texture size is 128x128.
    spot = ((yfrac & ymask) + (xfrac & xmask)) >> FRACBITS;

    // Lookup pixel from flat texture tile, re-index using light/colourmap.
    // Transparent texels leave the destination untouched.
    if (ds_source[spot] != TRANS_PIXEL)
      *dest = tempcolourmap[ds_source[spot]];

    // Next step in u,v.
    dest++;
    xfrac += xstep;
    yfrac += ystep;
  }
  while (--count);
}

//
// R_DrawTranslucentSpan16
//
// Translucent version of R_DrawSpan16_CVersion for 64x64 flats.  Draws
// pixels ds_x1..ds_x2 of screen row ds_y, blending every colourmapped texel
// with the pixel already in the framebuffer.  An empty span
// (ds_x2 < ds_x1) draws nothing.
//
// NOTE(review): the blend level is taken from dc_translucency (the column
// variable) -- confirm that this is the intended source for spans.
//
void R_DrawTranslucentSpan16(void)
{
  fixed_t xfrac;
  fixed_t yfrac;
  short *dest;
  int count;
  int spot;
  const unsigned short *tempcolourmap;

  fixed_t fglevel, bglevel;
  unsigned long c;  // current colour

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.  The message
  // names this function, so a failure can be traced to it.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawTranslucentSpan16: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  fglevel = (dc_translucency + 1023) / 1040;
  bglevel = 64 - fglevel;

  xfrac = ds_xfrac;
  yfrac = ds_yfrac;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  tempcolourmap = (unsigned short *)ds_colourmap;

  do
  {
    // Current texture index in u,v.
    spot = ((yfrac >> (FRACBITS - 6)) & (63 * 64)) + ((xfrac >> FRACBITS) & 63);

    c = tempcolourmap[ds_source[spot]];

    // Sum the weighted foreground and background, byte by byte.
    c = col2rgb16[fglevel][(unsigned char)c][0] +
        col2rgb16[fglevel][c >> 8][1] +
        col2rgb16[bglevel][((byte *)dest)[0]][0] +
        col2rgb16[bglevel][((byte *)dest)[1]][1];

    // Fold the fixed-point sum back into a 16-bit pixel.
    c |= hicolourtransmask;
    *dest++ = (short)(c & (c >> 16));

    // Next step in u,v.
    xfrac += ds_xstep;
    yfrac += ds_ystep;
  }
  while (--count);
}

// -KM- 1998/11/25 Finished this.  It is *REAL* slow. Make sure you have the hardware.
//
// R_DrawSpan16_KM
//
// Smoothing span drawer (registered as "Kester's Smoothing") for 64x64
// flats.  Draws pixels ds_x1..ds_x2 of screen row ds_y.  Every pixel blends
// the four texels around the (half-texel shifted) texture position; the
// weights are built from the fractional parts of the position, reduced to
// 0..63 and applied through the col2rgb16 tables.  An empty span
// (ds_x2 < ds_x1) draws nothing.
//
void R_DrawSpan16_KM(void)
{
  unsigned long xfrac;
  unsigned long yfrac;
  short *dest;
  int count;

#ifdef DEVELOPERS
  // Row SCREENHEIGHT itself is already outside the screen.  The message
  // names this function, so a failure can be traced to it.
  if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned int)ds_y >= (unsigned int)SCREENHEIGHT)
    I_Error("R_DrawSpan16_KM: %i to %i at %i", ds_x1, ds_x2, ds_y);
#endif

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  xfrac = ds_xfrac;
  yfrac = ds_yfrac;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);

  // NOTE(review): a value masked with 0xffff can only exceed FRACUNIT if
  // FRACUNIT is below 0xffff.  The 0xffff masks used in this file suggest
  // FRACUNIT is 1 << 16, in which case this test is never true and the
  // unfiltered path below is never taken -- confirm the intended condition.
  if (((ds_xstep & 0xffff) > FRACUNIT) && ((ds_ystep & 0xffff) > FRACUNIT))
  {
    const short *tempcolourmap = (short *)ds_colourmap;
    int spot;

    do
    {
      // Current texture index in u,v.
      spot = ((yfrac >> (FRACBITS - 6)) & (63 * 64)) + ((xfrac >> FRACBITS) & 63);

      // Lookup pixel from flat texture tile, re-index using light/colourmap.
      *dest++ = tempcolourmap[ds_source[spot]];

      // Next step in u,v.
      xfrac += ds_xstep;
      yfrac += ds_ystep;
    }
    while (--count);
  }
  else
  {
    const unsigned short *tempcolourmap = (unsigned short *)ds_colourmap;
    unsigned long texel[4];  // indices of the four surrounding texels
    unsigned long level[4];  // their weights
    unsigned long pix;       // colourmapped texel
    unsigned long c;         // current colour

    int i;

    // Shift by half a texel before splitting into texel and fraction.
    xfrac -= FRACUNIT / 2;
    yfrac -= FRACUNIT / 2;
    do
    {
      texel[0] = ((yfrac >> 16) & 63) * 64 + ((xfrac >> 16) & 63);
      texel[1] = ((yfrac >> 16) & 63) * 64 + (((xfrac >> 16) + 1) & 63);
      texel[2] = (((yfrac >> 16) + 1) & 63) * 64 + ((xfrac >> 16) & 63);
      texel[3] = (((yfrac >> 16) + 1) & 63) * 64 + (((xfrac >> 16) + 1) & 63);

      level[3] = (yfrac & 0xffff) * (xfrac & 0xffff);
      level[1] = (FRACUNIT - (yfrac & 0xffff) - 1) * (xfrac & 0xffff);
      level[2] = (yfrac & 0xffff) * (FRACUNIT - (xfrac & 0xffff) - 1);
      level[0] = (FRACUNIT - (yfrac & 0xffff)) * (FRACUNIT - (xfrac & 0xffff) - 1);

      for (i = 0, c = 0; i < 4; i++)
      {
        // Keep only the top bits of the product as the table level.
        level[i] >>= 26;
        pix = (unsigned long)(tempcolourmap[ds_source[texel[i]]]);
        c += col2rgb16[level[i]][pix & 0xff][0] + col2rgb16[level[i]][(pix >> 8) & 0xff][1];
      }

      // Fold the fixed-point sum back into a 16-bit pixel.
      c |= hicolourtransmask;
      c &= c >> 16;

      *dest++ = (short)c;

      // Next step in u,v.
      xfrac += ds_xstep;
      yfrac += ds_ystep;
    }
    while (--count);
  }
}

//------------------------------------------------------------
// Bilinear Filtering by Vitek Kavan vit.kavan@usa.net
// -ES- 1999/01/10 Improved the algorithm
//
// R_DrawSpan16_BLF
//
// Bilinear filtering span drawer for 64x64 flats, using the tables built by
// BLF_Init16.  Draws pixels ds_x1..ds_x2 of screen row ds_y.  Every pixel
// blends the four texels around the (half-texel shifted) texture position,
// with weights looked up in BLFTab by sub-texel position.  An empty span
// (ds_x2 < ds_x1) draws nothing.
//

void R_DrawSpan16_BLF(void)
{
  int count;
  unsigned long col1, col2, col3, col4;
  unsigned long x1, x2, y1, y2;
  short *dest;
  const unsigned short *tempcolourmap;
  unsigned long xfrac;
  unsigned long yfrac;

  // Guard against empty spans: without it a negative length would keep
  // the drawing loop running far past the end of the row.
  count = ds_x2 - ds_x1 + 1;
  if (count <= 0)
    return;

  dest = (short *)(ylookup[ds_y] + columnofs[ds_x1]);
  tempcolourmap = (unsigned short *)ds_colourmap;

  // Shift by half a texel before splitting into texel and fraction.
  xfrac = ds_xfrac - FRACUNIT / 2;
  yfrac = ds_yfrac - FRACUNIT / 2;

  do
  {
    // Get the texture coordinates
    y1 = ((yfrac >> 10) & (63 * 64));
    x1 = ((xfrac >> 16) & 63);
    y2 = (y1 + 64) & (63 * 64);
    x2 = (x1 + 1) & 63;

    // Get the colours of the four corners
    col1 = tempcolourmap[ds_source[y1 + x1]];
    col2 = tempcolourmap[ds_source[y1 + x2]];
    col3 = tempcolourmap[ds_source[y2 + x1]];
    col4 = tempcolourmap[ds_source[y2 + x2]];

    // Get the texture sub-coordinates
    x1 = (xfrac >> (16 - BLFshift)) & BLFmax;
    y1 = (yfrac >> (16 - BLFshift)) & BLFmax;
    x2 = BLFmax - x1;
    y2 = BLFmax - y1;

    // Get the fixed-point RGB value
    col1 = BLFTab[x2][y2][0][col1 & 0xff]
        + BLFTab[x2][y2][1][col1 >> 8]
        + BLFTab[x1][y2][0][col2 & 0xff]
        + BLFTab[x1][y2][1][col2 >> 8]
        + BLFTab[x2][y1][0][col3 & 0xff]
        + BLFTab[x2][y1][1][col3 >> 8]
        + BLFTab[x1][y1][0][col4 & 0xff]
        + BLFTab[x1][y1][1][col4 >> 8];

    // Convert to usable RGB
    col1 |= 0x07E0F81F;
    col1 &= col1 >> 16;

    // Store pixel
    *dest++ = (short)col1;

    // Next step
    xfrac += ds_xstep;
    yfrac += ds_ystep;
  }
  while (--count);
}

//
// R_InitFunctions_Draw2
//
// Adds all hicolour rendering functions.
//
// Sets up the "col16" and "span16" function lists with the plain C drawers
// (paired with the callbacks that switch smoothing off), then adds the
// "BLF" and "KM" drawers to each list, paired with BLF_Init16 and the
// smoothing-on callbacks respectively.
void R_InitFunctions_Draw2(void)
{
  // Default entries: unfiltered drawers.
  CON_InitFunctionList(&drawcol16_funcs, "col16", R_DrawColumn16_CVersion, ColSmoothingOff);
  CON_InitFunctionList(&drawspan16_funcs, "span16", R_DrawSpan16_CVersion, SpanSmoothingOff);

  // Filtered column drawers.
  CON_AddFunctionToList(&drawcol16_funcs, "BLF", "Bilinear Filtering", R_DrawColumn16_BLF, BLF_Init16);
  CON_AddFunctionToList(&drawcol16_funcs, "KM", "Kester's Smoothing", R_DrawColumn16_KM, ColSmoothingOn);

  // Filtered span drawers.
  CON_AddFunctionToList(&drawspan16_funcs, "BLF", "Bilinear Filtering", R_DrawSpan16_BLF, BLF_Init16);
  CON_AddFunctionToList(&drawspan16_funcs, "KM", "Kester's Smoothing", R_DrawSpan16_KM, SpanSmoothingOn);
}

//
// R_FillBackScreen16
//
// Fills the back screen with a pattern for variable screen sizes
// Also draws a beveled edge.
//
// NOTE: the whole body is compiled out with "#if 0", so this function
// currently does nothing.  The description above applies to the disabled
// code, which is kept for reference.
//
void R_FillBackScreen16(void)
{
#if 0  // OLD CODE
  const byte *src;
  const byte *tempsrc;
  short *dest;
  int x;
  int y;
  const patch_t *patch;

  // Nothing to fill when the view covers the whole screen.
  if ((viewwindowwidth == SCREENWIDTH) && (viewwindowheight == SCREENHEIGHT))
    return;

  src = W_CacheLumpName(currentmap->surround);

  // Tile the 64x64 surround pattern over the whole back screen.
  for (y = 0; y < SCREENHEIGHT; y++)
  {
    tempsrc = src + ((y & 63) << 6);
    dest = (short *)(back_scr->data + back_scr->pitch * y);

    for (x = 0; x < back_scr->width; x++)
    {
      *dest = (short)pixel_values[tempsrc[x & 63]];
      dest++;
    }
  }

  W_DoneWithLump(src);

  if (SCREENWIDTH == viewwindowwidth)
    return;

  // Border edges, drawn in 8 pixel steps where there is room for them.
  if ((viewwindowy - 8) >= 0)
  {
    patch = W_CacheLumpName("brdr_t");
    for (x = 0; x < viewwindowwidth; x += 8)
      V_DrawPatch(back_scr, viewwindowx + x, viewwindowy - 8, patch);
    W_DoneWithLump(patch);
  }

  if ((viewwindowy + viewwindowheight + 8) < (SCREENHEIGHT - SBARHEIGHT))
  {
    patch = W_CacheLumpName("brdr_b");
    for (x = 0; x < viewwindowwidth; x += 8)
      V_DrawPatch(back_scr, viewwindowx + x, viewwindowy + viewwindowheight, patch);
    W_DoneWithLump(patch);
  }

  if ((viewwindowx - 8) >= 0)
  {
    patch = W_CacheLumpName("brdr_l");
    for (y = 0; y < viewwindowheight; y += 8)
      V_DrawPatch(back_scr, viewwindowx - 8, viewwindowy + y, patch);
    W_DoneWithLump(patch);
  }

  if ((viewwindowx + viewwindowwidth + 8) < SCREENWIDTH)
  {
    patch = W_CacheLumpName("brdr_r");
    for (y = 0; y < viewwindowheight; y += 8)
      V_DrawPatch(back_scr, viewwindowx + viewwindowwidth, viewwindowy + y, patch);
    W_DoneWithLump(patch);
  }

  // Draw beveled edge.
  if (((viewwindowx - 8) >= 0) && ((viewwindowy - 8) >= 0))
  {
    V_DrawPatchName(back_scr, viewwindowx - 8, viewwindowy - 8, "brdr_tl");
  }

  if (((viewwindowx + viewwindowwidth + 8) < SCREENWIDTH) && ((viewwindowy - 8) >= 0))
  {
    V_DrawPatchName(back_scr, viewwindowx + viewwindowwidth, viewwindowy - 8, "brdr_tr");
  }

  if (((viewwindowx - 8) >= 0) && ((viewwindowy + viewwindowheight + 8) < (SCREENHEIGHT - SBARHEIGHT)))
  {
    V_DrawPatchName(back_scr, viewwindowx - 8, viewwindowy + viewwindowheight, "brdr_bl");
  }

  if (((viewwindowx + viewwindowwidth + 8) < SCREENWIDTH) &&
      ((viewwindowy + viewwindowheight + 8) < (SCREENHEIGHT - SBARHEIGHT)))
  {
    V_DrawPatchName(back_scr, viewwindowx + viewwindowwidth, viewwindowy + viewwindowheight, "brdr_br");
  }
#endif
}

//
// Copy a screen buffer.
//
// ofs shows the number of bytes to skip before drawing, count shows the
// number of pixels to copy.
//
// NOTE: the body is compiled out with "#if 0", so this function currently
// does nothing and both parameters are unused.  The disabled code copied
// count 16-bit pixels from back_scr to main_scr at byte offset ofs.
void R_VideoErase16(unsigned ofs, int count)
{
#if 0  // OLD CODE
  Z_MoveData(main_scr->data + ofs, back_scr->data + ofs, byte, sizeof(short) * count);
#endif
}

//
// R_DrawViewBorder16
// Draws the border around the view
//  for different size windows?
//
// NOTE: the whole body is compiled out with "#if 0", so this function
// currently does nothing.  The disabled code restored the area around the
// view window through R_VideoErase16 and is kept for reference.
//
// -ES- 1999/07/18 Fixed stuff regarding SCREENPITCH
void R_DrawViewBorder16(void)
{
#if 0  // OLD CODE
  int side;
  int ofs;
  int i;

  // if screenwidth>320, draw stuff around status bar, even if
  // viewwindowwidth==SCREENWIDTH
  if ((SCREENWIDTH > 320) && (SCREENHEIGHT != viewwindowheight))
  {
    ofs = (SCREENHEIGHT - SBARHEIGHT) * SCREENPITCH;
    side = (SCREENWIDTH - 320) / 2;

    for (i = 0; i < SBARHEIGHT; i++)
    {
      R_VideoErase16(ofs, side);
      R_VideoErase16(ofs + 2 * (SCREENWIDTH - side), side);
      ofs += SCREENPITCH;
    }
  }

  // don't draw stuff over and under viewwindow if there is no space to draw it
  if ((viewwindowy == 0) && ((viewwindowy + viewwindowheight) >= (SCREENHEIGHT - SBARHEIGHT)))
    return;

  // copy top
  for (i = 0; i < viewwindowy; i++)
    R_VideoErase16(i * SCREENPITCH, SCREENWIDTH);

  // copy left and right side
  for (i = viewwindowy; i < viewwindowy + viewwindowheight; i++)
  {
    // left
    R_VideoErase16(i * SCREENPITCH, viewwindowx);
    // right
    R_VideoErase16(i * SCREENPITCH + 2 * (viewwindowx + viewwindowwidth),
        SCREENWIDTH - (viewwindowx + viewwindowwidth));
  }

  // copy bottom
  for (i = viewwindowy + viewwindowheight; i < SCREENHEIGHT - SBARHEIGHT; i++)
    R_VideoErase16(i * SCREENPITCH, SCREENWIDTH);

/*
   //    if (viewwindowwidth == SCREENWIDTH)
   if (viewwindowheight>=(SCREENHEIGHT-SBARHEIGHT))
   return;

   top = ((SCREENHEIGHT-SBARHEIGHT)-viewwindowheight)/2;
   side = (SCREENWIDTH-viewwindowwidth)/2;

   // copy top and one line of left side
   R_VideoErase16 (0, top*SCREENWIDTH+side);

   // copy one line of right side and bottom
   ofs = (viewwindowheight+top)*SCREENWIDTH-side;
   R_VideoErase16 (ofs, top*SCREENWIDTH+side);

   // copy sides using wraparound
   ofs = top*SCREENWIDTH + SCREENWIDTH-side;
   side <<= 1;

   for (i=1 ; i<viewwindowheight ; i++)
   {
   R_VideoErase16 (ofs, side);
   ofs += SCREENWIDTH;
   }
 */
  V_MarkRect(0, 0, SCREENWIDTH, SCREENHEIGHT - SBARHEIGHT);
#endif
}

#endif // NOHICOLOUR
