khanat-code-old/code/nel/src/3d/texture_far.cpp
2010-05-06 02:08:41 +02:00

2105 lines
54 KiB
C++

// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "std3d.h"
#include "nel/3d/texture_far.h"
#include "nel/3d/tile_far_bank.h"
#include "nel/3d/patch.h"
#include "nel/3d/tile_color.h"
#include "nel/3d/zone.h"
#include "nel/3d/landscape.h"
#include "nel/misc/system_info.h"
using namespace NLMISC;
using namespace NL3D;
using namespace std;
namespace NL3D {
// Static scratch buffers, shared by every CTextureFar instance and reused at each rebuildPatch() call.
// _LightmapExpanded: receives the expanded lightmap of the patch (set as NL3D_CExpandLightmap::DstPixels),
// then read tile by tile as the lighting source while drawing the far tiles.
CRGBA CTextureFar::_LightmapExpanded[NL_NUM_PIXELS_ON_FAR_TILE_EDGE*NL_MAX_TILES_BY_PATCH_EDGE*NL_NUM_PIXELS_ON_FAR_TILE_EDGE*NL_MAX_TILES_BY_PATCH_EDGE];
// _LumelExpanded: receives the shadow map unpacked by CPatch::unpackShadowMap().
uint8 CTextureFar::_LumelExpanded[(NL_MAX_TILES_BY_PATCH_EDGE*NL_LUMEL_BY_TILE+1)*(NL_MAX_TILES_BY_PATCH_EDGE*NL_LUMEL_BY_TILE+1)];
// _TileTLIColors: receives the TLI colors computed by CPatch::computeCurrentTLILightmapDiv2().
CRGBA CTextureFar::_TileTLIColors[(NL_MAX_TILES_BY_PATCH_EDGE+1)*(NL_MAX_TILES_BY_PATCH_EDGE+1)];
// ***************************************************************************
CTextureFar::CTextureFar()
{
	// The far texture "define" values (NL_MAX_FAR_EDGE, NL_NUM_RECTANGLE_RATIO) are only
	// valid when a patch edge holds 16 tiles: refuse to compile otherwise.
	nlctassert(NL_MAX_TILES_BY_PATCH_EDGE==16);

	// Releasable texture: not kept in standard memory once uploaded into video memory.
	setReleasable (true);

	// Uploaded as 16 bits, filtered linearly without any mipmap, clamped on both axes.
	setUploadFormat(RGB565);
	setFilterMode (Linear, LinearMipMapOff);
	setWrapS (Clamp);
	setWrapT (Clamp);

	// The update-lighting circular list starts with this texture alone in it.
	_ULNext= this;
	_ULPrec= this;

	// At start, the whole texture is a grid of free sub-textures of the biggest size.
	const uint	biggestListId= getFreeListId(NL_MAX_FAR_PATCH_EDGE, NL_MAX_FAR_PATCH_EDGE);
	for(uint column=0; column<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE; ++column)
	{
		for(uint row=0; row<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE; ++row)
		{
			CVector2s	cell;
			cell.x= column*NL_MAX_FAR_PATCH_EDGE;
			cell.y= row*NL_MAX_FAR_PATCH_EDGE;
			_FreeSpaces[biggestListId].push_back(cell);
		}
	}

	// No update-lighting pass in progress.
	_ItULPatch= _PatchToPosMap.end();
}
// ***************************************************************************
/// Destructor. The texture must have been removed from its update-lighting list (see unlinkUL()) before.
CTextureFar::~CTextureFar()
{
// verify the textureFar is correctly unlinked from any circular list
// (an unlinked texture points to itself in both directions).
nlassert(_ULPrec==this && _ULNext==this);
}
// ***************************************************************************
void CTextureFar::linkBeforeUL(CTextureFar *textNext)
{
	nlassert(textNext);

	// Step 1: leave the current circular list. Safe even when this texture is
	// alone in its list (_ULPrec==_ULNext==this).
	CTextureFar		*const formerPrec= _ULPrec;
	CTextureFar		*const formerNext= _ULNext;
	formerNext->_ULPrec= formerPrec;
	formerPrec->_ULNext= formerNext;

	// Step 2: take place just before textNext.
	_ULNext= textNext;
	_ULPrec= textNext->_ULPrec;

	// Step 3: make the two new neighbours point back to this texture.
	_ULNext->_ULPrec= this;
	_ULPrec->_ULNext= this;
}
// ***************************************************************************
void CTextureFar::unlinkUL()
{
// first, unlink others from me. NB: works even if _ULPrec==_ULNext==this.
_ULNext->_ULPrec= _ULPrec;
_ULPrec->_ULNext= _ULNext;
// reset
_ULPrec= this;
_ULNext= this;
}
// ***************************************************************************
uint CTextureFar::getFreeListId(uint width, uint height)
{
	// Only "landscape" power-of-2 rectangles, no wider than the biggest far patch, are handled.
	nlassert(width>=height);
	nlassert(isPowerOf2(width));
	nlassert(isPowerOf2(height));
	nlassert(width<=NL_MAX_FAR_PATCH_EDGE);

	// Size level: how many times the biggest edge must be halved to get this width.
	const uint	sizeIndex= getPowerOf2(NL_MAX_FAR_PATCH_EDGE / width);
	nlassert(sizeIndex < NL_NUM_FAR_PATCH_EDGE_LEVEL);

	// Ratio level: how many times the width must be halved to get the height.
	const uint	aspectRatioIndex= getPowerOf2(width/height);
	nlassert(aspectRatioIndex < NL_NUM_FAR_RECTANGLE_RATIO );

	// Lists are laid out size-major, ratio-minor.
	return aspectRatioIndex + sizeIndex*NL_NUM_FAR_RECTANGLE_RATIO;
}
// ***************************************************************************
bool CTextureFar::getUpperSize(uint &width, uint &height)
{
	nlassert(width>=height);
	nlassert(isPowerOf2(width));
	nlassert(isPowerOf2(height));

	// Rectangle: the father is the same width but twice as high (ratio reduced by one step).
	if(width>height)
	{
		height*= 2;
		return true;
	}

	// Square already at the biggest size: there is no father. width/height are left untouched.
	if(width>=NL_MAX_FAR_PATCH_EDGE)
		return false;

	// Square: the father is the square of the next size level.
	width*= 2;
	height*= 2;
	return true;
}
// ***************************************************************************
sint CTextureFar::tryAllocatePatch (CPatch *pPatch, uint farIndex)
{
	// Footprint of the patch at this far level; the longer edge is always taken as the width.
	uint	width= (pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	uint	height= (pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	if(width<height)
		std::swap(width, height);

	// A free sub-texture of the exact size exists: nothing has to be split => 0.
	if( !_FreeSpaces[getFreeListId(width, height)].empty() )
		return 0;

	// Else walk up the bigger sizes, until one has a free sub-texture.
	while( getUpperSize(width, height) )
	{
		if( !_FreeSpaces[getFreeListId(width, height)].empty() )
		{
			// Found: return the area of the sub-texture which would have to be split.
			return width*height;
		}
	}

	// No more space in this texture => -1
	return -1;
}
// ***************************************************************************
void CTextureFar::recursSplitNext(uint wson, uint hson)
{
	// Size of the father sub-texture (the one to split). It must exist.
	uint	wup= wson, hup= hson;
	nlverify( getUpperSize(wup, hup) );

	// If no father is free, first create some by splitting a grand-father (recursively).
	const uint	fatherListId= getFreeListId(wup, hup);
	if(_FreeSpaces[fatherListId].empty())
		recursSplitNext(wup, hup);

	// Here a father must be available.
	nlassert( !_FreeSpaces[fatherListId].empty() );

	// Take the first free father: it is no longer free since it is split.
	const CVector2s	fatherPos= _FreeSpaces[fatherListId].front();
	_FreeSpaces[fatherListId].pop_front();

	// A rectangle son (wson>hson) has a father of the same width: 1 column, 2 rows => 2 sons.
	// A square son has a father twice as wide and high: 2 columns, 2 rows => 4 sons.
	const uint	numColumns= (wson>hson) ? 1 : 2;
	const uint	sonListId= getFreeListId(wson, hson);

	// Add the sons to their free list, row by row, left to right.
	for(uint row=0; row<2; ++row)
	{
		for(uint column=0; column<numColumns; ++column)
		{
			CVector2s	sonPos;
			sonPos.x= fatherPos.x + column*wson;
			sonPos.y= fatherPos.y + row*hson;
			_FreeSpaces[sonListId].push_back(sonPos);
		}
	}
}
// ***************************************************************************
void CTextureFar::allocatePatch (CPatch *pPatch, uint farIndex, float& farUScale, float& farVScale, float& farUBias, float& farVBias, bool& bRot)
{
	// Footprint of the patch at this far level; the longer edge is always stored as the width.
	uint	width= (pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	uint	height= (pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	if(width<height)
		std::swap(width, height);

	// If no sub-texture of this size is free, create some by splitting a bigger one.
	const uint	freeListId= getFreeListId(width, height);
	if(_FreeSpaces[freeListId].empty())
		recursSplitNext(width, height);

	// Here at least one sub-texture must be free: take the first one.
	nlassert( !_FreeSpaces[freeListId].empty() );
	const CVector2s	pos= _FreeSpaces[freeListId].front();

	// Identify the (patch, far level) pair, which must not be already allocated here.
	CPatchIdent		pid;
	pid.Patch= pPatch;
	pid.FarIndex= farIndex;
	nlassert( _PatchToPosMap.find(pid) == _PatchToPosMap.end() );

	// Register the allocation in both maps, and consume the free entry.
	_PatchToPosMap[pid]= pos;
	_PosToPatchMap[pos]= pid;
	_FreeSpaces[freeListId].pop_front();

	// The pixels of this rectangle must be (re)generated.
	CRect	rect (pos.x, pos.y, width, height);
	ITexture::touchRect (rect);

	// ** Outputs
	// The patch is stored rotated when it is higher than wide.
	bRot = ( pPatch->getOrderS() < pPatch->getOrderT() );
	// Scale: size of the sub-texture (minus one pixel) in far texture units.
	farUScale=(float)(width-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
	farVScale=(float)(height-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
	// Bias: position of the sub-texture, shifted by half a pixel, in far texture units.
	farUBias=((float)pos.x+0.5f)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
	farVBias=((float)pos.y+0.5f)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
}
// ***************************************************************************
// Release the sub-texture used by a patch at a given far level. The pair must have been allocated here.
void CTextureFar::removePatch (CPatch *pPatch, uint farIndex)
{
	// Find the allocation: it must exist.
	CPatchIdent		pid;
	pid.Patch= pPatch;
	pid.FarIndex= farIndex;
	TPatchToPosMap::iterator	it= _PatchToPosMap.find(pid);
	nlassert( it != _PatchToPosMap.end() );

	// Keep a copy of the position, since the map entry is about to be erased.
	const CVector2s	pos= it->second;

	// If the update-lighting cursor points to this patch, move it on before the erase.
	if( it == _ItULPatch )
		++_ItULPatch;

	// Forget the allocation in both maps.
	_PatchToPosMap.erase(it);
	_PosToPatchMap.erase(pos);

	// Give the place back to the free list of its size (same size computation as at allocation).
	uint	width= (pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	uint	height= (pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	if(width<height)
		std::swap(width, height);
	_FreeSpaces[getFreeListId(width, height)].push_back(pos);
}
// ***************************************************************************
uint CTextureFar::touchPatchULAndNext()
{
	// Pass finished (or never started): nothing touched.
	if( _ItULPatch==_PatchToPosMap.end() )
		return 0;

	// Where the current patch is stored, and which patch / far level it is.
	const int	x= _ItULPatch->second.x;
	const int	y= _ItULPatch->second.y;
	const uint	farIndex= _ItULPatch->first.FarIndex;
	CPatch		*pPatch= _ItULPatch->first.Patch;

	// Same size computation as at allocation: the longer edge is the width.
	uint	width= (pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	uint	height= (pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
	if(width<height)
		std::swap(width, height);

	// Ask for a rebuild of this rectangle.
	CRect	rect (x, y, width, height);
	ITexture::touchRect (rect);

	// Advance the cursor, and report the number of pixels touched.
	++_ItULPatch;
	return width * height;
}
// ***************************************************************************
/// Start an update-lighting pass: put the cursor used by touchPatchULAndNext() on the first allocated patch.
void CTextureFar::startPatchULTouch()
{
_ItULPatch= _PatchToPosMap.begin();
}
// ***************************************************************************
/// \return true if the update-lighting cursor is past the last allocated patch (touchPatchULAndNext() would touch nothing).
bool CTextureFar::endPatchULTouch() const
{
return _ItULPatch == _PatchToPosMap.end();
}
// ***************************************************************************
// Generate the texture. See ITexture::doGenerate().
void CTextureFar::doGenerate (bool async)
{
// Resize. But don't need to fill with 0!!
CBitmap::resize (NL_FAR_TEXTURE_EDGE_SIZE, NL_FAR_TEXTURE_EDGE_SIZE, RGBA, false);
// Rectangle invalidate ?
if (_ListInvalidRect.begin()!=_ListInvalidRect.end())
{
// Yes, rebuild only those rectangles.
// For each rectangle to compute
std::list<NLMISC::CRect>::iterator ite=_ListInvalidRect.begin();
while (ite!=_ListInvalidRect.end())
{
// Get the PatchIdent.
CVector2s pos((uint16)ite->left(), (uint16)ite->top());
TPosToPatchMap::iterator itPosToPid= _PosToPatchMap.find( pos );
// If the patch is still here...
if( itPosToPid!=_PosToPatchMap.end() )
{
// ReBuild the rectangle.
rebuildPatch (pos, itPosToPid->second);
}
// Next rectangle
ite++;
}
}
else
{
// Parse all existing Patchs.
TPosToPatchMap::iterator itPosToPid= _PosToPatchMap.begin();
while( itPosToPid!= _PosToPatchMap.end() )
{
// ReBuild the rectangle.
rebuildPatch (itPosToPid->first, itPosToPid->second);
itPosToPid++;
}
}
}
// ***************************************************************************
// Rebuild the pixels of one patch stored in this far texture.
// texturePos: top-left corner of the patch rectangle in the texture (in pixels).
// pid: the patch and the far level to draw. pid.Patch must not be NULL, and pid.FarIndex must be 1, 2 or 3.
// The bitmap must already be resized in RGBA format, and _Bank (the far tile bank) must be set.
// Steps: compute the destination offset/deltas (the patch is stored rotated when OrderS<OrderT),
// expand the patch lightmap into _LightmapExpanded, then draw up to 3 layers for each tile.
void CTextureFar::rebuildPatch (const CVector2s texturePos, const CPatchIdent &pid)
{
uint x= texturePos.x;
uint y= texturePos.y;
// Patch pointer
CPatch* patch= pid.Patch;
// Check it exists
nlassert (patch);
// get the order
uint nS=patch->getOrderS();
uint nT=patch->getOrderT();
// get the size of the texture to compute (size in pixels along the S edge of the patch, at this far level)
uint subTextWidth=(nS*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(pid.FarIndex-1);
// Check the bitmap is in RGBA format (the 16 bits format is only the upload format)
nlassert (getPixelFormat()==RGBA);
// Check pixels exist
nlassert (getPixels().size()!=0);
// Base offset of the first pixel of the patch's texture
uint nBaseOffset;
// Delta to add to the destination offset when walk for a pixel to the right in the source tile
sint dstDeltaX;
// Delta to add to the destination offset when walk for a pixel to the bottom in the source tile
sint dstDeltaY;
// larger than higher (regular)
if (nS>=nT)
{
// Regular offset, top left
nBaseOffset= x + y*_Width;
// Regular deltaX, to the right
dstDeltaX=1;
// Regular deltaY, to the bottom
dstDeltaY=_Width;
}
// higher than larger (goofy), the patch is stored with a rotation of 1 (to the left of course)
else
{
// Goofy offset, bottom left: the rotated rectangle is subTextWidth pixels high
nBaseOffset= x + y*_Width;
nBaseOffset+=(subTextWidth-1)*_Width;
// Goofy deltaX, to the top
dstDeltaX=-(sint)_Width;
// Goofy deltaY, to the right
dstDeltaY=1;
}
// Compute the order of the patch: which far tile resolution to read, and the edge size of a tile in pixels
CTileFarBank::TFarOrder orderX=CTileFarBank::order0;
uint tileSize=0;
switch (pid.FarIndex)
{
case 3:
// Ratio 1:4
orderX=CTileFarBank::order2;
tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE>>2;
break;
case 2:
// Ratio 1:2
orderX=CTileFarBank::order1;
tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE>>1;
break;
case 1:
// Ratio 1:1
orderX=CTileFarBank::order0;
tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE;
break;
default:
// no!: must be one of the previous values
nlassert (0);
}
// Must have a far tile bank pointer set in the CFarTexture
nlassert (_Bank);
// Index of the current tile in patch->Tiles (tiles are walked line by line)
sint nTileInPatch=0;
// ** Fill the struct for the tile fill method for each layers
NL3D_CComputeTileFar TileFar;
// MMX is requested only on Windows builds with asm enabled, and only if the CPU reports it
TileFar.AsmMMX= false;
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
TileFar.AsmMMX= NLMISC::CSystemInfo::hasMMX();
#endif
// Destination pointer: set per tile, in the loop below
// Destination delta
TileFar.DstDeltaX=dstDeltaX;
TileFar.DstDeltaY=dstDeltaY;
// ** Build expand lightmap..
NL3D_CExpandLightmap lightMap;
// Fill the structure
lightMap.MulFactor=tileSize;
lightMap.ColorTile=&patch->TileColors[0];
lightMap.Width=nS+1;
lightMap.Height=nT+1;
lightMap.StaticLightColor=patch->getZone()->getLandscape()->getStaticLight();
lightMap.DstPixels=_LightmapExpanded;
// Compute current TLI colors.
patch->computeCurrentTLILightmapDiv2(_TileTLIColors);
lightMap.TLIColor= _TileTLIColors;
// Expand the shadowmap
patch->unpackShadowMap (_LumelExpanded);
lightMap.LumelTile=_LumelExpanded;
// Expand the patch lightmap now
NL3D_expandLightmap (&lightMap);
// DeltaY for lightmap: a line of the expanded lightmap is nS*tileSize pixels wide
TileFar.SrcLightingDeltaY=nS*tileSize;
// Base Dst pointer on the tile line
uint nBaseDstTileLine=nBaseOffset;
for (uint t=0; t<nT; t++)
{
// Base Dst pointer on the tile
uint nBaseDstTilePixels=nBaseDstTileLine;
// For each tile of the line
for (uint s=0; s<nS; s++)
{
// Base pointer of the destination texture
TileFar.DstPixels=(CRGBA*)&(getPixels()[0])+nBaseDstTilePixels;
// Lightmap pointer: top-left pixel of tile (s,t) in the expanded lightmap
TileFar.SrcLightingPixels=_LightmapExpanded+(s*tileSize)+(t*nS*tileSize*tileSize);
// For each layer of the tile
for (sint l=0; l<3; l++)
{
// Use of additive in this layer ?
bool bAdditive=false;
// Size of the edge far tile
TileFar.Size=tileSize;
// Get a tile element reference for this tile.
const CTileElement &tileElm=patch->Tiles[nTileInPatch];
// Check for 256 tiles...
bool is256x256;
uint8 uvOff;
tileElm.getTile256Info(is256x256, uvOff);
// Get the tile number
sint tile=tileElm.Tile[l];
// Is the last layer ? (l==2 is tested first, so Tile[l+1] is never read past the 3rd layer)
bool lastLayer = ( (l == 2) || (tileElm.Tile[l+1] == NL_TILE_ELM_LAYER_EMPTY) );
// Is an non-empty layer ?
if (tile!=NL_TILE_ELM_LAYER_EMPTY)
{
// Get the read only pointer on the far tile
const CTileFarBank::CTileFar* pTile=_Bank->getTile (tile);
// This pointer must not be null, else the farBank is not valid!
if (pTile==NULL)
nlwarning ("FarBank is not valid!");
// If the tile exist (else the layer is just skipped, after the warning above)
if (pTile)
{
// Tile exist ?
if (pTile->isFill (CTileFarBank::diffuse))
{
// Get rotation of the tile in this layer
sint nRot=tileElm.getTileOrient(l);
// Source pointer
const CRGBA* pSrcDiffusePixels=pTile->getPixels (CTileFarBank::diffuse, orderX);
// NB: stays NULL if the tile has no additive part; it is then offset below, but the
// additive draw functions are only called when bAdditive is true.
const CRGBA* pSrcAdditivePixels=NULL;
// Additive ?
if (pTile->isFill (CTileFarBank::additive))
{
// Use it
bAdditive=true;
// Get additive pointer
pSrcAdditivePixels=pTile->getPixels (CTileFarBank::additive, orderX);
}
// Source size (edge of the source far tile, in pixels)
sint sourceSize;
// Source offset (for 256): which quarter of the 256 tile this tile element shows
uint sourceOffset=0;
// 256 ?
if (is256x256)
{
// Right half of the 256 tile ? (bit 1 of uvOff set => skip tileSize pixels horizontally)
if (uvOff&0x02)
sourceOffset+=tileSize;
// On the bottom ?
if ((uvOff==1)||(uvOff==2))
sourceOffset+=2*tileSize*tileSize;
// Yes, 256
sourceSize=tileSize<<1;
}
else
{
// No, 128
sourceSize=tileSize;
}
// Compute offset and deltas: the rotation is applied by choosing the source start corner and walk directions.
// NOTE(review): there is no default case; nRot is presumably always in [0,3] - confirm against getTileOrient().
switch (nRot)
{
case 0:
// Source pointers
TileFar.SrcDiffusePixels=pSrcDiffusePixels+sourceOffset;
TileFar.SrcAdditivePixels=pSrcAdditivePixels+sourceOffset;
// Source delta
TileFar.SrcDeltaX=1;
TileFar.SrcDeltaY=sourceSize;
break;
case 1:
{
// Source pointers: start at the top right corner
uint newOffset=sourceOffset+(tileSize-1);
TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
// Source delta
TileFar.SrcDeltaX=sourceSize;
TileFar.SrcDeltaY=-1;
}
break;
case 2:
{
// Source pointers: start at the bottom right corner
uint newOffset=sourceOffset+(tileSize-1)*sourceSize+tileSize-1;
TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
// Source delta
TileFar.SrcDeltaX=-1;
TileFar.SrcDeltaY=-sourceSize;
}
break;
case 3:
{
// Source pointers: start at the bottom left corner
uint newOffset=sourceOffset+(tileSize-1)*sourceSize;
TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
// Source delta
TileFar.SrcDeltaX=-sourceSize;
TileFar.SrcDeltaY=1;
}
break;
}
// *** Draw the layer
// Alpha layer ? (all layers but the first one)
if (l>0)
{
// Additive layer ? (the additive part is only drawn with the last layer)
if (bAdditive && lastLayer)
NL3D_drawFarTileInFarTextureAdditiveAlpha (&TileFar);
else // No additive layer
NL3D_drawFarTileInFarTextureAlpha (&TileFar);
}
else // no alpha
{
// Additive layer ? (the additive part is only drawn with the last layer)
if (bAdditive && lastLayer)
NL3D_drawFarTileInFarTextureAdditive (&TileFar);
else // No additive layer
NL3D_drawFarTileInFarTexture (&TileFar);
}
}
}
}
else
// Stop, no more layer
break;
}
// Next tile
nTileInPatch++;
// Next tile on the line
nBaseDstTilePixels+=dstDeltaX*tileSize;
}
// Next line of tiles
nBaseDstTileLine+=dstDeltaY*tileSize;
}
}
} // NL3D
// ***************************************************************************
// ***************************************************************************
// NL3D_ExpandLightmap. C and Asm Part
// ***************************************************************************
// ***************************************************************************
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
// EMMS called not in __asm block.
# pragma warning (disable : 4799)
// ***************************************************************************
/** Close a sequence of MMX computations by executing emms.
 * The MMX helpers of this file do not execute emms themselves.
 */
inline void NL3D_asmEndMMX()
{
__asm
{
// close MMX computation
emms
}
}
// ***************************************************************************
/** Expand a line of 565 colors into a line of CRGBA with MMX, blending linearly between 2 neighbour source colors.
 * NB: start to write at pixel 1 (dst[0] is not written).
 * NB: emms is not executed here.
 * \param src source line of 565 colors. For a source coordinate u, src[u>>8] and src[(u>>8)+1] are read.
 * \param dst destination line, 4 bytes per pixel.
 * \param du step added to the source coordinate u for each pixel written. u starts at du, and is used
 * as a fixed point value with 8 fractional bits: index= u>>8, blend factor= u&0xFF.
 * \param len number of pixels to write. Nothing is done if 0.
 */
inline void NL3D_asmExpandLineColor565(const uint16 *src, CRGBA *dst, uint du, uint len)
{
// Masks applied on the 2 colors in parallel (one 565 color in the low word of each dword).
// For each component: the 1st mask selects the whole component, the 2nd one selects its high
// bits, which are replicated into the low bits of the expanded 8 bits component.
static uint64 blank = 0;
static uint64 cF800 = INT64_CONSTANT (0x0000F8000000F800);
static uint64 cE000 = INT64_CONSTANT (0x0000E0000000E000);
static uint64 c07E0 = INT64_CONSTANT (0x000007E0000007E0);
static uint64 c0600 = INT64_CONSTANT (0x0000060000000600);
static uint64 c001F = INT64_CONSTANT (0x0000001F0000001F);
static uint64 c001C = INT64_CONSTANT (0x0000001C0000001C);
// The loop below is a do/while: it must not be entered with len==0
if(len==0)
return;
// Loop for pix.
__asm
{
// mm7= 0 for the whole loop (used to unpack)
movq mm7, blank
// start at pixel 1 => increment dst, and start u= du
mov esi, src
mov edi, dst
add edi, 4
mov ecx, len
mov edx, du
// Loop
myLoop:
// Read 565 colors
//----------
// index u.
mov ebx, edx
shr ebx, 8
// pack the 2 colors in eax: high word of eax= color0, low word of eax= color1
xor eax, eax // avoid partial stall.
mov ax, [esi + ebx*2]
shl eax, 16
mov ax, [esi + ebx*2 +2]
// store and unpack in mm2: Hmm2= color0, Lmm2= color1
movd mm2, eax
punpcklwd mm2, mm7
// reset accumulator mm3 to black
movq mm3, mm7
// Expand 565 to 888: color0 and color1 in parallel
// R: bits 15-11 go to bits 7-3, and bits 15-13 fill bits 2-0
movq mm0, mm2
movq mm1, mm2
pand mm0, cF800
pand mm1, cE000
psrld mm0, 8
psrld mm1, 13
por mm3, mm0
por mm3, mm1
// G: bits 10-5 go to bits 15-10, and bits 10-9 fill bits 9-8
movq mm0, mm2
movq mm1, mm2
pand mm0, c07E0
pand mm1, c0600
pslld mm0, 5
psrld mm1, 1
por mm3, mm0
por mm3, mm1
// B: bits 4-0 go to bits 23-19, and bits 4-2 fill bits 18-16
movq mm0, mm2
movq mm1, mm2
pand mm0, c001F
pand mm1, c001C
pslld mm0, 19
pslld mm1, 14
por mm3, mm0
por mm3, mm1
// unpack mm3 quad to mm0=color0 and mm1=color1.
movq mm0, mm3
movq mm1, mm3
psrlq mm0, 32
// Blend.
//----------
// blend factors
mov ebx, edx
mov eax, 256
and ebx, 0xFF
sub eax, ebx
movd mm2, ebx // mm2= factor
movd mm3, eax // mm3= 1-factor (ie 256-factor)
// replicate to the 4 words.
punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
punpckldq mm3, mm3 // mm3= 0000 00BB 0000 00BB
packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
packssdw mm3, mm3 // mm3= 00BB 00BB 00BB 00BB
// mul
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm3 // color0*(1-factor)
pmullw mm1, mm2 // color1*factor
// add, and unpack (>>8 since the factors sum to 256)
paddusw mm0, mm1
psrlw mm0, 8
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add edx, du
add edi, 4
dec ecx
jnz myLoop
}
}
// ***************************************************************************
/** Expand a line of CRGBA colors into a longer line of CRGBA with MMX, blending linearly between 2 neighbour source colors.
 * NB: start to write at pixel 1 (dst[0] is not written).
 * NB: emms is not executed here.
 * \param src source line of colors. For a source coordinate u, src[u>>8] and src[(u>>8)+1] are read.
 * \param dst destination line, 4 bytes per pixel.
 * \param du step added to the source coordinate u for each pixel written. u starts at du, and is used
 * as a fixed point value with 8 fractional bits: index= u>>8, blend factor= u&0xFF.
 * \param len number of pixels to write. Nothing is done if 0.
 */
inline void NL3D_asmExpandLineColor8888(const CRGBA *src, CRGBA *dst, uint du, uint len)
{
static uint64 blank = 0;
// The loop below is a do/while: it must not be entered with len==0
if(len==0)
return;
// Loop for pix.
__asm
{
// mm7= 0 for the whole loop (used to unpack)
movq mm7, blank
// start at pixel 1 => increment dst, and start u= du
mov esi, src
mov edi, dst
add edi, 4
mov ecx, len
mov edx, du
// Loop
myLoop:
// Read 8888 colors
//----------
// index u.
mov ebx, edx
shr ebx, 8
// read the 2 colors: mm0= color0, mm1= color1
movd mm0 , [esi + ebx*4]
movd mm1 , [esi + ebx*4 + 4]
// Blend.
//----------
// blend factors
mov ebx, edx
mov eax, 256
and ebx, 0xFF
sub eax, ebx
movd mm2, ebx // mm2= factor
movd mm3, eax // mm3= 1-factor (ie 256-factor)
// replicate to the 4 words.
punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
punpckldq mm3, mm3 // mm3= 0000 00BB 0000 00BB
packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
packssdw mm3, mm3 // mm3= 00BB 00BB 00BB 00BB
// mul
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm3 // color0*(1-factor)
pmullw mm1, mm2 // color1*factor
// add, and unpack (>>8 since the factors sum to 256)
paddusw mm0, mm1
psrlw mm0, 8
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add edx, du
add edi, 4
dec ecx
jnz myLoop
}
}
// ***************************************************************************
/** Blend 2 lines of color into one line, with the same blend factor for all the pixels.
 * NB: start at pix 0 here
 * NB: emms is not executed here.
 * \param dst destination line, len pixels written.
 * \param src0 first source line, weighted by 256-(index&0xFF).
 * \param src1 second source line, weighted by index&0xFF.
 * \param index only its 8 low bits are used, as the blend factor.
 * \param len number of pixels to blend. Nothing is done if 0.
 */
inline void NL3D_asmBlendLines(CRGBA *dst, const CRGBA *src0, const CRGBA *src1, uint index, uint len)
{
static uint64 blank = 0;
// The loop below is a do/while: it must not be entered with len==0
if(len==0)
return;
// Loop for pix.
__asm
{
// mm7= 0 for the whole loop (used to unpack)
movq mm7, blank
// read the factor and expand it to 4 words.
mov ebx, index
mov eax, 256
and ebx, 0xFF
sub eax, ebx
movd mm2, ebx // mm2= factor
movd mm3, eax // mm3= 1-factor (ie 256-factor)
punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
punpckldq mm3, mm3 // mm3= 0000 00BB 0000 00BB
packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
packssdw mm3, mm3 // mm3= 00BB 00BB 00BB 00BB
// setup ptrs. src1 is addressed relatively to src0, so only one source pointer is incremented.
mov esi, src0
mov edx, src1
sub edx, esi // difference between 2 src
mov edi, dst
mov ecx, len
// Loop
myLoop:
// Read
movd mm0, [esi]
movd mm1, [esi+edx]
// mul
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm3 // color0*(1-factor)
pmullw mm1, mm2 // color1*factor
// add, and unpack (>>8 since the factors sum to 256)
paddusw mm0, mm1
psrlw mm0, 8
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add esi, 4
add edi, 4
dec ecx
jnz myLoop
}
}
// ***************************************************************************
/** Lightmap Combining for Far level 2 (farthest)
 * Average 16 lumels, and deals with UserColor and TLI
 * For each texel: dst= ((colorMap[average of a 4x4 lumel block] + TLI, saturated) * USC) >> 7, saturated.
 * NB: emms is not executed here.
 * \param lumels top-left lumel of the first 4x4 block. 4 lumels are skipped per texel.
 * \param colorMap table of colors, indexed by the averaged lumel value.
 * \param srcTLIs line of TLI colors, one per texel.
 * \param srcUSCs line of user colors, one per texel.
 * \param dst destination line, 4 bytes per texel.
 * \param lineWidth distance in bytes between 2 lines of lumels.
 * \param nbTexel number of texels to write. Nothing is done if 0.
 */
static void NL3D_asmAssembleShading1x1(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
{
static uint64 blank = 0;
// The loop below is a do/while: it must not be entered with nbTexel==0
if(nbTexel==0)
return;
// local var: srcTLIs and srcUSCs are addressed relatively to dst, so only edi is incremented.
// (pointers are cast to uint: this code is compiled for 32 bits x86 inline asm only)
uint offsetTLIs= ((uint)srcTLIs-(uint)dst);
uint offsetUSCs= ((uint)srcUSCs-(uint)dst);
// Loop for pix.
__asm
{
// mm7= 0 for the whole loop (used to unpack)
movq mm7, blank
// setup ptrs
mov esi, lumels
mov edi, dst
mov ecx, nbTexel
// Loop
myLoop:
// Average shade part
//------------
mov ebx, colorMap
mov edx, lineWidth
// read and accumulate shade: 8 bits adds into al, with the carries accumulated into ah
xor eax,eax // avoid partial stall
// add with line 0
mov al, [esi + 0]
add al, [esi + 1]
adc ah, 0
add al, [esi + 2]
adc ah, 0
add al, [esi + 3]
adc ah, 0
// add with line 1
add al, [esi + edx + 0]
adc ah, 0
add al, [esi + edx + 1]
adc ah, 0
add al, [esi + edx + 2]
adc ah, 0
add al, [esi + edx + 3]
adc ah, 0
// add with line 2
add al, [esi + edx*2 + 0]
adc ah, 0
add al, [esi + edx*2 + 1]
adc ah, 0
add al, [esi + edx*2 + 2]
adc ah, 0
add al, [esi + edx*2 + 3]
adc ah, 0
// add with line 3 (edx= 3*lineWidth)
lea edx, [edx + edx*2]
add al, [esi + edx + 0]
adc ah, 0
add al, [esi + edx + 1]
adc ah, 0
add al, [esi + edx + 2]
adc ah, 0
add al, [esi + edx + 3]
adc ah, 0
// average (sum of 16 lumels / 16)
shr eax, 4
// convert to RGBA from the color Map
movd mm0, [ebx + eax*4]
// Assemble part
//------------
mov edx, offsetTLIs
mov ebx, offsetUSCs
// Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 1 texel after the current TLI; only the low 4 bytes are kept below.
paddusb mm0, [edi + edx]
// mul with USC
movd mm1, [edi + ebx]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm1
// unpack (>>7, and saturate to 255)
psrlw mm0, 7
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add esi, 4 // skip 4 lumels
add edi, 4 // next texel
dec ecx
jnz myLoop
}
}
// ***************************************************************************
/** Lightmap Combining for Far level 1 (middle)
 * Average 4 lumels, and deals with UserColor and TLI
 * For each texel: dst= ((colorMap[average of a 2x2 lumel block] + TLI, saturated) * USC) >> 7, saturated.
 * NB: emms is not executed here.
 * \param lumels top-left lumel of the first 2x2 block. 2 lumels are skipped per texel.
 * \param colorMap table of colors, indexed by the averaged lumel value.
 * \param srcTLIs line of TLI colors, one per texel.
 * \param srcUSCs line of user colors, one per texel.
 * \param dst destination line, 4 bytes per texel.
 * \param lineWidth distance in bytes between 2 lines of lumels.
 * \param nbTexel number of texels to write. Nothing is done if 0.
 */
static void NL3D_asmAssembleShading2x2(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
{
static uint64 blank = 0;
// The loop below is a do/while: it must not be entered with nbTexel==0
if(nbTexel==0)
return;
// local var: srcTLIs and srcUSCs are addressed relatively to dst, so only edi is incremented.
// (pointers are cast to uint: this code is compiled for 32 bits x86 inline asm only)
uint offsetTLIs= ((uint)srcTLIs-(uint)dst);
uint offsetUSCs= ((uint)srcUSCs-(uint)dst);
// Loop for pix.
__asm
{
// mm7= 0 for the whole loop (used to unpack)
movq mm7, blank
// setup ptrs
mov esi, lumels
mov edi, dst
mov ecx, nbTexel
// Loop
myLoop:
// Average shade part
//------------
mov ebx, colorMap
mov edx, lineWidth
// read and accumulate shade: 8 bits adds into al, with the carries accumulated into ah
xor eax,eax // avoid partial stall
mov al, [esi] // read lumel
// add with nbors
add al, [esi + 1]
adc ah, 0
add al, [esi + edx]
adc ah, 0
add al, [esi + edx + 1]
adc ah, 0
// average (sum of 4 lumels / 4)
shr eax, 2
// convert to RGBA from the color Map
movd mm0, [ebx + eax*4]
// Assemble part
//------------
mov edx, offsetTLIs
mov ebx, offsetUSCs
// Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 1 texel after the current TLI; only the low 4 bytes are kept below.
paddusb mm0, [edi + edx]
// mul with USC
movd mm1, [edi + ebx]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm1
// unpack (>>7, and saturate to 255)
psrlw mm0, 7
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add esi, 2 // skip 2 lumels
add edi, 4 // next texel
dec ecx
jnz myLoop
}
}
// ***************************************************************************
# pragma warning (disable : 4731) // frame pointer register 'ebp' modified by inline assembly code
/** Lightmap Combining for Far level 0 (nearest)
 * read 1 lumel, and deals with UserColor and TLI
 * For each texel: dst= ((colorMap[lumel] + TLI, saturated) * USC) >> 7, saturated.
 * NB: emms is not executed here.
 * \param lumels line of lumels, one per texel.
 * \param colorMap table of colors, indexed by the lumel value.
 * \param srcTLIs line of TLI colors, one per texel.
 * \param srcUSCs line of user colors, one per texel.
 * \param dst destination line, 4 bytes per texel.
 * \param nbTexel number of texels to write. Nothing is done if 0.
 */
static void NL3D_asmAssembleShading4x4(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint nbTexel)
{
static uint64 blank = 0;
// The loop below is a do/while: it must not be entered with nbTexel==0
if(nbTexel==0)
return;
// Loop for pix.
__asm
{
// Use ebp as a register for faster access... It is saved here, and restored after the loop.
push ebp
movq mm7, blank
// setup ptrs. srcTLIs and srcUSCs are addressed relatively to dst, so only edi is incremented.
mov esi, lumels
mov edi, dst
mov edx, srcTLIs
sub edx, edi // difference src and dest
mov ebx, srcUSCs
sub ebx, edi // difference src and dest
mov ecx, nbTexel
// set ebp after reading locals... (no parameter/local is accessed once ebp is overwritten)
mov ebp, colorMap
// Loop
myLoop:
// read shade RGBA into the color Map
xor eax,eax // avoid partial stall
mov al,[esi] // read lumel
movd mm0, [ebp + eax*4]
// Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 1 texel after the current TLI; only the low 4 bytes are kept below.
paddusb mm0, [edi + edx]
// mul with USC
movd mm1, [edi + ebx]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm1
// unpack (>>7, and saturate to 255)
psrlw mm0, 7
packuswb mm0, mm0
// store
movd [edi], mm0
// next pix
add esi, 1 // next lumel
add edi, 4 // next texel
dec ecx
jnz myLoop
// restore
pop ebp
}
}
# pragma warning (default : 4731) // frame pointer register 'ebp' modified by inline assembly code
#else // NL_OS_WINDOWS
// Dummy for non-windows platforms, and for builds with NL_NO_ASM defined.
// These empty versions only exist so that the callers compile: they write nothing.
// In NL3D_expandLightmap, asmMMX stays false in such builds, so the calls guarded by it are never executed.
// NOTE(review): the callers of the NL3D_asmAssembleShading*() functions are presumably guarded the same way - confirm.
inline void NL3D_asmEndMMX() {}
inline void NL3D_asmExpandLineColor565(const uint16 *src, CRGBA *dst, uint du, uint len) {}
inline void NL3D_asmExpandLineColor8888(const CRGBA *src, CRGBA *dst, uint du, uint len) {}
inline void NL3D_asmBlendLines(CRGBA *dst, const CRGBA *src0, const CRGBA *src1, uint index, uint len) {}
static void NL3D_asmAssembleShading1x1(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
{
}
static void NL3D_asmAssembleShading2x2(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
{
}
static void NL3D_asmAssembleShading4x4(const uint8 *lumels, const CRGBA *colorMap,
const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint nbTexel)
{
}
#endif // NL_OS_WINDOWS
// ***************************************************************************
extern "C" void NL3D_expandLightmap (const NL3D_CExpandLightmap* pLightmap)
{
bool asmMMX= false;
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
asmMMX= CSystemInfo::hasMMX();
// A CTileColor must be a 565 only.
nlassert(sizeof(CTileColor)==2);
#endif
// Expanded width
uint dstWidth=(pLightmap->Width-1)*pLightmap->MulFactor;
uint dstHeight=(pLightmap->Height-1)*pLightmap->MulFactor;
// *** First expand user color and TLI colors
// First pass, expand on U
static CRGBA expandedUserColorLine[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*
(NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
static CRGBA expandedTLIColorLine[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*
(NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
// Second pass, expand on V.
static CRGBA expandedUserColor[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE *
(NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
static CRGBA expandedTLIColor[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE *
(NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
// ** Expand on U
//=========
uint u, v;
// Expansion factor
uint expandFactor=((pLightmap->Width-1)<<8)/(dstWidth-1);
// Destination pointer
CRGBA *expandedUserColorLinePtr= expandedUserColorLine;
CRGBA *expandedTLIColorLinePtr= expandedTLIColorLine;
// Source pointer
const NL3D::CTileColor *colorTilePtr=pLightmap->ColorTile;
const NLMISC::CRGBA *colorTLIPtr= pLightmap->TLIColor;
// Go for U
for (v=0; v<pLightmap->Height; v++)
{
// First pixel
expandedUserColorLinePtr[0].set565 (colorTilePtr[0].Color565);
expandedTLIColorLinePtr[0]= colorTLIPtr[0];
// MMX implementation.
//-------------
if(asmMMX)
{
NL3D_asmExpandLineColor565(&colorTilePtr->Color565, expandedUserColorLinePtr, expandFactor, dstWidth-2);
NL3D_asmExpandLineColor8888(colorTLIPtr, expandedTLIColorLinePtr, expandFactor, dstWidth-2);
}
// C implementation
//-------------
else
{
// Index next pixel
uint srcIndexPixel=expandFactor;
for (u=1; u<dstWidth-1; u++)
{
// Check
nlassert ( (u+v*dstWidth) < (sizeof(expandedUserColorLine)/sizeof(CRGBA)) );
// Color index
uint srcIndex=srcIndexPixel>>8;
//nlassert (srcIndex>=0); // uint => always >= 0
nlassert (srcIndex<pLightmap->Width-1);
// Compute current color
CRGBA color0;
CRGBA color1;
color0.set565 (colorTilePtr[srcIndex].Color565);
color1.set565 (colorTilePtr[srcIndex+1].Color565);
expandedUserColorLinePtr[u].blendFromui (color0, color1, srcIndexPixel&0xff);
// Compute current TLI color
color0= colorTLIPtr[srcIndex];
color1= colorTLIPtr[srcIndex+1];
expandedTLIColorLinePtr[u].blendFromui (color0, color1, srcIndexPixel&0xff);
// Next index
srcIndexPixel+=expandFactor;
}
}
// Last pixel
expandedUserColorLinePtr[dstWidth-1].set565 (colorTilePtr[pLightmap->Width-1].Color565);
expandedTLIColorLinePtr[dstWidth-1]= colorTLIPtr[pLightmap->Width-1];
// Next line
expandedUserColorLinePtr+= dstWidth;
expandedTLIColorLinePtr+= dstWidth;
colorTilePtr+=pLightmap->Width;
colorTLIPtr+=pLightmap->Width;
}
// stop MMX if used
if(asmMMX)
NL3D_asmEndMMX();
// ** Expand on V
//=========
// Expansion factor
expandFactor=((pLightmap->Height-1)<<8)/(dstHeight-1);
// Destination pointer
CRGBA *expandedUserColorPtr= expandedUserColor;
CRGBA *expandedTLIColorPtr= expandedTLIColor;
// Src pointer
expandedUserColorLinePtr= expandedUserColorLine;
expandedTLIColorLinePtr= expandedTLIColorLine;
// Copy first row
memcpy(expandedUserColorPtr, expandedUserColorLinePtr, dstWidth*sizeof(CRGBA));
memcpy(expandedTLIColorPtr, expandedTLIColorLinePtr, dstWidth*sizeof(CRGBA));
// Next line
expandedUserColorPtr+=dstWidth;
expandedTLIColorPtr+=dstWidth;
// Index next pixel
uint indexPixel=expandFactor;
// Go for V
for (v=1; v<dstHeight-1; v++)
{
// Color index
uint index=indexPixel>>8;
// Source pointer
CRGBA *colorTilePtr0= expandedUserColorLine + index*dstWidth;
CRGBA *colorTilePtr1= expandedUserColorLine + (index+1)*dstWidth;
CRGBA *colorTLIPtr0= expandedTLIColorLine + index*dstWidth;
CRGBA *colorTLIPtr1= expandedTLIColorLine + (index+1)*dstWidth;
// MMX implementation.
//-------------
if(asmMMX)
{
NL3D_asmBlendLines(expandedUserColorPtr, colorTilePtr0, colorTilePtr1, indexPixel, dstWidth);
NL3D_asmBlendLines(expandedTLIColorPtr, colorTLIPtr0, colorTLIPtr1, indexPixel, dstWidth);
}
// C implementation
//-------------
else
{
// Copy the row
for (u=0; u<dstWidth; u++)
{
expandedUserColorPtr[u].blendFromui (colorTilePtr0[u], colorTilePtr1[u], indexPixel&0xff);
expandedTLIColorPtr[u].blendFromui (colorTLIPtr0[u], colorTLIPtr1[u], indexPixel&0xff);
}
}
// Next index
indexPixel+=expandFactor;
// Next line
expandedUserColorPtr+=dstWidth;
expandedTLIColorPtr+=dstWidth;
}
// stop MMX if used
if(asmMMX)
NL3D_asmEndMMX();
// Last row
// Destination pointer
expandedUserColorPtr= expandedUserColor + dstWidth*(dstHeight-1);
expandedTLIColorPtr= expandedTLIColor + dstWidth*(dstHeight-1);
// Src pointer
expandedUserColorLinePtr= expandedUserColorLine + dstWidth*(pLightmap->Height-1);
expandedTLIColorLinePtr= expandedTLIColorLine + dstWidth*(pLightmap->Height-1);
// Copy last row
memcpy(expandedUserColorPtr, expandedUserColorLinePtr, dstWidth*sizeof(CRGBA));
memcpy(expandedTLIColorPtr, expandedTLIColorLinePtr, dstWidth*sizeof(CRGBA));
// *** Now combine with shading
//=========
// Switch to the optimal method for each expansion value
switch (pLightmap->MulFactor)
{
case 1:
{
// Make 4x4 -> 1x1 blend
CRGBA *lineUSCPtr= expandedUserColor;
CRGBA *lineTLIPtr= expandedTLIColor;
CRGBA *lineDestPtr=pLightmap->DstPixels;
const uint8 *lineLumelPtr=pLightmap->LumelTile;
uint lineWidth=dstWidth<<2;
uint lineWidthx2=lineWidth<<1;
uint lineWidthx3=lineWidthx2+lineWidth;
uint lineWidthx4=lineWidth<<2;
// For each line
for (v=0; v<dstHeight; v++)
{
// MMX implementation.
//-------------
if(asmMMX)
{
NL3D_asmAssembleShading1x1(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
lineWidth, dstWidth);
}
// C implementation
//-------------
else
{
// For each lumel block
for (u=0; u<dstWidth; u++)
{
// index
uint lumelIndex=u<<2;
// Shading is filtred
uint shading=
((uint)lineLumelPtr[lumelIndex]+(uint)lineLumelPtr[lumelIndex+1]+(uint)lineLumelPtr[lumelIndex+2]+(uint)lineLumelPtr[lumelIndex+3]
+(uint)lineLumelPtr[lumelIndex+lineWidth]+(uint)lineLumelPtr[lumelIndex+1+lineWidth]+(uint)lineLumelPtr[lumelIndex+2+lineWidth]+(uint)lineLumelPtr[lumelIndex+3+lineWidth]
+(uint)lineLumelPtr[lumelIndex+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+1+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+2+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+3+lineWidthx2]
+(uint)lineLumelPtr[lumelIndex+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+1+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+2+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+3+lineWidthx3]
)>>4;
// Add shading with TLI color.
CRGBA col;
col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
// Mul by the userColor
lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
}
}
// Next line
lineUSCPtr+=dstWidth;
lineTLIPtr+=dstWidth;
lineDestPtr+=dstWidth;
lineLumelPtr+=lineWidthx4;
}
break;
}
case 2:
{
// Make 2x2 -> 1x1 blend
CRGBA *lineUSCPtr= expandedUserColor;
CRGBA *lineTLIPtr= expandedTLIColor;
CRGBA *lineDestPtr=pLightmap->DstPixels;
const uint8 *lineLumelPtr=pLightmap->LumelTile;
uint lineWidth=dstWidth*2;
uint lineWidthx2=lineWidth<<1;
// For each line
for (v=0; v<dstHeight; v++)
{
// MMX implementation.
//-------------
if(asmMMX)
{
NL3D_asmAssembleShading2x2(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
lineWidth, dstWidth);
}
// C implementation
//-------------
else
{
// For each lumel block
for (u=0; u<dstWidth; u++)
{
// index
uint lumelIndex=u<<1;
// Shading is filtred
uint shading=
((uint)lineLumelPtr[lumelIndex]+(uint)lineLumelPtr[lumelIndex+1]+(uint)lineLumelPtr[lumelIndex+lineWidth]+(uint)lineLumelPtr[lumelIndex+1+lineWidth])>>2;
// Add shading with TLI color.
CRGBA col;
col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
// Mul by the userColor
lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
}
}
// Next line
lineUSCPtr+=dstWidth;
lineTLIPtr+=dstWidth;
lineDestPtr+=dstWidth;
lineLumelPtr+=lineWidthx2;
}
break;
}
case 4:
// Make copy
CRGBA *lineUSCPtr= expandedUserColor;
CRGBA *lineTLIPtr= expandedTLIColor;
CRGBA *lineDestPtr=pLightmap->DstPixels;
const uint8 *lineLumelPtr=pLightmap->LumelTile;
uint nbTexel=dstWidth*dstHeight;
// MMX implementation.
//-------------
if(asmMMX)
{
NL3D_asmAssembleShading4x4(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
nbTexel);
}
// C implementation
//-------------
else
{
// For each pixel
for (u=0; u<nbTexel; u++)
{
// Shading is filtred
uint shading=lineLumelPtr[u];
// Add shading with TLI color.
CRGBA col;
col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
// Mul by the userColor
lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
}
}
break;
}
// stop MMX if used
if(asmMMX)
NL3D_asmEndMMX();
}
// ***************************************************************************
// ***************************************************************************
// NL3D_drawFarTileInFar*. C and Asm Part
// ***************************************************************************
// ***************************************************************************
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
// ***************************************************************************
// MMX version of the "modulate" line copy: for each of the len pixels, every 8 bits channel of
// the src0 pixel is multiplied by the matching channel of the src1 pixel, the product is shifted
// right by 8, and the 32 bits result is stored in dst.
// NB: the whole 32 bits pixel is written (4 channels), while the C path of
// NL3D_drawFarTileInFarTexture only writes R, G and B.
// NB: no EMMS is issued here. The callers in this file call NL3D_asmEndMMX() once all lines are done.
// \param dst destination pixels. Advance of dstDeltaX pixels per iteration.
// \param src0 first source (callers pass the diffuse pixels). Advance of src0DeltaX pixels per iteration.
// \param src1 second source (callers pass the lighting pixels). Advance of 1 pixel per iteration.
// \param len number of pixels to process. Nothing is done if 0.
// \param src0DeltaX step of src0, in pixels. Converted to bytes with a *4 (assumes 4 bytes CRGBA).
// \param dstDeltaX step of dst, in pixels. Converted to bytes with a *4 (assumes 4 bytes CRGBA).
inline void NL3D_asmModulateLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
uint len, uint src0DeltaX, uint dstDeltaX)
{
// 64 bits zero, loaded in mm7 to unpack bytes into 16 bits words
static uint64 blank= 0;
// The loop decrements ecx before testing it, so a 0 length must be rejected before entering
if(len==0)
return;
__asm
{
movq mm7, blank
mov esi, src0 // esi point to src Pixels
mov edx, src1 // edx point to src lighting pixels
mov edi, dst
mov ecx, len
// compute increments for esi and edi
mov eax, src0DeltaX
mov ebx, dstDeltaX
// pixel step -> byte step (*4)
sal eax, 2
sal ebx, 2
myLoop:
// read colors
movd mm0, [esi]
movd mm1, [edx]
// mul mm0 and mm1
// unpack the 4 bytes of each pixel to 4 words (mm7 is 0), multiply per channel, then >>8
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm1
psrlw mm0, 8
// pack
packuswb mm0, mm0
// out
movd [edi], mm0
// increment
add esi, eax
add edi, ebx
// src1 always advances of one 4 bytes pixel
add edx, 4
dec ecx
jnz myLoop
}
}
// ***************************************************************************
// MMX version of the "modulate then alpha blend" line copy. For each of the len pixels:
//   modulated= (src0 * src1) >> 8, per 8 bits channel
//   dst= (modulated * A + dst * (256 - A)) >> 8, per 8 bits channel
// where A is the top byte of the 32 bits src0 pixel (the alpha, see "NB: ABGR" below).
// NB: the whole 32 bits pixel is written (4 channels), while the C path of
// NL3D_drawFarTileInFarTextureAlpha only writes R, G and B and uses (255 - A) as inverse alpha.
// NB: no EMMS is issued here. The callers in this file call NL3D_asmEndMMX() once all lines are done.
// \param dst destination pixels, read then overwritten. Advance of dstDeltaX pixels per iteration.
// \param src0 first source (callers pass the diffuse pixels), provides the alpha. Advance of src0DeltaX pixels per iteration.
// \param src1 second source (callers pass the lighting pixels). Advance of 1 pixel per iteration.
// \param len number of pixels to process. Nothing is done if 0.
// \param src0DeltaX step of src0, in pixels. Converted to bytes with a *4 (assumes 4 bytes CRGBA).
// \param dstDeltaX step of dst, in pixels. Converted to bytes with a *4 (assumes 4 bytes CRGBA).
inline void NL3D_asmModulateAndBlendLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
uint len, uint src0DeltaX, uint dstDeltaX)
{
// 64 bits zero, loaded in mm7 to unpack bytes into 16 bits words
static uint64 blank= 0;
// 256 in each of the four 16 bits words, loaded in mm6 to compute (256 - A)
static uint64 one= INT64_CONSTANT (0x0100010001000100);
// The loop decrements ecx before testing it, so a 0 length must be rejected before entering
if(len==0)
return;
__asm
{
movq mm7, blank
movq mm6, one
mov esi, src0 // esi point to src Pixels
mov edx, src1 // edx point to src lighting pixels
mov edi, dst
mov ecx, len
// compute increments for esi and edi
mov eax, src0DeltaX
mov ebx, dstDeltaX
// pixel step -> byte step (*4)
sal eax, 2
sal ebx, 2
myLoop:
// read colors
movd mm0, [esi]
movd mm1, [edx]
// save and unpack Alpha. NB: ABGR
movq mm2, mm0
psrld mm2, 24 // mm2= 0000 0000 0000 00AA
punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
// negate with 256.
// mm3= 256 - A in each word (unsigned saturated subtract)
movq mm3, mm6
psubusw mm3, mm2
// mul mm0 and mm1
// unpack the 4 bytes of each pixel to 4 words (mm7 is 0), multiply per channel, then >>8
punpcklbw mm0, mm7
punpcklbw mm1, mm7
pmullw mm0, mm1
psrlw mm0, 8
// Alpha Blend with mm3 and mm2
movd mm1, [edi] // read dest
punpcklbw mm1, mm7
pmullw mm0, mm2 // mm0= srcColor*A
pmullw mm1, mm3 // mm1= dstColor*(1-A)
// add and pack
// unsigned saturated add of the two weighted colors, then >>8 to come back to 8 bits
paddusw mm0, mm1
psrlw mm0, 8
packuswb mm0, mm0
// out
movd [edi], mm0
// increment
add esi, eax
add edi, ebx
// src1 always advances of one 4 bytes pixel
add edx, 4
dec ecx
jnz myLoop
}
}
#else // NL_OS_WINDOWS
// Dummy (empty) versions, compiled when the MMX asm is not available: non-Windows platforms, or NL_NO_ASM defined
// Empty stand-in for the MMX NL3D_asmModulateLineColors(), so that NL3D_drawFarTileInFarTexture()
// still compiles when the asm version is not built. It does nothing: dst is left untouched.
// NOTE(review): presumably pTileFar->AsmMMX is never set on such builds, so this is never reached - to confirm.
inline void NL3D_asmModulateLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
uint len, uint src0DeltaX, uint dstDeltaX)
{
// Intentionally empty
}
// Empty stand-in for the MMX NL3D_asmModulateAndBlendLineColors(), so that
// NL3D_drawFarTileInFarTextureAlpha() still compiles when the asm version is not built.
// It does nothing: dst is left untouched.
// NOTE(review): presumably pTileFar->AsmMMX is never set on such builds, so this is never reached - to confirm.
inline void NL3D_asmModulateAndBlendLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
uint len, uint src0DeltaX, uint dstDeltaX)
{
// Intentionally empty
}
#endif
// ***************************************************************************
void NL3D_drawFarTileInFarTexture (const NL3D_CComputeTileFar* pTileFar)
{
// Pointer of the Src diffuse pixels
const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
// Pointer of the Dst pixels
const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
// Pointer of the Dst pixels
CRGBA* pDstPixels=pTileFar->DstPixels;
// For each pixels
int x, y;
for (y=0; y<pTileFar->Size; y++)
{
// MMX implementation
//---------
if(pTileFar->AsmMMX)
{
NL3D_asmModulateLineColors(pDstPixels, pSrcPixels, pSrcLightPixels,
pTileFar->Size, pTileFar->SrcDeltaX, pTileFar->DstDeltaX);
}
// C Implementation.
//---------
else
{
// Pointer of the source line
const CRGBA* pSrcLine=pSrcPixels;
// Pointer of the source lighting line
const CRGBA* pSrcLightingLine=pSrcLightPixels;
// Pointer of the destination line
CRGBA* pDstLine=pDstPixels;
// For each pixels on the line
for (x=0; x<pTileFar->Size; x++)
{
// Read and write a pixel
pDstLine->R=(uint8)(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8);
pDstLine->G=(uint8)(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8);
pDstLine->B=(uint8)(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8);
// Next pixel
pSrcLine+=pTileFar->SrcDeltaX;
pSrcLightingLine++;
pDstLine+=pTileFar->DstDeltaX;
}
}
// Next line
pSrcPixels+=pTileFar->SrcDeltaY;
pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
pDstPixels+=pTileFar->DstDeltaY;
}
// stop MMX if used
if(pTileFar->AsmMMX)
NL3D_asmEndMMX();
}
// ***************************************************************************
// Draw a far tile over the far texture with alpha blending:
//   modulated= (diffuse.RGB * lighting.RGB) >> 8
//   dst.RGB= (modulated * A + dst.RGB * (255 - A)) >> 8, with A= diffuse.A
// The tile is a square of pTileFar->Size pixels. The diffuse source is walked with
// SrcDeltaX/SrcDeltaY, the lighting source with 1/SrcLightingDeltaY and the destination
// with DstDeltaX/DstDeltaY. If pTileFar->AsmMMX is set, each line is done by
// NL3D_asmModulateAndBlendLineColors() instead of the C loop.
// NOTE(review): the MMX path weights the destination with (256 - A) and writes the 4 channels,
// while the C path uses (255 - A) and only writes R, G, B. To confirm which one is intended.
void NL3D_drawFarTileInFarTextureAlpha (const NL3D_CComputeTileFar* pTileFar)
{
	// Pointer of the Src diffuse pixels (their alpha drives the blend)
	const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
	// Pointer of the Src lighting pixels
	const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
	// Pointer of the Dst pixels
	CRGBA* pDstPixels=pTileFar->DstPixels;
	// Blend each line of the tile over the destination
	int x, y;
	for (y=0; y<pTileFar->Size; y++)
	{
		// MMX implementation
		//---------
		if(pTileFar->AsmMMX)
		{
			NL3D_asmModulateAndBlendLineColors(pDstPixels, pSrcPixels, pSrcLightPixels,
				pTileFar->Size, pTileFar->SrcDeltaX, pTileFar->DstDeltaX);
		}
		// C Implementation.
		//---------
		else
		{
			// Pointer of the source line
			const CRGBA* pSrcLine=pSrcPixels;
			// Pointer of the source lighting line
			const CRGBA* pSrcLightingLine=pSrcLightPixels;
			// Pointer of the Dst pixels
			CRGBA* pDstLine=pDstPixels;
			// For each pixels on the line
			for (x=0; x<pTileFar->Size; x++)
			{
				// Blend factors. NB: no "register" here, this specifier was removed from C++17.
				const uint alpha=pSrcLine->A;
				const uint oneLessAlpha=255-alpha;
				// Modulate diffuse by lighting, then blend over the destination
				pDstLine->R=(uint8)(((((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)*alpha+(uint)pDstLine->R*oneLessAlpha)>>8);
				pDstLine->G=(uint8)(((((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)*alpha+(uint)pDstLine->G*oneLessAlpha)>>8);
				pDstLine->B=(uint8)(((((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)*alpha+(uint)pDstLine->B*oneLessAlpha)>>8);
				// Next pixel
				pSrcLine+=pTileFar->SrcDeltaX;
				pSrcLightingLine++;
				pDstLine+=pTileFar->DstDeltaX;
			}
		}
		// Next line
		pSrcPixels+=pTileFar->SrcDeltaY;
		pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
		pDstPixels+=pTileFar->DstDeltaY;
	}
	// stop MMX if used
	if(pTileFar->AsmMMX)
		NL3D_asmEndMMX();
}
// ***************************************************************************
// TODO: asm implementation of this function \\//
//#ifdef NL_NO_ASM
void NL3D_drawFarTileInFarTextureAdditive (const NL3D_CComputeTileFar* pTileFar)
{
// Pointer of the Src diffuse pixels
const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
// Pointer of the Src additive pixels
const CRGBA* pSrcAddPixels=pTileFar->SrcAdditivePixels;
// Pointer of the Dst pixels
const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
// Pointer of the Dst pixels
CRGBA* pDstPixels=pTileFar->DstPixels;
// For each pixels
int x, y;
for (y=0; y<pTileFar->Size; y++)
{
// Pointer of the source line
const CRGBA* pSrcLine=pSrcPixels;
// Pointer of the source line
const CRGBA* pSrcAddLine=pSrcAddPixels;
// Pointer of the source lighting line
const CRGBA* pSrcLightingLine=pSrcLightPixels;
// Pointer of the destination line
CRGBA* pDstLine=pDstPixels;
// For each pixels on the line
for (x=0; x<pTileFar->Size; x++)
{
// Read and write a pixel
uint nTmp=(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)+(uint)pSrcAddLine->R;
if (nTmp>255)
nTmp=255;
pDstLine->R=(uint8)nTmp;
nTmp=(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)+(uint)pSrcAddLine->G;
if (nTmp>255)
nTmp=255;
pDstLine->G=(uint8)nTmp;
nTmp=(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)+(uint)pSrcAddLine->B;
if (nTmp>255)
nTmp=255;
pDstLine->B=(uint8)nTmp;
// Next pixel
pSrcLine+=pTileFar->SrcDeltaX;
pSrcAddLine+=pTileFar->SrcDeltaX;
pSrcLightingLine++;
pDstLine+=pTileFar->DstDeltaX;
}
// Next line
pSrcPixels+=pTileFar->SrcDeltaY;
pSrcAddPixels+=pTileFar->SrcDeltaY;
pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
pDstPixels+=pTileFar->DstDeltaY;
}
}
//#endif // NL_NO_ASM
// ***************************************************************************
// TODO: asm implementation of this function \\//
//#ifdef NL_NO_ASM
void NL3D_drawFarTileInFarTextureAdditiveAlpha (const NL3D_CComputeTileFar* pTileFar)
{
// Pointer of the Src pixels
const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
// Pointer of the Src pixels
const CRGBA* pSrcAddPixels=pTileFar->SrcAdditivePixels;
// Pointer of the Src pixels
const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
// Pointer of the Dst pixels
CRGBA* pDstPixels=pTileFar->DstPixels;
// Fill the buffer with layer 0
int x, y;
for (y=0; y<pTileFar->Size; y++)
{
// Pointer of the source line
const CRGBA* pSrcLine=pSrcPixels;
// Pointer of the source line
const CRGBA* pSrcAddLine=pSrcAddPixels;
// Pointer of the source lighting line
const CRGBA* pSrcLightingLine=pSrcLightPixels;
// Pointer of the Dst pixels
CRGBA* pDstLine=pDstPixels;
// For each pixels on the line
for (x=0; x<pTileFar->Size; x++)
{
// Read and write a pixel
register uint alpha=pSrcLine->A;
register uint oneLessAlpha=255-pSrcLine->A;
// Read and write a pixel
uint nTmp=(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)+(uint)pSrcAddLine->R;
if (nTmp>255)
nTmp=255;
pDstLine->R=(uint8)((nTmp*alpha+pDstLine->R*oneLessAlpha)>>8);
nTmp=(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)+(uint)pSrcAddLine->G;
if (nTmp>255)
nTmp=255;
pDstLine->G=(uint8)((nTmp*alpha+pDstLine->G*oneLessAlpha)>>8);
nTmp=(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)+(uint)pSrcAddLine->B;
if (nTmp>255)
nTmp=255;
pDstLine->B=(uint8)((nTmp*alpha+pDstLine->B*oneLessAlpha)>>8);
// Next pixel
pSrcLine+=pTileFar->SrcDeltaX;
pSrcAddLine+=pTileFar->SrcDeltaX;
pSrcLightingLine++;
pDstLine+=pTileFar->DstDeltaX;
}
// Next line
pSrcPixels+=pTileFar->SrcDeltaY;
pSrcAddPixels+=pTileFar->SrcDeltaY;
pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
pDstPixels+=pTileFar->DstDeltaY;
}
}
//#endif // NL_NO_ASM