khanat-opennel-code/code/nel/src/3d/lod_character_manager.cpp
2013-06-17 16:03:24 +02:00

1134 lines
32 KiB
C++

// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "std3d.h"
#include "nel/misc/common.h"
#include "nel/3d/lod_character_manager.h"
#include "nel/3d/lod_character_shape.h"
#include "nel/3d/lod_character_shape_bank.h"
#include "nel/3d/lod_character_instance.h"
#include "nel/misc/hierarchical_timer.h"
#include "nel/misc/fast_floor.h"
#include "nel/3d/lod_character_texture.h"
#include "nel/3d/ray_mesh.h"
#include "nel/misc/file.h"
#include "nel/misc/algo.h"
#include "nel/misc/fast_mem.h"
#include "nel/misc/system_info.h"
using namespace std;
using namespace NLMISC;
namespace NL3D
{
// ***************************************************************************
// Destination vertex layout: position, one UV set and a primary color.
// There is no normal in the destination because the lighting is precomputed on the CPU.
#define NL3D_CLOD_VERTEX_FORMAT (CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
// Byte size of one destination vertex, and byte offsets of its UV and color (checked by asserts in beginRender()).
#define NL3D_CLOD_VERTEX_SIZE 24
#define NL3D_CLOD_UV_OFF 12
#define NL3D_CLOD_COLOR_OFF 20
// Size of the big texture, counted in per-instance lod textures ("blocks").
#define NL3D_CLOD_TEXT_NLOD_WIDTH 16
#define NL3D_CLOD_TEXT_NLOD_HEIGHT 16
// NB: the expansions below are parenthesized so the macros stay correct inside any
// expression (eg: "a / NL3D_CLOD_TEXT_NUM_IDS" or "a % NL3D_CLOD_TEXT_NUM_IDS").
#define NL3D_CLOD_TEXT_NUM_IDS (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT)
#define NL3D_CLOD_BIGTEXT_WIDTH (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH)
#define NL3D_CLOD_BIGTEXT_HEIGHT (NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT)
// Default texture color. Alpha must be 255
#define NL3D_CLOD_DEFAULT_TEXCOLOR CRGBA(255,255,255,255)
// ***************************************************************************
/// Build a manager with default render limits, the shared big texture, the pool of free
/// texture ids, the unlit material and the default light correction matrix.
CLodCharacterManager::CLodCharacterManager()
{
	// Default render limits / state.
	_MaxNumVertices= 3000;
	_NumVBHard= 8;
	_Rendering= false;
	_LockDone= false;

	// The big texture shared by every instance. It is never released from memory
	// (simpler that way, and it only costs about 1 or 2 Mo: 16*32*16*32 * 4 bytes).
	_BigTexture= new CTextureBlank;
	_BigTexture->setReleasable(false);
	// RGBA in memory, uploaded as 16 bits, no mipmaps, clamped.
	_BigTexture->resize(NL3D_CLOD_BIGTEXT_WIDTH, NL3D_CLOD_BIGTEXT_HEIGHT, CBitmap::RGBA);
	_BigTexture->setUploadFormat(ITexture::RGB565);
	_BigTexture->setFilterMode(ITexture::Linear, ITexture::LinearMipMapOff);
	_BigTexture->setWrapS(ITexture::Clamp);
	_BigTexture->setWrapT(ITexture::Clamp);

	// At start every texture id is free: _FreeIds[k] == k.
	_FreeIds.resize(NL3D_CLOD_TEXT_NUM_IDS);
	const uint numIds= (uint)_FreeIds.size();
	for(uint id=0; id<numIds; ++id)
		_FreeIds[id]= id;

	// Unlit, alpha tested, double sided material reading the big texture.
	_Material.initUnlit();
	_Material.setAlphaTest(true);
	_Material.setDoubleSided(true);
	_Material.setTexture(0, _BigTexture);

	// Light correction: inverse of a Pi/2 rotation around Z (default for the Ryzom setup).
	_LightCorrectionMatrix.rotateZ((float)Pi/2);
	_LightCorrectionMatrix.invert();

	NL_SET_IB_NAME(_Triangles, "CLodCharacterManager::_Triangles");
}
// ***************************************************************************
/// Destructor: releases the banks, the name map and the vertex stream through reset().
CLodCharacterManager::~CLodCharacterManager()
{
	// NB: reset() asserts that we are not between beginRender() and endRender().
	reset();
}
// ***************************************************************************
void CLodCharacterManager::reset()
{
nlassert(!isRendering());
// delete shapeBanks.
for(uint i=0;i<_ShapeBankArray.size();i++)
{
if(_ShapeBankArray[i])
delete _ShapeBankArray[i];
}
// clears containers
contReset(_ShapeBankArray);
contReset(_ShapeMap);
// reset render part.
_VertexStream.release();
}
// ***************************************************************************
/// Allocate a new empty shape bank and return its id (its index in _ShapeBankArray).
/// A previously freed (NULL) entry is reused in priority, else the array grows by one.
uint32 CLodCharacterManager::createShapeBank()
{
	// Look for the first free entry.
	uint slot= 0;
	while(slot<_ShapeBankArray.size() && _ShapeBankArray[slot]!=NULL)
		++slot;

	// Found one: recycle it.
	if(slot<_ShapeBankArray.size())
	{
		_ShapeBankArray[slot]= new CLodCharacterShapeBank;
		return slot;
	}

	// No free entry: append.
	_ShapeBankArray.push_back(new CLodCharacterShapeBank);
	return (uint32)_ShapeBankArray.size()-1;
}
// ***************************************************************************
/// Read-only access to a bank. Returns NULL if bankId is out of range (or if the entry is free).
const CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId) const
{
	if(bankId<_ShapeBankArray.size())
		return _ShapeBankArray[bankId];
	return NULL;
}
// ***************************************************************************
/// Read/write access to a bank. Returns NULL if bankId is out of range (or if the entry is free).
CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId)
{
	if(bankId<_ShapeBankArray.size())
		return _ShapeBankArray[bankId];
	return NULL;
}
// ***************************************************************************
/// Delete the bank of the given id and mark its entry as free (NULL), so that
/// createShapeBank() can reuse it. Out of range ids and already free entries are ignored.
void CLodCharacterManager::deleteShapeBank(uint32 bankId)
{
	// Out of range: nothing to delete. NB: the test must reject ids >= size();
	// indexing the array with such an id would read past its end.
	if(bankId>=_ShapeBankArray.size())
		return;

	// Deleting a NULL entry is a no-op.
	delete _ShapeBankArray[bankId];
	_ShapeBankArray[bankId]= NULL;
}
// ***************************************************************************
/// Look up a shape id by name in the map built by compile(). Returns -1 if the name is unknown.
sint32 CLodCharacterManager::getShapeIdByName(const std::string &name) const
{
	CstItStrIdMap found= _ShapeMap.find(name);
	if(found!=_ShapeMap.end())
		return found->second;
	return -1;
}
// ***************************************************************************
/// Get a shape from its id. The high 16 bits of shapeId select the bank, the low 16 bits
/// select the shape inside this bank. Returns NULL if the bank does not exist
/// (else returns what the bank returns for this shape index).
const CLodCharacterShape *CLodCharacterManager::getShape(uint32 shapeId) const
{
	// bank part of the id.
	const CLodCharacterShapeBank *bank= getShapeBank(shapeId >> 16);
	if(!bank)
		return NULL;

	// shape part of the id.
	return bank->getShape(shapeId & 0xFFFF);
}
// ***************************************************************************
bool CLodCharacterManager::compile()
{
bool error= false;
// clear the map
contReset(_ShapeMap);
// build the map
for(uint i=0; i<_ShapeBankArray.size(); i++)
{
if(_ShapeBankArray[i])
{
// Parse all Shapes
for(uint j=0; j<_ShapeBankArray[i]->getNumShapes(); j++)
{
// build the shape Id
uint shapeId= (i<<16) + j;
// get the shape
const CLodCharacterShape *shape= _ShapeBankArray[i]->getShape(j);
if(shape)
{
const string &name= shape->getName();
ItStrIdMap it= _ShapeMap.find(name);
if(it == _ShapeMap.end())
// insert the id in the map
_ShapeMap.insert(make_pair(name, shapeId));
else
{
error= true;
nlwarning("Found a Character Lod with same name in the manager: %s", name.c_str());
}
}
}
}
}
return error;
}
// ***************************************************************************
// ***************************************************************************
// Render
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
/// Set the max number of vertices that can be added between beginRender() and endRender().
/// The vertex stream is re-created with this size at the next beginRender().
void CLodCharacterManager::setMaxVertex(uint32 maxVertex)
{
	// forbidden while rendering
	nlassert(!isRendering());

	_MaxNumVertices= maxVertex;
}
// ***************************************************************************
/// Set the number of VBHard requested from the vertex stream.
/// The vertex stream is re-created with this number at the next beginRender().
void CLodCharacterManager::setVertexStreamNumVBHard(uint32 numVBHard)
{
	// forbidden while rendering
	nlassert(!isRendering());

	_NumVBHard= numVBHard;
}
// ***************************************************************************
/// Start a render pass: reset the vertex/index counters, (re)create the vertex stream if the
/// driver or the limits changed, and pre-size the index buffer.
/// \param driver the driver to render with. Must not be NULL.
/// \param managerPos origin of the manager; added vertices are stored relative to it.
void CLodCharacterManager::beginRender(IDriver *driver, const CVector &managerPos)
{
	H_AUTO( NL3D_CharacterLod_beginRender );

	// must not already be between beginRender() and endRender()
	nlassert(!isRendering());

	// Nothing added yet.
	_CurrentVertexId=0;
	_CurrentTriId= 0;

	nlassert(driver);

	// The stream must be rebuilt if the driver, the vertex capacity or the number of VB changed.
	const bool mustChangeVertexStream=
		_VertexStream.getDriver() != driver
		|| _MaxNumVertices != _VertexStream.getMaxVertices()
		|| _NumVBHard != _VertexStream.getNumVB();
	if( mustChangeVertexStream )
	{
		// Check that the hardcoded offsets used by addRenderCharacterKey() match the real format.
		CVertexBuffer vb;
		vb.setVertexFormat(NL3D_CLOD_VERTEX_FORMAT);
		nlassert( NL3D_CLOD_UV_OFF == vb.getTexCoordOff());
		nlassert( NL3D_CLOD_COLOR_OFF == vb.getColorOff());

		// Rebuild the stream. NB: no volatile lock, because the buffer is kept locked.
		_VertexStream.release();
		_VertexStream.init(driver, NL3D_CLOD_VERTEX_FORMAT, _MaxNumVertices, _NumVBHard, "CLodManagerVB", false);
	}

	// The buffer is not locked here: this is done when the first instance is added.
	nlassert(!_LockDone);
	_VertexSize= _VertexStream.getVertexSize();
	// addRenderCharacterKey() is hardcoded for this size: 12 (position) + 8 (UV) + 4 (color).
	nlassert( _VertexSize == NL3D_CLOD_VERTEX_SIZE );

	// Pre-allocate 2 indices per vertex, to limit reallocations in addRenderCharacterKey().
	const uint minNumIndexes= _MaxNumVertices * 2;
	if(_Triangles.getNumIndexes()<minNumIndexes)
	{
		_Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
		_Triangles.setNumIndexes(minNumIndexes);
	}

	// Keep the manager origin for the whole pass.
	_ManagerMatrixPos= managerPos;

	// Rendering can start.
	_Rendering= true;
}
// ***************************************************************************
/// Compute the lit RGB of one vertex: clamp8( ((diffuse * intensity)>>8) + ambient ) per channel,
/// where intensity= clamp8( floor(lightObjectSpace dot normal) ).
/// The caller (addRenderCharacterKey()) passes a light vector pre-multiplied by 255.
/// NB: lightRes.A is not written.
static inline void computeLodLighting(CRGBA &lightRes, const CVector &lightObjectSpace, const CVector &normalPtr, CRGBA ambient, CRGBA diffuse)
{
	// diffuse factor
	float f= lightObjectSpace * normalPtr;
	sint intensity= NLMISC::OptFastFloor(f);
	fastClamp8(intensity);

	// modulate the diffuse, then add the ambient
	sint red= ((diffuse.R * intensity)>>8) + ambient.R;
	sint green= ((diffuse.G * intensity)>>8) + ambient.G;
	sint blue= ((diffuse.B * intensity)>>8) + ambient.B;

	// saturate
	fastClamp8(red);
	fastClamp8(green);
	fastClamp8(blue);

	lightRes.R= red;
	lightRes.G= green;
	lightRes.B= blue;
}
// ***************************************************************************
/// Append the current animation key of an instance to the vertex stream and to the index buffer.
/// Must be called between beginRender() and endRender().
/// For each vertex of the shape: the packed position is transformed by worldMatrix (scaled by the
/// key unpack factor) and stored relative to the manager position, the instance UV is copied, and
/// the color is computed per vertex from the shape normal, the light direction, and the
/// ambient/diffuse colors. The vertex alpha comes from instance.VertexAlphas (255 if this array
/// has not the right size).
/// \param instance the instance to add. initInstance() must have been called on it (asserted through its UVs).
/// \param worldMatrix world matrix of the instance.
/// \param paramAmbient ambient color. Its alpha is ignored.
/// \param paramDiffuse diffuse color. Its alpha is ignored.
/// \param lightDir light direction, in world space.
/// \return false only if there is not enough room in the vertex stream for this shape (nothing is added).
/// Returns true otherwise, including when nothing is added because the shape or the anim key
/// is not found, or because the shape has no vertex.
bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instance, const CMatrix &worldMatrix,
CRGBA paramAmbient, CRGBA paramDiffuse, const CVector &lightDir)
{
H_AUTO ( NL3D_CharacterLod_AddRenderKey )
nlassert(_VertexStream.getDriver());
// we must be between beginRender() and endRender()
nlassert(isRendering());
// regroup all variables that will be accessed in the ASM loop (minimize cache problems)
uint numVertices;
const CLodCharacterShape::CVector3s *vertPtr;
const CVector *normalPtr;
const CUV *uvPtr;
const uint8 *alphaPtr;
CVector lightObjectSpace;
CVector matPos;
float a00, a01, a02;
float a10, a11, a12;
float a20, a21, a22;
// zero operand used by the MMX unpack/pack instructions of the ASM loop
uint64 blank= 0;
CRGBA ambient= paramAmbient;
CRGBA diffuse= paramDiffuse;
// For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
ambient.A= 0;
diffuse.A= 0;
// Get the Shape and current key.
//=============
// get the shape
const CLodCharacterShape *clod= getShape(instance.ShapeId);
// if not found quit, return true
if(!clod)
return true;
// get the Normal array of the shape. NULL => error
normalPtr= clod->getNormals();
// get UV of the instance
uvPtr= instance.getUVs();
// uvPtr is NULL means that initInstance() has not been called!!
nlassert(normalPtr && uvPtr);
// get the anim key
CVector unPackScaleFactor;
vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
// if not found quit, return true
if(!vertPtr)
return true;
// get num verts
numVertices= clod->getNumVertices();
// empty shape??
if(numVertices==0)
return true;
// If too many vertices, quit, returning false.
if(_CurrentVertexId+numVertices > _MaxNumVertices)
return false;
// get alpha array
// fallback array of 255 alphas, shared by all calls and only ever grown
static vector<uint8> defaultAlphaArray;
// get the instance alpha if correctly set up (one alpha per vertex)
if(instance.VertexAlphas.size() == numVertices)
{
alphaPtr= &instance.VertexAlphas[0];
}
// if error, take 255 as alpha.
else
{
// NB: still use an array. This case should never arise, but support it not at full optim.
if(defaultAlphaArray.size()<numVertices)
defaultAlphaArray.resize(numVertices, 255);
alphaPtr= &defaultAlphaArray[0];
}
// Lock Buffer if not done
//=============
// Do this after code above because we are sure that we will fill something (numVertices>0)
if(!_LockDone)
{
_VertexData= _VertexStream.lock();
_LockDone= true;
}
// After lock, For D3D, the VertexColor may be in BGRA format
if(_VertexStream.isBRGA())
{
// then swap only the B and R (no cpu cycle added per vertex)
ambient.swapBR();
diffuse.swapBR();
}
// Prepare Transform
//=============
// HTimerInfo: all this block takes 0.1%
// Get matrix pos.
matPos= worldMatrix.getPos();
// compute in manager space.
matPos -= _ManagerMatrixPos;
// Get rotation line vectors
const float *worldM= worldMatrix.get();
a00= worldM[0]; a01= worldM[4]; a02= worldM[8];
a10= worldM[1]; a11= worldM[5]; a12= worldM[9];
a20= worldM[2]; a21= worldM[6]; a22= worldM[10];
// get the light in object space.
// Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
lightObjectSpace.x= a00 * lightDir.x + a10 * lightDir.y + a20 * lightDir.z;
lightObjectSpace.y= a01 * lightDir.x + a11 * lightDir.y + a21 * lightDir.z;
lightObjectSpace.z= a02 * lightDir.x + a12 * lightDir.y + a22 * lightDir.z;
// animation User correction
lightObjectSpace= _LightCorrectionMatrix.mulVector(lightObjectSpace);
// normalize, and neg for Dot Product.
lightObjectSpace.normalize();
lightObjectSpace= -lightObjectSpace;
// preMul by 255 for RGBA uint8
lightObjectSpace*= 255;
// multiply matrix with scale factor for Pos.
// (must be done after the light computation above, which needs the unscaled matrix)
a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
// get dst Array: first free vertex of the locked stream.
uint8 *dstPtr;
dstPtr= _VertexData + _CurrentVertexId * _VertexSize;
/* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
maybe because the cache miss improve //ism a bit below.
*/
// Fill the VB
//=============
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
// optimized version
if(CSystemInfo::hasMMX())
{
H_AUTO( NL3D_CharacterLod_vertexFill );
if(numVertices)
{
sint f8;
/* NB: order is important for AGP filling optimisation in dstPtr
Pentium2+ optimisation notes:
- "uop" comment formating:
A/B means "A micro-ops in port 0, and B micro-ops in port 2". (port 1 is very rare for FPU)
A/B/C/D means "A micro-ops in port 0, B in port 2, C in port 3 and D in port 4".
The number in () is the delay (if any).
- the "compute lighting part" must done first, because of the "fistp f8" mem writes that must
be place far away from the "mov eax, f8" read in clamp lighting part
(else seems that it crashes all the //ism)
- No need to Interleave on Pentium2+. But prevents "write/read stall" by putting the write
far away from the next read. Else stall of 3 cycles + BIG BREAK OF //ism (I think).
This had save me 120 cycles / 240 !!!
BenchResults:
- The "transform vertex part" and "all next part" cost 42 cycles, but is somewhat optimal:
63 uop (=> min 21 cycles), but 36 uop in the P0 port (=> this is the bottleneck)
- The lighting part adds 1 cycle only ????? (44 cycles) But still relevant and optimal:
43 uop in port P0!!!!
- The UV part adds 4 cycles (47) (should not since 0 in Port P0), still acceptable.
- The clamp part adds 3 cycles (50), and add 11 cycles in "P0 or P1" (but heavy dependency)
If we assume all goes into P1, it should takes 0... still acceptable (optimal==43?)
- The alpha part adds 2 cycles (52, optimal=45). OK.
- The modulate part adds 15 cycles. OK
TOTAL: 67 cycles in theory (write in RAM, no cache miss problem)
BENCH: ASM version: 91 cycles (Write in AGP, some cache miss problems, still good against 67)
C version: 316 cycles.
*/
__asm
{
mov edi, dstPtr
theLoop:
// **** compute lighting
mov esi,normalPtr // uop: 0/1
// dot3
fld dword ptr [esi] // uop: 0/1
fmul lightObjectSpace.x // uop: 1/1 (5)
fld dword ptr [esi+4] // uop: 0/1
fmul lightObjectSpace.y // uop: 1/1 (5)
faddp st(1),st // uop: 1/0 (3)
fld dword ptr [esi+8] // uop: 0/1
fmul lightObjectSpace.z // uop: 1/1 (5)
faddp st(1),st // uop: 1/0 (3)
fistp f8 // uop: 2/0/1/1 (5)
// next
add esi, 12 // uop: 1/0
mov normalPtr, esi // uop: 0/0/1/1
// **** transform vertex, and store
mov esi, vertPtr // uop: 0/1
fild word ptr[esi] // uop: 3/1 (5)
fild word ptr[esi+2] // uop: 3/1 (5)
fild word ptr[esi+4] // uop: 3/1 (5)
// x
fld a00 // uop: 0/1
fmul st, st(3) // uop: 1/0 (5)
fld a01 // uop: 0/1
fmul st, st(3) // uop: 1/0 (5)
faddp st(1), st // uop: 1/0 (3)
fld a02 // uop: 0/1
fmul st, st(2) // uop: 1/0 (5)
faddp st(1), st // uop: 1/0 (3)
fld matPos.x // uop: 0/1
faddp st(1), st // uop: 1/0 (3)
fstp dword ptr[edi] // uop: 0/0/1/1
// y
fld a10
fmul st, st(3)
fld a11
fmul st, st(3)
faddp st(1), st
fld a12
fmul st, st(2)
faddp st(1), st
fld matPos.y
faddp st(1), st
fstp dword ptr[edi+4]
// z
fld a20
fmul st, st(3)
fld a21
fmul st, st(3)
faddp st(1), st
fld a22
fmul st, st(2)
faddp st(1), st
fld matPos.z
faddp st(1), st
fstp dword ptr[edi+8]
// flush stack
fstp st // uop: 1/0
fstp st // uop: 1/0
fstp st // uop: 1/0
// next
add esi, 6 // uop: 1/0
mov vertPtr, esi // uop: 0/0/1/1
// **** copy uv
mov esi, uvPtr // uop: 0/1
mov eax, [esi] // uop: 0/1
mov [edi+NL3D_CLOD_UV_OFF], eax // uop: 0/0/1/1
mov ebx, [esi+4] // uop: 0/1
mov [edi+NL3D_CLOD_UV_OFF+4], ebx // uop: 0/0/1/1
// next
add esi, 8 // uop: 1/0
mov uvPtr, esi // uop: 0/0/1/1
// **** Clamp lighting
// clamp to 0 only. will be clamped to 255 by MMX
mov eax, f8 // uop: 0/1
cmp eax, 0x80000000 // if>=0 => CF=1
sbb ebx, ebx // if>=0 => CF==1 => ebx=0xFFFFFFFF
and eax, ebx // if>=0 => eax unchanged, else eax=0 (clamped)
// **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
movd mm0, eax // 0000000L uop: 1/0
packuswb mm0, mm0 // 000L000L uop: 1/0 (p1)
packuswb mm0, mm0 // 0L0L0L0L uop: 1/0 (p1)
movd mm1, diffuse // uop: 0/1
punpcklbw mm1, blank // uop: 1/1 (p1)
pmullw mm0, mm1 // diffuse*L uop: 1/0 (3)
psrlw mm0, 8 // 0A0B0G0R uop: 1/0 (p1)
packuswb mm0, blank // 0000ABGR uop: 1/1 (p1)
movd mm2, ambient // uop: 0/1
paddusb mm0, mm2 // uop: 1/0
movd ebx, mm0 // ebx= AABBGGRR uop: 1/0
// NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
emms // uop: 11/0 (6). (?????)
// **** append alpha, and store
mov esi, alphaPtr // uop: 0/1
movzx eax, byte ptr[esi] // uop: 0/1
shl eax, 24 // uop: 1/0
add ebx, eax // uop: 1/0
// now, ebx= AABBGGRR
mov [edi+NL3D_CLOD_COLOR_OFF], ebx // uop: 0/0/1/1
// next
add esi, 1 // uop: 1/0
mov alphaPtr, esi // uop: 0/0/1/1
// **** next
add edi, NL3D_CLOD_VERTEX_SIZE // uop: 1/0
mov eax, numVertices // uop: 0/1
dec eax // uop: 1/0
mov numVertices, eax // uop: 0/0/1/1
jnz theLoop // uop: 1/1 (p1)
// To have same behavior than c code
mov dstPtr, edi
}
}
}
else
#endif
{
// C version: same per-vertex work as the ASM loop above (position, UV, lighting, alpha).
H_AUTO( NL3D_CharacterLod_vertexFill );
CVector fVect;
for(;numVertices>0;)
{
// NB: order is important for AGP filling optimisation
// transform vertex, and store.
CVector *dstVector= (CVector*)dstPtr;
fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
++vertPtr;
dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
dstVector->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
dstVector->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
// Copy UV
*(CUV*)(dstPtr + NL3D_CLOD_UV_OFF)= *uvPtr;
++uvPtr;
// Compute Lighting (RGB only; alpha is set just after from the alpha array).
CRGBA lightRes;
computeLodLighting(lightRes, lightObjectSpace, *normalPtr, ambient, diffuse);
++normalPtr;
lightRes.A= *alphaPtr;
++alphaPtr;
// store.
*((CRGBA*)(dstPtr + NL3D_CLOD_COLOR_OFF))= lightRes;
// next
dstPtr+= NL3D_CLOD_VERTEX_SIZE;
numVertices--;
}
}
// Add Primitives.
//=============
{
H_AUTO( NL3D_CharacterLod_primitiveFill )
// get number of tri indexes
uint numTriIdxs= clod->getNumTriangles() * 3;
// Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
if(numTriIdxs)
{
// realloc tris if needed.
if(_CurrentTriId+numTriIdxs > _Triangles.getNumIndexes())
{
_Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
_Triangles.setNumIndexes(_CurrentTriId+numTriIdxs);
}
// reindex and copy tris
CIndexBufferReadWrite iba;
_Triangles.lock(iba);
const TLodCharacterIndexType *srcIdx= clod->getTriangleArray();
nlassert(sizeof(TLodCharacterIndexType) == _Triangles.getIndexNumBytes());
TLodCharacterIndexType *dstIdx= (TLodCharacterIndexType *) iba.getPtr()+_CurrentTriId;
for(;numTriIdxs>0;numTriIdxs--, srcIdx++, dstIdx++)
{
// shape-local index => index in the whole vertex stream
*dstIdx= *srcIdx + _CurrentVertexId;
}
}
}
// Next
//=============
// NB: numVertices has been decremented to 0 by the fill loops, hence the counts are read again from the shape.
// Inc Vertex count.
_CurrentVertexId+= clod->getNumVertices();
// Inc Prim count.
_CurrentTriId+= clod->getNumTriangles() * 3;
// key added
return true;
}
// ***************************************************************************
/// End a render pass: if something has been added since beginRender(), unlock the vertex
/// stream, render all the triangles in one call and swap the stream VBHard.
void CLodCharacterManager::endRender()
{
	H_AUTO ( NL3D_CharacterLod_endRender );

	IDriver *driver= _VertexStream.getDriver();
	nlassert(driver);
	// we must be between beginRender() and endRender()
	nlassert(isRendering());

	// The buffer has never been locked => nothing has been added, nothing to render.
	if(!_LockDone)
	{
		_Rendering= false;
		return;
	}

	// Release the lock, telling how many vertices have been written.
	_VertexStream.unlock(_CurrentVertexId);
	_LockDone= false;

	if(_CurrentTriId>0)
	{
		// Vertices are stored relative to the manager position: the model matrix is a translation.
		CMatrix managerMatrix;
		managerMatrix.setPos(_ManagerMatrixPos);
		driver->setupModelMatrix(managerMatrix);

		// Bind the buffers and draw. _CurrentTriId counts indices, hence the /3.
		_VertexStream.activate();
		driver->activeIndexBuffer(_Triangles);
		driver->renderTriangles(_Material, 0, _CurrentTriId/3);
	}

	// Go to the next VBHard of the stream.
	_VertexStream.swapVBHard();

	// Rendering is over.
	_Rendering= false;
}
// ***************************************************************************
/// Setup the matrix applied to the object-space light direction in addRenderCharacterKey():
/// the inverse of normalMatrix, with its translation removed.
void CLodCharacterManager::setupNormalCorrectionMatrix(const CMatrix &normalMatrix)
{
	// copy, drop the translation, then invert.
	_LightCorrectionMatrix= normalMatrix;
	_LightCorrectionMatrix.setPos(CVector::Null);
	_LightCorrectionMatrix.invert();
}
// ***************************************************************************
// ***************************************************************************
// Texturing.
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
/// Build a temp bitmap in its default state (see reset()).
CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
{
	// default: a 1*1 black bitmap
	reset();
}
// ***************************************************************************
/// Reset to a 1*1 black bitmap (this also frees the pixels of a bigger bitmap).
void CLodCharacterTmpBitmap::reset()
{
	// a single black pixel
	_Bitmap.resize(1);
	_Bitmap[0]= CRGBA::Black;

	// lookup setup for a 1*1 bitmap: width is 2^0, and U and V are shifted by 8-0.
	_WidthPower=0;
	_UShift= 8;
	_VShift= 8;
}
// ***************************************************************************
/// Build the temp bitmap from a bitmap: the pixels are converted to RGBA and copied.
/// \param bmpIn source bitmap. Width and height must be in [1, 256] (asserted).
void CLodCharacterTmpBitmap::build(const NLMISC::CBitmap &bmpIn)
{
	uint width= bmpIn.getWidth();
	uint height= bmpIn.getHeight();
	nlassert(width>0 && width<=256);
	nlassert(height>0 && height<=256);

	// allocate the destination.
	const uint numPixels= width*height;
	_Bitmap.resize(numPixels);

	// lookup setup: width as a power of 2, and the shifts applied to U and V.
	_WidthPower= getPowerOf2(width);
	_UShift= 8-getPowerOf2(width);
	_VShift= 8-getPowerOf2(height);

	// work on a RGBA copy of the source (the source itself is const).
	CBitmap bmp= bmpIn;
	bmp.convertToType(CBitmap::RGBA);

	// copy pixel per pixel.
	const CRGBA *src= (const CRGBA*)&bmp.getPixels()[0];
	CRGBA *dst= _Bitmap.getPtr();
	for(uint i=0; i<numPixels; i++)
		dst[i]= src[i];
}
// ***************************************************************************
/// Build the temp bitmap as a 1*1 bitmap of a single color.
void CLodCharacterTmpBitmap::build(CRGBA col)
{
	// back to the 1*1 setup, then override the only pixel.
	reset();
	_Bitmap[0]= col;
}
// ***************************************************************************
/// Bind an instance to this manager: allocate a texture id in the big texture, and compute the
/// instance UVs (the shape UVs remapped into the part of the big texture owned by this id).
/// If the instance is already owned by a manager, it is released from it first.
/// If the shape of the instance is not found, the instance is left released.
/// If no texture id is free, the instance gets the special id NL3D_CLOD_TEXT_NUM_IDS
/// ("Not enough memory") and all its UVs are set to (0,0).
void CLodCharacterManager::initInstance(CLodCharacterInstance &instance)
{
	// first release in (maybe) other manager.
	if(instance._Owner)
		instance._Owner->releaseInstance(instance);

	// get the shape. if not found quit
	const CLodCharacterShape *clod= getShape(instance.ShapeId);
	if(!clod)
		return;

	// get Uvs.
	const CUV *uvSrc= clod->getUVs();
	nlassert(uvSrc);

	// Ok, init header
	instance._Owner= this;
	instance._UVs.resize(clod->getNumVertices());

	// No more free id: flag the instance and point all the UVs to the pixel (0,0)
	// (see endTextureCompute() for how this pixel is maintained).
	if(_FreeIds.empty())
	{
		instance._TextureId= NL3D_CLOD_TEXT_NUM_IDS;
		CUV uv(0,0);
		fill(instance._UVs.begin(), instance._UVs.end(), uv);
		return;
	}

	// allocate the id.
	instance._TextureId= _FreeIds.back();
	_FreeIds.pop_back();

	// position of the id in the grid of lod textures.
	uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
	uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;

	// scale/bias mapping [0,1] onto the cell of this id.
	float scaleU= 1.0f / NL3D_CLOD_TEXT_NLOD_WIDTH;
	float scaleV= 1.0f / NL3D_CLOD_TEXT_NLOD_HEIGHT;
	float biasU= (float)xId / NL3D_CLOD_TEXT_NLOD_WIDTH;
	float biasV= (float)yId / NL3D_CLOD_TEXT_NLOD_HEIGHT;

	// apply it to each UV.
	// NB: elements are indexed one by one inside the loop, so that an empty UV array
	// (shape without vertex) is never indexed.
	const uint numUVs= (uint)instance._UVs.size();
	for(uint i=0; i<numUVs; i++)
	{
		CUV &uvDst= instance._UVs[i];
		uvDst.U= biasU + uvSrc[i].U*scaleU;
		uvDst.V= biasV + uvSrc[i].V*scaleV;
	}
}
// ***************************************************************************
/// Unbind an instance from this manager: give back its texture id (if it owns a real one)
/// and clear its owner, id and UVs. No-op if the instance has no owner.
void CLodCharacterManager::releaseInstance(CLodCharacterInstance &instance)
{
	// not bound: nothing to do.
	if(instance._Owner==NULL)
		return;
	nlassert(this==instance._Owner);

	// Only a real id goes back to the pool (not the "Not enough memory" id).
	const bool ownsRealId= instance._TextureId>=0 && instance._TextureId<NL3D_CLOD_TEXT_NUM_IDS;
	if(ownsRealId)
		_FreeIds.push_back(instance._TextureId);

	// back to the unbound state.
	instance._Owner= NULL;
	instance._TextureId= -1;
	contReset(instance._UVs);
}
// ***************************************************************************
/// Get the address, in the big texture, of the first pixel of the lod texture owned by an
/// instance. Successive lines of this lod texture are NL3D_CLOD_BIGTEXT_WIDTH pixels apart.
/// \return NULL if the instance has the "Not enough memory" id.
CRGBA *CLodCharacterManager::getTextureInstance(CLodCharacterInstance &instance)
{
	nlassert(instance._Owner==this);
	nlassert(instance._TextureId!=-1);

	// "Not enough memory" id: no pixels owned.
	if(instance._TextureId==NL3D_CLOD_TEXT_NUM_IDS)
		return NULL;

	// position of the id in the grid of lod textures.
	uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
	uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;

	// first pixel of the big texture, then skip yId rows and xId columns of lod textures.
	CRGBA *pix= (CRGBA*)&_BigTexture->getPixels(0)[0];
	pix= pix + yId*NL3D_CLOD_TEXT_HEIGHT*NL3D_CLOD_BIGTEXT_WIDTH + xId*NL3D_CLOD_TEXT_WIDTH;
	return pix;
}
// ***************************************************************************
/// Start the texture computation of an instance: fill its lod texture with
/// NL3D_CLOD_DEFAULT_TEXCOLOR. The alpha of 255 is the worst "Quality", so any texture
/// added by addTextureCompute() with a lower Q replaces it.
/// \return false if the instance has no texture (the "Not enough memory" id), true otherwise.
bool CLodCharacterManager::startTextureCompute(CLodCharacterInstance &instance)
{
	CRGBA *dst= getTextureInstance(instance);
	if(!dst)
		return false;

	const CRGBA col= NL3D_CLOD_DEFAULT_TEXCOLOR;
	// line per line; lines are NL3D_CLOD_BIGTEXT_WIDTH pixels apart in the big texture.
	for(uint y=0; y<NL3D_CLOD_TEXT_HEIGHT; y++, dst+= NL3D_CLOD_BIGTEXT_WIDTH)
	{
		for(uint x=0; x<NL3D_CLOD_TEXT_WIDTH; x++)
			dst[x]= col;
	}

	return true;
}
// ***************************************************************************
/// Apply a lod texture lookup on the texture of an instance. Each pixel of the lookup tells
/// which temp bitmap (T) to read, where (U,V), and with which quality (Q). A pixel is written
/// only if its quality is strictly better (lower) than the one currently stored in the
/// destination alpha. No-op if the instance has no texture.
void CLodCharacterManager::addTextureCompute(CLodCharacterInstance &instance, const CLodCharacterTexture &lodTexture)
{
	CRGBA *dst= getTextureInstance(instance);
	if(!dst)
		return;

	// the lookup has one entry per pixel of the lod texture, and is read linearly.
	nlassert(lodTexture.Texture.size()==NL3D_CLOD_TEXT_SIZE);
	const CLodCharacterTexture::CTUVQ *lookUpPtr= &lodTexture.Texture[0];

	for(uint y=0; y<NL3D_CLOD_TEXT_HEIGHT; y++, dst+= NL3D_CLOD_BIGTEXT_WIDTH)
	{
		for(uint x=0; x<NL3D_CLOD_TEXT_WIDTH; x++, lookUpPtr++)
		{
			const CLodCharacterTexture::CTUVQ &lut= *lookUpPtr;
			CRGBA &texel= dst[x];

			// keep the stored pixel if it is at least as good.
			if(lut.Q>=texel.A)
				continue;

			// read the pixel in the designated temp bitmap, and store it along with its quality.
			CRGBA col= _TmpBitmaps[lut.T].getPixel(lut.U, lut.V);
			col.A= lut.Q;
			texel= col;
		}
	}
}
// ***************************************************************************
/// End the texture computation of an instance: replace the "Quality" stored in alpha by 255,
/// notify the driver that this part of the big texture changed, and reset the first
/// numBmpToReset temp bitmaps. No-op if the instance has no texture.
void CLodCharacterManager::endTextureCompute(CLodCharacterInstance &instance, uint numBmpToReset)
{
	CRGBA *dst= getTextureInstance(instance);
	if(!dst)
		return;

	// All alphas to 255 => no AlphaTest problems.
	for(uint y=0; y<NL3D_CLOD_TEXT_HEIGHT; y++, dst+= NL3D_CLOD_BIGTEXT_WIDTH)
	{
		for(uint x=0; x<NL3D_CLOD_TEXT_WIDTH; x++)
			dst[x].A= 255;
	}

	// The id 0 owns the pixel (0,0), which is the pixel read by the instances in the
	// "Not enough memory" case of initInstance(): force it to the default texture color.
	if(instance._TextureId==0)
		*(CRGBA*)&_BigTexture->getPixels(0)[0]= NL3D_CLOD_DEFAULT_TEXCOLOR;

	// Tell the driver which rectangle of the big texture must be updated.
	uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
	uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
	_BigTexture->touchRect(
		CRect(xId*NL3D_CLOD_TEXT_WIDTH, yId*NL3D_CLOD_TEXT_HEIGHT, NL3D_CLOD_TEXT_WIDTH, NL3D_CLOD_TEXT_HEIGHT) );

	// reset tmpBitmaps / free memory.
	for(uint bmp=0; bmp<numBmpToReset; bmp++)
		_TmpBitmaps[bmp].reset();

	// TestYoyo
	/*NLMISC::COFile f("tam.tga");
	_BigTexture->writeTGA(f,32);*/
}
// ***************************************************************************
/// Intersection test against the current animation key of an instance: the vertices of the
/// key are transformed by toRaySpace into a temp array, then tested with
/// CRayMesh::getRayIntersection().
/// \param toRaySpace matrix applied to the vertices of the shape.
/// \param dist2D, distZ, computeDist2D forwarded as is to CRayMesh::getRayIntersection().
/// \return false if the shape or the anim key is not found, or if the shape has no vertex.
/// Else the result of CRayMesh::getRayIntersection().
bool CLodCharacterManager::fastIntersect(const CLodCharacterInstance &instance, const NLMISC::CMatrix &toRaySpace, float &dist2D, float &distZ, bool computeDist2D)
{
	H_AUTO ( NL3D_CharacterLod_fastIntersect )

	// Get the Shape and current key.
	//=============
	const CLodCharacterShape *clod= getShape(instance.ShapeId);
	if(!clod)
		return false;

	CVector unPackScaleFactor;
	const CLodCharacterShape::CVector3s *vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
	if(!vertPtr)
		return false;

	const uint numVertices= clod->getNumVertices();
	if(numVertices==0)
		return false;

	// Prepare Transform
	//=============
	const CVector matPos= toRaySpace.getPos();
	// 3x3 part of the matrix, each column pre-multiplied by the unpack scale of its axis.
	const float *rayM= toRaySpace.get();
	const float a00= rayM[0] * unPackScaleFactor.x;
	const float a01= rayM[4] * unPackScaleFactor.y;
	const float a02= rayM[8] * unPackScaleFactor.z;
	const float a10= rayM[1] * unPackScaleFactor.x;
	const float a11= rayM[5] * unPackScaleFactor.y;
	const float a12= rayM[9] * unPackScaleFactor.z;
	const float a20= rayM[2] * unPackScaleFactor.x;
	const float a21= rayM[6] * unPackScaleFactor.y;
	const float a22= rayM[10] * unPackScaleFactor.z;

	// Temp buffer shared by all calls. It is only ever enlarged.
	static std::vector<CVector> lodInRaySpace;
	if(numVertices>lodInRaySpace.size())
		lodInRaySpace.resize(numVertices);

	// Fill the temp skin
	//=============
	for(uint v=0; v<numVertices; ++v)
	{
		// unpack the vertex to float
		CVector fVect;
		fVect.x= vertPtr[v].x; fVect.y= vertPtr[v].y; fVect.z= vertPtr[v].z;

		// transform, and store.
		CVector &dst= lodInRaySpace[v];
		dst.x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
		dst.y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
		dst.z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
	}

	// Test intersection
	//=============
	return CRayMesh::getRayIntersection(lodInRaySpace, clod->getTriangleIndices(), dist2D, distZ, computeDist2D);
}
} // NL3D