From 556a41afeef5bb4d01f81a33e6f264f9be24757f Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 19:26:22 +0200 Subject: [PATCH] SSE2: Implement alignment for arena allocator --HG-- branch : sse2 --- .../nel/include/nel/misc/fixed_size_allocator.h | 1 + code/nel/src/misc/fixed_size_allocator.cpp | 17 ++++++++++++----- code/nel/src/misc/object_arena_allocator.cpp | 14 ++++++++------ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/code/nel/include/nel/misc/fixed_size_allocator.h b/code/nel/include/nel/misc/fixed_size_allocator.h index 9eb1d8a10..80b9ed491 100644 --- a/code/nel/include/nel/misc/fixed_size_allocator.h +++ b/code/nel/include/nel/misc/fixed_size_allocator.h @@ -53,6 +53,7 @@ public: uint getNumAllocatedBlocks() const { return _NumAlloc; } private: class CChunk; + NL_ALIGN(NL_DEFAULT_MEMORY_ALIGNMENT) class CNode { public: diff --git a/code/nel/src/misc/fixed_size_allocator.cpp b/code/nel/src/misc/fixed_size_allocator.cpp index 790275ec6..30693ddfd 100644 --- a/code/nel/src/misc/fixed_size_allocator.cpp +++ b/code/nel/src/misc/fixed_size_allocator.cpp @@ -33,6 +33,9 @@ CFixedSizeAllocator::CFixedSizeAllocator(uint numBytesPerBlock, uint numBlockPer _NumChunks = 0; nlassert(numBytesPerBlock > 1); _NumBytesPerBlock = numBytesPerBlock; + const uint mask = NL_DEFAULT_MEMORY_ALIGNMENT - 1; + _NumBytesPerBlock = (_NumBytesPerBlock + mask) & ~mask; + nlassert(_NumBytesPerBlock >= numBytesPerBlock); _NumBlockPerChunk = std::max(numBlockPerChunk, (uint) 3); _NumAlloc = 0; } @@ -67,12 +70,14 @@ void *CFixedSizeAllocator::alloc() return _FreeSpace->unlink(); } +#define aligned_offsetof(s, m) ((offsetof(s, m) + (NL_DEFAULT_MEMORY_ALIGNMENT - 1)) & ~(NL_DEFAULT_MEMORY_ALIGNMENT - 1)) + // ***************************************************************************************************************** void CFixedSizeAllocator::free(void *block) { if (!block) return; /// get the node from the object - CNode *node = (CNode *) ((uint8 *) block - offsetof(CNode, Next)); + CNode *node = (CNode *) ((uint8 *) block - aligned_offsetof(CNode, Next)); // nlassert(node->Chunk != NULL); nlassert(node->Chunk->Allocator == this); @@ -84,7 +89,9 @@ void CFixedSizeAllocator::free(void *block) // ***************************************************************************************************************** uint CFixedSizeAllocator::CChunk::getBlockSizeWithOverhead() const { - return std::max((uint)(sizeof(CNode) - offsetof(CNode, Next)),(uint)(Allocator->getNumBytesPerBlock())) + offsetof(CNode, Next); + nlctassert((sizeof(CNode) % NL_DEFAULT_MEMORY_ALIGNMENT) == 0); + return std::max((uint)(sizeof(CNode) - aligned_offsetof(CNode, Next)), + (uint)(Allocator->getNumBytesPerBlock())) + aligned_offsetof(CNode, Next); } // ***************************************************************************************************************** @@ -105,7 +112,7 @@ CFixedSizeAllocator::CChunk::~CChunk() nlassert(NumFreeObjs == 0); nlassert(Allocator->_NumChunks > 0); -- (Allocator->_NumChunks); - delete[] Mem; + aligned_free(Mem); //delete[] Mem; } // ***************************************************************************************************************** @@ -115,7 +122,7 @@ void CFixedSizeAllocator::CChunk::init(CFixedSizeAllocator *alloc) nlassert(alloc != NULL); Allocator = alloc; // - Mem = new uint8[getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk()]; + Mem = (uint8 *)aligned_malloc(getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk(), NL_DEFAULT_MEMORY_ALIGNMENT); // new uint8[getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk()]; // getNode(0).Chunk = this; getNode(0).Next = &getNode(1); @@ -179,7 +186,7 @@ void *CFixedSizeAllocator::CNode::unlink() *Prev = Next; nlassert(Chunk->NumFreeObjs > 0); Chunk->grab(); // tells the containing chunk that a node has been allocated - return (void *) &Next; + return (void *)((uintptr_t)(this) + aligned_offsetof(CNode, Next)); //(void *) &Next; } // ***************************************************************************************************************** diff --git a/code/nel/src/misc/object_arena_allocator.cpp b/code/nel/src/misc/object_arena_allocator.cpp index 9c73f5059..8084b4ac9 100644 --- a/code/nel/src/misc/object_arena_allocator.cpp +++ b/code/nel/src/misc/object_arena_allocator.cpp @@ -68,21 +68,23 @@ void *CObjectArenaAllocator::alloc(uint size) if (size >= _MaxAllocSize) { // use standard allocator - uint8 *block = new uint8[size + sizeof(uint)]; // an additionnal uint is needed to store size of block + nlctassert(NL_DEFAULT_MEMORY_ALIGNMENT > sizeof(uint)); + uint8 *block = (uint8 *)aligned_malloc(NL_DEFAULT_MEMORY_ALIGNMENT + size, NL_DEFAULT_MEMORY_ALIGNMENT); //new uint8[size + sizeof(uint)]; // an additionnal uint is needed to store size of block if (!block) return NULL; #ifdef NL_DEBUG _MemBlockToAllocID[block] = _AllocID; #endif *(uint *) block = size; - return block + sizeof(uint); + return block + NL_DEFAULT_MEMORY_ALIGNMENT; } uint entry = ((size + (_Granularity - 1)) / _Granularity) ; nlassert(entry < _ObjectSizeToAllocator.size()); if (!_ObjectSizeToAllocator[entry]) { - _ObjectSizeToAllocator[entry] = new CFixedSizeAllocator(entry * _Granularity + sizeof(uint), _MaxAllocSize / size); // an additionnal uint is needed to store size of block + _ObjectSizeToAllocator[entry] = new CFixedSizeAllocator(entry * _Granularity + NL_DEFAULT_MEMORY_ALIGNMENT, _MaxAllocSize / size); // an additionnal uint is needed to store size of block } void *block = _ObjectSizeToAllocator[entry]->alloc(); + nlassert(((uintptr_t)block % NL_DEFAULT_MEMORY_ALIGNMENT) == 0); #ifdef NL_DEBUG if (block) { @@ -91,14 +93,14 @@ void *CObjectArenaAllocator::alloc(uint size) ++_AllocID; #endif *(uint *) block = size; - return (void *) ((uint8 *) block + sizeof(uint)); + return (void *) ((uint8 *) block + NL_DEFAULT_MEMORY_ALIGNMENT); } // ***************************************************************************************************************** void CObjectArenaAllocator::free(void *block) { if (!block) return; - uint8 *realBlock = (uint8 *) block - sizeof(uint); // a uint is used at start of block to give its size + uint8 *realBlock = (uint8 *) block - NL_DEFAULT_MEMORY_ALIGNMENT; // sizeof(uint); // a uint is used at start of block to give its size uint size = *(uint *) realBlock; if (size >= _MaxAllocSize) { @@ -107,7 +109,7 @@ void CObjectArenaAllocator::free(void *block) nlassert(it != _MemBlockToAllocID.end()); _MemBlockToAllocID.erase(it); #endif - delete realBlock; + aligned_free(realBlock); return; } uint entry = ((size + (_Granularity - 1)) / _Granularity);