// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef RY_PD_TOKENIZER_H
#define RY_PD_TOKENIZER_H

// NeL Misc
#include "nel/misc/types_nl.h"
#include <nel/misc/debug.h>
#include <nel/misc/file.h>

// STL
#include <string>
#include <map>
#include <vector>
#include <string.h>
#include <ctype.h>
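
/// Tokens produced when parsing a persistent data description script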
enum TToken
{
	// special tokens
	TokenUnknown,
	TokenEOF,

	// script inclusion
	TokenIncludeScript,

	// identifiers, literals and embedded code
	TokenIdentifier,
	TokenScopedIdentifier,
	TokenRefineScope,
	TokenNumber,
	TokenString,
	TokenCppCode,
	TokenSlash,
	TokenDescription,

	// punctuation and operators
	TokenOpenBrace,
	TokenCloseBrace,
	TokenOpenParenthesis,
	TokenCloseParenthesis,
	TokenOpenBracket,
	TokenCloseBracket,
	TokenLessThan,
	TokenGreaterThan,
	TokenEqual,
	TokenComma,
	TokenDot,
	TokenColon,
	TokenSemiColon,
	TokenMinus,
	TokenPlus,
	TokenTimes,
	TokenAntiSlash,
	TokenMod,
	TokenSharp,
	TokenAnd,
	TokenOr,
	TokenCirc,
	TokenInterrog,
	TokenExclam,

	// keywords
	TokenClass,
	TokenEnum,
	TokenDimension,
	TokenParent,
	TokenFlag,
	TokenFile,
	TokenDb,
	TokenType,
	TokenKey,
	TokenHidden,
	TokenExtern,
	TokenMirrored,
	TokenImplements,
	TokenMapped,
	TokenDerived,
	TokenInitFill,
	TokenReserve,
	TokenInclude,
	TokenWriteTrigger,
	TokenSeparated,

	TokenUsePch,

	TokenLogMsg,
	TokenLogContext
};
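
/// Tokenizer for persistent data description scripts: produces a stream of
/// CToken, handles comments, string escapes and included scripts, and
/// supports parser backtracking through push()/pop().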
class CTokenizer
{
public:

	/// A parsed token, referencing a [Start, End) character range in the buffer of the tokenizer that produced it
	class CToken
	{
	public:
		CToken(uint start = 0, uint end = 0, TToken token = TokenUnknown, CTokenizer* tokenizer = NULL) : Tokenizer(tokenizer), Start(start), End(end), Token(token) {}
		CToken(CTokenizer* tokenizer) : Tokenizer(tokenizer), Start(0), End(0), Token(TokenUnknown) {}

		CTokenizer*	Tokenizer;
		uint		Start;
		uint		End;
		TToken		Token;

		/// Text covered by this token
		std::string	get() const	{ return Tokenizer->get(*this); }
	};

	CTokenizer() : _Buffer(NULL), _Size(0), _CurrentToken(0) { }

	CTokenizer(const std::string &text)
	{
		init(text);
	}

	~CTokenizer()
	{
		clear();
	}

	/// Init from a file: load the whole file into memory and tokenize from that buffer
	void readFile(const std::string &filename)
	{
		clear();

		NLMISC::CIFile f;
		if (!f.open(filename))
			return;

		uint size = f.getFileSize();
		char *buffer = new char[size+1];
		f.serialBuffer((uint8*)buffer, size);
		buffer[size] = '\0';

		f.close();

		_File = filename;

		init(std::string(buffer));

		delete [] buffer;
	}

	/// Init from a string; the text is copied into _Str to keep the parsed buffer alive
	void init(const std::string &text)
	{
		_Str = text;
		init(_Str.c_str());
	}

	/// Init from a raw buffer; the caller must keep the buffer alive while tokenizing
	void init(const char* text, uint size = 0)
	{
		_Size = (size > 0 ? size : (uint)strlen(text));
		_Buffer = text;
		nlassert(_Buffer != NULL);
		_TempToken.Start = 0;
		_TempToken.End = 0;
		_CurrentToken = 0;
		_Mark = 0;

		initOneLetterTokens();
		initKeywords();
	}

	/// Tokenize the whole buffer, splicing in included scripts; returns false on error
	bool tokenize()
	{
		while (true)
		{
			CToken token = nextToken();

			nlassert(token.Tokenizer != NULL);

			if (token.Token == TokenUnknown)
				return false;

			if (token.Token == TokenEOF)
				break;

			if (token.Token == TokenIncludeScript)
			{
				// 'verbatim "file"': tokenize the included script and splice its tokens in place
				CToken scriptFile = nextToken();
				if (scriptFile.Token != TokenString)
					error(scriptFile);

				std::string file = scriptFile.get();

				CTokenizer* sub = new CTokenizer();
				_Includes.push_back(sub);
				sub->readFile(file);
				if (!sub->tokenize())
					return false;
				_Tokens.insert(_Tokens.end(), sub->_Tokens.begin(), sub->_Tokens.end());
			}
			else
			{
				_Tokens.push_back(token);
			}
		}

		// second pass: merge identifier (:: identifier)* sequences into single scoped identifiers
		std::vector<CToken>::iterator it;
		for (it=_Tokens.begin(); it!=_Tokens.end(); ++it)
		{
			if ((*it).Token == TokenIdentifier)
			{
				std::vector<CToken>::iterator startit = it;
				std::vector<CToken>::iterator endit = it;
				while ((++it) != _Tokens.end() &&
					   (*it).Token == TokenRefineScope &&
					   (*it).Tokenizer == (*startit).Tokenizer &&
					   (++it) != _Tokens.end() &&
					   (*it).Token == TokenIdentifier &&
					   (*it).Tokenizer == (*startit).Tokenizer)
				{
					endit = it;
				}
				if (endit != startit)
				{
					CToken newToken(this);
					newToken.Token = TokenScopedIdentifier;
					newToken.Start = (*startit).Start;
					newToken.End = (*endit).End;
					++endit;
					it = _Tokens.erase(startit, endit);
					it = _Tokens.insert(it, newToken);
				}
				else
				{
					// no scope followed: rewind so the loop neither skips tokens nor steps past end()
					it = startit;
				}
			}
		}

		return true;
	}

	/// Step to the next token; returns false when the end of the stream is reached
	bool next()
	{
		++_CurrentToken;
		return (!end());
	}

	/// Current token
	const CToken &currentToken() const
	{
		nlassert(!end());
		return _Tokens[_CurrentToken];
	}

	/// Type of the current token
	TToken current() const
	{
		return currentToken().Token;
	}

	/// True if the whole token stream has been consumed
	bool end() const
	{
		return _CurrentToken >= _Tokens.size();
	}

	/// Save the current position on the state stack
	void push()
	{
		_Stack.push_back(_CurrentToken);
	}

	/// Restore the last saved position; it is a parse error to backtrack before the mark
	void pop()
	{
		nlassert(!_Stack.empty());
		_CurrentToken = _Stack.back();
		if (_CurrentToken < _Mark)
			error(_Tokens[_Mark], "parse");
		_Stack.pop_back();
	}

	/// Mark the current position as a point of no return for pop()
	void leaveMark()
	{
		_Mark = _CurrentToken;
	}

	/// Get token text, with escape sequences in strings resolved
	std::string get(const CToken &token) const
	{
		nlassert(token.Tokenizer != NULL);

		if (token.Tokenizer != this)
			return token.Tokenizer->get(token);

		std::string str(_Buffer+token.Start, token.End-token.Start);
		if (token.Token == TokenString)
		{
			// resolve \n, \r and \t escapes; any other backslash just escapes the next character
			std::string::size_type pos = 0;
			while ((pos = str.find('\\', pos)) != std::string::npos)
			{
				if (pos+1 == str.size())
					break;
				switch (str[pos+1])
				{
				case 'n':
					str.erase(pos, 2);
					str.insert(pos, "\n");
					break;
				case 'r':
					str.erase(pos, 2);
					str.insert(pos, "\r");
					break;
				case 't':
					str.erase(pos, 2);
					str.insert(pos, "\t");
					break;
				default:
					str.erase(pos, 1);
					++pos;
					break;
				}
			}
		}
		return str;
	}

	/// Get the file, line and column where a token starts (a tab counts as 4 columns)
	void getFileLine(const CToken& token, uint &line, uint &col, std::string &file)
	{
		nlassert(token.Tokenizer != NULL);

		if (token.Tokenizer != this)
		{
			token.Tokenizer->getFileLine(token, line, col, file);
			return;
		}

		file = _File;

		uint n = 0;
		uint pos = token.Start;

		line = 1;
		col = 1;

		while (n < pos)
		{
			if (_Buffer[n] == '\0')
				break;
			if (_Buffer[n] == '\t')
				col += 4;
			else if (_Buffer[n] != '\r')
				++col;
			if (_Buffer[n] == '\n')
				++line, col = 1;

			++n;
		}
	}

	/// Report an error at a token's position, print the offending line with a caret, then abort via nlerror
	void error(const CToken& token, const char *errType = "syntax", const char *errMsg = NULL)
	{
		if (token.Tokenizer != this && token.Tokenizer != NULL)
		{
			token.Tokenizer->error(token, errType, errMsg);
			return;
		}

		uint pos = token.Start;
		uint n = 0;
		uint line = 1, col = 1;
		uint lineStartAt = 0, lineEndAt;

		while (n < pos)
		{
			if (_Buffer[n] == '\0')
				break;
			if (_Buffer[n] == '\t')
				col += 4;
			else if (_Buffer[n] != '\r')
				++col;
			else
				lineStartAt = n+1;
			if (_Buffer[n] == '\n')
				++line, col = 1, lineStartAt = n+1;

			++n;
		}

		lineEndAt = n;
		while (_Buffer[lineEndAt] != '\0' && _Buffer[lineEndAt] != '\n' && _Buffer[lineEndAt] != '\r')
			++lineEndAt;

		NLMISC::createDebug();

		std::string errorMsg = NLMISC::toString("PD_PARSE: file %s, %s error at line %d, column %d%s%s", _File.c_str(), errType, line, col, (errMsg != NULL ? ": " : ""), (errMsg != NULL ? errMsg : ""));

		NLMISC::ErrorLog->displayRawNL("%s", errorMsg.c_str());
		std::string extr(_Buffer+lineStartAt, lineEndAt-lineStartAt);
		NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());

		// underline the error column with a caret
		uint i;
		for (i=0; i<extr.size() && i<n-lineStartAt; ++i)
			if (extr[i] != '\t')
				extr[i] = ' ';
		extr.erase(n-lineStartAt);
		extr += '^';
		NLMISC::ErrorLog->displayRawNL("%s", extr.c_str());
		nlerror("%s", errorMsg.c_str());
	}

private:

	/// Original text
	std::string _Str;

	/// Parsed buffer
	const char *_Buffer;

	/// Buffer size
	uint _Size;

	/// Keywords
	std::map<std::string, TToken> _Keywords;

	/// One letter tokens
	TToken _OneLetterTokens[256];

	/// List of tokens
	std::vector<CToken> _Tokens;

	/// Current token
	uint _CurrentToken;

	/// State stack
	std::vector<uint> _Stack;

	/// Currently used token
	CToken _TempToken;

	/// Mark
	uint _Mark;

	/// Loaded file
	std::string _File;

	/// Subtokenizers
	std::vector<CTokenizer*> _Includes;

	/// Clear the tokenizer state and release sub-tokenizers
	void clear()
	{
		_Str.clear();
		_Size = 0;
		_TempToken.Start = 0;
		_TempToken.End = 0;
		_Buffer = NULL;
		_Mark = 0;
		_File.clear();
		_Tokens.clear();

		// delete sub-tokenizers created for included scripts (their tokens were cleared above)
		uint i;
		for (i=0; i<_Includes.size(); ++i)
			delete _Includes[i];
		_Includes.clear();
	}

	/// Extract the next token from the buffer, skipping whitespace and comments
	CToken nextToken()
	{
		skipSpaces();
		if (posAtEnd())
			return CToken(0, 0, TokenEOF, this);

		_TempToken.Start = _TempToken.End;
		_TempToken.Token = TokenUnknown;
		_TempToken.Tokenizer = this;

		char parse = popChar();

		if (isalpha(parse) || parse == '_')
		{
			// identifier
			while (!posAtEnd() && (isalnum(parse = getChar()) || parse == '_'))
				popChar();

			std::map<std::string, TToken>::iterator it;
			_TempToken.Token = ((it = _Keywords.find(get(_TempToken))) != _Keywords.end() ? (*it).second : TokenIdentifier);
		}
		else if (isdigit(parse))
		{
			// number
			while (!posAtEnd() && isdigit(getChar()))
				popChar();

			_TempToken.Token = TokenNumber;
		}
		else if (parse == '"')
		{
			// string
			do
			{
				if (posAtEnd())
					error(_TempToken);

				parse = popChar();
				if (parse == '"')
					break;

				if (parse == '\\')
				{
					if (posAtEnd())
						error(_TempToken);
					parse = popChar();
				}
			}
			while (true);
			_TempToken.Token = TokenString;
			// strip the surrounding quotes from the token range
			return CToken(_TempToken.Start+1, _TempToken.End-1, _TempToken.Token, this);
		}
		else if (parse == '@')
		{
			if (posAtEnd())
				error(_TempToken);

			parse = popChar();

			if (parse == '[')
			{
				// user code: @[ ... ]@
				do
				{
					if (posAtEnd())
						error(_TempToken);

					parse = popChar();
					if (parse == ']')
					{
						if (posAtEnd())
							error(_TempToken);
						if (popChar() == '@')
							break;
						else
							pushChar();
					}
				}
				while (true);

				// trim delimiters and surrounding whitespace
				uint startTrim = _TempToken.Start+2;
				uint endTrim = _TempToken.End-2;

				while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))
					++startTrim;
				while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))
					--endTrim;

				_TempToken.Token = TokenCppCode;
				return CToken(startTrim, endTrim, _TempToken.Token, this);
			}
			else if (parse == '/')
			{
				// description: @/ ... /@
				do
				{
					if (posAtEnd())
						error(_TempToken);

					parse = popChar();
					if (parse == '/')
					{
						if (posAtEnd())
							error(_TempToken);
						if (popChar() == '@')
							break;
						else
							pushChar();
					}
				}
				while (true);

				// trim delimiters and surrounding whitespace
				uint startTrim = _TempToken.Start+2;
				uint endTrim = _TempToken.End-2;

				while (startTrim < endTrim && (isspace(_Buffer[startTrim]) || _Buffer[startTrim] == '\r'))
					++startTrim;
				while (startTrim < endTrim && (isspace(_Buffer[endTrim-1]) || _Buffer[endTrim-1] == '\r'))
					--endTrim;

				_TempToken.Token = TokenDescription;
				return CToken(startTrim, endTrim, _TempToken.Token, this);
			}
			else
			{
				error(_TempToken);
			}
		}
		else if (parse == '/')
		{
			_TempToken.Token = TokenSlash;
			if (!posAtEnd())
			{
				parse = popChar();
				if (parse == '/')
				{
					// skip to end of line
					while (!posAtEnd() && (parse = popChar()) != '\n' && parse != '\r')
						;
					return nextToken();
				}
				else if (parse == '*')
				{
					// skip to comment close
					while (true)
					{
						if (posAtEnd())
							error(_TempToken);

						parse = popChar();
						if (parse == '*')
						{
							if (posAtEnd())
								error(_TempToken);
							if (popChar() == '/')
								break;
							else
								pushChar();
						}
					}
					return nextToken();
				}
				else
				{
					pushChar();
				}
			}
		}
		else if (parse == ':')
		{
			// ':' alone is a colon, '::' is a scope refinement
			if (posAtEnd())
				_TempToken.Token = TokenColon;
			else
			{
				parse = popChar();
				if (parse == ':')
				{
					_TempToken.Token = TokenRefineScope;
				}
				else
				{
					pushChar();
					_TempToken.Token = TokenColon;
				}
			}
		}
		else if (getOLToken(parse) != TokenUnknown)
			_TempToken.Token = getOLToken(parse);

		if (_TempToken.Token == TokenUnknown)
			error(_TempToken);

		return _TempToken;
	}

	/// True if the read position reached the end of the buffer
	bool posAtEnd() const
	{
		return _TempToken.End >= _Size;
	}

	/// reset the read position to the start of the buffer
	void reset()
	{
		_TempToken.End = 0;
	}

	/// skip spaces
	void skipSpaces()
	{
		while (!posAtEnd() && isspace(_Buffer[_TempToken.End]))
			++(_TempToken.End);
	}

	/// pop char
	char popChar()
	{
		nlassert(!posAtEnd());
		return _Buffer[(_TempToken.End)++];
	}

	/// get char
	char getChar()
	{
		nlassert(!posAtEnd());
		return _Buffer[_TempToken.End];
	}

	/// push char
	void pushChar()
	{
		nlassert(_TempToken.End > 0);
		--(_TempToken.End);
	}

	/// init one letter tokens
	void initOneLetterTokens()
	{
		uint i;
		for (i=0; i<256; ++i)
			_OneLetterTokens[i] = TokenUnknown;

		setOLToken('{', TokenOpenBrace);
		setOLToken('}', TokenCloseBrace);
		setOLToken('(', TokenOpenParenthesis);
		setOLToken(')', TokenCloseParenthesis);
		setOLToken('[', TokenOpenBracket);
		setOLToken(']', TokenCloseBracket);
		setOLToken('<', TokenLessThan);
		setOLToken('>', TokenGreaterThan);
		setOLToken('=', TokenEqual);
		setOLToken(',', TokenComma);
		setOLToken('.', TokenDot);
		setOLToken(':', TokenColon);
		setOLToken(';', TokenSemiColon);
		setOLToken('-', TokenMinus);
		setOLToken('+', TokenPlus);
		setOLToken('*', TokenTimes);
		setOLToken('\\', TokenAntiSlash);
		setOLToken('%', TokenMod);
		setOLToken('#', TokenSharp);
		setOLToken('&', TokenAnd);
		setOLToken('|', TokenOr);
		setOLToken('^', TokenCirc);
		setOLToken('?', TokenInterrog);
		setOLToken('!', TokenExclam);
	}

	/// set one letter token
	void setOLToken(char c, TToken token)
	{
		// index through uint8 so bytes >= 128 cannot produce a negative index
		_OneLetterTokens[(uint8)c] = token;
	}

	/// get one letter token
	TToken getOLToken(char c) const
	{
		return _OneLetterTokens[(uint8)c];
	}

	/// init keywords
	void initKeywords()
	{
		_Keywords.clear();

		_Keywords["verbatim"] = TokenIncludeScript;

		_Keywords["class"] = TokenClass;
		_Keywords["enum"] = TokenEnum;
		_Keywords["dimension"] = TokenDimension;
		_Keywords["parent"] = TokenParent;
		_Keywords["flag"] = TokenFlag;
		_Keywords["file"] = TokenFile;
		_Keywords["db"] = TokenDb;
		_Keywords["type"] = TokenType;
		_Keywords["key"] = TokenKey;
		_Keywords["hidden"] = TokenHidden;
		_Keywords["extern"] = TokenExtern;
		_Keywords["mirrored"] = TokenMirrored;
		_Keywords["implements"] = TokenImplements;
		_Keywords["mapped"] = TokenMapped;
		_Keywords["derived"] = TokenDerived;
		_Keywords["initfill"] = TokenInitFill;
		_Keywords["logmsg"] = TokenLogMsg;
		_Keywords["logcontext"] = TokenLogContext;
		_Keywords["reserve"] = TokenReserve;
		_Keywords["include"] = TokenInclude;
		_Keywords["usepch"] = TokenUsePch;
		_Keywords["writetriggered"] = TokenWriteTrigger;
		_Keywords["separated"] = TokenSeparated;
	}
};
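
// Usage sketch (illustrative only; "example.pds" is a hypothetical file name):
//
//	CTokenizer tokenizer;
//	tokenizer.readFile("example.pds");
//	if (tokenizer.tokenize())
//	{
//		while (!tokenizer.end())
//		{
//			const CTokenizer::CToken &token = tokenizer.currentToken();
//			nlinfo("token %d: '%s'", token.Token, token.get().c_str());
//			if (!tokenizer.next())
//				break;
//		}
//	}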
#endif