diff --git a/code/nel/src/gui/group_html_parser.cpp b/code/nel/src/gui/group_html_parser.cpp
index 19d1efe1a..e6f63d464 100644
--- a/code/nel/src/gui/group_html_parser.cpp
+++ b/code/nel/src/gui/group_html_parser.cpp
@@ -117,10 +117,144 @@ namespace NLGUI
}
}
+ // ***************************************************************************
+ /* http://stackoverflow.com/a/18335183
+  *
+  * Builds and returns a new string from str. Multi-byte sequences that pass
+  * the checks below are copied through; a byte that reaches the end of the
+  * loop body without matching is replaced by a two-byte sequence.
+  *
+  * str    : input bytes, not modified.
+  * return : the rebuilt string (capacity is reserved to the input length).
+  *
+  * What the visible branches do:
+  *  - 2-byte case: second byte in 128..191. The pair is copied, except that
+  *    lead byte 194 with a second byte below 160 is dropped.
+  *  - 3-byte case: lead byte below 240, second and third bytes in 128..191.
+  *  - 4-byte case: lead byte below 245, the three following bytes in 128..191.
+  *  - otherwise: byte 195 followed by c-64 is appended. For c in 192..255
+  *    that pair is the two-byte UTF-8 encoding of the code point whose
+  *    number equals c.
+  *
+  * NOTE(review): this copy of the function is damaged. On the for line and on
+  * the two else-if lines for the 3-byte and 4-byte cases, the text between a
+  * less-than sign and the next greater-than sign is missing. The loop
+  * condition, the branches that handle bytes below the 2-byte range, the
+  * length checks and the loads of c, c2, c3 and c4 are therefore not visible
+  * here. Restore them from the upstream file before relying on this text.
+  */
+ static std::string correct_non_utf_8(const std::string &str)
+ {
+ int i,f_size=str.size(); // i: byte index into str, f_size: input length in bytes
+ unsigned char c,c2,c3,c4; // current byte and up to three bytes that follow it
+ std::string to; // output buffer, returned at the end
+ to.reserve(f_size); // reserve the input length up front
+
+ for(i=0 ; i127 && c2<192){//valid 2byte UTF8: second byte in 128..191 (NOTE(review): the start of this line is truncated, see header)
+ if(c==194 && c2<160){//control char, skipping: nothing is appended for this pair
+ ;
+ }else{
+ to.append(1,c);
+ to.append(1,c2);
+ }
+ i++; // step over the second byte; the loop is expected to advance past the lead byte
+ continue;
+ }
+ }else if(c<240 && i+2127 && c2<192 && c3>127 && c3<192){//valid 3byte UTF8 (NOTE(review): the middle of this line is truncated, see header)
+ to.append(1,c);
+ to.append(1,c2);
+ to.append(1,c3);
+ i+=2; // step over the two continuation bytes
+ continue;
+ }
+ }else if(c<245 && i+3127 && c2<192 && c3>127 && c3<192 && c4>127 && c4<192){//valid 4byte UTF8 (NOTE(review): the middle of this line is truncated, see header)
+ to.append(1,c);
+ to.append(1,c2);
+ to.append(1,c3);
+ to.append(1,c4);
+ i+=3; // step over the three continuation bytes
+ continue;
+ }
+ }
+ //invalid UTF8, converting ASCII (c>245 || string too short for multi-byte)); reached whenever no branch above hit a continue
+ to.append(1,(unsigned char)195); // fixed first byte of the replacement pair
+ to.append(1,c-64); // second byte: maps 192..255 onto 128..191
+ }
+ return to;
+ }
+
+ /* ***************************************************************************
+  * Rewrites htmlString in place before it is handed to the parser.
+  *
+  * htmlString : document text, modified in place.
+  *
+  * Steps, in order:
+  *  1. If the first three bytes are EF BB BF they are erased.
+  *  2. When end is found, and start < end < pos, the text in [start, end) is
+  *     copied to just after the first greater-than sign at or after pos and
+  *     then erased from its old place. The copy is inserted first; it lands
+  *     after the span, so the erase offset start is still valid.
+  *  3. A marker is searched for, 7 characters are erased at its position and
+  *     a string is appended to the end of the document.
+  *  4. The whole string is replaced by the result of correct_non_utf_8.
+  *
+  * NOTE(review): this copy of the function is damaged; text between a
+  * less-than sign and the next greater-than sign is missing in several places.
+  *  - The two code lines after the comment about lowercase tags are truncated:
+  *    the search string, the test on pos, and the declarations of start and
+  *    end are not visible.
+  *  - In step 3 both string literals are empty in this copy. The erase length
+  *    of 7 matches the length of </html>, so that is presumably the marker
+  *    and the appended text. TODO confirm against upstream.
+  *  - The comment above step 3 lost the tag name for the same reason.
+  *  - The step 3 condition compares a find result with pos using greater-than.
+  *    Since pos is not npos at that point, the comparison also holds when the
+  *    find returns npos. Confirm whether that is intended.
+  */
+ static void patchHtmlQuirks(std::string &htmlString)
+ {
+ size_t npos = std::string::npos;
+ size_t pos;
+
+ // get rid of BOM (some ingame help files do not show up otherwise)
+ if (htmlString.substr(0, 3) == "\xEF\xBB\xBF")
+ {
+ htmlString.erase(0, 3);
+ }
+
+ // if any element is before <html>, then parser adds <html><body>
+ // and original tags are ignored (their attributes not processed)
+ // (tag names in this comment are presumed, the markup was stripped from this copy - TODO confirm)
+ // only fix situation when there is <body> tag with attributes
+ // NOTE(review): the two code lines that follow are truncated, restore them from upstream
+ // tags are considered to be lowercase
+
+ pos = htmlString.find("
+ if (htmlString.substr(start, 2) == "");
+ if (end != npos && start < end && end < pos)
+ {
+ // body tag end position
+ size_t insert = htmlString.find(">", pos);
+ if (insert != npos)
+ {
+ std::string str = htmlString.substr(start, end - start);
+ htmlString.insert(insert+1, str);
+ htmlString.erase(start, str.size());
+ }
+ }
+ }
+
+ // make sure (if present) is last in document or tags coming after it are ignored
+ pos = htmlString.find("");
+ if (pos != npos && htmlString.find("<", pos+1) > pos)
+ {
+ htmlString.erase(pos, 7);
+ htmlString += "";
+ }
+
+ // if there is invalid utf-8 chars, then libxml will break everything after first it finds.
+ htmlString = correct_non_utf_8(htmlString);
+ }
+
// ***************************************************************************
bool CGroupHTML::parseHtml(std::string htmlString)
{
- htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, XML_CHAR_ENCODING_NONE);
+ htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, XML_CHAR_ENCODING_UTF8);
if (!parser)
{
nlwarning("Creating html parser context failed");
@@ -129,6 +263,9 @@ namespace NLGUI
htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
+ // parser is little strict on tag order, so fix whats needed
+ patchHtmlQuirks(htmlString);
+
htmlParseChunk(parser, htmlString.c_str(), htmlString.size(), 0);
htmlParseChunk(parser, "", 0, 1);