Added support for reading UTF-8 encoded XML files

This commit is contained in:
Robert Osfield
2018-09-04 12:19:14 +01:00
parent ae3133522d
commit afe5644b9f
2 changed files with 66 additions and 22 deletions

View File

@@ -105,9 +105,9 @@ class OSGDB_EXPORT XmlNode : public osg::Referenced
// Report the offset of the next unread byte in the buffer.
size_type currentPosition() const
{
    return _currentPos;
}
// Read the byte at the current position and advance past it.
// Returns the byte as a value in the range [0,255], or -1 when the current
// position is at or beyond the end of the buffer (the position is then left unchanged).
int get()
{
    if (_currentPos>=_buffer.size()) return -1;

    // Convert via unsigned char so that bytes >= 0x80 (e.g. UTF-8 lead and continuation bytes)
    // are not sign-extended to negative values where plain char is signed; otherwise 0xFF
    // would be indistinguishable from the -1 end-of-buffer marker.
    return static_cast<unsigned char>(_buffer[_currentPos++]);
}
// Consume one byte from the buffer.
// Yields the byte as a non-negative value (it is read through unsigned char),
// or -1 once the current position has reached the end of the buffer.
int get()
{
    if (_currentPos<_buffer.size())
    {
        const unsigned char byte = static_cast<unsigned char>(_buffer[_currentPos]);
        ++_currentPos;
        return byte;
    }

    return -1;
}
// Peek at the byte i positions ahead of the current position without advancing.
// Returns the byte as a value in the range [0,255], or -1 when that position lies
// at or beyond the end of the buffer.
int operator [] (size_type i) const
{
    const size_type index = _currentPos+i;
    if (index>=_buffer.size()) return -1;

    // Convert via unsigned char so that bytes >= 0x80 are not sign-extended to negative
    // values where plain char is signed; otherwise 0xFF would look like the -1 marker.
    return static_cast<unsigned char>(_buffer[index]);
}
// Look ahead i bytes from the current position without moving it.
// Yields the byte as a non-negative value (it is read through unsigned char),
// or -1 when the requested offset is not inside the buffer.
int operator [] (size_type i) const
{
    return ((_currentPos+i)<_buffer.size())
        ? static_cast<int>(static_cast<unsigned char>(_buffer[_currentPos+i]))
        : -1;
}
// Step the current position forward by one byte; does nothing once the end of the buffer is reached.
void operator ++ ()
{
    if (_currentPos>=_buffer.size()) return;

    ++_currentPos;
}
@@ -129,12 +129,56 @@ class OSGDB_EXPORT XmlNode : public osg::Referenced
// Test whether the buffer contents starting at the current position equal str.
// At most str.size() characters of the buffer are compared; returns false when the
// current position is already at or beyond the end of the buffer.
bool match(const std::string& str)
{
    if (_currentPos>=_buffer.size()) return false;

    return _buffer.compare(_currentPos, str.size(), str)==0;
}
// Selects how copyCharacterToString() groups bytes into a character.
enum Encoding
{
    ENCODING_ASCII = 0, // every byte is copied as one character
    ENCODING_UTF8  = 1  // the lead byte decides how many bytes make up one character
};
// Choose the encoding used when copying characters out of the buffer.
void setEncoding(Encoding encoding)
{
    _encoding = encoding;
}
// Report the encoding currently used when copying characters out of the buffer.
Encoding getEncoding() const
{
    return _encoding;
}
inline void copyCharacterToString(std::string& str)
{
if (_currentPos>=_buffer.size()) return;
switch (_encoding)
{
case(ENCODING_UTF8) :
{
int char0 = static_cast<unsigned char>(_buffer[_currentPos]); ++_currentPos;
str.push_back(char0);
if (char0 < 0x80 || _currentPos>=_buffer.size()) break; // 1-byte character
str.push_back(_buffer[_currentPos]); ++_currentPos;
if (char0<0xe0 || _currentPos<_buffer.size()) break; // 2-byte character
str.push_back(_buffer[_currentPos]); ++_currentPos;
if (char0<0xf0 || _currentPos>=_buffer.size()) break; // 3-byte character
str.push_back(_buffer[_currentPos]); ++_currentPos;
if (char0<0xf8 || _currentPos>=_buffer.size()) break; // 4-byte character
if (_currentPos>=_buffer.size()) break;
str.push_back(_buffer[_currentPos]); ++_currentPos; // 5-byte character?
break;
}
case(ENCODING_ASCII) :
default:
str.push_back(_buffer[_currentPos]);
++_currentPos;
return;
}
}
private:
// NOTE(review): each of the first three declarations below appears twice in this view;
// this looks like the before/after lines of a whitespace-only diff hunk — confirm against the real header.

// Offset into _buffer of the next byte to be read.
size_type _currentPos;
size_type _currentPos;
// Input file stream; presumably the source that _buffer is filled from — its use is not visible here, confirm.
std::ifstream _fin;
// The bytes being read, indexed by _currentPos.
std::string _buffer;
std::ifstream _fin;
std::string _buffer;
// Selects how copyCharacterToString() groups bytes into a character.
Encoding _encoding;
};