git-svn-id: http://svn.openscenegraph.org/osg/OpenSceneGraph/trunk@14989 16af8721-9629-0410-8352-f15c8da7e697
300 lines
10 KiB
C++
300 lines
10 KiB
C++
/* -*-c++-*-
 * Copyright (C) 2010 Cedric Pinson <cedric.pinson@plopbyte.net>
 *
 * This library is open source and may be redistributed and/or modified under
 * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
 * (at your option) any later version. The full license is in LICENSE file
 * included with this distribution, and on the openscenegraph.org website.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * OpenSceneGraph Public License for more details.
 *
 * Authors:
 * Cedric Pinson <cedric.pinson@plopbyte.net>
 */
|
|
|
|
#ifndef JSON_STREAM
|
|
#define JSON_STREAM
|
|
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
#include <cctype> // control characters
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
#include <osgDB/fstream>
|
|
|
|
using namespace std;
|
|
|
|
// A simple class wrapping ofstream calls to enable generic cleaning of json data.
// In particular, 'standard' json should:
//   * have utf-8 encoded strings
//   * disallow some control characters
//   * not contain inf or nan values
|
|
|
|
// Fallback classification helpers, compiled only for WIN32 builds that are not
// MinGW and whose _MSC_VER is either undefined or <= 1700. They are built on
// the CRT's _finite() / _isnan().
#if defined(WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<=1700)
// Non-zero when `value` is neither infinite nor NaN.
inline int isfinite( double value )
{
    return _finite( value );
}

// Non-zero when `value` is an infinity: not finite, and not NaN either.
inline int isinf( double value )
{
    return !( _finite( value ) || _isnan( value ) );
}
#endif
|
|
|
|
|
|
class json_stream : public osgDB::ofstream {
|
|
public:
|
|
json_stream(const std::string& filename, bool strict=true) :
|
|
_stream(filename.c_str()),
|
|
_strict(strict)
|
|
{}
|
|
|
|
~json_stream() {
|
|
_stream.close();
|
|
}
|
|
|
|
operator bool() const {
|
|
return _stream.is_open();
|
|
}
|
|
|
|
// forward std::endl
|
|
typedef std::ostream& (*ostream_manipulator)(std::ostream&);
|
|
json_stream& operator<<(ostream_manipulator pf) {
|
|
if (_stream.is_open()) {
|
|
_stream << pf;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
template<typename T>
|
|
json_stream& operator<<(const T& data) {
|
|
if (_stream.is_open()) {
|
|
_stream << sanitize(data);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
template<typename T>
|
|
const T sanitize(const T& t) {
|
|
return t;
|
|
}
|
|
|
|
double sanitize(const float f) {
|
|
return sanitize(static_cast<double>(f));
|
|
}
|
|
|
|
double sanitize(const double d) {
|
|
if(_strict) {
|
|
return to_valid_float(d);
|
|
}
|
|
return d;
|
|
}
|
|
|
|
double to_valid_float(const double d) {
|
|
if(isfinite(d)) {
|
|
return d;
|
|
}
|
|
else {
|
|
if(isinf(d)) {
|
|
return std::numeric_limits<double>::max();
|
|
}
|
|
// no much way to do better than replace invalid float NaN by 0
|
|
return 0.;
|
|
}
|
|
}
|
|
|
|
std::string sanitize(const std::string& s) {
|
|
if(_strict) {
|
|
return to_json_utf8(s);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
std::string sanitize(const char* s) {
|
|
return sanitize(std::string(s));
|
|
}
|
|
|
|
std::string to_json_utf8(const std::string& s) {
|
|
// TODO: try to decode latin1 if string is not valid utf8
|
|
// before actually fixing bad 'chars'
|
|
return clean_invalid_utf8(s);
|
|
}
|
|
|
|
|
|
protected:
|
|
std::ofstream _stream;
|
|
bool _strict;
|
|
|
|
std::string json_encode_control_char(int ctrl) {
|
|
// see http://json.org
|
|
std::ostringstream oss;
|
|
|
|
if(ctrl == 8 || // \b
|
|
ctrl == 9 || // \t
|
|
ctrl == 10 || // \n
|
|
ctrl == 12 || // \f
|
|
ctrl == 13 || // \r
|
|
ctrl == 27 || //
|
|
ctrl == 34 || // \"
|
|
ctrl == 47 // \/
|
|
) {
|
|
oss << static_cast<char>(ctrl);
|
|
}
|
|
else {
|
|
oss.fill('0');
|
|
oss << "\\u" << std::setw(4) << std::hex << ctrl;
|
|
}
|
|
|
|
return oss.str();
|
|
}
|
|
|
|
|
|
inline bool is_valid_continuation_byte(unsigned int byte) {
|
|
return ((byte & 0xC0) == 0x80);
|
|
}
|
|
|
|
inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
|
|
if(iterator != end_iterator) {
|
|
return *(++ iterator);
|
|
}
|
|
else {
|
|
return 0; // invalid continuation byte
|
|
}
|
|
}
|
|
|
|
// from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
|
|
std::string utf8_encode_codepoint(unsigned code_point)
|
|
{
|
|
std::string output;
|
|
|
|
if(code_point > 0x590 && code_point < 0x5F4) {
|
|
return output;
|
|
}
|
|
|
|
// out of range
|
|
if(code_point > 1114112) {
|
|
return utf8_encode_codepoint(0xfffd);
|
|
}
|
|
|
|
if (code_point < 0x80) {
|
|
output.push_back(code_point);
|
|
}
|
|
else if (code_point <= 0x7FF) {
|
|
output.push_back((code_point >> 6) + 0xC0);
|
|
output.push_back((code_point & 0x3F) + 0x80);
|
|
}
|
|
else if (code_point <= 0xFFFF) {
|
|
output.push_back((code_point >> 12) + 0xE0);
|
|
output.push_back(((code_point >> 6) & 0x3F) + 0x80);
|
|
output.push_back((code_point & 0x3F) + 0x80);
|
|
}
|
|
else if (code_point <= 0x10FFFF) {
|
|
output.push_back((code_point >> 18) + 0xF0);
|
|
output.push_back(((code_point >> 12) & 0x3F) + 0x80);
|
|
output.push_back(((code_point >> 6) & 0x3F) + 0x80);
|
|
output.push_back((code_point & 0x3F) + 0x80);
|
|
}
|
|
return output;
|
|
}
|
|
|
|
// from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
|
|
std::string clean_invalid_utf8(const std::string& input,
|
|
const int replacement_codepoint=0xfffd) {
|
|
int code_unit1, code_unit2, code_unit3, code_unit4;
|
|
std::string output, replacement = utf8_encode_codepoint(replacement_codepoint);
|
|
|
|
for(std::string::const_iterator iterator = input.begin() ; iterator != input.end() ; ++ iterator) {
|
|
code_unit1 = *iterator;
|
|
if (code_unit1 < 0x80) {
|
|
if(std::iscntrl(code_unit1)) {
|
|
output += json_encode_control_char(code_unit1);
|
|
}
|
|
else {
|
|
output.push_back(code_unit1);
|
|
}
|
|
}
|
|
else if (code_unit1 < 0xC2) {
|
|
// continuation or overlong 2-byte sequence
|
|
output += replacement;
|
|
}
|
|
else if (code_unit1 < 0xE0) {
|
|
// 2-byte sequence
|
|
code_unit2 = get_next_byte(iterator, input.end());
|
|
|
|
if (!is_valid_continuation_byte(code_unit2)) {
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
output += utf8_encode_codepoint((code_unit1 << 6) + code_unit2 - 0x3080);
|
|
}
|
|
}
|
|
else if (code_unit1 < 0xF0) {
|
|
// 3-byte sequence
|
|
code_unit2 = get_next_byte(iterator, input.end());
|
|
|
|
if (!is_valid_continuation_byte(code_unit2) ||
|
|
(code_unit1 == 0xE0 && code_unit2 < 0xA0)) /* overlong */ {
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
code_unit3 = get_next_byte(iterator, input.end());
|
|
|
|
if (!is_valid_continuation_byte(code_unit3)) {
|
|
output += replacement;
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
output += utf8_encode_codepoint((code_unit1 << 12) +
|
|
(code_unit2 << 6) +
|
|
code_unit3 - 0xE2080);
|
|
}
|
|
}
|
|
}
|
|
else if (code_unit1 < 0xF5) {
|
|
// 4-byte sequence
|
|
code_unit2 = get_next_byte(iterator, input.end());
|
|
if(!is_valid_continuation_byte(code_unit2) ||
|
|
(code_unit1 == 0xF0 && code_unit2 < 0x90) || /* overlong */
|
|
(code_unit1 == 0xF4 && code_unit2 >= 0x90)) { /* > U+10FFFF */
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
code_unit3 = get_next_byte(iterator, input.end());
|
|
if(!is_valid_continuation_byte(code_unit3)) {
|
|
output += replacement;
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
code_unit4 = get_next_byte(iterator, input.end());
|
|
if(!is_valid_continuation_byte(code_unit4)) {
|
|
output += replacement;
|
|
output += replacement;
|
|
output += replacement;
|
|
output += replacement;
|
|
}
|
|
else {
|
|
output += utf8_encode_codepoint((code_unit1 << 18) +
|
|
(code_unit2 << 12) +
|
|
(code_unit3 << 6) +
|
|
code_unit4 - 0x3C82080);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
/* > U+10FFFF */
|
|
output += replacement;
|
|
}
|
|
}
|
|
return output;
|
|
}
|
|
};
|
|
|
|
|
|
#endif
|