Files
OpenSceneGraph/src/osgPlugins/osgjs/json_stream
2015-07-20 07:42:05 +00:00

300 lines
10 KiB
C++

/* -*-c++-*-
* Copyright (C) 2010 Cedric Pinson <cedric.pinson@plopbyte.net>
*
* This library is open source and may be redistributed and/or modified under
* the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
* (at your option) any later version. The full license is in LICENSE file
* included with this distribution, and on the openscenegraph.org website.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* OpenSceneGraph Public License for more details.
*
* Authors:
* Cedric Pinson <cedric.pinson@plopbyte.net>
*/
#ifndef JSON_STREAM
#define JSON_STREAM
#include <iostream>
#include <iomanip>
#include <cctype> // control characters
#include <sstream>
#include <string>
#include <cmath>
#include <limits>
#include <osgDB/fstream>
using namespace std;
// A simple class wrapping ofstream calls to enable generic cleaning of json data.
// In particular, 'standard' json:
// * requires utf-8 encoded strings
// * disallows some control characters
// * does not support inf or nan values
#if defined(WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<=1700)
// Stand-in for isfinite on toolchains selected by the surrounding #if:
// simply forwards to the CRT routine _finite and returns its result as-is.
inline int isfinite( double x )
{
    const int finite_flag = _finite( x );
    return finite_flag;
}
// Stand-in for isinf on toolchains selected by the surrounding #if:
// yields 1 only when the CRT reports x as neither finite (_finite) nor
// NaN (_isnan), and 0 otherwise.
inline int isinf( double x )
{
    if ( _finite( x ) ) {
        return 0;
    }
    return _isnan( x ) ? 0 : 1;
}
#endif
/**
 * File stream wrapper that cleans values before writing them.
 *
 * Everything is written to the wrapped std::ofstream member `_stream`; the
 * osgDB::ofstream base sub-object is only default-constructed by this class.
 * In strict mode (the default):
 *   - float/double values that are not finite are replaced (see to_valid_float),
 *   - strings are run through to_json_utf8(): invalid UTF-8 is replaced and
 *     ASCII control characters are rewritten (see json_encode_control_char).
 * The same string sanitizing is applied to every std::string / const char*
 * written through operator<<.
 */
class json_stream : public osgDB::ofstream {
public:
    /**
     * Opens `filename` for writing.
     * @param filename path handed to the wrapped std::ofstream.
     * @param strict   when true numbers and strings are sanitized before being
     *                 written; when false they are written unchanged.
     */
    json_stream(const std::string& filename, bool strict=true) :
        _stream(filename.c_str()),
        _strict(strict)
    {}

    /** Closes the wrapped output file. */
    ~json_stream() {
        _stream.close();
    }

    /** True while the wrapped output file is open. */
    operator bool() const {
        return _stream.is_open();
    }

    // forward std::endl (and other ostream manipulators) to the wrapped stream
    typedef std::ostream& (*ostream_manipulator)(std::ostream&);
    json_stream& operator<<(ostream_manipulator pf) {
        if (_stream.is_open()) {
            _stream << pf;
        }
        return *this;
    }

    /**
     * Writes `data` after passing it through the matching sanitize() overload.
     * Nothing is written when the wrapped file is not open.
     */
    template<typename T>
    json_stream& operator<<(const T& data) {
        if (_stream.is_open()) {
            _stream << sanitize(data);
        }
        return *this;
    }

    /** Fallback for types without a dedicated overload: returned unchanged. */
    template<typename T>
    const T sanitize(const T& t) {
        return t;
    }

    /** Floats are widened to double and handled by the double overload. */
    double sanitize(const float f) {
        return sanitize(static_cast<double>(f));
    }

    /** In strict mode replaces non-finite values, otherwise returns `d` as is. */
    double sanitize(const double d) {
        return _strict ? to_valid_float(d) : d;
    }

    /**
     * Maps values that JSON cannot represent onto finite ones:
     *   +inf -> largest finite double, -inf -> its negation, NaN -> 0.
     * Finite values are returned unchanged.
     */
    double to_valid_float(const double d) {
        if(isfinite(d)) {
            return d;
        }
        if(isinf(d)) {
            // keep the sign: negative infinity must not become a huge positive value
            const double largest = std::numeric_limits<double>::max();
            return d < 0 ? -largest : largest;
        }
        // no much way to do better than replace invalid float NaN by 0
        return 0.;
    }

    /** In strict mode cleans the string with to_json_utf8(), otherwise copies it. */
    std::string sanitize(const std::string& s) {
        return _strict ? to_json_utf8(s) : s;
    }

    /** C-string overload; a null pointer is treated as an empty string. */
    std::string sanitize(const char* s) {
        if(!s) {
            // constructing std::string from a null pointer is undefined behaviour
            return std::string();
        }
        return sanitize(std::string(s));
    }

    /** Returns `s` with invalid UTF-8 replaced and control characters rewritten. */
    std::string to_json_utf8(const std::string& s) {
        // TODO: try to decode latin1 if string is not valid utf8
        // before actually fixing bad 'chars'
        return clean_invalid_utf8(s);
    }

protected:
    std::ofstream _stream; // file actually written to
    bool _strict;          // whether sanitize() cleans numbers and strings

    /**
     * Text emitted for an ASCII character that std::iscntrl() flagged.
     * The characters listed below are emitted raw (as the single character
     * itself); any other value becomes a \uXXXX escape with 4 hex digits.
     * NOTE(review): 27 (ESC) is passed through raw although json.org does not
     * list it as an allowed character -- confirm this is intended.
     */
    std::string json_encode_control_char(int ctrl) {
        // see http://json.org
        std::ostringstream oss;
        switch(ctrl) {
            case 8:  // \b
            case 9:  // \t
            case 10: // \n
            case 12: // \f
            case 13: // \r
            case 27: //
            case 34: // \"
            case 47: // \/
                oss << static_cast<char>(ctrl);
                break;
            default:
                oss.fill('0');
                oss << "\\u" << std::setw(4) << std::hex << ctrl;
                break;
        }
        return oss.str();
    }

    /** True when `byte` has the 10xxxxxx bit pattern of a UTF-8 continuation byte. */
    inline bool is_valid_continuation_byte(unsigned int byte) {
        return ((byte & 0xC0) == 0x80);
    }

    /**
     * Extra constraint on the first continuation byte for the lead bytes whose
     * range is narrower than 0x80..0xBF.
     */
    inline bool is_valid_second_byte(unsigned int lead, unsigned int byte) {
        switch(lead) {
            case 0xE0: return byte >= 0xA0; // smaller values are overlong
            case 0xED: return byte <  0xA0; // larger values are UTF-16 surrogates
            case 0xF0: return byte >= 0x90; // smaller values are overlong
            case 0xF4: return byte <  0x90; // larger values are > U+10FFFF
            default:   return true;
        }
    }

    /**
     * Advances `iterator` to the following byte and returns it as a value in
     * 0..255. When there is no following byte the iterator is left untouched
     * and 0 is returned (0 is never a valid continuation byte).
     */
    inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
        if(iterator != end_iterator) {
            std::string::const_iterator next = iterator;
            ++next;
            if(next != end_iterator) {
                iterator = next;
                // go through unsigned char so bytes >= 0x80 are not negative
                return static_cast<unsigned char>(*iterator);
            }
        }
        return 0; // invalid continuation byte
    }

    /**
     * UTF-8 encoding of `code_point`.
     * Values above U+10FFFF and UTF-16 surrogates (U+D800..U+DFFF) are encoded
     * as U+FFFD. Code points strictly between U+0590 and U+05F4 produce an
     * empty string.
     * NOTE(review): the reason for dropping that range is not visible in this
     * file -- confirm before changing it.
     */
    // from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
    std::string utf8_encode_codepoint(unsigned code_point)
    {
        std::string output;
        if(code_point > 0x590 && code_point < 0x5F4) {
            return output;
        }
        // out of range, or a surrogate that UTF-8 must not encode
        if(code_point > 0x10FFFF || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
            return utf8_encode_codepoint(0xfffd);
        }
        if (code_point < 0x80) {
            output.push_back(static_cast<char>(code_point));
        }
        else if (code_point <= 0x7FF) {
            output.push_back(static_cast<char>((code_point >> 6) + 0xC0));
            output.push_back(static_cast<char>((code_point & 0x3F) + 0x80));
        }
        else if (code_point <= 0xFFFF) {
            output.push_back(static_cast<char>((code_point >> 12) + 0xE0));
            output.push_back(static_cast<char>(((code_point >> 6) & 0x3F) + 0x80));
            output.push_back(static_cast<char>((code_point & 0x3F) + 0x80));
        }
        else {
            output.push_back(static_cast<char>((code_point >> 18) + 0xF0));
            output.push_back(static_cast<char>(((code_point >> 12) & 0x3F) + 0x80));
            output.push_back(static_cast<char>(((code_point >> 6) & 0x3F) + 0x80));
            output.push_back(static_cast<char>((code_point & 0x3F) + 0x80));
        }
        return output;
    }

    /**
     * Returns a copy of `input` in which
     *   - ASCII characters flagged by std::iscntrl() are rewritten through
     *     json_encode_control_char(),
     *   - well-formed multi-byte sequences are decoded and re-encoded through
     *     utf8_encode_codepoint(),
     *   - every byte of a malformed or truncated sequence is replaced by the
     *     encoding of `replacement_codepoint`.
     * A byte that terminates a malformed sequence is not swallowed: it is
     * examined again as the start of the next character.
     */
    // from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
    std::string clean_invalid_utf8(const std::string& input,
                                   const int replacement_codepoint=0xfffd) {
        const std::string replacement = utf8_encode_codepoint(replacement_codepoint);
        const std::string::const_iterator end = input.end();
        std::string output;
        output.reserve(input.size());

        for(std::string::const_iterator iterator = input.begin() ; iterator != end ; ++ iterator) {
            // read through unsigned char: with a signed char every byte >= 0x80
            // would be negative and wrongly handled as ASCII
            const unsigned int lead = static_cast<unsigned char>(*iterator);

            if (lead < 0x80) {
                if(std::iscntrl(static_cast<int>(lead))) {
                    output += json_encode_control_char(static_cast<int>(lead));
                }
                else {
                    output.push_back(static_cast<char>(lead));
                }
                continue;
            }

            if (lead < 0xC2 || lead > 0xF4) {
                // stray continuation byte, overlong 2-byte lead, or > U+10FFFF
                output += replacement;
                continue;
            }

            // number of continuation bytes required and payload bits of the lead
            unsigned int expected;
            unsigned int code_point;
            if (lead < 0xE0) {
                expected = 1;
                code_point = lead & 0x1F;
            }
            else if (lead < 0xF0) {
                expected = 2;
                code_point = lead & 0x0F;
            }
            else {
                expected = 3;
                code_point = lead & 0x07;
            }

            // consume continuation bytes one at a time, stopping (without
            // consuming) at the first byte that does not fit or at end of input
            unsigned int accepted = 0;
            while (accepted < expected) {
                std::string::const_iterator next = iterator;
                ++next;
                if (next == end) {
                    break;
                }
                const unsigned int byte = static_cast<unsigned char>(*next);
                if (!is_valid_continuation_byte(byte) ||
                    (accepted == 0 && !is_valid_second_byte(lead, byte))) {
                    break;
                }
                code_point = (code_point << 6) | (byte & 0x3F);
                iterator = next;
                ++accepted;
            }

            if (accepted == expected) {
                output += utf8_encode_codepoint(code_point);
            }
            else {
                // one replacement per byte consumed (lead + accepted continuations)
                for (unsigned int i = 0; i <= accepted; ++i) {
                    output += replacement;
                }
            }
        }
        return output;
    }
};
#endif