/* -*-c++-*- * Copyright (C) 2010 Cedric Pinson * * This library is open source and may be redistributed and/or modified under * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or * (at your option) any later version. The full license is in LICENSE file * included with this distribution, and on the openscenegraph.org website. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * OpenSceneGraph Public License for more details. * * Authors: * Cedric Pinson */ #ifndef JSON_STREAM #define JSON_STREAM #include #include #include // control characters #include #include #include #include #include using namespace std; // A simple class wrapping ofstream calls to enable generic cleaning of json data. // Especially 'standard' json should: // * have utf-8 encoded string // * disallow some control characters // * does not support inf or nan values #if defined(WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<=1700) inline int isfinite( double x ) { return _finite( x ); } inline int isinf( double x ) { return !_finite( x ) && !_isnan( x ); } #endif class json_stream : public osgDB::ofstream { public: json_stream(const std::string& filename, bool strict=true) : _stream(filename.c_str()), _strict(strict) {} ~json_stream() { _stream.close(); } operator bool() const { return _stream.is_open(); } // forward std::endl typedef std::ostream& (*ostream_manipulator)(std::ostream&); json_stream& operator<<(ostream_manipulator pf) { if (_stream.is_open()) { _stream << pf; } return *this; } template json_stream& operator<<(const T& data) { if (_stream.is_open()) { _stream << sanitize(data); } return *this; } template const T sanitize(const T& t) { return t; } double sanitize(const float f) { return sanitize(static_cast(f)); } double sanitize(const double d) { if(_strict) { return to_valid_float(d); } return d; } double to_valid_float(const double d) { if(isfinite(d)) { return d; } else { if(isinf(d)) { return std::numeric_limits::max(); } // no much way to do better than replace invalid float NaN by 0 return 0.; } } std::string sanitize(const std::string& s) { if(_strict) { return to_json_utf8(s); } return s; } std::string sanitize(const char* s) { return sanitize(std::string(s)); } std::string to_json_utf8(const std::string& s) { // TODO: try to decode latin1 if string is not valid utf8 // before actually fixing bad 'chars' return clean_invalid_utf8(s); } protected: std::ofstream _stream; bool _strict; std::string json_encode_control_char(int ctrl) { // see http://json.org std::ostringstream oss; if(ctrl == 8 || // \b ctrl == 9 || // \t ctrl == 10 || // \n ctrl == 12 || // \f ctrl == 13 || // \r ctrl == 27 || // ctrl == 34 || // \" ctrl == 47 // \/ ) { oss << static_cast(ctrl); } else { oss.fill('0'); oss << "\\u" << std::setw(4) << std::hex << ctrl; } return oss.str(); } inline bool is_valid_continuation_byte(unsigned int byte) { return ((byte & 0xC0) == 0x80); } inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) { if(iterator != end_iterator) { return *(++ iterator); } else { return 0; // invalid continuation byte } } // from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences std::string utf8_encode_codepoint(unsigned code_point) { std::string output; if(code_point > 0x590 && code_point < 0x5F4) { return output; } // out of range if(code_point > 1114112) { return utf8_encode_codepoint(0xfffd); } if (code_point < 0x80) { output.push_back(code_point); } else if (code_point <= 0x7FF) { output.push_back((code_point >> 6) + 0xC0); output.push_back((code_point & 0x3F) + 0x80); } else if (code_point <= 0xFFFF) { output.push_back((code_point >> 12) + 0xE0); output.push_back(((code_point >> 6) & 0x3F) + 0x80); output.push_back((code_point & 0x3F) + 0x80); } else if (code_point <= 0x10FFFF) { output.push_back((code_point >> 18) + 0xF0); output.push_back(((code_point >> 12) & 0x3F) + 0x80); output.push_back(((code_point >> 6) & 0x3F) + 0x80); output.push_back((code_point & 0x3F) + 0x80); } return output; } // from http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences std::string clean_invalid_utf8(const std::string& input, const int replacement_codepoint=0xfffd) { int code_unit1, code_unit2, code_unit3, code_unit4; std::string output, replacement = utf8_encode_codepoint(replacement_codepoint); for(std::string::const_iterator iterator = input.begin() ; iterator != input.end() ; ++ iterator) { code_unit1 = *iterator; if (code_unit1 < 0x80) { if(std::iscntrl(code_unit1)) { output += json_encode_control_char(code_unit1); } else { output.push_back(code_unit1); } } else if (code_unit1 < 0xC2) { // continuation or overlong 2-byte sequence output += replacement; } else if (code_unit1 < 0xE0) { // 2-byte sequence code_unit2 = get_next_byte(iterator, input.end()); if (!is_valid_continuation_byte(code_unit2)) { output += replacement; output += replacement; } else { output += utf8_encode_codepoint((code_unit1 << 6) + code_unit2 - 0x3080); } } else if (code_unit1 < 0xF0) { // 3-byte sequence code_unit2 = get_next_byte(iterator, input.end()); if (!is_valid_continuation_byte(code_unit2) || (code_unit1 == 0xE0 && code_unit2 < 0xA0)) /* overlong */ { output += replacement; output += replacement; } else { code_unit3 = get_next_byte(iterator, input.end()); if (!is_valid_continuation_byte(code_unit3)) { output += replacement; output += replacement; output += replacement; } else { output += utf8_encode_codepoint((code_unit1 << 12) + (code_unit2 << 6) + code_unit3 - 0xE2080); } } } else if (code_unit1 < 0xF5) { // 4-byte sequence code_unit2 = get_next_byte(iterator, input.end()); if(!is_valid_continuation_byte(code_unit2) || (code_unit1 == 0xF0 && code_unit2 < 0x90) || /* overlong */ (code_unit1 == 0xF4 && code_unit2 >= 0x90)) { /* > U+10FFFF */ output += replacement; output += replacement; } else { code_unit3 = get_next_byte(iterator, input.end()); if(!is_valid_continuation_byte(code_unit3)) { output += replacement; output += replacement; output += replacement; } else { code_unit4 = get_next_byte(iterator, input.end()); if(!is_valid_continuation_byte(code_unit4)) { output += replacement; output += replacement; output += replacement; output += replacement; } else { output += utf8_encode_codepoint((code_unit1 << 18) + (code_unit2 << 12) + (code_unit3 << 6) + code_unit4 - 0x3C82080); } } } } else { /* > U+10FFFF */ output += replacement; } } return output; } }; #endif