From e9b1ab4e0a191a178be27696858678a0fd04f978 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 11 Sep 2021 15:40:47 +0200 Subject: [PATCH] :memo: update table for binary formats --- .../docs/features/binary_formats/index.md | 24 ++- test/src/unit-binary_formats.cpp | 202 ++++++++++++++++++ 2 files changed, 218 insertions(+), 8 deletions(-) create mode 100644 test/src/unit-binary_formats.cpp diff --git a/doc/mkdocs/docs/features/binary_formats/index.md b/doc/mkdocs/docs/features/binary_formats/index.md index bfa18c3c6..9ad0ce335 100644 --- a/doc/mkdocs/docs/features/binary_formats/index.md +++ b/doc/mkdocs/docs/features/binary_formats/index.md @@ -36,13 +36,21 @@ See [binary values](../binary_values.md) for more information. ### Sizes -| Format | canada.json | twitter.json | citm_catalog.json | jeopardy.json | -| ------------------ | ----------- | ------------ | ----------------- | ------------- | -| BSON | 85,8 % | 95,2 % | 95,8 % | 106,7 % | -| CBOR | 50,5 % | 86,3 % | 68,4 % | 88,0 % | -| MessagePack | 50,6 % | 86,0 % | 68,5 % | 87,9 % | -| UBJSON | 53,2 % | 91,3 % | 78,2 % | 96,6 % | -| UBJSON (size) | 58,6 % | 92,3 % | 86,8 % | 97,4 % | -| UBJSON (size+type) | 55,9 % | 92,3 % | 85,0 % | 95,0 % | +| Format | [canada.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/canada.json) | [twitter.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/twitter.json) | [citm_catalog.json](https://github.com/nlohmann/json_test_data/blob/master/nativejson-benchmark/citm_catalog.json) | [jeopardy.json](https://github.com/nlohmann/json_test_data/blob/master/jeopardy/jeopardy.json) | [sample.json](https://github.com/nlohmann/json_test_data/blob/master/json_testsuite/sample.json) | +| ------------------ | ----------- | ------------ | ----------------- | ------------- | ------------- | +| BSON | 85,8 % | 95,2 % | 95,8 % | 106,7 % (1) | N/A (2) | +| CBOR | 50,5 % | 86,3 % | 68,4 % | 88,0 % | 87,2 % | +| MessagePack 
| 50,6 % | 86,0 % | 68,5 % | 87,9 % | 87,2 % | +| UBJSON | 53,2 % | 91,3 % | 78,2 % | 96,6 % | 88,2 % | +| UBJSON (size) | 58,6 % | 92,3 % | 86,8 % | 97,4 % | 89,3 % | +| UBJSON (size+type) | 55,9 % | 92,3 % | 85,0 % | 95,0 % | 89,5 % | +| BON8 | 50,5 % | 83,8 % | 63,5 % | 87,5 % | 85,6 % | Sizes compared to minified JSON value. + +Notes: + +- (1) The JSON value is an array that needed to be wrapped in an object to be processed by BSON. We used an empty object key for minimal overhead. +- (2) The JSON value contained a string with code point `U+0000` which cannot be represented by BSON. + +The JSON files are part of the [nlohmann/json_test_data](https://github.com/nlohmann/json_test_data) repository. diff --git a/test/src/unit-binary_formats.cpp b/test/src/unit-binary_formats.cpp new file mode 100644 index 000000000..d9f69ea2f --- /dev/null +++ b/test/src/unit-binary_formats.cpp @@ -0,0 +1,202 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.10.2 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License <http://opensource.org/licenses/MIT>. +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann <http://nlohmann.me>. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "doctest_compatibility.h"
+
+#include <nlohmann/json.hpp>
+using nlohmann::json;
+
+#include <fstream>
+#include <test_data.hpp>
+
+TEST_CASE("Binary Formats" * doctest::skip()) // size figures behind the docs "Sizes" table; skipped unless doctest runs with --no-skip
+{
+    SECTION("canada.json")
+    {
+        const auto* filename = TEST_DATA_DIRECTORY "/nativejson-benchmark/canada.json";
+        json j = json::parse(std::ifstream(filename));
+
+        const auto json_size = j.dump().size(); // compact dump: the 100 % baseline every ratio below is taken against
+        const auto bson_size = json::to_bson(j).size();
+        const auto cbor_size = json::to_cbor(j).size();
+        const auto msgpack_size = json::to_msgpack(j).size();
+        const auto ubjson_1_size = json::to_ubjson(j).size();
+        const auto ubjson_2_size = json::to_ubjson(j, true).size(); // "UBJSON (size)" row of the table
+        const auto ubjson_3_size = json::to_ubjson(j, true, true).size(); // "UBJSON (size+type)" row of the table
+        const auto bon8_size = json::to_bon8(j).size();
+
+        CHECK(json_size == 2090303); // exact byte counts, so any change in encoder output is caught
+        CHECK(bson_size == 1794522);
+        CHECK(cbor_size == 1055552);
+        CHECK(msgpack_size == 1056145);
+        CHECK(ubjson_1_size == 1112030);
+        CHECK(ubjson_2_size == 1224148);
+        CHECK(ubjson_3_size == 1169069);
+        CHECK(bon8_size == 1055789);
+
+        CHECK((100.0 * double(json_size) / double(json_size)) == Approx(100.0)); // percentages; the table shows these rounded to one decimal
+        CHECK((100.0 * double(bson_size) / double(json_size)) == Approx(85.849));
+        CHECK((100.0 * double(cbor_size) / double(json_size)) == Approx(50.497));
+        CHECK((100.0 * double(msgpack_size) / double(json_size)) == Approx(50.526));
+        CHECK((100.0 * double(ubjson_1_size) / double(json_size)) == Approx(53.199));
+        CHECK((100.0 * double(ubjson_2_size) / double(json_size)) == Approx(58.563));
+        CHECK((100.0 * double(ubjson_3_size) / double(json_size)) == Approx(55.928));
+        CHECK((100.0 * double(bon8_size) / double(json_size)) == Approx(50.509));
+    }
+
+    SECTION("twitter.json")
+    {
+        const auto* filename = TEST_DATA_DIRECTORY "/nativejson-benchmark/twitter.json";
+        json j = json::parse(std::ifstream(filename));
+
+        const auto json_size = j.dump().size();
+        const auto bson_size = json::to_bson(j).size();
+        const auto cbor_size = json::to_cbor(j).size();
+        const auto msgpack_size = json::to_msgpack(j).size();
+        const auto ubjson_1_size = json::to_ubjson(j).size();
+        const auto ubjson_2_size = json::to_ubjson(j, true).size();
+        const auto ubjson_3_size = json::to_ubjson(j, true, true).size();
+        const auto bon8_size = json::to_bon8(j).size();
+
+        CHECK(json_size == 466906);
+        CHECK(bson_size == 444568);
+        CHECK(cbor_size == 402814);
+        CHECK(msgpack_size == 401510);
+        CHECK(ubjson_1_size == 426160);
+        CHECK(ubjson_2_size == 430788);
+        CHECK(ubjson_3_size == 430798);
+        CHECK(bon8_size == 391172);
+
+        CHECK((100.0 * double(json_size) / double(json_size)) == Approx(100.0));
+        CHECK((100.0 * double(bson_size) / double(json_size)) == Approx(95.215));
+        CHECK((100.0 * double(cbor_size) / double(json_size)) == Approx(86.273));
+        CHECK((100.0 * double(msgpack_size) / double(json_size)) == Approx(85.993));
+        CHECK((100.0 * double(ubjson_1_size) / double(json_size)) == Approx(91.273));
+        CHECK((100.0 * double(ubjson_2_size) / double(json_size)) == Approx(92.264));
+        CHECK((100.0 * double(ubjson_3_size) / double(json_size)) == Approx(92.266));
+        CHECK((100.0 * double(bon8_size) / double(json_size)) == Approx(83.779));
+    }
+
+    SECTION("citm_catalog.json")
+    {
+        const auto* filename = TEST_DATA_DIRECTORY "/nativejson-benchmark/citm_catalog.json";
+        json j = json::parse(std::ifstream(filename));
+
+        const auto json_size = j.dump().size();
+        const auto bson_size = json::to_bson(j).size();
+        const auto cbor_size = json::to_cbor(j).size();
+        const auto msgpack_size = json::to_msgpack(j).size();
+        const auto ubjson_1_size = json::to_ubjson(j).size();
+        const auto ubjson_2_size = json::to_ubjson(j, true).size();
+        const auto ubjson_3_size = json::to_ubjson(j, true, true).size();
+        const auto bon8_size = json::to_bon8(j).size();
+
+        CHECK(json_size == 500299);
+        CHECK(bson_size == 479430);
+        CHECK(cbor_size == 342373);
+        CHECK(msgpack_size == 342473);
+        CHECK(ubjson_1_size == 391463);
+        CHECK(ubjson_2_size == 434239);
+        CHECK(ubjson_3_size == 425073);
+        CHECK(bon8_size == 317877);
+
+        CHECK((100.0 * double(json_size) / double(json_size)) == Approx(100.0));
+        CHECK((100.0 * double(bson_size) / double(json_size)) == Approx(95.828));
+        CHECK((100.0 * double(cbor_size) / double(json_size)) == Approx(68.433));
+        CHECK((100.0 * double(msgpack_size) / double(json_size)) == Approx(68.453));
+        CHECK((100.0 * double(ubjson_1_size) / double(json_size)) == Approx(78.245));
+        CHECK((100.0 * double(ubjson_2_size) / double(json_size)) == Approx(86.795));
+        CHECK((100.0 * double(ubjson_3_size) / double(json_size)) == Approx(84.963));
+        CHECK((100.0 * double(bon8_size) / double(json_size)) == Approx(63.537));
+    }
+
+    SECTION("jeopardy.json")
+    {
+        const auto* filename = TEST_DATA_DIRECTORY "/jeopardy/jeopardy.json";
+        json j = json::parse(std::ifstream(filename));
+
+        const auto json_size = j.dump().size();
+        const auto bson_size = json::to_bson({{"", j}}).size(); // wrap array in object for BSON
+        const auto cbor_size = json::to_cbor(j).size();
+        const auto msgpack_size = json::to_msgpack(j).size();
+        const auto ubjson_1_size = json::to_ubjson(j).size();
+        const auto ubjson_2_size = json::to_ubjson(j, true).size();
+        const auto ubjson_3_size = json::to_ubjson(j, true, true).size();
+        const auto bon8_size = json::to_bon8(j).size();
+
+        CHECK(json_size == 52508728);
+        CHECK(bson_size == 56008520);
+        CHECK(cbor_size == 46187320);
+        CHECK(msgpack_size == 46158575);
+        CHECK(ubjson_1_size == 50710965);
+        CHECK(ubjson_2_size == 51144830);
+        CHECK(ubjson_3_size == 49861422);
+        CHECK(bon8_size == 45942080);
+
+        CHECK((100.0 * double(json_size) / double(json_size)) == Approx(100.0));
+        CHECK((100.0 * double(bson_size) / double(json_size)) == Approx(106.665));
+        CHECK((100.0 * double(cbor_size) / double(json_size)) == Approx(87.961));
+        CHECK((100.0 * double(msgpack_size) / double(json_size)) == Approx(87.906));
+        CHECK((100.0 * double(ubjson_1_size) / double(json_size)) == Approx(96.576));
+        CHECK((100.0 * double(ubjson_2_size) / double(json_size)) == Approx(97.402));
+        CHECK((100.0 * double(ubjson_3_size) / double(json_size)) == Approx(94.958));
+        CHECK((100.0 * double(bon8_size) / double(json_size)) == Approx(87.494));
+    }
+
+    SECTION("sample.json")
+    {
+        const auto* filename = TEST_DATA_DIRECTORY "/json_testsuite/sample.json";
+        json j = json::parse(std::ifstream(filename));
+
+        const auto json_size = j.dump().size();
+        // BSON cannot process the file as it contains code point U+0000
+        const auto cbor_size = json::to_cbor(j).size();
+        const auto msgpack_size = json::to_msgpack(j).size();
+        const auto ubjson_1_size = json::to_ubjson(j).size();
+        const auto ubjson_2_size = json::to_ubjson(j, true).size();
+        const auto ubjson_3_size = json::to_ubjson(j, true, true).size();
+        const auto bon8_size = json::to_bon8(j).size();
+
+        CHECK(json_size == 168677);
+        CHECK(cbor_size == 147095);
+        CHECK(msgpack_size == 147017);
+        CHECK(ubjson_1_size == 148695);
+        CHECK(ubjson_2_size == 150569);
+        CHECK(ubjson_3_size == 150883);
+        CHECK(bon8_size == 144463);
+
+        CHECK((100.0 * double(json_size) / double(json_size)) == Approx(100.0));
+        CHECK((100.0 * double(cbor_size) / double(json_size)) == Approx(87.205));
+        CHECK((100.0 * double(msgpack_size) / double(json_size)) == Approx(87.158));
+        CHECK((100.0 * double(ubjson_1_size) / double(json_size)) == Approx(88.153));
+        CHECK((100.0 * double(ubjson_2_size) / double(json_size)) == Approx(89.264));
+        CHECK((100.0 * double(ubjson_3_size) / double(json_size)) == Approx(89.450));
+        CHECK((100.0 * double(bon8_size) / double(json_size)) == Approx(85.644));
+    }
+}