# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tools for using Python's :mod:`json` module with BSON documents.

This module provides two helper methods `dumps` and `loads` that wrap the
native :mod:`json` methods and provide explicit BSON conversion to and from
JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON
is emitted and parsed, with the default being the Relaxed Extended JSON format.
:mod:`~bson.json_util` can also generate Canonical or legacy `Extended JSON`_
when :const:`CANONICAL_JSON_OPTIONS` or :const:`LEGACY_JSON_OPTIONS` is
provided, respectively.

.. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst

Example usage (deserialization):

.. doctest::

   >>> from bson.json_util import loads
   >>> loads(
   ...     '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]'
   ... )
   [{'foo': [1, 2]}, {'bar': {'hello': 'world'}}, {'code': Code('function x() { return 1; }', {})}, {'bin': Binary(b'...', 128)}]

Example usage with :const:`RELAXED_JSON_OPTIONS` (the default):

.. doctest::

   >>> from bson import Binary, Code
   >>> from bson.json_util import dumps
   >>> dumps(
   ...     [
   ...         {"foo": [1, 2]},
   ...         {"bar": {"hello": "world"}},
   ...         {"code": Code("function x() { return 1; }")},
   ...         {"bin": Binary(b"\x01\x02\x03\x04")},
   ...     ]
   ... )
   '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'

Example usage (with :const:`CANONICAL_JSON_OPTIONS`):

.. doctest::

   >>> from bson import Binary, Code
   >>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS
   >>> dumps(
   ...     [
   ...         {"foo": [1, 2]},
   ...         {"bar": {"hello": "world"}},
   ...         {"code": Code("function x() { return 1; }")},
   ...         {"bin": Binary(b"\x01\x02\x03\x04")},
   ...     ],
   ...     json_options=CANONICAL_JSON_OPTIONS,
   ... )
   '[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'

Example usage (with :const:`LEGACY_JSON_OPTIONS`):

.. doctest::

   >>> from bson import Binary, Code
   >>> from bson.json_util import dumps, LEGACY_JSON_OPTIONS
   >>> dumps(
   ...     [
   ...         {"foo": [1, 2]},
   ...         {"bar": {"hello": "world"}},
   ...         {"code": Code("function x() { return 1; }", {})},
   ...         {"bin": Binary(b"\x01\x02\x03\x04")},
   ...     ],
   ...     json_options=LEGACY_JSON_OPTIONS,
   ... )
   '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'

Alternatively, you can manually pass the `default` to :func:`json.dumps`.
It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
instances (as they are extended strings you can't provide custom defaults),
but it will be faster as there is less recursion.

.. note::
   If your application does not need the flexibility offered by
   :class:`JSONOptions` and spends a large amount of time in the `json_util`
   module, look to
   `python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice
   performance improvement. `python-bsonjs` is a fast BSON to MongoDB
   Extended JSON converter for Python built on top of
   `libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best
   with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`.
"""

import base64
import datetime
import json
import math
import re
import uuid
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, Union, cast

from bson.binary import ALL_UUID_SUBTYPES, UUID_SUBTYPE, Binary, UuidRepresentation
from bson.code import Code
from bson.codec_options import CodecOptions, DatetimeConversion
from bson.datetime_ms import (
    EPOCH_AWARE,
    DatetimeMS,
    _datetime_to_millis,
    _max_datetime_ms,
    _millis_to_datetime,
)
from bson.dbref import DBRef
from bson.decimal128 import Decimal128
from bson.int64 import Int64
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.son import RE_TYPE, SON
from bson.timestamp import Timestamp
from bson.tz_util import utc

# Maps each Extended JSON regex option character to its :mod:`re` flag.
# The insertion order (i, l, m, s, u, x) is also the order in which option
# characters are emitted when encoding a regular expression.
_RE_OPT_TABLE = {
    "i": re.I,
    "l": re.L,
    "m": re.M,
    "s": re.S,
    "u": re.U,
    "x": re.X,
}


class DatetimeRepresentation:
    LEGACY = 0
    """Legacy MongoDB Extended JSON datetime representation.

    :class:`datetime.datetime` instances will be encoded to JSON in the
    format `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is
    a 64-bit signed integer giving the number of milliseconds since the Unix
    epoch UTC. This was the default encoding before PyMongo version 3.4.

    .. versionadded:: 3.4
    """

    NUMBERLONG = 1
    """NumberLong datetime representation.

    :class:`datetime.datetime` instances will be encoded to JSON in the
    format `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`,
    where `dateAsMilliseconds` is the string representation of a 64-bit signed
    integer giving the number of milliseconds since the Unix epoch UTC.

    .. versionadded:: 3.4
    """

    ISO8601 = 2
    """ISO-8601 datetime representation.

    :class:`datetime.datetime` instances greater than or equal to the Unix
    epoch UTC will be encoded to JSON in the format `{"$date": "<ISO-8601>"}`.
    :class:`datetime.datetime` instances before the Unix epoch UTC will be
    encoded as if the datetime representation is
    :const:`~DatetimeRepresentation.NUMBERLONG`.

    .. versionadded:: 3.4
    """


class JSONMode:
    LEGACY = 0
    """Legacy Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces PyMongo's legacy
    non-standard JSON output. Consider using
    :const:`~bson.json_util.JSONMode.RELAXED` or
    :const:`~bson.json_util.JSONMode.CANONICAL` instead.

    .. versionadded:: 3.5
    """

    RELAXED = 1
    """Relaxed Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces Relaxed Extended JSON,
    a mostly JSON-like format. Consider using this for things like a web API,
    where one is sending a document (or a projection of a document) that only
    uses ordinary JSON type primitives. In particular, the ``int``,
    :class:`~bson.int64.Int64`, and ``float`` numeric types are represented in
    the native JSON number format. This output is also the most human readable
    and is useful for debugging and documentation.

    .. seealso:: The specification for Relaxed `Extended JSON`_.

    .. versionadded:: 3.5
    """

    CANONICAL = 2
    """Canonical Extended JSON representation.

    In this mode, :func:`~bson.json_util.dumps` produces Canonical Extended
    JSON, a type preserving format. Consider using this for things like
    testing, where one has to precisely specify expected types in JSON. In
    particular, the ``int``, :class:`~bson.int64.Int64`, and ``float`` numeric
    types are encoded with type wrappers.

    .. seealso:: The specification for Canonical `Extended JSON`_.

    .. versionadded:: 3.5
    """


class JSONOptions(CodecOptions):
    json_mode: int
    strict_number_long: bool
    datetime_representation: int
    strict_uuid: bool

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Encapsulates JSON options for :func:`dumps` and :func:`loads`.

        :Parameters:
          - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
            are encoded to MongoDB Extended JSON's *Strict mode* type
            `NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
            will be encoded as an `int`. When omitted the value follows
            `json_mode`: ``True`` for :const:`~JSONMode.CANONICAL`, ``False``
            otherwise.
          - `datetime_representation`: The representation to use when encoding
            instances of :class:`datetime.datetime`. When omitted the value
            follows `json_mode`: :const:`~DatetimeRepresentation.ISO8601` for
            :const:`~JSONMode.RELAXED`,
            :const:`~DatetimeRepresentation.NUMBERLONG` for
            :const:`~JSONMode.CANONICAL` and
            :const:`~DatetimeRepresentation.LEGACY` for
            :const:`~JSONMode.LEGACY`.
          - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
            MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
            will be encoded as ``'{"$uuid": "<hex>" }'``. When omitted the value
            follows `json_mode`: ``False`` for :const:`~JSONMode.LEGACY`,
            ``True`` otherwise.
          - `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
            Extended JSON. Defaults to :const:`~JSONMode.RELAXED`.
          - `document_class`: BSON documents returned by :func:`loads` will be
            decoded to an instance of this class. Must be a subclass of
            :class:`collections.abc.MutableMapping`. Defaults to :class:`dict`.
          - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
            to use when encoding and decoding instances of :class:`uuid.UUID`.
            Defaults to :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
          - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
            `Date` will be decoded to timezone aware instances of
            :class:`datetime.datetime`. Otherwise they will be naive. Defaults
            to ``False``.
          - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
            timezone from which :class:`~datetime.datetime` objects should be
            decoded. Defaults to :const:`~bson.tz_util.utc` when `tz_aware` is
            ``True``.
          - `datetime_conversion`: Specifies how UTC datetimes should be decoded
            within BSON. Valid options include 'datetime_ms' to return as a
            DatetimeMS, 'datetime' to return as a datetime.datetime and
            raising a ValueError for out-of-range values, 'datetime_auto' to
            return DatetimeMS objects when the underlying datetime is
            out-of-range and 'datetime_clamp' to clamp to the minimum and
            maximum possible datetimes. Defaults to 'datetime'. See
            :ref:`handling-out-of-range-datetimes` for details.
          - `args`: arguments to :class:`~bson.codec_options.CodecOptions`
          - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`

        .. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.

        .. versionchanged:: 4.0
           The default for `json_mode` was changed from :const:`JSONMode.LEGACY`
           to :const:`JSONMode.RELAXED`.
           The default for `uuid_representation` was changed from
           :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
           :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

        .. versionchanged:: 3.5
           Accepts the optional parameter `json_mode`.

        .. versionchanged:: 4.0
           Changed default value of `tz_aware` to False.
        """
        # All real initialisation happens in __new__; nothing is forwarded.
        super().__init__()

    def __new__(
        cls: Type["JSONOptions"],
        strict_number_long: Optional[bool] = None,
        datetime_representation: Optional[int] = None,
        strict_uuid: Optional[bool] = None,
        json_mode: int = JSONMode.RELAXED,
        *args: Any,
        **kwargs: Any,
    ) -> "JSONOptions":
        """Validate the JSON-specific options and build the instance.

        Raises ``ValueError`` when `datetime_representation` or `json_mode`
        is not one of the known constants, or when an explicit option
        conflicts with the RELAXED or CANONICAL mode.
        """
        kwargs["tz_aware"] = kwargs.get("tz_aware", False)
        if kwargs["tz_aware"]:
            kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
        if datetime_representation not in (
            DatetimeRepresentation.LEGACY,
            DatetimeRepresentation.NUMBERLONG,
            DatetimeRepresentation.ISO8601,
            None,
        ):
            raise ValueError(
                "JSONOptions.datetime_representation must be one of LEGACY, "
                "NUMBERLONG, or ISO8601 from DatetimeRepresentation."
            )
        self = cast(JSONOptions, super().__new__(cls, *args, **kwargs))
        if json_mode not in (JSONMode.LEGACY, JSONMode.RELAXED, JSONMode.CANONICAL):
            raise ValueError(
                "JSONOptions.json_mode must be one of LEGACY, RELAXED, "
                "or CANONICAL from JSONMode."
            )
        self.json_mode = json_mode
        if self.json_mode == JSONMode.RELAXED:
            if strict_number_long:
                raise ValueError("Cannot specify strict_number_long=True with JSONMode.RELAXED")
            if datetime_representation not in (None, DatetimeRepresentation.ISO8601):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "ISO8601 or omitted with JSONMode.RELAXED"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.RELAXED")
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.ISO8601
            self.strict_uuid = True
        elif self.json_mode == JSONMode.CANONICAL:
            if strict_number_long not in (None, True):
                raise ValueError(
                    "Cannot specify strict_number_long=False with JSONMode.CANONICAL"
                )
            if datetime_representation not in (None, DatetimeRepresentation.NUMBERLONG):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "NUMBERLONG or omitted with JSONMode.CANONICAL"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.CANONICAL")
            self.strict_number_long = True
            self.datetime_representation = DatetimeRepresentation.NUMBERLONG
            self.strict_uuid = True
        else:  # JSONMode.LEGACY
            # Legacy mode is the only one in which the three options may be
            # chosen freely; start from the legacy defaults and apply overrides.
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.LEGACY
            self.strict_uuid = False
            if strict_number_long is not None:
                self.strict_number_long = strict_number_long
            if datetime_representation is not None:
                self.datetime_representation = datetime_representation
            if strict_uuid is not None:
                self.strict_uuid = strict_uuid
        return self

    def _arguments_repr(self) -> str:
        """Return the JSON-specific options followed by the parent's arguments repr."""
        return (
            "strict_number_long={!r}, "
            "datetime_representation={!r}, "
            "strict_uuid={!r}, json_mode={!r}, {}".format(
                self.strict_number_long,
                self.datetime_representation,
                self.strict_uuid,
                self.json_mode,
                super()._arguments_repr(),
            )
        )

    def _options_dict(self) -> Dict[Any, Any]:
        """Return the parent's options dict extended with the JSON-specific options."""
        # TODO: PYTHON-2442 use _asdict() instead
        options_dict = super()._options_dict()
        options_dict.update(
            {
                "strict_number_long": self.strict_number_long,
                "datetime_representation": self.datetime_representation,
                "strict_uuid": self.strict_uuid,
                "json_mode": self.json_mode,
            }
        )
        return options_dict

    def with_options(self, **kwargs: Any) -> "JSONOptions":
        """
        Make a copy of this JSONOptions, overriding some options::

            >>> from bson.json_util import CANONICAL_JSON_OPTIONS
            >>> CANONICAL_JSON_OPTIONS.tz_aware
            False
            >>> json_options = CANONICAL_JSON_OPTIONS.with_options(tz_aware=True)
            >>> json_options.tz_aware
            True

        .. versionadded:: 3.12
        """
        opts = self._options_dict()
        for opt in ("strict_number_long", "datetime_representation", "strict_uuid", "json_mode"):
            opts[opt] = kwargs.get(opt, getattr(self, opt))
        opts.update(kwargs)
        return JSONOptions(**opts)


LEGACY_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.LEGACY)
""":class:`JSONOptions` for encoding to PyMongo's legacy JSON format.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.

.. versionadded:: 3.5
"""

CANONICAL_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.CANONICAL)
""":class:`JSONOptions` for Canonical Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.

.. versionadded:: 3.5
"""

RELAXED_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.RELAXED)
""":class:`JSONOptions` for Relaxed Extended JSON.

.. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.

.. versionadded:: 3.5
"""

DEFAULT_JSON_OPTIONS: JSONOptions = RELAXED_JSON_OPTIONS
"""The default :class:`JSONOptions` for JSON encoding/decoding.

The same as :const:`RELAXED_JSON_OPTIONS`.

.. versionchanged:: 4.0
   Changed from :const:`LEGACY_JSON_OPTIONS` to
   :const:`RELAXED_JSON_OPTIONS`.

.. versionadded:: 3.4
"""


def dumps(obj: Any, *args: Any, **kwargs: Any) -> str:
    """Helper function that wraps :func:`json.dumps`.

    Recursive function that handles all BSON types including
    :class:`~bson.binary.Binary` and :class:`~bson.code.Code`.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        encoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now outputs MongoDB Relaxed Extended JSON by default (using
       :const:`DEFAULT_JSON_OPTIONS`).

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
    return json.dumps(_json_convert(obj, json_options), *args, **kwargs)


def loads(s: Union[str, bytes, bytearray], *args: Any, **kwargs: Any) -> Any:
    """Helper function that wraps :func:`json.loads`.

    Automatically passes the object_hook for BSON type conversion.

    Raises ``TypeError``, ``ValueError``, ``KeyError``, or
    :exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        decoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now loads :class:`datetime.datetime` instances as naive by default. To
       load timezone aware instances utilize the `json_options` parameter.
       See :ref:`tz_aware_default_change` for an example.

    .. versionchanged:: 3.5
       Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
       format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
       type wrappers with values of the wrong type or any extra keys.

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
    kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(pairs, json_options)
    return json.loads(s, *args, **kwargs)


def _json_convert(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Recursive helper method that converts BSON types so they can be
    converted into json.
    """
    if hasattr(obj, "items"):
        # Mapping-like: rebuild as SON so key order is preserved.
        return SON(((k, _json_convert(v, json_options)) for k, v in obj.items()))
    elif hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [_json_convert(v, json_options) for v in obj]
    try:
        return default(obj, json_options)
    except TypeError:
        # Not a type default() knows about; let json.dumps handle (or reject) it.
        return obj


def object_pairs_hook(
    pairs: Sequence[Tuple[str, Any]], json_options: JSONOptions = DEFAULT_JSON_OPTIONS
) -> Any:
    """Build ``json_options.document_class`` from `pairs` and pass it to :func:`object_hook`."""
    return object_hook(json_options.document_class(pairs), json_options)


def object_hook(dct: Mapping[str, Any], json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Convert a decoded JSON object into the BSON type its wrapper key names.

    The keys are tested in a fixed order and the first match decides which
    parser runs. A document without any recognised key is returned unchanged.
    """
    if "$oid" in dct:
        return _parse_canonical_oid(dct)
    if (
        isinstance(dct.get("$ref"), str)
        and "$id" in dct
        and isinstance(dct.get("$db"), (str, type(None)))
    ):
        return _parse_canonical_dbref(dct)
    if "$date" in dct:
        return _parse_canonical_datetime(dct, json_options)
    if "$regex" in dct:
        return _parse_legacy_regex(dct)
    if "$minKey" in dct:
        return _parse_canonical_minkey(dct)
    if "$maxKey" in dct:
        return _parse_canonical_maxkey(dct)
    if "$binary" in dct:
        if "$type" in dct:
            return _parse_legacy_binary(dct, json_options)
        else:
            return _parse_canonical_binary(dct, json_options)
    if "$code" in dct:
        return _parse_canonical_code(dct)
    if "$uuid" in dct:
        return _parse_legacy_uuid(dct, json_options)
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return _parse_canonical_int64(dct)
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    if "$numberDecimal" in dct:
        return _parse_canonical_decimal128(dct)
    if "$dbPointer" in dct:
        return _parse_canonical_dbpointer(dct)
    if "$regularExpression" in dct:
        return _parse_canonical_regex(dct)
    if "$symbol" in dct:
        return _parse_canonical_symbol(dct)
    if "$numberInt" in dct:
        return _parse_canonical_int32(dct)
    if "$numberDouble" in dct:
        return _parse_canonical_double(dct)
    return dct


def _parse_legacy_regex(doc: Any) -> Any:
    """Decode a legacy ``$regex``/``$options`` document to bson.regex.Regex.

    Returns `doc` unchanged when ``$regex`` is not a string or bytes value.
    """
    pattern = doc["$regex"]
    # Check if this is the $regex query operator.
    if not isinstance(pattern, (str, bytes)):
        return doc
    flags = 0
    # PyMongo always adds $options but some other tools may not.
    for opt in doc.get("$options", ""):
        # Unknown option characters are ignored.
        flags |= _RE_OPT_TABLE.get(opt, 0)
    return Regex(pattern, flags)


def _parse_legacy_uuid(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Decode a JSON legacy $uuid to Python UUID."""
    if len(doc) != 1:
        raise TypeError(f"Bad $uuid, extra field(s): {doc}")
    if not isinstance(doc["$uuid"], str):
        raise TypeError(f"$uuid must be a string: {doc}")
    if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED:
        return Binary.from_uuid(uuid.UUID(doc["$uuid"]))
    else:
        return uuid.UUID(doc["$uuid"])


def _binary_or_uuid(data: Any, subtype: int, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Wrap decoded binary `data` according to its `subtype`.

    UUID subtypes become :class:`uuid.UUID` (or stay :class:`Binary` when the
    UUID representation is UNSPECIFIED), subtype 0 is returned as the raw
    `data`, and every other subtype becomes :class:`Binary`.
    """
    # special handling for UUID
    if subtype in ALL_UUID_SUBTYPES:
        uuid_representation = json_options.uuid_representation
        binary_value = Binary(data, subtype)
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            return binary_value
        if subtype == UUID_SUBTYPE:
            # Legacy behavior: use STANDARD with binary subtype 4.
            uuid_representation = UuidRepresentation.STANDARD
        elif uuid_representation == UuidRepresentation.STANDARD:
            # subtype == OLD_UUID_SUBTYPE
            # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
            uuid_representation = UuidRepresentation.PYTHON_LEGACY
        return binary_value.as_uuid(uuid_representation)

    if subtype == 0:
        # `data` is handed back as-is; the cast only satisfies the declared
        # return annotation and performs no conversion at runtime.
        return cast(uuid.UUID, data)
    return Binary(data, subtype)


def _parse_legacy_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Decode a legacy ``{"$binary": <base64>, "$type": <hex>}`` document."""
    # Work on a local so the caller's document is never modified.
    type_field = doc["$type"]
    if isinstance(type_field, int):
        type_field = "%02x" % type_field
    subtype = int(type_field, 16)
    if subtype >= 0xFFFFFF80:  # Handle mongoexport values
        # Only the characters after the first six carry the subtype.
        subtype = int(type_field[6:], 16)
    data = base64.b64decode(doc["$binary"].encode())
    return _binary_or_uuid(data, subtype, json_options)


def _parse_canonical_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Decode ``{"$binary": {"base64": ..., "subType": ...}}``.

    Raises ``TypeError`` when either component has the wrong type or when
    ``$binary`` holds anything other than exactly those two components.
    """
    binary = doc["$binary"]
    b64 = binary["base64"]
    subtype = binary["subType"]
    if not isinstance(b64, str):
        raise TypeError(f"$binary base64 must be a string: {doc}")
    if not isinstance(subtype, str) or len(subtype) > 2:
        raise TypeError(f"$binary subType must be a string at most 2 characters: {doc}")
    if len(binary) != 2:
        raise TypeError(f'$binary must include only "base64" and "subType" components: {doc}')

    data = base64.b64decode(b64.encode())
    return _binary_or_uuid(data, int(subtype, 16), json_options)


def _parse_canonical_datetime(
    doc: Any, json_options: JSONOptions
) -> Union[datetime.datetime, DatetimeMS]:
    """Decode a JSON datetime to python datetime.datetime.

    A string ``$date`` is parsed as ``%Y-%m-%dT%H:%M:%S`` with optional
    fractional seconds and an optional ``Z``, ``(+|-)HH:MM``, ``(+|-)HHMM``
    or ``(+|-)HH`` offset; a malformed string raises ``ValueError``. Any
    other value is converted with ``int()`` and treated as milliseconds.
    """
    dtm = doc["$date"]
    if len(doc) != 1:
        raise TypeError(f"Bad $date, extra field(s): {doc}")
    # mongoexport 2.6 and newer
    if isinstance(dtm, str):
        # Parse offset. The length guards keep strings that are too short to
        # hold an offset from raising IndexError; they fall through to
        # strptime below, which rejects them with ValueError.
        if dtm[-1:] == "Z":
            dt = dtm[:-1]
            offset = "Z"
        elif len(dtm) >= 6 and dtm[-6] in ("+", "-") and dtm[-3] == ":":
            # (+|-)HH:MM
            dt = dtm[:-6]
            offset = dtm[-6:]
        elif len(dtm) >= 5 and dtm[-5] in ("+", "-"):
            # (+|-)HHMM
            dt = dtm[:-5]
            offset = dtm[-5:]
        elif len(dtm) >= 3 and dtm[-3] in ("+", "-"):
            # (+|-)HH
            dt = dtm[:-3]
            offset = dtm[-3:]
        else:
            dt = dtm
            offset = ""

        # Parse the optional fractional seconds portion.
        dot_index = dt.rfind(".")
        microsecond = 0
        if dot_index != -1:
            microsecond = int(float(dt[dot_index:]) * 1000000)
            dt = dt[:dot_index]

        aware = datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S").replace(
            microsecond=microsecond, tzinfo=utc
        )

        if offset and offset != "Z":
            # `offset` was sliced above, so its length is always 6, 5 or 3.
            if len(offset) == 6:
                hours, minutes = offset[1:].split(":")
                secs = int(hours) * 3600 + int(minutes) * 60
            elif len(offset) == 5:
                secs = int(offset[1:3]) * 3600 + int(offset[3:]) * 60
            elif len(offset) == 3:
                secs = int(offset[1:3]) * 3600
            if offset[0] == "-":
                secs *= -1
            # The parsed wall-clock time was labelled UTC above; subtracting
            # the offset turns it into the actual UTC instant.
            aware = aware - datetime.timedelta(seconds=secs)

        if json_options.tz_aware:
            if json_options.tzinfo:
                aware = aware.astimezone(json_options.tzinfo)
            if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS:
                return DatetimeMS(aware)
            return aware
        else:
            aware_tzinfo_none = aware.replace(tzinfo=None)
            if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS:
                return DatetimeMS(aware_tzinfo_none)
            return aware_tzinfo_none
    return _millis_to_datetime(int(dtm), json_options)


def _parse_canonical_oid(doc: Any) -> ObjectId:
    """Decode a JSON ObjectId to bson.objectid.ObjectId."""
    if len(doc) != 1:
        raise TypeError(f"Bad $oid, extra field(s): {doc}")
    return ObjectId(doc["$oid"])


def _parse_canonical_symbol(doc: Any) -> str:
    """Decode a JSON symbol to Python string."""
    symbol = doc["$symbol"]
    if len(doc) != 1:
        raise TypeError(f"Bad $symbol, extra field(s): {doc}")
    return str(symbol)


def _parse_canonical_code(doc: Any) -> Code:
    """Decode a JSON code to bson.code.Code."""
    for key in doc:
        if key not in ("$code", "$scope"):
            raise TypeError(f"Bad $code, extra field(s): {doc}")
    return Code(doc["$code"], scope=doc.get("$scope"))


def _parse_canonical_regex(doc: Any) -> Regex:
    """Decode a JSON regex to bson.regex.Regex."""
    regex = doc["$regularExpression"]
    if len(doc) != 1:
        raise TypeError(f"Bad $regularExpression, extra field(s): {doc}")
    if len(regex) != 2:
        raise TypeError(
            f'Bad $regularExpression must include only "pattern" and "options" components: {doc}'
        )
    opts = regex["options"]
    if not isinstance(opts, str):
        raise TypeError(
            "Bad $regularExpression options, options must be string, was type %s" % (type(opts))
        )
    return Regex(regex["pattern"], opts)


def _parse_canonical_dbref(doc: Any) -> DBRef:
    """Decode a JSON DBRef to bson.dbref.DBRef.

    Keys other than ``$ref``, ``$id`` and ``$db`` are forwarded to
    :class:`DBRef` as keyword arguments. `doc` itself is left untouched.
    """
    extra = {key: value for key, value in doc.items() if key not in ("$ref", "$id", "$db")}
    return DBRef(doc["$ref"], doc["$id"], database=doc.get("$db"), **extra)


def _parse_canonical_dbpointer(doc: Any) -> Any:
    """Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef."""
    dbref = doc["$dbPointer"]
    if len(doc) != 1:
        raise TypeError(f"Bad $dbPointer, extra field(s): {doc}")
    if isinstance(dbref, DBRef):
        dbref_doc = dbref.as_doc()
        # DBPointer must not contain $db in its value.
        if dbref.database is not None:
            raise TypeError(f"Bad $dbPointer, extra field $db: {dbref_doc}")
        if not isinstance(dbref.id, ObjectId):
            raise TypeError(f"Bad $dbPointer, $id must be an ObjectId: {dbref_doc}")
        if len(dbref_doc) != 2:
            raise TypeError(f"Bad $dbPointer, extra field(s) in DBRef: {dbref_doc}")
        return dbref
    else:
        raise TypeError(f"Bad $dbPointer, expected a DBRef: {doc}")


def _parse_canonical_int32(doc: Any) -> int:
    """Decode a JSON int32 to python int."""
    i_str = doc["$numberInt"]
    if len(doc) != 1:
        raise TypeError(f"Bad $numberInt, extra field(s): {doc}")
    if not isinstance(i_str, str):
        raise TypeError(f"$numberInt must be string: {doc}")
    return int(i_str)


def _parse_canonical_int64(doc: Any) -> Int64:
    """Decode a JSON int64 to bson.int64.Int64."""
    l_str = doc["$numberLong"]
    if len(doc) != 1:
        raise TypeError(f"Bad $numberLong, extra field(s): {doc}")
    return Int64(l_str)


def _parse_canonical_double(doc: Any) -> float:
    """Decode a JSON double to python float."""
    d_str = doc["$numberDouble"]
    if len(doc) != 1:
        raise TypeError(f"Bad $numberDouble, extra field(s): {doc}")
    if not isinstance(d_str, str):
        raise TypeError(f"$numberDouble must be string: {doc}")
    return float(d_str)


def _parse_canonical_decimal128(doc: Any) -> Decimal128:
    """Decode a JSON decimal128 to bson.decimal128.Decimal128."""
    d_str = doc["$numberDecimal"]
    if len(doc) != 1:
        raise TypeError(f"Bad $numberDecimal, extra field(s): {doc}")
    if not isinstance(d_str, str):
        raise TypeError(f"$numberDecimal must be string: {doc}")
    return Decimal128(d_str)


def _parse_canonical_minkey(doc: Any) -> MinKey:
    """Decode a JSON MinKey to bson.min_key.MinKey."""
    # Exact type check: bool is a subclass of int, so True must not pass as 1.
    if type(doc["$minKey"]) is not int or doc["$minKey"] != 1:
        raise TypeError(f"$minKey value must be 1: {doc}")
    if len(doc) != 1:
        raise TypeError(f"Bad $minKey, extra field(s): {doc}")
    return MinKey()


def _parse_canonical_maxkey(doc: Any) -> MaxKey:
    """Decode a JSON MaxKey to bson.max_key.MaxKey."""
    # Exact type check: bool is a subclass of int, so True must not pass as 1.
    if type(doc["$maxKey"]) is not int or doc["$maxKey"] != 1:
        raise TypeError(f"$maxKey value must be 1: {doc}")
    if len(doc) != 1:
        raise TypeError(f"Bad $maxKey, extra field(s): {doc}")
    return MaxKey()


def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
    """Encode `data` as a legacy or canonical ``$binary`` document, depending on the JSON mode."""
    if json_options.json_mode == JSONMode.LEGACY:
        return SON([("$binary", base64.b64encode(data).decode()), ("$type", "%02x" % subtype)])
    return {
        "$binary": SON([("base64", base64.b64encode(data).decode()), ("subType", "%02x" % subtype)])
    }


def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Convert a single BSON or Python value to its Extended JSON form.

    Raises ``TypeError`` when `obj` is not one of the handled types. The
    order of the checks matters wherever one handled type subclasses another
    (for example ``bool`` is tested before ``int``).
    """
    # We preserve key order when rendering SON, DBRef, etc. as JSON by
    # returning a SON for those types instead of a dict.
    if isinstance(obj, ObjectId):
        return {"$oid": str(obj)}
    if isinstance(obj, DBRef):
        return _json_convert(obj.as_doc(), json_options=json_options)
    if isinstance(obj, datetime.datetime):
        if json_options.datetime_representation == DatetimeRepresentation.ISO8601:
            if not obj.tzinfo:
                # Naive datetimes are treated as UTC.
                obj = obj.replace(tzinfo=utc)
                assert obj.tzinfo is not None
            if obj >= EPOCH_AWARE:
                off = obj.tzinfo.utcoffset(obj)
                if (off.days, off.seconds, off.microseconds) == (0, 0, 0):  # type: ignore
                    tz_string = "Z"
                else:
                    tz_string = obj.strftime("%z")
                millis = obj.microsecond // 1000
                fracsecs = ".%03d" % (millis,) if millis else ""
                return {
                    "$date": "{}{}{}".format(obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)
                }
            # Dates before the epoch fall through to the millisecond forms.

        millis = _datetime_to_millis(obj)
        if json_options.datetime_representation == DatetimeRepresentation.LEGACY:
            return {"$date": millis}
        return {"$date": {"$numberLong": str(millis)}}
    if isinstance(obj, DatetimeMS):
        if (
            json_options.datetime_representation == DatetimeRepresentation.ISO8601
            and 0 <= int(obj) <= _max_datetime_ms()
        ):
            return default(obj.as_datetime(), json_options)
        elif json_options.datetime_representation == DatetimeRepresentation.LEGACY:
            # NOTE(review): this emits the milliseconds as a string, whereas a
            # datetime.datetime under LEGACY emits an int, and
            # _parse_canonical_datetime parses a str $date as ISO-8601 text.
            # Confirm whether the string form is intended before changing it.
            return {"$date": str(int(obj))}
        return {"$date": {"$numberLong": str(int(obj))}}
    if json_options.strict_number_long and isinstance(obj, Int64):
        return {"$numberLong": str(obj)}
    if isinstance(obj, (RE_TYPE, Regex)):
        # _RE_OPT_TABLE iterates in i, l, m, s, u, x order.
        flags = "".join(char for char, bit in _RE_OPT_TABLE.items() if obj.flags & bit)
        if isinstance(obj.pattern, str):
            pattern = obj.pattern
        else:
            pattern = obj.pattern.decode("utf-8")
        if json_options.json_mode == JSONMode.LEGACY:
            return SON([("$regex", pattern), ("$options", flags)])
        return {"$regularExpression": SON([("pattern", pattern), ("options", flags)])}
    if isinstance(obj, MinKey):
        return {"$minKey": 1}
    if isinstance(obj, MaxKey):
        return {"$maxKey": 1}
    if isinstance(obj, Timestamp):
        return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
    if isinstance(obj, Code):
        if obj.scope is None:
            return {"$code": str(obj)}
        return SON([("$code", str(obj)), ("$scope", _json_convert(obj.scope, json_options))])
    if isinstance(obj, Binary):
        return _encode_binary(obj, obj.subtype, json_options)
    if isinstance(obj, bytes):
        return _encode_binary(obj, 0, json_options)
    if isinstance(obj, uuid.UUID):
        if json_options.strict_uuid:
            binval = Binary.from_uuid(obj, uuid_representation=json_options.uuid_representation)
            return _encode_binary(binval, binval.subtype, json_options)
        else:
            return {"$uuid": obj.hex}
    if isinstance(obj, Decimal128):
        return {"$numberDecimal": str(obj)}
    if isinstance(obj, bool):
        # Must precede the int check below: bool is a subclass of int.
        return obj
    if json_options.json_mode == JSONMode.CANONICAL and isinstance(obj, int):
        if -(2**31) <= obj < 2**31:
            return {"$numberInt": str(obj)}
        return {"$numberLong": str(obj)}
    if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
        if math.isnan(obj):
            return {"$numberDouble": "NaN"}
        elif math.isinf(obj):
            representation = "Infinity" if obj > 0 else "-Infinity"
            return {"$numberDouble": representation}
        elif json_options.json_mode == JSONMode.CANONICAL:
            # repr() will return the shortest string guaranteed to produce the
            # original value, when float() is called on it.
            return {"$numberDouble": repr(obj)}
    raise TypeError(f"{obj!r} is not JSON serializable")