# Source: impuls/lib/python3.11/site-packages/bson/json_util.py (919 lines, 36 KiB, Python)

# Copyright 2009-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for using Python's :mod:`json` module with BSON documents.
This module provides two helper methods `dumps` and `loads` that wrap the
native :mod:`json` methods and provide explicit BSON conversion to and from
JSON. :class:`~bson.json_util.JSONOptions` provides a way to control how JSON
is emitted and parsed, with the default being the Relaxed Extended JSON format.
:mod:`~bson.json_util` can also generate Canonical or legacy `Extended JSON`_
when :const:`CANONICAL_JSON_OPTIONS` or :const:`LEGACY_JSON_OPTIONS` is
provided, respectively.
.. _Extended JSON: https://github.com/mongodb/specifications/blob/master/source/extended-json.rst
Example usage (deserialization):
.. doctest::
>>> from bson.json_util import loads
>>> loads(
... '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "80", "$binary": "AQIDBA=="}}]'
... )
[{'foo': [1, 2]}, {'bar': {'hello': 'world'}}, {'code': Code('function x() { return 1; }', {})}, {'bin': Binary(b'...', 128)}]
Example usage with :const:`RELAXED_JSON_OPTIONS` (the default):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps
>>> dumps(
... [
... {"foo": [1, 2]},
... {"bar": {"hello": "world"}},
... {"code": Code("function x() { return 1; }")},
... {"bin": Binary(b"\x01\x02\x03\x04")},
... ]
... )
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
Example usage (with :const:`CANONICAL_JSON_OPTIONS`):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps, CANONICAL_JSON_OPTIONS
>>> dumps(
... [
... {"foo": [1, 2]},
... {"bar": {"hello": "world"}},
... {"code": Code("function x() { return 1; }")},
... {"bin": Binary(b"\x01\x02\x03\x04")},
... ],
... json_options=CANONICAL_JSON_OPTIONS,
... )
'[{"foo": [{"$numberInt": "1"}, {"$numberInt": "2"}]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }"}}, {"bin": {"$binary": {"base64": "AQIDBA==", "subType": "00"}}}]'
Example usage (with :const:`LEGACY_JSON_OPTIONS`):
.. doctest::
>>> from bson import Binary, Code
>>> from bson.json_util import dumps, LEGACY_JSON_OPTIONS
>>> dumps(
... [
... {"foo": [1, 2]},
... {"bar": {"hello": "world"}},
... {"code": Code("function x() { return 1; }", {})},
... {"bin": Binary(b"\x01\x02\x03\x04")},
... ],
... json_options=LEGACY_JSON_OPTIONS,
... )
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$code": "function x() { return 1; }", "$scope": {}}}, {"bin": {"$binary": "AQIDBA==", "$type": "00"}}]'
Alternatively, you can manually pass the `default` to :func:`json.dumps`.
It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
instances (as they are extended strings you can't provide custom defaults),
but it will be faster as there is less recursion.
.. note::
If your application does not need the flexibility offered by
:class:`JSONOptions` and spends a large amount of time in the `json_util`
module, look to
`python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ for a nice
performance improvement. `python-bsonjs` is a fast BSON to MongoDB
Extended JSON converter for Python built on top of
`libbson <https://github.com/mongodb/libbson>`_. `python-bsonjs` works best
with PyMongo when using :class:`~bson.raw_bson.RawBSONDocument`.
"""
import base64
import datetime
import json
import math
import re
import uuid
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, Union, cast
from bson.binary import ALL_UUID_SUBTYPES, UUID_SUBTYPE, Binary, UuidRepresentation
from bson.code import Code
from bson.codec_options import CodecOptions, DatetimeConversion
from bson.datetime_ms import (
EPOCH_AWARE,
DatetimeMS,
_datetime_to_millis,
_max_datetime_ms,
_millis_to_datetime,
)
from bson.dbref import DBRef
from bson.decimal128 import Decimal128
from bson.int64 import Int64
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.son import RE_TYPE, SON
from bson.timestamp import Timestamp
from bson.tz_util import utc
_RE_OPT_TABLE = {
"i": re.I,
"l": re.L,
"m": re.M,
"s": re.S,
"u": re.U,
"x": re.X,
}
class DatetimeRepresentation:
    """Integer constants naming the JSON encodings available for datetimes."""

    LEGACY = 0
    """Legacy MongoDB Extended JSON datetime representation.

    A :class:`datetime.datetime` is written as
    `{"$date": <dateAsMilliseconds>}`, where `dateAsMilliseconds` is a 64-bit
    signed integer counting the milliseconds since the Unix epoch UTC. This
    was the default encoding before PyMongo version 3.4.

    .. versionadded:: 3.4
    """

    NUMBERLONG = 1
    """NumberLong datetime representation.

    A :class:`datetime.datetime` is written as
    `{"$date": {"$numberLong": "<dateAsMilliseconds>"}}`, where
    `dateAsMilliseconds` is the string form of a 64-bit signed integer
    counting the milliseconds since the Unix epoch UTC.

    .. versionadded:: 3.4
    """

    ISO8601 = 2
    """ISO-8601 datetime representation.

    A :class:`datetime.datetime` at or after the Unix epoch UTC is written as
    `{"$date": "<ISO-8601>"}`. A :class:`datetime.datetime` before the Unix
    epoch UTC is written as if the representation were
    :const:`~DatetimeRepresentation.NUMBERLONG`.

    .. versionadded:: 3.4
    """
class JSONMode:
    """Integer constants naming the Extended JSON flavours this module handles."""

    LEGACY = 0
    """Legacy Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits PyMongo's legacy,
    non-standard JSON output. Consider using
    :const:`~bson.json_util.JSONMode.RELAXED` or
    :const:`~bson.json_util.JSONMode.CANONICAL` instead.

    .. versionadded:: 3.5
    """

    RELAXED = 1
    """Relaxed Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits Relaxed Extended JSON,
    a mostly JSON-like format. Consider using this for things like a web API,
    where one is sending a document (or a projection of a document) that only
    uses ordinary JSON type primitives. In particular, the ``int``,
    :class:`~bson.int64.Int64`, and ``float`` numeric types are represented in
    the native JSON number format. This output is also the most human readable
    and is useful for debugging and documentation.

    .. seealso:: The specification for Relaxed `Extended JSON`_.

    .. versionadded:: 3.5
    """

    CANONICAL = 2
    """Canonical Extended JSON representation.

    With this mode :func:`~bson.json_util.dumps` emits Canonical Extended
    JSON, a type preserving format. Consider using this for things like
    testing, where one has to precisely specify expected types in JSON. In
    particular, the ``int``, :class:`~bson.int64.Int64`, and ``float`` numeric
    types are encoded with type wrappers.

    .. seealso:: The specification for Canonical `Extended JSON`_.

    .. versionadded:: 3.5
    """
class JSONOptions(CodecOptions):
    # JSON-specific settings; they are assigned in __new__ after validation.
    json_mode: int
    strict_number_long: bool
    datetime_representation: int
    strict_uuid: bool

    def __init__(self, *args, **kwargs):
        """Encapsulates JSON options for :func:`dumps` and :func:`loads`.

        All validation and attribute assignment is done in ``__new__``.

        :Parameters:
          - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
            are encoded to MongoDB Extended JSON's *Strict mode* type
            `NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
            will be encoded as an `int`. When omitted, ``True`` for
            :const:`~JSONMode.CANONICAL` and ``False`` for the other modes.
          - `datetime_representation`: The representation to use when encoding
            instances of :class:`datetime.datetime`. When omitted it follows
            `json_mode`: :const:`~DatetimeRepresentation.ISO8601` for
            :const:`~JSONMode.RELAXED`,
            :const:`~DatetimeRepresentation.NUMBERLONG` for
            :const:`~JSONMode.CANONICAL` and
            :const:`~DatetimeRepresentation.LEGACY` for
            :const:`~JSONMode.LEGACY`.
          - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to
            MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it
            will be encoded as ``'{"$uuid": "<hex>" }'``. When omitted,
            ``False`` for :const:`~JSONMode.LEGACY` and ``True`` for the other
            modes.
          - `json_mode`: The :class:`JSONMode` to use when encoding BSON types to
            Extended JSON. Defaults to :const:`~JSONMode.RELAXED`.
          - `document_class`: BSON documents returned by :func:`loads` will be
            decoded to an instance of this class. Must be a subclass of
            :class:`collections.abc.MutableMapping`. Defaults to :class:`dict`.
          - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
            to use when encoding and decoding instances of :class:`uuid.UUID`.
            Defaults to :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
          - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
            `Date` will be decoded to timezone aware instances of
            :class:`datetime.datetime`. Otherwise they will be naive. Defaults
            to ``False``.
          - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
            timezone from which :class:`~datetime.datetime` objects should be
            decoded. When `tz_aware` is ``True`` and `tzinfo` is not given,
            :const:`~bson.tz_util.utc` is used.
          - `datetime_conversion`: Specifies how UTC datetimes should be decoded
            within BSON. Valid options include 'datetime_ms' to return as a
            DatetimeMS, 'datetime' to return as a datetime.datetime and
            raising a ValueError for out-of-range values, 'datetime_auto' to
            return DatetimeMS objects when the underlying datetime is
            out-of-range and 'datetime_clamp' to clamp to the minimum and
            maximum possible datetimes. Defaults to 'datetime'. See
            :ref:`handling-out-of-range-datetimes` for details.
          - `args`: arguments to :class:`~bson.codec_options.CodecOptions`
          - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`

        .. seealso:: The specification for Relaxed and Canonical `Extended JSON`_.

        .. versionchanged:: 4.0
           The default for `json_mode` was changed from :const:`JSONMode.LEGACY`
           to :const:`JSONMode.RELAXED`.
           The default for `uuid_representation` was changed from
           :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
           :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

        .. versionchanged:: 3.5
           Accepts the optional parameter `json_mode`.

        .. versionchanged:: 4.0
           Changed default value of `tz_aware` to False.
        """
        super().__init__()

    def __new__(
        cls: Type["JSONOptions"],
        strict_number_long: Optional[bool] = None,
        datetime_representation: Optional[int] = None,
        strict_uuid: Optional[bool] = None,
        json_mode: int = JSONMode.RELAXED,
        *args: Any,
        **kwargs: Any,
    ) -> "JSONOptions":
        """Validate the JSON-specific options and build the instance.

        Raises ``ValueError`` when `datetime_representation` or `json_mode`
        is not one of the known constants, or when an explicit option
        contradicts the RELAXED or CANONICAL mode that was requested.
        """
        kwargs["tz_aware"] = kwargs.get("tz_aware", False)
        if kwargs["tz_aware"]:
            kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
        if datetime_representation not in (
            DatetimeRepresentation.LEGACY,
            DatetimeRepresentation.NUMBERLONG,
            DatetimeRepresentation.ISO8601,
            None,
        ):
            raise ValueError(
                "JSONOptions.datetime_representation must be one of LEGACY, "
                "NUMBERLONG, or ISO8601 from DatetimeRepresentation."
            )
        self = cast(JSONOptions, super().__new__(cls, *args, **kwargs))
        if json_mode not in (JSONMode.LEGACY, JSONMode.RELAXED, JSONMode.CANONICAL):
            raise ValueError(
                "JSONOptions.json_mode must be one of LEGACY, RELAXED, "
                "or CANONICAL from JSONMode."
            )
        self.json_mode = json_mode
        if self.json_mode == JSONMode.RELAXED:
            if strict_number_long:
                raise ValueError("Cannot specify strict_number_long=True with JSONMode.RELAXED")
            if datetime_representation not in (None, DatetimeRepresentation.ISO8601):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "ISO8601 or omitted with JSONMode.RELAXED"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.RELAXED")
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.ISO8601
            self.strict_uuid = True
        elif self.json_mode == JSONMode.CANONICAL:
            # These messages previously named JSONMode.RELAXED by mistake.
            if strict_number_long not in (None, True):
                raise ValueError("Cannot specify strict_number_long=False with JSONMode.CANONICAL")
            if datetime_representation not in (None, DatetimeRepresentation.NUMBERLONG):
                raise ValueError(
                    "datetime_representation must be DatetimeRepresentation."
                    "NUMBERLONG or omitted with JSONMode.CANONICAL"
                )
            if strict_uuid not in (None, True):
                raise ValueError("Cannot specify strict_uuid=False with JSONMode.CANONICAL")
            self.strict_number_long = True
            self.datetime_representation = DatetimeRepresentation.NUMBERLONG
            self.strict_uuid = True
        else:  # JSONMode.LEGACY
            # LEGACY starts from these defaults; any explicitly passed option
            # replaces them below.
            self.strict_number_long = False
            self.datetime_representation = DatetimeRepresentation.LEGACY
            self.strict_uuid = False
            if strict_number_long is not None:
                self.strict_number_long = strict_number_long
            if datetime_representation is not None:
                self.datetime_representation = datetime_representation
            if strict_uuid is not None:
                self.strict_uuid = strict_uuid
        return self

    def _arguments_repr(self) -> str:
        """Return the argument list used in the repr, JSON options first."""
        return (
            "strict_number_long={!r}, "
            "datetime_representation={!r}, "
            "strict_uuid={!r}, json_mode={!r}, {}".format(
                self.strict_number_long,
                self.datetime_representation,
                self.strict_uuid,
                self.json_mode,
                super()._arguments_repr(),
            )
        )

    def _options_dict(self) -> Dict[Any, Any]:
        """Return the parent's options dict extended with the JSON options."""
        # TODO: PYTHON-2442 use _asdict() instead
        options_dict = super()._options_dict()
        options_dict.update(
            {
                "strict_number_long": self.strict_number_long,
                "datetime_representation": self.datetime_representation,
                "strict_uuid": self.strict_uuid,
                "json_mode": self.json_mode,
            }
        )
        return options_dict

    def with_options(self, **kwargs: Any) -> "JSONOptions":
        """
        Make a copy of this JSONOptions, overriding some options::

            >>> from bson.json_util import CANONICAL_JSON_OPTIONS
            >>> CANONICAL_JSON_OPTIONS.tz_aware
            False
            >>> json_options = CANONICAL_JSON_OPTIONS.with_options(tz_aware=True)
            >>> json_options.tz_aware
            True

        .. versionadded:: 3.12
        """
        opts = self._options_dict()
        for opt in ("strict_number_long", "datetime_representation", "strict_uuid", "json_mode"):
            opts[opt] = kwargs.get(opt, getattr(self, opt))
        opts.update(kwargs)
        return JSONOptions(**opts)
# Ready-made JSONOptions presets: one per JSONMode, followed by the module
# default, which is an alias of the relaxed preset.
LEGACY_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.LEGACY)
""":class:`JSONOptions` for encoding to PyMongo's legacy JSON format.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.LEGACY`.
.. versionadded:: 3.5
"""
CANONICAL_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.CANONICAL)
""":class:`JSONOptions` for Canonical Extended JSON.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.CANONICAL`.
.. versionadded:: 3.5
"""
RELAXED_JSON_OPTIONS: JSONOptions = JSONOptions(json_mode=JSONMode.RELAXED)
""":class:`JSONOptions` for Relaxed Extended JSON.
.. seealso:: The documentation for :const:`bson.json_util.JSONMode.RELAXED`.
.. versionadded:: 3.5
"""
# Used by dumps()/loads() and the hook functions when no json_options is given.
DEFAULT_JSON_OPTIONS: JSONOptions = RELAXED_JSON_OPTIONS
"""The default :class:`JSONOptions` for JSON encoding/decoding.
The same as :const:`RELAXED_JSON_OPTIONS`.
.. versionchanged:: 4.0
Changed from :const:`LEGACY_JSON_OPTIONS` to
:const:`RELAXED_JSON_OPTIONS`.
.. versionadded:: 3.4
"""
def dumps(obj: Any, *args: Any, **kwargs: Any) -> str:
    """Serialize ``obj`` to a JSON string via :func:`json.dumps`.

    ``obj`` is first converted recursively so that every BSON type,
    including :class:`~bson.binary.Binary` and :class:`~bson.code.Code`,
    becomes something :mod:`json` can emit. All other positional and keyword
    arguments are forwarded to :func:`json.dumps`.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        encoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now outputs MongoDB Relaxed Extended JSON by default (using
       :const:`DEFAULT_JSON_OPTIONS`).

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    # json_options is ours alone; it must not reach json.dumps.
    options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
    converted = _json_convert(obj, options)
    return json.dumps(converted, *args, **kwargs)
def loads(s: Union[str, bytes, bytearray], *args: Any, **kwargs: Any) -> Any:
    """Parse JSON text via :func:`json.loads`, decoding BSON type wrappers.

    An ``object_pairs_hook`` performing the BSON type conversion is installed
    automatically (replacing any hook passed by the caller); the remaining
    arguments are forwarded to :func:`json.loads`.

    Raises ``TypeError``, ``ValueError``, ``KeyError``, or
    :exc:`~bson.errors.InvalidId` on invalid MongoDB Extended JSON.

    :Parameters:
      - `json_options`: A :class:`JSONOptions` instance used to modify the
        decoding of MongoDB Extended JSON types. Defaults to
        :const:`DEFAULT_JSON_OPTIONS`.

    .. versionchanged:: 4.0
       Now loads :class:`datetime.datetime` instances as naive by default. To
       load timezone aware instances utilize the `json_options` parameter.
       See :ref:`tz_aware_default_change` for an example.

    .. versionchanged:: 3.5
       Parses Relaxed and Canonical Extended JSON as well as PyMongo's legacy
       format. Now raises ``TypeError`` or ``ValueError`` when parsing JSON
       type wrappers with values of the wrong type or any extra keys.

    .. versionchanged:: 3.4
       Accepts optional parameter `json_options`. See :class:`JSONOptions`.
    """
    options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)

    def _hook(pairs):
        # Bind the chosen options into the hook json.loads will call.
        return object_pairs_hook(pairs, options)

    kwargs["object_pairs_hook"] = _hook
    return json.loads(s, *args, **kwargs)
def _json_convert(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Recursively turn ``obj`` into a structure :mod:`json` can serialize.

    Anything with an ``items`` attribute becomes a :class:`SON` of converted
    values, any other non-string iterable becomes a list of converted items,
    and every remaining value goes through :func:`default`. Values that
    :func:`default` rejects with ``TypeError`` are returned unchanged.
    """
    if hasattr(obj, "items"):
        converted_pairs = (
            (key, _json_convert(value, json_options)) for key, value in obj.items()
        )
        return SON(converted_pairs)
    if hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
        return [_json_convert(item, json_options) for item in obj]
    try:
        return default(obj, json_options)
    except TypeError:
        # Not a type default() converts: leave it for json to handle as-is.
        return obj
def object_pairs_hook(
    pairs: Sequence[Tuple[str, Any]], json_options: JSONOptions = DEFAULT_JSON_OPTIONS
) -> Any:
    """Build a ``json_options.document_class`` from ``pairs`` and decode it.

    The freshly built document is handed to :func:`object_hook`, whose result
    is returned.
    """
    document = json_options.document_class(pairs)
    return object_hook(document, json_options)
def object_hook(dct: Mapping[str, Any], json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Decode one JSON object that may be an Extended JSON type wrapper.

    The wrapper keys are tested in a fixed order and the first one present
    decides which parser runs. ``dct`` itself is returned when no wrapper key
    is found.
    """
    if "$oid" in dct:
        return _parse_canonical_oid(dct)

    # A DBRef needs a string "$ref", an "$id", and a "$db" that is either
    # absent/None or a string.
    looks_like_dbref = (
        isinstance(dct.get("$ref"), str)
        and "$id" in dct
        and isinstance(dct.get("$db"), (str, type(None)))
    )
    if looks_like_dbref:
        return _parse_canonical_dbref(dct)

    if "$date" in dct:
        return _parse_canonical_datetime(dct, json_options)
    if "$regex" in dct:
        return _parse_legacy_regex(dct)
    if "$minKey" in dct:
        return _parse_canonical_minkey(dct)
    if "$maxKey" in dct:
        return _parse_canonical_maxkey(dct)
    if "$binary" in dct:
        # The legacy form keeps the subtype in a sibling "$type" key.
        parse_binary = _parse_legacy_binary if "$type" in dct else _parse_canonical_binary
        return parse_binary(dct, json_options)
    if "$code" in dct:
        return _parse_canonical_code(dct)
    if "$uuid" in dct:
        return _parse_legacy_uuid(dct, json_options)
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return _parse_canonical_int64(dct)
    if "$timestamp" in dct:
        stamp = dct["$timestamp"]
        return Timestamp(stamp["t"], stamp["i"])
    if "$numberDecimal" in dct:
        return _parse_canonical_decimal128(dct)
    if "$dbPointer" in dct:
        return _parse_canonical_dbpointer(dct)
    if "$regularExpression" in dct:
        return _parse_canonical_regex(dct)
    if "$symbol" in dct:
        return _parse_canonical_symbol(dct)
    if "$numberInt" in dct:
        return _parse_canonical_int32(dct)
    if "$numberDouble" in dct:
        return _parse_canonical_double(dct)
    return dct
def _parse_legacy_regex(doc: Any) -> Any:
pattern = doc["$regex"]
# Check if this is the $regex query operator.
if not isinstance(pattern, (str, bytes)):
return doc
flags = 0
# PyMongo always adds $options but some other tools may not.
for opt in doc.get("$options", ""):
flags |= _RE_OPT_TABLE.get(opt, 0)
return Regex(pattern, flags)
def _parse_legacy_uuid(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
"""Decode a JSON legacy $uuid to Python UUID."""
if len(doc) != 1:
raise TypeError(f"Bad $uuid, extra field(s): {doc}")
if not isinstance(doc["$uuid"], str):
raise TypeError(f"$uuid must be a string: {doc}")
if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED:
return Binary.from_uuid(uuid.UUID(doc["$uuid"]))
else:
return uuid.UUID(doc["$uuid"])
def _binary_or_uuid(data: Any, subtype: int, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Wrap decoded binary ``data`` in the value matching its ``subtype``.

    UUID subtypes yield a :class:`Binary` when the configured
    ``uuid_representation`` is UNSPECIFIED and a :class:`uuid.UUID` otherwise.
    Subtype 0 returns ``data`` itself, and every other subtype returns a
    :class:`Binary`.
    """
    if subtype not in ALL_UUID_SUBTYPES:
        if subtype == 0:
            # The cast only satisfies the type checker; data is returned as-is.
            return cast(uuid.UUID, data)
        return Binary(data, subtype)

    # special handling for UUID
    representation = json_options.uuid_representation
    wrapped = Binary(data, subtype)
    if representation == UuidRepresentation.UNSPECIFIED:
        return wrapped
    if subtype == UUID_SUBTYPE:
        # Legacy behavior: use STANDARD with binary subtype 4.
        representation = UuidRepresentation.STANDARD
    elif representation == UuidRepresentation.STANDARD:
        # subtype == OLD_UUID_SUBTYPE
        # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
        representation = UuidRepresentation.PYTHON_LEGACY
    return wrapped.as_uuid(representation)
def _parse_legacy_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
    """Decode a legacy ``{"$binary": <base64>, "$type": <subtype>}`` document.

    ``$type`` may be an int or a hexadecimal string. The base64 payload is
    decoded and passed with the subtype to :func:`_binary_or_uuid`, whose
    result is returned. ``doc`` is not modified.
    """
    type_field = doc["$type"]
    if isinstance(type_field, int):
        # Normalise into a local rather than overwriting the caller's
        # doc["$type"], which the previous version did.
        type_field = "%02x" % type_field
    subtype = int(type_field, 16)
    if subtype >= 0xFFFFFF80:  # Handle mongoexport values
        # Only the characters after the first six carry the subtype.
        subtype = int(type_field[6:], 16)
    data = base64.b64decode(doc["$binary"].encode())
    return _binary_or_uuid(data, subtype, json_options)
def _parse_canonical_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]:
binary = doc["$binary"]
b64 = binary["base64"]
subtype = binary["subType"]
if not isinstance(b64, str):
raise TypeError(f"$binary base64 must be a string: {doc}")
if not isinstance(subtype, str) or len(subtype) > 2:
raise TypeError(f"$binary subType must be a string at most 2 characters: {doc}")
if len(binary) != 2:
raise TypeError(f'$binary must include only "base64" and "subType" components: {doc}')
data = base64.b64decode(b64.encode())
return _binary_or_uuid(data, int(subtype, 16), json_options)
def _parse_canonical_datetime(
doc: Any, json_options: JSONOptions
) -> Union[datetime.datetime, DatetimeMS]:
"""Decode a JSON datetime to python datetime.datetime."""
dtm = doc["$date"]
if len(doc) != 1:
raise TypeError(f"Bad $date, extra field(s): {doc}")
# mongoexport 2.6 and newer
if isinstance(dtm, str):
# Parse offset
if dtm[-1] == "Z":
dt = dtm[:-1]
offset = "Z"
elif dtm[-6] in ("+", "-") and dtm[-3] == ":":
# (+|-)HH:MM
dt = dtm[:-6]
offset = dtm[-6:]
elif dtm[-5] in ("+", "-"):
# (+|-)HHMM
dt = dtm[:-5]
offset = dtm[-5:]
elif dtm[-3] in ("+", "-"):
# (+|-)HH
dt = dtm[:-3]
offset = dtm[-3:]
else:
dt = dtm
offset = ""
# Parse the optional factional seconds portion.
dot_index = dt.rfind(".")
microsecond = 0
if dot_index != -1:
microsecond = int(float(dt[dot_index:]) * 1000000)
dt = dt[:dot_index]
aware = datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S").replace(
microsecond=microsecond, tzinfo=utc
)
if offset and offset != "Z":
if len(offset) == 6:
hours, minutes = offset[1:].split(":")
secs = int(hours) * 3600 + int(minutes) * 60
elif len(offset) == 5:
secs = int(offset[1:3]) * 3600 + int(offset[3:]) * 60
elif len(offset) == 3:
secs = int(offset[1:3]) * 3600
if offset[0] == "-":
secs *= -1
aware = aware - datetime.timedelta(seconds=secs)
if json_options.tz_aware:
if json_options.tzinfo:
aware = aware.astimezone(json_options.tzinfo)
if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS:
return DatetimeMS(aware)
return aware
else:
aware_tzinfo_none = aware.replace(tzinfo=None)
if json_options.datetime_conversion == DatetimeConversion.DATETIME_MS:
return DatetimeMS(aware_tzinfo_none)
return aware_tzinfo_none
return _millis_to_datetime(int(dtm), json_options)
def _parse_canonical_oid(doc: Any) -> ObjectId:
"""Decode a JSON ObjectId to bson.objectid.ObjectId."""
if len(doc) != 1:
raise TypeError(f"Bad $oid, extra field(s): {doc}")
return ObjectId(doc["$oid"])
def _parse_canonical_symbol(doc: Any) -> str:
"""Decode a JSON symbol to Python string."""
symbol = doc["$symbol"]
if len(doc) != 1:
raise TypeError(f"Bad $symbol, extra field(s): {doc}")
return str(symbol)
def _parse_canonical_code(doc: Any) -> Code:
"""Decode a JSON code to bson.code.Code."""
for key in doc:
if key not in ("$code", "$scope"):
raise TypeError(f"Bad $code, extra field(s): {doc}")
return Code(doc["$code"], scope=doc.get("$scope"))
def _parse_canonical_regex(doc: Any) -> Regex:
"""Decode a JSON regex to bson.regex.Regex."""
regex = doc["$regularExpression"]
if len(doc) != 1:
raise TypeError(f"Bad $regularExpression, extra field(s): {doc}")
if len(regex) != 2:
raise TypeError(
'Bad $regularExpression must include only "pattern"'
'and "options" components: {}'.format(doc)
)
opts = regex["options"]
if not isinstance(opts, str):
raise TypeError(
"Bad $regularExpression options, options must be string, was type %s" % (type(opts))
)
return Regex(regex["pattern"], opts)
def _parse_canonical_dbref(doc: Any) -> DBRef:
    """Decode a JSON DBRef to bson.dbref.DBRef.

    ``$ref``, ``$id`` and ``$db`` (``None`` when absent) are popped from
    ``doc``; whatever keys remain are passed to :class:`DBRef` as keyword
    arguments.
    """
    collection = doc.pop("$ref")
    ref_id = doc.pop("$id")
    database = doc.pop("$db", None)
    return DBRef(collection, ref_id, database=database, **doc)
def _parse_canonical_dbpointer(doc: Any) -> Any:
"""Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef."""
dbref = doc["$dbPointer"]
if len(doc) != 1:
raise TypeError(f"Bad $dbPointer, extra field(s): {doc}")
if isinstance(dbref, DBRef):
dbref_doc = dbref.as_doc()
# DBPointer must not contain $db in its value.
if dbref.database is not None:
raise TypeError(f"Bad $dbPointer, extra field $db: {dbref_doc}")
if not isinstance(dbref.id, ObjectId):
raise TypeError(f"Bad $dbPointer, $id must be an ObjectId: {dbref_doc}")
if len(dbref_doc) != 2:
raise TypeError(f"Bad $dbPointer, extra field(s) in DBRef: {dbref_doc}")
return dbref
else:
raise TypeError(f"Bad $dbPointer, expected a DBRef: {doc}")
def _parse_canonical_int32(doc: Any) -> int:
"""Decode a JSON int32 to python int."""
i_str = doc["$numberInt"]
if len(doc) != 1:
raise TypeError(f"Bad $numberInt, extra field(s): {doc}")
if not isinstance(i_str, str):
raise TypeError(f"$numberInt must be string: {doc}")
return int(i_str)
def _parse_canonical_int64(doc: Any) -> Int64:
"""Decode a JSON int64 to bson.int64.Int64."""
l_str = doc["$numberLong"]
if len(doc) != 1:
raise TypeError(f"Bad $numberLong, extra field(s): {doc}")
return Int64(l_str)
def _parse_canonical_double(doc: Any) -> float:
"""Decode a JSON double to python float."""
d_str = doc["$numberDouble"]
if len(doc) != 1:
raise TypeError(f"Bad $numberDouble, extra field(s): {doc}")
if not isinstance(d_str, str):
raise TypeError(f"$numberDouble must be string: {doc}")
return float(d_str)
def _parse_canonical_decimal128(doc: Any) -> Decimal128:
"""Decode a JSON decimal128 to bson.decimal128.Decimal128."""
d_str = doc["$numberDecimal"]
if len(doc) != 1:
raise TypeError(f"Bad $numberDecimal, extra field(s): {doc}")
if not isinstance(d_str, str):
raise TypeError(f"$numberDecimal must be string: {doc}")
return Decimal128(d_str)
def _parse_canonical_minkey(doc: Any) -> MinKey:
"""Decode a JSON MinKey to bson.min_key.MinKey."""
if type(doc["$minKey"]) is not int or doc["$minKey"] != 1:
raise TypeError(f"$minKey value must be 1: {doc}")
if len(doc) != 1:
raise TypeError(f"Bad $minKey, extra field(s): {doc}")
return MinKey()
def _parse_canonical_maxkey(doc: Any) -> MaxKey:
"""Decode a JSON MaxKey to bson.max_key.MaxKey."""
if type(doc["$maxKey"]) is not int or doc["$maxKey"] != 1:
raise TypeError("$maxKey value must be 1: %s", (doc,))
if len(doc) != 1:
raise TypeError(f"Bad $minKey, extra field(s): {doc}")
return MaxKey()
def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
    """Render binary ``data`` with ``subtype`` as an Extended JSON document.

    LEGACY mode yields ``{"$binary": <base64>, "$type": <hex>}``; the other
    modes yield ``{"$binary": {"base64": <base64>, "subType": <hex>}}``. The
    subtype is written as at least two lower-case hex digits.
    """
    use_legacy_layout = json_options.json_mode == JSONMode.LEGACY
    encoded = base64.b64encode(data).decode()
    subtype_hex = "%02x" % subtype
    if use_legacy_layout:
        return SON([("$binary", encoded), ("$type", subtype_hex)])
    return {"$binary": SON([("base64", encoded), ("subType", subtype_hex)])}
def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
    """Convert one BSON or Python value to its Extended JSON form.

    The type tests run in a fixed order and the first match decides the
    output. ``bool`` values are returned unchanged. Finite floats outside
    CANONICAL mode, and any type not handled here, raise ``TypeError``.

    :Parameters:
      - `obj`: the value to convert.
      - `json_options`: A :class:`JSONOptions` instance selecting the
        encoding. Defaults to :const:`DEFAULT_JSON_OPTIONS`.
    """
    # We preserve key order when rendering SON, DBRef, etc. as JSON by
    # returning a SON for those types instead of a dict.
    if isinstance(obj, ObjectId):
        return {"$oid": str(obj)}
    if isinstance(obj, DBRef):
        return _json_convert(obj.as_doc(), json_options=json_options)
    if isinstance(obj, datetime.datetime):
        representation = json_options.datetime_representation
        if representation == DatetimeRepresentation.ISO8601:
            if not obj.tzinfo:
                # Naive datetimes are treated as UTC.
                obj = obj.replace(tzinfo=utc)
                assert obj.tzinfo is not None
            if obj >= EPOCH_AWARE:
                off = obj.tzinfo.utcoffset(obj)
                is_utc = (off.days, off.seconds, off.microseconds) == (0, 0, 0)  # type: ignore
                tz_string = "Z" if is_utc else obj.strftime("%z")
                millis = int(obj.microsecond / 1000)
                # The fraction is written only when it is non-zero.
                fracsecs = ".%03d" % (millis,) if millis else ""
                stamp = obj.strftime("%Y-%m-%dT%H:%M:%S")
                return {"$date": f"{stamp}{fracsecs}{tz_string}"}
        # Not ISO8601, or a datetime before the epoch: use milliseconds.
        millis = _datetime_to_millis(obj)
        if representation == DatetimeRepresentation.LEGACY:
            return {"$date": millis}
        return {"$date": {"$numberLong": str(millis)}}
    if isinstance(obj, DatetimeMS):
        representation = json_options.datetime_representation
        if (
            representation == DatetimeRepresentation.ISO8601
            and 0 <= int(obj) <= _max_datetime_ms()
        ):
            return default(obj.as_datetime(), json_options)
        if representation == DatetimeRepresentation.LEGACY:
            return {"$date": str(int(obj))}
        return {"$date": {"$numberLong": str(int(obj))}}
    if json_options.strict_number_long and isinstance(obj, Int64):
        return {"$numberLong": str(obj)}
    if isinstance(obj, (RE_TYPE, Regex)):
        letter_for_flag = (
            ("i", re.IGNORECASE),
            ("l", re.LOCALE),
            ("m", re.MULTILINE),
            ("s", re.DOTALL),
            ("u", re.UNICODE),
            ("x", re.VERBOSE),
        )
        flags = "".join(letter for letter, flag in letter_for_flag if obj.flags & flag)
        if isinstance(obj.pattern, str):
            pattern = obj.pattern
        else:
            pattern = obj.pattern.decode("utf-8")
        if json_options.json_mode == JSONMode.LEGACY:
            return SON([("$regex", pattern), ("$options", flags)])
        return {"$regularExpression": SON([("pattern", pattern), ("options", flags)])}
    if isinstance(obj, MinKey):
        return {"$minKey": 1}
    if isinstance(obj, MaxKey):
        return {"$maxKey": 1}
    if isinstance(obj, Timestamp):
        return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
    if isinstance(obj, Code):
        code_text = str(obj)
        if obj.scope is None:
            return {"$code": code_text}
        return SON([("$code", code_text), ("$scope", _json_convert(obj.scope, json_options))])
    if isinstance(obj, Binary):
        return _encode_binary(obj, obj.subtype, json_options)
    if isinstance(obj, bytes):
        return _encode_binary(obj, 0, json_options)
    if isinstance(obj, uuid.UUID):
        if not json_options.strict_uuid:
            return {"$uuid": obj.hex}
        binval = Binary.from_uuid(obj, uuid_representation=json_options.uuid_representation)
        return _encode_binary(binval, binval.subtype, json_options)
    if isinstance(obj, Decimal128):
        return {"$numberDecimal": str(obj)}
    if isinstance(obj, bool):
        # Tested before int below, because bool is a subclass of int.
        return obj
    if json_options.json_mode == JSONMode.CANONICAL and isinstance(obj, int):
        if -(2**31) <= obj < 2**31:
            return {"$numberInt": str(obj)}
        return {"$numberLong": str(obj)}
    if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
        if math.isnan(obj):
            return {"$numberDouble": "NaN"}
        if math.isinf(obj):
            return {"$numberDouble": "Infinity" if obj > 0 else "-Infinity"}
        if json_options.json_mode == JSONMode.CANONICAL:
            # repr() will return the shortest string guaranteed to produce the
            # original value, when float() is called on it.
            return {"$numberDouble": str(repr(obj))}
    raise TypeError("%r is not JSON serializable" % obj)