impuls/lib/python3.11/site-packages/bson/_cbsonmodule.c

3129 lines
99 KiB
C

/*
* Copyright 2009-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file contains C implementations of some of the functions
* needed by the bson module. If possible, these implementations
* should be used to speed up BSON encoding and decoding.
*/
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "datetime.h"
#include "buffer.h"
#include "time64.h"
#define _CBSON_MODULE
#include "_cbsonmodule.h"
/* New module state and initialization code.
* See the module-initialization-and-state
* section in the following doc:
* http://docs.python.org/release/3.1.3/howto/cporting.html
* which references the following pep:
* http://www.python.org/dev/peps/pep-3121/
* */
/* Per-module state: the Python objects that _load_python_objects() imports
 * and caches for this extension module. Retrieve it with GETSTATE(module). */
struct module_state {
/* bson.binary.Binary */
PyObject* Binary;
/* bson.code.Code */
PyObject* Code;
/* bson.objectid.ObjectId */
PyObject* ObjectId;
/* bson.dbref.DBRef */
PyObject* DBRef;
/* bson.regex.Regex */
PyObject* Regex;
/* uuid.UUID */
PyObject* UUID;
/* bson.timestamp.Timestamp */
PyObject* Timestamp;
/* bson.min_key.MinKey */
PyObject* MinKey;
/* bson.max_key.MaxKey */
PyObject* MaxKey;
/* bson.tz_util.utc */
PyObject* UTC;
/* Type of the object returned by re.compile(). */
PyTypeObject* REType;
/* bson.int64.Int64 */
PyObject* BSONInt64;
/* bson.decimal128.Decimal128 */
PyObject* Decimal128;
/* collections.abc.Mapping */
PyObject* Mapping;
/* bson.datetime_ms.DatetimeMS */
PyObject* DatetimeMS;
/* bson.datetime_ms._min_datetime_ms */
PyObject* _min_datetime_ms;
/* bson.datetime_ms._max_datetime_ms */
PyObject* _max_datetime_ms;
/* Python str "_type_marker", built once and reused for attribute lookups. */
PyObject* _type_marker_str;
};
/* Fetch the struct module_state attached to module object m. */
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
/* Size of the regex flags buffer in _write_regex_to_buffer: room for the six
 * flag characters that function can emit plus the terminating NUL. */
#define FLAGS_SIZE 7
/* Default UUID representation type code. */
#define PYTHON_LEGACY 3
/* Other UUID representations. */
#define STANDARD 4
#define JAVA_LEGACY 5
#define CSHARP_LEGACY 6
#define UNSPECIFIED 0
/* Largest size accepted by _downcast_and_check (2^31 - 1, the maximum value
 * of a signed 32-bit length field). */
#define BSON_MAX_SIZE 2147483647
/* The smallest possible BSON document, i.e. "{}" */
#define BSON_MIN_SIZE 5
/* Datetime codec options */
#define DATETIME 1
#define DATETIME_CLAMP 2
#define DATETIME_MS 3
#define DATETIME_AUTO 4
/* Converts an integer to its string representation in decimal notation.
 *
 * num:  value to convert.
 * str:  destination buffer; receives a NUL-terminated string.
 * size: capacity of str in bytes. Must be at least 21: up to 19 digits,
 *       an optional '-' sign and the terminating NUL.
 *
 * Returns 0 on success. Returns -1 with RuntimeError set when the buffer is
 * too small. */
extern int cbson_long_long_to_str(long long num, char* str, size_t size) {
    // Buffer should fit 64-bit signed integer
    if (size < 21) {
        /* size is a size_t, so it must be formatted with %zu; %d would read
         * an int from the variadic arguments. */
        PyErr_Format(
            PyExc_RuntimeError,
            "Buffer too small to hold long long: %zu < 21", size);
        return -1;
    }
    int index = 0;
    int sign = 1;
    // Convert to unsigned to handle -LLONG_MIN overflow
    unsigned long long absNum;
    // Handle the case of 0
    if (num == 0) {
        str[index++] = '0';
        str[index] = '\0';
        return 0;
    }
    // Handle negative numbers
    if (num < 0) {
        sign = -1;
        /* Negate in unsigned arithmetic: well defined even for LLONG_MIN. */
        absNum = 0ULL - (unsigned long long)num;
    } else {
        absNum = (unsigned long long)num;
    }
    // Emit the digits least-significant first
    unsigned long long digit;
    while (absNum > 0) {
        digit = absNum % 10ULL;
        str[index++] = (char)digit + '0';  // Convert digit to character
        absNum /= 10;
    }
    // Add minus sign if negative
    if (sign == -1) {
        str[index++] = '-';
    }
    str[index] = '\0';  // Null terminator
    // Reverse the string in place so the most significant digit comes first
    int start = 0;
    int end = index - 1;
    while (start < end) {
        char temp = str[start];
        str[start++] = str[end];
        str[end--] = temp;
    }
    return 0;
}
static PyObject* _test_long_long_to_str(PyObject* self, PyObject* args) {
// Test extreme values
Py_ssize_t maxNum = PY_SSIZE_T_MAX;
Py_ssize_t minNum = PY_SSIZE_T_MIN;
Py_ssize_t num;
char str_1[BUF_SIZE];
char str_2[BUF_SIZE];
int res = LL2STR(str_1, (long long)minNum);
if (res == -1) {
return NULL;
}
INT2STRING(str_2, (long long)minNum);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}
LL2STR(str_1, (long long)maxNum);
INT2STRING(str_2, (long long)maxNum);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}
// Test common values
for (num = 0; num < 10000; num++) {
char str_1[BUF_SIZE];
char str_2[BUF_SIZE];
LL2STR(str_1, (long long)num);
INT2STRING(str_2, (long long)num);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}
}
return args;
}
/* Look up an exception class by name in the bson.errors module.
 *
 * Returns a new reference, or NULL (with an exception set by the import or
 * attribute lookup) on failure. */
static PyObject* _error(char* name) {
    PyObject* error_class = NULL;
    PyObject* errors_module = PyImport_ImportModule("bson.errors");
    if (errors_module != NULL) {
        error_class = PyObject_GetAttrString(errors_module, name);
        Py_DECREF(errors_module);
    }
    return error_class;
}
/* Narrow a Py_ssize_t length to int after adding `extra` bytes.
 *
 * Returns size + extra when that sum does not exceed BSON_MAX_SIZE.
 * Otherwise sets bson.errors.InvalidStringData (when that class can be
 * loaded) and returns -1. */
static int
_downcast_and_check(Py_ssize_t size, uint8_t extra) {
    /* extra is unsigned, so this one comparison also rejects every
     * size greater than BSON_MAX_SIZE. */
    const int too_large = (size > BSON_MAX_SIZE - extra);
    if (!too_large) {
        return (int)size + extra;
    }
    {
        PyObject* InvalidStringData = _error("InvalidStringData");
        if (InvalidStringData != NULL) {
            PyErr_SetString(InvalidStringData,
                            "String length must be <= 2147483647");
            Py_DECREF(InvalidStringData);
        }
    }
    return -1;
}
/* Forward declaration; the definition is not in this part of the file. */
static PyObject* elements_to_dict(PyObject* self, const char* string,
unsigned max,
const codec_options_t* options);
/* Forward declaration of the element encoder, needed because it is called
 * by write_element_to_buffer() before its definition. Returns 0 on failure. */
static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
int type_byte, PyObject* value,
unsigned char check_keys,
const codec_options_t* options,
unsigned char in_custom_call,
unsigned char in_fallback_call);
/* Write a RawBSONDocument to the buffer.
* Returns the number of bytes written or 0 on failure.
*/
static int write_raw_doc(buffer_t buffer, PyObject* raw);
/* Date stuff */
static PyObject* datetime_from_millis(long long millis) {
/* To encode a datetime instance like datetime(9999, 12, 31, 23, 59, 59, 999999)
* we follow these steps:
* 1. Calculate a timestamp in seconds: 253402300799
* 2. Multiply that by 1000: 253402300799000
* 3. Add in microseconds divided by 1000 253402300799999
*
* (Note: BSON doesn't support microsecond accuracy, hence the rounding.)
*
* To decode we could do:
* 1. Get seconds: timestamp / 1000: 253402300799
* 2. Get micros: (timestamp % 1000) * 1000: 999000
* Resulting in datetime(9999, 12, 31, 23, 59, 59, 999000) -- the expected result
*
* Now what if the we encode (1, 1, 1, 1, 1, 1, 111111)?
* 1. and 2. gives: -62135593139000
* 3. Gives us: -62135593138889
*
* Now decode:
* 1. Gives us: -62135593138
* 2. Gives us: -889000
* Resulting in datetime(1, 1, 1, 1, 1, 2, 15888216) -- an invalid result
*
* If instead to decode we do:
* diff = ((millis % 1000) + 1000) % 1000: 111
* seconds = (millis - diff) / 1000: -62135593139
* micros = diff * 1000 111000
* Resulting in datetime(1, 1, 1, 1, 1, 1, 111000) -- the expected result
*/
int diff = (int)(((millis % 1000) + 1000) % 1000);
int microseconds = diff * 1000;
Time64_T seconds = (millis - diff) / 1000;
struct TM timeinfo;
cbson_gmtime64_r(&seconds, &timeinfo);
return PyDateTime_FromDateAndTime(timeinfo.tm_year + 1900,
timeinfo.tm_mon + 1,
timeinfo.tm_mday,
timeinfo.tm_hour,
timeinfo.tm_min,
timeinfo.tm_sec,
microseconds);
}
/* Convert a datetime.datetime into milliseconds since the epoch.
 *
 * The date and time fields are read as-is (no timezone adjustment happens
 * here) and the microseconds are truncated to whole milliseconds. */
static long long millis_from_datetime(PyObject* datetime) {
    long long total_ms;
    struct TM fields;

    /* Time of day. */
    fields.tm_sec = PyDateTime_DATE_GET_SECOND(datetime);
    fields.tm_min = PyDateTime_DATE_GET_MINUTE(datetime);
    fields.tm_hour = PyDateTime_DATE_GET_HOUR(datetime);
    /* Calendar date; struct TM counts months from 0 and years from 1900. */
    fields.tm_mday = PyDateTime_GET_DAY(datetime);
    fields.tm_mon = PyDateTime_GET_MONTH(datetime) - 1;
    fields.tm_year = PyDateTime_GET_YEAR(datetime) - 1900;

    total_ms = cbson_timegm64(&fields) * 1000;
    total_ms += PyDateTime_DATE_GET_MICROSECOND(datetime) / 1000;
    return total_ms;
}
/* Extended-range datetime: wrap a millisecond count in a DatetimeMS object.
 *
 * self is the module object whose state holds the cached DatetimeMS class.
 * Returns a new reference, or NULL on failure. */
static PyObject* datetime_ms_from_millis(PyObject* self, long long millis){
    struct module_state *state = GETSTATE(self);
    PyObject* instance = NULL;
    PyObject* py_millis = PyLong_FromLongLong(millis);

    if (py_millis != NULL) {
        /* Equivalent to DatetimeMS(millis) in Python. */
        instance = PyObject_CallFunctionObjArgs(state->DatetimeMS, py_millis, NULL);
        Py_DECREF(py_millis);
    }
    return instance;
}
/* Extended-range datetime: extract the millisecond count from a DatetimeMS.
 *
 * The object is converted with int(dt). On success the value is stored in
 * *out and 1 is returned. On failure 0 is returned with an exception set and
 * *out is left untouched. */
static int millis_from_datetime_ms(PyObject* dt, long long* out){
    long long value;
    PyObject* as_int = PyNumber_Long(dt);

    if (as_int == NULL) {
        return 0;
    }
    value = PyLong_AsLongLong(as_int);
    Py_DECREF(as_int);
    /* -1 is also a legitimate value, so the error indicator decides. */
    if (value == -1 && PyErr_Occurred()) { /* Overflow */
        PyErr_SetString(PyExc_OverflowError,
                        "MongoDB datetimes can only handle up to 8-byte ints");
        return 0;
    }
    *out = value;
    return 1;
}
/* Compatibility wrapper around pymongo_buffer_write() for the old API:
 * a non-zero result from pymongo_buffer_write() is reported as 0 (failure),
 * a zero result as 1 (success). */
int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
    return pymongo_buffer_write(buffer, data, size) ? 0 : 1;
}
/* Append a double to the buffer as 8 bytes, after BSON_DOUBLE_TO_LE
 * conversion. Returns 1 on success, 0 on failure. */
int buffer_write_double(buffer_t buffer, double data) {
    double le_value = BSON_DOUBLE_TO_LE(data);
    const char* raw = (const char*)&le_value;
    return buffer_write_bytes(buffer, raw, 8);
}
/* Append a 32-bit integer to the buffer as 4 bytes, after BSON_UINT32_TO_LE
 * conversion. Returns 1 on success, 0 on failure. */
int buffer_write_int32(buffer_t buffer, int32_t data) {
    uint32_t le_value = BSON_UINT32_TO_LE(data);
    const char* raw = (const char*)&le_value;
    return buffer_write_bytes(buffer, raw, 4);
}
/* Append a 64-bit integer to the buffer as 8 bytes, after BSON_UINT64_TO_LE
 * conversion. Returns 1 on success, 0 on failure. */
int buffer_write_int64(buffer_t buffer, int64_t data) {
    uint64_t le_value = BSON_UINT64_TO_LE(data);
    const char* raw = (const char*)&le_value;
    return buffer_write_bytes(buffer, raw, 8);
}
/* Overwrite 4 bytes at offset `position` of the buffer's storage with a
 * 32-bit integer (after BSON_UINT32_TO_LE conversion). The write position of
 * the buffer is not touched; callers use this to back-fill length fields
 * whose space was reserved earlier. */
void buffer_write_int32_at_position(buffer_t buffer,
                                    int position,
                                    int32_t data) {
    uint32_t le_value = BSON_UINT32_TO_LE(data);
    char* destination = pymongo_buffer_get_buffer(buffer) + position;
    memcpy(destination, &le_value, 4);
}
/* Write a Python str to the buffer as a length-prefixed UTF-8 string.
 *
 * The int32 prefix is the encoded length plus 1, and that many bytes are
 * copied from the encoded bytes object, so the byte following the text (the
 * bytes object's trailing NUL) is included.
 *
 * Returns 1 on success, 0 on failure. */
static int write_unicode(buffer_t buffer, PyObject* py_string) {
    int status = 0;
    int size;
    const char* data;
    PyObject* encoded = PyUnicode_AsUTF8String(py_string);

    if (encoded == NULL) {
        return 0;
    }
    data = PyBytes_AS_STRING(encoded);
    /* Each step only runs when the previous one succeeded. */
    if (data != NULL &&
        (size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1)) != -1 &&
        buffer_write_int32(buffer, (int32_t)size) &&
        buffer_write_bytes(buffer, data, size)) {
        status = 1;
    }
    Py_DECREF(encoded);
    return status;
}
/* Write a Python str or bytes object to the buffer as a length-prefixed
 * string. str objects are delegated to write_unicode(); anything else is
 * treated as bytes and written with an int32 prefix of size + 1 followed by
 * that many bytes.
 *
 * Returns 1 on success, 0 on failure. */
static int write_string(buffer_t buffer, PyObject* py_string) {
    const char* raw;
    int length;

    if (PyUnicode_Check(py_string)) {
        return write_unicode(buffer, py_string);
    }

    raw = PyBytes_AsString(py_string);
    if (raw == NULL) {
        return 0;
    }
    length = _downcast_and_check(PyBytes_Size(py_string), 1);
    if (length == -1) {
        return 0;
    }
    if (!buffer_write_int32(buffer, (int32_t)length)) {
        return 0;
    }
    return buffer_write_bytes(buffer, raw, length) ? 1 : 0;
}
/*
 * Report whether the calling thread runs in the main interpreter rather
 * than a sub-interpreter. Used to decide whether cached pure python types
 * can be used (e.g. under mod_wsgi).
 *
 * The main interpreter is taken to be the last entry of the interpreter
 * list; it is looked up once and remembered in a static variable.
 */
static int
_in_main_interpreter(void) {
    static PyInterpreterState* main_interpreter = NULL;

    if (main_interpreter == NULL) {
        PyInterpreterState* current = PyInterpreterState_Head();
        PyInterpreterState* following;
        /* Walk to the tail of the list. */
        while ((following = PyInterpreterState_Next(current)) != NULL) {
            current = following;
        }
        main_interpreter = current;
    }
    return (PyThreadState_Get()->interp == main_interpreter);
}
/*
 * Obtain a reference to a pure python object.
 *
 * In the main interpreter the cached `object` is returned with its
 * reference count incremented (it may be NULL). In any other interpreter
 * `object_name` is looked up on a fresh import of `module_name` instead.
 *
 * Returns a new reference, or NULL.
 */
static PyObject*
_get_object(PyObject* object, char* module_name, char* object_name) {
    PyObject* module;
    PyObject* fetched;

    if (_in_main_interpreter()) {
        Py_XINCREF(object);
        return object;
    }

    module = PyImport_ImportModule(module_name);
    if (module == NULL) {
        return NULL;
    }
    fetched = PyObject_GetAttrString(module, object_name);
    Py_DECREF(module);
    return fetched;
}
/* Import `module_name` and store its `object_name` attribute in *object
 * (a new reference) for caching.
 *
 * Returns 0 on success, 1 when the import fails and 2 when the attribute
 * lookup fails. *object is only written when the import succeeded. */
static int _load_object(PyObject** object, char* module_name, char* object_name) {
    PyObject* imported = PyImport_ImportModule(module_name);
    if (imported == NULL) {
        return 1;
    }
    *object = PyObject_GetAttrString(imported, object_name);
    Py_DECREF(imported);
    if (*object == NULL) {
        return 2;
    }
    return 0;
}
/* Load all Python objects to cache in the module state of `module`.
 *
 * Fills every field of struct module_state: the "_type_marker" string, the
 * bson/uuid/collections.abc objects, and REType (the type of a compiled
 * regular expression, obtained by calling re.compile(b"")).
 *
 * Returns 0 on success and non-zero on failure. On failure in the regex
 * step, state->REType is set to NULL. */
static int _load_python_objects(PyObject* module) {
    PyObject* empty_string = NULL;
    PyObject* re_compile = NULL;
    PyObject* compiled = NULL;
    struct module_state *state = GETSTATE(module);

    /* Python str for faster _type_marker check */
    state->_type_marker_str = PyUnicode_FromString("_type_marker");
    if (state->_type_marker_str == NULL) {
        /* Without this check a NULL name would later be handed to the
         * attribute lookups in _type_marker(). */
        return 1;
    }

    /* Short-circuits on the first object that fails to load. */
    if (_load_object(&state->Binary, "bson.binary", "Binary") ||
        _load_object(&state->Code, "bson.code", "Code") ||
        _load_object(&state->ObjectId, "bson.objectid", "ObjectId") ||
        _load_object(&state->DBRef, "bson.dbref", "DBRef") ||
        _load_object(&state->Timestamp, "bson.timestamp", "Timestamp") ||
        _load_object(&state->MinKey, "bson.min_key", "MinKey") ||
        _load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
        _load_object(&state->UTC, "bson.tz_util", "utc") ||
        _load_object(&state->Regex, "bson.regex", "Regex") ||
        _load_object(&state->BSONInt64, "bson.int64", "Int64") ||
        _load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
        _load_object(&state->UUID, "uuid", "UUID") ||
        _load_object(&state->Mapping, "collections.abc", "Mapping") ||
        _load_object(&state->DatetimeMS, "bson.datetime_ms", "DatetimeMS") ||
        _load_object(&state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms") ||
        _load_object(&state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms")) {
        return 1;
    }

    /* Reload our REType hack too: compile an empty pattern and remember the
     * type of the result. */
    empty_string = PyBytes_FromString("");
    if (empty_string == NULL) {
        state->REType = NULL;
        return 1;
    }
    if (_load_object(&re_compile, "re", "compile")) {
        state->REType = NULL;
        Py_DECREF(empty_string);
        return 1;
    }
    compiled = PyObject_CallFunction(re_compile, "O", empty_string);
    Py_DECREF(re_compile);
    if (compiled == NULL) {
        state->REType = NULL;
        Py_DECREF(empty_string);
        return 1;
    }
    /* Keep the type alive independently of the temporary pattern object. */
    Py_INCREF(Py_TYPE(compiled));
    state->REType = Py_TYPE(compiled);
    Py_DECREF(empty_string);
    Py_DECREF(compiled);
    return 0;
}
/*
 * Get the _type_marker from an Object.
 *
 * `_type_marker_str` is the attribute name as a Python str.
 *
 * Returns the marker when the attribute exists and is exactly an int,
 * 0 when the object has no such attribute or it is of another type, and
 * -1 when fetching the attribute fails.
 */
static long _type_marker(PyObject* object, PyObject* _type_marker_str) {
    PyObject* marker;
    long marker_value = 0;

    if (!PyObject_HasAttr(object, _type_marker_str)) {
        return 0;
    }
    marker = PyObject_GetAttr(object, _type_marker_str);
    if (marker == NULL) {
        return -1;
    }
    /*
     * Objects with a permissive __getattr__ can answer any attribute lookup
     * with an arbitrary object (pymongo.database.Database, for instance,
     * returns a new Collection for unknown names). Only an exact int is
     * accepted as a marker; everything else counts as "no marker" so that
     * the caller can still make a best effort to encode the value.
     */
    if (PyLong_CheckExact(marker)) {
        marker_value = PyLong_AsLong(marker);
    }
    Py_DECREF(marker);
    return marker_value;
}
/* Fill out a type_registry_t* from a TypeRegistry object.
 *
 * Reads the _encoder_map, _decoder_map and _fallback_encoder attributes of
 * registry_obj and derives the is_encoder_empty, is_decoder_empty and
 * has_fallback_encoder flags from them.
 *
 * Return 1 on success. registry->encoder_map, decoder_map, fallback_encoder
 * and registry_obj are then new references owned by `registry`.
 * Return 0 on failure with an exception set; all four pointers are NULL.
 */
int cbson_convert_type_registry(PyObject* registry_obj, type_registry_t* registry) {
    Py_ssize_t map_size;

    registry->encoder_map = NULL;
    registry->decoder_map = NULL;
    registry->fallback_encoder = NULL;
    registry->registry_obj = NULL;

    registry->encoder_map = PyObject_GetAttrString(registry_obj, "_encoder_map");
    if (registry->encoder_map == NULL) {
        goto fail;
    }
    /* PyDict_Size returns -1 with an exception set for a non-dict. */
    map_size = PyDict_Size(registry->encoder_map);
    if (map_size < 0) {
        goto fail;
    }
    registry->is_encoder_empty = (map_size == 0);

    registry->decoder_map = PyObject_GetAttrString(registry_obj, "_decoder_map");
    if (registry->decoder_map == NULL) {
        goto fail;
    }
    map_size = PyDict_Size(registry->decoder_map);
    if (map_size < 0) {
        goto fail;
    }
    registry->is_decoder_empty = (map_size == 0);

    registry->fallback_encoder = PyObject_GetAttrString(registry_obj, "_fallback_encoder");
    if (registry->fallback_encoder == NULL) {
        goto fail;
    }
    registry->has_fallback_encoder = (registry->fallback_encoder != Py_None);

    registry->registry_obj = registry_obj;
    Py_INCREF(registry->registry_obj);
    return 1;

fail:
    /* Py_CLEAR resets the pointers as well, so a later cleanup of the
     * enclosing structure cannot release them a second time. */
    Py_CLEAR(registry->encoder_map);
    Py_CLEAR(registry->decoder_map);
    Py_CLEAR(registry->fallback_encoder);
    return 0;
}
/* Fill out a codec_options_t* from a CodecOptions object.
 *
 * options_obj is unpacked as a 7-tuple: document_class, tz_aware, uuid_rep,
 * unicode_decode_error_handler, tzinfo, type_registry, datetime_conversion.
 * self is the module object, used to reach the cached "_type_marker" name.
 *
 * Return 1 on success. options->document_class, options->tzinfo and
 * options->options_obj are then new references, and options->type_registry
 * has been filled in by cbson_convert_type_registry().
 * Return 0 on failure.
 */
int convert_codec_options(PyObject* self, PyObject* options_obj, codec_options_t* options) {
    PyObject* registry_obj = NULL;
    long document_class_marker;

    options->unicode_decode_error_handler = NULL;

    if (!PyArg_ParseTuple(options_obj, "ObbzOOb",
                          &options->document_class,
                          &options->tz_aware,
                          &options->uuid_rep,
                          &options->unicode_decode_error_handler,
                          &options->tzinfo,
                          &registry_obj,
                          &options->datetime_conversion)) {
        return 0;
    }

    document_class_marker = _type_marker(options->document_class,
                                         GETSTATE(self)->_type_marker_str);
    if (document_class_marker < 0) {
        return 0;
    }

    if (!cbson_convert_type_registry(registry_obj,
                                     &options->type_registry)) {
        return 0;
    }

    /* Type marker 101 identifies RawBSONDocument. */
    options->is_raw_bson = (101 == document_class_marker);
    options->options_obj = options_obj;

    /* The parsed objects are borrowed from the tuple; take ownership only
     * once nothing can fail any more. */
    Py_INCREF(options->options_obj);
    Py_INCREF(options->document_class);
    Py_INCREF(options->tzinfo);
    return 1;
}
/* Release every Python reference held by a codec_options_t: the three taken
 * by convert_codec_options() and the four taken by
 * cbson_convert_type_registry(). Py_CLEAR also sets each pointer to NULL. */
void destroy_codec_options(codec_options_t* options) {
Py_CLEAR(options->document_class);
Py_CLEAR(options->tzinfo);
Py_CLEAR(options->options_obj);
Py_CLEAR(options->type_registry.registry_obj);
Py_CLEAR(options->type_registry.encoder_map);
Py_CLEAR(options->type_registry.decoder_map);
Py_CLEAR(options->type_registry.fallback_encoder);
}
/* Recursion-guarded entry point for encoding a single value.
 *
 * Wraps _write_element_to_buffer() in Py_EnterRecursiveCall /
 * Py_LeaveRecursiveCall so that deeply nested values hit Python's recursion
 * limit instead of exhausting the C stack. All arguments are passed through
 * unchanged.
 *
 * Returns 0 on failure, otherwise the result of _write_element_to_buffer(). */
static int write_element_to_buffer(PyObject* self, buffer_t buffer,
                                   int type_byte, PyObject* value,
                                   unsigned char check_keys,
                                   const codec_options_t* options,
                                   unsigned char in_custom_call,
                                   unsigned char in_fallback_call) {
    int status;

    if (Py_EnterRecursiveCall(" while encoding an object to BSON ") != 0) {
        return 0;
    }
    status = _write_element_to_buffer(self, buffer, type_byte,
                                      value, check_keys, options,
                                      in_custom_call, in_fallback_call);
    Py_LeaveRecursiveCall();
    return status;
}
/* Set bson.errors.InvalidDocument with a message naming `value` and its
 * type. If the exception class or the type cannot be obtained, whatever
 * exception that lookup raised is left in place instead. */
static void
_set_cannot_encode(PyObject* value) {
    PyObject* InvalidDocument = _error("InvalidDocument");

    if (InvalidDocument != NULL) {
        PyObject* value_type = PyObject_Type(value);
        if (value_type != NULL) {
            PyErr_Format(InvalidDocument, "cannot encode object: %R, of type: %R",
                         value, value_type);
            Py_DECREF(value_type);
        }
        Py_DECREF(InvalidDocument);
    }
}
/*
 * Encode a builtin Python regular expression or our custom Regex class.
 *
 * Both kinds of object expose "flags" (an int) and "pattern" (str or bytes).
 * The pattern is written as a NUL-terminated string, followed by the flags
 * as a NUL-terminated string of option letters; finally the reserved type
 * byte at offset `type_byte` is set to 0x0B.
 *
 * Sets exception and returns 0 on failure, returns 1 on success.
 */
static int _write_regex_to_buffer(
    buffer_t buffer, int type_byte, PyObject* value) {
    /* Flag bit -> option letter, in the order the letters are emitted. */
    static const struct {
        long mask;
        char letter;
    } flag_letters[] = {
        {2, 'i'}, {4, 'l'}, {8, 'm'}, {16, 's'}, {32, 'u'}, {64, 'x'}
    };
    PyObject* attr;
    PyObject* pattern_bytes;
    const char* pattern;
    long re_flags;
    char flag_text[FLAGS_SIZE];
    int flag_count = 0;
    int pattern_len;
    int must_validate_utf8 = 0;
    int ok = 0;
    size_t k;

    attr = PyObject_GetAttrString(value, "flags");
    if (attr == NULL) {
        return 0;
    }
    re_flags = PyLong_AsLong(attr);
    Py_DECREF(attr);
    if (re_flags == -1 && PyErr_Occurred()) {
        return 0;
    }

    attr = PyObject_GetAttrString(value, "pattern");
    if (attr == NULL) {
        return 0;
    }
    if (PyUnicode_Check(attr)) {
        pattern_bytes = PyUnicode_AsUTF8String(attr);
        Py_DECREF(attr);
        if (pattern_bytes == NULL) {
            return 0;
        }
    } else {
        /* Not a str: use the object as-is and verify below that its
         * contents are valid UTF-8. */
        pattern_bytes = attr;
        must_validate_utf8 = 1;
    }

    /* From here on pattern_bytes is owned and released at `done`. */
    pattern = PyBytes_AsString(pattern_bytes);
    if (pattern == NULL) {
        goto done;
    }
    pattern_len = _downcast_and_check(PyBytes_Size(pattern_bytes), 0);
    if (pattern_len == -1) {
        goto done;
    }
    /* An embedded NUL would cut the C string short. */
    if (strlen(pattern) != (size_t) pattern_len) {
        PyObject* InvalidDocument = _error("InvalidDocument");
        if (InvalidDocument) {
            PyErr_SetString(InvalidDocument,
                            "regex patterns must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
        }
        goto done;
    }
    if (must_validate_utf8) {
        PyObject* decoded = PyUnicode_DecodeUTF8(pattern, (Py_ssize_t) pattern_len, NULL);
        if (decoded == NULL) {
            PyObject* InvalidStringData;
            /* Replace the decode error with our own exception type. */
            PyErr_Clear();
            InvalidStringData = _error("InvalidStringData");
            if (InvalidStringData) {
                PyErr_SetString(InvalidStringData,
                                "regex patterns must be valid UTF-8");
                Py_DECREF(InvalidStringData);
            }
            goto done;
        }
        Py_DECREF(decoded);
    }

    /* Pattern including its terminating NUL. */
    if (!buffer_write_bytes(buffer, pattern, pattern_len + 1)) {
        goto done;
    }

    /* At most six letters are produced, leaving room for the NUL. */
    for (k = 0; k < sizeof(flag_letters) / sizeof(flag_letters[0]); k++) {
        if (re_flags & flag_letters[k].mask) {
            flag_text[flag_count++] = flag_letters[k].letter;
        }
    }
    flag_text[flag_count] = '\0';
    if (!buffer_write_bytes(buffer, flag_text, flag_count + 1)) {
        goto done;
    }

    *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x0B;
    ok = 1;

done:
    Py_DECREF(pattern_bytes);
    return ok;
}
/* Write a single value to the buffer (also write its type_byte, for which
 * space has already been reserved).
 *
 * self:             module object; its state holds the cached Python types.
 * buffer:           destination buffer.
 * type_byte:        offset in the buffer of the reserved BSON type byte.
 * value:            the Python object to encode.
 * check_keys:       forwarded to write_dict() / nested elements.
 * options:          codec options (uuid_rep and the type registry are used).
 * in_custom_call:   non-zero when value was produced by a type-registry
 *                   encoder, which prevents a second registry lookup.
 * in_fallback_call: non-zero when value was produced by the fallback
 *                   encoder, which prevents calling it again.
 *
 * Dispatch order: objects with a known _type_marker, then builtin types
 * (bool, int, float, None, dict, list/tuple, bytes, str, datetime,
 * DatetimeMS, compiled regex), then Mapping and UUID instances, then the
 * type registry encoder and finally the fallback encoder.
 *
 * returns 0 on failure */
static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
                                    int type_byte, PyObject* value,
                                    unsigned char check_keys,
                                    const codec_options_t* options,
                                    unsigned char in_custom_call,
                                    unsigned char in_fallback_call) {
    struct module_state *state = GETSTATE(self);
    PyObject* mapping_type;
    PyObject* new_value = NULL;
    int retval;
    PyObject* uuid_type;
    /*
     * Don't use PyObject_IsInstance for our custom types. It causes
     * problems with python sub interpreters. Our custom types should
     * have a _type_marker attribute, which we can switch on instead.
     */
    long type = _type_marker(value, state->_type_marker_str);
    if (type < 0) {
        return 0;
    }

    switch (type) {
    case 5:
        {
            /* Binary */
            PyObject* subtype_object;
            long subtype_long;
            char subtype;
            const char* data;
            int size;

            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x05;
            subtype_object = PyObject_GetAttrString(value, "subtype");
            if (!subtype_object) {
                return 0;
            }
            /* Test for failure on the full-width result. Checking after
             * narrowing to char would mistake subtype 255 for an error on
             * platforms where char is signed. */
            subtype_long = PyLong_AsLong(subtype_object);
            Py_DECREF(subtype_object);
            if (subtype_long == -1 && PyErr_Occurred()) {
                return 0;
            }
            subtype = (char)subtype_long;
            size = _downcast_and_check(PyBytes_Size(value), 0);
            if (size == -1) {
                return 0;
            }
            if (subtype == 2) {
                /* Subtype 2 carries an outer length (payload + 4) and the
                 * subtype byte before the inner length. */
                int other_size = _downcast_and_check(PyBytes_Size(value), 4);
                if (other_size == -1)
                    return 0;
                if (!buffer_write_int32(buffer, other_size)) {
                    return 0;
                }
                if (!buffer_write_bytes(buffer, &subtype, 1)) {
                    return 0;
                }
            }
            if (!buffer_write_int32(buffer, size)) {
                return 0;
            }
            if (subtype != 2) {
                if (!buffer_write_bytes(buffer, &subtype, 1)) {
                    return 0;
                }
            }
            data = PyBytes_AsString(value);
            if (!data) {
                return 0;
            }
            if (!buffer_write_bytes(buffer, data, size)) {
                return 0;
            }
            return 1;
        }
    case 7:
        {
            /* ObjectId: the 12 bytes of its "binary" attribute */
            const char* data;
            PyObject* pystring = PyObject_GetAttrString(value, "binary");
            if (!pystring) {
                return 0;
            }
            data = PyBytes_AsString(pystring);
            if (!data) {
                Py_DECREF(pystring);
                return 0;
            }
            if (!buffer_write_bytes(buffer, data, 12)) {
                Py_DECREF(pystring);
                return 0;
            }
            Py_DECREF(pystring);
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x07;
            return 1;
        }
    case 11:
        {
            /* Regex */
            return _write_regex_to_buffer(buffer, type_byte, value);
        }
    case 13:
        {
            /* Code */
            int start_position,
                length_location,
                length;

            PyObject* scope = PyObject_GetAttrString(value, "scope");
            if (!scope) {
                return 0;
            }

            if (scope == Py_None) {
                /* Code without scope is a plain string element. */
                Py_DECREF(scope);
                *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x0D;
                return write_string(buffer, value);
            }

            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x0F;

            start_position = pymongo_buffer_get_position(buffer);
            /* save space for length */
            length_location = pymongo_buffer_save_space(buffer, 4);
            if (length_location == -1) {
                Py_DECREF(scope);
                return 0;
            }

            if (!write_string(buffer, value)) {
                Py_DECREF(scope);
                return 0;
            }

            if (!write_dict(self, buffer, scope, 0, options, 0)) {
                Py_DECREF(scope);
                return 0;
            }
            Py_DECREF(scope);

            /* Back-fill the total length now that it is known. */
            length = pymongo_buffer_get_position(buffer) - start_position;
            buffer_write_int32_at_position(
                buffer, length_location, (int32_t)length);
            return 1;
        }
    case 17:
        {
            /* Timestamp: "inc" is written first, then "time" */
            PyObject* obj;
            unsigned long i;

            obj = PyObject_GetAttrString(value, "inc");
            if (!obj) {
                return 0;
            }
            i = PyLong_AsUnsignedLong(obj);
            Py_DECREF(obj);
            if (i == (unsigned long)-1 && PyErr_Occurred()) {
                return 0;
            }
            if (!buffer_write_int32(buffer, (int32_t)i)) {
                return 0;
            }

            obj = PyObject_GetAttrString(value, "time");
            if (!obj) {
                return 0;
            }
            i = PyLong_AsUnsignedLong(obj);
            Py_DECREF(obj);
            if (i == (unsigned long)-1 && PyErr_Occurred()) {
                return 0;
            }
            if (!buffer_write_int32(buffer, (int32_t)i)) {
                return 0;
            }

            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x11;
            return 1;
        }
    case 18:
        {
            /* Int64 */
            const long long ll = PyLong_AsLongLong(value);
            if (PyErr_Occurred()) { /* Overflow */
                PyErr_SetString(PyExc_OverflowError,
                                "MongoDB can only handle up to 8-byte ints");
                return 0;
            }
            if (!buffer_write_int64(buffer, (int64_t)ll)) {
                return 0;
            }
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x12;
            return 1;
        }
    case 19:
        {
            /* Decimal128: the 16 bytes of its "bid" attribute */
            const char* data;
            PyObject* pystring = PyObject_GetAttrString(value, "bid");
            if (!pystring) {
                return 0;
            }
            data = PyBytes_AsString(pystring);
            if (!data) {
                Py_DECREF(pystring);
                return 0;
            }
            if (!buffer_write_bytes(buffer, data, 16)) {
                Py_DECREF(pystring);
                return 0;
            }
            Py_DECREF(pystring);
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x13;
            return 1;
        }
    case 100:
        {
            /* DBRef: encoded as the document returned by as_doc() */
            PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL);
            if (!as_doc) {
                return 0;
            }
            if (!write_dict(self, buffer, as_doc, 0, options, 0)) {
                Py_DECREF(as_doc);
                return 0;
            }
            Py_DECREF(as_doc);
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x03;
            return 1;
        }
    case 101:
        {
            /* RawBSONDocument */
            if (!write_raw_doc(buffer, value)) {
                return 0;
            }
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x03;
            return 1;
        }
    case 255:
        {
            /* MinKey */
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0xFF;
            return 1;
        }
    case 127:
        {
            /* MaxKey */
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x7F;
            return 1;
        }
    }

    /* No _type_marker attribute or not one of our types. */

    /* bool must be tested before int: Python's bool is an int subclass. */
    if (PyBool_Check(value)) {
        const char c = (value == Py_True) ? 0x01 : 0x00;
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x08;
        return buffer_write_bytes(buffer, &c, 1);
    }
    else if (PyLong_Check(value)) {
        /* Use int32 when the value fits, otherwise int64. */
        const long long_value = PyLong_AsLong(value);
        const int int_value = (int)long_value;
        if (PyErr_Occurred() || long_value != int_value) { /* Overflow */
            long long long_long_value;
            PyErr_Clear();
            long_long_value = PyLong_AsLongLong(value);
            if (PyErr_Occurred()) { /* Overflow AGAIN */
                PyErr_SetString(PyExc_OverflowError,
                                "MongoDB can only handle up to 8-byte ints");
                return 0;
            }
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x12;
            return buffer_write_int64(buffer, (int64_t)long_long_value);
        }
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x10;
        return buffer_write_int32(buffer, (int32_t)int_value);
    } else if (PyFloat_Check(value)) {
        const double d = PyFloat_AsDouble(value);
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x01;
        return buffer_write_double(buffer, d);
    } else if (value == Py_None) {
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x0A;
        return 1;
    } else if (PyDict_Check(value)) {
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x03;
        return write_dict(self, buffer, value, check_keys, options, 0);
    } else if (PyList_Check(value) || PyTuple_Check(value)) {
        /* Array: a document whose keys are the decimal indices. */
        Py_ssize_t items, i;
        int start_position,
            length_location,
            length;
        char zero = 0;

        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x04;
        start_position = pymongo_buffer_get_position(buffer);

        /* save space for length */
        length_location = pymongo_buffer_save_space(buffer, 4);
        if (length_location == -1) {
            return 0;
        }
        if ((items = PySequence_Size(value)) > BSON_MAX_SIZE) {
            PyObject* BSONError = _error("BSONError");
            if (BSONError) {
                PyErr_SetString(BSONError,
                                "Too many items to serialize.");
                Py_DECREF(BSONError);
            }
            return 0;
        }
        for(i = 0; i < items; i++) {
            int list_type_byte = pymongo_buffer_save_space(buffer, 1);
            char name[BUF_SIZE];
            PyObject* item_value;

            if (list_type_byte == -1) {
                return 0;
            }
            int res = LL2STR(name, (long long)i);
            if (res == -1) {
                return 0;
            }
            if (!buffer_write_bytes(buffer, name, (int)strlen(name) + 1)) {
                return 0;
            }

            if (!(item_value = PySequence_GetItem(value, i)))
                return 0;
            if (!write_element_to_buffer(self, buffer, list_type_byte,
                                         item_value, check_keys, options,
                                         0, 0)) {
                Py_DECREF(item_value);
                return 0;
            }
            Py_DECREF(item_value);
        }

        /* write null byte and fill in length */
        if (!buffer_write_bytes(buffer, &zero, 1)) {
            return 0;
        }
        length = pymongo_buffer_get_position(buffer) - start_position;
        buffer_write_int32_at_position(
            buffer, length_location, (int32_t)length);
        return 1;
    /* Python3 special case. Store bytes as BSON binary subtype 0. */
    } else if (PyBytes_Check(value)) {
        char subtype = 0;
        int size;
        const char* data = PyBytes_AS_STRING(value);
        if (!data)
            return 0;
        if ((size = _downcast_and_check(PyBytes_GET_SIZE(value), 0)) == -1)
            return 0;
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x05;
        if (!buffer_write_int32(buffer, (int32_t)size)) {
            return 0;
        }
        if (!buffer_write_bytes(buffer, &subtype, 1)) {
            return 0;
        }
        if (!buffer_write_bytes(buffer, data, size)) {
            return 0;
        }
        return 1;
    } else if (PyUnicode_Check(value)) {
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x02;
        return write_unicode(buffer, value);
    } else if (PyDateTime_Check(value)) {
        long long millis;
        PyObject* utcoffset = PyObject_CallMethod(value, "utcoffset", NULL);
        if (utcoffset == NULL)
            return 0;
        if (utcoffset != Py_None) {
            /* Aware datetime: shift to UTC before converting. */
            PyObject* result = PyNumber_Subtract(value, utcoffset);
            Py_DECREF(utcoffset);
            if (!result) {
                return 0;
            }
            millis = millis_from_datetime(result);
            Py_DECREF(result);
        } else {
            /* The call returned a new reference to None; release it. */
            Py_DECREF(utcoffset);
            millis = millis_from_datetime(value);
        }
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
        return buffer_write_int64(buffer, (int64_t)millis);
    } else if (PyObject_TypeCheck(value, (PyTypeObject *) state->DatetimeMS)) {
        long long millis;
        if (!millis_from_datetime_ms(value, &millis)) {
            return 0;
        }
        *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
        return buffer_write_int64(buffer, (int64_t)millis);
    } else if (PyObject_TypeCheck(value, state->REType)) {
        return _write_regex_to_buffer(buffer, type_byte, value);
    }

    /*
     * Try Mapping and UUID last since we have to import
     * them if we're in a sub-interpreter.
     *
     * Each type reference is released as soon as the isinstance test is
     * done, so no exit path below can leak it.
     */
    mapping_type = _get_object(state->Mapping, "collections.abc", "Mapping");
    if (mapping_type) {
        int is_mapping = PyObject_IsInstance(value, mapping_type);
        Py_DECREF(mapping_type);
        /* PyObject_IsInstance returns -1 on error */
        if (is_mapping < 0) {
            return 0;
        }
        if (is_mapping) {
            *(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x03;
            return write_dict(self, buffer, value, check_keys, options, 0);
        }
    }

    uuid_type = _get_object(state->UUID, "uuid", "UUID");
    if (uuid_type) {
        int is_uuid = PyObject_IsInstance(value, uuid_type);
        Py_DECREF(uuid_type);
        /* PyObject_IsInstance returns -1 on error */
        if (is_uuid < 0) {
            return 0;
        }
        if (is_uuid) {
            /* Convert to Binary with the configured representation and
             * encode that instead. */
            PyObject* binary_type = NULL;
            PyObject* binary_value = NULL;
            int result;

            binary_type = _get_object(state->Binary, "bson", "Binary");
            if (binary_type == NULL) {
                return 0;
            }

            binary_value = PyObject_CallMethod(binary_type, "from_uuid", "(Oi)", value, options->uuid_rep);
            if (binary_value == NULL) {
                Py_DECREF(binary_type);
                return 0;
            }

            result = _write_element_to_buffer(self, buffer,
                                              type_byte, binary_value,
                                              check_keys, options,
                                              in_custom_call,
                                              in_fallback_call);
            Py_DECREF(binary_type);
            Py_DECREF(binary_value);
            return result;
        }
    }

    /* Try a custom encoder if one is provided and we have not already
     * attempted to use a type encoder. */
    if (!in_custom_call && !options->type_registry.is_encoder_empty) {
        PyObject* value_type = NULL;
        PyObject* converter = NULL;
        value_type = PyObject_Type(value);
        if (value_type == NULL) {
            return 0;
        }
        /* PyDict_GetItem returns a borrowed reference (or NULL). */
        converter = PyDict_GetItem(options->type_registry.encoder_map, value_type);
        Py_XDECREF(value_type);
        if (converter != NULL) {
            /* Transform types that have a registered converter.
             * A new reference is created upon transformation. */
            new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
            if (new_value == NULL) {
                return 0;
            }
            retval = write_element_to_buffer(self, buffer, type_byte, new_value,
                                             check_keys, options, 1, 0);
            Py_XDECREF(new_value);
            return retval;
        }
    }

    /* Try the fallback encoder if one is provided and we have not already
     * attempted to use the fallback encoder. */
    if (!in_fallback_call && options->type_registry.has_fallback_encoder) {
        new_value = PyObject_CallFunctionObjArgs(
            options->type_registry.fallback_encoder, value, NULL);
        if (new_value == NULL) {
            // propagate any exception raised by the callback
            return 0;
        }
        retval = write_element_to_buffer(self, buffer, type_byte, new_value,
                                         check_keys, options, 0, 1);
        Py_XDECREF(new_value);
        return retval;
    }

    /* We can't determine value's type. Fail. */
    _set_cannot_encode(value);
    return 0;
}
/* Validate a document key for use when check_keys is enabled.
 *
 * A key is rejected when it starts with '$' or contains a '.'. On rejection
 * the InvalidDocument exception obtained through _error() is raised with a
 * message naming the key.
 *
 * Returns 1 when the key is acceptable, 0 otherwise. */
static int check_key_name(const char* name, int name_length) {
    const char* format = NULL;
    PyObject* InvalidDocument;

    /* Pick the message for the first rule the key violates, if any. */
    if (name_length > 0 && name[0] == '$') {
        format = "key '%s' must not start with '$'";
    } else if (strchr(name, '.')) {
        format = "key '%s' must not contain '.'";
    }
    if (format == NULL) {
        return 1;
    }

    InvalidDocument = _error("InvalidDocument");
    if (InvalidDocument) {
        PyObject* errmsg = PyUnicode_FromFormat(format, name);
        if (errmsg) {
            PyErr_SetObject(InvalidDocument, errmsg);
            Py_DECREF(errmsg);
        }
        Py_DECREF(InvalidDocument);
    }
    return 0;
}
/* Append one (name, value) element to the buffer.
 *
 * name must be NUL-terminated; name_length excludes the terminator.
 * When allow_id is zero and the name is exactly "_id" nothing is written
 * and the call still counts as a success.
 *
 * Returns 1 on success, 0 on failure. */
int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_length,
               PyObject* value, unsigned char check_keys,
               const codec_options_t* options, unsigned char allow_id) {
    int type_offset;

    /* _id has to be written first, so the caller emits it separately and
     * asks us to skip it here rather than removing it from the mapping. */
    if (!allow_id && !strcmp(name, "_id")) {
        return 1;
    }

    /* Reserve the element's type byte; it is filled in once the value's
     * type is known. */
    type_offset = pymongo_buffer_save_space(buffer, 1);
    if (type_offset == -1) {
        return 0;
    }

    if (check_keys && !check_key_name(name, name_length)) {
        return 0;
    }

    /* The key is written including its trailing NUL byte. */
    if (!buffer_write_bytes(buffer, name, name_length + 1)) {
        return 0;
    }

    return write_element_to_buffer(self, buffer, type_offset,
                                   value, check_keys, options, 0, 0) ? 1 : 0;
}
/* Encode `key` as UTF-8 and write the (key, value) element to the buffer.
 *
 * Keys that are not str objects, and keys whose UTF-8 form contains a NUL
 * byte, raise InvalidDocument. When top_level is true an "_id" key is
 * skipped because the caller has already written it.
 *
 * Returns 1 on success, 0 on failure. */
int decode_and_write_pair(PyObject* self, buffer_t buffer,
                          PyObject* key, PyObject* value,
                          unsigned char check_keys,
                          const codec_options_t* options,
                          unsigned char top_level) {
    PyObject* encoded;
    const char* data;
    int size;
    int ok = 0;

    if (!PyUnicode_Check(key)) {
        /* Report the offending key as
         * "documents must have only string keys, key was <repr>". */
        PyObject* InvalidDocument = _error("InvalidDocument");
        if (InvalidDocument) {
            PyObject* repr = PyObject_Repr(key);
            if (repr) {
                PyObject* prefix = PyUnicode_FromString(
                    "documents must have only string keys, key was ");
                if (prefix) {
                    PyObject* message = PyUnicode_Concat(prefix, repr);
                    if (message) {
                        PyErr_SetObject(InvalidDocument, message);
                        Py_DECREF(message);
                    }
                    Py_DECREF(prefix);
                }
                Py_DECREF(repr);
            }
            Py_DECREF(InvalidDocument);
        }
        return 0;
    }

    encoded = PyUnicode_AsUTF8String(key);
    if (!encoded) {
        return 0;
    }

    data = PyBytes_AS_STRING(encoded);
    if (!data) {
        goto done;
    }

    /* NOTE(review): size appears to be the byte length plus one for the
     * terminator (it is used as size - 1 below) - confirm against
     * _downcast_and_check. */
    size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1);
    if (size == -1) {
        goto done;
    }

    /* A shorter C string than the bytes object means an embedded NUL. */
    if (strlen(data) != (size_t)(size - 1)) {
        PyObject* InvalidDocument = _error("InvalidDocument");
        if (InvalidDocument) {
            PyErr_SetString(InvalidDocument,
                            "Key names must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
        }
        goto done;
    }

    /* If top_level is True, don't allow writing _id here - it was already written. */
    if (write_pair(self, buffer, data,
                   size - 1, value, check_keys, options, !top_level)) {
        ok = 1;
    }

done:
    Py_DECREF(encoded);
    return ok;
}
/* Copy the bytes held in the `raw` attribute of a RawBSONDocument into the
 * buffer unchanged.
 *
 * Returns the number of bytes written, or 0 on failure.
 */
static int write_raw_doc(buffer_t buffer, PyObject* raw) {
    char* payload;
    Py_ssize_t payload_len;
    int written = 0;
    PyObject* raw_bytes = PyObject_GetAttrString(raw, "raw");

    if (!raw_bytes) {
        return 0;
    }

    if (PyBytes_AsStringAndSize(raw_bytes, &payload, &payload_len) != -1) {
        /* The length must fit in an int before it can be written. */
        int checked_len = _downcast_and_check(payload_len, 0);
        if (checked_len != -1 &&
            buffer_write_bytes(buffer, payload, checked_len)) {
            written = checked_len;
        }
    }

    Py_DECREF(raw_bytes);
    return written;
}
/* Encode the mapping `dict` as a BSON document appended to `buffer`.
 *
 * - An object whose type marker is 101 (RawBSONDocument) is copied verbatim.
 * - Anything that is not a collections.abc.Mapping raises TypeError.
 * - When top_level is true, "_id" (if present) is written first and skipped
 *   during the main iteration.
 *
 * Returns the number of bytes written, or 0 on failure (with a Python
 * exception set). */
int write_dict(PyObject* self, buffer_t buffer,
               PyObject* dict, unsigned char check_keys,
               const codec_options_t* options, unsigned char top_level) {
    PyObject* key;
    PyObject* iter;
    char zero = 0;
    int length;
    int length_location;
    int is_mapping;
    struct module_state *state = GETSTATE(self);
    PyObject* mapping_type;
    long type_marker;

    /* check for RawBSONDocument */
    type_marker = _type_marker(dict, state->_type_marker_str);
    if (type_marker < 0) {
        return 0;
    }

    if (101 == type_marker) {
        return write_raw_doc(buffer, dict);
    }

    mapping_type = _get_object(state->Mapping, "collections.abc", "Mapping");
    if (!mapping_type) {
        /* The lookup failed; do not keep encoding with an error pending. */
        return 0;
    }

    is_mapping = PyObject_IsInstance(dict, mapping_type);
    Py_DECREF(mapping_type);
    if (is_mapping < 0) {
        /* PyObject_IsInstance returns -1 on error */
        return 0;
    }
    if (!is_mapping) {
        PyObject* repr;
        if ((repr = PyObject_Repr(dict))) {
            PyObject* errmsg = PyUnicode_FromString(
                "encoder expected a mapping type but got: ");
            if (errmsg) {
                PyObject* error = PyUnicode_Concat(errmsg, repr);
                if (error) {
                    PyErr_SetObject(PyExc_TypeError, error);
                    Py_DECREF(error);
                }
                Py_DECREF(errmsg);
            }
            Py_DECREF(repr);
        } else {
            PyErr_SetString(PyExc_TypeError,
                            "encoder expected a mapping type");
        }
        return 0;
    }

    /* Reserve the 4-byte document length; it is filled in at the end. */
    length_location = pymongo_buffer_save_space(buffer, 4);
    if (length_location == -1) {
        return 0;
    }

    /* Write _id first if this is a top level doc. */
    if (top_level) {
        /*
         * If "dict" is a defaultdict we don't want to call
         * PyMapping_GetItemString on it. That would **create**
         * an _id where one didn't previously exist (PYTHON-871).
         */
        if (PyDict_Check(dict)) {
            /* PyDict_GetItemString returns a borrowed reference. */
            PyObject* _id = PyDict_GetItemString(dict, "_id");
            if (_id) {
                if (!write_pair(self, buffer, "_id", 3,
                                _id, check_keys, options, 1)) {
                    return 0;
                }
            }
        } else if (PyMapping_HasKeyString(dict, "_id")) {
            /* PyMapping_GetItemString returns a new reference. */
            PyObject* _id = PyMapping_GetItemString(dict, "_id");
            int wrote_id;
            if (!_id) {
                return 0;
            }
            wrote_id = write_pair(self, buffer, "_id", 3,
                                  _id, check_keys, options, 1);
            Py_DECREF(_id);
            if (!wrote_id) {
                return 0;
            }
        }
    }

    iter = PyObject_GetIter(dict);
    if (iter == NULL) {
        return 0;
    }
    while ((key = PyIter_Next(iter)) != NULL) {
        int wrote_pair;
        PyObject* value = PyObject_GetItem(dict, key);
        if (!value) {
            /* Any lookup failure is reported as KeyError(key). */
            PyErr_SetObject(PyExc_KeyError, key);
            Py_DECREF(key);
            Py_DECREF(iter);
            return 0;
        }
        wrote_pair = decode_and_write_pair(self, buffer, key, value,
                                           check_keys, options, top_level);
        Py_DECREF(key);
        Py_DECREF(value);
        if (!wrote_pair) {
            Py_DECREF(iter);
            return 0;
        }
    }
    Py_DECREF(iter);
    /* PyIter_Next returns NULL both at exhaustion and on error. */
    if (PyErr_Occurred()) {
        return 0;
    }

    /* write null byte and fill in length */
    if (!buffer_write_bytes(buffer, &zero, 1)) {
        return 0;
    }
    length = pymongo_buffer_get_position(buffer) - length_location;
    buffer_write_int32_at_position(
        buffer, length_location, (int32_t)length);
    return length;
}
/* Python entry point: encode a mapping to BSON bytes.
 *
 * Arguments (parsed with "ObO|b"): the document, check_keys, the codec
 * options object, and an optional top_level flag (defaults to 1).
 * A document whose type marker is 101 (RawBSONDocument) is returned as its
 * `raw` attribute without re-encoding.
 *
 * Returns a new bytes object, or NULL with an exception set. */
static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
    PyObject* dict;
    PyObject* options_obj;
    PyObject* result = NULL;
    unsigned char check_keys;
    unsigned char top_level = 1;
    codec_options_t options;
    buffer_t buffer;
    long type_marker;

    if (!PyArg_ParseTuple(args, "ObO|b", &dict, &check_keys,
                          &options_obj, &top_level)) {
        return NULL;
    }
    if (!convert_codec_options(self, options_obj, &options)) {
        return NULL;
    }

    /* check for RawBSONDocument */
    type_marker = _type_marker(dict, GETSTATE(self)->_type_marker_str);
    if (type_marker < 0) {
        destroy_codec_options(&options);
        return NULL;
    }
    if (type_marker == 101) {
        destroy_codec_options(&options);
        /* NULL (attribute lookup failure) is passed straight through. */
        return PyObject_GetAttrString(dict, "raw");
    }

    buffer = pymongo_buffer_new();
    if (buffer) {
        if (write_dict(self, buffer, dict, check_keys, &options, top_level)) {
            /* objectify buffer */
            result = Py_BuildValue("y#", pymongo_buffer_get_buffer(buffer),
                                   (Py_ssize_t)pymongo_buffer_get_position(buffer));
        }
        pymongo_buffer_free(buffer);
    }

    destroy_codec_options(&options);
    return result;
}
/*
 * Hook for optional decoding BSON documents to DBRef.
 *
 * Consumes the caller's reference to `value` (a decoded document).
 *
 * - If the document has no "$ref"/"$id" pair, or "$ref" is not a str, or
 *   "$db" is present but not a str, `value` itself is returned.
 * - Otherwise "$ref", "$id" and "$db" are removed from the document and a
 *   new DBRef(ref, id, database, remaining_document) is returned.
 * - On error NULL is returned with an exception set. The reference to
 *   `value` is released in that case too, because the caller replaces its
 *   only reference with this function's return value.
 */
static PyObject *_dbref_hook(PyObject* self, PyObject* value) {
    struct module_state *state = GETSTATE(self);
    PyObject* dbref_type = NULL;
    PyObject* ref = NULL;
    PyObject* id = NULL;
    PyObject* database = NULL;
    PyObject* ret = NULL;
    int db_present = 0;

    /* Not shaped like a DBRef: hand the document back untouched. */
    if (!(PyMapping_HasKeyString(value, "$ref") &&
          PyMapping_HasKeyString(value, "$id"))) {
        return value;
    }

    /* PyMapping_GetItemString returns NULL to indicate error. */
    ref = PyMapping_GetItemString(value, "$ref");
    if (!ref) {
        goto done;
    }
    id = PyMapping_GetItemString(value, "$id");
    if (!id) {
        goto done;
    }

    if (PyMapping_HasKeyString(value, "$db")) {
        database = PyMapping_GetItemString(value, "$db");
        if (!database) {
            goto done;
        }
        db_present = 1;
    } else {
        database = Py_None;
        Py_INCREF(database);
    }

    // check types
    if (!(PyUnicode_Check(ref) && (database == Py_None || PyUnicode_Check(database)))) {
        ret = value;
        goto done;
    }

    /* Strip the DBRef fields; whatever is left is passed to DBRef as its
     * last argument. A failed deletion aborts instead of calling DBRef
     * with an exception pending. */
    if (PyMapping_DelItemString(value, "$ref") < 0 ||
        PyMapping_DelItemString(value, "$id") < 0) {
        goto done;
    }
    if (db_present && PyMapping_DelItemString(value, "$db") < 0) {
        goto done;
    }

    dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef");
    if (!dbref_type) {
        goto done;
    }
    ret = PyObject_CallFunctionObjArgs(dbref_type, ref, id, database, value, NULL);

done:
    /* Unless the document itself is being returned, our reference to it is
     * no longer needed (a created DBRef holds whatever it needs). */
    if (ret != value) {
        Py_DECREF(value);
    }
    Py_XDECREF(dbref_type);
    Py_XDECREF(ref);
    Py_XDECREF(id);
    Py_XDECREF(database);
    return ret;
}
/*
 * Decode a single BSON value of element type `type` from `buffer`, starting
 * at `*position`, and advance `*position` past the bytes consumed.
 *
 * Parameters:
 *   name      - the element's field name; used in the "unknown BSON type"
 *               error message and forwarded when decoding array items.
 *   max       - upper bound on the number of bytes available at *position.
 *   options   - codec options (document class, tz handling, UUID
 *               representation, type registry, ...).
 *   raw_array - when non-zero, arrays are returned as the raw bytes of the
 *               array document instead of a list.
 *
 * Returns a new reference, or NULL with an exception set. Errors derived
 * from Exception that are not already InvalidBSON are re-raised as
 * InvalidBSON(str(error)); when no exception is pending at the failure
 * point, InvalidBSON("invalid length or type code") is raised.
 *
 * If the type registry has decoders, the decoded value is passed through
 * the converter registered for its exact type (if any) before returning.
 */
static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
                           unsigned* position, unsigned char type,
                           unsigned max, const codec_options_t* options, int raw_array) {
    struct module_state *state = GETSTATE(self);
    PyObject* value = NULL;
    switch (type) {
    case 1: /* double */
        {
            double d;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&d, buffer + *position, 8);
            value = PyFloat_FromDouble(BSON_DOUBLE_FROM_LE(d));
            *position += 8;
            break;
        }
    case 2:  /* string */
    case 14: /* symbol */
        {
            uint32_t value_length;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&value_length, buffer + *position, 4);
            value_length = BSON_UINT32_FROM_LE(value_length);
            /* Encoded string length + string */
            if (!value_length || max < value_length || max < 4 + value_length) {
                goto invalid;
            }
            *position += 4;
            /* Strings must end in \0 */
            if (buffer[*position + value_length - 1]) {
                goto invalid;
            }
            value = PyUnicode_DecodeUTF8(
                buffer + *position, value_length - 1,
                options->unicode_decode_error_handler);
            if (!value) {
                goto invalid;
            }
            *position += value_length;
            break;
        }
    case 3: /* embedded document */
        {
            uint32_t size;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&size, buffer + *position, 4);
            size = BSON_UINT32_FROM_LE(size);
            if (size < BSON_MIN_SIZE || max < size) {
                goto invalid;
            }
            /* Check for bad eoo */
            if (buffer[*position + size - 1]) {
                goto invalid;
            }
            if (options->is_raw_bson) {
                /* Hand the undecoded bytes to the raw document class. */
                value = PyObject_CallFunction(
                    options->document_class, "y#O",
                    buffer + *position, (Py_ssize_t)size, options->options_obj);
                if (!value) {
                    goto invalid;
                }
                *position += size;
                break;
            }
            /* Skip the 4-byte length and exclude the trailing NUL. */
            value = elements_to_dict(self, buffer + *position + 4,
                                     size - 5, options);
            if (!value) {
                goto invalid;
            }
            /* Hook for DBRefs */
            value = _dbref_hook(self, value);
            if (!value) {
                goto invalid;
            }
            *position += size;
            break;
        }
    case 4: /* array */
        {
            uint32_t size, end;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&size, buffer + *position, 4);
            size = BSON_UINT32_FROM_LE(size);
            if (size < BSON_MIN_SIZE || max < size) {
                goto invalid;
            }
            end = *position + size - 1;
            /* Check for bad eoo */
            if (buffer[end]) {
                goto invalid;
            }
            if (raw_array != 0) {
                // Treat it as a binary buffer.
                value = PyBytes_FromStringAndSize(buffer + *position, size);
                *position += size;
                break;
            }
            *position += 4;
            value = PyList_New(0);
            if (!value) {
                goto invalid;
            }
            while (*position < end) {
                PyObject* to_append;
                unsigned char bson_type = (unsigned char)buffer[(*position)++];
                size_t key_size = strlen(buffer + *position);
                if (max < key_size) {
                    Py_DECREF(value);
                    goto invalid;
                }
                /* just skip the key, they're in order. */
                *position += (unsigned)key_size + 1;
                if (Py_EnterRecursiveCall(" while decoding a list value")) {
                    Py_DECREF(value);
                    goto invalid;
                }
                to_append = get_value(self, name, buffer, position, bson_type,
                                      max - (unsigned)key_size, options, raw_array);
                Py_LeaveRecursiveCall();
                if (!to_append) {
                    Py_DECREF(value);
                    goto invalid;
                }
                if (PyList_Append(value, to_append) < 0) {
                    Py_DECREF(value);
                    Py_DECREF(to_append);
                    goto invalid;
                }
                Py_DECREF(to_append);
            }
            /* The items must end exactly at the array's terminator. */
            if (*position != end) {
                /* Release the partially built list before failing. */
                Py_DECREF(value);
                goto invalid;
            }
            (*position)++;
            break;
        }
    case 5: /* binary */
        {
            PyObject* data;
            PyObject* st;
            PyObject* type_to_create;
            uint32_t length, length2;
            unsigned char subtype;
            if (max < 5) {
                goto invalid;
            }
            memcpy(&length, buffer + *position, 4);
            length = BSON_UINT32_FROM_LE(length);
            /* The payload follows a 5-byte header (length + subtype), so
             * header and payload together must fit in max. */
            if (length > max - 5) {
                goto invalid;
            }
            subtype = (unsigned char)buffer[*position + 4];
            *position += 5;
            if (subtype == 2) {
                /* Subtype 2 carries a second, inner length prefix. */
                if (length < 4) {
                    goto invalid;
                }
                memcpy(&length2, buffer + *position, 4);
                length2 = BSON_UINT32_FROM_LE(length2);
                if (length2 != length - 4) {
                    goto invalid;
                }
            }
            /* Python3 special case. Decode BSON binary subtype 0 to bytes. */
            if (subtype == 0) {
                value = PyBytes_FromStringAndSize(buffer + *position, length);
                *position += length;
                break;
            }
            if (subtype == 2) {
                data = PyBytes_FromStringAndSize(buffer + *position + 4, length - 4);
            } else {
                data = PyBytes_FromStringAndSize(buffer + *position, length);
            }
            if (!data) {
                goto invalid;
            }
            /* Encode as UUID or Binary based on options->uuid_rep */
            if (subtype == 3 || subtype == 4) {
                PyObject* binary_type = NULL;
                PyObject* binary_value = NULL;
                char uuid_rep = options->uuid_rep;

                /* UUID should always be 16 bytes */
                if (length != 16) {
                    goto uuiderror;
                }

                binary_type = _get_object(state->Binary, "bson", "Binary");
                if (binary_type == NULL) {
                    goto uuiderror;
                }

                binary_value = PyObject_CallFunction(binary_type, "(Oi)", data, subtype);
                if (binary_value == NULL) {
                    goto uuiderror;
                }

                /* Keep the Binary wrapper when the subtype does not match
                 * the configured representation; otherwise convert. */
                if ((uuid_rep == UNSPECIFIED) ||
                        (subtype == 4 && uuid_rep != STANDARD) ||
                        (subtype == 3 && uuid_rep == STANDARD)) {
                    value = binary_value;
                    Py_INCREF(value);
                } else {
                    value = PyObject_CallMethod(binary_value, "as_uuid", "(i)", uuid_rep);
                }

            uuiderror:
                Py_XDECREF(binary_type);
                Py_XDECREF(binary_value);
                Py_DECREF(data);
                if (!value) {
                    goto invalid;
                }
                *position += length;
                break;
            }

            st = PyLong_FromLong(subtype);
            if (!st) {
                Py_DECREF(data);
                goto invalid;
            }
            if ((type_to_create = _get_object(state->Binary, "bson.binary", "Binary"))) {
                value = PyObject_CallFunctionObjArgs(type_to_create, data, st, NULL);
                Py_DECREF(type_to_create);
            }
            Py_DECREF(st);
            Py_DECREF(data);
            if (!value) {
                goto invalid;
            }
            *position += length;
            break;
        }
    case 6:  /* undefined */
    case 10: /* null */
        {
            value = Py_None;
            Py_INCREF(value);
            break;
        }
    case 7: /* ObjectId */
        {
            PyObject* objectid_type;
            if (max < 12) {
                goto invalid;
            }
            if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) {
                value = PyObject_CallFunction(objectid_type, "y#",
                                              buffer + *position, (Py_ssize_t)12);
                Py_DECREF(objectid_type);
            }
            *position += 12;
            break;
        }
    case 8: /* boolean */
        {
            char boolean_raw = buffer[(*position)++];
            if (0 == boolean_raw) {
                value = Py_False;
            } else if (1 == boolean_raw) {
                value = Py_True;
            } else {
                PyObject* InvalidBSON = _error("InvalidBSON");
                if (InvalidBSON) {
                    PyErr_Format(InvalidBSON, "invalid boolean value: %x", boolean_raw);
                    Py_DECREF(InvalidBSON);
                }
                return NULL;
            }
            Py_INCREF(value);
            break;
        }
    case 9: /* UTC datetime, stored as milliseconds */
        {
            PyObject* utc_type;
            PyObject* naive;
            PyObject* replace;
            PyObject* args;
            PyObject* kwargs;
            PyObject* astimezone;
            int64_t millis;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&millis, buffer + *position, 8);
            millis = (int64_t)BSON_UINT64_FROM_LE(millis);
            *position += 8;

            if (options->datetime_conversion == DATETIME_MS){
                value = datetime_ms_from_millis(self, millis);
                break;
            }

            int dt_clamp = options->datetime_conversion == DATETIME_CLAMP;
            int dt_auto = options->datetime_conversion == DATETIME_AUTO;

            if (dt_clamp || dt_auto){
                PyObject *min_millis_fn;
                PyObject *max_millis_fn;
                PyObject *min_millis_fn_res;
                PyObject *max_millis_fn_res;
                int64_t min_millis;
                int64_t max_millis;

                min_millis_fn = _get_object(state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms");
                if (min_millis_fn == NULL) {
                    goto invalid;
                }
                max_millis_fn = _get_object(state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms");
                if (max_millis_fn == NULL) {
                    Py_DECREF(min_millis_fn);
                    goto invalid;
                }

                if (options->tz_aware){
                    PyObject* tzinfo = options->tzinfo;
                    PyObject* default_tz = NULL;
                    if (tzinfo == Py_None) {
                        // Default to UTC.
                        default_tz = _get_object(state->UTC, "bson.tz_util", "utc");
                        if (default_tz == NULL) {
                            Py_DECREF(min_millis_fn);
                            Py_DECREF(max_millis_fn);
                            goto invalid;
                        }
                        tzinfo = default_tz;
                    }
                    min_millis_fn_res = PyObject_CallFunctionObjArgs(min_millis_fn, tzinfo, NULL);
                    /* Do not make a second call with an error pending. */
                    max_millis_fn_res = min_millis_fn_res
                        ? PyObject_CallFunctionObjArgs(max_millis_fn, tzinfo, NULL)
                        : NULL;
                    Py_XDECREF(default_tz);
                } else {
                    min_millis_fn_res = PyObject_CallObject(min_millis_fn, NULL);
                    max_millis_fn_res = min_millis_fn_res
                        ? PyObject_CallObject(max_millis_fn, NULL)
                        : NULL;
                }

                Py_DECREF(min_millis_fn);
                Py_DECREF(max_millis_fn);

                if (!min_millis_fn_res || !max_millis_fn_res){
                    Py_XDECREF(min_millis_fn_res);
                    Py_XDECREF(max_millis_fn_res);
                    goto invalid;
                }

                min_millis = PyLong_AsLongLong(min_millis_fn_res);
                max_millis = PyLong_AsLongLong(max_millis_fn_res);
                /* The integer results are no longer needed once converted. */
                Py_DECREF(min_millis_fn_res);
                Py_DECREF(max_millis_fn_res);

                if ((min_millis == -1 || max_millis == -1) && PyErr_Occurred())
                {
                    // min/max_millis check
                    goto invalid;
                }

                if (dt_clamp) {
                    if (millis < min_millis) {
                        millis = min_millis;
                    } else if (millis > max_millis) {
                        millis = max_millis;
                    }
                    // Continues from here to return a datetime.
                } else { // dt_auto
                    if (millis < min_millis || millis > max_millis){
                        value = datetime_ms_from_millis(self, millis);
                        break; // Out-of-range so done.
                    }
                }
            }

            naive = datetime_from_millis(millis);
            if (!options->tz_aware) { /* In the naive case, we're done here. */
                value = naive;
                break;
            }

            if (!naive) {
                goto invalid;
            }
            /* Equivalent of naive.replace(tzinfo=utc). */
            replace = PyObject_GetAttrString(naive, "replace");
            Py_DECREF(naive);
            if (!replace) {
                goto invalid;
            }
            args = PyTuple_New(0);
            if (!args) {
                Py_DECREF(replace);
                goto invalid;
            }
            kwargs = PyDict_New();
            if (!kwargs) {
                Py_DECREF(replace);
                Py_DECREF(args);
                goto invalid;
            }
            utc_type = _get_object(state->UTC, "bson.tz_util", "utc");
            if (!utc_type || PyDict_SetItemString(kwargs, "tzinfo", utc_type) == -1) {
                Py_DECREF(replace);
                Py_DECREF(args);
                Py_DECREF(kwargs);
                Py_XDECREF(utc_type);
                goto invalid;
            }
            Py_XDECREF(utc_type);
            value = PyObject_Call(replace, args, kwargs);
            if (!value) {
                Py_DECREF(replace);
                Py_DECREF(args);
                Py_DECREF(kwargs);
                goto invalid;
            }

            /* convert to local time */
            if (options->tzinfo != Py_None) {
                astimezone = PyObject_GetAttrString(value, "astimezone");
                Py_DECREF(value);
                if (!astimezone) {
                    Py_DECREF(replace);
                    Py_DECREF(args);
                    Py_DECREF(kwargs);
                    goto invalid;
                }
                value = PyObject_CallFunctionObjArgs(astimezone, options->tzinfo, NULL);
                Py_DECREF(astimezone);
            }

            Py_DECREF(replace);
            Py_DECREF(args);
            Py_DECREF(kwargs);
            break;
        }
    case 11: /* regular expression: two consecutive C strings */
        {
            PyObject* regex_class;
            PyObject* pattern;
            int flags;
            size_t flags_length, i;
            size_t pattern_length = strlen(buffer + *position);
            if (pattern_length > BSON_MAX_SIZE || max < pattern_length) {
                goto invalid;
            }
            pattern = PyUnicode_DecodeUTF8(
                buffer + *position, pattern_length,
                options->unicode_decode_error_handler);
            if (!pattern) {
                goto invalid;
            }
            *position += (unsigned)pattern_length + 1;
            flags_length = strlen(buffer + *position);
            if (flags_length > BSON_MAX_SIZE ||
                    (BSON_MAX_SIZE - pattern_length) < flags_length) {
                Py_DECREF(pattern);
                goto invalid;
            }
            if (max < pattern_length + flags_length) {
                Py_DECREF(pattern);
                goto invalid;
            }
            /* Translate each known flag letter to its bit; unknown
             * letters are ignored. */
            flags = 0;
            for (i = 0; i < flags_length; i++) {
                if (buffer[*position + i] == 'i') {
                    flags |= 2;
                } else if (buffer[*position + i] == 'l') {
                    flags |= 4;
                } else if (buffer[*position + i] == 'm') {
                    flags |= 8;
                } else if (buffer[*position + i] == 's') {
                    flags |= 16;
                } else if (buffer[*position + i] == 'u') {
                    flags |= 32;
                } else if (buffer[*position + i] == 'x') {
                    flags |= 64;
                }
            }
            *position += (unsigned)flags_length + 1;

            regex_class = _get_object(state->Regex, "bson.regex", "Regex");
            if (regex_class) {
                value = PyObject_CallFunction(regex_class,
                                              "Oi", pattern, flags);
                Py_DECREF(regex_class);
            }
            Py_DECREF(pattern);
            break;
        }
    case 12: /* DBPointer: collection string + 12-byte ObjectId */
        {
            uint32_t coll_length;
            PyObject* collection;
            PyObject* id = NULL;
            PyObject* objectid_type;
            PyObject* dbref_type;

            if (max < 4) {
                goto invalid;
            }
            memcpy(&coll_length, buffer + *position, 4);
            coll_length = BSON_UINT32_FROM_LE(coll_length);
            /* Encoded string length + string + 12 byte ObjectId */
            if (!coll_length || max < coll_length || max < 4 + coll_length + 12) {
                goto invalid;
            }
            *position += 4;
            /* Strings must end in \0 */
            if (buffer[*position + coll_length - 1]) {
                goto invalid;
            }

            collection = PyUnicode_DecodeUTF8(
                buffer + *position, coll_length - 1,
                options->unicode_decode_error_handler);
            if (!collection) {
                goto invalid;
            }
            *position += coll_length;

            if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) {
                id = PyObject_CallFunction(objectid_type, "y#",
                                           buffer + *position, (Py_ssize_t)12);
                Py_DECREF(objectid_type);
            }
            if (!id) {
                Py_DECREF(collection);
                goto invalid;
            }
            *position += 12;
            if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) {
                value = PyObject_CallFunctionObjArgs(dbref_type, collection, id, NULL);
                Py_DECREF(dbref_type);
            }
            Py_DECREF(collection);
            Py_DECREF(id);
            break;
        }
    case 13: /* JavaScript code */
        {
            PyObject* code;
            PyObject* code_type;
            uint32_t value_length;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&value_length, buffer + *position, 4);
            value_length = BSON_UINT32_FROM_LE(value_length);
            /* Encoded string length + string */
            if (!value_length || max < value_length || max < 4 + value_length) {
                goto invalid;
            }
            *position += 4;
            /* Strings must end in \0 */
            if (buffer[*position + value_length - 1]) {
                goto invalid;
            }
            code = PyUnicode_DecodeUTF8(
                buffer + *position, value_length - 1,
                options->unicode_decode_error_handler);
            if (!code) {
                goto invalid;
            }
            *position += value_length;
            if ((code_type = _get_object(state->Code, "bson.code", "Code"))) {
                value = PyObject_CallFunctionObjArgs(code_type, code, NULL, NULL);
                Py_DECREF(code_type);
            }
            Py_DECREF(code);
            break;
        }
    case 15: /* JavaScript code with scope */
        {
            uint32_t c_w_s_size;
            uint32_t code_size;
            uint32_t scope_size;
            PyObject* code;
            PyObject* scope;
            PyObject* code_type;

            if (max < 8) {
                goto invalid;
            }

            memcpy(&c_w_s_size, buffer + *position, 4);
            c_w_s_size = BSON_UINT32_FROM_LE(c_w_s_size);
            *position += 4;

            if (max < c_w_s_size) {
                goto invalid;
            }

            memcpy(&code_size, buffer + *position, 4);
            code_size = BSON_UINT32_FROM_LE(code_size);
            /* code_w_scope length + code length + code + scope length */
            if (!code_size || max < code_size || max < 4 + 4 + code_size + 4) {
                goto invalid;
            }
            *position += 4;
            /* Strings must end in \0 */
            if (buffer[*position + code_size - 1]) {
                goto invalid;
            }
            code = PyUnicode_DecodeUTF8(
                buffer + *position, code_size - 1,
                options->unicode_decode_error_handler);
            if (!code) {
                goto invalid;
            }
            *position += code_size;

            memcpy(&scope_size, buffer + *position, 4);
            scope_size = BSON_UINT32_FROM_LE(scope_size);
            if (scope_size < BSON_MIN_SIZE) {
                Py_DECREF(code);
                goto invalid;
            }
            /* code length + code + scope length + scope */
            if ((4 + code_size + 4 + scope_size) != c_w_s_size) {
                Py_DECREF(code);
                goto invalid;
            }

            /* Check for bad eoo */
            if (buffer[*position + scope_size - 1]) {
                Py_DECREF(code);
                goto invalid;
            }
            scope = elements_to_dict(self, buffer + *position + 4,
                                     scope_size - 5, options);
            if (!scope) {
                Py_DECREF(code);
                goto invalid;
            }
            *position += scope_size;

            if ((code_type = _get_object(state->Code, "bson.code", "Code"))) {
                value = PyObject_CallFunctionObjArgs(code_type, code, scope, NULL);
                Py_DECREF(code_type);
            }
            Py_DECREF(code);
            Py_DECREF(scope);
            break;
        }
    case 16: /* 32-bit integer */
        {
            int32_t i;
            if (max < 4) {
                goto invalid;
            }
            memcpy(&i, buffer + *position, 4);
            i = (int32_t)BSON_UINT32_FROM_LE(i);
            value = PyLong_FromLong(i);
            if (!value) {
                goto invalid;
            }
            *position += 4;
            break;
        }
    case 17: /* timestamp: increment first, then time */
        {
            uint32_t time, inc;
            PyObject* timestamp_type;
            if (max < 8) {
                goto invalid;
            }
            memcpy(&inc, buffer + *position, 4);
            memcpy(&time, buffer + *position + 4, 4);
            inc = BSON_UINT32_FROM_LE(inc);
            time = BSON_UINT32_FROM_LE(time);
            if ((timestamp_type = _get_object(state->Timestamp, "bson.timestamp", "Timestamp"))) {
                value = PyObject_CallFunction(timestamp_type, "II", time, inc);
                Py_DECREF(timestamp_type);
            }
            *position += 8;
            break;
        }
    case 18: /* 64-bit integer */
        {
            int64_t ll;
            PyObject* bson_int64_type = _get_object(state->BSONInt64,
                                                    "bson.int64", "Int64");
            if (!bson_int64_type) {
                goto invalid;
            }
            if (max < 8) {
                Py_DECREF(bson_int64_type);
                goto invalid;
            }
            memcpy(&ll, buffer + *position, 8);
            ll = (int64_t)BSON_UINT64_FROM_LE(ll);
            value = PyObject_CallFunction(bson_int64_type, "L", ll);
            *position += 8;
            Py_DECREF(bson_int64_type);
            break;
        }
    case 19: /* decimal128 */
        {
            PyObject* dec128;
            if (max < 16) {
                goto invalid;
            }
            if ((dec128 = _get_object(state->Decimal128,
                                      "bson.decimal128",
                                      "Decimal128"))) {
                value = PyObject_CallMethod(dec128,
                                            "from_bid",
                                            "y#",
                                            buffer + *position,
                                            (Py_ssize_t)16);
                Py_DECREF(dec128);
            }
            *position += 16;
            break;
        }
    case 255: /* min key */
        {
            PyObject* minkey_type = _get_object(state->MinKey, "bson.min_key", "MinKey");
            if (!minkey_type) {
                goto invalid;
            }
            value = PyObject_CallFunctionObjArgs(minkey_type, NULL);
            Py_DECREF(minkey_type);
            break;
        }
    case 127: /* max key */
        {
            PyObject* maxkey_type = _get_object(state->MaxKey, "bson.max_key", "MaxKey");
            if (!maxkey_type) {
                goto invalid;
            }
            value = PyObject_CallFunctionObjArgs(maxkey_type, NULL);
            Py_DECREF(maxkey_type);
            break;
        }
    default:
        {
            PyObject* InvalidBSON = _error("InvalidBSON");
            if (InvalidBSON) {
                PyObject* bobj = PyBytes_FromFormat("%c", type);
                if (bobj) {
                    PyObject* repr = PyObject_Repr(bobj);
                    Py_DECREF(bobj);
                    /*
                     * See http://bugs.python.org/issue22023 for why we can't
                     * just use PyUnicode_FromFormat with %S or %R to do this
                     * work.
                     */
                    if (repr) {
                        PyObject* left = PyUnicode_FromString(
                            "Detected unknown BSON type ");
                        if (left) {
                            PyObject* lmsg = PyUnicode_Concat(left, repr);
                            Py_DECREF(left);
                            if (lmsg) {
                                PyObject* errmsg = PyUnicode_FromFormat(
                                    "%U for fieldname '%U'. Are you using the "
                                    "latest driver version?", lmsg, name);
                                if (errmsg) {
                                    PyErr_SetObject(InvalidBSON, errmsg);
                                    Py_DECREF(errmsg);
                                }
                                Py_DECREF(lmsg);
                            }
                        }
                        Py_DECREF(repr);
                    }
                }
                Py_DECREF(InvalidBSON);
            }
            goto invalid;
        }
    }

    if (value) {
        if (!options->type_registry.is_decoder_empty) {
            PyObject* value_type = NULL;
            PyObject* converter = NULL;
            value_type = PyObject_Type(value);
            if (value_type == NULL) {
                /* The decoded value will not be returned; release it. */
                Py_DECREF(value);
                goto invalid;
            }
            /* PyDict_GetItem returns a borrowed reference (or NULL). */
            converter = PyDict_GetItem(options->type_registry.decoder_map, value_type);
            if (converter != NULL) {
                PyObject* new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
                Py_DECREF(value_type);
                Py_DECREF(value);
                return new_value;
            } else {
                Py_DECREF(value_type);
                return value;
            }
        }
        return value;
    }

invalid:

    /*
     * Wrap any non-InvalidBSON errors in InvalidBSON.
     */
    if (PyErr_Occurred()) {
        PyObject *etype, *evalue, *etrace;
        PyObject *InvalidBSON;

        /*
         * Calling _error clears the error state, so fetch it first.
         */
        PyErr_Fetch(&etype, &evalue, &etrace);

        /* Dont reraise anything but PyExc_Exceptions as InvalidBSON. */
        if (PyErr_GivenExceptionMatches(etype, PyExc_Exception)) {
            InvalidBSON = _error("InvalidBSON");
            if (InvalidBSON) {
                if (!PyErr_GivenExceptionMatches(etype, InvalidBSON)) {
                    /*
                     * Raise InvalidBSON(str(e)).
                     */
                    Py_DECREF(etype);
                    etype = InvalidBSON;

                    if (evalue) {
                        PyObject *msg = PyObject_Str(evalue);
                        Py_DECREF(evalue);
                        evalue = msg;
                    }
                    PyErr_NormalizeException(&etype, &evalue, &etrace);
                } else {
                    /*
                     * The current exception matches InvalidBSON, so we don't
                     * need this reference after all.
                     */
                    Py_DECREF(InvalidBSON);
                }
            }
        }
        /* Steals references to args. */
        PyErr_Restore(etype, evalue, etrace);
    } else {
        PyObject *InvalidBSON = _error("InvalidBSON");
        if (InvalidBSON) {
            PyErr_SetString(InvalidBSON, "invalid length or type code");
            Py_DECREF(InvalidBSON);
        }
    }
    return NULL;
}
/*
 * Decode the element (type byte, field name, value) found at `position`
 * inside `string`.
 *
 * On success *name and *value receive new references and the position of
 * the next element is returned. On error -1 is returned with an exception
 * set and neither output holds a reference.
 */
static int _element_to_dict(PyObject* self, const char* string,
                            unsigned position, unsigned max,
                            const codec_options_t* options,
                            int raw_array,
                            PyObject** name, PyObject** value) {
    const unsigned char type = (unsigned char)string[position];
    size_t name_length;

    /* The field name is a C string directly after the type byte. */
    position++;
    name_length = strlen(string + position);
    if (name_length > BSON_MAX_SIZE || position + name_length >= max) {
        PyObject* InvalidBSON = _error("InvalidBSON");
        if (InvalidBSON) {
            PyErr_SetString(InvalidBSON, "field name too large");
            Py_DECREF(InvalidBSON);
        }
        return -1;
    }

    *name = PyUnicode_DecodeUTF8(
        string + position, name_length,
        options->unicode_decode_error_handler);
    if (*name) {
        /* Step over the name and its terminator, then decode the value. */
        position += (unsigned)name_length + 1;
        *value = get_value(self, *name, string, &position, type,
                           max - position, options, raw_array);
        if (*value) {
            return position;
        }
        Py_DECREF(*name);
        return -1;
    }

    /* Decoding the name failed: re-raise Exception subclasses as
     * InvalidBSON(str(error)). */
    {
        PyObject *etype, *evalue, *etrace;
        PyErr_Fetch(&etype, &evalue, &etrace);
        if (PyErr_GivenExceptionMatches(etype, PyExc_Exception)) {
            PyObject *InvalidBSON = _error("InvalidBSON");
            if (InvalidBSON) {
                Py_DECREF(etype);
                etype = InvalidBSON;
                if (evalue) {
                    PyObject *msg = PyObject_Str(evalue);
                    Py_DECREF(evalue);
                    evalue = msg;
                }
                PyErr_NormalizeException(&etype, &evalue, &etrace);
            }
        }
        PyErr_Restore(etype, evalue, etrace);
    }
    return -1;
}
/* Python entry point: decode a single element from a bytes object.
 *
 * Arguments (parsed with "OIIOp"): the bytes, the start position, the end
 * bound, the codec options object and the raw_array flag.
 *
 * Returns a (name, value, new_position) tuple, or NULL with an exception
 * set. The converted codec options are released on every path. */
static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) {
    /* TODO: Support buffer protocol */
    char* string;
    PyObject* bson;
    PyObject* options_obj;
    codec_options_t options;
    unsigned position;
    unsigned max;
    int new_position;
    int raw_array = 0;
    PyObject* name;
    PyObject* value;
    PyObject* result_tuple = NULL;

    if (!(PyArg_ParseTuple(args, "OIIOp", &bson, &position, &max,
                           &options_obj, &raw_array) &&
          convert_codec_options(self, options_obj, &options))) {
        return NULL;
    }

    if (!PyBytes_Check(bson)) {
        PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a bytes object");
        goto done;
    }
    string = PyBytes_AS_STRING(bson);

    new_position = _element_to_dict(self, string, position, max, &options, raw_array, &name, &value);
    if (new_position < 0) {
        goto done;
    }

    /* "O" takes its own references, so name and value are released here
     * whether or not the tuple was built. This avoids depending on how
     * "N" treats its arguments when tuple construction fails. */
    result_tuple = Py_BuildValue("OOi", name, value, new_position);
    Py_DECREF(name);
    Py_DECREF(value);

done:
    destroy_codec_options(&options);
    return result_tuple;
}
/* Decode `max` bytes of consecutive BSON elements from `string` into a new
 * instance of options->document_class.
 *
 * Returns the populated document, or NULL with an exception set if an
 * element cannot be decoded or cannot be stored in the document. */
static PyObject* _elements_to_dict(PyObject* self, const char* string,
                                   unsigned max,
                                   const codec_options_t* options) {
    unsigned position = 0;
    int raw_array = 0;
    PyObject* dict = PyObject_CallObject(options->document_class, NULL);
    if (!dict) {
        return NULL;
    }
    while (position < max) {
        PyObject* name = NULL;
        PyObject* value = NULL;
        int new_position;
        int stored;

        new_position = _element_to_dict(
            self, string, position, max, options, raw_array, &name, &value);
        if (new_position < 0) {
            Py_DECREF(dict);
            return NULL;
        }
        position = (unsigned)new_position;

        /* The document class may be user-provided, so storing can fail;
         * do not return a document while an exception is pending. */
        stored = PyObject_SetItem(dict, name, value);
        Py_DECREF(name);
        Py_DECREF(value);
        if (stored < 0) {
            Py_DECREF(dict);
            return NULL;
        }
    }
    return dict;
}
/* Recursion-guarded wrapper around _elements_to_dict.
 *
 * Returns NULL without decoding when Py_EnterRecursiveCall reports that
 * the recursion limit was exceeded. */
static PyObject* elements_to_dict(PyObject* self, const char* string,
                                  unsigned max,
                                  const codec_options_t* options) {
    PyObject* decoded = NULL;

    if (!Py_EnterRecursiveCall(" while decoding a BSON document")) {
        decoded = _elements_to_dict(self, string, max, options);
        Py_LeaveRecursiveCall();
    }
    return decoded;
}
/* Acquire a simple buffer view of `exporter` and validate it.
 *
 * The view must be C-contiguous, have a non-NULL data pointer and a
 * non-negative length, and use single-byte items.
 *
 * Returns 1 on success; the caller must then release the view.
 * Returns 0 with an exception set on failure; the view is already released
 * (or was never acquired). */
static int _get_buffer(PyObject *exporter, Py_buffer *view) {
    const char *problem = NULL;

    if (PyObject_GetBuffer(exporter, view, PyBUF_SIMPLE) == -1) {
        return 0;
    }

    /* Only the first failing check is reported. */
    if (!PyBuffer_IsContiguous(view, 'C')) {
        problem = "must be a contiguous buffer";
    } else if (!view->buf || view->len < 0) {
        problem = "invalid buffer";
    } else if (view->itemsize != 1) {
        problem = "buffer data must be ascii or utf8";
    }

    if (problem == NULL) {
        return 1;
    }

    PyErr_SetString(PyExc_ValueError, problem);
    PyBuffer_Release(view);
    return 0;
}
/* Python entry point: decode one complete BSON document.
 *
 * Arguments (parsed with "OO"): a buffer-exporting object holding exactly
 * one document, and the codec options object.
 *
 * The framing is validated before decoding: the buffer must be at least
 * BSON_MIN_SIZE bytes, its length prefix must equal the buffer length, and
 * the last byte must be NUL. Violations raise InvalidBSON.
 *
 * Returns the decoded document (or an instance of the raw document class
 * when options.is_raw_bson is set), or NULL with an exception set. */
static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
    PyObject* bson;
    PyObject* options_obj;
    PyObject* result = NULL;
    codec_options_t options;
    Py_buffer view = {0};
    const char* string = NULL;
    const char* problem = NULL;
    Py_ssize_t total_size;
    int32_t size = 0;

    if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) {
        return NULL;
    }
    if (!convert_codec_options(self, options_obj, &options)) {
        return NULL;
    }
    if (!_get_buffer(bson, &view)) {
        destroy_codec_options(&options);
        return NULL;
    }

    /* Run the framing checks in order; the first failure picks the message. */
    total_size = view.len;
    if (total_size < BSON_MIN_SIZE) {
        problem = "not enough data for a BSON document";
    } else {
        string = (char*)view.buf;
        memcpy(&size, string, 4);
        size = (int32_t)BSON_UINT32_FROM_LE(size);
        if (size < BSON_MIN_SIZE) {
            problem = "invalid message size";
        } else if (total_size < size || total_size > BSON_MAX_SIZE) {
            problem = "objsize too large";
        } else if (size != total_size || string[size - 1]) {
            problem = "bad eoo";
        }
    }

    if (problem) {
        PyObject* InvalidBSON = _error("InvalidBSON");
        if (InvalidBSON) {
            PyErr_SetString(InvalidBSON, problem);
            Py_DECREF(InvalidBSON);
        }
    } else if (options.is_raw_bson) {
        /* No need to decode fields if using RawBSONDocument */
        result = PyObject_CallFunction(
            options.document_class, "y#O", string, (Py_ssize_t)size,
            options_obj);
    } else {
        /* Skip the 4-byte length prefix and exclude the trailing NUL. */
        result = elements_to_dict(self, string + 4, (unsigned)size - 5, &options);
    }

    PyBuffer_Release(&view);
    destroy_codec_options(&options);
    return result;
}
/*
 * _decode_all(bson, codec_options): decode a run of concatenated BSON
 * documents.
 *
 * `bson` must support the buffer protocol. Documents are read back to back
 * until the buffer is exhausted; each must carry a valid length prefix that
 * fits in the remaining data and must end with a NUL byte, otherwise
 * InvalidBSON is raised.
 *
 * Returns a new list holding the decoded documents (empty for an empty
 * buffer), or NULL with an exception set. On failure no partial list is
 * returned.
 */
static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
    PyObject* bson;
    PyObject* options_obj = NULL;
    PyObject* documents = NULL;
    codec_options_t options;
    Py_buffer view = {0};
    const char* cursor;
    Py_ssize_t remaining;

    if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) {
        return NULL;
    }
    if (!convert_codec_options(self, options_obj, &options)) {
        return NULL;
    }
    if (!_get_buffer(bson, &view)) {
        destroy_codec_options(&options);
        return NULL;
    }

    cursor = (char*)view.buf;
    documents = PyList_New(0);
    if (!documents) {
        goto done;
    }

    for (remaining = view.len; remaining > 0;) {
        const char* problem = NULL;
        int32_t declared = 0;
        PyObject* doc;
        int appended;

        /* Validate the framing of the next document. */
        if (remaining < BSON_MIN_SIZE) {
            problem = "not enough data for a BSON document";
        } else {
            memcpy(&declared, cursor, 4);
            declared = (int32_t)BSON_UINT32_FROM_LE(declared);
            if (declared < BSON_MIN_SIZE) {
                problem = "invalid message size";
            } else if (remaining < declared) {
                problem = "objsize too large";
            } else if (cursor[declared - 1]) {
                problem = "bad eoo";
            }
        }
        if (problem) {
            PyObject* InvalidBSON = _error("InvalidBSON");
            if (InvalidBSON) {
                PyErr_SetString(InvalidBSON, problem);
                Py_DECREF(InvalidBSON);
            }
            goto fail;
        }

        if (options.is_raw_bson) {
            /* No need to decode fields if using RawBSONDocument. */
            doc = PyObject_CallFunction(
                options.document_class, "y#O", cursor, (Py_ssize_t)declared,
                options_obj);
        } else {
            doc = elements_to_dict(self, cursor + 4, (unsigned)declared - 5,
                                   &options);
        }
        if (!doc) {
            goto fail;
        }

        /* The list keeps its own reference; drop ours either way. */
        appended = PyList_Append(documents, doc);
        Py_DECREF(doc);
        if (appended < 0) {
            goto fail;
        }

        cursor += declared;
        remaining -= declared;
    }
    goto done;

fail:
    Py_DECREF(documents);
    documents = NULL;
done:
    PyBuffer_Release(&view);
    destroy_codec_options(&options);
    return documents;
}
/*
 * _array_of_documents_to_buffer(arr): flatten a raw BSON array whose
 * elements are all embedded documents into a bytes object containing those
 * documents back to back (array keys and type bytes are dropped).
 *
 * `arr` must support the buffer protocol and hold a BSON array document:
 * little-endian int32 length, elements, trailing NUL.
 *
 * The outer framing (declared size within the buffer, trailing NUL) is
 * validated BEFORE any element is touched, and every embedded document
 * length is checked against the bytes left in the array. The input is
 * untrusted, so nothing is read beyond the declared array or the buffer.
 *
 * Returns a new bytes object, or NULL with an exception set (InvalidBSON
 * for malformed input).
 */
static PyObject* _cbson_array_of_documents_to_buffer(PyObject* self, PyObject* args) {
    uint32_t size;
    uint32_t value_length;
    uint32_t position = 0;
    buffer_t buffer;
    const char* string;
    const char* problem = NULL;
    PyObject* arr;
    PyObject* result = NULL;
    Py_buffer view = {0};

    if (!PyArg_ParseTuple(args, "O", &arr)) {
        return NULL;
    }
    if (!_get_buffer(arr, &view)) {
        return NULL;
    }
    buffer = pymongo_buffer_new();
    if (!buffer) {
        PyBuffer_Release(&view);
        return NULL;
    }

    string = (char*)view.buf;
    if (view.len < BSON_MIN_SIZE) {
        problem = "not enough data for a BSON document";
        goto invalid;
    }

    memcpy(&size, string, 4);
    size = BSON_UINT32_FROM_LE(size);

    /* Reject bad framing up front: a size below the minimum would make
     * `size - 1` wrap, and a size beyond the buffer would let the loop read
     * out of bounds. view.len is non-negative here (_get_buffer checks). */
    if (size < BSON_MIN_SIZE) {
        problem = "invalid message size";
        goto invalid;
    }
    if ((size_t)view.len < (size_t)size || size > BSON_MAX_SIZE) {
        problem = "objsize too large";
        goto invalid;
    }
    /* The trailing NUL also bounds every strlen() in the loop below. */
    if (string[size - 1]) {
        problem = "bad eoo";
        goto invalid;
    }

    /* save space for length */
    if (pymongo_buffer_save_space(buffer, size) == -1) {
        goto done;
    }
    pymongo_buffer_update_position(buffer, 0);

    position += 4;
    while (position < size - 1) {
        /* Verify the value is an object (BSON type 0x03). */
        unsigned char type = (unsigned char)string[position];
        if (type != 3) {
            problem = "array element was not an object";
            goto invalid;
        }

        /* Just skip the keys (type byte + NUL-terminated key). */
        position = position + strlen(string + position) + 1;
        if (position >= size || (size - position) < BSON_MIN_SIZE) {
            problem = "invalid array content";
            goto invalid;
        }

        memcpy(&value_length, string + position, 4);
        value_length = BSON_UINT32_FROM_LE(value_length);
        if (value_length < BSON_MIN_SIZE) {
            problem = "invalid message size";
            goto invalid;
        }
        /* The embedded document must fit inside the enclosing array. */
        if (value_length > size - position) {
            problem = "invalid array content";
            goto invalid;
        }

        if (pymongo_buffer_write(buffer, string + position, value_length) == 1) {
            goto done;
        }
        position += value_length;
    }

    /* objectify buffer */
    result = Py_BuildValue("y#", pymongo_buffer_get_buffer(buffer),
                           (Py_ssize_t)pymongo_buffer_get_position(buffer));
    goto done;

invalid:
    {
        PyObject* InvalidBSON = _error("InvalidBSON");
        if (InvalidBSON) {
            PyErr_SetString(InvalidBSON, problem);
            Py_DECREF(InvalidBSON);
        }
    }
done:
    PyBuffer_Release(&view);
    pymongo_buffer_free(buffer);
    return result;
}
static PyMethodDef _CBSONMethods[] = {
{"_dict_to_bson", _cbson_dict_to_bson, METH_VARARGS,
"convert a dictionary to a string containing its BSON representation."},
{"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
"convert a BSON string to a SON object."},
{"_decode_all", _cbson_decode_all, METH_VARARGS,
"convert binary data to a sequence of documents."},
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
"Decode a single key, value pair."},
{"_array_of_documents_to_buffer", _cbson_array_of_documents_to_buffer, METH_VARARGS, "Convert raw array of documents to a stream of BSON documents"},
{"_test_long_long_to_str", _test_long_long_to_str, METH_VARARGS, "Test conversion of extreme and common Py_ssize_t values to str."},
{NULL, NULL, 0, NULL}
};
/* Failure exit used by the module init function: expands to a statement
 * that returns NULL from the enclosing function. */
#define INITERROR return NULL
/*
 * GC traverse hook for the module (installed as m_traverse).
 *
 * Visits every object slot declared in struct module_state so the cycle
 * collector can account for all references the module state holds. A slot
 * left out here is invisible to the collector.
 *
 * NOTE(review): this assumes every slot holds an owned reference populated
 * during module initialisation; confirm against _load_python_objects().
 *
 * Returns 0, or the first non-zero value returned by `visit`.
 */
static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
    struct module_state *state = GETSTATE(m);

    Py_VISIT(state->Binary);
    Py_VISIT(state->Code);
    Py_VISIT(state->ObjectId);
    Py_VISIT(state->DBRef);
    Py_VISIT(state->Regex);
    Py_VISIT(state->UUID);
    Py_VISIT(state->Timestamp);
    Py_VISIT(state->MinKey);
    Py_VISIT(state->MaxKey);
    Py_VISIT(state->UTC);
    Py_VISIT(state->REType);
    Py_VISIT(state->BSONInt64);
    Py_VISIT(state->Decimal128);
    Py_VISIT(state->Mapping);
    Py_VISIT(state->DatetimeMS);
    Py_VISIT(state->_min_datetime_ms);
    Py_VISIT(state->_max_datetime_ms);
    Py_VISIT(state->_type_marker_str);
    return 0;
}
/*
 * GC clear hook for the module (installed as m_clear).
 *
 * Drops every object reference declared in struct module_state and resets
 * the slot to NULL. Py_CLEAR tolerates slots that are already NULL, so the
 * hook is safe to run on partially initialised state and more than once.
 *
 * NOTE(review): this assumes every slot holds an owned reference populated
 * during module initialisation; confirm against _load_python_objects().
 *
 * Always returns 0.
 */
static int _cbson_clear(PyObject *m) {
    struct module_state *state = GETSTATE(m);

    Py_CLEAR(state->Binary);
    Py_CLEAR(state->Code);
    Py_CLEAR(state->ObjectId);
    Py_CLEAR(state->DBRef);
    Py_CLEAR(state->Regex);
    Py_CLEAR(state->UUID);
    Py_CLEAR(state->Timestamp);
    Py_CLEAR(state->MinKey);
    Py_CLEAR(state->MaxKey);
    Py_CLEAR(state->UTC);
    Py_CLEAR(state->REType);
    Py_CLEAR(state->BSONInt64);
    Py_CLEAR(state->Decimal128);
    Py_CLEAR(state->Mapping);
    Py_CLEAR(state->DatetimeMS);
    Py_CLEAR(state->_min_datetime_ms);
    Py_CLEAR(state->_max_datetime_ms);
    Py_CLEAR(state->_type_marker_str);
    return 0;
}
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_cbson",
NULL,
sizeof(struct module_state),
_CBSONMethods,
NULL,
_cbson_traverse,
_cbson_clear,
NULL
};
PyMODINIT_FUNC
PyInit__cbson(void)
{
PyObject *m;
PyObject *c_api_object;
static void *_cbson_API[_cbson_API_POINTER_COUNT];
PyDateTime_IMPORT;
if (PyDateTimeAPI == NULL) {
INITERROR;
}
/* Export C API */
_cbson_API[_cbson_buffer_write_bytes_INDEX] = (void *) buffer_write_bytes;
_cbson_API[_cbson_write_dict_INDEX] = (void *) write_dict;
_cbson_API[_cbson_write_pair_INDEX] = (void *) write_pair;
_cbson_API[_cbson_decode_and_write_pair_INDEX] = (void *) decode_and_write_pair;
_cbson_API[_cbson_convert_codec_options_INDEX] = (void *) convert_codec_options;
_cbson_API[_cbson_destroy_codec_options_INDEX] = (void *) destroy_codec_options;
_cbson_API[_cbson_buffer_write_double_INDEX] = (void *) buffer_write_double;
_cbson_API[_cbson_buffer_write_int32_INDEX] = (void *) buffer_write_int32;
_cbson_API[_cbson_buffer_write_int64_INDEX] = (void *) buffer_write_int64;
_cbson_API[_cbson_buffer_write_int32_at_position_INDEX] =
(void *) buffer_write_int32_at_position;
_cbson_API[_cbson_downcast_and_check_INDEX] = (void *) _downcast_and_check;
/* PyCapsule is new in python 3.1 */
c_api_object = PyCapsule_New((void *) _cbson_API, "_cbson._C_API", NULL);
if (c_api_object == NULL)
INITERROR;
m = PyModule_Create(&moduledef);
if (m == NULL) {
Py_DECREF(c_api_object);
INITERROR;
}
/* Import several python objects */
if (_load_python_objects(m)) {
Py_DECREF(c_api_object);
Py_DECREF(m);
INITERROR;
}
if (PyModule_AddObject(m, "_C_API", c_api_object) < 0) {
Py_DECREF(c_api_object);
Py_DECREF(m);
INITERROR;
}
return m;
}