Exemplo n.º 1
0
def _get_string(data, position, obj_end, dummy):
    """Read a length-prefixed BSON string that starts at `position`.

    Returns ``(text, next_position)``. Raises InvalidBSON when the declared
    length is below 1 or overruns `obj_end`, or when the last byte of the
    string is not NUL.
    """
    declared = _UNPACK_INT(data[position:position + 4])[0]
    start = position + 4
    if declared < 1 or declared > obj_end - start:
        raise InvalidBSON("invalid string length")
    # The declared length includes the trailing NUL terminator.
    nul_at = start + declared - 1
    if data[nul_at:nul_at + 1] != b"\x00":
        raise InvalidBSON("invalid end of string")
    text = _utf_8_decode(data[start:nul_at], None, True)[0]
    return text, nul_at + 1
Exemplo n.º 2
0
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
    """Decode one BSON document that must occupy all of `data`.

    Returns ``(document, remainder)`` where `remainder` is whatever follows
    the document in `data`.

    Raises InvalidBSON when the size prefix cannot be read, when the
    declared size does not match ``len(data)``, or when the document does
    not end with the EOO byte.
    """
    try:
        obj_size = struct.unpack("<i", data[:4])[0]
    except struct.error:
        # Fewer than 4 bytes available: report it as malformed BSON rather
        # than leaking struct.error to the caller.
        raise InvalidBSON("not enough data for object size")
    length = len(data)
    if length < obj_size:
        raise InvalidBSON("objsize too large")
    if obj_size != length or data[obj_size - 1:obj_size] != ZERO:
        raise InvalidBSON("bad eoo")
    # Strip the 4-byte size prefix and the trailing EOO byte.
    elements = data[4:obj_size - 1]
    return (_elements_to_dict(elements, as_class, tz_aware,
                              uuid_subtype), data[obj_size:])
Exemplo n.º 3
0
def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
    """Decode a buffer of back-to-back BSON documents into a list.

    `data` must be a string of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `data`: BSON data
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    Errors other than :exc:`InvalidBSON` raised while decoding are
    re-raised as :exc:`InvalidBSON` with the original traceback.

    .. versionchanged:: 3.0
       Removed `compile_re` option: PyMongo now always represents BSON regular
       expressions as :class:`~bson.regex.Regex` objects. Use
       :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
       BSON regular expression to a Python regular expression object.

       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionchanged:: 2.7
       Added `compile_re` option. If set to False, PyMongo represented BSON
       regular expressions as :class:`~bson.regex.Regex` objects instead of
       attempting to compile BSON regular expressions as Python native
       regular expressions, thus preventing errors for some incompatible
       patterns, see `PYTHON-500`_.

    .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
    """
    if not isinstance(codec_options, CodecOptions):
        raise _CODEC_OPTIONS_TYPE_ERROR

    decoded = []
    total = len(data)
    last_index = total - 1
    offset = 0
    try:
        while offset < last_index:
            size = _UNPACK_INT(data[offset:offset + 4])[0]
            if size > total - offset:
                raise InvalidBSON("invalid object size")
            next_offset = offset + size
            # The final byte of every document is the EOO marker.
            eoo_at = next_offset - 1
            if data[eoo_at:next_offset] != b"\x00":
                raise InvalidBSON("bad eoo")
            document = _elements_to_dict(data, offset + 4, eoo_at,
                                         codec_options)
            decoded.append(document)
            offset = next_offset
        return decoded
    except InvalidBSON:
        raise
    except Exception:
        # Surface any other failure as InvalidBSON, keeping the traceback.
        _, exc_value, exc_tb = sys.exc_info()
        reraise(InvalidBSON, exc_value, exc_tb)
Exemplo n.º 4
0
def _get_string(data, view, position, obj_end, opts, dummy):
    """Read a length-prefixed BSON string that starts at `position`.

    The bytes are decoded from `view` using
    ``opts.unicode_decode_error_handler``. Returns ``(text, next_position)``
    and raises InvalidBSON on a bad length or a missing NUL terminator.
    """
    declared = _UNPACK_INT_FROM(data, position)[0]
    start = position + 4
    if declared < 1 or declared > obj_end - start:
        raise InvalidBSON("invalid string length")
    # The declared length includes the trailing NUL terminator.
    nul_at = start + declared - 1
    if data[nul_at] != 0:
        raise InvalidBSON("invalid end of string")
    handler = opts.unicode_decode_error_handler
    text = _utf_8_decode(view[start:nul_at], handler, True)[0]
    return text, nul_at + 1
Exemplo n.º 5
0
def decode_all(data, as_class=dict,
               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True,
               codec_options=None):
    """Decode BSON data to multiple documents.

    `data` must be a string of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `data`: BSON data
      - `as_class` (optional): the class to use for the resulting
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): The BSON representation to use for UUIDs.
        See the :mod:`bson.binary` module for all options.
      - `compile_re` (optional): if ``False``, don't attempt to compile
        BSON regular expressions into Python regular expressions. Return
        instances of :class:`~bson.regex.Regex` instead. Can avoid
        :exc:`~bson.errors.InvalidBSON` errors when receiving
        Python-incompatible regular expressions, for example from ``currentOp``
      - `codec_options` (optional): when not ``None``, must be a
        ``CodecOptions`` instance; its ``document_class``, ``tz_aware`` and
        ``uuid_representation`` then replace `as_class`, `tz_aware` and
        `uuid_subtype`.

    Returns a list with one decoded document per BSON document in `data`.

    Raises ``InvalidBSON`` for malformed input; any other exception raised
    while decoding is converted to ``InvalidBSON`` with the original
    traceback.

    .. versionchanged:: 2.7
       Added `compile_re` option.
    .. versionadded:: 1.9
    """
    # codec_options, when supplied, takes precedence over the legacy keywords.
    if codec_options is not None:
        if not isinstance(codec_options, CodecOptions):
            raise _CODEC_OPTIONS_TYPE_ERROR
        as_class = codec_options.document_class
        tz_aware = codec_options.tz_aware
        uuid_subtype = codec_options.uuid_representation
    docs = []
    position = 0
    end = len(data) - 1
    try:
        while position < end:
            # Each document starts with its total size as a little-endian int32.
            obj_size = struct.unpack("<i", data[position:position + 4])[0]
            if len(data) - position < obj_size:
                raise InvalidBSON("objsize too large")
            # The last byte of a document must be the EOO marker.
            if data[position + obj_size - 1:position + obj_size] != ZERO:
                raise InvalidBSON("bad eoo")
            # Element bytes: everything between the size prefix and the EOO.
            elements = data[position + 4:position + obj_size - 1]
            position += obj_size
            docs.append(_elements_to_dict(elements, as_class,
                                          tz_aware, uuid_subtype, compile_re))
        return docs
    except InvalidBSON:
        raise
    except Exception:
        # Change exception type to InvalidBSON but preserve traceback.
        # NOTE: the three-expression raise below is Python 2-only syntax.
        exc_type, exc_value, exc_tb = sys.exc_info()
        raise InvalidBSON, str(exc_value), exc_tb
Exemplo n.º 6
0
def _get_string(
    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy: Any
) -> Tuple[str, int]:
    """Read a length-prefixed BSON string and return ``(text, next_position)``.

    Raises InvalidBSON when the declared length is below 1 or overruns
    `obj_end`, or when the string's last byte is not NUL.
    """
    declared = _UNPACK_INT_FROM(data, position)[0]
    start = position + 4
    if declared < 1 or declared > obj_end - start:
        raise InvalidBSON("invalid string length")
    # The declared length includes the trailing NUL terminator.
    nul_at = start + declared - 1
    if data[nul_at] != 0:
        raise InvalidBSON("invalid end of string")
    handler = opts.unicode_decode_error_handler
    text = _utf_8_decode(view[start:nul_at], handler, True)[0]
    return text, nul_at + 1
Exemplo n.º 7
0
def _get_object(data, position, obj_end, opts):
    """Decode an embedded BSON document starting at `position`.

    Returns ``(value, next_position)``. `value` is a DBRef when the decoded
    document has a "$ref" key, otherwise the decoded document itself.
    Raises InvalidBSON when the EOO byte is missing or the subdocument
    reaches `obj_end`.
    """
    size = _UNPACK_INT(data[position:position + 4])[0]
    next_position = position + size
    eoo_at = next_position - 1
    if data[eoo_at:next_position] != b"\x00":
        raise InvalidBSON("bad eoo")
    if eoo_at >= obj_end:
        raise InvalidBSON("invalid object length")
    doc = _elements_to_dict(data, position + 4, eoo_at, opts, subdocument=True)

    if "$ref" not in doc:
        return doc, next_position
    # Pop the reference fields in order; what remains is passed as extras.
    collection = doc.pop("$ref")
    ref_id = doc.pop("$id", None)
    database = doc.pop("$db", None)
    return (DBRef(collection, ref_id, database, doc), next_position)
def _get_string(data, position, obj_end, opts, dummy):
    """Decode a BSON string starting at `position`.

    Returns ``(value, next_position)``. When ``opts.use_unicode`` is true
    the value is decoded as UTF-8 using
    ``opts.unicode_decode_error_handler``; otherwise the raw bytes of the
    string, without its NUL terminator, are returned.

    Raises InvalidBSON when the declared length is below 1 or overruns
    `obj_end`, or when the string's last byte is not NUL.
    """
    length = _UNPACK_INT(data[position:position + 4])[0]
    position += 4
    if length < 1 or obj_end - position < length:
        raise InvalidBSON("invalid string length")
    # The declared length includes the trailing NUL terminator.
    end = position + length - 1
    if data[end:end + 1] != b"\x00":
        raise InvalidBSON("invalid end of string")
    raw = data[position:end]
    if opts.use_unicode:
        return _utf_8_decode(raw,
                             opts.unicode_decode_error_handler,
                             True)[0], end + 1
    # Previously this branch returned the whole remaining buffer and
    # len(data) - position, which is not an offset into `data`. Return only
    # this string's bytes and the offset just past its terminator.
    return raw, end + 1
Exemplo n.º 9
0
def _get_object_size(data, position, obj_end):
    """Read and validate the size of the BSON document at `position`.

    Returns ``(size, eoo_index)``. Raises InvalidBSON when the size prefix
    cannot be unpacked, the EOO byte is missing, the document reaches
    `obj_end`, or a document at position 0 does not span exactly `obj_end`
    bytes.
    """
    try:
        size = _UNPACK_INT_FROM(data, position)[0]
    except struct.error as exc:
        raise InvalidBSON(str(exc))
    eoo_at = position + size - 1
    if data[eoo_at] != 0:
        raise InvalidBSON("bad eoo")
    # A document at position 0 is the top-level one, so its size must also
    # match the full extent.
    overruns = eoo_at >= obj_end
    if overruns or (position == 0 and size != obj_end):
        raise InvalidBSON("invalid object length")
    return size, eoo_at
Exemplo n.º 10
0
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
    """Decode one BSON document that must occupy all of `data`.

    Returns ``(document, remainder)`` where `remainder` is whatever follows
    the document in `data`. Raises InvalidBSON when the size prefix is
    unreadable or inconsistent with `data`, or the EOO byte is missing.
    """
    try:
        size = struct.unpack("<i", data[:4])[0]
    except struct.error as e:
        raise InvalidBSON(str(e))
    available = len(data)
    if size > available:
        raise InvalidBSON("objsize too large")
    if not (size == available and data[size - 1:size] == ZERO):
        raise InvalidBSON("bad eoo")
    # Strip the 4-byte size prefix and the trailing EOO byte.
    body = data[4:size - 1]
    document = _elements_to_dict(body, as_class, tz_aware, uuid_subtype,
                                 compile_re)
    remainder = data[size:]
    return document, remainder
Exemplo n.º 11
0
def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    """Decode a BSON binary element starting at `position`.

    Returns ``(value, next_position)``. Subtypes 3 and 4 become uuid.UUID
    when `_use_uuid` is set; subtype 0 becomes plain bytes under PY3;
    everything else is wrapped in Binary.
    """
    length, position = _get_int(data, position)
    subtype = ord(data[position:position + 1])
    position += 1
    if subtype == 2:
        # Subtype 2 carries a second, inner length that must agree.
        inner_length, position = _get_int(data, position)
        if inner_length != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = inner_length
    finish = position + length
    payload = data[position:finish]
    if subtype in (3, 4) and _use_uuid:
        if uuid_subtype == JAVA_LEGACY:
            # Java legacy: each 8-byte half is stored reversed.
            value = uuid.UUID(bytes=payload[0:8][::-1] + payload[8:16][::-1])
        elif uuid_subtype == CSHARP_LEGACY:
            # C# legacy
            value = uuid.UUID(bytes_le=payload)
        else:
            # Python
            value = uuid.UUID(bytes=payload)
        return (value, finish)
    # Python3 special case. Decode subtype 0 to 'bytes'.
    if PY3 and subtype == 0:
        return payload, finish
    return Binary(payload, subtype), finish
Exemplo n.º 12
0
def _get_array(data, position, name, as_class, tz_aware, uuid_subtype,
               compile_re):
    """Decode a BSON array starting at `position` into a Python list.

    Element keys are skipped and only the values are collected. Returns
    ``(values, next_position)``. Raises InvalidBSON when the array does not
    end with the EOO byte.
    """
    size = struct.unpack("<i", data[position:position + 4])[0]
    eoo_at = position + size - 1
    if data[eoo_at:eoo_at + 1] != ZERO:
        raise InvalidBSON("bad eoo")

    cursor = position + 4
    limit = eoo_at - 1
    values = []

    # Bind the hot lookups to locals once, outside the loop.
    push = values.append
    find = data.index
    decoders = _element_getter

    while cursor < limit:
        type_byte = data[cursor:cursor + 1]
        # Jump past the NUL-terminated key; array keys are not kept.
        cursor = find(ZERO, cursor) + 1
        try:
            item, cursor = decoders[type_byte](
                data, cursor, name,
                as_class, tz_aware, uuid_subtype, compile_re)
        except KeyError:
            _raise_unknown_type(type_byte, name)
        push(item)
    return values, cursor + 1
Exemplo n.º 13
0
def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS):
    """Lazily decode the BSON documents stored in a file.

    Works similarly to the decode_all function, but reads one document at a
    time from the file object and yields each decoded document.

    :Parameters:
      - `file_obj`: A file object containing BSON data.
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    .. versionchanged:: 3.0
       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionadded:: 2.8
    """
    while True:
        # Every document starts with a 4-byte size prefix.
        header = file_obj.read(4)
        if not header:
            return  # Clean end of file.
        if len(header) != 4:
            raise InvalidBSON("cut off in middle of objsize")
        remaining = _UNPACK_INT_FROM(header, 0)[0] - 4
        if remaining < 0:
            remaining = 0
        # Hand the prefix plus the rest of the document to the decoder.
        document_bytes = header + file_obj.read(remaining)
        yield _bson_to_dict(document_bytes, codec_options)
Exemplo n.º 14
0
def _get_binary(data, position, dummy0, opts, dummy1):
    """Decode a BSON binary element starting at `position`.

    Returns ``(value, next_position)``. Subtypes 3 and 4 become uuid.UUID
    according to ``opts.uuid_representation``; subtype 0 becomes plain bytes
    under PY3; everything else is wrapped in Binary.
    """
    length, subtype = _UNPACK_LENGTH_SUBTYPE(data[position:position + 5])
    start = position + 5
    if subtype == 2:
        # Subtype 2 carries a second, inner length that must agree.
        inner_length = _UNPACK_INT(data[start:start + 4])[0]
        start += 4
        if inner_length != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = inner_length
    end = start + length
    if subtype in (3, 4):
        representation = opts.uuid_representation
        payload = data[start:end]
        if representation == JAVA_LEGACY:
            # Java legacy: each 8-byte half is stored reversed.
            return uuid.UUID(bytes=payload[0:8][::-1] + payload[8:16][::-1]), end
        if representation == CSHARP_LEGACY:
            # C# legacy
            return uuid.UUID(bytes_le=payload), end
        # Python
        return uuid.UUID(bytes=payload), end
    # Python3 special case. Decode subtype 0 to 'bytes'.
    if PY3 and subtype == 0:
        return data[start:end], end
    return Binary(data[start:end], subtype), end
Exemplo n.º 15
0
def _get_array(data, position, obj_end, opts, element_name):
    """Decode a BSON array starting at `position` into a Python list.

    Element keys are skipped and only the values are collected. Returns
    ``(values, next_position)``. Raises InvalidBSON when the array does not
    end with the EOO byte.
    """
    size = _UNPACK_INT(data[position:position + 4])[0]
    eoo_at = position + size - 1
    if data[eoo_at:eoo_at + 1] != b"\x00":
        raise InvalidBSON("bad eoo")

    cursor = position + 4
    limit = eoo_at - 1
    values = []

    # Bind the hot lookups to locals once, outside the loop.
    push = values.append
    find = data.index
    decoders = _ELEMENT_GETTER

    while cursor < limit:
        type_byte = data[cursor:cursor + 1]
        # Jump past the NUL-terminated key; array keys are not kept.
        cursor = find(b'\x00', cursor) + 1
        try:
            item, cursor = decoders[type_byte](data, cursor, obj_end,
                                               opts, element_name)
        except KeyError:
            _raise_unknown_type(type_byte, element_name)
        push(item)
    return values, cursor + 1
Exemplo n.º 16
0
def _get_c_string(data, length=None):
    """Split a UTF-8 C string off the front of `data`.

    When `length` is None the string runs up to the first NUL in `data`;
    InvalidBSON is raised if there is none. Returns ``(text, remainder)``
    where `remainder` starts one byte after the string.
    """
    if length is None:
        try:
            length = data.index("\x00")
        except ValueError:
            raise InvalidBSON()

    text = unicode(data[:length], "utf-8")
    remainder = data[length + 1:]
    return (text, remainder)
Exemplo n.º 17
0
def _get_int(data, position, as_class=None, tz_aware=False, unsigned=False):
    format = unsigned and "I" or "i"
    try:
        value = struct.unpack("<%s" % format, data[position:position + 4])[0]
    except struct.error:
        raise InvalidBSON()
    position += 4
    return value, position
Exemplo n.º 18
0
def _get_int(data, as_class=None, tz_aware=False, unsigned=False):
    format = unsigned and "I" or "i"
    try:
        value = struct.unpack("<%s" % format, data[:4])[0]
    except struct.error:
        raise InvalidBSON()

    return (value, data[4:])
Exemplo n.º 19
0
def _bson_to_dict(data, opts):
    """Decode a single BSON document that occupies all of `data`.

    Raises InvalidBSON when the size prefix is unreadable or differs from
    ``len(data)``, when the EOO byte is missing, or when decoding the
    elements fails for any reason.
    """
    size_prefix = data[:4]
    try:
        size = _UNPACK_INT(size_prefix)[0]
    except struct.error as exc:
        raise InvalidBSON(str(exc))
    if size != len(data):
        raise InvalidBSON("invalid object size")
    eoo_at = size - 1
    if data[eoo_at:size] != b"\x00":
        raise InvalidBSON("bad eoo")
    try:
        return _elements_to_dict(data, 4, eoo_at, opts)
    except InvalidBSON:
        raise
    except Exception:
        # Surface any other failure as InvalidBSON, keeping the traceback.
        _, exc_value, exc_tb = sys.exc_info()
        reraise(InvalidBSON, exc_value, exc_tb)
Exemplo n.º 20
0
def _get_boolean(data, view, position, dummy0, dummy1, dummy2):
    """Decode a BSON true/false to python True/False."""
    end = position + 1
    boolean_byte = data[position:end]
    if boolean_byte == b'\x00':
        return False, end
    elif boolean_byte == b'\x01':
        return True, end
    raise InvalidBSON('invalid boolean value: %r' % boolean_byte)
Exemplo n.º 21
0
def _elements_to_dict(data, position, obj_end, opts):
    """Decode the elements between `position` and `obj_end` into a document.

    The document is a fresh ``opts.document_class()`` instance. Raises
    InvalidBSON when the last decoded element does not end at `obj_end`.
    """
    document = opts.document_class()
    # Stays at `position` when there are no elements to iterate.
    reached = position
    for name, decoded, reached in _iterate_elements(data, position, obj_end,
                                                    opts):
        document[name] = decoded
    if reached == obj_end:
        return document
    raise InvalidBSON('bad object or element length')
Exemplo n.º 22
0
def _get_code_w_scope(data, position, obj_end, opts, element_name):
    """Decode a BSON code-with-scope element into a Code object.

    Returns ``(Code(code, scope), next_position)``. Raises InvalidBSON when
    the scope document does not end at the boundary given by the element's
    length prefix.
    """
    total = _UNPACK_INT(data[position:position + 4])[0]
    boundary = position + total
    source, scope_start = _get_string(data, position + 4, boundary, opts,
                                      element_name)
    scope, finish = _get_object(data, scope_start, boundary, opts,
                                element_name)
    if finish != boundary:
        raise InvalidBSON('scope outside of javascript code boundaries')
    return Code(source, scope), finish
Exemplo n.º 23
0
def _get_int(data, position, as_class=None,
             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
             compile_re=True, unsigned=False):
    """Unpack a little-endian 32-bit integer from `data` at `position`.

    Returns ``(value, position + 4)``. The integer is read as unsigned when
    `unsigned` is truthy. `as_class`, `tz_aware`, `uuid_subtype` and
    `compile_re` are not used by this function.

    Raises InvalidBSON if the four bytes cannot be unpacked.
    """
    # BSON integers are always little-endian. Without the "<" prefix struct
    # would use the platform's native byte order and alignment.
    if unsigned:
        fmt = "<I"
    else:
        fmt = "<i"
    try:
        value = struct.unpack(fmt, data[position:position + 4])[0]
    except struct.error:
        raise InvalidBSON()
    position += 4
    return value, position
Exemplo n.º 24
0
def _get_code_w_scope(
    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str
) -> Tuple[Code, int]:
    """Decode a BSON code-with-scope element into a Code object.

    Returns ``(Code(code, scope), next_position)``. Raises InvalidBSON when
    the scope document does not end at the boundary given by the element's
    length prefix.
    """
    total = _UNPACK_INT_FROM(data, position)[0]
    boundary = position + total
    source, scope_start = _get_string(data, view, position + 4, boundary, opts, element_name)
    scope, finish = _get_object(data, view, scope_start, boundary, opts, element_name)
    if finish != boundary:
        raise InvalidBSON("scope outside of javascript code boundaries")
    return Code(source, scope), finish
Exemplo n.º 25
0
def decode_file_iter(file_obj, as_class=dict, tz_aware=True,
                     uuid_subtype=OLD_UUID_SUBTYPE):
    """Decode bson data from a file to multiple documents as a generator. Works
    similarly to the decode_all function, but reads from the file object in
    chunks and parses bson in chunks, yielding one document at a time.

    :Parameters:
      - `file_obj`: A file object containing BSON data.
      - `as_class` (optional): the class to use for the resulting
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): passed through to the element decoder
        together with `as_class` and `tz_aware`.

    Raises InvalidBSON when a size prefix is truncated or smaller than 5,
    when the file ends before a document is complete, or when a document
    does not end with the EOO byte.

    .. versionadded:: 2.5
    """
    while True:
        # Read size of next object.
        size_data = file_obj.read(4)
        if not size_data:
            break  # Finished with file normally.
        elif len(size_data) != 4:
            raise InvalidBSON("cut off in middle of objsize")
        obj_size = struct.unpack("<i", size_data)[0]
        if obj_size < 5:
            # The obj_size should at least be big enough to encode the
            # obj_size and EOO itself, even on a zero-sized elements.
            raise InvalidBSON("objsize too small")

        # Actual data for elements is total size - size_prefix - suffix, but
        # we read the suffix together with the element to reduce number of
        # reads.
        elements_size = obj_size - 4

        # Read object itself and the EOO in one read (to reduce num reads).
        elements = file_obj.read(elements_size)
        if len(elements) != elements_size:
            raise InvalidBSON("objsize too large")
        # Slice instead of indexing: on Python 3 indexing bytes yields an int,
        # which never equals ZERO, so every document would be rejected.
        if elements[-1:] != ZERO:
            raise InvalidBSON("bad eoo")

        yield _elements_to_dict(elements[:-1], as_class,
                                tz_aware, uuid_subtype)
Exemplo n.º 26
0
def decode_all(data, as_class=dict,
               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
    """Decode BSON data to multiple documents.

    `data` must be a string of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `data`: BSON data
      - `as_class` (optional): the class to use for the resulting
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): passed through to the element decoder
        together with `as_class`, `tz_aware` and `compile_re`.
      - `compile_re` (optional): if ``False``, don't attempt to compile
        BSON regular expressions into Python regular expressions. Return
        instances of :class:`~bson.regex.Regex` instead. Can avoid
        :exc:`~bson.errors.InvalidBSON` errors when receiving
        Python-incompatible regular expressions, for example from ``currentOp``

    Returns a list with one decoded document per BSON document in `data`.

    Raises InvalidBSON for malformed input; any other exception raised
    while decoding is converted to InvalidBSON with the original traceback.

    .. versionchanged:: 2.7
       Added `compile_re` option.
    .. versionadded:: 1.9
    """
    docs = []
    position = 0
    end = len(data) - 1
    try:
        while position < end:
            # BSON sizes are little-endian int32. Without the "<" prefix
            # struct would use the platform's native byte order.
            obj_size = struct.unpack("<i", data[position:position + 4])[0]
            if len(data) - position < obj_size:
                raise InvalidBSON("objsize too large")
            # The last byte of a document must be the EOO marker.
            if data[position + obj_size - 1:position + obj_size] != ZERO:
                raise InvalidBSON("bad eoo")
            # Element bytes: everything between the size prefix and the EOO.
            elements = data[position + 4:position + obj_size - 1]
            position += obj_size
            docs.append(_elements_to_dict(elements, as_class,
                                          tz_aware, uuid_subtype, compile_re))
        return docs
    except InvalidBSON:
        raise
    except Exception as e:
        # Change exception type to InvalidBSON but preserve traceback.
        reraise(InvalidBSON, InvalidBSON(e), sys.exc_info()[2])
Exemplo n.º 27
0
def _get_object(data, position, as_class, tz_aware, uuid_subtype):
    """Decode an embedded BSON document starting at `position`.

    Returns ``(value, next_position)``. `value` is a DBRef when the decoded
    document has a "$ref" key, otherwise the decoded document itself.
    Raises InvalidBSON when the document does not end with the EOO byte.
    """
    size = struct.unpack("<i", data[position:position + 4])[0]
    next_position = position + size
    eoo_at = next_position - 1
    if data[eoo_at:next_position] != ZERO:
        raise InvalidBSON("bad eoo")
    # Element bytes: everything between the size prefix and the EOO.
    body = data[position + 4:eoo_at]
    doc = _elements_to_dict(body, as_class, tz_aware, uuid_subtype)
    if "$ref" not in doc:
        return doc, next_position
    # Pop the reference fields in order; what remains is passed as extras.
    collection = doc.pop("$ref")
    ref_id = doc.pop("$id", None)
    database = doc.pop("$db", None)
    return (DBRef(collection, ref_id, database, doc), next_position)
Exemplo n.º 28
0
def _elements_to_dict(data, view, position, obj_end, opts, result=None):
    """Decode a BSON document into result."""
    if result is None:
        result = opts.document_class()
    end = obj_end - 1
    while position < end:
        key, value, position = _element_to_dict(data, view, position, obj_end, opts)
        result[key] = value
    if position != obj_end:
        raise InvalidBSON('bad object or element length')
    return result
Exemplo n.º 29
0
def _get_boolean(
    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
) -> Tuple[bool, int]:
    """Decode a BSON true/false to python True/False."""
    end = position + 1
    boolean_byte = data[position:end]
    if boolean_byte == b"\x00":
        return False, end
    elif boolean_byte == b"\x01":
        return True, end
    raise InvalidBSON("invalid boolean value: %r" % boolean_byte)
Exemplo n.º 30
0
def _get_c_string(data, position, length=None):
    if length is None:
        try:
            end = data.index(ZERO, position)
        except ValueError:
            raise InvalidBSON()
    else:
        end = position + length
    value = data[position:end].decode("utf-8")
    position = end + 1

    return value, position