def test_uuid(self):
     """Check UUID round-tripping and $uuid/$binary extended-JSON output."""
     doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
     self.round_trip(doc)
     # Default (legacy) output uses the $uuid wrapper.
     self.assertEqual(
         '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
         json_util.dumps(doc))
     binary_json = ('{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", '
                    '"$type": "%s"}}')
     # Strict mode emits $binary subtype 3; the STANDARD representation
     # switches the subtype to 4.
     self.assertEqual(
         binary_json % '03',
         json_util.dumps(doc, json_options=json_util.STRICT_JSON_OPTIONS))
     self.assertEqual(
         binary_json % '04',
         json_util.dumps(doc,
                         json_options=json_util.JSONOptions(
                             strict_uuid=True,
                             uuid_representation=STANDARD)))
     # Both $binary subtypes decode back to the native uuid.UUID.
     self.assertEqual(doc, json_util.loads(binary_json % '03'))
     self.assertEqual(doc, json_util.loads(binary_json % '04'))
     # Legacy representations must also round-trip in strict mode.
     for legacy_rep in (JAVA_LEGACY, CSHARP_LEGACY):
         self.round_trip(doc,
                         json_options=json_util.JSONOptions(
                             strict_uuid=True,
                             uuid_representation=legacy_rep))
Exemplo n.º 2
0
 def test_uuid(self):
     """Verify UUID JSON encoding, and that subtype 4 decoding ignores the
     configured UUID representation."""
     doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
     self.round_trip(doc)
     # Default (legacy) output uses the $uuid wrapper.
     self.assertEqual(
         '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
         json_util.dumps(doc))
     binary_json = ('{"uuid": '
                    '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "%s"}}')
     # Strict mode emits $binary subtype 3; STANDARD switches it to 4.
     self.assertEqual(
         binary_json % '03',
         json_util.dumps(doc, json_options=json_util.STRICT_JSON_OPTIONS))
     self.assertEqual(
         binary_json % '04',
         json_util.dumps(doc, json_options=json_util.JSONOptions(
             strict_uuid=True, uuid_representation=STANDARD)))
     self.assertEqual(doc, json_util.loads(binary_json % '03'))
     for uuid_rep in ALL_UUID_REPRESENTATIONS:
         opts = json_util.JSONOptions(
             strict_uuid=True, uuid_representation=uuid_rep)
         self.round_trip(doc, json_options=opts)
         # Ignore UUID representation when decoding BSON binary subtype 4.
         self.assertEqual(
             doc, json_util.loads(binary_json % '04', json_options=opts))
Exemplo n.º 3
0
 def test_loads_document_class(self):
     """json_util.loads should build documents of the configured class."""
     # document_class=dict is always supported.
     self.assertEqual(
         {"foo": "bar"},
         json_util.loads(
             '{"foo": "bar"}',
             json_options=json_util.JSONOptions(document_class=dict)))
     # document_class=SON keeps key order.
     self.assertEqual(
         SON([("foo", "bar"), ("b", 1)]),
         json_util.loads(
             '{"foo": "bar", "b": 1}',
             json_options=json_util.JSONOptions(document_class=SON)))
 def test_loads_document_class(self):
     """document_class=dict always works; SON needs object_pairs_hook
     support from the json library."""
     self.assertEqual(
         {"foo": "bar"},
         json_util.loads(
             '{"foo": "bar"}',
             json_options=json_util.JSONOptions(document_class=dict)))
     if PY26 and not HAS_SIMPLE_JSON:
         # Python 2.6's json module lacks object_pairs_hook, so an
         # order-preserving document class must be rejected up front.
         self.assertRaises(
             ConfigurationError, json_util.JSONOptions, document_class=SON)
     else:
         self.assertEqual(
             SON([("foo", "bar"), ("b", 1)]),
             json_util.loads(
                 '{"foo": "bar", "b": 1}',
                 json_options=json_util.JSONOptions(document_class=SON)))
Exemplo n.º 5
0
    def create_tests(self):
        """Walk ``self.test_path`` and attach one generated test method to
        ``self._test_class`` for every test definition found in the JSON
        scenario files."""
        for dirpath, _, filenames in os.walk(self.test_path):
            dirname = os.path.split(dirpath)[-1]

            for filename in filenames:
                with open(os.path.join(dirpath, filename)) as scenario_stream:
                    # Use tz_aware=False to match how CodecOptions decodes
                    # dates.
                    opts = json_util.JSONOptions(tz_aware=False)
                    scenario_def = ScenarioDict(
                        json_util.loads(scenario_stream.read(),
                                        json_options=opts))

                test_type = os.path.splitext(filename)[0]

                # Construct test from scenario.
                for test_def in self.tests(scenario_def):
                    # Build an attribute-safe name from directory, file stem
                    # and the scenario's description.
                    test_name = 'test_%s_%s_%s' % (
                        dirname, test_type.replace("-", "_").replace('.', '_'),
                        str(test_def['description'].replace(" ", "_").replace(
                            '.', '_')))

                    new_test = self._create_test(scenario_def, test_def,
                                                 test_name)
                    # Wrap the test with version / topology requirement
                    # checks declared by the scenario.
                    new_test = self._ensure_min_max_server_version(
                        scenario_def, new_test)
                    new_test = self.ensure_run_on(scenario_def, new_test)

                    new_test.__name__ = test_name
                    setattr(self._test_class, new_test.__name__, new_test)
    def test_uuid_uuid_rep_unspecified(self):
        """UNSPECIFIED rejects native UUID encoding and decodes every UUID
        wire form to Binary."""
        _uuid = uuid.uuid4()
        options = json_util.JSONOptions(
            strict_uuid=True,
            uuid_representation=UuidRepresentation.UNSPECIFIED)

        # Cannot directly encode native UUIDs with UNSPECIFIED.
        with self.assertRaises(ValueError):
            json_util.dumps({'uuid': _uuid}, json_options=options)

        # All UUID subtypes are decoded as Binary with UNSPECIFIED.
        for subtype in (3, 4):
            doc = {'uuid': Binary(_uuid.bytes, subtype=subtype)}
            ext_json_str = json_util.dumps(doc)
            self.assertEqual(
                doc, json_util.loads(ext_json_str, json_options=options))

        # $uuid-encoded fields also come back as Binary subtype 4.
        doc = {'uuid': Binary(_uuid.bytes, subtype=4)}
        ext_json_str = json_util.dumps({'uuid': _uuid})
        self.assertEqual(
            doc, json_util.loads(ext_json_str, json_options=options))
def generate_test_classes(test_path,
                          module=__name__,
                          class_name_prefix='',
                          expected_failures=None,
                          bypass_test_generation_errors=False,
                          **kwargs):
    """Method for generating test classes. Returns a dictionary where keys are
    the names of test classes and values are the test class objects.

    :Parameters:
      - `test_path`: directory tree containing JSON spec test files.
      - `module`: value used for each generated class's ``__module__``.
      - `class_name_prefix`: prefix for every generated class name.
      - `expected_failures`: optional list of expected failures, stored on
        each generated base class as ``EXPECTED_FAILURES``. Defaults to an
        empty list.
      - `bypass_test_generation_errors`: when True, files that fail class
        generation are skipped instead of raising.
      - `kwargs`: extra attributes copied onto each generated class.
    """
    # Bug fix: the original used a mutable default (expected_failures=[]),
    # which is shared across calls; create a fresh list per invocation.
    if expected_failures is None:
        expected_failures = []
    test_klasses = {}

    def test_base_class_factory(test_spec):
        """Utility that creates the base class to use for test generation.
        This is needed to ensure that cls.TEST_SPEC is appropriately set when
        the metaclass __init__ is invoked."""
        class SpecTestBase(with_metaclass(UnifiedSpecTestMeta)):
            TEST_SPEC = test_spec
            EXPECTED_FAILURES = expected_failures

        return SpecTestBase

    for dirpath, _, filenames in os.walk(test_path):
        dirname = os.path.split(dirpath)[-1]

        for filename in filenames:
            fpath = os.path.join(dirpath, filename)
            with open(fpath) as scenario_stream:
                # Use tz_aware=False to match how CodecOptions decodes
                # dates.
                opts = json_util.JSONOptions(tz_aware=False)
                scenario_def = json_util.loads(scenario_stream.read(),
                                               json_options=opts)

            test_type = os.path.splitext(filename)[0]
            snake_class_name = 'Test%s_%s_%s' % (
                class_name_prefix, dirname.replace(
                    '-', '_'), test_type.replace('-', '_').replace('.', '_'))
            class_name = snake_to_camel(snake_class_name)

            try:
                schema_version = Version.from_string(
                    scenario_def['schemaVersion'])
                mixin_class = _SCHEMA_VERSION_MAJOR_TO_MIXIN_CLASS.get(
                    schema_version[0])
                if mixin_class is None:
                    raise ValueError(
                        "test file '%s' has unsupported schemaVersion '%s'" %
                        (fpath, schema_version))
                module_dict = {'__module__': module}
                module_dict.update(kwargs)
                test_klasses[class_name] = type(class_name, (
                    mixin_class,
                    test_base_class_factory(scenario_def),
                ), module_dict)
            except Exception:
                if bypass_test_generation_errors:
                    continue
                raise

    return test_klasses
 def test_numberlong(self):
     """Int64 decodes from $numberLong and re-encodes per strict mode."""
     jsn = '{"weight": {"$numberLong": "65535"}}'
     # $numberLong decodes to Int64.
     self.assertEqual(Int64(65535), json_util.loads(jsn)['weight'])
     # Default mode emits a plain JSON number.
     self.assertEqual('{"weight": 65535}',
                      json_util.dumps({"weight": Int64(65535)}))
     # Strict mode restores the $numberLong wrapper.
     strict_opts = json_util.JSONOptions(strict_number_long=True)
     self.assertEqual(
         jsn,
         json_util.dumps({"weight": Int64(65535)}, json_options=strict_opts))
Exemplo n.º 9
0
def showMessagesJson():
    """Serve the most recent messages as a JSON response with ISO8601 dates."""
    iso_options = json_util.JSONOptions(
        datetime_representation=json_util.DatetimeRepresentation.ISO8601)
    last_messages = findLastMessages()
    # Strip MongoDB's internal _id field before serializing for the client.
    for message in last_messages:
        del message['_id']
    body = json_util.dumps(last_messages, json_options=iso_options)
    return Response(body, mimetype='application/json')
Exemplo n.º 10
0
from pymongo import MongoClient
from bson import json_util
import bson 
from bson.codec_options import CodecOptions

# Generate test data from this matrix of axes.
# Each axis is a (name, possible-values) pair; test cases are drawn from the
# cross product and filtered by allowed() below.
axes = [
    ("kms", [ "aws", "local" ]),
    ("type", [ "double", "string", "object", "array", "binData=00", "binData=04", "undefined", "objectId", "bool", "date", "null", "regex", "dbPointer", "javascript", "symbol", "javascriptWithScope", "int", "timestamp", "long", "decimal", "minKey", "maxKey" ]),
    ("algo", [ "rand", "det" ]),
    ("method", [ "auto", "explicit" ]),
    ("identifier", [ "id", "altname" ])
]

# Use the standard (subtype 4) UUID representation for both BSON and
# canonical extended JSON output.
codec_options = CodecOptions(uuid_representation=bson.binary.STANDARD)
json_options = json_util.JSONOptions(json_mode=json_util.JSONMode.CANONICAL, uuid_representation=bson.binary.STANDARD)

def allowed(map):
    """Return True when the axis combination *map* is a valid test case."""
    # These BSON types cannot be encrypted at all.
    if map["type"] in ("undefined", "minKey", "maxKey", "null"):
        return False
    # Deterministic encryption does not support these types.
    if map["algo"] == "det" and map["type"] in (
            "object", "array", "double", "decimal",
            "javascriptWithScope", "bool"):
        return False
    # prohibited per SERVER-42010
    if (map["algo"] == "det" and map["identifier"] == "altname"
            and map["method"] == "auto"):
        return False
    return True

def gen_schema (map):
    fmt = """ "%s" : { "bsonType": "object", "properties": { "value": { "encrypt": { "keyId": %s, "algorithm": "%s", "bsonType": "%s" } } } } """

    if not allowed(map):
Exemplo n.º 11
0
    def test_datetime(self):
        """Exercise $date decoding across offset formats and $date encoding
        across datetime representations, tz options, and non-UTC zones."""
        # only millis, not micros
        self.round_trip({"date": datetime.datetime(2009, 12, 9, 15,
                                                   49, 45, 191000, utc)})

        # All of the following offset spellings must decode to the epoch:
        # +0000, +00:00, +00, Z, and no offset at all (assumed UTC).
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000+0000"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000000+0000"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000+00:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000Z"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000000Z"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        # No explicit offset
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T00:00:00.000000"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        # Localtime behind UTC
        jsn = '{"dt": { "$date" : "1969-12-31T16:00:00.000-0800"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1969-12-31T16:00:00.000000-0800"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1969-12-31T16:00:00.000-08:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        # Localtime ahead of UTC
        jsn = '{"dt": { "$date" : "1970-01-01T01:00:00.000+0100"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T01:00:00.000000+0100"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T01:00:00.000+01:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01:00"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])
        jsn = '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01"}}'
        self.assertEqual(EPOCH_AWARE, json_util.loads(jsn)["dt"])

        # Pre-epoch dates decode from both the bare-millis and $numberLong
        # forms of $date.
        dtm = datetime.datetime(1, 1, 1, 1, 1, 1, 0, utc)
        jsn = '{"dt": {"$date": -62135593139000}}'
        self.assertEqual(dtm, json_util.loads(jsn)["dt"])
        jsn = '{"dt": {"$date": {"$numberLong": "-62135593139000"}}}'
        self.assertEqual(dtm, json_util.loads(jsn)["dt"])

        # Test dumps format
        pre_epoch = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000, utc)}
        post_epoch = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)}
        self.assertEqual(
            '{"dt": {"$date": -62135593138990}}',
            json_util.dumps(pre_epoch))
        self.assertEqual(
            '{"dt": {"$date": 63075661010}}',
            json_util.dumps(post_epoch))
        # Strict mode: $numberLong before the epoch, ISO8601 after.
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch, json_options=STRICT_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}',
            json_util.dumps(post_epoch, json_options=STRICT_JSON_OPTIONS))

        # NUMBERLONG representation always wraps millis in $numberLong.
        number_long_options = json_util.JSONOptions(
            datetime_representation=DatetimeRepresentation.NUMBERLONG)
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "63075661010"}}}',
            json_util.dumps(post_epoch, json_options=number_long_options))
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch, json_options=number_long_options))

        # ISO8601 mode assumes naive datetimes are UTC
        pre_epoch_naive = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000)}
        post_epoch_naive = {
            "dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000)}
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch_naive, json_options=STRICT_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}',
            json_util.dumps(post_epoch_naive,
                            json_options=STRICT_JSON_OPTIONS))

        # Test tz_aware and tzinfo options
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}')["dt"])
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
                json_options=json_util.JSONOptions(tz_aware=True,
                                                   tzinfo=utc))["dt"])
        # tz_aware=False strips the timezone on decode.
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
                json_options=json_util.JSONOptions(tz_aware=False))["dt"])
        self.round_trip(pre_epoch_naive, json_options=json_util.JSONOptions(
            tz_aware=False))

        # Test a non-utc timezone
        pacific = FixedOffset(-8 * 60, 'US/Pacific')
        aware_datetime = {"dt": datetime.datetime(2002, 10, 27, 6, 0, 0, 10000,
                                                  pacific)}
        self.assertEqual(
            '{"dt": {"$date": "2002-10-27T06:00:00.010-0800"}}',
            json_util.dumps(aware_datetime, json_options=STRICT_JSON_OPTIONS))
        self.round_trip(aware_datetime, json_options=json_util.JSONOptions(
            tz_aware=True, tzinfo=pacific))
        self.round_trip(aware_datetime, json_options=json_util.JSONOptions(
            datetime_representation=DatetimeRepresentation.ISO8601,
            tz_aware=True, tzinfo=pacific))
Exemplo n.º 12
0
                                _SENSITIVE_COMMANDS)
from pymongo.read_concern import ReadConcern
from pymongo.read_preferences import ReadPreference
from pymongo.results import BulkWriteResult
from pymongo.server_api import ServerApi
from pymongo.write_concern import WriteConcern

from test import client_context, unittest, IntegrationTest
from test.utils import (camel_to_snake, rs_or_single_client, single_client,
                        snake_to_camel)

from test.version import Version
from test.utils import (camel_to_snake_args, parse_collection_options,
                        parse_spec_options, prepare_spec_arguments)

# Decode spec-file dates as naive datetimes, matching how CodecOptions
# decodes dates elsewhere in the test suite.
JSON_OPTS = json_util.JSONOptions(tz_aware=False)


def with_metaclass(meta, *bases):
    """Create a base class with a metaclass.

    Vendored from six: https://github.com/benjaminp/six/blob/master/six.py
    """

    # This requires a bit of explanation: the basic idea is to make a dummy
    # metaclass for one level of class instantiation that replaces itself with
    # the actual metaclass.
    class metaclass(type):
        def __new__(cls, name, this_bases, d):
            if sys.version_info[:2] >= (3, 7):
                # This version introduced PEP 560 that requires a bit
Exemplo n.º 13
0
        self.insert_log_q = "insert into %s (spider, create_time, log_count, items, pages) \
                                        values (?, ?, ?, ?, ?); " % table

    def insert_log(self, create_time, spider, log_count, pages, items):
        """Insert one crawl-log row and commit.

        Values are bound in the prepared statement's column order:
        (spider, create_time, log_count, items, pages).
        """
        # Bug fix: the original referenced an undefined name ``q`` (NameError
        # at call time); the prepared statement is stored on the instance.
        self.conn.execute(self.insert_log_q,
                          (spider, create_time, log_count, items, pages))
        self.conn.commit()

    def get_all_stats(self, st, et):
        """Return stats for all spiders between *st* and *et*. Not yet
        implemented (stub)."""
        pass

    def get_stats(self, st, et, spider):
        """Return stats for one *spider* between *st* and *et*. Not yet
        implemented (stub)."""
        pass


# Shared options for encode()/decode() below: serialize datetimes as naive
# ISO8601 strings so values survive the SQLite round trip.
json_options = json_util.JSONOptions(
    tz_aware=False,
    datetime_representation=json_util.DatetimeRepresentation.ISO8601)


def encode(obj, json_options=json_options):
    """Serialize *obj* to an ASCII extended-JSON SQLite BLOB."""
    payload = json_util.dumps(obj, json_options=json_options)
    return sqlite3.Binary(payload.encode('ascii'))


def decode(obj, json_options=json_options):
    """Deserialize a SQLite BLOB produced by :func:`encode`."""
    text = bytes(obj).decode('ascii')
    return json_util.loads(text, json_options=json_options)


class JsonSqliteList(object):
    """SQLite-backed list"""
Exemplo n.º 14
0
# Maps a deprecated BSON type's byte (as a hex string) to the Python type
# its decoded value is expected to have.
_DEPRECATED_BSON_TYPES = {
    # Symbol
    '0x0E': text_type,
    # Undefined
    '0x06': type(None),
    # DBPointer
    '0x0C': DBRef
}

# Need to set tz_aware=True in order to use "strict" dates in extended JSON.
codec_options = CodecOptions(tz_aware=True, document_class=SON)
# We normally encode UUID as binary subtype 0x03,
# but we'll need to encode to subtype 0x04 for one of the tests.
codec_options_uuid_04 = codec_options._replace(uuid_representation=STANDARD)
json_options_uuid_04 = json_util.JSONOptions(json_mode=JSONMode.CANONICAL,
                                             uuid_representation=STANDARD)
json_options_iso8601 = json_util.JSONOptions(
    datetime_representation=json_util.DatetimeRepresentation.ISO8601)
# Encoding/decoding helpers pre-bound to the option sets above.
to_extjson = functools.partial(json_util.dumps,
                               json_options=json_util.CANONICAL_JSON_OPTIONS)
to_extjson_uuid_04 = functools.partial(json_util.dumps,
                                       json_options=json_options_uuid_04)
to_extjson_iso8601 = functools.partial(json_util.dumps,
                                       json_options=json_options_iso8601)
to_relaxed_extjson = functools.partial(
    json_util.dumps, json_options=json_util.RELAXED_JSON_OPTIONS)
to_bson_uuid_04 = functools.partial(BSON.encode,
                                    codec_options=codec_options_uuid_04)
to_bson = functools.partial(BSON.encode, codec_options=codec_options)
decode_bson = lambda bbytes: BSON(bbytes).decode(codec_options=codec_options)
if json_util._HAS_OBJECT_PAIRS_HOOK:
Exemplo n.º 15
0
    # Symbol
    '0x0E': text_type,
    # Undefined
    '0x06': type(None),
    # DBPointer
    '0x0C': DBRef
}

# Need to set tz_aware=True in order to use "strict" dates in extended JSON.
codec_options = CodecOptions(tz_aware=True, document_class=SON)
# We normally encode UUID as binary subtype 0x03,
# but we'll need to encode to subtype 0x04 for one of the tests.
codec_options_uuid_04 = codec_options._replace(uuid_representation=STANDARD)
json_options_uuid_04 = json_util.JSONOptions(
    strict_number_long=True,
    strict_uuid=True,
    datetime_representation=json_util.DatetimeRepresentation.NUMBERLONG,
    uuid_representation=STANDARD)
json_options_iso8601 = json_util.JSONOptions(
    datetime_representation=json_util.DatetimeRepresentation.ISO8601)
# Encoding helpers pre-bound to the option sets above.
to_extjson = functools.partial(json_util.dumps,
                               json_options=json_util.CANONICAL_JSON_OPTIONS)
to_extjson_uuid_04 = functools.partial(json_util.dumps,
                                       json_options=json_options_uuid_04)
to_extjson_iso8601 = functools.partial(json_util.dumps,
                                       json_options=json_options_iso8601)
to_bson_uuid_04 = functools.partial(BSON.encode,
                                    codec_options=codec_options_uuid_04)
to_bson = functools.partial(BSON.encode, codec_options=codec_options)
decode_bson = lambda bbytes: BSON(bbytes).decode(codec_options=codec_options)
if json_util._HAS_OBJECT_PAIRS_HOOK:
Exemplo n.º 16
0
from pymongo import MongoClient
import bson
from bson import json_util

# Use the standard (subtype 4) UUID representation for both extended JSON
# and BSON codec options.
json_options = json_util.JSONOptions(
    uuid_representation=bson.binary.STANDARD)

# NOTE(review): CodecOptions is reached through json_util's re-export here;
# importing from bson.codec_options directly would be more conventional.
codec_options = json_util.CodecOptions(
    uuid_representation=bson.binary.STANDARD)

validator = """{
    "$jsonSchema": {
        "properties": {
            "ssn": {
                "encrypt": {
                    "keyId": [{
                        "$binary": {
                            "base64": "AAAAAAAAAAAAAAAAAAAAAA==",
                            "subType": "04"
                        }
                    }],
                    "bsonType": "string",
                    "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic",
                    "initializationVector": {
                            "$binary": {
                                "base64": "aWlpaWlpaWlpaWlpaWlpaQ==",
                                "subType": "00"
                            }
                    }
                }
            }
Exemplo n.º 17
0
    '0x0E': text_type,
    # Undefined
    '0x06': type(None),
    # DBPointer
    '0x0C': DBRef
}


class TestBSONCorpus(unittest.TestCase):
    """Container for BSON corpus tests; presumably populated dynamically
    elsewhere in the module — no methods are defined here."""
    pass


# Need to set tz_aware=True in order to use "strict" dates in extended JSON.
codec_options = CodecOptions(tz_aware=True)
# Default extended-JSON options: strict longs/UUIDs, dates as $numberLong.
json_options = json_util.JSONOptions(
    strict_number_long=True,
    strict_uuid=True,
    datetime_representation=json_util.DatetimeRepresentation.NUMBERLONG)
# We normally encode UUID as binary subtype 0x03,
# but we'll need to encode to subtype 0x04 for one of the tests.
codec_options_uuid_04 = codec_options._replace(uuid_representation=STANDARD)
json_options_uuid_04 = json_util.JSONOptions(
    strict_number_long=True,
    strict_uuid=True,
    datetime_representation=json_util.DatetimeRepresentation.NUMBERLONG,
    uuid_representation=STANDARD)
json_options_iso8601 = json_util.JSONOptions(
    datetime_representation=json_util.DatetimeRepresentation.ISO8601)
# Encoding helpers pre-bound to the option sets above.
to_extjson = functools.partial(json_util.dumps, json_options=json_options)
to_extjson_uuid_04 = functools.partial(json_util.dumps,
                                       json_options=json_options_uuid_04)
to_extjson_iso8601 = functools.partial(json_util.dumps,
Exemplo n.º 18
0
def read_rss(source_site, all_urls_for_site):
    """Fetch the RSS feeds in *all_urls_for_site*, scrape any articles not
    seen before, and persist them — to JSON data files when running on a
    Raspberry Pi (arm), otherwise directly to MongoDB.

    NOTE(review): errors are swallowed by the bare print in the except
    clause below; the exception itself is never logged.
    """
    try:
        for rssLink in all_urls_for_site:
            # get list of articles already processed and saved
            id_list_from_file = dataFileOps.get_article_id_from_file()
            article_id_list = list()

            # id_list_from_file = dataFileOps.get_id_list_from_data_file(rssLink['site'])
            # Timestamp suffix for this run's data file.
            filename_ts = strftime("_%Y_%m_%d_%H_%M", gmtime())

            entries_list = list()
            entry_dict = dict()
            d = feedparser.parse(rssLink)

            for entry in d.entries:

                # author not present in TOI
                # entry_dict['author'] = entry['author']
                entry_dict['title'] = entry['title']
                entry_dict['summary'] = entry['summary']
                entry_dict['link'] = entry['link']
                entry_dict['source_site'] = source_site
                # Stable article id derived from site + link; used for
                # dedup lookups below.
                entry_dict['_id'] = get_article_id(entry_dict['source_site'],
                                                   entry_dict['link'])

                article_details_dict = None

                # check if execution on Rasp Pi or not.
                # If yes, do file ops.
                # Else do DB ops.
                # if os.uname()[4].startswith("arm"):
                # check if ID already present in db. If yes, do not scrap URL and upsert
                if os.uname()[4].startswith("arm"):
                    if entry_dict['_id'] not in id_list_from_file:
                        print('datafile parse needed: ' + entry_dict['_id'])

                        article_details_dict = get_article_details(
                            source_site, entry_dict['link'],
                            entry['published'])
                        entry_dict['articleText'] = article_details_dict[
                            'articleText']
                        # entry_dict['published'] = article_details_dict['publishTimeUTC']

                        #fix needed for timezone conversion. now storing as GMT?
                        # Serialize the datetime to extended JSON, then parse
                        # the string back into a plain dict for file storage.
                        temp_string = json_util.dumps(
                            article_details_dict['publishTimeUTC'],
                            json_options=json_util.JSONOptions(tz_aware=True))
                        #convert string to dict and store
                        entry_dict['published'] = ast.literal_eval(temp_string)
                        entry_dict['logInsertTime'] = ast.literal_eval(
                            json_util.dumps(datetime.datetime.utcnow(),
                                            json_options=json_util.JSONOptions(
                                                tz_aware=True)))
                        entry_dict['dataInsertType'] = 'fileImportToDB'

                        #build list of dicts to be written to JSON file
                        # Deep copy because entry_dict is reused across loop
                        # iterations.
                        temp_dict = copy.deepcopy(entry_dict)
                        entries_list.append(temp_dict)

                        article_id_list.append(temp_dict['_id'])

                elif not os.uname()[4].startswith("arm"):
                    if not dbOps.checkDBforId(entry_dict['_id']):
                        print('mongo parse needed: ' + entry_dict['_id'])

                        article_details_dict = get_article_details(
                            source_site, entry_dict['link'],
                            entry['published'])
                        entry_dict['articleText'] = article_details_dict[
                            'articleText']
                        entry_dict['published'] = article_details_dict[
                            'publishTimeUTC']
                        entry_dict['dataInsertType'] = 'directWriteToDB'
                        #write each dict to DB. Consider bulk writing a list
                        dbOps.upsertToDB(entry_dict)

            # if list not empty, write to file
            if len(entries_list) > 0:
                dataFileOps.write_to_data_file(entries_list, filename_ts)

            #update article_id file for lookup in next run
            dataFileOps.file_update_article_id(article_id_list)
    except Exception as e:
        print('error caught in read_rss')
Exemplo n.º 19
0
def __dumps(obj, *args, **kwargs):
    """Wrap the saved original dumps, defaulting ``json_options`` to relaxed
    extended JSON when the caller does not supply one."""
    # Pop the caller's json_options (if any) so it is not passed twice.
    json_options = kwargs.pop("json_options", json_util.JSONOptions(json_mode=json_util.JSONMode.RELAXED))
    return __old_dumps(obj, json_options=json_options, *args, **kwargs)
Exemplo n.º 20
0
 def set_default_json_options():
     """Set bson.json_util's process-wide default options to decode
     datetimes as naive (tz_aware=False), and return the options object."""
     # how should we handle datetimes? This forces non timezone aware datetimes
     # TODO: Either throw exception when a tz aware datetime is received, or handle both correctly
     res = json_util.DEFAULT_JSON_OPTIONS = json_util.JSONOptions(tz_aware=False)
     return res