Example #1
def import_customer(last_pull_date):
    """
    ETL function for importing new customer data.
    :param last_pull_date: date from which retrieval can begin
    :return:
    """
    if not _check_webservice_status():
        logger.error(
            "Web service not available, not ETL integration of Customer")
        return
    url_target = "{0}/find/CustomerRevision".format(__etl_http_link)
    params = dumps({"lastUpdate": {
        "$gt": parse(last_pull_date)
    }},
                   json_options=JSONOptions(datetime_representation=2))

    r = requests.get(url_target,
                     params={"query": params},
                     auth=(__user_etl_mongo, __etl_etl_mongo))

    if r.status_code == 200:
        try:
            for elem in loads(_decompress(r.content)):
                insert_customer(DB_scoped_session, elem, mapping)
            DB_scoped_session.commit()
            logger.info(
                'Successful ETL Customer since {}'.format(last_pull_date))
        except Exception as e:
            logger.error(
                "Error in ETL task (import_customer)({0}): {1}".format(
                    last_pull_date, e))
            DB_scoped_session.rollback()
        finally:
            DB_scoped_session.close()
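
The magic number 2 passed to datetime_representation above is
DatetimeRepresentation.ISO8601; spelling out the enum makes the intent
clearer. A minimal, hedged sketch of the serialization this option produces
(the sample date is illustrative):

from datetime import datetime

from bson.json_util import DatetimeRepresentation, JSONOptions, dumps

opts = JSONOptions(datetime_representation=DatetimeRepresentation.ISO8601)
# Datetimes become {"$date": "..."} strings that the web service can
# parse back into real dates.
print(dumps({"lastUpdate": {"$gt": datetime(2021, 1, 1)}},
            json_options=opts))
# {"lastUpdate": {"$gt": {"$date": "2021-01-01T00:00:00Z"}}}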
Example #2
def create_tests():
    assert os.path.isdir(_TEST_PATH)
    for dirpath, _, filenames in os.walk(_TEST_PATH):
        dirname = os.path.split(dirpath)[-1]

        for filename in filenames:
            test_type, ext = os.path.splitext(filename)
            if ext != '.json':
                continue

            with open(os.path.join(dirpath, filename)) as scenario_stream:
                opts = JSONOptions(document_class=ScenarioDict)
                scenario_def = json_util.loads(scenario_stream.read(),
                                               json_options=opts)

            # Construct test from scenario.
            for test in scenario_def['tests']:
                test_name = 'test_%s_%s_%s' % (
                    dirname, test_type.replace(
                        "-", "_"), str(test['description'].replace(" ", "_")))

                new_test = create_test(scenario_def, test)
                new_test = env.require(lambda: not test.get('skipReason'),
                                       test.get('skipReason'), new_test)

                if 'secondary' in test_name:
                    new_test = env.require(lambda: env.secondaries,
                                           'No secondaries', new_test)

                # In Python 2, cast test_name from unicode to str.
                new_test.__name__ = str(test_name)
                setattr(MotorTransactionTest, new_test.__name__, new_test)
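
The document_class option used when loading the scenario files controls the
mapping type json_util.loads returns. A small hedged illustration, with SON
standing in for the suite's ScenarioDict:

from bson import SON
from bson.json_util import JSONOptions, loads

opts = JSONOptions(document_class=SON)
doc = loads('{"b": 2, "a": 1}', json_options=opts)
print(type(doc).__name__)  # SON
print(list(doc))           # ['b', 'a'] -- field order is preserved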
Example #3
    def test_json_options_with_options(self):
        opts = JSONOptions(
            datetime_representation=DatetimeRepresentation.NUMBERLONG,
            json_mode=JSONMode.LEGACY)
        self.assertEqual(opts.datetime_representation,
                         DatetimeRepresentation.NUMBERLONG)
        opts2 = opts.with_options(
            datetime_representation=DatetimeRepresentation.ISO8601,
            json_mode=JSONMode.LEGACY)
        self.assertEqual(opts2.datetime_representation,
                         DatetimeRepresentation.ISO8601)

        opts = JSONOptions(strict_number_long=True, json_mode=JSONMode.LEGACY)
        self.assertEqual(opts.strict_number_long, True)
        opts2 = opts.with_options(strict_number_long=False)
        self.assertEqual(opts2.strict_number_long, False)

        opts = json_util.CANONICAL_JSON_OPTIONS
        self.assertNotEqual(opts.uuid_representation,
                            UuidRepresentation.JAVA_LEGACY)
        opts2 = opts.with_options(
            uuid_representation=UuidRepresentation.JAVA_LEGACY)
        self.assertEqual(opts2.uuid_representation,
                         UuidRepresentation.JAVA_LEGACY)
        self.assertEqual(opts2.document_class, dict)
        opts3 = opts2.with_options(document_class=SON)
        self.assertEqual(opts3.uuid_representation,
                         UuidRepresentation.JAVA_LEGACY)
        self.assertEqual(opts3.document_class, SON)
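
One property this test relies on implicitly: with_options never mutates the
receiver, it returns a fresh JSONOptions with the overrides applied. A
hedged sketch:

from bson import SON
from bson.json_util import CANONICAL_JSON_OPTIONS

opts2 = CANONICAL_JSON_OPTIONS.with_options(document_class=SON)
print(CANONICAL_JSON_OPTIONS.document_class is dict)  # True -- unchanged
print(opts2.document_class is SON)                    # True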
Example #4
    def test_numberlong(self):
        jsn = '{"weight": {"$numberLong": "65535"}}'
        self.assertEqual(json_util.loads(jsn)['weight'], Int64(65535))
        self.assertEqual(json_util.dumps({"weight": Int64(65535)}),
                         '{"weight": 65535}')
        json_options = JSONOptions(strict_number_long=True,
                                   json_mode=JSONMode.LEGACY)
        self.assertEqual(
            json_util.dumps({"weight": Int64(65535)},
                            json_options=json_options), jsn)
Example #5
    def test_uuid(self):
        doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
        uuid_legacy_opts = LEGACY_JSON_OPTIONS.with_options(
            uuid_representation=UuidRepresentation.PYTHON_LEGACY)
        self.round_trip(doc, json_options=uuid_legacy_opts)
        self.assertEqual(
            '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
            json_util.dumps(doc, json_options=LEGACY_JSON_OPTIONS))
        self.assertEqual(
            '{"uuid": '
            '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}',
            json_util.dumps(
                doc,
                json_options=STRICT_JSON_OPTIONS.with_options(
                    uuid_representation=UuidRepresentation.PYTHON_LEGACY)))
        self.assertEqual(
            '{"uuid": '
            '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}',
            json_util.dumps(doc,
                            json_options=JSONOptions(
                                strict_uuid=True,
                                json_mode=JSONMode.LEGACY,
                                uuid_representation=STANDARD)))
        self.assertEqual(
            doc,
            json_util.loads(
                '{"uuid": '
                '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}',
                json_options=uuid_legacy_opts))
        for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) -
                                    {UuidRepresentation.UNSPECIFIED}):
            options = JSONOptions(strict_uuid=True,
                                  json_mode=JSONMode.LEGACY,
                                  uuid_representation=uuid_representation)
            self.round_trip(doc, json_options=options)
            # Ignore UUID representation when decoding BSON binary subtype 4.
            self.assertEqual(
                doc,
                json_util.loads(
                    '{"uuid": '
                    '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}',
                    json_options=options))
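
The repeated base64 payload in these assertions is just the UUID's sixteen
bytes: PYTHON_LEGACY (subtype "03") and STANDARD (subtype "04") happen to
encode this UUID with the same byte order, so only the subtype differs. A
quick hedged check:

import base64
import uuid

u = uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')
print(base64.b64encode(u.bytes).decode())  # 9HrBC1jMQ3KlZw4CssPUeQ==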
Example #6
    def write_record(self, obj: dict) -> None:
        """
        Prints the document via a INFO log

        :param obj: dict.
        :return:
        """
        opts = JSONOptions(
            strict_number_long=False,
            datetime_representation=DatetimeRepresentation.ISO8601,
            json_mode=JSONMode.RELAXED)
        obj_str = json_util.dumps(obj, json_options=opts)
        logger.info(extra=dict(Func='Record',
                               Op='Tail',
                               Attributes={
                                   'identifier': self.identifier,
                                   'record': obj_str
                               }),
                    msg=obj_str)
Example #7
    def assertBinaryUUID(self, val):
        self.assertIsInstance(val, Binary)
        self.assertEqual(val.subtype, UUID_SUBTYPE)


# Location of JSON test files.
BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                    'client-side-encryption')
SPEC_PATH = os.path.join(BASE, 'spec')

OPTS = CodecOptions(uuid_representation=STANDARD)

# Use SON to preserve the order of fields while parsing json. Use
# tz_aware=False to match how CodecOptions decodes dates.
JSON_OPTS = JSONOptions(document_class=SON,
                        uuid_representation=STANDARD,
                        tz_aware=False)


def read(*paths):
    with open(os.path.join(BASE, *paths)) as fp:
        return fp.read()


def json_data(*paths):
    return json_util.loads(read(*paths), json_options=JSON_OPTS)


def bson_data(*paths):
    return encode(json_data(*paths), codec_options=OPTS)
Example #8
def handler(event, context):
    """
    Perform a complete backup of a MongoDB database to an S3 bucket. Each
    collection is stored in a separate JSON file, as output by `mongodump`.
    Required environment variables are MONGO_URI and BUCKET_NAME. Optionally,
    MONGO_DATABASE, BUCKET_FOLDER and COLLECTION_BLACKLIST can be set.
    """
    collection_blacklist = environ.get("COLLECTION_BLACKLIST")
    bucket_folder = environ.get("BUCKET_FOLDER", "backups")
    bucket_name = environ["BUCKET_NAME"]
    db_uri = environ["MONGO_URI"]

    if environ.get("MONGO_URI_IS_ENCRYPTED"):
        from base64 import b64decode

        kms = boto.client("kms")
        decrypted = kms.decrypt(CiphertextBlob=b64decode(db_uri))
        db_uri = decrypted["Plaintext"].decode()

    db_name = environ.get("MONGO_DATABASE")
    if db_name is None:
        from urllib.parse import urlparse

        loc = urlparse(db_uri)
        db_name = loc.path.strip("/")

    LOGGER.info("Backing up collections from database %s in bucket %s",
                db_name, bucket_name)

    s3.meta.client.head_bucket(
        Bucket=bucket_name)  # Check that the given bucket actually exists.

    client = MongoClient(db_uri)
    database = client.get_database(db_name)

    skip = ([s.strip() for s in collection_blacklist.split(",")]
            if collection_blacklist else [])
    eligible_collections = [
        name for name in database.collection_names() if name not in skip
    ]

    json_options = JSONOptions(
        datetime_representation=DatetimeRepresentation.ISO8601)

    def write_all_docs(collection_name, writer):
        for doc in database.get_collection(collection_name).find():
            writer.write(dumps(doc, json_options=json_options) + "\n")

    for collection_name in eligible_collections:
        if environ.get("IN_MEMORY"):
            writer = StringIO()
            write_all_docs(collection_name, writer)

            s3.Bucket(bucket_name).put_object(
                Body=writer.getvalue().encode(),
                Key="{}/{}.json".format(bucket_folder, collection_name),
            )

        else:
            with open(temp_filepath, "w") as writer:
                write_all_docs(collection_name, writer)

            s3.Bucket(bucket_name).upload_file(
                temp_filepath, "{}/{}.json".format(bucket_folder,
                                                   collection_name))

        LOGGER.info("Done backing up collection {}".format(collection_name))
Example #9
import json
import math
from datetime import datetime
from enum import Enum
from typing import Any, Tuple
from uuid import UUID

import numpy as np
from bson import SON, ObjectId
from bson.json_util import JSONOptions, JSONMode, dumps, loads

__all__ = ['json_dumps', 'json_loads']

JSON_OPTIONS = JSONOptions(json_mode=JSONMode.RELAXED)
JSON_OPTIONS.strict_uuid = False  # do not move it to the constructor above!


def _json_convert(o: Any, no_dollar_field: bool = False) -> Any:
    if isinstance(o, bool):
        # bool is a subclass of int; leave it untouched so True is not
        # converted to 1.
        pass
    elif isinstance(o, Enum):
        o = _json_convert(o.value, no_dollar_field)
    elif hasattr(o, 'items'):
        o = SON((k, _json_convert(v, no_dollar_field)) for k, v in o.items())
    elif hasattr(o, '__iter__') and not isinstance(o,
                                                   (str, bytes, np.ndarray)):
        o = [_json_convert(v, no_dollar_field) for v in o]
    elif isinstance(o, str):
        o = str(o)
    return o
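
A small hedged demo of the converter (assuming the truncated tail simply
falls through to return o, as restored above):

class Color(Enum):
    RED = 'red'

print(_json_convert({'c': Color.RED, 'ok': True, 'xs': (1, 2)}))
# SON([('c', 'red'), ('ok', True), ('xs', [1, 2])])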
Example #10
            "key": "key",
            "endpoint": "example.com"
        }
        encrypter.create_data_key("aws", master_key=master_key)
        self.assertEqual("example.com:443", mock_key_vault.kms_endpoint)


def read(filename, **kwargs):
    with open(os.path.join(DATA_DIR, filename), **kwargs) as fp:
        return fp.read()


OPTS = CodecOptions(uuid_representation=STANDARD)

# Use SON to preserve the order of fields while parsing json.
JSON_OPTS = JSONOptions(document_class=SON, uuid_representation=STANDARD)


def json_data(filename):
    return json_util.loads(read(filename), json_options=JSON_OPTS)


def bson_data(filename):
    return BSON.encode(json_data(filename), codec_options=OPTS)


def http_data(filename):
    data = read(filename, mode='rb')
    return data.replace(b'\n', b'\r\n')

Example #11
    def test_datetime(self):
        tz_aware_opts = json_util.DEFAULT_JSON_OPTIONS.with_options(
            tz_aware=True)
        # only millis, not micros
        self.round_trip(
            {"date": datetime.datetime(2009, 12, 9, 15, 49, 45, 191000, utc)},
            json_options=tz_aware_opts)
        self.round_trip(
            {"date": datetime.datetime(2009, 12, 9, 15, 49, 45, 191000)})

        for jsn in [
                '{"dt": { "$date" : "1970-01-01T00:00:00.000+0000"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000000+0000"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000+00:00"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00:00"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000Z"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000000Z"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00Z"}}',
                '{"dt": {"$date": "1970-01-01T00:00:00.000"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00"}}',
                '{"dt": { "$date" : "1970-01-01T00:00:00.000000"}}',
                '{"dt": { "$date" : "1969-12-31T16:00:00.000-0800"}}',
                '{"dt": { "$date" : "1969-12-31T16:00:00.000000-0800"}}',
                '{"dt": { "$date" : "1969-12-31T16:00:00.000-08:00"}}',
                '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08:00"}}',
                '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08"}}',
                '{"dt": { "$date" : "1970-01-01T01:00:00.000+0100"}}',
                '{"dt": { "$date" : "1970-01-01T01:00:00.000000+0100"}}',
                '{"dt": { "$date" : "1970-01-01T01:00:00.000+01:00"}}',
                '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01:00"}}',
                '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01"}}'
        ]:
            self.assertEqual(
                EPOCH_AWARE,
                json_util.loads(jsn, json_options=tz_aware_opts)["dt"])
            self.assertEqual(EPOCH_NAIVE, json_util.loads(jsn)["dt"])

        dtm = datetime.datetime(1, 1, 1, 1, 1, 1, 0, utc)
        jsn = '{"dt": {"$date": -62135593139000}}'
        self.assertEqual(
            dtm,
            json_util.loads(jsn, json_options=tz_aware_opts)["dt"])
        jsn = '{"dt": {"$date": {"$numberLong": "-62135593139000"}}}'
        self.assertEqual(
            dtm,
            json_util.loads(jsn, json_options=tz_aware_opts)["dt"])

        # Test dumps format
        pre_epoch = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000, utc)}
        post_epoch = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)}
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch))
        self.assertEqual('{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}',
                         json_util.dumps(post_epoch))
        self.assertEqual(
            '{"dt": {"$date": -62135593138990}}',
            json_util.dumps(pre_epoch, json_options=LEGACY_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": 63075661010}}',
            json_util.dumps(post_epoch, json_options=LEGACY_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch, json_options=STRICT_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}',
            json_util.dumps(post_epoch, json_options=STRICT_JSON_OPTIONS))

        number_long_options = JSONOptions(
            datetime_representation=DatetimeRepresentation.NUMBERLONG,
            json_mode=JSONMode.LEGACY)
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "63075661010"}}}',
            json_util.dumps(post_epoch, json_options=number_long_options))
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch, json_options=number_long_options))

        # ISO8601 mode assumes naive datetimes are UTC
        pre_epoch_naive = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000)}
        post_epoch_naive = {
            "dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000)
        }
        self.assertEqual(
            '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
            json_util.dumps(pre_epoch_naive, json_options=STRICT_JSON_OPTIONS))
        self.assertEqual(
            '{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}',
            json_util.dumps(post_epoch_naive,
                            json_options=STRICT_JSON_OPTIONS))

        # Test tz_aware and tzinfo options
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
                json_options=tz_aware_opts)["dt"])
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
                json_options=JSONOptions(tz_aware=True, tzinfo=utc))["dt"])
        self.assertEqual(
            datetime.datetime(1972, 1, 1, 1, 1, 1, 10000),
            json_util.loads(
                '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
                json_options=JSONOptions(tz_aware=False))["dt"])
        self.round_trip(pre_epoch_naive,
                        json_options=JSONOptions(tz_aware=False))

        # Test a non-utc timezone
        pacific = FixedOffset(-8 * 60, 'US/Pacific')
        aware_datetime = {
            "dt": datetime.datetime(2002, 10, 27, 6, 0, 0, 10000, pacific)
        }
        self.assertEqual(
            '{"dt": {"$date": "2002-10-27T06:00:00.010-0800"}}',
            json_util.dumps(aware_datetime, json_options=STRICT_JSON_OPTIONS))
        self.round_trip(aware_datetime,
                        json_options=JSONOptions(json_mode=JSONMode.LEGACY,
                                                 tz_aware=True,
                                                 tzinfo=pacific))
        self.round_trip(
            aware_datetime,
            json_options=JSONOptions(
                datetime_representation=DatetimeRepresentation.ISO8601,
                json_mode=JSONMode.LEGACY,
                tz_aware=True,
                tzinfo=pacific))
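
The legacy millisecond literals in this test are easy to sanity-check by
hand; a hedged sketch for the post-epoch value:

import datetime

from bson.tz_util import utc

epoch = datetime.datetime(1970, 1, 1, tzinfo=utc)
dt = datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)
print((dt - epoch) // datetime.timedelta(milliseconds=1))  # 63075661010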
Example #12
                         USER_DEFINED_SUBTYPE, UuidRepresentation, STANDARD)
from bson.code import Code
from bson.dbref import DBRef
from bson.int64 import Int64
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.tz_util import FixedOffset, utc

from test import unittest, IntegrationTest

STRICT_JSON_OPTIONS = JSONOptions(
    strict_number_long=True,
    datetime_representation=DatetimeRepresentation.ISO8601,
    strict_uuid=True,
    json_mode=JSONMode.LEGACY)


class TestJsonUtil(unittest.TestCase):
    def round_tripped(self, doc, **kwargs):
        return json_util.loads(json_util.dumps(doc, **kwargs), **kwargs)

    def round_trip(self, doc, **kwargs):
        self.assertEqual(doc, self.round_tripped(doc, **kwargs))

    def test_basic(self):
        self.round_trip({"hello": "world"})

    def test_json_options_with_options(self):
Example #13
    def get_formatted_params(self, params):
        return dumps(params,
                     json_options=JSONOptions(datetime_representation=2))
Example #14
def import_consolidation(serial_number, last_pull_date):
    """
    ETL function for importing new consolidation data.
    :param serial_number: serial number to retrieve
    :param last_pull_date: date from which retrieval can begin
    :return:
    """
    if not _check_webservice_status():
        logger.error(
            "Web service not available, not ETL integration of consolidation for {}"
            .format(serial_number))
        return
    launch_purge = False
    url_target = "{0}/aggregate/IBAutomation".format(__etl_http_link)
    params = dumps([{
        "$unwind": "$data.resultRows"
    }, {
        "$match": {
            "data.serialNumber": serial_number,
            "data.dwibInformationType": {
                "$in": [
                    "EltXAConsolidation", "EltMGConsolidation",
                    "EltCTConsolidation", "EltRFConsolidation"
                ]
            },
            "receptionDate": {
                "$gt": parse(last_pull_date)
            }
        }
    }],
                   json_options=JSONOptions(datetime_representation=2))

    r = requests.get(url_target,
                     params={"query": params},
                     auth=(__user_etl_mongo, __etl_etl_mongo))

    if r.status_code == 200:
        try:
            _list_to_insert = []
            for elem in loads(_decompress(r.content)):
                # upsert_consolidation(DB_scoped_session, elem, mapping)
                _list_to_insert.append(construct_consolidation(elem, mapping))

            DB_scoped_session.bulk_save_objects(_list_to_insert)
            if DB_scoped_session.query(Customer).filter(
                    Customer.serial_number == serial_number, Customer.is_last
                    == True).count() != 0:
                DB_scoped_session.query(Customer).filter(
                    Customer.serial_number == serial_number,
                    Customer.is_last == True).update({"is_monitored": 1})
            DB_scoped_session.commit()
            launch_purge = True
            logger.info(
                'Successful ETL consolidation for {0} since {1}'.format(
                    serial_number, last_pull_date))
        except Exception as e:
            logger.error(
                "Error in ETL task (import_consolidation)({0}-{1}): {2}".
                format(serial_number, last_pull_date, e))
            DB_scoped_session.rollback()
        finally:
            DB_scoped_session.close()

    if launch_purge and __ACTIVATED_PURGE:
        url_target = "{0}/delete/IBAutomation".format(__etl_http_link)
        params = dumps(
            {
                "data.serialNumber": serial_number,
                "data.dwibInformationType": {
                    "$in": [
                        "EltXAConsolidation", "EltMGConsolidation",
                        "EltCTConsolidation", "EltRFConsolidation"
                    ]
                },
                "receptionDate": {
                    "$lt": parse(last_pull_date)
                }
            },
            json_options=JSONOptions(datetime_representation=2))

        r = requests.delete(url_target,
                            params={"query": params},
                            auth=(__user_etl_mongo, __etl_etl_mongo))

        logger.warning(
            "ETL consolidation purge task ->status_code {0} : {1}".format(
                r.status_code, r.content))
Example #15
            "key": "key",
            "endpoint": "example.com"
        }
        encrypter.create_data_key("aws", master_key=master_key)
        self.assertEqual("example.com:443", mock_key_vault.kms_endpoint)


def read(filename, **kwargs):
    with open(os.path.join(DATA_DIR, filename), **kwargs) as fp:
        return fp.read()


OPTS = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED)

# Use SON to preserve the order of fields while parsing json.
JSON_OPTS = JSONOptions(document_class=SON,
                        uuid_representation=UuidRepresentation.UNSPECIFIED)


def json_data(filename):
    return json_util.loads(read(filename), json_options=JSON_OPTS)


def bson_data(filename):
    return bson.encode(json_data(filename), codec_options=OPTS)


def http_data(filename):
    data = read(filename, mode='rb')
    return data.replace(b'\n', b'\r\n')