Пример #1
0
    def test_transfer(self):
        """
        Transfer all mocked collections and compare row counts.

        Drops the collection-map, company and employee tables, calls
        create_and_populate_company_mdb(), recreates the collection map
        table and runs Extractor.transfer() for mock.coll_names.
        Afterwards the row count of every relation in mock.rel_names must
        equal the document count of the collection at the same index of
        mock.coll_names.
        """
        cursor = pg.conn.cursor()
        try:
            # reset CM in the database
            cursor.execute(query["table_drop_purr_cm"])
            cursor.execute(query["table_drop_company"])
            cursor.execute(query["table_drop_employee"])
            create_and_populate_company_mdb()
            cm.create_table(pg, mock.coll_config)

            # collections which will be transferred
            collection_names = mock.coll_names

            coll_config = copy.deepcopy(mock.coll_config_company_employee)
            ex = extractor.Extractor(pg, mongo, mock.setup_pg,
                                     mock.settings, coll_config)
            ex.transfer(collection_names)

            for i, rel_name in enumerate(mock.rel_names):
                # The relation name comes from the test fixtures; it is an
                # identifier, so it cannot be passed as a query parameter.
                cursor.execute("SELECT count(*) FROM %s" % (rel_name))
                cnt_pg = cursor.fetchone()
                cnt_mongo = mongo[collection_names[i]].count()
                assert cnt_mongo == cnt_pg[0], (
                    "row count mismatch for %s: Postgres=%s MongoDB=%s"
                    % (rel_name, cnt_pg[0], cnt_mongo))
        finally:
            # release the cursor even when a check above fails
            cursor.close()
        del ex
Пример #2
0
    def test_transfer_coll(self):
        """
        Transfer a single collection and compare its size on both sides.

        Uses the first entry of mock.coll_names and the first entry of
        mock.rel_names; the test passes when the row count in Postgres
        equals the document count in MongoDB.
        """
        # TODO: check

        cur = pg.conn.cursor()

        # reset CM and both relations in the database
        for drop_key in ("table_drop_purr_cm",
                         "table_drop_company",
                         "table_drop_employee"):
            cur.execute(query[drop_key])
        create_and_populate_company_mdb()
        cm.create_table(pg, mock.coll_config)

        # the collection to transfer and the relation it lands in
        source_coll = mock.coll_names[0]
        target_rel = mock.rel_names[0]

        config_copy = copy.deepcopy(mock.coll_config_company_employee)
        transferrer = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, config_copy)
        transferrer.transfer_coll(source_coll)

        cur.execute("SELECT count(*) FROM %s" % (target_rel))
        pg_row = cur.fetchone()
        mongo_total = mongo[source_coll].count()
        print("Postgres:", pg_row[0])
        print("MongoDB:", mongo_total)

        cur.close()
        del transferrer
        assert mongo_total == pg_row[0]
Пример #3
0
    def test_update_coll_map_changed(self):
        """
        Collection map is changed and extractor.coll_map_cur needs to be
        updated.

        The collection map table is recreated from mock.coll_config while
        the extractor is constructed with
        mock.coll_config_company_employee. After update_coll_map() the
        extractor's coll_map_cur must equal mock.coll_config_db.
        """
        # purr_collection_map needs old values
        cursor = pg.conn.cursor()
        cursor.execute(query["table_drop_purr_cm"])

        # create table for CM in the database
        cm.create_table(pg, mock.coll_config)

        coll_config = copy.deepcopy(mock.coll_config_company_employee)

        ex = extractor.Extractor(pg, mongo, mock.setup_pg, mock.settings,
                                 coll_config)

        # this pulls the map from the db (mock.coll_config)
        ex.update_coll_map()

        # Print the value that is actually asserted against, so the debug
        # output of a failing run shows the real expectation.
        expected = mock.coll_config_db
        print("NEW", ex.coll_map_cur)
        print("MOCK", expected)
        cursor.close()

        res = (ex.coll_map_cur == expected)
        del ex
        assert res
Пример #4
0
    def test_update_multiple(self):
        """
        Update several documents at once (no unset values involved).

        Builds one update document per document found in the first mocked
        collection, applies them with Extractor.update_multiple() and
        compares the resulting rows (all attributes except the first)
        with mock.data_pg_company_updated_no_id.
        """
        # TODO: test when there is value in unset
        cur = pg.conn.cursor()

        # reset CM in the database
        cur.execute(query["table_drop_purr_cm"])
        cur.execute(query["table_drop_company"])

        create_and_populate_company_mdb()
        cm.create_table(pg, mock.coll_config)

        coll = mock.coll_names[0]
        rel = mock.rel_names[0]
        updated = mock.data_mdb_company_updated
        # every stored document gets the same updated field values
        docs = [
            {
                "_id": doc["_id"],
                "active": updated["active"],
                "signupCode": updated["signupCode"],
                "domains": updated["domains"]
            }
            for doc in mongo[coll].find()
        ]

        config_copy = copy.deepcopy(mock.coll_config_company_employee)
        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, config_copy)

        company_rel = relation.Relation(
            pg, mock.setup_pg["schema_name"], rel, True)
        attrs = mock.attrs_company
        company_rel.create(attrs, mock.types_company)

        ex.update_multiple(docs, company_rel, coll)

        cur.execute(
            "SELECT %s FROM %s order by id" % (", ".join(attrs[1:]), rel))
        expected_rows = mock.data_pg_company_updated_no_id
        actual_rows = cur.fetchall()
        print("MOCKED")
        print(expected_rows)
        print("RESULT")
        print(actual_rows)

        cur.close()
        del company_rel
        del ex
        assert expected_rows == actual_rows
Пример #5
0
    def test_table_untrack(self):
        """
        Untracking should remove employee from the collection map.

        The extractor is built with mock.coll_config_company_employee and
        table_untrack() is called with the company/employee map as the old
        map and mock.coll_config_db as the new one. Afterwards
        ex.coll_def must have as many entries as mock.coll_config, and
        for every entry that differs from the mock the ":columns" lists
        must still be equal.

        TODO:
        - check if the table is left in the PG database
        - try to insert new data to mongodb and check
          if its left out from the data transfer
        """
        create_and_populate_company_mdb()
        create_and_populate_employee_mdb()

        cursor = pg.conn.cursor()
        try:
            cursor.execute(query["table_drop_company"])
            cursor.execute(query["table_drop_employee"])
            cursor.execute(query["table_drop_purr_cm"])

            # create table for CM in the database
            cm.create_table(pg, mock.coll_config)
            coll_config = copy.deepcopy(mock.coll_config_company_employee)

            ex = extractor.Extractor(pg, mongo, mock.setup_pg,
                                     mock.settings, coll_config)

            coll_map_old = copy.deepcopy(
                mock.coll_config_db_company_employee)
            coll_map_new = copy.deepcopy(mock.coll_config_db)
            ex.table_untrack(coll_map_old, coll_map_new)

            mocked = copy.deepcopy(mock.coll_config)
            assert len(ex.coll_def) == len(mocked), (
                "collection definitions differ in size: NEW=%r OLD=%r"
                % (ex.coll_def, mocked))

            for k, v in ex.coll_def.items():
                # Entries may differ from the mock in other keys; only a
                # difference in ":columns" fails the test.
                if v != mocked[k]:
                    assert v[":columns"] == mocked[k][":columns"], (
                        "columns differ for %s: %r != %r"
                        % (k, v[":columns"], mocked[k][":columns"]))
        finally:
            # release the cursor even when a check above fails
            cursor.close()
        del ex
Пример #6
0
    def test_table_track(self):
        """
        Tracking should extend the extractor's collection definition.

        The extractor is built with mock.coll_config and table_track() is
        called with mock.coll_config_db as the old map and
        mock.coll_config_db_company_employee as the new one. Afterwards
        ex.coll_def must have as many entries as
        mock.coll_config_company_employee, and for every entry that
        differs from the mock the ":columns" lists must still be equal.

        TODO: see if the table is transfered to the PG database
        """
        create_and_populate_company_mdb()
        create_and_populate_employee_mdb()

        cursor = pg.conn.cursor()
        try:
            cursor.execute(query["table_drop_company"])
            cursor.execute(query["table_drop_employee"])
            cursor.execute(query["table_drop_purr_cm"])

            coll_config = copy.deepcopy(mock.coll_config)
            # create table for CM in the database
            cm.create_table(pg, mock.coll_config)
            ex = extractor.Extractor(pg, mongo, mock.setup_pg,
                                     mock.settings, coll_config)

            coll_map_old = mock.coll_config_db
            coll_map_new = mock.coll_config_db_company_employee
            ex.table_track(coll_map_old, coll_map_new)

            mocked = mock.coll_config_company_employee
            assert len(ex.coll_def) == len(mocked), (
                "collection definitions differ in size: NEW=%r MOCKED=%r"
                % (ex.coll_def, mocked))

            for k, v in ex.coll_def.items():
                # Entries may differ from the mock in other keys; only a
                # difference in ":columns" fails the test.
                if v != mocked[k]:
                    assert v[":columns"] == mocked[k][":columns"], (
                        "columns differ for %s: %r != %r"
                        % (k, v[":columns"], mocked[k][":columns"]))
        finally:
            # Drop the collection map and release the cursor on every
            # path, so a failing check does not skip the cleanup.
            try:
                cursor.execute(query["table_drop_purr_cm"])
            finally:
                cursor.close()
        del ex
Пример #7
0
    def test_prepare_attr_details_with_extra_props(self):
        """
        prepare_attr_details() with include_extra_props switched on.

        Feeds copies of the mocked company attributes and fields together
        with the lower-cased company types and the 'jsonb' extra-props
        type, and expects the result to equal mock.attr_details.
        """
        config_copy = copy.deepcopy(mock.coll_config_company_employee)
        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings, config_copy)
        extra_props_type = 'jsonb'

        ex.include_extra_props = True
        attrs = copy.deepcopy(mock.attrs_company)
        fields = copy.deepcopy(mock.fields_company)
        lowered_types = [name.lower() for name in mock.types_company]

        details = ex.prepare_attr_details(
            attrs, fields, lowered_types, extra_props_type)

        print("RESULT")
        print(details)
        print("MOCKED")
        print(mock.attr_details)
        del ex
        assert mock.attr_details == details
Пример #8
0
    def test_update_coll_map_unchanged(self):
        """
        Collection map is not changed: extractor.coll_map_cur stays the
        same.

        Both the collection map table and the extractor are built from
        mock.coll_config; after update_coll_map() the extractor's
        coll_map_cur must equal mock.coll_config_db.
        """
        cur = pg.conn.cursor()
        cur.execute(query["table_drop_purr_cm"])

        # create table for CM in the database
        cm.create_table(pg, mock.coll_config)

        ex = extractor.Extractor(
            pg, mongo, mock.setup_pg, mock.settings,
            copy.deepcopy(mock.coll_config))
        ex.update_coll_map()
        print("OLD", ex.coll_map_cur)
        print("NEW", mock.coll_config_db)
        cur.close()

        # compare before the extractor is dropped
        maps_match = (ex.coll_map_cur == mock.coll_config_db)
        del ex
        assert maps_match
Пример #9
0
from etl.transform import relation
from etl.extract import extractor
import unittest
from tests.meta import mock
from etl.extract import collection_map as cm
import copy

# Shared handles and fixture values taken from the mock module.
pg, mongo, query = mock.pg, mock.mongo, mock.query
rel_name_company = mock.rel_name_company
coll_name_company, coll_name_employee = (
    mock.coll_name_company,
    mock.coll_name_employee,
)
coll_conf_new = mock.coll_config_new
# Module-level extractor constructed with mock.coll_config.
ex = extractor.Extractor(
    pg, mongo, mock.setup_pg, mock.settings, mock.coll_config
)
pg_cm_attrs = mock.pg_coll_map_attrs


def create_and_populate_company_pg():
    cursor = pg.conn.cursor()
    cursor.execute(query["table_drop_purr_cm"])
    # create table for CM in the database
    cm.create_table(pg, mock.coll_config)

    cursor.execute(query["table_drop_company"])
    cursor.execute(query["table_create_company"])

    cursor.execute("""insert into company(
            id, active, domains, signup_code
            ) values(
Пример #10
0
from etl.extract import collection
from etl.extract import tailer
from etl.extract import extractor
from etl.extract import transfer_info
import unittest
from tests.meta import mock
from bson import ObjectId
import datetime

# Shared handles taken from the mock module.
mongo, pg, query = mock.mongo, mock.pg, mock.query
# Module-level extractor constructed with the company/employee config.
ex = extractor.Extractor(
    pg, mongo, mock.setup_pg, mock.settings,
    mock.coll_config_company_employee
)

settings_company = {
    ':columns': [{
        ':source': '_id',
        ':type': 'TEXT',
        'id': None
    }, {
        ':source': 'active',
        ':type': 'BOOLEAN',
        'active': None
    }, {
        ':source': 'domains',
        ':type': 'JSONB',
        'domains': None
    }, {
        ':source': 'signupCode',
        ':type': 'TEXT',
Пример #11
0
def start(settings, coll_map):
    """
    Starts Purr.

    Opens the Postgres and MongoDB connections, creates the collection
    map table, calls transfer_info.save_logs_to_db() and builds the
    extractor. With tailing enabled it runs the transfer in a
    TransferThread, waits for it to finish and then calls
    handle_coll_map_changes(); otherwise it calls transfer.start().

    Returns
    -------
    -

    Raises
    ------
    SystemExit
             : when the tailing branch is interrupted
             (KeyboardInterrupt / SystemExit) or fails with any other
             exception

    Parameters
    ----------
    settings : dict
             : basic settings for both PG and MongoDB
             (connection strings, schema name)

    coll_map : dict
             : config file for collections

    TODO
    ----
    - create table with attributes and types
    """

    logger.info("Starting Purr v%s ..." % get_version(), CURR_FILE)

    logger.info("PID=%s" % os.getpid())

    # NOTE(review): the original condition tested settings["tailing_from"]
    # twice. The duplicate was probably meant to be a different tailing
    # option - confirm against the settings schema. Dropping the repeated
    # operand leaves the result unchanged.
    mode_tailing = bool(settings["tailing"] or settings["tailing_from"])

    logger.info("TAILING=%s" % ("ON" if mode_tailing else "OFF"))

    setup_pg = settings["postgres"]
    setup_mdb = settings["mongo"]

    pg = postgres.PgConnection(setup_pg["connection"])
    mongo = mongodb.MongoConnection(setup_mdb)

    cm.create_table(pg, coll_map, setup_pg["schema_name"])
    transfer_info.save_logs_to_db(pg, setup_pg["schema_name"])

    ex = extractor.Extractor(pg, mongo.conn, setup_pg, settings, coll_map)

    start_date_time = datetime.utcnow()

    if mode_tailing:
        try:
            # first just transfer the data
            tailing_start = False
            thr_transfer = transfer.TransferThread(settings, coll_map, pg,
                                                   mongo, ex, tailing_start)
            thr_transfer.start()
            # wait until thread is finished
            thr_transfer.join()
            handle_coll_map_changes(settings, coll_map, pg, mongo, ex,
                                    start_date_time)
        except (KeyboardInterrupt, SystemExit):
            logger.error('Stopping transfer.', CURR_FILE)
            for t in THREADS:
                t.stop()
            raise SystemExit()

        # Bound to "err" rather than "ex": "except ... as ex" would rebind
        # the extractor variable and unbind it when the clause ends.
        except Exception as err:
            logger.error("Unable to start transfer thread. Details: %s" % err,
                         CURR_FILE)
            raise SystemExit()

    else:
        transfer.start(ex, coll_map)