Example #1
def test_insert_by_pandas_multi(conn_str, load_data):
    """
    Test the data by inserting from Pandas to_sql API
    """
    logging.info("Test case insert data from Pandas DataFrame with to_sql")
    start_time = time()

    with DBConnector(conn_str) as db:
        load_data.to_sql("kaggle_data",
                         db.engine,
                         index=False,
                         method='multi',
                         chunksize=10000,
                         if_exists='append')
        logging.info("Complated writing the data to DB")
        end_time = time()

        total_write_time = end_time - start_time

        tola_min, toal_seconds = divmod(total_write_time, 60)

        logging.info("Took {0} minutes {1} seconds to write the data".format(
            tola_min, toal_seconds))

        db.engine.connect().execute("TRUNCATE TABLE kaggle_data")

        logging.info("Truncated the table")
Example #2
def test_insert_multiple_update(sample_dataframe, conn_str):
    """
    Test inserting the same data frame twice.
    The second attempt is an update; since it is the same record
    and the columns have a PK restriction, this should fail.
    """
    with DBConnector(conn_str) as db_conn:
        db_conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))

        all_records = db_conn.db_session.query(UserData).all()
        logging.info("total recoreds in table {0}".format(len(all_records)))

        assert len(all_records) == 4

        logging.info("Inserting four records inserting the same records again")
        try:
            db_conn.db_session.bulk_update_mappings(
                UserData, sample_dataframe.to_dict(orient="records"))
        except IntegrityError as sqlinterr:
            logging.error(sqlinterr)
            db_conn.db_session.rollback()
            logging.info("Filed duplicate reod insert")

        logging.info("if you see this message this is done")
Example #3
def test_insert_multiple(sample_dataframe, conn_str):
    """
    Test inserting the same data frame twice.
    The second insert should fail.
    """
    with DBConnector(conn_str) as db_conn:
        logging.info("Inserting first set of data")
        db_conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))

        all_records = db_conn.db_session.query(UserData).all()
        logging.info("total recoreds in table {0}".format(len(all_records)))

        assert len(all_records) == 4

        logging.info("Inserting four records inserting the same records again")
        try:
            db_conn.db_session.bulk_insert_mappings(
                UserData, sample_dataframe.to_dict(orient="records"))
            logging.info("Compleated bulk update mapping")
        except IntegrityError as sqlinterr:
            logging.error(sqlinterr)
            db_conn.db_session.rollback()
            logging.info(
                "Failed duplicate record insert and rolled back the changes")

        db_conn.db_session.rollback()

        logging.info("if you see this message this is done")
Example #4
def test_insert_by_skipping_existing(conn_str, sample_dataframe,
                                     sample_dataframe_half):
    """
    Test inserting by skipping the existing records
    """

    with DBConnector(conn_str) as conn:
        logging.info("Insert base data")
        conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))
        logging.info("Inserted base data")
        conn.db_session.commit()

        try:
            logging.info("Trying to insert data with duplicates")
            conn.db_session.bulk_insert_mappings(
                UserData, sample_dataframe_half.to_dict(orient="records"))
        except IntegrityError as sqlerr:
            conn.db_session.rollback()
            integ_key_error = sqlerr.orig.diag.message_detail
            logging.info(sqlerr)
            primary_key, pk_val = parse_integrity_err(integ_key_error)

            pk_object = getattr(UserData, primary_key)

            existing_records = conn.db_session.query(pk_object).filter(
                pk_object.in_(
                    sample_dataframe_half[primary_key].to_list())).all()
            existing_records = list(itertools.chain(*existing_records))

            logging.info(existing_records)

            duplicate_eliminated_data = sample_dataframe_half[
                ~sample_dataframe_half[primary_key].isin(existing_records)]

            logging.info(
                "Found existing records and created a new DataFrame")

            conn.db_session.bulk_insert_mappings(
                UserData, duplicate_eliminated_data.to_dict(orient="records"))

            retrieved_all = conn.db_session.query(UserData).all()

            assert len(retrieved_all) == 7
            assert retrieved_all[0].user_name == "jagan"

            assert retrieved_all[-1].user_name == "vidya"

            #conn.db_session.commit()

            logging.info('Inserted the clean data and test complete')

    truncate_stmt = UserData.__table__.delete()
    conn.db_session.execute(truncate_stmt)

    conn.db_session.commit()
    logging.info("Cleaned the DB")
Example #5
def test_insert_read_by_pandas(conn_str, load_data):
    """
    Test the data by inserting from Pandas to_sql API
    """
    logging.info("Test case insert data from Pandas DataFrame with to_sql")
    start_time = time()

    with DBConnector(conn_str) as db:
        load_data.to_sql(
            "kaggle_data",
            db.engine,
            index=False,
            #method='multi',
            if_exists='append')
        logging.info("Complated writing the data to DB")
        end_time = time()

        total_write_time = end_time - start_time

        tola_min, toal_seconds = divmod(total_write_time, 60)

        logging.info("Took {0} minutes {1} seconds to write the data".format(
            tola_min, toal_seconds))

        logging.info("Start read testing with read_sql_table")
        read_start = time()
        try:
            data_back = pd.read_sql_table("kaggle_data", con=db.engine)
            logging.info("Complated the reading")
        except Exception as err:
            logging.error("error in read data {0}".format(err))
        read_end = time()

        total_read_min, read_sec = divmod(read_end - read_start, 60)

        logging.info("Took {0} minutes {1} seconds to read data".format(
            total_read_min, read_sec))

        logging.info("Start read testing with read_sql")
        read_start_two = time()
        try:
            data_back_two = pd.read_sql("""SELECT * FROM kaggle_data""",
                                        con=db.engine)
            logging.info("Complated the reading")
        except Exception as err:
            logging.error("error in read data {0}".format(err))
        read_end_two = time()

        total_read_min_two, read_sec_two = divmod(
            read_end_two - read_start_two, 60)

        logging.info("Took {0} minutes {1} seconds to read data".format(
            total_read_min_two, read_sec_two))

        logging.info("Truncating the table")
        db.engine.connect().execute("TRUNCATE TABLE kaggle_data")

        logging.info("Truncated the table")
Example #6
def test_get_pandas_schema(conn_str, sample_dataframe):
    """
    Test the behaviour of get_schema in Pandas.
    It is not accurate with respect to the schema in the table.
    """
    from pandas.io.sql import get_schema

    with DBConnector(conn_str) as conn:
        logging.info(get_schema(sample_dataframe, "userdata", con=conn.engine))
Example #7
def test_insert_single_dup(sample_dataframe_half, sample_dataframe, conn_str):
    """
    Test inserting a data frame that partially duplicates existing rows.
    The second insert should fail.
    """
    logging.info("Testing single duplicate case One")
    with DBConnector(conn_str) as db_conn:
        db_conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))
        logging.info("Inserted base data")
        try:
            db_conn.db_session.bulk_insert_mappings(
                UserData, sample_dataframe_half.to_dict(orient="records"))
        except IntegrityError as sqleint:
            logging.error(sqleint)
            db_conn.db_session.rollback()
            logging.info("Truncated the table")
    logging.info("Compleated the single duplicate test")
Example #8
def test_insert_read_by_model(conn_str, load_data):
    """
    Test inserting pandas data through a SQLAlchemy model
    """

    with DBConnector(conn_str) as db:
        logging.info("Starting the SQLAlchemy insert test")
        start_time = time()

        db.db_session.bulk_insert_mappings(KaggleData,
                                           load_data.to_dict(orient="records"))

        end_time = time()

        total_write_time = end_time - start_time

        total_min, total_seconds = divmod(total_write_time, 60)

        logging.info("Completed data insert in {0} minutes {1} seconds".format(
            total_min, total_seconds))

        db.db_session.commit()

        logging.info("Start the read operation")

        try:
            start_time = time()
            data_back = pd.read_sql(db.db_session.query(KaggleData).statement,
                                    con=db.engine)
            logging.info(data_back.head(2))
        except Exception as err:
            logging.error("Error in reading the data: {0}".format(err))

        read_end = time()
        total_read_min, read_sec = divmod(read_end - start_time, 60)

        logging.info("Took {0} minutes {1} seconds to read data".format(
            total_read_min, read_sec))

        truncate_stmt = KaggleData.__table__.delete()
        db.db_session.execute(truncate_stmt)
        db.db_session.commit()
        logging.info("truncated the table")
Example #9
def test_insert_single(sample_dataframe, conn_str):
    """
    Insert with no duplicates scenario
    """
    logging.info("Starting the no duplicate insert test")
    logging.info(sample_dataframe.head(2))
    with DBConnector(conn_str) as db_conn:

        truncate_stmt = UserData.__table__.delete()
        db_conn.db_session.execute(truncate_stmt)

        db_conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))

        all_records = db_conn.db_session.query(UserData).all()
        logging.info("total recoreds in table {0}".format(len(all_records)))
        assert len(all_records) == 4

        db_conn.db_session.rollback()

    logging.info("Compleated no deuplicate insert test")
Example #10
def test_update_with_pandas(sample_dataframe, sample_dataframe_update,
                            conn_str):
    """
    Test data update with pandas
    """
    logging.info("Testing single update case")
    with DBConnector(conn_str) as db_conn:
        db_conn.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))
        try:
            db_conn.db_session.bulk_update_mappings(
                UserData, sample_dataframe_update.to_dict(orient="records"))
            updated_record = db_conn.db_session.query(UserData).filter_by(
                user_name="jagan").first()
            assert updated_record.user_name == "jagan"
            assert updated_record.record_id == 13
        except IntegrityError as sqleint:
            logging.error(sqleint)
            logging.info("Truncated the table")
    db_conn.db_session.rollback()
    logging.info("Compleated the single duplicate test")
Example #11
def test_alchemy_integrity_error_capture(sample_dataframe, conn_str):
    """
    Capture SQLAlchemy error details demo
    """

    logging.info("Writing the initial data")

    with DBConnector(conn_str) as db:
        db.db_session.bulk_insert_mappings(
            UserData, sample_dataframe.to_dict(orient="records"))

        logging.info("insrted base data now duplicate entry time")

        try:
            db.db_session.bulk_insert_mappings(
                UserData, sample_dataframe.to_dict(orient="records"))
            logging.info("Inserted duplicate :-()")
        except IntegrityError as sqlerr:
            logging.error(
                "Error in inseting data. Table {0} Constraint {1}".format(
                    sqlerr.orig.diag.table_name,
                    sqlerr.orig.diag.constraint_name))
            logging.info("Cause of the trouble is {0} ".format(
                sqlerr.orig.diag.message_detail))
Example #12
def test_get_schema(load_data, conn_str):
    with DBConnector(conn_str) as conn:
        logging.info(get_schema(load_data, "kaggle_data", con=conn.engine))
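These examples also assume conn_str and load_data fixtures defined elsewhere (typically conftest.py), plus module-level imports such as logging, time, pandas, itertools, get_schema, and IntegrityError. A hypothetical conftest.py sketch; the DSN and CSV path are placeholders, not the author's values:

# Hypothetical conftest.py sketch; the DSN and CSV path are placeholders.
import pandas as pd
import pytest


@pytest.fixture(scope="session")
def conn_str():
    # Placeholder PostgreSQL connection string for the test database
    return "postgresql://user:password@localhost:5432/testdb"


@pytest.fixture(scope="session")
def load_data():
    # Placeholder path to the Kaggle CSV used by the write/read benchmarks
    return pd.read_csv("kaggle_dataset.csv")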