示例#1
0
def create_view(view_name, dest_conn=None):
    """
    Checks if view exists before getting a sample json and creating a view based on the table_name
        :param view_name: table to sample from...also name of destination view to be created
        :param dest_conn: Destination connection
        :param src_conn: Source connection
        :return: True if view created, false if not
    """
    view_count_sql = view_count_query(view_name)
    view_count = get_rows(view_count_sql, dest_conn)[0].count
    if view_count == 0:
        sample_rows = sample_rows_query(view_name)
        rows = get_rows(sample_rows, dest_conn)
        if len(rows) > 0:
            print(rows[0])
            sample_json = rows[0].fields_json['f1']
            fields = ""
            for k, v in sample_json.items():
                fields += "fields #>> '{f1,%s}' as %s, " % (k, k)

            fields = fields[:-2]
            print('Creating view %s' % view_name)
            view_sql = view_create_query(D_TABLE, fields, view_name)
            execute(view_sql, dest_conn)
            return True
    return False
示例#2
0
def test_get_rows_with_parameters(pgtestdb_test_tables, pgtestdb_conn,
                                  test_table_data):
    # parameters=None is tested by default in other tests

    # Bind by index
    sql = "SELECT * FROM src where ID = %s"
    result = get_rows(sql, pgtestdb_conn, parameters=(1, ))
    assert result == [test_table_data[0]]

    # Bind by name
    sql = "SELECT * FROM src where ID = %(identifier)s"
    result = get_rows(sql, pgtestdb_conn, parameters={'identifier': 1})
    assert result == [test_table_data[0]]
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    # Arrange
    duplicate_id_row_sql = """
       INSERT INTO dest (id)
       VALUES (
         1
         )""".strip()
    execute(duplicate_id_row_sql, testdb_conn)

    # Act
    errors = []
    copy_table_rows('src',
                    testdb_conn,
                    testdb_conn,
                    target='dest',
                    on_error=errors.extend)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Check that first row was caught as error
    row, exception = errors[0]
    assert row.id == 1
    assert "unique" in str(exception).lower()

    # Check that other rows were inserted correctly
    assert result[1:] == test_table_data[1:]
示例#4
0
def test_insert_rows_on_error(pgtestdb_conn, pgtestdb_test_tables,
                              pgtestdb_insert_sql, test_table_data,
                              commit_chunks):
    # Parameterized to ensure success with and without commit_chunks
    # Arrange
    insert_sql = pgtestdb_insert_sql.replace('src', 'dest')
    # Create duplicated rows to data that will fail primary key check
    duplicated_rows = test_table_data * 2

    # Act
    errors = []
    executemany(insert_sql,
                pgtestdb_conn,
                duplicated_rows,
                on_error=errors.extend,
                commit_chunks=commit_chunks)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data

    # Assert full set of failed rows failing unique constraint
    failed_rows, exceptions = zip(*errors)
    assert set(failed_rows) == set(test_table_data)
    assert all(['unique' in str(e).lower() for e in exceptions])
def test_copy_rows_with_dict_row_factory(pgtestdb_conn, pgtestdb_test_tables,
                                         pgtestdb_insert_sql, test_table_data):
    # Arrange
    select_sql = "SELECT * FROM src"
    insert_sql = """
        INSERT INTO dest (id, value, simple_text, utf8_text, day, date_time)
        VALUES (
            %(id)s, %(value)s, %(simple_text)s,
            %(utf8_text)s, %(day)s, %(date_time)s
        );
    """

    expected = [(1001, 1.234, 'TEXT', 'Öæ° z', date(2018, 12, 7),
                 datetime(2018, 12, 7, 13, 1, 59)),
                (1002, 2.234, 'TEXT', 'Öæ° z', date(2018, 12, 8),
                 datetime(2018, 12, 8, 13, 1, 59)),
                (1003, 2.234, 'TEXT', 'Öæ° z', date(2018, 12, 9),
                 datetime(2018, 12, 9, 13, 1, 59))]

    # Act
    copy_rows(select_sql,
              pgtestdb_conn,
              insert_sql,
              pgtestdb_conn,
              transform=transform_modify_dict,
              row_factory=dict_row_factory)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == expected
def test_get_rows_with_parameters(test_tables, testdb_conn, test_table_data):
    # parameters=None is tested by default in other tests

    # Bind by index
    sql = "SELECT * FROM src where ID = ?"
    result = get_rows(sql, testdb_conn, parameters=(1, ))
    assert len(result) == 1
    assert result[0].id == 1
def test_load_named_tuples(testdb_conn, test_tables, test_table_data):
    # Act
    load('dest', testdb_conn, test_table_data)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)
    assert result == test_table_data
示例#8
0
def get_rows_func(query, conn):
    if engine == ENGINES['mysql']:
        cursor = conn.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()
        return rows
    rows = get_rows(query, conn)
    return rows
示例#9
0
def test_copy_table_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest')

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    assert result == test_table_data
示例#10
0
def test_load_named_tuples_chunk_size(pgtestdb_conn, pgtestdb_test_tables,
                                      test_table_data, chunk_size):
    # Act
    load('dest', pgtestdb_conn, test_table_data, chunk_size=chunk_size)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data
示例#11
0
def test_get_rows_with_transform(pgtestdb_test_tables, pgtestdb_conn):
    sql = "SELECT * FROM src"

    def my_transform(rows):
        # Simple transform function that changes size and number of rows
        return [row.id for row in rows if row.id > 1]

    result = get_rows(sql, pgtestdb_conn, transform=my_transform)
    assert result == [2, 3]
示例#12
0
def test_execute_happy_path(pgtestdb_test_tables, pgtestdb_conn):
    # Arrange
    sql = "DELETE FROM src;"

    # Act
    execute(sql, pgtestdb_conn)

    # Assert
    result = get_rows('SELECT * FROM src;', pgtestdb_conn)
    assert result == []
def test_copy_table_rows_happy_path_fast_true(test_tables, testdb_conn,
                                              testdb_conn2, test_table_data):
    # Note: ODBC driver requires separate connections for source and destination,
    # even if they are the same database.
    # Arrange and act
    copy_table_rows('src', testdb_conn, testdb_conn2, target='dest')

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)
    assert result == test_table_data
示例#14
0
def test_load_dicts(pgtestdb_conn, pgtestdb_test_tables, test_table_data):
    # Arrange
    data_as_dicts = [row._asdict() for row in test_table_data]

    # Act
    load('dest', pgtestdb_conn, data_as_dicts)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data
示例#15
0
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    assert result == test_table_data
示例#16
0
def copy_src_to_dest():
    delete_sql = table_delete_query(D_TABLE)  # USE THIS TO CLEAR DESTINATION FOR IDEMPOTENCE

    src_conn = get_source_connection()
    print('Connected to source')
    dest_conn = get_destination_connection()
    print('Connected to destination')

    s_tables = []
    if DCLEAR:
        execute(delete_sql, dest_conn)
        print('Cleared destination')

    if engine == ENGINES['mysql']:
        mysql_copy_src_to_dest(src_conn, s_tables, dest_conn)
    else:
        if S_TABLES == '__all__':
            tables_query = get_tables_query()
            rows = get_rows(tables_query, src_conn)
            for row in rows:
                s_tables.append(row.tablename)
        else:
            s_tables = S_TABLES.split(",")

        for S_DB_TABLE in s_tables:
            record_count = get_rows("select count(*) from {0}".format(S_DB_TABLE), src_conn)[0].count
            offset = 0
            limit = 50000
            print('Copying {0} records from {1}'.format(record_count, S_DB_TABLE))
            while record_count > 0:
                select_sql = table_select_query(S_DB_TABLE, src_conn, offset=offset, limit=limit)
                print('Table: {} | Records remaining: {} | Limit: {} | Offset: {} '.format(S_DB_TABLE, record_count,limit, offset))
                insert_sql = table_insert_query(D_TABLE)
                copy_rows(select_sql, src_conn, insert_sql, dest_conn)
                record_count -= limit
                offset += limit

            create_view(S_DB_TABLE, dest_conn)

    refresh_mat_views(dest_conn)
示例#17
0
def test_execute_with_params(pgtestdb_test_tables, pgtestdb_conn,
                             test_table_data):
    # Arrange
    sql = "DELETE FROM src WHERE id = %s;"
    params = [1]
    expected = test_table_data[1:]

    # Act
    execute(sql, pgtestdb_conn, parameters=params)

    # Assert
    result = get_rows('SELECT * FROM src;', pgtestdb_conn)
    assert result == expected
def test_copy_rows_happy_path_fast_true(test_tables, testdb_conn, testdb_conn2,
                                        test_table_data):
    # Note: ODBC driver requires separate connections for source and destination,
    # even if they are the same database.
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn2)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)
    assert result == test_table_data
示例#19
0
def test_copy_table_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest')

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Fix result date and datetime strings to native classes
    fixed_dates = []
    for row in result:
        fixed_dates.append((*row[:4], row.DAY.date(), row.DATE_TIME))

    assert fixed_dates == test_table_data
示例#20
0
def test_insert_rows_chunked(pgtestdb_conn, pgtestdb_test_tables,
                             pgtestdb_insert_sql, test_table_data, monkeypatch,
                             chunk_size):
    # Arrange
    monkeypatch.setattr('etlhelper.etl.CHUNKSIZE', chunk_size)
    insert_sql = pgtestdb_insert_sql.replace('src', 'dest')

    # Act
    executemany(insert_sql, pgtestdb_conn, test_table_data)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data
示例#21
0
def refresh_mat_views(dest_conn):
    """
    Checks if refresh_matviews() function is defined in destination database before calling it
    :param dest_conn: Destination connection
    :return: True if materialized views refreshed, false if not.
    """
    count_query = "SELECT count(*)FROM information_schema.routines where routine_name = 'refresh_matviews';"
    counts = get_rows(count_query, dest_conn)[0].count
    if counts > 0:
        print('Refreshing materialized views')
        q = "select * from refresh_matviews();"
        execute(q, dest_conn)
        print('Done')
        return True
    return False
示例#22
0
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Fix result date and datetime strings to native classes
    fixed_dates = []
    for row in result:
        fixed_dates.append((*row[:4], row.DAY.date(), row.DATE_TIME))

    assert fixed_dates == test_table_data
示例#23
0
def test_insert_rows_happy_path(pgtestdb_conn, pgtestdb_test_tables,
                                pgtestdb_insert_sql, test_table_data,
                                commit_chunks):
    # Parameterized to ensure success with and without commit_chunks
    # Arrange
    insert_sql = pgtestdb_insert_sql.replace('src', 'dest')

    # Act
    executemany(insert_sql,
                pgtestdb_conn,
                test_table_data,
                commit_chunks=commit_chunks)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data
示例#24
0
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Oracle returns date object as datetime, convert test data to compare
    for i, row in enumerate(test_table_data):
        row_with_datetimes = [datetime.combine(x, datetime.min.time())
                              if isinstance(x, date) and not isinstance(x, datetime)
                              else x
                              for x in row]
        assert result[i] == tuple(row_with_datetimes)
示例#25
0
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Fix result date and datetime strings to native classes
    fixed = []
    for row in result:
        fixed.append(
            (*row[:4], dt.datetime.strptime(row.day, '%Y-%m-%d').date(),
             dt.datetime.strptime(row.date_time, '%Y-%m-%d %H:%M:%S')))

    assert fixed == test_table_data
def test_copy_rows_happy_path_deprecated_tables_fast_true(
        test_deprecated_tables, testdb_conn, testdb_conn2, test_table_data):
    # Note: ODBC driver requires separate connections for source and destination,
    # even if they are the same database.
    # Arrange and act
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')
    with pytest.warns(UserWarning) as record:
        copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn2)

    # Assert
    assert len(record) == 1
    assert str(
        record[0].message).startswith("fast_executemany execution failed")

    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)
    assert result == test_table_data
示例#27
0
def test_load_named_tuples(testdb_conn, test_tables, test_table_data):
    # Arrange
    # Convert to plain tuples as ORACLE makes column names upper case
    expected = [tuple(row) for row in test_table_data]

    # Act
    load('dest', testdb_conn, test_table_data)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Fix result date and datetime strings to native classes
    fixed_dates = []
    for row in result:
        fixed_dates.append((*row[:4], row.DAY.date(), row.DATE_TIME))

    assert fixed_dates == expected
def test_get_rows_with_modify_dict(pgtestdb_conn, pgtestdb_test_tables):
    # Arrange
    select_sql = "SELECT * FROM src LIMIT 1"
    expected = [{
        'date_time': datetime(2018, 12, 7, 13, 1, 59),
        'day': date(2018, 12, 7),
        'id': 1001,
        'simple_text': 'TEXT',
        'utf8_text': 'Öæ° z',
        'value': 1.234
    }]

    # Act
    result = get_rows(select_sql,
                      pgtestdb_conn,
                      row_factory=dict_row_factory,
                      transform=transform_modify_dict)

    # Assert
    assert result == expected
示例#29
0
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    # Arrange
    duplicate_id_row_sql = """
       INSERT INTO dest (id, day, date_time)
       VALUES (
         1,
         TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss'),
         TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss')
         )""".strip()
    execute(duplicate_id_row_sql, testdb_conn)

    # Act
    errors = []
    copy_table_rows('src',
                    testdb_conn,
                    testdb_conn,
                    target='dest',
                    on_error=errors.extend)

    # Assert
    sql = "SELECT * FROM dest"
    result = get_rows(sql, testdb_conn)

    # Fix result date and datetime strings to native classes
    fixed_dates = []
    for row in result:
        fixed_dates.append((*row[:4], row.DAY.date(), row.DATE_TIME))

    # Check that first row was caught as error, noting that Oracle
    # changes the case of column names
    row, exception = errors[0]
    assert row.ID == 1
    assert "unique" in str(exception).lower()

    # Check that other rows were inserted correctly
    assert fixed_dates[1:] == test_table_data[1:]
示例#30
0
def test_get_rows_happy_path(pgtestdb_test_tables, pgtestdb_conn,
                             test_table_data):
    sql = "SELECT * FROM src"
    result = get_rows(sql, pgtestdb_conn)
    assert result == test_table_data