Exemplo n.º 1
0
def test_upsert_new_cols(pandabase_loaded_db, constants, col_to_duplicate):
    """Upsert a frame that carries an extra column and verify it is stored.

    The existing table is read, the column named by ``col_to_duplicate`` is
    copied under the new name ``bonus_col``, and the frame is written back
    with ``how='upsert'`` and ``add_new_columns=True``. The table is then
    re-read and compared with the frame that was written.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    df['bonus_col'] = df[col_to_duplicate].copy()

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how='upsert',
              add_new_columns=True)

    # check against pandabase read
    loaded = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    # The new column must be present in what came back from the database;
    # checking the local frame would always pass because it was added above.
    assert 'bonus_col' in loaded.columns
    assert companda(df, loaded)
Exemplo n.º 2
0
def test_create_read_table_no_index(empty_db, minimal_df):
    """Create table 'sample' with ``auto_index=True`` and read it back.

    Asserts that the returned table marks the ``PANDABASE_DEFAULT_INDEX``
    column as primary key, that the table exists afterwards, and that the
    frame read back matches ``minimal_df`` when the index is ignored.
    """
    created = pb.to_sql(minimal_df,
                        table_name='sample',
                        con=empty_db,
                        how='create_only',
                        auto_index=True)

    default_index_col = created.columns[PANDABASE_DEFAULT_INDEX]
    assert default_index_col.primary_key

    round_tripped = pb.read_sql('sample', con=empty_db)

    assert pb.has_table(empty_db, 'sample')
    assert pb.companda(round_tripped, minimal_df, ignore_index=True)
Exemplo n.º 3
0
def test_append_bad_pk_fails(pandabase_loaded_db, simple_df, constants,
                             unique_index_name):
    """Appending a frame indexed by its 'integer' column must raise NameError.

    The frame's index is replaced by the values of its 'integer' column.
    When ``unique_index_name`` is truthy, those values are also copied into
    a column named ``constants.SAMPLE_INDEX_NAME`` and the 'integer' column
    is dropped before the append is attempted.
    """
    target = constants.TABLE_NAME
    assert pb.has_table(pandabase_loaded_db, target)

    # Note: this re-indexes the fixture frame in place.
    simple_df.index = simple_df['integer']
    if unique_index_name:
        simple_df[constants.SAMPLE_INDEX_NAME] = simple_df['integer']
        simple_df = simple_df.drop(columns='integer')

    with pytest.raises(NameError):
        pb.to_sql(simple_df,
                  table_name=target,
                  con=pandabase_loaded_db,
                  how='append')
Exemplo n.º 4
0
def test_add_fails_invalid_timezone(pandabase_loaded_db, how, constants, tz):
    """Writing a 'date' column localized to ``tz`` must raise ValueError.

    Builds five hourly timestamps starting 2019-06-06 in timezone ``tz``,
    indexed 0..4 under ``constants.SAMPLE_INDEX_NAME``, and expects
    ``pb.to_sql`` to reject the frame for the given ``how``.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    hourly = pd.date_range('2019-06-06', periods=5, freq='h', tz=tz)
    df = pd.DataFrame(data=hourly, index=range(5), columns=['date'])
    df.index.name = constants.SAMPLE_INDEX_NAME

    print(df.date)

    with pytest.raises(ValueError):
        pb.to_sql(
            df,
            table_name=constants.TABLE_NAME,
            con=pandabase_loaded_db,
            how=how,
        )
Exemplo n.º 5
0
def test_coerce_integer(pandabase_loaded_db, how, constants):
    """Upsert the float value 77.0 into the column named 'integer'.

    Checks that the write leaves the dtypes of the local frame unchanged and
    that the value read back at index 1 compares equal to 77.

    ``how`` is requested by the signature but not used: the write always
    uses ``how='upsert'``.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(data=[[77.0]], index=[1], columns=['integer'])
    df.index.name = constants.SAMPLE_INDEX_NAME
    dtypes_before = df.dtypes

    pb.to_sql(
        df,
        table_name=constants.TABLE_NAME,
        con=pandabase_loaded_db,
        how='upsert',
    )

    # to_sql must not have altered the caller's frame
    dtypes_after = df.dtypes
    for column in df.columns:
        assert dtypes_before[column] == dtypes_after[column]

    stored = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert stored.loc[1, 'integer'] == 77
Exemplo n.º 6
0
def test_add_new_rows(pandabase_loaded_db, simple_df, how, constants):
    """Write a copy of ``simple_df`` shifted to new index labels.

    The copy's index is offset by 100 and written with the given ``how``.
    Afterwards the table must contain no missing values, and both the
    original labels and the shifted labels must read back equal to their
    source frames.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    shifted = simple_df.copy()
    shifted.index = shifted.index + 100

    pb.to_sql(
        shifted,
        table_name=constants.TABLE_NAME,
        con=pandabase_loaded_db,
        how=how,
    )

    stored = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    total_missing = stored.isna().sum().sum()
    assert total_missing == 0
    assert companda(simple_df, stored.loc[simple_df.index])
    assert companda(shifted, stored.loc[shifted.index])
Exemplo n.º 7
0
def test_create_select_table_range_int_index(empty_db, simple_df, constants):
    """Create table 'sample' from ``simple_df`` and read it back by range.

    Asserts that the ``constants.SAMPLE_INDEX_NAME`` column is the primary
    key, that reading with ``lowest=1, highest=0`` yields no rows, and that
    reading from the first to the last index label of ``simple_df`` matches
    the original frame (all-NaN columns ignored).
    """
    created = pb.to_sql(
        simple_df,
        table_name='sample',
        con=empty_db,
        how='create_only',
    )

    assert created.columns[constants.SAMPLE_INDEX_NAME].primary_key
    assert pb.has_table(empty_db, 'sample')

    # inverted bounds: expect an empty selection
    empty_selection = pb.read_sql('sample', con=empty_db, lowest=1, highest=0)
    print(empty_selection)
    assert len(empty_selection) == 0

    first_label = simple_df.index[0]
    last_label = simple_df.index[-1]
    full_selection = pb.read_sql('sample', con=empty_db,
                                 lowest=first_label,
                                 highest=last_label)
    assert pb.companda(full_selection, simple_df, ignore_all_nan_columns=True)
Exemplo n.º 8
0
def test_append_auto_index(empty_db, minimal_df):
    """Create table 'sample' with an auto index, then append the same frame.

    After the append, the table must hold ``minimal_df`` twice in a row
    (compared ignoring the index) and the ``PANDABASE_DEFAULT_INDEX`` column
    must be the primary key.
    """
    pb.to_sql(
        minimal_df,
        table_name='sample',
        con=empty_db,
        auto_index=True,
        how='create_only',
    )
    appended = pb.to_sql(
        minimal_df,
        table_name='sample',
        con=empty_db,
        auto_index=True,
        how='append',
    )

    assert appended.columns[PANDABASE_DEFAULT_INDEX].primary_key
    stored = pb.read_sql('sample', con=empty_db)

    assert pb.has_table(empty_db, 'sample')
    expected = pd.concat([minimal_df, minimal_df], ignore_index=True)
    assert pb.companda(stored, expected, ignore_index=True)
    assert len(stored) == len(minimal_df) * 2
Exemplo n.º 9
0
def test_create_read_table_with_different_index(session_db, simple_df, table_name, index_col_name):
    """Create a table indexed by ``index_col_name`` and read it with pandabase.

    A copy of ``simple_df`` is re-indexed by the named column (which is then
    dropped from the columns), written with ``how='create_only'``, and read
    back. Raises ValueError carrying the comparison message if the frames
    differ (all-NaN columns ignored).
    """
    expected = simple_df.copy()
    expected.index = expected[index_col_name]
    print(expected[index_col_name])
    print(expected.index)
    expected = expected.drop(columns=index_col_name)

    created = pb.to_sql(
        expected,
        table_name=table_name,
        con=session_db,
        how='create_only',
    )

    assert created.columns[index_col_name].primary_key
    assert pb.has_table(session_db, table_name)

    stored = pb.read_sql(table_name, con=session_db)
    comparison = pb.companda(stored, expected, ignore_all_nan_columns=True)
    if not comparison:
        raise ValueError(comparison.message)
Exemplo n.º 10
0
def test_create_table_with_different_index_pandas(session_db, simple_df,
                                                  table_name, index_col_name):
    """Create a table indexed by ``index_col_name`` and read it with pandas.

    The frame is written by pandabase and read back with
    ``pd.read_sql_table``. The result is then adjusted by hand before the
    comparison: a string index is converted to float, and either the 'date'
    column is parsed as UTC or, when there is no 'date' column, the index is
    overwritten with the expected one. Raises ValueError carrying the
    comparison message if the frames differ (all-NaN columns ignored).
    """
    expected = simple_df.copy()
    expected.index = expected[index_col_name]
    expected = expected.drop(columns=index_col_name)

    created = pb.to_sql(
        expected,
        table_name=table_name,
        con=session_db,
        how='create_only',
    )

    assert created.columns[index_col_name].primary_key
    assert pb.has_table(session_db, table_name)

    # read with PANDAS rather than pandabase
    stored = pd.read_sql_table(
        table_name,
        con=session_db,
        index_col=index_col_name,
    )

    # Round-trip the index through a column so string labels can be
    # converted to numbers (pd.read_sql_table does not do this itself).
    index_label = stored.index.name
    stored[index_label] = stored.index
    if isinstance(stored[index_label].iloc[0], str):
        print('converting')
        stored[index_label] = stored[index_label].apply(float)
    stored.index = stored[index_label]
    stored = stored.drop(columns=index_label)

    # pandas doesn't know about UTC
    if 'date' in stored.columns:
        print('converting date to UTC')
        stored['date'] = pd.to_datetime(stored['date'], utc=True)
    else:
        # no 'date' column: the index is simply replaced by the expected one
        print('making new UTC index (Fake!)')
        stored.index = expected.index

    comparison = pb.companda(stored, expected, ignore_all_nan_columns=True)
    if not comparison:
        raise ValueError(comparison.message)
Exemplo n.º 11
0
def test_upsert_complete_rows(pandabase_loaded_db, constants):
    """Edit single cells of the stored table, upsert, and compare the result.

    The table is read, one cell in each of the 'float', 'integer', 'string'
    and 'date' columns is assigned (labels 778-781), and the frame is
    upserted. The table read back must match the edited frame, and the
    'date' column must stay UTC throughout.

    NOTE(review): if labels 778-781 are not already in the index, the
    ``.loc`` assignments add new rows rather than edit existing ones -
    confirm against the fixture data.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    frame = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert frame.date.dt.tz == UTC

    cell_edits = (
        (778, 'float', 9.9),
        (779, 'integer', 999),
        (780, 'string', 'nah'),
        (781, 'date', pd.to_datetime('1968-01-01', utc=True)),
    )
    for label, column, value in cell_edits:
        frame.loc[label, column] = value

    # check that all values still exist
    assert frame.loc[1, 'integer'] == 778
    assert frame.date.dt.tz == UTC

    pb.to_sql(
        frame,
        table_name=constants.TABLE_NAME,
        con=pandabase_loaded_db,
        how='upsert',
    )

    stored = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(frame, stored)
Exemplo n.º 12
0
def test_upsert_valid_bool(pandabase_loaded_db, how, constants):
    """Write True, False and None into the 'boolean' column and read them back.

    Rows 101, 102 and 103 are written with the given ``how``. After reading
    the table back, row 101 must be truthy, row 102 falsy, row 103 missing,
    and the unwritten label 104 must raise KeyError.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)

    df = pd.DataFrame(index=[101, 102, 103],
                      columns=['boolean'],
                      data=[True, False, None])
    df.index.name = constants.SAMPLE_INDEX_NAME

    pb.to_sql(df,
              table_name=constants.TABLE_NAME,
              con=pandabase_loaded_db,
              how=how)

    df = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    # Int64Dtype is a fine way to store nullable boolean values
    # Stored in database as boolean or NULL so the data can only be 0, 1, or None
    assert is_bool_dtype(df.boolean) or is_integer_dtype(df.boolean)
    assert df.loc[101, 'boolean']
    assert not df.loc[102, 'boolean']
    # pd.isna recognises NaN, None and pd.NA alike; the `pd.np` alias used
    # previously is no longer available in current pandas releases.
    assert pd.isna(df.loc[103, 'boolean'])
    with pytest.raises(KeyError):
        _ = df.loc[104, 'boolean']
Exemplo n.º 13
0
def test_upsert_incomplete_rows(pandabase_loaded_db, constants):
    """Upsert rows that each have only a single value set.

    One cell is assigned at each of the labels 11-14, in the 'float',
    'integer', 'string' and 'date' columns respectively. The test checks
    that 'integer' is missing at label 11 before writing, upserts the frame,
    and requires the table read back to match it.
    """
    assert pb.has_table(pandabase_loaded_db, constants.TABLE_NAME)
    frame = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)

    sparse_cells = (
        (11, 'float', 9.9),
        (12, 'integer', 999),
        (13, 'string', 'nah'),
        (14, 'date', pd.to_datetime('1968-01-01', utc=True)),
    )
    for label, column, value in sparse_cells:
        frame.loc[label, column] = value

    # check that these values exist
    assert frame.loc[1, 'integer'] == 778
    assert pd.isna(frame.loc[11, 'integer'])
    assert frame.loc[13, 'string'] == 'nah'

    pb.to_sql(
        frame,
        table_name=constants.TABLE_NAME,
        con=pandabase_loaded_db,
        how='upsert',
    )

    # check against pandabase read
    stored = pb.read_sql(constants.TABLE_NAME, con=pandabase_loaded_db)
    assert companda(frame, stored)
Exemplo n.º 14
0
def test_create_table_no_index_load_pandas(empty_db, minimal_df):
    """Create table 'sample' with an auto index and read it with pandas.

    The table is read with ``pd.read_sql_table`` using
    ``PANDABASE_DEFAULT_INDEX`` as the index column. Before comparing with
    ``minimal_df`` (index ignored), the index name is cleared and the 'date'
    column is parsed as UTC.
    """
    created = pb.to_sql(minimal_df,
                        table_name='sample',
                        con=empty_db,
                        how='create_only',
                        auto_index=True)

    assert created.columns[PANDABASE_DEFAULT_INDEX].primary_key
    assert pb.has_table(empty_db, 'sample')

    stored = pd.read_sql_table(
        'sample',
        con=empty_db,
        index_col=PANDABASE_DEFAULT_INDEX,
    )
    # pandas doesn't know about default index
    stored.index.name = None
    # pandas doesn't know stored as UTC w/o timezone info
    stored['date'] = pd.to_datetime(stored['date'], utc=True)

    assert pb.companda(stored, minimal_df, ignore_index=True)
Exemplo n.º 15
0
def test_drop_table(pandabase_loaded_db):
    """Drop every table reported for the database, one at a time.

    Each table must exist before ``drop_db_table`` is called and must no
    longer exist afterwards.
    """
    db = pandabase_loaded_db
    for existing in pb.util.get_db_table_names(db):
        assert pb.has_table(db, table_name=existing)
        pb.util.drop_db_table(con=db, table_name=existing)
        assert not pb.has_table(db, table_name=existing)