Пример #1
0
def test_from_sql(make_sql_connection):
    """Check that ``pd.read_sql`` agrees with ``pandas.read_sql``.

    The same query is executed through three kinds of connection objects:
    the value returned by the ``make_sql_connection`` fixture, a SQLAlchemy
    engine created from that value, and a SQLAlchemy connection opened from
    that engine.  ``pd.read_sql_query`` and ``pd.read_sql_table`` are
    expected to emit a ``UserWarning``.

    Parameters
    ----------
    make_sql_connection : callable
        Fixture called as ``make_sql_connection(filename, table)``.  Its
        return value is passed both to ``read_sql`` and to
        ``sa.create_engine`` (presumably a database URL -- confirm against
        the fixture definition).
    """
    filename = "test_from_sql.db"
    table = "test_from_sql"
    conn = make_sql_connection(filename, table)
    query = f"select * from {table}"

    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)

    assert modin_df_equals_pandas(modin_df, pandas_df)

    pandas_df = pandas.read_sql(query, conn, index_col="index")
    modin_df = pd.read_sql(query, conn, index_col="index")

    assert modin_df_equals_pandas(modin_df, pandas_df)

    with pytest.warns(UserWarning):
        pd.read_sql_query(query, conn)

    with pytest.warns(UserWarning):
        pd.read_sql_table(table, conn)

    # Test SQLAlchemy engine
    engine = sa.create_engine(conn)
    try:
        pandas_df = pandas.read_sql(query, engine)
        modin_df = pd.read_sql(query, engine)

        assert modin_df_equals_pandas(modin_df, pandas_df)

        # Test SQLAlchemy Connection; the ``with`` block closes it even when
        # the comparison below fails.
        with engine.connect() as sa_connection:
            pandas_df = pandas.read_sql(query, sa_connection)
            modin_df = pd.read_sql(query, sa_connection)

            assert modin_df_equals_pandas(modin_df, pandas_df)
    finally:
        # Release the engine's pooled connections so the database file is not
        # kept open after the test.
        engine.dispose()
Пример #2
0
def test_from_sql():
    """Compare ``pd.read_sql`` with ``pandas.read_sql`` on a SQLite database.

    The database at ``TEST_SQL_FILENAME`` is populated through
    ``setup_sql_file`` and the ``test`` table is read with both libraries.
    Cleanup lives in ``finally`` blocks so that the connection is closed and
    ``teardown_sql_file`` is called even when a read or the comparison fails.
    """
    conn = sqlite3.connect(TEST_SQL_FILENAME)
    try:
        setup_sql_file(conn, True)

        pandas_df = pandas.read_sql("select * from test", conn)
        modin_df = pd.read_sql("select * from test", conn)

        assert modin_df_equals_pandas(modin_df, pandas_df)
    finally:
        # Close the connection before teardown so the file handle is released
        # first; the nested ``finally`` guarantees teardown still runs.
        try:
            conn.close()
        finally:
            teardown_sql_file()
Пример #3
0
def test_from_sql_with_chunksize(make_sql_connection):
    """Check chunked ``pd.read_sql`` against chunked ``pandas.read_sql``.

    Both libraries are asked for chunks of 10 rows and the resulting chunks
    are compared pairwise with ``df_equals``.  The test also fails when the
    two iterators yield a different number of chunks.

    Parameters
    ----------
    make_sql_connection : callable
        Fixture called as ``make_sql_connection(filename, table)``; its
        return value is passed to ``read_sql`` as the connection argument.
    """
    import itertools

    filename = "test_from_sql.db"
    table = "test_from_sql"
    conn = make_sql_connection(filename, table)
    query = f"select * from {table}"

    pandas_gen = pandas.read_sql(query, conn, chunksize=10)
    modin_gen = pd.read_sql(query, conn, chunksize=10)

    # A plain ``zip`` stops at the shorter iterator, which would let the test
    # pass if one library produced fewer (or no) chunks.  ``zip_longest`` with
    # a sentinel makes such a mismatch visible.
    exhausted = object()
    chunk_pairs = itertools.zip_longest(modin_gen, pandas_gen, fillvalue=exhausted)
    for modin_df, pandas_df in chunk_pairs:
        assert (
            modin_df is not exhausted and pandas_df is not exhausted
        ), "modin and pandas produced a different number of chunks"
        df_equals(modin_df, pandas_df)
Пример #4
0
def test_from_sql(make_sql_connection):
    """Verify that ``pd.read_sql`` returns the same frame as ``pandas.read_sql``.

    Parameters
    ----------
    make_sql_connection : callable
        Fixture called as ``make_sql_connection(filename, table)``; its
        return value is passed to ``read_sql`` as the connection argument.
    """
    table_name = "test_from_sql"
    connection = make_sql_connection("test_from_sql.db", table_name)
    select_all = "select * from {0}".format(table_name)

    # Read with pandas first, then with the library under test.
    expected = pandas.read_sql(select_all, connection)
    actual = pd.read_sql(select_all, connection)

    frames_match = modin_df_equals_pandas(actual, expected)
    assert frames_match
Пример #5
0
def test_from_sql():
    """Compare ``pd.read_sql`` with ``pandas.read_sql`` on a fresh SQLite file.

    ``teardown_sql_file`` is called before connecting and again at the end.
    The table is created through ``setup_sql_file`` and read with both
    libraries.  Cleanup lives in ``finally`` blocks so the connection is
    closed and the final teardown runs even when a read or the comparison
    fails.
    """
    filename = "test_from_sql.db"
    teardown_sql_file(filename)
    conn = sqlite3.connect(filename)
    try:
        table = "test_from_sql"
        setup_sql_file(conn, filename, table, True)
        query = f"select * from {table}"

        pandas_df = pandas.read_sql(query, conn)
        modin_df = pd.read_sql(query, conn)

        assert modin_df_equals_pandas(modin_df, pandas_df)
    finally:
        # Close the connection before teardown so the file handle is released
        # first; the nested ``finally`` guarantees teardown still runs.
        try:
            conn.close()
        finally:
            teardown_sql_file(filename)
Пример #6
0
from contexttimer import Timer
from docopt import docopt
from dask.distributed import Client, LocalCluster

if __name__ == "__main__":
    # Benchmark script: time a full-table ``pd.read_sql`` and print the
    # elapsed time followed by the first rows of the result.
    #
    # Inputs:
    #   <num>           command-line argument parsed by docopt; used both as
    #                   the partition count and as the number of Dask workers.
    #   POSTGRES_URL    environment variable passed to ``read_sql`` as the
    #                   connection argument.
    #   POSTGRES_TABLE  environment variable naming the table to read.
    # A missing environment variable raises ``KeyError``.
    args = docopt(__doc__, version="1.0")
    conn = os.environ["POSTGRES_URL"]
    table = os.environ["POSTGRES_TABLE"]

    partitions = int(args["<num>"])
    config.NPartitions.put(partitions)

    # Manage the cluster and client with context managers so the worker
    # processes and the client are shut down even if the read fails.
    with LocalCluster(
        n_workers=partitions,
        scheduler_port=0,
        memory_limit="230G",
    ) as cluster, Client(cluster) as client:
        # Only the read itself is timed; cluster start-up is excluded.
        with Timer() as timer:
            df = pd.read_sql(
                f"SELECT * FROM {table}",
                conn,
                parse_dates=[
                    "l_shipdate",
                    "l_commitdate",
                    "l_receiptdate",
                ],
            )
        print(f"[Total] {timer.elapsed:.2f}s")

        # Print while the client is still open, in case the frame depends on
        # the running cluster.
        print(df.head())