def test_from_sql(make_sql_connection):
    filename = "test_from_sql.db"
    table = "test_from_sql"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)

    pandas_df = pandas.read_sql(query, conn, index_col="index")
    modin_df = pd.read_sql(query, conn, index_col="index")
    assert modin_df_equals_pandas(modin_df, pandas_df)

    with pytest.warns(UserWarning):
        pd.read_sql_query(query, conn)

    with pytest.warns(UserWarning):
        pd.read_sql_table(table, conn)

    # Test SQLAlchemy engine
    conn = sa.create_engine(conn)
    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)

    # Test SQLAlchemy Connection
    conn = conn.connect()
    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)
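The test above depends on a make_sql_connection fixture that creates the backing database and hands back something read_sql accepts; since sa.create_engine(conn) is called on it later, it must return a connection string. A minimal sketch of one possible implementation, assuming a sqlite file and a small two-column frame (the real fixture in the suite may populate different data and handle cleanup differently):

import pandas
import pytest
import sqlalchemy as sa


@pytest.fixture
def make_sql_connection():
    # Hypothetical sketch of the fixture the tests rely on.
    def _make_sql_connection(filename, table):
        conn_str = "sqlite:///" + filename
        engine = sa.create_engine(conn_str)
        frame = pandas.DataFrame({"col1": range(10), "col2": range(10, 20)})
        # Write a small table so read_sql has something to load;
        # index_label="index" provides the column used by the
        # index_col="index" assertions above.
        frame.to_sql(
            table, engine, index=True, index_label="index", if_exists="replace"
        )
        return conn_str

    return _make_sql_connection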
def test_from_sql():
    conn = sqlite3.connect(TEST_SQL_FILENAME)
    setup_sql_file(conn, True)

    pandas_df = pandas.read_sql("select * from test", conn)
    modin_df = pd.read_sql("select * from test", conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)

    teardown_sql_file()
def test_from_sql_with_chunksize(make_sql_connection):
    filename = "test_from_sql.db"
    table = "test_from_sql"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    # With chunksize set, read_sql returns an iterator of frames;
    # compare the Modin and pandas chunks pairwise.
    pandas_gen = pandas.read_sql(query, conn, chunksize=10)
    modin_gen = pd.read_sql(query, conn, chunksize=10)
    for modin_df, pandas_df in zip(modin_gen, pandas_gen):
        df_equals(modin_df, pandas_df)
def test_from_sql(make_sql_connection):
    filename = "test_from_sql.db"
    table = "test_from_sql"
    conn = make_sql_connection(filename, table)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)
def test_from_sql():
    filename = "test_from_sql.db"
    teardown_sql_file(filename)  # clear any stale file from a prior run
    conn = sqlite3.connect(filename)
    table = "test_from_sql"
    setup_sql_file(conn, filename, table, True)
    query = "select * from {0}".format(table)

    pandas_df = pandas.read_sql(query, conn)
    modin_df = pd.read_sql(query, conn)
    assert modin_df_equals_pandas(modin_df, pandas_df)

    teardown_sql_file(filename)
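Both variants above assume setup_sql_file / teardown_sql_file helpers defined elsewhere in the test module. A minimal sketch matching the signatures at the call sites in the later variant (hypothetical, sqlite-backed; the actual helpers may seed different data):

import os

import pandas


def setup_sql_file(conn, filename, table, force=False):
    # Hypothetical helper: populate the sqlite database with a small table.
    # `filename` is kept to match the call site; `force` is read here as
    # "overwrite an existing table".
    frame = pandas.DataFrame({"col1": range(10), "col2": range(10, 20)})
    frame.to_sql(
        table, conn, index=True, index_label="index",
        if_exists="replace" if force else "fail",
    )


def teardown_sql_file(filename):
    # Hypothetical helper: remove the backing file if it exists.
    if os.path.exists(filename):
        os.remove(filename)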
# Minimal usage docstring (assumed): docopt parses CLI arguments out of
# __doc__, so the script needs one declaring the <num> positional argument.
"""Usage: benchmark.py <num>"""

import os

import modin.config as config
import modin.pandas as pd
from contexttimer import Timer
from dask.distributed import Client, LocalCluster
from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__, version="1.0")
    conn = os.environ["POSTGRES_URL"]
    table = os.environ["POSTGRES_TABLE"]
    partitions = int(args["<num>"])
    config.NPartitions.put(partitions)

    # One Dask worker per Modin partition (assumes Modin's Dask engine is
    # selected, e.g. via MODIN_ENGINE=dask in the environment).
    cluster = LocalCluster(
        n_workers=partitions, scheduler_port=0, memory_limit="230G"
    )
    client = Client(cluster)

    with Timer() as timer:
        df = pd.read_sql(
            f"SELECT * FROM {table}",
            conn,
            parse_dates=["l_shipdate", "l_commitdate", "l_receiptdate"],
        )
    print(f"[Total] {timer.elapsed:.2f}s")
    print(df.head())
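The script reads its connection target from the environment and the partition count from the command line. A sample invocation (file name and connection string are placeholders, not from the source):

# POSTGRES_URL=postgresql://user:pass@localhost/tpch POSTGRES_TABLE=lineitem \
#     python benchmark.py 8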