def test_HDF_parse_dates_auto():
    """The ``date`` column read back from the HDF store has dtype ``<M8[ns]``.

    Loads ``tests/data/eoddata`` into a temporary HDF file, passing the
    externally defined ``rename`` as ``columns``, and reads ``data/eod`` back.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, "data/eod")
        date_dtype = stored.dtypes["date"]
        assert date_dtype == dtype("<M8[ns]")
def test_SQL_parse_dates_auto():
    """The ``date`` column read back from the SQL table has dtype ``<M8[ns]``.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine, passing the
    externally defined ``rename`` as ``columns``, and reads ``eod`` back.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename)
    table = pd.read_sql_table("eod", sqlite_engine)
    date_dtype = table.dtypes["date"]
    assert date_dtype == dtype("<M8[ns]")
def test_create_database(self):
    """Loading into a fresh ``sqlite://`` engine fills ``eod`` and ``updated_eod``.

    Expects 10030 rows in ``eod`` and 5 rows in ``updated_eod``.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data()

    eod_rows = len(pd.read_sql_table("eod", sqlite_engine))
    self.assertEqual(eod_rows, 10030)

    updated_rows = len(pd.read_sql_table("updated_eod", sqlite_engine))
    self.assertEqual(updated_rows, 5)
def test_create_hdf_file(self):
    """Loading into a fresh temporary HDF file fills ``data/eod`` and ``updated/eod``.

    Expects 10030 rows under ``data/eod`` and 5 rows under ``updated/eod``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data()

        data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        self.assertEqual(data_rows, 10030)

        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        self.assertEqual(updated_rows, 5)
def test_create_hdf_file(self):
    """Loading into a fresh temporary HDF file fills ``data/eod`` and ``updated/eod``.

    Expects 10030 rows under ``data/eod`` and 5 rows under ``updated/eod``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data()

        data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        self.assertEqual(data_rows, 10030)

        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        self.assertEqual(updated_rows, 5)
def test_create_database(self):
    """Loading into a fresh ``sqlite://`` engine fills ``eod`` and ``updated_eod``.

    Expects 10030 rows in ``eod`` and 5 rows in ``updated_eod``.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data()

    eod_rows = len(pd.read_sql_table("eod", sqlite_engine))
    self.assertEqual(eod_rows, 10030)

    updated_rows = len(pd.read_sql_table("updated_eod", sqlite_engine))
    self.assertEqual(updated_rows, 5)
def test_SQL_parse_dates_auto():
    """The ``date`` column read back from the SQL table has dtype ``<M8[ns]``.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine, passing the
    externally defined ``rename`` as ``columns``, and reads ``eod`` back.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename)
    table = pd.read_sql_table("eod", sqlite_engine)
    date_dtype = table.dtypes["date"]
    assert date_dtype == dtype("<M8[ns]")
def test_HDF_parse_dates_auto():
    """The ``date`` column read back from the HDF store has dtype ``<M8[ns]``.

    Loads ``tests/data/eoddata`` into a temporary HDF file, passing the
    externally defined ``rename`` as ``columns``, and reads ``data/eod`` back.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, "data/eod")
        date_dtype = stored.dtypes["date"]
        assert date_dtype == dtype("<M8[ns]")
def test_existing_database(self):
    """Load after an extra index file has been copied into the fixture directory.

    Copies ``INDEX_20180731.txt`` to ``INDEX_20000000.txt`` before calling
    ``load_data`` and then expects 12053 rows in ``eod`` and 6 rows in
    ``updated_eod``.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )

    # NOTE(review): the copied file is not removed here; other tests' expected
    # row counts may depend on whether it exists - verify test ordering.
    source_file = "tests/data/eoddata/INDEX_20180731.txt"
    copied_file = "tests/data/eoddata/INDEX_20000000.txt"
    shutil.copy2(source_file, copied_file)

    loader.load_data()

    eod_rows = len(pd.read_sql_table("eod", sqlite_engine))
    self.assertEqual(eod_rows, 12053)

    updated_rows = len(pd.read_sql_table("updated_eod", sqlite_engine))
    self.assertEqual(updated_rows, 6)
def test_existing_hdf_file(self):
    """Load into HDF after an extra index file has been copied into the fixtures.

    Copies ``INDEX_20180731.txt`` to ``INDEX_20000000.txt`` before calling
    ``load_data`` and then expects 12053 rows under ``data/eod`` and 6 rows
    under ``updated/eod``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )

        # NOTE(review): the copied file is not removed here; other tests'
        # expected row counts may depend on whether it exists - verify.
        source_file = "tests/data/eoddata/INDEX_20180731.txt"
        copied_file = "tests/data/eoddata/INDEX_20000000.txt"
        shutil.copy2(source_file, copied_file)

        loader.load_data()

        data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        self.assertEqual(data_rows, 12053)

        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        self.assertEqual(updated_rows, 6)
def test_SQL_rename_columns():
    """The loaded SQL table carries exactly the renamed columns, in order.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine with the
    externally defined ``rename`` passed as ``columns``, then checks the row
    count (10030) and the complete column list of the ``eod`` table.
    """
    engine = create_engine("sqlite://")
    dl = DataLoader("tests/data/eoddata", engine=engine, mode="SQL", tablename="eod")
    dl.load_data(columns=rename)
    df = pd.read_sql_table("eod", engine)
    assert len(df) == 10030
    cols = ["symbol", "date", "open", "high", "low", "close", "vol"]
    # Compare the whole list at once: a pairwise zip() stops at the shorter
    # sequence and would silently accept missing or extra columns.
    assert list(df.columns) == cols
def test_SQL_rename_columns():
    """The loaded SQL table carries exactly the renamed columns, in order.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine with the
    externally defined ``rename`` passed as ``columns``, then checks the row
    count (10030) and the complete column list of the ``eod`` table.
    """
    engine = create_engine('sqlite://')
    dl = DataLoader('tests/data/eoddata', engine=engine, mode='SQL', tablename='eod')
    dl.load_data(columns=rename)
    df = pd.read_sql_table('eod', engine)
    assert len(df) == 10030
    cols = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
    # Compare the whole list at once: a pairwise zip() stops at the shorter
    # sequence and would silently accept missing or extra columns.
    assert list(df.columns) == cols
def test_HDF_rename_columns():
    """The loaded HDF table carries exactly the renamed columns, in order.

    Loads ``tests/data/eoddata`` into a temporary HDF file with the externally
    defined ``rename`` passed as ``columns``, then checks the row counts of
    ``data/eod`` (10030) and ``updated/eod`` (5) and the complete column list.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader("tests/data/eoddata", engine=fp.name, mode="HDF", tablename="eod")
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, "data/eod")
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, "updated/eod")) == 5
        cols = ["symbol", "date", "open", "high", "low", "close", "vol"]
        # Compare the whole list at once: a pairwise zip() stops at the shorter
        # sequence and would silently accept missing or extra columns.
        assert list(df.columns) == cols
def test_HDF_rename_columns():
    """The loaded HDF table carries exactly the renamed columns, in order.

    Loads ``tests/data/eoddata`` into a temporary HDF file with the externally
    defined ``rename`` passed as ``columns``, then checks the row counts of
    ``data/eod`` (10030) and ``updated/eod`` (5) and the complete column list.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader('tests/data/eoddata', engine=fp.name, mode='HDF', tablename='eod')
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, 'data/eod')
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, 'updated/eod')) == 5
        cols = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
        # Compare the whole list at once: a pairwise zip() stops at the shorter
        # sequence and would silently accept missing or extra columns.
        assert list(df.columns) == cols
def test_existing_database(self):
    """Load after an extra index file has been copied into the fixture directory.

    Copies ``INDEX_20180731.txt`` to ``INDEX_20000000.txt`` before calling
    ``load_data`` and then expects 12053 rows in ``eod`` and 6 rows in
    ``updated_eod``.
    """
    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )

    # NOTE(review): the copied file is not removed here; other tests' expected
    # row counts may depend on whether it exists - verify test ordering.
    source_file = "tests/data/eoddata/INDEX_20180731.txt"
    copied_file = "tests/data/eoddata/INDEX_20000000.txt"
    shutil.copy2(source_file, copied_file)

    loader.load_data()

    eod_rows = len(pd.read_sql_table("eod", sqlite_engine))
    self.assertEqual(eod_rows, 12053)

    updated_rows = len(pd.read_sql_table("updated_eod", sqlite_engine))
    self.assertEqual(updated_rows, 6)
def test_existing_hdf_file(self):
    """Load into HDF after an extra index file has been copied into the fixtures.

    Copies ``INDEX_20180731.txt`` to ``INDEX_20000000.txt`` before calling
    ``load_data`` and then expects 12053 rows under ``data/eod`` and 6 rows
    under ``updated/eod``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )

        # NOTE(review): the copied file is not removed here; other tests'
        # expected row counts may depend on whether it exists - verify.
        source_file = "tests/data/eoddata/INDEX_20180731.txt"
        copied_file = "tests/data/eoddata/INDEX_20000000.txt"
        shutil.copy2(source_file, copied_file)

        loader.load_data()

        data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        self.assertEqual(data_rows, 12053)

        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        self.assertEqual(updated_rows, 6)
def test_collate_data():
    """``collate_data`` output matches what ``DataLoader`` stores in SQL.

    Both frames are sorted by ``date`` and ``symbol``; their lengths must be
    equal and the external ``compare`` helper must hold on each of 100 calls
    (presumably it samples rows at random - confirm against its definition).
    """
    collated = (
        collate_data("tests/data/NASDAQ/data", parse_dates=["Date"])
        .rename(lambda name: name.lower(), axis="columns")
        .sort_values(by=["date", "symbol"])
    )

    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        directory="tests/data/NASDAQ/data",
        mode="SQL",
        engine=sqlite_engine,
        tablename="eod",
    )
    loader.load_data()
    loaded = pd.read_sql_table("eod", sqlite_engine)
    loaded = loaded.sort_values(by=["date", "symbol"])

    assert len(collated) == len(loaded)
    for _ in range(100):
        assert compare(collated, loaded)
def test_collate_data():
    """``collate_data`` output matches what ``DataLoader`` stores in SQL.

    Both frames are sorted by ``date`` and ``symbol``; their lengths must be
    equal and the external ``compare`` helper must hold on each of 100 calls
    (presumably it samples rows at random - confirm against its definition).
    """
    collated = (
        collate_data("tests/data/NASDAQ/data", parse_dates=["Date"])
        .rename(lambda name: name.lower(), axis="columns")
        .sort_values(by=["date", "symbol"])
    )

    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        directory="tests/data/NASDAQ/data",
        mode="SQL",
        engine=sqlite_engine,
        tablename="eod",
    )
    loader.load_data()
    loaded = pd.read_sql_table("eod", sqlite_engine)
    loaded = loaded.sort_values(by=["date", "symbol"])

    assert len(collated) == len(loaded)
    for _ in range(100):
        assert compare(collated, loaded)
def test_wrong_mode(self):
    """Mismatched engine/mode combinations are rejected.

    * A temp-file path passed as ``engine`` with ``mode='SQL'`` must make
      ``load_data`` raise some ``Exception``.
    * Constructing a loader with ``mode='CSV'`` must raise ``TypeError``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        mismatched = DataLoader(
            "tests/data/eoddata",
            engine=tmp.name,
            mode="SQL",
            tablename="eod",
        )
        with self.assertRaises(Exception):
            mismatched.load_data()

        bad_mode_kwargs = dict(engine="some_random_mode", mode="CSV", tablename="eod")
        with self.assertRaises(TypeError):
            DataLoader("tests/data/eoddata", **bad_mode_kwargs)
def test_wrong_mode(self):
    """Mismatched engine/mode combinations are rejected.

    * A temp-file path passed as ``engine`` with ``mode="SQL"`` must make
      ``load_data`` raise some ``Exception``.
    * Constructing a loader with ``mode="CSV"`` must raise ``TypeError``.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        mismatched = DataLoader(
            "tests/data/eoddata",
            engine=tmp.name,
            mode="SQL",
            tablename="eod",
        )
        with self.assertRaises(Exception):
            mismatched.load_data()

        bad_mode_kwargs = dict(engine="some_random_mode", mode="CSV", tablename="eod")
        with self.assertRaises(TypeError):
            DataLoader("tests/data/eoddata", **bad_mode_kwargs)
def test_HDF_post_func():
    """A ``postfunc`` passed to ``load_data`` can add columns to the HDF table.

    The callback adds ``filename`` and ``avgprice``; the stored table must then
    have 9 columns, contain both new columns, and keep ``date`` as ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # Second argument is stored as-is; third argument is unused here.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        stored = pd.read_hdf(hdf_path, "data/eod")
        date_dtype = stored.dtypes["date"]
        assert date_dtype == dtype("<M8[ns]")
        assert stored.shape[1] == 9
        for added in ("filename", "avgprice"):
            assert added in stored.columns
def test_SQL_post_func():
    """A ``postfunc`` passed to ``load_data`` can add columns to the SQL table.

    The callback adds ``filename`` and ``avgprice``; the stored table must then
    have 9 columns, contain both new columns, and keep ``date`` as ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # Second argument is stored as-is; third argument is unused here.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    table = pd.read_sql_table("eod", sqlite_engine)
    date_dtype = table.dtypes["date"]
    assert date_dtype == dtype("<M8[ns]")
    assert table.shape[1] == 9
    for added in ("filename", "avgprice"):
        assert added in table.columns
def test_SQL_post_func():
    """A ``postfunc`` passed to ``load_data`` can add columns to the SQL table.

    The callback adds ``filename`` and ``avgprice``; the stored table must then
    have 9 columns, contain both new columns, and keep ``date`` as ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # Second argument is stored as-is; third argument is unused here.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    sqlite_engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=sqlite_engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    table = pd.read_sql_table("eod", sqlite_engine)
    date_dtype = table.dtypes["date"]
    assert date_dtype == dtype("<M8[ns]")
    assert table.shape[1] == 9
    for added in ("filename", "avgprice"):
        assert added in table.columns
def test_HDF_post_func():
    """A ``postfunc`` passed to ``load_data`` can add columns to the HDF table.

    The callback adds ``filename`` and ``avgprice``; the stored table must then
    have 9 columns, contain both new columns, and keep ``date`` as ``<M8[ns]``.
    """

    def add_filename(x, y, z):
        # Second argument is stored as-is; third argument is unused here.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        stored = pd.read_hdf(hdf_path, "data/eod")
        date_dtype = stored.dtypes["date"]
        assert date_dtype == dtype("<M8[ns]")
        assert stored.shape[1] == 9
        for added in ("filename", "avgprice"):
            assert added in stored.columns
def test_apply_split_HDF_dataloader():
    """Split-adjusted HDF data matches the stored reference results.

    Loads ``tests/data/NASDAQ/data`` into a temporary HDF file, calls
    ``apply_splits`` with the adjustments directory, and then, for every
    symbol listed in ``splits.csv``, compares each row and column of the
    stored data against ``nasdaq_results.csv``. Price/volume columns are
    compared with an absolute tolerance of 0.015; all others must be equal.
    """
    numeric_cols = ("open", "high", "low", "close", "volume")
    with tempfile.NamedTemporaryFile() as fp:
        engine = fp.name
        dl = DataLoader(
            directory="tests/data/NASDAQ/data",
            mode="HDF",
            engine=engine,
            tablename="eod",
        )
        dl.load_data()
        dl.apply_splits(directory="tests/data/NASDAQ/adjustments/")
        df = pd.read_hdf(engine, "data/eod")
        result = pd.read_csv("tests/data/NASDAQ/nasdaq_results.csv", parse_dates=["date"])
        splits = pd.read_csv("tests/data/NASDAQ/adjustments/splits.csv", parse_dates=["date"])
        for _, split_row in splits.iterrows():
            sym = split_row.at["symbol"]
            cond = 'symbol == "{}"'.format(sym)
            # Align both frames on a fresh 0..n-1 index so rows pair up by position.
            frame1 = df.query(cond).sort_values(by="date").reset_index(drop=True)
            frame2 = result.query(cond).sort_values(by="date").reset_index(drop=True)
            for row_idx in frame1.index:
                for col in frame1.columns:
                    a = frame1.loc[row_idx, col]
                    b = frame2.loc[row_idx, col]
                    # The failure context travels in the assertion message
                    # instead of being printed for every single cell.
                    if col in numeric_cols:
                        assert isclose(a, b, abs_tol=0.015), (sym, row_idx, col, a, b)
                    else:
                        assert a == b, (sym, row_idx, col, a, b)
def test_apply_split_SQL_dataloader():
    """Split-adjusted SQL data matches the stored reference results.

    Loads ``tests/data/NASDAQ/data`` into a ``sqlite://`` engine, calls
    ``apply_splits`` with the adjustments directory, and then, for every
    symbol listed in ``splits.csv``, compares each row and column of the
    stored data against ``nasdaq_results.csv``. Price/volume columns are
    compared with an absolute tolerance of 0.015; all others must be equal.
    """
    numeric_cols = ("open", "high", "low", "close", "volume")
    engine = create_engine("sqlite://")
    dl = DataLoader(
        directory="tests/data/NASDAQ/data",
        mode="SQL",
        engine=engine,
        tablename="eod",
    )
    dl.load_data()
    dl.apply_splits(directory="tests/data/NASDAQ/adjustments/")
    df = pd.read_sql_table("eod", engine)
    # Use the same repo-root-relative fixture path as every other file in this
    # test; the bare "NASDAQ/..." path only resolved from inside tests/data.
    result = pd.read_csv("tests/data/NASDAQ/nasdaq_results.csv", parse_dates=["date"])
    splits = pd.read_csv("tests/data/NASDAQ/adjustments/splits.csv", parse_dates=["date"])
    for _, split_row in splits.iterrows():
        sym = split_row.at["symbol"]
        cond = 'symbol == "{}"'.format(sym)
        # Align both frames on a fresh 0..n-1 index so rows pair up by position.
        frame1 = df.query(cond).sort_values(by="date").reset_index(drop=True)
        frame2 = result.query(cond).sort_values(by="date").reset_index(drop=True)
        for row_idx in frame1.index:
            for col in frame1.columns:
                a = frame1.loc[row_idx, col]
                b = frame2.loc[row_idx, col]
                if col in numeric_cols:
                    assert isclose(a, b, abs_tol=0.015), (sym, row_idx, col, a, b)
                else:
                    assert a == b, (sym, row_idx, col, a, b)
def test_apply_split_HDF_dataloader():
    """Split-adjusted HDF data matches the stored reference results.

    Loads ``tests/data/NASDAQ/data`` into a temporary HDF file, calls
    ``apply_splits`` with the adjustments directory, and then, for every
    symbol listed in ``splits.csv``, compares each row and column of the
    stored data against ``nasdaq_results.csv``. Price/volume columns are
    compared with an absolute tolerance of 0.015; all others must be equal.
    """
    numeric_cols = ('open', 'high', 'low', 'close', 'volume')
    with tempfile.NamedTemporaryFile() as fp:
        engine = fp.name
        dl = DataLoader(
            directory='tests/data/NASDAQ/data',
            mode='HDF',
            engine=engine,
            tablename='eod',
        )
        dl.load_data()
        dl.apply_splits(directory='tests/data/NASDAQ/adjustments/')
        df = pd.read_hdf(engine, 'data/eod')
        result = pd.read_csv('tests/data/NASDAQ/nasdaq_results.csv', parse_dates=['date'])
        splits = pd.read_csv('tests/data/NASDAQ/adjustments/splits.csv', parse_dates=['date'])
        for _, split_row in splits.iterrows():
            sym = split_row.at['symbol']
            cond = 'symbol == "{}"'.format(sym)
            # Align both frames on a fresh 0..n-1 index so rows pair up by position.
            frame1 = df.query(cond).sort_values(by='date').reset_index(drop=True)
            frame2 = result.query(cond).sort_values(by='date').reset_index(drop=True)
            for row_idx in frame1.index:
                for col in frame1.columns:
                    a = frame1.loc[row_idx, col]
                    b = frame2.loc[row_idx, col]
                    # The failure context travels in the assertion message
                    # instead of being printed for every single cell.
                    if col in numeric_cols:
                        assert isclose(a, b, abs_tol=0.015), (sym, row_idx, col, a, b)
                    else:
                        assert a == b, (sym, row_idx, col, a, b)
def test_run_loader_multiple_times(self):
    """Calling ``load_data`` five times leaves both backends with equal row counts.

    Runs an HDF loader and a SQL loader five times each over
    ``tests/data/eoddata`` and expects both stores to hold 12053 rows.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        hdf_loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        for _ in range(5):
            hdf_loader.load_data()

        sqlite_engine = create_engine("sqlite://")
        sql_loader = DataLoader(
            "tests/data/eoddata",
            engine=sqlite_engine,
            mode="SQL",
            tablename="eod",
        )
        for _ in range(5):
            sql_loader.load_data()

        hdf_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        sql_rows = len(pd.read_sql_table("eod", sqlite_engine))
        self.assertEqual(hdf_rows, sql_rows)
        self.assertEqual(hdf_rows, 12053)
def test_run_loader_multiple_times(self):
    """Calling ``load_data`` five times leaves both backends with equal row counts.

    Runs an HDF loader and a SQL loader five times each over
    ``tests/data/eoddata`` and expects both stores to hold 12053 rows.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        hdf_loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        for _ in range(5):
            hdf_loader.load_data()

        sqlite_engine = create_engine("sqlite://")
        sql_loader = DataLoader(
            "tests/data/eoddata",
            engine=sqlite_engine,
            mode="SQL",
            tablename="eod",
        )
        for _ in range(5):
            sql_loader.load_data()

        hdf_rows = len(pd.read_hdf(hdf_path, "data/eod"))
        sql_rows = len(pd.read_sql_table("eod", sqlite_engine))
        self.assertEqual(hdf_rows, sql_rows)
        self.assertEqual(hdf_rows, 12053)