示例#1
0
def test_HDF_parse_dates_auto():
    """Check that the ``date`` column stored in HDF mode is datetime64[ns].

    ``tests/data/eoddata`` is loaded through ``DataLoader`` into a temporary
    file, then the ``data/eod`` key is read back with pandas.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        # ``rename`` is defined outside this block.
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, "data/eod")
        date_dtype = stored.dtypes["date"]
        assert date_dtype == dtype("<M8[ns]")
示例#2
0
def test_SQL_parse_dates_auto():
    """Check that the ``date`` column stored in SQL mode is datetime64[ns].

    ``tests/data/eoddata`` is loaded into a ``sqlite://`` engine and the
    ``eod`` table is read back with pandas.
    """
    db = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=db,
        mode="SQL",
        tablename="eod",
    )
    # ``rename`` is defined outside this block.
    loader.load_data(columns=rename)
    stored = pd.read_sql_table("eod", db)
    date_dtype = stored.dtypes["date"]
    assert date_dtype == dtype("<M8[ns]")
示例#3
0
 def test_create_database(self):
     """Load ``tests/data/eoddata`` into a ``sqlite://`` engine and check row counts.

     Expects 10030 rows in ``eod`` and 5 rows in ``updated_eod``.
     """
     db = create_engine('sqlite://')
     loader = DataLoader(
         'tests/data/eoddata',
         engine=db,
         mode='SQL',
         tablename='eod',
     )
     loader.load_data()
     data_rows = len(pd.read_sql_table('eod', db))
     self.assertEqual(data_rows, 10030)
     updated_rows = len(pd.read_sql_table('updated_eod', db))
     self.assertEqual(updated_rows, 5)
示例#4
0
 def test_create_hdf_file(self):
     """Load ``tests/data/eoddata`` into a temporary HDF file and check row counts.

     Expects 10030 rows under ``data/eod`` and 5 rows under ``updated/eod``.
     """
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             "tests/data/eoddata",
             engine=hdf_path,
             mode="HDF",
             tablename="eod",
         )
         loader.load_data()
         data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
         self.assertEqual(data_rows, 10030)
         updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
         self.assertEqual(updated_rows, 5)
示例#5
0
 def test_create_hdf_file(self):
     """Load ``tests/data/eoddata`` into a temporary HDF file and check row counts.

     Expects 10030 rows under ``data/eod`` and 5 rows under ``updated/eod``.
     """
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             'tests/data/eoddata',
             engine=hdf_path,
             mode='HDF',
             tablename='eod',
         )
         loader.load_data()
         data_rows = len(pd.read_hdf(hdf_path, 'data/eod'))
         self.assertEqual(data_rows, 10030)
         updated_rows = len(pd.read_hdf(hdf_path, 'updated/eod'))
         self.assertEqual(updated_rows, 5)
示例#6
0
 def test_create_database(self):
     """Load ``tests/data/eoddata`` into a ``sqlite://`` engine and check row counts.

     Expects 10030 rows in ``eod`` and 5 rows in ``updated_eod``.
     """
     db = create_engine("sqlite://")
     loader = DataLoader(
         "tests/data/eoddata",
         engine=db,
         mode="SQL",
         tablename="eod",
     )
     loader.load_data()
     data_rows = len(pd.read_sql_table("eod", db))
     self.assertEqual(data_rows, 10030)
     updated_rows = len(pd.read_sql_table("updated_eod", db))
     self.assertEqual(updated_rows, 5)
示例#7
0
def test_SQL_parse_dates_auto():
    """Check that the ``date`` column stored in SQL mode is datetime64[ns].

    ``tests/data/eoddata`` is loaded into a ``sqlite://`` engine and the
    ``eod`` table is read back with pandas.
    """
    db = create_engine('sqlite://')
    loader = DataLoader(
        'tests/data/eoddata',
        engine=db,
        mode='SQL',
        tablename='eod',
    )
    # ``rename`` is defined outside this block.
    loader.load_data(columns=rename)
    stored = pd.read_sql_table('eod', db)
    date_dtype = stored.dtypes['date']
    assert date_dtype == dtype('<M8[ns]')
示例#8
0
def test_HDF_parse_dates_auto():
    """Check that the ``date`` column stored in HDF mode is datetime64[ns].

    ``tests/data/eoddata`` is loaded through ``DataLoader`` into a temporary
    file, then the ``data/eod`` key is read back with pandas.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            'tests/data/eoddata',
            engine=hdf_path,
            mode='HDF',
            tablename='eod',
        )
        # ``rename`` is defined outside this block.
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, 'data/eod')
        date_dtype = stored.dtypes['date']
        assert date_dtype == dtype('<M8[ns]')
示例#9
0
 def test_existing_database(self):
     """Load into SQLite after duplicating one input file under a new name.

     ``INDEX_20180731.txt`` is copied to ``INDEX_20000000.txt`` inside the
     data directory before loading; the test then expects 12053 rows in
     ``eod`` and 6 rows in ``updated_eod``.
     """
     db = create_engine('sqlite://')
     loader = DataLoader(
         'tests/data/eoddata',
         engine=db,
         mode='SQL',
         tablename='eod',
     )
     # NOTE(review): the copy is not removed in this block -- confirm that
     # cleanup happens elsewhere.
     source_file = 'tests/data/eoddata/INDEX_20180731.txt'
     extra_file = 'tests/data/eoddata/INDEX_20000000.txt'
     shutil.copy2(source_file, extra_file)
     loader.load_data()
     data_rows = len(pd.read_sql_table('eod', db))
     self.assertEqual(data_rows, 12053)
     updated_rows = len(pd.read_sql_table('updated_eod', db))
     self.assertEqual(updated_rows, 6)
示例#10
0
 def test_existing_hdf_file(self):
     """Load into an HDF file after duplicating one input file under a new name.

     ``INDEX_20180731.txt`` is copied to ``INDEX_20000000.txt`` inside the
     data directory before loading; the test then expects 12053 rows under
     ``data/eod`` and 6 rows under ``updated/eod``.
     """
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             'tests/data/eoddata',
             engine=hdf_path,
             mode='HDF',
             tablename='eod',
         )
         # NOTE(review): the copy is not removed in this block -- confirm
         # that cleanup happens elsewhere.
         source_file = 'tests/data/eoddata/INDEX_20180731.txt'
         extra_file = 'tests/data/eoddata/INDEX_20000000.txt'
         shutil.copy2(source_file, extra_file)
         loader.load_data()
         data_rows = len(pd.read_hdf(hdf_path, 'data/eod'))
         self.assertEqual(data_rows, 12053)
         updated_rows = len(pd.read_hdf(hdf_path, 'updated/eod'))
         self.assertEqual(updated_rows, 6)
示例#11
0
def test_SQL_rename_columns():
    """Check row count and column names of the ``eod`` table after renaming.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine with
    ``columns=rename`` (``rename`` is defined outside this block).
    """
    db = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=db,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename)
    stored = pd.read_sql_table("eod", db)
    assert len(stored) == 10030
    expected_names = ["symbol", "date", "open", "high", "low", "close", "vol"]
    # zip stops at the shorter sequence, so only paired names are compared.
    for actual, expected in zip(stored.columns, expected_names):
        assert actual == expected
示例#12
0
def test_SQL_rename_columns():
    """Check row count and column names of the ``eod`` table after renaming.

    Loads ``tests/data/eoddata`` into a ``sqlite://`` engine with
    ``columns=rename`` (``rename`` is defined outside this block).
    """
    db = create_engine('sqlite://')
    loader = DataLoader(
        'tests/data/eoddata',
        engine=db,
        mode='SQL',
        tablename='eod',
    )
    loader.load_data(columns=rename)
    stored = pd.read_sql_table('eod', db)
    assert len(stored) == 10030
    expected_names = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
    # zip stops at the shorter sequence, so only paired names are compared.
    for actual, expected in zip(stored.columns, expected_names):
        assert actual == expected
示例#13
0
def test_HDF_rename_columns():
    """Check row counts and column names of the HDF store after renaming.

    Loads ``tests/data/eoddata`` into a temporary file with
    ``columns=rename`` (``rename`` is defined outside this block).
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, "data/eod")
        assert len(stored) == 10030
        updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
        assert updated_rows == 5
        expected_names = ["symbol", "date", "open", "high", "low", "close", "vol"]
        # zip stops at the shorter sequence, so only paired names are compared.
        for actual, expected in zip(stored.columns, expected_names):
            assert actual == expected
示例#14
0
def test_HDF_rename_columns():
    """Check row counts and column names of the HDF store after renaming.

    Loads ``tests/data/eoddata`` into a temporary file with
    ``columns=rename`` (``rename`` is defined outside this block).
    """
    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            'tests/data/eoddata',
            engine=hdf_path,
            mode='HDF',
            tablename='eod',
        )
        loader.load_data(columns=rename)
        stored = pd.read_hdf(hdf_path, 'data/eod')
        assert len(stored) == 10030
        updated_rows = len(pd.read_hdf(hdf_path, 'updated/eod'))
        assert updated_rows == 5
        expected_names = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
        # zip stops at the shorter sequence, so only paired names are compared.
        for actual, expected in zip(stored.columns, expected_names):
            assert actual == expected
示例#15
0
 def test_existing_database(self):
     """Load into SQLite after duplicating one input file under a new name.

     ``INDEX_20180731.txt`` is copied to ``INDEX_20000000.txt`` inside the
     data directory before loading; the test then expects 12053 rows in
     ``eod`` and 6 rows in ``updated_eod``.
     """
     db = create_engine("sqlite://")
     loader = DataLoader(
         "tests/data/eoddata",
         engine=db,
         mode="SQL",
         tablename="eod",
     )
     # NOTE(review): the copy is not removed in this block -- confirm that
     # cleanup happens elsewhere.
     source_file = "tests/data/eoddata/INDEX_20180731.txt"
     extra_file = "tests/data/eoddata/INDEX_20000000.txt"
     shutil.copy2(source_file, extra_file)
     loader.load_data()
     data_rows = len(pd.read_sql_table("eod", db))
     self.assertEqual(data_rows, 12053)
     updated_rows = len(pd.read_sql_table("updated_eod", db))
     self.assertEqual(updated_rows, 6)
示例#16
0
 def test_existing_hdf_file(self):
     """Load into an HDF file after duplicating one input file under a new name.

     ``INDEX_20180731.txt`` is copied to ``INDEX_20000000.txt`` inside the
     data directory before loading; the test then expects 12053 rows under
     ``data/eod`` and 6 rows under ``updated/eod``.
     """
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             "tests/data/eoddata",
             engine=hdf_path,
             mode="HDF",
             tablename="eod",
         )
         # NOTE(review): the copy is not removed in this block -- confirm
         # that cleanup happens elsewhere.
         source_file = "tests/data/eoddata/INDEX_20180731.txt"
         extra_file = "tests/data/eoddata/INDEX_20000000.txt"
         shutil.copy2(source_file, extra_file)
         loader.load_data()
         data_rows = len(pd.read_hdf(hdf_path, "data/eod"))
         self.assertEqual(data_rows, 12053)
         updated_rows = len(pd.read_hdf(hdf_path, "updated/eod"))
         self.assertEqual(updated_rows, 6)
示例#17
0
def test_collate_data():
    """Compare ``collate_data`` output with what ``DataLoader`` stores in SQL.

    Both frames are sorted by ``date`` and ``symbol``; their lengths must
    match and ``compare`` (defined outside this block) must hold on each of
    100 calls with the same pair of frames.
    """
    collated = (
        collate_data('tests/data/NASDAQ/data', parse_dates=['Date'])
        .rename(lambda name: name.lower(), axis='columns')
        .sort_values(by=['date', 'symbol'])
    )
    db = create_engine('sqlite://')
    loader = DataLoader(
        directory='tests/data/NASDAQ/data',
        mode='SQL',
        engine=db,
        tablename='eod',
    )
    loader.load_data()
    stored = pd.read_sql_table('eod', db)
    stored = stored.sort_values(by=['date', 'symbol'])
    assert len(collated) == len(stored)
    for _ in range(100):
        assert compare(collated, stored)
示例#18
0
def test_collate_data():
    """Compare ``collate_data`` output with what ``DataLoader`` stores in SQL.

    Both frames are sorted by ``date`` and ``symbol``; their lengths must
    match and ``compare`` (defined outside this block) must hold on each of
    100 calls with the same pair of frames.
    """
    collated = (
        collate_data("tests/data/NASDAQ/data", parse_dates=["Date"])
        .rename(lambda name: name.lower(), axis="columns")
        .sort_values(by=["date", "symbol"])
    )
    db = create_engine("sqlite://")
    loader = DataLoader(
        directory="tests/data/NASDAQ/data",
        mode="SQL",
        engine=db,
        tablename="eod",
    )
    loader.load_data()
    stored = pd.read_sql_table("eod", db)
    stored = stored.sort_values(by=["date", "symbol"])
    assert len(collated) == len(stored)
    for _ in range(100):
        assert compare(collated, stored)
示例#19
0
    def test_wrong_mode(self):
        """Check the failure paths for a mismatched engine and an unknown mode.

        With a temporary file name as the engine and ``mode='SQL'``,
        ``load_data`` must raise; constructing a loader with ``mode='CSV'``
        must raise ``TypeError``.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            loader = DataLoader(
                'tests/data/eoddata',
                engine=tmp.name,
                mode='SQL',
                tablename='eod',
            )
            with self.assertRaises(Exception):
                loader.load_data()

        bad_mode_kwargs = {
            'engine': 'some_random_mode',
            'mode': 'CSV',
            'tablename': 'eod',
        }
        with self.assertRaises(TypeError):
            DataLoader('tests/data/eoddata', **bad_mode_kwargs)
示例#20
0
    def test_wrong_mode(self):
        """Check the failure paths for a mismatched engine and an unknown mode.

        With a temporary file name as the engine and ``mode="SQL"``,
        ``load_data`` must raise; constructing a loader with ``mode="CSV"``
        must raise ``TypeError``.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            loader = DataLoader(
                "tests/data/eoddata",
                engine=tmp.name,
                mode="SQL",
                tablename="eod",
            )
            with self.assertRaises(Exception):
                loader.load_data()

        bad_mode_kwargs = {
            "engine": "some_random_mode",
            "mode": "CSV",
            "tablename": "eod",
        }
        with self.assertRaises(TypeError):
            DataLoader("tests/data/eoddata", **bad_mode_kwargs)
示例#21
0
def test_HDF_post_func():
    """Check that a ``postfunc`` callback can add columns in HDF mode.

    The callback adds ``filename`` and ``avgprice``; the stored frame must
    then have 9 columns and a datetime64[ns] ``date`` column.
    """

    def add_filename(x, y, z):
        # Store ``y`` under 'filename' and the open/close mean under
        # 'avgprice'; ``z`` is unused.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            'tests/data/eoddata',
            engine=hdf_path,
            mode='HDF',
            tablename='eod',
        )
        # ``rename`` is defined outside this block.
        loader.load_data(columns=rename, postfunc=add_filename)
        stored = pd.read_hdf(hdf_path, 'data/eod')
        assert stored.dtypes['date'] == dtype('<M8[ns]')
        column_count = stored.shape[1]
        assert column_count == 9
        for added in ('filename', 'avgprice'):
            assert added in stored.columns
示例#22
0
def test_SQL_post_func():
    """Check that a ``postfunc`` callback can add columns in SQL mode.

    The callback adds ``filename`` and ``avgprice``; the stored table must
    then have 9 columns and a datetime64[ns] ``date`` column.
    """

    def add_filename(x, y, z):
        # Store ``y`` under 'filename' and the open/close mean under
        # 'avgprice'; ``z`` is unused.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    db = create_engine('sqlite://')
    loader = DataLoader(
        'tests/data/eoddata',
        engine=db,
        mode='SQL',
        tablename='eod',
    )
    # ``rename`` is defined outside this block.
    loader.load_data(columns=rename, postfunc=add_filename)
    stored = pd.read_sql_table('eod', db)
    assert stored.dtypes['date'] == dtype('<M8[ns]')
    column_count = stored.shape[1]
    assert column_count == 9
    for added in ('filename', 'avgprice'):
        assert added in stored.columns
示例#23
0
def test_SQL_post_func():
    """Check that a ``postfunc`` callback can add columns in SQL mode.

    The callback adds ``filename`` and ``avgprice``; the stored table must
    then have 9 columns and a datetime64[ns] ``date`` column.
    """

    def add_filename(x, y, z):
        # Store ``y`` under "filename" and the open/close mean under
        # "avgprice"; ``z`` is unused.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    db = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=db,
        mode="SQL",
        tablename="eod",
    )
    # ``rename`` is defined outside this block.
    loader.load_data(columns=rename, postfunc=add_filename)
    stored = pd.read_sql_table("eod", db)
    assert stored.dtypes["date"] == dtype("<M8[ns]")
    column_count = stored.shape[1]
    assert column_count == 9
    for added in ("filename", "avgprice"):
        assert added in stored.columns
示例#24
0
def test_HDF_post_func():
    """Check that a ``postfunc`` callback can add columns in HDF mode.

    The callback adds ``filename`` and ``avgprice``; the stored frame must
    then have 9 columns and a datetime64[ns] ``date`` column.
    """

    def add_filename(x, y, z):
        # Store ``y`` under "filename" and the open/close mean under
        # "avgprice"; ``z`` is unused.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        hdf_path = tmp.name
        loader = DataLoader(
            "tests/data/eoddata",
            engine=hdf_path,
            mode="HDF",
            tablename="eod",
        )
        # ``rename`` is defined outside this block.
        loader.load_data(columns=rename, postfunc=add_filename)
        stored = pd.read_hdf(hdf_path, "data/eod")
        assert stored.dtypes["date"] == dtype("<M8[ns]")
        column_count = stored.shape[1]
        assert column_count == 9
        for added in ("filename", "avgprice"):
            assert added in stored.columns
示例#25
0
def test_apply_split_HDF_dataloader():
    """Check split-adjusted data from the HDF loader against reference results.

    Loads ``tests/data/NASDAQ/data`` into a temporary HDF file, applies the
    adjustments in ``tests/data/NASDAQ/adjustments/`` and, for every symbol
    listed in ``splits.csv``, compares the stored rows with
    ``nasdaq_results.csv`` cell by cell. Price/volume columns are compared
    with an absolute tolerance of 0.015; other columns must be equal.
    """
    tolerant_columns = ["open", "high", "low", "close", "volume"]
    with tempfile.NamedTemporaryFile() as tmp:
        engine = tmp.name
        loader = DataLoader(directory="tests/data/NASDAQ/data",
                            mode="HDF",
                            engine=engine,
                            tablename="eod")
        loader.load_data()
        loader.apply_splits(directory="tests/data/NASDAQ/adjustments/")
        adjusted = pd.read_hdf(engine, "data/eod")
        reference = pd.read_csv(
            "tests/data/NASDAQ/nasdaq_results.csv", parse_dates=["date"]
        )
        splits = pd.read_csv(
            "tests/data/NASDAQ/adjustments/splits.csv", parse_dates=["date"]
        )
        for _, split_row in splits.iterrows():
            sym = split_row.at["symbol"]
            cond = 'symbol == "{}"'.format(sym)
            # Re-index both selections from 0 so rows can be paired by position.
            got = adjusted.query(cond).sort_values(by="date")
            got = got.reset_index(drop=True)
            want = reference.query(cond).sort_values(by="date")
            want = want.reset_index(drop=True)
            for row_no in range(len(got)):
                for column in got.columns:
                    left = got.loc[row_no, column]
                    right = want.loc[row_no, column]
                    if column in tolerant_columns:
                        print(left, right, sym)
                        assert isclose(left, right, abs_tol=0.015)
                    else:
                        assert left == right
示例#26
0
def test_apply_split_SQL_dataloader():
    """Check split-adjusted data from the SQL loader against reference results.

    Loads ``tests/data/NASDAQ/data`` into a ``sqlite://`` engine, applies the
    adjustments in ``tests/data/NASDAQ/adjustments/`` and, for every symbol
    listed in ``splits.csv``, compares the stored rows with
    ``nasdaq_results.csv`` cell by cell. Price/volume columns are compared
    with an absolute tolerance of 0.015; other columns must be equal.
    """
    engine = create_engine("sqlite://")
    dl = DataLoader(directory="tests/data/NASDAQ/data",
                    mode="SQL",
                    engine=engine,
                    tablename="eod")
    dl.load_data()
    dl.apply_splits(directory="tests/data/NASDAQ/adjustments/")
    df = pd.read_sql_table("eod", engine)
    # Use the same tests/data/ prefix as the other fixture paths in this test
    # so that every path resolves from one working directory.
    result = pd.read_csv("tests/data/NASDAQ/nasdaq_results.csv",
                         parse_dates=["date"])
    splits = pd.read_csv("tests/data/NASDAQ/adjustments/splits.csv",
                         parse_dates=["date"])
    tolerant_columns = ("open", "high", "low", "close", "volume")
    for _, row in splits.iterrows():
        sym = row.at["symbol"]
        cond = 'symbol == "{}"'.format(sym)
        # Re-index both selections from 0 so rows can be paired by position.
        frame1 = df.query(cond).sort_values(by="date").reset_index(drop=True)
        frame2 = result.query(cond).sort_values(by="date").reset_index(
            drop=True)
        # A separate row index avoids rebinding the outer loop variable.
        for row_no in range(len(frame1)):
            for column in frame1.columns:
                a = frame1.loc[row_no, column]
                b = frame2.loc[row_no, column]
                if column in tolerant_columns:
                    assert isclose(a, b, abs_tol=0.015)
                else:
                    assert a == b
示例#27
0
def test_apply_split_HDF_dataloader():
    """Check split-adjusted data from the HDF loader against reference results.

    Loads ``tests/data/NASDAQ/data`` into a temporary HDF file, applies the
    adjustments in ``tests/data/NASDAQ/adjustments/`` and, for every symbol
    listed in ``splits.csv``, compares the stored rows with
    ``nasdaq_results.csv`` cell by cell. Price/volume columns are compared
    with an absolute tolerance of 0.015; other columns must be equal.
    """
    tolerant_columns = ['open', 'high', 'low', 'close', 'volume']
    with tempfile.NamedTemporaryFile() as tmp:
        engine = tmp.name
        loader = DataLoader(
            directory='tests/data/NASDAQ/data',
            mode='HDF',
            engine=engine,
            tablename='eod',
        )
        loader.load_data()
        loader.apply_splits(directory='tests/data/NASDAQ/adjustments/')
        adjusted = pd.read_hdf(engine, 'data/eod')
        reference = pd.read_csv(
            'tests/data/NASDAQ/nasdaq_results.csv', parse_dates=['date']
        )
        splits = pd.read_csv(
            'tests/data/NASDAQ/adjustments/splits.csv', parse_dates=['date']
        )
        for _, split_row in splits.iterrows():
            sym = split_row.at['symbol']
            cond = 'symbol == "{}"'.format(sym)
            # Re-index both selections from 0 so rows can be paired by position.
            got = adjusted.query(cond).sort_values(by='date')
            got = got.reset_index(drop=True)
            want = reference.query(cond).sort_values(by='date')
            want = want.reset_index(drop=True)
            for row_no in range(len(got)):
                for column in got.columns:
                    left = got.loc[row_no, column]
                    right = want.loc[row_no, column]
                    if column in tolerant_columns:
                        print(left, right, sym)
                        assert isclose(left, right, abs_tol=0.015)
                    else:
                        assert left == right
示例#28
0
 def test_run_loader_multiple_times(self):
     """Run ``load_data`` five times per backend and compare stored row counts.

     The HDF and SQL stores must end up with the same number of rows, and
     that number must be 12053.
     """
     # NOTE(review): 12053 matches the count asserted by the tests that copy
     # INDEX_20000000.txt into the data directory -- confirm this test relies
     # on that file being present.
     repeat = 5
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             "tests/data/eoddata",
             engine=hdf_path,
             mode="HDF",
             tablename="eod",
         )
         for _ in range(repeat):
             loader.load_data()
         engine = create_engine("sqlite://")
         loader = DataLoader(
             "tests/data/eoddata",
             engine=engine,
             mode="SQL",
             tablename="eod",
         )
         for _ in range(repeat):
             loader.load_data()
         hdf_rows = len(pd.read_hdf(hdf_path, "data/eod"))
         sql_rows = len(pd.read_sql_table("eod", engine))
         self.assertEqual(hdf_rows, sql_rows)
         self.assertEqual(hdf_rows, 12053)
示例#29
0
 def test_run_loader_multiple_times(self):
     """Run ``load_data`` five times per backend and compare stored row counts.

     The HDF and SQL stores must end up with the same number of rows, and
     that number must be 12053.
     """
     # NOTE(review): 12053 matches the count asserted by the tests that copy
     # INDEX_20000000.txt into the data directory -- confirm this test relies
     # on that file being present.
     repeat = 5
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_path = tmp.name
         loader = DataLoader(
             'tests/data/eoddata',
             engine=hdf_path,
             mode='HDF',
             tablename='eod',
         )
         for _ in range(repeat):
             loader.load_data()
         engine = create_engine('sqlite://')
         loader = DataLoader(
             'tests/data/eoddata',
             engine=engine,
             mode='SQL',
             tablename='eod',
         )
         for _ in range(repeat):
             loader.load_data()
         hdf_rows = len(pd.read_hdf(hdf_path, 'data/eod'))
         sql_rows = len(pd.read_sql_table('eod', engine))
         self.assertEqual(hdf_rows, sql_rows)
         self.assertEqual(hdf_rows, 12053)