示例#1
0
 def test_existing_hdf_file(self):
     """Load the eoddata directory into HDF after adding one more file.

     A copy of an existing index file is written into the data directory
     under a new name after the loader is created; the row counts of the
     ``data/eod`` and ``updated/eod`` tables are then checked.
     """
     source_file = "tests/data/eoddata/INDEX_20180731.txt"
     extra_file = "tests/data/eoddata/INDEX_20000000.txt"
     with tempfile.NamedTemporaryFile() as tmp:
         loader = DataLoader(
             "tests/data/eoddata",
             engine=tmp.name,
             mode="HDF",
             tablename="eod",
         )
         # NOTE(review): the copied file is not removed in this test;
         # presumably cleanup happens elsewhere -- confirm.
         shutil.copy2(source_file, extra_file)
         loader.load_data()
         data_rows = len(pd.read_hdf(tmp.name, "data/eod"))
         self.assertEqual(data_rows, 12053)
         updated_rows = len(pd.read_hdf(tmp.name, "updated/eod"))
         self.assertEqual(updated_rows, 6)
示例#2
0
def test_HDF_rename_columns():
    """Check that ``load_data(columns=rename)`` stores renamed columns in HDF.

    Loads the eoddata directory into a temporary HDF file, then verifies
    the row counts of the ``data/eod`` and ``updated/eod`` tables and the
    exact column names of the loaded data.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader('tests/data/eoddata',
                        engine=fp.name,
                        mode='HDF',
                        tablename='eod')
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, 'data/eod')
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, 'updated/eod')) == 5
        cols = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
        # Compare the complete lists: zip() stops at the shorter input, so
        # a pairwise loop would not notice missing or extra columns.
        assert list(df.columns) == cols
示例#3
0
def test_HDF_rename_columns():
    """Check that ``load_data(columns=rename)`` stores renamed columns in HDF.

    Loads the eoddata directory into a temporary HDF file, then verifies
    the row counts of the ``data/eod`` and ``updated/eod`` tables and the
    exact column names of the loaded data.
    """
    with tempfile.NamedTemporaryFile() as fp:
        dl = DataLoader("tests/data/eoddata",
                        engine=fp.name,
                        mode="HDF",
                        tablename="eod")
        dl.load_data(columns=rename)
        df = pd.read_hdf(fp.name, "data/eod")
        assert len(df) == 10030
        assert len(pd.read_hdf(fp.name, "updated/eod")) == 5
        cols = ["symbol", "date", "open", "high", "low", "close", "vol"]
        # Compare the complete lists: zip() stops at the shorter input, so
        # a pairwise loop would not notice missing or extra columns.
        assert list(df.columns) == cols
示例#4
0
def test_collate_data():
    """Compare ``collate_data`` output with what DataLoader writes to SQL.

    Both frames are sorted by date and symbol before their lengths are
    compared and ``compare`` is evaluated on them.
    """
    expected = collate_data("tests/data/NASDAQ/data", parse_dates=["Date"])
    expected = expected.rename(lambda label: label.lower(), axis="columns")
    expected = expected.sort_values(by=["date", "symbol"])

    engine = create_engine("sqlite://")
    loader = DataLoader(
        directory="tests/data/NASDAQ/data",
        mode="SQL",
        engine=engine,
        tablename="eod",
    )
    loader.load_data()

    stored = pd.read_sql_table("eod", engine)
    stored = stored.sort_values(by=["date", "symbol"])
    assert len(expected) == len(stored)
    # The comparison is repeated 100 times.
    # NOTE(review): presumably compare() checks a random sample -- confirm.
    for _ in range(100):
        assert compare(expected, stored)
示例#5
0
    def test_wrong_mode(self):
        """Exercise two engine/mode combinations that are expected to fail.

        A file name used as the engine in SQL mode must raise when data is
        loaded, and constructing a loader with mode 'CSV' must raise
        ``TypeError``.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            loader = DataLoader(
                'tests/data/eoddata',
                engine=tmp.name,
                mode='SQL',
                tablename='eod',
            )
            with self.assertRaises(Exception):
                loader.load_data()

        bad_options = dict(engine='some_random_mode',
                           mode='CSV',
                           tablename='eod')
        with self.assertRaises(TypeError):
            DataLoader('tests/data/eoddata', **bad_options)
示例#6
0
def test_collate_data():
    """Compare ``collate_data`` output with what DataLoader writes to SQL.

    Both frames are sorted by date and symbol before their lengths are
    compared and ``compare`` is evaluated on them.
    """
    expected = collate_data('tests/data/NASDAQ/data', parse_dates=['Date'])
    expected = expected.rename(lambda label: label.lower(), axis='columns')
    expected = expected.sort_values(by=['date', 'symbol'])

    engine = create_engine('sqlite://')
    loader = DataLoader(
        directory='tests/data/NASDAQ/data',
        mode='SQL',
        engine=engine,
        tablename='eod',
    )
    loader.load_data()

    stored = pd.read_sql_table('eod', engine)
    stored = stored.sort_values(by=['date', 'symbol'])
    assert len(expected) == len(stored)
    # The comparison is repeated 100 times.
    # NOTE(review): presumably compare() checks a random sample -- confirm.
    for _ in range(100):
        assert compare(expected, stored)
示例#7
0
    def test_wrong_mode(self):
        """Exercise two engine/mode combinations that are expected to fail.

        A file name used as the engine in SQL mode must raise when data is
        loaded, and constructing a loader with mode "CSV" must raise
        ``TypeError``.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            loader = DataLoader(
                "tests/data/eoddata",
                engine=tmp.name,
                mode="SQL",
                tablename="eod",
            )
            with self.assertRaises(Exception):
                loader.load_data()

        bad_options = dict(engine="some_random_mode",
                           mode="CSV",
                           tablename="eod")
        with self.assertRaises(TypeError):
            DataLoader("tests/data/eoddata", **bad_options)
示例#8
0
def test_SQL_post_func():
    """Check that a ``postfunc`` callback can add columns in SQL mode.

    The callback adds a ``filename`` and an ``avgprice`` column; the table
    read back from the in-memory SQLite engine must contain both, have
    nine columns, and keep ``date`` as a datetime64[ns] column.
    """

    def add_filename(x, y, z):
        # Store the second argument as "filename" and the mean of the
        # open and close columns as "avgprice"; the third argument is unused.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    engine = create_engine("sqlite://")
    loader = DataLoader(
        "tests/data/eoddata",
        engine=engine,
        mode="SQL",
        tablename="eod",
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    table = pd.read_sql_table("eod", engine)
    assert table.dtypes["date"] == dtype("<M8[ns]")
    assert table.shape[1] == 9
    assert "filename" in table.columns
    assert "avgprice" in table.columns
示例#9
0
def test_HDF_post_func():
    """Check that a ``postfunc`` callback can add columns in HDF mode.

    The callback adds a ``filename`` and an ``avgprice`` column; the data
    read back from the temporary HDF file must contain both, have nine
    columns, and keep ``date`` as a datetime64[ns] column.
    """

    def add_filename(x, y, z):
        # Store the second argument as "filename" and the mean of the
        # open and close columns as "avgprice"; the third argument is unused.
        x["filename"] = y
        x["avgprice"] = (x["open"] + x["close"]) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        loader = DataLoader(
            "tests/data/eoddata",
            engine=tmp.name,
            mode="HDF",
            tablename="eod",
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        stored = pd.read_hdf(tmp.name, "data/eod")
        assert stored.dtypes["date"] == dtype("<M8[ns]")
        assert stored.shape[1] == 9
        assert "filename" in stored.columns
        assert "avgprice" in stored.columns
示例#10
0
def test_SQL_post_func():
    """Check that a ``postfunc`` callback can add columns in SQL mode.

    The callback adds a ``filename`` and an ``avgprice`` column; the table
    read back from the in-memory SQLite engine must contain both, have
    nine columns, and keep ``date`` as a datetime64[ns] column.
    """

    def add_filename(x, y, z):
        # Store the second argument as 'filename' and the mean of the
        # open and close columns as 'avgprice'; the third argument is unused.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    engine = create_engine('sqlite://')
    loader = DataLoader(
        'tests/data/eoddata',
        engine=engine,
        mode='SQL',
        tablename='eod',
    )
    loader.load_data(columns=rename, postfunc=add_filename)

    table = pd.read_sql_table('eod', engine)
    assert table.dtypes['date'] == dtype('<M8[ns]')
    assert table.shape[1] == 9
    assert 'filename' in table.columns
    assert 'avgprice' in table.columns
示例#11
0
def test_HDF_post_func():
    """Check that a ``postfunc`` callback can add columns in HDF mode.

    The callback adds a ``filename`` and an ``avgprice`` column; the data
    read back from the temporary HDF file must contain both, have nine
    columns, and keep ``date`` as a datetime64[ns] column.
    """

    def add_filename(x, y, z):
        # Store the second argument as 'filename' and the mean of the
        # open and close columns as 'avgprice'; the third argument is unused.
        x['filename'] = y
        x['avgprice'] = (x['open'] + x['close']) / 2
        return x

    with tempfile.NamedTemporaryFile() as tmp:
        loader = DataLoader(
            'tests/data/eoddata',
            engine=tmp.name,
            mode='HDF',
            tablename='eod',
        )
        loader.load_data(columns=rename, postfunc=add_filename)

        stored = pd.read_hdf(tmp.name, 'data/eod')
        assert stored.dtypes['date'] == dtype('<M8[ns]')
        assert stored.shape[1] == 9
        assert 'filename' in stored.columns
        assert 'avgprice' in stored.columns
示例#12
0
 def test_run_loader_multiple_times(self):
     """Run ``load_data`` five times per backend and compare row counts.

     The same directory is loaded repeatedly into a temporary HDF file
     and into an in-memory SQLite engine; both must end up with the same
     number of rows, and that number must be 12053.
     """
     runs = 5
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_loader = DataLoader(
             "tests/data/eoddata",
             engine=tmp.name,
             mode="HDF",
             tablename="eod",
         )
         for _ in range(runs):
             hdf_loader.load_data()

         engine = create_engine("sqlite://")
         sql_loader = DataLoader(
             "tests/data/eoddata",
             engine=engine,
             mode="SQL",
             tablename="eod",
         )
         for _ in range(runs):
             sql_loader.load_data()

         hdf_rows = len(pd.read_hdf(tmp.name, "data/eod"))
         sql_rows = len(pd.read_sql_table("eod", engine))
         self.assertEqual(hdf_rows, sql_rows)
         self.assertEqual(hdf_rows, 12053)
示例#13
0
 def test_run_loader_multiple_times(self):
     """Run ``load_data`` five times per backend and compare row counts.

     The same directory is loaded repeatedly into a temporary HDF file
     and into an in-memory SQLite engine; both must end up with the same
     number of rows, and that number must be 12053.
     """
     runs = 5
     with tempfile.NamedTemporaryFile() as tmp:
         hdf_loader = DataLoader(
             'tests/data/eoddata',
             engine=tmp.name,
             mode='HDF',
             tablename='eod',
         )
         for _ in range(runs):
             hdf_loader.load_data()

         engine = create_engine('sqlite://')
         sql_loader = DataLoader(
             'tests/data/eoddata',
             engine=engine,
             mode='SQL',
             tablename='eod',
         )
         for _ in range(runs):
             sql_loader.load_data()

         hdf_rows = len(pd.read_hdf(tmp.name, 'data/eod'))
         sql_rows = len(pd.read_sql_table('eod', engine))
         self.assertEqual(hdf_rows, sql_rows)
         self.assertEqual(hdf_rows, 12053)