def test_build_from_cache(self):
    """
    Build the same package twice and verify that the cache is used
    and that the package is successfully generated.
    """
    mydir = os.path.dirname(__file__)
    path = os.path.join(mydir, './build_large.yml')
    teststore = store.PackageStore()

    # Build once to populate cache
    build.build_package(None, 'test_cache', PACKAGE, path)

    # Verify cache contents
    srcpath = os.path.join(mydir, 'data/10KRows13Cols.csv')
    path_hash = build._path_hash(srcpath, 'csv', {'parse_dates': ['Date0']})
    assert os.path.exists(teststore.cache_path(path_hash))

    # Build again using the cache
    build.build_package(None, 'test_cache', PACKAGE, path)

    # TODO load DFs based on contents of .yml file at PATH
    # not hardcoded vals (this will require loading modules from variable
    # names, probably using __module__)
    from quilt.data.test_cache.groot import dataframes, README

    self._test_dataframes(dataframes)
    assert os.path.exists(README())
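# NOTE: _test_dataframes is called here but defined elsewhere in the suite.
# A minimal sketch of what it is assumed to check, mirroring the inline
# row/column assertions in test_build_parquet_pyarrow below (the real
# helper may differ):
def _test_dataframes(self, dataframes):
    # Each format in the build file should parse into congruent dataframes.
    csv = dataframes.csv()
    tsv = dataframes.tsv()
    xls = dataframes.xls()
    rows = len(csv.index)
    assert rows == len(tsv.index) and rows == len(xls.index), \
        'Expected dataframes to have same # rows'
    cols = len(csv.columns)
    assert cols == len(tsv.columns) and cols == len(xls.columns), \
        'Expected dataframes to have same # columns'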
def test_build_parquet_pyarrow(self):
    """
    Test compilation to Parquet via pyarrow
    """
    os.environ["QUILT_PARQUET_LIBRARY"] = ParquetLib.ARROW.value
    Package.reset_parquet_lib()
    mydir = os.path.dirname(__file__)
    path = os.path.join(mydir, './build.yml')
    build.build_package('test_arrow', PACKAGE, path)

    # TODO load DFs based on contents of .yml file at path
    # not hardcoded vals (this will require loading modules from variable
    # names, probably using __module__)
    from quilt.data.test_arrow.groot import dataframes, README

    csv = dataframes.csv()
    tsv = dataframes.tsv()  # was dataframes.csv(), which made the checks below vacuous
    xls = dataframes.xls()

    rows = len(csv.index)
    assert rows == len(tsv.index) and rows == len(xls.index), \
        'Expected dataframes to have same # rows'

    cols = len(csv.columns)
    print(csv.columns, xls.columns, tsv.columns)
    assert cols == len(tsv.columns) and cols == len(xls.columns), \
        'Expected dataframes to have same # columns'

    assert os.path.exists(README())
    # TODO add more integrity checks, incl. negative test cases
    assert Package.get_parquet_lib() is ParquetLib.ARROW
    del os.environ["QUILT_PARQUET_LIBRARY"]
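# The test above mutates os.environ directly; if an assertion fails before the
# trailing `del`, QUILT_PARQUET_LIBRARY leaks into later tests. A minimal
# sketch of a leak-proof variant using the standard library's
# unittest.mock.patch.dict (a hypothetical test, not part of the original
# suite):
def test_parquet_lib_env_is_scoped(self):
    """Select pyarrow via the env var without leaking it across tests."""
    from unittest import mock
    # patch.dict restores os.environ even if the body raises
    with mock.patch.dict(os.environ,
                         {"QUILT_PARQUET_LIBRARY": ParquetLib.ARROW.value}):
        Package.reset_parquet_lib()
        assert Package.get_parquet_lib() is ParquetLib.ARROW
    # back outside, the variable is gone (assuming it was unset beforehand)
    assert "QUILT_PARQUET_LIBRARY" not in os.environ
    Package.reset_parquet_lib()  # fall back to the default library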
def test_build_parquet_default(self):
    """
    Test compilation to Parquet via the default library
    """
    PackageStore.reset_parquet_lib()
    mydir = os.path.dirname(__file__)
    path = os.path.join(mydir, './build_large.yml')
    build.build_package(None, 'test_parquet', PACKAGE, path)

    # TODO load DFs based on contents of .yml file at PATH
    # not hardcoded vals (this will require loading modules from variable
    # names, probably using __module__)
    from quilt.data.test_parquet.groot import dataframes, README

    self._test_dataframes(dataframes)
    assert os.path.exists(README())
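# The recurring TODO above asks to derive the nodes from the .yml file rather
# than hardcoding quilt.data.<user>.<package> imports. A rough sketch of one
# approach using importlib and PyYAML; the _load_nodes_from_yml name and its
# arguments are hypothetical, and the build file is assumed to name its
# top-level nodes under a `contents` mapping:
def _load_nodes_from_yml(self, user, pkg, yml_path):
    import importlib
    import yaml
    # the module layout is inferred from the imports in the tests above
    module = importlib.import_module('quilt.data.%s.%s' % (user, pkg))
    with open(yml_path) as fd:
        contents = yaml.safe_load(fd)['contents']
    # look up each node named in the build file on the generated module
    return {name: getattr(module, name) for name in contents}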