Example #1
0
    def test_build_from_cache(self):
        """
        Build the same package twice and verify that the cache is used and
        that the package is successfully generated.
        """
        test_dir = os.path.dirname(__file__)
        build_file = os.path.join(test_dir, './build_large.yml')
        pkg_store = store.PackageStore()

        # First build: populates the on-disk object cache.
        build.build_package(None, 'test_cache', PACKAGE, build_file)

        # The cache entry keyed by (source path, parser, parser kwargs)
        # must exist after the initial build.
        csv_src = os.path.join(test_dir, 'data/10KRows13Cols.csv')
        cache_key = build._path_hash(csv_src, 'csv',
                                     {'parse_dates': ['Date0']})
        assert os.path.exists(pkg_store.cache_path(cache_key))

        # Second build: should be satisfied from the cache.
        build.build_package(None, 'test_cache', PACKAGE, build_file)

        # TODO load DFs based on contents of .yml file at PATH
        # not hardcoded vals (this will require loading modules from variable
        # names, probably using __module__)
        from quilt.data.test_cache.groot import dataframes, README
        self._test_dataframes(dataframes)
        assert os.path.exists(README())
Example #2
0
 def test_build_parquet_pyarrow(self):
     """
     Build a package with Parquet serialization forced to the pyarrow
     library (via QUILT_PARQUET_LIBRARY) and verify the csv/tsv/xls
     dataframes agree in shape.
     """
     os.environ["QUILT_PARQUET_LIBRARY"] = ParquetLib.ARROW.value
     try:
         Package.reset_parquet_lib()
         mydir = os.path.dirname(__file__)
         path = os.path.join(mydir, './build.yml')
         build.build_package('test_arrow', PACKAGE, path)
         # TODO load DFs based on contents of .yml file at path
         # not hardcoded vals (this will require loading modules from variable
         # names, probably using __module__)
         from quilt.data.test_arrow.groot import dataframes, README
         csv = dataframes.csv()
         # BUG FIX: tsv was previously loaded via dataframes.csv(), so the
         # tsv-vs-csv comparisons below compared a frame against itself.
         tsv = dataframes.tsv()
         xls = dataframes.xls()
         rows = len(csv.index)
         assert rows == len(tsv.index) and rows == len(xls.index), \
             'Expected dataframes to have same # rows'
         cols = len(csv.columns)
         print(csv.columns, xls.columns, tsv.columns)
         assert cols == len(tsv.columns) and cols == len(xls.columns), \
             'Expected dataframes to have same # columns'
         assert os.path.exists(README())
         # TODO add more integrity checks, incl. negative test cases
         assert Package.get_parquet_lib() is ParquetLib.ARROW
     finally:
         # Restore the environment even if an assertion fails, so later
         # tests see the default Parquet library again.
         del os.environ["QUILT_PARQUET_LIBRARY"]
Example #3
0
 def test_build_parquet_default(self):
     """
     Build a package with whatever Parquet library is the default and
     check that the generated dataframes load correctly.
     """
     PackageStore.reset_parquet_lib()
     base_dir = os.path.dirname(__file__)
     yml_path = os.path.join(base_dir, './build_large.yml')
     build.build_package(None, 'test_parquet', PACKAGE, yml_path)
     # TODO load DFs based on contents of .yml file at PATH
     # not hardcoded vals (this will require loading modules from variable
     # names, probably using __module__)
     from quilt.data.test_parquet.groot import dataframes, README
     self._test_dataframes(dataframes)
     assert os.path.exists(README())
Example #4
0
 def test_build_parquet_pyarrow(self):
     """
     Build a package with the pyarrow Parquet backend selected through
     the QUILT_PARQUET_LIBRARY environment variable, then verify the
     generated dataframes and that pyarrow was actually used.
     """
     os.environ["QUILT_PARQUET_LIBRARY"] = ParquetLib.ARROW.value
     Package.reset_parquet_lib()
     base_dir = os.path.dirname(__file__)
     yml_path = os.path.join(base_dir, './build.yml')
     build.build_package('test_arrow', PACKAGE, yml_path)
     from quilt.data.test_arrow.groot import dataframes, README
     self._test_dataframes(dataframes)
     assert os.path.exists(README())
     assert Package.get_parquet_lib() is ParquetLib.ARROW
     del os.environ["QUILT_PARQUET_LIBRARY"]
Example #5
0
 def test_build_parquet_default(self):
     """
     Build a package using the default Parquet library and verify the
     generated csv/tsv/xls dataframes agree in shape.
     """
     Package.reset_parquet_lib()
     mydir = os.path.dirname(__file__)
     path = os.path.join(mydir, './build.yml')
     build.build_package('test_parquet', PACKAGE, path)
     # TODO load DFs based on contents of .yml file at PATH
     # not hardcoded vals (this will require loading modules from variable
     # names, probably using __module__)
     from quilt.data.test_parquet.groot import dataframes, README
     csv = dataframes.csv()
     # BUG FIX: tsv was previously loaded via dataframes.csv(), making the
     # tsv-vs-csv shape comparisons below compare a frame against itself.
     tsv = dataframes.tsv()
     xls = dataframes.xls()
     rows = len(csv.index)
     assert rows == len(tsv.index) and rows == len(xls.index), \
         'Expected dataframes to have same # rows'
     assert os.path.exists(README())
     cols = len(csv.columns)
     print(csv.columns, xls.columns, tsv.columns)
     assert cols == len(tsv.columns) and cols == len(xls.columns), \
         'Expected dataframes to have same # columns'