Example #1
 def test_build_parquet_pyarrow(self):
     """
     Test compilation to Parquet via pyarrow
     """
     os.environ["QUILT_PARQUET_LIBRARY"] = ParquetLib.ARROW.value
     Package.reset_parquet_lib()
     mydir = os.path.dirname(__file__)
     path = os.path.join(mydir, './build.yml')
     build.build_package('test_arrow', PACKAGE, path)
     # TODO load DFs based on contents of .yml file at path
     # not hardcoded vals (this will require loading modules from variable
     # names, probably using __module__)
     from quilt.data.test_arrow.groot import dataframes, README
     csv = dataframes.csv()
     tsv = dataframes.tsv()
     xls = dataframes.xls()
     rows = len(csv.index)
     assert rows == len(tsv.index) and rows == len(xls.index), \
         'Expected dataframes to have same # rows'
     cols = len(csv.columns)
     print(csv.columns, xls.columns, tsv.columns)
     assert cols == len(tsv.columns) and cols == len(xls.columns), \
         'Expected dataframes to have same # columns'
     assert os.path.exists(README())
     # TODO add more integrity checks, incl. negative test cases
     assert Package.get_parquet_lib() is ParquetLib.ARROW
     del os.environ["QUILT_PARQUET_LIBRARY"]
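
Note that the example above sets QUILT_PARQUET_LIBRARY at the top and only deletes it on its last line, so a failing assertion in between leaves the variable set for later tests. Below is a minimal sketch of a more defensive variant using unittest.mock.patch.dict; it assumes the same build, Package, ParquetLib, and PACKAGE names as the example, and that the fixture also contains every node read by the _test_dataframes helper from Example #2.

 from unittest.mock import patch   # module-level import in this sketch

 def test_build_parquet_pyarrow(self):
     """
     Test compilation to Parquet via pyarrow
     """
     # patch.dict restores os.environ even if an assertion below fails
     with patch.dict(os.environ, {"QUILT_PARQUET_LIBRARY": ParquetLib.ARROW.value}):
         Package.reset_parquet_lib()
         path = os.path.join(os.path.dirname(__file__), 'build.yml')
         build.build_package('test_arrow', PACKAGE, path)
         assert Package.get_parquet_lib() is ParquetLib.ARROW
         from quilt.data.test_arrow.groot import dataframes, README
         self._test_dataframes(dataframes)    # shared checks, see Example #2
         assert os.path.exists(README())
     Package.reset_parquet_lib()              # drop the cached choice once the env var is gone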
Example #2
 def _test_dataframes(self, dataframes):
     """
     Shared integrity checks for the dataframes group of a built package.
     """
     csv = dataframes.csv()
     tsv = dataframes.tsv()
     xls = dataframes.xls()
     xls_skip = dataframes.xls_skip()
     rows = len(csv.index)
     assert rows == len(tsv.index) and rows == len(xls.index), \
         'Expected dataframes to have same # rows'
     cols = len(csv.columns)
     assert cols == len(tsv.columns) and cols == len(xls.columns), \
         'Expected dataframes to have same # columns'
     assert xls_skip.shape == (9997, 13), \
         'Expected 9,997 rows and 13 columns'
     nulls = dataframes.nulls()
     assert ptypes.is_string_dtype(nulls['strings']), \
         'Expected column of strings to deserialize as strings'
     assert ptypes.is_integer_dtype(nulls['integers']), \
         'Expected column of integers to deserialize as integers'
     assert ptypes.is_float_dtype(nulls['floats']), \
         'Expected column of floats to deserialize as floats'
     assert ptypes.is_numeric_dtype(nulls['integers_nulled']), \
         'Expected column of ints with nulls to deserialize as numeric'
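
Examples #1 and #3 repeat the row/column checks inline; a caller of this helper can be much shorter. Here is a sketch of how test_build_parquet_default could delegate to it, assuming the same build, Package, and PACKAGE names as above and a fixture that also provides the tsv, xls_skip, and nulls nodes the helper reads.

 def test_build_parquet_default(self):
     """
     Test compilation to Parquet via the default library
     """
     Package.reset_parquet_lib()
     path = os.path.join(os.path.dirname(__file__), 'build.yml')
     build.build_package('test_parquet', PACKAGE, path)
     from quilt.data.test_parquet.groot import dataframes, README
     self._test_dataframes(dataframes)    # one call replaces the inline asserts
     assert os.path.exists(README())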
Example #3
 def test_build_parquet_default(self):
     """
     Test compilation to Parquet via the default library
     """
     Package.reset_parquet_lib()
     mydir = os.path.dirname(__file__)
     path = os.path.join(mydir, './build.yml')
     build.build_package('test_parquet', PACKAGE, path)
     # TODO load DFs based on contents of .yml file at path
     # not hardcoded vals (this will require loading modules from variable
     # names, probably using __module__)
     from quilt.data.test_parquet.groot import dataframes, README
     csv = dataframes.csv()
     tsv = dataframes.tsv()
     xls = dataframes.xls()
     rows = len(csv.index)
     assert rows == len(tsv.index) and rows == len(xls.index), \
         'Expected dataframes to have same # rows'
     assert os.path.exists(README())
     cols = len(csv.columns)
     print(csv.columns, xls.columns, tsv.columns)
     assert cols == len(tsv.columns) and cols == len(xls.columns), \
         'Expected dataframes to have same # columns'
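
The TODO in Examples #1 and #3 asks for the dataframes to be loaded from the build.yml at path rather than hardcoded. A rough sketch of one way to do that, under the assumption that build.yml has a top-level contents mapping whose keys match the attribute names of the imported package; the helper name and the exact yml layout are hypothetical, not part of the quilt test suite.

 import importlib
 import yaml

 def _load_dataframes_from_yml(self, user, pkg, yml_path):
     """
     Hypothetical helper: read build.yml and fetch every node under the
     'dataframes' group from the installed quilt.data.<user>.<pkg> package.
     """
     with open(yml_path) as fd:
         contents = yaml.safe_load(fd)['contents']    # assumed build.yml layout
     pkg_module = importlib.import_module('quilt.data.%s.%s' % (user, pkg))
     group = pkg_module.dataframes
     # each key under contents['dataframes'] is assumed to name a dataframe node
     return {name: getattr(group, name)() for name in contents['dataframes']}

With such a helper the hardcoded csv/tsv/xls lookups above could become a loop over the returned dict, which is what the TODO is after.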