示例#1
0
 def test_is_match(self):
     """
     Verify that ParquetData.is_match accepts the "path" value of every
     entry in self.file_or_buf_list (presumably both file paths and
     in-memory buffers, going by the attribute name).
     """
     for entry in self.file_or_buf_list:
         source = entry["path"]
         recognized = ParquetData.is_match(source)
         self.assertTrue(recognized)
示例#2
0
    def test_is_structured(self):
        """
        Verify is_structured for default construction and for each
        explicitly requested data_format option.
        """
        # No options given: the default format is expected to be structured
        default_data = ParquetData()
        self.assertTrue(default_data.is_structured)

        # Each data_format paired with the assertion its is_structured
        # flag must satisfy; order matches the original checks.
        expectations = (
            ("dataframe", self.assertTrue),
            ("records", self.assertFalse),
            ("json", self.assertFalse),
        )
        for data_format, check in expectations:
            configured = ParquetData(options={"data_format": data_format})
            check(configured.is_structured)
示例#3
0
    def test_mixed_non_string_col(self):
        """
        Check that sampled "col2" values loaded from the mixed datetime
        parquet fixture are plain strings, not bytes or bytes reprs.
        """
        fixture_path = os.path.join(
            test_root_path,
            "data",
            "parquet",
            "mixed_datetime_data_col.parquet",
        )
        loaded = ParquetData(fixture_path)
        column = loaded.data["col2"]
        sampled_rows = (1, 3)

        # every sampled value must be a str, not bytes
        for row in sampled_rows:
            self.assertIsInstance(column[row], str)

        # and must not be a stringified bytes literal such as b"data"
        for row in sampled_rows:
            for bytes_prefix in ('b"', "b'"):
                self.assertNotIn(bytes_prefix, column[row])
示例#4
0
 def test_file_encoding(self):
     """Verify that file_encoding is None for every input in the list."""
     sources = (entry["path"] for entry in self.file_or_buf_list)
     for source in sources:
         self.assertIsNone(ParquetData(source).file_encoding)