def test_deserialize_from_csv_no_header(self):
    """Header-less CSV bytes deserialize to a frame with integer column labels.

    The literal ``nan`` and the empty field are both expected to come back
    as missing values.
    """
    # Arrange
    payload = b'1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
    stream = BytesIO(payload)

    # Act
    frame = deserialize_dataframe(stream, DataTypeIds.GenericCSVNoHeader)

    # Assert
    self.assertIsNotNone(frame)
    # Keys omitted from a row become NaN when the expected frame is built.
    expected_frame = pd.DataFrame([
        {0: 1.0, 1: 2.0},
        {0: 5.1, 1: 10.1, 2: 20.1},
        {0: 50.2, 2: 50.3},
    ])
    assert_frame_equal(expected_frame, frame)
def test_deserialize_from_csv_bom(self):
    """CSV bytes that start with a UTF-8 byte-order mark deserialize cleanly.

    The first column is expected to be labelled ``'a'``, i.e. the three BOM
    bytes must not end up in the header name.
    """
    # Arrange
    payload = b'\xef\xbb\xbfa,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
    stream = BytesIO(payload)

    # Act
    frame = deserialize_dataframe(stream, DataTypeIds.GenericCSV)

    # Assert
    self.assertIsNotNone(frame)
    # Keys omitted from a row become NaN when the expected frame is built.
    expected_frame = pd.DataFrame([
        {'a': 1.0, 'b': 2.0},
        {'a': 5.1, 'b': 10.1, 'c': 20.1},
        {'a': 50.2, 'c': 50.3},
    ])
    assert_frame_equal(expected_frame, frame)
def test_deserialize_from_csv_spaces(self):
    """CSV with a space after every delimiter deserializes without the padding.

    Header names and string values are expected without the leading space,
    the quoted field without its quotes, and the padded ``nan`` / blank
    fields as missing values.
    """
    # Arrange
    payload = b'a, b, c\n1.0, two, nan\n5.1, "ten point one", 20.1\n50.2, , 50.3\n'
    stream = BytesIO(payload)

    # Act
    frame = deserialize_dataframe(stream, DataTypeIds.GenericCSV)

    # Assert
    self.assertIsNotNone(frame)
    # Keys omitted from a row become NaN when the expected frame is built.
    expected_frame = pd.DataFrame([
        {'a': 1.0, 'b': 'two'},
        {'a': 5.1, 'b': 'ten point one', 'c': 20.1},
        {'a': 50.2, 'c': 50.3},
    ])
    assert_frame_equal(expected_frame, frame)
def test_deserialize_from_arff(self):
    """ARFF bytes with four NUMERIC attributes deserialize to a float frame.

    The expected frame has one column per ``@ATTRIBUTE`` declaration and one
    row per line of the ``@DATA`` section, with every value as a float.
    """
    # Arrange
    # One ARFF declaration or data row per line; the newlines are spelled
    # out explicitly so the literal does not depend on source indentation.
    data = (
        b"@RELATION Unnamed\n"
        b"@ATTRIBUTE Class NUMERIC\n"
        b"@ATTRIBUTE age NUMERIC\n"
        b"@ATTRIBUTE menopause NUMERIC\n"
        b"@ATTRIBUTE tumor-size NUMERIC\n"
        b"@DATA\n"
        b"0,5,1,1\n"
        b"0,5,4,4\n"
        b"1,4,8,8\n"
    )

    # Act
    reader = BytesIO(data)
    result = deserialize_dataframe(reader, DataTypeIds.ARFF)

    # Assert
    self.assertIsNotNone(result)
    expected = [
        {'Class': 0., 'age': 5., 'menopause': 1., 'tumor-size': 1.},
        {'Class': 0., 'age': 5., 'menopause': 4., 'tumor-size': 4.},
        {'Class': 1., 'age': 4., 'menopause': 8., 'tumor-size': 8.},
    ]
    assert_frame_equal(pd.DataFrame(expected), result)
def test_deserialize_from_unsupported_data_type_id(self):
    """An unrecognised data type id makes ``deserialize_dataframe`` raise.

    The call is expected to raise ``UnsupportedDatasetTypeError`` when given
    the id ``'Unsupported'``.
    """
    # Arrange
    data = b'1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
    reader = BytesIO(data)

    # Act / Assert
    with self.assertRaises(UnsupportedDatasetTypeError):
        # The return value is irrelevant: the call itself must raise.
        deserialize_dataframe(reader, 'Unsupported')
def test_deserialize_from_csv_no_header(self):
    """Check that CSV bytes without a header row get integer column labels.

    Both the literal ``nan`` and the empty field are expected to be read as
    missing values.
    """
    # NOTE(review): a method with this same name appears earlier in this
    # file; if both live in the same class, this later definition replaces
    # the earlier one - confirm and deduplicate.

    # Arrange
    raw = b"1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n"

    # Act
    actual = deserialize_dataframe(BytesIO(raw), DataTypeIds.GenericCSVNoHeader)

    # Assert
    self.assertIsNotNone(actual)
    # Keys left out of a row turn into NaN in the expected frame.
    rows = [
        {0: 1.0, 1: 2.0},
        {0: 5.1, 1: 10.1, 2: 20.1},
        {0: 50.2, 2: 50.3},
    ]
    assert_frame_equal(pd.DataFrame(rows), actual)
def test_deserialize_from_csv_spaces(self):
    """Check that a space following each comma is not kept in the result.

    Expected: unpadded header names and string values, the quoted field
    without its quotes, and padded ``nan`` / blank fields as missing values.
    """
    # NOTE(review): a method with this same name appears earlier in this
    # file; if both live in the same class, this later definition replaces
    # the earlier one - confirm and deduplicate.

    # Arrange
    raw = b'a, b, c\n1.0, two, nan\n5.1, "ten point one", 20.1\n50.2, , 50.3\n'

    # Act
    actual = deserialize_dataframe(BytesIO(raw), DataTypeIds.GenericCSV)

    # Assert
    self.assertIsNotNone(actual)
    # Keys left out of a row turn into NaN in the expected frame.
    rows = [
        {"a": 1.0, "b": "two"},
        {"a": 5.1, "b": "ten point one", "c": 20.1},
        {"a": 50.2, "c": 50.3},
    ]
    assert_frame_equal(pd.DataFrame(rows), actual)
def test_deserialize_from_csv_bom(self):
    """Check that a leading UTF-8 byte-order mark does not leak into the data.

    The first column of the result is expected to be labelled ``"a"``, with
    none of the BOM bytes in the header name.
    """
    # NOTE(review): a method with this same name appears earlier in this
    # file; if both live in the same class, this later definition replaces
    # the earlier one - confirm and deduplicate.

    # Arrange
    raw = b"\xef\xbb\xbfa,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n"

    # Act
    actual = deserialize_dataframe(BytesIO(raw), DataTypeIds.GenericCSV)

    # Assert
    self.assertIsNotNone(actual)
    # Keys left out of a row turn into NaN in the expected frame.
    rows = [
        {"a": 1.0, "b": 2.0},
        {"a": 5.1, "b": 10.1, "c": 20.1},
        {"a": 50.2, "c": 50.3},
    ]
    assert_frame_equal(pd.DataFrame(rows), actual)
def test_deserialize_from_plain_text_bom(self):
    """Plain text with a UTF-8 BOM deserializes to one row per text line.

    The input holds two lines separated by ``\\r\\n``; the expected frame has
    a single column labelled ``0`` whose first cell does not contain the BOM.
    """
    # Arrange
    # Adjacent byte literals are concatenated at compile time; the split
    # only keeps the source lines readable.
    payload = (
        b'\xef\xbb\xbfJohn enjoyed his vacation in California. '
        b'His personal favorite on the trip was Los Angeles.\r\n'
        b'Microsoft announced upgrades to their line of products for information workers. '
        b'The announcement was made at a partner conference at Boston.'
    )
    stream = BytesIO(payload)

    # Act
    frame = deserialize_dataframe(stream, DataTypeIds.PlainText)

    # Assert
    self.assertIsNotNone(frame)
    first_line = 'John enjoyed his vacation in California. His personal favorite on the trip was Los Angeles.'
    second_line = 'Microsoft announced upgrades to their line of products for information workers. The announcement was made at a partner conference at Boston.'
    expected_frame = pd.DataFrame([
        {0: first_line},
        {0: second_line},
    ])
    assert_frame_equal(expected_frame, frame)
def test_deserialize_from_csv(self):
    """CSV bytes with a header row deserialize to a frame labelled by that header.

    The literal ``nan`` and the empty field are both expected to come back
    as missing values.
    """
    # Arrange
    payload = b'a,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
    stream = BytesIO(payload)

    # Act
    frame = deserialize_dataframe(stream, DataTypeIds.GenericCSV)

    # Assert
    self.assertIsNotNone(frame)
    # Keys omitted from a row become NaN when the expected frame is built.
    expected_frame = pd.DataFrame([
        {'a': 1.0, 'b': 2.0},
        {'a': 5.1, 'b': 10.1, 'c': 20.1},
        {'a': 50.2, 'c': 50.3},
    ])
    assert_frame_equal(expected_frame, frame)