def test_attributes_arff_from_df_mixed_dtype_categories(self):
     # liac-arff requires the categories of a categorical attribute to be
     # of string dtype; a ValueError is expected when that is not the case.
     mixed_values = [[1], ['2'], [3.]]
     frame = pd.DataFrame(mixed_values)
     # Turn the single column (labelled 0) into a categorical whose
     # categories are built from an int, a str and a float.
     frame[0] = frame[0].astype('category')
     # `match` is applied with re.search, so only this fragment of the
     # error message has to be present.
     pattern = "The column '0' of the dataframe is of 'category' dtype."
     with pytest.raises(ValueError, match=pattern):
         attributes_arff_from_df(frame)
 def test_attributes_arff_from_df_unknown_dtype(self):
     # Check that a ValueError is raised when the dtype of a column is not
     # supported by liac-arff, and that the message names that dtype.
     # NOTE(review): a method with this exact name is defined again further
     # down; inside one class body the later definition replaces this one,
     # so this copy would never run -- confirm and keep only one.
     cases = [
         ('mixed-integer', [[1], ['2'], [3.]]),
         ('datetime64', [pd.Timestamp('2012-05-01'),
                         pd.Timestamp('2012-05-02')]),
     ]
     template = ("The dtype '{}' of the column '0' is not currently "
                 "supported by liac-arff")
     for dtype_name, values in cases:
         frame = pd.DataFrame(values)
         with pytest.raises(ValueError, match=template.format(dtype_name)):
             attributes_arff_from_df(frame)
 def test_attributes_arff_from_df_unknown_dtype(self):
     # A ValueError is expected for every column dtype that liac-arff does
     # not support; the error message must mention the offending dtype.
     mixed_rows = [[1], ['2'], [3.]]
     timestamps = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02')]
     # Each pair is (data for a one-column frame, dtype named in the error).
     for rows, dtype_name in ((mixed_rows, 'mixed-integer'),
                              (timestamps, 'datetime64')):
         frame = pd.DataFrame(rows)
         pattern = ("The dtype '{}' of the column '0' is not currently "
                    "supported by liac-arff".format(dtype_name))
         with pytest.raises(ValueError, match=pattern):
             attributes_arff_from_df(frame)
 def test_attributes_arff_from_df(self):
     # NOTE(review): a method with this exact name is defined again further
     # down; inside one class body the later definition replaces this one,
     # so this copy would never run -- confirm and keep only one.

     # Dense DataFrame: one column for each kind of attribute checked here.
     column_names = ['integer', 'floating', 'string', 'category', 'boolean']
     rows = [[1, 1.0, 'xxx', 'A', True], [2, 2.0, 'yyy', 'B', False]]
     dense = pd.DataFrame(rows, columns=column_names)
     dense['category'] = dense['category'].astype('category')
     expected_dense = [
         ('integer', 'INTEGER'),
         ('floating', 'REAL'),
         ('string', 'STRING'),
         ('category', ['A', 'B']),
         ('boolean', ['True', 'False']),
     ]
     self.assertEqual(attributes_arff_from_df(dense), expected_dense)

     # SparseDataFrame: only numeric columns, with 0 as the fill value.
     # NOTE(review): pd.SparseDataFrame was deprecated in pandas 0.25 and
     # removed in 1.0 -- confirm the pandas version this suite targets.
     sparse = pd.SparseDataFrame(
         [[1, 1.0], [2, 2.0], [0, 0]],
         columns=['integer', 'floating'],
         default_fill_value=0,
     )
     # Cast explicitly so the first column is int64 before the call.
     sparse['integer'] = sparse['integer'].astype(np.int64)
     expected_sparse = [('integer', 'INTEGER'), ('floating', 'REAL')]
     self.assertEqual(attributes_arff_from_df(sparse), expected_sparse)
 def test_attributes_arff_from_df(self):
     # --- dense pandas DataFrame ---
     # Two rows, five columns; the column labels double as the attribute
     # names expected in the result.
     names = ['integer', 'floating', 'string', 'category', 'boolean']
     first_row = [1, 1.0, 'xxx', 'A', True]
     second_row = [2, 2.0, 'yyy', 'B', False]
     df = pd.DataFrame(data=[first_row, second_row], columns=names)
     df['category'] = df['category'].astype('category')
     attributes = attributes_arff_from_df(df)
     # Expected ARFF type per column, in column order: a type name for the
     # first three, and a list of string labels for the last two.
     arff_types = ['INTEGER', 'REAL', 'STRING', ['A', 'B'], ['True', 'False']]
     self.assertEqual(attributes, list(zip(names, arff_types)))

     # --- pandas SparseDataFrame ---
     # NOTE(review): pd.SparseDataFrame was deprecated in pandas 0.25 and
     # removed in 1.0 -- confirm the pandas version this suite targets.
     sparse_names = ['integer', 'floating']
     df = pd.SparseDataFrame(data=[[1, 1.0], [2, 2.0], [0, 0]],
                             columns=sparse_names,
                             default_fill_value=0)
     # Force the first column to int64 before asking for the attributes.
     df['integer'] = df['integer'].astype(np.int64)
     attributes = attributes_arff_from_df(df)
     self.assertEqual(attributes, list(zip(sparse_names, ['INTEGER', 'REAL'])))