def test_nominal_as_category(self):
    """Write unordered categorical columns and compare the emitted lines.

    The reference output is a copy of ``EXPECTED_1`` whose entry 3 (the
    declaration of ``attr_nominal_spaces``) is replaced by the variant
    expected for categorical input.
    """
    nominal = pandas.Categorical(
        ['water', 'wine', 'beer', None, 'wine', 'water'], ordered=False)
    nominal_spaces = pandas.Categorical(
        ["'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate'],
        ordered=False)
    data = pandas.DataFrame({
        'attr_nominal': nominal,
        'attr_nominal_spaces': nominal_spaces,
    })

    # delete=False: the file has to outlive the binary handle so it can be
    # reopened by name below; it is removed explicitly in ``finally``.
    binary_out = tempfile.NamedTemporaryFile(delete=False)
    path = binary_out.name
    try:
        with TextIOWrapper(binary_out) as text_out:
            writearff(data, text_out, relation_name='test_nominal', index=False)

        with open(path, 'r') as text_in:
            actual = text_in.readlines()
    finally:
        os.unlink(path)

    expected = EXPECTED_1.copy()
    expected[3] = "@attribute attr_nominal_spaces\t{\"hard liquor\",\"red wine\",mate}\n"
    self.assertListEqual(expected, actual)
def _make_and_write_data(fp, n_samples, n_features, with_index, with_labels, seed,
                         column_prefix="V"):
    """Generate survival data, write it with ``writearff`` and return the frame.

    Parameters
    ----------
    fp
        Destination handed unchanged to ``writearff``.
    n_samples, n_features : int
        Forwarded to ``_make_survival_data``; ``n_features`` also determines
        how many ``<column_prefix><i>`` column names are generated.
    with_index : bool
        If true, the frame gets a shuffled floating-point index
        ``0 .. n_samples - 1`` and ``writearff`` is called with ``index=True``.
    with_labels : bool
        If true, the ``event`` and ``time`` values are appended as two extra
        columns named ``"event"`` and ``"time"``.
    seed
        Forwarded to ``_make_survival_data``.
    column_prefix : str, optional
        Prefix of the generated feature column names.

    Returns
    -------
    pandas.DataFrame
        The frame that was passed to ``writearff``.
    """
    x, event, time = _make_survival_data(n_samples, n_features, seed)

    columns = ["{}{}".format(column_prefix, i) for i in range(n_features)]
    if with_labels:
        columns += ["event", "time"]
        arr = numpy.column_stack((x, event, time))
    else:
        arr = x

    if with_index:
        # numpy.float_ was an alias of numpy.float64 and was removed in
        # NumPy 2.0; spelling out float64 works on old and new releases.
        index = numpy.arange(n_samples, dtype=numpy.float64)
        # Fixed seed 0 (independent of ``seed``) keeps the shuffled index
        # order reproducible across calls.
        numpy.random.RandomState(0).shuffle(index)
    else:
        index = None

    dataset = pandas.DataFrame(arr, index=index, columns=columns)
    dataset.index.name = "index"

    writearff(dataset, fp, index=with_index)
    return dataset
def test_writearff_unsupported_column_type(temp_file):
    """A complex128 column must make ``writearff`` raise ``TypeError``."""
    complex_values = numpy.array(
        [2 + 3j, 45.1 - 1j, 0 - 1j, 7 + 0j, 132 - 3j, 1 - 0.41j],
        dtype="complex128")
    # NOTE(review): the column is called "attr_datetime" although it holds
    # complex numbers -- the name is kept as-is.
    data = pandas.DataFrame({"attr_datetime": complex_values})

    with pytest.raises(TypeError, match="unsupported type complex128"):
        writearff(data, temp_file, relation_name='test_delta', index=False)
def test_writearff_nominal_with_quotes(temp_file):
    """Write plain string columns (some values single-quoted) and expect ``EXPECTED_1``."""
    columns = {
        'attr_nominal': ['water', 'wine', 'beer', None, 'wine', 'water'],
        'attr_nominal_spaces': [
            "'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate',
        ],
    }
    data = pandas.DataFrame(columns)

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    # Re-read the output by name so the comparison is made line by line.
    with open(temp_file.name, 'r') as arff_in:
        actual = arff_in.readlines()

    assert EXPECTED_1 == actual
def test_writearff_datetime(temp_file):
    """Write a single datetime64 column and expect ``EXPECTED_DATETIME``."""
    timestamps = numpy.array(
        ["2014-10-31 14:13:01", "2004-03-13 19:49:31", "1998-12-06 09:10:11"],
        dtype="datetime64")
    data = pandas.DataFrame({"attr_datetime": timestamps})

    writearff(data, temp_file, relation_name='test_datetime', index=False)

    # Re-read the output by name so the comparison is made line by line.
    with open(temp_file.name, 'r') as arff_in:
        actual = arff_in.readlines()

    assert EXPECTED_DATETIME == actual
def test_writearff_nominal_as_category(temp_file):
    """Write unordered categorical columns and compare the emitted lines.

    The reference output is a copy of ``EXPECTED_1`` whose entry 3 (the
    declaration of ``attr_nominal_spaces``) is replaced by the variant
    expected for categorical input.
    """
    nominal = pandas.Categorical(
        ['water', 'wine', 'beer', None, 'wine', 'water'], ordered=False)
    nominal_spaces = pandas.Categorical(
        ["'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate'],
        ordered=False)
    data = pandas.DataFrame({
        'attr_nominal': nominal,
        'attr_nominal_spaces': nominal_spaces,
    })

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    with open(temp_file.name, 'r') as arff_in:
        actual = arff_in.readlines()

    expected = EXPECTED_1.copy()
    expected[3] = "@attribute attr_nominal_spaces\t{\"hard liquor\",\"red wine\",mate}\n"
    assert expected == actual
def test_writearff_nominal_with_category_ordering(temp_file):
    """Write a categorical with an explicit category list and compare the output.

    ``attr_nominal`` declares the categories ``water, coke, beer, wine``
    (``coke`` never occurs in the values); the expected declaration in
    entry 2 lists them in exactly that order.
    """
    nominal = pandas.Categorical(
        ['water', 'wine', 'beer', None, 'wine', 'water'],
        categories=['water', 'coke', 'beer', 'wine'],
        ordered=False)
    data = pandas.DataFrame({
        'attr_nominal': nominal,
        'attr_nominal_spaces': [
            "'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate',
        ],
    })

    writearff(data, temp_file, relation_name='test_nominal', index=False)

    with open(temp_file.name, 'r') as arff_in:
        actual = arff_in.readlines()

    expected = EXPECTED_1.copy()
    expected[2] = "@attribute attr_nominal\t{water,coke,beer,wine}\n"
    assert expected == actual
def test_nominal_with_quotes(self):
    """Write plain string columns (some values single-quoted) and expect ``EXPECTED_1``."""
    columns = {
        'attr_nominal': ['water', 'wine', 'beer', None, 'wine', 'water'],
        'attr_nominal_spaces': [
            "'red wine'", "'hard liquor'", None, 'mate', "'hard liquor'", 'mate',
        ],
    }
    data = pandas.DataFrame(columns)

    # delete=False: the file has to outlive the binary handle so it can be
    # reopened by name below; it is removed explicitly in ``finally``.
    binary_out = tempfile.NamedTemporaryFile(delete=False)
    path = binary_out.name
    try:
        with TextIOWrapper(binary_out) as text_out:
            writearff(data, text_out, relation_name='test_nominal', index=False)

        with open(path, 'r') as text_in:
            actual = text_in.readlines()
    finally:
        os.unlink(path)

    self.assertListEqual(EXPECTED_1, actual)
def test_datetime(self):
    """Write a single datetime64 column and expect ``EXPECTED_DATETIME``."""
    timestamps = numpy.array(
        ["2014-10-31 14:13:01", "2004-03-13 19:49:31", "1998-12-06 09:10:11"],
        dtype="datetime64")
    data = pandas.DataFrame({"attr_datetime": timestamps})

    # delete=False: the file has to outlive the binary handle so it can be
    # reopened by name below; it is removed explicitly in ``finally``.
    binary_out = tempfile.NamedTemporaryFile(delete=False)
    path = binary_out.name
    try:
        with TextIOWrapper(binary_out) as text_out:
            writearff(data, text_out, relation_name='test_datetime', index=False)

        with open(path, 'r') as text_in:
            actual = text_in.readlines()
    finally:
        os.unlink(path)

    self.assertListEqual(EXPECTED_DATETIME, actual)