def test_filelike(self):
    """Test reading from a file-like object (StringIO).

    Loads the same ARFF file twice -- once from an open file handle and
    once from a StringIO wrapping its contents -- and checks that both
    paths produce identical data and metadata.
    """
    # Context managers fix the original resource leak: both handles
    # were opened but never closed.
    with open(test1) as f1:
        data1, meta1 = loadarff(f1)
    with open(test1) as f2:
        data2, meta2 = loadarff(StringIO(f2.read()))
    assert_(data1 == data2)
    assert_(repr(meta1) == repr(meta2))
def test_filelike(self):
    # Test reading from file-like object (StringIO)
    with open(test1) as handle:
        data1, meta1 = loadarff(handle)
    with open(test1) as handle:
        contents = handle.read()
    data2, meta2 = loadarff(StringIO(contents))
    assert_(data1 == data2)
    assert_(repr(meta1) == repr(meta2))
def test_filelike(self):
    # Test reading from file-like object (StringIO)
    # Use context managers so the handles are closed even if loadarff
    # raises; the original explicit open()/close() pairs leaked the
    # file descriptors on any exception between open and close.
    with open(test1) as f1:
        data1, meta1 = loadarff(f1)
    with open(test1) as f2:
        data2, meta2 = loadarff(StringIO(f2.read()))
    assert_(data1 == data2)
    assert_(repr(meta1) == repr(meta2))
def test_path(self):
    # Test reading from `pathlib.Path` object
    from pathlib import Path

    with open(test1) as fh:
        handle_data, handle_meta = loadarff(fh)
    path_data, path_meta = loadarff(Path(test1))
    assert_(handle_data == path_data)
    assert_(repr(handle_meta) == repr(path_meta))
def test_nodata(self):
    # The file nodata.arff has no data in the @DATA section.
    # Reading it should result in an array with length 0.
    filename = os.path.join(data_path, 'nodata.arff')
    data, meta = loadarff(filename)
    fields = [(name, '<f8') for name in
              ('sepallength', 'sepalwidth', 'petallength', 'petalwidth')]
    fields.append(('class', 'S15'))
    assert_equal(data.dtype, np.dtype(fields))
    assert_equal(data.size, 0)
def test_nodata(self):
    # The file nodata.arff has no data in the @DATA section.
    # Reading it should result in an array with length 0.
    path = os.path.join(data_path, 'nodata.arff')
    data, meta = loadarff(path)
    numeric_names = ['sepallength', 'sepalwidth',
                     'petallength', 'petalwidth']
    expected_dtype = np.dtype(
        [(n, '<f8') for n in numeric_names] + [('class', 'S15')])
    assert_equal(data.dtype, expected_dtype)
    assert_equal(data.size, 0)
def characterize(file_name):
    """Load an ARFF file, fit a decision tree on a train split of it,
    and log the best accuracy achieved by each feature-ranking strategy
    for predicting the 'LEAVE' column."""
    raw = arffread.loadarff(file_name)
    frame = numerify_columns(pd.DataFrame(raw[0]))
    features = frame.loc[:, frame.columns != 'LEAVE']
    target = frame['LEAVE']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, train_size=0.5)
    decision_tree(X_train, y_train)
    min_cols, max_cols = 3, len(X_train.columns)
    for evaluator in (evaluate_by_rank, evaluate_by_pca):
        best_evaluated = best_ranker_accuracy(
            X_train, y_train, evaluator, min_cols, max_cols)
        log.info(f"Best {evaluator}: {best_evaluated}")
def test_missing(self):
    # Check that missing values are parsed correctly for both fields.
    data, meta = loadarff(missing)
    for field in ('yop', 'yap'):
        assert_array_almost_equal(data[field], expect_missing[field])
def _test(self, test_file):
    # Compare every value in the loaded file against the expected
    # fixture, then check the parsed attribute types.
    data, meta = loadarff(test_file)
    for row_index, row in enumerate(data):
        for col in range(4):
            assert_array_almost_equal(expect4_data[row_index][col],
                                      row[col])
    assert_equal(meta.types(), expected_types)
def setup_method(self):
    # Load the quoted-nominal-with-spaces fixture before each test.
    loaded = loadarff(test_quoted_nominal_spaces)
    self.data, self.meta = loaded
def setup_method(self):
    # Parse the shared test10 ARFF fixture before each test.
    record = loadarff(test10)
    self.data = record[0]
    self.meta = record[1]
def setUp(self):
    # NOTE(review): sibling classes use pytest-style setup_method;
    # consider unifying the fixture hooks. Behavior kept as-is.
    result = loadarff(test7)
    self.data = result[0]
    self.meta = result[1]
import pandas as pd import numpy as np from scipy.io.arff import arffread from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, cross_val_score from sklearn.feature_selection import RFE from sklearn.pipeline import Pipeline if __name__ == '__main__': a = arffread.loadarff('../data/churn.arff') churn = pd.DataFrame(a[0]) type_map = {} for c in churn.columns: if churn[c].dtype.name == 'object': churn[c] = churn[c].apply(lambda x: x.decode('utf8')) type_map[c] = ['empty'] + list(churn[c].unique()) churn.loc[churn[c].isna(), c] = type_map[c][0] churn[c] = churn[c].apply(lambda l: type_map[c].index(l)) churn[c].astype(int) X, y = churn.loc[:, churn.columns != 'LEAVE'], churn['LEAVE'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.5) classer = DecisionTreeClassifier() print(f"churn columns: {churn.columns}") result = classer.fit(X_train, y_train) print(f"CLASSER RESULT: {result}") important_cols = [ c for i, c in enumerate(X.columns) if result.feature_importances_[i] > 0.07