def test_init(self):
    # Two methods scored on 100 rows: rows 0-49 are class 0, rows 50-99 class 1.
    results = testing.Results(nmethods=2, nrows=100)
    results.actual[:50] = 0
    results.actual[50:] = 1

    # Both methods start out predicting exactly the actual classes.
    results.predicted = np.vstack([results.actual] * 2)
    np.testing.assert_almost_equal(scoring.CA(results), [1, 1])

    # A single wrong prediction for the first method: 99 of 100 correct.
    results.predicted[0][0] = 1
    np.testing.assert_almost_equal(scoring.CA(results), [0.99, 1])

    # Flip every prediction of the second method: nothing is correct any more.
    results.predicted[1] = 1 - results.predicted[1]
    np.testing.assert_almost_equal(scoring.CA(results), [0.99, 0])
def test_bayes(self):
    # Integer features drawn from {1, 2, 3}; the class is a copy of one
    # randomly chosen column, so testing on the training data must give CA 1.
    # np.random.randint has an exclusive upper bound, hence 4. It replaces the
    # deprecated np.random.random_integers(1, 3, ...) whose bounds were
    # inclusive.
    x = np.random.randint(1, 4, (100, 5))
    col = np.random.randint(5)
    y = x[:, col].copy().reshape(100, 1)
    t = Table(x, y)
    t = discretization.DiscretizeTable(t, method=EqualWidth(n=3))

    res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
    np.testing.assert_almost_equal(scoring.CA(res), [1])

    # Overwrite the class of the last 20 rows with 4 - y; the learner is then
    # expected to stay at or above 0.75 but below a perfect score.
    t.Y[-20:] = 4 - t.Y[-20:]
    res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
    ca = scoring.CA(res)[0]
    self.assertGreaterEqual(ca, 0.75)
    self.assertLess(ca, 1)
def test_NaiveBayes(self):
    # 10-fold cross validation of naive Bayes on every 20th titanic row;
    # the classification accuracy must lie strictly between 0.7 and 0.9.
    titanic = Orange.data.Table('titanic')
    learner = nb.BayesLearner()
    cv_results = testing.CrossValidation(titanic[::20], [learner], k=10)
    accuracy = scoring.CA(cv_results)
    self.assertGreater(accuracy, 0.7)
    self.assertLess(accuracy, 0.9)
def test_RandomForest(self):
    # 10-fold cross validation of a random forest on iris; the
    # classification accuracy must lie strictly between 0.9 and 0.99.
    iris = Orange.data.Table('iris')
    learner = rf.RandomForestLearner()
    cv_results = testing.CrossValidation(iris, [learner], k=10)
    accuracy = scoring.CA(cv_results)
    self.assertGreater(accuracy, 0.9)
    self.assertLess(accuracy, 0.99)
def test_split_by_classifier(self):
    # Cross-validate three learners together, split the joint results into
    # one result object per learner, and check every part against the whole.
    learners = [random_learner, random_learner, random_learner]
    ds = data.Table("lenses")
    cv = testing.cross_validation(learners, ds, folds=5, store_examples=True)
    cv_split = scoring.split_by_classifiers(cv)
    ca_scores = scoring.CA(cv)
    auc_scores = scoring.AUC(cv)
    for i, cv1 in enumerate(cv_split):
        # Metadata of the split result must mirror the joint result, except
        # that it describes exactly one learner.
        self.assertEqual(cv1.class_values, cv.class_values)
        self.assertEqual(cv1.classifier_names, [cv.classifier_names[i]])
        self.assertEqual(cv1.number_of_iterations, cv.number_of_iterations)
        self.assertEqual(cv1.number_of_learners, 1)
        self.assertEqual(cv1.base_class, cv.base_class)
        self.assertEqual(cv1.weights, cv.weights)
        self.assertEqual(len(cv1.results), len(cv.results))
        self.assertEqual(cv1.examples, cv.examples)

        # Scores computed on the split result must match the i-th score of
        # the joint result. assertAlmostEqual replaces the deprecated
        # assertAlmostEquals alias.
        ca_one = scoring.CA(cv1)[0]
        auc_one = scoring.AUC(cv1)[0]
        self.assertAlmostEqual(ca_scores[i], ca_one, delta=1e-10)
        self.assertAlmostEqual(auc_scores[i], auc_one, delta=1e-10)
def predict_wine_quality(table, n):
    """Discretize ``table``, cross-validate a neural network, print the scores.

    Args:
        table: data table whose first domain variable is used as the class
            and whose remaining variables are used as features.
        n: number of bins passed to the equal-width discretization.

    Prints the classification accuracy and AUC of a 10-fold cross
    validation; nothing is returned.
    """
    # Make the continuous variables discrete
    discretizer = Discretize()
    discretizer.method = discretize.EqualWidth(n=n)
    table = discretizer(table)

    # Define domain: variable 0 is the class, everything after it a feature
    wine_domain = Domain(list(table.domain[1:]), table.domain[0])
    table = Table.from_table(domain=wine_domain, source=table)

    # Construct learner and print results
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    cv_results = CrossValidation(table, [nn_learner], k=10)
    accuracy = scoring.CA(cv_results)[0]
    print("Accuracy of cross validation: {:.3f}".format(accuracy))
    auc = scoring.AUC(cv_results)[0]
    print("AUC: {:.3f}".format(auc))
def test_ca_from_confusion_matrix_for_classification_on_iris_se(self):
    # One proportion-test run on iris must yield exactly one CA entry when
    # standard errors are requested.
    iris = data.Table("iris")
    results = testing.proportion_test([self.learner], iris, times=1)
    self.assertEqual(results.number_of_iterations, 1)
    scores = scoring.CA(results, report_se=True)
    self.assertEqual(len(scores), 1)
def predict_cal_acc(test_dataset, data_table, tree_learner):
    """Print 10-fold cross-validation accuracy and AUC for ``tree_learner``.

    Args:
        test_dataset: data passed to ``decision_tree``.
        data_table: table the cross validation is run on.
        tree_learner: learner evaluated by the cross validation.
    """
    # ``decision_tree`` is not a parameter; it is looked up in the enclosing
    # scope. Its predictions are not used by the rest of this function.
    decision_tree(test_dataset)

    cv_results = CrossValidation(data_table, [tree_learner], k=10)
    accuracy = scoring.CA(cv_results)[0]
    print("Accuracy: {:.3f}".format(accuracy))
    auc = scoring.AUC(cv_results)[0]
    print("AUC: {:.3f}".format(auc))
norm_data_table = normalize_table(data_table)
print("Applying learner on total data records {}".format(len(norm_data_table)))

# Create a NN classifier learner
then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=4000)
ann_classifier = ann_learner(norm_data_table)

# Do the 10 folds cross validation
eval_results = CrossValidation(norm_data_table, [ann_learner], k=10)
now = datetime.datetime.now()
# The measured interval covers both the training above and the cross validation.
tdelta = now - then
print("Processing completed after: {} ".format(tdelta))

# Accuracy and area under (receiver operating characteristic, ROC) curve (AUC)
print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

# Remove minority classes.
# Count the occurrences of each first-column value once and reuse the result
# (previously the same value_counts() was recomputed four times).
quality_counts = pd.Series(raw_data_table[:, 0]).value_counts()
value_counts = quality_counts[quality_counts > 10]
value_counts1 = quality_counts[quality_counts < 10]
# NOTE(review): a class with exactly 10 rows is neither kept (> 10) nor
# reported (< 10) -- confirm whether one of the bounds should be inclusive.
print(value_counts1)

# Keep only the rows whose "quality" is one of the sufficiently frequent values.
first_elts = [x[0] for x in value_counts.keys().tolist()]
sel = [i for i, d in enumerate(raw_data_table) if d["quality"] in first_elts]
subset_data = raw_data_table[sel]
subset_data_table = Table.from_table(domain=wine_domain, source=subset_data)
norm_subset_data_table = normalize_table(subset_data_table)
print("Applying learner on filtered data recordset, total data records {}".format(len(norm_subset_data_table)))

then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=4000)
# Rebuild the table on a domain made of the chosen features and class label.
iris_domain = Domain(feature_vars, class_label_var)
data_tab = Table.from_table(domain=iris_domain, source=data_tab)
print("DOMAIN: %s \nVARIABLES: %s \nATTRIBUTES: %s \nCLASS_VAR: %s"
      % (data_tab.domain, data_tab.domain.variables, data_tab.domain.attributes,
         data_tab.domain.class_var))
print(len(data_tab))

# NOTE: despite its name, tree_learner is a neural network learner.
tree_learner = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=1750)
# Recorded result of the run below:
# Accuracy of cross validation: 0.953
# AUC: 0.991
eval_results = CrossValidation(data_tab, [tree_learner], k=10)
print("Accuracy of cross validation: {:.3f}".format(
    scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

#####################TASK 3##########################
tree_learner2 = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=2000, verbose=True, solver="sgd",
                                        learning_rate_init=3, learning_rate="invscaling", power_t=0.3)
# Author's recorded observations for different learning-rate settings:
# Needs ~1667 iterations. Same accuracy and AUC as before.
# When learning rate init = 0.01 we need 445 iterations. Better accuracy and AUC. Accuracy: 0.960, AUC: 0.997
# When learning rate init = 1 we need 81 iterations. Accuracy: 0.960, AUC: 0.993.
# When learning rate init = 3, we need 4 iterations. Accuracy: 0.587, AUC: 0.729.
# When the learning rate is dynamically adjusted, we need 7 iterations. Almost back to the same accuracy.
# Definitely better than before. Accuracy: 0.913, AUC: 0.966
# pandas requires a file object handed to to_csv to be opened with
# newline=""; without it, platforms that translate "\n" on write
# (e.g. Windows) emit an extra blank line after every row.
with open(exportFilePath, "w", newline="") as output:
    datae1.to_csv(output, header=True, sep=",")

path1 = ("C:\\Users\\acer\\Desktop\\friends\\export.csv")
datae = p.read_csv(path1)

# Now for part (b)
from Orange.classification import SklTreeLearner

td = Table.from_file("C:\\Users\\acer\\Desktop\\friends\\export.csv")
#print(data1.domain)
#print(d)

# Variable 7 is the class label. It also lies inside variables[1:], so it has
# to be removed from the feature list explicitly; otherwise the learner gets
# the answer as one of its inputs and the reported scores are meaningless.
class_label_var = td.domain.variables[7]
feature_vars = [var for var in td.domain.variables[1:]
                if var is not class_label_var]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
#print(d_dis[0])
td = Table.from_table(domain=md, source=td)
#print(.domain.variables[1:])

# Print the size of an 80% share of the table.
# NOTE(review): the split below uses the hard-coded row index 1360 rather
# than this computed value -- confirm they agree for the exported data.
n1 = td.approx_len()
print(n1 * 80 / 100)
train_data_set = td[:1360]
test_data_set = td[1360:]
#print(train_data_set.domain)
#print(test_data_set.domain)

# Train one tree on the training rows and, separately, cross-validate the
# learner on the full table.
tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
def test_ca_on_iris(self):
    # CA with standard errors over a 5-fold cross validation of a single
    # learner on iris must contain exactly one entry.
    iris = data.Table("iris")
    cv_results = testing.cross_validation([self.learner], iris, folds=5)
    scores = scoring.CA(cv_results, report_se=True)
    self.assertEqual(len(scores), 1)
# Description: Naive Bayes Learner with auto adjusted threshold
# Category: classification
# Uses: adult_sample
# Referenced: Orange.classification.bayes
# Classes: Orange.classification.bayes.NaiveLearner, Orange.classification.bayes.NaiveClassifier

import Orange
from Orange.classification import bayes
from Orange.evaluation import testing, scoring

adult = Orange.data.Table("adult_sample.tab")

# Compare plain naive Bayes with the variant that adjusts its threshold.
nb = bayes.NaiveLearner(name="Naive Bayes")
adjusted_nb = bayes.NaiveLearner(adjust_threshold=True, name="Adjusted Naive Bayes")

results = testing.cross_validation([nb, adjusted_nb], adult)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("%.6f, %.6f" % tuple(scoring.CA(results)))
from Orange import data
from Orange.classification import svm

vehicle = data.Table("vehicle.tab")

# Two SVM learners to compare on the vehicle data set.
svm_easy = svm.SVMLearnerEasy(name="svm easy", folds=3)
svm_normal = svm.SVMLearner(name="svm")
learners = [svm_easy, svm_normal]

from Orange.evaluation import testing, scoring

results = testing.cross_validation(learners, vehicle, folds=5)

# One row per learner: name, classification accuracy, AUC.
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Name CA AUC")
for learner, CA, AUC in zip(learners, scoring.CA(results), scoring.AUC(results)):
    print("%-8s %.2f %.2f" % (learner.name, CA, AUC))
def test_ca_from_confusion_matrix_list_on_iris(self):
    # CA computed from the list of confusion matrices of a 5-fold cross
    # validation of a single learner must contain exactly one entry.
    iris = data.Table("iris")
    cv_results = testing.cross_validation([self.learner], iris, folds=5)
    matrices = scoring.confusion_matrices(cv_results)
    scores = scoring.CA(matrices)
    self.assertEqual(len(scores), 1)
# % (data_tab.domain, data_tab.domain.variables, data_tab.domain.attributes,
# data_tab.domain.class_var))

# Shuffle, then use the first 80% of the rows for training and the rest for testing.
data_tab.shuffle()
indx = int(len(data_tab)*0.8)
train_data_tab = data_tab[:indx]
test_data_tab = data_tab[indx:]

#########################TASK 2################################
def prediction (decision_tree, samples, lables):
    """Return the label from ``lables`` for every prediction on ``samples``.

    ``decision_tree(samples)`` yields one value per sample; each value is
    converted to ``int`` and used as an index into ``lables``.
    """
    return [lables[int(x)] for x in decision_tree(samples)]

tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_tab)
class_labels = data_tab.domain.class_var.values
p = prediction(decision_tree, test_data_tab, class_labels)

# Slice the compared column once instead of re-slicing the table for every row.
# NOTE(review): this compares predictions with column 0 of the table --
# confirm that column 0 really holds the class label in this domain.
first_column = test_data_tab[:, 0]
matches = sum(1 for i, predicted_label in enumerate(p)
              if first_column[i][0] == predicted_label)
accuracy = matches/len(test_data_tab)
print("ACCURACY OF DECISION TREE: ")
print(accuracy)

#########################TASK 3################################
eval_results = CrossValidation(data_tab, [tree_learner], k=10)
print("Accuracy of cross validation: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
import Orange
import Orange.preprocess
import pandas as pd

data = Table.from_file('white wine.csv')

# Discretize column 0 into 3 equal-width bins and use the result as the class;
# the remaining columns become the features.
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
newCLabel = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
CLabelSet = newCLabel.domain.variables
WineDomain = Domain(FeatureV, CLabelSet)
data = Table.from_table(domain=WineDomain, source=data)

# NOTE(review): (10, 1) requests two hidden layers, the second with a single
# unit -- confirm this is intended rather than (10, ).
TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)
evalR = CrossValidation(data, [TLearner], k=10)
print("Accuracy: {:.3f}".format(scoring.CA(evalR)[0]))
print("AUC: {:.3f}".format(scoring.AUC(evalR)[0]))

# Count the rows whose quality is 1, 2 or 9.
# NOTE(review): n is not used anywhere in the visible code.
n = sum(1 for d in data if (d["quality"] == 1.0 or d["quality"] == 2.0 or d["quality"] == 9.0))
# Collect the rows with quality below 4 or above 8 and delete them from data.
# NOTE(review): "quality" was replaced by its discretized version above, so
# confirm these numeric comparisons still select the intended rows.
SubSet = Table(data.domain, [d for d in data if (d["quality"] < 4.0 or d["quality"] > 8.0)])
for d in SubSet:
    # NOTE(review): this deletes using a row instance as the key -- verify
    # that the table supports this and removes the matching row.
    del data[d]

# Repeat the class discretization and domain construction on the reduced table.
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
DataSet = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
newCLabelSet = DataSet.domain.variables
wineDomain = Domain(FeatureV, newCLabelSet)
data = Table.from_table(domain=wineDomain, source=data)
def test_ca_from_confusion_matrix_on_iris_se(self):
    # CA with standard errors, computed from the first confusion matrix
    # (class index 1) of a 5-fold cross validation, must have one entry.
    iris = data.Table("iris")
    cv_results = testing.cross_validation([self.learner], iris, folds=5)
    matrices = scoring.confusion_matrices(cv_results, class_index=1)
    first_matrix = matrices[0]
    scores = scoring.CA(first_matrix, report_se=True)
    self.assertEqual(len(scores), 1)
"InputLearner", [ "learner", # :: Orange.base.Learner "results", # :: Option[Try[Orange.evaluation.Results]] "stats" ] # :: Option[Sequence[Try[float]]] ) def classification_stats(results): return tuple(score(results) for score in classification_stats.scores) classification_stats.headers, classification_stats.scores = zip(*( ("AUC", scoring.AUC), ("CA", lambda res, *args, **kwargs: scoring.CA(res)), ("F1", (lambda res, target=None: scoring.F1( res, target=target, average='weighted'))), ("Precision", (lambda res, target=None: scoring.Precision( res, target=target, average='weighted'))), ("Recall", (lambda res, target=None: scoring.Recall( res, target=target, average='weighted'))), )) def regression_stats(results): return tuple(score(results) for score in regression_stats.scores) regression_stats.headers, regression_stats.scores = zip(*( ("MSE", scoring.MSE),