Пример #1
0
    def test_init(self):
        """CA follows hand-set predictions stored in a bare ``Results`` object."""
        results = testing.Results(nmethods=2, nrows=100)
        # First 50 rows get actual value 0, the remaining 50 get 1.
        results.actual[:50] = 0
        results.actual[50:] = 1

        # Both methods start out predicting exactly the actual values.
        results.predicted = np.vstack([results.actual] * 2)
        np.testing.assert_almost_equal(scoring.CA(results), [1, 1])

        # Break a single prediction of the first method (actual is 0 there).
        results.predicted[0, 0] = 1
        np.testing.assert_almost_equal(scoring.CA(results), [0.99, 1])

        # Flip every 0/1 prediction of the second method.
        results.predicted[1] = 1 - results.predicted[1]
        np.testing.assert_almost_equal(scoring.CA(results), [0.99, 0])
Пример #2
0
    def test_bayes(self):
        """Naive Bayes recovers a class that is a copy of one feature column.

        Builds a random integer table whose class column duplicates a randomly
        chosen feature, checks that testing on the training data gives CA 1,
        then perturbs the last 20 class values and checks that CA drops below
        1 but stays at or above 0.75.
        """
        n_rows = 100
        # Values 1..3 inclusive. ``randint`` has an exclusive upper bound and
        # replaces the deprecated ``np.random.random_integers(1, 3, ...)``.
        x = np.random.randint(1, 4, (n_rows, 5))
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(n_rows, 1)
        t = Table(x, y)
        t = discretization.DiscretizeTable(t, method=EqualWidth(n=3))

        res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
        np.testing.assert_almost_equal(scoring.CA(res), [1])

        # Perturb the last 20 class values (v -> 4 - v).
        t.Y[-20:] = 4 - t.Y[-20:]
        res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
        # Score once and reuse the value for both bounds.
        ca = scoring.CA(res)[0]
        self.assertGreaterEqual(ca, 0.75)
        self.assertLess(ca, 1)
Пример #3
0
 def test_NaiveBayes(self):
     """10-fold CV of naive Bayes on every 20th titanic row: 0.7 < CA < 0.9."""
     titanic = Orange.data.Table('titanic')
     learner = nb.BayesLearner()
     # Only every 20th row is cross-validated.
     cv_results = testing.CrossValidation(titanic[::20], [learner], k=10)
     accuracy = scoring.CA(cv_results)
     self.assertGreater(accuracy, 0.7)
     self.assertLess(accuracy, 0.9)
Пример #4
0
 def test_RandomForest(self):
     """10-fold CV of a random forest on iris: 0.9 < CA < 0.99."""
     iris = Orange.data.Table('iris')
     learner = rf.RandomForestLearner()
     cv_results = testing.CrossValidation(iris, [learner], k=10)
     accuracy = scoring.CA(cv_results)
     self.assertGreater(accuracy, 0.9)
     self.assertLess(accuracy, 0.99)
Пример #5
0
    def test_split_by_classifier(self):
        """Per-classifier splits of CV results keep metadata and scores.

        Cross-validates three learners on "lenses", splits the results with
        ``scoring.split_by_classifiers`` and checks, for every split, that the
        shared attributes equal those of the combined results and that CA and
        AUC computed on the split match the corresponding combined scores.
        """
        learners = [random_learner, random_learner, random_learner]
        ds = data.Table("lenses")
        cv = testing.cross_validation(learners,
                                      ds,
                                      folds=5,
                                      store_examples=True)
        cv_split = scoring.split_by_classifiers(cv)
        ca_scores = scoring.CA(cv)
        auc_scores = scoring.AUC(cv)
        for i, cv1 in enumerate(cv_split):
            # Attributes that must survive the split unchanged.
            self.assertEqual(cv1.class_values, cv.class_values)
            self.assertEqual(cv1.classifier_names, [cv.classifier_names[i]])
            self.assertEqual(cv1.number_of_iterations, cv.number_of_iterations)
            self.assertEqual(cv1.number_of_learners, 1)
            self.assertEqual(cv1.base_class, cv.base_class)
            self.assertEqual(cv1.weights, cv.weights)
            self.assertEqual(len(cv1.results), len(cv.results))
            self.assertEqual(cv1.examples, cv.examples)

            # Scores of the single-classifier split vs. the combined results.
            ca_one = scoring.CA(cv1)[0]
            auc_one = scoring.AUC(cv1)[0]
            self.assertAlmostEqual(ca_scores[i], ca_one, delta=1e-10)
            # ``assertAlmostEquals`` is a deprecated alias; use the real name.
            self.assertAlmostEqual(auc_scores[i], auc_one, delta=1e-10)
Пример #6
0
def predict_wine_quality(table, n):
    """Cross-validate a neural-network classifier on a discretized table.

    Args:
        table: Table whose first domain variable is used as the class and
            whose remaining domain variables are used as features.
        n: Number of equal-width bins passed to the discretizer.

    Prints the first CA and AUC scores of a 10-fold cross validation.
    """
    # Discretize the continuous variables into ``n`` equal-width bins.
    discretizer = Discretize()
    discretizer.method = discretize.EqualWidth(n=n)
    table = discretizer(table)

    # Rebuild the domain: variable 0 is the class, the rest are features.
    domain = table.domain
    wine_domain = Domain(list(domain[1:]), domain[0])
    table = Table.from_table(domain=wine_domain, source=table)

    # Build the learner, cross-validate it and report the scores.
    learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                      max_iter=4000)
    cv_results = CrossValidation(table, [learner], k=10)
    accuracy = scoring.CA(cv_results)[0]
    print("Accuracy of cross validation: {:.3f}".format(accuracy))
    auc = scoring.AUC(cv_results)[0]
    print("AUC: {:.3f}".format(auc))
Пример #7
0
 def test_ca_from_confusion_matrix_for_classification_on_iris_se(self):
     """CA with ``report_se=True`` on one proportion test of iris has one entry."""
     iris = data.Table("iris")
     prop_test = testing.proportion_test([self.learner], iris, times=1)
     self.assertEqual(prop_test.number_of_iterations, 1)
     scores = scoring.CA(prop_test, report_se=True)
     self.assertEqual(len(scores), 1)
def predict_cal_acc(test_dataset, data_table, tree_learner):
    """Predict on *test_dataset* and print cross-validated accuracy and AUC.

    Args:
        test_dataset: Data passed to the module-level ``decision_tree``.
        data_table: Data on which ``tree_learner`` is cross-validated.
        tree_learner: Learner evaluated with 10-fold cross validation.

    Returns:
        Whatever ``decision_tree(test_dataset)`` returns. The original
        computed this value and discarded it; callers that ignore the
        return value are unaffected.
    """
    # NOTE(review): ``decision_tree`` is not a parameter; presumably it is a
    # classifier built elsewhere in the module -- confirm it corresponds to
    # ``tree_learner``.
    y_pred = decision_tree(test_dataset)
    eval_results = CrossValidation(data_table, [tree_learner], k=10)
    print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
    return y_pred
Пример #9
0
norm_data_table = normalize_table(data_table)
print("Applying learner on total data records {}".format(len(norm_data_table)))

# Create a NN classifier learner. The timer starts here, so the reported
# duration covers fitting on the full table and the cross validation below.
then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=4000)
ann_classifier = ann_learner(norm_data_table)

# Do the 10-fold cross validation.
eval_results = CrossValidation(norm_data_table, [ann_learner], k=10)
now = datetime.datetime.now()
tdelta = now - then
print("Processing completed after: {} ".format(tdelta))

# Accuracy and area under the receiver operating characteristic (ROC) curve.
print("Accuracy: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

# Remove minority classes. The per-class counts are computed once and reused
# (the original recomputed the same value_counts() four times).
quality_counts = pd.Series(raw_data_table[:, 0]).value_counts()
# NOTE(review): classes with exactly 10 rows are neither kept (> 10) nor
# reported (< 10) -- confirm which side of the threshold they belong to.
value_counts = quality_counts[quality_counts > 10]
value_counts1 = quality_counts[quality_counts < 10]
print(value_counts1)
first_elts = [x[0] for x in value_counts.keys().tolist()]
sel = [i for i, d in enumerate(raw_data_table) if d["quality"] in first_elts]
subset_data = raw_data_table[sel]
subset_data_table = Table.from_table(domain=wine_domain, source=subset_data)
norm_subset_data_table = normalize_table(subset_data_table)
print("Applying learner on filtered data recordset, total data records {}".format(len(norm_subset_data_table)))

then = datetime.datetime.now()
ann_learner = NNClassificationLearner(hidden_layer_sizes=(10, ), max_iter=4000)
Пример #10
0
# Rebuild the table on a domain with explicit feature and class variables.
iris_domain = Domain(feature_vars, class_label_var)

data_tab = Table.from_table(domain=iris_domain, source=data_tab)

# Show the resulting domain and the number of rows.
print("DOMAIN: %s \nVARIABLES: %s \nATTRIBUTES: %s \nCLASS_VAR: %s" %
      (data_tab.domain, data_tab.domain.variables, data_tab.domain.attributes,
       data_tab.domain.class_var))
print(len(data_tab))

# NOTE: despite its name, ``tree_learner`` is a neural-network learner.
tree_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                       max_iter=1750)
# Results recorded by the original author for this configuration:
#   Accuracy of cross validation: 0.953
#   AUC: 0.991
eval_results = CrossValidation(data_tab, [tree_learner], k=10)
print("Accuracy of cross validation: {:.3f}".format(
    scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

#####################TASK 3##########################
# Second learner: SGD solver with an "invscaling" learning-rate schedule.
tree_learner2 = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                        max_iter=2000,
                                        verbose=True,
                                        solver="sgd",
                                        learning_rate_init=3,
                                        learning_rate="invscaling",
                                        power_t=0.3)
# Observations recorded by the original author:
# - Needs ~1667 iterations. Same accuracy and AUC as before.
# - With learning rate init = 0.01, 445 iterations are needed. Better accuracy
#   and AUC. Accuracy: 0.960, AUC: 0.997.
# - With learning rate init = 1, 81 iterations are needed. Accuracy: 0.960,
#   AUC: 0.993.
# - With learning rate init = 3, 4 iterations are needed. Accuracy: 0.587,
#   AUC: 0.729.
# - With a dynamically adjusted learning rate, 7 iterations are needed. Almost
#   back to the same accuracy, definitely better than before. Accuracy: 0.913,
#   AUC: 0.966.
Пример #11
0
# Export the frame to CSV.
with open(exportFilePath, "w") as output:
    datae1.to_csv(output, header=True, sep=",")

path1 = ("C:\\Users\\acer\\Desktop\\friends\\export.csv")
datae = p.read_csv(path1)

# Part (b): decision tree on the exported data.
from Orange.classification import SklTreeLearner
# Load the same file as an Orange table; reuse ``path1`` instead of repeating
# the path literal.
td = Table.from_file(path1)

class_label_var = td.domain.variables[7]
# Features are every variable after the first one, except the class variable.
# The original used ``variables[1:]`` unfiltered, which also contains index 7,
# so the class label was available to the learner as a feature.
feature_vars = [var for var in td.domain.variables[1:]
                if var is not class_label_var]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
td = Table.from_table(domain=md, source=td)

n1 = td.approx_len()
print(n1 * 80 / 100)
# NOTE(review): the split index 1360 is hard-coded; presumably it is the 80%
# value printed above -- confirm before using another data set.
train_data_set = td[:1360]
test_data_set = td[1360:]

tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
Пример #12
0
 def test_ca_on_iris(self):
     """CA with ``report_se=True`` over a 5-fold CV of iris has one entry."""
     iris = data.Table("iris")
     cv_results = testing.cross_validation([self.learner], iris, folds=5)
     scores = scoring.CA(cv_results, report_se=True)
     self.assertEqual(len(scores), 1)
Пример #13
0
# Description: Naive Bayes Learner with auto adjusted threshold
# Category:    classification
# Uses:        iris
# Referenced:  Orange.classification.bayes
# Classes:     Orange.classification.bayes.NaiveLearner, Orange.classification.bayes.NaiveClassifier

import Orange
from Orange.classification import bayes
from Orange.evaluation import testing, scoring

# Compare plain naive Bayes with the threshold-adjusting variant on the
# adult sample data.
adult = Orange.data.Table("adult_sample.tab")

nb = bayes.NaiveLearner(name="Naive Bayes")
adjusted_nb = bayes.NaiveLearner(adjust_threshold=True,
                                 name="Adjusted Naive Bayes")

results = testing.cross_validation([nb, adjusted_nb], adult)
# Parenthesized single-argument print: the same output under the Python 2
# print statement and the Python 3 print function.
print("%.6f, %.6f" % tuple(scoring.CA(results)))
Пример #14
0
from Orange import data
from Orange.classification import svm

vehicle = data.Table("vehicle.tab")

# Two SVM learners to compare.
svm_easy = svm.SVMLearnerEasy(name="svm easy", folds=3)
svm_normal = svm.SVMLearner(name="svm")
learners = [svm_easy, svm_normal]

from Orange.evaluation import testing, scoring

# 5-fold cross validation, then one table row per learner.
results = testing.cross_validation(learners, vehicle, folds=5)
# Parenthesized single-argument print: the same output under the Python 2
# print statement and the Python 3 print function.
print("Name     CA        AUC")
for learner, CA, AUC in zip(learners, scoring.CA(results),
                            scoring.AUC(results)):
    print("%-8s %.2f      %.2f" % (learner.name, CA, AUC))
Пример #15
0
 def test_ca_from_confusion_matrix_list_on_iris(self):
     """CA computed from the confusion matrices of a 5-fold iris CV has one entry."""
     iris = data.Table("iris")
     cv_results = testing.cross_validation([self.learner], iris, folds=5)
     matrices = scoring.confusion_matrices(cv_results)
     scores = scoring.CA(matrices)
     self.assertEqual(len(scores), 1)
Пример #16
0
#      % (data_tab.domain, data_tab.domain.variables, data_tab.domain.attributes, 
#         data_tab.domain.class_var))

# Shuffle the rows, then use the first 80% (index truncated to an int) for
# training and the remainder for testing.
data_tab.shuffle()
indx = int(len(data_tab)*0.8)
train_data_tab = data_tab[:indx]
test_data_tab = data_tab[indx:]

#########################TASK 2################################
def prediction(decision_tree, samples, lables):
    """Map each value predicted for *samples* to the matching entry of *lables*.

    Args:
        decision_tree: Callable invoked as ``decision_tree(samples)``; it must
            return an iterable of values convertible with ``int``.
        samples: Passed through to ``decision_tree`` unchanged.
        lables: Indexable collection; each predicted value, converted to
            ``int``, is used as an index into it.

    Returns:
        A list with one entry of *lables* per predicted value, in order.
    """
    predicted_labels = []
    for class_index in decision_tree(samples):
        predicted_labels.append(lables[int(class_index)])
    return predicted_labels

# Train a decision tree on the training split and predict the test split.
tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_tab)
class_labels = data_tab.domain.class_var.values
p = prediction(decision_tree, test_data_tab, class_labels)

# Slice column 0 once instead of re-slicing the table on every iteration.
# NOTE(review): the predictions are compared against column 0 of the test
# table -- confirm that this column holds the class label.
first_column = test_data_tab[:, 0]
matches = sum(1 for row, predicted in zip(first_column, p)
              if row[0] == predicted)

accuracy = matches/len(test_data_tab)
print("ACCURACY OF DECISION TREE: ")
print(accuracy)

#########################TASK 3################################
# 10-fold cross validation of the same learner on the whole table.
eval_results = CrossValidation(data_tab, [tree_learner], k=10)
print("Accuracy of cross validation: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
Пример #17
0
import Orange
import Orange.preprocess
import pandas as pd

# Load the data and discretize column 0 into three equal-width bins; the
# discretized column becomes the class, the remaining columns the features.
data = Table.from_file('white wine.csv')
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
newCLabel = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
CLabelSet = newCLabel.domain.variables
WineDomain = Domain(FeatureV, CLabelSet)
data = Table.from_table(domain=WineDomain, source=data)

# 10-fold cross validation of a neural network with hidden layers (10, 1).
TLearner = NNClassificationLearner(hidden_layer_sizes=(10, 1), max_iter=4000)
evalR = CrossValidation(data, [TLearner], k=10)
print("Accuracy: {:.3f}".format(scoring.CA(evalR)[0]))
print("AUC: {:.3f}".format(scoring.AUC(evalR)[0]))

# Count rows whose "quality" equals 1.0, 2.0 or 9.0.
# NOTE(review): ``n`` is not used anywhere in the visible code.
n = sum(1 for d in data
        if (d["quality"] == 1.0 or d["quality"] == 2.0 or d["quality"] == 9.0))
# Collect rows with "quality" below 4.0 or above 8.0 and delete them.
# NOTE(review): "quality" was replaced by its discretized version above, so
# these numeric comparisons may not see the raw scores -- confirm.
SubSet = Table(data.domain,
               [d for d in data if (d["quality"] < 4.0 or d["quality"] > 8.0)])
# NOTE(review): ``del data[d]`` passes a row object rather than an index --
# verify that Table supports this and removes the intended rows.
for d in SubSet:
    del data[d]
# Repeat the discretization and domain rebuild on the filtered table.
CLabel = Orange.preprocess.Discretize()
CLabel.method = Orange.preprocess.discretize.EqualWidth(n=3)
DataSet = CLabel(data[:, 0])
FeatureV = data[:, 1:].domain.variables
newCLabelSet = DataSet.domain.variables
wineDomain = Domain(FeatureV, newCLabelSet)
data = Table.from_table(domain=wineDomain, source=data)
Пример #18
0
 def test_ca_from_confusion_matrix_on_iris_se(self):
     """CA with ``report_se=True`` from the first class-1 confusion matrix has one entry."""
     iris = data.Table("iris")
     cv_results = testing.cross_validation([self.learner], iris, folds=5)
     matrices = scoring.confusion_matrices(cv_results, class_index=1)
     first_matrix = matrices[0]
     scores = scoring.CA(first_matrix, report_se=True)
     self.assertEqual(len(scores), 1)
Пример #19
0
    "InputLearner",
    [
        "learner",  # :: Orange.base.Learner
        "results",  # :: Option[Try[Orange.evaluation.Results]]
        "stats"
    ]  # :: Option[Sequence[Try[float]]]
)


def classification_stats(results):
    """Apply every scorer in ``classification_stats.scores`` to *results*.

    Returns:
        A tuple with each scorer's return value, in scorer order.
    """
    computed = []
    for scorer in classification_stats.scores:
        computed.append(scorer(results))
    return tuple(computed)


# (header, scorer) pairs for classification results; F1, Precision and
# Recall are requested with average='weighted'.
_classification_scorers = (
    ("AUC", scoring.AUC),
    ("CA", lambda res, *args, **kwargs: scoring.CA(res)),
    ("F1", (lambda res, target=None: scoring.F1(
        res, target=target, average='weighted'))),
    ("Precision", (lambda res, target=None: scoring.Precision(
        res, target=target, average='weighted'))),
    ("Recall", (lambda res, target=None: scoring.Recall(
        res, target=target, average='weighted'))),
)
# Store headers and scorers as two parallel tuples on the function object.
classification_stats.headers = tuple(
    header for header, _ in _classification_scorers)
classification_stats.scores = tuple(
    scorer for _, scorer in _classification_scorers)


def regression_stats(results):
    """Apply every scorer in ``regression_stats.scores`` to *results*.

    Returns:
        A tuple with each scorer's return value, in scorer order.
    """
    computed = []
    for scorer in regression_stats.scores:
        computed.append(scorer(results))
    return tuple(computed)


regression_stats.headers, regression_stats.scores = zip(*(
    ("MSE", scoring.MSE),