def tune_penalty(self, data):
    """Select the LR penalty minimizing 5-fold cross-validated log loss.

    Parameters
    ----------
    data : Orange.data.Table
        Training data used for the internal cross-validation.

    Returns
    -------
    float
        The penalty from the candidate grid with the lowest log loss.
    """
    learner = LRRulesLearner(fit_intercept=self.fit_intercept,
                             intercept_scaling=self.intercept_scaling)
    penalties = [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10., 100.]
    scores = []
    for pen in penalties:
        learner.penalty = pen
        # Fixed random_state so every penalty is scored on the same folds.
        res = CrossValidation(data, [learner], k=5, random_state=1111)
        ll = LogLoss(res)
        # LogLoss returns one score per learner; take the scalar for our
        # single learner so min()/index() below compare plain floats
        # instead of length-1 numpy arrays (array comparison is fragile).
        scores.append(ll[0])
    return penalties[scores.index(min(scores))]
def test_log_loss_calc(self):
    """Verify Orange's LogLoss agrees with our reference implementation."""
    data = Table('titanic')
    results = TestOnTrainingData(data, [LogisticRegressionLearner()])
    # Expand the 1-D class vector into a two-column (P(neg), P(pos)) matrix
    # so it lines up with the per-class probability output.
    target = np.copy(results.actual)
    one_hot = np.column_stack((1 - target, target))
    expected = self._log_loss(one_hot, results.probabilities[0])
    measured = LogLoss(results)
    self.assertAlmostEqual(expected, measured[0])
def test_log_loss(self):
    """A majority classifier on iris (3 balanced classes) must score log(3)."""
    results = TestOnTrainingData(Table('iris'), [MajorityLearner()])
    score = LogLoss(results)
    self.assertAlmostEqual(score[0], - np.log(1 / 3))
# Persist the learned model for this cycle. The original format string had
# no placeholder, so "model.txt" was overwritten every cycle; write one
# file per cycle instead, and close it deterministically.
with open("model_{}.txt".format(cycle), "wt") as fmodel:
    fmodel.write(str(classifier))

# test model + other methods: evaluate the rule learner against a panel
# of standard classifiers on a held-out test set.
testdata = Orange.data.Table('testdata')
bayes = Orange.classification.NaiveBayesLearner()
logistic = Orange.classification.LogisticRegressionLearner()
random_forest = Orange.classification.RandomForestLearner()
svm = Orange.classification.SVMLearner()
tree = Orange.classification.TreeLearner()
cn2 = Orange.classification.rules.CN2UnorderedLearner()
learners = [learner, logistic, tree, bayes, cn2, random_forest, svm]
res = TestOnTestData(data, testdata, learners)
ca = CA(res)
auc = AUC(res)
ll = LogLoss(res)
names = [
    'logrules', 'logistic', 'tree', 'naive-bayes', 'cn2',
    'random-forest', 'svm'
]
# Build the report with join instead of repeated += (avoids quadratic
# string concatenation and reads the same).
lines = ["CA\tAUC\tLogLoss\tMethod\n"]
for ni, n in enumerate(names):
    lines.append("{}\t{}\t{}\t{}\n".format(ca[ni], auc[ni], ll[ni], n))
scores = "".join(lines)
print(scores)
# Close the scores file explicitly (the original leaked the handle).
with open("scores.txt", "wt") as fscores:
    fscores.write(scores)

# find critical examples
indices, criticality, rules = arg.find_critical(learner, data)
from Orange.classification import LogisticRegressionLearner, NaiveBayesLearner, \
    RandomForestLearner

import orangecontrib.evcrules.rules as rules
from orangecontrib.evcrules.logistic import LRRulesLearner

# Benchmark rule-augmented logistic regression against standard baselines
# across several benchmark datasets.
datasets = [
    'ionosphere', 'adult_sample', 'iris', 'breast-cancer', 'bupa',
    'titanic'
]
for dataset_name in datasets:
    data = Table(dataset_name)
    star_evc = rules.RulesStar(evc=True, add_sub_rules=True,
                               parent_alpha=0.5)
    star_m = rules.RulesStar(evc=False, m=22, add_sub_rules=True,
                             parent_alpha=0.5)
    # compare lr with rules, lr without rules and sklearn's lr
    learners = [
        LRRulesLearner(opt_penalty=True, rule_learner=star_evc),
        LRRulesLearner(opt_penalty=True, rule_learner=star_m),
        LRRulesLearner(opt_penalty=True),
        LogisticRegressionLearner(C=1),
        NaiveBayesLearner(),
        RandomForestLearner(),
    ]
    res = CrossValidation(data, learners, k=5)
    print("Dataset: ", dataset_name)
    for l, ca, auc, ll in zip(learners, CA(res), AUC(res), LogLoss(res)):
        print("learner: {}\nCA: {}\nAUC: {}\n LL: {}".format(l, ca, auc, ll))