Пример #1
0
def main():
    print "loading"
    annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME)
    table = annotations.as_orange_table()
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print "indices", set(cv_indices)
    print "splitting"
    training, testing = annotation_reader.split(annotations, cv_indices)
    print "features"

    engine = PairwiseEngine(training)
    
    training_table = engine.training_table
    testing_table = engine.makeTable(testing)
    print len(training_table), "training"
    print len(testing_table), "testing"
    
    learners = [orange.MajorityLearner(),
                orngEnsemble.RandomForestLearner(),
                ]
    results = orngTest.learnAndTestOnTestData(learners, 
                                              training_table, testing_table)

    for accuracy, cm in zip(orngStat.CA(results),
                            orngStat.confusionMatrices(results)):
        print orangeUtils.confusion_matrix_to_string(table.domain, cm)
        print "accuracy: %.2f%%" % (accuracy*100)
    def constructLearner(self):
        """Build a random forest learner from this object's settings.

        A tree learner is configured first (minimum examples per node,
        storage flags, optional depth limit) and handed to the forest as
        its base learner. The forest is seeded from ``self.rseed``,
        optionally wrapped by ``self.preprocessor``, and named
        ``self.name``.

        Returns the configured (possibly wrapped) learner.
        """
        rand = random.Random(self.rseed)

        # None leaves the attribute-subset size to the forest itself.
        attrs = self.attributesP if self.attributes else None

        base_tree = orngTree.TreeLearner()
        base_tree.stop.minExamples = self.preNodeInstP if self.preNodeInst else 0

        # Keep examples, node classifiers, contingencies and distributions
        # in the induced trees.
        for flag in ("storeExamples", "storeNodeClassifier",
                     "storeContingencies", "storeDistributions"):
            setattr(base_tree, flag, 1)

        if self.limitDepth:
            base_tree.maxDepth = self.limitDepthP

        forest = orngEnsemble.RandomForestLearner(base_learner=base_tree,
                                                  trees=self.trees,
                                                  rand=rand,
                                                  attributes=attrs)

        learner = forest
        if self.preprocessor:
            learner = self.preprocessor.wrapLearner(forest)
        learner.name = self.name
        return learner
Пример #3
0
    def constructLearner(self):
        """Build a random forest learner on top of a SimpleTreeLearner.

        The base tree gets its minimum instance count from
        ``self.preNodeInstP`` (0 when ``self.preNodeInst`` is off) and an
        optional depth limit; the forest is seeded from ``self.rseed``.
        The result is optionally wrapped by ``self.preprocessor`` and
        named ``self.name``.

        Returns the configured (possibly wrapped) learner.
        """
        from Orange.classification.tree import SimpleTreeLearner

        rand = random.Random(self.rseed)

        # None leaves the attribute-subset size to the forest itself.
        attrs = self.attributesP if self.attributes else None

        base_tree = SimpleTreeLearner()
        base_tree.min_instances = self.preNodeInstP if self.preNodeInst else 0
        if self.limitDepth:
            base_tree.max_depth = self.limitDepthP

        forest = orngEnsemble.RandomForestLearner(
            base_learner=base_tree,
            trees=self.trees,
            rand=rand,
            attributes=attrs)

        learner = forest
        if self.preprocessor:
            learner = self.preprocessor.wrapLearner(forest)
        learner.name = self.name
        return learner
Пример #4
0
    def train(self, trainset):
        """
        Fit an Orange random forest on ``trainset``.

        Stores the number of targets in ``self.n_classes``, the domain of
        the converted data in ``self.trainset_domain`` and the trained
        forest in ``self.forest``.
        """
        import random

        self.n_classes = len(trainset.metadata['targets'])

        # Convert to Orange's table format and remember its domain.
        orange_data = make_orange_dataset(trainset)
        self.trainset_domain = orange_data.domain

        forest_learner = orngEnsemble.RandomForestLearner(
            trees=self.n_trees,
            attributes=self.n_features_per_node,
            rand=random.Random(self.seed),
            name="forest")
        self.forest = forest_learner(orange_data)
Пример #5
0
def main():
    print "loading"
    annotations = annotation_reader.from_file(
        "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME)
    annotator2 = annotation_reader.from_file(
        "%s/data/directions/breadbox/nouns_dlaude.partial.txt" % TKLIB_HOME)
    #histogram(annotations)
    print "table"
    table = annotations.as_orange_table()
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print "indices", set(cv_indices)
    print "splitting"
    training, testing = annotation_reader.split(annotations, cv_indices)
    print "features"

    engine = WordnetParentsEngine(training)
    training_table = engine.makeTable(training)
    testing_table = engine.makeTable(testing)

    #training_table, testing_table = wordnet_parents(training, testing)
    #training_table, testing_table = wordnet_glosses(training, testing)
    #training_table, testing_table = flickr_parents(training, testing)

    print len(training_table), "training examples"
    print len(testing_table), "testing examples"

    #training_table = annotation_reader.to_big_small(training_table)
    #testing_table = annotation_reader.to_big_small(testing_table)

    #information_gain = orange.MeasureAttribute_info()
    #for x in training_table.domain.attributes:
    #    print "x", information_gain(x, training_table)

    learners = [
        orange.MajorityLearner(),
        orngEnsemble.RandomForestLearner(), WordnetKnnClassifier,
        agreement.WizardOfOzLearner(annotator2.as_orange_table())
    ]
    results = orngTest.learnAndTestOnTestData(learners, training_table,
                                              testing_table)
    for accuracy, cm in zip(orngStat.CA(results),
                            orngStat.confusionMatrices(results)):
        print orangeUtils.confusion_matrix_to_string(table.domain, cm)
        print "accuracy: %.2f%%" % (accuracy * 100)
Пример #6
0
    def __init__(self, training):
        self.training = training
        self.ancestor_to_count = training.ancestor_map()

        self.all_ancestors = list(self.ancestor_to_count.keys())
        self.all_ancestors.sort(key=lambda a: self.ancestor_to_count[a],
                                reverse=True)

        self.used_ancestors = self.all_ancestors
        print "name", self.used_ancestors[0].name
        self.attributes = [
            orange.EnumVariable(a.name, values=["True", "False"])
            for a in self.used_ancestors
        ]
        #self.attributes = [orange.FloatVariable(a.name)
        #                   for a in self.used_ancestors]
        print "got", len(self.used_ancestors), "features"
        self.domain = orange.Domain(self.attributes, training.orange_class_var)
        self.domain.addmeta(orange.newmetaid(), orange.StringVariable("word"))
        table = self.makeTable(self.training)
        self.classifier = orngEnsemble.RandomForestLearner()(table)
Пример #7
0
    def setLearner(self):
        """Assemble the random forest learner, send it, and train on current data.

        The learner is built from the instance settings (number of trees,
        random seed, attribute-subset size, minimum examples per node,
        depth limit), stored in ``self.learner`` and sent on the "Learner"
        channel. When ``self.data`` is set, a classifier is trained and
        stored in ``self.classifier``; on failure the error is reported via
        ``self.error`` and ``self.classifier`` is reset to None.
        """
        self.progressBarInit()

        if hasattr(self, "btnApply"):
            self.btnApply.setFocus()

        # Assemble the learner.
        rand = random.Random(self.rseed)

        attrs = None
        if self.attributes:
            attrs = self.attributesP

        self.learner = orngEnsemble.RandomForestLearner(
            trees=self.trees, rand=rand, attributes=attrs,
            callback=self.pbchange)

        # Configure the base tree learner used inside the forest.
        base_tree = self.learner.learner
        if self.preNodeInst:
            base_tree.stop.minExamples = self.preNodeInstP
        else:
            base_tree.stop.minExamples = 0

        base_tree.storeExamples = 1
        base_tree.storeNodeClassifier = 1
        # Was misspelled "storeContigencies", which never set the real flag.
        base_tree.storeContingencies = 1
        base_tree.storeDistributions = 1

        if self.limitDepth:
            base_tree.maxDepth = self.limitDepthP

        self.learner.name = self.name
        self.send("Learner", self.learner)

        # Clear any previously reported error.
        self.error()

        if self.data:
            try:
                self.classifier = self.learner(self.data)
                self.classifier.name = self.name
                self.streeEnabled(True)
            except Exception as errValue:
                self.error(str(errValue))
                self.classifier = None
                self.streeEnabled(False)
Пример #8
0
    def learnModel(self, X, y):
        """Train a class-weighted random forest on binary-labelled data.

        ``X`` holds one example per row and ``y`` the labels, which must
        take exactly two distinct values. The label different from
        ``self.bestResponse`` is stored as ``self.worstResponse``. The
        Orange domain, learner and trained classifier are stored in
        ``self.domain``, ``self.learner`` and ``self.classifier``.

        Raises ValueError when ``y`` does not have exactly two classes.
        """
        classes = numpy.unique(y)
        if classes.shape[0] != 2:
            raise ValueError("Can only operate on binary data")

        self.worstResponse = classes[classes != self.bestResponse][0]

        # Labels are converted to indices before being appended as the
        # last column of the data matrix.
        labelInds = self.labelsToInds(y)
        XY = numpy.c_[X, labelInds]

        # One continuous attribute per column of X, then the class variable
        # with bestResponse as its first value.
        attrList = [orange.FloatVariable("X" + str(i))
                    for i in range(X.shape[1])]
        classVar = orange.EnumVariable("y")
        classVar.addValue(str(self.bestResponse))
        classVar.addValue(str(self.worstResponse))
        attrList.append(classVar)

        self.domain = orange.Domain(attrList)
        eTable = orange.ExampleTable(self.domain, XY)

        # Weight examples by class
        weigher = orange.Preprocessor_addClassWeight(equalize=1)
        weigher.classWeights = [1 - self.weight, self.weight]
        eTable, weightID = weigher(eTable)
        eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

        baseTree = orngTree.TreeLearner(mForPruning=self.m,
                                        measure="gainRatio",
                                        minExamples=self.minSplit,
                                        maxDepth=self.maxDepth).instance()

        numAttributes = numpy.round(X.shape[1] * self.featureSize)
        self.learner = orngEnsemble.RandomForestLearner(
            learner=baseTree,
            trees=self.numTrees,
            attributes=numAttributes)
        self.classifier = self.learner(eTable, weightID)
Пример #9
0
    def fit(self, X, y):
        """Fit an Orange random forest on ``X`` and ``y``.

        ``self.max_features`` selects how many features are passed to the
        forest as ``attributes``: "auto"/"sqrt" (square root of the feature
        count, at least 1), "log2" (base-2 log, at least 1), None (all
        features), an integer (used as is) or any other number (treated as
        a fraction of the feature count).

        Sets ``n_features_``, ``classes_``, ``n_classes_``, ``table_`` and
        ``model_`` and returns ``self``.

        Raises ValueError for an unrecognised string ``max_features``.
        """
        # Check params
        n_features = X.shape[1]
        self.n_features_ = n_features

        requested = self.max_features
        if isinstance(requested, str):
            if requested in ("auto", "sqrt"):
                max_features = max(1, int(np.sqrt(n_features)))
            elif requested == "log2":
                max_features = max(1, int(np.log2(n_features)))
            else:
                raise ValueError(
                    'Invalid value for max_features. Allowed string '
                    'values are "auto", "sqrt" or "log2".')
        elif requested is None:
            max_features = n_features
        elif isinstance(requested, (numbers.Integral, np.integer)):
            max_features = requested
        else:  # float: fraction of the features
            max_features = int(requested * n_features)

        # Convert data: labels become indices into the sorted class array.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        y_indices = np.searchsorted(self.classes_, y)
        X_float = X.astype(np.float32)

        self.table_ = make_orange_dataset(X_float, y_indices, self.n_classes_)

        # Run
        self.model_ = orngEnsemble.RandomForestLearner(
            self.table_,
            trees=self.n_estimators,
            attributes=max_features)

        return self
Пример #10
0
    def constructLearner(self):
        """Build a random forest learner and configure its inner tree learner.

        The forest is created from ``self.trees``, ``self.rseed`` and the
        optional attribute-subset size; its ``learner`` attribute is then
        given the minimum examples per node, the storage flags and the
        optional depth limit. The forest is named ``self.name``.

        Returns the configured forest learner.
        """
        rand = random.Random(self.rseed)

        # None leaves the attribute-subset size to the forest itself.
        attrs = self.attributesP if self.attributes else None

        learner = orngEnsemble.RandomForestLearner(
            trees=self.trees, rand=rand, attributes=attrs)

        learner.learner.stop.minExamples = (
            self.preNodeInstP if self.preNodeInst else 0)

        # Keep examples, node classifiers, contingencies and distributions
        # in the induced trees.
        for flag in ("storeExamples", "storeNodeClassifier",
                     "storeContingencies", "storeDistributions"):
            setattr(learner.learner, flag, 1)

        if self.limitDepth:
            learner.learner.maxDepth = self.limitDepthP

        learner.name = self.name
        return learner
Пример #11
0
 def setUp(self):
     """Store a default-configured RandomForestLearner in ``self.learner``."""
     import orngEnsemble
     import orngTree
     forest_learner = orngEnsemble.RandomForestLearner()
     self.learner = forest_learner
Пример #12
0
    def train(self, keyword, learner="svm"):
        """Train a classifier for ``keyword`` on the training split.

        Feature vectors are computed for every training document; vectors
        without a '1' entry are skipped, and the stringified integer label
        is appended to the rest to form the table rows.

        ``learner`` is one of "bayes", "tree", "svm", "boosting" or
        "randomforest".

        Returns the trained classifier, or None when there are no training
        labels or no usable feature vectors.

        Raises ValueError for an unknown ``learner`` name.
        """
        training_docs, test_docs, train_label, test_label = \
            self.get_training_test_sets(keyword, 0.8)

        if len(train_label) == 0:
            return None

        F_train = []
        print("--------------TRAIN: %s -------------------" % (keyword,))
        for i, doc_i in enumerate(training_docs):
            myfeatures = self.get_features(keyword, self.documents[doc_i])
            if '1' not in myfeatures:
                continue

            # The class label goes last in each row.
            myfeatures.append(str(int(train_label[i])))
            F_train.append(myfeatures)

        if not F_train:
            return None

        # Create the table from the domain, then add the rows to it.
        table = orange.ExampleTable(self.get_domain())
        for row in F_train:
            table.append(row)

        # Perform the learning.
        if learner == "bayes":
            print("running bayes")
            classifier = orngBayes.BayesLearner(table)
        elif learner == "tree":
            print("running tree")
            classifier = orngTree.TreeLearner(table)
        elif learner == "svm":
            print("running svm")
            classifier = orngSVM.SVMLearner(table,
                                            svm_type=orange.SVMLearner.Nu_SVC,
                                            nu=0.3,
                                            probability=True)
        elif learner == "boosting":
            # Meant to be AdaBoost; the original author marked this branch
            # as problematic.
            # NOTE(review): confirm that orngTree actually provides
            # BoostedLearner.
            classifier = orngTree.BoostedLearner(table)
        elif learner == "randomforest":
            # The original author marked this branch as problematic too.
            classifier = orngEnsemble.RandomForestLearner(table,
                                                          trees=50,
                                                          name="forest")
        else:
            print("unknown learner")
            # A bare ``raise`` has no active exception to re-raise here, so
            # raise an explicit, descriptive error instead.
            raise ValueError("unknown learner: %r" % (learner,))

        return classifier
Пример #13
0
# Description: Demonstrates the use of random forests from orngEnsemble module
# Category:    classification, ensembles
# Classes:     RandomForestLearner
# Uses:        bupa.tab
# Referenced:  orngEnsemble.htm

import orange, orngTree, orngEnsemble

data = orange.ExampleTable('bupa.tab')
# The keyword was misspelled "mForPrunning", so the intended m-estimate
# pruning setting never reached the tree learner.
tree = orngTree.TreeLearner(minExamples=2, mForPruning=2,
                            sameMajorityPruning=True, name='tree')
forest = orngEnsemble.RandomForestLearner(trees=50, name="forest")
learners = [tree, forest]

import orngTest, orngStat
results = orngTest.crossValidation(learners, data, folds=3)

# Score once; each call returns one value per learner, in learner order.
accuracies = orngStat.CA(results)
brier_scores = orngStat.BrierScore(results)
aucs = orngStat.AUC(results)

print("Learner  CA     Brier  AUC")
for learner, ca, brier, auc in zip(learners, accuracies, brier_scores, aucs):
    print("%-8s %5.3f  %5.3f  %5.3f" % (learner.name, ca, brier, auc))
Пример #14
0
# Description: Defines a tree learner (trunks of depth less than 5) and uses them in forest tree, prints out the number of nodes in each tree
# Category:    classification, ensembles
# Classes:     RandomForestLearner
# Uses:        bupa.tab
# Referenced:  orngEnsemble.htm

import orange, orngTree, orngEnsemble

data = orange.ExampleTable('bupa.tab')

# Base tree learner: keeps distributions only, at least 5 examples per
# node, depth capped at 5, Gini as the split measure for both discrete and
# continuous attributes.
tree = orngTree.TreeLearner(storeNodeClassifier=0,
                            storeContingencies=0,
                            storeDistributions=1,
                            minExamples=5).instance()
gini = orange.MeasureAttribute_gini()
tree.split.discreteSplitConstructor.measure = gini
tree.split.continuousSplitConstructor.measure = gini
tree.maxDepth = 5
# Wrap the split constructor in an attribute-subset constructor (argument 3).
tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, 3)

forestLearner = orngEnsemble.RandomForestLearner(learner=tree, trees=50)
forest = forestLearner(data)

# Node count of every tree in the forest, space-separated on one line.
print(" ".join(str(orngTree.countNodes(c)) for c in forest.classifiers))
    def train(self):
        """Train the observation and transition classifiers.

        Every observation in ``self.dataset`` is converted to observation
        and transition rows, which fill two Orange tables. Both classifiers
        are then trained with the algorithm named by ``self.learner``
        ("bayes", "tree", "svm", "boosting" or "randomforest") and stored
        in ``self.classifier_obs`` and ``self.classifier_trans``.

        Returns the ``(classifier_obs, classifier_trans)`` pair, or None
        when no observation rows were produced.

        Raises ValueError for an unknown ``self.learner`` name.
        """
        F_train_obs = []
        F_train_trans = []
        print("--------------creating tables-------------------")
        n_observations = len(self.dataset.observations)
        for i, obs in enumerate(self.dataset.observations):
            print("%s of %s" % (i, n_observations))

            f_obs, f_trans = self.dataset.to_orange_entries(obs)

            # A separate index name keeps the outer ``i`` from being shadowed.
            for j, fs in enumerate(f_obs):
                # NOTE(review): this prints the first entry on every
                # iteration, not ``fs`` -- kept as in the original.
                print("%s %s" % (j, f_obs[0]))
                F_train_obs.append(fs)

            F_train_trans.extend(f_trans)

        if not F_train_obs:
            return None

        print(">> adding elements to tables")
        # Create each table from its domain, then add the rows to it.
        table_obs = orange.ExampleTable(self.get_domain_obs())
        for row in F_train_obs:
            table_obs.append(row)

        table_trans = orange.ExampleTable(self.get_domain_trans())
        for row in F_train_trans:
            table_trans.append(row)

        # Perform the learning.
        print("training")
        if self.learner == "bayes":
            print("training bayes obs")
            self.classifier_obs = orngBayes.BayesLearner(table_obs)
            print("training bayes trans")
            self.classifier_trans = orngBayes.BayesLearner(table_trans)
        elif self.learner == "tree":
            print("running tree")
            self.classifier_obs = orngTree.TreeLearner(table_obs)
            self.classifier_trans = orngTree.TreeLearner(table_trans)
        elif self.learner == "svm":
            print("trianing observation svm")
            self.classifier_obs = orngSVM.SVMLearner(
                table_obs,
                svm_type=orange.SVMLearner.Nu_SVC,
                nu=0.3,
                probability=True)

            print("trianing transition svm")
            self.classifier_trans = orngSVM.SVMLearner(
                table_trans,
                svm_type=orange.SVMLearner.Nu_SVC,
                nu=0.3,
                probability=True)
        elif self.learner == "boosting":
            # Meant to be AdaBoost; the original author marked this branch
            # as problematic.
            # NOTE(review): confirm that orngTree actually provides
            # BoostedLearner.
            self.classifier_obs = orngTree.BoostedLearner(table_obs)
            self.classifier_trans = orngTree.BoostedLearner(table_trans)
        elif self.learner == "randomforest":
            # The original author marked this branch as problematic too.
            self.classifier_obs = orngEnsemble.RandomForestLearner(
                table_obs, trees=50, name="forest")
            self.classifier_trans = orngEnsemble.RandomForestLearner(
                table_trans, trees=50, name="forest")
        else:
            print("unknown learner")
            # A bare ``raise`` has no active exception to re-raise here, so
            # raise an explicit, descriptive error instead.
            raise ValueError("unknown learner: %r" % (self.learner,))

        return self.classifier_obs, self.classifier_trans