Exemplo n.º 1
0
 def __call__(self, attr, data, aprioriDist=None, weightID=None):
     """Score ``attr`` as its information gain divided by an entropy term.

     The entropy term is whatever ``orngContingency.Entropy`` returns for
     the flattened attribute-by-class contingency counts of ``data``.
     When that entropy is not positive, 0 is returned instead of dividing.

     ``aprioriDist`` and ``weightID`` are accepted but not used here.
     """
     import numpy
     from orngContingency import Entropy

     # If the domain recognises ``attr`` (e.g. it was given as a name),
     # swap it for the domain's own variable object.
     if attr in data.domain:
         attr = data.domain[attr]

     # Flatten every per-value class distribution into one list of counts.
     contingency = orange.ContingencyAttrClass(attr, data)
     cellCounts = []
     for valueDist in contingency.values():
         cellCounts.extend(list(valueDist))
     jointEntropy = Entropy(numpy.array(cellCounts))

     gain = orange.MeasureAttribute_info(attr, data)
     if not jointEntropy > 0:
         return 0
     return float(gain) / jointEntropy
Exemplo n.º 2
0
    def __call__(self, gen, weightID=0):
        """Build a lookup-table classifier on the best-scoring attribute.

        Every attribute of ``gen.domain`` is scored with ``self.measure``;
        the winner picked by ``orngMisc.BestOnTheFly`` becomes the only
        attribute of an ``orange.ClassifierByLookupTable``.  For each row of
        the attribute/class contingency the table stores the row's modus and
        its distribution; the last slot (index -1) receives the modus and
        distribution of ``contingency.innerDistribution``.  All stored
        distributions are normalized before the classifier is returned.
        """
        chooser = orngMisc.BestOnTheFly()
        for candidateAttr in gen.domain.attributes:
            score = self.measure(candidateAttr, gen, None, weightID)
            chooser.candidate(score)
        bestAttr = gen.domain.attributes[chooser.winnerIndex()]

        classifier = orange.ClassifierByLookupTable(gen.domain.classVar,
                                                    bestAttr)

        contingency = orange.ContingencyAttrClass(bestAttr, gen, weightID)
        for index in range(len(contingency)):
            valueDist = contingency[index]
            classifier.lookupTable[index] = valueDist.modus()
            classifier.distributions[index] = valueDist

        # The final slot is filled from the contingency's inner distribution.
        overallDist = contingency.innerDistribution
        classifier.lookupTable[-1] = overallDist.modus()
        classifier.distributions[-1] = overallDist

        for storedDist in classifier.distributions:
            storedDist.normalize()

        return classifier
Exemplo n.º 3
0
    def test_attrClass_disc(self):
        """Check ContingencyAttrClass built on attribute 0 of the "zoo" data.

        Covers, in order: agreement with separately computed distributions,
        indexing and dictionary-style access, a pickle round trip,
        normalization with ``p_class``, ``add_var_class``, two-level
        indexing, and the error raised for the key "?".
        """
        table = orange.ExampleTable("zoo")
        classDist = orange.get_class_distribution(table)
        attrDist = orange.Distribution(0, table)
        contingency = orange.ContingencyAttrClass(0, table)

        # Inner/outer distributions match the ones computed directly.
        self.assertEqual(contingency.inner_distribution, classDist)
        self.assertEqual(contingency.outer_distribution, attrDist)

        # Rows can be addressed by index or by attribute value.
        self.assertNotEqual(contingency[0], contingency[1])
        self.assertEqual(id(contingency[0]),
                         id(contingency[table.domain[0].values[0]]))
        self.assertEqual(len(contingency), len(attrDist))
        for rowDist in contingency:
            # Only the first row produced by iteration is checked.
            self.assertEqual(rowDist, contingency[0])
            break

        # Dictionary-style views.
        self.assertEqual(contingency.keys(), table.domain[0].values)
        self.assertEqual(contingency.values()[0], contingency[0])
        self.assertEqual(contingency.values()[1], contingency[1])
        firstKey, firstRow = contingency.items()[0]
        self.assertEqual(firstKey, table.domain[0].values[0])
        self.assertEqual(firstRow, contingency[0])

        # A pickle round trip keeps variables, distributions and rows.
        restored = pickle.loads(pickle.dumps(contingency))

        self.assertEqual(contingency.innerDistribution,
                         restored.innerDistribution)
        self.assertEqual(contingency.innerVariable, restored.innerVariable)
        self.assertEqual(contingency.outerDistribution,
                         restored.outerDistribution)
        self.assertEqual(contingency.outerVariable, restored.outerVariable)
        self.assertEqual(contingency[0], restored[0])
        self.assertEqual(contingency[1], restored[1])

        # After normalize() the class probabilities for a value sum to 1;
        # the un-normalized copy needs an explicit division by .abs.
        contingency.normalize()
        self.assertAlmostEqual(sum(contingency.p_class(0)), 1.0)
        self.assertEqual(contingency.p_class(0)[0], contingency.p_class(0, 0))
        self.assertEqual(contingency.p_class(0)[0],
                         restored.p_class(0)[0]/restored.p_class(0).abs)

        # add_var_class adds the given weight to a single cell.
        before = contingency[0][0]
        contingency.add_var_class(0, 0, 0.5)
        self.assertEqual(before+0.5, contingency[0][0])

        # Chained and tuple indexing agree, by index and by value.
        self.assertEqual(contingency[0][0], contingency[0,0])
        self.assertEqual(
            contingency[table.domain[0].values[0],
                        table.domain.classVar.values[0]],
            contingency[0,0])

        with self.assertRaises(IndexError):
            contingency["?"]
Exemplo n.º 4
0
    def setData(self, attr, data):
        """Store a new attribute/data pair and rebuild the plot from it.

        The graph is cleared first and the current cut points are reset.
        If either ``attr`` or ``data`` is falsy, only ``snapDecimals`` and
        ``probDist`` are reset and nothing is drawn.

        With a class variable, an attribute/class contingency is built and a
        loess conditional-probability estimator is attempted on it
        (``condProb`` is None when that fails).  Without a class variable,
        only the attribute's distribution is stored in ``probDist``.

        ``minVal``/``maxVal`` come from the observed attribute values
        (0 and 1 when there are none), and ``snapDecimals`` is derived from
        the order of magnitude of their difference.
        """
        self.clearAll()
        self.attr, self.data = attr, data
        self.curCutPoints = []

        if not data or not attr:
            self.snapDecimals = 1
            self.probDist = None
            return

        if data.domain.classVar:
            self.contingency = orange.ContingencyAttrClass(attr, data)
            try:
                self.condProb = orange.ConditionalProbabilityEstimatorConstructor_loess(
                   self.contingency,
                   nPoints=50)
            except Exception:
                # Best effort: fall back to "no estimator" when loess cannot
                # be constructed.  Catching Exception (not a bare except)
                # lets KeyboardInterrupt and SystemExit propagate.
                self.condProb = None
            self.probDist = None
            attrValues = self.contingency.keys()
        else:
            self.condProb = self.contingency = None
            self.probDist = orange.Distribution(attr, data)
            attrValues = self.probDist.keys()

        if attrValues:
            self.minVal, self.maxVal = min(attrValues), max(attrValues)
        else:
            self.minVal, self.maxVal = 0, 1
        mdist = self.maxVal - self.minVal
        if mdist > 1e-30:
            # Guarded by the threshold above so log() never sees a
            # zero or negative range.
            self.snapDecimals = -int(math.ceil(math.log(mdist, 10)) -2)
        else:
            self.snapDecimals = 1

        self.baseCurveX = None

        self.plotRug(True)
        self.plotProbCurve(True)
        self.plotCutLines(True)

        self.updateLayout()
        self.replot()
Exemplo n.º 5
0
 def __call__(self, attr, data, aprioriDist=None, weightID=None):
     """Return a per-example score built from ``logMultipleCombs`` terms.

     A prior term is computed from the class distribution of ``data``;
     then, for every value of ``attr`` in the attribute/class contingency,
     two further ``logMultipleCombs`` terms are subtracted from it.  The
     result is divided by the number of examples (by 1 for empty data).

     ``aprioriDist`` and ``weightID`` are accepted but not used here.
     """
     contingency = orange.ContingencyAttrClass(attr, data)
     classCounts = orange.Distribution(data.domain.classVar, data).values()
     numClasses = len(classCounts)
     numExamples = len(data)

     # Prior term, computed from the class distribution alone.
     score = logMultipleCombs(numExamples, classCounts) + logMultipleCombs(
         numExamples + numClasses - 1, [numExamples, numClasses - 1])

     # Collect all first-kind terms, then all second-kind terms, so they
     # are subtracted in that order.
     attrValues = contingency.keys()
     terms = []
     for key in attrValues:
         terms.append(
             logMultipleCombs(sum(contingency[key]),
                              contingency[key].values()))
     for key in attrValues:
         terms.append(
             logMultipleCombs(
                 sum(contingency[key]) + numClasses - 1,
                 [sum(contingency[key]), numClasses - 1]))

     for term in terms:
         score -= term
     return score / max(1, numExamples)
Exemplo n.º 6
0
    def test_attrClass_cont(self):
        """Check ContingencyAttrClass built on attribute 0 of the "iris" data.

        Covers, in order: agreement with separately computed distributions,
        the set of keys and the total count, a pickle round trip,
        normalization with ``p_class``, ``add_var_class``, two-level
        indexing, and the error raised for the key "?".
        """
        table = orange.ExampleTable("iris")
        classDist = orange.get_class_distribution(table)
        attrDist = orange.Distribution(0, table)
        contingency = orange.ContingencyAttrClass(0, table)
        firstValue = contingency.keys()[0]

        self.assertEqual(contingency.inner_distribution, classDist)
        self.assertEqual(contingency.outer_distribution, attrDist)
        self.assertEqual(len(contingency), len(attrDist))

        # The keys are exactly the distinct values of attribute 0, and the
        # cell counts add up to the number of examples.
        observed = set(example[0] for example in table)
        self.assertEqual(observed, set(contingency.keys()))
        self.assertEqual(len(table),
                         sum(sum(row) for row in contingency.values()))

        # A pickle round trip keeps variables, distributions and rows.
        restored = pickle.loads(pickle.dumps(contingency))
        self.assertEqual(contingency.innerDistribution,
                         restored.innerDistribution)
        self.assertEqual(contingency.innerVariable, restored.innerVariable)
        self.assertEqual(contingency.outerDistribution,
                         restored.outerDistribution)
        self.assertEqual(contingency.outerVariable, restored.outerVariable)
        self.assertEqual(contingency[firstValue], restored[firstValue])

        # After normalize() the class probabilities for a value sum to 1;
        # the un-normalized copy needs an explicit division by .abs.
        contingency.normalize()
        self.assertAlmostEqual(sum(contingency.p_class(firstValue)), 1.0)
        self.assertEqual(contingency.p_class(firstValue)[0],
                         contingency.p_class(firstValue, 0))
        self.assertEqual(
            contingency.p_class(firstValue)[0],
            restored.p_class(firstValue)[0]/restored.p_class(firstValue).abs)

        # NOTE(review): this part indexes with 0 rather than the first
        # observed value - confirm that is intended for a continuous
        # attribute.
        before = contingency[0][0]
        contingency.add_var_class(0, 0, 0.5)
        self.assertEqual(before+0.5, contingency[0][0])

        self.assertEqual(contingency[firstValue][0],
                         contingency[firstValue,0])

        with self.assertRaises(IndexError):
            contingency["?"]
Exemplo n.º 7
0
# Description: Shows the limitations of contingencies with continuous outer attributes
# Category:    statistics
# Classes:     Contingency
# Uses:        iris
# Referenced:  contingency.htm

import orange
data = orange.ExampleTable("iris")
cont = orange.ContingencyAttrClass(0, data)

print "Contingency items:"
for val, dist in cont.items()[:5]:
    print val, dist
print

print "Contingency keys: ", cont.keys()[:3]
print "Contingency values: ", cont.values()[:3]
print "Contingency items: ", cont.items()[:3]
print

try:
    midkey = (cont.keys()[0] + cont.keys()[1])/2.0
    print "cont[%5.3f] =" % (midkey, cont[midkey])
except Exception, v:
    print "Error: ", v
Exemplo n.º 8
0
# Description: Demonstrates the use of ContingencyAttrClass
# Category:    statistics
# Classes:     Contingency, ContingencyAttrClass
# Uses:        monk1
# Referenced:  contingency.htm

import orange
data = orange.ExampleTable("monk1")
# Contingency of attribute "e" against the class, computed from the data.
cont = orange.ContingencyAttrClass("e", data)

# Names of the variables as exposed through the generic inner/outer
# attributes and through classVar/variable.
# NOTE(review): presumably these are two views of the same pair of
# variables - confirm against the Orange documentation.
print "Inner variable: ", cont.innerVariable.name
print "Outer variable: ", cont.outerVariable.name
print
print "Class variable: ", cont.classVar.name
print "Attribute:      ", cont.variable.name
print

# For every value of the attribute, print what p_class returns for it.
print "Distributions:"
for val in cont.variable:
    print "  p(.|%s) = %s" % (val.native(), cont.p_class(val))
print

# Class value built from index 1.
# NOTE(review): if indices are 0-based this is the second class value
# despite the name "firstclass" - confirm.
firstclass = orange.Value(cont.classVar, 1)
firstnative = firstclass.native()
print "Probabilities of class '%s'" % firstnative
# Same loop as above, but asking p_class for that single class value.
for val in cont.variable:
    print "  p(%s|%s) = %5.3f" % (firstnative, val.native(),
                                  cont.p_class(val, firstclass))
print

# Rebuild the contingency from two variables instead of a name and a data
# table; the rest of the script is not visible here.
cont = orange.ContingencyAttrClass(data.domain["e"], data.domain.classVar)
# Category:    feature induction
# Classes:     FeatureByMinComplexity, FeatureByIM, FeatureByKramer, FeatureByCartesianProduct
# Uses:        monks-2.tab

import orange
import orngCI

data = orange.ExampleTable("../datasets/monks-2")

# Construct a new feature from attributes "a" and "b" with
# FeatureByMinComplexity; the call returns the feature and a quality score.
ab, quality = orngCI.FeatureByMinComplexity(data, ["a", "b"])
print "Quality: %.3f" % quality
print "Values", ab.values

# data2 is not used again in the visible part of the script; the
# contingency below is computed on the original data.
data2 = orngCI.addAnAttribute(ab, data)

# Print each element yielded by iterating the feature/class contingency.
c = orange.ContingencyAttrClass(ab, data)
for i in c:
    print i

# Same steps with FeatureByIM.
ab, quality = orngCI.FeatureByIM(data, ["a", "b"])
print "Quality: %.3f" % quality
print "Values", ab.values

data2 = orngCI.addAnAttribute(ab, data)

c = orange.ContingencyAttrClass(ab, data)
for i in c:
    print i

# FeatureByKramer: only the quality is printed in the visible part.
ab, quality = orngCI.FeatureByKramer(data, ["a", "b"])
print "Quality: %.3f" % quality
Exemplo n.º 10
0
# Description: Shows how to assess the quality of attributes
# Category:    attribute quality
# Classes:     MeasureAttribute, MeasureAttribute_info,
# Uses:        lenses
# Referenced:  MeasureAttribute.htm

import orange, random
data = orange.ExampleTable("lenses")

# One measure object, called below with three different argument forms.
meas = orange.MeasureAttribute_info()

# Form 1: attribute descriptor plus the data table.
astigm = data.domain["astigmatic"]
print "Information gain of 'astigmatic': %6.4f" % meas(astigm, data)

# Form 2: attribute/class contingency plus the class distribution.
classdistr = orange.Distribution(data.domain.classVar, data)
cont = orange.ContingencyAttrClass("tear_rate", data)
print "Information gain of 'tear_rate': %6.4f" % meas(cont, classdistr)

# Form 3: attribute index plus a DomainContingency built from the data.
dcont = orange.DomainContingency(data)
print "Information gain of the first attribute: %6.4f" % meas(0, dcont)
print

print "*** A set of more exhaustive tests for different way of passing arguments to MeasureAttribute ***"

names = [a.name for a in data.domain.attributes]
attrs = len(names)

# Header row: an empty 30-character label column followed by one
# 15-character column per attribute name.  This is a Python 2 print
# statement; the parentheses only group the format string.
print("%30s" + "%15s" * attrs) % (("", ) + tuple(names))

# Row format with the same column widths: a label and one float per
# attribute.  The code that uses it is not visible here.
fstr = "%30s" + "%15.4f" * attrs