Exemplo n.º 1
0
import orange, orngTree, orngStat, orngWrap

learner = orngTree.TreeLearner()
data = orange.ExampleTable("voting")
tuner = orngWrap.TuneMParameters(object=learner,
                                 parameters=[
                                     ("minSubset", [2, 5, 10, 20]),
                                     ("measure", [
                                         orange.MeasureAttribute_gainRatio(),
                                         orange.MeasureAttribute_gini()
                                     ])
                                 ],
                                 evaluate=orngStat.AUC)
classifier = tuner(data)
Exemplo n.º 2
0
    def __init__(self,
                 learner=None,
                 trees=100,
                 attributes=None,
                 name='Random Forest',
                 rand=None,
                 callback=None):
        """random forest learner"""
        self.trees = trees
        self.name = name
        self.learner = learner
        self.attributes = attributes
        self.callback = callback
        if rand:
            self.rand = rand
        else:
            self.rand = random.Random()
            self.rand.seed(0)

        self.randstate = self.rand.getstate()  #original state

        if not learner:
            # tree learner assembled as suggested by Brieman (2001)
            smallTreeLearner = orngTree.TreeLearner(storeNodeClassifier=0,
                                                    storeContingencies=0,
                                                    storeDistributions=1,
                                                    minExamples=5).instance()
            smallTreeLearner.split.discreteSplitConstructor.measure = smallTreeLearner.split.continuousSplitConstructor.measure = orange.MeasureAttribute_gini(
            )
            smallTreeLearner.split = SplitConstructor_AttributeSubset(
                smallTreeLearner.split, attributes, self.rand)
            self.learner = smallTreeLearner
Exemplo n.º 3
0
def selectAttributes(data, attrContOrder, attrDiscOrder, projections=None):
    if data.domain.classVar == None or data.domain.classVar.varType != orange.VarTypes.Discrete:
        return ([attr.name for attr in data.domain.attributes], [], 0)

    shown = [data.domain.classVar.name]
    hidden = []
    maxIndex = 0  # initialize outputs

    # # both are RELIEF
    if attrContOrder == "ReliefF" and attrDiscOrder == "ReliefF":
        attrVals = orngFSS.attMeasure(data, orange.MeasureAttribute_relief())
        s, h = getTopAttrs(attrVals, 0.95)
        return (shown + s, hidden + h, 0)

    # # both are NONE
    elif attrContOrder == "None" and attrDiscOrder == "None":
        for item in data.domain.attributes:
            shown.append(item.name)
        return (shown, hidden, 0)

    # disc and cont attribute list
    discAttrs = []
    contAttrs = []
    for attr in data.domain.attributes:
        if attr.varType == orange.VarTypes.Continuous:
            contAttrs.append(attr.name)
        elif attr.varType == orange.VarTypes.Discrete:
            discAttrs.append(attr.name)

    ###############################
    # sort continuous attributes
    if attrContOrder == "None":
        shown += contAttrs
    elif attrContOrder in [
            "ReliefF", "Fisher discriminant", "Signal to Noise",
            "Signal to Noise For Each Class"
    ]:
        if attrContOrder == "ReliefF":
            measure = orange.MeasureAttribute_relief(k=10, m=50)
        elif attrContOrder == "Fisher discriminant":
            measure = MeasureFisherDiscriminant()
        elif attrContOrder == "Signal to Noise":
            measure = S2NMeasure()
        else:
            measure = S2NMeasureMix()

        dataNew = data.select(contAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h
    else:
        print "Unknown value for attribute order: ", attrContOrder

    # ###############################
    # sort discrete attributes
    if attrDiscOrder == "None":
        shown += discAttrs
    elif attrDiscOrder == "GainRatio" or attrDiscOrder == "Gini" or attrDiscOrder == "ReliefF":
        if attrDiscOrder == "GainRatio":
            measure = orange.MeasureAttribute_gainRatio()
        elif attrDiscOrder == "Gini":
            measure = orange.MeasureAttribute_gini()
        else:
            measure = orange.MeasureAttribute_relief()

        dataNew = data.select(discAttrs + [data.domain.classVar])
        attrVals = orngFSS.attMeasure(dataNew, measure)
        s, h = getTopAttrs(attrVals, 0.95)
        shown += s
        hidden += h

    elif attrDiscOrder == "Oblivious decision graphs":
        #shown.append(data.domain.classVar.name)
        attrs = getFunctionalList(data)
        for item in attrs:
            shown.append(item)
        for attr in data.domain.attributes:
            if attr.name not in shown and attr.varType == orange.VarTypes.Discrete:
                hidden.append(attr.name)
    else:
        print "Unknown value for attribute order: ", attrDiscOrder

    return (shown, hidden, maxIndex)
                  tuple([meas(i, data2) for i in range(attrs)]))

    meas.unknownsTreatment = meas.UnknownsAsValue
    print fstr % (("- unknowns as value:", ) +
                  tuple([meas(i, data2) for i in range(attrs)]))
    print


print "Information gain"
printVariants(orange.MeasureAttribute_info())

print "Gain ratio"
printVariants(orange.MeasureAttribute_gainRatio())

print "Gini index"
printVariants(orange.MeasureAttribute_gini())

print "Relief"
meas = orange.MeasureAttribute_relief()
print fstr % (
    ("- no unknowns:", ) + tuple([meas(i, data) for i in range(attrs)]))
print fstr % (
    ("- with unknowns:", ) + tuple([meas(i, data2) for i in range(attrs)]))
print

print "Cost matrix ((0, 5), (1, 0))"
meas = orange.MeasureAttribute_cost()
meas.cost = ((0, 5), (1, 0))
printVariants(meas)

print "Relevance"
Exemplo n.º 5
0
    def __init__(self, parent=None, signalManager=None, name="Interactive Discretization"):
        OWWidget.__init__(self, parent, signalManager, name)
        self.showBaseLine=1
        self.showLookaheadLine=1
        self.showTargetClassProb=1
        self.showRug=0
        self.snap=1
        self.measure=0
        self.targetClass=0
        self.discretization = self.classDiscretization = self.indiDiscretization = 1
        self.intervals = self.classIntervals = self.indiIntervals = 3
        self.outputOriginalClass = True
        self.indiData = []
        self.indiLabels = []
        self.resetIndividuals = 0
        self.customClassSplits = ""

        self.selectedAttr = 0
        self.customSplits = ["", "", ""]
        self.autoApply = True
        self.dataChanged = False
        self.autoSynchronize = True
        self.pointsChanged = False

        self.customLineEdits = []
        self.needsDiscrete = []

        self.data = self.originalData = None

        self.loadSettings()

        self.inputs=[("Data", ExampleTable, self.setData)]
        self.outputs=[("Data", ExampleTable)]
        self.measures=[("Information gain", orange.MeasureAttribute_info()),
                       #("Gain ratio", orange.MeasureAttribute_gainRatio),
                       ("Gini", orange.MeasureAttribute_gini()),
                       ("chi-square", orange.MeasureAttribute_chiSquare()),
                       ("chi-square prob.", orange.MeasureAttribute_chiSquare(computeProbabilities=1)),
                       ("Relevance", orange.MeasureAttribute_relevance()),
                       ("ReliefF", orange.MeasureAttribute_relief())]
        self.discretizationMethods=["Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove continuous attributes"]
        self.classDiscretizationMethods=["Equal-frequency discretization", "Equal-width discretization"]
        self.indiDiscretizationMethods=["Default", "Leave continuous", "Entropy-MDL discretization", "Equal-frequency discretization", "Equal-width discretization", "Remove attribute"]

        self.mainHBox =  OWGUI.widgetBox(self.mainArea, orientation=0)

        vbox = self.controlArea
        box = OWGUI.radioButtonsInBox(vbox, self, "discretization", self.discretizationMethods[:-1], "Default discretization", callback=[self.clearLineEditFocus, self.defaultMethodChanged])
        self.needsDiscrete.append(box.buttons[1])
        box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
        indent = OWGUI.checkButtonOffsetHint(self.needsDiscrete[-1])
        self.interBox = OWGUI.widgetBox(OWGUI.indentedBox(box, sep=indent))
        OWGUI.widgetLabel(self.interBox, "Number of intervals (for equal width/frequency)")
        OWGUI.separator(self.interBox, height=4)
        self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(self.interBox), self, "intervals", None, 2, 10, callback=[self.clearLineEditFocus, self.defaultMethodChanged])
        OWGUI.appendRadioButton(box, self, "discretization", self.discretizationMethods[-1])
        OWGUI.separator(vbox)

        ribg = OWGUI.radioButtonsInBox(vbox, self, "resetIndividuals", ["Use default discretization for all attributes", "Explore and set individual discretizations"], "Individual attribute treatment", callback = self.setAllIndividuals)
        ll = QWidget(ribg)
        ll.setFixedHeight(1)
        OWGUI.widgetLabel(ribg, "Set discretization of all attributes to")
        hcustbox = OWGUI.widgetBox(OWGUI.indentedBox(ribg), 0, 0)
        for c in range(1, 4):
            OWGUI.appendRadioButton(ribg, self, "resetIndividuals", "Custom %i" % c, insertInto = hcustbox)

        OWGUI.separator(vbox)

        box = self.classDiscBox = OWGUI.radioButtonsInBox(vbox, self, "classDiscretization", self.classDiscretizationMethods, "Class discretization", callback=[self.clearLineEditFocus, self.classMethodChanged])
        cinterBox = OWGUI.widgetBox(box)
        self.intervalSlider=OWGUI.hSlider(OWGUI.indentedBox(cinterBox, sep=indent), self, "classIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.classMethodChanged], label="Number of intervals")
        hbox = OWGUI.widgetBox(box, orientation = 0)
        OWGUI.appendRadioButton(box, self, "discretization", "Custom" + "  ", insertInto = hbox)
        self.classCustomLineEdit = OWGUI.lineEdit(hbox, self, "customClassSplits", callback = self.classCustomChanged, focusInCallback = self.classCustomSelected)
#        Can't validate - need to allow spaces
        box.setSizePolicy(QSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed))
        OWGUI.separator(box)
        self.classIntervalsLabel = OWGUI.widgetLabel(box, "Current splits: ")
        OWGUI.separator(box)
        OWGUI.checkBox(box, self, "outputOriginalClass", "Output original class", callback = self.commitIf)
        OWGUI.widgetLabel(box, "("+"Widget always uses discretized class internally."+")")

        OWGUI.separator(vbox)
        #OWGUI.rubber(vbox)

        box = OWGUI.widgetBox(vbox, "Commit")
        applyButton = OWGUI.button(box, self, "Commit", callback = self.commit, default=True)
        autoApplyCB = OWGUI.checkBox(box, self, "autoApply", "Commit automatically", callback=[self.clearLineEditFocus])
        OWGUI.setStopper(self, applyButton, autoApplyCB, "dataChanged", self.commit)
        OWGUI.rubber(vbox)

        #self.mainSeparator = OWGUI.separator(self.mainHBox, width=25)        # space between control and main area
        self.mainIABox =  OWGUI.widgetBox(self.mainHBox, "Individual attribute settings")
        self.mainBox = OWGUI.widgetBox(self.mainIABox, orientation=0)
        OWGUI.separator(self.mainIABox)#, height=30)
        graphBox = OWGUI.widgetBox(self.mainIABox, "", orientation=0)
        
        
#        self.needsDiscrete.append(graphBox)
        graphOptBox = OWGUI.widgetBox(graphBox)
        OWGUI.separator(graphBox, width=10)
        
        graphGraphBox = OWGUI.widgetBox(graphBox)
        self.graph = DiscGraph(self, graphGraphBox)
        graphGraphBox.layout().addWidget(self.graph)
        reportButton2 = OWGUI.button(graphGraphBox, self, "Report Graph", callback = self.reportGraph, debuggingEnabled=0)

        #graphOptBox.layout().setSpacing(4)
        box = OWGUI.widgetBox(graphOptBox, "Split gain measure", addSpace=True)
        self.measureCombo=OWGUI.comboBox(box, self, "measure", orientation=0, items=[e[0] for e in self.measures], callback=[self.clearLineEditFocus, self.graph.invalidateBaseScore, self.graph.plotBaseCurve])
        OWGUI.checkBox(box, self, "showBaseLine", "Show discretization gain", callback=[self.clearLineEditFocus, self.graph.plotBaseCurve])
        OWGUI.checkBox(box, self, "showLookaheadLine", "Show lookahead gain", callback=self.clearLineEditFocus)
        self.needsDiscrete.append(box)

        box = OWGUI.widgetBox(graphOptBox, "Target class", addSpace=True)
        self.targetCombo=OWGUI.comboBox(box, self, "targetClass", orientation=0, callback=[self.clearLineEditFocus, self.graph.targetClassChanged])
        stc = OWGUI.checkBox(box, self, "showTargetClassProb", "Show target class probability", callback=[self.clearLineEditFocus, self.graph.plotProbCurve])
        OWGUI.checkBox(box, self, "showRug", "Show rug (may be slow)", callback=[self.clearLineEditFocus, self.graph.plotRug])
        self.needsDiscrete.extend([self.targetCombo, stc])

        box = OWGUI.widgetBox(graphOptBox, "Editing", addSpace=True)
        OWGUI.checkBox(box, self, "snap", "Snap to grid", callback=[self.clearLineEditFocus])
        syncCB = OWGUI.checkBox(box, self, "autoSynchronize", "Apply on the fly", callback=self.clearLineEditFocus)
        syncButton = OWGUI.button(box, self, "Apply", callback = self.synchronizePressed)
        OWGUI.setStopper(self, syncButton, syncCB, "pointsChanged", self.synchronize)
        OWGUI.rubber(graphOptBox)

        self.attrList = OWGUI.listBox(self.mainBox, self, callback = self.individualSelected)
        self.attrList.setItemDelegate(CustomListItemDelegate(self.attrList))
        self.attrList.setFixedWidth(300)

        self.defaultMethodChanged()

        OWGUI.separator(self.mainBox, width=10)
        box = OWGUI.radioButtonsInBox(OWGUI.widgetBox(self.mainBox), self, "indiDiscretization", [], callback=[self.clearLineEditFocus, self.indiMethodChanged])
        #hbbox = OWGUI.widgetBox(box)
        #hbbox.layout().setSpacing(4)
        for meth in self.indiDiscretizationMethods[:-1]:
            OWGUI.appendRadioButton(box, self, "indiDiscretization", meth)
        self.needsDiscrete.append(box.buttons[2])
        self.indiInterBox = OWGUI.indentedBox(box, sep=indent, orientation = "horizontal")
        OWGUI.widgetLabel(self.indiInterBox, "Num. of intervals: ")
        self.indiIntervalSlider = OWGUI.hSlider(self.indiInterBox, self, "indiIntervals", None, 2, 10, callback=[self.clearLineEditFocus, self.indiMethodChanged], width = 100)
        OWGUI.rubber(self.indiInterBox) 
        OWGUI.appendRadioButton(box, self, "indiDiscretization", self.indiDiscretizationMethods[-1])
        #OWGUI.rubber(hbbox)
        #OWGUI.separator(box)
        #hbbox = OWGUI.widgetBox(box)
        for i in range(3):
            hbox = OWGUI.widgetBox(box, orientation = "horizontal")
            OWGUI.appendRadioButton(box, self, "indiDiscretization", "Custom %i" % (i+1) + " ", insertInto = hbox)
            le = OWGUI.lineEdit(hbox, self, "", callback = lambda w=i: self.customChanged(w), focusInCallback = lambda w=i: self.customSelected(w))
            le.setFixedWidth(110)
            self.customLineEdits.append(le)
            OWGUI.toolButton(hbox, self, "CC", width=30, callback = lambda w=i: self.copyToCustom(w))
            OWGUI.rubber(hbox)
        OWGUI.rubber(box)

        #self.controlArea.setFixedWidth(0)

        self.contAttrIcon =  self.createAttributeIconDict()[orange.VarTypes.Continuous]
        
        self.setAllIndividuals()