示例#1
0
    def mousePressed(self, name, ev):
        """Dispatch a mouse press received on one of the widget's canvases.

        ``name`` is the identifier of the canvas that got the event (the code
        distinguishes "graph" and "interactions"); ``ev`` is the mouse event.

        * Left button on "graph": if the click hits an entry of
          ``self.rectNames`` the visibility of that attribute is toggled and
          the interaction rectangles are redrawn; otherwise, if it hits one
          of ``self.lines``, the corresponding attribute pair is sent on the
          "Interacting Features" channel.
        * Left button on "interactions": the attribute pair of every row of
          ``self.interactionRects`` under the cursor is sent on
          "Interacting Features".
        * Right button on "interactions" (only when ``self.mergeAttributes``
          is set): the clicked attribute pair is merged into a new enumerated
          attribute that keeps only the value combinations actually present
          in the data, and the extended table is passed to
          ``self.updateNewData``.

        Any other button/canvas combination is ignored.
        """
        pos = QPointF(ev.pos())
        button = ev.button()

        if button == Qt.LeftButton and name == "graph":
            # A dedicated loop variable keeps the ``name`` parameter intact.
            for attrName, attrRect in self.rectNames.items():
                if attrRect.contains(pos):
                    self._setAttrVisible(attrName,
                                         not self.getAttrVisible(attrName))
                    self.showInteractionRects(self.data)
                    return
            for (attr1, attr2, rect) in self.lines:
                if rect.contains(pos):
                    self.send("Interacting Features", [attr1, attr2])
                    return

        elif button == Qt.LeftButton and name == "interactions":
            self.rest = None
            # No early exit here: every row under the cursor is reported.
            for rects in self.interactionRects:
                if any(item.contains(pos) for item in rects):
                    (_rect1, _rect2, _rect3, _nbrect, text1, text2) = rects
                    self.send("Interacting Features",
                              [str(text1.toPlainText()),
                               str(text2.toPlainText())])

        elif (button == Qt.RightButton and name == "interactions"
              and self.mergeAttributes):
            for rects in self.interactionRects:
                if any(item.contains(pos) for item in rects):
                    (_rect1, _rect2, _rect3, _nbrect, text1, text2) = rects
                    attr1 = str(text1.toPlainText())
                    attr2 = str(text2.toPlainText())
                    break
            else:
                return  # the click did not hit any interaction row

            data = self.interactionMatrix.discData
            (cart, _profit) = FeatureByCartesianProduct(
                data, [data.domain[attr1], data.domain[attr2]])
            if cart in data.domain:
                return  # this attribute is already in the domain

            # The same pair may also be present under an equal name.
            if any(cart.name == attr.name for attr in data.domain):
                print("Attribute pair already in the domain")
                return

            tempData = data.select(list(data.domain) + [cart])
            cartDistribution = orange.DomainDistributions(tempData)[cart]
            # Keep only the combined values that occur at least once.
            vals = [value for i, value in enumerate(cart.values)
                    if cartDistribution[i] != 0.0]

            newVar = orange.EnumVariable(cart.name, values=vals)
            newData = data.select(list(data.domain) + [newVar])
            for i in range(len(newData)):
                newData[i][newVar] = tempData[i][cart]

            self.updateNewData(newData)
    def updateGraph(self, *args):
        """Clear the canvas and redraw the X/Y attribute independence diagram.

        Extra positional arguments are accepted and ignored.

        Nothing is drawn (beyond clearing the canvas) when there is no data,
        when ``self.attrX`` or ``self.attrY`` is unset, when
        ``self.getConditionalData()`` yields no examples, or when the view is
        too small to hold the diagram.

        Otherwise the method:

        1. sums the contingency rows of the X and the Y attribute to get the
           per-value example counts,
        2. builds the cartesian-product feature of both attributes and sums
           its contingency rows to get the observed count of each value pair,
        3. draws the title and the example count,
        4. accumulates a chi-square statistic over all value pairs,
        5. draws one rectangle per (X value, Y value) pair whose width and
           height are proportional to the X and Y counts, registers it via
           ``self.addRectIndependencePearson`` and attaches a tooltip,
        6. labels the values and the two attributes.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)  # remove all canvas items
        if not self.data: return
        if not self.attrX or not self.attrY: return

        data = self.getConditionalData()
        if not data or len(data) == 0: return

        def rowTotal(contingency, key):
            # Add up the counts stored under ``key``.  A row that cannot be
            # read keeps whatever was accumulated before the failure.
            count = 0
            try:
                for val in contingency[key]:
                    count += val
            except Exception:
                pass
            return count

        total = len(data)

        contX = orange.ContingencyAttrAttr(self.attrX, self.attrX,
                                           data)  # distribution of X attribute
        contY = orange.ContingencyAttrAttr(self.attrY, self.attrY,
                                           data)  # distribution of Y attribute

        # The key lists are fetched once instead of on every loop iteration.
        keysX = list(contX.keys())
        keysY = list(contY.keys())
        valsX = [rowTotal(contX, key) for key in keysX]
        valsY = [rowTotal(contY, key) for key in keysY]

        # create cartesian product of selected attributes and compute the
        # contingency of the combined attribute (joint X/Y counts)
        (cart, _profit) = FeatureByCartesianProduct(
            data, [data.domain[self.attrX], data.domain[self.attrY]])
        tempData = data.select(list(data.domain) + [cart])
        contXY = orange.ContingencyAttrAttr(cart, cart, tempData)

        # For every value pair store
        # ((x value, x count), (y value, y count), joint count, N).
        probs = {}
        for keyX, countX in zip(keysX, valsX):
            for keyY, countY in zip(keysY, valsY):
                pairKey = '%s-%s' % (keyX, keyY)
                actualCount = 0
                try:
                    for val in contXY[pairKey]:
                        actualCount += val
                except Exception:
                    # a pair that cannot be looked up counts as unobserved
                    actualCount = 0
                probs[pairKey] = ((keyX, countX), (keyY, countY),
                                  actualCount, total)

        # get text width of Y attribute name
        text = OWCanvasText(self.canvas,
                            data.domain[self.attrY].name,
                            x=0,
                            y=0,
                            bold=1,
                            show=0)
        xOff = int(text.boundingRect().width() + 40)
        yOff = 50
        squareSize = min(self.canvasView.width() - xOff - 35,
                         self.canvasView.height() - yOff - 30)
        if squareSize < 0: return  # canvas is too small to draw rectangles
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        if self.attrCondition == "(None)":
            title = "<b>P(%s, %s) &#8800; P(%s)&times;P(%s)</b>" % (
                self.attrX, self.attrY, self.attrX, self.attrY)
        else:
            condValue = getHtmlCompatibleString(self.attrConditionValue)
            title = "<b>P(%s, %s | %s = %s) &#8800; P(%s | %s = %s)&times;P(%s | %s = %s)</b>" % (
                self.attrX, self.attrY, self.attrCondition, condValue,
                self.attrX, self.attrCondition, condValue,
                self.attrY, self.attrCondition, condValue)
        OWCanvasText(self.canvas,
                     "",
                     xOff + squareSize / 2,
                     20,
                     Qt.AlignCenter,
                     htmlText=title)
        OWCanvasText(self.canvas,
                     "N = " + str(total),
                     xOff + squareSize / 2,
                     38,
                     Qt.AlignCenter,
                     bold=0)

        ######################
        # compute chi-square
        chisquare = 0.0
        for keyX in keysX:
            for keyY in keysY:
                ((xAttr, xVal), (yAttr, yVal), actual,
                 n) = probs['%s-%s' % (keyX, keyY)]
                expected = float(xVal * yVal) / float(n)
                if expected == 0: continue
                chisquare += (actual - expected) * (actual - expected) / expected

        ######################
        # draw rectangles
        currX = xOff
        for i, keyX in enumerate(keysX):
            if valsX[i] == 0: continue
            currY = yOff
            width = int(float(squareSize * valsX[i]) / float(total))

            # walk the Y values backwards so that they are stacked in the
            # right order from top to bottom
            for j in range(len(keysY) - 1, -1, -1):
                if valsY[j] == 0: continue
                ((xAttr, xVal), (yAttr, yVal), actual,
                 n) = probs['%s-%s' % (keyX, keysY[j])]
                height = int(float(squareSize * valsY[j]) / float(total))

                # create rectangle
                rect = OWCanvasRectangle(self.canvas,
                                         currX + 2,
                                         currY + 2,
                                         width - 4,
                                         height - 4,
                                         z=-10)
                self.addRectIndependencePearson(rect, currX + 2, currY + 2,
                                                width - 4, height - 4,
                                                (xAttr, xVal), (yAttr, yVal),
                                                actual, n)

                # expected > 0 here: both counts are non-zero (checked above)
                expected = float(xVal * yVal) / float(n)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = """<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(x)): <b>%d (%.2f%%)</b><hr>
                                <b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(y)): <b>%d (%.2f%%)</b><hr>
                                <b>Number Of Examples (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual (p(x,y)): <b>%d (%.2f%%)</b>
                                <hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>""" % (
                    self.attrX, getHtmlCompatibleString(xAttr), xVal,
                    100.0 * float(xVal) / float(n), self.attrY,
                    getHtmlCompatibleString(yAttr), yVal,
                    100.0 * float(yVal) / float(n), expected,
                    100.0 * float(xVal * yVal) / float(n * n), actual,
                    100.0 * float(actual) / float(n), chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                # Y value labels are only written next to the leftmost column
                if currX == xOff:
                    OWCanvasText(self.canvas,
                                 "",
                                 xOff - 10,
                                 currY - height / 2,
                                 Qt.AlignRight | Qt.AlignVCenter,
                                 htmlText=getHtmlCompatibleString(
                                     data.domain[self.attrY].values[j]))

            # X value label below the column
            OWCanvasText(self.canvas,
                         "",
                         currX + width / 2,
                         yOff + squareSize + 5,
                         Qt.AlignCenter,
                         htmlText=getHtmlCompatibleString(
                             data.domain[self.attrX].values[i]))
            currX += width

        # show attribute names
        OWCanvasText(self.canvas,
                     self.attrY,
                     xOff - 20,
                     yOff + squareSize / 2,
                     Qt.AlignRight,
                     bold=1)
        OWCanvasText(self.canvas,
                     self.attrX,
                     xOff + squareSize / 2,
                     yOff + squareSize + 15,
                     Qt.AlignCenter,
                     bold=1)
示例#3
0
    def computeProbabilities(self):
        """Fill ``self.probabilities`` with counts for every pair of values.

        ``self.probabilities`` is reset to an empty dict first; with no data
        the method stops there.

        For every pair of non-continuous attributes (a, b) in the domain,
        including a paired with itself, and every pair of their values
        (va, vb), two entries are stored so the pair can be looked up in
        either order:

            "a+va:b+vb" -> ((va, count(va)), (vb, count(vb)), joint, N)
            "b+vb:a+va" -> ((vb, count(vb)), (va, count(va)), joint, N)

        where ``joint`` is the count obtained for the combined value
        "va-vb" of the cartesian-product feature and ``N`` is
        ``len(self.data)``.

        The status bar shows a "please wait" message while the computation
        runs and is cleared afterwards, also when the computation fails.
        """
        self.probabilities = {}
        if not self.data: return

        def rowTotal(contingency, key):
            # Add up the counts stored under ``key``.  A row that cannot be
            # read keeps whatever was accumulated before the failure.
            count = 0
            try:
                for val in contingency[key]:
                    count += val
            except Exception:
                pass
            return count

        self.setStatusBarText("Please wait. Computing...")
        try:
            total = len(self.data)
            domain = self.data.domain

            # Per-value counts of every discrete attribute.  Continuous
            # attributes are skipped up front: we can only check discrete
            # attributes, so no contingency is computed for them.
            discrete = []
            conts = {}
            for i in range(len(domain)):
                attr = domain[i]
                if attr.varType == orange.VarTypes.Continuous:
                    continue
                cont = orange.ContingencyAttrAttr(attr, attr, self.data)
                keys = list(cont.keys())
                conts[attr.name] = (keys,
                                    [rowTotal(cont, key) for key in keys])
                discrete.append(attr)

            for ix, attrX in enumerate(discrete):
                (keysX, valsX) = conts[attrX.name]

                for attrY in discrete[ix:]:
                    (keysY, valsY) = conts[attrY.name]

                    # create cartesian product of selected attributes and
                    # compute the contingency of the combined attribute
                    (cart, _profit) = FeatureByCartesianProduct(
                        self.data, [attrX, attrY])
                    tempData = self.data.select(list(domain) + [cart])
                    # NOTE(review): this is an attribute-vs-class contingency,
                    # so joint counts are sums over class values -- confirm
                    # this is intended for data without a class variable.
                    contXY = orange.ContingencyAttrClass(cart, tempData)

                    # compute probabilities
                    for keyX, valx in zip(keysX, valsX):
                        for keyY, valy in zip(keysY, valsY):
                            actualCount = rowTotal(
                                contXY, '%s-%s' % (keyX, keyY))
                            self.probabilities['%s+%s:%s+%s' % (
                                attrX.name, keyX, attrY.name, keyY)] = (
                                    (keyX, valx), (keyY, valy),
                                    actualCount, total)
                            self.probabilities['%s+%s:%s+%s' % (
                                attrY.name, keyY, attrX.name, keyX)] = (
                                    (keyY, valy), (keyX, valx),
                                    actualCount, total)
        finally:
            # never leave the "please wait" message behind
            self.setStatusBarText("")