def mousePressed(self, name, ev):
    """Dispatch a mouse press that happened on one of the widget's canvases.

    name -- which canvas received the event; the code distinguishes
            "graph" and "interactions".
    ev   -- the mouse event; only ev.pos() and ev.button() are used.

    Left click on "graph":
        toggles the visibility of the attribute whose rectangle was hit
        and redraws the interaction rectangles; otherwise, if a line
        rectangle was hit, sends that attribute pair on the
        "Interacting Features" channel.
    Left click on "interactions":
        sends the attribute pair of every interaction row that was hit.
    Right click on "interactions" (only when self.mergeAttributes is set):
        builds the cartesian-product feature of the hit pair, keeps only
        the product values that actually occur in the data, and hands
        the extended data set to self.updateNewData().
    """
    pos = QPointF(ev.pos())
    button = ev.button()

    if button == Qt.LeftButton and name == "graph":
        # A distinct loop variable is used so the `name` parameter is not
        # clobbered while scanning the attribute rectangles.
        for attrName in self.rectNames:
            if self.rectNames[attrName].contains(pos):
                self._setAttrVisible(attrName, not self.getAttrVisible(attrName))
                self.showInteractionRects(self.data)
                return
        for (attr1, attr2, rect) in self.lines:
            if rect.contains(pos):
                self.send("Interacting Features", [attr1, attr2])
                return

    elif button == Qt.LeftButton and name == "interactions":
        self.rest = None
        for rects in self.interactionRects:
            if any(item.contains(pos) for item in rects):
                (_rect1, _rect2, _rect3, _nbrect, text1, text2) = rects
                self.send("Interacting Features",
                          [str(text1.toPlainText()), str(text2.toPlainText())])

    elif button == Qt.RightButton and name == "interactions" and self.mergeAttributes:
        for rects in self.interactionRects:
            (_rect1, _rect2, _rect3, _nbrect, text1, text2) = rects
            if any(item.contains(pos) for item in rects):
                attr1 = str(text1.toPlainText())
                attr2 = str(text2.toPlainText())
                break
        else:
            return  # the click did not hit any interaction row

        data = self.interactionMatrix.discData
        (cart, _profit) = FeatureByCartesianProduct(
            data, [data.domain[attr1], data.domain[attr2]])
        if cart in data.domain:
            return  # this attribute is already in the domain

        # The same pair may already be present under an equal name even
        # though the variable object itself differs.
        if any(cart.name == attr.name for attr in data.domain):
            print("Attribute pair already in the domain")
            return

        tempData = data.select(list(data.domain) + [cart])
        dd = orange.DomainDistributions(tempData)
        cartDistribution = dd[cart]

        # Keep only the product values that occur at least once.
        vals = [value for i, value in enumerate(cart.values)
                if cartDistribution[i] != 0.0]

        newVar = orange.EnumVariable(cart.name, values=vals)
        newData = data.select(list(data.domain) + [newVar])
        for i in range(len(newData)):
            newData[i][newVar] = tempData[i][cart]

        self.updateNewData(newData)
def updateGraph(self, *args):
    """Clear the canvas and redraw the X-by-Y rectangle diagram.

    Every existing canvas item is removed first.  Nothing more is drawn
    when there is no data, when attrX or attrY is unset, when
    getConditionalData() yields nothing, or when the view is too small.

    Otherwise one rectangle is drawn per (X value, Y value) pair.  Its
    width and height are proportional to the marginal counts of the X
    and Y values, and its tooltip reports expected and actual counts,
    the chi-square of the whole table and the cell's standardized
    Pearson residual.  Extra positional arguments are accepted and
    ignored.
    """
    for item in self.canvas.items():
        self.canvas.removeItem(item)  # remove all canvas items
    if not self.data:
        return
    if not self.attrX or not self.attrY:
        return
    data = self.getConditionalData()
    if not data or len(data) == 0:
        return

    def marginalCount(contingency, key):
        # Sum the counts stored under `key`.  Best effort, as before: a
        # failed lookup counts as zero and a failure part-way through
        # keeps what has been summed so far.
        count = 0
        try:
            for val in contingency[key]:
                count += val
        except Exception:
            pass
        return count

    numExamples = len(data)

    contX = orange.ContingencyAttrAttr(self.attrX, self.attrX, data)  # distribution of X attribute
    contY = orange.ContingencyAttrAttr(self.attrY, self.attrY, data)  # distribution of Y attribute
    keysX = contX.keys()
    keysY = contY.keys()
    valsX = [marginalCount(contX, key) for key in keysX]
    valsY = [marginalCount(contY, key) for key in keysY]

    # create cartesian product of selected attributes and compute contingency
    (cart, _profit) = FeatureByCartesianProduct(
        data, [data.domain[self.attrX], data.domain[self.attrY]])
    tempData = data.select(list(data.domain) + [cart])
    contXY = orange.ContingencyAttrAttr(cart, cart, tempData)  # joint distribution of the (X, Y) product

    # For every value pair: ((x key, x count), (y key, y count), joint count, N)
    probs = {}
    for xKey, xCount in zip(keysX, valsX):
        for yKey, yCount in zip(keysY, valsY):
            pairKey = '%s-%s' % (xKey, yKey)
            actualProb = 0
            try:
                for val in contXY[pairKey]:
                    actualProb += val
            except Exception:
                actualProb = 0  # unlike the marginals, a failure here resets the count
            probs[pairKey] = ((xKey, xCount), (yKey, yCount), actualProb, numExamples)

    # get text width of Y attribute name
    text = OWCanvasText(self.canvas, data.domain[self.attrY].name, x=0, y=0, bold=1, show=0)
    xOff = int(text.boundingRect().width() + 40)
    yOff = 50
    sqareSize = min(self.canvasView.width() - xOff - 35,
                    self.canvasView.height() - yOff - 30)
    if sqareSize < 0:
        return  # canvas is too small to draw rectangles
    self.canvasView.setSceneRect(0, 0, self.canvasView.width(), self.canvasView.height())

    # print graph name
    if self.attrCondition == "(None)":
        name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
            self.attrX, self.attrY, self.attrX, self.attrY)
    else:
        condValue = getHtmlCompatibleString(self.attrConditionValue)
        name = "<b>P(%s, %s | %s = %s) ≠ P(%s | %s = %s)×P(%s | %s = %s)</b>" % (
            self.attrX, self.attrY, self.attrCondition, condValue,
            self.attrX, self.attrCondition, condValue,
            self.attrY, self.attrCondition, condValue)
    OWCanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter, htmlText=name)
    OWCanvasText(self.canvas, "N = " + str(len(data)), xOff + sqareSize / 2, 38,
                 Qt.AlignCenter, bold=0)

    ######################
    # compute chi-square
    chisquare = 0.0
    for xKey in keysX:
        for yKey in keysY:
            ((_xAttr, xVal), (_yAttr, yVal), actual, total) = probs['%s-%s' % (xKey, yKey)]
            expected = float(xVal * yVal) / float(total)
            if expected == 0:
                continue  # an empty row or column contributes nothing
            chisquare += (actual - expected) * (actual - expected) / expected

    ######################
    # draw rectangles
    xValues = data.domain[self.attrX].values
    yValues = data.domain[self.attrY].values
    currX = xOff
    for i in range(len(valsX)):
        if valsX[i] == 0:
            continue
        currY = yOff
        width = int(float(sqareSize * valsX[i]) / float(numExamples))

        for j in range(len(valsY) - 1, -1, -1):  # this way we sort y values correctly
            ((xAttr, xVal), (yAttr, yVal), actual, total) = probs['%s-%s' % (keysX[i], keysY[j])]
            if valsY[j] == 0:
                continue
            height = int(float(sqareSize * valsY[j]) / float(numExamples))

            # create rectangle
            rect = OWCanvasRectangle(self.canvas, currX + 2, currY + 2,
                                     width - 4, height - 4, z=-10)
            self.addRectIndependencePearson(rect, currX + 2, currY + 2, width - 4, height - 4,
                                            (xAttr, xVal), (yAttr, yVal), actual, total)

            expected = float(xVal * yVal) / float(total)
            pearson = (actual - expected) / sqrt(expected)
            tooltipText = (
                "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(x)): <b>%d (%.2f%%)</b><hr> "
                "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>Number of examples (p(y)): <b>%d (%.2f%%)</b><hr> "
                "<b>Number Of Examples (Probabilities):</b><br>Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>Actual "
                "(p(x,y)): <b>%d (%.2f%%)</b> "
                "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>Standardized Pearson residual: <b>%.2f</b>"
            ) % (
                self.attrX, getHtmlCompatibleString(xAttr), xVal,
                100.0 * float(xVal) / float(total),
                self.attrY, getHtmlCompatibleString(yAttr), yVal,
                100.0 * float(yVal) / float(total),
                expected, 100.0 * float(xVal * yVal) / float(total * total),
                actual, 100.0 * float(actual) / float(total),
                chisquare, pearson)
            rect.setToolTip(tooltipText)

            currY += height
            if currX == xOff:
                # Y value labels are written once, next to the first drawn column.
                OWCanvasText(self.canvas, "", xOff - 10, currY - height / 2,
                             Qt.AlignRight | Qt.AlignVCenter,
                             htmlText=getHtmlCompatibleString(yValues[j]))

        OWCanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize + 5,
                     Qt.AlignCenter, htmlText=getHtmlCompatibleString(xValues[i]))
        currX += width

    # show attribute names
    OWCanvasText(self.canvas, self.attrY, xOff - 20, yOff + sqareSize / 2,
                 Qt.AlignRight, bold=1)
    OWCanvasText(self.canvas, self.attrX, xOff + sqareSize / 2, yOff + sqareSize + 15,
                 Qt.AlignCenter, bold=1)
def computeProbabilities(self):
    """Fill self.probabilities with counts for every pair of discrete attributes.

    self.probabilities is reset to an empty dict; when self.data is
    empty or None nothing else happens.  Otherwise, for every pair of
    non-continuous variables in the domain (a variable is also paired
    with itself) and every pair of their values, two symmetric entries
    are stored:

        'X+xval:Y+yval' -> ((xval, count(x)), (yval, count(y)), count(x, y), N)
        'Y+yval:X+xval' -> ((yval, count(y)), (xval, count(x)), count(x, y), N)

    where N is len(self.data).  The status bar shows a wait message
    while this runs and is cleared afterwards, also when an error
    interrupts the computation.
    """
    self.probabilities = {}
    if not self.data:
        return

    def countExamples(contingency, key):
        # Sum the counts stored under `key`.  Best effort, as before: a
        # failed lookup counts as zero and a failure part-way through
        # keeps what has been summed so far.
        count = 0
        try:
            for val in contingency[key]:
                count += val
        except Exception:
            pass
        return count

    self.setStatusBarText("Please wait. Computing...")
    try:
        data = self.data
        total = len(data)

        # we can only check discrete attributes
        discreteVars = [var for var in data.domain
                        if var.varType != orange.VarTypes.Continuous]

        # Per-attribute value keys and marginal counts, keyed by attribute name.
        conts = {}
        for var in discreteVars:
            cont = orange.ContingencyAttrAttr(var, var, data)
            keys = cont.keys()
            conts[var.name] = (keys, [countExamples(cont, key) for key in keys])

        for position, varX in enumerate(discreteVars):
            (keysX, valsX) = conts[varX.name]
            for varY in discreteVars[position:]:
                (keysY, valsY) = conts[varY.name]

                # create cartesian product of selected attributes and compute contingency
                (cart, _profit) = FeatureByCartesianProduct(data, [varX, varY])
                tempData = data.select(list(data.domain) + [cart])
                contXY = orange.ContingencyAttrClass(cart, tempData)  # contingency of the (X, Y) product

                # compute probabilities
                for keyX, valx in zip(keysX, valsX):
                    for keyY, valy in zip(keysY, valsY):
                        actualCount = countExamples(contXY, '%s-%s' % (keyX, keyY))
                        self.probabilities['%s+%s:%s+%s' % (
                            varX.name, keyX, varY.name, keyY)] = (
                                (keyX, valx), (keyY, valy), actualCount, total)
                        self.probabilities['%s+%s:%s+%s' % (
                            varY.name, keyY, varX.name, keyX)] = (
                                (keyY, valy), (keyX, valx), actualCount, total)
    finally:
        # Never leave the "Please wait" message behind.
        self.setStatusBarText("")