def computeCorrelationInsideClassesBetweenAttributes(data, attrList,
                                                     minCorrelation=0.0,
                                                     progressCallback=None):
    """Rank attribute pairs by their within-class correlation.

    Every unordered pair of continuous attributes taken from ``attrList`` is
    passed to ``computeCorrelationInsideClasses``; a pair is kept when the
    first value of the returned triple is at least ``minCorrelation``.

    Args:
        data: example table. ``data.domain`` is inspected here; the table
            itself is handed on to ``computeCorrelationInsideClasses``.
        attrList: the attributes to pair up.
        minCorrelation: lower bound a pair must reach to be reported.
        progressCallback: optional callable that receives the progress as a
            percentage (float) whenever a milestone is reached.

    Returns:
        A list of ``(correlation, attr_i, attr_j)`` tuples sorted in
        descending order. The list is empty when the data has no class
        variable or the class variable is continuous.
    """
    classVar = data.domain.classVar
    if not classVar or classVar.varType == orange.VarTypes.Continuous:
        return []

    attrCount = len(attrList)
    # Floor division keeps the pair count an integer on Python 3 as well.
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    # The type has to be looked up through the attrList entry itself: the
    # i-th entry of attrList is not necessarily the i-th attribute of the
    # domain (attrList may be a subset or a reordering).
    # NOTE(review): relies on data.domain[...] accepting whatever attrList
    # holds (names, indices or descriptors) - confirm against callers.
    continuous = [data.domain[attr].varType == orange.VarTypes.Continuous
                  for attr in attrList]

    correlations = []
    done = 0
    for i in range(attrCount):
        if not continuous[i]:
            continue
        for j in range(i + 1, attrCount):
            if not continuous[j]:
                continue
            corr, _corrs, _lengths = computeCorrelationInsideClasses(
                data, attrList[i], attrList[j])
            if corr >= minCorrelation:
                correlations.append((corr, attrList[i], attrList[j]))
            # Only pairs that were actually evaluated count as progress.
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
def computeMatrix(self):
    """Compute the pairwise distance matrix of ``self.data`` and send it.

    Does nothing when ``self.data`` is empty or unset. Otherwise the metric
    selected by ``self.Metrics`` is built for the data, the lower triangle
    (diagonal included) of an ``orange.SymMatrix`` is filled, and the matrix
    is sent on the "Distance Matrix" output. If the matrix cannot be
    allocated, an error is shown and ``None`` is sent instead.
    """
    if not self.data:
        return

    data = self.data
    dist = self.metrics[self.Metrics][1](data)
    self.error(0)
    try:
        self.matrix = orange.SymMatrix(len(data))
    except orange.KernelException as ex:
        self.error(0, "Could not create distance matrix! %s" % str(ex))
        self.matrix = None
        self.send("Distance Matrix", None)
        return

    self.matrix.setattr('items', data)

    size = len(data)
    pb = OWGUI.ProgressBar(self, 100)
    # The loop below visits the diagonal too, i.e. n*(n+1)/2 cells; the
    # milestones must be spread over that many steps or the bar is full
    # before the work is done. Floor division keeps the count an integer.
    milestones = orngMisc.progressBarMilestones(size * (size + 1) // 2, 100)
    count = 0
    try:
        for i in range(size):
            for j in range(i + 1):
                self.matrix[i, j] = dist(data[i], data[j])
                if count in milestones:
                    pb.advance()
                count += 1
    finally:
        # Close the progress bar even when a distance computation fails.
        pb.finish()

    self.send("Distance Matrix", self.matrix)
def computeCorrelationBetweenAttributes(data, attrList, minCorrelation=0.0,
                                        progressCallback=None):
    """Rank attribute pairs by the absolute value of their correlation.

    Every unordered pair of continuous attributes taken from ``attrList`` is
    passed to ``computeCorrelation``; a pair is kept when the absolute value
    of the result is at least ``minCorrelation``.

    Args:
        data: example table. ``data.domain`` is inspected here; the table
            itself is handed on to ``computeCorrelation``.
        attrList: the attributes to pair up.
        minCorrelation: lower bound on the absolute correlation of a
            reported pair.
        progressCallback: optional callable that receives the progress as a
            percentage (float) whenever a milestone is reached.

    Returns:
        A list of ``(abs_correlation, attr_i, attr_j)`` tuples sorted in
        descending order.
    """
    attrCount = len(attrList)
    # Floor division keeps the pair count an integer on Python 3 as well.
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    # The type has to be looked up through the attrList entry itself: the
    # i-th entry of attrList is not necessarily the i-th attribute of the
    # domain (attrList may be a subset or a reordering).
    # NOTE(review): relies on data.domain[...] accepting whatever attrList
    # holds (names, indices or descriptors) - confirm against callers.
    continuous = [data.domain[attr].varType == orange.VarTypes.Continuous
                  for attr in attrList]

    correlations = []
    done = 0
    for i in range(attrCount):
        if not continuous[i]:
            continue
        for j in range(i + 1, attrCount):
            if not continuous[j]:
                continue
            val = abs(computeCorrelation(data, attrList[i], attrList[j]))
            if val >= minCorrelation:
                correlations.append((val, attrList[i], attrList[j]))
            # Only pairs that were actually evaluated count as progress.
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
def computeCorrelationInsideClassesBetweenAttributes(data, attrList,
                                                     minCorrelation=0.0,
                                                     progressCallback=None):
    """Collect attribute pairs whose within-class correlation is high enough.

    Pairs are formed from the continuous attributes in ``attrList`` and
    scored with ``computeCorrelationInsideClasses``; the first element of
    the triple it returns is compared against ``minCorrelation``.

    Args:
        data: example table whose domain supplies the class variable and
            the attribute types.
        attrList: the attributes to pair up.
        minCorrelation: threshold a pair's score must reach to be kept.
        progressCallback: optional callable invoked with a percentage
            (float) at each progress milestone.

    Returns:
        ``(correlation, attr_i, attr_j)`` tuples in descending order, or an
        empty list if the class variable is missing or continuous.
    """
    classVar = data.domain.classVar
    if not classVar or classVar.varType == orange.VarTypes.Continuous:
        return []

    total = len(attrList)
    # ``//`` so that the number of pairs stays an int under Python 3.
    pairTotal = total * (total - 1) // 2
    milestones = progressBarMilestones(pairTotal)

    def isContinuous(attr):
        # Resolve the attribute through the domain instead of using the
        # loop position as an index into domain.attributes, which checks
        # the wrong variable when attrList is a subset or a reordering.
        # NOTE(review): assumes data.domain[...] accepts the kind of
        # entries attrList carries - confirm against callers.
        return data.domain[attr].varType == orange.VarTypes.Continuous

    usable = [isContinuous(attr) for attr in attrList]

    found = []
    evaluated = 0
    for i in range(total):
        if not usable[i]:
            continue
        for j in range(i + 1, total):
            if not usable[j]:
                continue
            corr, _corrs, _lengths = computeCorrelationInsideClasses(
                data, attrList[i], attrList[j])
            if corr >= minCorrelation:
                found.append((corr, attrList[i], attrList[j]))
            evaluated += 1
            if progressCallback and evaluated in milestones:
                progressCallback(100.0 * evaluated / pairTotal)

    return sorted(found, reverse=True)
def computeCorrelationBetweenAttributes(data, attrList, minCorrelation=0.0,
                                        progressCallback=None):
    """Collect attribute pairs whose absolute correlation is high enough.

    Pairs are formed from the continuous attributes in ``attrList`` and
    scored with ``abs(computeCorrelation(...))``.

    Args:
        data: example table whose domain supplies the attribute types.
        attrList: the attributes to pair up.
        minCorrelation: threshold the absolute correlation must reach for a
            pair to be kept.
        progressCallback: optional callable invoked with a percentage
            (float) at each progress milestone.

    Returns:
        ``(abs_correlation, attr_i, attr_j)`` tuples in descending order.
    """
    total = len(attrList)
    # ``//`` so that the number of pairs stays an int under Python 3.
    pairTotal = total * (total - 1) // 2
    milestones = progressBarMilestones(pairTotal)

    def isContinuous(attr):
        # Resolve the attribute through the domain instead of using the
        # loop position as an index into domain.attributes, which checks
        # the wrong variable when attrList is a subset or a reordering.
        # NOTE(review): assumes data.domain[...] accepts the kind of
        # entries attrList carries - confirm against callers.
        return data.domain[attr].varType == orange.VarTypes.Continuous

    usable = [isContinuous(attr) for attr in attrList]

    found = []
    evaluated = 0
    for i in range(total):
        if not usable[i]:
            continue
        for j in range(i + 1, total):
            if not usable[j]:
                continue
            val = abs(computeCorrelation(data, attrList[i], attrList[j]))
            if val >= minCorrelation:
                found.append((val, attrList[i], attrList[j]))
            evaluated += 1
            if progressCallback and evaluated in milestones:
                progressCallback(100.0 * evaluated / pairTotal)

    return sorted(found, reverse=True)
class OWExampleDistance(OWWidget):
    """Widget that computes pairwise distances between examples.

    It takes an example table on the "Data" input and emits an
    ``orange.SymMatrix`` on the "Distances" output. The metric, the
    normalization flag and the label attribute are chosen in the control
    area.
    """
    # "Metrics" and "Normalize" are listed as widget settings; "Label" is
    # handled by the domain context handler.
    settingsList = ["Metrics", "Normalize"]
    contextHandlers = {"": DomainContextHandler("", ["Label"])}

    def __init__(self, parent=None, signalManager=None):
        """Set up signals, default settings and the control-area GUI."""
        OWWidget.__init__(self, parent, signalManager, 'ExampleDistance',
                          wantMainArea=0, resizingEnabled=0)

        # NOTE(review): self.dataset (here) and self.setLabel (below) are
        # not defined in the part of the class visible in this file section.
        self.inputs = [("Data", ExampleTable, self.dataset)]
        self.outputs = [("Distances", orange.SymMatrix)]

        self.Metrics = 0
        self.Normalize = True
        self.Label = ""
        self.loadSettings()
        self.data = None
        self.matrix = None

        # (display name, distance constructor class); self.Metrics indexes
        # this list.
        self.metrics = [
            ("Euclidean", orange.ExamplesDistanceConstructor_Euclidean),
            ("Pearson Correlation",
             orngClustering.ExamplesDistanceConstructor_PearsonR),
            ("Spearman Rank Correlation",
             orngClustering.ExamplesDistanceConstructor_SpearmanR),
            ("Manhattan", orange.ExamplesDistanceConstructor_Manhattan),
            ("Hamming", orange.ExamplesDistanceConstructor_Hamming),
            ("Relief", orange.ExamplesDistanceConstructor_Relief),
        ]

        cb = OWGUI.comboBox(
            self.controlArea, self, "Metrics", box="Distance Metrics",
            items=[x[0] for x in self.metrics],
            tooltip="Choose metrics to measure pairwise distance between examples.",
            callback=self.distMetricChanged, valueType=str)
        cb.setMinimumWidth(170)

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Normalization",
                              addSpace=True)
        self.normalizeCB = OWGUI.checkBox(box, self, "Normalize",
                                          "Normalize data",
                                          callback=self.computeMatrix)
        # The check box is only enabled for Euclidean (0) and Manhattan (3).
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])

        self.labelCombo = OWGUI.comboBox(
            self.controlArea, self, "Label", box="Example Label",
            items=[], tooltip="Attribute used for example labels",
            callback=self.setLabel, sendSelectedValue=1)
        self.labelCombo.setDisabled(1)

        OWGUI.rubber(self.controlArea)

    def sendReport(self):
        """Report the selected metric, the label attribute and the data."""
        self.reportSettings("Settings",
                            [("Metrics", self.metrics[self.Metrics][0]),
                             ("Label", self.Label)])
        self.reportData(self.data)

    def distMetricChanged(self):
        """Update the normalization check box and recompute the matrix."""
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])
        self.computeMatrix()

    def computeMatrix(self):
        """Compute the distance matrix of ``self.data`` and send it.

        Does nothing when ``self.data`` is empty or unset. If the matrix
        cannot be allocated, an error is shown and ``None`` is sent on the
        "Distances" output.
        """
        if not self.data:
            return

        data = self.data
        constructor = self.metrics[self.Metrics][1]()
        constructor.normalize = self.Normalize
        dist = constructor(data)
        self.error(0)
        try:
            self.matrix = orange.SymMatrix(len(data))
        # "as" form: valid on Python 2.6+ and Python 3.
        except orange.KernelException as ex:
            self.error(0, "Could not create distance matrix! %s" % str(ex))
            self.matrix = None
            self.send("Distances", None)
            return

        self.matrix.setattr('items', data)

        size = len(data)
        pb = OWGUI.ProgressBar(self, 100)
        # The loop below visits the diagonal too, i.e. n*(n+1)/2 cells; the
        # milestones must be spread over that many steps or the bar is full
        # before the work is done. Floor division keeps the count an int.
        milestones = orngMisc.progressBarMilestones(
            size * (size + 1) // 2, 100)
        count = 0
        try:
            for i in range(size):
                for j in range(i + 1):
                    self.matrix[i, j] = dist(data[i], data[j])
                    if count in milestones:
                        pb.advance()
                    count += 1
        finally:
            # Close the progress bar even when a distance call fails.
            pb.finish()

        self.send("Distances", self.matrix)
def _readTextMatrix(fle, progress=None):
    """Parse the tab-separated text matrix format from an open file object.

    Returns ``(matrix, labels)``; raises ``ValueError`` on malformed input.
    """
    # Find the header: the first non-blank line. readline() returns an empty
    # string at end of file, which must end the search instead of looping
    # forever.
    while True:
        raw = fle.readline()
        if not raw:
            raise ValueError("Matrix dimension expected in the first line.")
        lne = raw.strip()
        if lne:
            break

    spl = lne.split()
    try:
        dim = int(spl[0])
    except (IndexError, ValueError):
        # A non-numeric first token is reported the same way as a missing one.
        raise ValueError("Matrix dimension expected in the first line.")

    labeled = len(spl) > 1 and spl[1] in ["labelled", "labeled"]
    matrix = orange.SymMatrix(dim)
    milestones = orngMisc.progressBarMilestones(dim, 100)
    labels = [] if labeled else [""] * dim

    for li, lne in enumerate(fle):
        # Rows are numbered from 0, so row index ``dim`` is already one too
        # many. Trailing blank lines are tolerated.
        if li >= dim:
            if not lne.strip():
                continue
            raise ValueError("File to long")

        spl = lne.split("\t")
        if labeled:
            labels.append(spl[0].strip())
            spl = spl[1:]
        if len(spl) > dim:
            # li + 2: one for the header line, one for 1-based numbering.
            raise ValueError("Line %i too long" % (li + 2))

        for lj, s in enumerate(spl):
            if s:
                try:
                    matrix[li, lj] = float(s)
                except ValueError:
                    raise ValueError(
                        "Invalid number in line %i, column %i" % (li + 2, lj))

        if li in milestones:
            if progress:
                progress.advance()

    return matrix, labels


def readMatrix(fn, progress=None):
    """Load a symmetric distance matrix from a file.

    The format is chosen from ``fn``:

    * a path ending in ``.pkl`` or ``.sym`` is unpickled;
    * a path ending in ``.npy`` is loaded with ``numpy.load`` and copied
      into an ``orange.SymMatrix``;
    * anything else - another path, or an already open file-like object -
      is parsed as text. The first non-blank line holds the dimension,
      optionally followed by ``labelled``/``labeled``; every following line
      is a tab-separated row, preceded by its label when the file is
      labelled.

    Args:
        fn: file name, or an open file-like object (text format only).
        progress: optional progress bar; its ``advance()`` and ``finish()``
            methods are called.

    Returns:
        A ``(matrix, labels, data)`` tuple. ``data`` is the example table
        stored in a pickled matrix's ``items`` attribute, if any; ``labels``
        is a list of row labels, or ``None`` when the source provides none.

    Raises:
        ValueError: if a text file has no valid dimension line, has too
            many rows, a row with too many columns, or a cell that is not a
            number.
    """
    matrix = labels = data = None

    # Duck-typed check instead of ``type(fn) != file``: works on Python 3
    # and also accepts file-like objects such as StringIO.
    is_path = not hasattr(fn, "readline")
    ext = os.path.splitext(fn)[1] if is_path else None

    if ext in ('.pkl', '.sym'):
        # NOTE(security): unpickling can execute arbitrary code; only open
        # matrix files that come from a trusted source.
        with open(fn, 'rb') as pkl_file:
            matrix = pickle.load(pkl_file)
        if hasattr(matrix, 'items'):
            items = matrix.items
            if isinstance(items, orange.ExampleTable):
                data = items
            elif isinstance(items, list) or hasattr(items, "__iter__"):
                labels = items
    elif ext == '.npy':
        import numpy
        nmatrix = numpy.load(fn)
        matrix = orange.SymMatrix(len(nmatrix))
        milestones = orngMisc.progressBarMilestones(matrix.dim, 100)
        for i in range(len(nmatrix)):
            for j in range(i + 1):
                matrix[j, i] = nmatrix[i, j]
            if progress and i in milestones:
                progress.advance()
    else:
        fle = open(fn) if is_path else fn
        try:
            matrix, labels = _readTextMatrix(fle, progress)
        finally:
            # Only close what was opened here; a caller-supplied file
            # object stays under the caller's control.
            if is_path:
                fle.close()

    if progress:
        progress.finish()

    return matrix, labels, data