def WrapperFSS(data, learner, verbose=0, folds=10):
    """Greedy forward feature-subset selection driven by cross-validated accuracy.

    Starting from a domain that contains only the class variable, the
    attribute whose addition yields the highest classification accuracy is
    added repeatedly, for as long as the accuracy strictly improves.

    Parameters:
        data: example table to select attributes from.
        learner: learner evaluated with ``orngTest.crossValidation``.
        verbose: 0 is silent, 1 reports each accepted attribute and the stop,
            2 additionally reports every candidate's score.
        folds: number of cross-validation folds.

    Returns:
        An ``orange.Domain`` with the selected attributes and the class.
    """
    classVar = data.domain.classVar
    currentAtt = []
    freeAttributes = list(data.domain.attributes)

    def accuracy(attributes):
        # Cross-validated classification accuracy on the given attribute set.
        domain = orange.Domain(attributes + [classVar])
        results = orngTest.crossValidation(
            [learner], data.select(domain), folds=folds)
        return orngStat.CA(results)[0]

    maxStat = accuracy(currentAtt)
    if verbose >= 2:
        print("start (%5.3f)" % maxStat)

    # Stop when no candidates remain: the previous ``while 1`` loop called
    # max() on an empty list once every attribute had been selected.
    while freeAttributes:
        stat = []
        for a in freeAttributes:
            score = accuracy([a] + currentAtt)
            stat.append(score)
            if verbose >= 2:
                print(" %s gained %5.3f" % (a.name, score))

        bestStat = max(stat)
        if not bestStat > maxStat:
            if verbose:
                print("stopped (%5.3f)" % (bestStat - maxStat))
            break

        bestVarIndx = stat.index(bestStat)
        if verbose:
            print("gain: %5.3f, attribute: %s"
                  % (bestStat - maxStat, freeAttributes[bestVarIndx].name))
        maxStat = bestStat
        currentAtt = currentAtt + [freeAttributes[bestVarIndx]]
        del freeAttributes[bestVarIndx]

    return orange.Domain(currentAtt + [classVar])
def __call__(self, examples):
    """Fit one logistic regression per attribute and combine their statistics.

    A class-only model supplies the first entry of every statistic; each
    single-attribute model contributes the entry at index 1 of its own
    ``beta``, ``beta_se``, ``P`` and ``wald_Z``.
    """
    examples = createFullNoDiscTable(examples)

    def fit(domain):
        # Project the data onto ``domain``, drop incomplete rows and fit.
        projected = examples.select(domain)
        return LogRegLearner(orange.Preprocessor_dropMissing(projected))

    per_attribute = [
        fit(orange.Domain(attribute, examples.domain.classVar))
        for attribute in examples.domain.attributes
    ]
    class_only = fit(orange.Domain(examples.domain.classVar))

    def combine(statistic):
        # Entry 0 of the class-only model followed by entry 1 of each fit.
        head = getattr(class_only, statistic)[0]
        return [head] + [getattr(model, statistic)[1]
                         for model in per_attribute]

    return Univariate_LogRegClassifier(
        beta=combine("beta"),
        beta_se=combine("beta_se"),
        P=combine("P"),
        wald_Z=combine("wald_Z"),
        domain=examples.domain)
def get_example_table(self):
    """Run the query and return its result as an ``orange.ExampleTable``.

    For the "tsv" format the first line of the response supplies the column
    names and every non-blank following line becomes a row of string
    values; ``None`` is returned when there are no rows.  For the "fasta"
    format each parsed record becomes an ``(id, sequence)`` row.

    Raises:
        BioMartError: if ``self.format`` is neither "tsv" nor "fasta".
    """
    import orange
    data = self.run(count=False, header=True)
    fmt = self.format.lower()

    if fmt == "tsv":
        header, data = data.split("\n", 1)
        domain = orange.Domain(
            [orange.StringVariable(name) for name in header.split("\t")],
            False)
        data = [line.split("\t") for line in data.split("\n")
                if line.strip()]
        return orange.ExampleTable(domain, data) if data else None
    elif fmt == "fasta":
        domain = orange.Domain(
            [orange.StringVariable("id"), orange.StringVariable("sequence")],
            False)  # TODO: meaningful id
        from StringIO import StringIO
        from Bio import SeqIO
        examples = [[seq.id, str(seq.seq)]
                    for seq in SeqIO.parse(StringIO(data), "fasta")]
        return orange.ExampleTable(domain, examples)
    else:
        # The previous "%" placeholder had no conversion character, so the
        # formatting itself raised ValueError and masked the intended error.
        raise BioMartError("Unsupported format: %s" % self.format)
def test_saveTab(self):
    """Save small tables to a .tab file, reload them and compare.

    The scratch file is removed after every round trip, even on failure.
    """
    path = "test-save.tab"

    def check_rows(loaded):
        # Each reloaded row must hold exactly its own index.
        for row in range(3):
            self.assertEqual(loaded[row], [row])

    # Round trip of the first three iris examples.
    iris = orange.ExampleTable("iris")[:3]
    iris.save(path)
    try:
        reloaded = orange.ExampleTable(path)
        for saved, loaded in zip(iris, reloaded):
            self.assertEqual(saved, loaded)
    finally:
        os.remove(path)

    # One continuous variable, domain built without a second argument.
    # The domain assertions inspect the in-memory table, not the reloaded one.
    dom = orange.Domain([orange.ContinuousVariable("a")])
    table = orange.ExampleTable(dom)
    table += [[i] for i in range(3)]
    table.save(path)
    try:
        reloaded = orange.ExampleTable(path)
        self.assertEqual(len(table.domain.attributes), 0)
        self.assertEqual(table.domain.classVar, dom[0])
        check_rows(reloaded)
    finally:
        os.remove(path)

    # Same variable, but with an explicit ``None`` as second argument.
    dom = orange.Domain([orange.ContinuousVariable("a")], None)
    table = orange.ExampleTable(dom)
    table += [[i] for i in range(3)]
    table.save(path)
    try:
        reloaded = orange.ExampleTable(path)
        self.assertEqual(len(table.domain.attributes), 1)
        self.assertEqual(table.domain[0], dom[0])
        check_rows(reloaded)
    finally:
        os.remove(path)
def sendData(self, km=None):
    """Send the input data annotated with cluster labels, plus the centroids.

    ``km`` defaults to the best optimized run when ``self.optimized`` is
    set, otherwise to ``self.km``.  Without data or a clustering, ``None``
    is sent on both channels.  ``self.addIdAs`` selects where the cluster
    variable goes: 0 = class (the original class becomes a meta),
    1 = appended attribute, anything else = meta attribute.
    """
    if km is None:
        if self.optimized:
            km = self.bestRun[1]
        else:
            km = self.km

    if not (self.data and km):
        self.send("Examples", None)
        self.send("Centroids", None)
        return

    labels = ["C%d" % number for number in range(1, km.k + 1)]
    clustVar = orange.EnumVariable(self.classifyName, values=labels)

    origDomain = self.data.domain
    placement = self.addIdAs
    if placement == 0:
        domain = orange.Domain(origDomain.attributes, clustVar)
        if origDomain.classVar:
            domain.addmeta(orange.newmetaid(), origDomain.classVar)
        aid = -1
    elif placement == 1:
        domain = orange.Domain(origDomain.attributes + [clustVar],
                               origDomain.classVar)
        aid = len(origDomain.attributes)
    else:
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        aid = orange.newmetaid()
        domain.addmeta(aid, clustVar)
    domain.addmetas(origDomain.getmetas())

    # Build the output table and write each example's cluster index into
    # the slot chosen above.
    annotated = orange.ExampleTable(domain, self.data)
    for example, cluster in izip(annotated, km.clusters):
        example[aid] = cluster

    self.send("Examples", annotated)
    self.send("Centroids", orange.ExampleTable(km.centroids))
def sendList(self, selectedInd):
    """Send the items selected in the MDS plot.

    When ``self.data`` is a list of strings, an example table is sent on
    "Data": with ``selectionOptions == 1`` each row is (X, Y, name),
    otherwise the row is just the name and, for any other truthy
    ``selectionOptions``, X and Y are stored as meta attributes.
    Otherwise the selected items are grouped and sent on
    "Structured Data Files" (``None`` when nothing is selected).

    Parameters:
        selectedInd: indices of the selected points/items.
    """
    if self.data and isinstance(self.data[0], str):
        xAttr = orange.FloatVariable("X")
        yAttr = orange.FloatVariable("Y")
        nameAttr = orange.StringVariable("name")
        if self.selectionOptions == 1:
            domain = orange.Domain([xAttr, yAttr, nameAttr])
            selection = orange.ExampleTable(domain)
            for index in selectedInd:
                # Name and coordinates must come from the same item; the
                # name was previously taken from the loop position instead.
                selection.append(
                    list(self.mds.points[index]) + [self.data[index]])
        else:
            domain = orange.Domain([nameAttr])
            if self.selectionOptions:
                domain.addmeta(orange.newmetaid(), xAttr)
                domain.addmeta(orange.newmetaid(), yAttr)
            selection = orange.ExampleTable(domain)
            for row, index in enumerate(selectedInd):
                selection.append([self.data[index]])
                if self.selectionOptions:
                    selection[row][xAttr] = self.mds.points[index][0]
                    selection[row][yAttr] = self.mds.points[index][1]
        self.send("Data", selection)
        return

    if not selectedInd:
        self.send("Structured Data Files", None)
    else:
        datasets = [self.data[i] for i in selectedInd]
        names = list(set([d.dirname for d in datasets]))
        # NOTE(review): groups are keyed by ``dirname`` but members are
        # matched on ``strain`` -- confirm the two are meant to coincide.
        data = [(name, [d for d in datasets if d.strain == name])
                for name in names]
        self.send("Structured Data Files", data)
def commit(self):
    """Build the output data from the current discretizers and send it.

    With input data, every attribute is replaced by its discretizer when
    that discretizer has cut-off points, kept as is when its discretizer
    is ``None``, and dropped otherwise.  Without such data the discretized
    class data or the original data is passed on where applicable, else
    ``None``.  The result is sent on "Data".
    """
    self.clearLineEditFocus()

    if self.data:
        newattrs = []
        for attr, disc in zip(self.data.domain.attributes,
                              self.discretizers):
            if disc:
                if disc.getValueFrom.transformer.points:
                    newattrs.append(disc)
            elif disc is None:  # can also be False -> remove
                newattrs.append(attr)

        if self.data.domain.classVar:
            if self.outputOriginalClass:
                newdomain = orange.Domain(
                    newattrs, self.originalData.domain.classVar)
            else:
                newdomain = orange.Domain(
                    newattrs, self.data.domain.classVar)
        else:
            newdomain = orange.Domain(newattrs, None)

        newdata = orange.ExampleTable(newdomain, self.originalData)
    elif self.discClassData and self.outputOriginalClass:
        newdata = self.discClassData
    elif self.originalData and not (
            self.originalData.domain.classVar
            and self.originalData.domain.classVar.varType
            == orange.VarTypes.Continuous
            and not self.discClassData):  # no continuous attributes...
        newdata = self.originalData
    else:
        newdata = None

    self.send("Data", newdata)
    # Reset the flag on the widget; the former bare ``dataChanged = False``
    # only bound a local name that was never read.
    self.dataChanged = False
def test_memory_leaks(self):
    """Repeated domain construction must not change a variable's refcount."""
    import sys

    # Build one domain first so the baseline already includes the
    # references held by a live domain.
    domain = orange.Domain(self.vars)
    before = sys.getrefcount(self.vars[0])

    for _ in range(1000):
        domain = orange.Domain(self.vars)

    after = sys.getrefcount(self.vars[0])
    self.assertEqual(before, after)
def setUp(self):
    """Create continuous and discrete variables and the domains built on them."""
    continuous = [orange.ContinuousVariable(name) for name in "abcde"]
    discrete = [
        orange.DiscreteVariable(name, values=["ana", "berta", "cilka"])
        for name in "ABCDE"
    ]
    # NOTE(review): ``yvar`` is a one-element list, so ``allvars`` below
    # ends with a nested list -- confirm this is intended.
    target = [orange.DiscreteVariable("y", values="01")]

    self.contvars = continuous
    self.discvars = discrete
    self.yvar = target

    self.contdomain = orange.Domain(continuous, target)
    self.discdomain = orange.Domain(discrete, target)
    self.allvars = continuous + discrete + [target]
    self.domain = orange.Domain(continuous + discrete, target)
def to_network(self, terms=None):
    """Return an orngNetwork.Network built from this ontology.

    Nodes are the ontology terms and edges carry one 0/1 flag per edge
    type.  The network gets an ``items`` table (id, name, def) and a
    ``links`` table (u, v, relationship).
    """
    edge_types = self.edge_types()
    # NOTE(review): the ``terms`` argument is overwritten unconditionally.
    terms = self.terms()
    from Orange.orng import orngNetwork
    import orange

    network = orngNetwork.Network(len(terms), True, len(edge_types))
    network.objects = dict(
        (term.id, position) for position, term in enumerate(terms))

    # Collect the set of relation types for every (source id, target) pair.
    edges = defaultdict(set)
    for term in self.terms():
        for rel_type, rel_term in self.related_terms(term):
            edges[term.id, rel_term].add(rel_type)
    edgeitems = edges.items()

    for (src, dst), kinds in edgeitems:
        network[src, dst] = [int(kind in kinds) for kind in edge_types]

    item_domain = orange.Domain(
        [orange.StringVariable(label) for label in ("id", "name", "def")],
        False)
    items = orange.ExampleTable(item_domain)
    for term in terms:
        definition = term.values.get("def", [""])[0]
        items.append(
            orange.Example(item_domain, [term.id, term.name, definition]))

    # Computed but not used further in this method.
    relationships = set(
        ", ".join(sorted(kinds)) for (_, _), kinds in edgeitems)

    link_domain = orange.Domain(
        [
            orange.FloatVariable("u"),
            orange.FloatVariable("v"),
            orange.EnumVariable("relationship", values=list(edge_types)),
        ],
        False)
    # Link endpoints are 1-based positions of the terms.
    id2index = dict(
        (term.id, position + 1) for position, term in enumerate(terms))
    links = orange.ExampleTable(link_domain)
    for (src, dst), kinds in edgeitems:
        # pop() takes one arbitrary relation type from the set.
        row = [id2index[src], id2index[dst], kinds.pop()]
        links.append(orange.Example(link_domain, row))

    network.items = items
    network.links = links
    network.optimization = None
    return network
def test_construction_attribute_list(self):
    """Domain accepts variables, indices and names resolved via ``source``."""
    source = orange.Domain(self.vars)
    spec = [self.vars[1], 0, "c"]
    resolved = [self.vars[1], self.vars[0], self.vars[2]]

    d = orange.Domain(list(spec), source=source)
    self.assertEqual(d.variables, resolved)

    # A class variable can also be given by name.
    d = orange.Domain(list(spec), source=source, class_var="d")
    self.assertEqual(d.variables, resolved + [self.vars[3]])
    self.assertEqual(d.attributes, resolved)
    self.assertEqual(d.class_var, self.vars[3])
def test_memory_leaks_error(self):
    """Domain construction that raises must not change a variable's refcount.

    The domain is built from a variable and the string "abc" with no
    source domain; any exception raised by that call is ignored, since
    only the reference count before and after is compared.
    """
    import sys
    f = orange.Domain(self.vars)
    refcount = sys.getrefcount(self.vars[0])
    for i in range(1000):
        try:
            f = orange.Domain([self.vars[0], "abc"])
        except Exception:
            # Only ordinary errors are ignored; a bare ``except`` would
            # also swallow KeyboardInterrupt and SystemExit.
            pass
    refcount2 = sys.getrefcount(self.vars[0])
    self.assertEqual(refcount, refcount2)
def test_convert_through_append(self):
    """Appending an example to a table converts it to that table's domain."""
    iris = orange.ExampleTable("iris")
    columns = iris.domain

    with_default = orange.ExampleTable(
        orange.Domain([columns[0], columns[2], columns[4]]))
    without_class = orange.ExampleTable(
        orange.Domain([columns[1], columns[2]], None))

    for example in iris[:5]:
        with_default.append(example)
        without_class.append(example)

    # zip stops after the five appended rows.
    for original, first, second in zip(iris, with_default, without_class):
        self.assertEqual(original[0], first[0])
        self.assertEqual(original[1], second[0])
def test_construction(self):
    """Check variables/attributes/class_var for each Domain constructor form.

    The optional second argument is omitted, ``True``, ``False`` or a
    variable.
    """
    omitted_or_true = ((), (True,))

    # Empty variable list: nothing to split, whatever the flag.
    for extra in ((), (True,), (False,)):
        d = orange.Domain([], *extra)
        self.assertEqual(len(d.variables), 0)
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, None)

    # One variable: it becomes the class unless the flag is False.
    for extra in omitted_or_true:
        d = orange.Domain(self.vars[:1], *extra)
        self.assertEqual(d.variables, self.vars[:1])
        self.assertEqual(len(d.attributes), 0)
        self.assertEqual(d.class_var, self.vars[0])

    d = orange.Domain(self.vars[:1], False)
    self.assertEqual(d.variables, self.vars[:1])
    self.assertEqual(d.attributes, self.vars[:1])
    self.assertEqual(d.class_var, None)

    # All variables: the last one becomes the class unless the flag is False.
    for extra in omitted_or_true:
        d = orange.Domain(self.vars, *extra)
        self.assertEqual(d.variables, self.vars)
        self.assertEqual(d.attributes, self.vars[:-1])
        self.assertEqual(d.class_var, self.vars[-1])

    d = orange.Domain(self.vars, False)
    self.assertEqual(d.variables, self.vars)
    self.assertEqual(d.attributes, self.vars)
    self.assertEqual(d.class_var, None)

    # An explicit class variable is appended after the attributes.
    d = orange.Domain(self.vars, self.vars[5])
    self.assertEqual(d.variables, self.vars + [self.vars[5]])
    self.assertEqual(d.attributes, self.vars)
    self.assertEqual(d.class_var, self.vars[5])
def getCinfonyDescResults(origData, descList, radius=1):
    """Compute Cinfony descriptors for origData and merge them into a new table.

    The input attributes and class are kept and the descriptor columns
    are added.  Returns None when origData or descList is empty, when no
    SMILES attribute is found, or when no toolkit produced a result.
    """
    if not origData or not descList:
        return None
    smilesName = getSMILESAttr(origData)
    if not smilesName:
        return None

    # Work on a copy that also carries the untouched SMILES in "origSmiles".
    classVar = origData.domain.classVar
    keptAttrs = [attr for attr in origData.domain if attr is not classVar]
    newDomain = orange.Domain(
        keptAttrs + [orange.StringVariable("origSmiles")], classVar)
    data = dataUtilities.DataTable(newDomain, origData)
    for ex in data:
        ex["origSmiles"] = ex[smilesName].value

    # TODO: Create a method in dataUtilities to standardize the attribute
    # smilesName in place, using the attribute origSmiles as ID.
    if ("AZutilities.extraUtilities" in sys.modules
            and hasattr(extraUtilities, "StandardizeSMILES")):
        # Expected to rewrite the smiAttr column of data in place, using
        # cName as the compound identifier.
        extraUtilities.StandardizeSMILES(
            data, smiAttr=smilesName, cName="origSmiles")

    # Ask every toolkit for its descriptors and keep the non-empty answers.
    calculators = (
        lambda: getObabelDescResult(data, descList),
        lambda: getRdkDescResult(data, descList, radius),
        lambda: getWebelDescResult(data, descList),
        lambda: getCdkDescResult(data, descList),
    )
    results = []
    for calculate in calculators:
        res = calculate()
        if res:
            results.append(res)
    if not results:
        return None

    # Replace every value that is not equal to itself (nan) with '?'.
    for res in results:
        for ex in res:
            for attr in ex.domain:
                if ex[attr] != ex[attr]:
                    ex[attr] = '?'

    # Join all descriptor tables on the SMILES column, then onto the data.
    resData = results[0]
    for res in results[1:]:
        resData = dataUtilities.horizontalMerge(
            resData, res, smilesName, smilesName)
    data = dataUtilities.horizontalMerge(
        data, resData, smilesName, smilesName)

    # Restore the original SMILES and drop the helper attribute.
    for ex in data:
        ex[smilesName] = ex["origSmiles"]
    finalAttrs = [
        attr for attr in data.domain
        if attr.name != "origSmiles" and attr is not data.domain.classVar
    ]
    finalDomain = orange.Domain(finalAttrs, data.domain.classVar)
    return dataUtilities.DataTable(finalDomain, data)
def __call__(self, examples, weight=0):
    """Fit a logistic regression model and return it.

    Optional attributes on ``self`` (removeMissing, stepwiseLR, addCrit,
    removeCrit, numAttr, imputer) adjust preprocessing.  Returns None
    for an empty example table.  While fitting returns a variable instead
    of a model, that variable is removed from the domain and the model
    is refitted.
    """
    imputer = getattr(self, "imputer", None) or None

    if getattr(self, "removeMissing", 0):
        examples = orange.Preprocessor_dropMissing(examples)
    if len(examples) == 0:
        return None

    if getattr(self, "stepwiseLR", 0):
        # Restrict the data to the attributes picked by stepwise selection.
        selected = StepWiseFSS(
            examples,
            addCrit=getattr(self, "addCrit", 0.2),
            deleteCrit=getattr(self, "removeCrit", 0.3),
            imputer=imputer,
            numAttr=getattr(self, "numAttr", -1))
        reduced = orange.Domain(selected, examples.domain.classVar)
        reduced.addmetas(examples.domain.getmetas())
        examples = examples.select(reduced)

    learner = orange.LogRegLearner()
    learner.imputerConstructor = imputer
    if imputer:
        examples = self.imputer(examples)(examples)
    examples = orange.Preprocessor_dropMissing(examples)
    if self.fitter:
        learner.fitter = self.fitter

    fit = learner.fitModel if self.removeSingular else learner
    lr = fit(examples, weight)

    while isinstance(lr, orange.Variable):
        source = lr.getValueFrom
        if (isinstance(source, orange.ClassifierFromVar)
                and isinstance(source.transformer,
                               orange.Discrete2Continuous)):
            # Step back to the variable this continuized one derives from.
            lr = source.variable

        remaining = examples.domain.attributes[:]
        if lr in remaining:
            remaining.remove(lr)
        else:
            remaining.remove(lr.getValueFrom.variable)

        pruned = orange.Domain(remaining, examples.domain.classVar)
        pruned.addmetas(examples.domain.getmetas())
        examples = examples.select(pruned)
        lr = learner.fitModel(examples, weight)

    return lr
def build_orange_data_from_eig_vector(self):
    """Wrap the loaded eigenvector in an Orange table for clustering.

    Every entry becomes one instance with a single continuous feature
    'eigValue'; the user id of the graph vertex at the same position is
    attached as a meta attribute.
    """
    eig_vector = self.load_eigen_vector(matlab=True)
    user_id = SQLDao.LABEL_USER_GROUP_INFO_USERID

    # Features -> domain -> meta attribute -> instances -> table.
    features = [Orange.feature.Continuous('eigValue')]
    domain = orange.Domain(features, False)
    domain.add_meta(Orange.feature.Descriptor.new_meta_id(),
                    Orange.feature.Continuous(user_id))

    instances = []
    for graph_index, value in enumerate(eig_vector):
        instance = Orange.data.Instance(domain, [value])
        # No group id is available here, only the user id.
        instance[user_id] = self.g.vs[graph_index][user_id]
        instances.append(instance)

    return Orange.data.Table(domain, instances)
def applySettings(self):
    """Apply the widget settings, score the examples and split off outliers.

    With input, three tables are sent: the data with a "Z score" meta
    attribute, the examples whose score exceeds the threshold, and the
    rest.  Without input, None is sent on all three channels.
    """
    if self.haveInput != 1:
        for channel in ("Examples with Z-scores", "Outliers", "Inliers"):
            self.send(channel, None)
        return

    detector = self.outlier
    detector.setKNN(self.ks[self.k][1])

    scored_domain = orange.Domain(self.data.domain)
    scored_domain.addmeta(orange.newmetaid(),
                          orange.FloatVariable("Z score"))
    self.newdata = orange.ExampleTable(scored_domain, self.data)
    for position, score in enumerate(detector.zValues()):
        self.newdata[position]["Z score"] = score
    self.send("Examples with Z-scores", self.newdata)

    # NOTE(review): the threshold text is passed to eval(); confirm that
    # self.zscore can never hold untrusted input.
    above = orange.Filter_values(domain=self.newdata.domain)
    above["Z score"] = (orange.Filter_values.Greater, eval(self.zscore))
    outliers = above(self.newdata)

    at_or_below = orange.Filter_values(domain=self.newdata.domain)
    at_or_below["Z score"] = (orange.Filter_values.LessEqual,
                              eval(self.zscore))
    inliers = at_or_below(self.newdata)

    self.send("Outliers", outliers)
    self.send("Inliers", inliers)
def __makeExampleTable(namesDict, data):
    """Build an example table with one float attribute per gene and a group class.

    Each name listed under a group in ``namesDict`` becomes one example:
    its values are read from ``data[geneID][group][name]`` for the sorted
    gene ids, and its class is the group key.  Control-group examples
    come first.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    geneIDs = sorted(data.keys())
    attributes = [orange.FloatVariable(name=str(geneID))
                  for geneID in geneIDs]
    classAttr = orange.EnumVariable(
        name=CLASS_ATRR_NAME, values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(attributes, classAttr)
    table = orange.ExampleTable(domain)

    for group in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for attrName in namesDict[group].keys():
            values = [data[geneID][group][attrName] for geneID in geneIDs]
            values.append(group)
            table.append(orange.Example(domain, values))

    return table
def relabel(self):
    """Attach item labels to the distance matrix and send it on "Distances".

    Items come from the data (attributes, variables or examples, whichever
    matches the matrix dimension), from ``self.labels``, or default to
    the row indices as strings.
    """
    self.error()  # called without arguments before relabelling
    matrix = self.matrix

    if matrix is not None and self.data is not None:
        items = None
        problem = None
        if self.takeAttributeNames:
            domain = self.data.domain
            if matrix.dim == len(domain.attributes):
                items = domain.attributes
            elif matrix.dim == len(domain.variables):
                items = domain.variables
            else:
                problem = ("The number of attributes doesn't match "
                           "the matrix dimension")
        elif matrix.dim == len(self.data):
            items = self.data
        else:
            problem = ("The number of examples doesn't match "
                       "the matrix dimension")

        if problem is None:
            matrix.setattr("items", items)
        else:
            self.error(problem)
    elif matrix and self.labels:
        # Turn the plain labels into a one-column table of named examples.
        label_var = orange.StringVariable('label')
        rows = [[str(text)] for text in self.labels]
        self.data = orange.ExampleTable(orange.Domain([label_var]), rows)
        for example, text in zip(self.data, self.labels):
            example.name = text
        matrix.setattr("items", self.data)

    if self.data == None and self.labels == None:
        numbered = [str(position) for position in range(matrix.dim)]
        matrix.setattr("items", numbered)

    self.matrix.matrixType = orange.SymMatrix.Symmetric
    self.send("Distances", self.matrix)
def removeSelectedClassLabel(self):
    """Delete the selected class label together with its examples.

    Does nothing when no label is selected or only one label is left.
    Otherwise the data is rebuilt with a new class variable that lacks
    the label, and the label before the removed one is selected.
    """
    index = self.selectedClassLabelIndex()
    if index is None or len(self.classValuesModel) <= 1:
        return

    label = self.classValuesModel[index]
    survivors = [example for example in self.graph.data
                 if str(example.getclass()) != label]
    values = [value for value in self.classValuesModel if value != label]

    newclass = orange.EnumVariable("Class label", values=values)
    newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
    newdata = orange.ExampleTable(newdomain)
    for example in survivors:
        if (example[self.classVariable] != label
                and example[self.classVariable] in values):
            row = [example[attribute]
                   for attribute in example.domain.attributes]
            row.append(str(example.getclass()))
            newdata.append(orange.Example(newdomain, row))

    self.classVariable = newclass
    self.classValuesModel.wrap(self.classVariable.values)
    self.graph.data = newdata
    self.graph.updateGraph()

    # Move the selection to the previous label, or the first one.
    newindex = self.classValuesModel.index(max(0, index - 1))
    self.classValuesView.selectionModel().select(
        newindex, QItemSelectionModel.ClearAndSelect)
    self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
def wordnet_glosses(training, testing):
    """Build training and testing tables of gloss-word features.

    A feature is created for every non-stopword that occurs more than
    twice in the training gloss map; it is "True" for an item when the
    word occurs in the definition of that item's synset.
    Returns ``(training_table, testing_table)``.
    """
    stopwords = set(nltk.corpus.stopwords.words())
    gloss_dist = training.gloss_map()
    used_words = [
        candidate for candidate in gloss_dist.keys()
        if not candidate in stopwords and gloss_dist[candidate] > 2
    ]
    print(" ".join(["words", str(used_words)]))

    attributes = [orange.EnumVariable(name, values=["True", "False"])
                  for name in used_words]
    print(" ".join(["got", str(len(used_words)), "features"]))
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)  # result is not used below
            ex = orange.Example(domain)
            ex["class"] = label
            # NOTE(review): the domain is built from the gloss words and
            # the class only -- confirm that "word" resolves in it.
            ex["word"] = word
            for attribute, gloss_word in zip(attributes, used_words):
                if gloss_word in annotation.synset(i).definition:
                    ex[attribute.name] = "True"
                else:
                    ex[attribute.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
def commit(self):
    """Send the examples and features covered by the selected rules.

    With no rule selected, None is sent on both "Data" and "Features".
    The change flag is cleared in either case.
    """
    selected = self.tableView.selectionModel().selectedRows()
    source_indices = [self.proxyModel.mapToSource(index)
                      for index in selected]
    row_numbers = [index.row() for index in source_indices]
    selectedRules = [self.classifier.rules[number]
                     for number in row_numbers]

    if not selectedRules:
        self.send("Data", None)
        self.send("Features", None)
    else:
        examples = self.classifier.examples
        selectedExamples = self.selectedExamplesFromRules(
            selectedRules, self.classifier.examples)
        picked = self.selectedAttrsFromRules(selectedRules)
        # Filter the domain's attributes so their original order is kept.
        selectedAttrs = [attr for attr in examples.domain.attributes
                         if attr in picked]

        if self.selectedAttrsOnly:
            domain = orange.Domain(selectedAttrs, examples.domain.classVar)
            domain.addmetas(examples.domain.getmetas())
            selectedExamples = orange.ExampleTable(domain, selectedExamples)
        else:
            selectedExamples = orange.ExampleTable(selectedExamples)

        self.send("Data", selectedExamples)
        self.send("Features", orange.VarList(list(selectedAttrs)))

    self.changedFlag = False
def bench_orange(X, y, T, valid):
    """Train and score Orange's Nu-SVC with an RBF kernel.

    Parameters:
        X: training samples, one row per sample.
        y: training labels; negative labels are rewritten to 0 and the
            array is reshaped to a column, both in place.
        T: test samples, one row per sample.
        valid: test labels; modified in place in the same way as ``y``.

    Returns:
        ``(score, elapsed)``: the fraction of test samples predicted
        correctly and the elapsed ``timedelta``.

    The kernel width is read from the name ``sigma``, which must be
    defined outside this function.
    """
    #
    #       .. Orange ..
    #
    import orange
    start = datetime.now()

    # prepare data in Orange's format
    columns = ["a" + str(i) for i in range(X.shape[1])]
    classValues = ['0', '1']
    domain = orange.Domain(
        [orange.FloatVariable(name) for name in columns],
        orange.EnumVariable("class", values=classValues))

    y.shape = (len(y), 1)  # reshape for Orange
    y[np.where(y < 0)] = 0  # change class labels to 0..K
    orng_train_data = orange.ExampleTable(domain, np.hstack((X, y)))

    valid.shape = (len(valid), 1)  # reshape for Orange
    valid[np.where(valid < 0)] = 0  # change class labels to 0..K
    orng_test_data = orange.ExampleTable(domain, np.hstack((T, valid)))

    learner = orange.SVMLearner(orng_train_data,
                                svm_type=orange.SVMLearner.Nu_SVC,
                                kernel_type=orange.SVMLearner.RBF,
                                C=1.,
                                gamma=1. / sigma)

    pred = np.empty(T.shape[0], dtype=np.int32)
    for i, e in enumerate(orng_test_data):
        pred[i] = learner(e)

    # ``valid`` is now a column of shape (n, 1) and ``pred`` has shape
    # (n,); comparing them directly broadcasts to an (n, n) matrix, so
    # flatten first to compare element by element.
    score = np.mean(pred == valid.ravel())
    return score, datetime.now() - start
def addNewClassLabel(self):
    """Append a fresh "Class N" label and rebuild the data with it.

    N is the smallest positive integer whose label is not in use yet.
    Existing examples keep their class value; the new label is selected.
    """
    number = 1
    newlabel = "Class %i" % number
    while newlabel in self.classValuesModel:
        number += 1
        newlabel = "Class %i" % number

    values = list(self.classValuesModel) + [newlabel]
    newclass = orange.EnumVariable("Class label", values=values)
    newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
    newdata = orange.ExampleTable(newdomain)
    for example in self.graph.data:
        row = [example[attribute] for attribute in example.domain.attributes]
        row.append(str(example.getclass()))
        newdata.append(orange.Example(newdomain, row))

    self.classVariable = newclass
    self.classValuesModel.wrap(self.classVariable.values)
    self.graph.data = newdata
    self.graph.updateGraph()

    # Select the label that was just added (the last one).
    last = len(self.classValuesModel) - 1
    newindex = self.classValuesModel.index(last)
    self.classValuesView.selectionModel().select(
        newindex, QItemSelectionModel.ClearAndSelect)
    self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
def wordnet_meronyms(training, testing):
    """Build training and testing tables of meronym-ancestor features.

    One feature is created per ancestor in the training meronym map,
    ordered by decreasing count; it is "True" for an item when that
    ancestor is among the item's ancestors.
    Returns ``(training_table, testing_table)``.
    """
    ancestor_to_count = training.meronym_ancestor_map()
    used_ancestors = list(ancestor_to_count.keys())
    used_ancestors.sort(key=lambda ancestor: ancestor_to_count[ancestor],
                        reverse=True)
    print(" ".join(["name", str(used_ancestors[0].name)]))

    attributes = [
        orange.EnumVariable(ancestor.name, values=["True", "False"])
        for ancestor in used_ancestors
    ]
    print(" ".join(["got", str(len(used_ancestors)), "features"]))
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            for attribute, candidate in zip(attributes, used_ancestors):
                if candidate in ancestors:
                    ex[attribute.name] = "True"
                else:
                    ex[attribute.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
def starRegression(cache, dimensions, progressCallback=None, **args):
    """Fill ``cache.deltas`` with least-squares coefficients for each point.

    For every point, the vertices of its star are collected and a linear
    least-squares problem is solved with all but the last value of those
    points as inputs and the last value as target.  Points with an empty
    star get '?' for every dimension.  With a single continuous attribute
    the work is delegated to ``triangles1D``.
    """
    if len(cache.contAttributes) == 1:
        return triangles1D(cache, True)

    # Fill the cached points, triangulation and stars only when missing.
    if not cache.points:
        projected = orange.Domain(cache.contAttributes,
                                  cache.data.domain.classVar)
        cache.points = orange.ExampleTable(projected, cache.data).native(0)
    if not cache.tri:
        cache.tri = triangulate(cache, cache.points)
    if not cache.stars:
        triangulation = cache.tri
        cache.stars = [star(vertex, triangulation)
                       for vertex in xrange(len(cache.points))]

    points = cache.points
    if progressCallback:
        step = 100.0 / len(points)

    for vertex, (simplices, point) in enumerate(zip(cache.stars, points)):
        if simplices == []:
            cache.deltas[vertex] = ['?' for unused in dimensions]
            continue

        # Distinct vertices over all simplices of this star.
        neighbours = list(set(
            reduce(lambda left, right: left + right, simplices)))
        A = [points[n][:-1] for n in neighbours]
        b = [[points[n][-1]] for n in neighbours]
        solution = numpy.linalg.lstsq(A, b)[0]
        cache.deltas[vertex] = [row[0] for row in solution]

        if progressCallback:
            progressCallback(vertex * step)
def test_to_numpy_multi(self):
    """Exercise ``to_numpy`` with the ``multinomial`` option on iris and zoo."""
    data = orange.ExampleTable("iris")
    for contents in ("AC/w", "Ac/Cw"):
        self.assertRaises(ValueError, data.to_numpy, contents,
                          multinomial=2)
    data.to_numpy(multinomial=2)
    data.to_numpy("a", multinomial=2)
    a, c, w = data.to_numpy(multinomial=0)
    self.assertIsNone(c)

    data = orange.ExampleTable("zoo")
    self.assertRaises(ValueError, data.to_numpy, multinomial=2)
    self.assertRaises(ValueError, data.to_numpy, "a", multinomial=2)

    # Default conversion: attributes and class side by side, no weights.
    ar, cl, w = data.to_numpy()
    self.assertIsNone(w)
    for row in range(len(data)):
        self.assertEqual(data[row, :-1], ar[row])
        self.assertEqual(data[row, -1], cl[row])

    # multinomial=0: compare with the variables having at most two values.
    ar, cl, w = data.to_numpy(multinomial=0)
    self.assertIsNone(cl)
    self.assertIsNone(w)
    binary = [attr for attr in data.domain if len(attr.values) <= 2]
    data2 = orange.ExampleTable(orange.Domain(binary), data)
    for row in range(len(data)):
        self.assertEqual(data2[row], ar[row])
def test_to_numpy_noclass(self):
    """``to_numpy`` must raise ValueError for these specs on class-less data."""
    iris = orange.ExampleTable("iris")
    classless = orange.ExampleTable(
        orange.Domain(iris.domain.attributes, None), iris)

    requests = (
        ("AC/w", {}),
        ("AC/w", {"multinomial": 0}),
        ("A/Cw", {"multinomial": 0}),
    )
    for contents, options in requests:
        self.assertRaises(ValueError, classless.to_numpy, contents,
                          **options)
def etForAttribute(datal, a):
    """Build an example table for one attribute across several example tables.

    Column ``v<i>`` holds the values of attribute ``a`` in the i-th table;
    the class variable is the one of the first table.
    """

    def column_values(table, variable):
        # Native values of ``variable`` for every example of ``table``.
        single = orange.Domain([table.domain[variable]], False)
        return [row[0].native()
                for row in orange.ExampleTable(single, table)]

    columns = []
    features = []
    for position, data in enumerate(datal):
        columns.append(column_values(data, a))
        features.append(orange.FloatVariable(name="v" + str(position)))

    # ``data`` is still the last table of the loop here, so the class
    # values are read from it using the first table's class variable.
    columns.append(column_values(data, datal[0].domain.classVar))

    dom = orange.Domain(features, datal[0].domain.classVar)
    examples = [list(values) for values in zip(*columns)]
    return orange.ExampleTable(dom, examples)