def __makeExampleTable(namesDict, data):
    """Build an orange.ExampleTable from per-gene values of two sample groups.

    One continuous attribute is created per key of ``data`` (in sorted
    order) and one example per sample name in ``namesDict``.  The examples
    of the control group are appended first, followed by those of the data
    group; the class value of each example is the key of its group.

    ``data[geneID][groupKey][sampleName]`` supplies the attribute values.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    orderedGenes = sorted(data.keys())
    features = [orange.FloatVariable(name=str(gene)) for gene in orderedGenes]
    groupVar = orange.EnumVariable(
        name=CLASS_ATRR_NAME, values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(features, groupVar)
    table = orange.ExampleTable(domain)

    # Control samples go in first, data samples second.
    for groupKey in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for sampleName in namesDict[groupKey].keys():
            row = [data[gene][groupKey][sampleName] for gene in orderedGenes]
            row.append(groupKey)
            table.append(orange.Example(domain, row))
    return table
def test_construction(self):
    """Check Example construction without values, from strings and from lists."""
    blank = orange.Example(self.contdomain)
    for value in blank:
        self.assertTrue(value.is_undefined())

    numbered = orange.Example(self.contdomain, "01234")
    for position, value in enumerate(numbered):
        self.assertEqual(position, value)

    names = ["ana"] * 3 + ["berta", "cilka"]
    from_names = orange.Example(self.discdomain, names)
    for expected, actual in zip(names, from_names):
        self.assertEqual(expected, actual)

    from_indices = orange.Example(self.discdomain, [0, 0, 0, 1, 2])
    for expected, actual in zip(names, from_indices):
        self.assertEqual(expected, actual)

    # A bare number is not an acceptable value container.
    with self.assertRaises(TypeError):
        orange.Example(self.contdomain, 3.14)

    # String inputs that the constructor has to reject with ValueError.
    rejected = (
        (self.contdomain, "0123"),
        (self.contdomain, "012345"),
        (self.contdomain, "abcde"),
        (self.discdomain, "abcde"),
        (self.discdomain, "00110"),
    )
    for domain, bad_values in rejected:
        with self.assertRaises(ValueError):
            orange.Example(domain, bad_values)
def test_indexing_assign_example(self):
    """Check that a table row can be overwritten with a list or an Example."""
    import warnings
    with warnings.catch_warnings():
        # Warnings raised inside this block are ignored.
        warnings.simplefilter("ignore")
        d = orange.ExampleTable("test2")
        vara = d.domain["a"]
        metaa = vara.default_meta_id  # not read again in this test

        # The first row starts out carrying the meta value "a" ...
        self.assertTrue(d[0].has_meta("a"))

        # ... and loses it once the row is replaced by a plain list.
        d[0] = ["3.14", "1", "f"]
        self.assertEqual(list(d[0]), [3.14, "1", "f"])
        self.assertFalse(d[0].has_meta("a"))

        # Non-string values are accepted as well.
        d[0] = [3.15, 1, "t"]
        self.assertEqual(list(d[0]), [3.15, "0", "t"])

        # A list that is too short must be rejected.
        with self.assertRaises(ValueError):
            d[0] = ["3.14", "1"]

        # Assigning a whole Example built on the same domain.
        ex = orange.Example(d.domain, ["3.16", "1", "f"])
        d[0] = ex
        self.assertEqual(list(d[0]), [3.16, "1", "f"])

        # Same again, with a meta value set on the assigned Example.
        ex = orange.Example(d.domain, ["3.16", "1", "f"])
        ex.set_meta("e", "mmmapp")
        d[0] = ex
        self.assertEqual(list(d[0]), [3.16, "1", "f"])
        ex.set_meta("e", "mmmapp")
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Split the clustered examples into selected / unselected tables.

    Parameters:
        postdata: mapping of posted form values; ``widget_id`` and
            ``selected_nodes`` are read.  ``selected_nodes`` must hold a JSON
            list of ``[example index, selected flag, cluster number]``
            triples.
        input_dict: widget inputs; ``dm`` is the distance matrix and
            ``linkage`` the linkage code.
        output_dict: not used by this function.

    Returns:
        dict with the keys ``centroids``, ``selected_examples`` and
        ``unselected_examples``.  All three are ``None`` when ``matrix.items``
        is not an ``orange.ExampleTable``.

    Raises:
        Exception: when ``selected_nodes`` is missing or cannot be decoded.
    """
    import json
    import orange

    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    widget_pk = postdata['widget_id'][0]  # unused, but a missing key still fails early
    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except Exception:
        # `except Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit are not turned into this user-facing message.
        raise Exception('Please select a threshold for determining clusters.')

    if isinstance(matrix.items, orange.ExampleTable):
        # The result is not used below; the call is kept as in the original.
        root = Clustering.hierarchical_clustering(linkage, matrix)

        # Sorted so that the order of the enum values is deterministic.
        cluster_ids = sorted(set([cluster for _, _, cluster in selected_nodes]))
        selected_clusters = set(
            [cluster for _, selected, cluster in selected_nodes if selected])

        clustVar = orange.EnumVariable(
            str('Cluster'),
            values=["Cluster %d" % i for i in cluster_ids] + ["Other"])
        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())

        # Build tables of selected and unselected examples
        selected_table = orange.ExampleTable(domain)
        unselected_table = orange.ExampleTable(domain)
        for node_id, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[node_id])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)

        # Build table of centroids: one example per selected cluster
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                label = "Cluster %d" % cluster
                clusterEx = orange.ExampleTable(
                    [ex for ex in selected_table if ex[clustVar] == label])
                # Attribute statistics: average where the continuous
                # statistic exists, otherwise the modus of the distribution,
                # otherwise unknown.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                values = [
                    cs.avg if cs else (ds.modus() if ds else "?")
                    for cs, ds in zip(contstat, discstat)
                ]
                example = orange.Example(centroids.domain, values)
                example[clustVar] = clustVar(label)
                centroids.append(example)
    else:
        # Attribute distance
        centroids, selected_table, unselected_table = None, None, None

    return {
        'centroids': centroids,
        'selected_examples': selected_table,
        'unselected_examples': unselected_table
    }
def to_network(self, terms=None):
    """
    Return an Orange.network.Network instance constructed from this
    ontology.

    NOTE(review): the ``terms`` argument is overwritten with
    ``self.terms()`` before it is read, so a value passed by the caller has
    no effect.
    """
    edge_types = self.edge_types()
    terms = self.terms()
    from Orange.orng import orngNetwork
    import orange

    network = orngNetwork.Network(len(terms), True, len(edge_types))
    network.objects = dict(
        (term.id, position) for position, term in enumerate(terms))

    # (source id, related term) -> set of relationship types between them
    edges = defaultdict(set)
    for term in self.terms():
        for rel_type, rel_term in self.related_terms(term):
            edges[(term.id, rel_term)].add(rel_type)

    edgeitems = edges.items()
    for (src, dst), kinds in edgeitems:
        # One 0/1 flag per known edge type.
        network[src, dst] = [1 if kind in kinds else 0 for kind in edge_types]

    # Node table: id, name and first "def" value of every term.
    node_domain = orange.Domain([
        orange.StringVariable("id"),
        orange.StringVariable("name"),
        orange.StringVariable("def"),
    ], False)
    items = orange.ExampleTable(node_domain)
    for term in terms:
        items.append(orange.Example(
            node_domain,
            [term.id, term.name, term.values.get("def", [""])[0]]))

    # Computed as in the original; the value is not used afterwards.
    relationships = set(
        ", ".join(sorted(kinds)) for (_, _), kinds in edgeitems)

    # Link table: 1-based endpoints plus one relationship type per edge.
    link_domain = orange.Domain([
        orange.FloatVariable("u"),
        orange.FloatVariable("v"),
        orange.EnumVariable("relationship", values=list(edge_types)),
    ], False)
    id2index = dict((term.id, position + 1)
                    for position, term in enumerate(terms))
    links = orange.ExampleTable(link_domain)
    for (src, dst), kinds in edgeitems:
        # pop() removes one (arbitrary) type from the edge's set.
        links.append(orange.Example(
            link_domain, [id2index[src], id2index[dst], kinds.pop()]))

    network.items = items
    network.links = links
    network.optimization = None
    return network
def __call__(self, examples, weight=0):
    """Train one binary classifier per row of the class code matrix.

    With two or fewer class values the wrapped learner is applied to
    ``examples`` directly.  Otherwise ``self.matrix(number_of_classes)``
    supplies a template; for every template row the examples whose class is
    coded ``1`` are relabelled '1', those coded ``-1`` are relabelled '0',
    and the rest are left out.  A classifier is trained on each such subset
    and the list of ``(classifier, subset size)`` pairs is handed to
    ``self.pestimator``.

    Parameters:
        examples: example table with a discrete class.
        weight: id of the weight meta attribute, or 0 for unweighted data.
            When non-zero, each copied example carries its weight under
            meta id 1 and the learner is called with ``weight=1``.

    Raises:
        ValueError: if the class is not discrete, or if a template row
            selects no examples.  (The original raised string exceptions,
            which fail with TypeError on Python >= 2.6.)
    """
    class_var = examples.domain.classVar
    if class_var.varType != orange.VarTypes.Discrete:
        raise ValueError("MultiClassLearner only works with discrete class")

    # simple handling for simple 2-class problems
    nc = len(class_var.values)
    if nc <= 2:
        if weight != 0:
            return self.learner(examples, weight)
        return self.learner(examples)

    # generate the classifier matrix
    nv = len(examples.domain.attributes)
    template = self.matrix(nc)

    # prepare the domain, and the new binary class
    binary_var = orange.EnumVariable(name="binary", values=['0', '1'])
    negative = binary_var(0)
    positive = binary_var(1)
    new_domain = orange.Domain(examples.domain.attributes + [binary_var])

    # generate all classifiers
    classifiers = []
    for row in template:
        subset = orange.ExampleTable(new_domain)
        if weight != 0:
            subset.addMetaAttribute(1)
        for source in examples:
            code = row[int(source.getclass())]
            if code == 1:
                label = positive
            elif code == -1:
                label = negative
            else:
                continue  # this class takes no part in the current split
            values = [source[k] for k in range(nv)]
            values.append(label)
            relabelled = orange.Example(new_domain, values)
            if weight != 0:
                relabelled.setmeta(source.getMetaAttribute(weight), 1)
            subset.append(relabelled)

        # prepare the classifier
        if len(subset) <= 0:
            raise ValueError(
                "MultiClass: More than one of the declared class values do not appear in the data. Filter them out.")
        if weight != 0:
            classifier = self.learner(subset, weight=1)
        else:
            classifier = self.learner(subset)
        classifiers.append((classifier, len(subset)))

    return self.pestimator(classifiers, template, examples.domain)
def testassociationrule(self):
    """Check a hand-built AssociationRule, its copy and its pickled copy."""
    import pickle

    data = orange.ExampleTable("zoo")
    left = orange.Example(data.domain)
    left["hair"] = "0"
    left["type"] = "mammal"
    right = orange.Example(data.domain)
    right["aquatic"] = "1"

    zero_attrs = ("coverage", "strength", "lift", "leverage",
                  "n_applies_left", "n_applies_right", "n_examples")

    def check_untrained(candidate):
        # Expectations for a rule that was built by hand, not induced.
        self.assertEqual(candidate.left, left)
        self.assertEqual(str(candidate), "hair=0 type=mammal -> aquatic=1")
        self.assertEqual(candidate.support, -1)
        self.assertEqual(candidate.confidence, -1)
        for attr in zero_attrs:
            self.assertEqual(getattr(candidate, attr), 0)
        self.assertEqual(candidate.n_left, 2)
        self.assertEqual(candidate.n_right, 1)

    rule = orange.AssociationRule(left, right)
    check_untrained(rule)

    # Copy construction yields an equal rule.
    rule2 = orange.AssociationRule(rule)
    self.assertEqual(rule, rule2)

    self.assertTrue(rule.applies_left(left))
    self.assertFalse(rule.applies_left(right))
    self.assertFalse(rule.applies_right(left))
    self.assertTrue(rule.applies_right(right))
    self.assertFalse(rule.applies_both(left))
    self.assertFalse(rule.applies_both(right))

    # An example that satisfies both sides of the rule.
    both = orange.Example(left)
    both["aquatic"] = "1"
    self.assertTrue(rule.applies_left(both))
    self.assertTrue(rule.applies_right(both))
    self.assertTrue(rule.applies_both(both))

    # The rule must survive a pickle round trip unchanged.
    restored = pickle.loads(pickle.dumps(rule))
    self.assertEqual(rule, restored)
    check_untrained(restored)
def calcContVarGrad(var, ex, gradRef):
    """Evaluate the model one step above and one step below ``ex[var]``.

    Returns ``([up, down], step)`` where ``up`` / ``down`` are element 1 of
    ``self(example, returnDFV=True)`` for the shifted examples.  When no
    step can be derived, or ``ex[var]`` is special, both entries are
    ``gradRef`` instead.

    Relies on ``self``, ``c_step`` and ``signDesc`` from the enclosing scope.
    """
    shifted = orange.Example(ex)

    if c_step is not None:
        # Reserved for significance testing; the original hint was to drop
        # this raise and use ``coef_step = float(c_step)`` when debugging.
        raise (Exception(
            "This mode should only be used for debugging! Comment this line if debugging."
        ))
    is_classification = (
        self.domain.classVar.varType == orange.VarTypes.Discrete)
    # 0.08 for regression "needs confirmation" according to the original author.
    coef_step = 1.0 if is_classification else 0.08

    if var in signDesc:
        step = 1  # signatures always use a unit step
    elif "dev" in self.basicStat[var]:
        # dev - standard deviation, see
        # http://orange.biolab.si/doc/reference/Orange.statistics.basic/
        step = self.basicStat[var]["dev"] * coef_step
    else:
        return ([gradRef, gradRef], 0)

    if ex[var].isSpecial():
        return ([gradRef, gradRef], step)

    # step UP
    shifted[var] = ex[var] + step
    res_up = self(shifted, returnDFV=True)[1]
    # step DOWN
    shifted[var] = ex[var] - step
    res_down = self(shifted, returnDFV=True)[1]
    return ([res_up, res_down], step)
def removeSelectedClassLabel(self):
    """Drop the selected class label together with the examples that carry it.

    Nothing happens when no label is selected or only one label is left.
    """
    index = self.selectedClassLabelIndex()
    if index is None or len(self.classValuesModel) <= 1:
        return

    label = self.classValuesModel[index]
    kept_examples = [
        ex for ex in self.graph.data if str(ex.getclass()) != label
    ]
    kept_values = [val for val in self.classValuesModel if val != label]

    newclass = orange.EnumVariable("Class label", values=kept_values)
    newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
    newdata = orange.ExampleTable(newdomain)
    for ex in kept_examples:
        current = ex[self.classVariable]
        if current != label and ex[self.classVariable] in kept_values:
            row = [ex[a] for a in ex.domain.attributes]
            row.append(str(ex.getclass()))
            newdata.append(orange.Example(newdomain, row))

    self.classVariable = newclass
    self.classValuesModel.wrap(self.classVariable.values)

    self.graph.data = newdata
    self.graph.updateGraph()

    # Select the label just before the removed one (or the first one).
    newindex = self.classValuesModel.index(max(0, index - 1))
    self.classValuesView.selectionModel().select(
        newindex, QItemSelectionModel.ClearAndSelect)

    self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
def wordnet_meronyms(training, testing):
    """Build training/testing tables with one True/False feature per ancestor.

    The ancestors come from ``training.meronym_ancestor_map()``, ordered by
    descending count.  A feature is "True" for an item when that ancestor
    is among ``annotation.ancestors(i)``.

    Returns ``(training_table, testing_table)``.
    """
    ancestor_to_count = training.meronym_ancestor_map()
    used_ancestors = sorted(ancestor_to_count.keys(),
                            key=lambda anc: ancestor_to_count[anc],
                            reverse=True)
    print("name %s" % (used_ancestors[0].name,))

    attributes = [
        orange.EnumVariable(anc.name, values=["True", "False"])
        for anc in used_ancestors
    ]
    print("got %s features" % (len(used_ancestors),))
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            for attribute, ancestor in zip(attributes, used_ancestors):
                ex[attribute.name] = "True" if ancestor in ancestors else "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
def getDescriptors(self, translator, examples, buckets):
    """Collect descriptors, coefficient names and true values per attribute.

    Returns ``(descriptors, tcoeff_names, true_values)``:

    * ``descriptors`` - ``(i, -1)`` for a continuous attribute at index
      ``i`` of ``translator.trans``, ``(i, x)`` for each non-empty name of
      a nominal one;
    * ``tcoeff_names`` - per attribute, its name followed by the bucket
      values or the non-empty value names;
    * ``true_values`` - per attribute, the transformed bucket values
      (an empty list for nominal attributes).
    """
    tcoeff_names = []
    descriptors = []  # used for managing continuous atts
    true_values = []
    # used for converting bucket averages into realistic values
    proto_example = orange.Example(examples[0])

    for i, t in enumerate(translator.trans):
        names = ["%s" % t.attr.name]
        transformed = []
        d = t.description()
        if d[0] == 0:
            # continuous
            values = self.bucketize(examples, t.attr, buckets)
            names += values
            descriptors.append((i, -1))
            for v in values:
                proto_example[t.attr] = v
                tp = translator.extransform(proto_example)
                transformed.append(tp[t.idx])
        else:
            # nominal
            # NOTE(review): x counts every entry of d[2], including the
            # skipped empty names - confirm against the upstream source.
            for x, n in enumerate(d[2]):
                if n != '':
                    names.append(n)
                    descriptors.append((i, x))
        true_values.append(transformed)
        tcoeff_names.append(names)

    return descriptors, tcoeff_names, true_values
def wordnet_glosses(training, testing):
    """Build training/testing tables with one True/False feature per gloss word.

    The feature words are the keys of ``training.gloss_map()`` that are not
    stop words and whose mapped value is greater than 2.  A feature is
    "True" for an item when the word occurs in
    ``annotation.synset(i).definition``.

    Returns ``(training_table, testing_table)``.
    """
    stopwords = set(nltk.corpus.stopwords.words())
    gloss_dist = training.gloss_map()
    used_words = [
        key for key in gloss_dist.keys()
        if key not in stopwords and gloss_dist[key] > 2
    ]
    print("words %s" % (used_words,))

    attributes = [
        orange.EnumVariable(w, values=["True", "False"]) for w in used_words
    ]
    print("got %s features" % (len(used_words),))
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            # The result is unused; the call is kept as in the original.
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            ex["word"] = word
            for attribute, gloss_word in zip(attributes, used_words):
                present = gloss_word in annotation.synset(i).definition
                ex[attribute.name] = "True" if present else "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
def addNewClassLabel(self):
    """Append the first unused "Class N" label and rebuild the data with it.

    The existing examples are copied into a new table whose class variable
    holds the extended value list; the new label is then selected in the
    class values view.
    """
    # Find the lowest N for which "Class N" is not taken yet.
    number = 1
    while "Class %i" % number in self.classValuesModel:
        number += 1
    newlabel = "Class %i" % number

    extended = list(self.classValuesModel) + [newlabel]
    newclass = orange.EnumVariable("Class label", values=extended)
    newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
    newdata = orange.ExampleTable(newdomain)
    for ex in self.graph.data:
        row = [ex[a] for a in ex.domain.attributes]
        row.append(str(ex.getclass()))
        newdata.append(orange.Example(newdomain, row))

    self.classVariable = newclass
    self.classValuesModel.wrap(self.classVariable.values)

    self.graph.data = newdata
    self.graph.updateGraph()

    # Select the label that was just added (the last one).
    last = self.classValuesModel.index(len(self.classValuesModel) - 1)
    self.classValuesView.selectionModel().select(
        last, QItemSelectionModel.ClearAndSelect)

    self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
def __call__(self, example, returnType):
    """Classify ``example`` with a kNN learner trained on the widget's projection.

    The example is first sent to the widget as subset data, then projected
    with the graph's anchor data and classified in the projected space.

    Returns ``(value, probabilities)`` when ``returnType`` is
    ``orange.GetBoth``, otherwise only the value.
    """
    widget = self.radvizWidget

    # show the example if we use the widget
    subset = orange.ExampleTable(example.domain)
    subset.append(example)
    widget.setSubsetData(subset)
    widget.handleNewSignals()

    graph = widget.graph
    anchorData = graph.anchorData
    attributeNameIndex = graph.attributeNameIndex
    scaleFunction = graph.scaleExampleValue

    attrListIndices = [attributeNameIndex[anchor[2]] for anchor in anchorData]
    attrVals = [scaleFunction(example, index) for index in attrListIndices]

    projected = graph.createProjectionAsExampleTable(
        attrListIndices, scaleFactor=graph.trueScaleFactor, useAnchorData=1)
    knn = widget.optimizationDlg.createkNNLearner(kValueFormula=0)(projected)

    xTest, yTest = graph.getProjectedPointPosition(
        attrListIndices, attrVals, useAnchorData=1)
    query = orange.Example(projected.domain, [xTest, yTest, "?"])
    classVal, prob = knn(query, orange.GetBoth)

    if returnType == orange.GetBoth:
        return classVal, prob
    return classVal
def makeExample(self, landmark, figure, **args):
    """Build an Example over the engine's domain from the computed features.

    'isInsane' is set to "False" and 'class' to the empty string before the
    values are collected; the extra keyword arguments are stored under
    "geometry".
    """
    features = self.compute(landmark, figure)
    features['isInsane'] = "False"
    features['class'] = ""

    values = []
    for attr in self.engine.domain():
        values.append(features[attr.name])

    example = orange.Example(self.engine.domain(), values)
    example["geometry"] = args
    return example
def convertTable(table, newDomain):
    """Return a new ExampleTable holding ``table``'s examples in ``newDomain``.

    Values are looked up in each source example by the variables of
    ``newDomain``; the domain's meta attributes are copied over by name.
    """
    converted = orange.ExampleTable(newDomain)
    for source in table:
        values = [source[var] for var in newDomain]
        target = orange.Example(newDomain, values)
        for meta in newDomain.getmetas().values():
            target[meta.name] = source[meta.name]
        converted.append(target)
    return converted
def dataTransform(self, attr1, val1, attr2, val2):
    """Append one example with the two given values and the base class value.

    The graph is then refreshed for the newly added example.
    """
    data = self.graph.data
    point = orange.Example(data.domain)
    point[attr1] = val1
    point[attr2] = val2

    class_var = self.graph.data.domain.classVar
    point.setclass(class_var(self.graph.data.domain.classVar.baseValue))

    self.graph.data.append(point)
    self.graph.updateGraph(dataInterval=(-1, sys.maxint))
def transformClass(self, classvector):
    """Return the list of labels that ``self.cv`` produces for the class values.

    Each value is wrapped in a one-attribute Example (used for getting the
    label list) and passed to ``self.cv.apply``, which writes its result
    into a one-element buffer.
    """
    labels = []
    for value in classvector:
        buffer = [0.0]
        wrapped = orange.Example(orange.Domain([self.cv.attr]), [value])
        self.cv.apply(wrapped, buffer)
        labels.append(buffer[0])
    return labels
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a class-less ExampleTable marking which terms each rule contains.

    Every term in ``allTerms`` becomes a PRESENT/ABSENT attribute.  Each
    key of ``tableDict`` (in sorted order) yields one example with three
    meta attributes: the rule name, the rule terms string (both with their
    first and last character stripped) and the key itself as the sequence
    number.
    """
    import orange
    import constants as const

    attrList = [
        orange.EnumVariable(name=str(term),
                            values=[const.PRESENT, const.ABSENT])
        for term in allTerms
    ]

    # three meta attributes
    ruleName = orange.StringVariable(const.NAME_ATTR)
    nameId = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    termsId = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    numberId = orange.newmetaid()

    # this is a classless domain; all rule details live in meta attributes
    domain = orange.Domain(attrList, False)
    for metaId, metaVar in ((nameId, ruleName),
                            (termsId, ruleTerms),
                            (numberId, ruleNumber)):
        domain.addmeta(metaId, metaVar, False)

    table = orange.ExampleTable(domain)
    for k in sorted(tableDict):
        flags = [
            orange.Value(
                var,
                const.PRESENT
                if term in tableDict[k][const.RULETERMS_KEY]
                else const.ABSENT)
            for var, term in zip(attrList, allTerms)
        ]
        example = orange.Example(domain, flags)
        # [1:-1] skips the square brackets around the stored strings
        example[const.NAME_ATTR] = orange.Value(
            ruleName, tableDict[k][const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, tableDict[k][const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, k)
        table.append(example)
    return table
def makeExample(self, w1, w2):
    """Combine the ``wnparents`` examples of two words into one Example.

    Every attribute ``X`` of ``self.wnparents.domain`` is copied twice:
    as ``X_w1`` from the example of ``w1`` and as ``X_w2`` from that of
    ``w2``.
    """
    combined = orange.Example(self.domain)
    sources = (
        ("%s_w1", self.wnparents.makeExample(w1)),
        ("%s_w2", self.wnparents.makeExample(w2)),
    )
    for var in self.wnparents.domain.attributes:
        for template, source in sources:
            combined[template % var.name] = source[var.name]
    return combined
def data_center(data):
    """Return the central - average - point in the data set.

    Continuous attributes contribute their average, discrete attributes
    the index of the largest entry of their distribution, and any other
    attribute ``None``.  If the domain has a class variable, 0 is used as
    the class value.
    """
    atts = data.domain.attributes
    astats = orange.DomainBasicAttrStat(data)

    def central_value(attr):
        if attr.varType == orange.VarTypes.Continuous:
            return astats[attr].avg
        if attr.varType == orange.VarTypes.Discrete:
            distribution = orange.Distribution(attr, data)
            best = max(enumerate(distribution), key=lambda pair: pair[1])
            return best[0]
        return None

    center = [central_value(attr) for attr in atts]
    if data.domain.classVar:
        center.append(0)
    return orange.Example(data.domain, center)
def as_orange_table(self):
    """Return ``self.data`` as an ExampleTable of class labels.

    The domain has no ordinary attributes: only the class variable and a
    string meta attribute "word" holding the word of each item.
    """
    domain = orange.Domain([], self.orange_class_var)
    domain.addmeta(orange.newmetaid(), orange.StringVariable("word"))

    table = orange.ExampleTable(domain)
    for word, label in self.data:
        row = orange.Example(domain)
        for key, value in (("class", label), ("word", word)):
            row[key] = value
        table.append(row)
    return table
def make_example_nway(geometry, trainer, domain):
    """Merge the examples of all annotation engines into one Example.

    ``geometry["landmark"]`` is set to ``geometry["ground"]`` (the passed
    dict is modified).  Each engine listed in ``trainer.annotationEngines``
    builds an example from the geometry, unless ``geometry["figure"]`` is
    empty, and every attribute of those examples is copied by name into
    the result.
    """
    merged = orange.Example(domain)
    geometry["landmark"] = geometry["ground"]

    # Build all engine examples first, then copy their values.
    engine_examples = []
    for key in trainer.annotationEngines:
        if len(geometry["figure"]) != 0:
            engine = trainer.engineMap[key]
            engine_examples.append(
                engine.makeExample(expectInsane=True, **geometry))

    for engine_ex in engine_examples:
        for attr in engine_ex.domain:
            merged[attr.name] = engine_ex[attr.name]
    return merged
def lookupFromFunction(attribute, bound, function):
    """
    Constructs ClassifierByExampleTable or ClassifierByLookupTable
    mirroring the given function

    ``function`` is called with each combination of value indices of the
    ``bound`` attributes, as produced by ``orngMisc.LimitedCounter``.  When
    ``lookupFromBound`` returns a lookup classifier, its ``lookupTable`` is
    filled with the results.  Otherwise an example table with one example
    per combination is built and ``orange.LookupLearner`` is applied to it.
    """
    lookup = lookupFromBound(attribute, bound)
    if lookup:
        lookup.lookupTable = [
            orange.Value(attribute, function(attributes))
            for attributes in orngMisc.LimitedCounter(
                [len(attr.values) for attr in bound])
        ]
        return lookup

    # The original used ``dom`` below without ever defining it, so this
    # branch failed with a NameError; bind the domain explicitly.
    dom = orange.Domain(bound, attribute)
    examples = orange.ExampleTable(dom)
    for attributes in orngMisc.LimitedCounter(
            [len(attr.values) for attr in dom.attributes]):
        examples.append(
            orange.Example(dom, attributes + [function(attributes)]))
    return orange.LookupLearner(examples)
def dataTransform(self, attr1, x, rx, attr2, y, ry):
    """Append ``self.density`` normally distributed examples around (x, y).

    ``attr1`` is drawn with ``random.normalvariate(x, rx)`` and ``attr2``
    with ``random.normalvariate(y, ry)``; every example gets the base
    value of the class variable.  The graph is then refreshed for the
    newly added examples.
    """
    import random

    generated = []
    for _ in range(self.density):
        point = orange.Example(self.graph.data.domain)
        point[attr1] = random.normalvariate(x, rx)
        point[attr2] = random.normalvariate(y, ry)
        class_var = self.graph.data.domain.classVar
        point.setclass(class_var(self.graph.data.domain.classVar.baseValue))
        generated.append(point)

    self.graph.data.extend(generated)
    self.graph.updateGraph(dataInterval=(-len(generated), sys.maxint))
def drive(self):
    """Classify the current sensor readings and publish the matching Twist.

    ``self.tree`` maps the observation to a command string.  While
    ``self.picked_up`` is set, a zero twist is published and
    ``self.stopped`` is left alone.  An unrecognised command publishes the
    freshly constructed Twist unchanged.
    """
    # (A disabled experiment that re-trained self.tree on a random subset
    # of self.data used to be commented out here.)
    readings = [
        self.last_tag_seen, self.tag_visible,
        self.tag_x_coord, self.tag_distance,
        self.bumping, '?',
    ]
    current_observation = orange.Example(self.data.domain, readings)
    command = self.tree(current_observation).value

    # command -> (linear.x, angular.z, new value of self.stopped)
    motions = {
        'f': (0.25, 0.0, False),
        'fr': (0.25, -1.0, False),
        'fl': (0.25, 1.0, False),
        'r': (0.0, -1.0, False),
        'l': (0.0, 1.0, False),
        'b': (-0.25, 0.0, False),
        's': (0.0, 0.0, True),
    }

    twist = Twist()
    if self.picked_up:
        twist.linear.x = 0.0
        twist.angular.z = 0.0
    elif command in motions:
        twist.linear.x, twist.angular.z, self.stopped = motions[command]
    self.pub.publish(twist)
def makeExample(self, expectInsane=False, **iArgs):
    """Build an Example over ``self._domain`` from the computed features.

    Parameters:
        expectInsane: when true, an ``InsaneExample`` raised while
            computing the features is turned into an otherwise empty
            Example whose 'isInsane' value is "True"; when false the
            exception propagates.
        **iArgs: passed on to ``self._flist.computeAndVisualize``.

    Returns:
        The constructed ``orange.Example``.  (The original built the
        example but never returned it, so callers always received None.)

    Raises:
        InsaneExample: if raised by the feature computation and
            ``expectInsane`` is false.  The keyword arguments are attached
            to the exception as ``exArgs`` in either case.
    """
    args = iArgs
    try:
        features, drawMap = self._flist.computeAndVisualize(**args)
        features['isInsane'] = "False"
        # The class value is left empty; everything else comes from features.
        lst = [
            "" if attr.name == "class" else features[attr.name]
            for attr in self._domain
        ]
        ex = orange.Example(self._domain, lst)
        ex['drawMap'] = drawMap
    except InsaneExample as e:
        e.exArgs = iArgs
        if not expectInsane:
            raise
        ex = orange.Example(self._domain)
        ex['isInsane'] = "True"
    return ex
def test_indexing(self):
    """Check reading and writing Example values by position, name and variable."""
    # Continuous domain: positional access and the class value.
    cont = orange.Example(self.contdomain)
    self.assertEqual(len(cont), 5)
    for position in range(5):
        cont[position] = position
    for position in range(5):
        self.assertEqual(cont[position], position)
    self.assertEqual(cont.getclass(), 4)
    cont.setclass(42)
    self.assertEqual(cont.getclass(), 42)

    # Discrete domain: values are given and compared as strings.
    names = ["ana"] * 3 + ["berta", "cilka"]
    disc = orange.Example(self.discdomain)
    for position, name in enumerate(names):
        disc[position] = name
    for position, name in enumerate(names):
        self.assertEqual(disc[position], name)

    disc.setclass("ana")
    self.assertEqual(disc.getclass(), "ana")
    disc.setclass("cilka")

    by_position = ((0, "ana"), (3, "berta"), (4, "cilka"))
    by_name = (("A", "ana"), ("D", "berta"), ("E", "cilka"))

    for position, expected in by_position:
        self.assertEqual(disc[position], expected)
    self.assertEqual(disc[-1], "cilka")

    for name, expected in by_name:
        self.assertEqual(disc[name], expected)

    # Indexing with variable objects.
    for position, expected in by_position:
        self.assertEqual(disc[self.discvars[position]], expected)
    for name, expected in by_name:
        self.assertEqual(disc[self.domain[name]], expected)
def merge(self, dataA, dataB, varA, varB):
    """ Merge two tables

    Each example of ``dataA`` is paired with the first example of
    ``dataB`` whose ``varB`` value equals its ``varA`` value.  Only the
    attributes, metas and class of ``dataB`` that ``dataA`` does not
    already have are added.  Examples without a match, and the values
    "?", "~" and "", get unknown values.
    """
    # native value of varB -> index of the first row of dataB holding it
    val2idx = {}
    for position, row in enumerate(dataB):
        val2idx.setdefault(row[varB].native(), position)
    for key in ["?", "~", ""]:
        val2idx.pop(key, None)

    metasA = dataA.domain.getmetas().items()
    metasB = dataB.domain.getmetas().items()

    domainA = dataA.domain
    includedAttsB = [attr for attr in dataB.domain if attr not in domainA]
    includedMetaB = [
        (mid, meta) for mid, meta in metasB if (mid, meta) not in metasA
    ]
    classB = dataB.domain.classVar
    includedClassVarB = classB and dataB.domain.classVar not in dataA.domain

    reducedDomainB = orange.Domain(includedAttsB, includedClassVarB)
    reducedDomainB.addmetas(dict(includedMetaB))

    mergingB = orange.ExampleTable(reducedDomainB)
    for ex in dataA:
        match = val2idx.get(ex[varA].native(), None)
        if match is None:
            values = ["?"] * len(reducedDomainB)
        else:
            values = dataB[match]
        mergingB.append(orange.Example(reducedDomainB, values))

    return orange.ExampleTable([dataA, mergingB])
def test_append2(self):
    """Check appending a list, a copied Example and a row to a sliced table."""
    table = orange.ExampleTable("iris")
    table.shuffle()
    size_before = len(table)

    # Appending a plain list adds one row with those values.
    table.append([1, 2, 3, 4, 0])
    self.assertEqual(len(table), size_before + 1)
    self.assertEqual(table[-1], [1, 2, 3, 4, 0])

    # Appending a copy of an existing row.
    copied = orange.Example(table[10])
    table.append(copied)
    self.assertEqual(table[-1], table[10])

    # Appending to a slice of the table.
    head = table[:50]
    head.append(table[50])
    self.assertEqual(head[50], table[50])