Пример #1
0
 def __init__(self, training):
     """Build the pairwise (word1, word2) domain and the training table.

     Every attribute of the WordnetParentsEngine domain is duplicated into
     a ``<name>_w1`` and a ``<name>_w2`` attribute of the same kind, and
     the class variable takes one of "Larger", "Smaller", "Equal", "None".

     Raises:
         ValueError: if an engine attribute is neither a FloatVariable
             nor an EnumVariable.
     """
     self.training = training
     self.wnparents = trainer.WordnetParentsEngine(training)
     labels = ["Larger", "Smaller", "Equal", "None"]
     self.cls_variable = orange.EnumVariable("class", values=labels)

     alist = []
     for var in self.wnparents.domain.attributes:
         if isinstance(var, orange.FloatVariable):
             factory = orange.FloatVariable
             extra = {}
         elif isinstance(var, orange.EnumVariable):
             factory = orange.EnumVariable
             # Enum copies must carry the same value set as the source.
             extra = {"values": var.values}
         else:
             # repr() replaces the Python-2-only backtick syntax.
             raise ValueError("Unhandled attribute: " + repr(var))
         # One copy per word of the pair, in (w1, w2) order.
         for suffix in ("w1", "w2"):
             alist.append(factory(name="%s_%s" % (var.name, suffix),
                                  **extra))

     self.domain = orange.Domain(alist, self.cls_variable)
     self.training_table = self.makeTable(self.training)
Пример #2
0
def bench_orange(X, y, T, valid):
    """Train and score an Orange Nu-SVC (RBF kernel) and time the run.

    Args:
        X: 2-D training feature array.
        y: training labels; negative labels are mapped to 0.
        T: 2-D test feature array.
        valid: test labels; negative labels are mapped to 0.

    Returns:
        (score, elapsed): mean accuracy on the test set and the elapsed
        ``datetime.timedelta``.

    The caller's ``y`` and ``valid`` arrays are left untouched; the
    relabelled column vectors Orange needs are built as copies.
    """
    #
    #       .. Orange ..
    #
    import orange
    start = datetime.now()

    # prepare data in Orange's format
    columns = ["a" + str(i) for i in range(X.shape[1])]
    class_values = ['0', '1']
    domain = orange.Domain(
        [orange.FloatVariable(name) for name in columns],
        orange.EnumVariable("class", values=class_values))

    # Orange expects class labels 0..K as the last column of the table.
    y_col = np.where(y < 0, 0, y).reshape(-1, 1)
    valid_col = np.where(valid < 0, 0, valid).reshape(-1, 1)

    orng_train_data = orange.ExampleTable(domain, np.hstack((X, y_col)))
    orng_test_data = orange.ExampleTable(domain, np.hstack((T, valid_col)))

    # NOTE: `sigma` is read from the enclosing module scope.
    classifier = orange.SVMLearner(orng_train_data,
                                   svm_type=orange.SVMLearner.Nu_SVC,
                                   kernel_type=orange.SVMLearner.RBF, C=1.,
                                   gamma=1. / sigma)

    pred = np.empty(T.shape[0], dtype=np.int32)
    for i, e in enumerate(orng_test_data):
        pred[i] = classifier(e)

    # Compare against a flat label vector: comparing the (n,) predictions
    # with an (n, 1) column would broadcast to an (n, n) matrix and yield
    # a meaningless mean.
    score = np.mean(pred == valid_col.ravel())
    return score, datetime.now() - start
Пример #3
0
    def get_domain(self):
        """Return the Orange domain for the known objects, building it once.

        The domain has one binary ("0"/"1") attribute per entry of
        ``self.known_objects`` followed by a "classname" attribute with
        values "-1", "0" and "1".  The result is cached in ``self.domain``.
        """
        # Identity check: never rely on the domain object's own __ne__.
        if self.domain is not None:
            return self.domain

        values = ["0", "1"]
        attributes = [
            orange.EnumVariable(name, values=values)
            for name in self.known_objects
        ]
        classattr = orange.EnumVariable("classname", values=["-1", "0", "1"])
        self.domain = orange.Domain(attributes + [classattr])

        return self.domain
Пример #4
0
def __makeExampleTable(namesDict, data):
    """Build an Orange example table with one row per sample.

    ``data`` is indexed as ``data[geneID][groupKey][sampleName]``; the
    sorted gene IDs become continuous attributes.  Rows for the control
    group come first, then rows for the data group, each tagged with its
    group key as the class value.
    """
    import orange
    from constants import CLASS_ATRR_NAME, CONTROL_GROUP_KEY, DATA_GROUP_KEY

    gene_ids = sorted(data.keys())
    gene_vars = []
    for gene_id in gene_ids:
        gene_vars.append(orange.FloatVariable(name=str(gene_id)))
    group_var = orange.EnumVariable(name=CLASS_ATRR_NAME,
                                    values=[CONTROL_GROUP_KEY, DATA_GROUP_KEY])
    domain = orange.Domain(gene_vars, group_var)
    table = orange.ExampleTable(domain)

    # Control samples first, then the data-group samples.
    for group_key in (CONTROL_GROUP_KEY, DATA_GROUP_KEY):
        for sample_name in namesDict[group_key].keys():
            row = [data[gene_id][group_key][sample_name]
                   for gene_id in gene_ids]
            row = row + [group_key]
            table.append(orange.Example(domain, row))

    return table
Пример #5
0
def wordnet_meronyms(training, testing):

    ancestor_to_count = training.meronym_ancestor_map()

    all_ancestors = list(ancestor_to_count.keys())
    all_ancestors.sort(key=lambda a: ancestor_to_count[a], reverse=True)

    used_ancestors = all_ancestors
    print "name", used_ancestors[0].name
    attributes = [
        orange.EnumVariable(a.name, values=["True", "False"])
        for a in used_ancestors
    ]
    print "got", len(used_ancestors), "features"
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ancestors = annotation.ancestors(i)
            ex = orange.Example(domain)
            ex["class"] = label
            for a_i, a in enumerate(attributes):
                ancestor_i = used_ancestors[a_i]
                if ancestor_i in ancestors:
                    ex[a.name] = "True"
                else:
                    ex[a.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
Пример #6
0
    def addNewClassLabel(self):
        """Append a fresh "Class <n>" label and rebuild the data around it.

        The first unused "Class <n>" name (n starting at 1) is added to the
        class values, the graph data is copied into a domain using the new
        class variable, and the new label is selected in the view.
        """
        # Find the first label name that is not taken yet.
        counter = 1
        newlabel = "Class %i" % counter
        while newlabel in self.classValuesModel:
            counter += 1
            newlabel = "Class %i" % counter

        extended_values = list(self.classValuesModel) + [newlabel]
        newclass = orange.EnumVariable("Class label", values=extended_values)
        newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)

        # Copy every example, carrying the class over by its string label.
        newdata = orange.ExampleTable(newdomain)
        for ex in self.graph.data:
            row = [ex[attr] for attr in ex.domain.attributes]
            row = row + [str(ex.getclass())]
            newdata.append(orange.Example(newdomain, row))

        self.classVariable = newclass
        self.classValuesModel.wrap(self.classVariable.values)

        self.graph.data = newdata
        self.graph.updateGraph()

        # Select the label that was just added (last row of the model).
        last_row = len(self.classValuesModel) - 1
        newindex = self.classValuesModel.index(last_row)
        selection = self.classValuesView.selectionModel()
        selection.select(newindex, QItemSelectionModel.ClearAndSelect)

        self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
Пример #7
0
    def sendData(self, km=None):
        """Send the clustered examples and the centroids to the outputs.

        ``km`` defaults to the best optimized run when ``self.optimized`` is
        set, otherwise to ``self.km``.  ``self.addIdAs`` selects where the
        cluster variable goes: 0 = class variable (the original class, if
        any, becomes a meta), 1 = extra attribute, anything else = meta.
        """
        if km is None:
            if self.optimized:
                km = self.bestRun[1]
            else:
                km = self.km
        if not (self.data and km):
            for channel in ("Examples", "Centroids"):
                self.send(channel, None)
            return

        cluster_names = ["C%d" % (x + 1) for x in range(km.k)]
        clustVar = orange.EnumVariable(self.classifyName, values=cluster_names)

        source = self.data.domain
        if self.addIdAs == 0:
            # Cluster becomes the class; keep the old class as a meta.
            domain = orange.Domain(source.attributes, clustVar)
            if source.classVar:
                domain.addmeta(orange.newmetaid(), source.classVar)
            aid = -1
        elif self.addIdAs == 1:
            # Cluster is appended as the last ordinary attribute.
            domain = orange.Domain(source.attributes + [clustVar],
                                   source.classVar)
            aid = len(source.attributes)
        else:
            # Cluster is stored as a meta attribute.
            domain = orange.Domain(source.attributes, source.classVar)
            aid = orange.newmetaid()
            domain.addmeta(aid, clustVar)

        domain.addmetas(source.getmetas())

        # Copy the data into the new domain and fill in the cluster index.
        clustered = orange.ExampleTable(domain, self.data)
        for example, cluster_index in izip(clustered, km.clusters):
            example[aid] = cluster_index

        self.send("Examples", clustered)
        self.send("Centroids", orange.ExampleTable(km.centroids))
Пример #8
0
    def removeSelectedClassLabel(self):
        """Remove the selected class label and all examples carrying it.

        Does nothing when no label is selected or when only one label is
        left.  The data is rebuilt in a domain whose class variable lacks
        the removed value, and the selection moves to the previous row.
        """
        index = self.selectedClassLabelIndex()
        if index is None or len(self.classValuesModel) <= 1:
            return

        label = self.classValuesModel[index]
        survivors = []
        for ex in self.graph.data:
            if str(ex.getclass()) != label:
                survivors.append(ex)

        values = [val for val in self.classValuesModel if val != label]
        newclass = orange.EnumVariable("Class label", values=values)
        newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
        newdata = orange.ExampleTable(newdomain)
        for ex in survivors:
            if ex[self.classVariable] == label:
                continue
            if ex[self.classVariable] not in values:
                continue
            row = [ex[attr] for attr in ex.domain.attributes]
            row = row + [str(ex.getclass())]
            newdata.append(orange.Example(newdomain, row))

        self.classVariable = newclass
        self.classValuesModel.wrap(self.classVariable.values)

        self.graph.data = newdata
        self.graph.updateGraph()

        # Move the selection to the row above the removed one (or row 0).
        newindex = self.classValuesModel.index(max(0, index - 1))
        selection = self.classValuesView.selectionModel()
        selection.select(newindex, QItemSelectionModel.ClearAndSelect)

        self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
Пример #9
0
def wordnet_glosses(training, testing):
    """Build training/testing tables of boolean gloss-word features.

    A feature is created for every key of ``training.gloss_map()`` that is
    not an NLTK stopword and has a count above 2.  For each annotated word
    the feature is "True" when the feature word occurs in the definition of
    the word's synset, otherwise "False".

    Returns ``(training_table, testing_table)``.
    """
    stopwords = set(nltk.corpus.stopwords.words())
    gloss_dist = training.gloss_map()
    used_words = [
        k for k in gloss_dist.keys()
        if k not in stopwords and gloss_dist[k] > 2
    ]

    print("words %s" % (used_words,))

    attributes = [
        orange.EnumVariable(a, values=["True", "False"]) for a in used_words
    ]
    print("got %d features" % len(used_words))
    domain = orange.Domain(attributes, training.orange_class_var)

    results = []
    for annotation in [training, testing]:
        table = orange.ExampleTable(domain)
        results.append(table)
        for i, (word, label) in enumerate(annotation.data):
            ex = orange.Example(domain)
            ex["class"] = label
            # NOTE(review): the domain built above declares no "word"
            # variable; confirm this assignment is supported by the domain.
            ex["word"] = word
            # The definition does not depend on the feature, so look it up
            # once per example instead of once per feature.
            definition = annotation.synset(i).definition
            for attr, gloss_word in zip(attributes, used_words):
                if gloss_word in definition:
                    ex[attr.name] = "True"
                else:
                    ex[attr.name] = "False"
            table.append(ex)

    training_table, testing_table = results
    return training_table, testing_table
Пример #10
0
def cforange_hierarchical_clustering_finished(postdata, input_dict,
                                              output_dict):
    """Split clustered examples by the user's selection and build centroids.

    Args:
        postdata: posted form data; ``widget_id`` and ``selected_nodes``
            are read as lists whose first element is used.  The first
            ``selected_nodes`` element is JSON decoding to a sequence of
            ``(example index, selected flag, cluster number)`` triples.
        input_dict: must hold the distance matrix under ``'dm'`` and the
            linkage under ``'linkage'``.
        output_dict: not used by this function.

    Returns:
        dict with keys ``'centroids'``, ``'selected_examples'`` and
        ``'unselected_examples'``; all three are None when the matrix
        items are not an ExampleTable.

    Raises:
        Exception: when ``selected_nodes`` is missing or is not valid JSON.
    """
    import json
    import orange
    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    # Not used below; the lookup still fails loudly if widget_id is absent.
    widget_pk = postdata['widget_id'][0]
    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except (TypeError, ValueError, IndexError):
        # Missing key (None[0]), empty list, or undecodable JSON.
        raise Exception('Please select a threshold for determining clusters.')
    if isinstance(matrix.items, orange.ExampleTable):
        # NOTE(review): the clustering result is not used below; confirm
        # whether this call is still needed.
        root = Clustering.hierarchical_clustering(linkage, matrix)
        cluster_ids = set(cluster for _, _, cluster in selected_nodes)
        selected_clusters = set(
            cluster for _, selected, cluster in selected_nodes if selected)
        clustVar = orange.EnumVariable(
            str('Cluster'),
            values=["Cluster %d" % i for i in cluster_ids] + ["Other"])
        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())
        # Build table with selected clusters
        selected_table = orange.ExampleTable(domain)
        unselected_table = orange.ExampleTable(domain)
        for node_id, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[node_id])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)
        # Build table of centroids
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                clusterEx = orange.ExampleTable([
                    ex for ex in selected_table
                    if ex[clustVar] == "Cluster %d" % cluster
                ])
                # Attribute statistics: mean for attributes with continuous
                # statistics, mode for those with a distribution, "?" else.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                ex = [
                    cs.avg if cs else (ds.modus() if ds else "?")
                    for cs, ds in zip(contstat, discstat)
                ]
                example = orange.Example(centroids.domain, ex)
                example[clustVar] = clustVar("Cluster %d" % cluster)
                centroids.append(example)
    else:  # Attribute distance
        centroids, selected_table, unselected_table = None, None, None
    return {
        'centroids': centroids,
        'selected_examples': selected_table,
        'unselected_examples': unselected_table
    }
Пример #11
0
 def __init__(self, var1, var2):
     """Create the class variable for the Cartesian product of two variables.

     The new variable is named ``<var1.name>x<var2.name>`` and has one
     value ``<v1>-<v2>`` per pair of values, with var1 varying slowest.
     """
     self.var1, self.var2 = var1, var2
     self.noValues2 = len(var2.values)

     product_name = "%sx%s" % (var1.name, var2.name)
     self.classVar = orange.EnumVariable(product_name)

     pair_labels = []
     for first in var1.values:
         for second in var2.values:
             pair_labels.append("%s-%s" % (first, second))
     self.classVar.values = pair_labels
Пример #12
0
def make_orange_dataset(X, y, n_classes):
    """Wrap a feature matrix and label vector in an Orange ExampleTable.

    Columns of ``X`` become continuous attributes ``feature_0``,
    ``feature_1``, ...; ``y`` becomes the class ``y`` whose values are the
    strings "0" .. str(n_classes - 1).
    """
    label_values = []
    for k in range(n_classes):
        label_values.append(str(k))

    feature_vars = []
    for j in range(X.shape[1]):
        feature_vars.append(orange.FloatVariable("feature_%d" % j))

    label_var = orange.EnumVariable("y", values=label_values)
    domain = orange.Domain(feature_vars, label_var)

    # Labels go in as the last column of each row.
    rows = np.hstack((X, y.reshape(-1, 1)))
    return orange.ExampleTable(domain, rows)
Пример #13
0
    def to_network(self, terms=None):
        """
        Return an Orange.network.Network instance constructed from
        this ontology.

        The network gets one node per term and one edge per
        (term, related term) pair; each edge stores a 0/1 flag for every
        edge type.  ``network.items`` holds the id, name and first "def"
        value of every term, ``network.links`` holds one row per edge.

        NOTE(review): the ``terms`` argument is overwritten with
        ``self.terms()`` before it is read, so any value passed in is
        ignored.

        """
        edge_types = self.edge_types()
        terms = self.terms()
        from Orange.orng import orngNetwork
        import orange

        network = orngNetwork.Network(len(terms), True, len(edge_types))
        # Map each term id to its position in `terms`.
        network.objects = dict([(term.id, i) for i, term in enumerate(terms)])

        # (source id, related term) -> set of relation types between them.
        edges = defaultdict(set)
        for term in self.terms():
            related = self.related_terms(term)
            for relType, relTerm in related:
                edges[(term.id, relTerm)].add(relType)

        edgeitems = edges.items()
        for (src, dst), eTypes in edgeitems:
            # One 0/1 flag per known edge type, in edge_types order.
            network[src, dst] = [1 if e in eTypes else 0 for e in edge_types]

        # Classless domain describing the nodes.
        domain = orange.Domain([
            orange.StringVariable("id"),
            orange.StringVariable("name"),
            orange.StringVariable("def"),
        ], False)

        items = orange.ExampleTable(domain)
        for term in terms:
            ex = orange.Example(
                domain, [term.id, term.name,
                         term.values.get("def", [""])[0]])
            items.append(ex)

        # NOTE(review): `relationships` is computed but never used below.
        relationships = set(
            [", ".join(sorted(eTypes)) for (_, _), eTypes in edgeitems])
        # Classless domain describing the edges.
        domain = orange.Domain([
            orange.FloatVariable("u"),
            orange.FloatVariable("v"),
            orange.EnumVariable("relationship", values=list(edge_types))
        ], False)

        # Link endpoints are numbered from 1.
        id2index = dict([(term.id, i + 1) for i, term in enumerate(terms)])
        links = orange.ExampleTable(domain)
        for (src, dst), eTypes in edgeitems:
            # pop() removes one arbitrary relation type from the set, so
            # only a single type is recorded per link and `edges` is
            # mutated in the process.
            ex = orange.Example(domain,
                                [id2index[src], id2index[dst],
                                 eTypes.pop()])
            links.append(ex)

        network.items = items
        network.links = links
        network.optimization = None
        return network
Пример #14
0
def createClassVar(attributes, MQCNotation=False):
    """Create the "Q" class variable over all sign combinations.

    Each attribute takes one of three states (0 -> "+", 1 -> "-",
    2 -> left out).  With ``MQCNotation`` a value looks like
    ``"+-(a, b)"``; otherwise it looks like ``"Q(+a, -b)"``.
    """
    import orngMisc

    def mqc_label(state):
        # Signs first, then the names of the attributes that take part.
        signs = "".join(["+-"[x] for x in state if x < 2])
        names = ", ".join(
            [attr for attr, x in zip(attributes, state) if x < 2])
        return "%s(%s)" % (signs, names)

    def q_label(state):
        # Each participating attribute is prefixed with its own sign.
        signed = ["+-"[x] + attr for attr, x in zip(attributes, state)
                  if x < 2]
        return "Q(%s)" % ", ".join(signed)

    if MQCNotation:
        make_label = mqc_label
    else:
        make_label = q_label

    labels = [make_label(state)
              for state in orngMisc.LimitedCounter([3] * len(attributes))]
    return orange.EnumVariable("Q", values=labels)
Пример #15
0
def __make_rule_term_example_table(tableDict, allTerms):
    """Build a classless rule-by-term presence table.

    Every term in ``allTerms`` becomes a PRESENT/ABSENT attribute.  Each
    rule in ``tableDict`` (processed in sorted key order) yields one row
    with three meta attributes: the rule name, the rule terms as a string
    (both with the first and last character stripped) and the rule's key
    as a sequence number.
    """
    import orange
    import constants as const

    attrList = []
    for term in allTerms:
        attrList.append(
            orange.EnumVariable(name=str(term),
                                values=[const.PRESENT, const.ABSENT]))

    # Three meta attributes, each with its own freshly allocated id.
    ruleName = orange.StringVariable(const.NAME_ATTR)
    name_id = orange.newmetaid()
    ruleTerms = orange.StringVariable(const.TERMS_ATTR)
    terms_id = orange.newmetaid()
    ruleNumber = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                      startValue=1,
                                      endValue=len(tableDict),
                                      stepValue=1,
                                      numberOfDecimals=0)
    number_id = orange.newmetaid()

    # The domain has no class variable.
    domain = orange.Domain(attrList, False)
    for meta_id, meta_var in ((name_id, ruleName),
                              (terms_id, ruleTerms),
                              (number_id, ruleNumber)):
        domain.addmeta(meta_id, meta_var, False)

    table = orange.ExampleTable(domain)

    for key in sorted(tableDict.keys()):
        row = [
            orange.Value(
                attr,
                const.PRESENT
                if term in tableDict[key][const.RULETERMS_KEY]
                else const.ABSENT)
            for attr, term in zip(attrList, allTerms)
        ]
        example = orange.Example(domain, row)

        # [1:-1] drops the surrounding square brackets from the strings.
        example[const.NAME_ATTR] = orange.Value(
            ruleName, tableDict[key][const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            ruleTerms, tableDict[key][const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(ruleNumber, key)

        table.append(example)
    return table
Пример #16
0
def addDummyClass(data):
    """Return ``data`` with a class variable, adding a dummy one if needed.

    When ``data.domain.classVar`` is falsy, the data is converted to a new
    domain with the same attributes plus a single-valued "dummyClass"
    class variable.  Otherwise ``data`` is returned unchanged.
    """
    print("********************data.domain.classVar*****************")
    print(data.domain.classVar)
    if not data.domain.classVar:
        # `values` must be passed as a keyword; the domain class is
        # `orange.Domain` (capitalised).
        newAttr = orange.EnumVariable("dummyClass", values=["dummyClass"])
        newDomain = orange.Domain(data.domain.attributes, newAttr)
        data = dataUtilities.DataTable(newDomain, data)
    return data
Пример #17
0
def makeDomain(names):
    """Build the feature domain: float attributes, a boolean class, metas.

    ``names`` become continuous attributes and the class is a
    "True"/"False" variable called "class".  Meta attributes are added in
    a fixed order: weight, isInsane, four string metas and five Python
    object metas.
    """
    feature_vars = []
    for name in names:
        feature_vars.append(orange.FloatVariable(name))
    class_var = orange.EnumVariable("class", values=["True", "False"])
    domain = orange.Domain(feature_vars, class_var)

    domain.addmeta(orange.newmetaid(), orange.FloatVariable("weight"))
    domain.addmeta(orange.newmetaid(),
                   orange.EnumVariable("isInsane", values=["True", "False"]))

    string_metas = ("filename", "sourceEngineName", "engineName",
                    "landmarkName")
    for meta_name in string_metas:
        domain.addmeta(orange.newmetaid(), orange.StringVariable(meta_name))

    python_metas = ("geometry", "track", "drawMap", "description", "farAway")
    for meta_name in python_metas:
        domain.addmeta(orange.newmetaid(), orange.PythonVariable(meta_name))

    return domain
Пример #18
0
    def __call__(self, examples, weight=0):
        """Train one binary classifier per row of the class-coding matrix.

        Args:
            examples: training examples with a discrete class.
            weight: id of the weight meta attribute, or 0 for unweighted.

        Returns:
            For classes with at most two values, whatever the wrapped
            learner returns.  Otherwise the result of ``self.pestimator``
            applied to the ``(classifier, example count)`` pairs, the
            coding matrix and the original domain.

        Raises:
            TypeError: if the class variable's ``varType`` is not 1.
            ValueError: if some row of the coding matrix selects no
                examples.
        """
        class_var = examples.domain.classVar
        # String exceptions are invalid since Python 2.6; raise real ones.
        if class_var.varType != 1:
            raise TypeError("MultiClassLearner only works with discrete class")

        # simple handling for simple 2-class problems
        if len(class_var.values) <= 2:
            if weight != 0:
                return self.learner(examples, weight)
            return self.learner(examples)

        # count the classes and generate the classifier matrix
        nc = len(class_var.values)
        nv = len(examples.domain.attributes)
        template = self.matrix(nc)

        # prepare the domain, and the new binary class
        binary = orange.EnumVariable(name="binary", values=['0', '1'])
        b0 = binary(0)
        b1 = binary(1)
        nd = orange.Domain(examples.domain.attributes + [binary])

        # generate all classifiers
        cm = []
        for codes in template:
            exs = orange.ExampleTable(nd)
            if weight != 0:
                exs.addMetaAttribute(1)
            for source in examples:
                # +1 -> positive example, -1 -> negative example,
                # anything else -> the class is left out of this problem.
                code = codes[int(source.getclass())]
                if code == 1:
                    target = b1
                elif code == -1:
                    target = b0
                else:
                    continue
                row = [source[k] for k in range(nv)]
                row.append(target)
                converted = orange.Example(nd, row)
                if weight != 0:
                    # Carry the example weight over under meta id 1.
                    converted.setmeta(source.getMetaAttribute(weight), 1)
                exs.append(converted)
            # prepare the classifier
            if len(exs) <= 0:
                raise ValueError(
                    "MultiClass: More than one of the declared class values do not appear in the data. Filter them out.")
            if weight != 0:
                c = self.learner(exs, weight=1)
            else:
                c = self.learner(exs)
            cm.append((c, len(exs)))
        return self.pestimator(cm, template, examples.domain)
    def get_domain_obs(self):
        """Build the domain for observation features.

        One continuous attribute per entry of the dataset's observation
        alphabet, followed by a "classname" attribute whose values are the
        stringified label alphabet.  A new domain is built on every call.
        """
        feature_vars = []
        for feature_name in self.dataset.obs_alphabet:
            feature_vars.append(orange.FloatVariable(feature_name))

        label_values = []
        for label in self.dataset.label_alphabet:
            label_values.append(str(label))

        class_var = orange.EnumVariable("classname", values=label_values)
        return orange.Domain(feature_vars + [class_var])
    def get_domain_trans(self):
        """Build the domain for transition features.

        One continuous attribute per entry of the dataset's transition
        alphabet, followed by a "classname" attribute with one value per
        ordered label pair ("0" .. str(n*n - 1), n = number of labels).
        A new domain is built on every call.
        """
        feature_vars = []
        for feature_name in self.dataset.trans_alphabet:
            feature_vars.append(orange.FloatVariable(feature_name))

        pair_count = len(self.dataset.label_alphabet)**2
        pair_values = []
        for pair_index in range(pair_count):
            pair_values.append(str(pair_index))

        class_var = orange.EnumVariable("classname", values=pair_values)
        return orange.Domain(feature_vars + [class_var])
Пример #21
0
    def sendpredictions(self):
        """Send the data extended with each predictor's output as metas.

        For a discrete output variable, per-class probability metas are
        added for the selected classes and, when ``self.showClass`` is set,
        one predicted-class meta per predictor.  For other output types one
        predicted-value meta per predictor is added.  When
        ``self.doPrediction`` is set the class column is filled in using
        the first predictor.
        """
        if not self.data or not self.outvar:
            self.send("Predictions", None)
            return

        # predictions, data set with class predictions
        classification = self.outvar.varType == orange.VarTypes.Discrete

        metas = []
        if classification:
            if len(self.selectedClasses):
                for c in self.predictors.values():
                    for i in self.selectedClasses:
                        meta_name = str(
                            "%s(%s)" % (c.name, str(self.outvar.values[i])))
                        # Bind predictor and class index as defaults so
                        # every meta keeps its own pair.
                        prob_meta = orange.FloatVariable(
                            name=meta_name,
                            getValueFrom=lambda ex, rw, cindx=i, c=c:
                            orange.Value(c(ex, c.GetProbabilities)[cindx]))
                        metas.append(prob_meta)
            if self.showClass:
                for c in self.predictors.values():
                    class_meta = orange.EnumVariable(
                        name=str(c.name),
                        values=self.outvar.values,
                        getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                    metas.append(class_meta)
        else:
            # regression
            for c in self.predictors.values():
                value_meta = orange.FloatVariable(
                    name="%s" % c.name,
                    getValueFrom=lambda ex, rw, c=c: orange.Value(c(ex)))
                metas.append(value_meta)

        classVar = self.outvar
        domain = orange.Domain(self.data.domain.attributes + [classVar])
        domain.addmetas(self.data.domain.getmetas())
        for meta_var in metas:
            domain.addmeta(orange.newmetaid(), meta_var)

        predictions = orange.ExampleTable(domain, self.data)
        if self.doPrediction:
            first_predictor = self.predictors.values()[0]
            for ex in predictions:
                ex[classVar] = first_predictor(ex)

        predictions.name = self.data.name
        self.send("Predictions", predictions)

        self.changedFlag = False
Пример #22
0
def cforange_attribute_distance(input_dict):
    """Fill a symmetric matrix with pairwise attribute dissimilarities.

    Args:
        input_dict: must hold the data under ``'dataset'`` and the
            interaction mode under ``'classInteractions'`` (0 = chi-square,
            1 = 2-way interactions, 2 = 3-way interactions; nothing is
            computed for values of 3 or more).

    Returns None early when there are fewer than two attributes, when
    discretization fails, or when the data has no class and the mode is
    not 0.

    NOTE(review): the filled ``matrix`` is never returned in the visible
    code, so the function falls off the end and returns None; confirm the
    intended return value against the callers.
    """
    import orange
    import orngInteract
    inputdata = input_dict['dataset']
    classInteractions = int(input_dict['classInteractions'])
    atts = inputdata.domain.attributes
    if len(atts) < 2:
        return None
    matrix = orange.SymMatrix(len(atts))
    matrix.setattr('items', atts)
    if classInteractions < 3:
        if inputdata.domain.hasContinuousAttributes():
            # Continuous attributes are discretized into 4 intervals
            # before the interaction analysis.
            try:
                data = orange.Preprocessor_discretize(
                    inputdata,
                    method=orange.EquiNDiscretization(numberOfIntervals=4))
            except orange.KernelException:
                return None
        else:
            data = inputdata

        # Classless data only works for mode 0: add a throw-away binary
        # class so the interaction matrix can be built.
        if not data.domain.classVar:
            if classInteractions == 0:
                classedDomain = orange.Domain(
                    data.domain.attributes,
                    orange.EnumVariable("foo", values=["0", "1"]))
                data = orange.ExampleTable(classedDomain, data)
            else:
                return None

        im = orngInteract.InteractionMatrix(data, dependencies_too=1)
        off = 1
        if classInteractions == 0:
            diss, labels = im.exportChi2Matrix()
            off = 0
        elif classInteractions == 1:
            (diss, labels) = im.depExportDissimilarityMatrix(
                jaccard=1)  # 2-interactions
        else:
            (diss, labels) = im.exportDissimilarityMatrix(
                jaccard=1)  # 3-interactions

        # Copy the lower triangle, shifted down by `off` rows.
        for i in range(len(atts) - off):
            for j in range(i + 1):
                matrix[i + off, j] = diss[i][j]
Пример #23
0
    def makeDomain(self):
        """Build the verb domain: float features, isInsane, a boolean class.

        The class variable is named "verbclass" rather than "class".
        Six Python-object meta attributes are added in a fixed order.
        """
        attributes = []
        for feature_name in self.features.names:
            attributes.append(orange.FloatVariable(feature_name))
        attributes.append(
            orange.EnumVariable("isInsane", values=["True", "False"]))

        # Orange broke when two enum variables shared a name but had
        # different values: the spatial-relations class is called "class"
        # and has three values ("bad tracking").  It had to do with
        # pickling/unpickling and importing; renaming this class attribute
        # fixed it. -- stefie10, 1/13/2009
        class_var = orange.EnumVariable("verbclass", values=["True", "False"])
        domain = orange.Domain(attributes, class_var)

        meta_names = ("drawMap", "entry", "situation", "engine",
                      "description", "exceptions")
        for meta_name in meta_names:
            domain.addmeta(orange.newmetaid(),
                           orange.PythonVariable(meta_name))
        return domain
Пример #24
0
    def __call__(self, table, bound, weight=0):
        """Create a random lookup-based attribute from the bound attributes.

        Args:
            table: examples whose domain resolves the names in ``bound``.
            bound: attributes (or their names/indices) to combine.
            weight: accepted for interface compatibility; not used.

        Returns:
            ``(newattr, quality)``: an EnumVariable with values
            ``r0 .. r<n-1>`` whose lookup table is filled with random value
            indices, and a random integer in [0, 100].

        Raises:
            AttributeError: if ``bound`` is empty.
        """
        bound = [table.domain[a] for a in bound]
        # Check first: building the name from an empty list used to fail
        # with an unrelated TypeError before this guard was reached.
        if not bound:
            raise AttributeError("no bound attributes")

        newattr = orange.EnumVariable(
            "-".join([a.name for a in bound]),
            values=["r%i" % i for i in range(self.n)])

        newattr.getValueFrom = orngLookup.lookupFromBound(
            newattr, [table.domain[x] for x in bound])
        # Replace every lookup entry with a random value index.
        newattr.getValueFrom.lookupTable = [
            random.randint(0, self.n - 1)
            for _ in newattr.getValueFrom.lookupTable
        ]

        return newattr, random.randint(0, 100)
def add_class_noise(data, noise_level, rnd_seed):
    """Flip the class of a random share of examples, in place.

    A "noise" meta attribute ("no"/"yes") is added to ``data``; examples
    whose class was changed are marked "yes".

    :param data: Orange dataset; modified in place
    :param noise_level: percentage (0-100) of examples to corrupt
    :param rnd_seed: seed passed to ``orange.setrandseed``; anything that
        cannot be converted to int falls back to 0
    :return: ``(noise_indices, data)`` with the sorted indices of the
        corrupted examples
    :raises ValueError: if noise is requested but the class variable has
        fewer than two values, so no different label exists
    """
    percent = float(noise_level) / 100
    noise_count = int(round(percent * len(data)))
    # Without a second class value the relabelling loop below could never
    # find a different label and would spin forever.
    if noise_count > 0 and len(data.domain.classVar.values) < 2:
        raise ValueError(
            "cannot add class noise: class variable has fewer than two values")

    meta_noisy = orange.EnumVariable("noise", values=["no", "yes"])
    mid = orange.newmetaid()
    while mid in data.domain.getmetas().keys():
        mid = orange.newmetaid()
    data.domain.addmeta(mid, meta_noisy)
    data.addMetaAttribute("noise", "no")

    # Generate random indices for noise insertion
    try:
        rnds = int(rnd_seed)
    except (TypeError, ValueError, OverflowError):
        rnds = 0
    print("Random Seed: %s" % rnds)
    # NOTE(review): this seeds Orange's generator; `random.sample` below
    # uses whatever `random` is bound to in this module - confirm that the
    # index selection is meant to be covered by the seed.
    orange.setrandseed(rnds)
    noise_indices = random.sample(range(len(data)), noise_count)

    className = data.domain.classVar.name
    for index in noise_indices:
        data[index]["noise"] = "yes"
        current = data[index][className]
        # Draw random values until one differs from the current label.
        new_label = data.domain.classVar.randomvalue()
        while new_label == current:
            new_label = data.domain.classVar.randomvalue()
        data[index][className] = new_label

    noise_indices.sort()
    return noise_indices, data
Пример #26
0
  def __loadDataFromES(self, dataType, domain):
    """Build an Orange ExampleTable from phrases stored in Elasticsearch.

    For ``dataType == "train"`` a fresh domain is created from
    ``self.features`` (class "is_good", string meta "phrase") and the
    ``domain`` argument is ignored. For any other value the given
    ``domain`` is used as is.

    Phrases are selected by ``dataType``:
      * "train"   -- all documents matching ``is_training`` in ("1", "0")
      * "holdout" -- all documents matching ``is_holdout`` in ("1", "0")
      * otherwise -- the single document ``self.phraseId``, which is also
        stored in ``self.phraseData``

    Rows that cannot be converted are logged and skipped.

    :param dataType: "train", "holdout" or anything else for a single phrase
    :param domain: Orange domain used when ``dataType`` is not "train"
    :return: the populated ``orange.ExampleTable``
    """
    if dataType == "train":
      attributes = [self.__getOrangeVariableForFeature(feature)
                    for feature in self.features]
      classAttribute = orange.EnumVariable("is_good", values = ["0", "1"])
      domain = orange.Domain(attributes, classAttribute)
      domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
    table = orange.ExampleTable(domain)

    if dataType in ("train", "holdout"):
      flagField = "is_training" if dataType == "train" else "is_holdout"
      query = {"query":{"terms":{flagField:["1","0"]}}}
      # Count first so that the search can request every matching document.
      phrasesCount = self.esClient.count(index=self.processorIndex, doc_type=self.processorPhraseType, body=query)
      size = phrasesCount["count"]
      phrases = self.esClient.search(index=self.processorIndex, doc_type=self.processorPhraseType, body=query, size=size)
      phrases = phrases["hits"]["hits"]
    else:
      self.phraseData = self.esClient.get(index=self.processorIndex, doc_type=self.processorPhraseType, id=self.phraseId)
      phrases = [self.phraseData]

    for hit in phrases:
      # Fetched outside the try block so the error handler can always report
      # the phrase without raising a second exception itself.
      source = hit.get("_source", {})
      try:
        featureValues = []
        classType = "?"
        for feature in self.features:
          featureValues.append(source["features"][feature["name"]].encode("ascii"))
        if dataType == "train":
          classType = source["is_training"].encode("ascii", "ignore")
        elif dataType == "holdout":
          classType = source["is_holdout"].encode("ascii")
        for i, featureValue in enumerate(featureValues):
          attr = domain.attributes[i]
          # Discrete attributes must know a value before an example can use it.
          if isinstance(attr, orange.EnumVariable):
            attr.addValue(featureValue)
        example = orange.Example(domain, (featureValues + [classType]))
        # The first meta attribute of the domain receives the phrase text.
        example[domain.getmetas().items()[0][0]] = source["phrase"].encode("ascii")
        table.append(example)
      except Exception:
        self.logger.error("Error classifying phrase '%s'" % source.get("phrase"))
    return table
Пример #27
0
    def getClabDescSignList(self, smiles, getMolFile=False):
        """Return the CLab descriptor names and signatures for one SMILES.

        The descriptor names are taken from ``self.model.varNames`` or, when
        the model has no such attribute, from the names of
        ``self.model.domain.variables``. Signatures are only calculated when
        "sign" is in ``DescMethodsAvailable`` and the descriptor list yields
        a non-zero signature height.

        :param smiles: SMILES string of the compound
        :param getMolFile: when true, also write the first SD record to a
            unique ".mol" file and return its path and content
        :return: ``(clabDesc, signatures)`` when ``getMolFile`` is false;
            otherwise ``(clabDesc, signatures, molFile, molStr)``, where the
            last two items are empty strings if no SD data was produced
        """
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smiles])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smiles]])

        # Descriptor names defined in the model files
        try:
            descList = self.model.varNames
        except AttributeError:  # Consensus object different
            descList = [attr.name for attr in self.model.domain.variables]

        # Determine Signature and non-Signature descriptor names; only the
        # CLab names and the signature height are needed here.
        (_cinfonyDesc, clabDesc, signatureHeight,
         _bbrcDesc, _signDesc) = descUtilities.getDescTypes(descList)

        # Signatures
        if "sign" in DescMethodsAvailable and signatureHeight:
            print("Calculating signatures...")
            preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
            startHeight = 0  # Not used desc ignored in model prediction
            endHeight = signatureHeight
            (_dataSign, _cmpdSignDict,
             cmpdSignList, sdfStr) = getSignatures.getSignatures(
                smilesData,
                startHeight,
                endHeight,
                preCalcData,
                returnAtomID=True)
        else:
            cmpdSignList = [[]]
            sdfStr = ""

        if not getMolFile:
            return (clabDesc, cmpdSignList[0])
        if not sdfStr:
            return (clabDesc, cmpdSignList[0], "", "")

        # Keep everything up to (not including) the SD record terminator.
        molLines = []
        for line in sdfStr[0]:
            if "$$$$" in line:
                break
            molLines.append(line)
        molStr = "".join(molLines)

        # create a mol file; the context manager closes it even if the
        # write fails
        molFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        with open(molFile, "w") as molHandle:
            molHandle.write(molStr)

        return (clabDesc, cmpdSignList[0], molFile, molStr)
Пример #28
0
        def createLogRegExampleTable(data, weightID):
            """Return ``data`` extended with re-weighted copies per attribute.

            For every continuous attribute a new continuous attribute
            "<name>Disc" is appended to the domain: it is 0 in the original
            rows and 1 in the added copies, whose original attribute is set
            to 0 and whose weight is 100 times the original weight.

            For every discrete attribute the attribute is replaced by a copy
            with one extra value "<name>X": original rows keep their value,
            the added copies get the extra value and 10 times the original
            weight.

            Attributes of any other type are skipped.

            :param data: Orange example table to extend (not modified)
            :param weightID: key under which each example stores its weight
                (presumably a meta attribute id -- confirm with the caller)
            :return: a new example table with the original rows followed by
                one block of added rows per processed attribute
            """
            finalData = orange.ExampleTable(data)
            origData = orange.ExampleTable(data)
            for at in data.domain.attributes:
                # For each attribute create a new example table newData.
                if at.varType == orange.VarTypes.Continuous:
                    # Add a new continuous attribute to origData, finalData
                    # and newData.
                    atDisc = orange.FloatVariable(at.name + "Disc")
                    newDomain = orange.Domain(origData.domain.attributes +
                                              [atDisc, data.domain.classVar])
                    # Take the metas from origData: newData is not assigned
                    # yet in this iteration (unbound on the first attribute,
                    # stale afterwards).
                    newDomain.addmetas(origData.domain.getmetas())
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    for d in origData:
                        d[atDisc] = 0
                    for d in finalData:
                        d[atDisc] = 0
                    for i, d in enumerate(newData):
                        d[atDisc] = 1
                        d[at] = 0
                        d[weightID] = 100 * data[i][weightID]

                elif at.varType == orange.VarTypes.Discrete:
                    # In origData, finalData and newData give attribute "at"
                    # one more value, named after the attribute plus "X".
                    atNew = orange.EnumVariable(at.name,
                                                values=at.values +
                                                [at.name + "X"])
                    newDomain = orange.Domain(
                        filter(lambda x: x != at, origData.domain.attributes) +
                        [atNew, origData.domain.classVar])
                    newDomain.addmetas(origData.domain.getmetas())
                    # Keep copies in the old domain so that the old values
                    # can be carried over to the replacement attribute.
                    temp_finalData = orange.ExampleTable(finalData)
                    finalData = orange.ExampleTable(newDomain, finalData)
                    newData = orange.ExampleTable(newDomain, origData)
                    temp_origData = orange.ExampleTable(origData)
                    origData = orange.ExampleTable(newDomain, origData)
                    for i, d in enumerate(origData):
                        d[atNew] = temp_origData[i][at]
                    for i, d in enumerate(finalData):
                        d[atNew] = temp_finalData[i][at]
                    for i, d in enumerate(newData):
                        d[atNew] = at.name + "X"
                        d[weightID] = 10 * data[i][weightID]

                else:
                    # Nothing was built for this attribute; do not append
                    # the previous attribute's rows a second time.
                    continue

                finalData.extend(newData)
            return finalData
Пример #29
0
    def sortAttrValues(self, attr, interattr=None):
        """Return a variant of the attribute whose values are sorted.

        ``interattr`` (or ``attr`` when it is not given) supplies the name
        and values. If those values are already in sorted order, that
        attribute object is returned unchanged. Otherwise a new
        ``orange.EnumVariable`` with the sorted values is created and wired
        to ``attr`` through a ``ClassifierByLookupTable``.
        """
        source = interattr if interattr else attr

        current = list(source.values)
        ordered = sorted(current)
        if ordered == current:
            return source

        sortedAttr = orange.EnumVariable(source.name, values=ordered)
        sortedAttr.getValueFrom = orange.ClassifierByLookupTable(sortedAttr,
                                                                 attr)
        classifier = sortedAttr.getValueFrom
        table = classifier.lookupTable
        dists = classifier.distributions
        for value in source.values:
            # Slot of the value in the original attribute ...
            position = attr.values.index(value)
            table[position] = value
            # ... and a count at its position in the sorted order.
            dists[position][ordered.index(value)] += 1
        return sortedAttr
Пример #30
0
def mergeClassValues(data, value):
    """Replace the class by a "Selection" attribute marking one class value.

    The examples of ``data`` whose class equals ``value`` come first with
    "Selection" set to "0", followed by all remaining examples with
    "Selection" set to "1". The resulting domain holds the original
    attributes plus "Selection".
    """
    selection = orange.EnumVariable("Selection", values=["0", "1"])

    chosen = [value]
    others = [v for v in data.domain.classVar.values if v not in chosen]

    chosenRows = data.select({data.domain.classVar.name: chosen})
    otherRows = data.select({data.domain.classVar.name: others})
    mergedDomain = orange.Domain(chosenRows.domain.attributes + [selection])

    # getValueFrom is consulted while each table is converted to the new
    # domain, so it is re-pointed before each conversion.
    selection.getValueFrom = lambda ex, what: orange.Value(selection, "0")
    merged = orange.ExampleTable(mergedDomain, chosenRows)

    selection.getValueFrom = lambda ex, what: orange.Value(selection, "1")
    merged.extend(orange.ExampleTable(mergedDomain, otherRows))
    return merged