示例#1
0
def __make_rule_term_example_table(tableDict, allTerms):
    """Build an Orange example table with one row per entry of ``tableDict``.

    Each term in ``allTerms`` becomes a two-valued attribute
    (``const.PRESENT`` / ``const.ABSENT``).  The rule name, the rule's term
    string and the entry's key are attached to every row as meta attributes.

    tableDict -- maps a sortable key to a dict with the entries
                 ``const.RULETERMS_KEY``, ``const.RULENAME_KEY`` and
                 ``const.RULETERMS_STR_KEY``
    allTerms  -- the terms; one attribute is created per term

    Returns the filled ``orange.ExampleTable`` (rows in sorted key order).
    """
    import orange
    import constants as const

    termVars = []
    for term in allTerms:
        termVars.append(
            orange.EnumVariable(name=str(term),
                                values=[const.PRESENT, const.ABSENT]))

    # Three meta attributes; each id is requested right after its variable.
    nameVar = orange.StringVariable(const.NAME_ATTR)
    nameId = orange.newmetaid()
    termsVar = orange.StringVariable(const.TERMS_ATTR)
    termsId = orange.newmetaid()
    seqVar = orange.FloatVariable(const.SEQ_NUM_ATTR,
                                  startValue=1,
                                  endValue=len(tableDict),
                                  stepValue=1,
                                  numberOfDecimals=0)
    seqId = orange.newmetaid()

    # Classless domain made of the term attributes only.
    domain = orange.Domain(termVars, False)
    for metaId, metaVar in ((nameId, nameVar),
                            (termsId, termsVar),
                            (seqId, seqVar)):
        domain.addmeta(metaId, metaVar, False)

    table = orange.ExampleTable(domain)
    for key in sorted(tableDict.keys()):
        entry = tableDict[key]

        row = []
        for var, term in zip(termVars, allTerms):
            if term in entry[const.RULETERMS_KEY]:
                flag = const.PRESENT
            else:
                flag = const.ABSENT
            row.append(orange.Value(var, flag))
        example = orange.Example(domain, row)

        # [1:-1] drops the first and last character of the stored strings
        # (the surrounding square brackets, per the original author's note).
        example[const.NAME_ATTR] = orange.Value(
            nameVar, entry[const.RULENAME_KEY][1:-1])
        example[const.TERMS_ATTR] = orange.Value(
            termsVar, entry[const.RULETERMS_STR_KEY][1:-1])
        example[const.SEQ_NUM_ATTR] = orange.Value(seqVar, key)

        table.append(example)
    return table
 def __call__(self, ex, what=orange.Classifier.GetValue):
     """Classify ``ex`` from its values for ``self.var1`` and ``self.var2``.

     The predicted class index is ``ex[var1] * noValues2 + ex[var2]``.
     Depending on ``what`` this returns the class value, a distribution with
     all mass on that value, or a (value, distribution) pair.
     """
     index = ex[self.var1] * self.noValues2 + ex[self.var2]
     if what == orange.Classifier.GetValue:
         return orange.Value(self.classVar, index)

     distribution = orange.DiscDistribution(self.classVar)
     distribution[index] = 1.0
     if what == orange.Classifier.GetProbabilities:
         return distribution
     return (orange.Value(self.classVar, index), distribution)
    def sendpredictions(self):
        """Send the data extended with predictor outputs on "Predictions".

        Sends ``None`` when there is no data or no output variable.
        Otherwise a copy of ``self.data`` is built whose domain carries the
        original attributes, the output variable as class, the original
        metas, and one computed meta attribute per requested prediction
        (class probabilities and/or predicted class for a discrete outcome,
        predicted value for a continuous one).  Resets ``self.changedFlag``.
        """
        if not (self.data and self.outvar):
            self.send("Predictions", None)
            return

        isDiscrete = self.outvar.varType == orange.VarTypes.Discrete

        extraMetas = []
        if isDiscrete:
            if len(self.selectedClasses):
                # one probability column per (predictor, selected class)
                for clsf in self.predictors.values():
                    for classIdx in self.selectedClasses:
                        label = str("%s(%s)" % (
                            clsf.name, str(self.outvar.values[classIdx])))
                        extraMetas.append(
                            orange.FloatVariable(
                                name=label,
                                getValueFrom=lambda ex, rw, cindx=classIdx, c=clsf: orange.Value(c(ex, c.GetProbabilities)[cindx])))
            if self.showClass:
                # one predicted-class column per predictor
                for clsf in self.predictors.values():
                    extraMetas.append(
                        orange.EnumVariable(
                            name=str(clsf.name),
                            values=self.outvar.values,
                            getValueFrom=lambda ex, rw, c=clsf: orange.Value(c(ex))))
        else:
            # regression: one predicted-value column per predictor
            for clsf in self.predictors.values():
                extraMetas.append(
                    orange.FloatVariable(
                        name="%s" % clsf.name,
                        getValueFrom=lambda ex, rw, c=clsf: orange.Value(c(ex))))

        classVar = self.outvar
        domain = orange.Domain(self.data.domain.attributes + [classVar])
        domain.addmetas(self.data.domain.getmetas())
        for metaVar in extraMetas:
            domain.addmeta(orange.newmetaid(), metaVar)

        predictions = orange.ExampleTable(domain, self.data)
        if self.doPrediction:
            # the class column is filled from the first predictor only
            firstPredictor = self.predictors.values()[0]
            for ex in predictions:
                ex[classVar] = firstPredictor(ex)

        predictions.name = self.data.name
        self.send("Predictions", predictions)

        self.changedFlag = False
示例#4
0
    def all_discrete_clauses(self, attr, max_card=None):
        """Yield a discrete value filter for each candidate set of values.

        The candidate values come from ``self.col_to_clauses[attr]``.  With a
        truthy ``max_card`` every combination of 1..max_card values is
        produced (smaller cardinalities first); otherwise whatever
        ``powerset`` yields for the values is used.
        """
        candidates = self.col_to_clauses[attr]
        attrobj = self.full_table.domain[attr]
        idx = self.full_table.domain.index(attrobj)

        if max_card:
            subsets = (combo
                       for card in xrange(1, max_card + 1)
                       for combo in combinations(candidates, card))
        else:
            subsets = powerset(candidates)

        for combo in subsets:
            wrapped = [orange.Value(attrobj, value) for value in combo]
            yield orange.ValueFilter_discrete(position=idx, values=wrapped)
示例#5
0
    def __call__(self, example, resultType=orange.GetValue):
        """Combine the member classifiers' answers for ``example``.

        Probabilities are the element-wise sum of the members' class
        probabilities, normalised to sum to one.  The crisp value is the
        class predicted by the most members; it need not be the class with
        the highest combined probability.  ``resultType`` selects the value,
        the probability list, or a (value, probabilities) pair.
        """
        from operator import add

        n_classes = len(self.domain.classVar.values)

        # voting for class probabilities
        if resultType in (orange.GetProbabilities, orange.GetBoth):
            cprob = [0.] * n_classes
            for member in self.classifiers:
                member_probs = list(member(example, orange.GetProbabilities))
                cprob = list(map(add, cprob, member_probs))
            norm = sum(cprob)
            cprob = [p / norm for p in cprob]

        # voting for crisp class membership
        if resultType in (orange.GetValue, orange.GetBoth):
            cfreq = [0] * n_classes
            for member in self.classifiers:
                cfreq[int(member(example))] += 1
            winner = cfreq.index(max(cfreq))
            cvalue = orange.Value(self.domain.classVar, winner)

        if resultType == orange.GetValue:
            return cvalue
        if resultType == orange.GetProbabilities:
            return cprob
        return (cvalue, cprob)
示例#6
0
文件: rule.py 项目: pdphuong/scorpion
    def fill_in_rule(self, table, ref_bounds):
        """Append a condition for every column of ``ref_bounds`` the rule lacks.

        ``ref_bounds`` maps a column to ``None`` or to a ``(min, max)`` pair.
        ``None`` produces a discrete condition listing all values of the
        attribute; a pair produces a continuous "between" condition widened
        by one on each side.  Columns whose attribute name already appears in
        one of the rule's conditions are skipped.
        """
        domain = table.domain

        # names of the attributes the rule already constrains
        constrained = set()
        for existing in self.filter.conditions:
            constrained.add(domain[existing.position].name)

        for col, bounds in ref_bounds.iteritems():
            if col in constrained:
                continue

            attr = domain[col]
            pos = domain.index(attr)

            if bounds is not None:
                lower, upper = bounds
                cond = orange.ValueFilter_continuous(
                    position=pos,
                    oper=orange.ValueFilter.Between,
                    min=lower - 1,
                    max=upper + 1)
            else:
                every_value = [
                    orange.Value(attr, attr.values[i])
                    for i in range(len(attr.values))
                ]
                cond = orange.ValueFilter_discrete(position=pos,
                                                   values=every_value)
            self.filter.conditions.append(cond)
    def __call__(self, example, resultType=orange.GetValue):
        """Classify ``example`` by averaging the rules that cover it.

        The class distributions of all covering rules (over every rule set in
        ``self.rulesClass``) are averaged; the predicted value is the first
        class with the highest averaged share.  When no rule covers the
        example, ``self.majorityClassifier`` supplies both value and
        distribution.
        """
        classVar = self.data.domain.classVar
        n_classes = len(classVar.values)

        # 1. sum the distributions of the rules that cover the example
        totals = [0] * n_classes
        num_cover = 0.0
        for rsc in self.rulesClass:
            for rule in rsc.rules.rules:
                if not rule.covers(example):
                    continue
                num_cover += 1
                rule_dist = rule(example, orange.GetProbabilities)
                for i in range(n_classes):
                    totals[i] += rule_dist[i]

        # 2. average them, or fall back when no rule fires
        if num_cover != 0:
            averaged = [total / num_cover for total in totals]
            best = 0
            for i, share in enumerate(averaged):
                if share > averaged[best]:
                    best = i
            dist = orange.DiscDistribution(averaged)
            value = orange.Value(self.data.domain.classVar,
                                 self.data.domain.classVar.values[best])
        else:
            value, dist = self.majorityClassifier(example, orange.GetBoth)

        # 3. return what was asked for
        if resultType == orange.GetValue:
            return value
        if resultType == orange.GetBoth:
            return (value, dist)
        return dist
示例#8
0
文件: bayes.py 项目: stefie10/slu_hri
    def __call__(self, example, result_type=orange.GetValue):
        """Classify ``example`` with the stored naive Bayes tables.

        Starts from the class priors ``self.p_class`` and multiplies in
        ``self.p_cond[a][value][c]`` for every attribute ``a`` whose value in
        ``example`` is not special (missing).  The products are normalised to
        sum to one when their sum is positive.

        Returns the most probable class value, the list of class
        probabilities, or a (value, probabilities) pair depending on
        ``result_type``.
        """
        # Copy the priors so the model's own table is never modified.
        # (list() replaces the Python-2-only ``map(None, seq)`` idiom.)
        p = list(self.p_class)
        n_classes = len(self.domain.classVar.values)

        # compute the class probabilities; the missing-value test and the
        # table row lookup are done once per attribute, not once per class
        for a in range(len(self.domain.attributes)):
            attr_value = example[a]
            if attr_value.isSpecial():
                continue
            cond_row = self.p_cond[a][int(attr_value)]
            for c in range(n_classes):
                p[c] *= cond_row[c]

        # normalize probabilities to sum to 1
        total = sum(p)
        if total > 0:
            p = [pp / total for pp in p]

        # find the class with highest probability
        v_index = p.index(max(p))
        v = orange.Value(self.domain.classVar, v_index)

        # return the value based on requested return type
        if result_type == orange.GetValue:
            return v
        if result_type == orange.GetProbabilities:
            return p
        return (v, p)
示例#9
0
    def __rule__(self):
        """Return the SDRule describing this group, building it on first use.

        Each (attribute, group id) pair contributes one condition: a discrete
        filter over the values recorded for that group, or a continuous
        "between" filter using the first two recorded values as bounds.  The
        finished rule is cached in ``self._rule``.
        """
        if self._rule:
            return self._rule

        conds = []
        for attr, gid in zip(self.attrs, self.group):
            group_vals = self.grouper.id2vals[attr][gid]
            if attr.var_type == Orange.feature.Type.Discrete:
                wrapped = [orange.Value(attr, v) for v in group_vals]
                cond = orange.ValueFilter_discrete(
                    position=self.grouper.data.domain.index(attr),
                    values=wrapped)
            else:
                lower, upper = group_vals[0], group_vals[1]
                cond = Orange.data.filter.ValueFilterContinuous(
                    oper=orange.ValueFilter.Between,
                    position=self.grouper.data.domain.index(attr),
                    min=lower,
                    max=upper)
            conds.append(cond)

        built = SDRule(self.grouper.data, None, conds, None)
        built.quality = self.inf
        built.inf_state = self.inf_state
        self._rule = built

        return self._rule
示例#10
0
 def cloneAndAddCondition(self, attribute, value):
     '''Returns a copy of this rule which condition part is extended by attribute = value'''
     # work on a copy so this rule's own condition list stays untouched
     extended = self.filter.conditions[:]
     position = self.data.domain.attributes.index(attribute)
     new_condition = orange.ValueFilter_discrete(
         position=position,
         values=[orange.Value(attribute, value)])
     extended.append(new_condition)
     return SDRule(self.data, self.targetClass, extended, self.g)
示例#11
0
文件: rule.py 项目: pdphuong/scorpion
    def simplify(self, data=None, cdists=None, ddists=None):
        """Return a clone of this rule without conditions that do not restrict.

        Args:
          data:   non-filtered data; defaults to ``self.data``
          cdists: non-filtered continuous statistics, computed from the data
                  when not supplied
          ddists: non-filtered discrete distributions, computed from the data
                  when not supplied
        Return:
          copy of this rule with simplified conditions

        A discrete condition is dropped when it lists as many values as have
        a non-zero entry in the full distribution.  A continuous condition is
        intersected with the attribute's full range and dropped when the
        result is no smaller than that range.
        """
        # NOTE(review): ``subset`` is never read below; the expression is kept
        # as-is because evaluating it may matter -- confirm before removing.
        subset = data and self(data) or self.examples
        data = data or self.data
        simplified = self.clone()

        cdists = cdists or Orange.statistics.basic.Domain(data)
        ddists = ddists or Orange.statistics.distribution.Domain(data)

        kept = []
        for old_cond in self.filter.conditions:
            idx = old_cond.position
            attr = data.domain[idx]

            if attr.var_type == Orange.feature.Type.Discrete:
                full_dist = ddists[attr.name]
                present = [k for k, v in full_dist.items() if v]
                chosen = set(
                    str(attr.values[int(v)]) for v in old_cond.values)
                if len(chosen) == len(present):
                    continue

                new_cond = orange.ValueFilter_discrete(
                    position=idx,
                    values=[orange.Value(attr, val) for val in chosen])
            else:
                stats = cdists[attr.name]
                table_bound = [stats.min, stats.max]
                rule_bound = [old_cond.min, old_cond.max]

                bound = r_intersect(table_bound, rule_bound)
                if r_vol(bound) >= r_vol(table_bound):
                    continue

                # the existing condition object is narrowed in place and reused
                new_cond = old_cond
                new_cond.min, new_cond.max = bound[0], bound[1]
            kept.append(new_cond)

        simplified.quality = self.quality
        simplified.filter.conditions = kept
        simplified.c_range = list(self.c_range)
        return simplified
示例#12
0
 def __call__(self, example, what=orange.Classifier.GetValue):
     """Classify ``example`` by thresholding the wrapped classifier.

     The wrapped classifier supplies the class probabilities; the predicted
     value is built from whether ``probs[1]`` exceeds ``self.threshold``.
     """
     probs = self.classifier(example, self.GetProbabilities)
     if what == self.GetProbabilities:
         return probs

     above_threshold = probs[1] > self.threshold
     value = orange.Value(self.classifier.classVar, above_threshold)
     if what == orange.Classifier.GetValue:
         return value
     return (value, probs)
示例#13
0
def mergeClassValues(data, value):
    """Relabel ``data`` with a two-valued "Selection" attribute.

    Examples whose class is ``value`` come first and get Selection "0";
    all remaining examples follow and get Selection "1".  The returned
    table uses the original attributes followed by the new attribute.
    """
    selection = orange.EnumVariable("Selection", values=["0", "1"])

    chosenClasses = [value]
    otherClasses = [
        val for val in data.domain.classVar.values
        if val not in chosenClasses
    ]

    chosenRows = data.select({data.domain.classVar.name: chosenClasses})
    otherRows = data.select({data.domain.classVar.name: otherClasses})
    mergedDomain = orange.Domain(chosenRows.domain.attributes + [selection])

    def constant(label):
        # computed-attribute callback that always yields ``label``
        return lambda ex, what: orange.Value(selection, label)

    # The callback must be in place before each table is converted.
    selection.getValueFrom = constant("0")
    merged = orange.ExampleTable(mergedDomain, chosenRows)

    selection.getValueFrom = constant("1")
    merged.extend(orange.ExampleTable(mergedDomain, otherRows))
    return merged
示例#14
0
    def __call__(self, data, targetClass, num_of_rules=0):
        '''Returns CN2-SD rules by performing weighted covering algorithm.

        data         -- examples to learn from; continuous attributes are
                        replaced by entropy-discretized ones for the search
        targetClass  -- class value the rules should describe
        num_of_rules -- maximum number of rules to return, 0 for no limit

        Returns an SDRules object labelled "CN2-SD".  When the data had to be
        discretized, the rules are converted back with ``getUndiscretized``
        so that they apply to the data that was passed in.
        '''
        original_data = data
        data_discretized = False

        # If any of the attributes are continuous, discretize them
        if data.domain.hasContinuousAttributes():
            data_discretized = True
            discretize = orange.EntropyDiscretization(forceAttribute=True)
            new_domain = []
            for attribute in data.domain.attributes:
                if attribute.varType == orange.VarTypes.Continuous:
                    # discretized attributes are kept unconditionally
                    new_domain.append(discretize(attribute, data))
                else:
                    new_domain.append(attribute)
            data = original_data.select(new_domain +
                                        [original_data.domain.classVar])

        self.data = data
        self.max_rules = num_of_rules
        rules = []

        tc = orange.Value(data.domain.classVar, targetClass)

        # weighted covering
        self.data.addMetaAttribute(
            self.weightID)  # set weights of all examples to 1
        self.data.addMetaAttribute(
            self.counter)  # set counters of all examples to 0

        # Keep asking the rule finder for the best rule until it has nothing
        # of positive quality left or the rule limit is reached.
        tmpRule = self.rbf(data, self.weightID, targetClass, None)
        while (tmpRule.quality > 0) and (self.max_rules == 0
                                         or len(rules) < self.max_rules):
            bestRule = SDRule(self.data, tc, tmpRule.filter.conditions)
            bestRule.quality = tmpRule.quality
            self.decreaseExampleWeights(bestRule)
            rules.append(bestRule)
            tmpRule = self.rbf(data, self.weightID, targetClass, None)

        # The condition-free rule is always built on the data that was passed
        # in.  (It used to be constructed a second time before the covering
        # loop, but that instance was overwritten without ever being read.)
        targetClassRule = SDRule(original_data,
                                 targetClass,
                                 conditions=[],
                                 g=1)
        if data_discretized:
            # change beam so the rules apply to original data
            rules = [rule.getUndiscretized(original_data) for rule in rules]

        return SDRules(rules, targetClassRule, "CN2-SD")
示例#15
0
 def __call__(self, example, resultType = orange.GetValue):
     """Majority vote of the member classifiers for ``example``.

     Returns the most frequently predicted class value, the list of vote
     shares per class, or a (value, shares) pair depending on ``resultType``.
     """
     votes = [0.] * len(self.domain.classVar.values)
     for member in self.classifiers:
         votes[int(member(example))] += 1

     winner = votes.index(max(votes))
     value = orange.Value(self.domain.classVar, winner)

     # turn the counts into shares of the ensemble size
     shares = [count / len(self.classifiers) for count in votes]

     if resultType == orange.GetValue:
         return value
     if resultType == orange.GetProbabilities:
         return shares
     return (value, shares)
示例#16
0
    def __call__(self, example, what=orange.Classifier.GetValue):
        """Classify ``example`` from the wrapped model's single probability.

        The probability returned by ``orange_classify`` is rounded to pick
        the class index; the distribution stores that probability under the
        chosen class only.
        """
        score = self.classifier.orange_classify(example)

        predicted = orange.Value(self.classVar, int(round(score)))
        distribution = orange.DiscDistribution(self.classVar)
        distribution[predicted] = score

        if what == orange.Classifier.GetValue:
            return predicted
        if what == orange.Classifier.GetProbabilities:
            return distribution
        return predicted, distribution
示例#17
0
def lookupFromFunction(attribute, bound, function):
    """
    Constructs ClassifierByExampleTable or ClassifierByLookupTable mirroring the given function

    attribute -- the output variable
    bound     -- the discrete attributes the function depends on
    function  -- called with a list of value indices (one per bound
                 attribute); its result becomes the output value

    When ``lookupFromBound`` supplies a lookup classifier, its table is
    filled directly.  Otherwise an example table enumerating every value
    combination is built and handed to ``orange.LookupLearner``.
    """
    lookup = lookupFromBound(attribute, bound)
    if lookup:
        lookup.lookupTable = [
            orange.Value(attribute, function(attributes))
            for attributes in orngMisc.LimitedCounter(
                [len(attr.values) for attr in bound])
        ]
        return lookup

    # The domain has to be bound to a name: it is needed both to enumerate
    # the value combinations and to build each example.  (It used to be
    # created inline, leaving ``dom`` undefined and this branch unusable.)
    dom = orange.Domain(bound, attribute)
    examples = orange.ExampleTable(dom)
    for attributes in orngMisc.LimitedCounter(
            [len(attr.values) for attr in dom.attributes]):
        examples.append(
            orange.Example(dom, attributes + [function(attributes)]))
    return orange.LookupLearner(examples)
示例#18
0
 def __call__(self, ex, what=orange.Classifier.GetValue):
     """Classify ``ex`` with ``self.classify`` and wrap the answer for Orange.

     Returns the class value, a distribution with all mass on the predicted
     class, or a (value, distribution) pair depending on ``what``.

     Raises ValueError when ``what`` is none of GetValue, GetProbabilities
     or GetBoth.
     """
     value = self.classify(ex)
     result = orange.Value(ex.domain.classVar, str(value))
     probs = orange.DiscDistribution(ex.domain.classVar)
     probs[value] = 1.0
     if what == orange.Classifier.GetValue:
         return result
     elif what == orange.Classifier.GetProbabilities:
         return probs
     elif what == orange.Classifier.GetBoth:
         return result, probs
     else:
         # repr() produces the same text as the Python-2-only backtick form
         raise ValueError("Bad what argument: %s" % repr(what))
示例#19
0
 def __call__(self, example, result_type=orange.GetValue):
     """Classify ``example`` with the wrapped model.

     GetValue returns the model's prediction as a class value.  For the
     other result types the model's (label, probability) pairs are used:
     GetProbabilities returns the probabilities placed at each label's
     position among the class values, GetBoth returns the first pair as
     (class value, probability).  ``None`` is returned when the model
     yields no pairs or the result type is not recognised.
     """
     if result_type == orange.GetValue:
         return orange.Value(self.domain.classVar,
                             self.model.predict(extract_features(example)))

     # label -> position among the class values, used to order the output
     class_map = dict(
         (label, pos)
         for pos, label in enumerate(self.domain.classVar.values))

     result = self.model.eval_all(extract_features(example))
     if not len(result) > 0:
         return None

     if result_type == orange.GetProbabilities:
         ordered = [None] * len(result)
         for label, prob in result:
             ordered[class_map[label]] = prob
         return ordered

     if result_type == orange.GetBoth:
         top = result[0]
         return (orange.Value(self.domain.classVar, top[0]), top[1])

     return None
示例#20
0
    def to_rule(self, table, cont_dists=None, disc_dists=None):
        """Return the SDRule for this object over ``table``.

        The rule is built once and cached in ``self.rule``; every call
        refreshes its quality, score, inf_state and c_range from this object.

        table      -- data whose domain the conditions refer to
        cont_dists -- per-attribute-name continuous statistics, computed
                      from ``table`` when not supplied
        disc_dists -- per-attribute-name discrete distributions, computed
                      from ``table`` when not supplied
        """
        if not self.rule:
            domain = table.domain
            names = [attr.name for attr in domain]
            cont_dists = cont_dists or dict(
                zip(names, Orange.statistics.basic.Domain(table)))
            disc_dists = disc_dists or dict(
                zip(names, Orange.statistics.distribution.Domain(table)))

            conds = []

            # continuous columns, each paired with its (min, max) box side
            for col, bound in zip(self.cols, zip(*self.bbox)):
                attr = domain[col]
                pos = domain.index(attr)
                stats = cont_dists[attr.name]
                lower, upper = r_intersect(bound, [stats.min, stats.max])
                # skip bounds that span more than 99% of the column's range
                if upper - lower > 0.99 * (stats.max - stats.min):
                    continue

                conds.append(
                    orange.ValueFilter_continuous(position=pos,
                                                  max=bound[1],
                                                  min=bound[0]))

            # discrete columns, given as value indices
            for disc_name, vidxs in self.discretes.iteritems():
                attr = domain[disc_name]
                disc_pos = domain.index(attr)
                vals = [
                    orange.Value(attr, attr.values[int(vidx)])
                    for vidx in vidxs if int(vidx) < len(attr.values)
                ]

                # keep only conditions that are non-empty and do not list as
                # many values as the stored distribution has entries
                if vals and len(vals) != len(disc_dists[attr.name]):
                    conds.append(
                        orange.ValueFilter_discrete(position=disc_pos,
                                                    values=vals))

            self.rule = SDRule(table, None, conditions=conds)

        rule = self.rule
        rule.quality = rule.score = self.error
        rule.inf_state = self.inf_state
        rule.c_range = self.c_range
        return rule
示例#21
0
    def getFixed(self, original_data):
        """Return this rule re-expressed as an SDRule over ``original_data``.

        Every condition refers to a derived feature carrying ``attribute``,
        ``cond`` and ``value``.  '==' and '!=' become discrete filters (the
        single value, or every other value of the attribute); '<=' and '>'
        become continuous filters with an upper bound of ``value`` that are
        flagged ``outside`` for '>'.  Other operators add no condition.
        """
        cond = []
        for c in self.filter.conditions:
            feature = self.data.domain.attributes[c.position]
            position = original_data.domain.attributes.index(feature.attribute)
            operator = feature.cond

            if operator in ('==', '!='):
                if operator == '==':
                    labels = [feature.value]
                else:
                    labels = [
                        value for value in feature.attribute.values
                        if value != feature.value
                    ]
                cond.append(
                    orange.ValueFilter_discrete(
                        position=position,
                        values=[
                            orange.Value(feature.attribute, label)
                            for label in labels
                        ]))
            elif operator in ('<=', '>'):
                cond.append(
                    orange.ValueFilter_continuous(position=position,
                                                  max=feature.value,
                                                  min=float(-infinity),
                                                  outside=(operator == '>')))

        return SDRule(original_data, self.targetClass, cond, self.g)
示例#22
0
    def __call__(self, learndata, testdata = None, weight = None):
        """Learn rules for every class value and return an SD_Classifier.

        The classifier is created on ``testdata`` when it is given (because
        of preprocessing), otherwise on ``learndata``.  For each class value
        ``self.learner`` is run on ``learndata`` and its rules are registered
        with the classifier.
        """
        classifier = SD_Classifier(testdata if testdata else learndata)

        for label in learndata.domain.classVar.values:
            target = orange.Value(learndata.domain.classVar, label)
            found = self.learner (learndata, target, self.max_rules)
            classifier.addRulesForClass(found, target)

        classifier.name = self.name
        classifier.algorithm = self.algorithm
        return classifier
示例#23
0
 def __call__(self, example, resultType=orange.GetValue):
     """Weighted vote of the (classifier, weight) pairs for ``example``.

     Each classifier adds its weight to the class it predicts.  Returns the
     best-scoring class value, the votes normalised by their sum, or a
     (value, normalised votes) pair depending on ``resultType``.
     """
     tally = [0.] * len(self.classVar.values)
     for member, weight in self.classifiers:
         tally[int(member(example))] += weight

     best = orngMisc.selectBestIndex(tally)
     value = orange.Value(self.classVar, best)
     if resultType == orange.GetValue:
         return value

     total = sum(tally)
     shares = [vote / total for vote in tally]
     if resultType == orange.GetProbabilities:
         return shares
     return (value, shares)
示例#24
0
文件: ndt.py 项目: pdphuong/scorpion
def create_clause(table, attr, val, bdists, cmp='='):
    """Build a value filter for ``attr`` from a single condition.

    table  -- data whose domain gives the attribute's position
    attr   -- the attribute the condition is about
    val    -- for a discrete attribute, one value or a list/tuple of values;
              otherwise a number, or a string that may start with a
              comparison operator (e.g. "<= 5")
    bdists -- per-attribute statistics providing ``min`` and ``max``
    cmp    -- comparison to apply when ``val`` carries no operator prefix

    Returns a discrete filter, or a continuous "between" filter in which the
    bound named by the comparison is replaced by the value.  Strict and
    non-strict comparisons produce the same bounds.

    Raises ValueError for an unparsable number or an unknown comparison.
    """
    if attr.varType == Orange.feature.Type.Discrete:
        if not isinstance(val, (list, tuple)):
            val = [val]
        vals = [orange.Value(attr, v) for v in val]
        return orange.ValueFilter_discrete(position=table.domain.index(attr),
                                           values=vals)

    # It may be a discretized continuous condition (e.g., "<= 5").
    # Two-character operators are tried first so that "<= 5" is not taken
    # for "<" followed by the unparsable "= 5".
    prefixes = ('<=', '>=', '<', '>', '=')
    try:
        prefix = next((c for c in prefixes if val.startswith(c)), None)
    except (AttributeError, TypeError):
        prefix = None  # val is not a string; treat it as a plain number

    if prefix is not None:
        cmp = prefix
        val = float(val[len(prefix):])
    else:
        val = float(val)

    bdist = bdists[attr]

    minv, maxv = bdist.min, bdist.max
    if cmp in ('>=', '>'):
        minv = val
    elif cmp in ('<=', '<'):
        maxv = val
    elif cmp == '=':
        maxv = minv = val
    else:
        raise ValueError("unsupported comparison operator: %r" % (cmp,))

    return Orange.data.filter.ValueFilterContinuous(
        position=table.domain.index(attr),
        oper=orange.ValueFilter.Between,
        min=minv,
        max=maxv)
示例#25
0
    def __call__(self, example, resultType=orange.GetValue):
        """Predict a continuous value for ``example`` with the fitted betas.

        The example is converted to ``self.domain``, imputed and turned into
        a numpy array; its last element is left out of the dot product.
        With ``self.beta0`` set, ``beta[0]`` is the intercept.  Returns the
        predicted value, a distribution holding it, or both.
        """
        converted = orange.Example(self.domain, example)
        row = numpy.array(self.imputer(converted).native())

        if not self.beta0:
            yhat = dot(self.beta, row[:-1])
        elif len(self.beta) > 1:
            yhat = self.beta[0] + dot(self.beta[1:], row[:-1])
        else:
            # intercept-only model
            yhat = self.beta[0]
        yhat = orange.Value(yhat)

        if resultType == orange.GetValue:
            return yhat
        distribution = orange.ContDistribution({1.0: yhat})
        if resultType == orange.GetProbabilities:
            return distribution
        return (yhat, distribution)
示例#26
0
文件: rule.py 项目: pdphuong/scorpion
    def dictToCond(d, data):
        """Build a filter condition from its dictionary description ``d``.

        A ``'num'`` entry yields a continuous "between" filter over
        ``d['vals'][0]`` .. ``d['vals'][1]``; anything else yields a discrete
        filter over ``d['vals']`` for the attribute ``d['col']`` of ``data``.
        """
        if d['type'] == 'num':
            return orange.ValueFilter_continuous(
                position=d['pos'],
                oper=orange.ValueFilter.Between,
                min=d['vals'][0],
                max=d['vals'][1])

        attr = data.domain[d['col']]

        def resolve(v):
            # XXX: NULL hack -- a None is replaced by 'NULL' or 'None' when
            # the attribute declares such a value, and passed on otherwise
            if v is None:
                for placeholder in ('NULL', 'None'):
                    if placeholder in attr.values:
                        return placeholder
            return v

        vals = [orange.Value(attr, resolve(v)) for v in d['vals']]
        return orange.ValueFilter_discrete(position=d['pos'], values=vals)
示例#27
0
    def __call__(self, table, bound, weightID=0):
        """Construct a new attribute from the value combinations of ``bound``.

        table    -- data whose domain resolves the entries of ``bound``
        bound    -- attributes (or their names/indices) to combine
        weightID -- accepted for interface compatibility; not used here

        The new attribute is named after the bound attributes joined with
        "-" and has one value per combination of their values; a lookup
        classifier is installed as its ``getValueFrom``.

        Returns ``(new_attribute, quality)`` where quality is
        ``self.measure(new_attribute, table)``, or 0 without a measure.

        Raises AttributeError when ``bound`` is empty.
        """
        if not len(bound):
            raise AttributeError("no bound attributes")

        bound = [table.domain[a] for a in bound]
        newVar = orange.EnumVariable("-".join([a.name for a in bound]))

        if (len(bound) == 1):
            newVar.values = list(bound[0].values)
            clsfr = orange.ClassifierByLookupTable(newVar, bound[0])
        else:
            import orngMisc
            # one value per combination of the bound attributes' values
            for vs in orngMisc.LimitedCounter([len(a.values) for a in bound]):
                newVar.values.append("-".join(
                    [bound[i].values[v] for i, v in enumerate(vs)]))
            clsfr = orange.ClassifierByLookupTable(newVar, bound)

        # entry i of the lookup table maps to the i-th value of the new
        # attribute
        for i in range(len(newVar.values)):
            clsfr.lookupTable[i] = orange.Value(newVar, i)

        newVar.getValueFrom = clsfr

        if self.measure:
            meas = self.measure(newVar, table)
        else:
            meas = 0
        return newVar, meas
示例#28
0
文件: rule.py 项目: pdphuong/scorpion
    def cloneAndAddCondition(self,
                             attribute,
                             values,
                             used=False,
                             negate=False):
        '''Returns a copy of this rule which condition part is extended by attribute = value

        attribute -- attribute the new condition applies to
        values    -- one value or a list of values; a falsy argument leaves
                     the rule unchanged and returns ``self``
        used, negate -- accepted for interface compatibility; not used here

        Any existing condition on the same attribute position is replaced,
        and the conditions of the new rule are ordered by position.
        '''
        if not (values):
            return self
        if not isinstance(values, list):
            values = [values]

        pos = self.data.domain.index(attribute)
        # A real list is required because the new condition is appended to it
        # and it is sorted in place (filter() only returns a list on Python 2).
        conds = [cond for cond in self.filter.conditions
                 if cond.position != pos]
        values = [orange.Value(attribute, value) for value in values]
        conds.append(
            orange.ValueFilter_discrete(position=pos, values=values))
        conds.sort(key=lambda c: c.position)

        return SDRule(self.data, self.targetClass, conds, self.g)
示例#29
0
 def __call__(self, example, resultType=orange.GetValue):
     """Combine the member classifiers' predictions for ``example``.

     Discrete class: majority vote; the probabilities are the vote shares.
     Continuous class: the prediction is the mean of the members'
     predictions, and the distribution gives each predicted value the
     averaged probability its member assigned to it.

     Returns the value, the probabilities/distribution, or a
     (value, probabilities) pair depending on ``resultType``.
     """
     if self.classVar.varType == orange.VarTypes.Discrete:
         freq = [0.] * len(self.classVar.values)
         for c in self.classifiers:
             freq[int(c(example))] += 1
         index = freq.index(max(freq))
         value = orange.Value(self.classVar, index)
         if resultType == orange.GetValue:
             return value
         freq = [f / len(self.classifiers) for f in freq]
         if resultType == orange.GetProbabilities:
             return freq
         else:
             return (value, freq)
     elif self.classVar.varType == orange.VarTypes.Continuous:
         # Members are asked for both parts whenever a distribution is
         # needed, because the predicted value indexes the distribution.
         votes = [
             c(
                 example, orange.GetBoth
                 if resultType == orange.GetProbabilities else resultType)
             for c in self.classifiers
         ]
         wsum = float(len(self.classifiers))
         if resultType in [orange.GetBoth, orange.GetProbabilities]:
             from collections import defaultdict
             prob = defaultdict(float)
             for c, p in votes:
                 try:
                     prob[float(c)] += p[c] / wsum
                 except IndexError:  # p[c] sometimes fails with index error
                     prob[float(c)] += 1.0 / wsum
             prob = orange.ContDistribution(prob)
             # Written as separate returns on purpose: the former one-liner
             # "return value, prob if ... else prob" always produced a
             # tuple, even when only the distribution was requested.
             if resultType == orange.GetBoth:
                 pred = sum([float(c) for c, p in votes]) / wsum
                 return self.classVar(pred), prob
             return prob
         elif resultType == orange.GetValue:
             pred = sum([float(c) for c in votes]) / wsum
             return self.classVar(pred)
示例#30
0
def booleanToOrange(bool, var):
    """Return the value "True" or "False" of ``var`` matching ``bool``'s truth."""
    txt = "True" if bool else "False"
    return orange.Value(var, txt)