示例#1
0
 def MeasureAttribute_info(self, attr, data):
     """Return the Pearson coefficient between ``attr`` and the class variable.

     A two-column table (``attr``, class variable) is selected from ``data``
     and passed through ``orange.Preprocessor_dropMissing`` (presumably
     removing examples with missing values) before the values are read.

     Only the first element of the pair returned by ``statc.pearsonr`` is
     returned; the second one is ignored.
     """
     columns = [attr, data.domain.classVar]
     cleaned = orange.Preprocessor_dropMissing(data.select(columns))

     # Column 0 holds ``attr``, column 1 the class variable.
     attr_values = [example[0].value for example in cleaned]
     class_values = [example[1].value for example in cleaned]

     coefficient, _significance = statc.pearsonr(attr_values, class_values)
     return coefficient
示例#2
0
    def MeasureAttribute_info(self, attr, data):
        """Score ``attr`` by its Pearson correlation with the class variable.

        The attribute and the class variable are selected from ``data`` and
        the result is run through ``orange.Preprocessor_dropMissing``
        (presumably dropping examples with missing values).

        Returns the first element of the pair produced by ``statc.pearsonr``;
        the second element is discarded.
        """
        cleaned = orange.Preprocessor_dropMissing(
            data.select([attr, data.domain.classVar]))

        attr_values = []
        class_values = []
        for row in range(len(cleaned)):
            example = cleaned[row]
            attr_values.append(example[0].value)   # selected attribute
            class_values.append(example[1].value)  # class variable

        coefficient, _significance = statc.pearsonr(attr_values, class_values)
        return coefficient
示例#3
0
def distPearson(x, y):
    """Pearson-correlation distance between two rank-1 arrays.

    Both arguments are converted with ``MA.asarray`` and asserted to have
    rank 1. Positions that are masked in either array are left out before
    the correlation is computed.

    Returns ``1 - r``, where ``r`` is the first element returned by
    ``statc.pearsonr``.
    """
    x_arr = MA.asarray(x)
    y_arr = MA.asarray(y)
    assert MA.rank(x_arr) == MA.rank(y_arr) == 1

    # Keep only the positions that are unmasked in both arrays.
    masked_in_either = MA.logical_or(MA.getmaskarray(x_arr),
                                     MA.getmaskarray(y_arr))
    keep = MA.logical_not(masked_in_either)

    x_valid = MA.compress(keep, x_arr).tolist()
    y_valid = MA.compress(keep, y_arr).tolist()

    pearson_r = statc.pearsonr(x_valid, y_valid)[0]
    return 1 - pearson_r
示例#4
0
 def __call__(self, e1, e2):
     """Return a Pearson-based distance between examples ``e1`` and ``e2``.

     Only the positions listed in ``self.indxs`` are compared. A position is
     skipped when either example reports ``isSpecial()`` for it.

     Returns ``(1 - r) / 2``, where ``r`` is the first element returned by
     ``statc.pearsonr``. Returns 1.0 when no position is usable or when
     the correlation cannot be computed.
     """
     X1 = []
     X2 = []
     for i in self.indxs:
         v1 = e1[i]
         v2 = e2[i]
         if v1.isSpecial() or v2.isSpecial():
             continue
         X1.append(float(v1))
         X2.append(float(v2))

     if not X1:
         return 1.0

     try:
         return (1.0 - statc.pearsonr(X1, X2)[0]) / 2.
     except Exception:
         # Best-effort fallback when the correlation cannot be computed.
         # ``Exception`` (rather than a bare ``except``) lets
         # KeyboardInterrupt and SystemExit propagate to the caller.
         return 1.0
示例#5
0
def computeCorrelation(data, attr1, attr2):
    """Return the Pearson coefficient between two continuous attributes.

    Returns None when either ``attr1`` or ``attr2`` is not of type
    ``orange.VarTypes.Continuous`` in ``data.domain``.

    The two attributes are selected from ``data`` and passed through
    ``orange.Preprocessor_dropMissing`` (presumably dropping examples with
    missing values) before the values are read. If ``statc.pearsonr``
    raises, 0.0 is returned instead.
    """
    if data.domain[attr1].varType != orange.VarTypes.Continuous:
        return None
    if data.domain[attr2].varType != orange.VarTypes.Continuous:
        return None

    table = orange.Preprocessor_dropMissing(data.select([attr1, attr2]))
    a1 = [example[attr1].value for example in table]
    a2 = [example[attr2].value for example in table]

    try:
        val, _prob = statc.pearsonr(a1, a2)
    except Exception:
        # Possibly invalid a1 or a2. ``Exception`` (not a bare ``except``)
        # keeps KeyboardInterrupt and SystemExit from being swallowed.
        val = 0.0

    return val
示例#6
0
def computeCorrelation(data, attr1, attr2):
    """Compute the Pearson correlation of two continuous attributes.

    Returns None if ``attr1`` or ``attr2`` is not
    ``orange.VarTypes.Continuous`` in ``data.domain``. Otherwise returns the
    first element of the pair produced by ``statc.pearsonr``, or 0.0 when
    that call raises.

    The values are read after the selected two-column table has been run
    through ``orange.Preprocessor_dropMissing`` (presumably removing
    examples with missing values).
    """
    for attr in (attr1, attr2):
        if data.domain[attr].varType != orange.VarTypes.Continuous:
            return None

    table = data.select([attr1, attr2])
    table = orange.Preprocessor_dropMissing(table)
    a1 = [example[attr1].value for example in table]
    a2 = [example[attr2].value for example in table]

    try:
        val, _prob = statc.pearsonr(a1, a2)
    except Exception:
        # Possibly invalid a1 or a2. Catching ``Exception`` instead of using
        # a bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
        val = 0.0

    return val
示例#7
0
def computeCorrelationInsideClasses(data, attr1, attr2):
    """Compute the Pearson correlation of two attributes within each class.

    Returns None when either ``attr1`` or ``attr2`` is not of type
    ``orange.VarTypes.Continuous`` in ``data.domain``.

    Otherwise returns a tuple ``(corr, corrs, lengths)``:

    * ``corrs`` - the coefficient for every class value that has at least
      one example after ``orange.Preprocessor_dropMissing``;
    * ``lengths`` - the number of examples behind each entry of ``corrs``;
    * ``corr`` - the average of ``abs(corrs)`` weighted by ``lengths``.

    When no class value has any usable example the result is
    ``(0.0, [], [])`` instead of a ZeroDivisionError.
    """
    if data.domain[attr1].varType != orange.VarTypes.Continuous:
        return None
    if data.domain[attr2].varType != orange.VarTypes.Continuous:
        return None

    table = data.select([attr1, attr2, data.domain.classVar])
    table = orange.Preprocessor_dropMissing(table)
    class_var = table.domain.classVar

    lengths = []
    corrs = []
    for class_value in class_var.values:
        subset = table.filter({class_var: class_value})
        a1 = [example[attr1].value for example in subset]
        a2 = [example[attr2].value for example in subset]
        if not a1:
            continue
        # A separate name keeps the loop variable from being overwritten.
        r, _prob = statc.pearsonr(a1, a2)
        lengths.append(len(a1))
        corrs.append(r)

    if not lengths:
        # Nothing to average; avoid dividing by sum([]) == 0.
        return 0.0, [], []

    weighted = sum(abs(r) * n for r, n in zip(corrs, lengths))
    corr = weighted / float(sum(lengths))
    return corr, corrs, lengths
示例#8
0
    def pearson(ex1, ex2):
        """Return the Pearson correlation between two examples.

        The values of each example come from ``native(0)`` with the last
        element dropped (NOTE(review): presumably the class value - confirm).
        ``check_same`` and ``no_unknowns`` are not defined in this function;
        they are read from an enclosing scope.

        Returns 10 when ``check_same`` is true and both value lists are
        equal. Otherwise returns the coefficient from ``statc.pearsonr``,
        falling back to ``numpy.corrcoef`` when ``statc.pearsonr`` raises.
        """
        vals1 = ex1.native(0)[:-1]
        vals2 = ex2.native(0)[:-1]

        if check_same and vals1 == vals2:
            return 10  # they are the same

        # Leave out positions where either value is undefined ("?").
        if not no_unknowns:
            known = [(v1, v2) for v1, v2 in zip(vals1, vals2)
                     if v1 != "?" and v2 != "?"]
            vals1 = [v1 for v1, _ in known]
            vals2 = [v2 for _, v2 in known]

        # statc correlation is from 5-10 times faster than numpy!
        try:
            return statc.pearsonr(vals1, vals2)[0]
        except Exception:
            # Fall back to numpy when statc fails. ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            return numpy.corrcoef([vals1, vals2])[0, 1]
示例#9
0
def computeCorrelationInsideClasses(data, attr1, attr2):
    """Return the class-wise Pearson correlation of two continuous attributes.

    Returns None if ``attr1`` or ``attr2`` is not
    ``orange.VarTypes.Continuous`` in ``data.domain``.

    Otherwise the two attributes and the class variable are selected, the
    table is passed through ``orange.Preprocessor_dropMissing``, and
    ``statc.pearsonr`` is evaluated separately for every class value that
    has at least one example. The result is ``(corr, corrs, lengths)``:
    the per-class coefficients ``corrs``, the matching example counts
    ``lengths``, and ``corr``, the average of ``abs(corrs)`` weighted by
    ``lengths``.

    If no class value has any example, ``(0.0, [], [])`` is returned rather
    than raising ZeroDivisionError.
    """
    for attr in (attr1, attr2):
        if data.domain[attr].varType != orange.VarTypes.Continuous:
            return None

    table = data.select([attr1, attr2, data.domain.classVar])
    table = orange.Preprocessor_dropMissing(table)

    lengths = []
    corrs = []
    for class_value in table.domain.classVar.values:
        subset = table.filter({table.domain.classVar: class_value})
        a1 = [example[attr1].value for example in subset]
        a2 = [example[attr2].value for example in subset]
        if not a1:
            continue
        # Use a dedicated name so the loop variable is not clobbered.
        coefficient, _prob = statc.pearsonr(a1, a2)
        lengths.append(len(a1))
        corrs.append(coefficient)

    total = sum(lengths)
    if total == 0:
        # No usable examples in any class: nothing to average.
        return 0.0, [], []

    corr = 0
    for coefficient, count in zip(corrs, lengths):
        corr += abs(coefficient) * count
    corr /= float(total)
    return corr, corrs, lengths