示例#1
0
 def isCondIndMI(self, var1, var2, var3):
     """Test whether var1 and var2 are conditionally independent given var3.

     The conditional mutual information (CMI) of the three series is
     estimated with ``ee.cmi`` (k=3) and divided by the geometric mean of
     the three values returned by ``self.getEntropy``.  The variables are
     reported as conditionally independent when that normalized CMI is
     below ``miCondIndThreshold``.

     Returns the result of that threshold comparison.
     """
     names = (var1, var2, var3)
     # NOTE: standardizing the series (self.standardize) is currently disabled.
     series = [self.data.getSeries(name) for name in names]
     prepared = [self.mi_prepare(values) for values in series]
     rawCmi = ee.cmi(prepared[0], prepared[1], prepared[2], k=3)
     entA, entB, entC = [self.getEntropy(name) for name in names]
     # Geometric mean of the three entropies acts as the normalizer.
     scale = (entA * entB * entC) ** (1 / 3.0)
     normCmi = rawCmi / scale
     result = normCmi < miCondIndThreshold
     Dprint('CI: ', var1, '_||_', var2, '|', var3, '=', result, normCmi)
     return result
示例#2
0
def isWinning(feature_col, coalition, rows, threshold=0.50):
    """Check whether joining the feature to the coalition leads to a win.

    A feature is winning if it is interdependent with at least half of the
    members of the coalition.  For a single-member coalition the
    interdependence is the mutual information between the feature and that
    member; otherwise it is the conditional mutual information between the
    feature and each member given all the remaining members.

    Args:
        feature_col: values of the candidate feature; must support
            ``reshape(-1, 1).tolist()``.
        coalition: column indices into ``rows`` that form the coalition.
            Any non-empty iterable is accepted (list, tuple, array, ...).
        rows: data indexed as ``rows[:, columns]``.
        threshold: minimum (conditional) mutual information for the feature
            to count as interdependent with a member.

    Returns:
        A truthy value if the feature is winning, a falsy one otherwise.

    Raises:
        ValueError: if ``coalition`` is empty (the "at least half of the
            members" ratio is undefined in that case).
    """
    # Work on a plain list so that `+` below concatenates the "other members"
    # instead of adding element-wise, as it would for a NumPy array.
    members = list(coalition)
    if not members:
        raise ValueError("coalition must contain at least one member")

    x = feature_col.reshape(-1, 1).tolist()

    if len(members) == 1:
        y = rows[:, [members[0]]].tolist()
        return ee.mi(x, y) >= threshold

    dependent = 0
    for i, member in enumerate(members):
        y = rows[:, [member]].tolist()
        # Condition on every coalition member except the current one.
        z = rows[:, members[:i] + members[i + 1:]].tolist()
        if ee.cmi(x, y, z) >= threshold:
            dependent += 1
    # float() keeps true division under Python 2 as well.
    return float(dependent) / len(members) >= 0.5
示例#3
0
# Add the joint entropies of the bivariate Gaussian marginals (x, z) and
# (y, z): H = 0.5 * (2 + log((2*pi)**2 * det(Sigma))), assuming `log` is the
# natural logarithm.
cov_xz = [[cov[0][0], cov[0][2]], [cov[2][0], cov[2][2]]]
cov_yz = [[cov[1][1], cov[1][2]], [cov[2][1], cov[2][2]]]
trueent += 0.5 * (2 + log(4. * pi * pi * det(cov_xz)))  # xz sub
trueent += 0.5 * (2 + log(4. * pi * pi * det(cov_yz)))  # yz sub
# The conditioning variable is z (index 2), so the label reads x:y|z.
# Dividing by log(2) rescales the value to base-2 units.
print('true CMI(x:y|z)', trueent / log(2))

# For every sample size in Ntry, repeat the CMI estimate `nsamples` times on
# fresh draws, then record the mean estimate and its distance to the
# samplo-th / samphi-th sorted estimates.
ent = []
err = []
for sample_size in Ntry:
    estimates = []
    for _ in range(nsamples):
        draws = nr.multivariate_normal(mean, cov, sample_size)
        # Split each draw into a one-element x, a one-element y and the rest (z).
        xs = [row[:1] for row in draws]
        ys = [row[1:2] for row in draws]
        zs = [row[2:] for row in draws]
        estimates.append(ee.cmi(xs, ys, zs))
    estimates = sorted(estimates)
    centre = np.mean(estimates)
    lower_gap = centre - estimates[samplo]
    upper_gap = estimates[samphi] - centre
    ent.append(centre)
    err.append((lower_gap, upper_gap))

print('samples used', Ntry)
print('estimated CMI', ent)
print('95% conf int. (a, b) means (mean - a, mean + b)is interval\n', err)

# MUTUAL INFORMATION

print('Mutual Information')
# Start the reference value from the two univariate Gaussian entropies,
# H = 0.5 * (1 + log(2 * pi * variance)), of x and y.
entropy_x = 0.5 * (1 + log(2. * pi * cov[0][0]))  # x sub
entropy_y = 0.5 * (1 + log(2. * pi * cov[1][1]))  # y sub
trueent = entropy_x + entropy_y
trueent += -0.5 * (2 + log(4. * pi * pi * det(
示例#4
0
trueent = -0.5*(3+log(8.*pi*pi*pi*det(cov))) 
trueent += -0.5*(1+log(2.*pi*cov[2][2])) #z sub
trueent += 0.5*(2+log(4.*pi*pi*det([[cov[0][0],cov[0][2]],[cov[2][0],cov[2][2]]] ))) #xz sub
trueent += 0.5*(2+log(4.*pi*pi*det([[cov[1][1],cov[1][2]],[cov[2][1],cov[2][2]]] ))) #yz sub
print 'true CMI(x:y|x)', trueent/log(2)

ent = []
err = []
for NN in Ntry:
  tempent = []
  for j in range(nsamples):
    points = nr.multivariate_normal(mean,cov,NN)
    x = [point[:1] for point in points] 
    y = [point[1:2] for point in points] 
    z = [point[2:] for point in points] 
    tempent.append(ee.cmi(x,y,z))
  tempent.sort()
  tempmean = np.mean(tempent)
  ent.append(tempmean)
  err.append((tempmean - tempent[samplo],tempent[samphi]-tempmean)) 

print 'samples used',Ntry
print 'estimated CMI',ent
print '95% conf int. (a,b) means (mean-a,mean+b)is interval\n',err

## MUTUAL INFORMATION

print 'Mutual Information'
trueent = 0.5*(1+log(2.*pi*cov[0][0])) #x sub
trueent += 0.5*(1+log(2.*pi*cov[1][1])) #y sub
trueent += -0.5*(2+log(4.*pi*pi*det([[cov[0][0],cov[0][1]],[cov[1][0],cov[1][1]]] ))) #xz sub