Пример #1
0
def test_predict():
    """Check the metrics reported by predict_Rn, predict_Cm and predict_Dl.

    Ten fake Gibbs draws are written directly onto the HMF_Gibbs attributes.
    With burn_in=2 and thinning=3 the draws at indices 2, 5 and 8 (m = 3, 6, 9)
    are the ones the hand-computed expectations below are based on.
    """
    n_draws = 10
    burn_in, thinning = 2, 3  # keeps indices 2, 5, 8, i.e. m = 3, 6, 9
    draw_ids = range(1, n_draws + 1)

    first, second = 'entity0', 'entity1'
    sizes = {first: 5, second: 3}
    K = {first: 2, second: 4}
    n_features = 6

    def fake_draws(shape, factor):
        # One float array per draw m, every entry equal to factor * m**2.
        return [numpy.ones(shape) * factor * m**2 for m in draw_ids]

    def mask_of(shape, selected):
        # Integer 0/1 mask with ones at the given (row, column) cells.
        mask = numpy.zeros(shape, dtype=int)
        for cell in selected:
            mask[cell] = 1
        return mask

    all_Ft = {
        first: fake_draws((sizes[first], K[first]), 3),
        second: fake_draws((sizes[second], K[second]), 1),
    }
    all_lambdat = {
        first: fake_draws(K[first], 3),
        second: fake_draws(K[second], 1),
    }
    # 24 instead of 27, so that the predictions are not all identical
    # (which would give zero variance).
    all_Ft[first][2][0, 0] = 24
    all_Sn = [fake_draws((K[first], K[second]), 2)]
    all_taun = [[m**2 for m in draw_ids]]
    all_Sm = [fake_draws((K[second], K[second]), 4)]
    all_taum = [[m**2 * 2 for m in draw_ids]]
    all_Gl = [fake_draws((n_features, K[first]), 6)]
    all_taul = [[m**2 * 3 for m in draw_ids]]

    # Fully observed datasets holding 1..15, 1..9 and 1..30 row by row.
    R0 = numpy.arange(1, 16, dtype=float).reshape(5, 3)
    C0 = numpy.arange(1, 10, dtype=float).reshape(3, 3)
    D0 = numpy.arange(1, 31, dtype=float).reshape(5, 6)
    R = [(R0, numpy.ones((5, 3)), first, second, 1.)]
    C = [(C0, numpy.ones((3, 3)), second, 1.)]
    D = [(D0, numpy.ones((5, 6)), first, 1.)]

    priors = {
        'alpha0': 6., 'beta0': 7.,
        'alphatau': 1., 'betatau': 2.,
        'lambdaF': 3., 'lambdaG': 8.,
        'lambdaSn': 4., 'lambdaSm': 5.,
    }
    settings = {
        'priorF': 'exponential', 'priorG': 'normal',
        'priorSn': 'normal', 'priorSm': 'normal',
        'ARD': True,
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
    }

    # Hand-computed averages over m = 3, 6, 9:
    #   F0 = 126 everywhere except 125 at [0,0]; F1 = 42; Sn = 84; taun = 42
    #     -> R_pred is 3542112 in row 0 and 3556224 in every other row.
    #   Sm = 168; taum = 84 -> C_pred is 4741632 everywhere.
    #   Gl = 252; taul = 126
    #     -> D_pred is 63252 in row 0 and 63504 in every other row.

    # R test entries 3, 5, 10, 11 (mean 7.25), predicted as
    # 3542112, 3556224, 3556224, 3556224.
    M_test_R = mask_of((5, 3), [(0, 2), (1, 1), (3, 0), (3, 1)])
    sq_err_R = (3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2
    MSE_R = sq_err_R / 4.
    R2_R = 1. - sq_err_R / (4.25**2+2.25**2+2.75**2+3.75**2)
    # Variance terms 44.75 (true) and 5292 (predicted, rescaled); covariance
    # term -4.25*-63 + -2.25*21 + 2.75*21 + 3.75*21 = 357.
    spread_true = math.sqrt(44.75)
    spread_pred = math.sqrt(5292.)
    Rp_R = 357. / (spread_true * spread_pred)

    # C test entries 3, 5, 7, 8 (mean 5.75), all predicted as 4741632, so the
    # Pearson correlation is expected to be NaN.
    M_test_C = mask_of((3, 3), [(0, 2), (1, 1), (2, 0), (2, 1)])
    sq_err_C = (3.-4741632.)**2 + (5.-4741632.)**2 + (7.-4741632.)**2 + (8.-4741632.)**2
    MSE_C = sq_err_C / 4.
    R2_C = 1. - sq_err_C / (2.75**2+0.75**2+1.25**2+2.25**2)

    # D test entries 3, 6, 8, 13, 14 (mean 8.8), predicted as
    # 63252, 63252, 63504, 63504, 63504.
    M_test_D = mask_of((5, 6), [(0, 2), (0, 5), (1, 1), (2, 0), (2, 1)])
    sq_err_D = (3.-63252.)**2 + (6.-63252.)**2 + (8.-63504.)**2 + (13.-63504.)**2 + (14.-63504.)**2
    MSE_D = sq_err_D / 5.
    R2_D = 1. - sq_err_D / (5.8**2+2.8**2+0.8**2+4.2**2+5.2**2)
    Rp_D = 0.84265143679484211

    model = HMF_Gibbs(R, C, D, K, settings, priors)
    model.iterations = n_draws
    stored_draws = (
        ('iterations_all_Ft', all_Ft),
        ('iterations_all_lambdat', all_lambdat),
        ('iterations_all_Sn', all_Sn),
        ('iterations_all_taun', all_taun),
        ('iterations_all_Sm', all_Sm),
        ('iterations_all_taum', all_taum),
        ('iterations_all_Gl', all_Gl),
        ('iterations_all_taul', all_taul),
    )
    for attribute, draws in stored_draws:
        setattr(model, attribute, draws)

    result_R = model.predict_Rn(0, M_test_R, burn_in, thinning)
    result_C = model.predict_Cm(0, M_test_C, burn_in, thinning)
    result_D = model.predict_Dl(0, M_test_D, burn_in, thinning)

    assert result_R['MSE'] == MSE_R
    assert result_R['R^2'] == R2_R
    assert result_R['Rp'] == Rp_R

    assert result_C['MSE'] == MSE_C
    assert result_C['R^2'] == R2_C
    assert numpy.isnan(result_C['Rp'])

    assert result_D['MSE'] == MSE_D
    assert result_D['R^2'] == R2_D
    assert abs(result_D['Rp'] - Rp_D) < 1e-11
Пример #2
0
def test_log_likelihood():
    """Check no_datapoints, no_parameters and quality() on fake draws.

    The log-likelihood, AIC and BIC are compared with hand-computed values
    (absolute tolerance 1), and an unknown metric name must raise an
    AssertionError carrying the expected message.
    """
    n_draws = 10
    burn_in, thinning = 2, 3  # keeps indices 2, 5, 8, i.e. m = 3, 6, 9
    draw_ids = range(1, n_draws + 1)

    first, second = 'entity0', 'entity1'
    sizes = {first: 5, second: 3}
    K = {first: 2, second: 4}
    n_features = 6

    def fake_draws(shape, factor):
        # One float array per draw m, every entry equal to factor * m**2.
        return [numpy.ones(shape) * factor * m**2 for m in draw_ids]

    def mask_of(shape, selected):
        # Integer 0/1 mask with ones at the given (row, column) cells.
        mask = numpy.zeros(shape, dtype=int)
        for cell in selected:
            mask[cell] = 1
        return mask

    all_Ft = {
        first: fake_draws((sizes[first], K[first]), 3),
        second: fake_draws((sizes[second], K[second]), 1),
    }
    all_lambdat = {
        first: fake_draws(K[first], 3),
        second: fake_draws(K[second], 1),
    }
    # 24 instead of 27, so that the predictions are not all identical
    # (which would give zero variance).
    all_Ft[first][2][0, 0] = 24
    all_Sn = [fake_draws((K[first], K[second]), 2)]
    all_taun = [[m**2 for m in draw_ids]]
    all_Sm = [fake_draws((K[second], K[second]), 4)]
    all_taum = [[m**2 * 2 for m in draw_ids]]
    all_Gl = [fake_draws((n_features, K[first]), 6)]
    all_taul = [[m**2 * 3 for m in draw_ids]]

    # Datasets holding 1..15, 1..9 and 1..30 row by row.
    R0 = numpy.arange(1, 16, dtype=float).reshape(5, 3)
    C0 = numpy.arange(1, 10, dtype=float).reshape(3, 3)
    D0 = numpy.arange(1, 31, dtype=float).reshape(5, 6)

    # Observed R entries 3, 5, 10, 11, predicted as
    # 3542112, 3556224, 3556224, 3556224.
    M0 = mask_of((5, 3), [(0, 2), (1, 1), (3, 0), (3, 1)])
    # The mask also marks the diagonal cell holding 5; per the original note
    # that entry gets set to 0, leaving C entries 3, 7, 8 (each predicted as
    # 4741632).
    M1 = mask_of((3, 3), [(0, 2), (1, 1), (2, 0), (2, 1)])
    # Observed D entries 3, 6, 8, 13, 14, 25, predicted as
    # 63252, 63252, 63504, 63504, 63504, 63504.
    M2 = mask_of((5, 6), [(0, 2), (0, 5), (1, 1), (2, 0), (2, 1), (4, 0)])

    R = [(R0, M0, first, second, 1.)]
    C = [(C0, M1, second, 1.)]
    D = [(D0, M2, first, 1.)]

    # Hand-computed averages over m = 3, 6, 9:
    #   F0 = 126 everywhere except 125 at [0,0]; F1 = 42; Sn = 84; taun = 42
    #     -> R_pred is 3542112 in row 0 and 3556224 in every other row.
    #   Sm = 168; taum = 84 -> C_pred is 4741632 everywhere.
    #   Gl = 252; taul = 126
    #     -> D_pred is 63252 in row 0 and 63504 in every other row.
    MSE_R = ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / 4.
    MSE_C = ((3.-4741632.)**2 + (7.-4741632.)**2 + (8.-4741632.)**2) / 3.
    MSE_D = ((3.-63252.)**2 + (6.-63252.)**2 + (8.-63504.)**2 + (13.-63504.)**2 + (14.-63504.)**2 + (25.-63504.)**2) / 6.

    model = HMF_Gibbs(R, C, D, K, {}, {})
    model.iterations = n_draws
    stored_draws = (
        ('iterations_all_Ft', all_Ft),
        ('iterations_all_lambdat', all_lambdat),
        ('iterations_all_Sn', all_Sn),
        ('iterations_all_taun', all_taun),
        ('iterations_all_Sm', all_Sm),
        ('iterations_all_taum', all_taum),
        ('iterations_all_Gl', all_Gl),
        ('iterations_all_taul', all_taul),
    )
    for attribute, draws in stored_draws:
        setattr(model, attribute, draws)

    # One Gaussian term per dataset (R, C, D), using tau = 42, 84, 126 and
    # 4, 3, 6 observed entries.  The terms are accumulated strictly left to
    # right: the values are around 1e15, so the summation order matters for
    # the tolerance used below.
    log_2pi = math.log(2*math.pi)
    log_likelihood = (
        4./2. * (math.log(42.) - log_2pi) - 42./2.*(MSE_R*4.)
        + 3./2. * (math.log(84.) - log_2pi) - 84./2.*(MSE_C*3.)
        + 6./2. * (math.log(126.) - log_2pi) - 126./2.*(MSE_D*6.)
    )
    no_parameters = sum([
        5*2, 4*3,  # F0, F1
        2*4, 4*4,  # Sn0, Sm0
        2*6,       # G
        2, 4,      # lambda0, lambda1
        3,         # tau
    ])
    no_datapoints = 4+3+6
    AIC = -2*log_likelihood + 2*no_parameters
    BIC = -2*log_likelihood + no_parameters*math.log(no_datapoints)

    assert model.no_datapoints() == no_datapoints
    assert model.no_parameters() == no_parameters
    for metric, expected in (('loglikelihood', log_likelihood),
                             ('AIC', AIC),
                             ('BIC', BIC)):
        assert abs(expected - model.quality(metric, burn_in, thinning)) <= 1.
    with pytest.raises(AssertionError) as raised:
        model.quality('FAIL', burn_in, thinning)
    assert str(raised.value) == "Unrecognised metric for model quality: FAIL."
0
def test_approx_expectation():
    """Check every approx_expectation_* method against hand-computed values.

    Ten fake Gibbs draws are written directly onto the HMF_Gibbs attributes.
    With burn_in=2 and thinning=3 the expected values are the averages of the
    draws at indices 2, 5 and 8 (m = 3, 6, 9).
    """
    n_draws = 10
    burn_in, thinning = 2, 3  # keeps indices 2, 5, 8, i.e. m = 3, 6, 9
    draw_ids = range(1, n_draws + 1)

    first, second = 'entity0', 'entity1'
    sizes = {first: 5, second: 3}
    K = {first: 2, second: 4}
    n_features = 6

    def fake_draws(shape, factor):
        # One float array per draw m, every entry equal to factor * m**2.
        return [numpy.ones(shape) * factor * m**2 for m in draw_ids]

    def filled(shape, value):
        # Float array of the given shape with every entry equal to value.
        return numpy.ones(shape) * value

    all_Ft = {
        first: fake_draws((sizes[first], K[first]), 3),
        second: fake_draws((sizes[second], K[second]), 1),
    }
    all_lambdat = {
        first: fake_draws(K[first], 3),
        second: fake_draws(K[second], 1),
    }
    all_Sn = [fake_draws((K[first], K[second]), 2)]
    all_lambdan = [fake_draws((K[first], K[second]), 2)]
    all_taun = [[m**2 for m in draw_ids]]
    all_Sm = [fake_draws((K[second], K[second]), 4)]
    all_lambdam = [fake_draws((K[second], K[second]), 4)]
    all_taum = [[m**2 * 2 for m in draw_ids]]
    all_Gl = [fake_draws((n_features, K[second]), 6)]
    all_taul = [[m**2 * 3 for m in draw_ids]]

    # Sums of m**2, 2*m**2 and 3*m**2 over the retained draws m = 3, 6, 9.
    sum_sq = 9.+36.+81.
    sum_sq_x2 = 18.+72.+162.
    sum_sq_x3 = 27.+108.+243.

    expected_F0 = filled((5, 2), sum_sq)
    expected_F1 = filled((3, 4), sum_sq*(1./3.))
    expected_lambda0 = filled(2, sum_sq)
    expected_lambda1 = filled(4, sum_sq*(1./3.))
    expected_Sn = filled((2, 4), sum_sq*(2./3.))
    expected_lambdan = filled((2, 4), sum_sq*(2./3.))
    expected_taun = sum_sq/3.
    expected_Sm = filled((4, 4), sum_sq_x2*(2./3.))
    expected_lambdam = filled((4, 4), sum_sq_x2*(2./3.))
    expected_taum = sum_sq_x2/3.
    expected_Gl = filled((6, 4), sum_sq_x3*(2./3.))
    expected_taul = sum_sq_x3/3.

    # All-ones datasets and masks; only their shapes differ.
    shape_R = (sizes[first], sizes[second])
    shape_C = (sizes[second], sizes[second])
    shape_D = (sizes[second], n_features)
    R = [(numpy.ones(shape_R), numpy.ones(shape_R), first, second, 1.)]
    C = [(numpy.ones(shape_C), numpy.ones(shape_C), second, 1.)]
    D = [(numpy.ones(shape_D), numpy.ones(shape_D), second, 1.)]

    priors = {
        'alpha0': 6., 'beta0': 7.,
        'alphatau': 1., 'betatau': 2.,
        'lambdaF': 3., 'lambdaG': 8.,
        'lambdaSn': 4., 'lambdaSm': 5.,
    }
    settings = {
        'priorF': 'exponential', 'priorG': 'normal',
        'priorSn': 'normal', 'priorSm': 'normal',
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
        'ARD': True, 'element_sparsity': True,
    }

    model = HMF_Gibbs(R, C, D, K, settings, priors)
    model.iterations = n_draws
    stored_draws = (
        ('iterations_all_Ft', all_Ft),
        ('iterations_all_lambdat', all_lambdat),
        ('iterations_all_Sn', all_Sn),
        ('iterations_all_lambdan', all_lambdan),
        ('iterations_all_taun', all_taun),
        ('iterations_all_Sm', all_Sm),
        ('iterations_all_lambdam', all_lambdam),
        ('iterations_all_taum', all_taum),
        ('iterations_all_Gl', all_Gl),
        ('iterations_all_taul', all_taul),
    )
    for attribute, draws in stored_draws:
        setattr(model, attribute, draws)

    got_F0 = model.approx_expectation_Ft(first, burn_in, thinning)
    got_F1 = model.approx_expectation_Ft(second, burn_in, thinning)
    got_lambda0 = model.approx_expectation_lambdat(first, burn_in, thinning)
    got_lambda1 = model.approx_expectation_lambdat(second, burn_in, thinning)
    got_Sn = model.approx_expectation_Sn(0, burn_in, thinning)
    got_lambdan = model.approx_expectation_lambdan(0, burn_in, thinning)
    got_taun = model.approx_expectation_taun(0, burn_in, thinning)
    got_Sm = model.approx_expectation_Sm(0, burn_in, thinning)
    got_lambdam = model.approx_expectation_lambdam(0, burn_in, thinning)
    got_taum = model.approx_expectation_taum(0, burn_in, thinning)
    got_Gl = model.approx_expectation_Gl(0, burn_in, thinning)
    got_taul = model.approx_expectation_taul(0, burn_in, thinning)

    assert numpy.array_equal(expected_F0, got_F0)
    assert numpy.array_equal(expected_F1, got_F1)
    assert numpy.array_equal(expected_lambda0, got_lambda0)
    assert numpy.array_equal(expected_lambda1, got_lambda1)
    assert numpy.array_equal(expected_Sn, got_Sn)
    assert numpy.array_equal(expected_lambdan, got_lambdan)
    assert expected_taun == got_taun
    assert numpy.array_equal(expected_Sm, got_Sm)
    assert numpy.array_equal(expected_lambdam, got_lambdam)
    assert expected_taum == got_taum
    assert numpy.array_equal(expected_Gl, got_Gl)
    assert expected_taul == got_taul