Exemplo n.º 1
0
def validate_once(true_cf=None, true_std=None, std_bias=None, save=False, dir='', i=0):
    """
    Generate a set of simulated estimates for the provided true cause fractions; fit the bad model and
    the latent simplex model to this simulated data and calculate quality metrics.

    Parameters:
      true_cf  -- true cause fractions passed to the simulator; when None a fresh
                  [pl.ones(3)/3.0, pl.ones(3)/3.0] is built for this call
      true_std -- true standard deviations passed to the simulator; when None a fresh
                  0.01*pl.ones(3) is built for this call
      std_bias -- bias factors passed to the simulator; when None a fresh [1., 1., 1.]
                  is built for this call
      save     -- if True, write both metric tables to csv and return None;
                  if False, return them instead
      dir      -- directory used in the output file names (also forwarded to
                  retrieve_estimates)
      i        -- integer index embedded in the output file names (also forwarded to
                  retrieve_estimates)

    Returns:
      (bad_model_metrics, latent_simplex_metrics) when save is False, otherwise None.

    The defaults are created inside the function rather than in the signature so
    that separate calls never share (and cannot corrupt) the same list/array objects.
    """

    # build per-call defaults; `is None` is required because arrays have no single truth value
    if true_cf is None:
        true_cf = [pl.ones(3)/3.0, pl.ones(3)/3.0]
    if true_std is None:
        true_std = 0.01*pl.ones(3)
    if std_bias is None:
        std_bias = [1., 1., 1.]

    # generate simulation data (the first argument, 1000, is fixed here)
    X = data.sim_data_for_validation(1000, true_cf, true_std, std_bias)

    # fit bad model, calculate fit metrics
    bad_model = models.bad_model(X)
    bad_model_metrics = calc_quality_metrics(true_cf, true_std, std_bias, bad_model)
    # NOTE(review): retrieve_estimates is always called with True, independent of `save` -- confirm intent
    retrieve_estimates(bad_model, True, 'bad_model', dir, i)

    # fit latent simplex model, calculate fit metrics
    # (the first value returned by fit_latent_simplex is not needed here)
    _, latent_simplex = models.fit_latent_simplex(X)
    latent_simplex_metrics = calc_quality_metrics(true_cf, true_std, std_bias, latent_simplex)
    retrieve_estimates(latent_simplex, True, 'latent_simplex', dir, i)

    # either write results to disk or return them
    # NOTE(review): with the default dir='' these paths begin with '/', i.e. the filesystem root
    if save:
        pl.rec2csv(bad_model_metrics, '%s/metrics_bad_model_%i.csv' % (dir, i))
        pl.rec2csv(latent_simplex_metrics, '%s/metrics_latent_simplex_%i.csv' % (dir, i))
    else:
        return bad_model_metrics, latent_simplex_metrics
Exemplo n.º 2
0
def validate_once(true_cf=None,
                  true_std=None,
                  std_bias=None,
                  save=False,
                  dir='',
                  i=0):
    """
    Generate a set of simulated estimates for the provided true cause fractions; fit the bad model and
    the latent simplex model to this simulated data and calculate quality metrics.

    Arguments left as None are replaced, on every call, by freshly built values:
    true_cf -> [pl.ones(3) / 3.0, pl.ones(3) / 3.0], true_std -> 0.01 * pl.ones(3),
    std_bias -> [1., 1., 1.].  Building them here instead of in the signature
    keeps separate calls from sharing one mutable list/array.

    save selects the output mode: True writes the two metric tables to
    '<dir>/metrics_bad_model_<i>.csv' and '<dir>/metrics_latent_simplex_<i>.csv'
    and returns None; False returns the tuple
    (bad_model_metrics, latent_simplex_metrics).

    dir and i are also forwarded unchanged to retrieve_estimates.
    """

    # fill in per-call defaults (identity test, since arrays cannot be used as booleans)
    if true_cf is None:
        true_cf = [pl.ones(3) / 3.0, pl.ones(3) / 3.0]
    if true_std is None:
        true_std = 0.01 * pl.ones(3)
    if std_bias is None:
        std_bias = [1., 1., 1.]

    # generate simulation data (the first argument, 1000, is fixed here)
    X = data.sim_data_for_validation(1000, true_cf, true_std, std_bias)

    # fit bad model, calculate fit metrics
    bad_model = models.bad_model(X)
    bad_model_metrics = calc_quality_metrics(true_cf, true_std, std_bias,
                                             bad_model)
    # NOTE(review): the True flag is passed regardless of `save` -- confirm intent
    retrieve_estimates(bad_model, True, 'bad_model', dir, i)

    # fit latent simplex model, calculate fit metrics
    # (only the second value returned by fit_latent_simplex is used)
    _, latent_simplex = models.fit_latent_simplex(X)
    latent_simplex_metrics = calc_quality_metrics(true_cf, true_std, std_bias,
                                                  latent_simplex)
    retrieve_estimates(latent_simplex, True, 'latent_simplex', dir, i)

    # either write results to disk or return them
    # NOTE(review): with the default dir='' the csv paths start with '/'
    if save:
        pl.rec2csv(bad_model_metrics, '%s/metrics_bad_model_%i.csv' % (dir, i))
        pl.rec2csv(latent_simplex_metrics,
                   '%s/metrics_latent_simplex_%i.csv' % (dir, i))
    else:
        return bad_model_metrics, latent_simplex_metrics
Exemplo n.º 3
0
import pylab as pl 
import scipy.stats.mstats as st

# set parameters
outdir = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Output'
indir = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data'
# the first three command-line arguments are, in order: age, iso3, sex
age, iso3, sex = sys.argv[1:4]
full_dir = '%s/v02_prep_%s' % (indir, iso3)

# get cause list: the second '+'-separated field of every file name in full_dir
# that matches `age` (note that re.search treats `age` as a regular expression).
# A generator with a neutral variable name is used so the builtin `file` is not shadowed.
causes = list(set(fname.split('+')[1] for fname in os.listdir(full_dir) if re.search(age, fname)))
# temporary until Miriam fixes the HIV files; guarded so that a directory
# without any HIV file does not abort the run with a ValueError
if 'HIV' in causes:
    causes.remove('HIV')

# gather data and fit model
cf = data.get_cod_data(full_dir, causes, age, iso3, sex)
m, pi = models.fit_latent_simplex(cf)

# calculate summary measures
# pi is three-dimensional; every summary below collapses its first axis
# (presumably the model draws -- confirm against models.fit_latent_simplex)
N, T, J = pi.shape
mean = pi.mean(0)
# 2.5% and 97.5% quantiles over the first axis, one value per (t, j) pair
lower = pl.array([[st.mquantiles(pi[:,t,j], 0.025)[0] for j in range(J)] for t in range(T)])
upper = pl.array([[st.mquantiles(pi[:,t,j], 0.975)[0] for j in range(J)] for t in range(T)])

# format summary and save
# the (T, J) arrays are transposed so each of the J rows becomes one named column;
# this requires len(causes) == J
output = pl.np.core.records.fromarrays(mean.T, names=['%s_mean' % c for c in causes])
output = pl.rec_append_fields(output, ['%s_lower' % c for c in causes], lower.T)
output = pl.rec_append_fields(output, ['%s_upper' % c for c in causes], upper.T)
pl.rec2csv(output, '%s/%s+%s+%s+summary.csv' % (outdir, iso3, age, sex))

# format all sims and save
# collapse the first two axes in place, leaving one column per j
# NOTE(review): no statement after this one writes the reshaped array -- confirm the save step exists
pi.shape = (N*T, J)
import graphics
import data
import models

import pylab as pl

countries = 'ABW,AFG,AGO,ALB,ANT,ARE,ARG,ARM,AUS,AUT,AZE,BDI,BEL,BEN,BFA,BGD,BGR,BHR,BHS,BIH,BLR,BLZ,BOL,BRA,BRB,BRN,BTN,BWA,CAF,CAN,CHE,CHL,CHN,CIV,CMR,COD,COG,COL,COM,CPV,CRI,CUB,CYP,CZE,DEU,DJI,DNK,DOM,DZA,ECU,EGY,ERI,ESH,ESP,EST,ETH,FIN,FJI,FRA,FSM,GAB,GBR,GEO,GHA,GIN,GLP,GMB,GNB,GNQ,GRC,GRD,GTM,GUF,GUM,GUY,HKG,HND,HRV,HTI,HUN,IDN,IND,IRL,IRN,IRQ,ISL,ISR,ITA,JAM,JOR,JPN,KAZ,KEN,KGZ,KHM,KOR,KWT,LAO,LBN,LBR,LBY,LCA,LKA,LSO,LTU,LUX,LVA,MAC,MAR,MDA,MDG,MDV,MEX,MKD,MLI,MLT,MMR,MNE,MNG,MOZ,MRT,MTQ,MUS,MWI,MYS,NAM,NCL,NER,NGA,NIC,NLD,NOR,NPL,NZL,OMN,PAK,PAN,PER,PHL,PNG,POL,PRI,PRK,PRT,PRY,PSE,PYF,QAT,REU,ROU,RUS,RWA,SAU,SDN,SEN,SGP,SLB,SLE,SLV,SOM,SRB,STP,SUR,SVK,SVN,SWE,SWZ,SYR,TCD,TGO,THA,TJK,TKM,TLS,TON,TTO,TUN,TUR,TZA,UGA,UKR,URY,USA,UZB,VCT,VEN,VIR,VNM,VUT,WSM,YEM,ZAF,ZMB,ZWE'.split(',')
import random
random.shuffle(countries)
for iso3 in countries:
    print iso3
    try:
        F, causes = data.get_cod_data_all_causes(iso3=iso3)
        N, T, J = F.shape
        pi = pl.zeros((1000, T, J))
        for t in range(T):
            print t+1, 'of', T
            model, pi_t = models.fit_latent_simplex(F[:,t:(t+1),:])
            pi[:,t,:] = pi_t[:,0,:]
    except Exception, e:
        print e
        continue

    graphics.plot_F_and_pi(F, pi, causes, iso3)

    pl.savefig('/home/j/Project/Models/cod-correct/%s.png'%iso3)