def get_isd_lik_three_models(dat_list, out_dir = './out_files/', cutoff = 9):
    """Write per-site ISD log-likelihoods and AICc values for three models.

    The three models are METE, SSNT on D, and SSNT on D**(2/3). For every
    dataset name the raw data are read from './data/<dat_name>.csv', and each
    site with more than `cutoff` species is evaluated on diameters rescaled
    by the smallest diameter at that site.

    Inputs:
    dat_list - iterable of dataset names (file stems under './data/')
    out_dir - directory that receives the two output files
    cutoff - a site is analysed only if its species count exceeds this value

    Output:
    Appends one space-separated line per qualifying site to
    'isd_lik_three_models.txt' (log-likelihood divided by the number of
    individuals) and to 'isd_aicc_three_models.txt' (AICc). Each line holds
    dat_name, site, METE, SSNT, SSNT on D**(2/3). Returns None.

    """
    lik_path = out_dir + 'isd_lik_three_models.txt'
    aicc_path = out_dir + 'isd_aicc_three_models.txt'
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3

    for dat_name in dat_list:
        dat = wk.import_raw_data('./data/' + dat_name + '.csv')
        for site in np.unique(dat['site']):
            dat_site = dat[dat['site'] == site]
            S0 = len(np.unique(dat_site['sp']))
            if S0 <= cutoff:
                continue

            N0 = len(dat_site)
            dbh_scaled = dat_site['dbh'] / min(dat_site['dbh'])
            psi = mete_distributions.psi_epsilon(S0, N0, sum(dbh_scaled ** 2))
            ssnt_isd = ssnt_isd_bounded(1, N0 / (sum(dbh_scaled) - N0))
            ssnt_isd_transform = ssnt_isd_bounded(
                alpha_transform, N0 / (sum(dbh_scaled ** alpha_transform) - N0))

            lik_mete, lik_ssnt, lik_ssnt_transform = 0, 0, 0
            for dbh in dbh_scaled:
                # psi is on dbh**2, hence the factor 2 * dbh from the change of variable
                lik_mete += np.log(psi.pdf(dbh ** 2) * 2 * dbh)
                lik_ssnt += np.log(ssnt_isd.pdf(dbh))
                lik_ssnt_transform += np.log(ssnt_isd_transform.pdf(dbh))

            lik_fields = [dat_name, site, lik_mete / N0, lik_ssnt / N0, lik_ssnt_transform / N0]
            # METE has three parameters (S0, N0, E0) for ISD, while SSNT has two (N0 and sum(dbh**alpha))
            aicc_fields = [dat_name, site, mtools.AICc(lik_mete, 3, N0),
                           mtools.AICc(lik_ssnt, 2, N0),
                           mtools.AICc(lik_ssnt_transform, 2, N0)]

            # `with` guarantees the handle is released even if the write fails.
            with open(lik_path, 'a') as out1:
                out1.write(' '.join(str(x) for x in lik_fields) + '\n')
            with open(aicc_path, 'a') as out2:
                out2.write(' '.join(str(x) for x in aicc_fields) + '\n')
def bootstrap_SDR(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for SDR applying to all four models.

    The R^2 between the observed and predicted values stored in the
    obs_pred file is computed first. Then, Niter times, one value per species
    is simulated from the chosen model and its R^2 against the same
    predictions is recorded.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2: a single comma-joined record made of
    dat_name, site, the empirical R^2 and the Niter bootstrap R^2 values.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # One [m, n] pair per species: n is the number of records of the species,
    # m is the number of species sharing its genus in the cleaned data.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, len(dat_sp)])

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)  # loop-invariant, reused for every bootstrap sample
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]

    iisd_agsne = mete_distributions.theta_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled ** 1) - N))
    iisd_ssnt_1 = ssnt_isd_bounded(alpha_transform, N / (sum(dbh_scaled ** alpha_transform) - N))
    dist_for_model = {'ssnt_0': iisd_ssnt_0, 'ssnt_1': iisd_ssnt_1, 'asne': iisd_asne, 'agsne': iisd_agsne}
    dist = dist_for_model[model]

    for _ in range(Niter):
        if model in ('ssnt_0', 'ssnt_1'):
            # SSNT draws are squared before averaging.
            # NOTE(review): presumably to match the D**2 scale of the METE-based models - confirm.
            obs_boot = np.array([np.mean(dist.rvs(n) ** 2) for m, n in par_list])
        elif model == 'asne':
            obs_boot = np.array([np.mean(np.array(dist.rvs(n, n))) for m, n in par_list])
        else:
            # 'agsne': the sampler additionally receives m for the species' genus
            obs_boot = np.array([np.mean(np.array(dist.rvs(n, n, m))) for m, n in par_list])
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
def bootstrap_SAD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """Bootstrap the SAD goodness-of-fit statistics for one of the four models.

    The empirical R^2 and KS statistic are computed from the stored obs_pred
    file. Then Niter samples of S abundances are drawn from the model's SAD
    and the same two statistics are computed for each sample.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.

    """
    dat_name, site = name_site_combo
    raw = wk.import_raw_data(in_dir + dat_name + '.csv')
    cleaned = clean_data_agsne(raw[raw['site'] == site])
    G, S, N, E = get_GSNE(cleaned)

    beta_ssnt = mete.get_beta(S, N, version = 'untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    agsne_pars = [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3]

    # All four candidate SADs are built before the requested one is picked.
    candidates = {}
    candidates['agsne'] = mete_distributions.sad_agsne([G, S, N, E], agsne_pars)
    candidates['ssnt_0'] = stats.logser(np.exp(-beta_ssnt))
    candidates['ssnt_1'] = stats.logser(np.exp(-beta_ssnt))
    candidates['asne'] = md.trunc_logser(np.exp(-beta_asne), N)
    dist = candidates[model]

    table = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    site_rows = table[table['site'] == site]
    # The stored order is reversed before use.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)

    # First entry after (dat_name, site) is the empirical statistic.
    rsquare_record = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]
    emp_cdf = mtools.get_emp_cdf(obs)
    model_cdf = np.array([dist.cdf(x) for x in obs])
    ks_record = [dat_name, site, str(max(abs(emp_cdf - model_cdf)))]

    draws_done = 0
    while draws_done < Niter:
        sample = np.array(sorted(dist.rvs(S)))
        sample_model_cdf = np.array([dist.cdf(x) for x in sample])
        sample_emp_cdf = mtools.get_emp_cdf(sample)
        rsquare_record.append(str(mtools.obs_pred_rsquare(np.log10(sample), log_pred)))
        ks_record.append(str(max(abs(sample_emp_cdf - sample_model_cdf))))
        draws_done += 1

    prefix = out_dir + 'SAD_bootstrap_' + model
    wk.write_to_file(prefix + '_rsquare.txt', ",".join(str(x) for x in rsquare_record))
    wk.write_to_file(prefix + '_ks.txt', ",".join(str(x) for x in ks_record))
def get_isd_lik_three_models(dat_list, out_dir='./out_files/', cutoff=9):
    """Write per-site ISD log-likelihoods and AICc values for three models.

    The three models are METE, SSNT on D, and SSNT on D**(2/3). For every
    dataset name the raw data are read from './data/<dat_name>.csv', and each
    site with more than `cutoff` species is evaluated on diameters rescaled
    by the smallest diameter at that site.

    Inputs:
    dat_list - iterable of dataset names (file stems under './data/')
    out_dir - directory that receives the two output files
    cutoff - a site is analysed only if its species count exceeds this value

    Output:
    Appends one space-separated line per qualifying site to
    'isd_lik_three_models.txt' (log-likelihood divided by the number of
    individuals) and to 'isd_aicc_three_models.txt' (AICc). Each line holds
    dat_name, site, METE, SSNT, SSNT on D**(2/3). Returns None.

    """
    lik_path = out_dir + 'isd_lik_three_models.txt'
    aicc_path = out_dir + 'isd_aicc_three_models.txt'
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3

    for dat_name in dat_list:
        dat = wk.import_raw_data('./data/' + dat_name + '.csv')
        for site in np.unique(dat['site']):
            dat_site = dat[dat['site'] == site]
            S0 = len(np.unique(dat_site['sp']))
            if S0 <= cutoff:
                continue

            N0 = len(dat_site)
            dbh_scaled = dat_site['dbh'] / min(dat_site['dbh'])
            psi = mete_distributions.psi_epsilon(S0, N0,
                                                 sum(dbh_scaled**2))
            ssnt_isd = ssnt_isd_bounded(1, N0 / (sum(dbh_scaled) - N0))
            ssnt_isd_transform = ssnt_isd_bounded(
                alpha_transform,
                N0 / (sum(dbh_scaled**alpha_transform) - N0))

            lik_mete, lik_ssnt, lik_ssnt_transform = 0, 0, 0
            for dbh in dbh_scaled:
                # psi is on dbh**2, hence the factor 2 * dbh from the change of variable
                lik_mete += np.log(psi.pdf(dbh**2) * 2 * dbh)
                lik_ssnt += np.log(ssnt_isd.pdf(dbh))
                lik_ssnt_transform += np.log(ssnt_isd_transform.pdf(dbh))

            lik_fields = [
                dat_name, site, lik_mete / N0, lik_ssnt / N0,
                lik_ssnt_transform / N0
            ]
            # METE has three parameters (S0, N0, E0) for ISD, while SSNT has two (N0 and sum(dbh**alpha))
            aicc_fields = [
                dat_name, site,
                mtools.AICc(lik_mete, 3, N0),
                mtools.AICc(lik_ssnt, 2, N0),
                mtools.AICc(lik_ssnt_transform, 2, N0)
            ]

            # `with` guarantees the handle is released even if the write fails.
            with open(lik_path, 'a') as out1:
                out1.write(' '.join(str(x) for x in lik_fields) + '\n')
            with open(aicc_path, 'a') as out2:
                out2.write(' '.join(str(x) for x in aicc_fields) + '\n')
    "NC",
    "Oosting",
    "Serimbu",
    "WesternGhats",
    "Cocoli",
    "Luquillo",
    "Sherman",
    "Shirakami",
]

# Obtain predicted-observed values for the three patterns from the four models.
# dat_list_keep and dat_site_list record every (dataset, site) that survives cleaning.
dat_list_keep = []
dat_site_list = []
model_list = ["ssnt_0", "ssnt_1", "asne", "agsne"]
for dat_name in dat_list:
    dat = wk.import_raw_data("./data/" + dat_name + ".csv")
    for site in np.unique(dat["site"]):
        dat_site = dat[dat["site"] == site]
        dat_clean = smc.clean_data_agsne(dat_site)
        if dat_clean is None:
            # Cleaning rejected this site; nothing to compute for it.
            continue
        dat_list_keep.append(dat_name)
        dat_site_list.append([dat_name, site])
        smc.get_lik_sp_abd_dbh_four_models(dat_clean, dat_name)

        for model in model_list:
            # Compare strings by value: `is` tests object identity and only
            # matched here by accident of string interning.
            if model == "ssnt_0":
                smc.get_obs_pred_sad(dat_clean, dat_name, "ssnt")
            elif model in ["asne", "agsne"]:
                smc.get_obs_pred_sad(dat_clean, dat_name, model)
            # No SAD call is made for 'ssnt_1'.
            # NOTE(review): presumably because both SSNT variants share the SAD written as 'ssnt' - confirm.
            smc.get_obs_pred_isd(dat_clean, dat_name, model)
            smc.get_obs_pred_sdr(dat_clean, dat_name, model)
# Example no. 6
# 0
import working_functions as wk
import mete_distributions as medis
import numpy as np
import multiprocessing

# Names of the datasets to process; each is read from './data/<name>.csv'.
dat_list = [
    'ACA', 'BCI', 'BVSF', 'CSIRO', 'FERP', 'Lahei', 'LaSelva', 'NC', 'Oosting',
    'Serimbu', 'WesternGhats', 'Cocoli', 'Luquillo', 'Sherman', 'Shirakami'
]

# Obtain predicted-observed values for the three patterns from the four models.
# dat_list_keep and dat_site_list record every (dataset, site) that survives cleaning.
# NOTE(review): `smc` is used below but is not among the imports visible
# directly above - confirm it is imported elsewhere in the file.
dat_list_keep = []
dat_site_list = []
model_list = ['ssnt_0', 'ssnt_1', 'asne', 'agsne']
for dat_name in dat_list:
    dat = wk.import_raw_data('./data/' + dat_name + '.csv')
    for site in np.unique(dat['site']):
        dat_site = dat[dat['site'] == site]
        dat_clean = smc.clean_data_agsne(dat_site)
        if dat_clean is None:
            # Cleaning rejected this site; nothing to compute for it.
            continue
        dat_list_keep.append(dat_name)
        dat_site_list.append([dat_name, site])
        smc.get_lik_sp_abd_dbh_four_models(dat_clean, dat_name)

        for model in model_list:
            # Compare strings by value: `is` tests object identity and only
            # matched here by accident of string interning.
            if model == 'ssnt_0':
                smc.get_obs_pred_sad(dat_clean, dat_name, 'ssnt')
            elif model in ['asne', 'agsne']:
                smc.get_obs_pred_sad(dat_clean, dat_name, model)
            # No SAD call is made for 'ssnt_1'.
            # NOTE(review): presumably because both SSNT variants share the SAD written as 'ssnt' - confirm.
            smc.get_obs_pred_isd(dat_clean, dat_name, model)
            smc.get_obs_pred_sdr(dat_clean, dat_name, model)
def bootstrap_ISD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for ISD applying to all four models.

    The empirical R^2 and KS statistic are written first. Then, Niter times,
    at least N values are collected from wk.generate_isd_sample run in worker
    processes, the first N are kept, and their R^2 and KS statistic are
    written out as they are produced.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    isd_agsne = mete_distributions.psi_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    isd_asne = mete_distributions.psi_epsilon_approx(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3
    isd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled ** 1) - N))
    isd_ssnt_1 = ssnt_isd_bounded(alpha_transform, N / (sum(dbh_scaled ** alpha_transform) - N))
    dist_for_model = {'ssnt_0': isd_ssnt_0, 'ssnt_1': isd_ssnt_1, 'asne': isd_asne, 'agsne': isd_agsne}
    dist = dist_for_model[model]

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_isd_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)  # loop-invariant, reused for every bootstrap sample

    rsquare_path = out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt'
    ks_path = out_dir + 'ISD_bootstrap_' + model + '_ks.txt'

    # Empirical statistics open each record; new_line=False keeps the record
    # open so the bootstrap values can be appended to it one at a time.
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]
    wk.write_to_file(rsquare_path, ",".join(str(x) for x in out_list_rsquare), new_line = False)
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    wk.write_to_file(ks_path, ",".join(str(x) for x in out_list_ks), new_line = False)

    num_pools = 8  # Worker processes per pool, and sampling tasks submitted per round
    for _ in range(Niter):
        obs_boot = []
        cdf_boot = []
        while len(obs_boot) < N:
            pool = multiprocessing.Pool(num_pools)
            try:
                out_sample = pool.map(wk.generate_isd_sample, [dist] * num_pools)
            finally:
                # Shut the workers down even when sampling raises.
                pool.close()
                pool.join()
            for cdf_sublist, sample_sublist in out_sample:
                obs_boot.extend(sample_sublist)
                cdf_boot.extend(cdf_sublist)
        if model in ['asne', 'agsne']:
            obs_boot = np.sort(obs_boot[:N]) ** 0.5  # Convert to diameter
        else:
            obs_boot = np.sort(obs_boot[:N])
        sample_rsquare = mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)
        sample_ks = max(abs(emp_cdf - np.sort(cdf_boot[:N])))

        wk.write_to_file(rsquare_path, "".join([',', str(sample_rsquare)]), new_line = False)
        wk.write_to_file(ks_path, "".join([',', str(sample_ks)]), new_line = False)

    wk.write_to_file(rsquare_path, '\t')
    wk.write_to_file(ks_path, '\t')
    # NOTE(review): the empirical KS record was already written before the
    # loop; this second write has no R^2 counterpart and looks like a
    # leftover - confirm before relying on the KS file layout.
    wk.write_to_file(ks_path, ",".join(str(x) for x in out_list_ks))
def bootstrap_SDR(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for SDR applying to all four models.

    The R^2 between the observed and predicted values stored in the
    obs_pred file is computed first. Then, Niter times, one value per species
    is simulated from the chosen model and its R^2 against the same
    predictions is recorded.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2: a single comma-joined record made of
    dat_name, site, the empirical R^2 and the Niter bootstrap R^2 values.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # One [m, n] pair per species: n is the number of records of the species,
    # m is the number of species sharing its genus in the cleaned data.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, len(dat_sp)])

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' +
                                       model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)  # loop-invariant, reused for every bootstrap sample
    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))
    ]

    iisd_agsne = mete_distributions.theta_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled**1) - N))
    iisd_ssnt_1 = ssnt_isd_bounded(
        alpha_transform, N / (sum(dbh_scaled**alpha_transform) - N))
    dist_for_model = {
        'ssnt_0': iisd_ssnt_0,
        'ssnt_1': iisd_ssnt_1,
        'asne': iisd_asne,
        'agsne': iisd_agsne
    }
    dist = dist_for_model[model]

    for _ in range(Niter):
        if model in ('ssnt_0', 'ssnt_1'):
            # SSNT draws are squared before averaging.
            # NOTE(review): presumably to match the D**2 scale of the METE-based models - confirm.
            obs_boot = np.array(
                [np.mean(dist.rvs(n)**2) for m, n in par_list])
        elif model == 'asne':
            obs_boot = np.array(
                [np.mean(np.array(dist.rvs(n, n))) for m, n in par_list])
        else:
            # 'agsne': the sampler additionally receives m for the species' genus
            obs_boot = np.array(
                [np.mean(np.array(dist.rvs(n, n, m))) for m, n in par_list])
        out_list_rsquare.append(
            str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
def bootstrap_ISD(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for ISD applying to all four models.

    The empirical R^2 and KS statistic are written first. Then, Niter times,
    at least N values are collected from wk.generate_isd_sample run in worker
    processes, the first N are kept, and their R^2 and KS statistic are
    written out as they are produced.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    isd_agsne = mete_distributions.psi_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    isd_asne = mete_distributions.psi_epsilon_approx(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    # Spelled as a float so the exponent cannot collapse to 0 under
    # Python 2 integer division.
    alpha_transform = 2.0 / 3
    isd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled**1) - N))
    isd_ssnt_1 = ssnt_isd_bounded(
        alpha_transform, N / (sum(dbh_scaled**alpha_transform) - N))
    dist_for_model = {
        'ssnt_0': isd_ssnt_0,
        'ssnt_1': isd_ssnt_1,
        'asne': isd_asne,
        'agsne': isd_agsne
    }
    dist = dist_for_model[model]

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_isd_' +
                                       model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)  # loop-invariant, reused for every bootstrap sample

    rsquare_path = out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt'
    ks_path = out_dir + 'ISD_bootstrap_' + model + '_ks.txt'

    # Empirical statistics open each record; new_line=False keeps the record
    # open so the bootstrap values can be appended to it one at a time.
    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))
    ]
    wk.write_to_file(rsquare_path,
                     ",".join(str(x) for x in out_list_rsquare),
                     new_line=False)
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [
        dat_name, site,
        str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))
    ]
    wk.write_to_file(ks_path,
                     ",".join(str(x) for x in out_list_ks),
                     new_line=False)

    num_pools = 8  # Worker processes per pool, and sampling tasks submitted per round
    for _ in range(Niter):
        obs_boot = []
        cdf_boot = []
        while len(obs_boot) < N:
            pool = multiprocessing.Pool(num_pools)
            try:
                out_sample = pool.map(wk.generate_isd_sample,
                                      [dist] * num_pools)
            finally:
                # Shut the workers down even when sampling raises.
                pool.close()
                pool.join()
            for cdf_sublist, sample_sublist in out_sample:
                obs_boot.extend(sample_sublist)
                cdf_boot.extend(cdf_sublist)
        if model in ['asne', 'agsne']:
            obs_boot = np.sort(obs_boot[:N])**0.5  # Convert to diameter
        else:
            obs_boot = np.sort(obs_boot[:N])
        sample_rsquare = mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)
        sample_ks = max(abs(emp_cdf - np.sort(cdf_boot[:N])))

        wk.write_to_file(rsquare_path,
                         "".join([',', str(sample_rsquare)]),
                         new_line=False)
        wk.write_to_file(ks_path,
                         "".join([',', str(sample_ks)]),
                         new_line=False)

    wk.write_to_file(rsquare_path, '\t')
    wk.write_to_file(ks_path, '\t')
    # NOTE(review): the empirical KS record was already written before the
    # loop; this second write has no R^2 counterpart and looks like a
    # leftover - confirm before relying on the KS file layout.
    wk.write_to_file(ks_path, ",".join(str(x) for x in out_list_ks))
def bootstrap_SAD(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """Bootstrap the SAD goodness-of-fit statistics for one of the four models.

    The empirical R^2 and KS statistic are computed from the stored obs_pred
    file. Then Niter samples of S abundances are drawn from the model's SAD
    and the same two statistics are computed for each sample.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.

    """
    dat_name, site = name_site_combo
    raw = wk.import_raw_data(in_dir + dat_name + '.csv')
    cleaned = clean_data_agsne(raw[raw['site'] == site])
    G, S, N, E = get_GSNE(cleaned)

    beta_ssnt = mete.get_beta(S, N, version='untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    agsne_pars = [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ]

    # All four candidate SADs are built before the requested one is picked.
    candidates = {}
    candidates['agsne'] = mete_distributions.sad_agsne([G, S, N, E],
                                                       agsne_pars)
    candidates['ssnt_0'] = stats.logser(np.exp(-beta_ssnt))
    candidates['ssnt_1'] = stats.logser(np.exp(-beta_ssnt))
    candidates['asne'] = md.trunc_logser(np.exp(-beta_asne), N)
    dist = candidates[model]

    table = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' +
                                    model + '.csv')
    site_rows = table[table['site'] == site]
    # The stored order is reversed before use.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)

    # First entry after (dat_name, site) is the empirical statistic.
    rsquare_record = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))
    ]
    emp_cdf = mtools.get_emp_cdf(obs)
    model_cdf = np.array([dist.cdf(x) for x in obs])
    ks_record = [dat_name, site, str(max(abs(emp_cdf - model_cdf)))]

    draws_done = 0
    while draws_done < Niter:
        sample = np.array(sorted(dist.rvs(S)))
        sample_model_cdf = np.array([dist.cdf(x) for x in sample])
        sample_emp_cdf = mtools.get_emp_cdf(sample)
        rsquare_record.append(
            str(mtools.obs_pred_rsquare(np.log10(sample), log_pred)))
        ks_record.append(str(max(abs(sample_emp_cdf - sample_model_cdf))))
        draws_done += 1

    prefix = out_dir + 'SAD_bootstrap_' + model
    wk.write_to_file(prefix + '_rsquare.txt',
                     ",".join(str(x) for x in rsquare_record))
    wk.write_to_file(prefix + '_ks.txt',
                     ",".join(str(x) for x in ks_record))