def run_everything(cluster, num_sigma, red_clump, run_number, location, elem):
    """Run the simulations for one element of one cluster and save the summary statistics.

    For each randomly drawn value of sigma, simulated spectra are produced and fitted with
    ``ABC.psm_data``, the observed spectra (masked in the same way as that simulation) are fitted
    with ``oc.fit_func``, and the two are compared through the covariance statistic (``ABC.d_cov``)
    and the KS distance (``ABC.KS``).  The statistics and the sigma values are written to an HDF5
    file whose name contains the cluster, element, timestamp, process ID and run number.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be used,
        set to False.  Passed through to ``oc.get_spectra``.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : numpy.ndarray
        Covariance matrix summary statistic of every simulation
    ks_all : numpy.ndarray
        KS distance of every simulation
    sigma_vals : numpy.ndarray
        Value of sigma used for each simulation (drawn uniformly between 0 and 0.1)

    Raises
    ------
    ValueError
        If ``location`` is neither 'personal' nor 'server'.
    """

    #Resolve the output root first so that an unknown location fails before any simulation is run
    if location == 'personal':
        root = '/Users/chloecheng/Personal'
    elif location == 'server':
        root = '/geir_data/scr/ccheng/AST425/Personal'
    else:
        raise ValueError(
            "location must be 'personal' or 'server', got {!r}".format(location))

    #Create cluster directory, if doesn't exist already
    oc.make_directory(cluster)
    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(
        cluster, red_clump, location)
    num_elem = 15
    num_stars = len(spectra)

    #Create synthetic spectra for each value of sigma and fit
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))
    #Each entry is the tuple returned by psm_data:
    #(fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err)
    sims = []
    for sigma in sigma_vals:
        sims.append(
            ABC.psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T,
                         cluster, spectra, spectra_errs, run_number, location,
                         elem))

    #Fit the data, using the observed spectra masked in the same way as each simulation.
    #The phases are kept as separate passes so the helpers are called in the same order as before.
    fits = []
    for sim in sims:
        real_dat = oc.fit_func(elem,
                               cluster,
                               sim[5],
                               sim[6],
                               T,
                               dat_type='data',
                               run_number=run_number,
                               location=location,
                               sigma_val=None)
        #Keep only what is needed: residuals, errors, NaN-less residuals, NaN-less errors, weights
        fits.append((real_dat[0], real_dat[1], real_dat[7], real_dat[8],
                     real_dat[11]))

    #Get the cumulative distributions for the data
    real_cdist_dat = [pp.cum_dist(fit[2], fit[3]) for fit in fits]

    #Calculate summary statistics
    D_cov_all = []
    ks_all = []
    for sigma, sim, fit, cdist in zip(sigma_vals, sims, fits, real_cdist_dat):
        real_res, real_err, _, _, real_weights = fit
        D_cov_all.append(
            ABC.d_cov(cluster, real_weights, real_res, real_err, sim[0],
                      sim[1], num_stars, sigma, elem, location, run_number))
        ks_all.append(
            ABC.KS(cluster, cdist[0], cdist[1], sim[2], sim[3], sigma, elem,
                   location, run_number))
    D_cov_all = np.array(D_cov_all)
    ks_all = np.array(ks_all)

    #Write to file
    timestr = time.strftime(
        "%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ',
                                       '')  #Remove spaces from name of cluster
    pid = str(os.getpid())
    path = '{0}/run_files_{1}_{2}/{1}/{1}_{2}_{3}_{4}_{5}.hdf5'.format(
        root, name_string, elem, timestr, pid, run_number)
    #The context manager closes the file even if one of the writes fails
    with h5py.File(path, 'w') as out_file:
        out_file['D_cov'] = D_cov_all
        out_file['KS'] = ks_all
        out_file['sigma'] = sigma_vals

    return D_cov_all, ks_all, sigma_vals
# Example no. 2
# 0
def run_everything(cluster, num_sigma, red_clump, run_number, location,
                   elem):  ###Function to run the entire algorithm
    """Run the simulations for one element of one cluster and save the summary statistics.

    For each randomly drawn value of sigma, simulated spectra are produced and fitted with
    ``ABC.psm_data``, the observed spectra (masked in the same way as that simulation) are fitted
    with ``oc.fit_func``, and the two are compared through the covariance statistic (``ABC.d_cov``)
    and the KS distance (``ABC.KS``).  The statistics and the sigma values are written to an HDF5
    file whose name contains the cluster, element, timestamp, process ID and run number.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be used,
        set to False.  Passed through to ``oc.get_spectra``.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : numpy.ndarray
        Covariance matrix summary statistic of every simulation
    ks_all : numpy.ndarray
        KS distance of every simulation
    sigma_vals : numpy.ndarray
        Value of sigma used for each simulation (drawn uniformly between 0 and 0.1)

    Raises
    ------
    ValueError
        If ``location`` is neither 'personal' nor 'server'.
    """

    ###Resolve the output root first so that an unknown location fails before any simulation is run
    if location == 'personal':  ###If running on Mac
        root = '/Users/chloecheng/Personal/run_files/'
    elif location == 'server':  ###If running on server
        root = '/geir_data/scr/ccheng/AST425/Personal/run_files/'
    else:
        raise ValueError(
            "location must be 'personal' or 'server', got {!r}".format(location))

    #Create cluster directory, if doesn't exist already
    oc.make_directory(cluster)  ###Make a directory named after the cluster
    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(
        cluster, red_clump, location)  ###Get the allStar data and spectra
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster

    #Create synthetic spectra for each value of sigma and fit
    ###One sigma per simulation, drawn uniformly between 0 and 0.1
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))
    ###Each entry is the tuple returned by psm_data:
    ###(fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err)
    sims = []
    for sigma in sigma_vals:  ###Iterate through the simulations
        sims.append(
            ABC.psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T,
                         cluster, spectra, spectra_errs, run_number, location,
                         elem))

    #Fit the data
    ###Use the observed spectra and errors masked in the same way as each simulation.
    ###The phases are kept as separate passes so the helpers are called in the same order as before.
    fits = []
    for sim in sims:
        real_dat = oc.fit_func(elem,
                               cluster,
                               sim[5],
                               sim[6],
                               T,
                               dat_type='data',
                               run_number=run_number,
                               location=location,
                               sigma_val=None)
        ###Keep only what is needed: residuals, errors, NaN-less residuals, NaN-less errors, weights
        fits.append((real_dat[0], real_dat[1], real_dat[7], real_dat[8],
                     real_dat[11]))

    #Get the cumulative distributions for the data
    ###Uses the NaN-less residuals and errors collected above (previously referenced undefined names)
    real_cdist_dat = [pp.cum_dist(fit[2], fit[3]) for fit in fits]

    #Calculate summary statistics
    D_cov_all = []  ###Delta covariance statistics
    ks_all = []  ###KS distance statistics
    for sigma, sim, fit, cdist in zip(sigma_vals, sims, fits, real_cdist_dat):
        real_res, real_err, _, _, real_weights = fit
        ###Compute the delta covariance statistics
        D_cov_all.append(
            ABC.d_cov(cluster, real_weights, real_res, real_err, sim[0],
                      sim[1], num_stars, sigma, elem, location, run_number))
        ###Compute the KS distance statistics
        ks_all.append(
            ABC.KS(cluster, cdist[0], cdist[1], sim[2], sim[3], sigma, elem,
                   location, run_number))
    D_cov_all = np.array(D_cov_all)  ###Make into array
    ks_all = np.array(ks_all)  ###Make into array

    #Write to file
    timestr = time.strftime(
        "%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ',
                                       '')  #Remove spaces from name of cluster
    pid = str(os.getpid())  ###PID for file labelling
    path = '{0}{1}/{1}_{2}_{3}_{4}_{5}.hdf5'.format(root, name_string, elem,
                                                    timestr, pid, run_number)
    ###The context manager closes the file even if one of the writes fails
    with h5py.File(path, 'w') as out_file:
        out_file['D_cov'] = D_cov_all
        out_file['KS'] = ks_all
        out_file['sigma'] = sigma_vals

    return D_cov_all, ks_all, sigma_vals
    pid = str(os.getpid())
    if location == 'personal':
    	path = '/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'KS' + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' 
    elif location == 'server':
    	path = '/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_'  + str(elem) + '_' + 'KS' + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' 
    #If file exists, append to file
    if glob.glob(path):
    	file = h5py.File(path, 'a')
    	grp = file.create_group(str(sigma))
    	grp['KS'] = dist
    	file.close()
    #Else create a new file
    else:
    	file = h5py.File(path, 'w')
    	grp = file.create_group(str(sigma))
    	grp['KS'] = dist
    	file.close()
    return dist
    

if __name__ == '__main__':
    # Parse the command-line options from the usage text in the module docstring.
    arguments = docopt(__doc__)

    # NOTE(review): `run_number` is used below but is never assigned in the visible part of
    # this script; it presumably should come from a command-line option - confirm and bind it
    # here, otherwise the calls below raise NameError.

    # Load the cluster data; the location option is looked up in `arguments`
    # (previously the literal list ['--location'] was passed instead).
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(arguments['--cluster'], arguments['--red_clump'], arguments['--location'])
    num_elem = 15
    num_stars = len(spectra)
    # Simulate and fit the synthetic spectra for the requested sigma.
    fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err = psm_data(num_elem, num_stars, apogee_cluster_data, arguments['--sigma'], T, arguments['--cluster'], spectra, spectra_errs, run_number, arguments['--location'], arguments['--elem'])
    # Fit the observed spectra that were masked in the same way as the simulation.
    real_res, real_err, real_points, real_temp, real_a, real_b, real_c, real_nanless_res, real_nanless_err, real_nanless_T, real_nanless_points, real_normed_weights = oc.fit_func(arguments['--elem'], arguments['--cluster'], final_real_spectra, final_real_spectra_err, T, arguments['--dat_type'], run_number, arguments['--location'], arguments['--sigma'])
    y_ax_real, real_cdists = pp.cum_dist(real_nanless_res, real_nanless_err)
    # Summary statistics; the weights are the `real_normed_weights` returned by the fit
    # (previously the undefined name `real_weights` was used).
    D_cov = d_cov(arguments['--cluster'], real_normed_weights, real_res, real_err, fake_res, fake_err, num_stars, arguments['--sigma'], arguments['--elem'], arguments['--location'], run_number)
    ks = KS(arguments['--cluster'], y_ax_real, real_cdists, y_ax_psm, psm_cdists, arguments['--sigma'], arguments['--elem'], arguments['--location'], run_number)
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster, spectra, spectra_errs, run_number, location, elem):
    """Simulate the cluster spectra with PSM for one value of sigma and fit them like the data.

    The abundance of ``elem`` is redrawn for every star from a normal distribution centred on the
    cluster mean with scatter ``sigma``; all other abundances are left at their APOGEE values.
    Synthetic spectra are generated with ``psm.generate_spectrum``, masked like the observed
    spectra, given noise built from randomly chosen repeat-observation residuals scaled by the
    spectral errors, and fitted with ``oc.fit_func``.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE.  Ignored: the value is fixed to 15 inside the function.
    num_stars : int
        The number of stars in the desired cluster.  Ignored: recomputed as ``len(spectra)``.
    apogee_cluster_data : structured array
        All cluster data from APOGEE (the 'X_FE', 'FE_H' and 'LOGG' fields are read)
    sigma : float
        The value of sigma to create the simulated spectra
    T : array
        Effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    spectra : array
        Spectra of the desired cluster (NaN where masked)
    spectra_errs : array
        Spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : array
        Residuals of the fit to the simulated spectra
    fake_err : array
        Spectral errors corresponding to the residuals
    y_ax_psm : array
        First output of ``pp.cum_dist`` for the simulated residuals (y-axis of the cumulative distribution)
    psm_cdists : array
        Second output of ``pp.cum_dist`` for the simulated residuals (the cumulative distribution)
    fake_nanless_res : array
        Residuals of the fit to the simulated spectra, with NaNs removed
    final_real_spectra : array
        Observed spectra masked in the same way as the simulated spectra
    final_real_spectra_err : array
        Observed spectral errors masked in the same way as the simulated spectra

    Raises
    ------
    ValueError
        If ``location`` is neither 'personal' nor 'server'.
    """

    #Resolve the repeats file up front so an unknown location fails before the spectra are generated
    if location == 'personal':
        repeats_path = '/Users/chloecheng/Personal/repeats_dr14.hdf5'
    elif location == 'server':
        repeats_path = '/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5'
    else:
        raise ValueError("location must be 'personal' or 'server', got {!r}".format(location))

    #The passed-in counts are overridden, as in the original implementation
    num_elem = 15
    num_stars = len(spectra)

    #Row of each element in the abundance arrays
    elem_number_dict = {'C': 0,
                       'N': 1,
                       'O': 2,
                       'NA': 3,
                       'MG': 4,
                       'AL': 5,
                       'SI': 6,
                       'S': 7,
                       'K': 8,
                       'CA': 9,
                       'TI': 10,
                       'V': 11,
                       'MN': 12,
                       'FE': 13,
                       'NI': 14}

    #Abundances WRT H: [X/H] = [X/Fe] + [Fe/H], and [Fe/H] itself for iron.
    #Rows are filled in elem_number_dict order so that every later lookup through that
    #dictionary reads the matching element (Fe and Ni used to be stored in each other's row).
    fe_h = apogee_cluster_data['FE_H']
    cluster_xh = np.zeros((num_elem, num_stars))
    for name, row in elem_number_dict.items():
        if name == 'FE':
            cluster_xh[row] = fe_h
        else:
            cluster_xh[row] = apogee_cluster_data[name + '_FE'] + fe_h
    cluster_avg_abundance = np.mean(cluster_xh, axis=1)

    cluster_logg = apogee_cluster_data['LOGG']

    #Redraw only the abundance of the requested element; the others stay as observed
    elem_row = elem_number_dict[elem]
    cluster_fake_abundance = np.copy(cluster_xh)
    cluster_fake_abundance[elem_row] = np.random.normal(loc=cluster_avg_abundance[elem_row], scale=float(sigma), size=num_stars)

    #Generate one synthetic spectrum per star
    cluster_gen_spec = np.zeros((num_stars, 7214))
    for i in range(num_stars):
        #Keyword names expected by psm.generate_spectrum are the lower-case element plus 'h' (ch, nh, ..., feh)
        abundances = {name.lower() + 'h': cluster_fake_abundance[row][i] for name, row in elem_number_dict.items()}
        cluster_gen_spec[i] = psm.generate_spectrum(Teff=T[i]/1000, logg=cluster_logg[i], vturb=psm.vturb,
                                                   c12c13=psm.c12c13, **abundances)

    #Mask spectra outside of boundaries of DR12 detectors
    apStar_cluster_gen_spec = toApStarGrid(cluster_gen_spec, dr='12')
    dr12_d1_left = apStarInds['12']['blue'][0]
    dr12_d1_right = apStarInds['12']['blue'][-1]
    dr12_d2_left = apStarInds['12']['green'][0]
    dr12_d2_right = apStarInds['12']['green'][-1]
    dr12_d3_left = apStarInds['12']['red'][0]
    dr12_d3_right = apStarInds['12']['red'][-1]
    pixels = np.arange(apStar_cluster_gen_spec.shape[1])
    outside_detectors = ((pixels < dr12_d1_left)
                         | ((pixels > dr12_d1_right) & (pixels < dr12_d2_left))
                         | ((pixels > dr12_d2_right) & (pixels < dr12_d3_left))
                         | (pixels > dr12_d3_right))
    apStar_cluster_gen_spec[:, outside_detectors] = np.nan

    #Pad psm spectra with zeros to make appropriate size for DR14
    cluster_padded_spec = toAspcapGrid(apStar_cluster_gen_spec, dr='14')

    #Mask the psm in the same way as the spectra: NaN wherever the observed spectrum is NaN
    #(assumes spectra and the padded PSM spectra have the same shape - the noise step below already requires it)
    masked_psm = np.full_like(spectra, np.nan)
    observed = ~np.isnan(spectra)
    masked_psm[observed] = cluster_padded_spec[observed]

    #Read in repeats residuals
    with h5py.File(repeats_path, 'r') as repeats_file:
        repeat_res = repeats_file['residuals'][()]

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(repeat_res, dr='14')
    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(repeats_dr14)
    repeats_std = np.nanstd(repeats_dr14)
    repeats_6sigma_pos = repeats_mean + repeats_std*6
    repeats_6sigma_neg = repeats_mean - repeats_std*6

    #Create fake noise to add to the psm:
    #select a random star from the repeats residuals for each star, by which to multiply the spectra errors
    selected_repeats = np.array([repeats_dr14[np.random.choice(np.arange(0, len(repeats_dr14)))]
                                 for _ in range(num_stars)])

    #Mask individual repeats that are more than 6sigma from the mean
    selected_repeats[(selected_repeats > repeats_6sigma_pos) | (selected_repeats < repeats_6sigma_neg)] = np.nan

    #Multiply the repeats by the spectral errors
    cluster_fake_errs = spectra_errs*selected_repeats
    #Correct the fake errors with zeroes in the same places as the PSM spectra
    cluster_fake_errs[masked_psm == 0] = 0.0

    #Add the noise to the psm
    noise_fake_spec = masked_psm + cluster_fake_errs
    #Mask the real spectra and spectra errors in the same way as the fake spectra
    fake_is_nan = np.isnan(noise_fake_spec)
    masked_real_spectra = np.copy(spectra)
    masked_real_spectra_err = np.copy(spectra_errs)
    masked_real_spectra[fake_is_nan] = np.nan
    masked_real_spectra_err[fake_is_nan] = np.nan

    #Remove empty spectra (rows that are zero everywhere) from the fake and real arrays alike
    final_fake_spec = []
    final_real_spectra = []
    final_real_spectra_err = []
    for fake_row, real_row, real_err_row in zip(noise_fake_spec, masked_real_spectra, masked_real_spectra_err):
        if any(fake_row != 0):
            final_fake_spec.append(fake_row)
            final_real_spectra.append(real_row)
            final_real_spectra_err.append(real_err_row)
    final_fake_spec = np.array(final_fake_spec)
    final_real_spectra = np.array(final_real_spectra)
    final_real_spectra_err = np.array(final_real_spectra_err)

    #Run fitting function on synthetic spectra (the masked observed errors are used as the errors)
    fake_fit = oc.fit_func(elem, cluster, final_fake_spec, final_real_spectra_err, T, dat_type='sim', run_number=run_number, location=location, sigma_val=sigma)
    fake_res, fake_err = fake_fit[0], fake_fit[1]
    fake_nanless_res, fake_nanless_err = fake_fit[7], fake_fit[8]

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)
    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster,
             spectra, spectra_errs, run_number, location, elem):
    """Return the residuals (with and without NaNs), errors (with and without NaNs), cumulative distributions,
    and skipped elements for the simulated spectra.

    This function generates synthetic spectra using PSM and a specified sigma value, then fits the simulated
    spectra in the same way as the data.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE
    num_stars : int
        The number of stars in the desired cluster
    apogee_cluster_data : structured array
        All cluster data from APOGEE
    sigma : float
        The value of sigma to create the simulated spectra
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : tuple
        Array of floats representing the residuals of the quadratic fit 
    fake_err : tuple
        Array of floats representing the spectral errors corresponding to the residuals
    y_ax_psm : tuple
        One-dimensional array containing values from 0 to 1, the same size as cdist
    psm_cdists : tuple
        One-dimensional array containing the sorted, normalized fit residuals
    fake_nanless_res : tuple
        Array of floats representing the residuals of the quadratic fit, with NaNs removed 
        (doesn't return fake_nanless_err because they are the same as real_nanless_err)
    fake_used_elems : tuple
        Array of str representing the elements used in the cluster (some elements may be omitted due to 
        lack of data)
    fake_skipped_elems : tuple
        Array of str representing the elements skipped due to lack of data 
    final_real_spectra : tuple
        Array of observed spectra masked in the same way as the simulated spectra 
    final_real_spectra_err : tuple
        Array of observed spectral errors masked in the same way as the simulated spectra
    """

    #Abundances WRT H
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster
    ###Dictionary for the names of the FE abundances in the allStar file
    fe_abundance_dict = {
        'element': [
            'C_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE', 'S_FE',
            'K_FE', 'CA_FE', 'TI_FE', 'V_FE', 'MN_FE', 'NI_FE', 'FE_H'
        ]
    }
    cluster_xh = np.zeros(
        (num_elem, num_stars
         ))  ###Make an empty array to add all of the cluster abundances to
    for i in range(num_elem):  ###Iterate through the elements
        for j in range(num_stars):  ###Iterate through the stars
            ###Get all of the [X/H] abundances for all of the elements X and all of the stars in the cluster by multiplying each [X/Fe] abundance by [Fe/H]
            cluster_xh[i] = apogee_cluster_data[
                fe_abundance_dict['element'][i]] * apogee_cluster_data['FE_H']
    cluster_avg_abundance = np.mean(
        cluster_xh, axis=1
    )  ###Get the average abundances of all the elements in the cluster (so should be just 1 number for each element)

    cluster_logg = apogee_cluster_data[
        'LOGG']  ###Get the surface gravities of each star from the allStar file
    elem_number_dict = {
        'C':
        0,  ###Create a dictionary to match element names to their order number 
        'N': 1,
        'O': 2,
        'NA': 3,
        'MG': 4,
        'AL': 5,
        'SI': 6,
        'S': 7,
        'K': 8,
        'CA': 9,
        'TI': 10,
        'V': 11,
        'MN': 12,
        'FE': 13,
        'NI': 14
    }
    cluster_fake_abundance = np.copy(
        cluster_xh
    )  ###Create a copy of the array of all abundances in the cluster to use to simulate abundances
    ###Simulate the abundances of the DESIRED ELEMENT (ONE ELEMENT ONLY) by drawing from a random normal distribution centred about the mean of THAT ONE
    ###ELEMENT with a scatter of the chosen value of sigma.  The rest of the abundances in this array will remain the same as the data
    cluster_fake_abundance[elem_number_dict[elem]] = np.random.normal(
        loc=cluster_avg_abundance[elem_number_dict[elem]],
        scale=float(sigma),
        size=num_stars)

    cluster_gen_spec = np.zeros(
        (num_stars, 7214))  ###Create an empty array to add the fake spectra
    for i in range(
            len(spectra)
    ):  ###Iterate through all of the stars - change this to range(num_stars)
        ###Use PSM to make a fake spectrum, using the array of abundances created above (with the element in question varied by the value of sigma and the
        ###remaining values of abundances the same as the data), using the photometric Teffs calculated previously, the logg of each star, the default
        ###PSM vturb value, and the default psm c12c13 value.
        cluster_gen_spec[i] = psm.generate_spectrum(
            Teff=T[i] / 1000,
            logg=cluster_logg[i],
            vturb=psm.vturb,
            ch=cluster_fake_abundance[elem_number_dict['C']][i],
            nh=cluster_fake_abundance[elem_number_dict['N']][i],
            oh=cluster_fake_abundance[elem_number_dict['O']][i],
            nah=cluster_fake_abundance[elem_number_dict['NA']][i],
            mgh=cluster_fake_abundance[elem_number_dict['MG']][i],
            alh=cluster_fake_abundance[elem_number_dict['AL']][i],
            sih=cluster_fake_abundance[elem_number_dict['SI']][i],
            sh=cluster_fake_abundance[elem_number_dict['S']][i],
            kh=cluster_fake_abundance[elem_number_dict['K']][i],
            cah=cluster_fake_abundance[elem_number_dict['CA']][i],
            tih=cluster_fake_abundance[elem_number_dict['TI']][i],
            vh=cluster_fake_abundance[elem_number_dict['V']][i],
            mnh=cluster_fake_abundance[elem_number_dict['MN']][i],
            nih=cluster_fake_abundance[elem_number_dict['NI']][i],
            feh=cluster_fake_abundance[elem_number_dict['FE']][i],
            c12c13=psm.c12c13)

    #Pad psm spectra with zeros to make appropriate size for DR14
    apStar_cluster_gen_spec = toApStarGrid(
        cluster_gen_spec, dr='12')  ###Put the fake spectra onto the DR12 grid
    cluster_padded_spec = toAspcapGrid(
        apStar_cluster_gen_spec, dr='14'
    )  ###Put the fake spectra onto the DR14 grid.  This will pad the spectra with zeroes
    ### to make it the right shape for DR14

    #Create array of nans to mask the psm in the same way as the spectra
    masked_psm = np.empty_like(
        spectra
    )  ###Create an empty array that is the same shape as the spectra
    masked_psm[:] = np.nan  ###Fill the array with NaNs to mask it

    #Mask the spectra
    for i in range(len(
            spectra)):  ###Iterate through the stars - change this to num_stars
        for j in range(
                7514
        ):  ###Iterate through the wavelength range - change to len(spectra.T) or make a variable = 7514 so I can stop hardcoding
            if ~np.isnan(spectra[i][j]
                         ):  ###If the entry in the real spectra is not a NaN
                masked_psm[i][j] = cluster_padded_spec[i][
                    j]  ###Make the value in the masked fake spectra the corresponding entry in the fake spectra

    #Read in repeats residuals
    if location == 'personal':  ###If running on Mac
        file = h5py.File('/Users/chloecheng/Personal/repeats_dr14.hdf5',
                         'r')  ###Path to repeats file that I made
    elif location == 'server':  ###If running on server
        file = h5py.File(
            '/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5',
            'r')  ###Path to repeats file that I made
    repeat_res = file['residuals'][()]  ###Get the repeats
    file.close()  ###Close the file

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(
        repeat_res,
        dr='14')  ###Cut the gaps between the detectors in the repeats
    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(
        repeats_dr14
    )  ###Get the mean of the repeats, avoiding the masked areas that are NaNs
    repeats_std = np.nanstd(
        repeats_dr14
    )  ###Get the standard deviation of the repeats, avoiding the masked areas that are NaNs
    repeats_6sigma = repeats_mean + repeats_std * 6  ###Get the value for 6 sigma of the repeats from the mean (check that this is correct)

    #Create fake noise to add to the psm
    selected_repeats = [
    ]  ###Empty list to append the random repeats that I will use to multiply the spectral errors
    for i in range(0, num_stars):  ###Iterate through the stars
        #Select a random star from the repeats residuals by which to multiply the spectra errors
        random_repeat = np.random.choice(np.arange(0, len(
            repeats_dr14)))  ###Randomly select one of the stars in the repeats
        selected_repeats.append(
            repeats_dr14[random_repeat]
        )  ###Get all of the repeats residuals for this star
    selected_repeats = np.array(
        selected_repeats)  ###Turn the list into an array

    #Mask individual |repeats| that are > 6sigma
    for i in range(
            len(selected_repeats)
    ):  ###Iterate through the number of selected repeats (number of stars)
        for j in range(len(
                selected_repeats.T)):  ###Iterate through the wavelength range
            if np.abs(
                    selected_repeats[i][j]
            ) > repeats_6sigma:  ###If the absolute value of a one of the entries in the selected repeats is greater than 6sigma
                selected_repeats[i][j] = np.nan  ###Mask it out

    #Multiply the repeats by the spectral errors
    cluster_fake_errs = spectra_errs * selected_repeats  ###Multiply the spectral errors by these randomly selected repeats.  The spectral errors will become masked in the same way as the repeats
    #Pad the fake errors with zeroes in the same places as the PSM spectra
    cluster_fake_errs[
        masked_psm ==
        0] = 0.0  ###Pad the fake errors with zeroes in the same places as the fake spectra from modifying the DR12 wavelength range to fit DR14

    #Add the noise to the psm
    noise_fake_spec = masked_psm + cluster_fake_errs  ###Add the fake errors that I've created to the fake spectra as fake noise to make more realistic
    #Mask the real spectra and spectra errors in the same way as the fake spectra
    masked_real_spectra = np.copy(
        spectra
    )  ###Make a copy of the observed spectra to mask in the same way as the fake spectra
    masked_real_spectra_err = np.copy(
        spectra_errs
    )  ###Make a copy of the observed spectral errors to mask in the same way as the fake spectra
    masked_real_spectra[np.isnan(
        noise_fake_spec
    )] = np.nan  ###Mask the observed spectra in the same way as the fake spectra
    masked_real_spectra_err[np.isnan(
        noise_fake_spec
    )] = np.nan  ###Mask the observed spectral errors in the same way as the fake spectra

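    #Remove empty spectra: keep only the rows that have at least one entry != 0 (NaN entries also compare as != 0, so only rows of exact zeroes are dropped)
    ###This filter may not be necessary; it was written in as a safeguard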
    final_fake_spec = []  ###Empty list to append the final set of fake spectra
    final_real_spectra = [
    ]  ###Empty list to append the final set of real spectra
    final_real_spectra_err = [
    ]  ###Empty list to append the final set of real spectral errors
    for i in range(len(noise_fake_spec)):  ###Iterate through the fake spectra
        if any(
                noise_fake_spec[i, :] != 0
        ):  ###If there are rows that are not completely filled with zeroes
            final_fake_spec.append(
                noise_fake_spec[i])  ###Append those fake spectra
            final_real_spectra.append(
                masked_real_spectra[i])  ###Append those real spectra
            final_real_spectra_err.append(
                masked_real_spectra_err[i]
            )  ###Append those real spectral errors
    final_fake_spec = np.array(final_fake_spec)  ###Make into array
    final_real_spectra = np.array(final_real_spectra)  ###Make into array
    final_real_spectra_err = np.array(
        final_real_spectra_err)  ###Make into array

    #Run fitting function on synthetic spectra
    fake_res, fake_err, fake_points, fake_temp, fake_a, fake_b, fake_c, fake_nanless_res, fake_nanless_err, fake_nanless_T, fake_nanless_points, fake_normed_weights = oc.fit_func(
        elem,
        cluster,
        final_fake_spec,
        final_real_spectra_err,
        T,
        dat_type='sim',
        run_number=run_number,
        location=location,
        sigma_val=sigma)

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)
    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err