Example No. 1
def get_apogee(dr='16',use_astroNN=True):
    """

    dr:         dr to select.
    astronn:    boolean flag for whether to use astroNN abundances, defaults
                to ASPCAP abundances or not.

    Returns APOGEE allStar file without duplicates.
    """
    dr = str(dr)
    # Change to relevant data release
    change_dr(dr)

    # Only use astroNN values if they are available for this data release.
    if use_astroNN:
        if int(dr)<14:
            use_astroNN=False
        elif int(dr)>=14:
            allStar=apread.allStar(rmcommissioning=True,
                                   rmdups=False,
                                   use_astroNN=True)
    if not use_astroNN:
        allStar=apread.allStar(rmcommissioning=True,
                               rmdups=False,
                               use_astroNN=False)
    # Manually remove duplicates
    apids,inds=np.unique(allStar['APOGEE_ID'],return_index=True)
    return allStar[inds]
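
###The sketch below is not part of the original code: a hypothetical example of
###how get_apogee() might be called, assuming the apogee package is configured
###with a local copy of the allStar file for the chosen data release.
def _example_get_apogee():
    """Hypothetical usage sketch: load the deduplicated DR16 allStar catalogue
    with astroNN abundances and report how many unique stars remain."""
    allstar = get_apogee(dr='16', use_astroNN=True)
    print(len(allstar), 'stars with unique APOGEE_IDs')
    return allstar
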
def fit_func(elem,
             name,
             spectra,
             spectra_errs,
             T,
             dat_type,
             run_number,
             location,
             sigma_val=None):  ###Fitting function
    """Return fit residuals from quadratic fit, spectral errors for desired element, fluxes for desired element,
    an appropriately-sized array of effective temperatures, the quadratic fitting parameters, the residuals, 
    errors, temperatures, and fluxes with NaNs removed, and the normalized elemental weights.
    
    Functions:
        Reads in the DR12 element windows, maps them onto the DR14 grid, and reads the DR14 windows.
        Obtains the indices of pixels of the absorption lines and saves the flux value and uncertainty for 
        each star in these pixels.
        Performs the quadratic fit on each pixel using weight_lsq() and computes the residuals using residuals().
        Obtains the flux values, uncertainties, fits, residuals, and temperatures with NaNs removed.
        Writes the residuals and fit parameters to .hdf5 files.
    
    Parameters
    ----------
    elem : str
        Element name (e.g. 'AL')
    name : str
        Name of desired cluster (e.g. 'NGC 2682')
    spectra : tuple
    	Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
    	Array of floats representing the spectral uncertainties of the desired cluster
    T : tuple
    	Array of floats representing the effective temperature of each star in the cluster
    dat_type : str
    	Indicates whether the data being examined is the data or a simulation
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    sigma_val : float, optional
    	Indicates the value of sigma being used for the simulation in question, if applicable (default is None)

    Returns
    -------
    elem_res : tuple
    	Array of floats representing the fit residuals, with original positioning of points maintained
    final_err : tuple
    	Array of floats representing the spectral uncertainties from the lines of the desired element,
    	with original positioning of points maintained
    final_points : tuple
    	Array of floats representing the fluxes from the lines of the desired element, with original 
    	positioning of points maintained
    temp_array : tuple
    	Array of floats representing the effective temperature of each star in the cluster, with a row for
    	each pixel of the desired element
    elem_a : tuple
    	Array of floats representing the fitting parameters for the quadratic terms in the fits for each pixel of
    	the desired element
    elem_b : tuple
    	Array of floats representing the fitting parameters for the linear terms in the fits for each pixel of
    	the desired element
    elem_c : tuple
    	Array of floats representing the fitting parameters for the constant terms in the fits for each pixel of
    	the desired element
    nanless_res : tuple
    	Array of floats representing the fit residuals, with NaNs removed
    nanless_T : tuple
    	Array of floats representing the effective temperature of each star in the cluster, with a row for 
    	each pixel of the desired element, with NaNs removed
    nanless_points : tuple
    	Array of floats representing the fluxes from the lines of the desired element, with NaNs removed
    normed_weights : tuple
    	Array of floats representing the weight of each elemental window, normalized to 1
    """

    change_dr('12')  ###Switch data-release to 12
    #Find the DR14 windows from the DR12 windows
    dr12_elem_windows = window.read(elem)  ###Read in the DR12 windows for the element in question
    change_dr('14')  ###Switch back to DR14
    ###Map the DR12 windows onto the DR14 grid ("hacked" DR14 windows)
    dr14_elem_windows_12 = np.concatenate((dr12_elem_windows[246:3274],
                                           dr12_elem_windows[3585:6080],
                                           dr12_elem_windows[6344:8335]))
    ###Normalize the hacked DR14 windows to 1
    normalized_dr14_elem_windows_12 = ((dr14_elem_windows_12 - np.nanmin(dr14_elem_windows_12))
                                       / (np.nanmax(dr14_elem_windows_12) - np.nanmin(dr14_elem_windows_12)))

    #Get the indices of the lines
    ind_12 = np.argwhere(normalized_dr14_elem_windows_12 > 0)  ###Indices of all pixels in the element's absorption lines
    ind_12 = ind_12.flatten()  ###Drop the extra dimension produced by np.argwhere

    #Get the fluxes and errors from spectra
    len_spectra = len(spectra)  ###Number of stars
    ###Arrays for the spectral fluxes and errors at the DR12 elemental line indices
    elem_points_12 = np.zeros((len(ind_12), len_spectra))
    elem_err_12 = np.zeros((len(ind_12), len_spectra))
    for i in range(len(ind_12)):  ###Iterate through the DR12 elemental indices
        for j in range(len_spectra):  ###Iterate through the stars
            elem_points_12[i][j] = spectra[j][ind_12[i]]  ###Flux at this pixel for this star
            elem_err_12[i][j] = spectra_errs[j][ind_12[i]]  ###APOGEE measured error at this pixel

    #Use only pixels with at least 5 non-NaN points
    final_points_12 = []  ###Final DR12 fluxes
    final_err_12 = []  ###Final DR12 errors
    final_inds_12 = []  ###Final DR12 elemental line indices
    for i in range(len(elem_points_12)):  ###Iterate through the DR12 flux points
        if np.count_nonzero(~np.isnan(elem_points_12[i])) >= 5:  ###Keep pixels with at least 5 non-NaN points
            final_points_12.append(elem_points_12[i])  ###Append those points
            final_err_12.append(elem_err_12[i])  ###Append those errors
            final_inds_12.append(ind_12[i])  ###Append those indices
    final_points_12 = np.array(final_points_12)  ###Make into array
    final_err_12 = np.array(final_err_12)  ###Make into array
    final_inds_12 = np.array(final_inds_12)  ###Make into array
    if len(final_inds_12) == 0:  ###If no pixels are left (i.e. every pixel has fewer than 5 points)
        print('Warning: less than 5 points for every pixel, skipping ', elem)  ###Skip this element
    else:  ###If there are enough points left
        dr12_weights = normalized_dr14_elem_windows_12[final_inds_12]  ###Weights of the DR12 elemental pixels
        sorted_dr12_weights = np.sort(dr12_weights)  ###Sort these weights from smallest to largest

        #Get windows
        if location == 'personal':  ###If running on Mac
            window_file = pd.read_hdf('/Users/chloecheng/Personal/dr14_windows.hdf5',
                                      'window_df')  ###File I made for the DR14 windows
        elif location == 'server':  ###If running on the server
            window_file = pd.read_hdf('/geir_data/scr/ccheng/AST425/Personal/dr14_windows.hdf5',
                                      'window_df')  ###File I made for the DR14 windows

        dr14_elem_windows_14 = window_file[elem].values  ###DR14 windows for the element in question
        ###Normalize these windows to 1
        normalized_dr14_elem_windows_14 = ((dr14_elem_windows_14 - np.min(dr14_elem_windows_14))
                                           / (np.max(dr14_elem_windows_14) - np.min(dr14_elem_windows_14)))

        #Get the indices of the lines
        if elem == 'C' or elem == 'N' or elem == 'FE':  ###If we're looking at one of the elements with ~1000 pixels
            ###Discard the smallest 70% of the DR12 pixel weights
            ind = np.argwhere(normalized_dr14_elem_windows_14
                              > np.min(sorted_dr12_weights[int(len(sorted_dr12_weights) * 0.7):]))
        else:  ###For all of the other elements
            ind = np.argwhere(normalized_dr14_elem_windows_14 > 0)  ###Keep all of the pixels
        ind = ind.flatten()  ###Drop the extra dimension from argwhere

        #Get the fluxes and errors from spectra
        #Limits of DR12 detectors
        dr12_d1_left = 322  ###Left limit of detector 1
        dr12_d1_right = 3242  ###Right limit of detector 1
        dr12_d2_left = 3648  ###Left limit of detector 2
        dr12_d2_right = 6048  ###Right limit of detector 2
        dr12_d3_left = 6412  ###Left limit of detector 3
        dr12_d3_right = 8306  ###Right limit of detector 3

        ###Arrays for the spectral fluxes and errors at the elemental indices
        elem_points = np.zeros((len(ind), len_spectra))
        elem_err = np.zeros((len(ind), len_spectra))
        for i in range(len(ind)):  ###Iterate through the elemental indices
            for j in range(len_spectra):  ###Iterate through the stars
                ###Mask indices that fall outside the bounds of the DR12 detectors
                if (ind[i] < dr12_d1_left
                        or dr12_d1_right < ind[i] < dr12_d2_left
                        or dr12_d2_right < ind[i] < dr12_d3_left
                        or ind[i] > dr12_d3_right):
                    elem_points[i][j] = np.nan  ###Set the point to NaN and ignore
                    elem_err[i][j] = np.nan  ###Set the error to NaN and ignore
                else:  ###If the index is within the bounds of the DR12 detectors
                    elem_points[i][j] = spectra[j][ind[i]]  ###Corresponding point in the spectra
                    elem_err[i][j] = spectra_errs[j][ind[i]]  #APOGEE measured errors

        #Use only pixels with at least 5 non-NaN points
        final_points = []  ###Final set of spectral fluxes
        final_err = []  ###Final set of spectral errors
        final_inds = []  ###Final set of elemental indices
        for i in range(len(elem_points)):  ###Iterate through the points we just obtained
            if np.count_nonzero(~np.isnan(elem_points[i])) >= 5:  ###Keep pixels with at least 5 non-NaN points
                final_points.append(elem_points[i])  ###Append the points
                final_err.append(elem_err[i])  ###Append the errors
                final_inds.append(ind[i])  ###Append the indices
        final_points = np.array(final_points)  ###Make into array
        final_err = np.array(final_err)  ###Make into array
        final_inds = np.array(final_inds)  ###Make into array

        if len(final_points) == 0:  ###If all pixels have fewer than 5 points
            print('Warning: less than 5 points for every pixel, skipping ', elem)  ###Skip the element and end here
        else:  ###If there are some pixels remaining

            #Create an appropriately-sized array of temperatures to mask as well
            ###Each row of temp_array is the same set of temperatures, one row per remaining pixel
            temp_array = np.full((final_points.shape), T)
            for i in range(len(final_points)):  ###Iterate through the spectral points
                for j in range(len_spectra):  ###Iterate through the stars
                    if np.isnan(final_points[i][j]):  ###If the point is a NaN
                        temp_array[i][j] = np.nan  ###Mask the corresponding temperature

            #Do fits with non-nan numbers
            nanless_inds = np.isfinite(final_points)  ###Boolean mask of the non-NaN points
            fits = []  ###Empty list for the fit parameters
            for i in range(len(final_points)):  ###Iterate through the spectral points
                ###Fit using weight_lsq and all points that are not NaNs
                fits.append(weight_lsq(final_points[i][nanless_inds[i]],
                                       temp_array[i][nanless_inds[i]],
                                       final_err[i][nanless_inds[i]]))
            for i in range(len(fits)):  ###Make each sub-list into an array
                fits[i] = np.array(fits[i])
            fits = np.array(fits)  ###Make the whole list into an array
            elem_a = fits[:, 0]  ###Quadratic-term parameters
            elem_b = fits[:, 1]  ###Linear-term parameters
            elem_c = fits[:, 2]  ###Constant-term parameters

            elem_fits = np.zeros_like(final_points)  ###Array to hold the evaluated fits
            for i in range(len(final_points)):  ###Iterate through the points
                elem_fits[i] = elem_a[i] * temp_array[i]**2 + elem_b[i] * temp_array[i] + elem_c[i]  ###Evaluate the quadratic fit

            #Calculate residuals
            elem_res = residuals(final_points, elem_fits)  ###Calculate the fit residuals

            #Remove nans from fits, residuals, errors, and temperatures for plotting and cumulative distribution
            #calculation purposes
            nanless_fits = []  ###Nanless fits
            nanless_res = []  ###Nanless fit residuals
            nanless_err = []  ###Nanless errors
            nanless_T = []  ###Nanless temperatures
            nanless_points = []  ###Nanless spectral points
            for i in range(len(final_points)):  ###Iterate through the spectral points
                nanless_fits.append(elem_fits[i][nanless_inds[i]])  ###Append the nanless fits
                nanless_res.append(elem_res[i][nanless_inds[i]])  ###Append the nanless residuals
                nanless_err.append(final_err[i][nanless_inds[i]])  ###Append the nanless errors
                nanless_T.append(temp_array[i][nanless_inds[i]])  ###Append the nanless temperatures
                nanless_points.append(final_points[i][nanless_inds[i]])  ###Append the nanless points
            for i in range(len(final_points)):  ###Turn all sub-lists into arrays
                nanless_fits[i] = np.array(nanless_fits[i])
                nanless_res[i] = np.array(nanless_res[i])
                nanless_err[i] = np.array(nanless_err[i])
                nanless_T[i] = np.array(nanless_T[i])
                nanless_points[i] = np.array(nanless_points[i])
            nanless_fits = np.array(nanless_fits)  ###Turn all lists into arrays
            nanless_res = np.array(nanless_res)
            nanless_err = np.array(nanless_err)
            nanless_T = np.array(nanless_T)
            nanless_points = np.array(nanless_points)

            #Get the weights for later
            weights = normalized_dr14_elem_windows_14[final_inds]  ###Weights of the DR14 lines that we're using
            normed_weights = weights / np.sum(weights)  ###Normalize the weights

            #File-saving
            timestr = time.strftime("%Y%m%d_%H%M%S")  ###date_time string
            name_string = str(name).replace(' ', '')  ###Cluster name with the space removed
            pid = str(os.getpid())  ###PID string
            if sigma_val is None:  ###If we are looking at the data
                if location == 'personal':  ###If running on Mac
                    path_dat = ('/Users/chloecheng/Personal/run_files/' + name_string + '/'
                                + name_string + '_' + str(elem) + '_' + 'fit_res' + '_'
                                + str(dat_type) + '_' + timestr + '_' + pid + '_'
                                + str(run_number) + '.hdf5')  ###Use this path
                elif location == 'server':  ###If running on server
                    path_dat = ('/geir_data/scr/ccheng/AST425/Personal/run_files/' + name_string + '/'
                                + name_string + '_' + str(elem) + '_' + 'fit_res' + '_'
                                + str(dat_type) + '_' + timestr + '_' + pid + '_'
                                + str(run_number) + '.hdf5')  ###Use this path

                #If the file exists, output the desired variables
                if glob.glob(path_dat):  ###If the file already exists, don't write anything
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
                #If the file does not exist, create file and output the desired variables
                else:  ###If the file does not exist, write all fitting information
                    file = h5py.File(path_dat, 'w')
                    file['points'] = final_points
                    file['residuals'] = elem_res
                    file['err_200'] = final_err
                    file['a_param'] = elem_a
                    file['b_param'] = elem_b
                    file['c_param'] = elem_c
                    file.close()
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
            #If we are looking at simulations
            else:  ###If we are looking at a simulation
                if location == 'personal':  ###If running from Mac
                    path_sim = ('/Users/chloecheng/Personal/run_files/' + name_string + '/'
                                + name_string + '_' + str(elem) + '_' + 'fit_res' + '_'
                                + str(dat_type) + '_' + timestr + '_' + pid + '_'
                                + str(run_number) + '.hdf5')  ###Use this path
                elif location == 'server':  ###If running on server
                    path_sim = ('/geir_data/scr/ccheng/AST425/Personal/run_files/' + name_string + '/'
                                + name_string + '_' + str(elem) + '_' + 'fit_res' + '_'
                                + str(dat_type) + '_' + timestr + '_' + pid + '_'
                                + str(run_number) + '.hdf5')  ###Use this path

                #If the file exists, append to the file
                if glob.glob(path_sim):  ###If the file exists
                    file = h5py.File(path_sim, 'a')  ###Append to the file
                    #If the group for the particular value of sigma exists, don't do anything
                    if str(sigma_val) in file:  ###If this value of sigma has already been tested, don't write anything
                        file.close()
                    #If not, append a new group to the file for the particular value of sigma
                    else:  ###If it has not been tested, write all fitting information to a group named after the value of sigma
                        grp = file.create_group(str(sigma_val))
                        grp['points'] = final_points
                        grp['residuals'] = elem_res
                        grp['err_200'] = final_err
                        grp['a_param'] = elem_a
                        grp['b_param'] = elem_b
                        grp['c_param'] = elem_c
                        file.close()
                #If the file does not exist, create a new file
                else:  ###If the file does not exist, write all of the fitting information to a group named after the value of sigma
                    file = h5py.File(path_sim, 'w')
                    grp = file.create_group(str(sigma_val))
                    grp['points'] = final_points
                    grp['residuals'] = elem_res
                    grp['err_200'] = final_err
                    grp['a_param'] = elem_a
                    grp['b_param'] = elem_b
                    grp['c_param'] = elem_c
                    file.close()
                return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
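
###weight_lsq() and residuals() are called in fit_func() but are not included
###in this example.  The two functions below are hedged stand-ins, not the
###original implementations: weight_lsq is assumed to perform an
###inverse-variance-weighted quadratic fit of flux against temperature, and
###residuals is assumed to return data minus fit.
def weight_lsq_sketch(points, temps, errs):
    """Hypothetical stand-in for weight_lsq(): weighted quadratic fit
    returning the (a, b, c) coefficients of a*T**2 + b*T + c."""
    #np.polyfit applies the weights to the unsquared residuals, so w = 1/sigma
    #gives the usual chi-square minimization
    a, b, c = np.polyfit(temps, points, deg=2, w=1.0 / errs)
    return a, b, c


def residuals_sketch(points, fits):
    """Hypothetical stand-in for residuals(): elementwise data-minus-fit."""
    return points - fits
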
-h  Help file
--cluster=<arg>  Cluster name
--red_clump=<arg> Whether to exclude red clump stars in rcsample or not
--element=<arg>  Element name
--type=<arg>  Data type 
--location=<arg> Machine where the code is being run

"""

#Imports
#apogee package
import apogee.tools.read as apread  ###Read in the allStar data
from apogee.tools.path import change_dr  ###Change the data-release
from apogee.spec import window  ###Windows in fitting function for DR12
from apogee.tools.read import rcsample  ###Remove red clumps
change_dr('14')  #use DR14 ###Change to DR14
#astropy helper functions
import astropy.io.fits as afits  ###Read OCCAM file
#basic math and plotting
from docopt import docopt  ###Run with docopt from terminal
import numpy as np  ###Numpy
import pandas as pd  ###Read DR14 windows file
import h5py  ###Read/write files
import glob  ###See whether file/directory exists or not
import matplotlib.pyplot as plt  ###Plot for weighted least-squares function if matrix is singular
import os  ###Make directory for cluster
import time  ###Label files with date and time

fs = 16  ###Plot fontsize
plt.rc('font', family='serif', size=fs)  ###Plot fonts
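
###The sketch below is not part of the original code: a hypothetical example of
###the inputs fit_func() expects.  The 7514-pixel count is inferred from the
###window slicing inside fit_func (3028 + 2495 + 1991 pixels); the cluster
###name, fluxes, and temperatures are placeholders.  Actually running the call
###also requires weight_lsq()/residuals() and the APOGEE window data files.
def _example_fit_func_inputs():
    """Hypothetical usage sketch: build synthetic inputs shaped like the ones
    fit_func() expects and show the intended call."""
    nstars, npix = 20, 7514
    spectra = np.ones((nstars, npix))             #continuum-normalized fluxes
    spectra_errs = np.full((nstars, npix), 0.02)  #per-pixel uncertainties
    T = np.linspace(4500., 5500., nstars)         #effective temperatures (K)
    return fit_func('AL', 'NGC 2682', spectra, spectra_errs, T,
                    dat_type='data', run_number=1, location='personal')
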
Example No. 4
def pixels_cannon(*args, **kwargs):
    """
    NAME:
       pixels_cannon
    PURPOSE:
       determine continuum pixels using a Cannon-like technique (Ness et al. 2015)
    INPUT:
       Either:
        a) Input for running the apogee.spec.cannon:
          spec - spectra to fit (nspec,nlambda)
          specerrs - errors on the spectra (nspec,nlambda); assume no covariances
          label1, label2, ... - labels (nspec); best to subtract reference values before running this
          type= ('lin') type of Cannon to run:
             'lin' - linear Cannon
             'quad' - quadratic Cannon
        b) Output from a previous Cannon run:
          coefficients - coefficients from the fit (ncoeffs,nlambda)
          scatter - scatter from the fit (nlambda)
    KEYWORDS:
       baseline_dev= (0.015) maximum deviation from baseline
       label1_max= (10.**-5.) maximum deviation in first linear coefficient
       label2_max= (0.006) similar for the second
       label3_max= (0.012) similar for the third
       labelN_max= same with default 0.03
       ...
       scatter_max= (0.015) maximum scatter of residuals
       dr= (module-wide default) data release
    OUTPUT:
       Boolean index into the wavelength range with True for continuum pixels
    HISTORY:
       2015-02-05 - Written - Bovy (IAS@KITP)
    """
    # Grab kwargs
    type = kwargs.pop('type', 'lin')
    dr = kwargs.pop('dr', path._default_dr())
    # Parse input
    if len(args) == 0:  # Use default fit
        from apogee.spec._train_cannon import load_fit
        coeffs, scatter, baseline_labels = load_fit()
        type = 'quad'
    else:
        spec = args[0]
        specerr = args[1]
        # Determine the type of input
        if len(specerr.shape) == 2:
            # Run the Cannon
            if type.lower() == 'lin':
                coeffs, scatter = cannon.linfit(*args)
            elif type.lower() == 'quad':
                coeffs, scatter = cannon.quadfit(*args)
        else:
            coeffs = spec
            scatter = specerr
    ncoeffs = coeffs.shape[0]
    if type.lower() == 'lin':
        nlabels = ncoeffs - 1
    elif type.lower() == 'quad':
        nlabels = int((-3 + numpy.sqrt(9 + 8 * (ncoeffs - 1)))) // 2
    # Determine continuum pixels
    out = numpy.ones(len(scatter), dtype='bool')
    # Deviation from baseline
    out[numpy.fabs(coeffs[0] - 1.) > kwargs.get('baseline_dev', 0.015)] = False
    # Large dependence on labels
    maxs = numpy.zeros(nlabels)
    maxs[0] = kwargs.get('label1_max', 10.**-5.)
    maxs[1] = kwargs.get('label2_max', 0.006)
    maxs[2] = kwargs.get('label3_max', 0.012)
    for ii in range(nlabels - 3):
        maxs[ii + 3] = kwargs.get('label%i_max' % (ii + 4), 0.03)
    for ii in range(1, nlabels + 1):
        out[numpy.fabs(coeffs[ii]) > maxs[ii - 1]] = False
    # Large residuals
    out[scatter > kwargs.get('scatter_max', 0.015)] = False
    _, _, _, aspcapDR12length = _aspcapPixelLimits(dr='12')
    if int(dr) > 12 and coeffs.shape[1] == aspcapDR12length:
        # Want continuum pixels on >DR12 ASPCAP grid, but using coefficients
        # from <= DR12 grid
        dr_module = path._default_dr()
        path.change_dr(12)
        out = toApStarGrid(out)
        path.change_dr(dr)
        out = toAspcapGrid(out)
        path.change_dr(dr_module)
    return out
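
###The sketch below is not part of the original code: a hypothetical example of
###calling pixels_cannon() with the output of a previous Cannon run (input
###option b in the docstring).  The 7214-pixel DR12 ASPCAP grid length and the
###synthetic coefficient/scatter arrays are assumptions for illustration; this
###snippet also omits the module's own imports (numpy, apogee.tools.path, etc.).
def _example_pixels_cannon():
    """Hypothetical usage sketch: flag continuum pixels from synthetic
    linear-Cannon coefficients (3 labels -> 4 coefficients per pixel)."""
    import numpy  #pixels_cannon() itself also relies on numpy being imported
    nlambda = 7214                        #assumed DR12 ASPCAP grid length
    coeffs = numpy.zeros((4, nlambda))    #rows: baseline + one per label
    coeffs[0] = 1.                        #baseline flux near 1
    scatter = numpy.full(nlambda, 0.005)  #small fit scatter everywhere
    return pixels_cannon(coeffs, scatter, type='lin')
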
def fit_func(elem, name, spectra, spectra_errs, T, dat_type, run_number, location, sigma_val=None):
    """Return fit residuals from quadratic fit, spectral errors for desired element, fluxes for desired element,
    an appropriately-sized array of effective temperatures, the quadratic fitting parameters, the residuals, 
    errors, temperatures, and fluxes with NaNs removed, and the normalized elemental weights.
    
    Functions:
        Reads in the DR12 element windows, maps them onto the DR14 grid, and reads the DR14 windows.
        Obtains the indices of pixels of the absorption lines and saves the flux value and uncertainty for 
        each star in these pixels.
        Performs the quadratic fit on each pixel using weight_lsq() and computes the residuals using residuals().
        Obtains the flux values, uncertainties, fits, residuals, and temperatures with NaNs removed.
        Writes the residuals and fit parameters to .hdf5 files.
    
    Parameters
    ----------
    elem : str
        Element name (e.g. 'AL')
    name : str
        Name of desired cluster (e.g. 'NGC 2682')
    spectra : tuple
    	Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
    	Array of floats representing the spectral uncertainties of the desired cluster
    T : tuple
    	Array of floats representing the effective temperature of each star in the cluster
    dat_type : str
    	Indicates whether the data being examined is the data or a simulation
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    sigma_val : float, optional
    	Indicates the value of sigma being used for the simulation in question, if applicable (default is None)

    Returns
    -------
    elem_res : tuple
    	Array of floats representing the fit residuals, with original positioning of points maintained
    final_err : tuple
    	Array of floats representing the spectral uncertainties from the lines of the desired element,
    	with original positioning of points maintained
    final_points : tuple
    	Array of floats representing the fluxes from the lines of the desired element, with original 
    	positioning of points maintained
    temp_array : tuple
    	Array of floats representing the effective temperature of each star in the cluster, with a row for
    	each pixel of the desired element
    elem_a : tuple
    	Array of floats representing the fitting parameters for the quadratic terms in the fits for each pixel of
    	the desired element
    elem_b : tuple
    	Array of floats representing the fitting parameters for the linear terms in the fits for each pixel of
    	the desired element
    elem_c : tuple
    	Array of floats representing the fitting parameters for the constant terms in the fits for each pixel of
    	the desired element
    nanless_res : tuple
    	Array of floats representing the fit residuals, with NaNs removed
    nanless_T : tuple
    	Array of floats representing the effective temperature of each star in the cluster, with a row for 
    	each pixel of the desired element, with NaNs removed
    nanless_points : tuple
    	Array of floats representing the fluxes from the lines of the desired element, with NaNs removed
    normed_weights : tuple
    	Array of floats representing the weight of each elemental window, normalized to 1
    """
    
    change_dr('12')
    #Find the DR14 windows from the DR12 windows
    dr12_elem_windows = window.read(elem)
    change_dr('14')
    dr14_elem_windows_12 = np.concatenate((dr12_elem_windows[246:3274], dr12_elem_windows[3585:6080], dr12_elem_windows[6344:8335]))
    normalized_dr14_elem_windows_12 = (dr14_elem_windows_12 - np.nanmin(dr14_elem_windows_12))/(np.nanmax(dr14_elem_windows_12) - np.nanmin(dr14_elem_windows_12))
    
    #Get the indices of the lines 
    ind_12 = np.argwhere(normalized_dr14_elem_windows_12 > 0)
    ind_12 = ind_12.flatten()
    
    #Get the fluxes and errors from spectra
    len_spectra = len(spectra)
    elem_points_12 = np.zeros((len(ind_12), len_spectra))
    elem_err_12 = np.zeros((len(ind_12), len_spectra))
    for i in range(0, len(ind_12)):
    	for j in range(0, len_spectra):
    		elem_points_12[i][j] = spectra[j][ind_12[i]]
    		elem_err_12[i][j] = spectra_errs[j][ind_12[i]] #APOGEE measured errors
    		
    #Use only pixels with at least 5 non-NaN points
    final_points_12 = []
    final_err_12 = []
    final_inds_12 = []
    for i in range(len(elem_points_12)):
    	if np.count_nonzero(~np.isnan(elem_points_12[i])) >= 5:
    		final_points_12.append(elem_points_12[i])
    		final_err_12.append(elem_err_12[i])
    		final_inds_12.append(ind_12[i])
    final_points_12 = np.array(final_points_12)
    final_err_12 = np.array(final_err_12)
    final_inds_12 = np.array(final_inds_12)
    if len(final_inds_12) == 0:
    	print('Warning: less than 5 points for every pixel, skipping ', elem)
    else:
    	dr12_weights = normalized_dr14_elem_windows_12[final_inds_12]
    	sorted_dr12_weights = np.sort(dr12_weights)
    	
    	#Get windows
    	if location == 'personal':
    		window_file = pd.read_hdf('/Users/chloecheng/Personal/dr14_windows.hdf5', 'window_df') 
    	elif location == 'server':
    		window_file = pd.read_hdf('/geir_data/scr/ccheng/AST425/Personal/dr14_windows.hdf5', 'window_df')
    		
    	dr14_elem_windows_14 = window_file[elem].values
    	normalized_dr14_elem_windows_14 = (dr14_elem_windows_14 - np.min(dr14_elem_windows_14))/(np.max(dr14_elem_windows_14) - np.min(dr14_elem_windows_14))
    	
    	#Get the indices of the lines 
    	if elem == 'C' or elem == 'N' or elem == 'FE':
    		ind = np.argwhere(normalized_dr14_elem_windows_14 > np.min(sorted_dr12_weights[int(len(sorted_dr12_weights)*0.7):]))
    	else:
    		ind = np.argwhere(normalized_dr14_elem_windows_14 > 0)
    	ind = ind.flatten()
    	
    	#Get the fluxes and errors from spectra
    	elem_points = np.zeros((len(ind), len_spectra))
    	elem_err = np.zeros((len(ind), len_spectra))
    	for i in range(0, len(ind)):
    		for j in range(0, len_spectra):
    			elem_points[i][j] = spectra[j][ind[i]]
    			elem_err[i][j] = spectra_errs[j][ind[i]] #APOGEE measured errors
    	
    	#Use only pixels with at least 5 non-NaN points
    	final_points = []
    	final_err = []
    	final_inds = []
    	for i in range(len(elem_points)):
    		if np.count_nonzero(~np.isnan(elem_points[i])) >= 5:
    			final_points.append(elem_points[i])
    			final_err.append(elem_err[i])
    			final_inds.append(ind[i])
    	final_points = np.array(final_points)
    	final_err = np.array(final_err)
    	final_inds = np.array(final_inds)
    	
    	if len(final_points) == 0:
    		print('Warning: less than 5 points for every pixel, skipping ', elem)
    	else:
    	
    		#Create an appropriately-sized array of temperatures to mask as well
    		temp_array = np.full((final_points.shape), T)
    		for i in range(0, len(final_points)):
    			for j in range(0, len_spectra):
    				if np.isnan(final_points[i][j]):
    					temp_array[i][j] = np.nan
    					
    		#Do fits with non-nan numbers
    		nanless_inds = np.isfinite(final_points)
    		fits = []
    		for i in range(len(final_points)):
    			fits.append(weight_lsq(final_points[i][nanless_inds[i]], temp_array[i][nanless_inds[i]], final_err[i][nanless_inds[i]]))
    		for i in range(len(fits)):
    			fits[i] = np.array(fits[i])
    		fits = np.array(fits)
    		elem_a = fits[:,0]
    		elem_b = fits[:,1]
    		elem_c = fits[:,2]
    		
    		elem_fits = np.zeros_like(final_points)
    		for i in range(0, len(final_points)):
    			elem_fits[i] = elem_a[i]*temp_array[i]**2 + elem_b[i]*temp_array[i] + elem_c[i]
    			
    		#Calculate residuals
    		elem_res = residuals(final_points, elem_fits)
    		
    		#Remove nans from fits, residuals, errors, and temperatures for plotting and cumulative distribution 
    		#calculation purposes
    		nanless_fits = []
    		nanless_res = []
    		nanless_err = []
    		nanless_T = []
    		nanless_points = []
    		for i in range(len(final_points)):
    			nanless_fits.append(elem_fits[i][nanless_inds[i]])
    			nanless_res.append(elem_res[i][nanless_inds[i]])
    			nanless_err.append(final_err[i][nanless_inds[i]])
    			nanless_T.append(temp_array[i][nanless_inds[i]])
    			nanless_points.append(final_points[i][nanless_inds[i]])
    		for i in range(len(final_points)):
    			nanless_fits[i] = np.array(nanless_fits[i])
    			nanless_res[i] = np.array(nanless_res[i])
    			nanless_err[i] = np.array(nanless_err[i])
    			nanless_T[i] = np.array(nanless_T[i])
    			nanless_points[i] = np.array(nanless_points[i])
    		nanless_fits = np.array(nanless_fits)
    		nanless_res = np.array(nanless_res)
    		nanless_err = np.array(nanless_err)
    		nanless_T = np.array(nanless_T)
    		nanless_points = np.array(nanless_points)
    		
    		#Get the weights for later
    		weights = normalized_dr14_elem_windows_14[final_inds]
    		normed_weights = weights/np.sum(weights)
    		
    		#File-saving 
    		#If we are looking at the data
    		timestr = time.strftime("%Y%m%d_%H%M%S")
    		name_string = str(name).replace(' ', '')
    		pid = str(os.getpid())
    		if sigma_val == None:
    			if location == 'personal':
    				path_dat = '/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
    			elif location == 'server':
    				path_dat = '/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
    	
    			#If the file exists, output the desired variables
    			if glob.glob(path_dat):
    				return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
    			#If the file does not exist, create file and output the desired variables
    			else:
    				file = h5py.File(path_dat, 'w')
    				file['points'] = final_points
    				file['residuals'] = elem_res
    				file['err_200'] = final_err
    				file['a_param'] = elem_a
    				file['b_param'] = elem_b
    				file['c_param'] = elem_c
    				file.close()
    				return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
    		#If we are looking at simulations
    		else:
    			if location == 'personal':
    				path_sim = '/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
    			elif location == 'server':
    				path_sim = '/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string  + '/' + name_string  + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
    	
    			#If the file exists, append to the file
    			if glob.glob(path_sim):
    				file = h5py.File(path_sim, 'a')
    				#If the group for the particular value of sigma exists, don't do anything
    				if glob.glob(str(sigma_val)):
    					file.close()
    				#If not, append a new group to the file for the particular value of sigma
    				else:
    					grp = file.create_group(str(sigma_val))
    					grp['points'] = final_points
    					grp['residuals'] = elem_res
    					grp['err_200'] = final_err
    					grp['a_param'] = elem_a
    					grp['b_param'] = elem_b
    					grp['c_param'] = elem_c
    					file.close()
    			#If the file does not exist, create a new file
    			else:
    				file = h5py.File(path_sim, 'w')
    				grp = file.create_group(str(sigma_val))
    				grp['points'] = final_points
    				grp['residuals'] = elem_res
    				grp['err_200'] = final_err
    				grp['a_param'] = elem_a
    				grp['b_param'] = elem_b
    				grp['c_param'] = elem_c
    				file.close()
    			return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
-h  Help file
--cluster=<arg>  Cluster name
--red_clump=<arg> Whether to exclude red clump stars in rcsample or not
--element=<arg>  Element name
--type=<arg>  Data type 
--location=<arg> Machine where the code is being run

"""

#Imports
#apogee package 
import apogee.tools.read as apread
from apogee.tools.path import change_dr
from apogee.spec import window
from apogee.tools.read import rcsample
change_dr('14') #use DR14
#astropy helper functions
import astropy.io.fits as afits
#basic math and plotting
from docopt import docopt
import numpy as np
import pandas as pd
import h5py
import glob
import matplotlib.pyplot as plt
import os
import time

fs=16
plt.rc('font', family='serif',size=fs)
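
###The module docstring above is clipped to its option list in this example, so
###the script's actual usage string is not shown.  The block below is a hedged
###sketch of how those options might be parsed with docopt; the 'Usage:' lines
###and the script name are assumptions, not the original wiring.
if __name__ == '__main__':
    _usage = """Hypothetical usage string reconstructed from the option list above.

    Usage:
        fit_script.py --cluster=<arg> --red_clump=<arg> --element=<arg> --type=<arg> --location=<arg>
        fit_script.py -h
    """
    arguments = docopt(_usage)  #parse the command-line options listed in the clipped docstring
    print(arguments['--cluster'], arguments['--red_clump'],
          arguments['--element'], arguments['--type'], arguments['--location'])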