def main(): """Run the main script routine.""" # Define vprint to only print when the verbose flag is given. vprint = vcl.verbose_print(args.verbose) output_dir = Path(vcl.config['PATHS']['output_dir']) search_str = f'{str(output_dir)}/HD*' star_dirs = glob(search_str) star_list = [] tqdm.write('Collecting stars...') for star_dir in tqdm(star_dirs): star = get_star(Path(star_dir)) if star is None: pass else: if args.casagrande2011: vprint('Applying values from Casagrande et al. 2011.') star.getStellarParameters('Casagrande2011') elif args.nordstrom2004: vprint('Applying values from Nordstrom et al. 2004.') star.getStellarParameters('Nordstrom2004') star_list.append(star) vprint(f'Added {star.name}.') # Check if the star fits the SP1 sample criteria: if (5680 <= star.temperature <= 5880) and\ (4.23 <= star.logG <= 4.65) and\ (-0.11 <= star.metallicity <= 0.11): # print(type(star.name)) # print(type(star.temperature.value)) # print(type(star.metallicity)) # print(type(star.logG)) tqdm.write(','.join( (star.name, str(int(star.temperature.value)), str(star.metallicity), str(star.logG), str(star.getNumObs()))))
def main(): """Run the main function for the script.""" # Define vprint to only print when the verbose flag is given. vprint = vcl.verbose_print(args.verbose) main_dir = Path(args.main_dir[0]) if not main_dir.exists(): raise FileNotFoundError(f'{main_dir} does not exist!') tqdm.write(f'Looking in main directory {main_dir}') if args.fit_params_file: vprint(f'Reading params file {args.fit_params_file}...') params_file = main_dir / f'fit_params/{args.fit_params_file}' fit_results = get_params_file(params_file) model_func = fit_results['model_func'] coeffs = fit_results['coeffs'] sigma_sys = fit_results['sigmas_sys'] apply_corrections = True else: fit_results = None apply_corrections = False if args.reference_star: ref_star = get_star(main_dir / args.reference_star) tqdm.write(f'Reference star is {ref_star.name}.') star_list = [] tqdm.write('Collecting stars...') for star_dir in tqdm(args.star_names): star = get_star(main_dir / star_dir) if star is None: pass else: if args.casagrande2011: vprint('Applying values from Casagrande et al. 2011.') star.getStellarParameters('Casagrande2011') elif args.nordstrom2004: vprint('Applying values from Nordstrom et al. 2004.') star.getStellarParameters('Nordstrom2004') star_list.append(star) vprint(f'Added {star.name}.') tqdm.write(f'Found {len(star_list)} usable stars in total.') if args.compare_offset_patterns: offset_patterns_pre = [] offset_patterns_post = [] stars_pre = [] stars_post = [] for star in star_list: if star.hasObsPre: pre_slice = slice(None, star.fiberSplitIndex) offset_patterns_pre.append( star.getTransitionOffsetPattern(pre_slice)) stars_pre.append(star.name) if star.hasObsPost: post_slice = slice(star.fiberSplitIndex, None) offset_patterns_post.append( (star.getTransitionOffsetPattern(post_slice))) stars_post.append(star.name) fig = plt.figure(figsize=(12, 8), tight_layout=True) gs = GridSpec(nrows=2, ncols=2, figure=fig, height_ratios=[1, 1], width_ratios=[1, 1]) ax1 = fig.add_subplot(gs[0, 0]) ax2 = fig.add_subplot(gs[0, 1], sharex=ax1) ax3 = fig.add_subplot(gs[1, 0], sharex=ax1) ax4 = fig.add_subplot(gs[1, 1], sharex=ax1) for ax in (ax1, ax2, ax3, ax4): ax.axhline(y=0, color='Black') ax1.set_xlim(left=-2, right=len(offset_patterns_pre[0][0]) + 1) ax1.set_ylabel('Offset from expected position (m/s)') ax3.set_ylabel('Offset from expected position (m/s)') ax2.set_ylabel('Standard deviation (m/s)') ax4.set_ylabel('Standard deviation (m/s)') for pattern, star_name in zip(offset_patterns_pre, stars_pre): indices = [x for x in range(len(pattern[0]))] ax1.plot(indices, pattern[0], label=star_name, alpha=1, marker='D', markersize=1.5, linestyle='') ax2.plot(indices, pattern[1], label=star_name, alpha=1, marker='D', markersize=1.5, linestyle='') for pattern, star_name in zip(offset_patterns_post, stars_post): indices = [x for x in range(len(pattern[0]))] ax3.plot(indices, pattern[0], label=star_name, alpha=1, marker='D', markersize=1.5, linestyle='') ax4.plot(indices, pattern[1], label=star_name, alpha=1, marker='D', markersize=1.5, linestyle='') for ax in (ax1, ax2, ax3, ax4): ax.legend(ncol=3) ax.xaxis.set_major_locator(ticker.MultipleLocator(base=10)) ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=2)) ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=100)) ax.xaxis.grid(which='major', color='Gray', alpha=0.7, linestyle='-') ax.xaxis.grid(which='minor', color='Gray', alpha=0.4, linestyle='--') ax.yaxis.grid(which='major', color='Gray', alpha=0.4, linestyle='--') ax.yaxis.grid(which='minor', color='Gray', alpha=0.4, 
        plt.show()

    if args.compare_stellar_parameters_pairs:
        tqdm.write('Unpickling pairs list...')
        with open(vcl.final_pair_selection_file, 'rb') as f:
            pairs_list = pickle.load(f)

        plots_folder = main_dir / 'star_comparisons/pairs'
        if not plots_folder.exists():
            plots_folder.mkdir(parents=True)

        tqdm.write('Creating plots for each pair...')
        for pair in tqdm(pairs_list):
            blend1 = pair._higherEnergyTransition.blendedness
            blend2 = pair._lowerEnergyTransition.blendedness
            for order_num in pair.ordersToMeasureIn:
                pair_label = '_'.join([pair.label, str(order_num)])

                offsets_pre, offsets_post = [], []
                errs_pre, errs_post = [], []
                stds_pre, stds_post = [], []
                temp_pre, temp_post = [], []
                mtl_pre, mtl_post = [], []
                mag_pre, mag_post = [], []
                logg_pre, logg_post = [], []

                # Get the reference star properties.
                pre_slice = slice(None, ref_star.fiberSplitIndex)
                post_slice = slice(ref_star.fiberSplitIndex, None)
                ref_mean_pre, ref_err_pre, ref_std_pre =\
                    get_pair_data_point(ref_star, pre_slice, pair_label)
                ref_mean_post, ref_err_post, ref_std_post =\
                    get_pair_data_point(ref_star, post_slice, pair_label)

                # Collect the data points for each star:
                for star in tqdm(star_list):
                    # Ignore the reference star.
                    if star.name == ref_star.name:
                        vprint(f'Skipping over reference star {star.name}.')
                        continue

                    pre_slice = slice(None, star.fiberSplitIndex)
                    post_slice = slice(star.fiberSplitIndex, None)

                    if star.hasObsPre:
                        star_mean_pre, star_err_pre, star_std_pre =\
                            get_pair_data_point(star, pre_slice, pair_label)
                        offset = ref_mean_pre - star_mean_pre
                        offsets_pre.append(offset)
                        errs_pre.append(star_err_pre)
                        stds_pre.append(star_std_pre)
                        temp_pre.append(star.temperature)
                        mtl_pre.append(star.metallicity)
                        mag_pre.append(star.absoluteMagnitude)
                        logg_pre.append(star.logG)

                    if star.hasObsPost:
                        star_mean_post, star_err_post, star_std_post =\
                            get_pair_data_point(star, post_slice, pair_label)
                        offset = ref_mean_post - star_mean_post
                        offsets_post.append(offset)
                        errs_post.append(star_err_post)
                        stds_post.append(star_std_post)
                        temp_post.append(star.temperature)
                        mtl_post.append(star.metallicity)
                        mag_post.append(star.absoluteMagnitude)
                        logg_post.append(star.logG)

                # Create the figure and subplots:
                comp_fig, axes_dict = create_parameter_comparison_figures(
                    ylims=(-300 * u.m / u.s, 300 * u.m / u.s),
                    temp_lims=(5400 * u.K, 6300 * u.K),
                    mtl_lims=(-0.63, 0.52))

                for ax in axes_dict.values():
                    ax.annotate(f'Blendedness: ({blend1}, {blend2})',
                                (0.01, 0.95),
                                xycoords='axes fraction')

                for ax, attr in zip(
                        ('temp_pre', 'mtl_pre', 'mag_pre', 'logg_pre'),
                        (np.array(temp_pre) + 97,
                         np.array(mtl_pre) + 0.12,
                         mag_pre, logg_pre)):
                    plot_data_points(axes_dict[ax], attr,
                                     offsets_pre, errs_pre, stds_pre,
                                     era='pre')

                for ax, attr in zip(
                        ('temp_post', 'mtl_post', 'mag_post', 'logg_post'),
                        (np.array(temp_post) + 97,
                         np.array(mtl_post) + 0.12,
                         mag_post, logg_post)):
                    plot_data_points(axes_dict[ax], attr,
                                     offsets_post, errs_post, stds_post,
                                     era='post')

                # Plot the reference star points last so they're on top.
                plot_data_points(axes_dict['temp_pre'],
                                 ref_star.temperature,
                                 0, ref_err_pre, ref_std_pre, ref=True)
                plot_data_points(axes_dict['temp_post'],
                                 ref_star.temperature,
                                 0, ref_err_post, ref_std_post, ref=True)
                plot_data_points(axes_dict['mtl_pre'],
                                 ref_star.metallicity,
                                 0, ref_err_pre, ref_std_pre, ref=True)
                plot_data_points(axes_dict['mtl_post'],
                                 ref_star.metallicity,
                                 0, ref_err_post, ref_std_post, ref=True)
                plot_data_points(axes_dict['mag_pre'],
                                 ref_star.absoluteMagnitude,
                                 0, ref_err_pre, ref_std_pre, ref=True)
                plot_data_points(axes_dict['mag_post'],
                                 ref_star.absoluteMagnitude,
                                 0, ref_err_post, ref_std_post, ref=True)
                plot_data_points(axes_dict['logg_pre'], ref_star.logG,
                                 0, ref_err_pre, ref_std_pre, ref=True)
                plot_data_points(axes_dict['logg_post'], ref_star.logG,
                                 0, ref_err_post, ref_std_post, ref=True)

                file_name = plots_folder / f'{pair_label}.png'
                vprint(f'Saving file {pair_label}.png')
                comp_fig.savefig(str(file_name))
                plt.close('all')

    if args.compare_stellar_parameters_transitions:
        tqdm.write('Unpickling transitions list...')
        with open(vcl.final_selection_file, 'rb') as f:
            transitions_list = pickle.load(f)

        plots_folder = main_dir / 'star_comparisons/transitions'
        if apply_corrections:
            model_name = '_'.join(model_func.__name__.split('_')[:-1])
        else:
            model_name = 'uncorrected'
        plots_folder /= model_name
        if not plots_folder.exists():
            plots_folder.mkdir(parents=True)

        index_nums = []
        index_num = 0
        sigma_list_pre, sigma_list_post = [], []
        sigma_sys_pre, sigma_sys_post = [], []

        tqdm.write('Creating plots for each transition...')
        for transition in tqdm(transitions_list):
            for order_num in transition.ordersToFitIn:
                transition_label = '_'.join([transition.label,
                                             str(order_num)])
                vprint(f'Analysing {transition_label}...')
                index_nums.append(index_num)
                index_num += 1

                means_pre, means_post = [], []
                errs_pre, errs_post = [], []
                stds_pre, stds_post = [], []
                temp_pre, temp_post = [], []
                mtl_pre, mtl_post = [], []
                mag_pre, mag_post = [], []
                logg_pre, logg_post = [], []

                for star in tqdm(star_list):
                    pre_slice = slice(None, star.fiberSplitIndex)
                    post_slice = slice(star.fiberSplitIndex, None)

                    if star.hasObsPre:
                        star_mean_pre, star_err_pre, star_std_pre =\
                            get_transition_data_point(
                                star, pre_slice, transition_label,
                                fit_params=fit_results)
                        means_pre.append(star_mean_pre)
                        errs_pre.append(star_err_pre)
                        stds_pre.append(star_std_pre)
                        temp_pre.append(star.temperature)
                        mtl_pre.append(star.metallicity)
                        mag_pre.append(star.absoluteMagnitude)
                        logg_pre.append(star.logG)

                    if star.hasObsPost:
                        star_mean_post, star_err_post, star_std_post =\
                            get_transition_data_point(
                                star, post_slice, transition_label,
                                fit_params=fit_results)
                        means_post.append(star_mean_post)
                        errs_post.append(star_err_post)
                        stds_post.append(star_std_post)
                        temp_post.append(star.temperature)
                        mtl_post.append(star.metallicity)
                        mag_post.append(star.absoluteMagnitude)
                        logg_post.append(star.logG)

                # Correct for trends in stellar parameters here.
                if apply_corrections:
                    # NOTE: Subtracting the model corrections from the means
                    # is currently disabled; only the systematic errors from
                    # the fit are recorded here. The disabled steps were:
                    #   data_pre = np.stack((temp_pre, mtl_pre, mag_pre),
                    #                       axis=0)
                    #   params_pre = coeffs[transition_label + '_pre']
                    #   corrections = u.unyt_array(
                    #       model_func(data_pre, *params_pre),
                    #       units=u.m/u.s)
                    #   means_pre = ma.masked_invalid(means_pre) -\
                    #       corrections
                    # (and likewise for the post-change era).
                    sigma_sys_pre.append(
                        sigma_sys[transition_label + '_pre'].value)
                    sigma_sys_post.append(
                        sigma_sys[transition_label + '_post'].value)
                else:
                    sigma_sys_pre.append(0)
                    sigma_sys_post.append(0)

                sigma_pre = np.nanstd(means_pre)
                sigma_post = np.nanstd(means_post)
                sigma_list_pre.append(sigma_pre)
                sigma_list_post.append(sigma_post)

                # Write out data into a CSV file for checking.
                csv_file = plots_folder /\
                    f'Data_{transition_label}_{model_name}.csv'
                with open(csv_file, 'w', newline='') as f:
                    datawriter = csv.writer(f)
                    header = ('weighted_means_pre', 'EoWM_pre', 'EoM_pre',
                              'weighted_means_post', 'EoWM_post',
                              'EoM_post')
                    datawriter.writerow(header)
                    for row in zip_longest(means_pre, errs_pre, stds_pre,
                                           means_post, errs_post,
                                           stds_post):
                        datawriter.writerow(row)

                # Create the figure and subplots:
                if not apply_corrections:
                    total_means = np.concatenate((means_pre, means_post))
                    median = np.nanmedian(total_means)
                    y_limits = (median - 300, median + 300)
                else:
                    y_limits = (-300, 300)

                comp_fig, axes_dict = create_parameter_comparison_figures(
                    ylims=None if args.full_range else y_limits,
                    temp_lims=(5400 * u.K, 6300 * u.K),
                    mtl_lims=(-0.63, 0.52))

                for ax, attr in zip(
                        ('temp_pre', 'mtl_pre', 'mag_pre', 'logg_pre'),
                        (temp_pre, mtl_pre, mag_pre, logg_pre)):
                    plot_data_points(axes_dict[ax], attr,
                                     means_pre, errs_pre, stds_pre,
                                     era='pre')
                    axes_dict[ax].annotate(
                        f'Blendedness: {transition.blendedness}'
                        '\n'
                        fr'$\sigma$: {sigma_pre:.2f}',
                        (0.01, 0.99),
                        xycoords='axes fraction',
                        horizontalalignment='left',
                        verticalalignment='top')
                data = np.array(ma.masked_invalid(means_pre).compressed())
                axes_dict['hist_pre'].hist(data,
                                           bins='fd', color='Black',
                                           histtype='step',
                                           orientation='horizontal')

                for ax, attr in zip(
                        ('temp_post', 'mtl_post', 'mag_post', 'logg_post'),
                        (temp_post, mtl_post, mag_post, logg_post)):
                    plot_data_points(axes_dict[ax], attr,
                                     means_post, errs_post, stds_post,
                                     era='post')
                    axes_dict[ax].annotate(
                        f'Blendedness: {transition.blendedness}'
                        '\n'
                        fr'$\sigma$: {sigma_post:.2f}',
                        (0.01, 0.99),
                        xycoords='axes fraction',
                        horizontalalignment='left',
                        verticalalignment='top')
                data = np.array(ma.masked_invalid(means_post).compressed())
                axes_dict['hist_post'].hist(data,
                                            bins='fd', color='Black',
                                            histtype='step',
                                            orientation='horizontal')

                file_name = plots_folder /\
                    f'{transition_label}_{model_name}.png'
                vprint(f'Saving file {transition_label}_{model_name}.png')
                comp_fig.savefig(str(file_name))
                plt.close('all')

        csv_file = plots_folder / f'{model_name}_sigmas.csv'
        with open(csv_file, 'w', newline='') as f:
            datawriter = csv.writer(f)
            header = ('#index', 'sigma_pre', 'sigma_sys_pre',
                      'sigma_post', 'sigma_sys_post')
            datawriter.writerow(header)
            for row in zip(index_nums, sigma_list_pre, sigma_sys_pre,
                           sigma_list_post, sigma_sys_post):
                datawriter.writerow(row)
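# The `get_pair_data_point`/`get_transition_data_point` helpers used above
# are defined elsewhere in varconlib; judging by the CSV header written
# above ('weighted_means', 'EoWM', 'EoM'), each returns a (weighted mean,
# error on the weighted mean, error on the mean) triple for one star and
# era. A hypothetical sketch of that calculation, using the standard
# inverse-variance weighting formulas (not the actual implementation):
def _weighted_mean_sketch(values, errors):
    """Return the weighted mean, its error (EoWM), and the EoM."""
    weights = 1 / np.square(errors)
    weighted_mean = np.sum(values * weights) / np.sum(weights)
    error_on_weighted_mean = 1 / np.sqrt(np.sum(weights))
    error_on_mean = np.std(values) / np.sqrt(len(values))
    return weighted_mean, error_on_weighted_mean, error_on_mean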
    ax.errorbar(data64[:, 0], data64[:, 1], yerr=data64[:, 2],
                marker='x', markersize=8,
                markeredgecolor='FireBrick', color='FireBrick')
    ax.errorbar(data128[:, 0], data128[:, 1], yerr=data128[:, 2],
                marker='+', markersize=8,
                markeredgecolor='RoyalBlue', color='RoyalBlue')

    plt.show()


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Plot the measured binned'
                                     ' residuals for HARPS calibration.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Print out more information about the script.')

    args = parser.parse_args()

    vprint = vcl.verbose_print(args.verbose)

    main()
def find_sys_scatter(model_func, x_data, y_data, err_array, beta0,
                     n_sigma=2.5, tolerance=0.001, verbose=False):
    """Find the systematic scatter in a dataset with a given model.

    Takes a model function `model_func`, arrays of x, y, and uncertainties
    (which must all have the same length), and an initial guess for the
    parameters of the function, and fits the model to the data. It then
    checks the reduced chi-squared value, and if it is greater than 1 (with
    a tolerance of 1e-3), it adds an additional amount in quadrature to the
    error array and refits the data, continuing until the chi-squared value
    is within the tolerance.

    Parameters
    ----------
    model_func : callable
        The function to fit the data with.
    x_data : array-like
        The array of x-values to fit.
    y_data : array-like
        The array of y-values to fit. Must have the same length as
        `x_data`.
    err_array : array-like
        The error array for the y-values. Must have the same length as
        `x_data` and `y_data`.
    beta0 : tuple
        A tuple of values to use as the initial guesses for the parameters
        of the function given by `model_func`.
    n_sigma : float, Default : 2.5
        The number of sigma outside of which a data point is considered an
        outlier.
    tolerance : float, Default : 0.001
        The distance from one within which the chi-squared per degree of
        freedom must fall for the iteration to exit. (Note that if the
        chi-squared value is naturally less than one on the first
        iteration, the iteration will end even if the value is not closer
        to one than the tolerance.)
    verbose : bool, Default : False
        Whether to print out more diagnostic information on the process.

    Returns
    -------
    dict
        A dictionary containing the following keys:

        popt : tuple
            A tuple of the optimized values found by the fitting routine
            for the parameters.
        pcov : `np.array`
            The covariance matrix for the fit.
        residuals : `np.array`
            The value of `y_data` minus the model values at all given
            independent variables.
        sys_err_list : list of floats
            A list containing the values of the systematic error at each
            iteration. The last value is the one which brings the
            chi-squared per degree of freedom for the data within the
            tolerance of one.
        chi_squared_list : list of floats
            A list containing the calculated chi-squared per degree of
            freedom for each step of the iteration.
        mask_list : list of lists
            A list containing the mask applied to the data at each
            iteration. Each entry will be a list of 1s and 0s.

    """
    vprint = vcl.verbose_print(verbose)

    # Iterate to find what additional systematic error is needed to get a
    # chi^2 of ~1.
    chi_tol = tolerance
    diff = 1
    sys_err = 0
    iter_err_array = np.sqrt(np.square(err_array) + np.square(sys_err))

    chi_squared_list = []
    sigma_sys_list = []
    mask_list = []
    sigma_sys_change_list = []

    x_data.mask = False
    y_data.mask = False
    err_array.mask = False

    orig_x_data = ma.copy(x_data)
    orig_y_data = ma.copy(y_data)
    orig_errs = ma.copy(err_array)

    last_mask = np.zeros_like(y_data)
    new_mask = np.ones_like(y_data)

    iterations = 0
    chi_squared_flips = 0

    vprint('  #  sigma_sys      diff     chi^2      SSCA   #*  flips')
    while True:
        iterations += 1

        popt, pcov = curve_fit(model_func, x_data, y_data,
                               sigma=iter_err_array,
                               p0=beta0,
                               absolute_sigma=True,
                               method='lm', maxfev=10000)

        iter_model_values = model_func(x_data, *popt)
        iter_residuals = y_data - iter_model_values

        # Find the chi^2 value for this fit:
        chi_squared_nu = calc_chi_squared_nu(iter_residuals,
                                             iter_err_array, len(popt))

        # Track how many times chi^2 crosses 1 between iterations.
        try:
            last_chi_squared = chi_squared_list[-1]
        except IndexError:
            # On the first iteration there is no previous value.
            pass
        else:
            if chi_squared_nu > 1 and last_chi_squared < 1:
                chi_squared_flips += 1
            elif chi_squared_nu < 1 and last_chi_squared > 1:
                chi_squared_flips += 1

        sigma_sys_list.append(sys_err)
        chi_squared_list.append(chi_squared_nu)
        mask_list.append(last_mask)

        diff = abs(chi_squared_nu - 1)

        # Set the amount to change by using the latest chi^2 value.
        sigma_sys_change_amount = np.power(chi_squared_nu, 2 / 3)
        sigma_sys_change_list.append(sigma_sys_change_amount)

        vprint(f'{iterations:>3}, '
               f'{sys_err:>10.6f}, {diff:>8.4f}, {chi_squared_nu:>8.4f},'
               f' {sigma_sys_change_amount:>8.4f},'
               f' {iter_residuals.count():>3}, {chi_squared_flips}')
        if verbose:
            sleep_length = 0 if chi_squared_flips < 3 else 0.1
            sleep(sleep_length)

        if chi_squared_nu > 1:
            if sys_err == 0:
                sys_err = np.sqrt(chi_squared_nu - 1) *\
                    np.nanmedian(err_array)
            else:
                sys_err = sys_err * sigma_sys_change_amount
        elif chi_squared_nu < 1:
            if sys_err == 0:
                # If the chi-squared value is naturally lower than 1,
                # don't change anything, just exit.
                break
            else:
                sys_err = sys_err * sigma_sys_change_amount

        # Construct new error array using all errors.
        iter_err_array = np.sqrt(np.square(orig_errs) + np.square(sys_err))
        new_mask = np.zeros_like(y_data)

        # Find residuals for all data, including that masked this
        # iteration:
        full_model_values = model_func(orig_x_data, *popt)
        full_residuals = orig_y_data - full_model_values

        # Check for outliers at each point, and mark the mask
        # appropriately.
        for i in range(len(iter_err_array)):
            if abs(full_residuals[i]) > (n_sigma * iter_err_array[i]):
                new_mask[i] = 1

        # Set up the mask on the x and y data and errors for the next
        # iteration.
        for array in (x_data, y_data, iter_err_array):
            if chi_squared_flips < 5:
                array.mask = new_mask
                last_mask = new_mask
            # If chi^2 flips between less than and greater than one too
            # many times, the routine is probably stuck in a loop adding
            # and removing a borderline point, so simply stop re-evaluating
            # points for inclusion.
            else:
                array.mask = last_mask

        # If chi^2 gets within the tolerance and the mask hasn't changed in
        # the last iteration, end the loop.
        if (diff < chi_tol) and np.all(last_mask == new_mask):
            break
        # If the mask is still changing, but the sigma_sys value has
        # clearly converged to a value (by both the 10th and 100th most
        # recent entries being within the given tolerance of the most
        # recent entry), end the loop. Most runs terminate well under 100
        # steps so this should only catch the problem cases.
        elif ((iterations > 100) and (diff < chi_tol) and
              (abs(sigma_sys_list[-1] - sigma_sys_list[-10]) < chi_tol) and
              (abs(sigma_sys_list[-1] - sigma_sys_list[-100]) < chi_tol)):
            break
        # If the chi^2 value is approaching 1 from the bottom, it may be
        # the case that it can never reach 1, even if sigma_sys goes to 0
        # (but it will take forever to get there). In the case that
        # chi^2_nu < 1, and the values have clearly converged to a value
        # within the tolerance over the last 100 iterations, break the
        # loop. This does leave the derived sigma_sys value somewhat
        # meaningless, but it should be small enough in these cases as to
        # be basically negligible.
        elif ((iterations > 100) and (chi_squared_nu < 1.) and
              (abs(chi_squared_list[-1] - chi_squared_list[-10])
               < chi_tol) and
              (abs(chi_squared_list[-1] - chi_squared_list[-100])
               < chi_tol)):
            # If sigma_sys is less than a millimeter per second, just set
            # it to zero.
            if sys_err < 0.0011:
                sigma_sys_list[-1] = 0
            break
        # If the iterations go on too long, it may be because the routine
        # is converging very slowly to zero sigma_sys, so give it a nudge
        # if it's still large.
        elif iterations == 500:
            if (sys_err > 0.001) and (sys_err < 0.01) and\
                    (sigma_sys_list[-1] < sigma_sys_list[-2]):
                sys_err = 0.001
        # If it's taking a really long time to converge, but sigma_sys is
        # less than a millimeter per second, just set it to zero and end
        # the loop; otherwise give up, print the final state, and raise.
        elif iterations == 999:
            if sys_err < 0.0011:
                sigma_sys_list[-1] = 0
                break
            else:
                print(f'Final sys_err = {sys_err}')
                print(f'Final chi^2 = {chi_squared_nu}')
                print(f'diff = {diff}')
                print(np.all(last_mask == new_mask))
                for i, j in zip(last_mask, new_mask):
                    print(f'{i} {j}')
                raise RuntimeError("Process didn't converge.")

    results_dict = {'popt': popt, 'pcov': pcov,
                    'residuals': iter_residuals,
                    'sys_err_list': sigma_sys_list,
                    'chi_squared_list': chi_squared_list,
                    'mask_list': mask_list}

    return results_dict
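# A minimal usage sketch for `find_sys_scatter`, using synthetic data and a
# hypothetical linear model (neither is part of varconlib). This assumes
# `calc_chi_squared_nu` (defined elsewhere in this module) computes the
# standard reduced chi-squared, chi^2_nu = sum((r_i / sigma_i)^2) / (N - M)
# for N unmasked points and M model parameters. The inputs are masked
# arrays, since the routine manipulates the `.mask` attribute of its
# arguments directly.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    n_points = 50

    def linear_model(x, a, b):
        """A simple linear model for demonstration purposes."""
        return a * x + b

    x = ma.array(np.linspace(0, 10, n_points))
    stat_errs = ma.array(np.full(n_points, 0.5))
    # True relation, plus statistical noise matching stat_errs, plus extra
    # scatter that the routine should recover as sigma_sys (~1 here).
    y = ma.array(linear_model(x.data, 2.0, 1.0)
                 + rng.normal(0, 0.5, n_points)
                 + rng.normal(0, 1.0, n_points))

    results = find_sys_scatter(linear_model, x, y, stat_errs,
                               beta0=(1, 0), verbose=True)
    print('Best-fit parameters:', results['popt'])
    print('Recovered sigma_sys:', results['sys_err_list'][-1])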