def improve_order_regularization(r, o, star_filename, tellurics_filename,
                                 training_data, training_results,
                                 validation_data, validation_results,
                                 verbose=True, plot=False, basename='',
                                 K_star=0, K_t=0, L1=True, L2=True,
                                 tellurics_template_fixed=False,
                                 tellurics_template_file=None):
    """
    Use a validation scheme to determine the best regularization parameters for
    all model components in a given order r.
    Update files at star_filename, tellurics_filename with the best parameters.

    Parameters are tuned one at a time, in this sequence: tellurics L2 template,
    star L2 template, tellurics L1 template, star L1 template, then (only if
    `K_star > 0`) star L2/L1 basis vectors, then (only if `K_t > 0`) tellurics
    L2/L1 basis vectors. L1 entries are skipped when `L1` is False and L2
    entries are skipped when `L2` is False.

    Parameters
    ----------
    r : int
        Order index passed to `wobble.Model` and used to index the per-order
        arrays of the training/validation results objects.
    o : int
        Index into the datasets of `star_filename` and `tellurics_filename`
        (and into the fixed tellurics template results, if used).
    star_filename : str
        HDF5 file containing regularization amplitudes for the star.
    tellurics_filename : str
        HDF5 file containing regularization amplitudes for the tellurics.
    training_data, training_results
        Data and results objects used to build the training model.
    validation_data, validation_results
        Data and results objects used to build the validation model.
    verbose : bool (default `True`)
        Forwarded to the optimizer and to `improve_parameter`.
    plot : bool (default `False`)
        Generate and save diagnostic plots.
    basename : str (default ``)
        Prefix (file path and base) for the names of saved plots.
    K_star : int (default `0`)
        Number of variable basis vectors for the star.
    K_t : int (default `0`)
        Number of variable basis vectors for the tellurics.
    L1 : bool (default `True`)
        Whether to tune L1 amplitudes.
    L2 : bool (default `True`)
        Whether to tune L2 amplitudes.
    tellurics_template_fixed : bool (default `False`)
        If True, the tellurics template is loaded from a results file and held
        fixed in both the training and validation models.
    tellurics_template_file : str or None (default `None`)
        Results file from which the fixed tellurics template is read when
        `tellurics_template_fixed` is True. `None` falls back to the legacy
        hard-coded path.
    """
    n = 0  # epoch to plot

    # --- training model ---
    training_model = wobble.Model(training_data, training_results, r)
    training_model.add_star('star', variable_bases=K_star)
    if tellurics_template_fixed:  # hackity hack hack
        if tellurics_template_file is None:
            # legacy default kept so that existing callers behave as before
            tellurics_template_file = '/Users/mbedell/python/wobble/results/results_51peg_Kstar0_Kt0.hdf5'
        fixed_results = wobble.Results(filename=tellurics_template_file)
        template_xs = np.copy(fixed_results.tellurics_template_xs[o])
        template_ys = np.copy(fixed_results.tellurics_template_ys[o])
        training_model.add_telluric('tellurics', rvs_fixed=True,
                                    template_fixed=True, variable_bases=K_t,
                                    template_xs=template_xs,
                                    template_ys=template_ys)
    else:
        training_model.add_telluric('tellurics', rvs_fixed=True,
                                    variable_bases=K_t)
    training_model.setup()
    training_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        title = 'Initialization'
        filename = '{0}_init'.format(basename)
        plot_fit(r, n, training_data, training_results, title=title,
                 basename=filename)

    # --- validation model ---
    validation_model = wobble.Model(validation_data, validation_results, r)
    validation_model.add_star(
        'star', variable_bases=K_star,
        template_xs=training_results.star_template_xs[r])  # ensure templates are same size
    if tellurics_template_fixed:  # hackity hack hack
        validation_model.add_telluric(
            'tellurics', rvs_fixed=True, template_fixed=True,
            variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r],
            template_ys=training_results.tellurics_template_ys[r])
    else:
        validation_model.add_telluric(
            'tellurics', rvs_fixed=True, variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r])
    validation_model.setup()

    # --- what to tune ---
    # Each entry is (component label, dataset name in the HDF5 file, tensor).
    # The explicit names are only needed bc TF appends garbage to the end of
    # the tensor name. The order of this list is the order of optimization.
    star = training_model.components[0]
    tellurics = training_model.components[1]
    tuning_plan = [
        ('tellurics', 'L2_template', tellurics.L2_template_tensor),
        ('star', 'L2_template', star.L2_template_tensor),
        ('tellurics', 'L1_template', tellurics.L1_template_tensor),
        ('star', 'L1_template', star.L1_template_tensor),
    ]
    if K_star > 0:
        tuning_plan.extend([
            ('star', 'L2_basis_vectors', star.L2_basis_vectors_tensor),
            ('star', 'L1_basis_vectors', star.L1_basis_vectors_tensor),
        ])
    if K_t > 0:
        tuning_plan.extend([
            ('tellurics', 'L2_basis_vectors', tellurics.L2_basis_vectors_tensor),
            ('tellurics', 'L1_basis_vectors', tellurics.L1_basis_vectors_tensor),
        ])

    # Only these two labels are ever placed in tuning_plan above.
    reg_files = {'star': star_filename, 'tellurics': tellurics_filename}

    # --- starting values ---
    #o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o  # always initialize from starting guess (TODO: decide which init is better)
    regularization_dict = {}
    for component, name, tensor in tuning_plan:
        with h5py.File(reg_files[component], 'r') as f:
            regularization_dict[tensor] = np.copy(f[name][o_init])

    # --- tune each parameter in turn and write it back immediately ---
    n_tuned = 0  # track order in which parameters are improved
    for component, name, tensor in tuning_plan:
        wanted = (name.startswith('L1') and L1) or (name.startswith('L2') and L2)
        if not wanted:
            continue
        n_tuned += 1
        regularization_dict[tensor] = improve_parameter(
            tensor, training_model, validation_model, regularization_dict,
            validation_data, validation_results, verbose=verbose, plot=plot,
            basename=basename + '_par{0}'.format(n_tuned))
        with h5py.File(reg_files[component], 'r+') as f:
            f[name][o] = np.copy(regularization_dict[tensor])

    if plot:
        # Re-run with the last tensor of the plan: hack to update results
        tensor = tuning_plan[-1][2]
        test_regularization_value(tensor, regularization_dict[tensor],
                                  training_model, validation_model,
                                  regularization_dict, validation_data,
                                  validation_results, plot=False,
                                  verbose=False)
        title = 'Final'
        filename = '{0}_final'.format(basename)
        plot_fit(r, n, validation_data, validation_results, title=title,
                 basename=filename)

        # Mean-subtracted RVs (star RVs + BERVs) for validation vs. training epochs.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        val_rvs = validation_results.star_rvs[r] + validation_results.bervs
        train_rvs = training_results.star_rvs[r] + training_results.bervs
        ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
        ax.plot(training_results.dates, train_rvs - np.mean(train_rvs), 'k.',
                alpha=0.5)
        ax.set_ylabel('RV (m/s)')
        ax.set_xlabel('JD')
        fig.tight_layout()
        plt.savefig(basename + '_final_rvs.png')
        plt.close(fig)
# NOTE(review): this is a fragment of a driver script. `data`, `start_time`,
# `plots`, `plot_dir`, `K_star`, `K_t`, `star_reg_file`, `tellurics_reg_file`,
# `niter`, `movies` and `epochs` are all defined outside the visible code.
# The nesting below was reconstructed from syntax — confirm against the
# original file.

# Copy the list of orders from the data object and create the Results object
# that the per-order models below are built on.
orders = np.copy(data.orders)
results = wobble.Results(data=data)

print("data loaded")
print("time elapsed: {0:.2f} min".format((time() - start_time)/60.0))
elapsed_time = time() - start_time

if plots:
    print("plots will be saved under directory: {0}".format(plot_dir))
    # create the output directory on first use
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

# Template learning rates passed to the star and tellurics components.
star_learning_rate = 0.1
telluric_learning_rate = 0.01
# r is the position within `orders`; o is the value stored at that position
# (used only for the progress message in the visible code).
for r,o in enumerate(orders):
    model = wobble.Model(data, results, r)
    model.add_star('star', variable_bases=K_star,
                   regularization_par_file=star_reg_file,
                   learning_rate_template=star_learning_rate)
    model.add_telluric('tellurics', rvs_fixed=True, variable_bases=K_t,
                       regularization_par_file=tellurics_reg_file,
                       learning_rate_template=telluric_learning_rate)
    print("--- ORDER {0} ---".format(o))
    if plots:
        wobble.optimize_order(model, niter=niter, save_history=True,
                              basename=plot_dir+'history', movies=movies,
                              epochs_to_plot=epochs)
        fig, ax = plt.subplots(1, 1, figsize=(8,5))
        # black: this order's star RVs + BERVs - drifts, offset by the mean of
        # (star RVs + BERVs)
        ax.plot(data.dates, results.star_rvs[r] + data.bervs - data.drifts - np.mean(results.star_rvs[r] + data.bervs),
                'k.', alpha=0.8, ms=4)
        # red: pipeline RVs + BERVs, mean-subtracted
        ax.plot(data.dates, data.pipeline_rvs + data.bervs - np.mean(data.pipeline_rvs + data.bervs),
                'r.', alpha=0.5, ms=4)
        # NOTE(review): the visible fragment ends here; the figure is neither
        # saved nor closed in the code shown — presumably that follows out of view.
def improve_order_regularization(o, star_filename, tellurics_filename,
                                 training_data, training_results,
                                 validation_data, validation_results,
                                 verbose=True, plot=False, basename='',
                                 K_star=0, K_t=0, L1=True, L2=True,
                                 tellurics_template_fixed=False):
    """
    Use a validation scheme to determine the best regularization parameters for
    all model components in a given order.
    Update files at star_filename, tellurics_filename with the best parameters.

    By default, this tunes in the following order:
            tellurics L2, star L2, tellurics L1, star L1.
    If `K_star > 0` the star basis-vector amplitudes (L2, then L1) are tuned
    next, and if `K_t > 0` the tellurics basis-vector amplitudes (L2, then L1)
    are tuned last.

    Parameters
    ----------
    o : int
        Index into `star_filename` and `tellurics_filename` to retrieve desired order.
    star_filename : str
        Filename containing regularization amplitudes for the star.
    tellurics_filename : str
        Filename containing regularization amplitudes for the tellurics.
    training_data : wobble.Data object
        Data to train template on (should be the majority of available data).
    training_results : wobble.Results object
        Results object corresponding to `training_data`.
    validation_data : wobble.Data object
        Data to use in assessing goodness-of-fit for template
        (should be a representative minority of the available data).
    validation_results : wobble.Results object
        Results object corresponding to `validation_data`.
    verbose : bool (default `True`)
        Toggle print statements and progress bars.
    plot : bool (default `False`)
        Generate and save plots of fits to validation data.
    basename : str (default ``)
        String to append to the beginning of saved plots (file path and base).
    K_star : int (default `0`)
        Number of variable basis vectors for the star.
    K_t : int (default `0`)
        Number of variable basis vectors for the tellurics.
    L1 : bool (default `True`)
        Whether to tune L1 amplitudes.
    L2 : bool (default `True`)
        Whether to tune L2 amplitudes.
    tellurics_template_fixed : bool (default `False`)
        Accepted for interface compatibility; this function's body does not
        read it.
    """
    r = 0  # assumes there is only one order in data & results objects
    n = 0  # epoch to plot

    # --- training model ---
    training_model = wobble.Model(training_data, training_results, r)
    training_model.add_star('star', variable_bases=K_star)
    training_model.add_telluric('tellurics', rvs_fixed=True,
                                variable_bases=K_t)
    training_model.setup()
    training_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        title = 'Initialization'
        filename = '{0}_init'.format(basename)
        plot_fit(r, n, training_data, training_results, title=title,
                 basename=filename)

    # --- validation model ---
    validation_model = wobble.Model(validation_data, validation_results, r)
    validation_model.add_star(
        'star', variable_bases=K_star,
        template_xs=training_results.star_template_xs[r])  # ensure templates are same size
    validation_model.add_telluric(
        'tellurics', rvs_fixed=True, variable_bases=K_t,
        template_xs=training_results.tellurics_template_xs[r])
    validation_model.setup()

    # --- what to tune ---
    # Each entry is (component label, dataset name in the HDF5 file, tensor).
    # The explicit names are only needed bc TF appends garbage to the end of
    # the tensor name. The order of this list is the order of optimization.
    star = training_model.components[0]
    tellurics = training_model.components[1]
    tuning_plan = [
        ('tellurics', 'L2_template', tellurics.L2_template_tensor),
        ('star', 'L2_template', star.L2_template_tensor),
        ('tellurics', 'L1_template', tellurics.L1_template_tensor),
        ('star', 'L1_template', star.L1_template_tensor),
    ]
    if K_star > 0:
        tuning_plan.extend([
            ('star', 'L2_basis_vectors', star.L2_basis_vectors_tensor),
            ('star', 'L1_basis_vectors', star.L1_basis_vectors_tensor),
        ])
    if K_t > 0:
        tuning_plan.extend([
            ('tellurics', 'L2_basis_vectors', tellurics.L2_basis_vectors_tensor),
            ('tellurics', 'L1_basis_vectors', tellurics.L1_basis_vectors_tensor),
        ])

    # Only these two labels are ever placed in tuning_plan above.
    reg_files = {'star': star_filename, 'tellurics': tellurics_filename}

    # --- starting values ---
    #o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o  # always initialize from starting guess (TODO: decide which init is better)
    regularization_dict = {}
    for component, name, tensor in tuning_plan:
        with h5py.File(reg_files[component], 'r') as f:
            regularization_dict[tensor] = np.copy(f[name][o_init])

    # --- tune each parameter in turn and write it back immediately ---
    n_tuned = 0  # track order in which parameters are improved
    for component, name, tensor in tuning_plan:
        wanted = (name.startswith('L1') and L1) or (name.startswith('L2') and L2)
        if not wanted:
            continue
        n_tuned += 1
        regularization_dict[tensor] = improve_parameter(
            tensor, training_model, validation_model, regularization_dict,
            validation_data, validation_results, verbose=verbose, plot=plot,
            basename=basename + '_par{0}'.format(n_tuned))
        with h5py.File(reg_files[component], 'r+') as f:
            f[name][o] = np.copy(regularization_dict[tensor])

    if plot:
        # Re-run with the last tensor of the plan: hack to update results
        tensor = tuning_plan[-1][2]
        test_regularization_value(tensor, regularization_dict[tensor],
                                  training_model, validation_model,
                                  regularization_dict, validation_data,
                                  validation_results, plot=False,
                                  verbose=False)
        title = 'Final'
        filename = '{0}_final'.format(basename)
        plot_fit(r, n, validation_data, validation_results, title=title,
                 basename=filename)

        # Mean-subtracted RVs (star RVs + BERVs) for validation vs. training epochs.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        val_rvs = validation_results.star_rvs[r] + validation_results.bervs
        train_rvs = training_results.star_rvs[r] + training_results.bervs
        ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
        ax.plot(training_results.dates, train_rvs - np.mean(train_rvs), 'k.',
                alpha=0.5)
        ax.set_ylabel('RV (m/s)')
        ax.set_xlabel('JD')
        fig.tight_layout()
        plt.savefig(basename+'_final_rvs.png')
        plt.close(fig)