def setup_for_order(r, datafile, training_epochs, validation_epochs):
    """
    Build the training and validation data sets (and matching results
    containers) for a single order.

    The training data is loaded for order `r` only; the validation data is
    then loaded for whatever orders the training data ended up with, so the
    two stay consistent. Both loads use `min_snr=0.`.

    Returns the tuple
    (training_data, training_results, validation_data, validation_results).
    """
    def _load(order_selection, epoch_selection):
        # one Data object plus the Results object built from it
        dataset = wobble.Data(datafile, orders=order_selection,
                              epochs=epoch_selection, min_snr=0.)
        return dataset, wobble.Results(dataset)

    training_data, training_results = _load([r], training_epochs)
    validation_data, validation_results = _load(training_data.orders,
                                                validation_epochs)
    return (training_data, training_results,
            validation_data, validation_results)
star_reg_file = '../wobble/regularization/{0}_star_K{1}.hdf5'.format(starname, K_star) tellurics_reg_file = '../wobble/regularization/{0}_t_K{1}.hdf5'.format(starname, K_t) plot_dir = '../results/plots_{0}_Kstar{1}_Kt{2}/'.format(starname, K_star, K_t) print("running wobble on star {0} with K_star = {1}, K_t = {2}".format(starname, K_star, K_t)) start_time = time() orders = np.arange(72) data = wobble.Data(filename='../data/'+starname+'_e2ds.hdf5', orders=orders) if True: # reload data and remove all post-upgrade spectra upgrade = 2457174.5 # June 2015 e = data.epochs[data.dates < upgrade] data = wobble.Data(filename='../data/'+starname+'_e2ds.hdf5', orders=orders, epochs=e) data.drop_bad_orders() data.drop_bad_epochs() orders = np.copy(data.orders) results = wobble.Results(data=data) print("data loaded") print("time elapsed: {0:.2f} min".format((time() - start_time)/60.0)) elapsed_time = time() - start_time if plots: print("plots will be saved under directory: {0}".format(plot_dir)) if not os.path.exists(plot_dir): os.makedirs(plot_dir) star_learning_rate = 0.1 telluric_learning_rate = 0.01 for r,o in enumerate(orders): model = wobble.Model(data, results, r) model.add_star('star', variable_bases=K_star,
f['orders'] = np.append(orders, g['orders'][()]) #check which orders are already there because wobble might drop some # TODO clean way is probably to make some neat ordered list of orders in either file first key_list = list(f.keys()) for r in range(n_orders): if 'order{0}'.format( r ) not in key_list: #find first order index not already present for r_chunk in range(g['R'][()]): r_tot = r + r_chunk #f.create_group('order{0}'.format(r_tot)) g.copy('order{0}'.format(r_chunk), f, name='order{0}'.format(r_tot)) break # combine orders even though thiscurrently yields garbage RVs, still makes issues for chunks (different n_Epochs?, maybe it was just median breaking on chunksize 1?) print('n_orders = R =', f['R'][()]) results = wobble.Results(filename=results_file) results.combine_orders('star') print("final RVs calculated.") print("time elapsed: {0:.2f} minutes".format((time() - start_time) / 60.0)) results.write(results_file) print("all scripts executed") print("time elapsed total: {0:.2f} min".format((time() - start_time) / 60.0))
def improve_order_regularization(r, o, star_filename, tellurics_filename,
                                 training_data, training_results,
                                 validation_data, validation_results,
                                 verbose=True, plot=False, basename='',
                                 K_star=0, K_t=0, L1=True, L2=True,
                                 tellurics_template_fixed=False):
    """
    Tune, by validation, the regularization amplitudes of all model
    components for order r, and write the tuned values back into the files
    at star_filename and tellurics_filename (at index o).

    Tuning order: tellurics L2, star L2, tellurics L1, star L1, followed by
    the star and then tellurics basis-vector amplitudes when K_star / K_t
    are positive. L1 / L2 switch tuning of the respective amplitudes on or
    off.
    """
    def _reg_file(component):
        # pick the regularization file that belongs to a component name
        if component == 'star':
            return star_filename
        if component == 'tellurics':
            return tellurics_filename
        print("something has gone wrong.")
        assert False

    # ---- training model ----
    train_model = wobble.Model(training_data, training_results, r)
    train_model.add_star('star', variable_bases=K_star)
    if not tellurics_template_fixed:
        train_model.add_telluric('tellurics', rvs_fixed=True,
                                 variable_bases=K_t)
    else:  # hackity hack hack
        results_51peg = wobble.Results(
            filename='/Users/mbedell/python/wobble/results/results_51peg_Kstar0_Kt0.hdf5')
        fixed_xs = np.copy(results_51peg.tellurics_template_xs[o])
        fixed_ys = np.copy(results_51peg.tellurics_template_ys[o])
        train_model.add_telluric('tellurics', rvs_fixed=True,
                                 template_fixed=True, variable_bases=K_t,
                                 template_xs=fixed_xs, template_ys=fixed_ys)
    train_model.setup()
    train_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        n = 0  # epoch to plot
        plot_fit(r, n, training_data, training_results,
                 title='Initialization',
                 basename='{0}_init'.format(basename))

    # ---- validation model (templates on the training grids) ----
    val_model = wobble.Model(validation_data, validation_results, r)
    val_model.add_star('star', variable_bases=K_star,
                       template_xs=training_results.star_template_xs[r])  # ensure templates are same size
    telluric_kwargs = dict(
        rvs_fixed=True, variable_bases=K_t,
        template_xs=training_results.tellurics_template_xs[r])
    if tellurics_template_fixed:  # hackity hack hack
        telluric_kwargs.update(
            template_fixed=True,
            template_ys=training_results.tellurics_template_ys[r])
    val_model.add_telluric('tellurics', **telluric_kwargs)
    val_model.setup()

    # ---- what to tune; list order is optimization order ----
    star_comp = train_model.components[0]
    tell_comp = train_model.components[1]
    tensors_to_tune = [tell_comp.L2_template_tensor,
                       star_comp.L2_template_tensor,
                       tell_comp.L1_template_tensor,
                       star_comp.L1_template_tensor]
    # names/components are tracked by hand because TF appends garbage to
    # the end of the tensor name
    tensor_names = ['L2_template', 'L2_template',
                    'L1_template', 'L1_template']
    tensor_components = ['tellurics', 'star', 'tellurics', 'star']
    for n_bases, comp, label in ((K_star, star_comp, 'star'),
                                 (K_t, tell_comp, 'tellurics')):
        if n_bases > 0:
            tensors_to_tune = np.append(
                tensors_to_tune,
                [comp.L2_basis_vectors_tensor, comp.L1_basis_vectors_tensor])
            tensor_names = np.append(
                tensor_names, ['L2_basis_vectors', 'L1_basis_vectors'])
            tensor_components = np.append(tensor_components, [label, label])

    # ---- starting values, read from the regularization files ----
    regularization_dict = {}
    # o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o  # always initialize from starting guess (TODO: decide which init is better)
    for component, tensor, name in zip(tensor_components, tensors_to_tune,
                                       tensor_names):
        with h5py.File(_reg_file(component), 'r') as reg_in:
            regularization_dict[tensor] = np.copy(reg_in[name][o_init])

    # ---- improve each enabled amplitude and persist it right away ----
    enabled = {"L1": L1, "L2": L2}
    tuned_count = 0  # track order in which parameters are improved
    for component, tensor, name in zip(tensor_components, tensors_to_tune,
                                       tensor_names):
        if not enabled.get(name[0:2], False):
            continue
        tuned_count += 1
        regularization_dict[tensor] = improve_parameter(
            tensor, train_model, val_model, regularization_dict,
            validation_data, validation_results,
            verbose=verbose, plot=plot,
            basename=basename + '_par{0}'.format(tuned_count))
        with h5py.File(_reg_file(component), 'r+') as reg_out:
            reg_out[name][o] = np.copy(regularization_dict[tensor])

    if not plot:
        return

    # ---- final diagnostic plots ----
    last_tensor = tensors_to_tune[-1]
    test_regularization_value(last_tensor, regularization_dict[last_tensor],
                              train_model, val_model, regularization_dict,
                              validation_data, validation_results,
                              plot=False, verbose=False)  # hack to update results
    plot_fit(r, n, validation_data, validation_results,
             title='Final', basename='{0}_final'.format(basename))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    val_rvs = validation_results.star_rvs[r] + validation_results.bervs
    train_rvs = training_results.star_rvs[r] + training_results.bervs
    ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
    ax.plot(training_results.dates, train_rvs - np.mean(train_rvs), 'k.',
            alpha=0.5)
    ax.set_ylabel('RV (m/s)')
    ax.set_xlabel('JD')
    fig.tight_layout()
    plt.savefig(basename + '_final_rvs.png')
    plt.close(fig)
else: # HACK for HD 189733 e = np.asarray([ 0, 1, 6, 7, 9, 17, 18, 19, 21, 23, 24, 26, 30, 33, 34, 35, 36, 37, 38, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 61, 66, 69, 70, 72, 73, 75 ]) # night of August 28, 2007 validation_epochs = np.random.choice(e, len(e) // 8, replace=False) training_epochs = np.delete(e, validation_epochs) training_data = wobble.Data(starname + '_e2ds.hdf5', filepath='../data/', orders=orders, epochs=training_epochs, min_snr=3) training_results = wobble.Results(training_data) validation_data = wobble.Data(starname + '_e2ds.hdf5', filepath='../data/', orders=training_data.orders, epochs=validation_epochs, min_snr=1) # HACK validation_results = wobble.Results(validation_data) assert len(training_data.orders) == len( validation_data.orders ), "Number of orders used is not the same between training and validation data." orders = training_data.orders # improve each order's regularization: for r, o in enumerate( orders ): # r is an index into the (cleaned) data. o is an index into the 72 orders (and the file tracking them).
# Script section: announce the run, load the data for all candidate orders
# and prepare the results container and the plot directory.
# Relies on `starname`, `K_star`, `K_t`, `plots` and `plot_dir` being defined
# earlier in the script (not visible in this section).
print("running wobble on star {0} with K_star = {1}, K_t = {2}".format(
    starname, K_star, K_t))
start_time = time()  # reference point for the "time elapsed" messages
orders = np.arange(72)  # request orders 0..71
# Disabled alternative, kept as a bare (no-op) string literal: it restricted
# the load to a hand-picked list of epochs.
''' e = [ 0, 1, 6, 7, 9, 17, 18, 19, 21, 23, 24, 26, 30, 33, 34, 35, 36, 37, 38, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 61, 66, 69, 70, 72, 73, 75] # night of August 28, 2007 data = wobble.Data(starname+'_e2ds.hdf5', filepath='../data/', orders=orders, epochs=e) '''
data = wobble.Data(starname + '_e2ds.hdf5',
                   filepath='../data/',
                   orders=orders)
# Continue with the orders reported by the loaded data object rather than
# the requested range (presumably some orders can be dropped on load --
# TODO confirm against wobble.Data).
orders = np.copy(data.orders)
results = wobble.Results(data=data)
# NOTE(review): absolute path on one developer's machine; this load fails
# anywhere else.
results_51peg = wobble.Results(
    filename=
    '/Users/mbedell/python/wobble/results/results_51peg_Kstar0_Kt0.hdf5')
print("data loaded")
print("time elapsed: {0:.2f} min".format((time() - start_time) / 60.0))
elapsed_time = time() - start_time  # seconds spent loading
if plots:
    print("plots will be saved under directory: {0}".format(plot_dir))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
# Learning rates, presumably consumed by the per-order optimization that
# follows this section -- not visible here.
star_learning_rate = 0.1
telluric_learning_rate = 0.01
# Script section: choose the order range and input file depending on `vis`,
# then open the existing results file and the telluric mask.
# Relies on `vis`, `starname`, `data_directory`, `plot_dir`, `results_name`
# and `results_directory` being defined earlier (not visible here).
if vis == True:
    # '_vis' data file, orders 11..52
    orders = np.arange(11,53)
    lowest_optimized_order = 11
    data = wobble.Data(starname+'_vis'+'_e2ds.hdf5', filepath= data_directory, orders=orders, min_flux=10**-5, min_snr=0)
else:
    # '_nir_split' data file, orders 0..55
    orders = np.arange(0,56)
    lowest_optimized_order = 0
    data = wobble.Data(starname+'_nir_split'+'_e2ds.hdf5', filepath= data_directory, orders=orders, min_flux=10**-5, min_snr=0)
######
# Output directory for the plots; alternative names are kept commented out.
#plot_dir = plot_dir + results_name + "/"#+ "_bad_ep/"
plot_dir = plot_dir + results_name + "_bad_ord(49)/"
#plot_dir = plot_dir + "GJ1148_o49_snr" + "/"
os.makedirs(plot_dir, exist_ok = True)
results = wobble.Results(filename = results_directory + results_name +".hdf5")
# NOTE(review): absolute, machine-specific path to the telluric mask file.
telluric_mask = np.genfromtxt("/data/cmatthe/python/wobble_aux/telluric_masks/" + "telluric_mask_carm_short.dat" )
# Hand-picked epoch selections used in earlier investigations (disabled):
#epochs = [ 5, 24, 65, 67, 74, 88, 93] # bad GJ436
#epochs = [10, 13, 32, 48, 66] # bad GJ1148
#epochs = [8, 20, 23, 24, 70, 97,107, 123, 141, 154] # bad Wolf294 (20 as "good standard")
epochs_results = results.epochs
#epochs = epochs_results[10:50:5]
#epochs = [0,9,11,10,12,60]
#epochs = list(set(epochs) & set(results.epochs)) # build the intersection so that every selected epoch exists in the results
# Currently active choice: use every epoch present in the results file.
epochs = epochs_results
def improve_order_regularization(r, o, star_filename, tellurics_filename,
                                 training_data, training_results,
                                 validation_data, validation_results,
                                 verbose=True, plot=False, basename='',
                                 K_star=0, K_t=0, L1=True, L2=True,
                                 tellurics_template_fixed=False,
                                 tellurics_template_file=None):
    """
    Use a validation scheme to determine the best regularization parameters for
    all model components in a given order r.
    Update files at star_filename, tellurics_filename with the best parameters.

    By default, this tunes in the following order:
    tellurics L2, star L2, tellurics L1, star L1.
    If `K_star > 0` the star basis-vector amplitudes (L2, then L1) are tuned
    next, and if `K_t > 0` the tellurics basis-vector amplitudes after that.

    Parameters
    ----------
    r : int
        Index into `training_data` and `validation_data` to retrieve desired order.
    o : int
        Index into `star_filename` and `telluric_filename` to retrieve desired order.
    star_filename : str
        Filename containing regularization amplitudes for the star.
    tellurics_filename : str
        Filename containing regularization amplitudes for the tellurics.
    training_data : wobble.Data object
        Data to train template on (should be the majority of available data).
    training_results : wobble.Results object
        Results object corresponding to `training_data`.
    validation_data : wobble.Data object
        Data to use in assessing goodness-of-fit for template
        (should be a representative minority of the available data).
    validation_results : wobble.Results object
        Results object corresponding to `validation_data`.
    verbose : bool (default `True`)
        Toggle print statements and progress bars.
    plot : bool (default `False`)
        Generate and save plots of fits to validation data.
    basename : str (default ``)
        String to append to the beginning of saved plots (file path and base).
    K_star : int (default `0`)
        Number of variable basis vectors for the star.
    K_t : int (default `0`)
        Number of variable basis vectors for the tellurics.
    L1 : bool (default `True`)
        Whether to tune L1 amplitudes.
    L2 : bool (default `True`)
        Whether to tune L2 amplitudes.
    tellurics_template_fixed : bool (default `False`)
        Whether to keep tellurics template fixed to values from 51 Peg fit.
    tellurics_template_file : str or None (default `None`)
        Results file supplying the fixed tellurics template; only read when
        `tellurics_template_fixed` is set. `None` selects the historical
        hard-coded location, which only exists on MB's laptop.
    """
    if tellurics_template_file is None:
        # legacy default, kept so existing callers behave exactly as before
        tellurics_template_file = '/Users/mbedell/python/wobble/results/results_51peg_Kstar0_Kt0.hdf5'

    n = 0  # epoch to plot; bound up front because both plot sections use it

    # ---- training model ----
    training_model = wobble.Model(training_data, training_results, r)
    training_model.add_star('star', variable_bases=K_star)
    if tellurics_template_fixed:  # hackity hack hack
        results_51peg = wobble.Results(filename=tellurics_template_file)
        template_xs = np.copy(results_51peg.tellurics_template_xs[o])
        template_ys = np.copy(results_51peg.tellurics_template_ys[o])
        training_model.add_telluric('tellurics', rvs_fixed=True,
                                    template_fixed=True, variable_bases=K_t,
                                    template_xs=template_xs,
                                    template_ys=template_ys)
    else:
        training_model.add_telluric('tellurics', rvs_fixed=True,
                                    variable_bases=K_t)
    training_model.setup()
    training_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        plot_fit(r, n, training_data, training_results,
                 title='Initialization',
                 basename='{0}_init'.format(basename))

    # ---- validation model, built on the training template grids ----
    validation_model = wobble.Model(validation_data, validation_results, r)
    validation_model.add_star(
        'star', variable_bases=K_star,
        template_xs=training_results.star_template_xs[r])  # ensure templates are same size
    if tellurics_template_fixed:  # hackity hack hack
        validation_model.add_telluric(
            'tellurics', rvs_fixed=True, template_fixed=True,
            variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r],
            template_ys=training_results.tellurics_template_ys[r])
    else:
        validation_model.add_telluric(
            'tellurics', rvs_fixed=True, variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r])
    validation_model.setup()

    # ---- what to tune ----
    # One (tensor, dataset name, regularization file) record per amplitude.
    # The order of this list is the order in which amplitudes are optimized.
    # The dataset name is tracked by hand because TF appends garbage to the
    # end of the tensor name. Plain Python lists are used (rather than
    # np.append) so the tensor handles are never passed through numpy's
    # array coercion.
    star = training_model.components[0]
    tellurics = training_model.components[1]
    targets = [
        (tellurics.L2_template_tensor, 'L2_template', tellurics_filename),
        (star.L2_template_tensor, 'L2_template', star_filename),
        (tellurics.L1_template_tensor, 'L1_template', tellurics_filename),
        (star.L1_template_tensor, 'L1_template', star_filename),
    ]
    if K_star > 0:
        targets += [
            (star.L2_basis_vectors_tensor, 'L2_basis_vectors', star_filename),
            (star.L1_basis_vectors_tensor, 'L1_basis_vectors', star_filename),
        ]
    if K_t > 0:
        targets += [
            (tellurics.L2_basis_vectors_tensor, 'L2_basis_vectors',
             tellurics_filename),
            (tellurics.L1_basis_vectors_tensor, 'L1_basis_vectors',
             tellurics_filename),
        ]

    # ---- starting values from the regularization files ----
    regularization_dict = {}
    # o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o  # always initialize from starting guess (TODO: decide which init is better)
    for tensor, name, filename in targets:
        with h5py.File(filename, 'r') as f:
            regularization_dict[tensor] = np.copy(f[name][o_init])

    # ---- improve each enabled amplitude, saving it as soon as it is found ----
    tune_enabled = {'L1': L1, 'L2': L2}
    i = 0  # track order in which parameters are improved (used in plot names)
    for tensor, name, filename in targets:
        if not tune_enabled[name[:2]]:
            continue
        i += 1
        regularization_dict[tensor] = improve_parameter(
            tensor, training_model, validation_model, regularization_dict,
            validation_data, validation_results,
            verbose=verbose, plot=plot,
            basename=basename + '_par{0}'.format(i))
        with h5py.File(filename, 'r+') as f:
            f[name][o] = np.copy(regularization_dict[tensor])

    if plot:
        # Re-evaluate with the final amplitudes so the results objects are
        # current before plotting (hack to update results). As before, the
        # last tensor in the tuning list is used for this call.
        tensor = targets[-1][0]
        test_regularization_value(tensor, regularization_dict[tensor],
                                  training_model, validation_model,
                                  regularization_dict,
                                  validation_data, validation_results,
                                  plot=False, verbose=False)
        plot_fit(r, n, validation_data, validation_results,
                 title='Final', basename='{0}_final'.format(basename))

        # mean-subtracted star RVs (barycentric correction added back) for
        # the validation (red) and training (black) epochs
        fig = plt.figure()
        ax = fig.add_subplot(111)
        val_rvs = validation_results.star_rvs[r] + validation_results.bervs
        train_rvs = training_results.star_rvs[r] + training_results.bervs
        ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
        ax.plot(training_results.dates, train_rvs - np.mean(train_rvs), 'k.',
                alpha=0.5)
        ax.set_ylabel('RV (m/s)')
        ax.set_xlabel('JD')
        fig.tight_layout()
        plt.savefig(basename + '_final_rvs.png')
        plt.close(fig)
print("running wobble on star {0} with K_star = {1}, K_t = {2}".format( starname, K_star, K_t)) start_time = time() orders = np.arange(72) ''' e = [ 0, 1, 6, 7, 9, 17, 18, 19, 21, 23, 24, 26, 30, 33, 34, 35, 36, 37, 38, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 61, 66, 69, 70, 72, 73, 75] # night of August 28, 2007 data = wobble.Data(starname+'_e2ds.hdf5', filepath='../data/', orders=orders, epochs=e) ''' data = wobble.Data(starname + '_e2ds.hdf5', filepath='../data/', orders=orders) orders = np.copy(data.orders) results = wobble.Results(data=data) results_51peg = wobble.Results( filename='../results/results_51peg_Kstar0_Kt0.hdf5') print("data loaded") print("time elapsed: {0:.2f} min".format((time() - start_time) / 60.0)) elapsed_time = time() - start_time if plots: print("plots will be saved under directory: {0}".format(plot_dir)) if not os.path.exists(plot_dir): os.makedirs(plot_dir) star_learning_rate = 0.1 telluric_learning_rate = 0.01 for r, o in enumerate(orders):
def run_wobble(parameters):
    """
    Run the chunked wobble optimization described by `parameters` and write
    the combined results file.

    Steps: derive the output paths and create the directories, load the data
    once to find the usable epochs and orders, split the orders into chunks,
    run `chunk.py` in a separate Python process for every chunk (the
    parameters object is handed over through a dill pickle), stitch the chunk
    results together, combine the orders into final RVs, append UTC dates and
    the parameters to the results file, and finally delete the temporary
    directory.

    Parameters
    ----------
    parameters : object
        Attribute container. Read here: `starname`, `K_star`, `K_t`,
        `output_suffix`, `results_dir`, `data_dir`, `data_suffix`, `start`,
        `end`, `min_snr`, `chunk_size` and, optionally,
        `global_epochs_list`. Set here: `data_file`, `temp_dir`, `plot_dir`,
        `start_time`, `epochs_list`, `drop_orders`, `orders_list`, `chunks`,
        `i`, and `min_snr` (reset to 5 if all data was dropped on the first
        load attempt).

    Returns
    -------
    None
    """
    import subprocess  # local import: only needed to launch the chunk script

    p = parameters
    # NOTE: niter is deliberately not part of the name, so existing result
    # file names stay unchanged.
    results_name = 'results_{0}_Kstar{1}_Kt{2}_'.format(
        p.starname, p.K_star, p.K_t) + p.output_suffix
    results_file = p.results_dir + results_name + '.hdf5'
    data_file = p.data_file = p.data_dir + p.starname + p.data_suffix + '_e2ds.hdf5'
    temp_dir = p.temp_dir = p.results_dir + '/temp_' + results_name + '/'
    plot_dir = p.plot_dir = p.results_dir + '/plots_' + results_name + '/'

    # make (output) directories
    # TODO these data and results dirs should be handled somewhere else
    for directory in (p.results_dir, p.data_dir, temp_dir, plot_dir):
        os.makedirs(directory, exist_ok=True)

    start_time = p.start_time = time()

    # generate epoch list
    # If parameters has been passed a global epochs list, seed p.epochs_list
    # with it before the data is loaded (primarily used by
    # regularization.py). The data is loaded in either case; this used to be
    # reached only through an UnboundLocalError on the not-yet-assigned
    # `data`.
    try:
        p.epochs_list = p.global_epochs_list
    except AttributeError as e:
        print(e)

    def _load_data():
        # single place for the Data call shared by both load attempts
        return wobble.Data(data_file,
                           orders=np.arange(p.start, p.end),
                           min_flux=10**-5,
                           min_snr=p.min_snr,
                           parameters=p)

    print("Loading data. May take a few minutes")
    try:
        data = _load_data()
    except wobble.data.AllDataDropped:
        # retry once with min_snr = 5
        p.min_snr = 5
        print("restarting with min_snr = {}".format(p.min_snr))
        data = _load_data()
    p.epochs_list = data.epochs.tolist()
    # orders_list = p.orders_list = data.orders.tolist() # too aggressive in visible
    # TODO implement alternate nir and vis handling

    try:
        p.drop_orders = data.drop_orders
        print("data.drop_orders", data.drop_orders)
    except AttributeError:
        print("data.drop_orders is not defined")
        if not hasattr(p, 'drop_orders'):
            # NOTE(review): assumes "nothing dropped" is the right fallback;
            # without it the order filter below fails with AttributeError.
            p.drop_orders = []

    base_orders_list = np.arange(p.start, p.end).tolist()
    orders_list = p.orders_list = [
        x for x in base_orders_list if x not in p.drop_orders
    ]
    print("orders_list", orders_list)
    chunks = p.chunks = chunk_list(p.start, p.end, p.chunk_size, p.orders_list)
    print("Chunks: ", chunks)

    # Loop over chunks
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parameters_pickle = script_dir + "/carmenes_aux_files/chunk_parameters.pkl"
    chunk_script = "{0}/chunk.py".format(script_dir)
    for i in range(len(chunks)):
        # pass parameters object to chunk script
        p.i = i
        with open(parameters_pickle, "wb") as f:
            dill.dump(p, f)
        # start chunk script; an argument list (no shell) keeps paths that
        # contain spaces intact
        completed = subprocess.run(["python3", chunk_script])
        if completed.returncode != 0:
            print("warning: chunk {0} exited with status {1}".format(
                i, completed.returncode))
        # (disabled HACK, to be removed: re-import the parameters from the
        # pickle after possible changes made by the chunk script)

    print("all chunks optimized: writing combined file")
    results_file_stitch(p.chunks, results_file, temp_dir)

    # Combine orders
    results = wobble.Results(filename=results_file)
    results.combine_orders('star')
    print("final RVs calculated.")
    print("time elapsed: {0:.2f} minutes".format((time() - start_time) / 60.0))
    results.write(results_file)
    # cannot be done before results.write, as .write will remove dates_utc
    append_dates_utc(results_file, data_file)
    append_parameters(p, results_file)
    # (disabled test: read the parameters back with
    # read_parameters_from_results and compare them attribute by attribute)

    print("results saved as: {0}".format(results_file))
    print("time elapsed: {0:.2f} minutes".format((time() - start_time) / 60.0))

    # delete temp_dir which at this point only contains duplicates
    shutil.rmtree(temp_dir)
    print("deleted: {0}".format(temp_dir))