def spectrum_json(library, spec_id):
    """
    Return one spectrum from a spectrum library as a JSON string.

    :param library:
        Name of the spectrum library, relative to the workspace directory ``args.path``.
    :param spec_id:
        Identifier of the spectrum to fetch; it is converted with ``int()``.
    :return:
        JSON-encoded list of (wavelength, value) pairs.
    """
    library_path = os_path.join(args.path, library)
    spectrum_library = SpectrumLibrarySqlite(path=library_path)

    # Only a single spectrum was requested, so it is item zero of the returned selection
    selection = spectrum_library.open(ids=int(spec_id))
    spectrum = selection.extract_item(0)

    points = [pair for pair in zip(spectrum.wavelengths, spectrum.values)]
    return json.dumps(points)
def spectrum_txt(library, spec_id):
    """
    Return one spectrum from a spectrum library as a plain-text table.

    :param library:
        Name of the spectrum library, relative to the workspace directory ``args.path``.
    :param spec_id:
        Identifier of the spectrum to fetch; it is converted with ``int()``.
    :return:
        A response of content type ``text/plain`` whose rows hold wavelength, value and value error,
        as written by ``np.savetxt``.
    """
    library_path = os_path.join(args.path, library)
    spectrum_library = SpectrumLibrarySqlite(path=library_path)
    spectrum = spectrum_library.open(ids=int(spec_id)).extract_item(0)

    # One row per sample: wavelength, value, value error
    rows = list(zip(spectrum.wavelengths, spectrum.values, spectrum.value_errors))
    table = np.asarray(rows)

    # Render the table into an in-memory text buffer
    text_buffer = io.StringIO()
    np.savetxt(text_buffer, table)

    response = make_response(text_buffer.getvalue())
    response.headers['Content-Type'] = 'text/plain'
    return response
def spectrum_png(library, spec_id, lambda_min, lambda_max):
    """
    Plot one spectrum from a spectrum library and return the plot as a PNG image.

    :param library:
        Name of the spectrum library, relative to the workspace directory ``args.path``.
    :param spec_id:
        Identifier of the spectrum to plot; it is converted with ``int()``.
    :param lambda_min:
        Lower limit of the horizontal axis; it is converted with ``float()``.
    :param lambda_max:
        Upper limit of the horizontal axis; it is converted with ``float()``.
    :return:
        A response of content type ``image/png`` containing the rendered figure.
    """
    library_path = os_path.join(args.path, library)
    spectrum_library = SpectrumLibrarySqlite(path=library_path)
    spectrum = spectrum_library.open(ids=int(spec_id)).extract_item(0)

    # Set up a single-panel figure
    figure = Figure(figsize=(16, 6))
    axes = figure.add_subplot(111)
    axes.set_xlabel('Wavelength / A')
    axes.set_ylabel('Value')
    x_limits = [float(lambda_min), float(lambda_max)]
    axes.set_xlim(x_limits)
    axes.grid(True)
    axes.plot(spectrum.wavelengths, spectrum.values)

    # Render the figure into an in-memory PNG
    image_buffer = io.BytesIO()
    FigureCanvas(figure).print_png(image_buffer)

    response = make_response(image_buffer.getvalue())
    response.headers['Content-Type'] = 'image/png'
    return response
def spectrum_view(library, spec_id):
    """
    Render the HTML page which displays a single spectrum and its metadata.

    The wavelength range of the plot is read from the submitted form fields ``lambda_min`` and
    ``lambda_max``; a field which is missing or is not a valid number falls back to its default
    (3600 and 9600 respectively).

    :param library:
        Name of the spectrum library, relative to the workspace directory ``args.path``.
    :param spec_id:
        Identifier of the spectrum to display.
    :return:
        The rendered ``spectrum.html`` template.
    """

    def form_value_or_default(field_name, default):
        # float(None) raises TypeError (field absent); float("junk") raises ValueError
        try:
            return float(request.form.get(field_name))
        except (TypeError, ValueError):
            return default

    lambda_min = form_value_or_default("lambda_min", 3600)
    lambda_max = form_value_or_default("lambda_max", 9600)

    # URLs of the pages and data products linked from the template
    parent_url = url_for("library_search", library=library)
    self_url = url_for("spectrum_view", library=library, spec_id=spec_id)
    txt_url = url_for("spectrum_txt", library=library, spec_id=spec_id)
    data_url = url_for("spectrum_json", library=library, spec_id=spec_id)
    png_url = url_for("spectrum_png",
                      library=library,
                      spec_id=spec_id,
                      lambda_min=lambda_min,
                      lambda_max=lambda_max)

    # Look up the metadata of the requested spectrum
    spectrum_library = SpectrumLibrarySqlite(path=os_path.join(args.path, library))
    metadata_keys = spectrum_library.list_metadata_fields()
    metadata_keys.sort()
    metadata = spectrum_library.get_metadata(ids=int(spec_id))[0]
    metadata["spectrum_id"] = spec_id

    template_fields = {
        'path': args.path,
        'library': library,
        'metadata_keys': metadata_keys,
        'parent_url': parent_url,
        'metadata': metadata,
        'txt_url': txt_url,
        'data_url': data_url,
        'png_url': png_url,
        'self_url': self_url,
        'lambda_min': lambda_min,
        'lambda_max': lambda_max,
    }
    return render_template('spectrum.html', **template_fields)
def library_index():
    """
    Render the index page listing every spectrum library found in the workspace.

    Each sub-directory of ``args.path`` which contains an ``index.db`` file is treated as a
    SpectrumLibrary. For each one we record its name, the URL of its search page, and the number of
    spectra it holds.

    :return:
        The rendered ``index.html`` template.
    """
    # Fetch a list of everything inside this workspace -- each suitable directory is a SpectrumLibrary
    candidates = sorted(glob.glob(os_path.join(args.path, "*")))

    # For each library, look up how many spectra are inside it, and create a dictionary of properties
    library_info = []
    for item in candidates:
        # Use the os_path alias throughout (the original mixed in os.path), and build the path to
        # the index with join rather than string concatenation
        if not (os_path.isdir(item) and os_path.exists(os_path.join(item, 'index.db'))):
            continue

        name = os_path.split(item)[1]
        x = SpectrumLibrarySqlite(path=item)
        try:
            library_info.append({
                'name': name,
                'url': url_for('library_search', library=name),
                'item_count': len(x)
            })
        finally:
            # Always release the library, even if counting its contents failed
            x.close()

    # Render list of SpectrumLibraries into HTML
    return render_template('index.html', path=args.path, libraries=library_info)
def __init__(self, spectrum_library_to_analyse, workspace, pipeline):
    """
    Set up the pipeline manager and open the spectrum library whose spectra we are to analyse.

    :param spectrum_library_to_analyse:
        The name of the spectrum library we are to analyse
    :type spectrum_library_to_analyse:
        str
    :param workspace:
        Directory where we expect to find spectrum libraries.
    :type workspace:
        str
    :param pipeline:
        The Pipeline we are to run spectra through.
    :type pipeline:
        Pipeline
    """
    # Initialise the parent pipeline manager
    super(PipelineManagerReadFromSpectrumLibrary, self).__init__(pipeline=pipeline)

    self.spectrum_library_to_analyse = spectrum_library_to_analyse

    # Open the library we are reading from, applying no constraints beyond those in the library name
    search_result = SpectrumLibrarySqlite.open_and_search(
        library_spec=spectrum_library_to_analyse,
        workspace=workspace,
        extra_constraints={})
    library_handle = search_result["library"]
    library_items = search_result["items"]
    library_ids = [entry["specId"] for entry in library_items]

    self.input_library = library_handle
    self.input_library_items = library_items
    self.input_library_ids = library_ids

    # Counter which starts at zero when the manager is created
    self.spectrum_counter = 0
def __init__(self, workspace, fourmost_mode, rv_cross_correlation_library="rv_templates_resampled",
             rv_upsampling=1):
    """
    Set up the cross-correlation RV task.

    :param workspace:
        Directory where we expect to find spectrum libraries.
    :type workspace:
        str
    :param fourmost_mode:
        The name of the 4MOST mode we are operating, either hrs or lrs
    :type fourmost_mode:
        str
    :param rv_cross_correlation_library:
        The name of the spectrum library we are to get our cross correlation templates from.
    :type rv_cross_correlation_library:
        str
    :param rv_upsampling:
        The upsampling factor to apply to input spectra before cross correlating them with the
        templates.
    :type rv_upsampling:
        int
    """
    super(TaskRVCorrect, self).__init__()
    self.fourmost_mode = fourmost_mode

    # The template library must already exist; we never create it here
    template_path = os_path.join(workspace, rv_cross_correlation_library)
    templates = SpectrumLibrarySqlite(path=template_path, create=False)

    # Instantiate the RV code on top of the template library
    self.rv_code = RvInstanceCrossCorrelation(spectrum_library=templates,
                                              upsampling=rv_upsampling)
target_library_name = args.template_library library_path = os_path.join(workspace, target_library_name) # Instantiate the RV code time_start = time.time() rv_code = RvInstanceBrani.from_spectrum_library_sqlite( library_path=library_path) n_burn_default = rv_code.n_burn n_steps_default = rv_code.n_steps time_end = time.time() logger.info("Set up time was {:.2f} sec".format(time_end - time_start)) # Open the library of APOKASC test spectra test_library_name = args.test_library test_library_path = os_path.join(workspace, test_library_name) test_library = SpectrumLibrarySqlite(path=test_library_path, create=False) # Load test set test_library_ids = [i["specId"] for i in test_library.search()] # Pick some random spectra indices = [ random.randint(0, len(test_library_ids) - 1) for i in range(args.test_count) ] # Start writing output with open(args.output_file, "w") as output: column_headings_written = False stellar_label_names = []
from fourgp_speclib import SpectrumLibrarySqlite # Start logging our progress logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s', datefmt='%d/%m/%Y %H:%M:%S') logger = logging.getLogger(__name__) logger.info("Synthesizing spectra of pepsi") # Instantiate base synthesizer synthesizer = Synthesizer(library_name="turbo_pepsi_replica_3label", logger=logger, docstring=__doc__) spectra = SpectrumLibrarySqlite.open_and_search( library_spec='pepsi_4fs_hrs/', workspace='/home/travegre/Projects/4GP/4most-4gp-scripts/workspace/', extra_constraints={"continuum_normalised": True} ) pepsi_library, pepsi_library_items = [spectra[i] for i in ("library", "items")] # Load test set pepsi_library_ids = [i["specId"] for i in pepsi_library_items] spectra = [pepsi_library.open(ids=i).extract_item(0) for i in pepsi_library_ids] print(spectra) star_list = [] for spectrum in spectra: try: #star_list.append({'name': spectrum.metadata['Starname'], 'Teff': spectrum.metadata['Teff'], 'logg': spectrum.metadata['logg'], '[Fe/H]': spectrum.metadata['[Fe/H]'], 'microturbulence': spectrum.metadata['vmic_GES'], 'extra_metadata': {'set_id': 1}}) star_list.append({'name': spectrum.metadata['Starname'], 'Teff': spectrum.metadata['Teff'], 'logg': spectrum.metadata['logg'], '[Fe/H]': spectrum.metadata['[Fe/H]'], 'extra_metadata': {'set_id': 1}})
def _build_argument_parser():
    """
    Build the command-line argument parser for the Payne test script.

    :return:
        A configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--test',
        required=True,
        dest='test_library',
        help="Library of spectra to test the trained Payne on. Stars may be filtered by parameters by "
             "placing a comma-separated list of constraints in [] brackets after the name of the "
             "library. Use the syntax [Teff=3000] to demand equality, or [0<[Fe/H]<0.2] to specify a "
             "range.")
    parser.add_argument(
        '--train',
        required=True,
        dest='train_library',
        help="Library of labelled spectra to train the Payne on. Stars may be filtered by parameters "
             "by placing a comma-separated list of constraints in [] brackets after the name of the "
             "library. Use the syntax [Teff=3000] to demand equality, or [0<[Fe/H]<0.2] to specify a "
             "range.")
    parser.add_argument(
        '--workspace',
        dest='workspace',
        default="",
        help="Directory where we expect to find spectrum libraries.")
    parser.add_argument(
        '--train-batch-number',
        required=False,
        dest='train_batch_number',
        type=int,
        default=0,
        help="If training pixels in multiple batches on different machines, then this is the number of "
             "the batch of pixels we are to train. It should be in the range 0 .. batch_count-1 "
             "inclusive. If it is -1, then we skip training to move straight to testing.")
    parser.add_argument(
        '--test-batch-number',
        required=False,
        dest='test_batch_number',
        type=int,
        default=0,
        help="If testing spectra in multiple batches on different machines, then this is the number of "
             "the batch of spectra we are to test. It should be in the range 0 .. test_batch_count-1 "
             "inclusive.")
    parser.add_argument(
        '--num-training-workers',
        required=False,
        dest='train_batch_count',
        type=int,
        default=1,
        help="If training pixels in multiple batches on different machines, then this is the number "
             "of nodes/workers/batches.")
    parser.add_argument(
        '--num-testing-workers',
        required=False,
        dest='test_batch_count',
        type=int,
        default=1,
        help="If testing spectra in multiple batches on different machines, then this is the number "
             "of nodes/workers/batches.")
    parser.add_argument(
        '--reload-payne',
        required=False,
        dest='reload_payne',
        default=None,
        help="Skip training step, and reload a Payne that we've previously trained.")
    parser.add_argument('--description',
                        dest='description',
                        help="A description of this fitting run.")
    parser.add_argument(
        '--labels',
        dest='labels',
        default="Teff,logg,[Fe/H]",
        help="List of the labels the Payne is to learn to estimate.")
    parser.add_argument(
        '--label-expressions',
        dest='label_expressions',
        default="",
        help="List of the algebraic labels the Payne is to learn to estimate "
             "(e.g. photometry_B - photometry_V).")
    parser.add_argument(
        '--labels-individual',
        dest='labels_individual',
        default="",
        help="List of the labels the Payne is to fit in separate fitting runs.")
    parser.add_argument('--censor-scheme',
                        default="1",
                        dest='censor_scheme',
                        help="Censoring scheme version to use (1, 2 or 3).")
    parser.add_argument(
        '--censor',
        default="",
        dest='censor_line_list',
        help="Optional list of line positions for the Payne to fit, ignoring continuum between.")
    parser.add_argument('--output-file',
                        default="./test_cannon.out",
                        dest='output_file',
                        help="Data file to write output to.")
    parser.add_argument(
        '--assume-scaled-solar',
        action='store_true',
        dest="assume_scaled_solar",
        help="Assume scaled solar abundances for any elements which don't have abundances individually "
             "specified. Useful for working with incomplete data sets.")
    parser.add_argument(
        '--no-assume-scaled-solar',
        action='store_false',
        dest="assume_scaled_solar",
        help="Do not assume scaled solar abundances; throw an error if training set is has missing "
             "labels.")
    parser.set_defaults(assume_scaled_solar=False)
    parser.add_argument('--multithread',
                        action='store_true',
                        dest="multithread",
                        help="Use multiple thread to speed Payne up.")
    parser.add_argument(
        '--nothread',
        action='store_false',
        dest="multithread",
        help="Do not use multiple threads - use only one CPU core.")
    parser.set_defaults(multithread=True)
    parser.add_argument(
        '--interpolate',
        action='store_true',
        dest="interpolate",
        help="Interpolate the test spectra on the training spectra's wavelength raster. DANGEROUS!")
    parser.add_argument(
        '--nointerpolate',
        action='store_false',
        dest="interpolate",
        help="Do not interpolate the test spectra onto a different raster.")
    parser.set_defaults(interpolate=False)
    parser.add_argument(
        '--train-wavelength-window',
        dest="train_wavelength_window",
        help="Use only the selected wavelength region for the training")
    parser.set_defaults(train_wavelength_window=False)
    parser.add_argument(
        '--neuron-count',
        dest="neuron_count",
        help="Number of neurons in each of the Payne NN layers")
    parser.set_defaults(neuron_count=10)
    return parser


def _plot_characteristic_spectra(model, training_spectra, output_filename):
    """
    Plot a few spectra predicted by the Payne generative model on top of the training spectra.

    This is a debugging aid which main() leaves switched off by default. It reads the network
    weights from the model's private ``_payne_status`` dictionary.

    :param model:
        The trained Payne model.
    :param training_spectra:
        The spectra the model was trained on.
    :param output_filename:
        Stem of the file the PNG plot is saved to.
    """

    def sigmoid_def(z):
        return 1.0 / (1.0 + np.exp(-z))

    payne_status = model._payne_status
    w_array_0 = payne_status["w_array_0"]
    w_array_1 = payne_status["w_array_1"]
    w_array_2 = payne_status["w_array_2"]
    b_array_0 = payne_status["b_array_0"]
    b_array_1 = payne_status["b_array_1"]
    b_array_2 = payne_status["b_array_2"]
    x_min = payne_status["x_min"]
    x_max = payne_status["x_max"]

    # Hard-coded sets of three label values to evaluate the model at, rescaled in the same way
    # as x_min / x_max from the model status
    labels = np.array([[5000, 4, 0], [5500, 4, 0], [5000, 4, -1.], [5000, 5, 0], [4000, 4, 0]])
    labels = (labels - x_min) / (x_max - x_min) - 0.5

    import matplotlib
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(12, 8), dpi=200)
    for i in training_spectra.values:
        plt.plot(training_spectra.wavelengths, i, lw=0.5, alpha=0.2)

    colors = ['yellow', 'orange', 'red', 'green', 'blue']
    for j, i in enumerate(labels):
        predict_flux = w_array_2 * sigmoid_def(
            np.sum(w_array_1 * (sigmoid_def(np.dot(w_array_0, i) + b_array_0)), axis=1)
            + b_array_1) + b_array_2
        plt.plot(training_spectra.wavelengths, predict_flux, lw=2, c=colors[j])
    plt.show()
    fig.savefig("{:s}.characteristic_gen_model_plot.png".format(output_filename), format='png')


def _read_gzipped_json(filename):
    """
    Read a gzip-compressed, UTF-8 encoded JSON file.

    :param filename:
        Path of the file to read.
    :return:
        The decoded JSON object.
    """
    with gzip.open(filename, "r") as f:
        return json.loads(f.read().decode('utf-8'))


def _merge_batch_results(output_filename, batch_count):
    """
    Join the per-batch test results found in directory <output_filename> into single output files.

    The summaries of all batches are merged into ``<output_filename>.summary.json.gz``. The full
    output of batch zero, extended by the ``spectra`` lists of all later batches, is written to
    ``<output_filename>.full.json.gz``.

    :param output_filename:
        Directory holding the per-batch files, and stem of the merged output files.
    :param batch_count:
        The number of batches which the test spectra were split into.
    :raises FileNotFoundError:
        If the output of any batch is not present.
    """
    logger.info("Loading Payne results from disk")

    payne_batches_summary = {}
    payne_batches_full = {}
    for i in range(batch_count):
        filename_summary = os.path.join(
            output_filename,
            "batch_{:04d}_of_{:04d}.summary.json.gz".format(i, batch_count))
        filename_full = os.path.join(
            output_filename,
            "batch_{:04d}_of_{:04d}.full.json.gz".format(i, batch_count))

        # Check that this batch is present *before* trying to read it, so that a batch which has
        # not been run yet produces a helpful message
        for filename in (filename_summary, filename_full):
            if not os.path.exists(filename):
                raise FileNotFoundError(
                    "Could not proceed with joining results, because test data for batch {:d} of "
                    "spectra is not present on this server (missing file <{}>).".format(i, filename))

        payne_batches_summary.update(_read_gzipped_json(filename_summary))

        batch_full = _read_gzipped_json(filename_full)
        if i == 0:
            payne_batches_full.update(batch_full)
        else:
            payne_batches_full['spectra'].extend(batch_full['spectra'])

    logger.info("Payne results loaded successfully")

    with gzip.open("{:s}.full.json.gz".format(output_filename), "wt") as f:
        f.write(json.dumps(payne_batches_full, indent=2))
    with gzip.open("{:s}.summary.json.gz".format(output_filename), "wt") as f:
        f.write(json.dumps(payne_batches_summary, indent=2))

    logger.info("Payne batches merged successfully")


def main():
    """
    Main entry point for running the Payne.

    Reads the command-line arguments, opens the training and test spectrum libraries, and then,
    for each group of individually-fitted labels, trains (or reloads) a Payne model and either

    * fits this worker's batch of test spectra and writes the results into the directory
      <output_file>, if this batch has not already been written; or
    * merges the outputs of all batches into single files, if this batch already exists on disk.
    """
    global logger

    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s',
        datefmt='%d/%m/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # Read input parameters
    args = _build_argument_parser().parse_args()

    logger.info("Testing Payne with arguments <{}> <{}> <{}> <{}>".format(
        args.test_library, args.train_library, args.censor_line_list, args.output_file))

    # List of labels over which we are going to test the performance of the Payne
    test_label_fields = args.labels.split(",")

    # List of labels we're going to fit individually
    if args.labels_individual:
        test_labels_individual = [i.split("+") for i in args.labels_individual.split(",")]
    else:
        test_labels_individual = [[]]

    # Set path to workspace where we expect to find libraries of spectra
    our_path = os_path.split(os_path.abspath(__file__))[0]
    workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace")

    # Open training set
    spectra = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.train_library,
        workspace=workspace,
        extra_constraints={"continuum_normalised": True})
    training_library, training_library_items = [spectra[i] for i in ("library", "items")]

    # Open test set
    spectra = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.test_library,
        workspace=workspace,
        extra_constraints={"continuum_normalised": True})
    test_library, test_library_items = [spectra[i] for i in ("library", "items")]

    # Load training set
    training_library_ids_all = [i["specId"] for i in training_library_items]
    training_spectra_all = training_library.open(ids=training_library_ids_all)
    raster = training_spectra_all.wavelengths

    # Load test set
    test_library_ids = [i["specId"] for i in test_library_items]

    # Set to True to plot some characteristic spectra from the Payne generative model
    plot_generative_model = False

    # Fit each set of labels we're fitting individually, one by one
    for labels_individual_batch_count, test_labels_individual_batch in enumerate(test_labels_individual):

        # Create filename for the output from this Payne run
        output_filename = args.output_file

        # If we're fitting elements individually, individually number the runs to fit each element
        if len(test_labels_individual) > 1:
            output_filename += "-{:03d}".format(labels_individual_batch_count)

        # If requested, fill in any missing labels on the training set by assuming scaled-solar abundances
        if args.assume_scaled_solar:
            training_spectra = autocomplete_scaled_solar_abundances(
                input_spectra=training_spectra_all,
                label_list=test_label_fields + test_labels_individual_batch)
        else:
            training_spectra = filter_training_spectra(
                input_spectra=training_spectra_all,
                label_list=test_label_fields + test_labels_individual_batch,
                input_library=training_library,
                input_spectrum_ids=training_library_ids_all)

        # Evaluate labels which are calculated via metadata expressions
        test_labels_expressions = []
        if args.label_expressions.strip():
            test_labels_expressions = args.label_expressions.split(",")
            evaluate_computed_labels(label_expressions=test_labels_expressions,
                                     spectra=training_spectra)

        # Make combined list of all labels the Payne is going to fit
        test_labels = test_label_fields + test_labels_individual_batch + test_labels_expressions
        logger.info("Beginning fit of labels <{}>.".format(",".join(test_labels)))

        # If required, generate the censoring masks
        censoring_masks = create_censoring_masks(
            censoring_scheme=int(args.censor_scheme),
            raster=raster,
            censoring_line_list=args.censor_line_list,
            label_fields=test_label_fields + test_labels_individual_batch,
            label_expressions=test_labels_expressions,
            logger=logger)

        # Construct and train a model
        time_training_start = time.time()

        # If a wavelength window of the form <min>-<max> was given, cut the training spectra down
        # to it. The mask is False if no window was requested.
        train_window_mask = False
        if args.train_wavelength_window:
            window_limits = args.train_wavelength_window.split('-')
            train_window_mask = ((training_spectra.wavelengths > float(window_limits[0]))
                                 & (training_spectra.wavelengths < float(window_limits[1])))
            training_spectra.wavelengths = training_spectra.wavelengths[train_window_mask]
            training_spectra.values = training_spectra.values[:, train_window_mask]
            training_spectra.value_errors = training_spectra.value_errors[:, train_window_mask]

        model_settings = dict(
            training_set=training_spectra,
            label_names=test_labels,
            neuron_count=int(args.neuron_count),
            batch_number=args.train_batch_number,
            batch_count=args.train_batch_count,
            censors=censoring_masks,
            threads=None if args.multithread else 1,
            training_data_archive=output_filename)
        if args.reload_payne == 'true':
            # Reload a previously trained Payne from the archive rather than training a new one
            model_settings["load_from_archive"] = True
        model = PayneInstanceTing(**model_settings)

        time_training_end = time.time()

        # Plot some characteristic spectra from the Payne generative model
        if plot_generative_model:
            _plot_characteristic_spectra(model=model,
                                         training_spectra=training_spectra,
                                         output_filename=output_filename)

        # Filenames of the output from the batch of test spectra that this worker is responsible for
        batch_stem = "batch_{:04d}_of_{:04d}".format(args.test_batch_number, args.test_batch_count)
        batch_summary_filename = os.path.join(output_filename, batch_stem + ".summary.json.gz")
        batch_full_filename = os.path.join(output_filename, batch_stem + ".full.json.gz")

        # Test the model
        if not os.path.exists(batch_full_filename):
            N = len(test_library_ids)
            results = []

            # Wall-clock time spent fitting each spectrum
            time_taken = []

            spec_start = (N // args.test_batch_count + 1) * args.test_batch_number
            spec_end = min(N, (N // args.test_batch_count + 1) * (args.test_batch_number + 1))

            # Number of spectra which are fitted simultaneously, each in its own process
            threads = 1

            params = [
                spec_start, spec_end, args, training_spectra, censoring_masks, model, test_labels,
                train_window_mask
            ]

            manager = multiprocessing.Manager()

            for batch_start in range(spec_start, spec_end, threads):
                batch = [
                    [i, test_library.open(ids=test_library_ids[i]).extract_item(0)] + params
                    for i in range(batch_start, batch_start + threads)
                    if i < len(test_library_ids)
                ]

                dicti = manager.list()
                ps = []
                time_batch_start = time.time()
                for i in batch:
                    spectrum = i[1]

                    # Collect the reference values of those labels which this spectrum has
                    ref_labels = []
                    for j in test_labels:
                        try:
                            print(j, ' ', spectrum.metadata[j])
                            ref_labels.append(spectrum.metadata[j])
                        except KeyError:
                            # This spectrum has no reference value for this label
                            continue

                    p = multiprocessing.Process(target=parallel_fit,
                                                args=(i, dicti, np.array(ref_labels)))
                    ps.append(p)
                    p.start()

                for p in ps:
                    p.join()

                # Share the elapsed time of this batch equally between the spectra fitted in it
                if batch:
                    time_per_spectrum = (time.time() - time_batch_start) / len(batch)
                    time_taken.extend([time_per_spectrum] * len(batch))

                for result in dicti:
                    results.append(result)

            # Report time taken
            logger.info(
                "Fitting of {:d} spectra completed. Took {:.2f} +/- {:.2f} sec / spectrum."
                .format((spec_end - spec_start),
                        np.mean(time_taken) if time_taken else 0.0,
                        np.std(time_taken) if time_taken else 0.0))

            # Create output data structure
            censoring_output = None
            if censoring_masks is not None:
                censoring_output = {
                    label: tuple(int(i) for i in mask)
                    for label, mask in censoring_masks.items()
                }

            output_data = {
                "hostname": os.uname()[1],
                "generator": __file__,
                "4gp_version": fourgp_version,
                "cannon_version": None,
                "payne_version": model.payne_version,
                "start_time": time_training_start,
                "end_time": time.time(),
                "training_time": time_training_end - time_training_start,
                "description": args.description,
                "train_library": args.train_library,
                "test_library": args.test_library,
                "tolerance": None,
                "assume_scaled_solar": args.assume_scaled_solar,
                "line_list": args.censor_line_list,
                "labels": test_labels,
                "wavelength_raster": tuple(raster),
                "censoring_mask": censoring_output
            }

            # The batch files live inside a directory named after the output file; make sure it
            # exists, so that we cannot lose the fitting results at the moment of writing them
            os.makedirs(output_filename, exist_ok=True)

            # Write brief summary of run to JSON file, without masses of data
            with gzip.open(batch_summary_filename, "wt") as f:
                f.write(json.dumps(output_data, indent=2))

            # Write full results to JSON file
            output_data["spectra"] = results
            with gzip.open(batch_full_filename, "wt") as f:
                f.write(json.dumps(output_data, indent=2))

            logger.info("Saving results, batch {:04d} of {:04d} completed".format(
                args.test_batch_number, args.test_batch_count))
        else:
            # Load the test results from batches and join them
            _merge_batch_results(output_filename=output_filename,
                                 batch_count=args.test_batch_count)
required=False, default="/tmp/reddening_{}.log".format(pid), dest="log_to", help="Specify a log file where we log our progress.") args = parser.parse_args() logger.info("Reddening spectra from the library <{}>, storing the output into <{}>".format(args.input_library, args.output_library)) # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Open input SpectrumLibrary, and search for flux normalised spectra meeting our filtering constraints spectra = SpectrumLibrarySqlite.open_and_search(library_spec=args.input_library, workspace=workspace, extra_constraints={"continuum_normalised": 0} ) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [spectra[i] for i in ("library", "items", "constraints")] # Create new spectrum library for output library_name = re.sub("/", "_", args.output_library) library_path = os_path.join(workspace, library_name) output_library = SpectrumLibrarySqlite(path=library_path, create=args.create) # List of photometric bands which we calculate extinction values for, and add to the metadata of each spectrum photometric_bands = ["SDSS_r", "SDSS_g", "GROUND_JOHNSON_V", "GROUND_JOHNSON_B"] # Parse the list of reddening values (i.e. E_BV) which we were passed on the command line ebv_list = [float(item.strip()) for item in args.ebv_list.split(",")]
args = parser.parse_args() # Set path to workspace where we create libraries of spectra our_path = os_path.split(os_path.abspath(__file__))[0] workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../../workspace") os.system("mkdir -p {}".format(workspace)) # Fetch metadata about this Cannon run cannon_output = json.loads( gzip.open(args.cannon + ".full.json.gz", "rt").read()) description = cannon_output['description'] # Open spectrum library we originally trained the Cannon on training_spectra_info = SpectrumLibrarySqlite.open_and_search( library_spec=cannon_output["train_library"], workspace=workspace, extra_constraints={"continuum_normalised": 1}) training_library, training_library_items = [ training_spectra_info[i] for i in ("library", "items") ] # Load training set training_library_ids_all = [i["specId"] for i in training_library_items] training_spectra_all = training_library.open(ids=training_library_ids_all) # If requested, fill in any missing labels on the training set by assuming scaled-solar abundances if cannon_output['assume_scaled_solar']: training_spectra = autocomplete_scaled_solar_abundances( input_spectra=training_spectra_all, label_list=cannon_output["labels"]) else:
dest="log_to", help="Specify a log file where we log our progress.") args = parser.parse_args() logger.info( "Adding radial velocities to spectra from <{}>, going into <{}>".format( args.input_library, args.output_library)) # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Open input SpectrumLibrary, and search for flux normalised spectra meeting our filtering constraints spectra = SpectrumLibrarySqlite.open_and_search( library_spec=args.input_library, workspace=workspace, extra_constraints={"continuum_normalised": 0}) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [ spectra[i] for i in ("library", "items", "constraints") ] # Create new spectrum library for output library_name = re.sub("/", "_", args.output_library) library_path = os_path.join(workspace, library_name) output_library = SpectrumLibrarySqlite(path=library_path, create=args.create) # We may want to symlink the sqlite3 database file into /tmp for performance reasons # This bit of crack-on-a-stick is only useful if /tmp is on a ram disk, though... if args.db_in_tmp:
class Synthesizer:
    """
    Common machinery for scripts which synthesize a list of stars with TurboSpectrum.

    The class parses the command-line arguments, normalises a list of star descriptions
    (`set_star_list`), optionally dumps the stellar parameters into an sqlite3 file
    (`dump_stellar_parameters_to_sqlite`), creates the output SpectrumLibrary and the
    TurboSpectrum wrapper (`create_spectrum_library`), synthesizes each star
    (`do_synthesis`) and finally shuts the synthesizer down (`clean_up`).
    """

    @staticmethod
    def astropy_row_to_dict(x):
        """
        Convenience function to provide dictionary access to rows of an astropy table.

        :param x:
            An object exposing an iterable `columns` attribute and item access by column name.
        :return:
            A dictionary mapping each entry of `x.columns` to `x[entry]`.
        """
        return {i: x[i] for i in x.columns}

    def __init__(self, library_name, logger, docstring, root_path="../../../..", spectral_resolution=50000):
        """
        Read input parameters from the command line and make sure the workspace directory exists.

        :param library_name:
            Short name of this run; used in the default output library name and the default log directory.
        :param logger:
            Logger object supplied by the calling script; stored as `self.logger`.
        :param docstring:
            Text used as the description of the command-line argument parser.
        :param root_path:
            Path, relative to the directory holding this file, used to locate the default workspace. Its parent
            directory is the default location for line lists and binaries.
        :param spectral_resolution:
            Resolution used to derive the wavelength step passed to TurboSpectrum.
        """
        self.logger = logger
        self.our_path = os_path.split(os_path.abspath(__file__))[0]
        self.root_path = os_path.abspath(os_path.join(self.our_path, root_path, ".."))
        self.pid = os.getpid()
        self.spectral_resolution = spectral_resolution

        parser = argparse.ArgumentParser(description=docstring)
        parser.add_argument('--output-library',
                            required=False,
                            default="turbospec_{}".format(library_name),
                            dest="library",
                            help="Specify the name of the SpectrumLibrary we are to feed synthesized spectra into.")
        parser.add_argument('--workspace', dest='workspace', default="",
                            help="Directory where we expect to find spectrum libraries.")
        parser.add_argument('--create',
                            required=False,
                            action='store_true',
                            dest="create",
                            help="Create a clean SpectrumLibrary to feed synthesized spectra into")
        parser.add_argument('--no-create',
                            required=False,
                            action='store_false',
                            dest="create",
                            help="Do not create a clean SpectrumLibrary to feed synthesized spectra into")
        parser.set_defaults(create=True)
        parser.add_argument('--log-dir',
                            required=False,
                            default="/tmp/turbospec_{}_{}".format(library_name, self.pid),
                            dest="log_to",
                            help="Specify a log directory where we log our progress and configuration files.")
        parser.add_argument('--dump-to-sqlite-file',
                            required=False,
                            default="",
                            dest="sqlite_out",
                            help="Specify an sqlite3 filename where we dump the stellar parameters of the stars.")
        parser.add_argument('--line-lists-dir',
                            required=False,
                            default=self.root_path,
                            dest="lines_dir",
                            help="Specify a directory where line lists for TurboSpectrum can be found.")
        parser.add_argument('--elements',
                            required=False,
                            default="",
                            dest="elements",
                            help="Only read the abundances of a comma-separated list of elements, and use scaled-solar "
                                 "abundances for everything else.")
        parser.add_argument('--binary-path',
                            required=False,
                            default=self.root_path,
                            dest="binary_path",
                            help="Specify a directory where Turbospectrum and Interpol packages are installed.")
        parser.add_argument('--every',
                            required=False,
                            default=1,
                            type=int,
                            dest="every",
                            help="Only process every nth spectrum. "
                                 "This is useful when parallelising this script across multiple processes.")
        parser.add_argument('--skip',
                            required=False,
                            default=0,
                            type=int,
                            dest="skip",
                            help="Skip n spectra before starting to process every nth. "
                                 "This is useful when parallelising this script across multiple processes.")
        parser.add_argument('--limit',
                            required=False,
                            default=0,
                            type=int,
                            dest="limit",
                            help="Only process a maximum of n spectra.")
        self.args = parser.parse_args()

        logging.info("Synthesizing {} to <{}>".format(library_name, self.args.library))

        # Set path to workspace where we create libraries of spectra
        self.workspace = (self.args.workspace if self.args.workspace else
                          os_path.abspath(os_path.join(self.our_path, root_path, "workspace")))

        # Create the directory directly rather than via a shell, so that paths containing spaces or shell
        # metacharacters work, and so that a failure is reported rather than silently ignored
        os.makedirs(self.workspace, exist_ok=True)

    def set_star_list(self, star_list):
        """
        Store the list of stars to synthesize, filling in any optional fields which are missing.

        The dictionaries in `star_list` are modified in place.

        :param star_list:
            List of dictionaries, each with at least the keys `Teff`, `[Fe/H]` and `logg` (these are read whenever
            `input_data` is not supplied). Optional keys are `name`, `free_abundances`, `extra_metadata`,
            `microturbulence` and `input_data`.
        :return:
            None
        """
        self.star_list = star_list

        for i, item in enumerate(self.star_list):
            # Ensure that every star has a name; number stars if not
            if 'name' not in item:
                item['name'] = "star_{:08d}".format(i)

            # Ensure that every star has free_abundances and extra metadata
            if 'free_abundances' not in item:
                item['free_abundances'] = {}
            if 'extra_metadata' not in item:
                item['extra_metadata'] = {}
            if 'microturbulence' not in item:
                item['microturbulence'] = 1

            # Ensure that we have a table of input data to dump to SQLite, if requested
            if 'input_data' not in item:
                item['input_data'] = {'name': item['name'],
                                      'Teff': item['Teff'],
                                      '[Fe/H]': item['[Fe/H]'],
                                      'logg': item['logg']}
                item['input_data'].update(item['free_abundances'])
                item['input_data'].update(item['extra_metadata'])
            if 'name' not in item['input_data']:
                item['input_data']['name'] = item['name']

    def dump_stellar_parameters_to_sqlite(self):
        """
        Write the `input_data` of every star into a table called `stars` in a fresh sqlite3 file.

        Nothing is done unless an output filename was given by `--dump-to-sqlite-file`. Any existing file of that
        name is replaced. The table's columns are taken from the `input_data` of the first star; string values
        become TEXT columns and everything else becomes REAL.

        :return:
            None
        """
        if not self.args.sqlite_out:
            return

        # Remove any previous dump. This is done without a shell so that filenames containing spaces are handled.
        try:
            os.remove(self.args.sqlite_out)
        except FileNotFoundError:
            pass

        # With no stars we cannot work out what columns the table should have
        if not self.star_list:
            logging.warning("No stars to dump to <{}>.".format(self.args.sqlite_out))
            return

        conn = sqlite3.connect(self.args.sqlite_out)
        try:
            c = conn.cursor()

            # NOTE: column names are interpolated into the SQL unquoted, exactly as before, so that the resulting
            # schema is unchanged. They must therefore be valid SQL identifiers as written.
            columns = [
                "{} {}".format(col_name, "TEXT" if isinstance(col_value, str) else "REAL")
                for col_name, col_value in self.star_list[0]['input_data'].items()
            ]
            c.execute("CREATE TABLE stars (uid INTEGER PRIMARY KEY, {});".format(",".join(columns)))

            star_count = len(self.star_list)
            for i, item in enumerate(self.star_list):
                print(("Writing sqlite parameter dump: %5d / %5d" % (i, star_count)))
                input_data = item['input_data']

                # Create a row for this star, then populate the remaining columns one by one
                c.execute("INSERT INTO stars (name) VALUES (?);", (input_data['name'],))
                uid = c.lastrowid
                for col_name, col_value in input_data.items():
                    if col_name == "name":
                        continue
                    arguments = (
                        str(col_value) if isinstance(col_value, str) else float(col_value),
                        uid
                    )
                    c.execute("UPDATE stars SET %s=? WHERE uid=?;" % col_name, arguments)
            conn.commit()
        finally:
            # Always release the database handle, even if a query failed
            conn.close()

    def create_spectrum_library(self):
        """
        Create the output SpectrumLibrary, configure a TurboSpectrum instance, and set up the log directory.

        :return:
            None
        """
        # Create new SpectrumLibrary
        self.library_name = re.sub("/", "_", self.args.library)
        self.library_path = os_path.join(self.workspace, self.library_name)
        self.library = SpectrumLibrarySqlite(path=self.library_path, create=self.args.create)

        # Invoke FourMost data class. Ensure that the spectra we produce are much higher resolution than 4MOST.
        # We down-sample them later to whatever resolution we actually want.
        self.FourMostData = FourMost()
        self.lambda_min = self.FourMostData.bands["LRS"]["lambda_min"]
        self.lambda_max = self.FourMostData.bands["LRS"]["lambda_max"]
        self.line_lists_path = self.FourMostData.bands["LRS"]["line_lists_edvardsson"]

        # Invoke a TurboSpectrum synthesizer instance
        self.synthesizer = TurboSpectrum(
            turbospec_path=os_path.join(self.args.binary_path, "turbospectrum-15.1/exec-gf-v15.1"),
            interpol_path=os_path.join(self.args.binary_path, "interpol_marcs"),
            line_list_paths=[os_path.join(self.args.lines_dir, self.line_lists_path)],
            marcs_grid_path=os_path.join(self.args.binary_path, "fromBengt/marcs_grid"))

        self.synthesizer.configure(lambda_min=self.lambda_min,
                                   lambda_max=self.lambda_max,
                                   lambda_delta=float(self.lambda_min) / self.spectral_resolution,
                                   line_list_paths=[os_path.join(self.args.lines_dir, self.line_lists_path)],
                                   stellar_mass=1)
        self.counter_output = 0

        # Start making log output
        os.makedirs(self.args.log_to, exist_ok=True)
        self.logfile = os.path.join(self.args.log_to, "synthesis.log")

    @staticmethod
    def _write_result_log(result_log, elapsed, star_name, message):
        """Append one timestamped status line for a star to the results log, and flush it to disk."""
        result_log.write("[{}] {:6.0f} sec {}: {}\n".format(time.asctime(), elapsed, star_name, message))
        result_log.flush()

    def do_synthesis(self):
        """
        Synthesize every selected star in the star list and insert the results into the SpectrumLibrary.

        Each synthesized star is inserted twice: once continuum normalised (`continuum_normalised` = 1) and once
        with continuum (`continuum_normalised` = 0). One status line per processed star is written to the file
        `synthesis.log` in the log directory, and the raw output of the synthesizer is written to
        `<star name>.log` in the same directory.

        :return:
            None
        """
        # Iterate over the spectra we're supposed to be synthesizing
        with open(self.logfile, "w") as result_log:
            for star in self.star_list:
                star_name = star['name']
                unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

                metadata = {
                    "Starname": str(star_name),
                    "uid": str(unique_id),
                    "Teff": float(star['Teff']),
                    "[Fe/H]": float(star['[Fe/H]']),
                    "logg": float(star['logg']),
                    "microturbulence": float(star["microturbulence"])
                }

                # User can specify that we should only do every nth spectrum, if we're running in parallel
                self.counter_output += 1
                if (self.args.limit > 0) and (self.counter_output > self.args.limit):
                    break
                if (self.counter_output - self.args.skip) % self.args.every != 0:
                    continue

                # Pass list of the abundances of individual elements to TurboSpectrum
                free_abundances = dict(star['free_abundances'])
                for element, abundance in free_abundances.items():
                    metadata["[{}/H]".format(element)] = float(abundance)

                # Propagate all ionisation states into metadata
                metadata.update(star['extra_metadata'])

                # Configure Turbospectrum with the stellar parameters of the next star
                self.synthesizer.configure(
                    t_eff=float(star['Teff']),
                    metallicity=float(star['[Fe/H]']),
                    log_g=float(star['logg']),
                    stellar_mass=star.get("stellar_mass", 1),
                    turbulent_velocity=star.get("microturbulence", 1),
                    free_abundances=free_abundances
                )

                # Make spectrum
                time_start = time.time()
                turbospectrum_out = self.synthesizer.synthesise()
                time_end = time.time()
                elapsed = time_end - time_start

                # Log synthesizer status, making sure the file handle is closed again
                logfile_this = os.path.join(self.args.log_to, "{}.log".format(star_name))
                with open(logfile_this, "w") as star_log:
                    star_log.write(json.dumps(turbospectrum_out))

                # Check for errors
                errors = turbospectrum_out['errors']
                if errors:
                    self._write_result_log(result_log, elapsed, star_name, errors)
                    logging.warning("Star <{}> could not be synthesised. Errors were: {}".
                                    format(star_name, errors))
                    continue
                else:
                    logging.info("Synthesis completed without error.")

                # Fetch filename of the spectrum we just generated
                filepath = os_path.join(turbospectrum_out["output_file"])

                # Insert spectrum into SpectrumLibrary
                try:
                    filename = "spectrum_{:08d}".format(self.counter_output)

                    # First import continuum-normalised spectrum, which is in columns 1 and 2
                    metadata['continuum_normalised'] = 1
                    spectrum = Spectrum.from_file(filename=filepath, metadata=metadata, columns=(0, 1), binary=False)
                    self.library.insert(spectra=spectrum, filenames=filename)

                    # Then import version with continuum, which is in columns 1 and 3
                    metadata['continuum_normalised'] = 0
                    spectrum = Spectrum.from_file(filename=filepath, metadata=metadata, columns=(0, 2), binary=False)
                    self.library.insert(spectra=spectrum, filenames=filename)
                except (ValueError, IndexError):
                    self._write_result_log(result_log, elapsed, star_name, "Could not read bsyn output")
                    continue

                # Update log file to show our progress
                self._write_result_log(result_log, elapsed, star_name, "OK")

    def clean_up(self):
        """
        Report how many stars were stepped through, and shut down the TurboSpectrum instance.

        :return:
            None
        """
        logging.info("Synthesized {:d} spectra.".format(self.counter_output))

        # Close TurboSpectrum synthesizer instance
        self.synthesizer.close()
# They are as follows... grid_axes = [["Teff", (4000, 8250, 250)], ["[Fe/H]", (0.5, 3.0, 0.5)], ["logg", (1.5, 5.5, 0.5)]] grid_axis_values = [ np.arange(axis[1][0], axis[1][1], axis[1][2]) for axis in grid_axes ] grid_axis_indices = [ list(range(int((axis[1][1] - axis[1][0]) / axis[1][2]))) for axis in grid_axes ] grid_axis_index_combinations = itertools.product(*grid_axis_indices) # Turn Brani's set of templates into a spectrum library with path specified above library_path = os_path.join(workspace, target_library_name) library = SpectrumLibrarySqlite(path=library_path, create=True) # Brani's template spectra do not have any error vectors associated with them, so add an array of zeros errors_dummy = np.zeros_like(wavelength_raster) # Import each template spectrum in turn for i, axis_indices in enumerate(grid_axis_index_combinations): filename = "template{:06d}".format(i) metadata = {"Starname": filename} item = flux_templates for axis_counter, index in enumerate(axis_indices): metadata_key = grid_axes[axis_counter][0] metadata_value = grid_axis_values[axis_counter][index] metadata[metadata_key] = metadata_value metadata[metadata_key + "_index"] = index item = item[index]
# Set up logger logging.basicConfig( level=logging.INFO, format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s', datefmt='%d/%m/%Y %H:%M:%S') logger = logging.getLogger(__name__) logger.info("Testing Cross-Correlation RV code") # Set path to workspace where we expect to find libraries of spectra workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../workspace") # Open test set spectra = SpectrumLibrarySqlite.open_and_search( library_spec=args.test_library, workspace=workspace, extra_constraints={"continuum_normalised": 0}) test_library, test_library_items, test_spectra_constraints = [ spectra[i] for i in ("library", "items", "constraints") ] # Open template spectrum library template_library = SpectrumLibrarySqlite( path=os_path.join(workspace, args.templates_library), create=False, ) # Instantiate RV code rv_calculator = RvInstanceCrossCorrelation(spectrum_library=template_library, upsampling=args.upsampling)
help="Separator to use between fields in the CSV output.") parser.add_argument( '--workspace', dest='workspace', default="", help="Directory where we expect to find spectrum libraries.") args = parser.parse_args() # Set path to workspace where we expect to find libraries of spectra our_path = os_path.split(os_path.abspath(__file__))[0] workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../workspace") # Open spectrum library we're going to export from, and search for flux-normalised spectra meeting our filtering # constraints input_library_info = SpectrumLibrarySqlite.open_and_search( library_spec=args.library, workspace=workspace, extra_constraints={}) # Get a list of the spectrum IDs which we were returned input_library, library_items = [ input_library_info[i] for i in ("library", "items") ] library_ids = [i["specId"] for i in library_items] # Fetch list of all metadata fields, and sort it alphabetically fields = [i.strip() for i in input_library.list_metadata_fields()] fields.sort() # At the top of the CSV file, write column headings with the field names line = args.separator.join(fields) print(line)
# Start logging our progress logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s', datefmt='%d/%m/%Y %H:%M:%S') logger = logging.getLogger(__name__) logger.info("Creating synthetic versions of stars from <{}>".format(input_library)) # Instantiate base synthesizer synthesizer = Synthesizer(library_name="pepsi_synthetic", logger=logger, docstring=__doc__) star_list = [] # Open input spectrum library spectra = SpectrumLibrarySqlite.open_and_search(library_spec=input_library, workspace=synthesizer.workspace, extra_constraints={"continuum_normalised": 1} ) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [spectra[i] for i in ("library", "items", "constraints")] # Loop over input spectra for input_spectrum_id in input_spectra_ids: logger.info("Working on <{}>".format(input_spectrum_id['filename'])) # Open Spectrum data from disk input_spectrum_array = input_library.open(ids=input_spectrum_id['specId']) # Turn SpectrumArray object into a Spectrum object input_spectrum = input_spectrum_array.extract_item(0) metadata = input_spectrum.metadata
def main():
    """
    Train the Payne on one library of spectra, test it on another, and write the results to disk.

    All settings are read from the command line. For each set of individually-fitted labels, a Payne model is
    trained, every spectrum in the test library is fitted, and two gzipped JSON files are written: a brief
    summary (`<output>.summary.json.gz`) and the full results (`<output>.full.json.gz`).
    """
    global logger

    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s',
        datefmt='%d/%m/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # Text shared by the help messages of the two options which name a library of spectra
    constraint_syntax_help = (
        " Stars may be filtered by parameters by placing a comma-separated list of constraints in [] brackets "
        "after the name of the library. Use the syntax [Teff=3000] to demand equality, or [0<[Fe/H]<0.2] to "
        "specify a range.")

    # Read input parameters
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--test', required=True, dest='test_library',
                        help="Library of spectra to test the trained Payne on." + constraint_syntax_help)
    parser.add_argument('--train', required=True, dest='train_library',
                        help="Library of labelled spectra to train the Payne on." + constraint_syntax_help)
    parser.add_argument('--workspace', dest='workspace', default="",
                        help="Directory where we expect to find spectrum libraries.")
    parser.add_argument('--train-batch-number', required=False, dest='batch_number', type=int, default=0,
                        help="If training pixels in multiple batches on different machines, then this is the "
                             "number of the batch of pixels we are to train. It should be in the range "
                             "0 .. batch_count-1 inclusive. If it is -1, then we skip training to move straight "
                             "to testing.")
    parser.add_argument('--train-batch-count', required=False, dest='batch_count', type=int, default=1,
                        help="If training pixels in multiple batches on different machines, then this is the "
                             "number of batches.")
    parser.add_argument('--description', dest='description',
                        help="A description of this fitting run.")
    parser.add_argument('--labels', dest='labels', default="Teff,logg,[Fe/H]",
                        help="List of the labels the Payne is to learn to estimate.")
    parser.add_argument('--label-expressions', dest='label_expressions', default="",
                        help="List of the algebraic labels the Payne is to learn to estimate "
                             "(e.g. photometry_B - photometry_V).")
    parser.add_argument('--labels-individual', dest='labels_individual', default="",
                        help="List of the labels the Payne is to fit in separate fitting runs.")
    parser.add_argument('--censor-scheme', default="1", dest='censor_scheme',
                        help="Censoring scheme version to use (1, 2 or 3).")
    parser.add_argument('--censor', default="", dest='censor_line_list',
                        help="Optional list of line positions for the Payne to fit, ignoring continuum between.")
    parser.add_argument('--output-file', default="./test_cannon.out", dest='output_file',
                        help="Data file to write output to.")
    parser.add_argument('--assume-scaled-solar', action='store_true', dest="assume_scaled_solar",
                        help="Assume scaled solar abundances for any elements which don't have abundances "
                             "individually specified. Useful for working with incomplete data sets.")
    parser.add_argument('--no-assume-scaled-solar', action='store_false', dest="assume_scaled_solar",
                        help="Do not assume scaled solar abundances; throw an error if training set is has "
                             "missing labels.")
    parser.add_argument('--multithread', action='store_true', dest="multithread",
                        help="Use multiple thread to speed Payne up.")
    parser.add_argument('--nothread', action='store_false', dest="multithread",
                        help="Do not use multiple threads - use only one CPU core.")
    parser.add_argument('--interpolate', action='store_true', dest="interpolate",
                        help="Interpolate the test spectra on the training spectra's wavelength raster. "
                             "DANGEROUS!")
    parser.add_argument('--nointerpolate', action='store_false', dest="interpolate",
                        help="Do not interpolate the test spectra onto a different raster.")

    # Defaults for the three pairs of on/off switches above
    parser.set_defaults(assume_scaled_solar=False, multithread=True, interpolate=False)
    args = parser.parse_args()

    logger.info("Testing Payne with arguments <{}> <{}> <{}> <{}>".format(
        args.test_library, args.train_library, args.censor_line_list, args.output_file))

    # Labels which are fitted in every run
    test_label_fields = args.labels.split(",")

    # Groups of labels which are each fitted in a run of their own; groups are separated by commas, and the
    # labels within a group by plus signs. With no such labels, we do a single run with no extra labels.
    test_labels_individual = [[]]
    if args.labels_individual:
        test_labels_individual = [group.split("+") for group in args.labels_individual.split(",")]

    # Work out where the libraries of spectra live
    our_path = os_path.split(os_path.abspath(__file__))[0]
    workspace = args.workspace or os_path.join(our_path, "../../../workspace")

    # Search the training library for continuum-normalised spectra
    training_search = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.train_library,
        workspace=workspace,
        extra_constraints={"continuum_normalised": True})
    training_library = training_search["library"]
    training_library_items = training_search["items"]

    # Search the test library for continuum-normalised spectra
    test_search = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.test_library,
        workspace=workspace,
        extra_constraints={"continuum_normalised": True})
    test_library = test_search["library"]
    test_library_items = test_search["items"]

    # Read the whole of the training set
    training_library_ids_all = [entry["specId"] for entry in training_library_items]
    training_spectra_all = training_library.open(ids=training_library_ids_all)
    raster = training_spectra_all.wavelengths

    # The test set is read one spectrum at a time later on; for now just list the IDs
    test_library_ids = [entry["specId"] for entry in test_library_items]

    # One complete train-and-test cycle per group of individually-fitted labels
    for run_number, individual_labels in enumerate(test_labels_individual):
        # Output filename for this run; runs are numbered if there is more than one of them
        output_filename = args.output_file
        if len(test_labels_individual) > 1:
            output_filename += "-{:03d}".format(run_number)

        label_fields_this_run = test_label_fields + individual_labels

        # Either fill in missing labels by assuming scaled-solar abundances, or filter the training set
        if args.assume_scaled_solar:
            training_spectra = autocomplete_scaled_solar_abundances(
                input_spectra=training_spectra_all,
                label_list=label_fields_this_run)
        else:
            training_spectra = filter_training_spectra(
                input_spectra=training_spectra_all,
                label_list=label_fields_this_run,
                input_library=training_library,
                input_spectrum_ids=training_library_ids_all)

        # Labels which are computed from expressions involving the metadata
        test_labels_expressions = []
        if args.label_expressions.strip():
            test_labels_expressions = args.label_expressions.split(",")
            evaluate_computed_labels(label_expressions=test_labels_expressions,
                                     spectra=training_spectra)

        # Everything the Payne is to fit in this run
        test_labels = label_fields_this_run + test_labels_expressions
        logger.info("Beginning fit of labels <{}>.".format(",".join(test_labels)))

        # Censoring masks, if any
        censoring_masks = create_censoring_masks(
            censoring_scheme=int(args.censor_scheme),
            raster=raster,
            censoring_line_list=args.censor_line_list,
            label_fields=test_label_fields + individual_labels,
            label_expressions=test_labels_expressions)

        # Build the model, timing how long it takes
        time_training_start = time.time()
        model = PayneInstanceTing(training_set=training_spectra,
                                  label_names=test_labels,
                                  batch_number=args.batch_number,
                                  batch_count=args.batch_count,
                                  censors=censoring_masks,
                                  threads=None if args.multithread else 1,
                                  training_data_archive=output_filename)
        time_training_end = time.time()

        # Fit each test spectrum in turn
        spectrum_count = len(test_library_ids)
        time_taken = np.zeros(spectrum_count)
        results = []
        for index, spectrum_id in enumerate(test_library_ids):
            spectrum = test_library.open(ids=spectrum_id).extract_item(0)
            logger.info("Testing {}/{}: {}".format(
                index + 1, spectrum_count, spectrum.metadata['Starname']))

            # Start the clock for this spectrum
            time_start = time.time()

            # Optionally resample onto the raster of the training set. DANGEROUS!
            if args.interpolate:
                spectrum = resample_spectrum(spectrum=spectrum,
                                             training_spectra=training_spectra)

            # Fit the spectrum
            fit_data = model.fit_spectrum(spectrum=spectrum)

            # Stop the clock
            time_taken[index] = time.time() - time_start

            # Which star was this?
            if "Starname" in spectrum.metadata:
                star_name = spectrum.metadata["Starname"]
            else:
                star_name = ""
            if "uid" in spectrum.metadata:
                uid = spectrum.metadata["uid"]
            else:
                uid = ""

            # The uncertainties are placeholders for now: one zero per label
            err_labels = [0] * len(test_labels)

            # Dictionary of label values, followed by the uncertainty in each under the key E_<label>
            payne_output = dict(zip(test_labels, fit_data['results'][0]))
            for label_name, label_error in zip(test_labels, err_labels):
                payne_output["E_{}".format(label_name)] = label_error

            results.append({
                "Starname": star_name,
                "uid": uid,
                "time": time_taken[index],
                "spectrum_metadata": spectrum.metadata,
                "cannon_output": payne_output
            })

        # Report how long the fitting took
        logger.info(
            "Fitting of {:d} spectra completed. Took {:.2f} +/- {:.2f} sec / spectrum."
            .format(spectrum_count, np.mean(time_taken), np.std(time_taken)))

        # Convert the censoring masks into tuples of integers
        censoring_output = None
        if censoring_masks is not None:
            censoring_output = {
                label: tuple(int(pixel) for pixel in mask)
                for label, mask in censoring_masks.items()
            }

        output_data = {
            "hostname": os.uname()[1],
            "generator": __file__,
            "4gp_version": fourgp_version,
            "cannon_version": None,
            "payne_version": model.payne_version,
            "start_time": time_training_start,
            "end_time": time.time(),
            "training_time": time_training_end - time_training_start,
            "description": args.description,
            "train_library": args.train_library,
            "test_library": args.test_library,
            "tolerance": None,
            "assume_scaled_solar": args.assume_scaled_solar,
            "line_list": args.censor_line_list,
            "labels": test_labels,
            "wavelength_raster": tuple(raster),
            "censoring_mask": censoring_output
        }

        # Brief summary of the run, without the results for each spectrum
        summary_json = json.dumps(output_data, indent=2)
        with gzip.open("{:s}.summary.json.gz".format(output_filename), "wt") as summary_file:
            summary_file.write(summary_json)

        # Full results, including the output for every test spectrum
        output_data["spectra"] = results
        full_json = json.dumps(output_data, indent=2)
        with gzip.open("{:s}.full.json.gz".format(output_filename), "wt") as full_file:
            full_file.write(full_json)
synthesizer.set_star_list(star_list) # Create new SpectrumLibrary synthesizer.create_spectrum_library() # Iterate over the spectra we're supposed to be synthesizing synthesizer.do_synthesis() # Close TurboSpectrum synthesizer instance synthesizer.clean_up() # Load spectrum spectra = SpectrumLibrarySqlite.open_and_search( library_spec=synthesizer.args.library, workspace=synthesizer.workspace, extra_constraints={ "Starname": "Sun", "continuum_normalised": 0 }) input_library, input_spectra_ids, input_spectra_constraints = [ spectra[i] for i in ("library", "items", "constraints") ] input_spectrum_array = input_library.open(ids=input_spectra_ids[0]['specId']) input_spectrum = input_spectrum_array.extract_item(0) # Process spectra through reddening model reddener = SpectrumReddener(input_spectrum=input_spectrum) # Instantiate 4FS wrapper etc_wrapper = FourFS(path_to_4fs=os_path.join(synthesizer.args.binary_path,
default="/tmp/fourfs_{}.log".format(pid), dest="log_to", help="Specify a log file where we log our progress.") args = parser.parse_args() logger.info("Running 4FS on spectra from <{}>, going into <{}> <{}>".format(args.input_library, args.output_library_lrs, args.output_library_hrs)) # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Open input SpectrumLibrary, and search for flux normalised spectra meeting our filtering constraints spectra = SpectrumLibrarySqlite.open_and_search(library_spec=args.input_library, workspace=workspace, extra_constraints={"continuum_normalised": 0} ) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [spectra[i] for i in ("library", "items", "constraints")] # Create new SpectrumLibrary(s) to hold the output from 4FS output_libraries = {} for mode in ({"name": "LRS", "library": args.output_library_lrs, "active": args.run_lrs}, {"name": "HRS", "library": args.output_library_hrs, "active": args.run_hrs}): if mode['active']: # Create spectrum library library_name = re.sub("/", "_", mode['library']) library_path = os_path.join(workspace, library_name) output_library = SpectrumLibrarySqlite(path=library_path, create=args.create)
default="/tmp/half_ellipse_convolution_{}.log".format(pid), dest="log_to", help="Specify a log file where we log our progress.") args = parser.parse_args() logger.info( "Adding {} convolution to spectra from <{}>, going into <{}>".format( args.kernel, args.input_library, args.output_library)) # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Open input SpectrumLibrary, and search for flux normalised spectra meeting our filtering constraints spectra = SpectrumLibrarySqlite.open_and_search( library_spec=args.input_library, workspace=workspace, extra_constraints={}) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [ spectra[i] for i in ("library", "items", "constraints") ] # Create new spectrum library for output library_name = re.sub("/", "_", args.output_library) library_path = os_path.join(workspace, library_name) output_library = SpectrumLibrarySqlite(path=library_path, create=args.create) # We may want to symlink the sqlite3 database file into /tmp for performance reasons # This bit of crack-on-a-stick is only useful if /tmp is on a ram disk, though... if args.db_in_tmp: del output_library
parser.add_argument('--no-create', action='store_false', dest="create",
                    help="Do not create a clean spectrum library to feed output spectra into.")
parser.set_defaults(create=True)
args = parser.parse_args()

# Set path to workspace where we create libraries of spectra
# If no workspace was given on the command line, fall back to a path relative to this file
our_path = os_path.split(os_path.abspath(__file__))[0]
workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace")

# Create the workspace directly rather than via a shell, so that paths containing spaces or shell metacharacters
# work, and so that a failure is reported rather than silently ignored
os.makedirs(workspace, exist_ok=True)

# Create new spectrum library
# Slashes in the library name are replaced, so that the library is always a direct child of the workspace
library_name = re.sub("/", "_", args.library)
library_path = os_path.join(workspace, library_name)
library = SpectrumLibrarySqlite(path=library_path, create=args.create)

# Open fits spectrum
# The data is read from the item at index 1 of the file, using the columns LAMBDA and FLUX
f = fits.open(args.filename)
data = f[1].data

wavelengths = data['LAMBDA']
fluxes = data['FLUX']

# Create 4GP spectrum object
# No uncertainties are read from the file, so an array of zeros is supplied in their place
spectrum = Spectrum(wavelengths=wavelengths,
                    values=fluxes,
                    value_errors=np.zeros_like(wavelengths),
                    metadata={
                        "imported_from": args.filename
                    })
def main():
    """
    Main entry point for running the Cannon.

    Parses the command-line arguments, opens the training and test spectrum libraries, and then -- for each batch of
    individually-fitted labels -- either trains a Cannon from scratch or reloads a previously saved one, fits every
    spectrum in the test library, and writes the results to disk.

    For each batch the following files are written, where <stub> is the value of --output-file (with a numeric suffix
    appended when more than one batch is being fitted):

    * <stub>.cannon -- the trained model (only when training from scratch).
    * <stub>.summary.json.gz -- a brief description of the run.
    * <stub>.full.json.gz -- the summary plus the fitted labels for every test spectrum.

    Exits via `parser.error` (status 2) if neither --train nor --reload-cannon is supplied, or if --interpolate is
    combined with --reload-cannon.

    :return:
        None
    """
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s',
                        datefmt='%d/%m/%Y %H:%M:%S')
    logger = logging.getLogger(__name__)

    # Read input parameters
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--test', required=True, dest='test_library',
                        help="Library of spectra to test the trained Cannon on. Stars may be filtered by parameters by "
                             "placing a comma-separated list of constraints in [] brackets after the name of the "
                             "library. Use the syntax [Teff=3000] to demand equality, or [0<[Fe/H]<0.2] to specify a "
                             "range.")
    parser.add_argument('--train', required=False, dest='train_library', default=None,
                        help="Library of labelled spectra to train the Cannon on. Stars may be filtered by parameters "
                             "by placing a comma-separated list of constraints in [] brackets after the name of the "
                             "library. Use the syntax [Teff=3000] to demand equality, or [0<[Fe/H]<0.2] to specify a "
                             "range.")
    parser.add_argument('--workspace', dest='workspace', default="",
                        help="Directory where we expect to find spectrum libraries.")
    parser.add_argument('--cannon-version', default="casey_old", dest='cannon_version',
                        choices=("casey_old", "casey_new", "anna_ho"),
                        help="Select which implementation of the Cannon to use: Andy Casey's or Anna Ho's.")
    parser.add_argument('--polynomial-order', default=2, dest='polynomial_order', type=int,
                        help="The maximum order of polynomials to use as basis functions in the Cannon.")
    parser.add_argument('--continuum-normalisation', default="none", dest='continuum_normalisation',
                        help="Select continuum normalisation method: none, running_mean or polynomial.")
    parser.add_argument('--reload-cannon', required=False, dest='reload_cannon', default=None,
                        help="Skip training step, and reload a Cannon that we've previously trained. Specify the full "
                             "path to the .cannon file containing the trained Cannon, but without the .cannon suffix.")
    parser.add_argument('--description', dest='description',
                        help="A description of this fitting run.")
    parser.add_argument('--labels', dest='labels', default="Teff,logg,[Fe/H]",
                        help="List of the labels the Cannon is to learn to estimate.")
    parser.add_argument('--label-expressions', dest='label_expressions', default="",
                        help="List of the algebraic labels the Cannon is to learn to estimate "
                             "(e.g. photometry_B - photometry_V).")
    parser.add_argument('--labels-individual', dest='labels_individual', default="",
                        help="List of the labels the Cannon is to fit in separate fitting runs.")
    parser.add_argument('--censor-scheme', default="1", dest='censor_scheme',
                        help="Censoring scheme version to use (1, 2 or 3).")
    parser.add_argument('--censor', default="", dest='censor_line_list',
                        help="Optional list of line positions for the Cannon to fit, ignoring continuum between.")
    parser.add_argument('--tolerance', default=None, dest='tolerance', type=float,
                        help="The parameter xtol which is passed to the scipy optimisation routines as xtol to "
                             "determine whether they have converged.")
    parser.add_argument('--output-file', default="./test_cannon.out", dest='output_file',
                        help="Data file to write output to.")
    parser.add_argument('--assume-scaled-solar', action='store_true', dest="assume_scaled_solar",
                        help="Assume scaled solar abundances for any elements which don't have abundances individually "
                             "specified. Useful for working with incomplete data sets.")
    parser.add_argument('--no-assume-scaled-solar', action='store_false', dest="assume_scaled_solar",
                        help="Do not assume scaled solar abundances; throw an error if training set is has missing "
                             "labels.")
    parser.set_defaults(assume_scaled_solar=False)
    parser.add_argument('--multithread', action='store_true', dest="multithread",
                        help="Use multiple thread to speed Cannon up.")
    parser.add_argument('--nothread', action='store_false', dest="multithread",
                        help="Do not use multiple threads - use only one CPU core.")
    parser.set_defaults(multithread=True)
    parser.add_argument('--interpolate', action='store_true', dest="interpolate",
                        help="Interpolate the test spectra on the training spectra's wavelength raster. DANGEROUS!")
    parser.add_argument('--nointerpolate', action='store_false', dest="interpolate",
                        help="Do not interpolate the test spectra onto a different raster.")
    parser.set_defaults(interpolate=False)
    args = parser.parse_args()

    # Find out whether we're reloading a previously saved Cannon
    reloading_cannon = args.reload_cannon is not None

    # Reject inconsistent argument combinations up front, rather than failing obscurely part-way through the run.
    if not reloading_cannon and args.train_library is None:
        parser.error("You must specify either --train (to train a new Cannon) or --reload-cannon.")
    if reloading_cannon and args.interpolate:
        # Interpolation needs the training spectra themselves, which are never loaded when a Cannon is reloaded.
        parser.error("--interpolate cannot be combined with --reload-cannon, as the training spectra are not loaded.")

    logger.info("Testing Cannon with arguments <{}> <{}> <{}> <{}>".format(args.test_library,
                                                                          args.train_library,
                                                                          args.censor_line_list,
                                                                          args.output_file))

    # Pick which Cannon version to use
    cannon_class, continuum_normalised_testing, continuum_normalised_training = \
        select_cannon(cannon_version=args.cannon_version,
                      continuum_normalisation=args.continuum_normalisation)

    # List of labels over which we are going to test the performance of the Cannon
    test_label_fields = args.labels.split(",")

    # List of labels we're going to fit individually. Each comma-separated item is one batch; labels joined by "+"
    # are fitted together within the same batch. With no individual labels we run a single batch with no extras.
    if args.labels_individual:
        test_labels_individual = [i.split("+") for i in args.labels_individual.split(",")]
    else:
        test_labels_individual = [[]]

    # Set path to workspace where we expect to find libraries of spectra
    our_path = os_path.split(os_path.abspath(__file__))[0]
    workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace")

    # Open training set
    training_library = training_library_ids_all = None
    if not reloading_cannon:
        spectra = SpectrumLibrarySqlite.open_and_search(
            library_spec=args.train_library,
            workspace=workspace,
            extra_constraints={"continuum_normalised": continuum_normalised_training}
        )
        training_library, training_library_items = [spectra[i] for i in ("library", "items")]

        # Make list of IDs of all spectra in the training set
        training_library_ids_all = [i["specId"] for i in training_library_items]

    # Open test set
    spectra = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.test_library,
        workspace=workspace,
        extra_constraints={"continuum_normalised": continuum_normalised_testing}
    )
    test_library, test_library_items = [spectra[i] for i in ("library", "items")]

    # Make list of IDs of all spectra in the test set
    test_library_ids = [i["specId"] for i in test_library_items]

    # Only the training branch below populates these; initialise them so every later reference is well defined.
    training_spectra = None
    summary_json = None

    # Fit each set of labels we're fitting individually, one by one
    for labels_individual_batch_count, test_labels_individual_batch in enumerate(test_labels_individual):

        # Create filename for the output from this Cannon run
        output_filename = args.output_file

        # If we're fitting elements individually, individually number the runs to fit each element
        if len(test_labels_individual) > 1:
            output_filename += "-{:03d}".format(labels_individual_batch_count)

        # Sequence of tasks if we're reloading a pre-saved Cannon from disk
        if reloading_cannon:

            # Load the JSON data that summarises the Cannon training that we're about to reload
            json_summary_filename = "{}.summary.json.gz".format(args.reload_cannon)
            cannon_pickle_filename = "{}.cannon".format(args.reload_cannon)

            with gzip.open(json_summary_filename, "rt") as f:
                summary_json = json.load(f)

            raster = np.array(summary_json['wavelength_raster'])
            test_labels = summary_json['labels']
            training_library_ids = summary_json['training_spectra_ids']
            training_library_string = summary_json['train_library']
            assume_scaled_solar = summary_json['assume_scaled_solar']
            tolerance = summary_json['tolerance']
            line_list = summary_json['line_list']
            censoring_masks = None

            # If we're doing our own continuum normalisation, we need to treat each wavelength arm separately
            wavelength_arm_breaks = SpectrumProperties(raster).wavelength_arms()['break_points']

            time_training_start = time.time()
            model = cannon_class(training_set=None,
                                 wavelength_arms=wavelength_arm_breaks,
                                 load_from_file=cannon_pickle_filename,
                                 label_names=test_labels,
                                 tolerance=args.tolerance,
                                 polynomial_order=args.polynomial_order,
                                 censors=None,
                                 threads=None if args.multithread else 1
                                 )
            time_training_end = time.time()

        # Sequence of tasks if we're training a Cannon from scratch
        else:
            training_library_string = args.train_library
            assume_scaled_solar = args.assume_scaled_solar
            tolerance = args.tolerance
            line_list = args.censor_line_list

            # Labels which every training spectrum must carry for this batch
            batch_label_fields = test_label_fields + test_labels_individual_batch

            # If requested, fill in any missing labels on the training set by assuming scaled-solar abundances
            if args.assume_scaled_solar:
                training_library_ids, training_spectra = autocomplete_scaled_solar_abundances(
                    training_library=training_library,
                    training_library_ids_all=training_library_ids_all,
                    label_list=batch_label_fields
                )

            # Otherwise we reject any training spectra which have incomplete labels
            else:
                training_library_ids, training_spectra = filter_training_spectra(
                    training_library=training_library,
                    training_library_ids_all=training_library_ids_all,
                    label_list=batch_label_fields
                )

            # Look up the raster on which the training spectra are sampled
            raster = training_spectra.wavelengths

            # Evaluate labels which are calculated via metadata expressions
            test_labels_expressions = []
            if args.label_expressions.strip():
                test_labels_expressions = args.label_expressions.split(",")
                evaluate_computed_labels(label_expressions=test_labels_expressions, spectra=training_spectra)

            # Make combined list of all labels the Cannon is going to fit
            test_labels = batch_label_fields + test_labels_expressions
            logger.info("Beginning fit of labels <{}>.".format(",".join(test_labels)))

            # If required, generate the censoring masks
            censoring_masks = create_censoring_masks(
                censoring_scheme=int(args.censor_scheme),
                raster=raster,
                censoring_line_list=args.censor_line_list,
                label_fields=batch_label_fields,
                label_expressions=test_labels_expressions
            )

            # If we're doing our own continuum normalisation, we need to treat each wavelength arm separately
            wavelength_arm_breaks = SpectrumProperties(raster).wavelength_arms()['break_points']

            # Construct and train a model
            time_training_start = time.time()
            model = cannon_class(training_set=training_spectra,
                                 wavelength_arms=wavelength_arm_breaks,
                                 label_names=test_labels,
                                 tolerance=args.tolerance,
                                 polynomial_order=args.polynomial_order,
                                 censors=censoring_masks,
                                 threads=None if args.multithread else 1
                                 )
            time_training_end = time.time()

            # Save the model
            model.save_model(filename="{:s}.cannon".format(output_filename),
                             overwrite=True
                             )

        # Test the model
        N = len(test_library_ids)
        # NOTE: spectra for which the Cannon fails keep a time of zero, and are still included in the statistics
        # reported at the end of the batch.
        time_taken = np.zeros(N)
        results = []
        for index, test_spectrum_id in enumerate(test_library_ids):
            test_spectrum_array = test_library.open(ids=test_spectrum_id)
            spectrum = test_spectrum_array.extract_item(0)

            # Identify which star it is. Neither field is guaranteed to be present, so fall back to an empty string.
            star_name = spectrum.metadata["Starname"] if "Starname" in spectrum.metadata else ""
            uid = spectrum.metadata["uid"] if "uid" in spectrum.metadata else ""

            logger.info("Testing {}/{}: {}".format(index + 1, N, star_name))

            # Calculate the time taken to process this spectrum
            time_start = time.time()

            # If requested, interpolate the test set onto the same raster as the training set. DANGEROUS!
            if args.interpolate:
                spectrum = resample_spectrum(spectrum=spectrum, training_spectra=training_spectra)

            # Pass spectrum to the Cannon
            labels, cov, meta = model.fit_spectrum(spectrum=spectrum)

            # Check whether Cannon failed
            if labels is None:
                continue

            # Measure the time taken
            time_end = time.time()
            time_taken[index] = time_end - time_start

            # From the label covariance matrix extract the standard deviation in each label value
            # (diagonal terms in the matrix are variances)
            if args.cannon_version == "anna_ho":
                err_labels = cov[0]
            else:
                err_labels = np.sqrt(np.diag(cov[0]))

            # Turn list of label values into a dictionary
            cannon_output = dict(zip(test_labels, labels[0]))

            # Add the standard deviations of each label into the dictionary
            cannon_output.update(zip(["E_{}".format(label_name) for label_name in test_labels], err_labels))

            # Add the star name and the metadata of the test spectrum
            results.append({"Starname": star_name,
                            "uid": uid,
                            "time": time_taken[index],
                            "spectrum_metadata": spectrum.metadata,
                            "cannon_output": cannon_output
                            })

        # Report time taken
        logger.info("Fitting of {:d} spectra completed. Took {:.2f} +/- {:.2f} sec / spectrum.".
                    format(N, np.mean(time_taken), np.std(time_taken)))

        # Create output data structure. Masks are converted to tuples of ints so that they can be serialised as JSON.
        censoring_output = None
        if reloading_cannon:
            censoring_output = summary_json['censoring_mask']
        elif censoring_masks is not None:
            censoring_output = {label: tuple(int(i) for i in mask)
                                for label, mask in censoring_masks.items()}

        output_data = {
            "hostname": os.uname()[1],
            "generator": __file__,
            "4gp_version": fourgp_version,
            "cannon_version": model.cannon_version,
            "start_time": time_training_start,
            "end_time": time.time(),
            "training_time": time_training_end - time_training_start,
            "description": args.description,
            "train_library": training_library_string,
            "test_library": args.test_library,
            "training_spectra_ids": training_library_ids,
            "tolerance": tolerance,
            "assume_scaled_solar": assume_scaled_solar,
            "line_list": line_list,
            "labels": test_labels,
            "wavelength_raster": tuple(raster),
            "censoring_mask": censoring_output
        }

        # Write brief summary of run to JSON file, without masses of data
        with gzip.open("{:s}.summary.json.gz".format(output_filename), "wt") as f:
            f.write(json.dumps(output_data, indent=2))

        # Write full results to JSON file
        output_data["spectra"] = results
        with gzip.open("{:s}.full.json.gz".format(output_filename), "wt") as f:
            f.write(json.dumps(output_data, indent=2))
# Set path to workspace where we expect to find libraries of spectra
our_path = os_path.split(os_path.abspath(__file__))[0]
workspace = os_path.join(our_path, "../../../../workspace")

# Create directory to store output files in. This is done directly rather than via a shell command, so that paths
# containing spaces or shell metacharacters are handled correctly and failures are reported immediately.
os.makedirs(args.output_stub, exist_ok=True)

# Fetch title for this Cannon run. The context manager ensures the gzip file handle is closed once it has been read.
with gzip.open(args.cannon + ".summary.json.gz", "rt") as summary_file:
    cannon_output = json.load(summary_file)
description = cannon_output['description']

# Open spectrum library we originally trained the Cannon on
training_spectra_info = SpectrumLibrarySqlite.open_and_search(
    library_spec=cannon_output["train_library"],
    workspace=workspace,
    extra_constraints={"continuum_normalised": 1})

training_library, training_library_items = [
    training_spectra_info[i] for i in ("library", "items")
]

# Load training set
training_library_ids = [i["specId"] for i in training_library_items]
training_spectra = training_library.open(ids=training_library_ids)

# Recreate a Cannon instance, using the saved state.
# The masks were read from JSON, so each one is converted back into a numpy array. This is done in place, so that
# cannon_output["censoring_mask"] sees the converted values too.
censoring_masks = cannon_output["censoring_mask"]
if censoring_masks is not None:
    for key, value in censoring_masks.items():
        censoring_masks[key] = np.asarray(value)
"output multiple times, once at each magnitude.") args = parser.parse_args() # Start logger logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s:%(filename)s:%(message)s', datefmt='%d/%m/%Y %H:%M:%S') logger = logging.getLogger(__name__) logger.info("Calculating magnitudes and exposure times for templates") # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Turn set of templates into a spectrum library with path specified above library_path = os_path.join(workspace, args.library) library = SpectrumLibrarySqlite(path=library_path, create=True) # Fetch a list of all the input template spectra which match the supplied filename wildcard templates = glob.glob(args.input) templates.sort() # Parse any definitions of SNR we were supplied on the command line if (args.snr_definitions is None) or (len(args.snr_definitions) < 1): snr_definitions = None else: snr_definitions = [] for snr_definition in args.snr_definitions: words = snr_definition.split(",") snr_definitions.append([words[0], float(words[1]), float(words[2])]) # Look up what definition of SNR is user specified we should use for 4MOST LRS
default="/tmp/gaussian_convolution_{}.log".format(pid), dest="log_to", help="Specify a log file where we log our progress.") args = parser.parse_args() logger.info("Calculating SNR ratios of spectra in <{}>".format( args.input_library)) # Set path to workspace where we create libraries of spectra workspace = args.workspace if args.workspace else os_path.join( our_path, "../../../workspace") os.system("mkdir -p {}".format(workspace)) # Open input SpectrumLibrary, and search for flux normalised spectra meeting our filtering constraints spectra = SpectrumLibrarySqlite.open_and_search( library_spec=args.input_library, workspace=workspace, extra_constraints={"continuum_normalised": 0}) # Get a list of the spectrum IDs which we were returned input_library, input_spectra_ids, input_spectra_constraints = [ spectra[i] for i in ("library", "items", "constraints") ] # Parse any definitions of SNR we were supplied on the command line if (args.snr_definitions is None) or (len(args.snr_definitions) < 1): snr_definitions = None else: snr_definitions = [] for snr_definition in args.snr_definitions: words = snr_definition.split(",") snr_definitions.append([words[0], float(words[1]), float(words[2])])
def resample_templates(args, logger):
    """
    Resample a spectrum library of templates onto a fixed logarithmic stride, representing each of the 4MOST arms in
    turn. We use 4FS to down-sample the templates to the resolution of 4MOST observations, and automatically detect
    the list of arms contained within each 4FS mock observation. We then resample the 4FS output onto a new raster
    with fixed logarithmic stride.

    Every resampled arm is inserted into the output library as a separate spectrum. Pixels whose value is not finite
    are replaced by 1 with an error of 1000, and pixels whose error is NaN are given an error of 1000.

    :param args:
        Object containing arguments supplied by the user, for example the name of the spectrum libraries we use for
        input and output. The fields read here are `workspace`, `our_path`, `templates_in`, `templates_out` and
        `binary_path`.

    :param logger:
        A python logging object.

    :return:
        None.
    """
    # Set path to workspace where we expect to find libraries of spectra
    workspace = args.workspace if args.workspace else os_path.join(args.our_path, "../../../workspace")

    # Open input template spectra (flux-normalised versions only)
    spectra = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.templates_in,
        workspace=workspace,
        extra_constraints={"continuum_normalised": 0})

    templates_library = spectra["library"]
    templates_library_items = spectra["items"]

    # Create new SpectrumLibrary to hold the resampled output templates
    library_path = os_path.join(workspace, args.templates_out)
    output_library = SpectrumLibrarySqlite(path=library_path, create=True)

    # Instantiate 4FS wrapper
    etc_wrapper = FourFS(path_to_4fs=os_path.join(args.binary_path, "OpSys/ETC"),
                         snr_list=[250.],
                         magnitude=13,
                         snr_per_pixel=True)

    for input_spectrum_id in templates_library_items:
        logger.info("Working on <{}>".format(input_spectrum_id['filename']))

        # Open Spectrum data from disk
        input_spectrum_array = templates_library.open(ids=input_spectrum_id['specId'])

        # Load template spectrum (flux normalised)
        template_flux_normalised = input_spectrum_array.extract_item(0)
        template_metadata = template_flux_normalised.metadata

        # Look up the unique ID of the star we've just loaded
        # Newer spectrum libraries have a uid field which is guaranteed unique; for older spectrum libraries use
        # Starname instead.

        # Work out which field we're using (uid or Starname)
        spectrum_matching_field = 'uid' if 'uid' in template_metadata else 'Starname'

        # Look up the unique ID of this object
        object_name = template_metadata[spectrum_matching_field]

        # Name to use when reporting problems with this template; fall back to the matching ID if there is no Starname
        display_name = template_metadata['Starname'] if 'Starname' in template_metadata else object_name

        # Search for the continuum-normalised version of this same object (which will share the same uid / name)
        search_criteria = {
            spectrum_matching_field: object_name,
            'continuum_normalised': 1
        }
        continuum_normalised_spectrum_id = templates_library.search(**search_criteria)

        # Check that continuum-normalised spectrum exists and is unique
        assert len(continuum_normalised_spectrum_id) == 1, "Could not find continuum-normalised spectrum."

        # Load the continuum-normalised version
        template_continuum_normalised_arr = templates_library.open(
            ids=continuum_normalised_spectrum_id[0]['specId'])

        # Turn the SpectrumArray we got back into a single Spectrum
        template_continuum_normalised = template_continuum_normalised_arr.extract_item(0)

        # Now create a mock observation of this template using 4FS
        logger.info("Passing template through 4FS")
        mock_observed_template = etc_wrapper.process_spectra(
            spectra_list=((template_flux_normalised, template_continuum_normalised),))

        # Loop over LRS and HRS
        for mode in mock_observed_template:

            # Loop over the spectra we simulated (there was only one!)
            for index in mock_observed_template[mode]:

                # Loop over the various SNRs we simulated (there was only one!)
                for snr in mock_observed_template[mode][index]:

                    # Create a unique ID for this mock observation; it is shared by all of the arms and spectrum
                    # types which are derived from it
                    unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

                    # Import the flux- and continuum-normalised spectra separately, but give them the same ID
                    for spectrum_type in mock_observed_template[mode][index][snr]:

                        # Extract this mock observation
                        logger.info("Resampling {} spectrum".format(mode))
                        mock_observed = mock_observed_template[mode][index][snr][spectrum_type]

                        # Replace errors which are nans with a large value
                        mock_observed.value_errors[np.isnan(mock_observed.value_errors)] = 1000.

                        # Check for non-finite values (NaN or infinity) in the spectrum itself. The same mask is
                        # used for the test and for the repair, so that every offending pixel is replaced.
                        bad_pixels = ~np.isfinite(mock_observed.values)
                        if np.any(bad_pixels):
                            logger.warning("Non-finite values in template <{}>".format(display_name))
                            mock_observed.value_errors[bad_pixels] = 1000.
                            mock_observed.values[bad_pixels] = 1.

                        # Resample template onto a logarithmic raster of fixed step
                        resampler = SpectrumResampler(mock_observed)

                        # Construct the raster for each wavelength arm
                        wavelength_arms = SpectrumProperties(mock_observed.wavelengths).wavelength_arms()

                        # Resample 4FS output for each arm onto a fixed logarithmic stride
                        for arm_count, arm in enumerate(wavelength_arms["wavelength_arms"]):
                            observed_raster, mean_pixel_width = arm

                            # Build a logarithmic raster spanning the same wavelength range as the observed arm
                            arm_raster = logarithmic_raster(lambda_min=observed_raster[0],
                                                            lambda_max=observed_raster[-1],
                                                            lambda_step=mean_pixel_width)

                            # Resample 4FS output onto a fixed logarithmic step
                            mock_observed_arm = resampler.onto_raster(arm_raster)

                            # Save it into output spectrum library. The wavelength limits recorded are those of the
                            # new logarithmic raster.
                            output_library.insert(
                                spectra=mock_observed_arm,
                                filenames=input_spectrum_id['filename'],
                                metadata_list={
                                    "uid": unique_id,
                                    "template_id": object_name,
                                    "mode": mode,
                                    "arm_name": "{}_{}".format(mode, arm_count),
                                    "lambda_min": arm_raster[0],
                                    "lambda_max": arm_raster[-1],
                                    "lambda_step": mean_pixel_width
                                })
# Set path to workspace where we create libraries of spectra
our_path = os_path.split(os_path.abspath(__file__))[0]
workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace")

# Create the workspace directly rather than via a shell command, so that paths containing spaces or shell
# metacharacters are handled correctly and a failure to create the directory is reported immediately.
os.makedirs(workspace, exist_ok=True)

# Create new SpectrumLibrary(s) to hold the output from 4FS, keyed by the kind of spectrum each one stores
output_libraries = {}

for mode, output_library in (("original", args.library),
                             ("resampled", args.library_resampled),
                             ("LRS", args.library_lrs),
                             ("HRS", args.library_hrs)):
    # Slashes in the requested name are flattened to underscores so the library lives directly in the workspace
    library_name = re.sub("/", "_", output_library)
    library_path = os_path.join(workspace, library_name)
    output_libraries[mode] = SpectrumLibrarySqlite(path=library_path, create=args.create)

# Instantiate 4FS wrapper
etc_wrapper = FourFS(path_to_4fs=os_path.join(args.binary_path, "OpSys/ETC"),
                     magnitude=13,
                     snr_list=[250])

# Open fits spectrum
for item in glob.glob(os_path.join(args.fits_path, "*.all6")):
    filename = os_path.split(item)[1]

    # Open FITS file
    f = fits.open(item)

    # Extract headers and import them as metadata in SpectrumLibrary
    headers = f[0].header
output_select = random.uniform(a=0, b=weights_sum) for index, weight in enumerate(weights): output_select -= weight if output_select <= 0: selected_index = index break return selected_index # Open input spectrum library(s), and fetch a list of all the flux-normalised spectra within each input_libraries = [] if args.input_library is not None: input_libraries = [ SpectrumLibrarySqlite.open_and_search( library_spec=item, workspace=workspace, extra_constraints={"continuum_normalised": 0}) for item in args.input_library ] # Report to user how many spectra we have just found logger.info("Opening {:d} input libraries. These contain {:s} spectra.".format( len(input_libraries), str([len(x['items']) for x in input_libraries]))) # Open contaminating spectrum library(s), if any, and fetch a list of all the flux-normalised spectra within each contamination_libraries = [] if args.contamination_library is not None: contamination_libraries = [ SpectrumLibrarySqlite.open_and_search( library_spec=item, workspace=workspace,