class Synthesizer:
    # Convenience function to provide dictionary access to rows of an astropy table
    @staticmethod
    def astropy_row_to_dict(x):
        return dict([(i, x[i]) for i in x.columns])

    # Read input parameters
    def __init__(self, library_name, logger, docstring, root_path="../../../..", spectral_resolution=50000):
        self.logger = logger
        self.our_path = os_path.split(os_path.abspath(__file__))[0]
        self.root_path = os_path.abspath(os_path.join(self.our_path, root_path, ".."))
        self.pid = os.getpid()
        self.spectral_resolution = spectral_resolution
        parser = argparse.ArgumentParser(description=docstring)
        parser.add_argument('--output-library',
                            required=False,
                            default="turbospec_{}".format(library_name),
                            dest="library",
                            help="Specify the name of the SpectrumLibrary we are to feed synthesized spectra into.")
        parser.add_argument('--workspace', dest='workspace', default="",
                            help="Directory where we expect to find spectrum libraries.")
        parser.add_argument('--create',
                            required=False,
                            action='store_true',
                            dest="create",
                            help="Create a clean SpectrumLibrary to feed synthesized spectra into")
        parser.add_argument('--no-create',
                            required=False,
                            action='store_false',
                            dest="create",
                            help="Do not create a clean SpectrumLibrary to feed synthesized spectra into")
        parser.set_defaults(create=True)
        parser.add_argument('--log-dir',
                            required=False,
                            default="/tmp/turbospec_{}_{}".format(library_name, self.pid),
                            dest="log_to",
                            help="Specify a log directory where we log our progress and configuration files.")
        parser.add_argument('--dump-to-sqlite-file',
                            required=False,
                            default="",
                            dest="sqlite_out",
                            help="Specify an sqlite3 filename where we dump the stellar parameters of the stars.")
        parser.add_argument('--line-lists-dir',
                            required=False,
                            default=self.root_path,
                            dest="lines_dir",
                            help="Specify a directory where line lists for TurboSpectrum can be found.")
        parser.add_argument('--elements',
                            required=False,
                            default="",
                            dest="elements",
                            help="Only read the abundances of a comma-separated list of elements, and use scaled-solar "
                                 "abundances for everything else.")
        parser.add_argument('--binary-path',
                            required=False,
                            default=self.root_path,
                            dest="binary_path",
                            help="Specify a directory where Turbospectrum and Interpol packages are installed.")
        parser.add_argument('--every',
                            required=False,
                            default=1,
                            type=int,
                            dest="every",
                            help="Only process every nth spectrum. "
                                 "This is useful when parallelising this script across multiple processes.")
        parser.add_argument('--skip',
                            required=False,
                            default=0,
                            type=int,
                            dest="skip",
                            help="Skip n spectra before starting to process every nth. "
                                 "This is useful when parallelising this script across multiple processes.")
        parser.add_argument('--limit',
                            required=False,
                            default=0,
                            type=int,
                            dest="limit",
                            help="Only process a maximum of n spectra.")
        self.args = parser.parse_args()

        logging.info("Synthesizing {} to <{}>".format(library_name, self.args.library))

        # Set path to workspace where we create libraries of spectra
        self.workspace = (self.args.workspace if self.args.workspace else
                          os_path.abspath(os_path.join(self.our_path, root_path, "workspace")))
        os.system("mkdir -p {}".format(self.workspace))

    def set_star_list(self, star_list):
        self.star_list = star_list

        # Ensure that every star has a name; number stars if not
        for i, item in enumerate(self.star_list):
            if 'name' not in item:
                item['name'] = "star_{:08d}".format(i)

        # Ensure that every star has free_abundances and extra metadata
        for i, item in enumerate(self.star_list):
            if 'free_abundances' not in item:
                item['free_abundances'] = {}
            if 'extra_metadata' not in item:
                item['extra_metadata'] = {}
            if 'microturbulence' not in item:
                item['microturbulence'] = 1

        # Ensure that we have a table of input data to dump to SQLite, if requested
        for item in self.star_list:
            if 'input_data' not in item:
                item['input_data'] = {'name': item['name'],
                                      'Teff': item['Teff'],
                                      '[Fe/H]': item['[Fe/H]'],
                                      'logg': item['logg']}
                item['input_data'].update(item['free_abundances'])
                item['input_data'].update(item['extra_metadata'])
            if 'name' not in item['input_data']:
                item['input_data']['name'] = item['name']

    def dump_stellar_parameters_to_sqlite(self):
        # Output data into sqlite3 db
        if self.args.sqlite_out:
            os.system("rm -f {}".format(self.args.sqlite_out))
            conn = sqlite3.connect(self.args.sqlite_out)
            c = conn.cursor()

            # Create the stars table, with a TEXT or REAL column for each input-data field
            columns = []
            for col_name, col_value in list(self.star_list[0]['input_data'].items()):
                col_type_str = isinstance(col_value, str)
                columns.append("{} {}".format(col_name, "TEXT" if col_type_str else "REAL"))
            c.execute("CREATE TABLE stars (uid INTEGER PRIMARY KEY, {});".format(",".join(columns)))

            for i, item in enumerate(self.star_list):
                print("Writing sqlite parameter dump: {:5d} / {:5d}".format(i, len(self.star_list)))
                c.execute("INSERT INTO stars (name) VALUES (?);", (item['input_data']['name'],))
                uid = c.lastrowid
                for col_name in item['input_data']:
                    if col_name == "name":
                        continue
                    arguments = (
                        str(item['input_data'][col_name]) if isinstance(item['input_data'][col_name], str)
                        else float(item['input_data'][col_name]),
                        uid
                    )
                    c.execute("UPDATE stars SET {}=? WHERE uid=?;".format(col_name), arguments)
            conn.commit()
            conn.close()

    def create_spectrum_library(self):
        # Create new SpectrumLibrary
        self.library_name = re.sub("/", "_", self.args.library)
        self.library_path = os_path.join(self.workspace, self.library_name)
        self.library = SpectrumLibrarySqlite(path=self.library_path, create=self.args.create)

        # Invoke FourMost data class. Ensure that the spectra we produce are much higher resolution than 4MOST.
        # We down-sample them later to whatever resolution we actually want.
        self.FourMostData = FourMost()
        self.lambda_min = self.FourMostData.bands["LRS"]["lambda_min"]
        self.lambda_max = self.FourMostData.bands["LRS"]["lambda_max"]
        self.line_lists_path = self.FourMostData.bands["LRS"]["line_lists_edvardsson"]

        # Invoke a TurboSpectrum synthesizer instance
        self.synthesizer = TurboSpectrum(
            turbospec_path=os_path.join(self.args.binary_path, "turbospectrum-15.1/exec-gf-v15.1"),
            interpol_path=os_path.join(self.args.binary_path, "interpol_marcs"),
            line_list_paths=[os_path.join(self.args.lines_dir, self.line_lists_path)],
            marcs_grid_path=os_path.join(self.args.binary_path, "fromBengt/marcs_grid"))
        self.synthesizer.configure(lambda_min=self.lambda_min,
                                   lambda_max=self.lambda_max,
                                   lambda_delta=float(self.lambda_min) / self.spectral_resolution,
                                   line_list_paths=[os_path.join(self.args.lines_dir, self.line_lists_path)],
                                   stellar_mass=1)
        self.counter_output = 0

        # Start making log output
        os.system("mkdir -p {}".format(self.args.log_to))
        self.logfile = os.path.join(self.args.log_to, "synthesis.log")

    def do_synthesis(self):
        # Iterate over the spectra we're supposed to be synthesizing
        with open(self.logfile, "w") as result_log:
            for star in self.star_list:
                star_name = star['name']
                unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

                metadata = {
                    "Starname": str(star_name),
                    "uid": str(unique_id),
                    "Teff": float(star['Teff']),
                    "[Fe/H]": float(star['[Fe/H]']),
                    "logg": float(star['logg']),
                    "microturbulence": float(star["microturbulence"])
                }

                # User can specify that we should only do every nth spectrum, if we're running in parallel
                self.counter_output += 1
                if (self.args.limit > 0) and (self.counter_output > self.args.limit):
                    break
                if (self.counter_output - self.args.skip) % self.args.every != 0:
                    continue

                # Pass list of the abundances of individual elements to TurboSpectrum
                free_abundances = dict(star['free_abundances'])
                for element, abundance in list(free_abundances.items()):
                    metadata["[{}/H]".format(element)] = float(abundance)

                # Propagate all ionisation states into metadata
                metadata.update(star['extra_metadata'])

                # Configure Turbospectrum with the stellar parameters of the next star
                self.synthesizer.configure(
                    t_eff=float(star['Teff']),
                    metallicity=float(star['[Fe/H]']),
                    log_g=float(star['logg']),
                    stellar_mass=1 if "stellar_mass" not in star else star["stellar_mass"],
                    turbulent_velocity=1 if "microturbulence" not in star else star["microturbulence"],
                    free_abundances=free_abundances
                )

                # Make spectrum
                time_start = time.time()
                turbospectrum_out = self.synthesizer.synthesise()
                time_end = time.time()

                # Log synthesizer status
                logfile_this = os.path.join(self.args.log_to, "{}.log".format(star_name))
                with open(logfile_this, "w") as f:
                    f.write(json.dumps(turbospectrum_out))

                # Check for errors
                errors = turbospectrum_out['errors']
                if errors:
                    result_log.write("[{}] {:6.0f} sec {}: {}\n".format(time.asctime(),
                                                                        time_end - time_start,
                                                                        star_name,
                                                                        errors))
                    logging.warning("Star <{}> could not be synthesised. Errors were: {}".format(star_name, errors))
                    result_log.flush()
                    continue
                else:
                    logging.info("Synthesis completed without error.")

                # Fetch filename of the spectrum we just generated
                filepath = os_path.join(turbospectrum_out["output_file"])

                # Insert spectrum into SpectrumLibrary
                try:
                    filename = "spectrum_{:08d}".format(self.counter_output)

                    # First import continuum-normalised spectrum, which is in columns 1 and 2
                    metadata['continuum_normalised'] = 1
                    spectrum = Spectrum.from_file(filename=filepath, metadata=metadata, columns=(0, 1), binary=False)
                    self.library.insert(spectra=spectrum, filenames=filename)

                    # Then import version with continuum, which is in columns 1 and 3
                    metadata['continuum_normalised'] = 0
                    spectrum = Spectrum.from_file(filename=filepath, metadata=metadata, columns=(0, 2), binary=False)
                    self.library.insert(spectra=spectrum, filenames=filename)
                except (ValueError, IndexError):
                    result_log.write("[{}] {:6.0f} sec {}: {}\n".format(time.asctime(),
                                                                        time_end - time_start,
                                                                        star_name,
                                                                        "Could not read bsyn output"))
                    result_log.flush()
                    continue

                # Update log file to show our progress
                result_log.write("[{}] {:6.0f} sec {}: {}\n".format(time.asctime(),
                                                                    time_end - time_start,
                                                                    star_name,
                                                                    "OK"))
                result_log.flush()

    def clean_up(self):
        logging.info("Synthesized {:d} spectra.".format(self.counter_output))
        # Close TurboSpectrum synthesizer instance
        self.synthesizer.close()
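
# A minimal sketch of how a driver script might use the class above; the star list shown
# here is hypothetical, but the field names match those read by set_star_list() and
# do_synthesis(). Fields not supplied (name, microturbulence, free_abundances) are filled
# in with defaults by set_star_list().
if __name__ == "__main__":
    synthesizer = Synthesizer(library_name="demo_stars",          # hypothetical library name
                              logger=logging.getLogger(__name__),
                              docstring=__doc__)
    synthesizer.set_star_list([
        {'Teff': 5777, '[Fe/H]': 0.0, 'logg': 4.44},              # Sun-like star, auto-named
        {'Teff': 4500, '[Fe/H]': -1.0, 'logg': 2.5, 'name': 'metal_poor_giant'}
    ])
    synthesizer.dump_stellar_parameters_to_sqlite()  # no-op unless --dump-to-sqlite-file given
    synthesizer.create_spectrum_library()
    synthesizer.do_synthesis()
    synthesizer.clean_up()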
]
grid_axis_index_combinations = itertools.product(*grid_axis_indices)

# Turn Brani's set of templates into a spectrum library with path specified above
library_path = os_path.join(workspace, target_library_name)
library = SpectrumLibrarySqlite(path=library_path, create=True)

# Brani's template spectra do not have any error vectors associated with them, so add an array of zeros
errors_dummy = np.zeros_like(wavelength_raster)

# Import each template spectrum in turn
for i, axis_indices in enumerate(grid_axis_index_combinations):
    filename = "template{:06d}".format(i)
    metadata = {"Starname": filename}
    item = flux_templates
    for axis_counter, index in enumerate(axis_indices):
        metadata_key = grid_axes[axis_counter][0]
        metadata_value = grid_axis_values[axis_counter][index]
        metadata[metadata_key] = metadata_value
        metadata[metadata_key + "_index"] = index
        item = item[index]

    # Turn data into a Spectrum object
    spectrum = Spectrum(wavelengths=wavelength_raster,
                        values=item,
                        value_errors=errors_dummy,
                        metadata=metadata)

    # Import spectrum into our SpectrumLibrary
    library.insert(spectra=spectrum, filenames=filename)
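
# An illustrative stand-alone sketch (not part of the pipeline) of the grid walk above:
# itertools.product enumerates every combination of axis indices, in the same order in
# which the loop descends into the nested flux_templates array. The two demo axes here
# are hypothetical stand-ins for the real grid_axes structure.
import itertools

grid_axes_demo = [("Teff", [4000., 5000., 6000.]), ("logg", [2.0, 4.5])]
grid_axis_indices_demo = [range(len(values)) for _, values in grid_axes_demo]
for axis_indices_demo in itertools.product(*grid_axis_indices_demo):
    # Prints (0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1): one tuple per grid point,
    # each tuple indexing one level deeper into the nested template array
    print(axis_indices_demo)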
# Process spectra through reddening model
reddener = SpectrumReddener(input_spectrum=input_spectrum)

# Loop over each of the values of E(B-V) we are applying to each input spectrum
for e_bv in ebv_list:
    # Create a unique ID for this reddened spectrum (shared between the flux- and continuum-normalised output)
    unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

    # Redden the spectrum
    reddened_spectrum = reddener.redden(e_bv=e_bv)

    # Add metadata to each spectrum recording how much reddening we have applied, and its new UID
    metadata = {"e_bv": e_bv, "uid": unique_id}

    # Calculate how many magnitudes of extinction we have applied to each photometric band of interest
    for band in photometric_bands:
        # Add this data to the metadata for each reddened spectrum
        metadata["A_{}".format(band)] = (reddened_spectrum.photometry(band=band) -
                                         input_spectrum.photometry(band=band))

    # Save the flux-normalised reddened spectrum
    output_library.insert(spectra=reddened_spectrum,
                          filenames=input_spectrum_id['filename'],
                          metadata_list=metadata)

    # Save the continuum-normalised reddened spectrum, which is identical to the input
    output_library.insert(spectra=input_spectrum_continuum_normalised,
                          filenames=continuum_normalised_spectrum_id[0]['filename'],
                          metadata_list=metadata)
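
# The A_<band> metadata recorded above is the magnitude difference between the reddened and
# intrinsic spectrum in each band. Restated as a self-contained helper (a sketch, not part
# of the pipeline): dividing that difference by E(B-V) recovers the effective extinction
# coefficient R_band for the band.
def extinction_coefficient(mag_reddened, mag_intrinsic, e_bv):
    """Return R_band = A_band / E(B-V) for one photometric band."""
    a_band = mag_reddened - mag_intrinsic  # magnitudes of extinction applied
    return a_band / e_bv

# For example, a V-band dimming of 0.31 mag at E(B-V) = 0.1 implies R_V of about 3.1,
# the canonical value for diffuse Milky Way dust:
print(extinction_coefficient(mag_reddened=10.31, mag_intrinsic=10.0, e_bv=0.1))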
parser.set_defaults(create=True)
args = parser.parse_args()

# Set path to workspace where we create libraries of spectra
our_path = os_path.split(os_path.abspath(__file__))[0]
workspace = args.workspace if args.workspace else os_path.join(our_path, "../../../workspace")
os.system("mkdir -p {}".format(workspace))

# Create new spectrum library
library_name = re.sub("/", "_", args.library)
library_path = os_path.join(workspace, library_name)
library = SpectrumLibrarySqlite(path=library_path, create=args.create)

# Open FITS spectrum
f = fits.open(args.filename)
data = f[1].data
wavelengths = data['LAMBDA']
fluxes = data['FLUX']

# Create 4GP spectrum object
spectrum = Spectrum(wavelengths=wavelengths,
                    values=fluxes,
                    value_errors=np.zeros_like(wavelengths),
                    metadata={
                        "imported_from": args.filename
                    })

# Insert spectrum object into spectrum library
library.insert(spectra=spectrum, filenames=os_path.split(args.filename)[1])
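
# The import above assumes that HDU 1 of the input file is a binary table with LAMBDA and
# FLUX columns. If a file fails to load, a quick way to inspect its structure is the
# debugging sketch below (not part of the script itself):
from astropy.io import fits

with fits.open(args.filename) as hdu_list:
    hdu_list.info()              # list the HDUs the file actually contains
    print(hdu_list[1].columns)   # confirm the LAMBDA / FLUX column names are present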
# Recreate a Cannon instance, using the saved state
model = CannonInstance_2018_01_09(training_set=training_spectra,
                                  load_from_file=args.cannon + ".cannon",
                                  label_names=cannon_output["labels"],
                                  censors=censoring_masks,
                                  threads=None)
cannon = model._model

# Create new spectrum library for output
library_name = re.sub("/", "_", args.output_library)
library_path = os_path.join(workspace, library_name)
output_library = SpectrumLibrarySqlite(path=library_path, create=args.create)

# Query Cannon's internal model of each test spectrum in turn
for test_item in cannon_output['spectra']:
    label_values = test_item['cannon_output'].copy()
    label_vector = np.asarray([label_values[key] for key in cannon_output["labels"]])
    cannon_predicted_spectrum = cannon.predict(label_vector)[0]

    spectrum_object = Spectrum(wavelengths=cannon.dispersion[overall_mask],
                               values=cannon_predicted_spectrum[overall_mask],
                               value_errors=cannon.s2[overall_mask])

    output_library.insert(spectra=spectrum_object,
                          filenames=test_item['Starname'],
                          metadata_list=dict_merge(test_item['spectrum_metadata'],
                                                   test_item['cannon_output']))
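
# dict_merge() is assumed to be a helper imported from elsewhere in the package; a minimal
# sketch consistent with how it is called above (merge two dicts into a new one, without
# mutating either input, with the second argument winning on duplicate keys):
def dict_merge_sketch(first, second):
    merged = dict(first)   # copy, so the spectrum metadata is left untouched
    merged.update(second)  # Cannon output labels override any duplicate keys
    return merged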
def resample_templates(args, logger):
    """
    Resample a spectrum library of templates onto a fixed logarithmic stride, representing each of the 4MOST arms
    in turn. We use 4FS to down-sample the templates to the resolution of 4MOST observations, and automatically
    detect the list of arms contained within each 4FS mock observation. We then resample the 4FS output onto a new
    raster with fixed logarithmic stride.

    :param args:
        Object containing arguments supplied by the user, for example the name of the spectrum libraries we use
        for input and output. The required fields are defined by the user interface above.
    :param logger:
        A python logging object.
    :return:
        None.
    """
    # Set path to workspace where we expect to find libraries of spectra
    workspace = args.workspace if args.workspace else os_path.join(args.our_path, "../../../workspace")

    # Open input template spectra
    spectra = SpectrumLibrarySqlite.open_and_search(
        library_spec=args.templates_in,
        workspace=workspace,
        extra_constraints={"continuum_normalised": 0}
    )
    templates_library, templates_library_items, templates_spectra_constraints = \
        [spectra[i] for i in ("library", "items", "constraints")]

    # Create new SpectrumLibrary to hold the resampled output templates
    library_path = os_path.join(workspace, args.templates_out)
    output_library = SpectrumLibrarySqlite(path=library_path, create=True)

    # Instantiate 4FS wrapper
    etc_wrapper = FourFS(path_to_4fs=os_path.join(args.binary_path, "OpSys/ETC"),
                         snr_list=[250.],
                         magnitude=13,
                         snr_per_pixel=True)

    for input_spectrum_id in templates_library_items:
        logger.info("Working on <{}>".format(input_spectrum_id['filename']))

        # Open Spectrum data from disk
        input_spectrum_array = templates_library.open(ids=input_spectrum_id['specId'])

        # Load template spectrum (flux normalised)
        template_flux_normalised = input_spectrum_array.extract_item(0)

        # Look up the unique ID of the star we've just loaded
        # Newer spectrum libraries have a uid field which is guaranteed unique; for older spectrum libraries use
        # Starname instead.

        # Work out which field we're using (uid or Starname)
        spectrum_matching_field = 'uid' if 'uid' in template_flux_normalised.metadata else 'Starname'

        # Look up the unique ID of this object
        object_name = template_flux_normalised.metadata[spectrum_matching_field]

        # Search for the continuum-normalised version of this same object (which will share the same uid / name)
        search_criteria = {
            spectrum_matching_field: object_name,
            'continuum_normalised': 1
        }
        continuum_normalised_spectrum_id = templates_library.search(**search_criteria)

        # Check that continuum-normalised spectrum exists and is unique
        assert len(continuum_normalised_spectrum_id) == 1, "Could not find continuum-normalised spectrum."

        # Load the continuum-normalised version
        template_continuum_normalised_arr = templates_library.open(
            ids=continuum_normalised_spectrum_id[0]['specId'])

        # Turn the SpectrumArray we got back into a single Spectrum
        template_continuum_normalised = template_continuum_normalised_arr.extract_item(0)

        # Now create a mock observation of this template using 4FS
        logger.info("Passing template through 4FS")
        mock_observed_template = etc_wrapper.process_spectra(
            spectra_list=((template_flux_normalised, template_continuum_normalised),)
        )

        # Loop over LRS and HRS
        for mode in mock_observed_template:
            # Loop over the spectra we simulated (there was only one!)
            for index in mock_observed_template[mode]:
                # Loop over the various SNRs we simulated (there was only one!)
                for snr in mock_observed_template[mode][index]:
                    # Create a unique ID for this arm's data
                    unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

                    # Import the flux- and continuum-normalised spectra separately, but give them the same ID
                    for spectrum_type in mock_observed_template[mode][index][snr]:
                        # Extract continuum-normalised mock observation
                        logger.info("Resampling {} spectrum".format(mode))
                        mock_observed = mock_observed_template[mode][index][snr][spectrum_type]

                        # Replace errors which are NaNs with a large value
                        mock_observed.value_errors[np.isnan(mock_observed.value_errors)] = 1000.

                        # Check for NaN values in the spectrum itself
                        if not np.all(np.isfinite(mock_observed.values)):
                            print("Warning: NaN values in template <{}>".format(
                                template_flux_normalised.metadata['Starname']))
                            mock_observed.value_errors[np.isnan(mock_observed.values)] = 1000.
                            mock_observed.values[np.isnan(mock_observed.values)] = 1.

                        # Resample template onto a logarithmic raster of fixed step
                        resampler = SpectrumResampler(mock_observed)

                        # Construct the raster for each wavelength arm
                        wavelength_arms = SpectrumProperties(mock_observed.wavelengths).wavelength_arms()

                        # Resample 4FS output for each arm onto a fixed logarithmic stride
                        for arm_count, arm in enumerate(wavelength_arms["wavelength_arms"]):
                            arm_raster, mean_pixel_width = arm
                            name = "{}_{}".format(mode, arm_count)

                            arm_info = {
                                "lambda_min": arm_raster[0],
                                "lambda_max": arm_raster[-1],
                                "lambda_step": mean_pixel_width
                            }

                            arm_raster = logarithmic_raster(lambda_min=arm_info['lambda_min'],
                                                            lambda_max=arm_info['lambda_max'],
                                                            lambda_step=arm_info['lambda_step'])

                            # Resample 4FS output onto a fixed logarithmic step
                            mock_observed_arm = resampler.onto_raster(arm_raster)

                            # Save it into output spectrum library
                            output_library.insert(spectra=mock_observed_arm,
                                                  filenames=input_spectrum_id['filename'],
                                                  metadata_list={
                                                      "uid": unique_id,
                                                      "template_id": object_name,
                                                      "mode": mode,
                                                      "arm_name": "{}_{}".format(mode, arm_count),
                                                      "lambda_min": arm_raster[0],
                                                      "lambda_max": arm_raster[-1],
                                                      "lambda_step": mean_pixel_width
                                                  })
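
# logarithmic_raster() is imported from elsewhere in the package; below is a minimal sketch
# of the behaviour assumed above (a raster with uniform stride in log-wavelength, i.e. a
# constant velocity width per pixel, whose linear step at lambda_min equals lambda_step):
import numpy as np

def logarithmic_raster_sketch(lambda_min, lambda_max, lambda_step):
    # Convert the linear step at lambda_min into a multiplicative (logarithmic) step
    log_step = np.log(1 + lambda_step / lambda_min)
    # Number of pixels needed to span [lambda_min, lambda_max] at this stride
    n_pixels = int(np.floor((np.log(lambda_max) - np.log(lambda_min)) / log_step)) + 1
    return np.exp(np.log(lambda_min) + log_step * np.arange(n_pixels))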
            })

# Work out magnitude
mag_intrinsic = spectrum.photometry(args.photometric_band)

# Pass template to 4FS
degraded_spectra = etc_wrapper.process_spectra(
    spectra_list=((spectrum, None),)
)

# Loop over LRS and HRS
for mode in degraded_spectra:
    # Loop over the spectra we simulated (there was only one!)
    for index in degraded_spectra[mode]:
        # Loop over the various SNRs we simulated
        for snr in degraded_spectra[mode][index]:
            # Extract the exposure time returned by 4FS from the metadata associated with this Spectrum object.
            # The exposure time is recorded in seconds.
            exposure_time = degraded_spectra[mode][index][snr]["spectrum"].metadata["exposure"]

            # Print output
            print("{name:100s} {mode:6s} {snr:6.1f} {magnitude:6.3f} {exposure:6.3f}".format(
                name=name, mode=mode, snr=snr, magnitude=mag_intrinsic, exposure=exposure_time))

# Insert spectrum object into spectrum library
library.insert(spectra=spectrum, filenames=os_path.split(template)[1])
# Process spectra with each radial velocity in turn
for rv in rv_list:
    # Apply RV to the flux-normalised spectrum (rv is in km/s; the method takes m/s)
    degraded = input_spectrum.apply_radial_velocity(rv * 1000)

    # Apply RV to the continuum-normalised spectrum
    degraded_cn = input_spectrum_continuum_normalised.apply_radial_velocity(rv * 1000)

    # Create a unique ID for this mock observation (shared between the flux- and continuum-normalised output)
    unique_id = hashlib.md5(os.urandom(32)).hexdigest()[:16]

    # Save the flux-normalised output
    output_library.insert(spectra=degraded,
                          filenames=input_spectrum_id['filename'],
                          metadata_list={
                              "uid": unique_id,
                              "rv": rv * 1000
                          })

    # Save the continuum-normalised output
    output_library.insert(spectra=degraded_cn,
                          filenames=input_spectrum_id['filename'],
                          metadata_list={
                              "uid": unique_id,
                              "rv": rv * 1000
                          })

# If we put the database in /tmp while adding entries to it, now return it to its original location
if args.db_in_tmp:
    del output_library
    os.system("mv /tmp/tmp_{}.db {}".format(library_name, os_path.join(library_path, "index.db")))
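
# apply_radial_velocity() is a method of the Spectrum class; a minimal sketch of the
# leading-order (non-relativistic) wavelength shift it is assumed to apply. Whether the
# library also applies relativistic corrections is not shown here.
import numpy as np

def doppler_shift_sketch(wavelengths, velocity_m_per_s):
    c = 299792458.0  # speed of light, m/s
    # A positive (receding) velocity shifts every wavelength redward by a factor (1 + v/c)
    return np.asarray(wavelengths) * (1 + velocity_m_per_s / c)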
# Convolve spectrum
flux_data = input_spectrum.values
flux_data_convolved = np.convolve(a=flux_data, v=convolution_kernel, mode='same')

flux_errors = input_spectrum.value_errors
flux_errors_convolved = np.convolve(a=flux_errors, v=convolution_kernel, mode='same')

output_spectrum = Spectrum(wavelengths=input_spectrum.wavelengths,
                           values=flux_data_convolved,
                           value_errors=flux_errors_convolved,
                           metadata=input_spectrum.metadata)

# Import degraded spectra into output spectrum library
output_library.insert(spectra=output_spectrum,
                      filenames=input_spectrum_id['filename'],
                      metadata_list={
                          "convolution_width": kernel_width,
                          "convolution_kernel": args.kernel
                      })

# If we put the database in /tmp while adding entries to it, now return it to its original location
if args.db_in_tmp:
    del output_library
    os.system("mv /tmp/tmp_{}.db {}".format(library_name, os_path.join(library_path, "index.db")))
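
# convolution_kernel is assumed to be built elsewhere from args.kernel and kernel_width.
# A minimal sketch of two plausible kernels (the "flat" / "gaussian" names here are
# hypothetical, not taken from the script). Both are normalised to unit sum so that the
# np.convolve calls above preserve total flux:
import numpy as np

def make_kernel_sketch(kernel_type, width_pixels):
    if kernel_type == "flat":
        # Boxcar: equal weight over width_pixels adjacent pixels
        kernel = np.ones(int(width_pixels))
    else:
        # Gaussian: width_pixels taken as FWHM, truncated at +/- 3 sigma
        sigma = width_pixels / 2.3548  # FWHM -> sigma
        x = np.arange(-int(3 * sigma), int(3 * sigma) + 1)
        kernel = np.exp(-0.5 * (x / sigma) ** 2)
    return kernel / kernel.sum()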