def _lookup_primary_temperature(fname, main_sequence):
    """
    Estimate the primary star temperature for the observation in `fname`.

    The object name is read from the fits header, its spectral type is looked up
    through StarData.GetData, and the type is truncated to its first digit (or
    given subclass 5 if it has no digit) before interpolating the main-sequence
    temperature relation.

    Returns np.nan (and logs a warning) if the spectral type is 'Unknown' or if
    an AttributeError is raised anywhere during the lookup.
    """
    header = fits.getheader(fname)
    try:
        spt = StarData.GetData(header['object']).spectype
        if spt != 'Unknown':
            match = re.search('[0-9]', spt)
            if match is None:
                spt = spt[0] + "5"
            else:
                spt = spt[:match.start() + 1]
            return main_sequence.Interpolate(main_sequence.Temperature, spt)
    except AttributeError:
        # Handled below: fall through to the shared warning and NaN return
        pass
    logging.warning('Spectral type retrieval from simbad failed! Entering NaN for primary temperature!')
    return np.nan


def _ccf_output_paths(fname, obstype):
    """
    Work out where the CCF for `fname` gets written.

    Returns (output_dir, outfilebase). The directory is 'Cross_correlations/'
    relative to the working directory, unless the observation is synthetic and
    `fname` contains a path, in which case it is placed next to the data file.
    """
    output_dir = "Cross_correlations/"
    outfilebase = fname.split(".fits")[0]
    if "/" in fname:
        dirs = fname.split("/")
        outfilebase = dirs[-1].split(".fits")[0]
        if obstype.lower() == 'synthetic':
            output_dir = "".join(directory + "/" for directory in dirs[:-1]) + "Cross_correlations/"
    return output_dir, outfilebase


def companion_search(fileList, primary_vsini, badregions=None, interp_regions=None,
                     extensions=True, resolution=None, trimsize=1, reject_outliers=True,
                     vsini_values=(10, 20, 30, 40), Tvalues=range(3000, 6900, 100),
                     metal_values=(-0.5, 0.0, +0.5), logg_values=(4.5,),
                     hdf5_file=StellarModel.HDF5_FILE, vbary_correct=True, observatory="CTIO",
                     addmode="ML", output_mode='hdf5', output_file='CCF.hdf5', obstype='real',
                     min_x=None, max_x=None, debug=False, makeplots=False):
    r"""
    This function runs a companion search over a whole grid of model spectra

    Parameters:
    ===========
    - fileList:              list of strings
                             The list of fits data files. Each file is expected to have several echelle orders,
                             each in their own fits extension. Each order is represented as a binary table
                             with columns 'wavelength', 'flux', 'continuum', and 'error'

    - primary_vsini:         list of floats
                             A list of the same length as fileList, which contains the vsini for each star (in km/s)

    - badregions:            list of lists, where each sub-list has size 2
                             Regions to exclude (contains strong telluric or stellar line residuals).
                             Each sublist should give the start and end wavelength to exclude.
                             Defaults to no regions.

    - interp_regions:        list of lists, where each sub-list has size 2
                             Regions to interpolate over.
                             Each sublist should give the start and end wavelength to exclude.
                             Defaults to no regions.

    - extensions:            boolean
                             Forwarded unchanged to Process_Data.
                             NOTE(review): presumably flags whether each order lives in its own
                             fits extension - confirm against Process_Data.

    - resolution:            float
                             If given, each model is passed through
                             FittingUtilities.ReduceResolutionFFT(model, resolution) after broadening.

    - trimsize:              integer
                             The number of pixels to cut from both sides of each order. This is because the
                             order edges are usually pretty noisy.

    - reject_outliers:       boolean
                             Whether or not to detect and smooth over outliers in the data.

    - vsini_values:          Any iterable
                             A list of vsini values (in km/s) to apply to each model spectrum before correlation.

    - Tvalues:               Any iterable
                             A list of model temperatures (in K) to correlate the data against.

    - metal_values:          Any iterable
                             A list of [Fe/H] values to correlate the model against

    - logg_values:           Any iterable
                             A list of log(g) values (in cgs units) to correlate the model against

    - hdf5_file:             string
                             The path to the hdf5 file containing the pre-broadened model grid.

    - vbary_correct:         boolean
                             Correct for the heliocentric motion of the Earth around the Sun?

    - observatory:           string
                             The name of the observatory, in a way that IRAF's rvcorrect will understand.
                             Only needed if vbary_correct = True

    - addmode:               string
                             The way to add the CCFs for each order. Options are:
                                 1: 'simple': Do a simple average
                                 2: 'weighted': Do a weighted average: $C = \sum_i{w_i C_i^2}$
                                     where $w_i$ is the line depth of the each pixel
                                 3: 'simple-weighted': Same as weighted, but without squaring the CCFs:
                                     $C = \sum_i{w_i C_i}$
                                 4: 'T-weighted': Do a weighted average: $C = \sum_i{w_i C_i}$
                                     where $w_i$ is how fast each pixel changes with temperature
                                 5: 'dc': $C = \sum_i{C_i^2}$  (basically, weighting by the CCF itself)
                                 6: 'ml': The maximum likelihood estimate. See Zucker 2003, MNRAS, 342, 1291
                                 7: 'all': does simple, dc, and ml all at once.

    - output_mode:           string
                             How to output. Valid options are:
                                 1: text, which is just ascii data with a filename convention.
                                 2: hdf5, which outputs a single hdf5 file with all the metadata
                                    necessary to classify the output. This is the default.

    - output_file:           string
                             An HDF5 file to output to. Only used if output_mode = 'hdf5'.
                             Note: The file will be placed in a directory called 'Cross_correlations'

    - obstype:               string
                             Is this a synthetic binary star or real observation? (default is real).
                             The HDF5 output is a bit different if it is a synthetic binary star observation.

    - min_x:                 float
                             The minimum wavelength to use in the model. If not given, the whole model will be used

    - max_x:                 float
                             The maximum wavelength to use in the model. If not given, the whole model will be used

    - debug:                 boolean
                             Flag to print a bunch of information to screen, and save some intermediate data files

    - makeplots:             boolean
                             A 'higher level' of debug. Will make a plot of the data and model orders
                             for each model.

    Returns:
    ========
    None. The cross-correlation functions are written out through save_ccf / save_synthetic_ccf.
    """
    # Fresh lists per call, so nothing downstream can mutate a shared default
    badregions = [] if badregions is None else badregions
    interp_regions = [] if interp_regions is None else interp_regions

    # Make sure the temperature, metal, and logg are all at least 1d arrays.
    Tvalues = np.atleast_1d(Tvalues)
    metal_values = np.atleast_1d(metal_values)
    logg_values = np.atleast_1d(logg_values)

    model_list = StellarModel.GetModelList(type='hdf5',
                                           hdf5_file=hdf5_file,
                                           temperature=Tvalues,
                                           metal=metal_values,
                                           logg=logg_values)

    # Decide this ONCE. 'T-weighted' is implemented as 'simple-weighted' with explicit
    # per-order weights; keeping the flag separate from the mode handed to Correlate means
    # the weights get recomputed for every model and file instead of only the first one.
    t_weighted = addmode.lower() == 't-weighted'
    if t_weighted:
        modeldict, processed, sensitivity = StellarModel.MakeModelDicts(model_list,
                                                                        type='hdf5',
                                                                        hdf5_file=hdf5_file,
                                                                        vsini_values=vsini_values,
                                                                        vac2air=True,
                                                                        logspace=True,
                                                                        get_T_sens=True)
    else:
        modeldict, processed = StellarModel.MakeModelDicts(model_list,
                                                           type='hdf5',
                                                           hdf5_file=hdf5_file,
                                                           vsini_values=vsini_values,
                                                           vac2air=True,
                                                           logspace=True)
        sensitivity = None

    get_weights = addmode.lower() in ("weighted", "simple-weighted")
    # The mode actually passed to Correlate and recorded in the output metadata
    ccf_addmode = 'simple-weighted' if t_weighted else addmode
    orderweights = None

    MS = SpectralTypeRelations.MainSequence()

    # Do the cross-correlation
    datadict = defaultdict(list)           # processed orders, cached per file
    temperature_dict = defaultdict(float)  # primary temperature, cached per file
    vbary_dict = defaultdict(float)        # barycentric correction, cached per file
    alpha = 0.0
    for temp in sorted(modeldict.keys()):
        for gravity in sorted(modeldict[temp].keys()):
            for metallicity in sorted(modeldict[temp][gravity].keys()):
                for vsini_sec in vsini_values:
                    if debug:
                        logging.info('T: {}, logg: {}, [Fe/H]: {}, vsini: {}'.format(temp, gravity,
                                                                                     metallicity, vsini_sec))
                    # broaden the model
                    model = modeldict[temp][gravity][metallicity][alpha][vsini_sec].copy()
                    l_idx = 0 if min_x is None else np.searchsorted(model.x, min_x)
                    r_idx = model.size() if max_x is None else np.searchsorted(model.x, max_x) + 1
                    model = Broaden.RotBroad(model[l_idx:r_idx], vsini_sec * u.km.to(u.cm), linear=True)
                    if resolution is not None:
                        model = FittingUtilities.ReduceResolutionFFT(model, resolution)

                    # Interpolate the temperature weights, if addmode='T-weighted'
                    if t_weighted:
                        x = modeldict[temp][gravity][metallicity][alpha][vsini_sec].x
                        y = sensitivity[temp][gravity][metallicity][alpha][vsini_sec]
                        temperature_weights = spline(x, y)

                    for fname, vsini_prim in zip(fileList, primary_vsini):
                        if vbary_correct:
                            if fname in vbary_dict:
                                vbary = vbary_dict[fname]
                            else:
                                vbary = HelCorr_IRAF(fits.getheader(fname), observatory=observatory)
                                vbary_dict[fname] = vbary

                        # Only process each data file the first time it is seen
                        if fname in datadict:
                            orders = datadict[fname]
                        else:
                            orders = Process_Data(fname, badregions, interp_regions=interp_regions,
                                                  logspacing=True, extensions=extensions,
                                                  trimsize=trimsize, vsini=vsini_prim,
                                                  reject_outliers=reject_outliers)
                            temperature_dict[fname] = _lookup_primary_temperature(fname, MS)
                            datadict[fname] = orders

                        # Now, process the model
                        model_orders = process_model(model.copy(), orders, vsini_primary=vsini_prim, maxvel=1000.0,
                                                     debug=debug, oversample=1, logspace=False)

                        # Get order weights if addmode='T-weighted'
                        if t_weighted:
                            orderweights = [np.sum(temperature_weights(o.x)) for o in orders]

                        if debug and makeplots:
                            plt.figure('T={}   vsini={}'.format(temp, vsini_sec))
                            for o, m in zip(orders, model_orders):
                                d_scale = np.std(o.y/o.cont)
                                m_scale = np.std(m.y/m.cont)
                                plt.plot(o.x, (o.y/o.cont-1.0)/d_scale, 'k-', alpha=0.4)
                                plt.plot(m.x, (m.y/m.cont-1.0)/m_scale, 'r-', alpha=0.6)
                            plt.show(block=False)

                        # Make sure the output directory exists
                        output_dir, outfilebase = _ccf_output_paths(fname, obstype)
                        HelperFunctions.ensure_dir(output_dir)

                        # Save the model and data orders, if debug=True
                        if debug:
                            # Save the individual spectral inputs and CCF orders (unweighted)
                            output_dir2 = output_dir.replace("Cross_correlations", "CCF_inputs")
                            HelperFunctions.ensure_dir(output_dir2)
                            HelperFunctions.ensure_dir("%sCross_correlations/" % (output_dir2))

                            for order_num, (o, m) in enumerate(zip(orders, model_orders)):
                                outfilename = "{0:s}{1:s}.{2:.0f}kps_{3:.1f}K{4:+.1f}{5:+.1f}.data.order{6:d}".format(
                                    output_dir2,
                                    outfilebase, vsini_sec,
                                    temp, gravity,
                                    metallicity, order_num + 1)
                                o.output(outfilename)
                                outfilename = "{0:s}{1:s}.{2:.0f}kps_{3:.1f}K{4:+.1f}{5:+.1f}.model.order{6:d}".format(
                                    output_dir2,
                                    outfilebase, vsini_sec,
                                    temp, gravity,
                                    metallicity, order_num + 1)
                                m.output(outfilename)

                        corr = Correlate.Correlate(orders, model_orders, addmode=ccf_addmode, outputdir=output_dir,
                                                   get_weights=get_weights, prim_teff=temperature_dict[fname],
                                                   orderweights=orderweights, debug=debug)
                        if debug:
                            # With debug on, Correlate also hands back the per-order CCFs
                            corr, ccf_orders = corr

                        # Barycentric correction
                        if vbary_correct:
                            corr.x += vbary

                        # Output the ccf
                        if obstype.lower() == 'synthetic':
                            pars = {'outdir': output_dir, 'outbase': outfilebase, 'addmode': ccf_addmode,
                                    'vsini_prim': vsini_prim, 'vsini': vsini_sec,
                                    'T': temp, 'logg': gravity, '[Fe/H]': metallicity}
                            save_synthetic_ccf(corr, params=pars, mode=output_mode)
                        else:
                            pars = {'outdir': output_dir, 'fname': fname, 'addmode': ccf_addmode,
                                    'vsini_prim': vsini_prim, 'vsini': vsini_sec,
                                    'T': temp, 'logg': gravity, '[Fe/H]': metallicity}
                            pars['vbary'] = vbary if vbary_correct else np.nan
                            save_ccf(corr, params=pars, mode=output_mode, hdf_outfilename=output_file)

                        # Save the individual orders, if debug=True
                        if debug:
                            for order_num, c in enumerate(ccf_orders):
                                print("Saving CCF inputs for order {}".format(order_num + 1))
                                outfilename = "{0:s}Cross_correlations/{1:s}.{2:.0f}kps_{3:.1f}K{4:+.1f}{5:+.1f}.order{6:d}".format(
                                    output_dir2,
                                    outfilebase, vsini_sec,
                                    temp, gravity,
                                    metallicity, order_num + 1)
                                c.output(outfilename)

                    # Delete the model. We don't need it anymore and it just takes up ram.
                    modeldict[temp][gravity][metallicity][alpha][vsini_sec] = []

    return
def analyze_sensitivity(hdf5_file="Sensitivity.hdf5", interactive=True, update=True, **heatmap_kws):
    """
    This uses the output of a previous run of check_sensitivity, and makes plots.
    Frankly, the `summarize_sensitivity` function is more useful.

    Parameters:
    ===========
    - hdf5_file:     string
                     The file to read the sensitivity results from. A name ending in 'hdf5'
                     is read with read_hdf5 (and cached to Sensitivity_Dataframe.csv);
                     a name ending in 'csv' is read directly as a csv file.

    - interactive:   boolean
                     If True, the user will pick which stars to plot

    - update:        boolean
                     If True, always update the Sensitivity_Dataframe.csv file.
                     Otherwise, try to load that file instead of reading the hdf5 file

    - heatmap_kws:   Any other keyword arguments to pass to Sensitivity.heatmap()

    Returns:
    ========
    A dictionary of dictionaries. The outer keys are "detrate" and "significance";
    the inner dictionaries hold pandas DataFrames for specific parameter sets.

    Raises:
    =======
    ValueError if the input has to be read from `hdf5_file` and its name ends in
    neither 'hdf5' nor 'csv'.
    """
    if not update and os.path.isfile("Sensitivity_Dataframe.csv"):
        df = pd.read_csv("Sensitivity_Dataframe.csv")
    elif hdf5_file.endswith("hdf5"):
        df = read_hdf5(hdf5_file)

        # Save the dataframe for later use
        df.to_csv("Sensitivity_Dataframe.csv", index=False)
    elif hdf5_file.endswith("csv"):
        # Treat the input as a csv file
        df = pd.read_csv(hdf5_file)
    else:
        # Fail here with a clear message rather than with an unbound `df` further down
        raise ValueError("Unsupported input file '{}': expected a name ending in "
                         "'hdf5' or 'csv'".format(hdf5_file))

    # Group by a bunch of keys that probably don't change, but could.
    # A list (not a tuple) is required: recent pandas treats a tuple as ONE key.
    groups = df.groupby(["star", "date", "[Fe/H]", "logg", "addmode", "primary SpT"])
    group_keys = list(groups.groups.keys())

    # Have the user choose keys
    if interactive:
        for i, key in enumerate(group_keys):
            print("[{}]: {}".format(i + 1, key))
        try:
            read_line = raw_input  # noqa: F821 (Python 2)
        except NameError:
            read_line = input  # Python 3
        inp = read_line("Enter the numbers of the keys you want to plot (, or - delimited): ")
        chosen = parse_input(inp)
        keys = [k for i, k in enumerate(group_keys) if i + 1 in chosen]
    else:
        keys = group_keys

    # Compile dataframes for each star
    grid_columns = ["temperature", "vsini", "logL", "contrast"]
    dataframes = defaultdict(lambda: defaultdict(pd.DataFrame))
    for key in keys:
        logging.info(key)
        gridded = groups.get_group(key).groupby(grid_columns)
        # Detection rate = fraction of trials with a non-null significance
        detrate = gridded.apply(
            lambda grp: float(sum(grp.significance.notnull())) / float(len(grp))
        )
        significance = gridded.apply(lambda grp: np.nanmean(grp.significance))
        dataframes["detrate"][key] = detrate.reset_index().rename(columns={0: "detection rate"})
        dataframes["significance"][key] = significance.reset_index().rename(columns={0: "significance"})

    # Make heatmap plots for each key.
    HelperFunctions.ensure_dir("Figures/")
    for i, key in enumerate(keys):
        star = key[0]
        date = key[1]
        addmode = key[4]
        spt = key[5]
        logging.info("Making figures for {} observed on {} with addmode {}".format(star, date, addmode))

        # Drop empty groups BEFORE opening a figure, so no blank figure is left behind
        if len(dataframes["detrate"][key]) == 0:
            dataframes["detrate"].pop(key)
            dataframes["significance"].pop(key)
            continue

        star_label = star.replace(" ", "_")

        plt.figure(i * 3 + 1)
        heatmap(dataframes["detrate"][key][["vsini", "temperature", "detection rate"]], **heatmap_kws)
        plt.title("Detection Rate for {} ({}) on {}".format(star, spt, date))
        plt.savefig("Figures/T_vsini_Detrate_{}.{}.addmode-{}.pdf".format(star_label, date, addmode))

        plt.figure(i * 3 + 2)
        heatmap(dataframes["significance"][key][["vsini", "temperature", "significance"]], **heatmap_kws)
        plt.title("Detection Significance for {} ({}) on {}".format(star, spt, date))
        plt.savefig("Figures/T_vsini_Significance_{}.{}.addmode-{}.pdf".format(star_label, date, addmode))

        plt.figure(i * 3 + 3)
        heatmap(dataframes["detrate"][key][["vsini", "contrast", "detection rate"]], **heatmap_kws)
        plt.title("Detection Rate for {} ({}) on {}".format(star, spt, date))
        plt.savefig("Figures/contrast_vsini_Detrate_{}.{}.addmode-{}.pdf".format(star_label, date, addmode))

        # In batch mode the figures are already on disk; close them to free memory
        if not interactive:
            plt.close("all")

    if interactive:
        plt.show()

    return dataframes