def get_data(
    datacsv='../data/WASP-18b_transits_and_TESS_times_O-C_vs_epoch_selected.csv',
    is_occultation=False,
    plongphasing=False
):
    """
    Load mid-times from a ';'-separated CSV and assign an epoch to each.

    Rows whose `t0_BJD_TDB` or `err_t0` is not finite are dropped.

    Args:
        datacsv: path of the CSV; the columns `t0_BJD_TDB`, `err_t0` and
            `reference` are read.
        is_occultation: accepted for interface compatibility; not used here.
        plongphasing: if True, epochs are computed with the fixed period
            0.4991110 instead of the module-level `PERIOD`.

    Returns:
        tuple (epochs, midtimes, midtime_errs, references).
    """
    table = pd.read_csv(datacsv, sep=';')

    midtimes = nparr(table.t0_BJD_TDB)
    midtime_errs = nparr(table.err_t0)
    references = nparr(table.reference)

    # keep only the rows with a usable time AND a usable uncertainty
    keep = np.isfinite(midtimes) & np.isfinite(midtime_errs)
    midtimes = midtimes[keep]
    midtime_errs = midtime_errs[keep]
    references = references[keep]

    # both cases share the reference epoch; only the folding period differs
    folding_period = 0.4991110 if plongphasing else PERIOD

    epochs, _ = get_epochs_given_midtimes_and_period(
        midtimes, folding_period, t0_fixed=EPOCH, verbose=True
    )

    return epochs, midtimes, midtime_errs, references
def main():
    """
    Fit a straight line (mid-time vs. epoch) to the times stored in
    '../../data/ephemeris/midtimes.csv' and print the resulting t0 and period
    with their least-squares uncertainties.
    """
    midtimes_df = pd.read_csv(
        '../../data/ephemeris/midtimes.csv'
    ).sort_values(by='tmid')

    t_mid = nparr(midtimes_df['tmid'])
    t_mid_err = nparr(midtimes_df['tmiderr'])

    # starting ephemeris
    period_guess = 8.32467  # SPOC, +/- 4e-4
    t0_guess = 2457000 + 1574.2738  # SPOC, +/- 1e-3

    epochs, _ = get_epochs_given_midtimes_and_period(
        t_mid, period_guess, t0_fixed=t0_guess, verbose=True
    )

    best_fit, covariance = curve_fit(
        linear_model, epochs, t_mid,
        p0=(period_guess, t0_guess), sigma=t_mid_err
    )

    # parameter order follows p0: (period, t0)
    period, t0 = best_fit[0], best_fit[1]
    period_err = covariance[0, 0]**0.5
    t0_err = covariance[1, 1]**0.5

    print(f't0: {t0:.6f} +/- {t0_err:.6f}')
    print(f'period: {period:.7f} +/- {period_err:.7f}')
def scatter_plot_parameter_vs_epoch_manual(plname,
                                           df,
                                           yparam,
                                           datafile,
                                           init_period,
                                           overwrite=False,
                                           savname=None,
                                           ylim=None,
                                           req_precision_minutes=10,
                                           xlim=None,
                                           occultationtimecsv=None,
                                           correcterrorbars=False):
    '''
    Make an "O-C" (observed minus calculated mid-time) vs. epoch scatter plot
    from a table of transit times, and write the selected transit (and,
    optionally, occultation) times to ';'-separated CSV files.

    args:
        plname -- planet name; 'WASP-18b' triggers an extra cut on
            `df['picklepath']`, and 'WASP-4b' / 'WASP-19b' select the
            semi-major axis used for the occultation light-travel-time
            correction.
        df -- made by get_ETD_params. Columns read here: 't0_BJD_TDB',
            'err_t0', 'reference', 'where_I_got_time' (and 'picklepath' for
            WASP-18b).
        yparam -- in ['O-C', 'Duration', 'Depth']; only 'O-C' is accepted
            (asserted below).
        datafile -- e.g., "../data/20180826_WASP-18b_ETD.txt". Used to build
            the default `savname` and the planet label drawn on the plot.
        init_period -- period used to assign the initial epochs and as the
            starting guess for the linear fits.
        overwrite -- if False and `savname` already exists, nothing is done.
        savname -- output figure path; built from `datafile` and `yparam` if
            not given.
        ylim, xlim -- optional axis limits passed to matplotlib.
        req_precision_minutes -- only times with err_t0 (converted to
            minutes) below this value are selected.
        occultationtimecsv -- if a str, name of a CSV inside
            '../data/manual_literature_time_concatenation' with occultation
            times.
        correcterrorbars -- if True, the rescaled TESS uncertainties (those
            with reference 'me') replace the original ones.

    returns:
        0 if the output already exists and `overwrite` is False; otherwise
        None, after saving the figure(s) and CSV file(s).

    raises:
        AssertionError -- if yparam != 'O-C', or if the adopted period
            differs from `init_period` by 1e-3 or more.
        NotImplementedError -- if occultation times without light-travel-time
            correction are given for a planet other than WASP-4b / WASP-19b.
    '''

    assert yparam == 'O-C'

    if not savname:
        savname = ('../results/' + datafile.split('/')[-1].split('.txt')[0] +
                   "_" + yparam + "_vs_epoch.pdf")
    if os.path.exists(savname) and overwrite == False:
        print('skipped {:s}'.format(savname))
        return 0

    # fit a straight line (t vs. E) to all the times. then subtract the
    # best-fitting line from the data.
    # TESS midtime errors are taken as the MAXIMUM of (plus error, minus error)
    # -- see retrieve_measured_times.py.
    tmid = arr(df['t0_BJD_TDB'])
    err_tmid = arr(df['err_t0'])

    # selection mask: finite values, and uncertainty (in minutes) below the
    # requested precision.
    sel = np.isfinite(err_tmid) & np.isfinite(tmid)
    sel &= (err_tmid * 24 * 60 < req_precision_minutes)

    if plname == 'WASP-18b':
        # drop the transits whose fit pickles are listed here.
        badlist = arr([
            '../results/tess_lightcurve_fit_parameters/100100827/sector_2/100100827_mandelagol_and_line_fit_empiricalerrs_t005.pickle',
            '../results/tess_lightcurve_fit_parameters/100100827/sector_2/100100827_mandelagol_and_line_fit_empiricalerrs_t022.pickle',
            '../results/tess_lightcurve_fit_parameters/100100827/sector_3/100100827_mandelagol_and_line_fit_empiricalerrs_t004.pickle',
            '../results/tess_lightcurve_fit_parameters/100100827/sector_3/100100827_mandelagol_and_line_fit_empiricalerrs_t010.pickle'
        ])
        sel &= ~np.isin(df['picklepath'], badlist)

    epoch, init_t0 = get_epochs_given_midtimes_and_period(
        tmid[sel], init_period, err_t_mid=err_tmid[sel], verbose=True)

    # calculate epochs for occultation time measurements, so they can be used
    # in model comparison. (don't use them for determining least squares t0 or
    # period, because they are usually rattier).
    if isinstance(occultationtimecsv, str):
        occ_file = os.path.join('../data/manual_literature_time_concatenation',
                                occultationtimecsv)
    else:
        occ_file = None

    if occ_file:

        print('\n----WRN!! WORKING WITH OCCULTATION TIMES----\n')

        # tmid = t0 + P*E
        # tocc = t0 + P*E + P/2
        occ_df = pd.read_csv(occ_file, sep=';', comment=None)

        if 'tocc_BJD_TDB_w_ltt' in occ_df:
            print('USING OCC TIMES W/ LTT ALREADY ACCOUNTED')
            t_occ_ltt_corrected = np.array(occ_df['tocc_BJD_TDB_w_ltt'])
            err_t_occ = np.array(occ_df['err_tocc'])

        else:
            t_occ_no_ltt = np.array(occ_df['tocc_BJD_TDB_no_ltt'])
            err_t_occ = np.array(occ_df['err_tocc'])

            # light travel time across the orbit: 2 * semi-major axis / c.
            if plname == 'WASP-4b':
                semimaj = 0.0228 * u.au  # Petrucci+ 2013, table 3
                ltt_corr = (2 * semimaj / constants.c).to(u.second)
            elif plname == 'WASP-19b':
                semimaj = 0.01634 * u.au  # TEPCAT
                ltt_corr = (2 * semimaj / constants.c).to(u.second)
            else:
                raise NotImplementedError('need to implement ltt correction')

            print(
                'subtracting {:.3f} for occultation light travel time'.format(
                    ltt_corr))

            t_occ_ltt_corrected = t_occ_no_ltt - (ltt_corr.to(u.day)).value

        # occultation epochs are offset by half a period from the transit
        # ephemeris, and rounded to one decimal place.
        occ_epoch_full = (t_occ_ltt_corrected - init_t0 -
                          init_period / 2) / init_period
        occ_epoch = np.round(occ_epoch_full, 1)

        print('got occultation epochs')
        print(occ_epoch_full)

        f_occ_epochs = np.isfinite(occ_epoch)
        tocc = t_occ_ltt_corrected[f_occ_epochs]
        err_tocc = err_t_occ[f_occ_epochs]
        occ_references = np.array(occ_df['reference'])[f_occ_epochs]
        occ_whereigot = np.array(occ_df['where_I_got_time'])[f_occ_epochs]

    else:
        print('\n----NOT WORKING WITH OCCULTATION TIMES----\n')

    print('{:d} transits collected'.format(len(err_tmid)))

    print('{:d} transits SELECTED (finite & err_tmid < {:d} minute)'.format(
        len(err_tmid[sel]), req_precision_minutes))

    print('{:d} transits with claimed err_tmid < 1 minute'.format(
        len(err_tmid[err_tmid * 24 * 60 < 1.])))

    xvals = epoch
    xdata = xvals
    ydata = tmid[sel]
    sigma = err_tmid[sel]
    sel_references = np.array(df['reference'])[sel]

    # do the TESS error bars make sense? in particular, for TESS times
    # only, is chi^2_reduced ~= 1? if not, maybe over-estimating
    # uncertainties!
    # -> correct them so that chi^2_reduced = 1...
    # (rows with reference == 'me' are treated as the TESS times.)
    if np.any(np.array(df['reference']) == 'me'):

        sel_tess = sel & (np.array(df['reference']) == 'me')
        # boolean-mask indexing returns a copy, so the in-place shift below
        # does not modify `epoch`.
        xdata_tess = epoch[sel_references == 'me']
        # re-center the TESS epochs on their middle (sorted) value.
        xdata_tess -= np.sort(xdata_tess)[int(len(xdata_tess) / 2)]
        ydata_tess = tmid[sel_tess]
        sigma_tess = err_tmid[sel_tess]

        popt_tess, pcov_tess = curve_fit(linear_model,
                                         xdata_tess,
                                         ydata_tess,
                                         p0=(init_period, init_t0),
                                         sigma=sigma_tess)

        lsfit_period_tess = popt_tess[0]
        lsfit_t0_tess = popt_tess[1]

        calc_tmids_tess = lsfit_period_tess * xdata_tess + lsfit_t0_tess
        O_minus_C_tess = tmid[sel_tess] - calc_tmids_tess

        chi2 = np.sum(O_minus_C_tess**2 / sigma_tess**2)
        n_data, n_parameters = len(xdata_tess), 2
        dof = n_data - n_parameters

        chi2_reduced = chi2 / dof

        # propose the empirical correction. `f` for fudge.
        f = np.sqrt(chi2 / dof)
        sigma_tess_corrected = sigma_tess * f

        chi2_corrected = np.sum(O_minus_C_tess**2 / sigma_tess_corrected**2)
        chi2_reduced_corrrected = chi2_corrected / dof

        # NOTE: `plname` is re-bound here to the first "_"-delimited token of
        # the savname basename; later uses (e.g. the occultation CSV path)
        # see this value, not the argument.
        plname = os.path.basename(savname).split("_")[0]
        tesscheckpath = (os.path.join(
            '../results/manual_plus_tess_O-C_vs_epoch',
            plname + "_tess_errorbar_check.png"))

        plot_tess_errorbar_check(xdata_tess,
                                 O_minus_C_tess * 24 * 60,
                                 sigma_tess * 24 * 60,
                                 sigma_tess_corrected * 24 * 60,
                                 lsfit_period_tess,
                                 lsfit_t0_tess,
                                 chi2,
                                 dof,
                                 chi2_reduced,
                                 f,
                                 chi2_corrected,
                                 chi2_reduced_corrrected,
                                 savpath=tesscheckpath)

        # finally, (optionally) update the errors to be used in the analysis!
        # NOTE: this assumes the TESS measurements are always being appended at
        # the end! however TESS data is pretty much always the newest for this
        # project, so this is OK.
        if correcterrorbars:
            print('WRN! ERROR BARS BEFORE EMPIRICAL CORRECTION')
            print(sigma)
            sigma_not_tess = sigma[~(sel_references == 'me')]
            sigma = np.concatenate((sigma_not_tess, sigma_tess_corrected))
            print('WRN! ERROR BARS AFTER EMPIRICAL CORRECTION')
            print(sigma)
        else:
            sigma_not_tess = sigma[~(sel_references == 'me')]
            sigma = np.concatenate((sigma_not_tess, sigma_tess))

    # offset (median time rounded to the nearest 1000) used only to build the
    # "..._minus_<offset>_minutes" column.
    t0_offset = int(np.round(np.nanmedian(ydata), -3))

    savdf = pd.DataFrame({
        'sel_epoch':
        xdata,
        'sel_transit_times_BJD_TDB_minus_{:d}_minutes'.format(t0_offset):
        (ydata - t0_offset) * 24 * 60,
        'sel_transit_times_BJD_TDB':
        ydata,
        'err_sel_transit_times_BJD_TDB':
        sigma,
        'err_sel_transit_times_BJD_TDB_minutes': (sigma) * 24 * 60,
        'original_reference':
        np.array(df['reference'])[sel],
        'where_I_got_time':
        np.array(df['where_I_got_time'])[sel],
    })
    savdf = savdf.sort_values(by='sel_epoch')
    # fix the column order of the output file.
    savdf = savdf[[
        'sel_epoch',
        'sel_transit_times_BJD_TDB_minus_{:d}_minutes'.format(t0_offset),
        'sel_transit_times_BJD_TDB', 'err_sel_transit_times_BJD_TDB',
        'err_sel_transit_times_BJD_TDB_minutes', 'original_reference',
        'where_I_got_time'
    ]]

    # NOTE(review): the replace only changes the name when savname contains
    # '.png'; the default savname built above ends in '.pdf' -- confirm that
    # callers always pass a '.png' savname.
    savdfpath = (os.path.join(
        '../data/', 'literature_plus_TESS_times',
        os.path.basename(savname.replace('.png', '_selected.csv'))))
    savdf.to_csv(savdfpath, sep=';', index=False)
    print('saved {:s}'.format(savdfpath))

    if occ_file:
        occ_savdf = pd.DataFrame({
            'sel_epoch':
            occ_epoch[f_occ_epochs],
            'sel_occ_times_BJD_TDB_minus_{:d}_minutes'.format(t0_offset):
            (tocc - t0_offset) * 24 * 60,
            'sel_occ_times_BJD_TDB':
            tocc,
            'err_sel_occ_times_BJD_TDB':
            err_tocc,
            'err_sel_occ_times_BJD_TDB_minutes': (err_tocc) * 24 * 60,
            'original_reference':
            occ_references,
            'where_I_got_time':
            occ_whereigot,
        })
        occ_savdf = occ_savdf.sort_values(by='sel_epoch')
        occ_savdf = occ_savdf[[
            'sel_epoch',
            'sel_occ_times_BJD_TDB_minus_{:d}_minutes'.format(t0_offset),
            'sel_occ_times_BJD_TDB', 'err_sel_occ_times_BJD_TDB',
            'err_sel_occ_times_BJD_TDB_minutes', 'original_reference',
            'where_I_got_time'
        ]]

        occ_savdfpath = ('../data/' + 'literature_plus_TESS_times/' +
                         '{:s}_occultation_times_selected.csv'.format(plname))
        occ_savdf.to_csv(occ_savdfpath, sep=';', index=False)
        print('saved {:s}'.format(occ_savdfpath))

    # weighted linear fit to all the selected transit times.
    popt, pcov = curve_fit(linear_model,
                           xdata,
                           ydata,
                           p0=(init_period, init_t0),
                           sigma=sigma)

    lsfit_period = popt[0]
    lsfit_t0 = popt[1]

    # NOTE(review): the fitted values above are immediately overwritten by the
    # hard-coded ephemeris below, and the epochs are recomputed against it.
    # This looks like a temporary, single-planet override -- confirm before
    # using this function for any other target.
    #FIXME FIXME
    #tanimoto+2020
    lsfit_period = 0.4483993
    lsfit_t0 = 2455543.943
    # # yu+2015 linear
    # lsfit_period = 0.448391
    # lsfit_t0 = 2455201.832
    # #current TESS ephem
    # lsfit_period = 0.4483993 # 0.448463086010997 # 0.4483993
    # lsfit_t0 = 2458468.63809577
    epoch, _ = get_epochs_given_midtimes_and_period(tmid[sel],
                                                    lsfit_period,
                                                    err_t_mid=None,
                                                    t0_fixed=lsfit_t0)
    xvals = epoch
    #FIXME FIXME

    # sanity check of the adopted period against the period passed in.
    if not abs(lsfit_period - init_period) < 1e-4:
        print('WRN! least squares period is worryingly far from given period')
    if not abs(lsfit_period - init_period) < 1e-3:
        print('ERR! least squares period should be close to given period')
        # drops into an interactive shell before raising.
        import IPython
        IPython.embed()
        raise AssertionError

    calc_tmids = lsfit_period * epoch + lsfit_t0

    # we can now plot "O-C"
    yvals = (tmid[sel] - calc_tmids) * 24 * 60

    # the mean +/- 3 sigma limits are computed, then replaced by
    # (min - 10, max + 10) just below.
    ymin, ymax = np.nanmean(yvals)-3*np.nanstd(yvals), \
                 np.nanmean(yvals)+3*np.nanstd(yvals)
    #FIXME below
    ymin, ymax = np.nanmin(yvals)-10, \
                 np.nanmax(yvals)+10
    #FIXME above

    if yparam == 'O-C':
        yerrs = sigma * 24 * 60

    plt.close('all')
    f, ax = plt.subplots(figsize=(8, 6))

    # data points
    # marker area scales as 1/dq^2, i.e. smaller uncertainties get bigger
    # markers.
    dq = 1e3 * sigma
    ax.scatter(xvals, yvals, marker='o', s=1 / (dq**2), zorder=1, c='red')

    # error bars
    ax.errorbar(xvals,
                yvals,
                yerr=yerrs,
                elinewidth=0.3,
                ecolor='lightgray',
                capsize=2,
                capthick=0.3,
                linewidth=0,
                fmt='s',
                ms=0,
                zorder=0,
                alpha=0.75)

    # text for epoch and planet name
    pl_name = datafile.split("/")[-1].split("_")[0]
    ax.text(.04,
            .04,
            pl_name,
            ha='left',
            va='bottom',
            transform=ax.transAxes,
            fontsize='small')

    # add upper xscale, with BJD-2450000 on top
    # (drawn fully transparent: it only serves to set the upper axis range.)
    t_cut = 2450000
    ax_upper = ax.twiny()
    ax_upper.errorbar(tmid[sel] - t_cut,
                      yvals,
                      yerr=yerrs,
                      elinewidth=0.3,
                      ecolor='lightgray',
                      capsize=2,
                      capthick=0.3,
                      linewidth=0,
                      fmt='s',
                      ms=0,
                      zorder=0,
                      alpha=0.)
    ax_upper.set_xlabel('BJD-{:d}'.format(t_cut))

    for a in [ax, ax_upper]:
        a.get_yaxis().set_tick_params(which='both', direction='in')
        a.get_xaxis().set_tick_params(which='both', direction='in')

    # make vertical lines to roughly show TESS observation window function for
    # all sectors that this planet is observed in
    tw = pd.read_csv('../data/tess_sector_time_windows.csv')
    knownplanet_df_files = glob('../data/kane_knownplanet_tess_overlap/'
                                'kane_knownplanets_sector*.csv')

    if yparam == 'O-C':
        for knownplanet_df_file in knownplanet_df_files:

            knownplanet_df = pd.read_csv(knownplanet_df_file)

            # if planet is observed in this sector
            if np.isin(
                    pl_name.split('b')[0], arr(knownplanet_df['pl_hostname'])):

                # 0-based sector number count
                this_sec_num = int(
                    search('sector{:d}.csv', knownplanet_df_file)[0])

                # 1-based sector number count
                _ = tw[tw['sector_num'] == this_sec_num + 1]

                st = float(_['start_time_HJD'].iloc[0])
                et = float(_['end_time_HJD'].iloc[0])

                # window edges expressed as (fractional) epochs.
                st_epoch = (st - lsfit_t0) / lsfit_period
                et_epoch = (et - lsfit_t0) / lsfit_period

                ax.axvline(x=st_epoch,
                           c='green',
                           alpha=0.4,
                           lw=0.5,
                           zorder=-3)
                ax.axvline(x=et_epoch,
                           c='green',
                           alpha=0.4,
                           lw=0.5,
                           zorder=-3)

                ax.fill([st_epoch, et_epoch, et_epoch, st_epoch],
                        [ymin, ymin, ymax, ymax],
                        facecolor='green',
                        alpha=0.05,
                        zorder=-4)

                stxt = 'S' + str(this_sec_num + 1)
                ax.text(st_epoch + (et_epoch - st_epoch) / 2,
                        ymin + 1e-3 * 24 * 60,
                        stxt,
                        fontsize='xx-small',
                        ha='center',
                        va='center',
                        zorder=-2)

    xmin, xmax = min(ax.get_xlim()), max(ax.get_xlim())

    #
    # show the plotted linear ephemeris, and the zero-line
    #
    txt = 'M = {:.5f} + {:f} * E'.format(lsfit_t0, lsfit_period)
    ax.text(.04,
            .96,
            txt,
            ha='left',
            va='top',
            transform=ax.transAxes,
            fontsize='small')

    ax.hlines(0, xmin, xmax, alpha=0.3, zorder=-1, lw=0.5)

    ax.set_ylabel('O-C [minutes]', fontsize='x-small')

    ax.set_xlabel(
        'Epoch Number '
        '({:d} records; tmids are BJD TDB; TESS windows +/-1 day)'.format(
            len(df)),
        fontsize='x-small')

    if xlim:
        ax.set_xlim(xlim)
    else:
        ax.set_xlim([xmin, xmax])
    # a user-supplied ylim takes precedence over the computed limits.
    ax.set_ylim([ymin, ymax])
    if ylim:
        ax.set_ylim(ylim)

    # make the legend
    # NOTE(review): the labels are values of dq = 1e3 * sigma, where sigma is
    # not converted to minutes, while the legend title says minutes -- confirm
    # the intended units.
    for _dq in np.linspace(np.nanmin(dq), np.nanmax(dq), num=6):
        ax.scatter([], [], c='r', s=1 / (_dq**2), label='{:.2E}'.format(_dq))

    ax.legend(scatterpoints=1,
              frameon=True,
              labelspacing=0,
              title='err t0 [minutes]',
              loc='best',
              fontsize='xx-small')

    f.tight_layout()
    f.savefig(savname, bbox_inches='tight')
    print('made {:s}'.format(savname))
    f.savefig(savname.replace('.pdf', '.png'), dpi=300, bbox_inches='tight')
    print('made {:s}'.format(savname.replace('.pdf', '.png')))
def calculate_timing_accuracy(plname, period_guess):
    """
    Compare the TESS mid-times against a linear ephemeris that was fit to the
    literature times only.

    The literature times ('<plname>_manual.csv') are assigned epochs with
    `period_guess` and fit with a straight line. The TESS times (rows whose
    'original_reference' is 'me' in the "selected" CSV) are then assigned
    epochs relative to the fitted t0. At those epochs, the range allowed by
    shifting t0 and the period by their 1-sigma fit uncertainties gives the
    quoted prediction uncertainty.

    Returns:
        tuple of:
            lsfit_t0, lsfit_t0_err, lsfit_period, lsfit_period_err,
            epoch, tmid, err_tmid,
            tess_epoch, tess_tmid, tess_err_tmid,
            diff_seconds, err_prediction_seconds
    """
    data_root = '/home/luke/Dropbox/proj/tessorbitaldecay/data'

    #
    # literature-only times --> linear ephemeris
    #
    lit_path = os.path.join(
        data_root,
        'manual_literature_time_concatenation',
        '{:s}_manual.csv'.format(plname)
    )
    lit_df = pd.read_csv(lit_path, sep=';', comment=None)

    tmid = nparr(lit_df['t0_BJD_TDB'])
    err_tmid = nparr(lit_df['err_t0'])

    finite = np.isfinite(tmid) & np.isfinite(err_tmid)
    tmid, err_tmid = tmid[finite], err_tmid[finite]

    epoch, t0_guess = get_epochs_given_midtimes_and_period(
        tmid, period_guess, verbose=True
    )

    params, cov = curve_fit(
        linear_model, epoch, tmid, p0=(period_guess, t0_guess), sigma=err_tmid
    )

    # parameter order follows p0: (period, t0)
    lsfit_period, lsfit_t0 = params[0], params[1]
    lsfit_period_err = cov[0,0]**0.5
    lsfit_t0_err = cov[1,1]**0.5

    #
    # observed TESS times, to be compared against the prediction
    #
    selected_path = os.path.join(
        data_root,
        'literature_plus_TESS_times',
        '{:s}_literature_and_TESS_times_O-C_vs_epoch_selected.csv'.
        format(plname)
    )
    selected_df = pd.read_csv(selected_path, sep=';', comment=None)

    from_tess = nparr(selected_df['original_reference'] == 'me')
    tess_tmid = nparr(selected_df['sel_transit_times_BJD_TDB'])[from_tess]
    tess_err_tmid = nparr(
        selected_df['err_sel_transit_times_BJD_TDB']
    )[from_tess]

    tess_finite = np.isfinite(tess_tmid) & np.isfinite(tess_err_tmid)
    tess_tmid, tess_err_tmid = tess_tmid[tess_finite], tess_err_tmid[tess_finite]

    tess_epoch, _ = get_epochs_given_midtimes_and_period(
        tess_tmid, period_guess, t0_fixed=lsfit_t0, verbose=True
    )

    #
    # envelope of the ephemeris at the TESS epochs. the epochs can have either
    # sign, so take max/min over both period excursions.
    #
    t0_hi = lsfit_t0+lsfit_t0_err
    t0_lo = lsfit_t0-lsfit_t0_err
    period_hi = lsfit_period+lsfit_period_err
    period_lo = lsfit_period-lsfit_period_err

    tmid_expected = lsfit_t0 + lsfit_period*tess_epoch
    tmid_upper = np.maximum(
        t0_hi + tess_epoch*period_hi,
        t0_hi + tess_epoch*period_lo
    )
    tmid_lower = np.minimum(
        t0_lo + tess_epoch*period_lo,
        t0_lo + tess_epoch*period_hi
    )

    plus_err = tmid_upper - tmid_expected
    minus_err = tmid_expected - tmid_lower

    # observed minus expected, and the symmetrized prediction error, both
    # converted from days to seconds.
    diff_seconds = (tess_tmid - tmid_expected)*24*60*60
    err_prediction_seconds = np.mean([plus_err, minus_err], axis=0)*24*60*60

    return (
        lsfit_t0, lsfit_t0_err, lsfit_period, lsfit_period_err,
        epoch, tmid, err_tmid,
        tess_epoch, tess_tmid, tess_err_tmid,
        diff_seconds, err_prediction_seconds
    )
def get_mcmc_timing_accuracy(plname, period_guess):
    """
    Fit a linear ephemeris to the literature-only times of `plname` twice --
    once with `compute_mcmc`, once with least squares -- and print both
    results for comparison.

    The sample directory, the label and the planet parameters handed to
    `compute_mcmc` are hard-coded below.

    Returns:
        1, always.
    """
    # literature-only times
    lit_path = (
        '/home/luke/Dropbox/proj/tessorbitaldecay/data/{:s}_manual.csv'
        .format(plname)
    )
    lit_df = pd.read_csv(lit_path, sep=';', comment=None)

    tmid = nparr(lit_df['t0_BJD_TDB'])
    err_tmid = nparr(lit_df['err_t0'])

    finite = np.isfinite(tmid) & np.isfinite(err_tmid)
    tmid, err_tmid = tmid[finite], err_tmid[finite]

    epoch, t0_guess = get_epochs_given_midtimes_and_period(
        tmid, period_guess, verbose=True)

    stacked = nparr([epoch, tmid, err_tmid])

    # get max likelihood initial guess
    theta_start = best_theta(1, stacked, data_occ=None)

    chain_dir = '/home/luke/local/emcee_chains/wasp4b_line_fitting_check'
    if not os.path.exists(chain_dir):
        os.mkdir(chain_dir)

    # the MCMC run is always labelled as WASP-4b, whatever `plname` was given
    chain_label = 'WASP-4b'
    planet_params = 0, 0, 0, 0, 1.338231466 * u.day, 2455073.841312848 * u.day

    mcmc_fit = compute_mcmc(1,
                            stacked,
                            planet_params,
                            theta_start,
                            chain_label,
                            data_occ=None,
                            overwriteexistingsamples=True,
                            max_n_steps=10000,
                            sampledir=chain_dir,
                            nworkers=16,
                            plotdir='../results/')

    # least-squares fit of the same data
    from scipy.optimize import curve_fit
    params, cov = curve_fit(linear_model,
                            epoch,
                            tmid,
                            p0=(period_guess, t0_guess),
                            sigma=err_tmid)

    # parameter order follows p0: (period, t0)
    ls_period, ls_t0 = params[0], params[1]
    ls_period_err = cov[0, 0]**0.5
    ls_t0_err = cov[1, 1]**0.5

    print('\n')
    print('LEAST SQUARES GIVES')
    print('period: {:.10f}, t0: {:.10f}'.format(ls_period, ls_t0))
    print('period_err: {:.3e}, t0_err: {:.3e}'.format(
        ls_period_err, ls_t0_err))
    print('\n')
    print('MCMC GIVES')
    for summary_key in ('medianparams', 'std_perrs', 'std_merrs'):
        print(mcmc_fit['fitinfo'][summary_key])
    print('\n')

    return 1
def make_plots_for_ephem_precision_check(plname,
                                         period_guess,
                                         xlim=None,
                                         ylim=None,
                                         manualkdebandwidth=None):
    """
    Fit a linear ephemeris to the literature-only times of `plname`, compare
    the TESS times against it, and make the diagnostic plots (absolute and
    relative O-C diagrams, KDE-vs-gaussian comparisons).

    Args:
        plname: planet name, used to build the input CSV paths. For
            'WASP-18b', TESS times with uncertainty of 1 minute or more are
            dropped.
        period_guess: period used to assign epochs and to start the fit.
        xlim, ylim: passed to the two O-C plotting helpers.
        manualkdebandwidth: passed as-is to the relative KDE plot; for the
            absolute KDE plot it is multiplied by the mean prediction error
            (or left as None).

    Returns:
        tuple (lsfit_t0, lsfit_t0_err, lsfit_period, lsfit_period_err,
        tess_epoch, tess_tmid, tess_err_tmid).
    """
    #
    # literature-only times --> linear ephemeris
    #
    lit_table = pd.read_csv('../data/{:s}_manual.csv'.format(plname),
                            sep=';',
                            comment=None)

    lit_tmid = nparr(lit_table['t0_BJD_TDB'])
    lit_err = nparr(lit_table['err_t0'])

    ok = np.isfinite(lit_tmid) & np.isfinite(lit_err)
    lit_tmid, lit_err = lit_tmid[ok], lit_err[ok]

    lit_epoch, t0_guess = get_epochs_given_midtimes_and_period(
        lit_tmid, period_guess, verbose=True)

    best, cov = curve_fit(linear_model,
                          lit_epoch,
                          lit_tmid,
                          p0=(period_guess, t0_guess),
                          sigma=lit_err)

    # parameter order follows p0: (period, t0)
    lsfit_period, lsfit_t0 = best[0], best[1]
    lsfit_period_err = cov[0, 0]**0.5
    lsfit_t0_err = cov[1, 1]**0.5

    #
    # observed TESS times (rows with reference 'me')
    #
    sel_table = pd.read_csv(
        '../data/{:s}_literature_and_TESS_times_O-C_vs_epoch_selected.csv'.
        format(plname),
        sep=';',
        comment=None)

    is_tess = nparr(sel_table['original_reference'] == 'me')
    tess_tmid = nparr(sel_table['sel_transit_times_BJD_TDB'])[is_tess]
    tess_err_tmid = nparr(
        sel_table['err_sel_transit_times_BJD_TDB'])[is_tess]

    good = np.isfinite(tess_tmid) & np.isfinite(tess_err_tmid)
    if plname == 'WASP-18b':
        # extra precision cut for this planet: below 1 minute
        good &= (tess_err_tmid * 24 * 60 < 1)

    tess_tmid, tess_err_tmid = tess_tmid[good], tess_err_tmid[good]

    tess_epoch, _ = get_epochs_given_midtimes_and_period(tess_tmid,
                                                         period_guess,
                                                         t0_fixed=lsfit_t0,
                                                         verbose=True)

    #
    # ephemeris uncertainty at the TESS epochs, from shifting both t0 and
    # period by +/- their fit uncertainties.
    #
    expected = lsfit_t0 + lsfit_period * tess_epoch
    lower = ((lsfit_t0 - lsfit_t0_err) +
             (lsfit_period - lsfit_period_err) * tess_epoch)
    upper = ((lsfit_t0 + lsfit_t0_err) +
             (lsfit_period + lsfit_period_err) * tess_epoch)

    plus_err = upper - expected
    minus_err = expected - lower

    # observed minus expected, and symmetrized prediction error, in seconds
    diff_seconds = (tess_tmid - expected) * 24 * 60 * 60
    err_prediction_seconds = np.mean([plus_err, minus_err],
                                     axis=0) * 24 * 60 * 60

    # residuals in units of the prediction uncertainty
    normalized_resid = diff_seconds / err_prediction_seconds

    # O-C diagrams, absolute and then relative
    _plot_absolute_O_minus_C(plname,
                             tess_epoch,
                             diff_seconds,
                             tess_err_tmid * 24 * 60 * 60,
                             err_prediction_seconds,
                             xlim=xlim,
                             ylim=ylim)

    _plot_relative_O_minus_C(plname,
                             tess_epoch,
                             diff_seconds,
                             tess_err_tmid * 24 * 60 * 60,
                             err_prediction_seconds,
                             xlim=xlim,
                             ylim=ylim)

    # do the normalized residuals look like draws from a gaussian?
    ks2samp_txt = _run_ks2sample_vs_gaussian(normalized_resid)

    # kernel density estimates of (prediction - observation)
    _plot_kde_vs_gaussian_relative(normalized_resid,
                                   ks2samp_txt,
                                   plname,
                                   err_prediction_seconds,
                                   manualkdebandwidth=manualkdebandwidth)

    absolute_bandwidth = (
        None if manualkdebandwidth is None
        else manualkdebandwidth * np.mean(err_prediction_seconds)
    )

    _plot_kde_vs_gaussian_absolute(diff_seconds,
                                   ks2samp_txt,
                                   plname,
                                   err_prediction_seconds,
                                   manualkdebandwidth=absolute_bandwidth)

    return (lsfit_t0, lsfit_t0_err, lsfit_period, lsfit_period_err,
            tess_epoch, tess_tmid, tess_err_tmid)
def precision_of_predicted_ephemeris(plname='WASP-4b',
                                     period_guess=1.33823204):
    """
    Using the manually collected mid-times, report how precisely the linear
    ephemeris predicts the transit time at the epoch of TESS observation.

    A line is fit to the literature times. Then, for every TESS sector whose
    known-planet overlap table lists this planet's host, the predicted
    mid-time at the mid-sector epoch is printed. The range allowed by
    shifting t0 and period by their fit uncertainties is printed with it.
    """
    #
    # literature-only times --> linear ephemeris
    #
    lit = pd.read_csv('../data/{:s}_manual.csv'.format(plname),
                      sep=';',
                      comment=None)

    lit_tmid = nparr(lit['t0_BJD_TDB'])
    lit_err = nparr(lit['err_t0'])

    finite = np.isfinite(lit_tmid) & np.isfinite(lit_err)
    lit_tmid, lit_err = lit_tmid[finite], lit_err[finite]

    lit_epoch, t0_guess = get_epochs_given_midtimes_and_period(
        lit_tmid, period_guess, verbose=True)

    best, cov = curve_fit(linear_model,
                          lit_epoch,
                          lit_tmid,
                          p0=(period_guess, t0_guess),
                          sigma=lit_err)

    # parameter order follows p0: (period, t0)
    lsfit_period, lsfit_t0 = best[0], best[1]
    lsfit_period_err = cov[0, 0]**0.5
    lsfit_t0_err = cov[1, 1]**0.5

    #
    # ephemeris uncertainty inside each TESS observing window
    #
    sector_windows = pd.read_csv('../data/tess_sector_time_windows.csv')
    overlap_files = glob('../data/kane_knownplanet_tess_overlap/'
                         'kane_knownplanets_sector*.csv')

    # host name = everything before the first 'b' in the planet name
    hostname = plname.split('b')[0]

    for overlap_file in overlap_files:

        overlap = pd.read_csv(overlap_file)

        # skip sectors in which the host is not observed
        if not np.isin(hostname, nparr(overlap['pl_hostname'])):
            continue

        # file names count sectors from 0; the window table counts from 1
        sector_index = int(search('sector{:d}.csv', overlap_file)[0])
        window = sector_windows[
            sector_windows['sector_num'] == sector_index + 1]

        st = float(window['start_time_HJD'].iloc[0])
        et = float(window['end_time_HJD'].iloc[0])
        mt = st + (et - st) / 2

        st_epoch = (st - lsfit_t0) / lsfit_period
        et_epoch = (et - lsfit_t0) / lsfit_period
        # integer epoch at mid-window (truncated, not rounded)
        mt_epoch = int((mt - lsfit_t0) / lsfit_period)

        # what is range of allowed tmid at the mid-observation epoch?
        expected = lsfit_t0 + lsfit_period * mt_epoch
        lower = ((lsfit_t0 - lsfit_t0_err) +
                 (lsfit_period - lsfit_period_err) * mt_epoch)
        upper = ((lsfit_t0 + lsfit_t0_err) +
                 (lsfit_period + lsfit_period_err) * mt_epoch)

        plus_err = upper - expected
        minus_err = expected - lower

        print('-' * 42)
        print('\n{:s}'.format(plname))
        print('using only the literature times (no TESS times)')
        print('started with period_guess {}'.format(period_guess))
        print('got')
        print('least-squares period {} +/- {}'.format(
            lsfit_period, lsfit_period_err))
        print('least-squares t0 {} +/- {}'.format(lsfit_t0, lsfit_t0_err))
        print('converts to')
        print('err_period: {:.2e} seconds. err_t0: {:.2e} seconds'.format(
            lsfit_period_err * 24 * 60 * 60, lsfit_t0_err * 24 * 60 * 60))
        print('calculated')
        print('at epoch {:d} = BJD {:.6f}'.format(mt_epoch, expected))
        print('allowed range of +{:.6f} -{:.6f} days'.format(
            plus_err, minus_err))
        print('= +{:.6f} -{:.6f} seconds'.format(plus_err * 24 * 60 * 60,
                                                 minus_err * 24 * 60 * 60))
        print('\n')
def main(is_dayspecific_exofop_upload=1, cdipssource_vnum=0.4, uploadnamestr='sectors_12_thru_13_clear_threshold'): """ Put together a few useful CSV candidate summaries: * bulk uploads to exofop/tess * observer info sparse (focus on TICIDs, gaia mags, positions on sky, etc) * observer info full (stellar rvs for membership assessment; ephemeris information) * merge of everything (exoFOP upload, + the subset of gaia information useful to observers) ---------- Args: is_dayspecific_exofop_upload: if True, reads in the manually-written (from google spreadsheet) comments and source_ids, and writes those to a special "TO_EXOFOP" csv file. uploadnamestr: used as unique identifying string in file names """ # # Read in the results from the fits # paramglob = os.path.join( fitdir, "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/*fitparameters.csv" ) parampaths = glob(paramglob) statusglob = os.path.join( fitdir, "sector-*_CLEAR_THRESHOLD/fitresults/hlsp_*gaiatwo*_llc/*.stat") statuspaths = glob(statusglob) statuses = [ dict(load_status(f)['fivetransitparam_fit']) for f in statuspaths ] param_df = pd.concat((pd.read_csv(f, sep='|') for f in parampaths)) outpath = os.path.join( fitdir, "{}_{}_mergedfitparams.csv".format(today_YYYYMMDD(), uploadnamestr)) param_df['param_path'] = parampaths param_df.to_csv(outpath, index=False, sep='|') print('made {}'.format(outpath)) status_df = pd.DataFrame(statuses) status_df['statuspath'] = statuspaths status_gaiaids = list( map( lambda x: int( os.path.dirname(x).split('gaiatwo')[1].split('-')[0].lstrip( '0')), statuspaths)) status_df['source_id'] = status_gaiaids if is_dayspecific_exofop_upload: # # Manually commented candidates are the only ones we're uploading. 
# manual_comment_df = pd.read_csv( '/nfs/phtess2/ar0/TESS/PROJ/lbouma/cdips/data/exoFOP_uploads/{}_cdips_candidate_upload.csv' .format(today_YYYYMMDD()), sep=",") common = status_df.merge(manual_comment_df, on='source_id', how='inner') sel_status_df = status_df[status_df.source_id.isin(common.source_id)] # # WARN: the MCMC fits should have converged before uploading. # (20190918 had two exceptions, where the fit looked fine.) # if len(sel_status_df[sel_status_df['is_converged'] == 'False']) > 0: print('\nWRN! THE FOLLOWING CANDIDATES ARE NOT CONVERGED') print(sel_status_df[sel_status_df['is_converged'] == 'False']) param_gaiaids = list( map( lambda x: int( os.path.basename(x).split('gaiatwo')[1].split('-')[0]. lstrip('0')), parampaths)) param_df['source_id'] = param_gaiaids # # Require that you actually have a parameter file (...). # _df = sel_status_df.merge(param_df, on='source_id', how='inner') to_exofop_df = param_df[param_df.source_id.isin(_df.source_id)] if len(to_exofop_df) != len(manual_comment_df): print('\nWRN! {} CANDIDATES DID NOT HAVE PARAMETERS'.format( len(manual_comment_df) - len(to_exofop_df))) print('They are...') print(manual_comment_df[~manual_comment_df.source_id. isin(to_exofop_df.source_id)]) print('\n') # # Duplicate entries in "to_exofop_df" are multi-sector. Average their # parameters (really will end up just being durations) across sectors, # and then remove the duplicate multi-sector rows using the "groupby" # aggregator. This removes the string-based columns, which we can # reclaim by a "drop_duplicates" call, since they don't have # sector-specific information. Then, assign comments and format as # appropriate for ExoFop-TESS. Unique tag for the entire upload. 
# to_exofop_df['source_id'] = to_exofop_df['source_id'].astype(str) mean_val_to_exofop_df = to_exofop_df.groupby( 'target').mean().reset_index() string_cols = [ 'target', 'flag', 'disp', 'tag', 'group', 'notes', 'source_id' ] dup_dropped_str_df = (to_exofop_df.drop_duplicates( subset=['target'], keep='first', inplace=False)[string_cols]) out_df = mean_val_to_exofop_df.merge(dup_dropped_str_df, how='left', on='target') # # The above procedure got the epochs on multisector planets wrong. # Determine (t0,P) by fitting a line to entries with >=3 sectors # instead. For the two-sector case, due to bad covariance matrices, # just use the newest ephemeris. # multisector_df = (to_exofop_df[to_exofop_df.target.groupby( to_exofop_df.target).transform('value_counts') > 1]) u_multisector_df = out_df[out_df.target.isin(multisector_df.target)] # temporarily drop the multisector rows from out_df (they will be # re-merged) out_df = out_df.drop(np.argwhere( out_df.target.isin(multisector_df.target)).flatten(), axis=0) ephem_d = {} for ix, t in enumerate(np.unique(multisector_df.target)): sel = (multisector_df.target == t) tmid = nparr(multisector_df[sel].epoch) tmid_err = nparr(multisector_df[sel].epoch_unc) init_period = nparr(multisector_df[sel].period.mean()) E, init_t0 = get_epochs_given_midtimes_and_period(tmid, init_period, verbose=False) popt, pcov = curve_fit(linear_model, E, tmid, p0=(init_period, init_t0), sigma=tmid_err) if np.all(np.isinf(pcov)): # if least-squares doesn't give good error (i.e., just two # epochs), take the most recent epoch. 
s = np.argmax(tmid) use_t0 = tmid[s] use_t0_err = tmid_err[s] use_period = nparr(multisector_df[sel].period)[s] use_period_err = nparr(multisector_df[sel].period_unc)[s] else: use_t0 = popt[1] use_t0_err = pcov[1, 1]**0.5 use_period = popt[0] use_period_err = pcov[0, 0]**0.5 if DEBUG: print( 'init tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format( tmid, tmid_err, nparr(multisector_df[sel].period), nparr(multisector_df[sel].period_unc))) print( 'use tmid {}, tmiderr {}\nperiod {}, perioderr {}'.format( use_t0, use_t0_err, use_period, use_period_err)) print(10 * '-') ephem_d[ix] = { 'target': t, 'epoch': use_t0, 'epoch_unc': use_t0_err, 'period': use_period, 'period_unc': use_period_err } ephem_df = pd.DataFrame(ephem_d).T mdf = ephem_df.merge(u_multisector_df, how='left', on='target', suffixes=('', '_DEPRECATED')) mdf = mdf.drop([c for c in mdf.columns if 'DEPRECATED' in c], axis=1, inplace=False) temp_df = out_df.append(mdf, ignore_index=True, sort=False) out_df = temp_df to_exofop_df = out_df[COLUMN_ORDER] # to_exofop_df = mdf[COLUMN_ORDER] # special behavior for 2020/02/07 fix # to_exofop_df['flag'] = 'newparams' _df = manual_comment_df[manual_comment_df.source_id.isin( to_exofop_df.source_id)] comments = list(_df['comment']) # comments = 'Fixed ephemeris bug. (Old epoch was erroneous).' 
# #2020/02/07 for c in comments: assert len(c) <= 119 to_exofop_df = to_exofop_df.sort_values(by="source_id") _df = _df.sort_values(by="source_id") to_exofop_df['notes'] = comments to_exofop_df['tag'] = ('{}_bouma_cdips-v01_00001'.format( today_YYYYMMDD())) istoi = ~to_exofop_df['target'].astype(str).str.startswith('TIC') if np.any(istoi): newtargetname = 'TOI' + to_exofop_df[istoi].target.astype(str) to_exofop_df.loc[istoi, 'target'] = newtargetname outpath = os.path.join( exofopdir, "{}_{}_w_sourceid.csv".format(today_YYYYMMDD(), uploadnamestr)) to_exofop_df.to_csv(outpath, index=False, sep='|') print('made {}'.format(outpath)) to_exofop_df = to_exofop_df.drop(['source_id'], axis=1) outpath = os.path.join( exofopdir, "params_planet_{}_001.txt".format(today_YYYYMMDD())) for c in ['epoch', 'epoch_unc', 'period', 'period_unc']: to_exofop_df[c] = to_exofop_df[c].astype(float) to_exofop_df = to_exofop_df.round(FORMATDICT) to_exofop_df['depth'] = to_exofop_df['depth'].astype(int) to_exofop_df['depth_unc'] = to_exofop_df['depth_unc'].astype(int) to_exofop_df.to_csv(outpath, index=False, sep='|', header=False) print('made {}'.format(outpath)) # manually check these... print('\n' + 42 * '=' + '\n') print('\nPeriod uncertainties [minutes]') print(to_exofop_df['period_unc'] * 24 * 60) print('\nEpoch uncertainties [minutes]') print(to_exofop_df['epoch_unc'] * 24 * 60) print('\nPlanet radii [Rearth]') print(to_exofop_df[['radius', 'radius_unc', 'notes']]) print('\n' + 42 * '=' + '\n') # # above is the format exofop-TESS wants. however it's not particularly # useful for followup. for that, we want: gaia IDs, magnitudes, ra, dec. 
# gaiaids = list( map( lambda x: int( os.path.basename(x).split('gaiatwo')[1].split('-')[0].lstrip( '0')), parampaths)) lcnames = list( map( lambda x: os.path.basename(x).replace('_fitparameters.csv', '.fits' ), parampaths)) lcdir = '/nfs/phtess2/ar0/TESS/PROJ/lbouma/CDIPS_LCS/sector-*/cam?_ccd?/' lcpaths = [glob(os.path.join(lcdir, lcn))[0] for lcn in lcnames] # now get the header values kwlist = [ 'RA_OBJ', 'DEC_OBJ', 'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'TESSMAG', 'Gaia-ID', 'TICID', 'TICTEFF', 'TICRAD', 'TICMASS' ] for k in kwlist: thislist = [] for l in lcpaths: thislist.append(iu.get_header_keyword(l, k, ext=0)) param_df[k] = np.array(thislist) # now search for stellar RV xmatch res = [ fr.get_rv_xmatch(ra, dec, G_mag=gmag, dr2_sourceid=s) for ra, dec, gmag, s in zip( list(param_df['RA_OBJ']), list(param_df['DEC_OBJ']), list(param_df['phot_g_mean_mag']), list(param_df['Gaia-ID'])) ] res = np.array(res) param_df['stellar_rv'] = res[:, 0] param_df['stellar_rv_unc'] = res[:, 1] param_df['stellar_rv_provenance'] = res[:, 2] # make column showing whether there are ESO spectra available res = [ fr.wrangle_eso_for_rv_availability(ra, dec) for ra, dec in zip(list(param_df['RA_OBJ']), list(param_df['DEC_OBJ'])) ] param_df['eso_rv_availability'] = nparr(res)[:, 2] # # try to get cluster RV. first from Soubiran, then from Kharchenko. # to do this, load in CDIPS target catalog. merging the CDCLSTER name # (comma-delimited string) against the target catalog on source identifiers # allows unique cluster name identification, since I already did that, # earlier. 
# cdips_df = ccl.get_cdips_pub_catalog(ver=cdipssource_vnum) dcols = 'cluster;reference;source_id;unique_cluster_name' ccdf = cdips_df[dcols.split(';')] ccdf['source_id'] = ccdf['source_id'].astype(np.int64) mdf = param_df.merge(ccdf, how='left', left_on='source_id', right_on='source_id') param_df['unique_cluster_name'] = nparr(mdf['unique_cluster_name']) s19 = gvc.get_soubiran_19_rv_table() k13_param = gvc.get_k13_param_table() c_rvs, c_err_rvs, c_rv_nstar, c_rv_prov = [], [], [], [] for ix, row in param_df.iterrows(): if row['unique_cluster_name'] in nparr(s19['ID']): sel = (s19['ID'] == row['unique_cluster_name']) c_rvs.append(float(s19[sel]['RV'].iloc[0])) c_err_rvs.append(float(s19[sel]['e_RV'].iloc[0])) c_rv_nstar.append(int(s19[sel]['Nsele'].iloc[0])) c_rv_prov.append('Soubiran+19') continue elif row['unique_cluster_name'] in nparr(k13_param['Name']): sel = (k13_param['Name'] == row['unique_cluster_name']) c_rvs.append(float(k13_param[sel]['RV'].iloc[0])) c_err_rvs.append(float(k13_param[sel]['e_RV'].iloc[0])) c_rv_nstar.append(int(k13_param[sel]['o_RV'].iloc[0])) c_rv_prov.append('Kharchenko+13') continue else: c_rvs.append(np.nan) c_err_rvs.append(np.nan) c_rv_nstar.append(np.nan) c_rv_prov.append('') param_df['cluster_rv'] = c_rvs param_df['cluster_err_rv'] = c_err_rvs param_df['cluster_rv_nstar'] = c_rv_nstar param_df['cluster_rv_provenance'] = c_rv_prov # # finally, begin writing the output # outpath = ("/home/lbouma/proj/cdips/results/fit_gold/" "{}_{}_fitparams_plus_observer_info.csv".format( today_YYYYMMDD(), uploadnamestr)) param_df.to_csv(outpath, index=False, sep='|') print('made {}'.format(outpath)) # # sparse observer info cut # scols = [ 'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ', 'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD', 'TICMASS', 'Gaia-ID' ] sparam_df = param_df[scols] outpath = ("/home/lbouma/proj/cdips/results/fit_gold/" 
"{}_{}_observer_info_sparse.csv".format(today_YYYYMMDD(), uploadnamestr)) sparam_df.to_csv(outpath, index=False, sep='|') print('made {}'.format(outpath)) # # full observer info cut # scols = [ 'target', 'flag', 'disp', 'tag', 'group', 'RA_OBJ', 'DEC_OBJ', 'CDIPSREF', 'CDCLSTER', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag', 'TICID', 'TESSMAG', 'TICTEFF', 'TICRAD', 'TICMASS', 'Gaia-ID', 'period', 'period_unc', 'epoch', 'epoch_unc', 'depth', 'depth_unc', 'duration', 'duration_unc', 'radius', 'radius_unc', 'stellar_rv', 'stellar_rv_unc', 'stellar_rv_provenance', 'eso_rv_availability', 'cluster_rv', 'cluster_err_rv', 'cluster_rv_nstar', 'cluster_rv_provenance' ] sparam_df = param_df[scols] outpath = ("/home/lbouma/proj/cdips/results/fit_gold/" "{}_{}_observer_info_full.csv".format(today_YYYYMMDD(), uploadnamestr)) sparam_df.to_csv(outpath, index=False, sep='|') print('made {}'.format(outpath))