示例#1
0
def fitFlare(x, y, yerr, tstart, tstop, skew_fac=10):
    mask = (x > tstart) & (x < tstop)
    mu0 = (tstart + tstop) / 2
    sig0 = (tstop - tstart) / 2
    A0 = np.max(y) * 100
    skew = 0

    try:
        # Fit a gaussian to the segment
        popt1, pcov1 = curve_fit(fh.gaussian,
                                 x[mask],
                                 y[mask],
                                 p0=(mu0, sig0, A0),
                                 sigma=yerr[mask])
        y_model = fh.gaussian(x[mask], popt1[0], popt1[1], popt1[2])
        chi1 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # Fit the Davenport 2014 flare model to the segment
        popt2, pcov2 = curve_fit(fh.aflare1,
                                 x[mask],
                                 y[mask],
                                 p0=(mu0, sig0, A0),
                                 sigma=yerr[mask])
        y_model = fh.aflare1(x[mask], popt2[0], popt2[1], popt2[2])
        chi2 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # If the flare model fit worked, calculate the skew by centering on the peak of the aflare model
        # Use a window scaled to the FWHM of the flare model for integration
        mu = popt2[0]  #np.trapz(x[mask]*A*y[mask], x[mask])
        f_hwhm = popt2[1] / 2
        t1_skew, t2_skew = mu - skew_fac * f_hwhm, mu + skew_fac * f_hwhm
        skew_mask = (x > t1_skew) & (x < t2_skew)

        # Measure the skew by treating time = x and flux = p(x). Calculate the
        # third moment of p(x)
        A = 1 / np.trapz(y[skew_mask], x[skew_mask])
        var = np.trapz((x[skew_mask] - mu)**2 * A * y[skew_mask], x[skew_mask])
        stddev = np.sqrt(np.fabs(var))
        skew = np.trapz((x[skew_mask] - mu)**3 * A * y[skew_mask],
                        x[skew_mask]) / stddev**3
    except:
        traceback.print_exc()
        empty = np.zeros(3)
        return empty, empty, -1, empty, empty, -1, 0, 0

    n_pts = len(x[mask])
    n_pts_true = np.floor(((tstop - tstart) * u.d).to(u.min).value / 2)
    coverage = n_pts / n_pts_true

    return popt1, np.sqrt(pcov1.diagonal()), chi1, popt2, np.sqrt(
        pcov2.diagonal()), chi2, skew, coverage
示例#2
0
    # Now plot individual flares

    fig, axes = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

    for idx in range(len(flares)):
        fl = flares.iloc[idx]
        if idx > 15:
            break
        row_idx = idx // 4
        col_idx = idx % 4

        t0, t1 = fl['t0'], fl['t1']
        mask = (time >= t0) & (time < t1)
        axes[row_idx][col_idx].errorbar(time[mask],
                                        flux[mask] / median - smo[mask],
                                        error[mask] / median)

        xmodel = np.linspace(t0, t1)
        ymodel = fh.aflare1(xmodel, fl['tpeak'], fl['fwhm'], fl['f_amp'])
        axes[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{f}$ = ' + '{:.3f}'.format(fl['f_chisq']) \
                                    + '\n FWHM/window = ' + '{:.2f}'.format(fl['f_fwhm_win']))
        ymodel = fh.gaussian(xmodel, fl['mu'], fl['std'], fl['g_amp'])
        axes[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{g}$ = ' + '{:.3f}'.format(fl['g_chisq']) \
                                    + '\n FWHM/window = ' + '{:.2f}'.format(fl['g_fwhm_win']))
        axes[row_idx][col_idx].legend()
        axes[row_idx][col_idx].set_title('Skew = ' +
                                         '{:.3f}'.format(fl['skew']))

    fig.savefig(plots_path + file + '_flares.png', format='png')
    plt.close('all')
示例#3
0
def examineFlare(file, df, df_param, path, t0_list=[]):
    if len(t0_list) > 0:
        flares = df[np.isin(df['t0'], t0_list)]
    else:
        flares = df[df['file'] == file]
    par = df_param[df_param['file'] == file].iloc[0]

    median = par['med']

    # First plot the LC with the CPA points overlaid

    with fits.open(path + file, mode='readonly') as hdulist:
        tess_bjd = hdulist[1].data['TIME']
        quality = hdulist[1].data['QUALITY']
        pdcsap_flux = hdulist[1].data['PDCSAP_FLUX']
        pdcsap_flux_error = hdulist[1].data['PDCSAP_FLUX_ERR']

    fig, axes = plt.subplots(figsize=(16, 4))
    axes.plot(tess_bjd, pdcsap_flux, zorder=1)

    ok_cut = (quality == 0) & (~np.isnan(tess_bjd)) & (~np.isnan(pdcsap_flux)) \
                  & (~np.isnan(pdcsap_flux_error))

    time = tess_bjd[ok_cut]
    flux = pdcsap_flux[ok_cut]
    error = pdcsap_flux_error[ok_cut]

    time_smo, smo, var = np.loadtxt(path + 'gp/' + file + '.gp')
    smo_int = np.interp(time, time_smo, smo)

    for idx in range(len(flares)):
        fl = flares.iloc[idx]
        t0, t1 = fl['t0'], fl['t1']
        mask = (time >= t0) & (time < t1)
        axes.scatter(time[mask], flux[mask], zorder=2)

    axes.set_xlabel('Time [BJD - 2457000, days]')
    axes.set_ylabel('Flux [e-/s]')
    fig.savefig('lc.png', format='png')

    # Now plot individual flares

    fig, axes = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

    for idx in range(len(flares)):
        fl = flares.iloc[idx]
        if idx > 15:
            break
        row_idx = idx // 4
        col_idx = idx % 4

        t0, t1 = fl['t0'], fl['t1']
        mask = (time >= t0) & (time < t1)
        axes[row_idx][col_idx].errorbar(time[mask],
                                        flux[mask] / median - smo_int[mask],
                                        error[mask] / median)

        xmodel = np.linspace(t0, t1)
        ymodel = fh.aflare1(xmodel, fl['tpeak'], fl['fwhm'], fl['f_amp'])
        axes[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{f}$ = ' + '{:.3f}'.format(fl['f_chisq']) \
                                       + '\n FWHM/window = ' + '{:.2f}'.format(fl['f_fwhm_win']))
        ymodel = fh.gaussian(xmodel, fl['mu'], fl['std'], fl['g_amp'])
        axes[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{g}$ = ' + '{:.3f}'.format(fl['g_chisq']) \
                                       + '\n FWHM/window = ' + '{:.2f}'.format(fl['g_fwhm_win']))
        axes[row_idx][col_idx].legend()
        axes[row_idx][col_idx].set_title('Skew = ' +
                                         '{:.3f}'.format(fl['skew']))
示例#4
0
def procFlares(prefix,
               filenames,
               path,
               clobberGP=False,
               makePlots=False,
               writeLog=True):
    if makePlots:
        plots_path = path + 'plots/'
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)

    gp_path = path + 'gp/'

    #if not os.path.exists(gp_path):
    #os.makedirs(gp_path)

    log_path = path + 'log/'

    #if not os.path.exists(log_path):
    #os.makedirs(log_path)

    if writeLog:
        if os.path.exists(log_path + prefix + '.log'):
            os.remove(log_path + prefix + '.log')

    # Columns for flare table
    FL_files = np.array([])
    FL_TICs = np.array([])
    FL_id = np.array([])
    FL_t0 = np.array([])
    FL_t1 = np.array([])
    FL_f0 = np.array([])
    FL_f1 = np.array([])
    FL_ed = np.array([])
    FL_ed_err = np.array([])
    FL_skew = np.array([])
    FL_cover = np.array([])
    FL_mu = np.array([])
    FL_std = np.array([])
    FL_g_amp = np.array([])
    FL_mu_err = np.array([])
    FL_std_err = np.array([])
    FL_g_amp_err = np.array([])
    FL_tpeak = np.array([])
    FL_fwhm = np.array([])
    FL_f_amp = np.array([])
    FL_tpeak_err = np.array([])
    FL_fwhm_err = np.array([])
    FL_f_amp_err = np.array([])
    FL_g_chisq = np.array([])
    FL_f_chisq = np.array([])
    FL_g_fwhm_win = np.array([])
    FL_f_fwhm_win = np.array([])

    # Columns for param table
    P_median = np.array([])
    P_s_window = np.array([])
    P_acf_1dt = np.array([])
    P_acf_amp = np.array([])

    failed_files = []

    for k in range(len(filenames)):
        start_time = timing.time()
        filename = filenames[k]
        TIC = int(filename.split('-')[-3])
        file = path + filename

        if makePlots:
            fig, axes = plt.subplots(figsize=(16, 16), nrows=4, sharex=True)

        print('Processing ' + filename)
        gp_data_file = gp_path + filename + '.gp'
        gp_param_file = gp_path + filename + '.gp.par'
        median = -1
        s_window = -1
        acf_1dt = -1
        acf_amp = -1
        with fits.open(file, mode='readonly') as hdulist:
            try:
                tess_bjd = hdulist[1].data['TIME']
                quality = hdulist[1].data['QUALITY']
                pdcsap_flux = hdulist[1].data['PDCSAP_FLUX']
                pdcsap_flux_error = hdulist[1].data['PDCSAP_FLUX_ERR']
            except:
                P_median = np.append(P_median, median)
                P_s_window = np.append(P_s_window, s_window)
                P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
                P_acf_amp = np.append(P_acf_amp, acf_amp)
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print('Reading file ' + filename + ' failed')
                continue

        if makePlots:
            axes[0].plot(tess_bjd, pdcsap_flux)

        # Cut out poor quality points
        ok_cut = (quality == 0) & (~np.isnan(tess_bjd)) & (~np.isnan(pdcsap_flux))\
                  & (~np.isnan(pdcsap_flux_error))

        tbl = Table([tess_bjd[ok_cut], pdcsap_flux[ok_cut], \
                  pdcsap_flux_error[ok_cut]],
                     names=('TIME', 'PDCSAP_FLUX', 'PDCSAP_FLUX_ERR'))
        df_tbl = tbl.to_pandas()

        median = np.nanmedian(df_tbl['PDCSAP_FLUX'])

        # Estimate the period of the LC with autocorrelation
        acf = fh.autocorr_estimator(tbl['TIME'], tbl['PDCSAP_FLUX']/median, \
                                    yerr=tbl['PDCSAP_FLUX_ERR']/median,
                                    min_period=0.1, max_period=27, max_peaks=2)
        if len(acf['peaks']) > 0:
            acf_1dt = acf['peaks'][0]['period']
            acf_amp = acf['autocorr'][1][np.where(
                acf['autocorr'][0] == acf_1dt)]
            mask = np.where(
                (acf['autocorr'][0] == acf['peaks'][0]['period']))[0]
            acf_1pk = acf['autocorr'][1][mask][0]
            s_window = int(acf_1dt /
                           np.fabs(np.nanmedian(np.diff(df_tbl['TIME']))) / 6)
        else:
            acf_1dt = (tbl['TIME'][-1] - tbl['TIME'][0]) / 2
            acf_amp = 0
            s_window = 128

        P_median = np.append(P_median, median)
        P_s_window = np.append(P_s_window, s_window)
        P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
        P_acf_amp = np.append(P_acf_amp, acf_amp)

        # Run GP fit on the lightcurve if we haven't already
        if os.path.exists(gp_data_file) and not clobberGP:
            # Failed GP regression will produce an empty file
            if os.path.getsize(gp_data_file) == 0:
                print(file + ' failed (previously) during GP regression')
                failed_files.append(filename)
                continue

            print('GP file already exists, loading...')
            times, smo, var = np.loadtxt(gp_data_file)
        else:
            smo = np.zeros(len(df_tbl['TIME']))
            try:
                if makePlots:
                    ax = axes[1]
                else:
                    ax = None
                times, smo, var, params = iterGP_rotation(df_tbl['TIME'].values, df_tbl['PDCSAP_FLUX'].values/median, \
                                          df_tbl['PDCSAP_FLUX_ERR'].values/median, acf_1dt, acf_1pk, ax=ax)

                #np.savetxt(gp_param_file, params['logs2'], params['logamp'], params['logperiod'], \
                #           params['logq0'], params['logdeltaq'], params['mix'], params['period'])
                np.savetxt(gp_param_file, params)
                np.savetxt(gp_data_file, (times, smo, var))

            except:
                traceback.print_exc()
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print(filename + ' failed during GP fitting')
                continue

        # The GP is produced from a downsampled lightcurve. Need to interpolate to
        # compare GP and full LC

        smo_int = np.interp(tbl['TIME'], times, smo)

        # Search for flares in the smoothed lightcurve
        x = np.array(tbl['TIME'])
        y = np.array(tbl['PDCSAP_FLUX'] / median - smo_int)
        yerr = np.array(tbl['PDCSAP_FLUX_ERR'] / median)

        FL = fh.FINDflare(y,
                          yerr,
                          avg_std=True,
                          std_window=s_window,
                          N1=3,
                          N2=1,
                          N3=3)

        if makePlots:
            axes[3].plot(x, y, zorder=1)
            for j in range(len(FL[0])):
                s1, s2 = FL[0][j], FL[1][j] + 1
                axes[3].scatter(x[s1:s2], y[s1:s2], zorder=2)

        # Measure properties of detected flares
        if makePlots:
            fig_fl, axes_fl = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

        for j in range(len(FL[0])):
            s1, s2 = FL[0][j], FL[1][j] + 1
            tstart, tstop = x[s1], x[s2]
            dx_fac = 10
            dx = tstop - tstart
            x1 = tstart - dx * dx_fac / 2
            x2 = tstop + dx * dx_fac / 2
            mask = (x > x1) & (x < x2)

            # Mask out other flare detections when fitting models
            other_mask = np.ones(len(x), dtype=bool)
            for i in range(len(FL[0])):
                s1other, s2other = FL[0][i], FL[1][i] + 1
                if i == j:
                    continue
                other_mask[s1other:s2other] = 0

            popt1, pstd1, g_chisq, popt2, pstd2, f_chisq, skew, cover = \
                fitFlare(x[other_mask], y[other_mask], yerr[other_mask], x1, x2)

            mu, std, g_amp = popt1[0], popt1[1], popt1[2]
            mu_err, std_err, g_amp_err = pstd1[0], pstd1[1], pstd1[2]

            tpeak, fwhm, f_amp = popt2[0], popt2[1], popt2[2]
            tpeak_err, fwhm_err, f_amp_err = pstd2[0], pstd2[1], pstd2[2]

            f_fwhm_win = fwhm / (x2 - x1)
            g_fwhm_win = std / (x2 - x1)

            ed, ed_err = measureED(x, y, yerr, tpeak, fwhm)

            FL_files = np.append(FL_files, filename)
            FL_TICs = np.append(FL_TICs, TIC)
            FL_t0 = np.append(FL_t0, x1)
            FL_t1 = np.append(FL_t1, x2)
            FL_f0 = np.append(FL_f0, np.nanmedian(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_f1 = np.append(FL_f1, np.nanmax(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_ed = np.append(FL_ed, ed)
            FL_ed_err = np.append(FL_ed_err, ed_err)

            FL_skew = np.append(FL_skew, skew)
            FL_cover = np.append(FL_cover, cover)
            FL_mu = np.append(FL_mu, mu)
            FL_std = np.append(FL_std, std)
            FL_g_amp = np.append(FL_g_amp, g_amp)
            FL_mu_err = np.append(FL_mu_err, mu_err)
            FL_std_err = np.append(FL_std_err, std_err)
            FL_g_amp_err = np.append(FL_g_amp_err, g_amp_err)

            FL_tpeak = np.append(FL_tpeak, tpeak)
            FL_fwhm = np.append(FL_fwhm, fwhm)
            FL_f_amp = np.append(FL_f_amp, f_amp)
            FL_tpeak_err = np.append(FL_tpeak_err, tpeak_err)
            FL_fwhm_err = np.append(FL_fwhm_err, fwhm_err)
            FL_f_amp_err = np.append(FL_f_amp_err, f_amp_err)

            FL_g_chisq = np.append(FL_g_chisq, g_chisq)
            FL_f_chisq = np.append(FL_f_chisq, f_chisq)

            FL_g_fwhm_win = np.append(FL_g_fwhm_win, g_fwhm_win)
            FL_f_fwhm_win = np.append(FL_f_fwhm_win, f_fwhm_win)

            if makePlots and j < 15:
                row_idx = j // 4
                col_idx = j % 4
                axes_fl[row_idx][col_idx].errorbar(x[mask],
                                                   y[mask],
                                                   yerr=yerr[mask])
                axes_fl[row_idx][col_idx].scatter(x[s1:s2], y[s1:s2])

                if popt1[0] > 0:
                    xmodel = np.linspace(x1, x2)
                    ymodel = fh.aflare1(xmodel, tpeak, fwhm, f_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{f}$ = ' + '{:.3f}'.format(f_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(f_fwhm_win))
                    ymodel = fh.gaussian(xmodel, mu, std, g_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{g}$ = ' + '{:.3f}'.format(g_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(g_fwhm_win))
                    axes_fl[row_idx][col_idx].axvline(tpeak - fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].axvline(tpeak + fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].legend()
                    axes_fl[row_idx][col_idx].set_title('Skew = ' +
                                                        '{:.3f}'.format(skew))

        if makePlots:
            fig.suptitle(filename)
            axes[0].set_xlabel('Time [BJD - 2457000, days]')
            axes[0].set_ylabel('Flux [e-/s]')
            axes[1].set_xlabel('Time [BJD - 2457000, days]')
            axes[1].set_ylabel('Normalized Flux')
            axes[2].set_xlabel('Time [BJD - 2457000, days]')
            axes[2].set_ylabel('Rolling STD of GP')
            axes[3].set_xlabel('Time [BJD - 2457000, days]')
            axes[3].set_ylabel('Normalized Flux - GP')
            fig.savefig(plots_path + filename + '.png', format='png')

            if len(FL[0] > 0):
                fig_fl.suptitle(filename)
                fig_fl.savefig(plots_path + filename + '_flares.png',
                               format='png')

            plt.clf()

        if writeLog:
            with open(log_path + prefix + '.log', 'a') as f:
                time_elapsed = timing.time() - start_time
                num_flares = len(FL[0])

                f.write('{:^15}'.format(str(k+1) + '/' + str(len(filenames))) + \
                        '{:<60}'.format(filename) + '{:<20}'.format(time_elapsed) + \
                        '{:<10}'.format(num_flares) + '\n')

        # Periodically write to the flare table file and param table file
        l = k + 1
        ALL_TIC = pd.Series(filenames).str.split(
            '-', expand=True).iloc[:, -3].astype('int')
        ALL_FILES = pd.Series(filenames).str.split('/', expand=True).iloc[:,
                                                                          -1]

        flare_out = pd.DataFrame(data={'file':FL_files,'TIC':FL_TICs, 't0':FL_t0, 't1':FL_t1, \
                                    'med_flux':FL_f0, 'peak_flux':FL_f1, 'ed':FL_ed, \
                                    'ed_err':FL_ed_err, 'skew':FL_skew, 'cover':FL_cover, \
                                    'mu':FL_mu, 'std':FL_std, 'g_amp': FL_g_amp, 'mu_err':FL_mu_err, \
                                    'std_err':FL_std_err, 'g_amp_err':FL_g_amp_err,'tpeak':FL_tpeak, \
                                    'fwhm':FL_fwhm, 'f_amp':FL_f_amp, 'tpeak_err':FL_tpeak_err, \
                                    'fwhm_err':FL_fwhm_err, 'f_amp_err':FL_f_amp_err,'f_chisq':FL_f_chisq, \
                                    'g_chisq':FL_g_chisq, 'f_fwhm_win':FL_f_fwhm_win, 'g_fwhm_win':FL_g_fwhm_win})
        flare_out.to_csv(log_path + prefix + '_flare_out.csv', index=False)

        param_out = pd.DataFrame(data={'file':ALL_FILES[:l], 'TIC':ALL_TIC[:l], 'med':P_median[:l], \
                                    's_window':P_s_window[:l], 'acf_1dt':P_acf_1dt[:l], 'acf_amp':P_acf_amp[:l]})
        param_out.to_csv(log_path + prefix + '_param_out.csv', index=False)

    for k in range(len(failed_files)):
        print(failed_files[k])