Exemplo n.º 1
0
def test_fivetransitparam_fit_magseries_easy():
    """
    Easy case: one TESS sector containing a hot-Jupiter candidate.

    Runs the five-parameter transit fit (t0, period, incl, sma, rp/star) and
    checks that it converges, recovers the known period, and reports
    believable error bars on t0 and period.
    """

    # Light curve for GaiaDR2 3007171311355035136. The source id is the part
    # of the path between 'gaiatwo' and the first '-', minus leading zeros.
    lcpath = LCPATHS[0]
    after_prefix = lcpath.split('gaiatwo')[1]
    padded_id = after_prefix.split('-')[0]
    identifier = str(padded_id.lstrip('0'))

    lc = iu.get_data_keyword_list(lcpath, ['TMID_BJD', 'TFA2'])
    hdr = iu.get_header_keyword_list(lcpath, ['TICTEFF', 'TICRAD', 'TICLOGG'])

    time, mag = lc['TMID_BJD'], lc['TFA2']

    # Magnitudes -> relative flux, normalized by its (NaN-ignoring) median.
    zp_mag, zp_flux = 12, 1e4
    flux = zp_flux * 10**(-0.4 * (mag - zp_mag))
    flux /= np.nanmedian(flux)
    # Constant per-point uncertainty.
    err = np.ones_like(flux) * 1e-4

    teff, rstar, logg = (hdr[key] for key in ('TICTEFF', 'TICRAD', 'TICLOGG'))

    workdir = os.getcwd()
    fit_savdir = os.path.join(workdir, 'fivetransitparam_results')
    chain_savdir = os.path.join(workdir, 'fivetransitparam_chains')

    fit_options = dict(
        n_mcmc_steps=4000,
        overwriteexistingsamples=False,
        n_transit_durations=5,
        make_tlsfit_plot=True,
        exp_time_minutes=30,
        bandpass='******',
        magsarefluxes=True,
        nworkers=multiprocessing.cpu_count(),
    )
    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, **fit_options)

    expected_period = 3.495
    assert is_converged
    assert_allclose(tlsr['period'], expected_period, atol=1e-2)
    assert_allclose(mafr['fitinfo']['finalparams']['period'],
                    expected_period,
                    atol=1e-2)

    # theoretical t0 for this data (like 5 or 6 transits, over 1 TESS sector)
    # is 3.99e+00 min = 6.64e-02 h = 2.77e-03 days.
    for side in ('std_perrs', 'std_merrs'):
        assert mafr['fitinfo']['finalparamerrs'][side]['t0'] < 5e-3

    # guess-timate: period should be better than 3 minutes too (in days).
    three_minutes = 3 / (24 * 60)
    for side in ('std_perrs', 'std_merrs'):
        assert mafr['fitinfo']['finalparamerrs'][side]['period'] < three_minutes
Exemplo n.º 2
0
def test_fivetransitparam_fit_magseries_hard():
    """
    Hard case: one TESS sector containing a difficult warm-Jupiter candidate.

    Runs the five-parameter transit fit (t0, period, incl, sma, rp/star) with
    a long chain and checks that it converges, recovers the known period, and
    reports believable error bars on t0 and period.
    """

    # Light curve for GaiaDR2 4827527233363019776. The source id is the part
    # of the path between 'gaiatwo' and the first '-', minus leading zeros.
    lcpath = LCPATHS[1]
    after_prefix = lcpath.split('gaiatwo')[1]
    padded_id = after_prefix.split('-')[0]
    identifier = str(padded_id.lstrip('0'))

    lc = iu.get_data_keyword_list(lcpath, ['TMID_BJD', 'TFA2'])
    hdr = iu.get_header_keyword_list(lcpath, ['TICTEFF', 'TICRAD', 'TICLOGG'])

    time, mag = lc['TMID_BJD'], lc['TFA2']

    # Magnitudes -> relative flux, normalized by its (NaN-ignoring) median.
    zp_mag, zp_flux = 12, 1e4
    flux = zp_flux * 10**(-0.4 * (mag - zp_mag))
    flux /= np.nanmedian(flux)
    # Constant per-point uncertainty.
    err = np.ones_like(flux) * 1e-4

    teff, rstar, logg = (hdr[key] for key in ('TICTEFF', 'TICRAD', 'TICLOGG'))

    workdir = os.getcwd()
    fit_savdir = os.path.join(workdir, 'fivetransitparam_results')
    chain_savdir = os.path.join(workdir, 'fivetransitparam_chains')

    # Autocorrelation time is like 400 steps for this case. But it does
    # converge.
    fit_options = dict(
        n_mcmc_steps=25000,
        overwriteexistingsamples=False,
        n_transit_durations=5,
        make_tlsfit_plot=True,
        exp_time_minutes=30,
        bandpass='******',
        magsarefluxes=True,
        nworkers=32,
    )
    mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, **fit_options)

    # Echo the headline results before asserting on them.
    print(is_converged)
    print(tlsr['period'])
    print(mafr['fitinfo']['finalparams']['period'])

    expected_period = 10.714
    assert is_converged
    assert_allclose(tlsr['period'], expected_period, atol=1e-2)
    assert_allclose(mafr['fitinfo']['finalparams']['period'],
                    expected_period,
                    atol=1e-2)

    ten_minutes = 10 / (24 * 60)

    # exofopTESS quotes t0 for TOI-450 to <1 minute (multisector). < 10 minutes
    # required.
    for side in ('std_perrs', 'std_merrs'):
        assert mafr['fitinfo']['finalparamerrs'][side]['t0'] < ten_minutes

    # guess-timate: period should be better than say 10 minutes.
    for side in ('std_perrs', 'std_merrs'):
        assert mafr['fitinfo']['finalparamerrs'][side]['period'] < ten_minutes
Exemplo n.º 3
0
def test_multithread_speed(nworkers=20):
    """
    Ensure that increasing nworkers speeds up the MCMC sampling.

    The same 1000-step fit is run twice on the light curve of GaiaDR2
    3007171311355035136 (the nice HJ): first with ``nworkers=1``, then with
    ``nworkers`` workers. The two wall-clock durations are printed and
    compared.

    2019/08/15: this test fails.

    Assumption is ideally that run time goes as 1/nworkers. We are a bit nicer
    here, and take out a factor of two for overhead. Even this fails, because
    the emcee multithread scaling in lcfit/transits.mandelagol_fit_magseries is
    non-existent.

    Args:
        nworkers: number of workers used for the second (parallel) run.
    """
    # NOTE: this test fails, because something is wrong with the emcee
    # multithreading in lcfit/transits.py. (This is an issue that would be nice
    # to resolve -- though for the time being not "mission-critical")

    # path and identifier for GaiaDR2 3007171311355035136, the nice HJ.
    lcpath = LCPATHS[0]
    identifier = str(lcpath.split('gaiatwo')[1].split('-')[0].lstrip('0'))

    # Context manager so the file is closed even if HDU access raises.
    with fits.open(lcpath) as hdul:
        hdr, lc = hdul[0].header, hdul[1].data

    time = lc['TMID_BJD']
    mag = lc['TFA2']
    mag_0, f_0 = 12, 1e4
    flux = f_0 * 10**(-0.4 * (mag - mag_0))
    flux /= np.nanmedian(flux)
    err = np.ones_like(flux) * 1e-4

    teff = hdr['TICTEFF']
    rstar = hdr['TICRAD']
    logg = hdr['TICLOGG']

    # Options shared by both runs; only nworkers and the output dirs differ.
    common_options = dict(
        n_mcmc_steps=1000,
        overwriteexistingsamples=True,
        n_transit_durations=5,
        make_tlsfit_plot=True,
        exp_time_minutes=30,
        bandpass='******',
        magsarefluxes=True,
    )

    fit_savdir = os.path.join(os.getcwd(),
                              'fivetransitparam_results_single_thread')
    chain_savdir = os.path.join(os.getcwd(),
                                'fivetransitparam_chains_single_thread')

    start = _time.time()
    fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, nworkers=1, **common_options)
    end_singlethread = _time.time()

    fit_savdir = os.path.join(os.getcwd(),
                              'fivetransitparam_results_manythread')
    chain_savdir = os.path.join(os.getcwd(),
                                'fivetransitparam_chains_manythread')

    fivetransitparam_fit_magseries(
        time, flux, err, teff, rstar, logg, identifier, fit_savdir,
        chain_savdir, nworkers=nworkers, **common_options)
    end_multithread = _time.time()

    multithread_time = end_multithread - end_singlethread
    singlethread_time = end_singlethread - start

    print("Singlethread took {0:.1f} seconds".format(singlethread_time))
    # Both fields are numbered explicitly: mixing '{0}' with a bare '{}' in
    # one template makes str.format raise ValueError.
    print("Multithreaded took {0:.1f} seconds with {1} workers".format(
        multithread_time, nworkers))

    print("{0:.1f} times faster than serial".format(singlethread_time /
                                                    multithread_time))

    # passes, but mainly b/c of the overhead from TLS
    assert multithread_time < singlethread_time

    # fails
    assert multithread_time < singlethread_time / (0.5 * nworkers)
Exemplo n.º 4
0
def _fit_transit_model_single_sector(tfa_sr_path,
                                     lcpath,
                                     outpath,
                                     mdf,
                                     source_id,
                                     supprow,
                                     suppfulldf,
                                     pfdf,
                                     pfrow,
                                     toidf,
                                     ctoidf,
                                     sector,
                                     nworkers,
                                     cdipsvnum=1,
                                     overwrite=1):
    """
    Fit a five-parameter transit model to one sector of data for one source,
    and write the result as a CTOI csv if the MCMC converged.

    CURRENT STATE: after opening the two FITS files and reading their data,
    this function unconditionally raises NotImplementedError (see the FIXME
    below). Everything after that raise is unreachable and documents the
    intended flow only:

      1. Pick the flux column ('IRM2' if the candidate was pspline-detrended,
         else 'TFASR2'), mask orbit edges, convert mags to flux, sigma-clip,
         and optionally detrend.
      2. For sources in KNOWN_EXTRA_DETREND, save a diagnostic plot and raise
         AssertionError the first time, so the plot gets inspected by hand;
         on later calls the existing plot is taken as approval.
      3. Track progress in '<fit_savdir>/run_status.stat'. Run a 4000-step
         MCMC if nothing has been run yet; then, if still unconverged and the
         source is in LONG_RUN_IDENTIFIERS, run a 25000-step MCMC.
      4. If the status says converged (or the source is in
         SKIP_CONVERGENCE_IDENTIFIERS), call fit_results_to_ctoi_csv;
         otherwise print a warning.

    Args:
        tfa_sr_path: path of the FITS light curve whose HDU 1 supplies the
            time and magnitude columns used for the fit.
        lcpath: path of the FITS light curve whose HDU 0 header supplies
            stellar parameters, TICID, RA_OBJ and DEC_OBJ.
        outpath: output path handed to fit_results_to_ctoi_csv; its directory
            is also used as the fit-results directory.
        source_id: source identifier; used as the fit identifier and looked
            up in the KNOWN_* / LONG_RUN / SKIP_CONVERGENCE collections.
        pfrow: object whose 'pspline_detrended' entry is read with
            ``.iloc[0]`` (presumably a one-row pandas DataFrame -- confirm
            against the caller).
        toidf, ctoidf: passed through to fit_results_to_ctoi_csv.
        nworkers: worker count for the first MCMC run and for the TLS re-run.
        cdipsvnum: passed through to fit_results_to_ctoi_csv.
        mdf, supprow, suppfulldf, pfdf, sector, overwrite: accepted but not
            referenced anywhere in this function body.

    Returns:
        None (there is no return statement).

    Raises:
        NotImplementedError: always, in the current state of the code.
        AssertionError: (unreachable today) if orbit masking fails, or when
            an extra-detrend plot has just been written and needs review.
    """
    try_mcmc = True
    identifier = source_id
    #
    # read and re-detrend lc if needed. (recall: these planet candidates were
    # found using a penalized spline detrending in most cases).
    #
    # NOTE(review): neither HDUList is closed in this function, and the raise
    # below exits with both still open.
    hdul_sr = fits.open(tfa_sr_path)
    hdul = fits.open(lcpath)

    lc_sr = hdul_sr[1].data
    lc, hdr = hdul[1].data, hdul[0].header

    # FIXME: logic needs updating in >=S14 processing
    # Everything below this raise is currently dead code.
    raise NotImplementedError
    is_pspline_dtr = bool(pfrow['pspline_detrended'].iloc[0])

    # flux column: 'IRM2' for pspline-detrended candidates, else 'TFASR2'.
    fluxap = 'IRM2' if is_pspline_dtr else 'TFASR2'

    time, mag = lc_sr['TMID_BJD'], lc_sr[fluxap]
    try:
        time, mag = moe.mask_orbit_start_and_end(time,
                                                 mag,
                                                 raise_expectation_error=False)
    except AssertionError:
        # re-raise with the offending path in the message.
        raise AssertionError(
            'moe.mask_orbit_start_and_end failed for {}'.format(tfa_sr_path))

    flux = vp._given_mag_get_flux(mag)
    # constant per-point uncertainty.
    err = np.ones_like(flux) * 1e-4

    time, flux, err = sigclip_magseries(time,
                                        flux,
                                        err,
                                        magsarefluxes=True,
                                        sigclip=[50, 5])

    if is_pspline_dtr or identifier in KNOWN_EXTRA_DETREND:
        flux, _ = dtr.detrend_flux(time, flux)

    if identifier in KNOWN_EXTRA_DETREND:
        fit_savdir = os.path.dirname(outpath)
        dtrpath = os.path.join(fit_savdir, 'extra_detrend_lc.png')

        # First call: write the diagnostic plot and abort so a human can look
        # at it. Later calls: the plot's existence is taken as approval.
        if not os.path.exists(dtrpath):
            plt.close('all')
            f, ax = plt.subplots(figsize=(6, 3))
            ax.scatter(time,
                       flux,
                       c='black',
                       alpha=0.9,
                       zorder=2,
                       s=8,
                       rasterized=True,
                       linewidths=0)
            ax.set_xlabel('bjdtdb')
            ax.set_ylabel('detrended flux')
            f.savefig(dtrpath, bbox_inches='tight')
            raise AssertionError(
                'U NEED TO MANUALLY LOOK AT {} AND VERIFY ITS OK'.format(
                    dtrpath))
        else:
            print('WRN! found {}. continuing to fit.'.format(dtrpath))

    #
    # define the paths. get the stellar parameters, and do the fit!
    #
    fit_savdir = os.path.dirname(outpath)
    chain_savdir = os.path.dirname(outpath).replace('fitresults', 'samples')

    # NOTE(review): if this call raises, teff/rstar/logg and their errors are
    # never bound. The MCMC branches are then skipped via try_mcmc, but the
    # ctoi-csv branch at the bottom still reads them and would raise
    # UnboundLocalError if the status file says converged or the identifier
    # is in SKIP_CONVERGENCE_IDENTIFIERS.
    try:
        teff, teff_err, rstar, rstar_err, logg, logg_err = (
            get_teff_rstar_logg(hdr))
    except (NotImplementedError, ValueError) as e:
        print(e)
        print('did not get rstar for {}. MUST MANUALLY FIX.'.format(source_id))
        try_mcmc = False

    #
    # initialize status file
    #
    status_file = os.path.join(fit_savdir, 'run_status.stat')
    fittype = 'fivetransitparam_fit'
    if not os.path.exists(status_file):
        save_status(status_file, fittype, {
            'is_converged': False,
            'n_steps_run': 0
        })
    status = load_status(status_file)[fittype]

    #
    # if not converged and no steps previously run:
    #   run 4k steps. write status file.
    #
    # reload status file.
    # if not converged and 4k steps previously run and in long ID list:
    #   run 25k steps, write status file.
    #
    # reload status file.
    # if not converged:
    #   print a warning.
    #
    if identifier in KNOWN_MCMC_FAILS:
        print('WRN! identifier {} requires manual fixing.'.format(identifier))
        try_mcmc = False

    # status values are parsed with str2bool / int after loading
    # (presumably load_status returns strings -- confirm).
    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) == 0 and try_mcmc):

        n_mcmc_steps = 4000

        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time,
            flux,
            err,
            teff,
            rstar,
            logg,
            identifier,
            fit_savdir,
            chain_savdir,
            n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=False,
            n_transit_durations=5,
            make_tlsfit_plot=True,
            exp_time_minutes=30,
            bandpass='******',
            magsarefluxes=True,
            nworkers=nworkers)

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    # NOTE(review): membership is tested on int(identifier) here and for
    # SKIP_CONVERGENCE_IDENTIFIERS, but on the raw identifier for
    # KNOWN_EXTRA_DETREND / KNOWN_MCMC_FAILS -- confirm the element types of
    # those collections.
    status = load_status(status_file)[fittype]
    if (not str2bool(status['is_converged'])
            and int(status['n_steps_run']) != 25000
            and int(identifier) in LONG_RUN_IDENTIFIERS and try_mcmc):

        n_mcmc_steps = 25000

        # NOTE: hard-code nworkers, since we dont get multithreading
        # improvement anyway (this is some kind of bug)
        mafr, tlsr, is_converged = fivetransitparam_fit_magseries(
            time,
            flux,
            err,
            teff,
            rstar,
            logg,
            identifier,
            fit_savdir,
            chain_savdir,
            n_mcmc_steps=n_mcmc_steps,
            overwriteexistingsamples=True,
            n_transit_durations=5,
            make_tlsfit_plot=True,
            exp_time_minutes=30,
            bandpass='******',
            magsarefluxes=True,
            nworkers=4)

        status = {'is_converged': is_converged, 'n_steps_run': n_mcmc_steps}
        save_status(status_file, fittype, status)

    #
    # if converged or in the list of IDs for which its fine to skip convergence
    # (because by-eye, the fits are converged), convert fit results to ctoi csv
    # format
    #
    status = load_status(status_file)[fittype]

    if (str2bool(status['is_converged'])
            or int(identifier) in SKIP_CONVERGENCE_IDENTIFIERS):

        # Probe whether mafr was bound by one of the MCMC branches above in
        # this call; if neither ran, referencing it raises UnboundLocalError.
        try:
            _ = isinstance(mafr, dict)
        except UnboundLocalError:
            #
            # get the MCMC results from the pickle file; regenerate the TLS
            # result.
            #

            fitparamdir = os.path.dirname(status_file)
            fitpklsavpath = os.path.join(
                fitparamdir,
                '{}_phased_fivetransitparam_fit_empiricalerrs.pickle'.format(
                    identifier))
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # presumably this pickle was written by an earlier fit run in the
            # same directory -- only safe if that directory is trusted.
            with open(fitpklsavpath, 'rb') as f:
                mafr = pickle.load(f)

            tlsp = htls.tls_parallel_pfind(time,
                                           flux,
                                           err,
                                           magsarefluxes=True,
                                           tls_rstar_min=0.1,
                                           tls_rstar_max=10,
                                           tls_mstar_min=0.1,
                                           tls_mstar_max=5.0,
                                           tls_oversample=8,
                                           tls_mintransits=1,
                                           tls_transit_template='default',
                                           nbestpeaks=5,
                                           sigclip=None,
                                           nworkers=nworkers)
            tlsr = tlsp['tlsresult']

        ticid = int(hdr['TICID'])
        ra, dec = hdr['RA_OBJ'], hdr['DEC_OBJ']
        print('{} converged. writing ctoi csv.'.format(identifier))
        fit_results_to_ctoi_csv(ticid,
                                ra,
                                dec,
                                mafr,
                                tlsr,
                                outpath,
                                toidf,
                                ctoidf,
                                teff,
                                teff_err,
                                rstar,
                                rstar_err,
                                logg,
                                logg_err,
                                cdipsvnum=cdipsvnum)
    else:
        print('WRN! {} did not converge, after {} steps. MUST MANUALLY FIX.'.
              format(identifier, status['n_steps_run']))