from multiprocessing import Pool
import os
import contextlib

# Set OMP_NUM_THREADS=1 in this process's environment before anything else
# runs.  Presumably this keeps OpenMP-backed libraries single-threaded so
# they do not oversubscribe cores alongside the worker pool -- TODO confirm.
os.environ["OMP_NUM_THREADS"] = "1"

from multiprocessing import cpu_count
# Report how many CPUs the machine exposes.  NOTE(review): ``ncpu`` is only
# printed here; the pool below is hard-coded to 8 workers.
ncpu = cpu_count()
print("{0} CPUs".format(ncpu))

if __name__ == '__main__':
    # ``contextlib.closing`` calls ``pool.close()`` when the block exits.
    with contextlib.closing(Pool(processes=8)) as pool:
        # Wall-clock start of the sampling run.  NOTE(review): ``time``,
        # ``fitting``, ``lnprobfn``, ``initial_center``, ``model``, ``hfile``
        # and ``run_params`` are not defined in the visible lines; they must
        # come from elsewhere in the file.
        tstart = time.time()
        out = fitting.run_emcee_sampler(lnprobfn,
                                        initial_center,
                                        model,
                                        pool=pool,
                                        hdf5=hfile,
                                        **run_params)
        # The sampler call returns a 3-tuple that is unpacked here.
        esampler, burn_loc0, burn_prob0 = out
        # Elapsed sampling time in seconds.
        edur = time.time() - tstart

# Redirect standard output to ``fout`` (defined outside the visible lines).
sys.stdout = fout

# NOTE(review): ``edur`` is only assigned inside the ``__main__`` guard above,
# so this line relies on the module being run as a script (unless ``edur`` is
# also assigned elsewhere in the file).
print('done emcee in {0}s'.format(edur))

write_results.write_hdf5(hfile,
                         run_params,
                         model,
                         obs,
                         esampler,
                         guesses,
Exemplo n.º 2
0
def main():
    """Command-line driver for building, fitting and plotting the sample.

    Parses the command line and then runs up to three independent stages,
    each enabled by its own flag:

    * ``--build-sample``: read the parent redmapper catalog, select a subset
      of objects and write them to ``<prefix>_sample.fits`` in ``datadir()``.
    * ``--dofit``: for every object in the parent sample, run an initial
      optimization (Powell, Nelder-Mead or Levenberg-Marquardt, chosen by
      ``run_params``), sample with emcee and write pickle / HDF5 results.
      Objects whose ``*_mcmc.h5`` file already exists are skipped unless
      ``--refit`` is given, in which case the old file is removed first.
    * ``--qaplots``: for every object with an existing ``*_mcmc.h5`` file,
      write best-fit SED, chain-evolution and corner plots as PNG files.

    ``--first`` and ``--last`` are parsed but not used by the code below.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--prefix',
                        type=str,
                        default='redmapper_sdssphot',
                        help='String to prepend to I/O files.')
    parser.add_argument('--nthreads',
                        type=int,
                        default=16,
                        help='Number of cores to use concurrently.')
    parser.add_argument('--first',
                        type=int,
                        default=0,
                        help='Index of first object to fit.')
    parser.add_argument('--last',
                        type=int,
                        default=None,
                        help='Index of last object to fit.')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='Random number seed.')
    parser.add_argument('--build-sample',
                        action='store_true',
                        help='Build the sample.')
    parser.add_argument('--dofit', action='store_true', help='Run prospector!')
    parser.add_argument(
        '--refit',
        action='store_true',
        help='Refit even if the prospector output files exist.')
    parser.add_argument('--qaplots',
                        action='store_true',
                        help='Make some neat plots.')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Be loquacious.')
    args = parser.parse_args()

    # Specify the run parameters and initialize the SPS object.
    run_params = {
        'prefix': args.prefix,
        'verbose': args.verbose,
        'seed': args.seed,
        # Initial optimization choices (nmin is only for L-M optimization).
        'do_levenburg': True,
        'do_powell': False,
        'do_nelder_mead': False,
        # Builtin max keeps nmin a plain Python int.
        'nmin': max(10, args.nthreads),
        # emcee (MCMC) sampling parameters.
        'nwalkers': 128,
        'nburn': [32, 32, 64],
        'niter': 256,
        'interval': 0.1,  # save 10% of the chains at a time
        # Nestle fitting parameters
        'nestle_method': 'single',
        'nestle_npoints': 200,
        'nestle_maxcall': int(1e6),
        # Multiprocessing
        'nthreads': args.nthreads,
        # SPS initialization parameters
        'compute_vega_mags': False,
        'vactoair_flag': False,  # use wavelengths in air
        'zcontinuous': 1,  # interpolate in metallicity
    }

    rand = np.random.RandomState(args.seed)

    # Both the fitting and the QA-plot stages use ``sps``.  It used to be
    # skipped whenever --build-sample was given, which left ``sps`` undefined
    # when that flag was combined with --dofit or --qaplots.
    if not args.build_sample or args.dofit or args.qaplots:
        t0 = time()
        print('Initializing CSPSpecBasis...')
        sps = CSPSpecBasis(zcontinuous=run_params['zcontinuous'],
                           compute_vega_mags=run_params['compute_vega_mags'],
                           vactoair_flag=run_params['vactoair_flag'])
        print('...took {:.1f} seconds.'.format(time() - t0))

    if args.build_sample:
        # TODO: read our own catalog here instead.
        # Read the parent redmapper catalog, choose a subset of objects and
        # write out.
        cat = read_redmapper()

        # Choose objects with masses from iSEDfit, Kravtsov, and pymorph, but
        # for now just take the first (up to) 100 rows of the catalog.
        # Clamping to len(cat) avoids indexing past the end of a short catalog.
        these = np.arange(min(100, len(cat)))
        print('Selecting {} galaxies.'.format(len(these)))
        out = cat[these]

        outfile = os.path.join(datadir(),
                               '{}_sample.fits'.format(run_params['prefix']))
        print('Writing {}'.format(outfile))
        fitsio.write(outfile, out, clobber=True)

    if args.dofit:
        import h5py
        import emcee
        from prospect import fitting
        from prospect.io import write_results

        # ``pool`` is never assigned inside this function.  Use a module-level
        # ``pool`` when the file defines one, otherwise fall back to None so
        # the calls below do not raise NameError.
        shared_pool = globals().get('pool')

        # TODO: make this stage work with our own catalog too (see getobs).
        # Read the parent sample and loop on each object.
        cat = read_parent(prefix=run_params['prefix'])
        for ii, obj in enumerate(cat):
            objprefix = '{0:05}'.format(obj['ISEDFIT_ID'])
            print('Working on object {}/{} with prefix {}.'.format(
                ii + 1, len(cat), objprefix))

            # Check for the HDF5 output file / fitting results --
            outroot = os.path.join(
                datadir(), '{}_{}'.format(run_params['prefix'], objprefix))
            hfilename = os.path.join(
                datadir(), '{}_{}_mcmc.h5'.format(run_params['prefix'],
                                                  objprefix))
            if os.path.isfile(hfilename):
                if args.refit:
                    os.remove(hfilename)
                else:
                    print('Prospector fitting results {} exist; skipping.'.
                          format(hfilename))
                    continue

            # Grab the photometry for this object and then initialize the priors
            # and the SED model.
            obs = getobs(obj)
            model = load_model(zred=obs['zred'], seed=args.seed)

            # Get close to the right answer doing a simple minimization.
            if run_params['verbose']:
                print('Free parameters: {}'.format(model.free_params))
                print('Initial parameter values: {}'.format(
                    model.initial_theta))
            initial_theta = model.rectify_theta(
                model.initial_theta)  # make zeros tiny numbers

            if bool(run_params.get('do_powell', True)):
                tstart = time()
                # optimization options
                powell_opt = {
                    'ftol': run_params.get('ftol', 0.5e-5),
                    'xtol': 1e-6,
                    'maxfev': run_params.get('maxfev', 5000)
                }

                # extra arguments for chisqfn
                chi2args = (model, obs, sps, run_params['verbose'])
                guesses, pinit = fitting.pminimize(
                    chisqfn,
                    initial_theta,
                    args=chi2args,
                    model=model,
                    method='powell',
                    opts=powell_opt,
                    pool=shared_pool,
                    nthreads=run_params['nthreads'])
                best = np.argmin([p.fun for p in guesses])

                # Hack to recenter values outside the parameter bounds!
                initial_center = fitting.reinitialize(
                    guesses[best].x,
                    model,
                    edge_trunc=run_params.get('edge_trunc', 0.1))
                initial_prob = -1 * guesses[best]['fun']
                pdur = time() - tstart
                if run_params['verbose']:
                    print('Powell initialization took {:.1f} seconds.'.format(
                        pdur))
                    print('Best Powell guesses: {}'.format(initial_center))
                    print('Initial probability: {}'.format(initial_prob))

            elif bool(run_params.get('do_nelder_mead', True)):
                from scipy.optimize import minimize
                tstart = time()
                # extra arguments for chisqfn
                chi2args = (model, obs, sps, run_params['verbose'])
                guesses = minimize(chisqfn,
                                   initial_theta,
                                   args=chi2args,
                                   method='nelder-mead')
                pdur = time() - tstart

                # Hack to recenter values outside the parameter bounds!
                initial_center = fitting.reinitialize(
                    guesses.x,
                    model,
                    edge_trunc=run_params.get('edge_trunc', 0.1))
                initial_prob = -1 * guesses['fun']

                if run_params['verbose']:
                    print('Nelder-Mead initialization took {:.1f} seconds.'.
                          format(pdur))
                    print('Best guesses: {}'.format(initial_center))
                    print('Initial probability: {}'.format(initial_prob))

            elif bool(run_params.get('do_levenburg', True)):
                tstart = time()
                nmin = run_params['nmin']

                chi2args = (model, obs, sps)
                pinitial = fitting.minimizer_ball(initial_theta,
                                                  nmin,
                                                  model,
                                                  seed=run_params['seed'])

                # One least-squares job per starting point.
                lsargs = [(chivecfn, pinit, chi2args) for pinit in pinitial]

                if run_params['nthreads'] > 1:
                    lm_pool = multiprocessing.Pool(run_params['nthreads'])
                    try:
                        guesses = lm_pool.map(_doleast_squares, lsargs)
                    finally:
                        # Close and join so the workers are reaped even when
                        # ``map`` raises; otherwise a new set of processes
                        # would be left behind for every object.
                        lm_pool.close()
                        lm_pool.join()
                else:
                    guesses = [_doleast_squares(lsargs1) for lsargs1 in lsargs]

                chisq = [np.sum(r.fun**2) for r in guesses]
                best = np.argmin(chisq)
                # NOTE(review): the other branches use ``-1 * fun`` here;
                # confirm that ``-log(chisq / 2)`` is the intended value.
                initial_prob = -np.log(chisq[best] / 2)

                initial_center = fitting.reinitialize(
                    guesses[best].x,
                    model,
                    edge_trunc=run_params.get('edge_trunc', 0.1))

                pdur = time() - tstart
                if run_params['verbose']:
                    print(
                        'Levenburg-Marquardt initialization took {:.1f} seconds.'
                        .format(pdur))
                    print('Best guesses: {}'.format(initial_center))
                    print('Initial probability: {}'.format(initial_prob))

                # NOTE(review): looks like a debugging aid -- the same
                # 'test.png' in the working directory is overwritten for
                # every object.
                from prospector_plot_utilities import bestfit_sed
                fig = bestfit_sed(obs,
                                  theta=initial_center,
                                  sps=sps,
                                  model=model)
                fig.savefig('test.png')

            else:
                if run_params['verbose']:
                    print('Skipping initial minimization.')
                guesses = None
                pdur = 0.0
                initial_center = initial_theta.copy()
                initial_prob = None

            # Write some basic info to the HDF5 file--
            hfile = h5py.File(hfilename, 'a')
            try:
                write_results.write_h5_header(hfile, run_params, model)
                write_results.write_obs_to_h5(hfile, obs)

                if run_params['verbose']:
                    print('Started emcee sampling on {}'.format(asctime()))
                tstart = time()
                out = fitting.run_emcee_sampler(
                    lnprobfn,
                    initial_center,
                    model,
                    verbose=run_params['verbose'],
                    nthreads=run_params['nthreads'],
                    nwalkers=run_params['nwalkers'],
                    nburn=run_params['nburn'],
                    niter=run_params['niter'],
                    prob0=initial_prob,
                    hdf5=hfile,
                    postargs=(model, obs, sps),
                    pool=shared_pool)
                esampler, burn_p0, burn_prob0 = out
                del out

                edur = time() - tstart
                if run_params['verbose']:
                    print('Finished emcee sampling in {:.2f} minutes.'.format(
                        edur / 60.0))

                # Update the HDF5 file with the results.
                write_results.write_pickles(
                    run_params,
                    model,
                    obs,
                    esampler,
                    guesses,
                    outroot=outroot,
                    toptimize=pdur,
                    tsample=edur,
                    sampling_initial_center=initial_center,
                    post_burnin_center=burn_p0,
                    post_burnin_prob=burn_prob0)
                # NOTE(review): this passes the file *name* while ``hfile``
                # is still open on the same path -- confirm whether the open
                # handle should be passed instead.
                write_results.write_hdf5(
                    hfilename,
                    run_params,
                    model,
                    obs,
                    esampler,
                    guesses,
                    toptimize=pdur,
                    tsample=edur,
                    sampling_initial_center=initial_center,
                    post_burnin_center=burn_p0,
                    post_burnin_prob=burn_prob0)
            finally:
                # Always release the HDF5 handle, even if sampling or
                # writing fails part-way through.
                hfile.close()

    if args.qaplots:
        import h5py
        from prospect.io import read_results
        from prospector_plot_utilities import param_evol, subtriangle, bestfit_sed

        # Read the parent sample and loop on each object.
        cat = read_parent(prefix=run_params['prefix'])
        for obj in cat:
            objprefix = '{0:05}'.format(obj['ISEDFIT_ID'])

            # Grab the emcee / prospector outputs.
            h5file = os.path.join(
                datadir(), '{}_{}_mcmc.h5'.format(run_params['prefix'],
                                                  objprefix))
            if not os.path.isfile(h5file):
                print('HDF5 file {} not found; skipping.'.format(h5file))
                continue
            print('Reading {}'.format(h5file))

            results, guesses, model = read_results.results_from(h5file)
            nwalkers, niter, nparams = results['chain'][:, :, :].shape

            # --------------------------------------------------
            # Figure: Generate the best-fitting SED.
            qafile = os.path.join(
                datadir(), '{}_{}_sed.png'.format(args.prefix, objprefix))
            print('Writing {}'.format(qafile))

            fig = bestfit_sed(results['obs'],
                              chain=results['chain'],
                              lnprobability=results['lnprobability'],
                              sps=sps,
                              model=model,
                              seed=results['run_params']['seed'])
            fig.savefig(qafile)

            # --------------------------------------------------
            # Figure: Visualize a random sampling of the MCMC chains.
            qafile = os.path.join(
                datadir(), '{}_{}_chains.png'.format(args.prefix, objprefix))
            print('Writing {}'.format(qafile))

            # Plot a random 30% of the walkers, drawn without replacement.
            thesechains = rand.choice(nwalkers,
                                      size=int(0.3 * nwalkers),
                                      replace=False)
            fig = param_evol(results, chains=thesechains)
            fig.savefig(qafile)

            # --------------------------------------------------
            # Figure: Generate a corner/triangle plot of the free parameters.
            qafile = os.path.join(
                datadir(), '{}_{}_corner.png'.format(args.prefix, objprefix))
            print('Writing {}'.format(qafile))

            fig = subtriangle(results, thin=2)
            fig.savefig(qafile)
Exemplo n.º 3
0
    else:
        if rp['verbose']:
            print('No minimization requested.')
        guesses = None
        pdur = 0.0
        initial_center = initial_theta.copy()
        initial_prob = None

    # -------
    # Sample
    # -------
    if rp['verbose']:
        print('emcee sampling...')
    tstart = time.time()
    out = fitting.run_emcee_sampler(lnprobfn, initial_center, model,
                                    postkwargs=postkwargs, initial_prob=initial_prob,
                                    pool=pool, hdf5=hfile, **rp)
    esampler, burn_p0, burn_prob0 = out
    edur = time.time() - tstart
    if rp['verbose']:
        print('done emcee in {0}s'.format(edur))

    # -------------------------
    # Output pickles (and HDF5)
    # -------------------------
    print("Writing to {}".format(outroot))
    write_results.write_pickles(rp, model, obsdat, esampler, guesses,
                                outroot=outroot, toptimize=pdur, tsample=edur,
                                sampling_initial_center=initial_center,
                                post_burnin_center=burn_p0,
                                post_burnin_prob=burn_prob0)
Exemplo n.º 4
0
            print('done Powell in {0}s'.format(pdur))
            print('best Powell guess:{0}'.format(initial_center))
    else:
        powell_guesses = None
        pdur = 0.0
        initial_center = initial_theta.copy()
        initial_prob = None

    # -------
    # Sample
    # -------
    if rp['verbose']:
        print('emcee sampling...')
    tstart = time.time()
    out = fitting.run_emcee_sampler(lnprobfn, initial_center, model,
                                    postkwargs=postkwargs, initial_prob=initial_prob,
                                    pool=pool, hdf5=hfile, **rp)
    esampler, burn_p0, burn_prob0 = out
    edur = time.time() - tstart
    if rp['verbose']:
        print('done emcee in {0}s'.format(edur))

    # -------------------------
    # Output pickles (and HDF5)
    # -------------------------
    write_results.write_pickles(rp, model, obsdat, esampler, powell_guesses,
                                outroot=outroot, toptimize=pdur, tsample=edur,
                                sampling_initial_center=initial_center,
                                post_burnin_center=burn_p0,
                                post_burnin_prob=burn_prob0)
    if hfile is None: