def do_fit(system, verbose):
    """Fit a polynomial drift to the radial velocities of `system`.

    The polynomial degree is read from ``system.model['d']`` and the fit is
    ``polyfit(system.time, system.vrad, degree)``.

    Parameters
    ----------
    system : object providing ``model``, ``time`` and ``vrad``.
    verbose : currently unused; kept so existing callers keep working.

    Returns
    -------
    Whatever ``polyfit`` returns, or None (after logging an error) when
    ``system.model`` cannot be indexed because no model was defined yet.
    """
    try:
        degree = system.model['d']
    except TypeError:
        msg = red('Error: ') + 'Need to run mod before fit. '
        clogger.error(msg)
        return

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning: with the default filters a warning that was
        # already issued once from the same location is silently dropped, so
        # repeated badly-conditioned fits would go unreported.
        warnings.simplefilter('always')
        p = polyfit(system.time, system.vrad, degree)

    if caught:
        msg = yellow('Warning: ') + 'Polyfit may be poorly conditioned. ' \
              + 'Maybe try a lower degree drift?'
        clogger.info(msg)
    return p
def do_multinest(system):
    """Write the data of `system` to disk and launch MultiNest on it.

    The observations (time, vrad, error) are saved to ``input.rv`` in the
    current directory with a two-line header (a description including the
    module-level ``timestamp``, then the number of observations). MultiNest
    is then run through ``mpirun -np 2 ./OPEN/multinest/nest``.

    A failure to start the program, or a non-zero exit status, is logged
    as an error; nothing is raised and the function always returns None.
    """
    msg = blue('INFO: ') + 'Transfering data to MultiNest...'
    clogger.info(msg)

    # write data to file to be read by MultiNest
    nest_filename = 'input.rv'
    nest_header = 'file automatically generated for MultiNest analysis, ' + timestamp
    nest_header += '\n' + str(len(system.time))
    # materialize the rows: savetxt needs a real sequence, not a one-shot
    # iterator (which is what zip() returns on Python 3)
    rows = list(zip(system.time, system.vrad, system.error))
    savetxt(nest_filename, rows,
            header=nest_header,
            fmt=['%12.6f', '%7.5f', '%7.5f'])

    msg = blue('INFO: ') + 'Starting MultiNest...'
    clogger.info(msg)

    # constant command, so no shell is needed to interpret it
    cmd = ['mpirun', '-np', '2', './OPEN/multinest/nest']
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # e.g. mpirun is not installed; with a shell this was only a
        # non-zero exit status, so keep it non-fatal
        msg = red('Error: ') + 'Could not start MultiNest: %s' % err
        clogger.error(msg)
        return

    if status != 0:
        msg = red('Error: ') + 'MultiNest exited with status %d' % status
        clogger.error(msg)
    return
def fit_gp(model, initial, data, ncpu, nwalkers=20):
    """Sample the GP hyperparameters with an emcee ensemble sampler.

    Parameters
    ----------
    model : sequence of four items; only the last one (the log-posterior
        function) is used here.
    initial : initial guess for the hyperparameters; walkers start from
        random perturbations around it.
    data : extra positional arguments forwarded to the log-posterior.
    ncpu : accepted but not used; the sampler is created with ``threads=1``.
    nwalkers : number of walkers in the ensemble.

    Returns
    -------
    (sampler, p, logl) where `p` is the walker position with the highest
    log-probability at the last iteration and `logl` is an array with the
    maximum log-probability over walkers at each production iteration.
    """
    _, _, _, lnprob = model
    ndim = len(initial)

    # scatter the walkers around the initial guess
    walkers = []
    for _ in xrange(nwalkers):
        walkers.append(np.array(initial) + rel(initial, 1) * np.random.randn(ndim))

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=data, threads=1)

    clogger.info(blue(' :: ') + 'Running burn-in...')
    # two burn-in stages; the chain is discarded after each one
    for nsteps in (100, 200):
        walkers, lnp, _ = sampler.run_mcmc(walkers, nsteps)
        sampler.reset()

    niter = 1000
    clogger.info(blue(' :: ') + 'Running %d MCMC chains for %d iterations...' % (nwalkers, niter))

    best_per_step = []
    start = time()
    for walkers, lnp, _ in sampler.sample(walkers, None, None, iterations=niter):
        best_per_step.append(max(lnp))
    elapsed = time() - start

    logl = np.array(best_per_step)
    p = walkers[np.argmax(lnp)]

    clogger.info(blue(' :: ') + 'MCMC took %f seconds' % elapsed)
    return sampler, p, logl
def _apply_restriction(system, keepers):
    """Keep only the observations selected by the boolean mask `keepers`.

    `keepers` has one entry per value of the ``*_full`` vectors, which are
    left intact. For every file in ``system.provenance`` (visited in sorted
    filename order, each owning the next ``provenance[fname][0]`` values)
    the number of values removed is stored in ``provenance[fname][1]``.
    """
    start = 0
    for fname, (ntotal, _) in sorted(system.provenance.items()):
        kept = int(np.sum(keepers[start:start + ntotal]))
        system.provenance[fname][1] = ntotal - kept
        start += ntotal

    # pop out the values from time, vrad, and error
    system.time = system.time_full[keepers]
    system.vrad = system.vrad_full[keepers]
    system.error = system.error_full[keepers]

    # also from extras; a namedtuple is immutable so a new one is built
    filtered = {}
    for i, field in enumerate(system.extras._fields):
        filtered[field] = system.extras_full[i][keepers]
    extra = namedtuple('Extra', system.extras_names)
    system.extras = extra(**filtered)


def do_restrict(system, quantity, *args):
    """Restrict the data of `system` to a subset of the observations.

    Parameters
    ----------
    system : object holding the ``*_full`` vectors, ``extras``,
        ``extras_full``, ``extras_names`` and ``provenance``.
    quantity : one of
        'error' -- keep values with uncertainty <= args[0];
        'date'  -- keep values with args[0] < time < args[1];
        'year'  -- keep values observed in year args[0];
        'years' -- keep values observed in years args[0]..args[1], inclusive.
        Any other value leaves the system untouched.
    *args : the limits described above.

    The selection is always computed from the ``*_full`` vectors, and
    ``system.time``, ``vrad``, ``error`` and ``extras`` are replaced by the
    filtered versions. ``system.provenance[fname][1]`` is set to the number
    of values removed from each file. Returns None.
    """
    if quantity == 'error':
        ## restrict by uncertainty value
        msg = blue('INFO: ') + 'Removing data with uncertainty higher than %f km/s' % args[0]
        clogger.info(msg)
        maxerr = args[0]
        keepers = system.error_full <= maxerr

    elif quantity == 'date':
        ## restrict by date (JD)
        msg = blue('INFO: ') + 'Retaining data between %i and %i JD' % (args[0], args[1])
        clogger.info(msg)
        minjd, maxjd = args[0], args[1]
        lowers = system.time_full <= minjd
        highers = system.time_full >= maxjd
        # keep whatever is neither too early nor too late
        keepers = ~(lowers | highers)

    elif quantity == 'year':
        ## restrict to values from one year
        msg = blue('INFO: ') + 'Retaining data from %i' % args[0]
        clogger.info(msg)
        yr = args[0]
        years = np.array([julian_day_to_date(t)[0] for t in system.time_full])
        keepers = years == yr

    elif quantity == 'years':
        ## restrict to values from a year range
        msg = blue('INFO: ') + 'Retaining data between %i and %i' % (args[0], args[1])
        clogger.info(msg)
        yr1, yr2 = args[0], args[1]
        years = np.array([julian_day_to_date(t)[0] for t in system.time_full])
        keepers = (years >= yr1) & (years <= yr2)

    else:
        return

    _apply_restriction(system, keepers)
    return
def do_genetic(system, just_gen=False):
    """ Carry out the fit using a genetic algorithm and if
    just_gen=False try to improve it with a run of the LM algorithm.

    The model is read from ``system.model`` ('k' keplerians, 'd' drift
    degree). Individuals are flat lists of 6 genes per keplerian, in the
    order P, K, ecc, omega, T0, gam; only the first gam is passed to the
    RV model.

    Results are stored in ``system.fit``:
      - just_gen=True : 'params' is the best individual and 'chi2' its
        fitness divided by (number of observations - number of parameters);
      - just_gen=False: 'params' is the first item returned by ``do_lm``
        and 'chi2' the (non-reduced) chi-square of those parameters.

    Returns None. If no model was defined an error is logged instead.
    """
    try:
        degree = system.model['d']
        keplerians = system.model['k']
    except TypeError:
        msg = red('Error: ') + 'Need to run mod before gen. '
        clogger.error(msg)
        return

    # NOTE: not used at the moment; these are the inputs of the alternative
    # Gaussian period prior that is commented out in P_prior below
    maxP = system.per.get_peaks(output_period=True)[1]
    size_maxP = 10**(len(str(int(maxP)))-1)

    system.fit = {}

    msg = blue('INFO: ') + 'Initializing genetic algorithm...'
    clogger.info(msg)
    msg = blue(' : ') + 'Model is: %d keplerians + %d drift' % (keplerians, degree)
    clogger.info(msg)

    # buffer handed to get_rvn on every fitness evaluation and read back
    # right after the call
    vel = zeros_like(system.time)

    def chi2_n(individual):
        """ Fitness function for N planet model """
        P, K, ecc, omega, T0, gam = [individual[i::6] for i in range(6)]
        get_rvn(system.time, P, K, ecc, omega, T0, gam[0], vel)
        chi2 = sum(((system.vrad - vel)/system.error)**2)
        return chi2,

    ## create the required types -- the fitness and the individual.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # minimization of a single objective
    creator.create("Individual", list, fitness=creator.FitnessMin)

    ## create parameters by sampling from their priors
    def P_prior():
        return random.uniform(5, 1000)
        # return random.gauss(maxP, size_maxP)

    def K_prior():
        return random.uniform(0, 150)

    def ecc_prior():
        return random.uniform(0, 0.9)

    def om_prior():
        return random.uniform(0, 360)

    def t0_prior():
        return random.uniform(2350000, 2550000)

    def gamma_prior():
        return random.uniform(-100, 100)

    priors = [P_prior, K_prior, ecc_prior, om_prior, t0_prior, gamma_prior]

    toolbox = base.Toolbox()
    toolbox.register("individual", tools.initCycle, creator.Individual, priors, n=keplerians)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def mutPrior(individual, indpb):
        """ Redraw each gene from its own prior with probability indpb """
        # the priors repeat every len(priors) genes, one cycle per keplerian,
        # so every planet (not only the first) can be mutated
        for i in range(len(individual)):
            if random.random() < indpb:
                individual[i] = priors[i % len(priors)]()
        return individual,

    toolbox.register("evaluate", chi2_n)
    toolbox.register("mate", tools.cxTwoPoints)
    toolbox.register("mutate", mutPrior, indpb=0.10)
    toolbox.register("select", tools.selTournament, tournsize=3)

    npop = 500
    ngen = 150
    npar = 5*keplerians+1
    ndof = len(system.time) - npar

    ## build the population
    pop = toolbox.population(n=npop)

    ## helper functions
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", nanmean)
    stats.register("min", np.nanmin)
    stats.register("red", lambda v: min(v)/(len(system.time)-npar))

    msg = blue('INFO: ') + 'Created population with N=%d. Going to evolve for %d generations...' % (npop,ngen)
    clogger.info(msg)

    ## run the genetic algorithm
    algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=ngen,
                        stats=stats, halloffame=hof, verbose=True)

    def print_solution(params):
        """ Print a header and one row per planet for a flat parameter list """
        print("%3s %12s %10s %10s %10s %15s %9s" %
              ('', 'P[days]', 'K[km/s]', 'e',
               unichr(0x3c9).encode('utf-8')+'[deg]', 'T0[days]', 'gam'))
        P, K, ecc, omega, T0, gam = [params[j::6] for j in range(6)]
        for i, planet in enumerate(list(ascii_lowercase)[:keplerians]):
            print("%3s %12.1f %10.2f %10.2f %10.2f %15.2f %9.2f" %
                  (planet, P[i], K[i], ecc[i], omega[i], T0[i], gam[i]))

    best = hof[0]

    ## output results information
    msg = yellow('RESULT: ') + 'Best individual is'
    clogger.info(msg)
    print_solution(best)

    msg = yellow('RESULT: ') + 'Best fitness value: %s\n' % (best.fitness)
    clogger.info(msg)

    if just_gen:
        # save fit in the system and return, no need for LM
        system.fit['params'] = best
        # fitness is a DEAP Fitness object, which cannot be divided directly;
        # the chi-square is its single value
        system.fit['chi2'] = best.fitness.values[0]/ndof
        return

    msg = blue('INFO: ') + 'Calling LM to improve result...'
    clogger.info(msg)

    ## call levenberg markardt fit
    lm = do_lm(system, [best[j::6] for j in range(6)])
    lm_par = lm[0]

    msg = yellow('RESULT: ') + 'Best fit is'
    clogger.info(msg)
    print_solution(lm_par)

    chi2 = chi2_n(lm_par)[0]
    msg = yellow('RESULT: ') + 'Best fitness value: %f, %f' % (chi2, chi2/(len(system.time)-npar))
    clogger.info(msg)

    # save fit in the system
    system.fit['params'] = lm_par
    system.fit['chi2'] = chi2
    return
def do_it(system, training_variable, ncpu=1):
    """Train a quasi-periodic GP on one of the extra quantities of `system`.

    Parameters
    ----------
    system : object providing ``time``, ``error``, ``units`` and the
        ``extras`` namedtuple.
    training_variable : name of the field of ``system.extras`` to model.
        Uncertainties are only defined for
          'rhk'      -- taken from the 'sig_rhk' field of the extras;
          'fwhm'     -- 2.35e-3 * system.error when system.units is 'm/s',
                        2.35 * system.error otherwise;
          'bis_span' -- 2e-3 * system.error.
    ncpu : forwarded to ``fit_gp``.

    The mean-subtracted quantity is sampled with ``fit_gp``; the chains,
    the GP prediction with its residuals, and the periodograms before and
    after subtracting the prediction are plotted. The user is then prompted;
    answering 'n' exits the program.

    Returns
    -------
    (best_p, std): the best hyperparameters found, with the second one set
    to zero before the prediction is computed, and the standard deviation
    of the flattened chains. Returns None, after logging an error, when no
    uncertainties are defined for `training_variable`.
    """
    t = system.time

    # find the quantity on which to train the GP
    i = system.extras._fields.index(training_variable)  # index corresponding to the quantity
    y = system.extras[i]

    if training_variable == 'rhk':
        training_variable_error = 'sig_rhk'
        i = system.extras._fields.index(training_variable_error)  # index corresponding to the uncertainties
        yerr = system.extras[i]
    elif training_variable == 'fwhm':
        if system.units == 'm/s':
            f = 2.35e-3
        else:
            f = 2.35
        yerr = f * system.error
    elif training_variable == 'bis_span':
        yerr = 2.e-3*system.error
    else:
        # without this guard the code below fails on an undefined yerr
        msg = red('Error: ') + 'No uncertainties defined for %s; cannot train the GP on it. ' % training_variable
        clogger.error(msg)
        return

    # subtract mean
    y = y - np.mean(y)
    data = (t, y, yerr)

    model = GPfuncs['QuasiPeriodicJitter']
    initial = np.array([0.01, 1e-5, 5000, 1, 23])

    sampler, best_p, logl = fit_gp(model, initial, data, ncpu)
    samples = sampler.flatchain
    std = samples.std(axis=0)

    msg = yellow(' :: ') + 'Best GP hyperparameters: ' + initial.size*' %f ' % tuple(best_p)
    clogger.info(msg)
    msg = yellow(' :: ') + 'std of the chains: ' + initial.size*' %f ' % tuple(std)
    clogger.info(msg)

    # one panel per hyperparameter chain plus a last one for the log-likelihood
    nparams = samples.shape[1]
    plt.figure()
    for i in range(nparams + 1):
        plt.subplot(nparams + 1, 1, i+1)
        if i == nparams:
            plt.plot(logl)
        else:
            plt.plot(samples[:, i])
    plt.show()

    # The positions where the prediction should be computed:
    # a fine grid merged with the observed times
    x = np.linspace(min(t), max(t), 5000)
    x = np.hstack((x, t))
    x.sort()

    # plot lnp solution
    # NOTE: this also changes the best_p that is returned to the caller
    best_p[1] = 0.
    kernel = model[0](*best_p)
    gp = george.GP(kernel, solver=george.HODLRSolver)
    gp.compute(t, yerr)
    print(gp.lnlikelihood(y))

    # Compute the prediction conditioned on the observations and plot it.
    m, cov = gp.predict(y, x)
    m1, cov = gp.predict(y, t)

    plt.figure()
    plt.subplot(211)
    plt.plot(x, m, color='r', alpha=0.8)
    # Plot the data
    plt.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0)
    plt.ylabel(training_variable)
    # Plot the residuals
    plt.subplot(212)
    plt.errorbar(t, y - m1, yerr=yerr, fmt=".k", capsize=0)
    plt.xlabel('Time [days]')

    # periodograms of the data (top) and of the residuals (bottom)
    plt.figure()
    ax = plt.subplot(211)
    ts = BasicTimeSeries()
    ts.time = t
    ts.vrad = y
    ts.error = yerr
    per = gls(ts)
    per._plot(axes=ax, newFig=False)

    ax = plt.subplot(212)
    ts.vrad = y-m1
    per = gls(ts)
    per._plot(axes=ax, newFig=False)
    plt.show()

    enter = raw_input('Press Enter to continue: ')
    if enter == 'n':
        sys.exit(0)

    return best_p, std