Exemplo n.º 1
0
def do_fit(system, verbose):
	""" Fit a polynomial to the radial velocities of `system`.

	The polynomial degree is read from system.model['d'] and the fit is
	done with polyfit(system.time, system.vrad, degree).

	Parameters
	    system  : object providing `model`, `time` and `vrad`
	    verbose : not used by this function; kept for the callers' signature

	Returns the value returned by polyfit, or None (after logging an error)
	when system.model cannot be indexed, i.e. no model was set yet.
	"""
	try:
		degree = system.model['d']
	except TypeError:
		# system.model is not subscriptable: the model was never defined
		msg = red('Error: ') + 'Need to run mod before fit. '
		clogger.error(msg)
		return

	with warnings.catch_warnings(record=True) as w:
		# With the default filters a given warning may be reported only once
		# per code location, so a second badly conditioned fit could go
		# unnoticed. "always" makes sure every warning is recorded; the
		# previous filters are restored when the context manager exits.
		warnings.simplefilter('always')
		p = polyfit(system.time, system.vrad, degree)

	if w:
		msg = yellow('Warning: ') + 'Polyfit may be poorly conditioned. ' \
		      + 'Maybe try a lower degree drift?'
		clogger.info(msg)
	return p
Exemplo n.º 2
0
def do_multinest(system):
	""" Write the data of `system` to the file 'input.rv' and launch MultiNest.

	The file gets a two-line header (a description ending in `timestamp`,
	then the number of observations) followed by one row per observation
	with time, radial velocity and uncertainty. MultiNest is then started
	through mpirun with 2 processes; its exit status is not inspected.
	Returns None.
	"""
	clogger.info(blue('INFO: ') + 'Transfering data to MultiNest...')

	# this is the file MultiNest reads its input from
	header_lines = [
		'file automatically generated for MultiNest analysis, ' + timestamp,
		str(len(system.time)),
	]
	rows = zip(system.time, system.vrad, system.error)
	savetxt('input.rv', rows,
	        header='\n'.join(header_lines),
	        fmt=['%12.6f', '%7.5f', '%7.5f'])

	clogger.info(blue('INFO: ') + 'Starting MultiNest...')

	# the path is relative, so this depends on the current working directory
	subprocess.call('mpirun -np 2 ./OPEN/multinest/nest', shell=True)
Exemplo n.º 3
0
def fit_gp(model, initial, data, ncpu, nwalkers=20):
    """Sample the GP hyperparameters with an emcee ensemble sampler.

    Parameters
        model    : sequence of exactly four items; only the last one (the
                   log-probability function) is used here
        initial  : starting values of the hyperparameters
        data     : extra arguments forwarded to the log-probability function
        ncpu     : currently unused, the sampler always runs with threads=1
                   (NOTE(review): confirm whether this was meant to be wired in)
        nwalkers : number of walkers in the ensemble

    Returns (sampler, p, logl): the sampler, the position of the walker with
    the highest log-probability at the last step, and an array with the
    maximum log-probability over the walkers at each production step.
    """
    _, _, _, log_prob = model
    ndim = len(initial)

    # scatter the walkers around the initial guess
    pos = []
    for _ in xrange(nwalkers):
        pos.append(np.array(initial) + rel(initial, 1) * np.random.randn(ndim))

    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=data, threads=1)

    clogger.info(blue('    :: ') + 'Running burn-in...')

    # two burn-in stages; the chain is discarded after each of them
    for nsteps in (100, 200):
        pos, lnp, _ = sampler.run_mcmc(pos, nsteps)
        sampler.reset()

    niter = 1000
    clogger.info(blue('    :: ') + 'Running %d MCMC chains for %d iterations...' % (nwalkers, niter))

    best_per_step = []
    start = time()
    for pos, lnp, _ in sampler.sample(pos, None, None, iterations=niter):
        best_per_step.append(max(lnp))
    elapsed = time() - start
    best_per_step = np.array(best_per_step)

    # walker with the highest log-probability in the final step
    best = pos[np.argmax(lnp)]

    clogger.info(blue('    :: ') + 'MCMC took %f seconds' % elapsed)
    return sampler, best, best_per_step
Exemplo n.º 4
0
def do_restrict(system, quantity, *args):
	## restrict by uncertainty value
	if quantity == 'error':
		msg = blue('INFO: ') + 'Removing data with uncertainty higher than %f km/s' % args[0]
		clogger.info(msg)
		maxerr = args[0]
		return

		# we have to keep a record of how many values come out of each file
		t, rv, err = system.time_full, system.vrad_full, system.error_full # temporaries
		for i, (fname, [n1, n2]) in enumerate(sorted(system.provenance.iteritems())):
			print i, n1, n2
			# print err[:n1]

			val = err[:n1] <= maxerr
			nout = (val == False).sum()
			# system.provenance keeps the record
			if nout >= n1: 
				system.provenance[fname][1] = n1
			else:
				system.provenance[fname][1] = nout

			print (val == False).sum(), n1
			t, rv, err = t[n1:], rv[n1:], err[n1:]

		# now build the full boolean vector 
		vals = system.error_full <= maxerr
		print vals, len(vals)

		# and pop out the values from time, vrad, and error
		# leaving all *_full vectors intact
		system.time = system.time_full[vals]
		system.vrad = system.vrad_full[vals]
		system.error = system.error_full[vals]


	## restrict by date (JD)
	if quantity == 'date':
		msg = blue('INFO: ') + 'Retaining data between %i and %i JD' % (args[0], args[1])
		clogger.info(msg)
		minjd, maxjd = args[0], args[1]

		# we have to keep a record of how many values come out of each file
		t, rv, err = system.time_full, system.vrad_full, system.error_full # temporaries
		for i, (fname, [n1, n2]) in enumerate(sorted(system.provenance.iteritems())):
			lower = t[:n1] <= minjd
			higher = t[:n1] <= maxjd
			nout = (lower == True).sum()
			nout += (higher == True).sum()
			# system.provenance keeps the record
			if nout >= n1: 
				system.provenance[fname][1] = n1
			else:
				system.provenance[fname][1] = n1 - nout

			t, rv, err = t[n1:], rv[n1:], err[n1:]

		# now build the full boolean vector 
		lowers = system.time_full <= minjd
		highers = system.time_full >= maxjd
		# fancy syntax just to negate the intersection of lowers and highers
		keepers = ~(lowers | highers)

		# and pop out the values from time, vrad, and error
		# leaving all *_full vectors intact
		system.time = system.time_full[keepers]
		system.vrad = system.vrad_full[keepers]
		system.error = system.error_full[keepers]	
		# also from extras, but this is trickier
		d = system.extras._asdict() # asdict because namedtuple is immutable
		for i, field in enumerate(system.extras._fields):
			d[field] = system.extras_full[i][keepers]
		extra = namedtuple('Extra', system.extras_names, verbose=False)
		system.extras = extra(**d)

	## restrict to values from one year
	if quantity == 'year':
		msg = blue('INFO: ') + 'Retaining data from %i' % args[0]
		clogger.info(msg)
		yr = args[0]

		# we have to keep a record of how many values come out of each file
		time = system.time_full # temporaries
		for i, (fname, [n1, n2]) in enumerate(sorted(system.provenance.iteritems())):
			years = np.array([julian_day_to_date(t)[0] for t in time[:n1]])
			keepers = years == yr
			nout = (keepers == True).sum()
			# system.provenance keeps the record
			if nout >= n1: 
				system.provenance[fname][1] = n1
			else:
				system.provenance[fname][1] = n1 - nout
			time = time[n1:]

		# build the full boolean vector
		years = np.array([julian_day_to_date(t)[0] for t in system.time_full])
		keepers = years == yr

		# and pop out the values from time, vrad, and error
		# leaving all *_full vectors intact
		system.time = system.time_full[keepers]
		system.vrad = system.vrad_full[keepers]
		system.error = system.error_full[keepers]
		# also from extras
		d = system.extras._asdict() # asdict because namedtuple is immutable
		for i, field in enumerate(system.extras._fields):
			d[field] = system.extras_full[i][keepers]
		extra = namedtuple('Extra', system.extras_names, verbose=False)
		system.extras = extra(**d)	

	## restrict to values from a year range
	if quantity == 'years':
		msg = blue('INFO: ') + 'Retaining data between %i and %i' % (args[0], args[1])
		clogger.info(msg)
		yr1, yr2 = args[0], args[1]

		# we have to keep a record of how many values come out of each file
		time = system.time_full # temporaries
		for i, (fname, [n1, n2]) in enumerate(sorted(system.provenance.iteritems())):
			years = np.array([julian_day_to_date(t)[0] for t in time[:n1]])
			keepers = (years >= yr1) & (years <= yr2)
			nout = (keepers == True).sum()
			# system.provenance keeps the record
			if nout >= n1: 
				system.provenance[fname][1] = n1
			else:
				system.provenance[fname][1] = n1 - nout
			time = time[n1:]


		# now build the full boolean vector 
		years = np.array([julian_day_to_date(t)[0] for t in system.time_full])
		keepers = (years >= yr1) & (years <= yr2)

		# and pop out the values from time, vrad, and error
		# leaving all *_full vectors intact
		system.time = system.time_full[keepers]
		system.vrad = system.vrad_full[keepers]
		system.error = system.error_full[keepers]	
		# also from extras, but this is trickier
		d = system.extras._asdict() # asdict because namedtuple is immutable
		for i, field in enumerate(system.extras._fields):
			d[field] = system.extras_full[i][keepers]
		extra = namedtuple('Extra', system.extras_names, verbose=False)
		system.extras = extra(**d)

	return
Exemplo n.º 5
0
def do_genetic(system, just_gen=False):
	""" Carry out the fit using a genetic algorithm and if 
	just_gen=False try to improve it with a run of the LM algorithm.

	The number of keplerians and the drift degree are read from
	system.model ('k' and 'd'). Each individual is a flat list with six
	genes per keplerian, in the order P, K, ecc, omega, T0, gam.

	Results are stored in system.fit:
	    just_gen=True  : 'params' is the best individual and 'chi2' its
	                     fitness value divided by (len(system.time) - npar)
	    just_gen=False : 'params' is the first element returned by do_lm and
	                     'chi2' the (not reduced) chi2 of those parameters
	Returns None; if system.model cannot be indexed an error is logged and
	nothing else is done.
	"""
	try:
		degree = system.model['d']
		keplerians = system.model['k']
	except TypeError:
		msg = red('Error: ') + 'Need to run mod before gen. '
		clogger.error(msg)
		return

	# only needed by the gaussian period prior that is commented out below
	maxP = system.per.get_peaks(output_period=True)[1]
	size_maxP = 10**(len(str(int(maxP)))-1)
	system.fit = {}

	msg = blue('INFO: ') + 'Initializing genetic algorithm...'
	clogger.info(msg)
	msg = blue('    : ') + 'Model is: %d keplerians + %d drift' % (keplerians, degree)
	clogger.info(msg)

	# buffer shared by all fitness evaluations (presumably filled in place by get_rvn)
	vel = zeros_like(system.time)

	def chi2_n(individual):
		""" Fitness function for N planet model """
		P, K, ecc, omega, T0, gam = [individual[i::6] for i in range(6)]
		get_rvn(system.time, P, K, ecc, omega, T0, gam[0], vel)
		chi2 = sum(((system.vrad - vel)/system.error)**2)
		return chi2,

	## create the required types -- the fitness and the individual.
	creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) # minimization of a single objective
	creator.create("Individual", list, fitness=creator.FitnessMin) 

	## create parameters by sampling from their priors
	def P_prior():
		return random.uniform(5, 1000)
		# return random.gauss(maxP, size_maxP)
	def K_prior():
		return random.uniform(0, 150)
	def ecc_prior():
		return random.uniform(0, 0.9)
	def om_prior():
		return random.uniform(0, 360)
	def t0_prior():
		return random.uniform(2350000, 2550000)
	def gamma_prior():
		return random.uniform(-100, 100)
	priors = [P_prior, K_prior, ecc_prior, om_prior, t0_prior, gamma_prior]

	toolbox = base.Toolbox()
	toolbox.register("individual", tools.initCycle, creator.Individual, priors, n=keplerians)
	toolbox.register("population", tools.initRepeat, list, toolbox.individual)

	def mutPrior(individual, indpb):
		""" Redraw each gene from its prior with probability indpb """
		# The priors repeat every len(priors) genes. Pairing the individual
		# with the priors through zip stopped after the first keplerian, so
		# the parameters of the other ones were never mutated.
		for i, _ in enumerate(individual):
			if random.random() < indpb:
				individual[i] = priors[i % len(priors)]()
		return individual,

	toolbox.register("evaluate", chi2_n)
	toolbox.register("mate", tools.cxTwoPoints)
	toolbox.register("mutate", mutPrior, indpb=0.10)
	toolbox.register("select", tools.selTournament, tournsize=3)

	npop = 500
	ngen = 150
	npar = 5*keplerians+1
	## build the population
	pop = toolbox.population(n=npop)
	## helper functions
	hof = tools.HallOfFame(1)
	stats = tools.Statistics(lambda ind: ind.fitness.values)
	stats.register("avg", nanmean)
	# stats.register("std", nanstd)
	stats.register("min", np.nanmin)
	# stats.register("max", np.nanmax)
	# stats.register("total", sigma3)
	stats.register("red", lambda v: min(v)/(len(system.time)-npar) )

	msg = blue('INFO: ') + 'Created population with N=%d. Going to evolve for %d generations...' % (npop,ngen)
	clogger.info(msg)

	## run the genetic algorithm
	algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=ngen, stats=stats, halloffame=hof, verbose=True)

	def print_parameters(params):
		""" Print a table with one row of orbital parameters per keplerian """
		print("%3s %12s %10s %10s %10s %15s %9s" % \
			('', 'P[days]', 'K[km/s]', 'e', unichr(0x3c9).encode('utf-8')+'[deg]', 'T0[days]', 'gam') )
		P, K, ecc, omega, T0, gam = [params[j::6] for j in range(6)]
		for i, planet in enumerate(ascii_lowercase[:keplerians]):
			print("%3s %12.1f %10.2f %10.2f %10.2f %15.2f %9.2f" % (planet, P[i], K[i], ecc[i], omega[i], T0[i], gam[i]) )

	## output results information
	msg = yellow('RESULT: ') + 'Best individual is'
	clogger.info(msg)
	print_parameters(hof[0])

	msg = yellow('RESULT: ') + 'Best fitness value: %s\n' % (hof[0].fitness)
	clogger.info(msg)

	if just_gen: 
		# save fit in the system and return, no need for LM
		system.fit['params'] = hof[0]
		# the Fitness object itself cannot be divided, use its (single) value
		system.fit['chi2'] = hof[0].fitness.values[0]/(len(system.time)-npar)
		return

	msg = blue('INFO: ') + 'Calling LM to improve result...'
	clogger.info(msg)	

	## call levenberg markardt fit
	lm = do_lm(system, [hof[0][j::6] for j in range(6)])
	lm_par = lm[0]

	msg = yellow('RESULT: ') + 'Best fit is'
	clogger.info(msg)
	print_parameters(lm_par)

	chi2 = chi2_n(lm_par)[0]
	msg = yellow('RESULT: ') + 'Best fitness value: %f, %f' % (chi2, chi2/(len(system.time)-npar))
	clogger.info(msg)

	# save fit in the system
	system.fit['params'] = lm_par
	system.fit['chi2'] = chi2

	return
Exemplo n.º 6
0
def do_it(system, training_variable, ncpu=1):
    """Train a quasi-periodic GP (with jitter) on one of the extra quantities.

    Parameters
        system            : object providing `time`, `error`, `units` and the
                            namedtuple `extras`
        training_variable : name of the field of system.extras to train on;
                            one of 'rhk', 'fwhm' or 'bis_span'
        ncpu              : forwarded to fit_gp

    The uncertainties come from the 'sig_rhk' field for 'rhk' and are a fixed
    multiple of system.error for 'fwhm' and 'bis_span'.

    Shows diagnostic figures (chains, GP prediction with residuals, and
    periodograms before/after removing the prediction), then waits for input;
    answering 'n' exits the program.

    Returns (best_p, std): the best hyperparameters found by fit_gp, with the
    second entry set to 0, and the standard deviation of the flattened chains.

    Raises ValueError if training_variable is not a field of system.extras or
    if no uncertainties are defined for it.
    """
    t = system.time

    # find the quantity on which to train the GP
    i = system.extras._fields.index(training_variable) # index corresponding to the quantity
    y = system.extras[i]

    # uncertainties associated with the training variable
    if training_variable == 'rhk':
        i = system.extras._fields.index('sig_rhk') # index corresponding to the uncertainties
        yerr = system.extras[i]
    elif training_variable == 'fwhm':
        f = 2.35e-3 if system.units == 'm/s' else 2.35
        yerr = f * system.error
    elif training_variable == 'bis_span':
        yerr = 2.e-3*system.error
    else:
        # without this, yerr would be unbound and fail further down with an
        # error that does not point to the cause
        raise ValueError("no uncertainties defined for training variable %r "
                         "(expected 'rhk', 'fwhm' or 'bis_span')" % (training_variable,))

    # subtract mean
    y = y - np.mean(y)
    data = (t, y, yerr)

    model = GPfuncs['QuasiPeriodicJitter']

    initial = np.array([0.01, 1e-5, 5000, 1, 23])
    sampler, best_p, logl = fit_gp(model, initial, data, ncpu)
    samples = sampler.flatchain 
    std = samples.std(axis=0)

    msg = yellow('    :: ') + 'Best GP hyperparameters: ' + initial.size*' %f ' % tuple(best_p)
    clogger.info(msg)
    msg = yellow('    :: ') + 'std of the chains:       ' + initial.size*' %f ' % tuple(std)
    clogger.info(msg)

    # one panel per hyperparameter chain plus a last one for the log-likelihood
    nchains = samples.shape[1]
    plt.figure()
    for i in range(nchains + 1):
        plt.subplot(nchains + 1, 1, i+1)
        if i == nchains:
            plt.plot(logl)
        else:
            plt.plot(samples[:,i])
    plt.show()

    # The positions where the prediction should be computed:
    # a fine grid plus the observed times themselves
    x = np.linspace(min(t), max(t), 5000)
    x = np.hstack((x, t))
    x.sort()

    # plot lnp solution
    # NOTE(review): this changes best_p in place, so the value returned to the
    # caller also has its second entry set to 0 -- confirm this is intended.
    best_p[1] = 0.
    kernel = model[0](*best_p)
    gp = george.GP(kernel, solver=george.HODLRSolver)
    gp.compute(t, yerr)
    print(gp.lnlikelihood(y))
    # Compute the prediction conditioned on the observations and plot it.
    m, cov = gp.predict(y, x)
    m1, cov = gp.predict(y, t)
    plt.figure()
    plt.subplot(211)

    plt.plot(x, m, color='r', alpha=0.8)
    # Plot the data
    plt.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0)
    plt.ylabel(training_variable)
    # Plot the residuals
    plt.subplot(212)
    plt.errorbar(t, y - m1, yerr=yerr, fmt=".k", capsize=0)

    plt.xlabel('Time [days]')

    # periodogram of the data (top) and of the residuals (bottom)
    plt.figure()
    ax = plt.subplot(211)
    ts = BasicTimeSeries()
    ts.time = t
    ts.vrad = y
    ts.error = yerr
    per = gls(ts)
    per._plot(axes=ax, newFig=False)
    ax = plt.subplot(212)
    ts.vrad = y-m1
    per = gls(ts)
    per._plot(axes=ax, newFig=False)

    plt.show()

    enter = raw_input('Press Enter to continue: ')
    if enter == 'n':
        sys.exit(0)

    return best_p, std