def test(challenge,lim=None):
    """Run every stage of the likelihood pipeline once, each inside a `timing` block.

    The stages are: GW covariance, covariance interpolation, pulsar-noise
    covariance, design matrix, projection of residuals and covariances
    through the design matrix, and one likelihood evaluation.  Nothing is
    returned; the intermediate results are discarded.

    challenge -- dataset identifier, passed straight to `load`
    lim       -- optional limit forwarded to `load` as `limit`; converted with
                 int() when given, and passed as None when omitted (the
                 previous unconditional int(lim) raised TypeError for the
                 default lim=None)
    """
    limit = None if lim is None else int(lim)

    meta,desi,times_f,resid_f,error_f = load(challenge,limit=limit)
    alphaab = alphamat(meta)

    # single-argument print call: same output under Python 2 and Python 3
    print('Working with {0} pulsars.'.format(len(meta)))

    with timing('GW covariance matrix [recurring]'):
        cgw = Cgw_100ns(alphaab,times_f,-2.0/3.0,fL=1.0/500,approx_ksum=True)

    # second matrix to interpolate against; a plain copy, so the result is
    # numerically trivial and only the cost of the operation is of interest
    cgw2 = cgw.copy()

    with timing('Cgw interpolation'):
        cgw3 = 0.2 * cgw + 0.8 * cgw2

    with timing('PN covariance matrix'):
        cpn = Cpn(error_f)

    with timing('Design matrix'):
        gmat = Gdesi2(desi,meta)    # note this takes meta, not len(meta)

    with timing('Reduced data'):
        resid_f = N.dot(gmat.T,resid_f)

    with timing('Reduced Cpn'):
        cpn = blockmul(cpn,gmat,meta,blas=True)

    with timing('Reduced Cgw [recurring]'):
        cgw = blockmul(cgw,gmat,meta,blas=True)

    cgw2 = cgw.copy()

    with timing('Reduced-Cgw interpolation'):
        cgw3 = 0.2 * cgw + 0.8 * cgw2

    with timing('Likelihood [recurring]'):
        logl = logL(resid_f,cgw,cpn)
def calculate_values(self):
    """Run value iteration on ``self.domain`` and return the value table.

    Each sweep computes ``r + discount * p @ v`` for every (state, action)
    row, discards actions whose transition probabilities sum to zero, and
    takes the maximum over the remaining actions of each state.  Every sweep
    is recorded in ``self.iter_values`` as ``(sweep, values, distance)``; the
    starting point is recorded as ``(0, values)``.

    Iteration stops as soon as the sup-norm change between successive value
    vectors drops below ``self.threshold`` OR ``self.max_iterations`` sweeps
    have been performed.  (The loop used to continue while *either*
    condition held, which turned ``max_iterations`` into a minimum and let a
    non-converging run spin forever.)  ``max_iterations = None`` means no cap.

    Returns:
        Whatever ``self.get_value_table()`` returns once iteration ends.
    """
    num_s = self.domain.num_states
    num_a = self.domain.num_actions
    r = self.domain.get_rewards()
    p = self.domain.get_probabilities()

    # An all-zero transition row marks an action that is not available in
    # that state.  The mask does not change between sweeps, so build it once.
    sum_probs = np.sum(p, axis=1, keepdims=True)
    unavailable = np.asarray(sum_probs).reshape(num_s, num_a) == 0

    if self.verbose:
        print('Value iteration begins...')

    # set starting state values
    if self.initial_values is not None:
        v_curr = self.initial_values.reshape(num_s, 1)
    else:
        v_curr = np.zeros((num_s, 1))
    self.iter_values.append((0, v_curr.reshape(num_s)))

    max_sweeps = np.inf if self.max_iterations is None else self.max_iterations

    dist = np.inf
    i = 1
    t = time.perf_counter()
    while dist >= self.threshold and i <= max_sweeps:
        v = r + self.discount * p @ v_curr

        # one group of action rows per state, with unavailable actions excluded
        action_values = np.array(np.split(v, num_s))
        action_values[unavailable] = -np.inf

        # maximizing the Bellman eq. for all actions
        v_next = action_values.reshape(num_s, -1).max(axis=1).reshape(num_s, 1)

        dist = np.linalg.norm(v_next - v_curr, ord=np.inf)
        self.iter_values.append((i, v_next.reshape(num_s), dist))
        i += 1
        v_curr = v_next

    if self.verbose:
        timing(t)
        print('Value iteration finished:')
        print(*self.iter_values, sep='\n')

    values = self.get_value_table()
    return values
def checklike(challenge,procs,yL=500.0,lim=None,gproj=True,inject=False,A=5e-14,alpha=-2.0/3.0,debug=1,prange=None):
    """Load challenge data and compute the likelihood for a range of background amplitudes.
    Returns a two-column array of (A,logL).

    challenge -- dataset identifier, passed to `load`
    procs     -- number of worker processes in the multiprocessing pool
    yL        -- sets the low-frequency cutoff passed to Cgw_100ns as fL = 1/yL
    lim       -- forwarded to `load` as `limit`
    gproj     -- if true, project residuals and both covariance matrices
                 through `gmat` (Gproj when no design matrix was loaded,
                 Gdesi2 otherwise)
    inject    -- if true, replace the loaded residuals with the output of
                 `simulate` at amplitude `A`
    alpha     -- spectral index passed to Cgw_100ns and, with each amplitude,
                 to lnprob2
    debug     -- verbosity: forwarded to `timing`; the maximum is printed when
                 debug is True or >= 2
    prange    -- optional (low, high) amplitude range; defaults to 1e-14..9e-14.
                 Either way 20 linearly spaced amplitudes are evaluated.

    Side effect: rebinds the module globals named in the `global` statement.
    NOTE(review): lnprob2 presumably reads those globals in the worker
    processes, which is why the pool is only created after they are assigned
    -- confirm against lnprob2 before reordering anything here.
    """
    global resid_f,cgw,alphaab,times_f,gmat,meta,cpn,error_f

    meta,desi,times_f,resid_f,error_f = load(challenge,limit=lim)
    alphaab = alphamat(meta)

    with timing("Initial setup",1,debug):
        cgw = Cgw_100ns(alphaab,times_f,alpha,fL=1.0/float(yL))
        cpn = Cpn(error_f)

        if inject:
            resid_f = simulate(alphaab,times_f,cgw,cpn,A=A,n=1)

    if gproj:
        with timing("Timing for gmat setup",2,debug):
            if desi is None:
                gmat = Gproj(times_f,len(meta))
            else:
                print("Using tempo2 design matrix")
                gmat = Gdesi2(desi,meta)        # gmat = Gdesi(desi,len(meta))

            resid_f = N.dot(gmat.T,resid_f)
            cgw = blockmul(cgw,gmat,meta)       # cgw = N.dot(gmat.T,N.dot(cgw,gmat))
            cpn = blockmul(cpn,gmat,meta)       # cpn = N.dot(gmat.T,N.dot(cpn,gmat))

    # Build the amplitude grid before starting any worker, so that a bad
    # `prange` cannot leave a pool behind.
    if prange is None:
        x = N.linspace(1e-14,9e-14,20)              # range of A
    else:
        x = N.linspace(prange[0],prange[1],20)      # assigned range of A

    pool = multiprocessing.Pool(int(procs))
    try:
        with timing("Total timing for {0} likelihoods".format(len(x)),1,debug):
            # l = pool.map(lnprob,[[x0] for x0 in x])
            l = pool.map(lnprob2,[[x0,alpha] for x0 in x])
    finally:
        # release the workers even when a likelihood evaluation raises
        pool.close()
        pool.join()

    if debug is True or debug >= 2:
        print("Maximum %s found at par %s" % (N.max(l),x[N.argmax(l)]))

    return N.array([x,l]).T
def policy_iteration(mdp, threshold, discount):
    """Alternate value sweeps and greedy policy extraction until the policy repeats.

    Every outer round first applies the Bellman maximisation repeatedly until
    the sup-norm change between successive value vectors drops below
    ``threshold`` (printing the recorded sweeps), then derives the greedy
    policy from the resulting values.  The rounds end when two consecutive
    policies are equal.

    Args:
        mdp: object exposing ``num_states``, ``num_actions``,
            ``get_rewards()`` and ``get_probabilities()``.
        threshold: convergence tolerance for the value sweeps.
        discount: discount factor applied to the expected next-state value.

    Returns:
        The final policy as a ``(num_states, 1)`` array of action indices.
    """
    n_states = mdp.num_states
    n_actions = mdp.num_actions  # read as before; not needed by the code below
    rewards = mdp.get_rewards()
    transitions = mdp.get_probabilities()

    print('Policy iteration begins')

    state_values = np.zeros((n_states, 1))
    # deliberately different starting policies so the first round always runs
    previous_policy = np.zeros((n_states, 1))
    policy = np.full((n_states, 1), np.inf)

    started = time.perf_counter()
    while not np.array_equal(previous_policy, policy):
        history = [(0, state_values.reshape(n_states))]
        sweep = 1
        delta = np.inf
        while delta >= threshold:
            backup = rewards + discount * transitions @ state_values
            # best action value per state (Bellman maximisation)
            best = np.array([np.max(chunk) for chunk in np.split(backup, n_states)])
            updated = best.reshape(n_states, 1)
            delta = np.linalg.norm(updated - state_values, ord=np.inf)
            history.append((sweep, updated.reshape(n_states), delta))
            state_values = updated
            sweep += 1
        print(*history, sep='\n')

        # policy improvement: greedy action under the converged values
        final_backup = rewards + discount * transitions @ state_values
        greedy = np.array([np.argmax(chunk) for chunk in np.split(final_backup, n_states)])
        previous_policy, policy = policy, greedy.reshape(n_states, 1)

    print('policy iteration - policy: \n', policy)
    timing(started)
    return policy
def emceehammer(challenge,procs=10,suffix=None,ndim=None,nwalkers=200,iters=100,limit=None,inject=False,resume=False,checkpoint=None):
    """Load challenge data and perform an emcee Hammer run on them.
    Save chain and probabilities to numpy arrays.

    challenge  -- dataset identifier, passed to `load`
    procs      -- passed to emcee.EnsembleSampler as `threads`
    suffix     -- optional tag for the output file names; '-<ndim>' is appended
    ndim       -- number of sampled parameters; selects the log-probability
                  function: 1 (lnprob), 2 (lnprob2), 4 (lnprob4),
                  2*len(meta)+2 (lnprob22Nlog) or 3*len(meta)+2 (lnprob23N).
                  When None it defaults to 2*len(meta)+2 for the 'closed1',
                  'closed2' and 'closed3' challenges only.
    nwalkers   -- number of emcee walkers
    iters      -- number of steps per walker
    limit      -- forwarded to `load`
    inject     -- 'raw' replaces the residuals with `loadraw` output,
                  'inject' replaces them with `simulate` output
    resume     -- if true, start the walkers from the saved resume file
    checkpoint -- if set, run in sub-runs of this many steps and save the
                  resume/chain/lnprob files after each one

    Raises ValueError when `ndim` is not one of the supported values; this used
    to surface only after the expensive setup, as a NameError on `sampler`.
    Raises ArithmeticError when the GW covariance contains NaN or inf.

    Side effect: rebinds the module globals named in the `global` statement.
    Output goes to ../runs/{resume,chain,lnprob}-<challenge>-<suffix>.npy.
    """
    global resid_f,cgw,alphaab,times_f,gmat,meta,cpn,error_f

    meta,desi,times_f,resid_f,error_f = load(challenge,limit=limit)
    alphaab = alphamat(meta)

    if inject == 'raw':
        print("Loading clean data from raw challenge files")
        # NOTE(review): `limit` is not forwarded here although `load` above
        # received it -- confirm that loadraw(limit=None) returns residuals
        # matching a limited `meta`.
        resid_f = loadraw(challenge,limit=None)

    if ndim is None and challenge in ['closed1','closed2','closed3']:
        ndim = 2*len(meta) + 2

    # fail fast, before any covariance matrix is built
    supported = (1, 2, 4, 2*len(meta) + 2, 3*len(meta) + 2)
    if ndim not in supported:
        raise ValueError("Unsupported ndim {0!r} for challenge {1!r}; expected one of {2}".format(
            ndim,challenge,sorted(set(supported))))

    with timing("Initial setup"):
        cgw = Cgw_100ns(alphaab,times_f,alpha=-2.0/3.0,fL=1.0/500)
        cpn = Cpn(error_f)

        if challenge == 'open3':
            cpn = cpn + Cred_100ns(alphaab,times_f,A=5.77e-22,alpha=1.7,fL=1.0/500)

        if inject == 'inject':
            print("Injecting synthetic signals at dataset times")
            resid_f = simulate(alphaab,times_f,cgw,cpn,A=5e-14,n=1)

        if desi is None:
            gmat = Gproj(times_f,len(meta))
        else:
            print("Using tempo2 design matrix")
            gmat = Gdesi2(desi,meta)        # gmat = Gdesi(desi,len(meta))

        resid_f = N.dot(gmat.T,resid_f)

        if ndim == 1:   # otherwise the multiplication is done in logL
            cgw = blockmul(cgw,gmat,meta)   # cgw = N.dot(gmat.T,N.dot(cgw,gmat))
            cpn = blockmul(cpn,gmat,meta)   # cpn = N.dot(gmat.T,N.dot(cpn,gmat))

        if N.any(N.isnan(cgw.flatten())) or N.any(N.isinf(cgw.flatten())):
            raise ArithmeticError

    # multiprocessing seems to work better if nwalkers >> procs
    # also keep in mind that the ensemble is split in two...

    trueA, truealpha = 5e-14, -2.0/3.0
    trueAred, truealphared = 5.77e-22, 1.7

    # initial walker positions and sampler; branch order matters because
    # 2*len(meta)+2 equals 4 for a single pulsar and ndim == 4 must win then
    if ndim == 1:
        # initial walker positions - a list of numpy arrays
        p0 = [random.uniform(trueA*0.5,trueA*1.5) for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob,args=[],threads=int(procs))
    elif ndim == 2:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),
               random.uniform(alpha_min,alpha_max)] for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob2,args=[],threads=int(procs))
    elif ndim == 4:
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),
               random.uniform(alpha_min,alpha_max),
               random.uniform(trueAred*0.1,trueAred*10),
               random.uniform(alphared_min,alphared_max)] for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob4,args=[],threads=int(procs))
    elif ndim == 2*len(meta) + 2:
        # per-pulsar red-noise amplitude is drawn in log10 here (lnprob22Nlog)
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),random.uniform(alpha_min,alpha_max)] +
              [value for pulsar in meta
                     for value in [random.uniform(math.log10(trueAred*0.1),math.log10(trueAred*10)),random.uniform(alphared_min,alphared_max)]]
                     # for value in [random.uniform(trueAred*0.1,trueAred*10),random.uniform(alphared_min,alphared_max)]]
              for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob22Nlog,args=[],threads=int(procs))
        # sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob22N,args=[],threads=int(procs))
    else:
        # ndim == 3*len(meta) + 2, guaranteed by the validation above
        p0 = [[random.uniform(trueA*0.5,trueA*1.5),random.uniform(alpha_min,alpha_max)] +
              [value for pulsar in meta
                     for value in [random.uniform(trueAred*0.1,trueAred*10),
                                   random.uniform(alphared_min,alphared_max),
                                   random.uniform(log10_efac_min,log10_efac_max)]]
              for i in range(nwalkers)]
        sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob23N,args=[],threads=int(procs))

    suffix = (suffix + '-' + str(ndim)) if suffix else str(ndim)

    resumefile = '../runs/resume-{0}-{1}.npy'.format(challenge,suffix)
    chainfile  = '../runs/chain-{0}-{1}.npy'.format(challenge,suffix)
    lnprobfile = '../runs/lnprob-{0}-{1}.npy'.format(challenge,suffix)

    if resume:
        p0 = N.load(resumefile)
        print("Resuming run from file {0}".format(resumefile))

    if checkpoint:
        # NOTE(review): when `iters` is not a multiple of `checkpoint`, the
        # remaining steps are never run.
        for subrun in range(int(iters/checkpoint)):
            with timing("{0} x {1} samples (subrun {2})".format(checkpoint,nwalkers,subrun)):
                sampler.run_mcmc(p0,checkpoint)

            # continue the next sub-run from the last walker positions
            p0 = sampler.chain[:,-1,:]

            N.save(resumefile,p0)
            N.save(chainfile, sampler.chain)
            N.save(lnprobfile,sampler.lnprobability)
    else:
        with timing("{0} x {1} samples".format(iters,nwalkers)):
            sampler.run_mcmc(p0,iters)

        N.save(resumefile,sampler.chain[:,-1,:])
        N.save(chainfile, sampler.chain)
        N.save(lnprobfile,sampler.lnprobability)

    print("Done! Mean acceptance fraction: {0}".format(N.mean(sampler.acceptance_fraction)))
def sample(pulsarfile='cJ0437-4715',pulsardir='.',suffix=None,outputdir='.',
           procs=1,fitpars=None,walkers=200,nsteps=100,ball=None,
           reseed=None,resume=False,useprefitvals=False,showml=False,improveml=False,efficiency='0.8',
           method='emcee',ntemps=1,writeparfile=False,dist=10.):
    """Sample the timing-model parameters of one pulsar with emcee or MultiNest,
    save the results, and print a comparison with the tempo2 fit.

    pulsarfile, pulsardir -- passed to sampleutils.findtempo2 to locate the par/tim files
    suffix        -- optional tag inserted in the output file names
    outputdir     -- directory receiving all output files
    procs         -- passed to the emcee samplers as `threads`
    fitpars       -- comma-separated parameter names to sample; with a leading '+'
                     the names are added to pulsar.pars; if empty, pulsar.pars is used
    walkers       -- emcee walkers, or MultiNest n_live_points
    nsteps        -- emcee steps
    ball          -- multiplier applied to the tempo2 error to build each search range,
                     unless `multipliers` has an entry for the parameter
                     (default 1 for emcee, 4 otherwise)
    reseed        -- tag of a previously saved chain file whose last step seeds the walkers
    resume        -- passed to pymultinest.run
    useprefitvals -- use tempo2 pre-fit values/errors as centers and for the comparison
    showml        -- report the best-probability sample instead of the posterior mean
    improveml     -- refine the best sample with a Simplex optimizer
    efficiency    -- MultiNest sampling efficiency, as a string; a trailing 'C'/'c' is stripped
    method        -- 'emcee' or 'multinest'; anything else raises NotImplementedError
    ntemps        -- number of temperatures; > 1 selects emcee.PTSampler
    writeparfile  -- write the sampled values/errors back to a new par file
    dist          -- stored in the global DMdist

    Side effects: rebinds/mutates the module globals listed in the `global`
    statement plus the `ranges` and `offsets` dicts, clears the tempo2 fit flag
    of every sampled parameter, writes .npy files under `outputdir`, and (under
    MultiNest) calls sys.exit(0) on every MPI rank other than 0.
    Returns None.
    """
    global pulsar, multiplier, parameters, ranges, multipliers, priors, offsets, err, DMdist # evals, lapse

    DMdist = dist

    if method == 'multinest':
        # imported lazily so that emcee runs do not need MPI/MultiNest installed
        from mpi4py import MPI
        import pymultinest

        # only MPI rank 0 prints diagnostics
        printdebug = MPI.COMM_WORLD.Get_rank() == 0
    else:
        printdebug = True

    # find tempo2 files
    pulsarfile, parfile, timfile = sampleutils.findtempo2(pulsarfile,pulsardir=pulsardir,debug=printdebug)
    # parfile, timfile = '../eptadata/par/' + pulsarfile + '_EPTA_0.0.par', '../eptadata/tim/' + pulsarfile + '_EPTA_0.0.tim'
    # parfile, timfile = '../nanograv/par/' + pulsarfile + '_noPX.par', '../nanograv/tim/' + pulsarfile + '_NANOGrav_dfg+12.tim'

    whichpulsar = os.path.basename(pulsarfile)

    # initialize Cython proxy for tempo2 pulsar
    pulsar = T.tempopulsar(parfile,timfile)

    # NOTE(review): presumably converts TOA errors from microseconds to seconds -- confirm
    err = 1e-6 * pulsar.toaerrs
    # print "TOA errors: min {0:.2g} s, avg {1:.2g}, median {2:.2g}, max {3:.2g}".format(N.min(err),N.mean(err),N.median(err),N.max(err))

    # -- set up global lists/dicts of parameter names, offsets, ranges, priors

    # fitting parameters
    if fitpars:
        if fitpars[0] == '+':
            parameters = list(pulsar.pars) + fitpars[1:].split(',')
        else:
            parameters = fitpars.split(',')
    else:
        parameters = pulsar.pars

    if 'log10_Ared' in parameters or 'Ared' in parameters:
        setuprednoise()

    if 'log10_jitter' in parameters or 'jitter' in parameters:
        setupjitter()

    ndim = len(parameters)

    if printdebug:
        print "Fitting {0}/{1} parameters: {2}".format(ndim,pulsar.ndim,' '.join(parameters))

    # record (name, post-fit value/error, pre-fit value/error) for each sampled
    # parameter; parameters unknown to tempo2 take their value from `default`
    # and zero errors
    meta = N.fromiter(((par,pulsar[par].val,pulsar[par].err,pulsar.prefit[par].val,pulsar.prefit[par].err)
                       if par in pulsar.allpars else (par,default[par],0.0,default[par],0.0)
                       for par in parameters),
                      dtype=[('name','a32'),('val','f16'),('err','f16'),('pval','f16'),('perr','f16')])

    # do it here, otherwise it will set the post-fit errors to zero
    for par in parameters:
        if par in pulsar.allpars:
            pulsar[par].fit = False

    if printdebug:
        print "Integrating over {0} parameters: {1}".format(pulsar.ndim,' '.join(pulsar.pars))

    if ball is None:
        ball = 1 if method == 'emcee' else 4

    for par in parameters:
        # start from best-fit and (1-sigma) least-squares error
        if par not in pulsar.allpars:
            center, error = N.longdouble(0), N.longdouble(0)
        elif useprefitvals:
            center, error = pulsar.prefit[par].val, pulsar.prefit[par].err

            if error == 0.0:
                error = pulsar[par].err
                if printdebug:
                    print "Warning: prefit error is zero for parameter {0}! Using post-fit error...".format(par)
        else:
            center, error = pulsar[par].val, pulsar[par].err

            if error == 0.0 and printdebug:
                print "Warning: error is zero for parameters {0}! (May be reset to prior.)".format(par)

        # offset parameters (currently F0 only) so that we handle them with sufficient precision
        offsets[par] = center if par in ['F0'] else 0.0

        # if an absolute range is not prescribed, derive it from the tempo2 best-fit and errors,
        # extending the latter by a prescribed or standard multiplier
        if par not in ranges:
            multiplier = multipliers[par] if par in multipliers else ball
            ranges[par] = ((center - offsets[par]) - multiplier*error,
                           (center - offsets[par]) + multiplier*error)

        # make sure that ranges are compatible with prior ranges
        # (callable priors are skipped: only (low, high) pairs can be intersected)
        if par in priors and not hasattr(priors[par],'__call__'):
            offprior = priors[par][0] - offsets[par], priors[par][1] - offsets[par]

            if ranges[par][0] >= offprior[1] or ranges[par][1] <= offprior[0] or ranges[par][1] - ranges[par][0] == 0.0:
                # if the range is fully outside the prior, reset range to prior
                # (this branch also handles a zero-width range)
                ranges[par] = offprior
            else:
                # otherwise, reset range to intersection of range and prior
                ranges[par] = max(ranges[par][0],offprior[0]), min(ranges[par][1],offprior[1])

        if printdebug:
            print "{0} range: [{1},{2}] + {3}".format(par,ranges[par][0],ranges[par][1],offsets[par])

    # -- main sampling setup and loop

    if method == 'emcee':
        # -- set up

        if reseed:
            # restart from the last step (do we double-count it then?)
            if ntemps > 1:
                data = N.load('{0}/chain-pt-{1}.npy'.format(outputdir,reseed))
                p0 = data[:,:,-1,:]
            else:
                data = N.load('{0}/chain-{1}.npy'.format(outputdir,reseed))
                # wrapped in a list so the single-temperature path below can take p0[0]
                p0 = [data[:,-1,:]]
        else:
            # initialize walkers in a Gaussian ball (rescaled by ranges)
            p0 = [[randomtuple() for i in range(walkers)] for j in range(ntemps)]
            # NOTE(review): p1 is never read below; it only draws extra random tuples
            p1 = [[randomtuple() for i in range(walkers)] for j in range(ntemps)]

        if ntemps > 1:
            sampler = emcee.PTSampler(ntemps,walkers,ndim,logL,logP,threads=int(procs))
        else:
            p0 = p0[0]  # only one temperature
            sampler = emcee.EnsembleSampler(walkers,ndim,logPL,threads=int(procs))

        # -- run!

        with timing("{0} x {1} (x {2} T) samples".format(nsteps,walkers,ntemps)):
            sampler.run_mcmc(p0,nsteps)

        print "Mean acceptance fraction:", N.mean(sampler.acceptance_fraction)

        # -- save everything

        filename = '{0}{1}-{2}.npy'.format(whichpulsar,'' if suffix is None else '-' + suffix,ndim)
        print
        print "Writing to files {0}/*-{1}".format(outputdir,filename)

        N.save('{0}/meta-{1}'.format(outputdir,filename),meta)

        if ntemps > 1:
            # full parallel-tempering output, plus the first temperature (index 0)
            # saved under the same names as a single-temperature run
            N.save('{0}/chain-pt-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-pt-{1}'.format(outputdir,filename),sampler.lnprobability)

            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain[0,:,:,:])
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability[0,:,:])

            allpops, lnprobs = sampler.chain[0,:,-1,:], sampler.lnprobability[0,:,-1]

            lnZ, dlnZ = sampler.thermodynamic_integration_log_evidence(fburnin=0.1)
            print "Global (log) Evidence: %e +/- %e" % (lnZ, dlnZ)
        else:
            N.save('{0}/chain-{1}'.format(outputdir,filename) ,sampler.chain)
            N.save('{0}/lnprob-{1}'.format(outputdir,filename),sampler.lnprobability)

            allpops, lnprobs = sampler.chain[:,-1,:], sampler.lnprobability[:,-1]

        # best walker at the final step
        best = N.argmax(lnprobs)
        val_mode, logp_mode = allpops[best,:], lnprobs[best]

        # -- done
    elif method == 'multinest':
        outfile = '{0}/{1}{2}-'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)

        # a trailing 'C'/'c' on the efficiency string requests constant-efficiency mode
        # NOTE(review): const_eff is only referenced by the commented-out keyword
        # arguments below, so it currently has no effect
        if efficiency[-1] == 'C' or efficiency[-1] == 'c':
            const_eff = True
            eff = float(efficiency[:-1])
        else:
            const_eff = False
            eff = float(efficiency)

        pymultinest.run(multilog,multiprior,ndim,
                        n_live_points=walkers,sampling_efficiency=eff,                              # 0.3/0.8 for evidence/parameter evaluation
                        #importance_nested_sampling = const_eff,const_efficiency_mode = const_eff,  # possible with newer MultiNest
                        outputfiles_basename=outfile,resume=resume,verbose=True,init_MPI=False)     # if init_MPI=False, I should be able to use MPI in Python

        # if we're not root, we exit, and let him (her?) do the statistics
        if MPI.COMM_WORLD.Get_rank() != 0:
            sys.exit(0)

        print " Writing to files {0}*".format(outfile)
        print

        # NOTE(review): the stats file is opened without a `with` block and never closed explicitly
        for line in open('{0}stats.dat'.format(outfile),'r'):
            if "Global Evidence" in line:
                print line.strip('\n')
        print

        # save tempo2 fit information
        N.save('{0}meta.npy'.format(outfile),meta)

        # now let's have a look at the populations
        # (last column is taken as the log-probability, the others as the parameters)
        cloud = N.loadtxt('{0}post_equal_weights.dat'.format(outfile))

        allpops = cloud[:,:-1]
        lnprobs = cloud[:,-1]

        # in the live-points file the second-to-last column is used for the ranking
        live = N.loadtxt('{0}phys_live.points'.format(outfile))

        best = N.argmax(live[:,-2])
        val_mode, logp_mode = live[best,:-2], live[best,-2]
    else:
        raise NotImplementedError, ("Unknown sampling method: " + method)

    # further optimize the mode
    if improveml:
        # minimizes -logPL starting from the best sample; the third argument is
        # a tenth of the per-parameter sample variance
        optimizer = Simplex.Simplex(lambda xs: -logPL(xs),val_mode,0.1*N.var(allpops[:,:],axis=0))
        print "Optimizing MAP..."
        minimum, error, iters = optimizer.minimize(maxiters=1000,monitor=1); print
        val_mode = N.array(minimum)

    # statistical analysis

    # print header
    maxlen = max(3,max(map(len,parameters)))
    print '-' * (101 + maxlen + 3)
    print "%*s | tempo2 fit parameters | mcmc-fit parameters | diff | erat bias" % (maxlen,'par')

    # loop over fitted parameters
    for i,par in enumerate(parameters):
        if useprefitvals:
            val_tempo, err_tempo = meta[i]['pval'], meta[i]['perr']
        else:
            val_tempo, err_tempo = meta[i]['val'], meta[i]['err']

        val_mcmc = (val_mode[i] if showml else N.mean(allpops[:,i])) + offsets[par]     # MCMC values/errors
        err_mcmc = math.sqrt(N.var(allpops[:,i]))                                       # use cond. var. also for ML est.

        if writeparfile and par in pulsar.allpars:
            pulsar[par].val = val_mcmc
            pulsar[par].err = err_mcmc

        # NOTE(review): the bare except hides every error raised while formatting
        # this row, not only numerical ones
        try:
            with numpy_seterr(divide='ignore'):
                print ('%*s | %+24.*e ± %.1e | %+24.*e ± %.1e | %+.1e | %.1e %+.1e'
                       % (maxlen,par,                                                   # parameter name
                          precisiondigits(val_tempo,err_tempo),val_tempo,err_tempo,     # tempo2 value and error
                          precisiondigits(val_mcmc, err_mcmc ),val_mcmc, err_mcmc,      # MCMC value and error
                          val_mcmc - val_tempo,                                         # MCMC/tempo2 difference
                          err_mcmc/err_tempo,                                           # ratio of errors
                          (val_mcmc - val_tempo)/err_tempo))                            # difference in units of tempo2 error
        except:
            print "Problem with values:", par, val_tempo, err_tempo, val_mcmc, err_mcmc

    print '-' * (101 + maxlen + 3)

    if writeparfile:
        parfilename = '{0}/{1}{2}-mcmc.par'.format(outputdir,whichpulsar,'' if suffix is None else '-' + suffix)
        pulsar.savepar(parfilename)
        print "Wrote new parfile to", parfilename

    # tempo2 solution expressed in the same offset coordinates as the samples
    val_tempo2 = [(par['pval'] if useprefitvals else par['val']) - offsets[par['name']] for par in meta]

    dof = pulsar.nobs - pulsar.ndim
    pmchisq = -2.0 * logL(val_mode) / dof
    try:
        pfchisq = -2.0 * logL(val_tempo2) / dof
    except:
        pfchisq = 'NaN'

    # NOTE(review): the values printed under the "log L" label are -2 log L / dof
    print
    print "{0}-fit log L: {1}; post-mcmc (best fit) log L: {2}".format('Pre' if useprefitvals else 'Post',pfchisq,pmchisq)

    pmrms = rmsres(val_mode)
    pfrms = rmsres(val_tempo2)

    print "{0}-fit rms res.: {1}; post-mcmc rms res.: {2}".format('Pre' if useprefitvals else 'Post',pfrms,pmrms)
# Play one DQN run per entry of the episode schedule, collecting the samples
# each run returns and the total reward it earned.
samples_total = []

# With SEQ_RUN the schedule is SEQ_STEPS, 2*SEQ_STEPS, ... up to EPISODES;
# otherwise it starts directly at EPISODES.
for seq_ep in range(SEQ_STEPS if SEQ_RUN else EPISODES, EPISODES + 1, SEQ_STEPS):
    game_rewards, ep_sample = play_dqn(env_stock, seq_ep, file_name_stock, load=IS_LOAD)
    samples_total.extend(ep_sample)

    # episode reward calculations
    game_reward_total = sum(game_rewards.values())
    game_reward_avg = game_reward_total / seq_ep
    print("\t Average reward per episode {:.2f}".format(game_reward_avg))
    rewards_total[seq_ep] = game_reward_total

t_finish = timing(t_start)
print('\n', '-' * 10, 'Summary', '-' * 10)

# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write end up with a blank line after
# every row.
with open('samples.csv', mode='w', newline='') as samples_handle:
    csv_writer = csv.writer(samples_handle, delimiter=',')
    csv_writer.writerows(samples_total)

# total episodes reward calculations
# (the keys of rewards_total are the episode counts of the individual runs,
# so their sum is the total number of episodes played)
sum_rewards_total = sum(rewards_total.values())
avg_rewards_total = sum_rewards_total / sum(rewards_total.keys())
# NOTE(review): divides elapsed time by the summed reward, i.e. this assumes
# one unit of reward per step -- confirm against play_dqn.
time_per_step = t_finish / sum_rewards_total
print('*** Time per step: {:.4f} second(s)'.format(time_per_step))
print('*** Sum of total rewards: {}'.format(sum_rewards_total))
print('*** Average total reward: {:.2f}'.format(avg_rewards_total))

'''-------Plotting-------'''