def __init__(self, meta):
    self.meta = meta

    self.vals = self.meta.ivals
    self.sampler = self.meta.sampler
    self.burns = 0
    self.runs = 0

    with open(self.meta.calctime, 'w') as calctimer:
        calctimer.write(str(timeit.default_timer()) + ' icalc \n')

    # what outputs of emcee will we be saving?
    self.stats = [stats.stat_chains(self.meta),
                  stats.stat_probs(self.meta),
                  stats.stat_fracs(self.meta),
                  stats.stat_times(self.meta)]
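# Illustrative helper, not part of the original module: the calctime file written above
# is a plain-text log of timeit.default_timer() stamps tagged with a stage label, so the
# elapsed time per stage can be recovered by differencing consecutive stamps.  The helper
# name below is an assumption introduced only for this sketch.
def read_calctimes(calctime_path):
    """Return (stage label, seconds since previous stamp) pairs from a calctimer log."""
    with open(calctime_path, 'r') as calctimer:
        stamps = [line.split(None) for line in calctimer]
    return [(stamps[i][1], float(stamps[i][0]) - float(stamps[i - 1][0]))
            for i in xrange(1, len(stamps))]
# usage sketch: read_calctimes(self.meta.calctime)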
def __init__(self, input_address):

    self.key = key.key(t=input_address)

    # read input parameters
    self.testdir = os.path.join('..', 'tests')
    with open(os.path.join(self.testdir, input_address)) as infile:
        lines = (line.split(None) for line in infile)
        indict = {defn[0]: defn[1:] for defn in lines}

    # load data
    with open(os.path.join(self.testdir, 'topdirs.p'), 'rb') as topdirs:
        self.topdir = cpkl.load(topdirs)[input_address]
    self.datadir = os.path.join(self.topdir, 'data')
    self.topdir = os.path.join(self.topdir, 'mcmc')
    if os.path.exists(self.topdir):
        shutil.rmtree(self.topdir)
    os.makedirs(self.topdir)

    with open(os.path.join(self.datadir, 'logdata.csv'), 'rb') as csvfile:
        tuples = (line.split(None) for line in csvfile)
        alldata = [[float(pair[k]) for k in range(0, len(pair))] for pair in tuples]

    self.binends = np.array(alldata[0])
    self.nbins = len(self.binends) - 1
    self.binlos = self.binends[:-1]
    self.binhis = self.binends[1:]
    self.bindifs = self.binhis - self.binlos
    self.bindif = sum(self.bindifs) / self.nbins
    self.binmids = (self.binlos + self.binhis) / 2.

    self.logpobs = np.array(alldata[1:])
    self.pobs = np.exp(self.logpobs)
    self.ngals = len(self.logpobs)

    self.flatNz = np.array([float(self.ngals) / float(self.nbins) / self.bindif] * self.nbins)
    self.logflatNz = np.log(self.flatNz)

    # load true redshifts and true N(z), if available
    self.trueZs = None
    self.trueNz = None
    self.logtrueNz = None
    if os.path.exists(os.path.join(self.datadir, 'logtrue.csv')):
        with open(os.path.join(self.datadir, 'logtrue.csv'), 'rb') as csvfile:
            tuples = (line.split(None) for line in csvfile)
            trudata = [[float(pair[k]) for k in range(0, len(pair))] for pair in tuples]
        self.trueZs = np.array(trudata[1:])
        trueNz = [sys.float_info.epsilon] * self.nbins
        for z in self.trueZs:
            for k in xrange(self.nbins):
                if z[0] > self.binlos[k] and z[0] < self.binhis[k]:
                    trueNz[k] += 1. / self.bindif
        self.trueNz = np.array(trueNz)
        self.logtrueNz = np.log(self.trueNz)

    # generate full Sheldon, et al. 2011 "posterior"
    stackprep = np.sum(np.array(self.pobs), axis=0)
    self.stack = np.array([max(sys.float_info.epsilon, stackprep[k]) for k in xrange(self.nbins)])
    self.logstack = np.log(self.stack)

    # generate MAP N(z)
    self.mapNz = [sys.float_info.epsilon] * self.nbins
    mappreps = [np.argmax(l) for l in self.logpobs]
    for m in mappreps:
        self.mapNz[m] += 1. / self.bindifs[m]
    self.logmapNz = np.log(self.mapNz)

    # generate expected value N(z)
    expprep = [sum(z) for z in self.binmids * self.pobs * self.bindifs]
    self.expNz = [sys.float_info.epsilon] * self.nbins
    for z in expprep:
        for k in xrange(self.nbins):
            if z > self.binlos[k] and z < self.binhis[k]:
                self.expNz[k] += 1. / self.bindifs[k]
    self.logexpNz = np.log(self.expNz)
    #print('logexpNz='+str(self.logexpNz))

    # how many walkers
    self.nwalkers = 2 * self.nbins
    #self.walknos = xrange(self.nwalkers)

    # prior specification
    if 'prior' in indict:
        mean = indict['priormean']
        self.mean = np.array([float(mean[i]) for i in range(0, self.nbins)])
        covmat = indict['priorcov']
#         if self.meta.random[self.n]:
#             q = 1.#0.5
#             e = 0.15/self.meta.zdif**2
#             tiny = q*1e-6
#             self.covmat = np.array([[q*m.exp(-0.5*e*(self.binmids[a]-self.binmids[b])**2.)
#                                      for a in xrange(0,self.nbins)] for b in xrange(0,self.nbins)])+tiny*np.identity(self.nbins)
#         else:
        self.covmat = np.reshape(np.array([float(covmat[i]) for i in range(0, self.nbins ** 2)]),
                                 (self.nbins, self.nbins))
    else:
        self.mean = self.logflatNz
        self.covmat = np.identity(self.nbins)
    self.priordist = mvn(self.mean, self.covmat)

    # posterior specification
    self.postdist = post(self.priordist, self.binends, self.logpobs)

    # sampler specification
    self.sampler = emcee.EnsembleSampler(self.nwalkers, self.nbins, self.postdist.lnprob)

    # initialization schemes
    if 'inits' in indict:
        self.inits = indict['inits'][0]
    else:
        self.inits = 'gs'  # alternatives: 'ps', 'gm'

    # generate initial values for walkers
    if self.inits == 'ps':
        self.ivals, self.mean = self.priordist.sample_ps(self.nwalkers)
        self.init_names = 'Prior Samples'
    elif self.inits == 'gm':
        self.ivals, self.mean = self.priordist.sample_gm(self.nwalkers)
        self.init_names = 'Gaussian Ball Around Mean'
    elif self.inits == 'gs':
        self.ivals, self.mean = self.priordist.sample_gs(self.nwalkers)
        self.init_names = 'Gaussian Ball Around Prior Sample'

    self.ivals_dir = os.path.join(self.topdir, 'ivals.p')
    with open(self.ivals_dir, 'wb') as ival_file:
        cpkl.dump(self.ivals, ival_file)

    # parameters for MCMC
    if 'miniters' in indict:
        self.miniters = int(indict['miniters'][0])
    else:
        self.miniters = int(1e3)
    if 'thinto' in indict:
        self.thinto = int(indict['thinto'][0])
    else:
        self.thinto = 1
    assert(self.miniters % self.thinto == 0)
    self.ntimes = self.miniters / self.thinto
    assert(self.ntimes > self.nwalkers)

    # what outputs of emcee will we be saving?
    self.stats = [stats.stat_chains(self),
                  stats.stat_probs(self),
                  stats.stat_fracs(self),
                  stats.stat_times(self)]

    # colors for plots
    self.colors = 'rgbymc'

    # record the run configuration in the output directory's README
    outdict = {'topdir': self.topdir,
               'binends': self.binends,
               'logpobs': self.logpobs,
               'inits': self.inits,
               'miniters': self.miniters,
               'thinto': self.thinto,
               'ivals': self.ivals,
               'mean': self.mean,
               'covmat': self.covmat
               }
    with open(os.path.join(self.topdir, 'README.md'), 'a') as readme:
        readme.write('\n')
        readme.write(repr(outdict))

    # reset the timer files
    self.calctime = os.path.join(self.testdir, 'calctimer.txt')
    if os.path.exists(self.calctime):
        os.remove(self.calctime)
    self.plottime = os.path.join(self.testdir, 'plottimer.txt')
    if os.path.exists(self.plottime):
        os.remove(self.plottime)
    # self.iotime = os.path.join(self.testdir, 'iotimer.txt')
    # if os.path.exists(self.iotime):
    #     os.remove(self.iotime)

    print('ingested inputs and initialized sampling')
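# Illustrative, self-contained sketch, not part of the original module: the same emcee
# call pattern set up above, reduced to a toy Gaussian target so the sampler wiring is
# visible in isolation.  It assumes emcee 2.x and scipy; the lambda stands in for
# post(...).lnprob, and toy_prior.rvs() for the sample_ps/sample_gs initialization.
if __name__ == '__main__':
    import numpy as np
    import emcee
    from scipy.stats import multivariate_normal as toy_mvn

    toy_nbins = 4
    toy_nwalkers = 2 * toy_nbins                          # same walker-count rule as above
    toy_prior = toy_mvn(np.zeros(toy_nbins), np.identity(toy_nbins))
    toy_lnprob = lambda theta: toy_prior.logpdf(theta)    # toy posterior = prior
    toy_ivals = toy_prior.rvs(toy_nwalkers)               # (nwalkers, nbins) initial positions
    toy_sampler = emcee.EnsembleSampler(toy_nwalkers, toy_nbins, toy_lnprob)
    toy_pos, toy_lnp, toy_state = toy_sampler.run_mcmc(toy_ivals, 1000, thin=10)
    print('toy chain shape: ' + str(np.shape(toy_sampler.chain)))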