Пример #1
0
def load_data_all(data_dir, all_data_path, pred_path, gloveFile, first_run,
                  load_all):
    """Load GloVe embeddings and build train/validation/test matrices.

    Parameters
    ----------
    data_dir : str
        Directory handed to ``uf.training_data_split`` (used for caching).
    all_data_path : str
        Path to the raw dataset read by ``uf.read_data``.
    pred_path : str
        Unused here; kept for interface compatibility with callers.
    gloveFile : str
        Path to the full GloVe embedding file.
    first_run : bool
        Unused here; kept for interface compatibility with callers.
    load_all : bool
        If True load the full GloVe file; otherwise load the pre-filtered
        file at the module-level ``filtered_glove_path`` (defined elsewhere).

    Returns
    -------
    tuple
        (train_x, train_y, test_x, test_y, val_x, val_y,
         weight_matrix, word_idx)
    """
    # Load embeddings for either the full or the filtered GloVe list.
    if load_all:
        weight_matrix, word_idx = uf.load_embeddings(gloveFile)
    else:
        weight_matrix, word_idx = uf.load_embeddings(filtered_glove_path)

    # Create test, validation and training data (80% training split).
    all_data = uf.read_data(all_data_path)
    train_data, test_data, dev_data = uf.training_data_split(
        all_data, 0.8, data_dir)

    # Reset indices so row positions line up after the split.
    train_data = train_data.reset_index()
    dev_data = dev_data.reset_index()
    test_data = test_data.reset_index()

    # Sequence-length statistics determine the padding of the data matrices.
    maxSeqLength, avg_words, sequence_length = uf.maxSeqLen(all_data)

    # Build the padded token-index matrices for each split.
    train_x = uf.tf_data_pipeline_nltk(train_data, word_idx, weight_matrix,
                                       maxSeqLength)
    test_x = uf.tf_data_pipeline_nltk(test_data, word_idx, weight_matrix,
                                      maxSeqLength)
    val_x = uf.tf_data_pipeline_nltk(dev_data, word_idx, weight_matrix,
                                     maxSeqLength)

    # Build the label matrices for each split.
    train_y = uf.labels_matrix(train_data)
    val_y = uf.labels_matrix(dev_data)
    test_y = uf.labels_matrix(test_data)

    # Summarize sizes.
    print("Training data: ")
    print(train_x.shape)
    print(train_y.shape)

    # Number of classes equals the label-matrix width (was wrapped in a
    # meaningless np.unique call on a scalar before).
    print("Classes: ")
    print(train_y.shape[1])

    return train_x, train_y, test_x, test_y, val_x, val_y, weight_matrix, word_idx
Пример #2
0
def run_scipy():
    theta_pass = 6e-13, 4.4e-14, 1e-9, 1e10
    ssfr, snr, snr_err = util.read_data()

    nll = lambda *args: -lnlike(*args)
    result = opt.minimize(nll, [theta_pass],
                          args=(ssfr, snr, snr_err),
                          options={'disp': True})
    print "ML parameters:", result.x
    #a_fit, b_fit, c_fit, d_fit = result.x[0], result.x[1], result.x[2], result.x[3]
    #theta_pass = a_fit, b_fit, c_fit, d_fit
    theta_pass = result.x
    return theta_pass
Пример #3
0
def run_scipy(ssfr, snr, snr_err):
	"""Maximum-likelihood fit of the four model parameters with scipy optimisers.

	NOTE(review): the ssfr/snr/snr_err arguments are immediately shadowed by
	fresh reads from util, so the passed-in values are never used — confirm
	whether the parameters should be honoured or dropped.
	Returns the best-fit parameter vector from opt.fmin_cg.
	"""
	# Overwrites the arguments with freshly loaded data (log version).
	logssfr, ssfr, snr, snr_err = util.read_data_with_log()

	# Initial guess and top-hat parameter bounds (a, k, s0, alpha).
	theta_pass = 1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979
	a_min, a_max = 1e-13, 7.e-13
	k_min, k_max = 0.1, 1.
	s0_min, s0_max = 1e-10, 2e-10
	alpha_min, alpha_max = 0.6, 9.
	# NOTE(review): bnds is only consumed by the commented-out SLSQP call below.
	bnds = ((a_min,a_max),(k_min,k_max),(s0_min,s0_max),(alpha_min,alpha_max))
	# Re-read the data in linear space, overwriting the values loaded above.
	ssfr, snr, snr_err = util.read_data()
	# Negative log-likelihood to be minimised.
	nll = lambda *args: -lnlike(*args)
	
	#result = opt.minimize(nll, [theta_pass],args=(ssfr,snr,snr_err),options={'disp': True},bounds=bnds,method='SLSQP')
	# NOTE(review): the fmin result is immediately overwritten; only the
	# fmin_cg result is kept — confirm the first call is intentional.
	result = opt.fmin(nll, [theta_pass],args=(ssfr,snr,snr_err),ftol=0.000001,xtol=0.00001)
	result = opt.fmin_cg(nll, [theta_pass],args=(ssfr,snr,snr_err))
	print "ML parameters:", result
	#a_fit, b_fit, c_fit, d_fit = result.x[0], result.x[1], result.x[2], result.x[3]
	#theta_pass = a_fit, b_fit, c_fit, d_fit
	theta_pass = result
	return theta_pass
Пример #4
0
def run_emcee():
	if os.path.isfile(root_dir + 'Data/MCMC_abnew.pkl'):
		print 'Chains already exist, using existing chains'
		pkl_data_file = open(root_dir + 'Data/MCMC_abnew.pkl','rb')
		samples = pick.load(pkl_data_file)
		pkl_data_file.close()
		print np.shape(samples)
	else:
		print 'Chains do not exist, computing chains...'

		# Setting parameter top hat priors
		k1_min, k1_max = 1e-13, 9e-13
		k2_min, k2_max = 1e-5, 7e-4
		x1_min, x1_max = 1e-10, 8e-10
		x2_min, x2_max = 1e-11, 8e-11

		ndim = 4	
		nwalkers = 300
		nburn = 20
		nsample = 150

		# These functions define the prior and the function to apply prior to likelihood
		def lnprior(theta):
			k1, k2, x1, x2 = theta
			if (k1_min < k1 < k1_max) and (k2_min < k2 < k2_max) and (x1_min < x1 < x1_max) and (x2_min < x2 < x2_max):
				return 0.
			return -np.inf

		def lnprob(theta, logssfr, logsnr, snr_err):
			lp = lnprior(theta)
			if not np.isfinite(lp):
				return -np.inf
			return lp + lnlike(theta, logssfr, logsnr, snr_err)

		# Reading in data
		ssfr, snr, snr_err = util.read_data()

		# Setting initial position of walkers
		pos_min = np.array([k1_min, k2_min, x1_min, x2_min])
		pos_max = np.array([k1_max, k2_max, x1_max, x2_max])
		psize = pos_max - pos_min

		psize = pos_max - pos_min
		pos = [pos_min + psize*np.random.rand(ndim) for ii in range(nwalkers)]
		#pos = [np.array([4.628e-13, 6.105e-11, 2.885e-10, 1.008e-11, 6.100e-4]) + 1e-4*psize*np.random.randn(ndim) for i in range(nwalkers)]

		# Defining sampler
		sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(ssfr, snr, snr_err), threads=1)

		# Performing burn in
		pos, prob, state = sampler.run_mcmc(pos, nburn)
		sampler.reset()

		# Main sampling run
		pos, prob, state = sampler.run_mcmc(pos, nsample)

		# These plots are for diagnostics use
		plt.figure()
		ax = plt.subplot()
		ax.set_yscale("log")
		#ax.set_xscale("log")
		plt.plot(sampler.chain[:,:,0].T,'b',alpha=0.05)
		plt.xlabel('par0')
		
		plt.figure()
		ax = plt.subplot()
		ax.set_yscale("log")
		#ax.set_xscale("log")
		plt.plot(sampler.chain[:,:,3].T,'b',alpha=0.05)
		plt.xlabel('par3')

		plt.figure()
		ax = plt.subplot()
		#ax.set_yscale("log")
		#ax.set_xscale("log")
		plt.plot(sampler.lnprobability[:,:].T,'b',alpha=0.05)
		plt.xlabel('lnprob')

		# Formatting and saving output
		samples = sampler.flatchain
		output = open(root_dir + 'Data/MCMC_piecewise.pkl','wb')
 		pick.dump(samples,output)
 		output.close()
		print np.shape(samples)

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.hist(samples[:,0],bins=100)
	plt.xlabel('k1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.hist(samples[:,1],bins=100)
	plt.xlabel('k2')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.hist(samples[:,2],bins=100)
	plt.xlabel('x1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.hist(samples[:,3],bins=100)
	plt.xlabel('x2')

	c = ChainConsumer()
	c.add_chain(samples, parameters=["$k_1$", "$k_2$", "$sSFR_1$", "$sSFR_2$", "$sSFR_a$"])
	c.configure(smooth=False,bins=300,sigmas=[0,1,2,3])
	#figw = c.plotter.plot_walks()
	fig = c.plotter.plot()
	summary =  c.analysis.get_summary()

	k1_fit = summary["$k_1$"][1]
	k2_fit = summary["$k_2$"][1]
	ssfr1_fit = summary["$sSFR_1$"][1]
	ssfr2_fit = summary["$sSFR_2$"][1]
	ssfra_fit = summary["$sSFR_a$"][1]

	theta_pass = k1_fit, k2_fit, x1_fit, x2_fit
	
	print 'k1', k1_fit
	print 'k2', k2_fit
	print 'x1', x1_fit
	print 'x2', x2_fit
	return theta_pass
Пример #5
0
def run_grid():
	"""Evaluate the piecewise-model likelihood on a 4D (k1, k2, x1, x2) grid.

	Loads a cached grid if one exists, otherwise brute-forces a
	resolution**4 grid, normalises it, and pickles the result.
	Prints and returns the maximum-likelihood parameter tuple.
	"""
	if util.does_grid_exist(model_name,root_dir):
		print 'Grid already exists, using existing grid...'
		resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
		k1_par, k2_par, x1_par, x2_par = parameters
	else:
		print 'Grid does not exist, computing grid...'
	
		resolution = 100

		k1_min, k1_max = 0.4, 0.8
		k2_min, k2_max = 0.1e-8, 90e-8
		x1_min, x1_max = 0.1e-11, 4e-11
		x2_min, x2_max = 0.1e-9, 3e-9

		# Reading in data
		ssfr, snr, snr_err = util.read_data()

		k1_par = np.linspace(k1_min,k1_max,resolution)
		k2_par = np.linspace(k2_min,k2_max,resolution)
		x1_par = np.linspace(x1_min,x1_max,resolution)
		x2_par = np.linspace(x2_min,x2_max,resolution)

		likelihoods = np.ones((resolution,resolution,resolution,resolution))
		max_like = 0.

		# Brute-force scan over the full grid, tracking the running maximum.
		# NOTE(review): resolution**4 = 1e8 likelihood evaluations — slow.
		for ii in np.arange(resolution):
			if ii%2 == 0:
				print np.round((float(ii) / resolution) * 100.,2), "% Done"
			for jj in np.arange(resolution):
				for kk in np.arange(resolution):
					for ll in np.arange(resolution):
						theta = k1_par[ii], k2_par[jj], x1_par[kk], x2_par[ll]
						likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
						if likelihoods[ii,jj,kk,ll] > max_like:
							max_like = likelihoods[ii,jj,kk,ll]
							theta_max = k1_par[ii], k2_par[jj], x1_par[kk], x2_par[ll]
							#print "New max like:", max_like
							#print theta_max, "\n"
		# Normalise so the grid sums to 1, then cache everything to disk.
		likelihoods /= np.sum(likelihoods)
		output = open(root_dir + 'Data/MCMC_piecewise_grid.pkl','wb')
		parameters = k1_par, k2_par, x1_par, x2_par
		result = resolution, likelihoods, parameters, theta_max
 		pick.dump(result,output)
 		output.close()

	# Marginalise the 4D grid down to a 1D likelihood per parameter.
	k1_like = np.zeros(resolution)
	k2_like = np.zeros(resolution)
	x1_like = np.zeros(resolution)
	x2_like = np.zeros(resolution)
	for ii in np.arange(resolution):
		k1_like[ii]    = np.sum(likelihoods[ii,:,:,:])
		k2_like[ii]    = np.sum(likelihoods[:,ii,:,:])
		x1_like[ii]    = np.sum(likelihoods[:,:,ii,:])
		x2_like[ii]    = np.sum(likelihoods[:,:,:,ii])
	
	'''
	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k1_par,k1_like,'x')
	plt.xlabel('slope')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k2_par,k2_like,'x')
	plt.xlabel('offset')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(x1_par,x1_like,'x')
	plt.xlabel('x1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(x2_par,x2_like,'x')
	plt.xlabel('x2')
	'''
	
	# These are the marginalised maximum likelihood parameters
	# NOTE(review): computed but unused — theta_max (grid maximum) is returned.
	k1_fit = k1_par[np.argmax(k1_like)]
	k2_fit = k2_par[np.argmax(k2_like)]
	x1_fit = x1_par[np.argmax(x1_like)]
	x2_fit = x2_par[np.argmax(x2_like)]

	print "ML parameters:"
	#theta_pass = k1_fit, k2_fit, x1_fit, x2_fit
	theta_pass = theta_max
	print theta_pass
	return theta_pass
Пример #6
0
4) Calculate the derivatives needed for Fisher sum
5) Calculate Fisher sum by doing (N'*(C**2))*N where N is the numerical derivative matrix and
C is the inverse of the diagonal covariance matrix.  Or alternatively just sum over all the
individual components of the Fisher matrix. 
'''

start = timer()  # Start timestamp used to track total program runtime.

#---------------------------------- INPUTS -----------------------------------
#1) The function that we wish to take derivatives of
def inputFunction(dataPoints,baseParameters):
    """Function whose derivatives are taken: the sSNR model on log10(sSFR)."""
    log_points = np.log10(dataPoints)
    return nicelog.nicelog_snr(log_points, baseParameters)

#2) The datapoints where we wish to know the derivative
# sSFR values are the abscissa; presumably the sSNR uncertainties enter the
# inverse covariance of the Fisher sum — confirm against the sum below.
ssfr, snr, snr_err = util.read_data()
dataPoints = ssfr
dataPointsUncertainties = snr_err

#3) The parameter values of the function


#baseParameters = np.array([5.222222e-14, 0.2272727, 1.30909e-10, 0.9449494],dtype=np.float64)
#baseParametersOriginal = np.array([5.222222e-14, 0.2272727, 1.309090e-10, 0.9449494],dtype=np.float64)

# Best-fit parameters; a second untouched copy is kept, presumably so the
# working array can be perturbed and reset between derivatives — confirm.
baseParameters = np.array([1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979],dtype=np.float64)
baseParametersOriginal = np.array([1.1183673469387756e-13, 0.49081632653061219, 1.6653061224489797e-10, 0.7265306122448979],dtype=np.float64)

#----------------------------- FUNCTION DEFINITIONS---------------------------
def derivativeNumerical(function,stepSize,parameterNumber):
    
Пример #7
0
def run_grid():
    """Evaluate the abnew-model likelihood on a 5D grid.

    Grid axes are (k1, k2, ssfr1, ssfr2, ssfra).  Loads a cached grid if one
    exists, otherwise brute-forces a resolution**5 grid, normalises it, and
    pickles the result.  Prints and returns the maximum-likelihood tuple.
    """
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        k1_par, k2_par, s1_par, s2_par, sa_par = parameters
    else:
        print 'Grid does not exist, computing grid...'

        resolution = 40
        '''
		# These limits find a decent fit with r.chi2 1.55 
		k1_min, k1_max = 3e-13, 6.6e-13
		k2_min, k2_max = 3e-11, 9.5e-11
		ssfr1_min, ssfr1_max = 2e-10, 4e-10
		ssfr2_min, ssfr2_max = 2e-12, 1.35e-10
		ssfra_min, ssfra_max = 4e-4, 9.5e-4
		'''

        #theta_pass = 4.628e-13, 2e-4, 1./(0.5e9), 1.008e-11, 2./(12e9)
        k1_min, k1_max = 3e-13, 6e-13
        k2_min, k2_max = 3e-5, 4e-4
        ssfr1_min, ssfr1_max = 2e-10, 6e-10
        ssfr2_min, ssfr2_max = 1.5e-11, 5.8e-11
        ssfra_min, ssfra_max = 5.2e-11, 7.e-10

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k1_par = np.linspace(k1_min, k1_max, resolution)
        k2_par = np.linspace(k2_min, k2_max, resolution)
        s1_par = np.linspace(ssfr1_min, ssfr1_max, resolution)
        s2_par = np.linspace(ssfr2_min, ssfr2_max, resolution)
        sa_par = np.linspace(ssfra_min, ssfra_max, resolution)

        # Adding another point by hand
        #4.628e-13, 6.105e-11, 2.885e-10, 1.008e-11, 6.100e-4
        #k1_par = np.sort(np.append(k1_par,4.628e-13))
        #k2_par = np.sort(np.append(k2_par,6.105e-11))
        #s1_par = np.sort(np.append(s1_par,2.885e-10))
        #s2_par = np.sort(np.append(s2_par,1.008e-11))
        #sa_par = np.sort(np.append(sa_par,6.100e-4))
        #resolution += 1

        likelihoods = np.ones(
            (resolution, resolution, resolution, resolution, resolution))
        max_like = 0.

        # Brute-force scan over the full 5D grid, tracking the running maximum.
        for ii in np.arange(resolution):
            if ii % 2 == 0:
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        for mm in np.arange(resolution):
                            theta = k1_par[ii], k2_par[jj], s1_par[kk], s2_par[
                                ll], sa_par[mm]
                            likelihoods[ii, jj, kk, ll, mm] = np.exp(
                                lnlike(theta, ssfr, snr, snr_err))
                            if likelihoods[ii, jj, kk, ll, mm] > max_like:
                                max_like = likelihoods[ii, jj, kk, ll, mm]
                                theta_max = k1_par[ii], k2_par[jj], s1_par[
                                    kk], s2_par[ll], sa_par[mm]
                                #print "New max like:", max_like
                                #print theta_max, "\n"
        # Normalise so the grid sums to 1, then cache everything to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_abnew_grid.pkl', 'wb')
        parameters = k1_par, k2_par, s1_par, s2_par, sa_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    # Marginalise the 5D grid down to a 1D likelihood per parameter.
    k1_like = np.zeros(resolution)
    k2_like = np.zeros(resolution)
    s1_like = np.zeros(resolution)
    s2_like = np.zeros(resolution)
    sa_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k1_like[ii] = np.sum(likelihoods[ii, :, :, :, :])
        k2_like[ii] = np.sum(likelihoods[:, ii, :, :, :])
        s1_like[ii] = np.sum(likelihoods[:, :, ii, :, :])
        s2_like[ii] = np.sum(likelihoods[:, :, :, ii, :])
        sa_like[ii] = np.sum(likelihoods[:, :, :, :, ii])
    '''
	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k1_par,k1_like,'x')
	plt.xlabel('k1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k2_par,k2_like,'x')
	plt.xlabel('k2')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(s1_par,s1_like,'x')
	plt.xlabel('ssfr1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(s2_par,s2_like,'x')
	plt.xlabel('ssfr2')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(sa_par,sa_like,'x')
	plt.xlabel('ssfra')
	'''

    # These are the marginalised maximum likelihood parameters
    # NOTE(review): computed but unused — theta_max (grid maximum) is returned.
    k1_fit = k1_par[np.argmax(k1_like)]
    k2_fit = k2_par[np.argmax(k2_like)]
    s1_fit = s1_par[np.argmax(s1_like)]
    s2_fit = s2_par[np.argmax(s2_like)]
    sa_fit = sa_par[np.argmax(sa_like)]

    print "ML parameters:"
    #theta_pass = k1_fit, k2_fit, s1_fit, s2_fit, sa_fit
    theta_pass = theta_max
    print theta_pass
    return theta_pass
Пример #8
0
def run_grid():
	"""Evaluate the shortabnew-model likelihood on a 4D grid.

	Grid axes are (k2, ssfr1, ssfr2, ssfra).  Loads a cached grid if one
	exists, otherwise brute-forces a resolution**4 grid, normalises it, and
	pickles the result.  Prints and returns the maximum-likelihood tuple.
	"""
	if util.does_grid_exist(model_name,root_dir):
		print 'Grid already exists, using existing grid...'
		resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
		k2_par, s1_par, s2_par, sa_par = parameters
	else:
		print 'Grid does not exist, computing grid...'
	
		resolution = 100

		#theta_pass = 4.628e-13, 2e-4, 1./(0.5e9), 1.008e-11, 2./(12e9)
		k2_min, k2_max = 4e-5, 1.2e-4
		ssfr1_min, ssfr1_max = 1e-8, 5e-7
		ssfr2_min, ssfr2_max = 7e-12, 2.e-10
		ssfra_min, ssfra_max = 2e-10, 1.e-9

		# Reading in data
		ssfr, snr, snr_err = util.read_data()

		k2_par = np.linspace(k2_min,k2_max,resolution)
		s1_par = np.linspace(ssfr1_min,ssfr1_max,resolution)
		s2_par = np.linspace(ssfr2_min,ssfr2_max,resolution)
		sa_par = np.linspace(ssfra_min,ssfra_max,resolution)

		likelihoods = np.ones((resolution,resolution,resolution,resolution))
		max_like = 0.

		# Brute-force scan over the full grid, tracking the running maximum.
		for ii in np.arange(resolution):
			if ii%2 == 0:
				print np.round((float(ii) / resolution) * 100.,2), "% Done"
			for jj in np.arange(resolution):
				for kk in np.arange(resolution):
					for ll in np.arange(resolution):
						theta = k2_par[ii], s1_par[jj], s2_par[kk], sa_par[ll]
						likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
						if likelihoods[ii,jj,kk,ll] > max_like:
							max_like = likelihoods[ii,jj,kk,ll]
							theta_max = k2_par[ii], s1_par[jj], s2_par[kk], sa_par[ll]
							#print "New max like:", max_like
							#print theta_max, "\n"
		# Normalise so the grid sums to 1, then cache everything to disk.
		likelihoods /= np.sum(likelihoods)
		output = open(root_dir + 'Data/MCMC_shortabnew_grid.pkl','wb')
		parameters = k2_par, s1_par, s2_par, sa_par
		result = resolution, likelihoods, parameters, theta_max
 		pick.dump(result,output)
 		output.close()

	# Marginalise the 4D grid down to a 1D likelihood per parameter.
	k2_like = np.zeros(resolution)
	s1_like = np.zeros(resolution)
	s2_like = np.zeros(resolution)
	sa_like = np.zeros(resolution)
	for ii in np.arange(resolution):
		k2_like[ii] = np.sum(likelihoods[ii,:,:,:])
		s1_like[ii] = np.sum(likelihoods[:,ii,:,:])
		s2_like[ii] = np.sum(likelihoods[:,:,ii,:])
		sa_like[ii] = np.sum(likelihoods[:,:,:,ii])
	
	'''
	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k2_par,k2_like,'x')
	plt.xlabel('k2')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(s1_par,s1_like,'x')
	plt.xlabel('ssfr1')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(s2_par,s2_like,'x')
	plt.xlabel('ssfr2')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(sa_par,sa_like,'x')
	plt.xlabel('ssfra')
	'''

	# These are the marginalised maximum likelihood parameters
	# NOTE(review): computed but unused — theta_max (grid maximum) is returned.
	k2_fit = k2_par[np.argmax(k2_like)]
	s1_fit = s1_par[np.argmax(s1_like)]
	s2_fit = s2_par[np.argmax(s2_like)]
	sa_fit = sa_par[np.argmax(sa_like)]

	print "ML parameters:"
	#theta_pass = k2_fit, s1_fit, s2_fit, sa_fit
	theta_pass = theta_max
	print theta_pass
	return theta_pass
Пример #9
0
def run_grid():
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing chains...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        a_par, b_par, c_par, d_par = parameters
    else:
        print 'Grid does not exist, computing grid...'

        resolution = 100
        '''
		# These limits give a pretty decent overview of this local extrema
		a_min, a_max = 1e-13, 9e-13
		b_min, b_max = 1e-17, 6e-14
		c_min, c_max = 0.1e-10, 1e-9
		d_min, d_max = 1e9, 50e9
		'''

        # These limits focus in on the local extrema above
        a_min, a_max = 2e-13, 6e-13
        b_min, b_max = 6e-15, 6e-14
        c_min, c_max = 6e-11, 3e-10
        d_min, d_max = 7e9, 80e9

        # Reading in data
        logssfr, logsnr, snr_err = util.read_data()

        a_par = np.linspace(a_min, a_max, resolution)
        b_par = np.linspace(b_min, b_max, resolution)
        c_par = np.linspace(c_min, c_max, resolution)
        d_par = np.linspace(d_min, d_max, resolution)

        likelihoods = np.ones((resolution, resolution, resolution, resolution))
        max_like = 0.

        for ii in np.arange(resolution):
            if ii % 5 == 0:
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        theta = a_par[ii], b_par[jj], c_par[kk], d_par[ll]
                        likelihoods[ii, jj, kk, ll] = np.exp(
                            lnlike(theta, logssfr, logsnr, snr_err))
                        if likelihoods[ii, jj, kk, ll] > max_like:
                            max_like = likelihoods[ii, jj, kk, ll]
                            theta_max = a_par[ii], b_par[jj], c_par[kk], d_par[
                                ll]
                            #print "New max like:", max_like
                            #print theta_max, "\n"
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_sigmoid_grid.pkl', 'wb')
        parameters = a_par, b_par, c_par, d_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    a_like = np.zeros(resolution)
    b_like = np.zeros(resolution)
    c_like = np.zeros(resolution)
    d_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        a_like[ii] = np.sum(likelihoods[ii, :, :, :])
        b_like[ii] = np.sum(likelihoods[:, ii, :, :])
        c_like[ii] = np.sum(likelihoods[:, :, ii, :])
        d_like[ii] = np.sum(likelihoods[:, :, :, ii])
    '''
	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(a_par,a_like)
	plt.xlabel('a')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(b_par,b_like)
	plt.xlabel('b')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(c_par,c_like)
	plt.xlabel('c')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(d_par,d_like)
	plt.xlabel('d')
	'''

    a_fit = a_par[np.argmax(a_like)]
    b_fit = b_par[np.argmax(b_like)]
    c_fit = c_par[np.argmax(c_like)]
    d_fit = d_par[np.argmax(d_like)]

    print "ML parameters:"
    theta_pass = a_fit, b_fit, c_fit, d_fit
    print theta_pass
    return theta_max
Пример #10
0
def run_grid():
	"""Evaluate the nicelog-model likelihood on a 4D (a, k, s0, alpha) grid.

	Loads a cached grid if one exists, otherwise brute-forces a
	resolution**4 grid, normalises it, and pickles the result.
	Prints and returns the maximum-likelihood parameter tuple.
	"""
	if util.does_grid_exist(model_name,root_dir):
		print 'Grid already exists, using existing grid...'
		resolution, likelihoods, parameters, theta_max = util.read_grid(model_name,root_dir)
		a_par, k_par, s0_par, alpha_par = parameters
	else:
		print 'Grid does not exist, computing grid...'
	
		resolution = 50
		#theta_pass = 4.2e-14, 0.272, 3.8e-11, 0.9
		a_min, a_max = 2e-14, 20e-14
		k_min, k_max = 0.05, 1.4
		s0_min, s0_max = 1e-11, 60e-11
		alpha_min, alpha_max = 0.3, 1.4

		#a_min, a_max = 2e-14, 13e-14
		#k_min, k_max = 0.05, 2
		#s0_min, s0_max = 1e-11, 20e-11
		#alpha_min, alpha_max = 0.55, 1.4

		# Reading in data
		ssfr, snr, snr_err = util.read_data()

		a_par = np.linspace(a_min,a_max,resolution)
		k_par = np.linspace(k_min,k_max,resolution)
		s0_par = np.linspace(s0_min,s0_max,resolution)
		alpha_par = np.linspace(alpha_min,alpha_max,resolution)

		likelihoods = np.ones((resolution,resolution,resolution,resolution))
		max_like = 0.

		# Brute-force scan over the full grid, tracking the running maximum.
		for ii in np.arange(resolution):
			if ii%2 == 0:
				print np.round((float(ii) / resolution) * 100.,2), "% Done"
			for jj in np.arange(resolution):
				for kk in np.arange(resolution):
					for ll in np.arange(resolution):
						theta = a_par[ii], k_par[jj], s0_par[kk], alpha_par[ll]
						likelihoods[ii,jj,kk,ll] = np.exp(lnlike(theta,ssfr,snr,snr_err))
						if likelihoods[ii,jj,kk,ll] > max_like:
							max_like = likelihoods[ii,jj,kk,ll]
							theta_max = a_par[ii], k_par[jj], s0_par[kk], alpha_par[ll]
							#print "New max like:", max_like
							#print theta_max, "\n"
		# Normalise so the grid sums to 1, then cache everything to disk.
		likelihoods /= np.sum(likelihoods)
		output = open(root_dir + 'Data/MCMC_nicelog_grid.pkl','wb')
		parameters = a_par, k_par, s0_par, alpha_par
		result = resolution, likelihoods, parameters, theta_max
 		pick.dump(result,output)
 		output.close()

	# Marginalise the 4D grid down to a 1D likelihood per parameter.
	a_like = np.zeros(resolution)
	k_like = np.zeros(resolution)
	s0_like = np.zeros(resolution)
	alpha_like = np.zeros(resolution)
	for ii in np.arange(resolution):
		a_like[ii]    = np.sum(likelihoods[ii,:,:,:])
		k_like[ii]    = np.sum(likelihoods[:,ii,:,:])
		s0_like[ii]    = np.sum(likelihoods[:,:,ii,:])
		alpha_like[ii]    = np.sum(likelihoods[:,:,:,ii])
	
	# Optional corner plot of the grid posterior (disabled by default).
	yes_chainconsumer = False
	if yes_chainconsumer:
		print "Defining chainconsumer"
		c = ChainConsumer()
		print "Adding chain"
		c.add_chain([a_par, k_par, s0_par, alpha_par], parameters=["a","k","s0","alpha"],weights=likelihoods,grid=True)
		print "Doing plot"
		fig = c.plotter.plot()

	'''
	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(a_par,a_like,'x')
	plt.xlabel('a')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(k_par,k_like,'x')
	plt.xlabel('k')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(s0_par,s0_like,'x')
	plt.xlabel('ssfr0')

	plt.figure()
	ax = plt.subplot()
	ax.set_xscale("log")
	plt.plot(alpha_par,alpha_like,'x')
	plt.xlabel('alpha')
	'''
	
	# These are the marginalised maximum likelihood parameters
	# NOTE(review): computed but unused — theta_max (grid maximum) is returned.
	a_fit = a_par[np.argmax(a_like)]
	k_fit = k_par[np.argmax(k_like)]
	s0_fit = s0_par[np.argmax(s0_like)]
	alpha_fit = alpha_par[np.argmax(alpha_like)]

	print "ML parameters:"
	#theta_pass = a_fit, k_fit, s0_fit, alpha_fit
	theta_pass = theta_max
	print theta_pass
	return theta_pass
Пример #11
0
def bootstrap_uncertainties():
	"""Estimate parameter uncertainties by bootstrap resampling the data.

	Repeats run_grid_fast on n_runs bootstrap resamples (sampling data
	indices with replacement), histograms the fitted (a, k, s0, alpha)
	values, and writes the parameter table and runtime to text files.
	"""
	
	time_start = time.time()
	
	# Histogram ranges for each parameter (also the grid-fit search ranges,
	# presumably — confirm against run_grid_fast).
	a_min, a_max = 0.1e-13, 4.0e-13
	k_min, k_max = 0.1, 2.0
	s0_min, s0_max = 0.05e-10, 7e-10
	alpha_min, alpha_max = 0.1, 1.2

	n_runs = 400
	pars = np.zeros((4,n_runs))

	ssfr, snr, snr_err = util.read_data()

	ndata = len(ssfr)
	index_array = np.arange(ndata)

	#plt.figure()
	#ax = plt.subplot()
	counter = 0
	while counter < n_runs:
		ii = counter
		print ii
		#snr_shift = snr + np.random.normal(size=len(snr)) * snr_err

		# Bootstrap resample: draw ndata indices with replacement.
		indices = np.random.choice(index_array,size=ndata,replace=True)

		ssfr_sub = ssfr[indices]
		snr_sub = snr[indices]
		snr_err_sub = snr_err[indices]
		pars[:,ii] = run_grid_fast(ssfr_sub, snr_sub, snr_err_sub)
		# If alpha pinned to its lower bound, the fit hit the grid edge:
		# undo the counter advance below so this slot is redrawn and refit.
		if np.isclose(pars[:,ii][3]-alpha_min,0.):
			counter -= 1

		#util.plot_data(root_dir, model_name, pars[:,ii], nicelog_snr)
		#plt.show()
		#plt.plot(np.log10(ssfr_sub),snr_sub,'bo',alpha=0.05)
		counter += 1
	#plt.errorbar(np.log10(ssfr),snr,yerr=snr_err,fmt='o',color='k')
	#plt.xlabel('log(sSFR)',size='large')
	#plt.ylabel('sSNR',size='large')
	#ax.set_yscale("log")
	#plt.xlim((-13,-8))
	#plt.ylim((5e-15,6e-12))
		
	
	# Bootstrap distributions of each fitted parameter.
	plt.figure()
	plt.hist(pars[0,:],bins=10,range=(a_min,a_max))
	plt.xlabel('a')

	plt.figure()
	plt.hist(pars[1,:],bins=10,range=(k_min,k_max))
	plt.xlabel('k')

	plt.figure()
	plt.hist(pars[2,:],bins=10,range=(s0_min,s0_max))
	plt.xlabel('ssfr0')

	plt.figure()
	plt.hist(pars[3,:],bins=10,range=(alpha_min,alpha_max))
	plt.xlabel('alpha')
	
	# Persist results: one row per bootstrap run, plus total runtime.
	runtime = time.time() - time_start
	np.savetxt('bootstrap_parameters.txt',pars.T)
	np.savetxt('runtime.txt',np.array([runtime]),fmt='%4.2f')
Пример #12
0
def run_grid():
    """Evaluate the abnewalpha-model likelihood on a 5D grid.

    Grid axes are (k1, k2, ssfr1, ssfr2, alpha).  Loads a cached grid if
    one exists, otherwise brute-forces a resolution**5 grid, normalises it,
    and pickles the result.  Plots the marginalised 1D likelihoods, then
    prints and returns the maximum-likelihood parameter tuple.
    """
    if util.does_grid_exist(model_name, root_dir):
        print 'Grid already exists, using existing grid...'
        resolution, likelihoods, parameters, theta_max = util.read_grid(
            model_name, root_dir)
        k1_par, k2_par, s1_par, s2_par, alpha_par = parameters
    else:
        print 'Grid does not exist, computing grid...'

        resolution = 30

        #k1_min, k1_max = 1.e-12, 4.5e-12
        #k2_min, k2_max = 4e-4, 9e-4
        #ssfr1_min, ssfr1_max = 1e-10, 8e-10
        #ssfr2_min, ssfr2_max = 1.5e-11, 5.5e-11
        #alpha_min, alpha_max = 0.01, 0.15

        k1_min, k1_max = 1.e-12, 4.5e-12
        k2_min, k2_max = 1e-4, 5e-4
        ssfr1_min, ssfr1_max = 9e-10, 2e-9
        ssfr2_min, ssfr2_max = 2.5e-11, 6.e-11
        alpha_min, alpha_max = 0.01, 0.15

        # Reading in data
        ssfr, snr, snr_err = util.read_data()

        k1_par = np.linspace(k1_min, k1_max, resolution)
        k2_par = np.linspace(k2_min, k2_max, resolution)
        s1_par = np.linspace(ssfr1_min, ssfr1_max, resolution)
        s2_par = np.linspace(ssfr2_min, ssfr2_max, resolution)
        alpha_par = np.linspace(alpha_min, alpha_max, resolution)

        likelihoods = np.ones(
            (resolution, resolution, resolution, resolution, resolution))
        max_like = 0.

        # Brute-force scan over the full 5D grid, tracking the running maximum.
        for ii in np.arange(resolution):
            if ii % 2 == 0:
                print np.round((float(ii) / resolution) * 100., 2), "% Done"
            for jj in np.arange(resolution):
                for kk in np.arange(resolution):
                    for ll in np.arange(resolution):
                        for mm in np.arange(resolution):
                            theta = k1_par[ii], k2_par[jj], s1_par[kk], s2_par[
                                ll], alpha_par[mm]
                            likelihoods[ii, jj, kk, ll, mm] = np.exp(
                                lnlike(theta, ssfr, snr, snr_err))
                            if likelihoods[ii, jj, kk, ll, mm] > max_like:
                                max_like = likelihoods[ii, jj, kk, ll, mm]
                                theta_max = k1_par[ii], k2_par[jj], s1_par[
                                    kk], s2_par[ll], alpha_par[mm]
                                #print "New max like:", max_like
                                #print theta_max, "\n"
        # Normalise so the grid sums to 1, then cache everything to disk.
        likelihoods /= np.sum(likelihoods)
        output = open(root_dir + 'Data/MCMC_abnewalpha_grid.pkl', 'wb')
        parameters = k1_par, k2_par, s1_par, s2_par, alpha_par
        result = resolution, likelihoods, parameters, theta_max
        pick.dump(result, output)
        output.close()

    # Marginalise the 5D grid down to a 1D likelihood per parameter.
    k1_like = np.zeros(resolution)
    k2_like = np.zeros(resolution)
    s1_like = np.zeros(resolution)
    s2_like = np.zeros(resolution)
    alpha_like = np.zeros(resolution)
    for ii in np.arange(resolution):
        k1_like[ii] = np.sum(likelihoods[ii, :, :, :, :])
        k2_like[ii] = np.sum(likelihoods[:, ii, :, :, :])
        s1_like[ii] = np.sum(likelihoods[:, :, ii, :, :])
        s2_like[ii] = np.sum(likelihoods[:, :, :, ii, :])
        alpha_like[ii] = np.sum(likelihoods[:, :, :, :, ii])

    # Diagnostic plots of each marginalised likelihood.
    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k1_par, k1_like, 'x')
    plt.xlabel('k1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(k2_par, k2_like, 'x')
    plt.xlabel('k2')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s1_par, s1_like, 'x')
    plt.xlabel('ssfr1')

    plt.figure()
    ax = plt.subplot()
    ax.set_xscale("log")
    plt.plot(s2_par, s2_like, 'x')
    plt.xlabel('ssfr2')

    plt.figure()
    ax = plt.subplot()
    #ax.set_xscale("log")
    plt.plot(alpha_par, alpha_like, 'x')
    plt.xlabel('alpha')

    # These are the marginalised maximum likelihood parameters
    # NOTE(review): computed but unused — theta_max (grid maximum) is returned.
    k1_fit = k1_par[np.argmax(k1_like)]
    k2_fit = k2_par[np.argmax(k2_like)]
    s1_fit = s1_par[np.argmax(s1_like)]
    s2_fit = s2_par[np.argmax(s2_like)]
    alpha_fit = alpha_par[np.argmax(alpha_like)]

    print "ML parameters:"
    #theta_pass = k1_fit, k2_fit, s1_fit, s2_fit, alpha_fit
    theta_pass = theta_max
    print theta_pass
    return theta_pass