示例#1
0
    def bayesian_regression(self, Methodology):
        """Fit a line to ``self.x_array`` / ``self.y_array`` by MCMC sampling.

        A degree-1 ``polyfit`` of the data supplies the starting values
        ``m_0`` and ``n_0``; ``self.lr_ChiSq`` turns the data and that guess
        into the model definition handed to ``MCMC``. The sampler runs for
        10000 iterations, the first 1000 of which are burn-in.

        Parameters
        ----------
        Methodology
            Not read by this method; kept so existing callers keep working.

        Returns
        -------
        OrderedDict
            In insertion order: ``'methodology'`` (a fixed label), ``'m'`` and
            ``'n'`` (the ``'mean'`` entries of the sampler statistics for the
            variables named ``'m'`` and ``'n'``), then ``'m_error'`` and
            ``'n_error'`` (their ``'standard deviation'`` entries).
        """
        fit_dict = OrderedDict()
        fit_dict['methodology'] = r'Inference $\chi^{2}$ model'

        # Initial guess for the fitting.
        Np_lsf = polyfit(self.x_array, self.y_array, 1)
        m_0, n_0 = Np_lsf[0], Np_lsf[1]

        MCMC_dict = self.lr_ChiSq(self.x_array, self.y_array, m_0, n_0)
        myMCMC = MCMC(MCMC_dict)
        myMCMC.sample(iter=10000, burn=1000)

        # Take a single statistics snapshot: calling stats() once per value
        # would redo the whole summary four times for the same traces.
        summary = myMCMC.stats()
        m_summary = summary['m']
        n_summary = summary['n']

        fit_dict['m'] = m_summary['mean']
        fit_dict['n'] = n_summary['mean']
        fit_dict['m_error'] = m_summary['standard deviation']
        fit_dict['n_error'] = n_summary['standard deviation']

        return fit_dict
示例#2
0
def dotheMCMC(x):
    '''
    Run the Markov Chain Monte Carlo analysis of film runtimes for one year.

    The model estimates the global average runtime and the deviation from
    that average for different countries, languages and genres.

    Parameters
    ----------
    x: tuple
        x[0]: integer
            the year in which the films to be analysed were released.
        x[1]: pandas dataframe
            the dataframe containing all the movies released that year.

    Returns
    -------
    dict
        'stats':
            the pyMC2 stats dictionary produced by the sampler after
            300000 iterations with a burn-in of 75000.
        'year':
            x[0], unchanged.
        'countries':
            the result of get_represented for the 'Cou_' prefix; a
            dictionary with the elements "same" and "diff", each an
            array.
        'languages':
            as 'countries' but for the 'Lan_' prefix.
        'genres':
            as 'countries' but for the 'Gen_' prefix.
        'num':
            the sum of the first-axis lengths of the "same" and "diff"
            arrays over countries, languages and genres.
    '''
    year = x[0]
    group = x[1]

    # One lookup per category family; the prefix selects the matching columns
    # (presumably column-name prefixes in the dataframe -- confirm against
    # get_represented).
    representedCountries = get_represented(group, countries, 'Cou_')
    representedLanguages = get_represented(group, languages, 'Lan_')
    representedGenres = get_represented(group, genres, 'Gen_')

    # Count every represented entry across the three families and across
    # both the "same" and "diff" arrays.
    families = (representedCountries, representedLanguages, representedGenres)
    numRepresented = sum(
        family[key].shape[0]
        for family in families
        for key in ('same', 'diff')
    )

    # Build the model for this year, wrap it in a pyMC sampler and run it.
    model = film_model_by_year(str(year), group, representedCountries,
                               representedLanguages, representedGenres,
                               numRepresented)
    mc = MCMC(model)
    mc.sample(iter=300000, burn=75000, progress_bar=False)

    results = {
        'stats': mc.stats(),
        'year': year,
        'countries': representedCountries,
        'languages': representedLanguages,
        'genres': representedGenres,
        'num': numRepresented,
    }
    return results
示例#3
0
 def test_stats_after_reload(self):
     """Check that stats() works on a sampler rebuilt from a reloaded database.

     Loads the pickle-backed database from 'MCMC.pickle' (presumably written
     by an earlier test -- confirm), attaches it to a new MCMC object for
     DisasterModel and calls stats(). The database is closed and the file
     removed afterwards even when the check raises, so a failure here does
     not leave the file behind.
     """
     db = database.pickle.load('MCMC.pickle')
     try:
         M2 = MCMC(DisasterModel, db=db)
         M2.stats()
     finally:
         db.close()
         os.remove('MCMC.pickle')
#   value for each parameter, as well as the 95% confidence interval.


# plot function takes the model (or a single parameter) as an argument:
Matplot.plot(M)
plt.show()


# ### Making inferences about model parameters ###
# The *stats()* function provides an interface to the statistics of our posterior,  
# in the form of a dictionary.  For example, let's find the predicted ratio between  
# effective sizes of the disk and the bulge, and let's also explore how confidently  
# we can determine the effective surface brightness of the disk.

print 'R_effective (bulge) / R_effective (disk) =', \
       M.stats()['r_e_B']['mean'] / M.stats()['r_e_D']['mean']
print 'Effective surface brightness of the bulge: \n', \
       '    Best-fit value:', M.stats()['M_e_B']['mean'], \
       '\n    95% Confidence interval:', M.stats()['M_e_B']['quantiles'][2.5], \
        'to', M.stats()['M_e_B']['quantiles'][97.5]


# ### Visualizing specific realizations of our model ###
# The *trace()* method presents the values of a variable for all of the saved  
# Markov Chain steps. Let's plot up several of these traces, and see how  
# the model changes with different parameter values.

for i in range(50):
    plt.plot(M.r.value, M.trace('SB')[i], c='gray', alpha=.25)

plt.scatter(M.r.value, M.mags.value, c='r') 
示例#5
0
 def test_stats_after_reload(self):
     """Check that stats() works on a sampler rebuilt from a reloaded database.

     Loads the pickle-backed database from "MCMC.pickle" (presumably written
     by an earlier test -- confirm), attaches it to a new MCMC object for
     disaster_model and calls stats(). The database is closed and the file
     removed afterwards even when the check raises, so a failure here does
     not leave the file behind.
     """
     db = database.pickle.load("MCMC.pickle")
     try:
         M2 = MCMC(disaster_model, db=db)
         M2.stats()
     finally:
         db.close()
         os.remove("MCMC.pickle")
from pymc import MCMC
import numpy as np
from pythonMCMC import pymcCrater
from pymc.Matplot import plot
from pylab import hist, show,draw

# Build the sampler from the pymcCrater model and draw 10000 iterations,
# with burn=700 and thin=5.
M = MCMC(pymcCrater)
M.sample(iter=10000, burn=700, thin=5)

# Dump the full 'lnlike' trace and the summary statistics, then show the
# diagnostic plots. Single-argument print is parenthesized so the output is
# the same under the Python 2 print statement.
print(M.trace('lnlike')[:])
print(M.stats())
plot(M)
show()
示例#7
0
文件: hw11.py 项目: r-b-g-b/AY250_HW
# Report the Beta parameters together with the mean and variance of B.
print("""Beta distribution with alpha=%.4f and beta=%.4f yields mu=%.4f and sigma^2=%.4f
     """ % (M.alpha, M.beta, B.mean(), B.var()))

# ## (b) Draw samples from the posterior
M.sample(20000, burn=2000, thin=20)

# ## (c) Check convergence of MCMC by plotting traces
fig, axs = plt.subplots(1, 3, figsize=(12, 4))
for i, ax in enumerate(axs):
    ax.plot(M.avg.trace[:, i])
    ax.set_title('Player %u' % i)
axs[0].set_ylabel('Batting average')
axs[1].set_xlabel('Sample')

# ## (d) Posterior mean and 95% CI for each player
# One statistics snapshot serves both lookups; a second stats() call would
# recompute the same summary.
posterior_stats = M.stats()
avg_mcmc_mean = posterior_stats['avg']['mean']
avg_mcmc_ci = posterior_stats['avg']['95% HPD interval']

# print('') emits the same blank line as a bare Python 2 print statement.
print('')
print('MCMC mean for each player')
for m, ci in zip(avg_mcmc_mean, avg_mcmc_ci):
    print('Mean: %.4f\tCI: (%.4f, %.4f)' % (m, ci[0], ci[1]))

# transform confidence intervals for plotting: turn the absolute bounds into
# distances below and above the mean (modifies avg_mcmc_ci in place)
avg_mcmc_ci[:, 0] = avg_mcmc_mean - avg_mcmc_ci[:, 0]
avg_mcmc_ci[:, 1] = avg_mcmc_ci[:, 1] - avg_mcmc_mean

# ## (e) Full-season batting average versus MLE from April
df_full = pd.read_csv('laa_2011_full.txt', sep='\t')
avg_mle_full = df_full.H / df_full.AB.astype(float)
示例#8
0
import coal_disaster
from pymc import MCMC
from pylab import hist, show
from pymc.Matplot import plot,pyplot

__author__ = 'auroua'

# Build the sampler from the coal_disaster module and show the switchpoint
# value it holds before any sampling has been done.
M = MCMC(coal_disaster)
print(M.switchpoint.value)

# Draw 10000 iterations with burn=1000 and thin=10.
M.sample(iter=10000, burn=1000, thin=10)

# Optional diagnostics, left disabled:
# print len(M.trace('switchpoint')[:])
# hist(M.trace('late_mean')[:])
# show()

plot(M)

# NOTE(review): the returned statistics are discarded, so nothing is shown
# when this runs as a script -- confirm whether they were meant to be printed.
M.stats()
#!/usr/bin/env python
import two_normal_model
from pymc import MCMC
from pymc.Matplot import plot

# Sample the posterior of two_normal_model and print the summary statistics.
m = MCMC(two_normal_model)
m.sample(iter=100000, burn=1000)
print(m.stats())

# Write the full trace of each listed variable to "<name>.trace".
import numpy
for param_name in ('mean1', 'mean2', 'std_dev', 'theta'):
    trace_path = "%s.trace" % param_name
    samples = m.trace(param_name)[:]
    numpy.savetxt(trace_path, samples)

# Diagnostic plots for the sampler.
plot(m)
from pymc import MCMC
from pymc.Matplot import plot
import numpy as np

import small_model as model

# Sample small_model for 5000 iterations and save the diagnostic plots with
# the '-gamma' suffix.
A = MCMC(model)
A.sample(iter=5000)
plot(A, suffix='-gamma')

print('%s prior' % model.prior)

# Take one statistics snapshot: the original recomputed the full summary once
# per variable inside the comprehension and three more times for the error.
posterior_stats = A.stats()
print([(x, posterior_stats[x]['mean']) for x in posterior_stats])

# Combine the posterior means of ABp, CAp and CBp into the error figure.
error = ((1 - posterior_stats['ABp']['mean']) * 400
         + posterior_stats['CAp']['mean'] * 600
         + posterior_stats['CBp']['mean'] * 1000
         - 200)
print('Error: %s' % error)
示例#11
0
 def test_stats_after_reload(self):
     """Check that stats() works on a sampler rebuilt from a reloaded database.

     Loads the pickle-backed database from 'MCMC.pickle' (presumably written
     by an earlier test -- confirm), attaches it to a new MCMC object for
     DisasterModel and calls stats(). The database is closed and the file
     removed afterwards even when the check raises, so a failure here does
     not leave the file behind.
     """
     db = database.pickle.load('MCMC.pickle')
     try:
         M2 = MCMC(DisasterModel, db=db)
         M2.stats()
     finally:
         db.close()
         os.remove('MCMC.pickle')
示例#12
0
import model
from pymc import MCMC
import pprint
import sys, os

# Build the sampler from the model module and print it.
M = MCMC(model)
print(M)

# NOTE(review): the script exits here, so the sampling and the summary below
# never run. This looks like a leftover debugging stop -- confirm before
# removing it.
sys.exit()

# Run sampling for 40000 iterations, with a burn-in of 5000 iterations and
# thinning for every 10 iterations.
M.sample(iter=40000, burn=5000, thin=10)

# Refer to sample_output.txt for example of posterior sampling summary.
posterior_summary = M.stats()
pprint.pprint(posterior_summary)