def DisplayData():
    """Displays the proton-proton cross section data."""
    pyt.figsize(12, 7)
    plt.loglog(E_cm, Sig, ls=' ', marker='.', markersize=3, color='black', label='PDG Data')
    plt.loglog(eE_cm, ESig, ls=' ', marker='.', markersize=3, color='black')
    plt.loglog(E_cm[90:], func(E_cm[90:], popt2[0], popt2[1], popt2[2], popt2[3], popt2[4], popt2[5], popt2[6]), color='blue')
    plt.loglog(E_cm, func(E_cm, popt[0], popt[1], popt[2], popt[3], popt[4], popt[5], popt[6]), color='blue', label='Fit')
    plt.scatter(7000, 70.5, label='TOTEM EPL 101 21003', color='red')
    plt.scatter([2760, 7000], [62.1, 72.7], label='ALICE 2011', color='blue')
    plt.scatter([7000, 8000], [72.9, 74.7], label='TOTEM 2013', color='green')
    plt.errorbar([2760, 7000, 7000, 7000, 8000], [62.1, 70.5, 72.7, 72.9, 74.7],
                 yerr=[5.9, 3.4, 6.2, 1.5, 1.7], fmt=' ', color='black')
    plt.loglog(E_cm[90:], 10 * SigI(E_cm[90:]))
    plt.errorbar(E_cm, Sig, xerr=[E_cm - cm_min, cm_max - E_cm], yerr=[StEr_L, StEr_H],
                 ms=.5, mew=0, fmt='none', ecolor='black')
    plt.errorbar(eE_cm, ESig, xerr=[eE_cm - ecm_min, ecm_max - eE_cm], yerr=[EStEr_L, EStEr_H],
                 ms=.5, mew=0, fmt='none', ecolor='black')
    plt.annotate("Total", fontsize=11, xy=(7, 46), xytext=(7, 46))
    plt.annotate("Elastic", fontsize=11, xy=(1000, 10), xytext=(1000, 10))
    plt.annotate("Inelastic", fontsize=11, xy=(35, 25), xytext=(35, 25))
    plt.title("pp Cross Section Data", fontsize=16)
    plt.ylabel("Cross Section [mb]", fontsize=12)
    plt.xlabel(r"$\sqrt{s}\,[GeV]$", fontsize=16)
    plt.ylim(1, 400)
    plt.grid(which='minor', axis='y')
    plt.grid(which='major', axis='x')
    plt.legend(loc=4)
    plt.show()
def main():
    N = 100
    p = pm.Uniform("freq_cheating", 0, 1)
    true_answers = pm.Bernoulli("truths", p, size=N)
    first_coin_flips = pm.Bernoulli("first_flips", 0.5, size=N)
    second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

    @pm.deterministic
    def observed_proportion(t_a=true_answers, fc=first_coin_flips, sc=second_coin_flips):
        result = t_a & fc | ~fc & sc
        return float(sum(result)) / len(result)

    X = 35
    observations = pm.Binomial("obs", N, observed_proportion, value=X, observed=True)

    model = pm.Model([p, true_answers, first_coin_flips,
                      second_coin_flips, observed_proportion, observations])

    # To be explained in Chapter 3!
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 15000)

    figsize(12.5, 3)
    p_trace = mcmc.trace("freq_cheating")[:]
    plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85, bins=30,
             label="posterior distribution", color="#348ABD")
    plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
    plt.xlim(0, 1)
    plt.legend()
    plt.show()
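# Sanity check of the randomized-response model above (a sketch, not part of
# the original script): under the same coin-flip scheme, P("Yes") = 0.5*p + 0.25,
# so an observed 35/100 corresponds to a naive point estimate p = (0.35 - 0.25) / 0.5 = 0.2.
import numpy as np

def expected_yes_proportion(p, n=100000, seed=0):
    rng = np.random.default_rng(seed)
    truths = rng.random(n) < p
    first = rng.random(n) < 0.5
    second = rng.random(n) < 0.5
    # truth-teller answers honestly on heads; otherwise the second flip answers
    return np.mean(truths & first | ~first & second)

print(expected_yes_proportion(0.2))   # ~0.35, matching 0.5*0.2 + 0.25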
def CustomPlot():
    import json
    import matplotlib
    from IPython.core.pylabtools import figsize
    s = json.load(open("../styles/matplotlibrc.json"))
    matplotlib.rcParams.update(s)
    figsize(18, 6)
def roll_mean_std(self, df, xaxis, yaxis, xlabel_name, width):
    figsize(13.5, 8)
    fig, ax1 = plt.subplots()
    ax1.plot(df[xaxis][0:96], pd.rolling_mean(df[yaxis][0:96], window=width), 'b-')
    ax1.set_xlabel(xlabel_name, fontsize=24)
    plt.xticks(fontsize=20)
    # Make the y-axis label and tick labels match the line color.
    ax1.set_ylabel('Rolling mean', color='b', fontsize=24)
    plt.yticks(fontsize=20)
    for tl in ax1.get_yticklabels():
        tl.set_color('b')

    ax2 = ax1.twinx()
    ax2.plot(df[xaxis][0:96], pd.rolling_std(df[yaxis][0:96], window=width), 'r-')
    ax2.set_ylabel('Rolling standard deviation', color='r', fontsize=24)
    plt.yticks(fontsize=20)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')

    # plt.xlim(0, 96)
    plt.xticks(fontsize=20)
    ax1.yaxis.grid(False, 'minor')
    ax1.xaxis.grid(False, 'minor')
    ax2.yaxis.grid(False, 'major')
    ax2.xaxis.grid(False, 'major')
    plt.show()
def plot_confusion_matrix_fancy(conf_arr, title='Confusion matrix', names=[]):
    norm_conf = []
    for i in conf_arr:
        tmp_arr = []
        a = sum(i, 0)
        for j in i:
            tmp_arr.append(float(j) / float(a))
        norm_conf.append(tmp_arr)

    fig = plt.figure(figsize=(10, 10))
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(np.array(norm_conf), cmap=plt.cm.Blues, interpolation='nearest')

    width = len(conf_arr)
    height = len(conf_arr[0])
    for x in xrange(width):
        for y in xrange(height):
            ax.annotate(str(conf_arr[x][y]), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center')

    plt.title(title)
    cb = fig.colorbar(res)
    plt.xticks(range(width), names, rotation='vertical')
    plt.yticks(range(height), names)
    plt.savefig(DATASET_PATH + title + '.png', format='png', dpi=200)
def main():
    figsize(12.5, 3.5)
    np.set_printoptions(precision=3, suppress=True)
    challenger_data = np.genfromtxt("challenger_data.csv", skip_header=1,
                                    usecols=[1, 2], missing_values="NA",
                                    delimiter=",")
    # drop the NA values
    challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]

    # plot it, as a function of temperature (the first column)
    print "Temp (F), O-Ring failure?"
    print challenger_data

    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.show()
def main():
    figsize(12.5, 4)
    parameters = [(10, .4), (10, .9)]
    colors = ["#348ABD", "#A60628"]

    for params, cols in zip(parameters, colors):
        N, p = params
        _x = np.arange(N + 1)
        plt.bar(_x - 0.5, stats.binom.pmf(_x, N, p), color=cols,
                edgecolor=cols, alpha=0.6,
                label="$N$: %d, $p$: %.1f" % (N, p), linewidth=3)

    plt.legend(loc="upper left")
    plt.xlim(0, 10.5)
    plt.xlabel("$k$")
    plt.ylabel("$P(X = k)$")
    plt.title("Probability mass distributions of binomial random variables")
    plt.show()
def main():
    figsize(12, 3)
    x = np.linspace(-4, 4, 100)
    plt.plot(x, logistic(x, 1), label=r"$\beta = 1$", ls="--", lw=1, color='#332288')
    plt.plot(x, logistic(x, 3), label=r"$\beta = 3$", ls="--", lw=1, color='#117733')
    plt.plot(x, logistic(x, -5), label=r"$\beta = -5$", ls="--", lw=1, color='#882255')
    plt.plot(x, logistic(x, 1, 1), label=r"$\beta = 1, \alpha = 1$", color="#332288")
    plt.plot(x, logistic(x, 3, -2), label=r"$\beta = 3, \alpha = -2$", color="#117733")
    plt.plot(x, logistic(x, -5, 7), label=r"$\beta = -5, \alpha = 7$", color="#882255")
    plt.legend(loc="lower left")
    plt.show()
def plot_gridsearch(self, x, aspect=3):
    scores = np.array(x)
    scores = scores[:, 1:].T
    print np.shape(scores)
    figsize(16, 8)
    fig, ax = plt.subplots(1, 1)
    cax = ax.imshow(scores, interpolation='none', origin='upper',
                    cmap=plt.cm.coolwarm, aspect=aspect)
    plt.grid(b=True, which='major', axis='x', color='white', linestyle='-')
    plt.xlim(0, 96)
    plt.xticks((-0.5, 96.5), (100, 2000), fontsize=20)
    # plt.xticks(np.linspace(0,194,10), int(np.linspace(100,4000,10)), fontsize=20)
    plt.yticks(np.arange(0, 11, 1), np.arange(1, 12, 1), fontsize=20)
    plt.xlabel('Number of trees', fontsize=24)
    plt.ylabel('Number of features', fontsize=24)
    ax.yaxis.grid(False, 'major')
    ax.xaxis.grid(False, 'major')
    cb = fig.colorbar(cax, orientation='horizontal', pad=0.15, shrink=1, aspect=50)
    cb.ax.tick_params(labelsize=14)
    plt.show()
def main():
    figsize(12.5, 5)
    fig = plt.figure()
    jet = plt.cm.jet

    plt.subplot(121)
    x = y = np.linspace(0, 5, 100)
    X, Y = np.meshgrid(x, y)
    exp_x = stats.expon.pdf(x, scale=3)
    exp_y = stats.expon.pdf(x, scale=10)
    M = np.dot(exp_x[:, None], exp_y[None, :])
    CS = plt.contour(X, Y, M)
    im = plt.imshow(M, interpolation='none', origin='lower', cmap=jet, extent=(0, 5, 0, 5))
    plt.xlabel("prior on $p_1$")
    plt.ylabel("prior on $p_2$")
    plt.title("$Exp(3), Exp(10)$ prior landscape")

    ax = fig.add_subplot(122, projection='3d')
    ax.plot_surface(X, Y, M, cmap=jet)
    ax.view_init(azim=390)
    plt.title("$Exp(3), Exp(10)$ prior landscape; \nalternate view")
    plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
#plt.style.use('seaborn')
from IPython.core.pylabtools import figsize

figsize(16, 8)
np.seterr(divide='ignore', invalid='ignore')
pd.options.mode.chained_assignment = None  # default='warn'


def plot_two(data, base_col, second_col, booked_col, nbins, title_prefix):
    f, (ax1, ax2) = plt.subplots(1, 2)
    plot_dict = {ax1: base_col, ax2: second_col}
    for ax, col in plot_dict.items():
        h, bins, patches = ax.hist((data.loc[data[booked_col] == True][col],
                                    data.loc[data[booked_col] == False][col]), nbins)
        h_ratio = h[0].astype(float) / (h[1] + h[0]).astype(float)
        h_ratio[np.isnan(h_ratio)] = 0
        axa = ax.twinx()
        axa.scatter((bins[0:-1] + bins[1:]) / 2.0, h_ratio, color='r')
# Calculate first and third quartile
first_quartile = data['Site EUI (kBtu/ft²)'].describe()['25%']
third_quartile = data['Site EUI (kBtu/ft²)'].describe()['75%']

# Interquartile range
iqr = third_quartile - first_quartile

# Remove outliers
data = data[(data['Site EUI (kBtu/ft²)'] > (first_quartile - 3 * iqr)) &
            (data['Site EUI (kBtu/ft²)'] < (third_quartile + 3 * iqr))]

# Create a list of building types with more than 100 measurements
types = data.dropna(subset=['score'])
types = types['Largest Property Use Type'].value_counts()
types = list(types[types.values > 100].index)

# Plot the distribution of scores for each building category
figsize(12, 10)
for b_type in types:
    # Select the building type
    subset = data[data['Largest Property Use Type'] == b_type]
    # Density plot of Energy Star scores
    sns.kdeplot(subset['score'].dropna(), label=b_type, shade=False, alpha=0.8)

plt.gca().legend()
# label the plot
plt.xlabel('Energy Star Score', size=20)
plt.ylabel('Density', size=20)
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
import scipy.stats as stats
import numpy as np

figsize(12.5, 4)

a = np.arange(16)
poi = stats.poisson
lambda_ = [1.5, 4.25]
colors = ["#348ABD", "#A60628"]

plt.bar(a, poi.pmf(a, lambda_[0]), color=colors[0],
        label="$\lambda = %.1f$" % lambda_[0], alpha=0.60,
        edgecolor=colors[0], lw="3")
plt.show()
# Methods for creating plots to visualise atomic arrangements

import matplotlib.pyplot as plt
import ase
import ase.io
from ase.visualize.plot import plot_atoms
from ase import Atoms
import numpy as np
from mpl_toolkits import mplot3d
from IPython.core.pylabtools import figsize

figsize(5, 5)


def plot_cell_ase(cell, title):
    """Use ase in-built function to create 2D plot

    NOTE: Not very nice for visualising a 3D structure and appears to always
    set cell vectors to positive values

    Args:
        cell (ase Atoms object): Ase atoms object for structure to visualise
        title (str): Set a title for the plot

    Returns:
        Produces figure of plot, originally used in development notebook with
        '%matplotlib inline'
    """
    fig, ax = plt.subplots()
    plt.title(title)
    plot_atoms(cell, ax, radii=0.3, rotation=('0x,0y,0z'))


def plot_cell_custom_3D(ase_cell, title):
    """Produces 3D plot of structure for a system
print(df.head(100))

# Golden cross / dead cross
prev_key = prev_val = 0
for key, val in df['diff'][1:].iteritems():
    if val == 0:
        continue
    if val * prev_val < 0 and val > prev_val:
        print '[golden]', key, val
    if val * prev_val < 0 and val < prev_val:
        print '[dead]', key, val
    prev_key, prev_val = key, val

# Mark golden/dead crosses on the chart
ax = df[['Adj Close', 'MA_5', 'MA_20']].plot(figsize=(16, 6))
prev_key = prev_val = 0
for key, val in df['diff'][1:].iteritems():
    if val == 0:
        continue
    if val * prev_val < 0 and val > prev_val:
        ax.annotate('Golden', xy=(key, df['MA_20'][key]), xytext=(10, -30),
                    textcoords='offset points', arrowprops=dict(arrowstyle='-|>'))
    if val * prev_val < 0 and val < prev_val:
        ax.annotate('Dead', xy=(key, df['MA_20'][key]), xytext=(10, 30),
                    textcoords='offset points', arrowprops=dict(arrowstyle='-|>'))
    prev_key, prev_val = key, val

# plt.show()
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from IPython.core.pylabtools import figsize

iris_data = load_iris()   # Load the iris dataset

figsize(12.5, 10)
fig = plt.figure()
fig.suptitle('Plots of Iris Dimensions', fontsize=14)
fig.subplots_adjust(wspace=0.35, hspace=0.5)
colors = ('r', 'g', 'b')
cols = [colors[i] for i in iris_data.target]


def get_legend_data(clrs):
    leg_data = []
    for clr in clrs:
        line = plt.Line2D(range(1), range(1), marker='o', color=clr)
        leg_data.append(line)
    return tuple(leg_data)


def display_iris_dimensions(fig, x_idx, y_idx, sp_idx):
    ax = fig.add_subplot(3, 2, sp_idx)
    ax.scatter(iris_data.data[:, x_idx], iris_data.data[:, y_idx], c=cols)
    ax.set_xlabel(iris_data.feature_names[x_idx])
    ax.set_ylabel(iris_data.feature_names[y_idx])
    leg_data = get_legend_data(colors)
    ax.legend(leg_data, iris_data.target_names, numpoints=1,
# In[1]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sct
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF


# In[2]:

# %matplotlib inline
from IPython.core.pylabtools import figsize

figsize(12, 8)
sns.set()


# ## Part 1
# ### Part 1 _setup_

# In[3]:

np.random.seed(42)
dataframe = pd.DataFrame({
    "normal": sct.norm.rvs(20, 4, size=10000),
    "binomial": sct.binom.rvs(100, 0.2, size=10000)
})
try:
    File = open(sys.argv[1], 'r')
except:
    print ("File " + sys.argv[1] + " not found.")
    sys.exit(37)

PicTempNames = []
for ii in File.readlines():
    PicTempName = ii.split()
    PicTempNames.append(PicTempName)

from IPython.core.pylabtools import figsize
figsize(15, 10)

plt.rcParams['axes.labelsize'] = 25
plt.rcParams['axes.titlesize'] = 30
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['legend.fontsize'] = 30
plt.rcParams['lines.markersize'] = 15

# Load list of run ids and l3 rates.
id_vs_l3 = np.loadtxt('runid_to_l3rate')

# generate dictionary that maps runID to l3 rate
id_to_l3 = {str(int(id_vs_l3[ii][0])): int(id_vs_l3[ii][1])
            for ii in range(0, len(id_vs_l3[:, 1]))}

# Set SIMBAD parameters
# %%
import numpy as np
import scipy.stats as stats
from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt

figsize(12.5, 3)

colors = ["#348ABD", "#A60628", "#7A68A6", "#467821"]

x = np.linspace(0, 1)
y1, y2 = stats.beta.pdf(x, 1, 1), stats.beta.pdf(x, 10, 10)

p = plt.plot(x, y1, label='An objective prior \n(uninformative, \n "Principle of Indifference")')
plt.fill_between(x, 0, y1, color=p[0].get_color(), alpha=.3)

p = plt.plot(x, y2, label="A subjective prior \n(informative)")
plt.fill_between(x, 0, y2, color=p[0].get_color(), alpha=.3)

p = plt.plot(x[25:], 2 * np.ones(25), label="another subjective prior")
plt.fill_between(x[25:], 0, 2, color=p[0].get_color(), alpha=.3)

plt.ylim(0, 4)
leg = plt.legend(loc="upper left")
leg.get_frame().set_alpha(.4)
plt.title("Comparing objective vs. subjective priors for an unknown probability")


# %%
# Gamma
figsize(12.5, 5)
    labled_anomalies = data_frame.loc[data_frame['anomaly_label'] == 1, ['time_epoch', 'value']]
    ax.scatter(labled_anomalies['time_epoch'], labled_anomalies['value'], s=200, color='green')

    return ax

# def prepare_plot(data_frame):
#     fig, ax = plt.subplots()
#     ax.scatter(data_frame['timestamp'], data_frame['value'], s=8, color='blue')  # scatter plot
#
#     labled_anomalies = data_frame.loc[data_frame['anomaly_label'] == 1, ['timestamp', 'value']]
#     ax.scatter(labled_anomalies['timestamp'], labled_anomalies['value'], s=200, color='green')
#
#     return ax

figsize(16, 7)
prepare_plot(training_data_frame)
plt.show()

# The visualization of the training and test datasets looks like this:

# visualization
figsize(16, 7)
prepare_plot(test_data_frame)
plt.show()

# Preparing a dataset
training_data_frame['value_no_anomaly'] = training_data_frame[training_data_frame['anomaly_label'] == 0]['value']
                       [3, 3]])

plt.title('Dendrogram of Traditional Hierarchical Clustering')
z = linkage(data_array, method='complete')
d = dendrogram(z)
plt.axhline(y=2., color='k', ls='dashed')
plt.axhline(y=1.25, color='b', ls='dashed')
plt.axhline(y=0.8, color='r', ls='dashed')
plt.show()


# In[3]:

figsize(6, 6)
import numpy as np
import matplotlib.pyplot as plt

n = range(0, 11)
fig, ax = plt.subplots()
plt.title('2-Dimensional Visualization of the Data')
ax.scatter(data_array[:, 0], data_array[:, 1])
for i, txt in enumerate(n):
    ax.annotate(txt, (data_array[:, 0][i], data_array[:, 1][i]))


# ### Bayesian Hierarchical Clustering (BHC)
#
import pandas as pd
from IPython.core import pylabtools
import matplotlib
from actions_naming import marketbeat_mapping

# STOCKS_FILE_NAME = 'data/marketbeat_nasdaq_latest.csv'
STOCKS_FILE_NAME = 'data/marketbeat_nasdaq_2013_only.csv'
OUTPUT_NUMERIC_RECOS_FILE_NAME = 'data/marketbeat_numeric_recos_2013.csv'

# global drawing options
pd.set_option('display.mpl_style', 'default')     # Make the graphs a bit prettier
matplotlib.rcParams['mathtext.fontset'] = 'cm'    # deals with missing fonts in matplotlib
pylabtools.figsize(15, 5)


def construct_data_matrix(data, output_file_name):
    # Add a column with the date string converted to datetime
    data['DateTime'] = pd.to_datetime(data['Date'])

    # filter the latest recommendation from every analyst and every ticker
    latest = data.iloc[data.groupby(['Ticker', 'Firm']).DateTime.idxmax()]

    # Sanity check - see that 4 actions turned into only the latest
    print data[(data['Ticker'] == 'MSFT') & (data['Firm'] == 'Goldman Sachs')]
    print '\n\n'
    print latest[(latest['Ticker'] == 'MSFT') & (latest['Firm'] == 'Goldman Sachs')]

    # Pivot the table: each row is a firm, each column a ticker, and the value of each cell is the rating
    recos = latest.pivot('Firm', 'Ticker', 'Rating').dropna(how='all')   # drop rows that are ALL NaN

    # print a sample - just show firms that have recommendations on all
from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
from pylab import savefig
import pymc as pm
import numpy as np
import scipy.stats as stats
from scipy.stats.mstats import mquantiles

figsize(12.5, 3)
np.set_printoptions(precision=3, suppress=True)

challenger_data = np.genfromtxt("data/challenger_data.csv", skip_header=1,
                                usecols=[1, 2], missing_values="NA",
                                delimiter=",")
temperature = challenger_data[:, 0]
D = challenger_data[:, 1]   # defect, or not?

beta = pm.Normal("beta", 0, 0.001, value=0)
alpha = pm.Normal("alpha", 0, 0.001, value=0)


@pm.deterministic
def p(t=temperature, alpha=alpha, beta=beta):
    return 1.0 / (1. + np.exp(beta * t + alpha))


def logistic(x, beta, alpha=0):
    return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))


print(p.value)
# connect the probabilities in `p` with our observations through a
# Bernoulli random variable.
with model:
    observed = pm.Bernoulli("bernoulli_obs", p, observed=D)

    # Mysterious code to be explained in Chapter 3
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(120000, step=step, start=start)
    burned_trace = trace[100000::2]

alpha_samples = burned_trace["alpha"][:, None]   # best to make them 1d
beta_samples = burned_trace["beta"][:, None]

# plt.figure(1)
figsize(12.5, 6)

# histogram of the samples:
plt.subplot(211)
plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\beta$", color="#7A68A6", density=True)
plt.legend()

plt.subplot(212)
plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\alpha$", color="#A60628", density=True)
plt.legend()

# t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]
n_data_points = 5   # in CH1 we had ~70 data points


@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_data_points)
    out[:tau] = lambda_1    # lambda before tau is lambda1
    out[tau:] = lambda_2    # lambda after tau is lambda2
    return out


# Here, what does the prior distribution look like?
%matplotlib inline
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
figsize(12.5, 4)

samples = [lambda_1.random() for i in range(20000)]
plt.hist(samples, bins=70, normed=True, histtype="stepfilled")
plt.title("Prior distribution for $\lambda_1$")
plt.xlim(0, 8);

# Take the SMS data from the previous chapter: knowing what we do about parent
# and child variables, and taking an omniscient view of the modelling
# procedure, we can work backwards and simulate data that mimics how we expect
# the real data to have been generated, e.g.
tau = pm.rdiscrete_uniform(0, 80)
print( tau )
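# A sketch of that backwards simulation (assumes PyMC2's random-draw helpers
# pm.rexponential / pm.rpoisson and an 80-day horizon; the prior scale 1/20
# and the variable names below are illustrative, not from the original file):
alpha = 1. / 20.
lam1_draw, lam2_draw = pm.rexponential(alpha, 2)
artificial_data = np.r_[pm.rpoisson(lam1_draw, tau), pm.rpoisson(lam2_draw, 80 - tau)]
plt.bar(np.arange(80), artificial_data, color="#348ABD")
plt.title("Artificial dataset generated from the model's priors")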
indicators = pd.DataFrame(index=R.index)   # prepare indicators DataFrame
indicators['spread'] = NTpair['spread']
indicators['spread_ratio'] = NTpair['spread_ratio']
indicators['z_score'] = R_nday / R_nday.rolling(window=25).std()      # current day return z-score
indicators['cumRet'] = 100 * R.cumsum()                               # total sum of returns, which is a synthetic price
indicators['ma'] = indicators['cumRet'].rolling(window=200).mean()    # moving average of synthetic price
indicators['momentum'] = indicators['cumRet'] - indicators['ma']      # difference between synth.
# price and ma, gives indication of momentum strength
#print (indicators.tail(5))

figsize(8, 3)
indicators[['cumRet', 'ma']].tail(750).plot(grid=True, title='Nikkei-TOPIX spread')
plt.savefig(os.path.abspath('pics\\NT_spread') + '.png', format='png')
plt.figure()
indicators[['z_score', 'momentum']].tail(750).plot(grid=True)
plt.savefig(os.path.abspath('pics\\NT_zscore') + '.png', format='png')

# strategy parameters
win, ma_win, z_enter, z_enter2, z_exit = 25, 200, 2.5, 3.75, 1.9
NTdf = backtest(NTpair['spread_pct_N'], NTpair['spread_pct'],
                window=win, ma_thresh=0.5, ma_window=ma_win,
                z_enter=z_enter,
update the styles in only this notebook. Try running the following code:

    import json
    s = json.load(open("../styles/bmh_matplotlibrc.json"))
    matplotlib.rcParams.update(s)
"""

# The code below can be passed over, as it is currently not important, plus it
# uses advanced topics we have not covered yet. LOOK AT PICTURE, MICHAEL!
get_ipython().magic(u'matplotlib inline')
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
figsize(11, 9)

import scipy.stats as stats

dist = stats.beta
n_trials = [0, 1, 2, 3, 4, 5, 8, 15, 50, 500]
data = stats.bernoulli.rvs(0.5, size=n_trials[-1])
x = np.linspace(0, 1, 100)

# For the already prepared, I'm using Binomial's conj. prior.
for k, N in enumerate(n_trials):
    sx = plt.subplot(len(n_trials) / 2, 2, k + 1)
    plt.xlabel("$p$, probability of heads") if k in [0, len(n_trials) - 1] else None
    plt.setp(sx.get_yticklabels(), visible=False)
    heads = data[:N].sum()
#import seaborn as sns
from sklearn.metrics import recall_score, precision_score
from sklearn.metrics.scorer import make_scorer
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm

df = pd.read_csv('/home/peng/new160half.csv', header=0)
print df

df1 = df[df['Target'] == 1]
df0 = df[df['Target'] == 0]

figsize(10, 8)
ax = plt.subplot(111, projection='3d')
ax.scatter(df1['length'], df1['stress'], df1['age'], c='r', s=30, label='Failed')
ax.scatter(df0['length'], df0['stress'], df0['age'], c='b', marker='^', s=30, label='Unfailed')
plt.xlabel('Length of roadway(m)', fontsize=16)
plt.ylabel('Stress factor', fontsize=16)
ax.set_zlabel('Age(years)', fontsize=16, rotation=90)
plt.legend(fontsize=16)
# ax.plot(df1['length'], df1['stress'], df1['age'])
# ax.plot(df0['length'], df0['stress'], df0['age'])
plt.show()
def main():
    # unknown ground truth
    lambda1 = tf.constant(1.)
    lambda2 = tf.constant(3.)

    # sample a fake observation: data ~ poisson(data[0]; lambda1) * poisson(data[1]; lambda2)
    data = tf.stack([
        tfp.distributions.Poisson(rate=lambda1).sample(sample_shape=(1), seed=4),
        tfp.distributions.Poisson(rate=lambda2).sample(sample_shape=(1), seed=8)
    ])

    # (likelihood L) likelihood(lambda1, lambda2 | data) = p(data | lambda1, lambda2)
    #              = poisson(data[0]; lambda1) * poisson(data[1]; lambda2)
    # x represents lambda1, y represents lambda2
    x = y = tf.linspace(.01, 5., 100)
    prob_x = tfp.distributions.Poisson(rate=x).prob(data[0, ...])
    prob_y = tfp.distributions.Poisson(rate=y).prob(data[1, ...])
    L = tf.expand_dims(prob_x, 1) * tf.expand_dims(prob_y, 0)

    # (prior M) p(lambda1, lambda2) = P(lambda1) * P(lambda2), where lambda1 ~ U(0,5), lambda2 ~ U(0,5)
    uniform_x = tfp.distributions.Uniform(low=0., high=5.).prob(x)
    m = median(tf.gather_nd(uniform_x, tf.where(tf.greater(uniform_x, 0))))
    uniform_x = tf.where(tf.equal(uniform_x, 0), uniform_x, m)
    uniform_y = tfp.distributions.Uniform(low=0., high=5.).prob(y)
    m = median(tf.gather_nd(uniform_y, tf.where(tf.greater(uniform_y, 0))))
    uniform_y = tf.where(tf.equal(uniform_y, 0), uniform_y, m)
    M = tf.expand_dims(uniform_x, 1) * tf.expand_dims(uniform_y, 0)

    plt.figure(figsize=(12.5, 15.0))

    # 1) plot P(lambda1, lambda2) = P(lambda1) * P(lambda2), lambda ~ Uniform(0, 5)
    plt.subplot(221)
    im = plt.imshow(M.numpy(), interpolation='none', origin='lower',
                    cmap=plt.cm.jet, vmax=1, vmin=-.15, extent=(0, 5, 0, 5))
    plt.scatter(lambda2.numpy(), lambda1.numpy(), c='k', s=50, edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    plt.title(r'Landscape formed by Uniform priors on $p_1, p_2$')

    # 2) plot P(lambda1, lambda2, data) = p(lambda1, lambda2) * p(data | lambda1, lambda2)
    plt.subplot(223)
    plt.contour(x.numpy(), y.numpy(), (M * L).numpy())
    im = plt.imshow(M * L, interpolation='none', origin='lower',
                    cmap=plt.cm.jet, extent=(0, 5, 0, 5))
    plt.title('Landscape warped by %d data observation;\n Uniform priors on $p_1, p_2$.' % 1)
    plt.scatter(lambda2.numpy(), lambda1.numpy(), c='k', s=50, edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)

    # 3) plot P(lambda1, lambda2) = P(lambda1) * P(lambda2),
    #    lambda1 ~ Exponential(0.3), lambda2 ~ Exponential(0.1)
    plt.subplot(222)
    expx = tfp.distributions.Exponential(rate=.3).prob(x)
    expx = tf.where(tf.math.is_nan(expx), tf.ones_like(expx) * expx[1], expx)
    expy = tfp.distributions.Exponential(rate=.10).prob(y)
    expy = tf.where(tf.math.is_nan(expy), tf.ones_like(expy) * expy[1], expy)
    M = tf.expand_dims(expx, 1) * tf.expand_dims(expy, 0)
    plt.contour(x, y, M)
    im = plt.imshow(M, interpolation='none', origin='lower',
                    cmap=plt.cm.jet, extent=(0, 5, 0, 5))
    plt.scatter(lambda2.numpy(), lambda1.numpy(), c='k', s=50, edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    plt.title('Landscape formed by Exponential priors on $p_1, p_2$.')

    # 4) plot P(lambda1, lambda2, data) = P(lambda1, lambda2) * p(data | lambda1, lambda2)
    plt.subplot(224)
    plt.contour(x, y, M * L)
    im = plt.imshow(M * L, interpolation='none', origin='lower',
                    cmap=plt.cm.jet, extent=(0, 5, 0, 5))
    plt.title('Landscape warped by %d data observation; \n Exponential priors on $p_1, p_2$.' % 1)
    plt.scatter(lambda2.numpy(), lambda1.numpy(), c='k', s=50, edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    plt.show()
from __future__ import print_function, absolute_import, division

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
figsize(12, 4)

import os
import sys
os.environ['THEANO_FLAGS'] = "device=cpu,optimizer=fast_run"
DATA_DIR = os.path.join('/res', 'data')
sys.path.append(os.path.join('/res', 'src'))

import scipy.io as sio
import numpy as np
import theano
from theano import tensor as T
import h5py   # for loading data

from utils import (plot_images, Progbar, plot_confusion_matrix, plot_weights,
                   glorot_uniform, validate_deep_network, train_networks,
                   report_performance, plot_weights4D)

f = h5py.File(os.path.join(DATA_DIR, 'mnist.h5'), 'r')
X_train = f['X_train'].value
y_train = f['y_train'].value
X_valid = f['X_valid'].value
y_valid = f['y_valid'].value
    print('Classification Report: ')
    print(classification_report(y_test, y_pred))
    y_pred_prob = classifier.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr, linewidth=5, color=clr, label=lbl)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(roctitle)
    if showflag == True:
        leg = plt.legend(loc='lower right')
        plt.show()
    return True


figsize(12, 9)

########################################## Baseball Fun ###############################################
MIN_AT_BATS = 0
START_YEAR = 1954
END_YEAR = 2018
FSHZ = 17
START_DATE = datetime.strptime(str(START_YEAR) + '-01-01', '%Y-%m-%d')
END_DATE = datetime.strptime(str(END_YEAR) + '-12-31', '%Y-%m-%d')
LEGEND_PROPERTIES = {'weight': 'bold'}

path = 'C:\\Users\\User\\Documents\\PAUL\\Springboard\\core\\'
battingf = path + 'dfbatting_player_allstats.csv'
dfbatting_player_stats = pd.read_csv(battingf, parse_dates=['debut', 'finalGame', 'birthdate'])
flow_edges = compute_flow_edge(name)   # flow edge

######## Diffusion ##########
#from diffusion import diffuse_inprob
print 'Diffusion...'
inprobs = compute_inprob(name, segs)
diffused_prob = diffuse_inprob(inprobs, paths, segs, imgs)

inprob_image = prob_to_image(inprobs, paths, segs)
diffused_image = prob_to_image(diffused_prob, paths, segs)

for i in range(diffused_image.shape[2]):
    figure(figsize=(12, 9))
    subplot(1, 2, 1)
    imshow(inprob_image[:, :, i])
    subplot(1, 2, 2)
    imshow(diffused_image[:, :, i])
    show()

locprior = compute_locprior(name, segs, diffused_prob)
loc_unary = -np.log(locprior + 1e-7)

###### Random forest ########
print 'Random Forest...'
# see my thesis, p.14
# prepare training data based on diffused prob.
mean_rgbs = []
from datetime import datetime
import os.path
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib as mpl
import pylab as plb
import matplotlib.mlab as mlab
import math
from numpy.random import seed
import random
from IPython.core.pylabtools import figsize
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns

figsize(14, 10)
#sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white'})
sns.set_style('white')

# standard global constants
MIN_AT_BATS = 0
START_YEAR = 1970
END_YEAR = 2018
FSHZ = 17
START_DATE = datetime.strptime(str(START_YEAR) + '-01-01', '%Y-%m-%d')
END_DATE = datetime.strptime(str(END_YEAR) + '-12-31', '%Y-%m-%d')
LEGEND_PROPERTIES = {'weight': 'bold'}

# set path for reading Lahman baseball statistics and read data from rttm dataset
path = 'C:\\Users\\User\\Documents\\PAUL\\Springboard\\core\\'
battingf = path + 'cpOPSpredictionsRidge_GS.csv'
@author: Usamahk
"""

# This file examines the challenger data to determine and model where failure
# of an O ring will occur due to temperature.

# Libraries we will need
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
import pymc as pm
import numpy as np

# Loading the data and setting figsize as a standard
figsize(12.5, 3.5)
np.set_printoptions(precision=3, suppress=True)
challenger_data = np.genfromtxt("data/challenger_data.csv", skip_header=1,
                                usecols=[1, 2], missing_values="NA",
                                delimiter=",")

# Print to see data
print("Temp (F), O-Ring Failure")
print(challenger_data)

# Now that we've read in the data, we need to drop all the NA values, in this
# case only one at the end. Should do this before in practice but getting a
# hang of things here.
challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]
import os
import pandas as pd
import numpy as np
import time
from ta import momentum
import pymongo
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
import yfinance as yf
import datetime

figsize(20, 7)

client = pymongo.MongoClient('mongodb+srv://admin:[email protected]/test?retryWrites=true&w=majority')  # (os.environ['MONGO_URI'])
db = client.stocks


class Day_parser:
    '''Create a day parser'''
    def __init__(self, date):
        self.date = date

    def isweekday(self):
        weekday = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        if self.date in weekday:
            return True
        else:
            return False


def round_amt(x, base=5):
    '''Rounded amount of stocks'''
    # Need to update not exceed 20K
    return base * round(x / base)


def create_dataframe(stock, c_day):
import os
from keras.utils import get_file
import gensim
import patoolib
import subprocess
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
figsize(10, 10)
from sklearn.manifold import TSNE
import json
from collections import Counter
from itertools import chain

MODEL = 'GoogleNews-vectors-negative300.bin'
path = get_file(
    MODEL + '.gz',
    'https://deeplearning4jblob.blob.core.windows.net/resources/wordvectors/%s.gz' % MODEL)

if not os.path.isdir('generated'):
    os.mkdir('generated')

unzipped = os.path.join('generated', MODEL)
is_model_generated = os.path.isdir('generated\GoogleNews-vectors-negative300.bin')
if is_model_generated == False:
    print("Unpacking model")
    patoolib.extract_archive(path, outdir=unzipped)
def Main():
    mcmc = pm.MCMC([true_price, prize_1, prize_2, price_estimate, error])
    mcmc.sample(50000, 10000)
    price_trace = mcmc.trace("true_price")[:]

    # Plotting
    figsize(12.5, 4)
    x = np.linspace(5000, 40000)
    plt.plot(x, stats.norm.pdf(x, 35000, 7500), c="k", lw=2,
             label="prior dist. of suite price")

    _hist = plt.hist(price_trace, bins=35, density=True, histtype="stepfilled")
    plt.title("Posterior of the true price estimate")
    plt.vlines(mu_prior, 0, 1.1 * np.max(_hist[0]), label="prior's mean", linestyles="--")
    plt.vlines(price_trace.mean(), 0, 1.1 * np.max(_hist[0]),
               label="posterior's mean", linestyles="-.")
    plt.legend(loc="upper left")
    plt.show()

    guesses = np.linspace(5000, 50000, 70)
    risks = np.linspace(30000, 150000, 6)
    expected_loss = lambda guess, risk: showdown_loss(guess, price_trace, risk).mean()

    figsize(12.5, 7)
    for _p in risks:
        results = [expected_loss(_g, _p) for _g in guesses]
        plt.plot(guesses, results, label="%d" % _p)

    # Plotting
    plt.title("Expected loss of different guesses, \nvarious risk-levels of \
overestimating")
    plt.legend(loc="upper left", title="Risk parameter")
    plt.xlabel("price bid")
    plt.ylabel("expected loss")
    plt.xlim(5000, 30000)

    ax = plt.subplot(111)
    for _p in risks:
        _color = next(ax._get_lines.prop_cycler)
        _min_results = sop.fmin(expected_loss, 15000, args=(_p,), disp=False)
        _results = [expected_loss(_g, _p) for _g in guesses]
        plt.plot(guesses, _results, color=_color['color'])
        plt.scatter(_min_results, 0, s=60,
                    color=_color['color'], label="%d" % _p)
        plt.vlines(_min_results, 0, 120000, color=_color['color'], linestyles="--")

    plt.title("Expected loss & Bayes actions of different guesses, \n \
various risk-levels of overestimating")
    plt.legend(loc="upper left", scatterpoints=1, title="Bayes action at risk:")
    plt.xlabel("price guess")
    plt.ylabel("expected loss")
    plt.xlim(7000, 30000)
    plt.ylim(-1000, 80000)
    y_mean.append(np.mean(y[col_index[-2]:col_index[-1]]))
    y_std.append(np.std(y[col_index[-2]:col_index[-1]]))
    x_label_legend = "age" + "(" + str(age1) + '-' + str(age2) + ")"
    return x_label, y_mean, y_std, x_label_legend


# s = den_divide(x_y_den, 4, 3)
# For distance, run the following two lines
x_label1, y_mean1, y_std1, x_label_legend1 = part_mean(x_y_den, 6, 1, 20, 60)
x_label2, y_mean2, y_std2, x_label_legend2 = part_mean(x_y_den, 7, 0, 30, 60)
# For age, run the following two lines instead
# x_label1, y_mean1, y_std1 = part_mean(x_y_den, 11, 1)
# x_label2, y_mean2, y_std2 = part_mean(x_y_den, 5, 0)
# t.scatter(res[0], res[1])
# t.show()

figsize(30, 28)
male = G[G['gender'] == 1]
female = G[G['gender'] == 0]
plt.scatter(male['dis'], male['speed'], color='r', alpha=0.05)
plt.scatter(female['dis'], female['speed'], color='b', alpha=0.1)
plt.errorbar(x_label1, y_mean1, yerr=y_std1, color='r', alpha=1, label='male')
plt.errorbar(x_label2, y_mean2, yerr=y_std2, color='b', alpha=1, label='female')
plt.xlabel('distance(m)', font2)
plt.ylabel('speed(m/s)', font2)
# plt.title(x_label_legend1)
# ax.yaxis.set_major_locator(MultipleLocator(0.4))
# -*- coding: UTF-8 -*-
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import re
from sklearn.preprocessing import minmax_scale
from IPython.core.pylabtools import figsize
from matplotlib.ticker import MultipleLocator

if __name__ == '__main__':
    figsize(5, 3)
    font_legend = {'family': 'Arial',
                   'weight': 'normal',
                   'size': 11,
                   }
    font_label = {'family': 'Arial',
                  'weight': 'normal',
                  'size': 18,
                  }
    data = pd.read_excel('./data.xlsx')
    fig, left_axis = plt.subplots()
    right_axis = left_axis.twinx()
    lns1 = left_axis.plot(np.array(data['interval']), list(data['A_coverage']),
                          color='red', linewidth=1, linestyle='solid',
                          markersize=8, marker='^', label='A Coverage')
    lns2 = left_axis.plot(np.array(data['interval']), list(data['B_coverage']),
                          color='purple', linewidth=1, linestyle='solid', markersize=8,
# Thanks to Cameron for his book "Probabilistic Programming and Bayesian Methods for Hackers".
# This is not an easy book for me.

# ==================== basics
# prior probability: a belief of a probability about some event.
# evidence: another event related to the event.
# posterior probability: after seeing the evidence, we update our belief about the event
# using the formula: P(A|X) = P(X|A)P(A) / P(X)

# in IPython environment
from IPython.core.pylabtools import figsize
import numpy as np
import matplotlib.pyplot as plt

figsize(12.5, 4)
plt.rcParams["savefig.dpi"] = 300
plt.rcParams["figure.dpi"] = 120

colors = ['#348ABD', '#A60628']
prior = [1/21, 20/21]
posterior = [0.087, 1-0.087]
plt.bar([0, .7], prior, alpha=0.70, width=0.25, color=colors[0],
        label='prior distribution', lw='3', edgecolor="#348ABD")
plt.bar([0 + 0.25, .7 + 0.25], posterior, alpha=0.70, width=0.25, color=colors[1],
        label='posterior distribution', lw='3', edgecolor="#A60628")
plt.xticks([0.02, 0.95], ['librarian', 'farmer'])
plt.title("Prior and Posterior probabilities of Steve's occupation")
plt.ylabel("Probability")
plt.legend(loc='upper left')
plt.show()
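# Where the 0.087 above comes from (a worked sketch of P(A|X) = P(X|A)P(A)/P(X);
# the likelihood values 0.95 and 0.5 are assumptions matching the book's Steve
# example, they are not given in the code above):
p_librarian = 1 / 21                 # prior P(librarian)
p_x_given_librarian = 0.95           # assumed P(description | librarian)
p_x_given_farmer = 0.5               # assumed P(description | farmer)
p_x = p_x_given_librarian * p_librarian + p_x_given_farmer * (20 / 21)
print(p_x_given_librarian * p_librarian / p_x)   # -> ~0.087, the posterior plotted above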
import pandas as pd
import os
from ipywidgets import widgets, interact, fixed
from IPython.display import display
get_ipython().magic('matplotlib inline')
import seaborn as sbn
import matplotlib.pyplot as plt
import numpy as np
from IPython.core.pylabtools import figsize
import scipy
import scipy.interpolate
from contextlib import redirect_stdout

figsize(12, 10)
sbn.set_context("paper", font_scale=1)
sbn.set_style("whitegrid")

from collections import namedtuple


# ### Testing for the maintenance monte carlo simulation

# In[2]:

def harmonicConstituentModel(time, *hm):
    assert len(hm) % 3 == 0
    velocity = 0
    for i in range(len(hm) // 3):
        velocity += hm[3 * i] * np.cos(
%matplotlib inline
import numpy as np
from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt

figsize(12.5, 5)

sample_size = 100000
expected_value = lambda_ = 4.5
poi = np.random.poisson
N_samples = range(1, sample_size, 100)

for k in range(3):
    samples = poi(lambda_, sample_size)
    partial_average = [samples[:i].mean() for i in N_samples]
    plt.plot(N_samples, partial_average, lw=1.5,
             label="average of $n$ samples; seq. %d" % k)

plt.plot(N_samples, expected_value * np.ones_like(partial_average),
         ls="--", label="true expected value", c="k")

plt.ylim(4.35, 4.65)
plt.title("Convergence of the average of \n random variables to its \
expected value")
plt.ylabel("average of $n$ samples")
plt.xlabel("# of samples, $n$")
plt.legend()

figsize(12.5, 4)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np
import pandas as pd
import warnings

from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
figsize(11, 9)

import collections
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.python import debug as tf_debug

tfd = tfp.distributions

import tensorflow.contrib.eager as tfe
from tensorflow.python.eager.context import eager_mode, graph_mode

import pandas as pd
import numpy as np

# Handy snippet to reset the global graph and global session.
StEr_L = data[:, 6]
SyEr_H = data[:, 7]
SyEr_L = data[:, 8]

if DataFound2 == True:
    Edata = np.loadtxt(ElasticData, float, usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8), skiprows=11)
    EPoint = Edata[:, 0]
    EPlab = Edata[:, 1]      # GeV/c
    EPlab_min = Edata[:, 2]
    EPlab_max = Edata[:, 3]
    ESig = Edata[:, 4]
    EStEr_H = Edata[:, 5]
    EStEr_L = Edata[:, 6]
    ESyEr_H = Edata[:, 7]
    ESyEr_L = Edata[:, 8]

pyt.figsize(12, 7)


def Ecm(Plab):
    """Converts Plab momenta to center of mass energy [GeV]."""
    E = (((Plab**2 + .938**2)**(1/2.) + .938)**2 - (Plab**2))**(1/2.)
    return E


if DataFound1 == True and DataFound2 == True:
    # Automatically converts all P_lab momenta to corresponding center-of-mass energy [GeV]
    E_cm = Ecm(Plab)
    eE_cm = Ecm(EPlab)
    cm_min = Ecm(Plab_min)
    cm_max = Ecm(Plab_max)
    ecm_min = Ecm(EPlab_min)
    ecm_max = Ecm(EPlab_max)

# Define best fit curve given by the particle data group
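# (Aside: a quick sanity check of the Ecm() helper defined above; the two beam
# momenta here are just illustrative values, not taken from the data files.)
for plab in (10., 100.):
    print("Plab = %6.1f GeV/c  ->  sqrt(s) = %5.2f GeV" % (plab, Ecm(plab)))
# expect roughly 4.5 GeV and 13.8 GeV for a fixed proton target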
# isolating the examples so I can follow better

# These scripts are taken from his code chunks. This example deals with
# text message data over the course of 2 months or so. The hypothesis
# is that at some point the user's behaviour changed. Can we infer this?

# Import all libraries
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
import pymc as pm

# Set figsize
figsize(12.5, 3.5)

# Load data
count_data = np.loadtxt("txtdata.csv")
n_count_data = len(count_data)

# Making a plot of daily messaging
plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Did the user's texting habits change over time?")
plt.xlim(0, n_count_data);

# Defining all the variables, alpha, lambda_1 and lambda_2
# The researchers actually observe 35 "Yes" responses.
X = 35
observations = pm.Binomial("obs", N, observed_proportion, observed=True, value=X)

# Let's add all variables of interest to a Model container, and run our
# black-box algo over the model
model = pm.Model([p, true_answers, first_coin_flips,
                  second_coin_flips, observed_proportion, observations])

# Monte Carlo sim
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)

figsize(12.5, 3)
p_trace = mcmc.trace("freq_cheating")[:]
plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85, bins=30,
         label="posterior distribution", color="#348ABD")
plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
plt.xlim(0, 1)
plt.legend()
savefig("cheats.png", bbox_inches="tight")

# What do we now know? Our posterior distribution shows that it is implausible
# that there are no cheaters: we started with a uniform prior (treating all
# values of p as equally possible), then the posterior assigned a low
# probability to p = 0
# we have thus gathered information, preserving some degree of privacy, and are
def main():
    figsize(12.5, 4)
    data = np.loadtxt("data/mixture_data.csv", delimiter=",")
    plt.hist(data, bins=20, color="k", histtype="stepfilled", alpha=0.8)
    plt.title("Histogram of the dataset")
    plt.ylim([0, None])
    print data[:10], "..."
    plt.show()

    p = pm.Uniform("p", 0, 1)
    assignment = pm.Categorical("assignment", [p, 1 - p], size=data.shape[0])
    print "prior assignment, with p = %.2f:" % p.value
    print assignment.value[:10], "..."

    taus = 1.0 / pm.Uniform("stds", 0, 100, size=2) ** 2
    centers = pm.Normal("centers", [120, 190], [0.01, 0.01], size=2)

    """
    The below deterministic functions map an assignment, in this case 0 or 1,
    to a set of parameters, located in the (1,2) arrays `taus` and `centers`.
    """

    @pm.deterministic
    def center_i(assignment=assignment, centers=centers):
        return centers[assignment]

    @pm.deterministic
    def tau_i(assignment=assignment, taus=taus):
        return taus[assignment]

    print "Random assignments: ", assignment.value[:4], "..."
    print "Assigned center: ", center_i.value[:4], "..."
    print "Assigned precision: ", tau_i.value[:4], "..."

    observations = pm.Normal("obs", center_i, tau_i, value=data, observed=True)
    model = pm.Model([p, assignment, observations, taus, centers])

    mcmc = pm.MCMC(model)
    mcmc.sample(50000)

    plt.subplot(311)
    lw = 1
    center_trace = mcmc.trace("centers")[:]
    if center_trace[-1, 0] > center_trace[-1, 1]:
        colors = ["#348ABD", "#A60628"]
    else:
        colors = ["#A60628", "#348ABD"]
    plt.plot(center_trace[:, 0], label="trace of center 0", c=colors[0], lw=lw)
    plt.plot(center_trace[:, 1], label="trace of center 1", c=colors[1], lw=lw)
    plt.title("Traces of unknown parameters")
    leg = plt.legend(loc="upper right")
    leg.get_frame().set_alpha(0.7)

    plt.subplot(312)
    std_trace = mcmc.trace("stds")[:]
    plt.plot(std_trace[:, 0], label="trace of standard deviation of cluster 0",
             c=colors[0], lw=lw)
    plt.plot(std_trace[:, 1], label="trace of standard deviation of cluster 1",
             c=colors[1], lw=lw)
    plt.legend(loc="upper left")

    plt.subplot(313)
    p_trace = mcmc.trace("p")[:]
    plt.plot(p_trace, label="$p$: frequency of assignment to cluster 0",
             color="#467821", lw=lw)
    plt.xlabel("Steps")
    plt.ylim(0, 1)
    plt.legend()
    plt.show()

    std_trace = mcmc.trace("stds")[:]
    _i = [1, 2, 3, 4]
    for i in range(2):
        plt.subplot(2, 2, _i[2 * i])
        plt.title("Posterior of center of cluster %d" % i)
        plt.hist(center_trace[:, i], color=colors[i], bins=30, histtype="stepfilled")

        plt.subplot(2, 2, _i[2 * i + 1])
        plt.title("Posterior of standard deviation of cluster %d" % i)
        plt.hist(std_trace[:, i], color=colors[i], bins=30, histtype="stepfilled")
        # plt.autoscale(tight=True)
    plt.tight_layout()
    plt.show()

    plt.cmap = mpl.colors.ListedColormap(colors)
    plt.imshow(mcmc.trace("assignment")[::400, np.argsort(data)],
               cmap=plt.cmap, aspect=.4, alpha=.9)
    plt.xticks(np.arange(0, data.shape[0], 40),
               ["%.2f" % s for s in np.sort(data)[::40]])
    plt.ylabel("posterior sample")
    plt.xlabel("value of $i$th data point")
    plt.title("Posterior labels of data points")
    plt.show()

    cmap = mpl.colors.LinearSegmentedColormap.from_list("BMH", colors)
    assign_trace = mcmc.trace("assignment")[:]
    plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,
                c=assign_trace.mean(axis=0), s=50)
    plt.ylim(-0.05, 1.05)
    plt.xlim(35, 300)
    plt.title("Probability of data point belonging to cluster 0")
    plt.ylabel("probability")
    plt.xlabel("value of data point")
    plt.show()

    x = np.linspace(20, 300, 500)
    posterior_center_means = center_trace.mean(axis=0)
    posterior_std_means = std_trace.mean(axis=0)
    posterior_p_mean = mcmc.trace("p")[:].mean()

    plt.hist(data, bins=20, histtype="step", normed=True, color="k", lw=2,
             label="histogram of data")
    y = posterior_p_mean * stats.norm.pdf(x, loc=posterior_center_means[0],
                                          scale=posterior_std_means[0])
    plt.plot(x, y, label="Cluster 0 (using posterior-mean parameters)", lw=3)
    plt.fill_between(x, y, color=colors[1], alpha=0.3)

    y = (1 - posterior_p_mean) * stats.norm.pdf(x, loc=posterior_center_means[1],
                                                scale=posterior_std_means[1])
    plt.plot(x, y, label="Cluster 1 (using posterior-mean parameters)", lw=3)
    plt.fill_between(x, y, color=colors[0], alpha=0.3)

    plt.legend(loc="upper left")
    plt.title("Visualizing Clusters using posterior-mean parameters")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt, mpld3
get_ipython().magic(u'matplotlib inline')
mpld3.enable_notebook()

## GET data from MySQL which will contain the metric and associated value
import scipy.stats as stats
engine = create_engine("mysql+mysqldb://username:password@localhost:3306/schema", pool_recycle=3600)
table = pd.read_sql_query("SELECT date(min_stamp) as dayte,avg(nineeighty_pctl_usrs) as daily FROM (SELECT * FROM usr.users WHERE access_technology = 'ground')a GROUP BY DATE(min_stamp)", con=engine)
min(table.dayte), max(table.dayte)

## Show an initial plot of the distribution of the count data
mpld3.enable_notebook()
from matplotlib.backends.backend_pdf import PdfPages
pp = PdfPages('ground.pdf')
figsize(13.5, 5)
table = table.sort(['dayte'])
count_data = table['daily']
n_count_data = len(count_data)
plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("98th Percentile of Total Users in Sector, leading to Event")
plt.xlim(0, n_count_data)
plt.savefig(pp, format='pdf')

## Applied code taken from the PyMC book found on GitHub, modeling a divergence
## in time series data using Bayesian methods. In this case it is count data on users per day.
import pymc as pm

alpha = 1.0 / count_data.mean()   # Recall count_data is the
                                  # variable that holds our txt counts
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
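# The change-point model from the book continues along these lines (a sketch,
# not part of the original script; variable names follow the book's example):
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1   # rate before the change point
    out[tau:] = lambda_2   # rate after the change point
    return out

observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])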
data1 = df1.as_matrix()
data2 = df2.as_matrix()
data3 = df3.as_matrix()
data4 = df4.as_matrix()

data1 = data1.T
data2 = data2.T
data3 = data3.T
data4 = data4.T

kinase = data4[:, 0]
substrate = data4[:, 1]

##### graph a scatter plot to estimate model
figsize(10, 10)
(m, b) = np.polyfit(kinase, substrate, 1)
yp = np.polyval([m, b], kinase)
equation = 'y = ' + str(round(m, 2)) + 'x' + ' + ' + str(round(b, 2))
plt.scatter(kinase, substrate, s=75, color="#1E90FF", alpha=0.5)
plt.plot(kinase, yp, color='#696969', linewidth=2.0)
plt.text(-2.5, -.5, equation, fontsize=16)
plt.ylabel(protein1Name, fontsize=20)
plt.xlabel(protein2Name + '-S94', fontsize=20)
plt.title("Phosphorylation Relationship Between " + protein1Name + " and " + protein2Name + '-S94', fontsize=20)
plt.tight_layout()
plt.savefig(path + '/' + protein1Name + '_' + protein2Name + '-S94_Estimator.png')

##### assuming a linear relationship R = alpha + beta * kinase + epsilon (prec)
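# A sketch of the Bayesian linear model that the comment above describes
# (PyMC2 style, matching the other snippets; the prior choices here are
# illustrative assumptions, not the original script's values):
import pymc as pm

alpha = pm.Normal("alpha", 0, 1e-3)
beta = pm.Normal("beta", 0, 1e-3)
prec = pm.Uniform("prec", 0, 100)   # precision of the noise term epsilon

@pm.deterministic
def mean_R(alpha=alpha, beta=beta, x=kinase):
    return alpha + beta * x

R_obs = pm.Normal("R_obs", mean_R, prec, value=substrate, observed=True)
model = pm.Model([alpha, beta, prec, mean_R, R_obs])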
stop = timeit.default_timer()
print "The running takes %r min" % ((stop - start) / 60)

ff = RFclass.training()
tt = RFclass.test()
pp = Superplot.fancy()

figsize(9.5, 7)
df1 = pd.read_csv('//home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/Ensemble/statistical_csv/bag_acc_10cv_100_4000.csv', header=0)
df2 = pd.read_csv('//home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/Ensemble/statistical_csv/bag_prec_10cv_100_4000.csv', header=0)
print "This is the training set of field data."
print "The size of the data is "
print df.describe()

plt.plot(df1['tree_range'], df1['12'], label='Accuracy')
plt.plot(df1['tree_range'], df2['12'], label='Precision')
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.ylabel('Classification metrics', fontsize=24)
plt.xlabel('Number of trees', fontsize=24)
plt.show()
    TrCostCar = []
    TrCostTaxi = []
    TrCostDrvNow = []
    DrvNowCostHr = DrvNowCostMin * 60
    for d in range(1, max(dist) + 1):
        TrCostCar.append(TotalExpCar / 365 + (FuelConsEst / 100 * d) * FuelPrice)
        TrCostTaxi.append(5.90 + d * 1.60 + InsTaxCar / 365)
        TrCostDrvNow.append(d / AvgSpeed * DrvNowCostHr + InsTaxCar / 365)
    return TrCostCar, TrCostTaxi, TrCostDrvNow


# In[] Visualization
get_ipython().magic('matplotlib notebook')
from IPython.core.pylabtools import figsize
figsize(10, 15)
plt.figure()
count = 0

# The script below iteratively estimates costs for different transports at
# different average speeds and plots the results
for AvgSpeed in [20, 30, 40]:
    FuelConsEst = EstFC(FuelCons, AvgSpeed, Scaling)
    [TrCostCar, TrCostTaxi, TrCostDrvNow] = CostAnalysis(dist, AvgSpeed, FuelConsEst,
                                                         FuelPrice, InsTaxCar,
                                                         TotalExpCar, DrvNowCostMin)
    count = count + 1
    index = 310 + count
    plt.subplot(index)
    plt.plot(dist, TrCostCar, 'b.-')
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 5 15:15:30 2015

@author: Usamahk
"""

# Understanding the Bayesian Landscape - Plot a distribution of posterior
# probabilities

%matplotlib inline
import scipy.stats as stats
from IPython.core.pylabtools import figsize
import numpy as np
figsize(12.5, 4)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

jet = plt.cm.jet
fig = plt.figure()
x = y = np.linspace(0, 5, 100)
X, Y = np.meshgrid(x, y)

plt.subplot(121)
uni_x = stats.uniform.pdf(x, loc=0, scale=5)
uni_y = stats.uniform.pdf(y, loc=0, scale=5)
M = np.dot(uni_x[:, None], uni_y[None, :])
im = plt.imshow(M, interpolation='none', origin='lower',
                cmap=jet, vmax=1, vmin=-.15, extent=(0, 5, 0, 5))
# # Original Sensitivity Analysis notebook: Will Usher, UCL Energy Institute, 10th December 2015 <br/>
# Updates to demonstrate running array jobs on a cluster: Richard West, 2016
#
# In this version, most of the background and detail have been removed. Please refer to the
# original at https://github.com/SALib/SATut if you are not familiar with the system.

# In[1]:

from ipywidgets import widgets, interact
from IPython.display import display
get_ipython().magic('matplotlib inline')
import seaborn as sbn
import matplotlib.pyplot as plt
import numpy as np
from IPython.core.pylabtools import figsize

figsize(12, 10)
sbn.set_context("talk", font_scale=1)

# The model used for this seminar is contained in the file model.py
from model import cost_of_vehicle_to_grid, compute_profit, annualized_capital_cost, battery_lifetime, max_vehicle_power


# In[2]:

# Uncomment and execute the following line to see the contents of the `model.py` file
# %load model.py

# ## Using SALib to run a Sensitivity Analysis
#
# As we saw earlier, SALib is a **free** **open-source** **Python** library which you can install by running the command
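# (e.g. `pip install SALib`). Below is a minimal sketch of the SALib workflow
# referred to above; the problem definition, sample size, and the assumed call
# signature of max_vehicle_power are illustrative, not the seminar's actual inputs.
from SALib.sample import saltelli
from SALib.analyze import sobol

problem = {
    'num_vars': 2,
    'names': ['battery_size', 'connector_power'],   # hypothetical inputs
    'bounds': [[10, 100], [2.3, 22]],
}
param_values = saltelli.sample(problem, 1024)                      # N * (2D + 2) samples
Y = np.array([max_vehicle_power(*row) for row in param_values])    # assumed call signature
Si = sobol.analyze(problem, Y)
print(Si['S1'], Si['ST'])                                          # first-order and total indices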
__author__ = 'william007'

from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
figsize(11, 9)

import scipy.stats as stats

dist = stats.beta
n_trials = [0, 1, 2, 3, 4, 5, 8, 15, 50, 500]
data = stats.bernoulli.rvs(0.5, size=n_trials[-1])
x = np.linspace(0, 1, 100)

# For the already prepared, I'm using Binomial's conj. prior.
for k, N in enumerate(n_trials):
    sx = plt.subplot(len(n_trials) / 2, 2, k + 1)
    plt.xlabel("$p$, probability of heads") \
        if k in [0, len(n_trials) - 1] else None
    plt.setp(sx.get_yticklabels(), visible=False)
    heads = data[:N].sum()
    y = dist.pdf(x, 1 + heads, 1 + N - heads)
    plt.plot(x, y, label="observe %d tosses,\n %d heads" % (N, heads))
    plt.fill_between(x, 0, y, color="#348ABD", alpha=0.4)
    plt.vlines(0.5, 0, 4, color="k", linestyles="--", lw=1)
    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    plt.autoscale(tight=True)
def random_picks():
    figsize(14, 7)                      # 14 x 7 inch figure
    style.use('ggplot')                 # ggplot is a data visualization style
    stock_list = sample(watchlist, 1)   # pick one random stock from the watchlist
    print(f'random stock pick from watchlist ${stock_list}')
    chart_dir = '/Users/MisterFili/Documents/misc_files/'

    # set current date & 1 year from now
    today = dt.datetime.now().date()
    end = dt.datetime(today.year, today.month, today.day)
    start = dt.datetime(today.year - 1, today.month, today.day)
    d_dash = today.strftime("%Y-%m-%d")

    for stock_pick in stock_list:
        # df = web.DataReader(f'{stock_pick}', 'yahoo', start=start, end=end)
        # df.to_csv(f'{stock_pick}.csv')
        df = pd.read_csv(f'{stock_pick}.csv', parse_dates=True, index_col=0)
        # If True -> try parsing the index. dates are stored @ column 0

        # CHECK TO SEE IF MARKETS ARE OPEN
        invert_df = df.sort_index(axis=0, ascending=False)
        mkt_date_check = invert_df.loc[d_dash]
        # if mkt_date_check.empty == True:
        #     print('dataframe empty!\n!!MARKET CLOSED!!')
        #     print('exiting')
        #     exit(1)
        #     # raise RuntimeError('data is empty')
        # else:
        #     print('MARKET OPEN!')

        # Resampling the time series data based on months
        # we apply it on stock close price
        # 'M' indicates month
        # monthly_resampled_data = df.close.resample('M').mean()

        df['200d_EMA'] = df.Close.ewm(span=200, min_periods=0, adjust=False, ignore_na=False).mean()
        df['50d_EMA'] = df.Close.ewm(span=50, min_periods=0, adjust=False, ignore_na=False).mean()
        df['20d_EMA'] = df.Close.ewm(span=20, min_periods=0, adjust=False, ignore_na=False).mean()
        df['26d_EMA'] = df.Close.ewm(span=26, min_periods=0, adjust=False, ignore_na=False).mean()
        df['12d_EMA'] = df.Close.ewm(span=12, min_periods=0, adjust=False, ignore_na=False).mean()

        # calculate the MACD and its signal line
        df['mcad'] = df['12d_EMA'] - df['26d_EMA']
        df['macdsignal'] = df['mcad'].ewm(span=9, adjust=False).mean()

        df_ohlc = df['Adj Close'].resample('W-Fri').ohlc()
        # df_volume = df['Volume'].resample('W-Fri').sum()
        # This will give you ohlc data for the week ending on a Friday.
        edition = 87

        df_ohlc.reset_index(inplace=True)
        # don't want date to be an index anymore, reset_index
        # dates is just a regular column. Next, we convert it
        df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

        ax1 = plt.subplot2grid((6, 1), (0, 0), rowspan=4, colspan=1, title=f"${stock_pick} STOCK")
        ax2 = plt.subplot2grid((6, 1), (5, 0), rowspan=1, colspan=1, sharex=ax1, title="MACD")

        candlestick_ohlc(ax1, df_ohlc.values, width=2, colorup='g', alpha=0.7)
        ax2.plot(df.index, df[['macdsignal']], label='Signal')
        ax2.plot(df.index, df[['mcad']], label='MCAD')
        ax1.plot(df.index, df[['20d_EMA']], label='20d_EMA')
        ax1.plot(df.index, df[['50d_EMA']], label='50d_EMA')
        ax1.plot(df.index, df[['200d_EMA']], label='200d_EMA')
        # ax2.fill_between(df_volume.index.map(mdates.date2num), df_volume.values, 0)  # x and y

        ax1.xaxis_date()   # converts the axis from the raw mdate numbers to dates.
        ax1.legend()
        ax2.legend()
        plt.savefig(f'{chart_dir}{stock_pick}{edition}.png', bbox_inches='tight')
# make a graph to eyeball whether the variable changed
fig, ax = plt.subplots(figsize=(16, 8))
plt.bar(np.arange(n_data), data, color="magenta")
plt.xlabel('Time (days)')
plt.ylabel('Number of cars arrived')
plt.title('Did the number of cars arriving change over time?')

# calculating posterior lambdas
lambda_1_samples, lambda_2_samples, tau_samples = calc_posteriors(data, n_data)

# plot posteriors
plt.rcParams.update({'font.size': 16})
figsize(16, 10)

# histogram of the samples:
ax = plt.subplot(211)
ax.set_autoscaley_on(False)
plt.hist(lambda_1_samples, bins=30, alpha=0.85,
         label="posterior of $\lambda_1$", color="#8ecd00", density=True)
plt.legend(loc="upper left")
plt.title(r"""Posterior distributions of the variables $\lambda_1$; $\lambda_2$""")
plt.xlim([0, 30])
    # Return the dataframe with missing information
    return mis_val_table_ren_columns


missing_values = missing_values_table(data)

# Get the columns with > 50% missing
missing_df = missing_values_table(data)
print(missing_df)
missing_columns = list(missing_df[missing_df['% of Total Values'] > 50].index)
print('We will remove %d columns.' % len(missing_columns))

# Drop the columns
data = data.drop(columns=list(missing_columns))

figsize(8, 8)

# Rename the score
data = data.rename(columns={'ENERGY STAR Score': 'score'})

# Histogram of the Energy Star Score
plt.style.use('fivethirtyeight')
plt.hist(data['score'].dropna(), bins=100, edgecolor='k')
plt.xlabel('Score')
plt.ylabel('Number of Buildings')
plt.title('Energy Star Score Distribution')
plt.show()

# Histogram Plot of Site EUI
figsize(8, 8)
plt.hist(data['Site EUI (kBtu/ft²)'].dropna(), bins=20, edgecolor='black')