Example #1
def DisplayData():
    """Displays the Proton-Proton Cross Section Data."""
    pyt.figsize(12,7)  # pyt: presumably IPython.core.pylabtools imported as pyt
    plt.loglog(E_cm,Sig,ls=' ',marker='.',markersize=3,color='black',label='PDG Data')
    plt.loglog(eE_cm,ESig,ls=' ',marker='.',markersize=3,color='black')
    plt.loglog(E_cm[90:],func(E_cm[90:],*popt2),color='blue')
    plt.loglog(E_cm,func(E_cm,*popt),color='blue',label='Fit')
    plt.scatter(7000,70.5,label='TOTEM EPL 101 21003',color='red')
    plt.scatter([2760,7000],[62.1,72.7],label='ALICE 2011',color='blue')
    plt.scatter([7000,8000],[72.9,74.7],label='TOTEM 2013',color='green')
    plt.errorbar([2760,7000,7000,7000,8000],[62.1,70.5,72.7,72.9,74.7],yerr=[5.9,3.4,6.2,1.5,1.7],fmt=' ',color='black')
    plt.loglog(E_cm[90:],10*SigI(E_cm[90:]))
    plt.errorbar(E_cm,Sig,xerr=[E_cm-cm_min,cm_max-E_cm],yerr=[StEr_L,StEr_H],ms=.5,mew=0,fmt='none',ecolor='black')
    plt.errorbar(eE_cm,ESig,xerr=[eE_cm-ecm_min,ecm_max-eE_cm],yerr=[EStEr_L,EStEr_H],ms=.5,mew=0,fmt='none',ecolor='black')
    plt.annotate("Total",fontsize=11,xy=(7,46),xytext=(7,46))
    plt.annotate("Elastic",fontsize=11,xy=(1000,10),xytext=(1000,10))
    plt.annotate("Inelastic",fontsize=11,xy=(35,25),xytext=(35,25))
    plt.title("pp Cross Section Data",fontsize=16)
    plt.ylabel("Cross Section [mb]",fontsize=12)
    plt.xlabel("$\sqrt{s}\,[GeV]$",fontsize=16)
    plt.ylim(1,400)
    plt.grid(which='minor',axis='y')
    plt.grid(which='major',axis='x')
    plt.legend(loc=4)
    plt.show()
def main():
    N = 100
    p = pm.Uniform("freq_cheating", 0, 1)
    true_answers = pm.Bernoulli("truths", p, size=N)
    first_coin_flips = pm.Bernoulli("first_flips", 0.5, size=N)
    second_coin_flips = pm.Bernoulli("second_flips", 0.5, size=N)

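    # Randomized-response privacy scheme: each student flips a coin. Heads
    # (fc = 1) means answer truthfully (t_a); tails means answer "yes" iff a
    # second coin (sc) lands heads. An observed "yes" is therefore
    # (t_a & fc) | (~fc & sc), whose sample proportion is modeled below.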
    @pm.deterministic
    def observed_proportion(t_a=true_answers,
                            fc=first_coin_flips,
                            sc=second_coin_flips):
        result = t_a & fc | ~fc & sc
        return float(sum(result)) / len(result)

    X = 35
    observations = pm.Binomial("obs", N, observed_proportion, value=X, observed=True)

    model = pm.Model([p, true_answers, first_coin_flips,
                  second_coin_flips, observed_proportion, observations])

    # To be explained in Chapter 3!
    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 15000)

    figsize(12.5, 3)
    p_trace = mcmc.trace("freq_cheating")[:]
    plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85, bins=30,
             label="posterior distribution", color="#348ABD")
    plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
    plt.xlim(0, 1)
    plt.legend()
    plt.show()
def CustomPlot():
    import json
    import matplotlib
    from IPython.core.pylabtools import figsize
    s = json.load( open("../styles/matplotlibrc.json") )
    matplotlib.rcParams.update(s)
    figsize(18, 6) 
    def roll_mean_std(self, df, xaxis, yaxis, xlabel_name, width):
        figsize(13.5,8)
        
        
        fig, ax1 = plt.subplots()


        ax1.plot(df[xaxis][0:96], df[yaxis][0:96].rolling(window=width).mean(), 'b-')
        ax1.set_xlabel(xlabel_name,fontsize = 24)
        plt.xticks(fontsize = 20)
        # Make the y-axis label and tick labels match the line color.
        ax1.set_ylabel('Rolling mean', color='b', fontsize = 24)
        plt.yticks(fontsize = 20)
        for tl in ax1.get_yticklabels():
            tl.set_color('b')


        ax2 = ax1.twinx()
        ax2.plot(df[xaxis][0:96], df[yaxis][0:96].rolling(window=width).std(), 'r-')
        ax2.set_ylabel('Rolling standard deviation', color='r',fontsize = 24)
        plt.yticks(fontsize = 20)
        for tl in ax2.get_yticklabels():
            tl.set_color('r')
            
        
        plt.xticks(fontsize = 20)  
        ax1.yaxis.grid(False,'minor')
        ax1.xaxis.grid(False, 'minor')
        ax2.yaxis.grid(False,'major')
        ax2.xaxis.grid(False, 'major')        
        plt.show()
def plot_confusion_matrix_fancy(conf_arr, title='Confusion matrix', names=[]):
    norm_conf = []
    for i in conf_arr:
        a = 0
        tmp_arr = []
        a = sum(i, 0)
        for j in i:
            tmp_arr.append(float(j)/float(a))
        norm_conf.append(tmp_arr)

    fig = plt.figure(figsize=(10, 10))
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(np.array(norm_conf), cmap=plt.cm.Blues,
                    interpolation='nearest')

    width = len(conf_arr)
    height = len(conf_arr[0])

    for x in range(width):
        for y in range(height):
            ax.annotate(str(conf_arr[x][y]), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center')
    plt.title(title)
    cb = fig.colorbar(res)
    plt.xticks(range(width), names, rotation='vertical')
    plt.yticks(range(height), names)
    plt.savefig(DATASET_PATH+title+'.png', format='png', dpi=200)
def main():
    figsize(12.5, 3.5)
    np.set_printoptions(precision=3, suppress=True)
    challenger_data = np.genfromtxt("challenger_data.csv", skip_header=1,
                                    usecols=[1, 2], missing_values="NA",
                                    delimiter=",")
    # drop the NA values
    challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]

    # plot it, as a function of temperature (the first column)
    print "Temp (F), O-Ring failure?"
    print challenger_data

    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75, color="k",
                alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.show()
Example #7
def main():
    figsize(12.5, 4)
    parameters = [(10, .4), (10, .9)]
    colors = ["#348ABD", "#A60628"]

    for params, cols in zip(parameters, colors):
        N, p = params
        _x = np.arange(N + 1)
        plt.bar(_x - 0.5, stats.binom.pmf(_x, N, p), color=cols,
                edgecolor=cols,
                alpha=0.6,
                label="$N$: %d, $p$: %.1f" % (N, p),
                linewidth=3)

    plt.legend(loc="upper left")
    plt.xlim(0, 10.5)
    plt.xlabel("$k$")
    plt.ylabel("$P(X = k)$")
    plt.title("Probability mass distributions of binomial random variables")
    plt.show()
Example #8
def main():
    figsize(12, 3)

    x = np.linspace(-4, 4, 100)

    plt.plot(x, logistic(x, 1), label=r"$\beta = 1$", ls="--", lw=1,
             color='#332288')
    plt.plot(x, logistic(x, 3), label=r"$\beta = 3$", ls="--", lw=1,
             color='#117733')
    plt.plot(x, logistic(x, -5), label=r"$\beta = -5$", ls="--", lw=1,
             color='#882255')

    plt.plot(x, logistic(x, 1, 1), label=r"$\beta = 1, \alpha = 1$",
             color="#332288")
    plt.plot(x, logistic(x, 3, -2), label=r"$\beta = 3, \alpha = -2$",
             color="#117733")
    plt.plot(x, logistic(x, -5, 7), label=r"$\beta = -5, \alpha = 7$",
             color="#882255")

    plt.legend(loc="lower left")
    plt.show()
    def plot_gridsearch(self,x, aspect = 3):

        scores=np.array(x)
        scores=scores[:, 1:].T
        #    print scores
        #scores= scores[:,5:]
        print(np.shape(scores))
        
        #    print np.arange(100,2010,20)
        
        figsize(16,8)
        fig, ax = plt.subplots(1,1)
        cax = ax.imshow(scores, interpolation='none', origin='upper',
                        cmap=plt.cm.coolwarm, aspect=aspect)
        
        plt.grid(True, axis='x', color='white', linestyle='-')
        
        plt.xlim(0,96)
        
        plt.xticks((-0.5, 96.5), (100,2000), fontsize = 20)        
#        plt.xticks(np.linspace(0,194,10), int(np.linspace(100,4000,10)), fontsize = 20)
        plt.yticks(np.arange(0,11,1), np.arange(1,12,1), fontsize = 20)
        plt.xlabel('Number of trees',fontsize = 24)
        plt.ylabel('Number of features', fontsize = 24)
        ax.yaxis.grid(False,'major')
        ax.xaxis.grid(False, 'major')
        cb = fig.colorbar(cax, orientation='horizontal', pad = 0.15, shrink=1, aspect=50)
        cb.ax.tick_params(labelsize=14)
        plt.show()
Example #10
def main():
    figsize(12.5, 5)
    fig = plt.figure()
    jet = plt.cm.jet
    plt.subplot(121)

    x = y = np.linspace(0, 5, 100)
    X, Y = np.meshgrid(x, y)

    exp_x = stats.expon.pdf(x, scale=3)
    exp_y = stats.expon.pdf(x, scale=10)
    M = np.dot(exp_x[:, None], exp_y[None, :])
    CS = plt.contour(X, Y, M)
    im = plt.imshow(M, interpolation='none', origin='lower',
                    cmap=jet, extent=(0, 5, 0, 5))
    plt.xlabel("prior on $p_1$")
    plt.ylabel("prior on $p_2$")
    plt.title("$Exp(3), Exp(10)$ prior landscape")

    ax = fig.add_subplot(122, projection='3d')
    ax.plot_surface(X, Y, M, cmap=jet)
    ax.view_init(azim=390)
    plt.title("$Exp(3), Exp(10)$ prior landscape; \nalternate view")
    plt.show()
Example #11
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

#plt.style.use('seaborn')
from IPython.core.pylabtools import figsize

figsize(16, 8)

np.seterr(divide='ignore', invalid='ignore')
pd.options.mode.chained_assignment = None  # default='warn'


def plot_two(data, base_col, second_col, booked_col, nbins, title_prefix):
    f, (ax1, ax2) = plt.subplots(1, 2)
    plot_dict = {ax1: base_col, ax2: second_col}

    for ax, col in plot_dict.items():
        h, bins, patches = ax.hist((data.loc[data[booked_col] == True][col],
                                    data.loc[data[booked_col] == False][col]),
                                   nbins)

        # fraction of booked rows in each bin; guard against empty bins (0/0)
        h_ratio = h[0].astype(float) / (h[1] + h[0]).astype(float)
        h_ratio[np.isnan(h_ratio)] = 0

        axa = ax.twinx()
        axa.scatter((bins[0:-1] + bins[1:]) / 2.0, h_ratio, color='r')
Example #12
# Calculate first and third quartile
first_quartile = data['Site EUI (kBtu/ft²)'].describe()['25%']
third_quartile = data['Site EUI (kBtu/ft²)'].describe()['75%']

# Interquartile range
iqr = third_quartile - first_quartile

# Remove outliers
data = data[(data['Site EUI (kBtu/ft²)'] > (first_quartile - 3 * iqr))
            & (data['Site EUI (kBtu/ft²)'] < (third_quartile + 3 * iqr))]

# Create a list of buildings with more than 100 measurements
types = data.dropna(subset=['score'])
types = types['Largest Property Use Type'].value_counts()
types = list(types[types.values > 100].index)
# Plot of distribution of scores for building categories
# Plot each building
figsize(12, 10)
for b_type in types:
    # Select the building type
    subset = data[data['Largest Property Use Type'] == b_type]

    # Density plot of Energy Star scores
    sns.kdeplot(subset['score'].dropna(),
                label=b_type, shade=False, alpha=0.8);
    plt.gca().legend();
    
# label the plot
plt.xlabel('Energy Star Score', size=20);
plt.ylabel('Density', size=20);
Example #13
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
import scipy.stats as stats
import numpy as np

figsize(12.5, 4)

a = np.arange(16)
poi = stats.poisson
lambda_ = [1.5, 4.25]
colors = ["#348ABD", "#A60628"]
plt.bar(a,
        poi.pmf(a, lambda_[0]),
        color=colors[0],
        label="$\lambda = %.1f$" % lambda_[0],
        alpha=0.60,
        edgecolor=colors[0],
        lw="3")
plt.show()
Example #14
# Methods for creating plots to visualise atomic arrangements 

import matplotlib.pyplot as plt
import ase
import ase.io
from ase.visualize.plot import plot_atoms
from ase import Atoms
import numpy as np
from mpl_toolkits import mplot3d
from IPython.core.pylabtools import figsize
figsize(5, 5)


def plot_cell_ase(cell, title):
    """Use ase in-built function to create 2D plot
    NOTE: Not very nice for visualising a 3D structure and appears to always set cell vectors to positive values

    Args:
        cell (ase Atoms object): Ase atoms object for structure to visualise
        title (str): Set a title for the plot

    Returns:
        Produces figure of plot, originally used in development notebook with '%matplotlib inline'
    """
    fig, ax = plt.subplots()
    plt.title(title)
    plot_atoms(cell, ax, radii=0.3, rotation=('0x,0y,0z'))
      
      
def plot_cell_custom_3D(ase_cell, title):
    """Produces 3D plot of structure for a system
Example #15
print(df.head(100))

# Golden cross / dead cross detection
prev_key = prev_val = 0

for key, val in df['diff'][1:].items():
    if val == 0:
        continue
    if val * prev_val < 0 and val > prev_val:
        print('[golden]', key, val)
    if val * prev_val < 0 and val < prev_val:
        print('[dead]', key, val)
    prev_key, prev_val = key, val

# Mark golden/dead crosses on the chart
ax = df[['Adj Close', 'MA_5', 'MA_20']].plot(figsize=(16,6))

prev_key = prev_val = 0

for key, val in df['diff'][1:].items():
    if val == 0:
        continue

    if val * prev_val < 0 and val > prev_val:
        ax.annotate('Golden', xy=(key, df['MA_20'][key]), xytext=(10,-30), textcoords='offset points', arrowprops=dict(arrowstyle='-|>'))
    if val * prev_val < 0 and val < prev_val:
        ax.annotate('Dead', xy=(key, df['MA_20'][key]), xytext=(10,30), textcoords='offset points', arrowprops=dict(arrowstyle='-|>'))

    prev_key, prev_val = key, val

# plt.show()
Example #16
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from IPython.core.pylabtools import figsize

iris_data=load_iris()  # Load the iris dataset
figsize(12.5, 10)
fig = plt.figure()
fig.suptitle('Plots of Iris Dimensions', fontsize=14)
fig.subplots_adjust(wspace=0.35,hspace=0.5)
colors=('r','g','b')
cols=[colors[i] for i in iris_data.target]

def get_legend_data(clrs):
    leg_data = []
    for clr in clrs:
        line=plt.Line2D(range(1),range(1),marker='o', color=clr)
        leg_data.append(line)
    return tuple(leg_data)


def display_iris_dimensions(fig,x_idx, y_idx,sp_idx):
    ax = fig.add_subplot(3,2,sp_idx)
    ax.scatter(iris_data.data[:, x_idx], iris_data.data[:,y_idx],c=cols)
    
    ax.set_xlabel(iris_data.feature_names[x_idx])
    ax.set_ylabel(iris_data.feature_names[y_idx])
    leg_data = get_legend_data(colors)
   
    ax.legend(leg_data,iris_data.target_names, numpoints=1,
Example #17
# In[1]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sct
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF

# In[2]:

# %matplotlib inline

from IPython.core.pylabtools import figsize

figsize(12, 8)

sns.set()

# ## Part 1

# ### Part 1 _setup_

# In[3]:

np.random.seed(42)

dataframe = pd.DataFrame({
    "normal": sct.norm.rvs(20, 4, size=10000),
    "binomial": sct.binom.rvs(100, 0.2, size=10000)
})
Example #18
try:
    File = open(sys.argv[1],'r')

except:
    print ("File " + sys.argv[1] + " not found.")
    sys.exit(37)

PicTempNames = []

for ii in File.readlines() :
    PicTempName = ii.split()
    PicTempNames.append(PicTempName)
    
    
from IPython.core.pylabtools import figsize
figsize(15,10)
plt.rcParams['axes.labelsize']   = 25
plt.rcParams['axes.titlesize']   = 30
plt.rcParams['xtick.labelsize']  = 15
plt.rcParams['ytick.labelsize']  = 15
plt.rcParams['legend.fontsize']  = 30
plt.rcParams['lines.markersize'] = 15 



#Load list of run ids and l3 rates.
id_vs_l3 = np.loadtxt('runid_to_l3rate')
# generate dictionary that maps runID to l3 rate
id_to_l3 = {str(int(id_vs_l3[ii][0])) : int(id_vs_l3[ii][1]) for ii in range(0,len(id_vs_l3[:,1]))}

#Set SIMBAD parameters
Example #19
# %%
import numpy as np
import scipy.stats as stats
from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt

figsize(12.5, 3)
colors = ["#348ABD", "#A606628", "#7A68A6", "#467821"]

x = np.linspace(0,1)
y1, y2 = stats.beta.pdf(x, 1, 1), stats.beta.pdf(x, 10, 10)

p = plt.plot(x, y1, label = 'An objective prior \n(uninformative, \n "Principle of Indifference")')
plt.fill_between(x, 0, y1, color = p[0].get_color(), alpha = .3)

p = plt.plot(x, y2, label = "A subjective prior \n(informative)")
plt.fill_between(x[25:], 0, 2, color = p[0].get_color(), alpha=.3)

p = plt.plot(x[25:], 2*np.ones(25), label = "another subjective prior")
plt.fill_between(x[25:], 0, 2, color = p[0].get_color(), alpha =.3)

plt.ylim(0, 4)

leg = plt.legend(loc = "upper left")
leg.get_frame().set_alpha(.4)
plt.title("Comparing objective vs. subjective priors for an unknown probability")

# %%
# Gamma
figsize(12.5, 5)
Example #20
    labeled_anomalies = data_frame.loc[data_frame['anomaly_label'] == 1, ['time_epoch', 'value']]
    ax.scatter(labeled_anomalies['time_epoch'], labeled_anomalies['value'], s=200, color='green')

    return ax



figsize(16, 7)
prepare_plot(training_data_frame)
plt.show()



# The visualization of the training and test datasets look like this:
# visualization

figsize(16, 7)
prepare_plot(test_data_frame)
plt.show()


# Preparing a dataset
training_data_frame['value_no_anomaly'] = training_data_frame[training_data_frame['anomaly_label'] == 0]['value']

from scipy.cluster.hierarchy import linkage, dendrogram  # import assumed by this snippet

plt.title('Dendrogram of Traditional Hierarchical Clustering')
z = linkage(data_array, method='complete')
d = dendrogram(z)

plt.axhline(y=2.,color='k',ls='dashed')
plt.axhline(y=1.25,color='b',ls='dashed')
plt.axhline(y=0.8,color='r',ls='dashed')

plt.show()


# In[3]:

figsize(6, 6)
import numpy as np
import matplotlib.pyplot as plt

n = range(0,11)
fig, ax = plt.subplots()
plt.title('2-Dimensional Visualization of the Data')
ax.scatter(data_array[:,0], data_array[:,1])

for i, txt in enumerate(n):
    ax.annotate(txt, (data_array[:,0][i],data_array[:,1][i]))
 


# ### Bayesian Hierarchical Clustering (BHC)
# 
Example #22
import pandas as pd
from IPython.core import pylabtools
import matplotlib
from actions_naming import marketbeat_mapping

# STOCKS_FILE_NAME = 'data/marketbeat_nasdaq_latest.csv'
STOCKS_FILE_NAME = 'data/marketbeat_nasdaq_2013_only.csv'
OUTPUT_NUMERIC_RECOS_FILE_NAME = 'data/marketbeat_numeric_recos_2013.csv'

# global drawing options
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
matplotlib.rcParams['mathtext.fontset'] = 'cm' # deals with missing fonts in matplotlib
pylabtools.figsize(15, 5)

def construct_data_matrix(data, output_file_name):
    # Add a column with the date string converted to datetime
    data['DateTime'] = pd.to_datetime(data['Date'])

    # filter the latest recommendation from every analyst and every ticker
    latest = data.iloc[data.groupby(['Ticker', 'Firm']).DateTime.idxmax()]

    # Sanity check - see that 4 actions turned into only the latest
    print(data[(data['Ticker'] == 'MSFT') & (data['Firm'] == 'Goldman Sachs')])
    print('\n\n')
    print(latest[(latest['Ticker'] == 'MSFT') & (latest['Firm'] == 'Goldman Sachs')])

    # Pivot the table, each row is a firm, each column a ticker, and the value of each cell is the rating
    recos = latest.pivot(index='Firm', columns='Ticker', values='Rating').dropna(how = 'all') # drop rows that are ALL NaN

    # print a sample
    # just show firms that have recommendations on all
Example #23
from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
from pylab import savefig

import pymc as pm
import numpy as np
import scipy.stats as stats
from scipy.stats.mstats import mquantiles

figsize(12.5, 3)

np.set_printoptions(precision=3, suppress=True)

challenger_data = np.genfromtxt("data/challenger_data.csv", skip_header=1,
                                usecols=[1, 2], missing_values="NA",
                                delimiter=",")

temperature = challenger_data[:, 0]
D = challenger_data[:, 1]  # defect, or not?

beta = pm.Normal("beta", 0, 0.001, value=0)
alpha = pm.Normal("alpha", 0, 0.001, value=0)

@pm.deterministic
def p(t=temperature, alpha=alpha, beta=beta):
    return 1.0 / (1. + np.exp(beta * t + alpha))

def logistic(x, beta, alpha=0):
    return 1.0 / (1.0 + np.exp(np.dot(beta, x) + alpha))

print(p.value)
Example #24
# connect the probabilities in `p` with our observations through a
# Bernoulli random variable.
with model:
    observed = pm.Bernoulli("bernoulli_obs", p, observed=D)

    # Mysterious code to be explained in Chapter 3
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(120000, step=step, start=start)
    burned_trace = trace[100000::2]

alpha_samples = burned_trace["alpha"][:, None]  # best to make them 1d
beta_samples = burned_trace["beta"][:, None]

# plt.figure(1)
figsize(12.5, 6)

#histogram of the samples:
plt.subplot(211)
plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\beta$", color="#7A68A6", density=True)
plt.legend()

plt.subplot(212)
plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
         label=r"posterior of $\alpha$", color="#A60628", density=True)
plt.legend()


# t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]
Example #25
n_data_points = 5  # in CH1 we had ~70 data points


@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_data_points)
    out[:tau] = lambda_1  # lambda before tau is lambda1
    out[tau:] = lambda_2  # lambda after tau is lambda2
    return out

# Here, what does prior distribution look like?
  
%matplotlib inline
from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
figsize(12.5, 4)


samples = [lambda_1.random() for i in range(20000)]
plt.hist(samples, bins=70, density=True, histtype="stepfilled")
plt.title(r"Prior distribution for $\lambda_1$")
plt.xlim(0, 8);

# Take the case of the SMS data in the previous chapter: knowing what we do
# about parent and child variables, and taking an omniscient view of the
# modeling procedure, we can work backwards to create data that mimics how
# we expect the real data was generated, i.e.:

tau = pm.rdiscrete_uniform(0, 80)
print( tau )
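# A sketch of the rest of that backwards recipe, following the book's
# approach (PyMC2's r* samplers; the exact code is elided in the original):
alpha = 1. / 20.
lambda_1, lambda_2 = pm.rexponential(alpha, 2)
lambda_ = np.r_[lambda_1 * np.ones(tau), lambda_2 * np.ones(80 - tau)]
data = pm.rpoisson(lambda_)
plt.bar(np.arange(80), data, color="#348ABD")
plt.title("Artificial dataset generated from the simulated parameters")
plt.xlim(0, 80)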
Example #26
indicators = pd.DataFrame(index=R.index)  #prepare indicators DataFrame

indicators['spread'] = NTpair['spread']
indicators['spread_ratio'] = NTpair['spread_ratio']
indicators['z_score'] = R_nday / R_nday.rolling(
    window=25).std()  # current day return z-score
indicators['cumRet'] = 100 * R.cumsum()  # total sum of returns, i.e. a synthetic price
indicators['ma'] = indicators['cumRet'].rolling(
    window=200).mean()  #moving average of synthetic price
indicators['momentum'] = indicators['cumRet'] - indicators[
    'ma']  # difference between synth.
#price and ma, gives indication of momentum strength
#print (indicators.tail(5))

figsize(8, 3)
indicators[['cumRet', 'ma']].tail(750).plot(grid=True,
                                            title='Nikkei-TOPIX spread')
plt.savefig(os.path.abspath('pics\\NT_spread') + '.png', format='png')
plt.figure()
indicators[['z_score', 'momentum']].tail(750).plot(grid=True)
plt.savefig(os.path.abspath('pics\\NT_zscore') + '.png', format='png')

# strategy parameters
win, ma_win, z_enter, z_enter2, z_exit = 25, 200, 2.5, 3.75, 1.9
NTdf = backtest(NTpair['spread_pct_N'],
                NTpair['spread_pct'],
                window=win,
                ma_thresh=0.5,
                ma_window=ma_win,
                z_enter=z_enter,
Example #27
       update the styles in only this notebook. Try running the following code:

        import json
        s = json.load(open("../styles/bmh_matplotlibrc.json"))
        matplotlib.rcParams.update(s)

"""

# The code below can be passed over, as it is currently not important, plus it
# uses advanced topics we have not covered yet. LOOK AT PICTURE, MICHAEL!
get_ipython().magic(u'matplotlib inline')
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt

figsize(11, 9)

import scipy.stats as stats

dist = stats.beta
n_trials = [0, 1, 2, 3, 4, 5, 8, 15, 50, 500]
data = stats.bernoulli.rvs(0.5, size=n_trials[-1])
x = np.linspace(0, 1, 100)

# For the already prepared, I'm using Binomial's conj. prior.
for k, N in enumerate(n_trials):
    sx = plt.subplot(len(n_trials) // 2, 2, k + 1)
    plt.xlabel("$p$, probability of heads") if k in [0, len(n_trials) -
                                                     1] else None
    plt.setp(sx.get_yticklabels(), visible=False)
    heads = data[:N].sum()
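    # The loop body is cut off in the original; a sketch of the book's
    # continuation -- Beta(1 + heads, 1 + N - heads) is the Binomial's
    # conjugate posterior after observing `heads` in N tosses:
    y = dist.pdf(x, 1 + heads, 1 + N - heads)
    plt.plot(x, y, label="observe %d tosses,\n %d heads" % (N, heads))
    plt.fill_between(x, 0, y, color="#348ABD", alpha=0.4)
    plt.vlines(0.5, 0, 4, color="k", linestyles="--", lw=1)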
#import seaborn as sns
from sklearn.metrics import recall_score, precision_score, make_scorer
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm

df = pd.read_csv('/home/peng/new160half.csv', header=0)
print(df)
df1=df[df['Target']==1]
df0=df[df['Target']==0]


figsize(10,8)
ax = plt.subplot(111, projection='3d')
ax.scatter(df1['length'], df1['stress'], df1['age'], c='r', s=30, label='Failed')
ax.scatter(df0['length'], df0['stress'], df0['age'], c ='b', marker='^',s=30, label = 'Unfailed')
plt.xlabel('Length of roadway(m)', fontsize=16)
plt.ylabel('Stress factor',fontsize=16)

ax.set_zlabel('Age(years)',fontsize=16, rotation = 90)
plt.legend(fontsize = 16)

plt.show()


def main():

    # unknown ground truth
    lambda1 = tf.constant(1.)
    lambda2 = tf.constant(3.)

    # sample a fake observation: data ~ poisson(data[0]; lambda1) * poisson(data[1]; lambda2)
    data = tf.stack([
        tfp.distributions.Poisson(rate=lambda1).sample(sample_shape=(1),
                                                       seed=4),
        tfp.distributions.Poisson(rate=lambda2).sample(sample_shape=(1),
                                                       seed=8)
    ])

    # (posteriori L) likelihood(lambda1,lambda2 | data)  = p(data | lambda1, lambda2) = poisson(data[0] ; lambda1) * poisson(data[1] ; lambda2)
    # x represents lambda1, y represents lambda2
    x = y = tf.linspace(.01, 5., 100)
    prob_x = tfp.distributions.Poisson(rate=x).prob(data[0, ...])
    prob_y = tfp.distributions.Poisson(rate=y).prob(data[1, ...])
    L = tf.expand_dims(prob_x, 1) * tf.expand_dims(prob_y, 0)
    # (prior M) p(lambda1, lambda2) = P(lambda1) * P(lambda2), where lambda1 ~ U(0,5), lambda2 ~ U(0,5)
    uniform_x = tfp.distributions.Uniform(low=0., high=5.).prob(x)
    # `median` is presumably a small helper defined elsewhere (e.g. np.median
    # applied to the eager tensor); it is not a TF/TFP built-in.
    m = median(tf.gather_nd(uniform_x, tf.where(tf.greater(uniform_x, 0))))
    uniform_x = tf.where(tf.equal(uniform_x, 0), uniform_x, m)
    uniform_y = tfp.distributions.Uniform(low=0., high=5.).prob(y)
    m = median(tf.gather_nd(uniform_y, tf.where(tf.greater(uniform_y, 0))))
    uniform_y = tf.where(tf.equal(uniform_y, 0), uniform_y, m)
    M = tf.expand_dims(uniform_x, 1) * tf.expand_dims(uniform_y, 0)

    plt.figure(figsize=(12.5, 15.0))

    # 1) plot P(lambda1, lambda2) = P(lambda1) * P(lambda2)
    # lambda ~ Uniform(0, 5)
    plt.subplot(221)
    im = plt.imshow(M.numpy(),
                    interpolation='none',
                    origin='lower',
                    cmap=plt.cm.jet,
                    vmax=1,
                    vmin=-.15,
                    extent=(0, 5, 0, 5))
    plt.scatter(lambda2.numpy(),
                lambda1.numpy(),
                c='k',
                s=50,
                edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    plt.title(r'Landscape formed by Uniform priors on $p_1, p_2$')
    # 2) plot P(lambda1, lambda2, data) = p(lambda1, lambda2) * p(data | lambda1, lambda2)
    plt.subplot(223)
    plt.contour(x.numpy(), y.numpy(), (M * L).numpy())
    im = plt.imshow(M * L,
                    interpolation='none',
                    origin='lower',
                    cmap=plt.cm.jet,
                    extent=(0, 5, 0, 5))
    plt.title(
        'Landscape warped by %d data observation;\n Uniform priors on $p_1, p_2$.'
        % 1)
    plt.scatter(lambda2.numpy(),
                lambda1.numpy(),
                c='k',
                s=50,
                edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    # 3) plot P(lambda1, lambda2) = P(lambda1) * P(lambda2)
    # lambda1 ~ Exponential(0.3)
    # lambda2 ~ Exponential(0.1)
    plt.subplot(222)
    expx = tfp.distributions.Exponential(rate=.3).prob(x)
    expx = tf.where(tf.math.is_nan(expx),
                    tf.ones_like(expx) * expx[1], expx)
    expy = tfp.distributions.Exponential(rate=.10).prob(y)
    expy = tf.where(tf.math.is_nan(expy),
                    tf.ones_like(expy) * expy[1], expy)
    M = tf.expand_dims(expx, 1) * tf.expand_dims(expy, 0)
    plt.contour(x, y, M)
    im = plt.imshow(M,
                    interpolation='none',
                    origin='lower',
                    cmap=plt.cm.jet,
                    extent=(0, 5, 0, 5))
    plt.scatter(lambda2.numpy(),
                lambda1.numpy(),
                c='k',
                s=50,
                edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)
    plt.title('Landscape formed by Exponential priors on $p_1, p_2$.')
    # 4) plot P(lambda1, lambda2, data) = P(lambda1, lambda2) * p(data | lambda1, lambda2)
    plt.subplot(224)
    plt.contour(x, y, M * L)
    im = plt.imshow(M * L,
                    interpolation='none',
                    origin='lower',
                    cmap=plt.cm.jet,
                    extent=(0, 5, 0, 5))
    plt.title(
        'Landscape warped by %d data observation; \n Exponential priors on $p_1, p_2$.'
        % 1)
    plt.scatter(lambda2.numpy(),
                lambda1.numpy(),
                c='k',
                s=50,
                edgecolor='none')
    plt.xlim(0, 5)
    plt.ylim(0, 5)

    plt.show()
from __future__ import print_function, absolute_import, division
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize
figsize(12, 4)

import os
import sys
os.environ['THEANO_FLAGS'] = "device=cpu,optimizer=fast_run"
DATA_DIR = os.path.join('/res', 'data')
sys.path.append(os.path.join('/res', 'src'))

import scipy.io as sio
import numpy as np

import theano
from theano import tensor as T

import h5py # for loading data

from utils import (plot_images, Progbar, plot_confusion_matrix, plot_weights,
                   glorot_uniform, validate_deep_network, train_networks,
                   report_performance, plot_weights4D)

f = h5py.File(os.path.join(DATA_DIR, 'mnist.h5'), 'r')
X_train = f['X_train'][()]
y_train = f['y_train'][()]

X_valid = f['X_valid'][()]
y_valid = f['y_valid'][()]
    print('Classification Report: ')
    print(classification_report(y_test,y_pred))
    
    y_pred_prob = classifier.predict_proba(X_test)[:,1]
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
    plt.plot([0,1],[0,1], 'k--')
    plt.plot(fpr, tpr, linewidth=5,color=clr,label=lbl)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(roctitle)
    if showflag:
        leg = plt.legend(loc='lower right')
        plt.show()
    return True

figsize(12,9)

########################################## Baseball Fun ###############################################

MIN_AT_BATS = 0
START_YEAR = 1954
END_YEAR = 2018
FSHZ = 17
START_DATE = datetime.strptime(str(START_YEAR)+'-01-01','%Y-%m-%d')
END_DATE = datetime.strptime(str(END_YEAR)+'-12-31','%Y-%m-%d')
LEGEND_PROPERTIES = {'weight':'bold'}
path = 'C:\\Users\\User\\Documents\\PAUL\\Springboard\\core\\'


battingf = path + 'dfbatting_player_allstats.csv'
dfbatting_player_stats = pd.read_csv(battingf,parse_dates=['debut','finalGame','birthdate'])
Example #32
flow_edges = compute_flow_edge(name) # flow edge

######## Diffusion ##########

#from diffusion import diffuse_inprob
print('Diffusion...')

inprobs = compute_inprob(name, segs)

diffused_prob = diffuse_inprob(inprobs, paths, segs,imgs)

inprob_image = prob_to_image(inprobs, paths, segs) 
diffused_image = prob_to_image(diffused_prob, paths,segs ) 

for i in range(diffused_image.shape[2]):
    figure(figsize=(12, 9))
    subplot(1, 2, 1)
    imshow(inprob_image[:, :, i])
    subplot(1, 2, 2)
    imshow(diffused_image[:, :, i])
    show()
    
locprior = compute_locprior(name, segs, diffused_prob)
loc_unary = -np.log(locprior+1e-7)

###### Random forest ########
print('Random Forest...')
# see my thesis, p.14
# prepare training data based on diffused prob.

mean_rgbs = []
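# Hypothetical sketch of the training-data preparation described above (the
# original truncates after `mean_rgbs = []`); assumes `imgs`/`segs` are
# per-frame arrays and `diffused_prob[f][s]` is the probability of segment s
# in frame f -- none of these shapes are confirmed by the source:
from sklearn.ensemble import RandomForestClassifier

labels = []
for f in range(len(imgs)):
    for s in np.unique(segs[f]):
        mean_rgbs.append(imgs[f][segs[f] == s].mean(axis=0))  # mean RGB feature
        labels.append(int(diffused_prob[f][s] > 0.5))         # pseudo-label
rf = RandomForestClassifier(n_estimators=100)
rf.fit(np.array(mean_rgbs), np.array(labels))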
from datetime import datetime
import os.path
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib as mpl
import pylab as plb
import matplotlib.mlab as mlab
import math
from numpy.random import seed
import random
from IPython.core.pylabtools import figsize
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns

figsize(14, 10)
#sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white'})
sns.set_style('white')

# standard global constants
MIN_AT_BATS = 0
START_YEAR = 1970
END_YEAR = 2018
FSHZ = 17
START_DATE = datetime.strptime(str(START_YEAR) + '-01-01', '%Y-%m-%d')
END_DATE = datetime.strptime(str(END_YEAR) + '-12-31', '%Y-%m-%d')
LEGEND_PROPERTIES = {'weight': 'bold'}
# set path for reading Lahman baseball statistics and read data from rttm dataset
path = 'C:\\Users\\User\\Documents\\PAUL\\Springboard\\core\\'

battingf = path + 'cpOPSpredictionsRidge_GS.csv'
Example #35
import os
import pandas as pd
import numpy as np
import time
from ta import momentum
import pymongo
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
import yfinance as yf
import datetime
figsize(20, 7)

client = pymongo.MongoClient('mongodb+srv://admin:[email protected]/test?retryWrites=true&w=majority')#(os.environ['MONGO_URI'])
db = client.stocks

class Day_parser:
  '''Create a day parser'''
  def __init__(self, date):
    self.date = date
  def isweekday(self):
    weekday = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    return self.date in weekday

def round_amt(x, base=5):
  '''Round a stock amount to the nearest multiple of `base`.''' # TODO: update so the amount does not exceed 20K
  return base * round(x/base)
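# e.g. round_amt(23) -> 25 and round_amt(12) -> 10: nearest multiple of `base`.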

def create_dataframe(stock, c_day):
Example #36
import os
from keras.utils import get_file
import gensim
import patoolib
import subprocess
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
figsize(10, 10)

from sklearn.manifold import TSNE
import json
from collections import Counter
from itertools import chain

MODEL = 'GoogleNews-vectors-negative300.bin'

path = get_file(
    MODEL + '.gz',
    'https://deeplearning4jblob.blob.core.windows.net/resources/wordvectors/%s.gz'
    % MODEL)

if not os.path.isdir('generated'):
    os.mkdir('generated')
unzipped = os.path.join('generated', MODEL)
is_model_generated = os.path.isdir(unzipped)
if not is_model_generated:
    print("Unpacking model")
    patoolib.extract_archive(path, outdir=unzipped)
def Main():

    mcmc = pm.MCMC([true_price, prize_1, prize_2, price_estimate, error])
    mcmc.sample(50000, 10000)

    price_trace = mcmc.trace("true_price")[:]

    # Plotting
    figsize(12.5, 4)
    x = np.linspace(5000, 40000)
    plt.plot(x,
             stats.norm.pdf(x, 35000, 7500),
             c="k",
             lw=2,
             label="prior dist. of suite price")

    _hist = plt.hist(price_trace, bins=35, density=True, histtype="stepfilled")
    plt.title("Posterior of the true price estimate")
    plt.vlines(mu_prior,
               0,
               1.1 * np.max(_hist[0]),
               label="prior's mean",
               linestyles="--")
    plt.vlines(price_trace.mean(),
               0,
               1.1 * np.max(_hist[0]),
               label="posterior's mean",
               linestyles="-.")
    plt.legend(loc="upper left")
    plt.show()

    guesses = np.linspace(5000, 50000, 70)
    risks = np.linspace(30000, 150000, 6)
    expected_loss = lambda guess, risk: showdown_loss(guess, price_trace, risk).mean()

    figsize(12.5, 7)

    for _p in risks:
        results = [expected_loss(_g, _p) for _g in guesses]
        plt.plot(guesses, results, label="%d" % _p)

    # Plotting
    plt.title("Expected loss of different guesses, \nvarious risk-levels of \
    overestimating")
    plt.legend(loc="upper left", title="Risk parameter")
    plt.xlabel("price bid")
    plt.ylabel("expected loss")
    plt.xlim(5000, 30000)

    ax = plt.subplot(111)

    for _p in risks:
        _color = next(ax._get_lines.prop_cycler)
        _min_results = sop.fmin(expected_loss, 15000, args=(_p, ), disp=False)
        _results = [expected_loss(_g, _p) for _g in guesses]

        plt.plot(guesses, _results, color=_color['color'])
        plt.scatter(_min_results, 0, s = 60, \
                    color= _color['color'], label = "%d"%_p)
        plt.vlines(_min_results,
                   0,
                   120000,
                   color=_color['color'],
                   linestyles="--")

        plt.title("Expected loss & Bayes actions of different guesses, \n \
        various risk-levels of overestimating")
        plt.legend(loc="upper left",
                   scatterpoints=1,
                   title="Bayes action at risk:")
        plt.xlabel("price guess")
        plt.ylabel("expected loss")
        plt.xlim(7000, 30000)
        plt.ylim(-1000, 80000)
Example #38
    y_mean.append(np.mean(y[col_index[-2]:col_index[-1]]))
    y_std.append(np.std(y[col_index[-2]:col_index[-1]]))
    x_label_legend = "age" + "(" + str(age1) + '-' + str(age2) + ")"
    return x_label, y_mean, y_std, x_label_legend


# s = den_divide(x_y_den, 4, 3)
# For distance, run the following two lines
x_label1, y_mean1, y_std1, x_label_legend1 = part_mean(x_y_den, 6, 1, 20, 60)
x_label2, y_mean2, y_std2, x_label_legend2 = part_mean(x_y_den, 7, 0, 30, 60)
# For age, run the following two lines
# x_label1, y_mean1, y_std1 = part_mean(x_y_den, 11, 1)
# x_label2, y_mean2, y_std2 = part_mean(x_y_den, 5, 0)
# t.scatter(res[0], res[1])
# t.show()
figsize(30, 28)
male = G[G['gender'] == 1]
female = G[G['gender'] == 0]
plt.scatter(male['dis'], male['speed'], color='r', alpha=0.05)
plt.scatter(female['dis'], female['speed'], color='b', alpha=0.1)
plt.errorbar(x_label1, y_mean1, yerr=y_std1, color='r', alpha=1, label='male')
plt.errorbar(x_label2,
             y_mean2,
             yerr=y_std2,
             color='b',
             alpha=1,
             label='female')
plt.xlabel('distance (m)', font2)
plt.ylabel('speed(m/s)', font2)
# plt.title(x_label_legend1)
# ax.yaxis.set_major_locator(MultipleLocator(0.4))
Example #39
File: plt.py  Project: Ng-95/INT-label
# -*- coding: UTF-8 -*-
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import re
from sklearn.preprocessing import minmax_scale
from IPython.core.pylabtools import figsize
from matplotlib.ticker import MultipleLocator



if __name__ == '__main__':
    figsize(5, 3)
    font_legend = {'family': 'Arial',
                   'weight': 'normal',
                   'size': 11,
                   }
    font_label = {'family': 'Arial',
                  'weight': 'normal',
                  'size': 18,
                  }
    data = pd.read_excel('./data.xlsx')

    fig, left_axis = plt.subplots()
    right_axis = left_axis.twinx()

    lns1=left_axis.plot(np.array(data['interval']), list(data['A_coverage']), color='red', linewidth=1, linestyle='solid', markersize=8,
                   marker='^', label='A Coverage')
    lns2 = left_axis.plot(np.array(data['interval']), list(data['B_coverage']), color='purple', linewidth=1,
                          linestyle='solid', markersize=8,
Example #40
# Thanks to Cameron for his book: Probabilistic Programming & Bayesian Methods for Hackers.
# This is not an easy book for me.

# ==================== basics
# prior probability: our belief in the probability of some event.
# evidence: another, related event that we observe.
# posterior probability: our updated belief about the event after seeing the
# evidence, computed with Bayes' formula: P(A|X) = P(X|A)P(A) / P(X)
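# A quick worked check of where the 0.087 below comes from, using the book's
# Steve-the-librarian example (the 0.95 and 0.5 likelihoods are my assumed
# inputs): P(librarian) = 1/21, P(desc|librarian) = 0.95, P(desc|farmer) = 0.5
p_A = 1 / 21
p_X_given_A, p_X_given_notA = 0.95, 0.5
p_X = p_X_given_A * p_A + p_X_given_notA * (1 - p_A)  # total probability of the evidence
print(p_X_given_A * p_A / p_X)                        # -> 0.0867 ~ 0.087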

# in IPython environment
from IPython.core.pylabtools import figsize
import numpy as np
import matplotlib.pyplot as plt
figsize(12.5, 4)
plt.rcParams["savefig.dpi"] = 300
plt.rcParams["figure.dpi"] = 120
colors = ['#348ABD', '#A60628']
prior = [1/21, 20/21]
posterior = [0.087, 1-0.087]
plt.bar([0, .7], prior, alpha=0.70, width=0.25,
        color=colors[0], label='prior distribution', 
        lw='3', edgecolor="#348ABD")
plt.bar([0 + 0.25, .7 + 0.25], posterior, alpha=0.70, width=0.25, 
        color=colors[1], label='posterior distribution', 
        lw='3', edgecolor="#A60628")
plt.xticks([0.02, 0.95], ['librarian', 'farmer'])
plt.title("Prior and Posterior probabilities of Steve's occupation")
plt.ylabel("Probability")
plt.legend(loc='upper left')
plt.show()
Example #41
import pandas as pd
import os
from ipywidgets import widgets, interact, fixed
from IPython.display import display

get_ipython().magic('matplotlib inline')
import seaborn as sbn
import matplotlib.pyplot as plt
import numpy as np
from IPython.core.pylabtools import figsize
import scipy
import scipy.interpolate
from contextlib import redirect_stdout

figsize(12, 10)
sbn.set_context("paper", font_scale=1)
sbn.set_style("whitegrid")

from collections import namedtuple

# ### Testing for the maintenance monte carlo simulation

# In[2]:


def harmonicConstituentModel(time, *hm):
    assert len(hm) % 3 == 0
    velocity = 0
    for i in range(len(hm) // 3):
        velocity += hm[3 * i] * np.cos(
%matplotlib inline
import numpy as np
from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt

figsize(12.5, 5)

sample_size = 100000
expected_value = lambda_ = 4.5
poi = np.random.poisson
N_samples = range(1, sample_size, 100)

for k in range(3):
    samples = poi(lambda_, sample_size)

    partial_average = [samples[:i].mean() for i in N_samples]

    plt.plot(N_samples, partial_average, lw=1.5, label="average of $n$ samples; seq. %d" % k)

plt.plot(N_samples, expected_value*np.ones_like(partial_average),
    ls = "--", label="true expected value", c ="k")

plt.ylim(4.35, 4.65)
plt.title("Convergence of the average of \n random variables to its \
expected_value")
plt.ylabel("average of $n$ samples")
plt.xlabel( "# of samples, $n$")
plt.legend()

figsize(12.5, 4)
Example #44
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np
import pandas as pd
import warnings

from matplotlib import pyplot as plt

from IPython.core.pylabtools import figsize
figsize(11, 9)

import collections

import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.python import debug as tf_debug

tfd = tfp.distributions

import tensorflow.contrib.eager as tfe
from tensorflow.python.eager.context import eager_mode, graph_mode
import pandas as pd
import numpy as np

# Handy snippet to reset the global graph and global session.
Example #45
@author: Usamahk
"""

# This file examines the challenger data to determine and model where failure
# of an O ring will occur due to temperature.

# Libraries we will need

from IPython.core.pylabtools import figsize
from matplotlib import pyplot as plt
import pymc as pm
import numpy as np

# Loading the data and setting figsize as a standard
figsize(12.5, 3.5)
np.set_printoptions(precision=3, suppress=True)
challenger_data = np.genfromtxt("data/challenger_data.csv",
                                skip_header=1,
                                usecols=[1, 2],
                                missing_values="NA",
                                delimiter=",")

# Print to see data
print("Temp (F), O-Ring Failure")
print(challenger_data)

# Now that we've read in the data, we need to drop all the NA values, in this
# case only one at the end. Should do this before in practice but getting a
# hang of things here.
Example #46
    StEr_L=data[:,6]
    SyEr_H=data[:,7]
    SyEr_L=data[:,8]
if DataFound2:
    Edata=np.loadtxt(ElasticData,float,usecols=(0,1,2,3,4,5,6,7,8),skiprows=11)
    EPoint=Edata[:,0]
    EPlab=Edata[:,1] #GeV/c
    EPlab_min=Edata[:,2]
    EPlab_max=Edata[:,3]
    ESig=Edata[:,4]
    EStEr_H=Edata[:,5]
    EStEr_L=Edata[:,6]
    ESyEr_H=Edata[:,7]
    ESyEr_L=Edata[:,8]

pyt.figsize(12,7)  # pyt: presumably IPython.core.pylabtools imported as pyt

def Ecm(Plab):
    """Converts Plab momenta to center of mass energy [GeV]."""
    E=(((Plab**2+.938**2)**(1/2.)+.938)**2-(Plab**2))**(1/2.)
    return E
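# Sanity check (my addition): for Plab >> m the formula reduces to
# sqrt(2*m*Plab); e.g. Ecm(100.) ~ 13.8 GeV vs. sqrt(2*0.938*100) ~ 13.7 GeV.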
if DataFound1 and DataFound2:
    #Automatically converts all P_lab momenta to corresponding center-of-mass energy [GeV]
    E_cm=Ecm(Plab)
    eE_cm=Ecm(EPlab)
    cm_min=Ecm(Plab_min)
    cm_max=Ecm(Plab_max)
    ecm_min=Ecm(EPlab_min)
    ecm_max=Ecm(EPlab_max)

#Define best fit curve given by the particle data group
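# The original snippet truncates here. A sketch of one standard 7-parameter
# COMPETE/PDG-style parameterization, chosen only because the calls
# func(E_cm, popt[0], ..., popt[6]) elsewhere take seven fitted values; the
# authors' actual functional form may differ:
def func(E, Z, B, s0, Y1, eta1, Y2, eta2):
    """Total pp cross section [mb] vs. center-of-mass energy E = sqrt(s) [GeV]."""
    s = E**2
    return Z + B*np.log(s/s0)**2 + Y1*(s0/s)**eta1 - Y2*(s0/s)**eta2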
Example #47
# isolating the examples so I can follow better

# These scripts are taken from his code chunks. Ths example deals with
# text message data over the course of a 2 months or so. The hypothesis
# is that at some point the users behaviour changed. Can we infer this?

# Import all libraries

from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
import pymc as pm

# Set figsize 

figsize(12.5, 3.5)

# Load data

count_data = np.loadtxt("txtdata.csv")
n_count_data = len(count_data)

# Making a plot of daily messaging

plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("count of text-msgs received")
plt.title("Did the user's texting habits change over time?")
plt.xlim(0, n_count_data);

# Defining all the variables, alpha, lambda_1 and lambda_2
Example #48
# The researchers actually observe 35 "Yes" responses.
X = 35
observations = pm.Binomial("obs", N, observed_proportion, observed=True,
                           value=X)

# Let's add all variables of interest to a Model container, and run our
# black-box algo over the model
model = pm.Model([p, true_answers, first_coin_flips,
                  second_coin_flips, observed_proportion, observations])

# Monte Carlo sim
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)

figsize(12.5, 3)
p_trace = mcmc.trace("freq_cheating")[:]
plt.hist(p_trace, histtype="stepfilled", normed=True, alpha=0.85, bins=30,
         label="posterior distribution", color="#348ABD")
plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)
plt.xlim(0, 1)
plt.legend()

savefig("cheats.png", bbox_inches="tight")

# What do we now know? Our posterior distribution shows that it is implausible
# that there are no cheaters: we started with a uniform prior (treating all
# values of p as equally possible), then the posterior assigned a low
# probability to p = 0

# we have thus gathered information, preserving some degree of privacy, and are
def main():
    figsize(12.5, 4)
    data = np.loadtxt("data/mixture_data.csv", delimiter=",")

    plt.hist(data, bins=20, color="k", histtype="stepfilled", alpha=0.8)
    plt.title("Histogram of the dataset")
    plt.ylim([0, None])
    print data[:10], "..."
    plt.show()

    p = pm.Uniform("p", 0, 1)

    assignment = pm.Categorical("assignment", [p, 1 - p], size=data.shape[0])
    print "prior assignment, with p = %.2f:" % p.value
    print assignment.value[:10], "..."

    taus = 1.0 / pm.Uniform("stds", 0, 100, size=2) ** 2
    centers = pm.Normal("centers", [120, 190], [0.01, 0.01], size=2)

    """
    The below deterministic functions map an assignment, in this case 0 or 1,
    to a set of parameters, located in the (1,2) arrays `taus` and `centers`.
    """

    @pm.deterministic
    def center_i(assignment=assignment, centers=centers):
        return centers[assignment]

    @pm.deterministic
    def tau_i(assignment=assignment, taus=taus):
        return taus[assignment]

    print "Random assignments: ", assignment.value[:4], "..."
    print "Assigned center: ", center_i.value[:4], "..."
    print "Assigned precision: ", tau_i.value[:4], "..."

    observations = pm.Normal("obs", center_i, tau_i, value=data, observed=True)
    model = pm.Model([p, assignment, observations, taus, centers])

    mcmc = pm.MCMC(model)
    mcmc.sample(50000)

    plt.subplot(311)
    lw = 1
    center_trace = mcmc.trace("centers")[:]

    if center_trace[-1, 0] > center_trace[-1, 1]:
        colors = ["#348ABD", "#A60628"]
    else:
        colors = ["#A60628", "#348ABD"]

    plt.plot(center_trace[:, 0], label="trace of center 0", c=colors[0], lw=lw)
    plt.plot(center_trace[:, 1], label="trace of center 1", c=colors[1], lw=lw)
    plt.title("Traces of unknown parameters")
    leg = plt.legend(loc="upper right")
    leg.get_frame().set_alpha(0.7)

    plt.subplot(312)
    std_trace = mcmc.trace("stds")[:]
    plt.plot(std_trace[:, 0], label="trace of standard deviation of cluster 0",
         c=colors[0], lw=lw)
    plt.plot(std_trace[:, 1], label="trace of standard deviation of cluster 1",
         c=colors[1], lw=lw)
    plt.legend(loc="upper left")

    plt.subplot(313)
    p_trace = mcmc.trace("p")[:]
    plt.plot(p_trace, label="$p$: frequency of assignment to cluster 0",
         color="#467821", lw=lw)
    plt.xlabel("Steps")
    plt.ylim(0, 1)
    plt.legend()
    plt.show()

    std_trace = mcmc.trace("stds")[:]

    _i = [1, 2, 3, 4]
    for i in range(2):
        plt.subplot(2, 2, _i[2 * i])
        plt.title("Posterior of center of cluster %d" % i)
        plt.hist(center_trace[:, i], color=colors[i], bins=30,
                 histtype="stepfilled")

        plt.subplot(2, 2, _i[2 * i + 1])
        plt.title("Posterior of standard deviation of cluster %d" % i)
        plt.hist(std_trace[:, i], color=colors[i], bins=30,
                 histtype="stepfilled")
        # plt.autoscale(tight=True)

    plt.tight_layout()
    plt.show()

    cmap = mpl.colors.ListedColormap(colors)  # assumes `import matplotlib as mpl`
    plt.imshow(mcmc.trace("assignment")[::400, np.argsort(data)],
           cmap=cmap, aspect=.4, alpha=.9)
    plt.xticks(np.arange(0, data.shape[0], 40),
           ["%.2f" % s for s in np.sort(data)[::40]])
    plt.ylabel("posterior sample")
    plt.xlabel("value of $i$th data point")
    plt.title("Posterior labels of data points")
    plt.show()

    cmap = mpl.colors.LinearSegmentedColormap.from_list("BMH", colors)
    assign_trace = mcmc.trace("assignment")[:]
    plt.scatter(data, 1 - assign_trace.mean(axis=0), cmap=cmap,
            c=assign_trace.mean(axis=0), s=50)
    plt.ylim(-0.05, 1.05)
    plt.xlim(35, 300)
    plt.title("Probability of data point belonging to cluster 0")
    plt.ylabel("probability")
    plt.xlabel("value of data point")
    plt.show()

    x = np.linspace(20, 300, 500)
    posterior_center_means = center_trace.mean(axis=0)
    posterior_std_means = std_trace.mean(axis=0)
    posterior_p_mean = mcmc.trace("p")[:].mean()

    plt.hist(data, bins=20, histtype="step", normed=True, color="k",
         lw=2, label="histogram of data")
    y = posterior_p_mean * stats.norm.pdf(x, loc=posterior_center_means[0],
                                    scale=posterior_std_means[0])
    plt.plot(x, y, label="Cluster 0 (using posterior-mean parameters)", lw=3)
    plt.fill_between(x, y, color=colors[1], alpha=0.3)

    y = (1 - posterior_p_mean) * stats.norm.pdf(x, loc=posterior_center_means[1],
                                          scale=posterior_std_means[1])
    plt.plot(x, y, label="Cluster 1 (using posterior-mean parameters)", lw=3)
    plt.fill_between(x, y, color=colors[0], alpha=0.3)

    plt.legend(loc="upper left")
    plt.title("Visualizing Clusters using posterior-mean parameters")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt, mpld3
get_ipython().magic(u'matplotlib inline')
mpld3.enable_notebook()

##GET Data from MYSQL which will contain metric and associated value
import scipy.stats as stats
import pandas as pd
from sqlalchemy import create_engine
from IPython.core.pylabtools import figsize  # imports assumed by this snippet
engine=create_engine("mysql+mysqldb://username:password@localhost:3306/schema",pool_recycle=3600)
table=pd.read_sql_query("SELECT date(min_stamp) as dayte,avg(nineeighty_pctl_usrs) as daily FROM (SELECT * FROM usr.users WHERE access_technology = 'ground')a GROUP BY DATE(min_stamp)",con=engine)
min(table.dayte),max(table.dayte)

## Show an initial plot of the distribution of the Count Data
mpld3.enable_notebook()
from matplotlib.backends.backend_pdf import PdfPages
from IPython.core.pylabtools import figsize

pp = PdfPages('ground.pdf')
figsize(13.5, 5)
table = table.sort_values(['dayte'])
count_data = table['daily']
n_count_data = len(count_data)
plt.bar(np.arange(n_count_data),count_data,color="#348ABD")
plt.xlabel("Time (days)")
plt.ylabel("98th Percentile of Total Users in Sector, leading to Event")
plt.xlim(0, n_count_data)
plt.savefig(pp, format='pdf')

## Adapted from the PyMC book (Bayesian Methods for Hackers) on GitHub: modeling a changepoint in time-series count data, here the daily user counts
import pymc as pm
alpha = 1.0 / count_data.mean()  # recall count_data is the
                                 # variable holding our daily user counts
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
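## A hedged sketch completing the changepoint model described above, following
## the standard PyMC 2 formulation from the referenced book; the names tau,
## lambda_, and observation, and the sample counts, are assumptions.
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # rate before the changepoint
    out[tau:] = lambda_2  # rate after the changepoint
    return out

observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000)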
data1 = df1.values.T
data2 = df2.values.T
data3 = df3.values.T
data4 = df4.values.T

kinase = data4[:, 0]
substrate = data4[:, 1]

##### Scatter plot with a least-squares line to eyeball the linear model
figsize(10,10)

(m, b) = np.polyfit(kinase, substrate, 1)
yp = np.polyval([m, b], kinase)
equation = 'y = %.2fx + %.2f' % (m, b)

plt.scatter(kinase, substrate, s=75, color="#1E90FF", alpha=0.5)
plt.plot(kinase,yp, color = '#696969', linewidth = 2.0)
plt.text(-2.5,-.5, equation, fontsize = 16)
plt.ylabel(protein1Name, fontsize = 20)
plt.xlabel(protein2Name + '-S94', fontsize = 20)
plt.title("Phosphorylation Relationship Between " + protein1Name + " and " + protein2Name + '-S94', fontsize = 20)
plt.tight_layout()
plt.savefig(path + '/' + protein1Name + '_' + protein2Name + '-S94_Estimator.png')

##### assuming a linear relationship R = alpha + beta * kinase + epsilon (prec)
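##### A minimal PyMC 2 sketch of that linear model, with vague priors; the
##### priors, names, and sample counts here are illustrative assumptions.
import pymc as pm

alpha = pm.Normal("alpha", 0, 0.001)  # vague prior on the intercept
beta = pm.Normal("beta", 0, 0.001)    # vague prior on the slope
prec = pm.Uniform("prec", 0, 100)     # precision of the Gaussian noise

@pm.deterministic
def mean_substrate(alpha=alpha, beta=beta, kinase=kinase):
    return alpha + beta * kinase

obs = pm.Normal("obs", mean_substrate, prec, value=substrate, observed=True)
lin_mcmc = pm.MCMC(pm.Model([alpha, beta, prec, obs]))
lin_mcmc.sample(40000, 10000)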
ax2.set_ylabel('Rolling std', color='r')
for tl in ax2.get_yticklabels():
    tl.set_color('r')
plt.show()

stop = timeit.default_timer()
print "The running takes %r min" % ((stop - start) / 60)

ff = RFclass.training()
tt = RFclass.test()
pp = Superplot.fancy()
figsize(9.5,7)

df1 = pd.read_csv('//home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/Ensemble/statistical_csv/bag_acc_10cv_100_4000.csv', header=0)
df2 = pd.read_csv('//home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/Ensemble/statistical_csv/bag_prec_10cv_100_4000.csv', header=0)
print "This is the training set of field data."
print "The size of the data is " 
print df.describe()
plt.plot(df1['tree_range'], df1['12'], label='Accuracy')
plt.plot(df1['tree_range'], df2['12'], label = 'Precision')
plt.legend(fontsize = 20)
plt.xticks(fontsize =20)
plt.yticks(fontsize =20)
plt.ylabel('Classification metrics', fontsize = 24)
plt.xlabel('Number of trees', fontsize = 24)
plt.show()
def CostAnalysis(dist, AvgSpeed, FuelConsEst, FuelPrice, InsTaxCar, TotalExpCar, DrvNowCostMin):
    TrCostCar = []
    TrCostTaxi = []
    TrCostDrvNow = []
    DrvNowCostHr = DrvNowCostMin * 60
    for d in range(1, max(dist) + 1):
        TrCostCar.append(TotalExpCar / 365 + (FuelConsEst / 100 * d) * FuelPrice)
        TrCostTaxi.append(5.90 + d * 1.60 + InsTaxCar / 365)
        TrCostDrvNow.append(d / AvgSpeed * DrvNowCostHr + InsTaxCar / 365)
    return TrCostCar, TrCostTaxi, TrCostDrvNow


# In[] Visualization

get_ipython().magic('matplotlib notebook')
from IPython.core.pylabtools import figsize
figsize(10, 15) 
plt.figure()
count = 0

# The script below iteratively estimates costs for different transport modes at different average speeds and plots the results

for AvgSpeed in [20, 30, 40]:
    
    FuelConsEst = EstFC(FuelCons,AvgSpeed,Scaling)
    [TrCostCar, TrCostTaxi,TrCostDrvNow] = CostAnalysis(dist, AvgSpeed,FuelConsEst,FuelPrice,InsTaxCar,TotalExpCar, DrvNowCostMin)
    
    count = count + 1
    index = 310 + count
    plt.subplot(index)
    
    plt.plot(dist,TrCostCar,'b.-')
Example #54
0
# -*- coding: utf-8 -*-
"""
Created on Sat Dec  5 15:15:30 2015

@author: Usamahk
"""

# Understanding the Bayesian Landscape - Plot a distribution of posterior
# probabilities

%matplotlib inline
import scipy.stats as stats
from IPython.core.pylabtools import figsize
import numpy as np
figsize(12.5, 4)

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

jet = plt.cm.jet
fig = plt.figure()
x = y = np.linspace(0, 5, 100)
X, Y = np.meshgrid(x, y)

plt.subplot(121)
uni_x = stats.uniform.pdf(x, loc=0, scale=5)
uni_y = stats.uniform.pdf(y, loc=0, scale=5)
M = np.dot(uni_x[:, None], uni_y[None, :])
im = plt.imshow(M, interpolation='none', origin='lower',
                cmap=jet, vmax=1, vmin=-.15, extent=(0, 5, 0, 5))
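# Hedged continuation sketch: the Axes3D import above suggests the standard
# companion panel, a 3D surface of the same uniform prior landscape; the
# subplot position and viewing angle are assumptions.
ax = fig.add_subplot(122, projection='3d')
ax.plot_surface(X, Y, M, cmap=plt.cm.jet, vmax=1, vmin=-.15)
ax.view_init(azim=390)
plt.title("Uniform prior landscape; alternate view")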
# 
# Original Sensitivity Analysis notebook: Will Usher, UCL Energy Institute, 10th December 2015 <br/>
# Updates to demonstrate running array jobs on a cluster: Richard West, 2016
# 
# In this version, most of the background and detail have been removed. Please refer to the original at https://github.com/SALib/SATut if you are not familiar with the system.

# In[1]:

from ipywidgets import widgets, interact
from IPython.display import display
get_ipython().magic('matplotlib inline')
import seaborn as sbn
import matplotlib.pyplot as plt
import numpy as np
from IPython.core.pylabtools import figsize
figsize(12, 10)
sbn.set_context("talk", font_scale=1)

# The model used for this seminar is contained in the file model.py
from model import cost_of_vehicle_to_grid, compute_profit, annualized_capital_cost, battery_lifetime, max_vehicle_power


# In[2]:

# Uncomment and execute the following line to see the contents of the `model.py` file
# %load model.py


# ## Using SALib to run a Sensitivity Analysis
# 
# As we saw earlier, SALib is a **free** **open-source** **Python** library which you can install by running the command `pip install SALib`.
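# Below is a hedged sketch of the canonical SALib workflow (define the problem, sample, run the model, analyze); the variable names, bounds, and toy model are placeholder assumptions, not the seminar's actual problem definition.

from SALib.sample import saltelli
from SALib.analyze import sobol
import numpy as np

problem = {
    'num_vars': 2,
    'names': ['x1', 'x2'],               # placeholder input names
    'bounds': [[0.0, 1.0], [0.0, 1.0]],  # placeholder input ranges
}
param_values = saltelli.sample(problem, 1024)      # N*(2D+2) input samples
Y = param_values[:, 0] + 2.0 * param_values[:, 1]  # toy model output
Si = sobol.analyze(problem, Y)                     # Sobol sensitivity indices
print(Si['S1'], Si['ST'])                          # first- and total-order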
Example #56
0
__author__ = 'william007'

from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
figsize(11, 9)

import scipy.stats as stats

dist = stats.beta
n_trials = [0, 1, 2, 3, 4, 5, 8, 15, 50, 500]
data = stats.bernoulli.rvs(0.5, size=n_trials[-1])
x = np.linspace(0, 1, 100)

# For those already familiar: I'm using the Binomial's conjugate prior, the Beta distribution.
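# With a uniform Beta(1, 1) prior on p and h heads observed in N tosses,
# conjugacy gives the posterior in closed form, Beta(1 + h, 1 + N - h),
# which is exactly the pdf evaluated in the loop below.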
for k, N in enumerate(n_trials):
    sx = plt.subplot(len(n_trials) // 2, 2, k + 1)
    if k in [0, len(n_trials) - 1]:
        plt.xlabel("$p$, probability of heads")
    plt.setp(sx.get_yticklabels(), visible=False)
    heads = data[:N].sum()
    y = dist.pdf(x, 1 + heads, 1 + N - heads)
    plt.plot(x, y, label="observe %d tosses,\n %d heads" % (N, heads))
    plt.fill_between(x, 0, y, color="#348ABD", alpha=0.4)
    plt.vlines(0.5, 0, 4, color="k", linestyles="--", lw=1)

    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    plt.autoscale(tight=True)

Example #57
0
def random_picks():
    figsize(14, 7)  # figure size in inches (width, height)
    style.use('ggplot')  # ggplot-style data visualization
    stock_list = sample(watchlist, 1)  # pick one random stock from the watchlist
    print(f'random stock pick from watchlist ${stock_list}')
    chart_dir = '/Users/MisterFili/Documents/misc_files/'
    #set current date & 1 year from now
    today = dt.datetime.now().date()
    end = dt.datetime(today.year, today.month, today.day)
    start = dt.datetime(today.year - 1, today.month, today.day)
    d_dash = today.strftime("%Y-%m-%d")

    for stock_pick in stock_list:
        # df = web.DataReader(f'{stock_pick}', 'yahoo', start=start, end=end)
        # df.to_csv(f'{stock_pick}.csv')
        df = pd.read_csv(f'{stock_pick}.csv', parse_dates=True, index_col=0)
        # parse_dates=True parses the index; dates are stored in column 0

        #CHECK TO SEE IF MARKETS ARE OPEN
        invert_df = df.sort_index(axis=0, ascending=False)
        mkt_date_check = invert_df.loc[d_dash]
        # if mkt_date_check.empty == True:
        #     print('dataframe empty!\n!!MARKET CLOSED!!')
        #     print('exiting')
        #     exit(1)
        #     # raise RuntimeError('data is empty')
        # else:
        #     print('MARKET OPEN!')
        # Resampling the time series data based on months
        # we apply it on stock close price
        # 'M' indicates month
        # monthly_resampled_data = df.close.resample('M').mean()
        # exponential moving averages for the chart overlays and the MACD
        for span in (200, 50, 20, 26, 12):
            df[f'{span}d_EMA'] = df.Close.ewm(span=span,
                                              min_periods=0,
                                              adjust=False,
                                              ignore_na=False).mean()

        # calculate the MACD line and its 9-day signal line
        df['mcad'] = df['12d_EMA'] - df['26d_EMA']
        df['macdsignal'] = df['mcad'].ewm(span=9, adjust=False).mean()
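        # Note: the MACD line is the 12-day EMA minus the 26-day EMA, and the
        # signal line is a 9-day EMA of the MACD line; a crossover between the
        # two is a common momentum heuristic.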

        df_ohlc = df['Adj Close'].resample('W-Fri').ohlc()
        # df_volume = df['Volume'].resample('W-Fri').sum() #This will give you ohlc data for the week ending on a Friday.

        edition = 87
        df_ohlc.reset_index(inplace=True)
        # don't want date to be an index anymore, reset_index
        # dates is just a regular column. Next, we convert it
        df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

        ax1 = plt.subplot2grid((6, 1), (0, 0),
                               rowspan=4,
                               colspan=1,
                               title=f"${stock_pick} STOCK")
        ax2 = plt.subplot2grid((6, 1), (5, 0),
                               rowspan=1,
                               colspan=1,
                               sharex=ax1,
                               title="MACD")

        candlestick_ohlc(ax1, df_ohlc.values, width=2, colorup='g', alpha=0.7)

        ax2.plot(df.index, df[['macdsignal']], label='Signal')
        ax2.plot(df.index, df[['mcad']], label='MACD')
        ax1.plot(df.index, df[['20d_EMA']], label='20d_EMA')
        ax1.plot(df.index, df[['50d_EMA']], label='50d_EMA')
        ax1.plot(df.index, df[['200d_EMA']], label='200d_EMA')
        # ax2.fill_between(df_volume.index.map(mdates.date2num), df_volume.values, 0) #x and y
        ax1.xaxis_date()  # convert the axis from raw mdate numbers to dates
        ax1.legend()
        ax2.legend()

        plt.savefig(f'{chart_dir}{stock_pick}{edition}.png',
                    bbox_inches='tight')
Example #58
0
    # make a graph to eyeball if the variable changed
    fig, ax = plt.subplots(figsize=(16, 8))
    plt.bar(np.arange(n_data), data, color="magenta")
    plt.xlabel('Time (days)')
    plt.ylabel('Number of cars arrived')
    plt.title('Did the number of cars arrived change over time?')

    # calculate posterior samples for the lambdas and tau
    lambda_1_samples, lambda_2_samples, tau_samples = calc_posteriors(data, n_data)

    # plot posteriors
    plt.rcParams.update({'font.size': 16})
    figsize(16, 10)

    # histogram of the samples:
    ax = plt.subplot(211)
    ax.set_autoscaley_on(False)

    plt.hist(lambda_1_samples,
             bins=30,
             alpha=0.85,
             label="posterior of $\lambda_1$",
             color="#8ecd00",
             density=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1$; $\lambda_2$""")
    plt.xlim([0, 30])
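    # Hedged sketch of the companion panel for lambda_2, mirroring the
    # lambda_1 histogram above; the color and shared x-limits are assumptions.
    ax = plt.subplot(212)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples,
             bins=30,
             alpha=0.85,
             label=r"posterior of $\lambda_2$",
             color="#7a68a6",
             density=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 30])
    plt.xlabel(r"$\lambda$ value")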
Example #59
0
    # Return the dataframe with missing information
    return mis_val_table_ren_columns
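
# For context, a minimal sketch of what a missing_values_table helper could
# look like, consistent with how it is used below; the original body is
# truncated above, so treat this as an assumption, not the author's code.
import pandas as pd

def missing_values_table_sketch(df):
    # per-column count and percentage of missing values
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * mis_val / len(df)
    table = pd.concat([mis_val, mis_val_percent], axis=1)
    table = table.rename(columns={0: 'Missing Values', 1: '% of Total Values'})
    # keep only columns with missing data, sorted by percentage descending
    table = table[table['% of Total Values'] != 0]
    return table.sort_values('% of Total Values', ascending=False).round(1)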


# Get the columns with > 50% missing values
missing_df = missing_values_table(data)
print(missing_df)
missing_columns = list(missing_df[missing_df['% of Total Values'] > 50].index)
print('We will remove %d columns.' % len(missing_columns))

# Drop the columns
data = data.drop(columns=list(missing_columns))

figsize(8, 8)

# Rename the score
data = data.rename(columns={'ENERGY STAR Score': 'score'})

# Histogram of the Energy Star Score
plt.style.use('fivethirtyeight')
plt.hist(data['score'].dropna(), bins=100, edgecolor='k')
plt.xlabel('Score')
plt.ylabel('Number of Buildings')
plt.title('Energy Star Score Distribution')
plt.show()

# Histogram Plot of Site EUI
figsize(8, 8)
plt.hist(data['Site EUI (kBtu/ft²)'].dropna(), bins=20, edgecolor='black')