def plotBestFit(weights):
    """Scatter the two label classes of the data set and draw a fitted line.

    The data comes from ``load_dataset()``; columns 1 and 2 of each row are
    used as the plot coordinates.

    Args:
        weights: indexable with at least three entries. The drawn line
            satisfies ``weights[0] + weights[1] * x1 + weights[2] * x2 == 0``.
    """
    data_mat, label_mat = load_dataset()
    data_arr = np.array(data_mat)
    n = np.shape(data_arr)[0]

    # Split the points by label: label 1 goes to the first group, every
    # other label to the second.
    xcord_1, ycord_1 = [], []
    xcord_2, ycord_2 = [], []
    for i in range(n):
        if int(label_mat[i]) == 1:
            xcord_1.append(data_arr[i, 1])
            ycord_1.append(data_arr[i, 2])
        else:
            xcord_2.append(data_arr[i, 1])
            ycord_2.append(data_arr[i, 2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord_1, ycord_1, s=30, c="red", marker='s')
    ax.scatter(xcord_2, ycord_2, s=30, c="green")

    # Solve w0 + w1*x1 + w2*x2 = 0 for x2 over a fixed x1 range.
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    # pyplot has no "xlable"; the original call raised AttributeError.
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
    def plot_bar_pdf(self):
        """Plot ``self.pdf(k)`` for k = 0..self.n as a bar chart.

        Args:
            None

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot

        """
        x = list(range(self.n + 1))
        y = [self.pdf(k) for k in x]

        plt.bar(x, y)
        plt.title("result destribution")
        # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
        plt.xlabel("result")
        plt.ylabel("probablity od destribution")

        return x, y
Пример #3
0
    def plot_bar(self):
        """Draw a two-bar chart of the counts derived from ``self.n`` and
        ``self.p`` using the matplotlib pyplot library.

        The bar at x = 1 has height ``self.n * self.p`` and the bar at x = 0
        has height ``self.n * (1 - self.p)``.

        Args:
            None

        Returns:
            None
        """
        x_pos = [1, 0]
        y = [self.n * self.p, self.n * (1 - self.p)]
        plt.bar(x_pos, y)
        plt.title("Binomia Distribution Bar Chart")
        # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
        plt.xlabel("senario")
        plt.ylabel("occurrences")
Пример #4
0
    def plot_bar_pdf(self):
        """Plot ``self.pdf(k)`` for k = 0..self.n as a bar chart.

        Args:
            None

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot

        """
        # The original used a bare ``n`` (NameError); the count lives on the
        # instance.
        x_pos = list(range(self.n + 1))
        y = [self.pdf(k) for k in x_pos]

        plt.bar(x_pos, y)
        plt.title("Probability Density Function Bar Chart")
        # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
        plt.xlabel("k")
        plt.ylabel("density function")

        # Return the plotted values as two lists, as the docstring promises.
        return x_pos, y
Пример #5
0
 def printPlot(self):
     """Plot one column of ``self.df`` against another, then print the frame.

     The x values come from column 'סיכום ברוטו' and the y values from
     column 'מס הכנסה'; the same names are used as the axis labels.
     """
     # Label the line so that plt.legend() has an entry to show.
     plt.plot(self.df['סיכום ברוטו'], self.df['מס הכנסה'], label='מס הכנסה')
     # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
     plt.xlabel('סיכום ברוטו')
     plt.ylabel('מס הכנסה')
     # Backslash escaped explicitly; the resulting text is unchanged.
     plt.title('יחס שכר\\מס')
     plt.legend()
     print(self.df)
def plot_matches_by_team():
    """Bar-chart the number of distinct ``match_code`` values per
    ``batting_team`` in the module-level ``ipl_df`` frame."""
    matches = (ipl_df[['batting_team', 'match_code']]
               .groupby(['batting_team'])
               .agg('nunique'))
    positions = np.arange(len(matches.index))
    # The original read from an undefined ``y1``; the counts are in the
    # grouped frame built above.
    plt.bar(positions, matches['match_code'])
    # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
    plt.xlabel('Team Names')
    plt.ylabel('Matches Played')
    # Show the team names, rotated, in place of the numeric positions.
    plt.xticks(positions, matches.index.values, rotation=90)
    plt.show()
 def plot_image(self):
     """Save two scatter plots of ``self.commits``.

     The first plots commits against ``self.sublevels`` and is written to
     ``sublevel_<rev>.png``; the second plots commits against
     ``self.release_hours`` and is written to ``hours_<rev>.png``, where
     ``<rev>`` is ``self.rev``.
     """
     plt.scatter(self.sublevels, self.commits)
     # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
     plt.ylabel('fix commits')
     plt.xlabel('kernel sublevel')
     plt.savefig("sublevel_%s.png" % self.rev)
     # Clear the current figure so the second plot starts empty.
     plt.clf()
     plt.scatter(self.release_hours, self.commits)
     plt.ylabel('fix commits')
     plt.xlabel('hours')
     plt.savefig("hours_%s.png" % self.rev)
Пример #8
0
 def scan(self):
     """Run repeated wavelength scans, accumulate the traces and save them.

     Scan settings are read from ``self.parameters.widget.get()`` (keys
     'Filename', 'Start', 'Stop', 'Speed', 'Num Scan'). The accumulated
     spectrum and the wavelength axis are plotted and written as two
     comma-separated lines to ``<Filename>.dat``.
     """
     param = self.parameters.widget.get()
     filename = param['Filename']
     # Both files are opened in a ``with`` block so they are closed on every
     # exit path. The second file is created (truncated) as before, but
     # nothing is written to it in this method.
     with open(filename + '.dat', 'w') as F, \
             open(filename + 'wavelength.dat', 'w'):
         # presumably converts metres to nanometres — TODO confirm units
         start_wavelength = param['Start'].magnitude * 1e9
         stop_wavelength = param['Stop'].magnitude * 1e9
         speed = param['Speed'].magnitude * 1e9
         n = param['Num Scan']
         self.spec = []
         with Client(self.conn1) as dlc:
             dlc.set("laser1:ctl:scan:wavelength-begin", start_wavelength)
             dlc.set("laser1:ctl:scan:wavelength-end", stop_wavelength)
             dlc.set("laser1:ctl:scan:speed", speed)
             dlc.set("laser1:ctl:scan:microsteps", True)
             # NOTE(review): "shaple" looks like a typo for "shape" — confirm
             # against the device parameter list before changing it.
             dlc.set("laser1:ctl:scan:shaple", 1)  #0=Sawtooth,1=Triangle
             dlc.set("laser1:ctl:scan:trigger:output-enabled", True)
             # NOTE(review): range(n - 1) performs one scan fewer than
             # 'Num Scan' — confirm this is intended.
             for x in range(n - 1):
                 dlc.set("laser1:ctl:wavelength-set", start_wavelength)
                 dlc.set("laser1:ctl:scan:trigger:output-threshold",
                         start_wavelength + 0.1)
                 # Busy-wait until the queried value reads False.
                 # NOTE(review): the "+0.1" suffix in this parameter name
                 # looks accidental — confirm.
                 while True:
                     st = dlc.get("io:digital-out2:value-act+0.1")
                     if st == False:
                         break
                 dlc.set("laser1:ctl:scan:trigger:output-threshold",
                         stop_wavelength)
                 time.sleep(0.5)
                 act_start = self.wm.measure_wavelength()
                 dlc.exec("laser1:ctl:scan:start")
                 daq.start()
                 if dlc.get("io:digital-out2:value-act"):
                     dlc.exec("laser1:ctl:scan:pause")
                     data = daq.read(nidaqmx.constants.READ_ALL_AVAILABLE)
                     daq.wait_until_done()
                     self.xs.append(data)
                     daq.stop()
                     act_stop = self.wm.measure_wavelength()
                     # NOTE(review): prints the total count ``n``, not the
                     # loop index ``x`` — confirm.
                     print('%d scan: act start = %f, act stop = %f' %
                           (n, act_start, act_stop))
             # NOTE(review): self.spec starts as a list and self.xs is
             # appended to like a list, yet both are used with array
             # arithmetic/indexing here — verify the types set up elsewhere.
             for i in range(n - 1):
                 self.spec = self.spec + 1 / n * self.xs[i, :]
             # NOTE(review): act_stop is only assigned inside the ``if``
             # above and may be unbound here.
             self.wl = np.linspace(act_start, act_stop, len(self.spec))
             plt.plot(self.wl, self.spec)
             # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised
             # AttributeError.
             plt.xlabel('wavelength/nm')
             plt.ylabel('transmission')
             for item in self.spec:
                 F.write("%f," % item)
             F.write("\n")
             for item in self.wl:
                 F.write("%f," % item)
             return
Пример #9
0
def makeGraph(data, loan):
    """Plot the second element of each data point against the first.

    Args:
        data: iterable of indexable points; ``point[0]`` is used as the x
            value and ``point[1]`` as the y value.
        loan: mapping with the keys ``'intrest'`` and ``'monthly'``, which
            are shown in the chart title.
    """
    xcor = []
    ycor = []
    for point in data:
        xcor.append(point[0])
        ycor.append(point[1])
    pyplot.plot(xcor, ycor)
    pyplot.title(
        str(100 * loan['intrest']) + "% Intrest With $" +
        str(loan['monthly']) + " Monthy Payments")
    # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
    pyplot.xlabel("Month")
    pyplot.ylabel("Principal")
    pyplot.show()
 def plot_bar(self):
     """Draw a two-bar chart of the counts derived from ``self.n`` and
     ``self.p`` using the matplotlib pyplot library.

     Args:
         None

     Returns:
         None
     """
     # plt.bar takes the bar heights as ``height``; the original ``y=``
     # keyword is not accepted.
     # presumably self.p is the probability of outcome '1', so '0' gets
     # n * (1 - p) and '1' gets n * p — TODO confirm
     plt.bar(x=['0', '1'],
             height=[self.n * (1 - self.p), self.n * self.p])
     plt.title('Bar chart of the data')
     # pyplot exposes xlabel/ylabel; the original also used an undefined
     # ``plot`` name for the y label.
     plt.xlabel('result')
     plt.ylabel('repetition')
Пример #11
0
def plotmtl(datac, iii, system, agecol, pgraph, pfolder):
    """
    Function to plot the MTL data

    Two overlaid, semi-transparent bar series are drawn for row ``iii`` of
    ``datac``: 17 values starting at column ``agecol[system]`` (label
    'MTL') and 17 values starting at column ``agecol[system] + 18`` (label
    'Predicted MTL').

    INPUT:
        datac   : 2-D indexable data; one row is plotted
        iii     : row index into ``datac``
        system  : key/index into ``agecol``
        agecol  : maps ``system`` to the first column of the MTL values
        pgraph  : not used in this function
        pfolder : not used in this function

    OUTPUTS:
        No outputs

    USAGE:
        plotmtl(datac, iii, system, agecol, pgraph, pfolder)

    """

    # open a new figure
    #plt.figure(iii+3)
    first_col = agecol[system]
    lrange = np.arange(0.5, 17.5)

    mtl = datac[iii, first_col:first_col + 17]
    # The keyword is ``width``; the original "widtch" is rejected by plt.bar.
    plt.bar(lrange,
            mtl,
            width=1,
            bottom=None,
            label='MTL',
            color='r',
            alpha=0.5)

    mtlp = datac[iii, first_col + 18:first_col + 18 + 17]
    plt.bar(lrange,
            mtlp,
            width=1,
            bottom=None,
            label='Predicted MTL',
            color='b',
            alpha=0.5)

    # pyplot has no "xlable", and set_title is an Axes method; the pyplot
    # equivalents are xlabel and title.
    plt.xlabel(agecol[system])
    plt.ylabel('Frequency')
    plt.legend(loc='best', numpoints=1)
    plt.title('Sample number ' + str(iii + 1))

    return
    def plot_histogram(self):
        """Method to output a histogram of the instance variable data using
        matplotlib pyplot library.

        Args:
            None

        Returns:
            None
        """
        plt.hist(self.data)
        plt.title("Histogram of data")
        # pyplot has no "xlable"; the original call raised AttributeError.
        plt.xlabel('data')
        plt.ylabel('count')
def describe_year(year):
    """Draw a bar chart of the summed ``value`` column for one year.

    Rows of the module-level ``df`` whose ``year`` column equals ``year``
    are aggregated, and the result is plotted as bars.

    Args:
        year: value compared against ``df['year']``; also shown in the title.
    """
    filtereddf = df.filter(df['year'] == year).agg({
        'value': 'sum'
    }).withColumnRenamed('sum(value)', 'convictions')
    # NOTE(review): no groupBy is visible before agg(), yet two fields per
    # row are read below — a groupBy on the borough column may be missing;
    # confirm.
    rows = list(filtereddf.toLocalIterator())
    burough_list = [row[0] for row in rows]
    conviction_list = [row[1] for row in rows]

    # Create the figure and bars first, so that the title, labels and ticks
    # below apply to the figure that actually holds the chart.
    plt.figure(figsize=(33, 10))
    plt.bar(burough_list, conviction_list)

    # The keyword is ``fontsize`` and the functions are xlabel/ylabel/
    # xticks/yticks; the original misspellings raised errors.
    plt.title('Crime for the year:' + str(year), fontsize=30)
    plt.xlabel('Boroughs', fontsize=30)
    plt.ylabel('Convictions', fontsize=30)

    plt.xticks(rotation=90, fontsize=30)
    plt.yticks(fontsize=30)
    plt.autoscale()

    plt.show()
Пример #14
0
def visualize_the_confusion_matrix(list_z_score_data):
    """Render a matrix as an image with a fixed set of eleven tick labels.

    Args:
        list_z_score_data: 2-D data passed to ``plt.imshow``.
    """
    list_label = [
        'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10',
        'constitution'
    ]

    plt.imshow(list_z_score_data, interpolation='nearest')
    plt.title("可视化混淆矩阵")
    plt.colorbar()

    # The same positions and labels are used on both axes.
    tick_positions = numpy.arange(len(list_label))
    plt.xticks(tick_positions, list_label, rotation=90)
    plt.yticks(tick_positions, list_label)

    # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

    # cm = confusion_matrix(y_true, y_pred)
    numpy.set_printoptions(precision=2)
Пример #15
0
def gmeans(X,alpha=0.0001,k=1):
    """Grow the number of k-means clusters until every cluster passes a
    normality test, then scatter-plot the first two columns of the data.

    Starting from ``k`` clusters, each cluster whose ``normalityTest``
    p-value is ``<= alpha`` is replaced by the two centers of a 2-means fit
    on its points. K-means is then re-run on all data with the new centers
    as initialisation. This repeats until no cluster is split.

    Args:
        X: data to cluster; must support boolean-mask row selection.
        alpha: p-value threshold at or below which a cluster is split.
        k: initial number of clusters.
    """
    trialData = X
    initresult = KMeans(n_clusters=k).fit(trialData)
    centers = initresult.cluster_centers_

    needtoinc = True
    while needtoinc:
        needtoinc = False
        # Collect the replacement centers for every current cluster.
        # Stacking them all at once keeps ``centers`` 2-D even when the
        # first (or only) cluster is not split.
        pieces = []
        for i in range(centers.shape[0]):
            normTestData = np.matrix(trialData[initresult.labels_ == i])
            pvalue = normalityTest(normTestData)
            if pvalue <= alpha:
                needtoinc = True
                split = KMeans(2).fit(np.asarray(normTestData))
                pieces.append(split.cluster_centers_)
            else:
                pieces.append(centers[i, :])
        centers = np.vstack(pieces)
        initresult = KMeans(centers.shape[0], init=centers).fit(trialData)
        centers = initresult.cluster_centers_

    # A single pre-formatted argument prints the same text under both the
    # print statement and the print function.
    print('optimal no of clusters: %d' % centers.shape[0])
    # np.asarray replaces DataFrame.as_matrix(), which newer pandas removed.
    x = np.asarray(trialData)
    plt.figure()
    plt.scatter(x[:,0],x[:,1],c=initresult.labels_)
    # pyplot exposes xlabel/ylabel; these lines were also tab-indented,
    # which is an indentation error next to the space-indented body.
    plt.xlabel('x1')
    plt.ylabel('x2')
Пример #16
0
 def PlotbyMonth(self,Indicator,level,name='null'):
     '''
     This method generate a time series plot for a collision statistics demanded
     by the user

     The table is built by ``self.Table_Dict[level](Indicator, name)``. When
     ``name`` is 'null' the row sums are plotted, otherwise the column sums.
     The figure is saved under the same text that is used as its title, and
     is then shown.
     '''
     # The original looked up an undefined ``Level``; the parameter is
     # ``level``.
     df = self.Table_Dict[level](Indicator,name)
     if name == 'null':
         series = df.sum(axis = 1)
         caption = 'Time Series analysis on' + level + 'level'
     else:
         series = df.sum(axis = 0)
         caption = 'Time Series analysis for' + name
     series.plot(kind = 'line')
     plt.title(caption)
     plt.ylabel(self.IndicatorPrint[Indicator])
     # pyplot has no "xlable"; the original call raised AttributeError.
     plt.xlabel('Time')
     plt.savefig(caption)
     plt.show()
Пример #17
0
def plot_return_risk():
    """Scatter the two series returned by ``return_risk(stocks)`` and
    annotate each point with the matching key of ``stocks``."""
    ret, vol = return_risk(stocks)
    # One colour value per point, mapped through the 'Spectral' colormap.
    color = np.array([0.18, 0.96, 0.75, 0.3, 0.9, 0.5])
    # ``np.arry`` and the ``camp`` keyword were typos for np.array / cmap.
    plt.scatter(ret,
                vol,
                marker='o',
                c=color,
                s=500,
                cmap=plt.get_cmap('Spectral'))
    # pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
    plt.xlabel("日收益率均值%")
    plt.ylabel("标准差%")
    for label, x, y in zip(stocks.keys(), ret, vol):
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(20, 20),
                     textcoords="offset points",
                     ha="right",
                     va="bottom",
                     bbox=dict(boxstyle='round,pad=0.5',
                               fc='yellow',
                               alpha=0.5),
                     # The arrow property is spelled "connectionstyle".
                     arrowprops=dict(arrowstyle="->",
                                     connectionstyle="arc3,rad=0"))
Пример #18
0
# Build the model inputs: the test rows plus the 60 rows that precede them.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total)-len(dataset_test) - 60 : ].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)

# One 60-step window per test row. The upper bound follows the length of
# ``inputs`` instead of a hard-coded 80, which only matched 20 test rows.
X_test = []
for i in range(60, len(inputs)):
    X_test.append(inputs[i-60:i, 0])

X_test = np.array(X_test)
X_test = np.reshape(X_test,(X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)

# inversing the scaling normalization
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plotting the graphs
plt.plot(real_stock_price, color = 'red', label = 'Real Google Stock Price')
plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
# pyplot exposes xlabel/ylabel; "xlable"/"ylabe" raised AttributeError.
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()






Пример #19
0
                V0,
                options_data.loc[option]['STRIKE'],
                options_data.loc[option]['TTM'],
                r,
                options_data.loc[option]['PRICE'],
                sigma_est=2,
                it=100)
        options_data['IMP_VOL'].loc[option]=imp_vol

# Plot the implied volatilities, one line per maturity.
plot_data=options_data[options_data['IMP_VOL']>0]

maturities = sorted(set(options_data['MATURITY']))

plt.figure(figsize=(8,6))
for maturity in maturities:
    # Select the rows for this maturity. The mask is built from plot_data
    # itself and uses the 'MATURITY' column that is read above; the original
    # used ``options_data.Maturity``.
    data=plot_data[plot_data['MATURITY'] == maturity]
    # The keyword is ``label``; "lable" is rejected by plt.plot.
    plt.plot(data['STRIKE'],data['IMP_VOL'],label=maturity.date(),lw=1.5)
    plt.plot(data['STRIKE'],data['IMP_VOL'],'r.')
plt.grid(True)
# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
plt.xlabel('strike')
plt.ylabel('implied volatility of volatitlity')
plt.legend()
plt.show()

# Group the data by maturity and strike and sum the kept columns.
keep = ['PRICE','IMP_VOL']
group_data=plot_data.groupby(['MATURITY','STRIKE'])[keep]
group_data=group_data.sum()
group_data.head()
Пример #20
0
# The original indentation nested each later plot inside the loop of the
# previous one. That showed a figure on every iteration and re-assigned
# ``colors`` mid-loop, so the three plots are written here as consecutive
# top-level sections.

# plot-1 between sepal length and sepal width
for i, color in enumerate(colors):
    px = features[:, 0][labels == i]
    py = features[:, 1][labels == i]
    plt.scatter(px, py, c=color)
plt.legend(labels_names)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.show()

# plot-2 between petal length and petal width
# NOTE(review): columns 1 and 2 are plotted, and column 1 is also the
# "Sepal Width" axis above — the petal columns may be 2 and 3; confirm
# against the layout of ``features``.
for i, color in enumerate(colors):
    px = features[:, 1][labels == i]
    py = features[:, 2][labels == i]
    plt.scatter(px, py, c=color)
plt.legend(labels_names)
# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()

# Estimating two principle componets using PCA
est = PCA(n_components=2)
x_pca = est.fit_transform(features)

colors = ['black', 'orange', 'pink']
for i, color in enumerate(colors):
    px = x_pca[:, 0][labels == i]
    py = x_pca[:, 1][labels == i]
    plt.scatter(px, py, c=color)
plt.legend(labels_names)
plt.xlabel('First Principle Component')
plt.ylabel('Second Principle Component')
                                                    test_size=0.2,
                                                    random_state=0)

# Fitting Multiple Linear Regression on the dataset
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(x_test)

# Visualising the test set without the backward elimination method.
# Column 20 of the test features is used as the x axis; the predictions
# computed above are reused for the line.
plt.scatter(x_test[:, [20]], y_test, color='red')
plt.plot(x_test[:, [20]], y_pred, color='green')
plt.title('F/R vs S/R (Without Backward Elimination Method)')
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('F/R')
plt.ylabel('S/R')
plt.show()

# Building the optimal model using Backward Elimination
# The OLS class is exposed by statsmodels.api; statsmodels.formula.api
# provides the lowercase formula interface (``ols``) instead.
import statsmodels.api as sm
# Prepend a column of ones to act as the intercept term. The row count is
# taken from ``x`` rather than hard-coded, so it always matches.
x = np.append(arr=np.ones((x.shape[0], 1)).astype(int), values=x, axis=1)
# Start with every column: the intercept plus predictors 1..21.
x_opt = x[:, list(range(22))]
regressor_OLS = sm.OLS(endog=y, exog=x_opt).fit()
regressor_OLS.summary()

# removin' index 14 predictor from the original x matrix because of highest p value for teamin' up x_opt with optimal predictors
x_opt = x[:, [
Пример #22
0
                                                    random_state=0)

# Feature Scaling
"""from sklearn.preprocessing import StandardScalar
sc_X = StandardScalar()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)"""

# Fitting simple linear regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting results
y_pred = regressor.predict(X_test)

# Plotting the training set with the fitted line
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training Set)')
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Plotting the test set. The line is still drawn from the training inputs,
# i.e. it is the same fitted line as in the first figure.
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
Пример #23
0
              optimizer=opt,
              metrics=["accuracy"])

# Train the network on augmented batches.
print("Training network")
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
                        validation_data=(testX, testY),
                        steps_per_epoch=len(trainX) // BS,
                        epochs=EPOCHS,
                        verbose=1)

# Save the model.
# The original indexed ``args`` with the model object itself; the string key
# is used here, matching the args["plot"] lookup below.
print("Serializing Network")
model.save(args["model"])

# Plot the training history, one point per epoch.
plt.style.use("ggplot")
plt.figure()
N = EPOCHS
# numpy has no ``parse``; np.arange(0, N) gives the epoch indices.
epochs_axis = np.arange(0, N)
plt.plot(epochs_axis, H.history["loss"], label="train_loss")
plt.plot(epochs_axis, H.history["val_loss"], label="val_loss")
# NOTE(review): some Keras versions store these under "accuracy" /
# "val_accuracy" — confirm against the installed version.
plt.plot(epochs_axis, H.history["acc"], label="train_acc")
plt.plot(epochs_axis, H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy")
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(args["plot"])
Пример #24
0
        imp_vol = bsm_call_imp_vol(V0,
                                   options_data.loc[option]['STRIKE'],
                                   options_data.loc[option]['TTM'],
                                   r,
                                   options_data.loc[option]['PRICE'],
                                   sigma_est=2,
                                   it=100)
        options_data['IMP_VOL'].loc[option] = imp_vol

# Plot the implied volatilities, one line per maturity.
plot_data = options_data[options_data['IMP_VOL'] > 0]

maturities = sorted(set(options_data['MATURITY']))

plt.figure(figsize=(8, 6))
for maturity in maturities:
    # Select the rows for this maturity. The mask is built from plot_data
    # itself and uses the 'MATURITY' column that is read above; the original
    # used ``options_data.Maturity``.
    data = plot_data[plot_data['MATURITY'] == maturity]
    # The keyword is ``label``; "lable" is rejected by plt.plot.
    plt.plot(data['STRIKE'], data['IMP_VOL'], label=maturity.date(), lw=1.5)
    plt.plot(data['STRIKE'], data['IMP_VOL'], 'r.')
plt.grid(True)
# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
plt.xlabel('strike')
plt.ylabel('implied volatility of volatitlity')
plt.legend()
plt.show()

# Group the data by maturity and strike and sum the kept columns.
keep = ['PRICE', 'IMP_VOL']
group_data = plot_data.groupby(['MATURITY', 'STRIKE'])[keep]
group_data = group_data.sum()
group_data.head()
Пример #25
0
# Print the first five rows of the data.
print(df_train.head())
print("Total number of question pairs for training: {}".format(len(df_train)))
print("Duplicate pairs: {}%".format(
    round(df_train['is_duplicate'].mean() * 100, 2)))
qids = pd.Series(df_train['qid1'].tolist() + df_train['qid2'].tolist())
# How often each question id occurs; reused for the count and the histogram.
qid_counts = qids.value_counts()
print("Total number of questions in the training data: {}".format(
    len(np.unique(qids))))
print("number of questions that appear multiple times: {}".format(
    np.sum(qid_counts > 1)))

plt.figure(figsize=(12, 5))
plt.hist(qid_counts, bins=50)
# NOTE(review): newer matplotlib releases renamed ``nonposy`` to
# ``nonpositive`` — confirm against the installed version.
plt.yscale("log", nonposy='clip')
plt.title("Log-Histogram of question appearance counts")
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel("Number of occurences of question")
plt.ylabel("Number of questions")

# Baseline: predict the mean duplicate rate for every pair.
p = df_train['is_duplicate'].mean()
print(
    "predicted_score: ",
    log_loss(df_train['is_duplicate'],
             np.zeros_like(df_train['is_duplicate']) + p))

# The test file is read once and reused below; the original read it twice.
df_test = pd.read_csv("input/test.csv")
sub = pd.DataFrame({'test_id': df_test['test_id'], 'is_duplicate': p})
sub.to_csv("naive_submission.csv", index=False)
print(sub.head())

print(df_test.head())
Пример #26
0
#print(accuracy)

# predict

forecast_set = clf.predict(X_lately)

print(X_lately)
print(forecast_set, accuracy, forecast_out)

df['Forecast'] = np.nan
last_date = df.iloc[-1].name
last_unix = last_date.timestamp()
one_day = 86400  # seconds per day
next_unix = last_unix + one_day

# Append one row per forecast value, dated one day after the previous row.
# Every column is NaN except the last, which holds the forecast.
for i in forecast_set:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]

df['Adj. Close'].plot()
# The column created above is 'Forecast'; 'Forecats' raised KeyError.
df['Forecast'].plot()
plt.legend(loc=4)
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()



# Candidate neighbour counts: 6, 8, ..., 18.
k = range(6, 20, 2)


# In[187]:


# calling the above defined function
test = Elbow(k)


# In[188]:


# plotting the curves
plt.plot(k, test)
# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
plt.xlabel('K Neighbors')
plt.ylabel(' Test error')
plt.title('Elbow curve error')


# In[189]:


# creating instance of KNN
clf = KNN(n_neighbors=12)

# fitting the model
clf.fit(train_x,train_y)


# predicting over the main set and calculating F1
# Upper-confidence-bound selection over ``d`` ads for ``N`` rounds.
numbers_of_selection = [0] * d
sum_of_rewards = [0] * d
# The original created ``adds_selected`` but appended to and plotted
# ``ads_selected``, which raised NameError.
ads_selected = []
total_reward = 0
for n in range(0, N):
    ad = 0
    max_upper_bound = 0
    for i in range(0, d):
        if numbers_of_selection[i] > 0:
            average_reward = sum_of_rewards[i] / numbers_of_selection[i]
            # 1.5 is 3/2 written as a float so that the factor does not
            # depend on integer-division rules.
            delta_i = math.sqrt(1.5 * math.log(n + 1) /
                                numbers_of_selection[i])
            upper_bound = average_reward + delta_i
        else:
            # An ad that was never selected gets an infinite bound, so it
            # is tried before any ad with a finite bound (same value the
            # original's 1e400 literal evaluates to).
            upper_bound = float('inf')
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selection[ad] += 1
    reward = dataset.values[n, ad]
    sum_of_rewards[ad] += reward
    total_reward += reward

# visualising The result

plt.hist(ads_selected)
plt.title("Histogram of Ads Selesction")
# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
plt.xlabel("Ads")
plt.ylabel("number of times")
Пример #29
0
train = train.set_index('PassengerId')

# Bar chart of the Survived value counts.
train.Survived.value_counts().plot(kind='bar')
plt.ylabel('frequency')
plt.title('survival(1)')

# Bar chart of the Pclass value counts.
train.Pclass.value_counts().plot(kind='bar')
plt.ylabel('frequency')
plt.title('The distribution of passengers class')

# Stacked bars: Pclass counts split by Survived == 1 / Survived == 0.
survived_0 = train.Pclass[train.Survived == 0].value_counts()
survived_1 = train.Pclass[train.Survived == 1].value_counts()
df_survived = pd.DataFrame({'Survived': survived_1, 'Nonsurvived': survived_0})
df_survived.plot(kind='bar', stacked=True)
plt.title('The distribution of survivors based on passengers class')
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('Pclass')
plt.ylabel('Frequency')

# Stacked bars: Survived counts split by sex.
survived_m = train.Survived[train.Sex == 'male'].value_counts()
survived_f = train.Survived[train.Sex == 'female'].value_counts()
df_survived = pd.DataFrame({'Male': survived_m, 'Female': survived_f})
df_survived.plot(kind='bar', stacked=True)
plt.title('The distribution of survivors based on sex')
plt.xlabel('Sex')
plt.ylabel('Frequency')

# Fill missing values with the column means.
train = train.fillna(train.mean())

train.groupby('Embarked').Survived.value_counts()
train[train.Embarked.isnull()]  #return missing values of Embark
# Fill whatever is still missing with the most frequent Embarked value.
train = train.fillna(train.Embarked.value_counts().index[0])
Пример #30
0
# Visualizing the training set results
# Imported here because the only visible import of ListedColormap in this
# file comes after this first use.
from matplotlib.colors import ListedColormap

X_set,y_set=X_train,y_train
# Dense grid covering the range of both features, padded by 1 on each side.
X1,X2=np.meshgrid(np.arange(start=X_set[:,0].min()-1, stop=X_set[:,0].max()+1, step=0.01),
                  np.arange(start=X_set[:,1].min()-1, stop=X_set[:,1].max()+1, step=0.01))
# ListedColormap takes ONE sequence of colours; passing 'red' and 'green' as
# two positional arguments makes 'green' the colormap name instead.
plt.contourf(X1,X2, classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,cmap=ListedColormap(('red','green')))

plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())


for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set==j,0], X_set[y_set==j,1],
                c=ListedColormap(('red','green'))(i), label=j)
# The plotted data is X_train/y_train, so the title names the training set
# (the original said "Test set").
plt.title('Logistic Regression (Training set)')
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()



# Visualizing the test set results
from matplotlib.colors import ListedColormap

X_set,y_set=X_test,y_test
# Dense grid covering the range of both features, padded by 1 on each side.
X1,X2=np.meshgrid(np.arange(start=X_set[:,0].min()-1, stop=X_set[:,0].max()+1, step=0.01),
                  np.arange(start=X_set[:,1].min()-1, stop=X_set[:,1].max()+1, step=0.01))
# ListedColormap takes ONE sequence of colours; passing 'red' and 'green' as
# two positional arguments makes 'green' the colormap name instead.
plt.contourf(X1,X2, classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,cmap=ListedColormap(('red','green')))
N = int(T / dt)

t = [i * dt for i in range(N)]

na = [None] * N

nb = [None] * N  # Initialise the arrays (lists) and constants used in the calculation

# Read the initial values interactively. The entries are converted to float
# because they are used in arithmetic below; "//" is not a comment marker in
# Python, so the original trailing notes were syntax errors.
na[0] = float(input('A的初始原子数'))

ta = float(input('A的衰变常数'))

nb[0] = float(input('B的初始原子数'))

tb = float(input('B的衰变常数'))

# Solve the coupled differential equations numerically with Euler's method:
# A decays with constant ta, B decays with constant tb and is fed by A.
for i in range(N - 1):

    na[i + 1] = na[i] - dt * na[i] / ta

    nb[i + 1] = nb[i] - dt * nb[i] / tb + dt * na[i] / ta

# Plot both populations over time.
pyp.plot(t, na, 'k', t, nb, 'r')

# The values are numbers now, so they are converted explicitly for the title.
pyp.title('NA=' + str(na[0]) + '  Ta=' + str(ta) + 'NB=' + str(nb[0]) + '  Tb=' + str(tb))

# pyplot exposes xlabel/ylabel; "xlable"/"ylable" raised AttributeError.
pyp.xlabel('时间/年')

pyp.ylabel('粒子数/个')
Пример #32
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import the dataset; columns 3 and 4 are used as the two features.
dataset = pd.read_csv("Mall.csv")
X = dataset.iloc[:, [3, 4]].values

# Plot a dendrogram to find the number of clusters.
import scipy.cluster.hierarchy as sch
# The SciPy function is spelled ``dendrogram``; ``sch.dendogram`` raised
# AttributeError.
dendogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendogram')
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('Customers')
plt.ylabel("Distance")
plt.show()

# Fit hierarchical clustering to the dataset.
from sklearn.cluster import AgglomerativeClustering
hc = AgglomerativeClustering(n_clusters=5,
                             affinity='euclidean',
                             linkage='ward')
y_hc = hc.fit_predict(X)

# Visualise the clusters.
plt.scatter(X[y_hc == 0, 0],
            X[y_hc == 0, 1],
            s=100,
            c='red',
            label='Cluster 1')
plt.scatter(X[y_hc == 1, 0],
            X[y_hc == 1, 1],
Пример #33
0
import matplotlib.pyplot as plt
# Plot four values against their implicit indices 0..3.
plt.plot([1, 2, 3, 4])
# pyplot has no "xlable"; the original call raised AttributeError.
plt.xlabel('')
plt.ylabel('some numbers')
plt.show()