def draw_bar(grades):
    """Show a bar chart with the frequency of each letter grade.

    Args:
        grades: iterable of grade labels. Only 'A' through 'E' are drawn;
            any other label is counted but never plotted.
    """
    xticks = ['A', 'B', 'C', 'D', 'E']

    # Tally how many times each grade occurs.
    counts = {}
    for grade in grades:
        if grade in counts:
            counts[grade] += 1
        else:
            counts[grade] = 1

    # One bar per grade: x positions first, bar heights second.
    # align='center' centres each bar on its x position.
    positions = range(5)
    bar_heights = [counts.get(xtick, 0) for xtick in xticks]
    plt.bar(positions, bar_heights, align='center')

    # Label each bar position with its grade letter.
    plt.xticks(positions, xticks)

    # Axis captions and chart title.
    plt.title('Grades Of Male Students')
    plt.xlabel('Grade')
    plt.ylabel('Frequency')

    # Render the figure.
    plt.show()
示例#2
0
def predict_prices(dates, prices, x):
    """Fit linear, polynomial and RBF SVR models and plot them against the data.

    Args:
        dates: 1-D sequence of date values; reshaped to a single-feature
            column before fitting.
        prices: target values, one per date.
        x: sample(s) to predict for, passed unchanged to each model's
            ``predict`` (presumably a 2-D array-like of shape (n, 1) -- confirm
            against the caller).

    Returns:
        Tuple ``(rbf, linear, polynomial)`` holding the first prediction of
        each model for ``x``.
    """
    dates = np.reshape(dates, (len(dates), 1))

    # The linear model was originally bound to ``svr_len`` but used as
    # ``svr_lin`` everywhere else, which raised NameError on the first fit.
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.scatter(dates, prices, color='black', label='data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates,
             svr_lin.predict(dates),
             color='green',
             label='Linear model')
    plt.plot(dates,
             svr_poly.predict(dates),
             color='blue',
             label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return svr_rbf.predict(x)[0], svr_lin.predict(x)[0], svr_poly.predict(x)[0]
def draw_box(heights):
    """Show a box plot of the given heights.

    Args:
        heights: quantitative data to summarise in a single box.
    """
    # boxplot takes a list of datasets plus one text label per dataset.
    datasets = [heights]
    dataset_names = ['Heights']
    plt.boxplot(datasets, labels=dataset_names)
    plt.title('Heights Of Male Students')
    plt.show()
示例#4
0
def GenerateOutcomes(x, z, num_cont, num_bin):
    """Generate four outcome columns per observation (Madras, Algorithm 2).

    For every row of the horizontally concatenated ``[x, z]`` a fresh
    coefficient vector is sampled, four means are derived from it, and one
    unit-variance normal draw is taken around each mean. A histogram of the
    sampled outcomes is shown before returning.

    Args:
        x: 2-D array whose first ``num_cont`` columns are the continuous
            covariates, followed by ``num_bin`` binary ones.
        z: 2-D array with the same number of rows as ``x``.
        num_cont: number of continuous columns in ``x``.
        num_bin: number of binary columns in ``x``.

    Returns:
        ``(y_all, mu_all, y_column, mu_column)``: the sampled outcomes and
        their means, one row per observation and one column per setting,
        plus the comma-separated column names of each array.
    """
    # Constants as defined by Madras.
    num_z = z.shape[1]
    w = -11
    beta_a = 6

    # Column order used for the lists, the histogram and the returned arrays.
    y_names = ('y_t0_a0', 'y_t1_a0', 'y_t0_a1', 'y_t1_a1')
    mu_names = ('mu_t0_a0', 'mu_t1_a0', 'mu_t0_a1', 'mu_t1_a1')
    sampled = {name: [] for name in y_names}
    means = {name: [] for name in mu_names}

    # Algorithm 2: horizontal concatenation and the constant offset vector.
    xz = np.concatenate((x, z), 1)
    W = np.ones(xz.shape[1]) * .5

    levels = [0, .1, .2, .3, .4]
    # Every observation needs its own beta sample, hence the explicit loop.
    for obs in xz:
        # Continuous coefficients come first, matching the column order of x.
        beta = np.concatenate(
            (
                choice(levels, num_cont, p=[.5, .125, .125, .125, .125]),
                choice(levels, num_bin, p=[.6, .1, .1, .1, .1]),
                choice([.4, .6], num_z, p=[.5, .5]),
            ),
            0,
        )

        # Two base means; the a1 settings add beta_a on top of each.
        exp_mean = np.matmul(np.exp(obs + W), beta)
        lin_mean = np.matmul(obs, beta) - w
        row_means = (exp_mean, lin_mean, exp_mean + beta_a, lin_mean + beta_a)

        for mu_name, mu in zip(mu_names, row_means):
            means[mu_name].append(mu)
        # Draw the outcomes only after all four means are known.
        for y_name, mu in zip(y_names, row_means):
            sampled[y_name].append(np.random.normal(mu, 1, 1)[0])

    plt.figure()
    plt.title('Generated data')
    for y_name in y_names:
        plt.hist(sampled[y_name], label=y_name, alpha=0.5, bins=20)
    plt.legend()
    plt.show()

    y_all = np.transpose(np.vstack([sampled[name] for name in y_names]))
    mu_all = np.transpose(np.vstack([means[name] for name in mu_names]))

    # Column names follow the stacking order above.
    return y_all, mu_all, ', '.join(y_names), ', '.join(mu_names)
示例#5
0
def show_train_history(train_history, train, validation):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        train_history: object exposing a ``history`` mapping from metric name
            to a per-epoch list of values (presumably the Keras ``History``
            returned by ``model.fit`` -- confirm against the caller).
        train: key of the training metric; also used as the y-axis label.
        validation: key of the matching validation metric.
    """
    # The original passed ``train_history, history[train]`` (comma instead of
    # dot), which referenced an undefined ``history`` name.
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
def draw_hist(heights):
    """Show a 100-bin histogram of the given heights.

    Args:
        heights: raw quantitative data; unlike the categorical charts, no
            frequency counting is done beforehand.
    """
    # Second positional argument of hist is the number of bins.
    bin_count = 100
    plt.hist(heights, bin_count)
    plt.title('Heights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.show()
def draw_scatter(heights, weights):
    """Show a scatter plot of weights against heights.

    Args:
        heights: x coordinate of each point.
        weights: y coordinate of each point.
    """
    plt.scatter(x=heights, y=weights)
    plt.title('Heights & Weights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Weights')
    plt.show()
def draw_cumulative_hist(heights):
    """Show the normalised cumulative distribution of heights as a step curve.

    Args:
        heights: raw quantitative data to accumulate.
    """
    # Arguments: the data, then the number of bins.
    # density=True normalises the histogram; it replaces the ``normed``
    # keyword, which current Matplotlib releases no longer accept.
    # histtype='step' draws an unfilled stepped outline.
    # cumulative=True accumulates the counts from left to right.
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
def draw_pie(grades):
    """Show a pie chart with the share of each letter grade.

    Args:
        grades: iterable of grade labels. Only 'A' through 'E' get a wedge.
    """
    labels = ['A', 'B', 'C', 'D', 'E']

    # Tally how many times each grade occurs.
    counts = {}
    for grade in grades:
        if grade in counts:
            counts[grade] += 1
        else:
            counts[grade] = 1

    # First argument is the wedge sizes, labels names each wedge, and
    # autopct formats the percentage printed inside it.
    wedge_sizes = [counts.get(label, 0) for label in labels]
    plt.pie(wedge_sizes, labels=labels, autopct='%1.1f%%')
    plt.title('Grades Of Male Students')
    plt.show()
    weekAverages[d]=sum(weekRatings[d]*1.0/len(weekRatings[d]))
weekAverages
import matplotlib.pyplot as plt  # was misspelled "pylot"

# X holds the weekday keys and Y the matching average rating.
# The original wrote ``list(weekAverages,keys())`` and bound the result to
# lowercase ``x`` while every later line reads ``X``.
X = list(weekAverages.keys())
Y = [weekAverages[x] for x in X]
plt.plot(X, Y)
plt.bar(X, Y)
# zoom in more to see the detail
plt.ylim(3.6, 3.8)
plt.bar(X, Y)

plt.ylim(3.6, 3.8)
plt.xlabel("Weekday")
plt.ylabel("Rating")
plt.xticks([0, 1, 2, 3, 4, 5, 6], ['S', 'M', 'T', 'W', 'T', 'F', 'S'])
plt.title("Rating as a function of weekday")
plt.bar(X, Y)

#L4 Live-coding: MatPlotLib
import json
import time
from itertools import islice

path = "datasets/yelp_data/review.json"
dataset = []
# Read at most the first 50000 reviews (one JSON object per line). The
# ``with`` block closes the file, and islice stops cleanly if the file is
# shorter instead of feeding an empty string to json.loads.
with open(path, 'r', encoding='utf8') as f:
    for line in islice(f, 50000):
        d = json.loads(line)
        # The review timestamp lives under 'date' (the original read 'data').
        # NOTE(review): assumes the value is a plain YYYY-MM-DD string --
        # confirm against the dataset version in use.
        d['timeStruct'] = time.strptime(d['date'], '%Y-%m-%d')
        d['timeInt'] = time.mktime(d['timeStruct'])
        dataset.append(d)
dataset[0]
示例#11
0
# In[3]:

# Module name was misspelled "matplotliv", which fails at import time.
from matplotlib import pyplot as plt

# In[4]:

from matplotlib import pyplot as plt

# In[9]:

# Two line series sharing the same x values.
x = [1, 2, 3]
y = [1, 4, 9]
z = [10, 5, 0]
plt.plot(x, y)
plt.plot(x, z)
plt.title("test plot")
plt.xlabel("x")
plt.ylabel("y and z")
# Legend entries are given in plotting order.
plt.legend(["this is y", "this is z"])
plt.show()

# In[10]:

sample_data = pd.read_csv('sample_data.csv')

# In[11]:

sample_data

# In[12]:
示例#12
0
# Find the FFT bin with the largest value. max_val and max_index are
# expected to be initialised before this point (not visible here).
for index, fft_val in enumerate(red_fft):
    if fft_val > max_val:
        max_val = fft_val
        max_index = index

# Scale the frequency at the peak by 60 to report beats per minute
# (presumably freqs is in Hz -- confirm where it is computed).
heartrate = freqs[max_index] * 60        
print('Estimated Heartate: {} bpm'.format(heartrate))


# Plotting
if PLOT:
    # Raw red-channel series.
    plt.figure(figsize=(16,9))
    plt.plot(x, colors['red'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Time-Series Red Channel Pixel Data')
    # Keep a handle on the figure so it can still be saved after show().
    fig1 = plt.gcf()
    plt.show()
    if SAVE:
        plt.draw()
        fig1.savefig('./{}_time_series.png'.format(filename), dpi=200)
    
    # Plot the highpass data
    plt.figure(figsize=(16,9))
    plt.plot(x_filt, colors['red_filt'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Filtered Red Channel Pixel Data')
    # Same pattern as above: grab the figure handle before showing it.
    fig2 = plt.gcf()
    plt.show()
    if SAVE:
示例#13
0
# DataFrame.as_matrix() no longer exists in current pandas; to_numpy() is
# the replacement.
df.to_numpy()  # returns numpy array.

#Data Visualization Reference.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # was misspelled "pylot"
# %matplotlib inline  # Jupyter notebook only (IPython magic, not valid
# Python in a script); use plt.show() everywhere else.
plt.show()
x = np.arange(0, 10)
y = x ** 2
plt.plot(x, y, 'red')  # shows red line.
plt.plot(x, y, '*')  # shows stars on graph.
plt.plot(x, y, 'r--')  # shows red line with dashes.
plt.xlim(0, 4)  # shows x-axis limits at 0 and 4.
plt.ylim(0, 10)  # shows y-axis limits at 0 and 10.
plt.title("title goes here")
plt.xlabel('x label goes here')
plt.ylabel('y label goes here')
mat = np.arange(0, 100).reshape(10, 10)  # makes array.
plt.imshow(mat, cmap='RdYlGn')
mat = np.random.randint(0, 1000, (10, 10))
plt.imshow(mat)
plt.colorbar()
df = pd.read_csv('salaries.csv')
df.plot(x='salary', y='age', kind='scatter')  # kind could be 'line' or whatever else you need.

#SciKit-Learn Reference/Pre-Processing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# 10x2 array of random integers drawn from [0, 100).
data = np.random.randint(0, 100, (10, 2))
# Scaler instance only; it is not fitted or applied in the visible lines.
scaler_model = MinMaxScaler()
plt.style.use('fivethirtyeight')

#Generate data with two classes
X, y = make_classification(class_sep=1.2, weights=[0.1, 0.9], n_informative=3,
                            n_redundant=1, n_features=5, n_clusters_per_class=1,
                            n_samples=10000, flip_y=0, random_state=10)

# Project the five features onto two principal components for plotting.
pca = PCA(n_components=2)
X = pca.fit_transform(X)

# Relabel the classes: 1 -> 'L', 0 -> 'S'.
y = y.astype('str')
y[y=='1'] = 'L'
y[y=='0'] = 'S'

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=0)

X_1, X_2 = X_train[y_train=='S'], X_train[y_train=='L']


#Scatter plot of the dataset
# Column slicing replaces ``zip(*X_1)[0]``, which only works on Python 2
# (zip returns an iterator on Python 3). '#labc9c' contained the letter
# "l" and is not a valid hex colour; '#1abc9c' is the intended value.
plt.scatter(X_1[:, 0], X_1[:, 1], color='#1abc9c')
plt.scatter(X_2[:, 0], X_2[:, 1], color='#e67e22')


x_coords = list(X_1[:, 0]) + list(X_2[:, 0])
y_coords = list(X_1[:, 1]) + list(X_2[:, 1])
# The original line had unbalanced brackets around min(y_coords).
plt.axis([min(x_coords), max(x_coords), min(y_coords), max(y_coords)])

plt.title("Original Dataset")
plt.show()
示例#15
0
stdout.write("\r%d%% completed" % comp)
stdout.flush()
stdout.write("\n")

# Calculate and print the position of minimum in MSE
msemin = np.argmin(mse)
# argmin is zero-based, so the component count is index + 1.
print("Suggested number of components: ", msemin+1)
stdout.write("\n")

# Optional plot of MSE per component count, with the minimum highlighted.
if plot_components is True:
 with plt.style.context(('ggplot')):
     plt.plot(component, np.array(mse), '-v', color = 'blue', mfc='blue')
     plt.plot(component[msemin], np.array(mse)[msemin], 'P', ms=10, mfc='red')
     plt.xlabel('Number of PLS components')
     plt.ylabel('MSE')
     plt.title('PLS')
     plt.xlim(xmin=-1)
     plt.show()

# Run PLS with suggested number of components
pls = PLSRegression(n_components=msemin+1)
pls.fit(X_calib, Y_calib)
Y_pred = pls.predict(X_valid) 

# Calculate and print scores
score_p = r2_score(Y_valid, Y_pred)
mse_p = mean_squared_error(Y_valid, Y_pred)
# Residuals use the first prediction column; assumes Y_valid is 1-D -- TODO confirm.
sep = np.std(Y_pred[:,0]-Y_valid)
rpd = np.std(Y_valid)/sep
bias = np.mean(Y_pred[:,0]-Y_valid)
示例#16
0
# Initial time and the starting expenditure of the two countries.
t_0 = 0
init_data = np.array([14, 5])

# RK45 stepper: integrate up to t = 1000 with steps no larger than 0.001.
sys_1 = integrate.RK45(model, t_0, init_data, t_bound=1000, max_step=0.001)

# Solution lists, seeded with the initial state (y is the current state).
sol_x, sol_y, time = [sys_1.y[0]], [sys_1.y[1]], [t_0]

for i in range(5000):
    # Advance one integration step, then record the new state and time.
    sys_1.step()
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])
    time.append(sys_1.t)

plt.figure(figsize=(20, 10))

# Captions first, then one dashed curve per country.
plt.title('Simple Arms Race: Aggressive vs. Passive', fontsize=28)
plt.xlabel('Time (years)', fontsize=16)
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.plot(time, sol_x, 'b--', label='Country A')
plt.plot(time, sol_y, 'r--', label='Country B')
plt.legend(loc='best', fontsize=22)
plt.show()
              stop=X_set[:, 1].max() + 1,
              step=0.01))
# Colour the grid by the class predicted at every (X1, X2) point.
# NOTE(review): ``classifer`` looks like a misspelling of ``classifier``;
# its definition is not visible here -- confirm the name.
plt.contourf(X1,
             X2,
             classifer.predict(np.array([X1.ravel(),
                                         X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,
             cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# Overlay the actual samples, one scatter call (and legend entry) per class.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0],
                X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i),
                label=j)
plt.title('Classifier (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
# Grid spanning both features, padded by 1 on each side, at 0.01 resolution.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
示例#18
0
# Initial time and the starting expenditure of the two countries.
t_0 = 0
init_data = np.array([3, 3.5])

# RK45 stepper: integrate up to t = 1000 with steps no larger than 0.001.
sys_1 = integrate.RK45(model, t_0, init_data, t_bound=1000, max_step=0.001)

# Solution lists, seeded with the initial state (y is the current state).
sol_x, sol_y, time = [sys_1.y[0]], [sys_1.y[1]], [t_0]

for i in range(5000):
    # Advance one integration step, then record the new state and time.
    sys_1.step()
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])
    time.append(sys_1.t)

plt.figure(figsize=(20, 10))

# Captions and fixed y-range first, then one dashed curve per country.
plt.title('Arms Race: Passive vs. Passive', fontsize=28)
plt.xlabel('Time (years)', fontsize=16)
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.ylim(2, 5.5)
plt.plot(time, sol_x, 'b--', label='Country A (passive)')
plt.plot(time, sol_y, 'r--', label='Country B (passive)')
plt.legend(loc='best', fontsize=22)
plt.show()
示例#19
0
#import scipy as sp
import matplotlib.pyplot as plt  # was misspelled "pylot"

import pandas as pd
# Bar chart of how many rows fall under each 'bedrooms' value.
data = pd.read_csv("scratch3.csv")
data['bedrooms'].value_counts().plot(kind='bar')
plt.title('number of bedrooms')
plt.xlabel('bedrooms')
plt.ylabel('count')
plt.show()
import pylab as pl
import matplotlib.pyplot as plt  # was misspelled "pylot"
# Semester averages drawn as diamond markers.
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [9, 8, 8.25, 8, 7.5, 8, 8, 8.75]
pl.plot(x, y, 'D')
plt.title("Grafica de promedios semestral")
plt.xlabel("Semestres cursados")
plt.ylabel("Promedio")
# Save before show() so the file is written while the figure still exists.
pl.savefig('promedios.png')
plt.show()
'''

import json
from textblob import TextBlob
from wordcloud import WordCloud
import matplotlib.pyplot as plt  # was misspelled "pylot"

# Get the JSON data
with open("tweets.json", "r") as tweetFile:
    tweetData = json.load(tweetFile)

# ``tweets`` was appended to without ever being created, and the polarity
# code sat in a stray indented block that read an undefined ``tweet_text``.
# Both now happen in one pass over the tweets.
tweets = []
polarity_values = []

for tweet in tweetData:
    tweet_text = tweet["text"]
    tweets.append(tweet_text)

    tb = TextBlob(tweet_text)
    print("{}: {}".format(tweet_text, tb.polarity))
    polarity_values.append(tb.polarity)

giant_string = " ".join(tweets)

# Bin edges for the histogram. This line was commented out although
# ``bins`` is passed to plt.hist below.
bins = [-1, -0.5, 0, 0.5, 1]

plt.hist(polarity_values, bins)
plt.title("tweet polarity")
plt.ylabel("Count of tweets")
plt.xlabel("Polarity")
plt.show()
from pylab import *
import matplotlib.pyplot as plt  # was misspelled "pylot"

# Grid of (x, y) states. It must exist before the constraint terms below,
# which the original computed first from undefined names. ``arrange`` was a
# misspelling of ``arange``.
x, y = meshgrid(arange(1, 4, 0.1), arange(1, 4, 0.1))

# budget, fear factor and external factor constraints
b_x, b_y = 8.0, 10.0
c_x, c_y = (x/b_x), (y/b_y)
# NOTE(review): f_x and f_y are the same expression -- confirm f_y is not
# meant to be the mirrored term.
f_x, f_y = (1.0-y/x), (1.0-y/x)
e_x, e_y = 0.75, 0.9

# Rate of change of each state at every grid point.
xdot = f_x*y - c_x*x + e_x
ydot = f_y*x - c_y*y + e_y

plt.figure(figsize=(10, 10))
plt.title('Phase Plot: Aggressive vs. Aggressive', fontsize = 28)
streamplot(x, y, xdot, ydot)

show()
        nan_indices.append(i)
        
        
unique_provinces = list(unique_provinces)
province_confirmed_cases = list(province_confirmed_cases)

# Remove the flagged positions from both lists. Popping from the highest
# index down keeps the remaining indices valid; popping in ascending order
# shifts every later element and removes the wrong entries. The original
# also misspelled the list name as ``unique_provices``.
for i in sorted(nan_indices, reverse=True):
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)
    
    
# Horizontal bar chart: total confirmed cases per country.
plt.figure(figsize=(32, 32))
plt.title('Number of Covid-19 Confirmed Cases in Countries')
plt.xlabel('Number of Covid Confirmed Caese')
plt.barh(unique_countries, country_confirmed_cases)
plt.show()

# Horizontal bar chart: rows whose Country/Region is 'China' versus the
# remainder of the per-country total.
china_confirmed = latest_confirmed[confirmed_cases['Country/Region']=='China'].sum()
outside_mainland_china_confirmed = np.sum(country_confirmed_cases)-china_confirmed
plt.figure(figsize=(16, 9))
plt.title('Number of Confirmed Coronavirus cases')
plt.barh('Mainland China', china_confirmed)
plt.barh('Outside Mainland China', outside_mainland_china_confirmed)
plt.show()

# Print the total cases in mainland china outside of it
示例#24
0
# Share of apps selected by each boolean mask, as a percentage.
percent_popular = len(np_ratings[popular_apps]) / len(np_ratings) * 100
print("percent_popular", percent_popular)

unpopular_apps = np_ratings < 4
print("unpopular apps", len(np_ratings[unpopular_apps]))

# The original subtracted an array from 100 (missing len()), so the result
# was neither a scalar nor the unpopular share.
percent_unpopular = len(np_ratings[unpopular_apps]) / len(np_ratings) * 100
print("percent_unpopular", percent_unpopular)

# Whatever is neither popular nor unpopular.
somewhat_popular = 100 - (percent_popular + percent_unpopular)
print("somewhat_popular", somewhat_popular)

# do a visualization with our new data
labels = "Sucks", "Meh", "Love it!"
# Wedge sizes must be the percentages, not the boolean masks.
sizes = [percent_unpopular, somewhat_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)

# ``colors=color`` referenced an undefined name, and '%1.1%' is not a valid
# format string ('%1.1f%%' prints one decimal and a percent sign).
plt.pie(sizes, explode=explode, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)

plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love our apps?")
plt.xlabel("User Ratings - App Installs (10,000+ apps)")
plt.show()

# print ('processed', line_count, 'lines of data')
print(categories)
print('first row of data', installs [0])
print('last row of data', installs [-1])
# Inner product of vectors
# Method form and function form of the same product.
print(a.dot(b))
print(np.dot(a, b))

# Matrix / vector product; both produce the rank 1 array [29 67]
print(c.dot(d))
print(np.dot(c, d))

# Matrix / matrix product; both produce the rank 2 array
# [[19 22]
#  [43 50]]
# NOTE(review): unlike the pairs above, these two calls use different
# operands (a, c versus b, d), so "both produce" may not hold. The
# definitions of a, b, c and d are not visible here -- confirm.
print(a.dot(c))
print(np.dot(b, d))

# In[24]:

import numpy as np
from matplotlib import pyplot as plt  # was misspelled "pylot"

# Points on the line y = 2x + 5 for x = 1..10.
# ``np.arrange`` does not exist; the function is ``np.arange``.
x = np.arange(1, 11)
y = 2 * x + 5

plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
# "ob" draws blue circle markers without a connecting line.
plt.plot(x, y, "ob")
plt.show()

# In[ ]:
示例#26
0
import covid
import matplotlib.pyplot as plt  # was misspelled "pylot"

cov = covid.Covid()

name = input("ENTER the country name")
print(name)
# Query the client instance for the chosen country. The original bound an
# attribute of the module without calling it, so the subscripts below
# failed.
virusdata = cov.get_status_by_country_name(name)
active = virusdata['active']
recover = virusdata['recovered']
deaths = virusdata['deaths']
# Wedge labels are passed as a keyword; plt.pie's return value has no
# ``labels`` attribute. They also give plt.legend() something to show.
plt.pie([active, recover, deaths], labels=['active', 'recovered', 'deaths'])
plt.title(name)
plt.legend()
# show must be called; the bare ``plt.show`` never displayed the figure.
plt.show()
示例#27
0
negative = percentage(negative, noOfSearchTerms)
neutral = percentage(neutral, noOfSearchTerms)

# Render each share with two decimals for the legend text.
positive = format(positive, '.2f')
negative = format(negative, '.2f')
neutral = format(neutral, '.2f')

print("How are poeple reacting on " + searchTerm + " by analyzing " +
      str(noOfSearchTerms) + "Tweets.")

# Overall verdict from the sign of the accumulated polarity.
if (polarity == 0.00):
    print("Neutral")
elif (polarity < 0.00):
    print("Negative")
elif (polarity > 0.00):
    print("Positive")

labels = [
    'Positive [' + str(positive) + '%]', 'Neutral [' + str(neutral) + '%]',
    'Negative [' + str(negative) + '%]'
]
sizes = [positive, neutral, negative]
colors = ['yellowgreen', 'gold', 'red']
patches, texts = plt.pie(sizes, colors=colors, startangle=90)
plt.legend(patches, labels, loc="best")
plt.title('How people are reacting on ' + searchTerm + ' by analyzing ' +
          str(noOfSearchTerms) + ' Tweets.')
plt.axis('equal')
# The function is tight_layout; ``plt.tight.layout()`` raised AttributeError.
plt.tight_layout()
plt.show()
示例#28
0
# Per-cluster column means, then scatter plots coloured by cluster number.
data2.groupby([clusterNos]).mean()
plt.scatter(data2.ApplicantIncome, data2.LoanAmount, c=clusterNos)
plt.scatter(data2.ApplicantIncome, data2.Credit_History,
            c=clusterNos)  #better distinction
plt.scatter(data2.ApplicantIncome, data2.Loan_Amount_Term,
            c=clusterNos)  #better distinction

#Now use this information ;
#which customers you would like to target.

#hierarchical clustering
import scipy.cluster.hierarchy as shc
# NOTE(review): this dendrogram is drawn before plt.figure() below and then
# computed again inside the new figure -- the first call looks redundant.
dend = shc.dendrogram(shc.linkage(data2_scaled, method='ward'))

plt.figure(figsize=(10, 7))
plt.title("Dendrogram")
dend = shc.dendrogram(shc.linkage(data2_scaled, method='ward'))
# Horizontal dashed line at height 6.
plt.axhline(y=6, color='r', linestyle='--')
plt.show()

#another method for Hcluster from sklearn
from sklearn.cluster import AgglomerativeClustering
# NOTE(review): recent scikit-learn releases renamed ``affinity`` to
# ``metric`` -- confirm against the installed version.
aggCluster = AgglomerativeClustering(n_clusters=2,
                                     affinity='euclidean',
                                     linkage='ward')
aggCluster.fit_predict(data2_scaled)
aggCluster
aggCluster.labels_

#compare
compare = pd.DataFrame({
示例#29
0
import matplotlib.pyplot as plt  # was misspelled "pylot"

# Five-year steps from 1950 to 2015. The second entry was 1995, which
# duplicated a later year and broke the sequence; 1955 belongs there.
years = [
    1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005,
    2010, 2015
]

pops = [2.5, 2.7, 3, 3.3, 3.6, 4, 4.4, 4.8, 5.3, 5.7, 6.1, 6.5, 6.9, 7.3]
death = [1.2, 1.7, 1.8, 2.2, 2.5, 2.7, 2.9, 3, 3.1, 3.3, 3.5, 3.8, 4.0, 4.3]
'''
plt.plot(years, pops,'---', color=(255/255, 100/255, 100/255))
plt.plot(years, death, color=(.6, .6, .1))
'''
# Plot both series in one call so they can be styled together below.
lines = plt.plot(years, pops, years, death)
plt.grid(True)

# Apply the same colour and marker to every line returned above.
plt.setp(lines, color=(1, .4, .4), marker='o')

plt.ylabel("Population in Billions")
plt.xlabel("Population growth by Year")
plt.title("Population Growth")
plt.show()
示例#30
0
# Print each figure next to its label; the original printed only the label
# text for the popular and kinda-popular shares.
print("percent_popular", percent_popular)

unpopular_apps = np_ratings < 4
# This is the unpopular count; it was printed under the label "popular apps".
print("unpopular apps", len(np_ratings[unpopular_apps]))

# Whole-number percentage of apps selected by the unpopular mask.
percent_unpopular = int(
    len(np_ratings[unpopular_apps]) / len(np_ratings) * 100)
print(percent_unpopular)

# Whatever is neither popular nor unpopular.
kinda_popular = 100 - (percent_popular + percent_unpopular)
print("kinda popular", kinda_popular)

#  do a visualization with our shiny new data
labels = "Sucks", "Meh", "Love it!"
# Wedge order matches the order of ``labels``.
sizes = [percent_unpopular, kinda_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)

plt.pie(sizes,
        explode=explode,
        colors=colors,
        autopct='%1.1f%%',
        shadow=True,
        startangle=140)

plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love us some apps?")
plt.xlabel("User ratings - App Installs (10,000+ apps")
plt.show()