def plot_graphs(history, string):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (presumably a Keras ``History``
            object -- confirm against the caller).
        string: Name of the metric to plot. The validation series is looked
            up under ``'val_' + string``.
    """
    val_key = 'val_' + string
    # Training curve first, validation second, so the legend order matches.
    for key in (string, val_key):
        plt.plot(history.history[key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
def draw_bar(grades):
    """Show a bar chart of how often each grade 'A'..'E' occurs in *grades*.

    Args:
        grades: Iterable of grade labels. Labels other than 'A'-'E' are
            tallied but never plotted.
    """
    xticks = ['A', 'B', 'C', 'D', 'E']
    positions = range(5)

    # Tally the frequency of every grade category.
    counts = {}
    for grade in grades:
        counts.setdefault(grade, 0)
        counts[grade] += 1
    bar_heights = [counts.get(xtick, 0) for xtick in xticks]

    # Create the bar chart: the first argument holds the x positions of the
    # bars, the second their heights; ``align`` places each bar relative to
    # its x position.
    plt.bar(positions, bar_heights, align='center')
    # Label the bars: the first argument holds the x positions of the
    # labels, the second the label texts.
    plt.xticks(positions, xticks)
    # Axis captions.
    plt.xlabel('Grade')
    plt.ylabel('Frequency')
    # Title.
    plt.title('Grades Of Male Students')
    # Draw.
    plt.show()
def predict_prices(dates, prices, x):
    """Fit three SVR models to (date, price) data, plot them, predict at *x*.

    Args:
        dates: Sequence of numeric date values, one per sample.
        prices: Sequence of prices aligned with *dates*.
        x: Date value(s) to predict for, as a scalar or array-like. Only
            the prediction for the first value is returned.

    Returns:
        Tuple ``(rbf, linear, polynomial)`` holding each model's prediction
        for the first value in *x*.
    """
    # The estimators expect 2-D input: one row per sample, one column per
    # feature. Reshaping ``x`` as well lets callers pass a bare scalar.
    dates = np.reshape(dates, (len(dates), 1))
    x = np.reshape(x, (-1, 1))

    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.scatter(dates, prices, color='black', label='data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates, svr_lin.predict(dates), color='green', label='Linear model')
    plt.plot(dates, svr_poly.predict(dates), color='blue', label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return svr_rbf.predict(x)[0], svr_lin.predict(x)[0], svr_poly.predict(x)[0]
def show_train_history(train_history, train, validation):
    """Plot a training metric against its validation metric per epoch.

    Args:
        train_history: Object whose ``history`` attribute maps metric names
            to per-epoch value sequences (presumably a Keras ``History``
            object -- confirm against the caller).
        train: Key of the training metric; also used as the y-axis label.
        validation: Key of the validation metric.
    """
    # The series live on ``train_history.history``; there is no separate
    # ``history`` name in scope.
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
def draw_scatter(heights, weights):
    """Show a scatter plot of *weights* against *heights*.

    Args:
        heights: x coordinates of the points.
        weights: y coordinates of the points.
    """
    # Create the scatter plot: x coordinates first, y coordinates second.
    plt.scatter(x=heights, y=weights)

    plt.title('Heights & Weights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Weights')
    plt.show()
def draw_hist(heights):
    """Show a 100-bin histogram of *heights*.

    Args:
        heights: Quantitative data to plot. Unlike qualitative data, no
            frequency count is done beforehand; ``plt.hist`` bins the raw
            values itself.
    """
    # Create the histogram; the second argument is the number of bins.
    plt.hist(heights, bins=100)

    plt.title('Heights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.show()
def draw_cumulative_hist(heights):
    """Show the normalised cumulative distribution of *heights* as a step curve.

    Args:
        heights: Quantitative data to plot.
    """
    # Create the cumulative curve:
    #   * first argument: the quantitative data to plot
    #   * second argument: the number of bins
    #   * density: normalise the counts (replaces the ``normed`` keyword,
    #     which newer matplotlib releases no longer accept)
    #   * histtype='step': draw a stair-step outline
    #   * cumulative: accumulate the counts from left to right
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
def plot_regression_line(x, y, b):
    """Show the observations and the fitted line ``b[0] + b[1] * x``.

    Args:
        x: Predictor values; must support ``b[1] * x`` (for example a
            NumPy array).
        y: Observed responses aligned with *x*.
        b: Coefficients; ``b[0]`` is used as the intercept and ``b[1]`` as
            the slope.
    """
    # Observed points as a scatter plot.
    plt.scatter(x, y, color="m", marker="o", s=30)

    # Predicted response vector, drawn as the regression line.
    intercept, slope = b[0], b[1]
    y_pred = intercept + slope * x
    plt.plot(x, y_pred, color="g")

    # Axis labels, then display.
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
def generate_plot(platforms, output_file):
    """Render the platforms as a bar chart and save it as a PNG image.

    Args:
        platforms: Iterable of dicts with the keys ``'name'``,
            ``'abbreviation'``, ``'original_price'`` and
            ``'adjusted_price'``.
        output_file: Destination handed to ``plt.savefig``.
    """
    # Convert the platforms into the two series a bar chart needs:
    # "labels" go on the x-axis, "values" are the bar heights.
    labels = []
    values = []
    for platform in platforms:
        name = platform['name']
        adjusted_price = platform['adjusted_price']
        price = platform['original_price']

        # Skip prices higher than 2000 USD simply because they would make
        # the output unusable.
        if price > 2000:
            continue

        # If the name of the platform is too long, replace it with the
        # abbreviation.
        if len(name) > 15:
            name = platform['abbreviation']

        # insert(0, ...) prepends, so the chart shows the platforms in the
        # reverse of their input order.
        labels.insert(
            0,
            u"{0}\n$ {1}\n$ {2}".format(name, price, round(adjusted_price, 2)))
        values.insert(0, adjusted_price)

    # Width of each bar and size of the resulting graph. Keep the figure at
    # least one label wide so an empty selection does not request a
    # zero-width figure.
    width = 0.3
    ind = np.arange(len(values))
    fig = plt.figure(figsize=(max(len(labels), 1) * 1.8, 10))

    # Generate a subplot and put our values onto it.
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(ind, values, width, align='center')

    # Format the x and y axis labels. Also offset the ticks on the x-axis
    # slightly and give the labels a slight tilt.
    plt.ylabel('Adjusted price')
    plt.xlabel('Year/ Console')
    ax.set_xticks(ind + 0.3)
    ax.set_xticklabels(labels)
    fig.autofmt_xdate()
    plt.grid(True)

    plt.savefig(output_file, dpi=72)
unique_provinces = list(unique_provinces)
province_confirmed_cases = list(province_confirmed_cases)
# Remove the NaN entries from both parallel lists. Pop from the highest
# index down so that earlier removals do not shift the positions of the
# entries that still have to be removed.
for i in sorted(nan_indices, reverse=True):
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)

# Plot a bar graph to see the total confirmed cases across different countries
plt.figure(figsize=(32, 32))
plt.barh(unique_countries, country_confirmed_cases)
plt.title('Number of Covid-19 Confirmed Cases in Countries')
plt.xlabel('Number of Covid Confirmed Cases')
plt.show()

# Plot a bar graph to see the total confirmed cases b/w mainland china and
# outside mainland china
china_confirmed = latest_confirmed[confirmed_cases['Country/Region'] == 'China'].sum()
outside_mainland_china_confirmed = np.sum(country_confirmed_cases) - china_confirmed
plt.figure(figsize=(16, 9))
plt.barh('Mainland China', china_confirmed)
plt.barh('Outside Mainland China', outside_mainland_china_confirmed)
plt.title('Number of Confirmed Coronavirus cases')
plt.show()

# Print the total cases outside of mainland china. ``.format`` must be
# called on the string so the count is substituted into the placeholder.
print('Outside Mainland China {} cases:'.format(outside_mainland_china_confirmed))
weekRatings[day].append(d['stars']) weekAverages = {} for d in weekRatings: weekAverages[d]=sum(weekRatings[d]*1.0/len(weekRatings[d])) weekAverages x = list(weekAverages,keys()) Y=[weekAverages[x] for x in X] import matplotlib.pylot as plt plt.plot(X,Y) plt.bar(X,Y) # zoom in more to see the detail plt.ylim(3.6, 3.8) plt.bar(X, Y) plt.ylim(3.6,3.8) plt.xlabel("Weekday") plt.ylabel("Rating") plt.xticks([0,1,2,3,4,5,6],['S','M','T','W','T','F','S']) plt.title("Rating as a function of weekday") plt.bar(X,Y) #L4 Live-coding: MatPlotLib path = "datasets/yelp_data/review.json" f = open(path,'r',encoding = 'utf8') import json import time dataset = [] for i in range(50000): d = json.loads(f.readline()) # d['data'] d['timeStruct'] = time.strptime(d['data'],'%Y-%m-%d')
# In[4]:

# The cell before this one imported from the misspelled module
# ``matplotliv``, which cannot be imported; only the working import is kept.
from matplotlib import pyplot as plt

# In[9]:

x = [1, 2, 3]
y = [1, 4, 9]
z = [10, 5, 0]
plt.plot(x, y)
plt.plot(x, z)
plt.title("test plot")
plt.xlabel("x")
plt.ylabel("y and z")
plt.legend(["this is y", "this is z"])
plt.show()

# In[10]:

# NOTE(review): ``pd`` is not imported in the visible part of this file --
# confirm that ``import pandas as pd`` happens elsewhere.
sample_data = pd.read_csv('sample_data.csv')

# In[11]:

sample_data

# In[12]:

type(sample_data)
return optimizer def plot_accuracies(train_top1, train_top5, val_top1, val_top5, SWD): ''' Plots the top-1/5 accuracy for each epoch in the training and validation sets ''' plt.figure() epochs = range(len(train_top1)) lw=1 plt.plot(epochs, train_top1, color='darkred', lw=lw, linestyle='dashed', label='top-1 (train)') plt.plot(epochs, train_top5, color='red', lw=lw, label='top-5 (train)') plt.plot(epochs, val_top1, color='darkblue', lw=lw*2, linestyle='dashed', label='top-1 (test)') plt.plot(epochs, val_top5, color='blue', lw=lw*2, label='top-5 (test)') plt.xlabel('Epoch #', fontsize=20) plt.ylabel('Accuracy (%)') if SWD: filename = 'SWD-results.png' else: filename = 'SGD-results.png' plt.savefig(filename) class AverageMeter(object): """Computes and stores the average and current value""" def __init__(self): self.reset() def reset(self):
import numpy as np
import random as rd
import matplotlib.pyplot as plt

#data
#data = pd.read_csv('data/clustering.csv')
# NOTE(review): ``pd`` is not imported in the visible part of this file --
# confirm that ``import pandas as pd`` happens before this point.
url = 'https://raw.githubusercontent.com/DUanalytics/pyAnalytics/master/data/clustering.csv'
data = pd.read_csv(url)
data.shape
data.head()
data.describe()
data.columns

#visualise
plt.scatter(data.ApplicantIncome, data.LoanAmount)
plt.xlabel('Income')
plt.ylabel('LoanAmt')
plt.show()

#standardize data : Scaling
#missing values
#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html
data.dtypes
data.isnull().any()
data.isnull().any(axis=1)
data.index[data.isnull().any(axis=1)]
data.iloc[6]
data.isnull().sum().sum()  #75 missing values
data.isnull().sum(axis=0)  #columns missing
data.isnull().sum(axis=1)
step=0.01)) plt.contourf(X1, X2, classifer.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green'))) plt.xlim(X1.min(), X1.max()) plt.ylim(X2.min(), X2.max()) for i, j in enumerate(np.unique(y_set)): plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=ListedColormap(('red', 'green'))(i), label=j) plt.title('Classifier (Training set)') plt.xlabel('Age') plt.ylabel('Estimated Salary') plt.legend() plt.show() # Visualising the Test set results from matplotlib.colors import ListedColormap X_set, y_set = X_test, y_test X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)) plt.contourf(X1,
# ``DataFrame.as_matrix`` no longer exists in current pandas; ``to_numpy``
# returns the same underlying array.
reference_data = data['Ref AC'].to_numpy()
# Split the reference values at the same rows as the spectra below so that
# X and Y stay aligned.
# NOTE(review): row 422 ends up in neither split (``:422`` / ``423:``) --
# confirm whether that is intended.
Y_calib = reference_data[:422]
Y_valid = reference_data[423:]

# Get spectra
X_calib = data.iloc[:422, 2:].to_numpy()
X_valid = data.iloc[423:, 2:].to_numpy()

# Get wavelengths (They are in the first line which is considered a header from pandas)
wl = np.array(list(data)[2:])

# Plot spectra
plt.figure(figsize=(8, 4.5))
with plt.style.context(('ggplot')):
    plt.plot(wl, X_calib.T)
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.show()

# Calculate derivatives
X2_calib = savgol_filter(X_calib, 17, polyorder=2, deriv=2)
X2_valid = savgol_filter(X_valid, 17, polyorder=2, deriv=2)

# Plot second derivative
plt.figure(figsize=(8, 4.5))
with plt.style.context(('ggplot')):
    plt.plot(wl, X2_calib.T)
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('D2 Absorbance')
    plt.show()
import matplotlib.pyplot as plt
import pandas as pd

#fetch best performing model
best_model = RF_gscv.best_estimator_
best_model2 = MLP_gscv.best_estimator_

#fit permutation importance on test data
perm = PermutationImportance(best_model).fit(test_img, test_lab)
perm2 = PermutationImportance(best_model2).fit(test_img, test_lab)

#show weights
wghts = eli5.format_as_dataframe(eli5.explain_weights(perm))
wghts2 = eli5.format_as_dataframe(eli5.explain_weights(perm2))

#write dataframes to csv
wghts.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/rf_permImportance.csv',
    encoding='utf-8', index=False)
wghts2.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/mlp_permImportance.csv',
    encoding='utf-8', index=False)

# Scatter column 3 of the MLP probability map against the ninth-from-last
# column of the image array.
gLawn = mlp_map_prob[:, 3]
w = x_img_arr[:, -9]
plt.scatter(w, gLawn)
plt.xlabel('proximity_to_water')
plt.ylabel('gLawn_probability')
plt.show()
import matplotlib.pyplot as plt

input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

plt.plot(input_values, squares, linewidth=5)
plt.title("square numbers", fontsize=28)
plt.xlabel("value", fontsize=14)
plt.ylabel("squares, of value", fontsize=14)
# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)
plt.show()
# coding: utf-8
# Cleaned-up interactive history: the misspelled ``matplotlib.pylot``
# import, the ``plot.show()`` call on an undefined name and the trailing
# bare ``pd`` reference all raised errors and have been dropped.
import matplotlib.pyplot as plt
import numpy as np

# Coarse sine curve (10 samples).
x = np.linspace(0, 10, 10)
y = np.sin(x)
plt.plot(x, y)
plt.show()

# Same curve with axis labels and a title.
plt.plot(x, y)
plt.xlabel("Time")
plt.ylabel("Some function of time")
plt.title("sin")
plt.show()

# Smoother curve (100 samples).
x = np.linspace(0, 10, 100)
y = np.sin(x)
plt.plot(x, y)
plt.show()
percent_popular = len(np_ratings[popular_apps]) / len(np_ratings) * 100
print("percent_popular", percent_popular)

unpopular_apps = np_ratings < 4
print("unpopular_apps", len(np_ratings[unpopular_apps]))
# Share of apps rated below 4, computed the same way as percent_popular.
percent_unpopular = len(np_ratings[unpopular_apps]) / len(np_ratings) * 100
print("percent_unpopular", percent_unpopular)

# Whatever is neither popular nor unpopular.
somewhat_popular = 100 - (percent_popular + percent_unpopular)
print("somewhat_popular", somewhat_popular)

# do a visualization with our new data
labels = "Sucks", "Meh", "Love it!"
# The wedges are the three percentages, in the same order as the labels
# (not the boolean masks used to compute them).
sizes = [percent_unpopular, somewhat_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)
# '%1.1f%%' renders one decimal place followed by a literal percent sign.
plt.pie(sizes, explode=explode, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=140)
plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love our apps?")
plt.xlabel("User Ratings - App Installs (10,000+ apps)")
plt.show()

# print ('processed', line_count, 'lines of data')
print(categories)
print('first row of data', installs[0])
print('last row of data', installs[-1])
# Locate the strongest component of the spectrum: keep the first index
# whose value is strictly greater than every value before it (and than 0).
max_val = 0
max_index = 0
for index, fft_val in enumerate(red_fft):
    if fft_val > max_val:
        max_val = fft_val
        max_index = index

# The frequency at that index is multiplied by 60 to report beats per minute.
heartrate = freqs[max_index] * 60
print('Estimated Heartrate: {} bpm'.format(heartrate))

# Plotting
# NOTE(review): the original indentation was lost. Everything below is
# assumed to sit inside ``if PLOT:`` (``fig1`` only exists on that path) --
# confirm. The section continues past the end of this chunk.
if PLOT:
    plt.figure(figsize=(16, 9))
    plt.plot(x, colors['red'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Time-Series Red Channel Pixel Data')
    # Grab the figure handle before show() so it can still be saved afterwards.
    fig1 = plt.gcf()
    plt.show()
    if SAVE:
        plt.draw()
        fig1.savefig('./{}_time_series.png'.format(filename), dpi=200)

    # Plot the highpass data
    plt.figure(figsize=(16, 9))
    plt.plot(x_filt, colors['red_filt'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Filtered Red Channel Pixel Data')
    fig2 = plt.gcf()
import pylab as pl
import matplotlib.pyplot as plt

x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [9, 8, 8.25, 8, 7.5, 8, 8, 8.75]

# 'D' draws the points as diamond markers without a connecting line.
pl.plot(x, y, 'D')
plt.title("Grafica de promedios semestral")
plt.xlabel("Semestres cursados")
plt.ylabel("Promedio")
# Save before show() so the file is written while the figure is still current.
pl.savefig('promedios.png')
plt.show()
''' import json from textblob import TextBlob from wordcloud import WordCloud import matplotlib.pylot as plt # Get the JSON data tweetFile = open("tweets.json", "r") tweetData = json.load(tweetFile) tweetFile.close() polarity_values = [] for tweet in tweetData: tweets.append(tweet["text"]) giant_string = " ".join(tweets) tb = TextBlob(tweet_text) print("{}: {}".format(tweet_text, tb.polarity)) polarity_values.append(tb.polarity) # bins = [-1, -0.5, 0, 0.5, 1] plt.hist(polarity_values, bins) plt.title("tweet polarity") plt.ylabel("Count of tweets") plt.xlabel("Polarity") plt.show()
# Initial time and the initial expenditures of the two countries.
t_0 = 0
init_data = np.array([14, 5])

# Set up the RK45 integrator.
sys_1 = integrate.RK45(model, t_0, init_data, t_bound=1000, max_step=0.001)

# Seed the solution lists with the initial state.
sol_x, sol_y, time = [sys_1.y[0]], [sys_1.y[1]], [t_0]

for i in range(5000):
    # Advance the solver by one step, then record its current state
    # (``y``) and time (``t``).
    sys_1.step()
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])
    time.append(sys_1.t)

# Plot both trajectories against time.
plt.figure(figsize=(20, 10))
plt.plot(time, sol_x, 'b--', label='Country A')
plt.plot(time, sol_y, 'r--', label='Country B')
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.xlabel('Time (years)', fontsize=16)
plt.legend(loc='best', fontsize=22)
plt.title('Simple Arms Race: Aggressive vs. Passive', fontsize=28)
plt.show()
# Inner product of vectors
print(a.dot(b))
print(np.dot(a, b))

# Matrix / vector product; both produce the rank 1 array [29 67]
print(c.dot(d))
print(np.dot(c, d))

# Matrix / matrix product; both produce the rank 2 array
# [[19 22]
#  [43 50]]
# NOTE(review): the two calls below use different operand pairs, so they
# may not match the comment above -- confirm against the definitions of
# a, b, c and d, which lie before this chunk.
print(a.dot(c))
print(np.dot(b, d))

# In[24]:

import numpy as np
from matplotlib import pyplot as plt

# x runs over the integers 1..10 (``arange`` excludes the stop value).
x = np.arange(1, 11)
y = 2 * x + 5
plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
# "ob" draws blue circle markers without a connecting line.
plt.plot(x, y, "ob")
plt.show()

# In[ ]:
# Relative frequency of every possible outcome 0..NumberFlips. The ``1.0 *``
# keeps the division floating-point even under Python 2.
Distribution = [
    SumTrials.count(OutcomeIndex1) / (1.0 * NumberTrials)
    for OutcomeIndex1 in range(0, NumberFlips + 1)
]
print(repr(Distribution))

# Print the sum of the elements in Distribution
sumDistrib = sum(Distribution)
print(repr(sumDistrib))

OutcomeIndex2 = range(0, NumberFlips + 1)
num_bins = len(OutcomeIndex2)
bar_width = 0.8
# Tick positions at the middle of each bar, given bars that start at the
# outcome value (see ``align='edge'`` below).
XticksIndex = [(outcome + (0.5 * bar_width)) for outcome in OutcomeIndex2]
opacity = 0.4
# ``align='edge'`` makes each bar start at its outcome value, so the tick
# positions computed above land on the bar centres regardless of the
# library's default alignment.
plt.bar(OutcomeIndex2, Distribution, bar_width, alpha=opacity, color='b',
        align='edge')
plt.xlabel("Value")
plt.ylabel("Probability")
plt.xticks(XticksIndex, OutcomeIndex2)
plt.show()

"""
Describe what happens to the figure as you vary ParameterP from zero to one.
-As ParameterP increases from zero to one, the figure shifts from left to right.
What is the most likely outcome for ParameterP = 0.7 and NumberFlips = 8?
-With ParameterP = 0.7 and NumberFlips = 8, the most likely outcome is 6 with 29.7% probability.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

x = np.arange(0, 100)
y = np.arange(0, 100)
print(x)
print(y)

lr = LinearRegression()
x.ndim
y.ndim
x.shape
y.shape
# The estimator expects a 2-D feature matrix: one row per sample, one
# column per feature.
x = x.reshape(-1, 1)
# ``shape`` is an attribute, not a method.
x.shape
x.ndim

lr.fit(x, y)
plt.scatter(x, y, color='red')
plt.plot(x, lr.predict(x), color='blue')
plt.title('Linear Regression Demo')
plt.xlabel('X')
plt.ylabel('y')
plt.show()
#Data Visualization Reference.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline
# The magic above works in a Jupyter notebook only (it is not valid Python
# in a plain script, hence commented out). Use the line below everywhere else.
plt.show()

x = np.arange(0, 10)
y = x ** 2
plt.plot(x, y, 'red')  #shows red line.
plt.plot(x, y, '*')  #shows stars on graph.
plt.plot(x, y, 'r--')  #shows red line with dashes.
plt.xlim(0, 4)  #shows x-axis limits at 0 and 4.
plt.ylim(0, 10)  #shows y-axis limits at 0 and 10.
plt.title("title goes here")
plt.xlabel('x label goes here')
plt.ylabel('y label goes here')

mat = np.arange(0, 100).reshape(10, 10)  #makes array.
plt.imshow(mat, cmap='RdYlGn')
mat = np.random.randint(0, 1000, (10, 10))
plt.imshow(mat)
plt.colorbar()

df = pd.read_csv('salaries.csv')
df.plot(x='salary', y='age', kind='scatter')  #kind could be 'line' or whatever else you need.

#SciKit-Learn Reference/Pre-Processing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = np.random.randint(0, 100, (10, 2))
scaler_model = MinMaxScaler()
type(scaler_model)
import matplotlib.pyplot as plt

# Five-year steps from 1950 to 2015, one entry per value in ``pops`` and
# ``death``. The second entry is 1955; 1995 there would break the
# otherwise increasing series and make the plotted line double back.
years = [
    1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005,
    2010, 2015
]
pops = [2.5, 2.7, 3, 3.3, 3.6, 4, 4.4, 4.8, 5.3, 5.7, 6.1, 6.5, 6.9, 7.3]
death = [1.2, 1.7, 1.8, 2.2, 2.5, 2.7, 2.9, 3, 3.1, 3.3, 3.5, 3.8, 4.0, 4.3]
'''
plt.plot(years, pops,'---', color=(255/255, 100/255, 100/255))
plt.plot(years, death, color=(.6, .6, .1))
'''
# One call draws both series and returns the two line objects.
lines = plt.plot(years, pops, years, death)
plt.grid(True)
# Apply the same colour and marker to both lines.
plt.setp(lines, color=(1, .4, .4), marker='o')
plt.ylabel("Population in Billions")
plt.xlabel("Population growth by Year")
plt.title("Population Growth")
plt.show()
#import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv("scratch3.csv")
# Count how often each value of the 'bedrooms' column occurs and draw the
# counts as bars.
data['bedrooms'].value_counts().plot(kind='bar')
plt.title('number of bedrooms')
plt.xlabel('bedrooms')
plt.ylabel('count')
plt.show()