from collections import defaultdict

# Group the star ratings by weekday index.
# `datasetWithTimeValues` is built outside this excerpt; each entry is read
# here through its 'timeStruct' (`.tm_wday`) and 'stars' keys.
weekRatings = defaultdict(list)
for d in datasetWithTimeValues:
    day = d['timeStruct'].tm_wday
    weekRatings[day].append(d['stars'])

# Mean rating per weekday: sum the list first, then divide by its length.
weekAverages = {}
for d in weekRatings:
    weekAverages[d] = sum(weekRatings[d]) / len(weekRatings[d])

weekAverages  # notebook-style echo of the result

# X holds the weekday indices, Y the matching averages (same order).
X = list(weekAverages.keys())
Y = [weekAverages[x] for x in X]

import matplotlib.pyplot as plt

plt.plot(X, Y)

plt.bar(X, Y)

# zoom in more to see the detail
plt.ylim(3.6, 3.8)
plt.bar(X, Y)

plt.ylim(3.6, 3.8)
plt.xlabel("Weekday")
plt.ylabel("Rating")
# tm_wday follows the time.struct_time convention (0 = Monday ... 6 = Sunday),
# so the tick labels start on Monday.
plt.xticks([0, 1, 2, 3, 4, 5, 6], ['M', 'T', 'W', 'T', 'F', 'S', 'S'])
plt.title("Rating as a function of weekday")
plt.bar(X, Y)

#L4 Live-coding: MatPlotLib
path = "datasets/yelp_data/review.json"
# NOTE(review): `f` is not closed in this excerpt; presumably it is consumed
# by code further down — confirm it gets closed (or switch to `with open`).
f = open(path, 'r', encoding='utf8')

import json
import time

dataset = []
X_set, y_set = X_train, y_train X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)) plt.contourf(X1, X2, classifer.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green'))) plt.xlim(X1.min(), X1.max()) plt.ylim(X2.min(), X2.max()) for i, j in enumerate(np.unique(y_set)): plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=ListedColormap(('red', 'green'))(i), label=j) plt.title('Classifier (Training set)') plt.xlabel('Age') plt.ylabel('Estimated Salary') plt.legend() plt.show() # Visualising the Test set results from matplotlib.colors import ListedColormap X_set, y_set = X_test, y_test X1, X2 = np.meshgrid(
# Row filtering with a boolean mask: either written inline ...
df[df['salary'] > 60000]
# ... or with a mask stored in a variable (`my_salary` is defined outside this
# excerpt; presumably a boolean mask — confirm).
df[my_salary]
df.to_numpy() #returns numpy array (DataFrame.as_matrix() was removed in pandas 1.0).

#Data Visualization Reference.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#jupyter notebook only. below line for everything else.
# Script-safe spelling of `%matplotlib inline`: the magic syntax itself is not
# valid Python, and get_ipython only exists inside an IPython/Jupyter session.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain Python interpreter: nothing to configure
plt.show()

x = np.arange(0, 10)
y = x ** 2
plt.plot(x, y, 'red') #shows red line.
plt.plot(x, y, '*') #shows stars on graph.
plt.plot(x, y, 'r--') #shows red line with dashes.
plt.xlim(0, 4) #shows x-axis limits at 0 and 4.
plt.ylim(0, 10) #shows y-axis limits at 0 and 10.
plt.title("title goes here")
plt.xlabel('x label goes here')
plt.ylabel('y label goes here')

mat = np.arange(0, 100).reshape(10, 10) #makes array.
plt.imshow(mat, cmap = 'RdYlGn')
mat = np.random.randint(0, 1000, (10, 10))
plt.imshow(mat)
plt.colorbar()

df = pd.read_csv('salaries.csv')
df.plot(x = 'salary', y = 'age', kind = 'scatter') #kind could be 'line' or whatever else you need.

#SciKit-Learn Reference/Pre-Processing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

data = np.random.randint(0, 100, (10, 2))
#Rafael Almeida
# K-MEANS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Script-safe spelling of `%matplotlib inline`: the magic syntax itself is not
# valid Python, and get_ipython only exists inside an IPython/Jupyter session.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain Python interpreter: nothing to configure

# Sample points to cluster.
df = pd.DataFrame({
    'x': [12, 20, 28, 18, 29, 33, 24, 45, 45, 52, 51, 52, 55, 53, 55, 61, 64, 69, 72],
    'y': [39, 36, 30, 52, 54, 46, 55, 59, 63, 70, 66, 63, 58, 23, 14, 8, 19, 7, 24]
})

# Fixed seed so the random starting centroids are reproducible.
np.random.seed(200)
k = 3
# centroids[i] = [x,y]
# Keys run from 1 to k so they line up with the keys of `colmap` below.
centroids = {
    i + 1: [np.random.randint(0, 80), np.random.randint(0, 80)]
    for i in range(k)
}

# Draw the data points in black and each starting centroid in its own colour.
fig = plt.figure(figsize=(5, 5))
plt.scatter(df['x'], df['y'], color='k')
colmap = {1: 'r', 2: 'g', 3: 'b'}
for i in centroids:
    plt.scatter(*centroids[i], color=colmap[i])
plt.xlim(0, 80)
plt.ylim(0, 80)
plt.show()
# Initial conditions: the start time and the x / y expenditure at that time.
t_0 = 0
init_data = np.array([3, 3.5])

# Set up the RK45 integrator.
# NOTE(review): `integrate` and `model` come from outside this excerpt;
# presumably scipy.integrate, where the 4th and 5th positional arguments are
# t_bound and max_step — confirm.
sys_1 = integrate.RK45(model, t_0, init_data, 1000, 0.001)

# Seed the result lists with the initial state.
time = [t_0]
sol_x, sol_y = [sys_1.y[0]], [sys_1.y[1]]

for i in range(5000):
    # Advance the integrator by one step, then record where it landed:
    # `t` is the solver's current time, `y` its current state.
    sys_1.step()
    time.append(sys_1.t)
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])

# Plot both trajectories against time.
plt.figure(figsize=(20, 10))
plt.ylim(2, 5.5)
plt.plot(time, sol_x, 'b--', label='Country A (passive)')
plt.plot(time, sol_y, 'r--', label='Country B (passive)')
plt.xlabel('Time (years)', fontsize=16)
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.title('Arms Race: Passive vs. Passive', fontsize=28)
plt.legend(loc='best', fontsize=22)
plt.show()
#!/usr/bin/python
# -*- coding: UTF-8 -*-

##########################
# Creator: Javy
# Create Time: 20170416
# Email: [email protected]
# Description: sigmoid
##########################
import matplotlib.pyplot as plt
import numpy as np


def sigmoid(z):
    """Return 1 / (1 + exp(-z)); works element-wise on NumPy arrays."""
    return 1.0 / (1.0 + np.exp(-z))


# Evaluate the function on [-7, 7) in steps of 0.1 and plot the curve.
z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)
plt.plot(z, phi_z)

# Reference lines: vertical at z = 0, dotted horizontal at 0.5.
plt.axvline(0.0, color='k')
plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
plt.axhline(y=0.5, ls='dotted', color='k')
plt.yticks([0.0, 0.5, 1.0])
plt.ylim(-0.1, 1.1)
plt.xlabel('z')
# Raw string so the backslash in the mathtext label is not read as an escape.
plt.ylabel(r'$\phi (z)$')
plt.show()
# Initial conditions: the start time and the x / y expenditure at that time.
t_0 = 0
init_data = np.array([5, 5])

# Set up the RK45 integrator.
# NOTE(review): `integrate` and `model` come from outside this excerpt;
# presumably scipy.integrate, where the 4th and 5th positional arguments are
# t_bound and max_step — confirm.
sys_1 = integrate.RK45(model, t_0, init_data, 1000, 0.001)

# Seed the result lists with the initial state.
time = [t_0]
sol_x, sol_y = [sys_1.y[0]], [sys_1.y[1]]

for i in range(5000):
    # Advance the integrator by one step, then record where it landed:
    # `t` is the solver's current time, `y` its current state.
    sys_1.step()
    time.append(sys_1.t)
    sol_x.append(sys_1.y[0])
    sol_y.append(sys_1.y[1])

# Plot both trajectories against time.
plt.figure(figsize=(20, 10))
plt.ylim(4, 10)
plt.plot(time, sol_x, 'b--', label='Country A (aggressive)')
plt.plot(time, sol_y, 'r--', label='Country B (aggressive)')
plt.xlabel('Time (years)', fontsize=16)
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.title('Arms Race: Aggressive vs. Aggressive', fontsize=28)
plt.legend(loc='best', fontsize=22)
plt.show()