def main():
    m = 350
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)

    # not separable
    y = np.empty([m, 1])
    for i in range(X.shape[0]):
        y[i] = func2(X[i, :])

    # plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # train svm
    # change c to switch between hard/soft margins
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.1)

    # plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
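# NOTE: the labeling helpers `func` and `func2` used above are defined
# elsewhere; the following is only a minimal sketch of their likely shape.
# The particular linear boundary and the 10% label-flip rate are
# assumptions, not the original definitions.
import numpy as np

def func(x):
    # hypothetical linearly separable labeling: sign of a linear discriminant
    return 1.0 if 2 * x[0] - x[1] + 1 > 0 else -1.0

def func2(x):
    # hypothetical non-separable variant: same boundary with random label flips
    label = func(x)
    return -label if np.random.rand() < 0.1 else label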
def main():
    print("Loading data...")
    X = np.loadtxt('X.txt')
    y = np.loadtxt('y.txt')

    print("Plotting data...")
    plot_data(X, y)
def main():
    m = 100
    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    # preprocessing.scale(X)

    # linearly separable
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, :])

    # plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # train svm
    w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='gaussian')
    # w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='polynomial')

    # plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
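# NOTE: `type='gaussian'` presumably selects an RBF kernel inside `svm.train`.
# Below is a minimal sketch of what such a kernel computes; the function name
# and the `sigma` parameter are assumptions.
import numpy as np

def gaussian_kernel(x, z, sigma=1.0):
    # K(x, z) = exp(-||x - z||^2 / (2 * sigma^2))
    return np.exp(-np.linalg.norm(x - z) ** 2 / (2 * sigma ** 2))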
def main():
    m = 150
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X = preprocessing.scale(X)  # scale returns a copy; assign it back

    # linearly separable
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, :])

    # shuffle
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    # plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # train svm
    w, w0, support_vectors_idx = svm.train(X, y, c=9999, eps=0.000001)

    # plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=999999999, eps=0.000001)
    print(kfold)

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
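# NOTE: `svm.kfoldCrossValidation` is defined in the svm module; below is only
# a sketch of the standard procedure it presumably follows. Reading the two
# positional arguments as k=10 folds and one repetition is an assumption.
import numpy as np

def kfold_accuracy(X, y, k, train_fn, classify_fn):
    # split indices into k folds; train on k-1 folds, test on the held-out one
    folds = np.array_split(np.random.permutation(len(X)), k)
    accs = []
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.hstack([folds[j] for j in range(k) if j != i])
        w, w0, _ = train_fn(X[train_idx], y[train_idx])
        preds = classify_fn(X[test_idx], w, w0)
        accs.append(np.mean(np.ravel(preds) == np.ravel(y[test_idx])))
    return np.mean(accs)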
print("Num dead: {}".format(len(DEAD))) print("Num recovered: {}".format(len(RECOVERED))) if len(DEAD) == num_all_people: if output: print(f"All died in {i} days") return [inf, dead, rec] if len(RECOVERED) == num_all_people or \ len(INFECTED) == 0: if output: print(f"All revovered. Took {i} days") return [inf, dead, rec] if output: print("-" * 35) i += 1 inf, dead, rec = run_simulation(3, output=False) BOARD_HISTORY.append(BOARD) if PLOT_RESULTS: print("Plotting data") plot_data(inf, dead, rec, START_EMERGENCY, START_DEEPENING, START_RECOVERING, vir_sev, inf_chance) print("Saved plots")
X_norm, mu, sigma = utils.feature_normalize(X)

# add intercept term to X_norm
XX = np.vstack([np.ones((X.shape[0],)), X_norm.T]).T

print 'Running gradient descent ..'

# set up model and train
linear_reg3 = LinearReg_SquaredLoss()
J_history3 = linear_reg3.train(XX, y, learning_rate=0.01, num_iters=5000,
                               verbose=False)

# Plot the convergence graph and save it in fig5.pdf
plot_utils.plot_data(range(len(J_history3)), J_history3,
                     'Number of iterations', 'Cost J')
plt.savefig('fig5.pdf')

# Display the computed theta
print 'Theta computed by gradient descent: ', linear_reg3.theta

########################################################################
# ========== Part 3: Predict on unseen data with model ================#
########################################################################

########################################################################
# TODO:                                                                #
# Predict values for the average home                                  #
# remember to multiply prediction by 10000 using linear_reg3           #
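# NOTE: one possible way to complete the TODO above -- a sketch only. It
# reuses the `mu` and `sigma` returned by feature_normalize, prepends the
# intercept, and scales by 10000 as the comment instructs; the dot product
# with linear_reg3.theta stands in for whatever predict method the class
# actually exposes.
avg_home = np.mean(X, axis=0)                    # feature means = "average" home
avg_norm = (avg_home - mu) / sigma               # same normalization as training
avg_with_intercept = np.hstack([1.0, avg_norm])  # prepend intercept term
prediction = np.dot(avg_with_intercept, linear_reg3.theta) * 10000
print 'Predicted price of the average home: ', prediction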
# add intercept term to X_norm
XX = np.vstack([np.ones((X.shape[0],)), X_norm.T]).T
print X_norm

print 'Running gradient descent ..'

# set up model and train
linear_reg3 = LinearReg_SquaredLoss()
J_history3 = linear_reg3.train(XX, y, learning_rate=0.01, num_iters=5000,
                               verbose=False)

# Plot the convergence graph and save it in fig5a.pdf
plot_utils.plot_data(range(len(J_history3)), J_history3,
                     'Number of iterations', 'Cost J')
plt.savefig('fig5a.pdf')

# Display the computed theta
print 'Theta computed by gradient descent: ', linear_reg3.theta

########################################################################
# ========== Part 3: Predict on unseen data with model ================#
########################################################################

########################################################################
# TODO:                                                                #
# Predict values for the average home                                  #
# remember to multiply prediction by 10000 using linear_reg3           #
def russian(x, y):
    z = 0
    while x > 0:
        if x % 2 == 1:
            z = z + y
        y = y << 1
        x = x >> 1
    return z


if __name__ == "__main__":
    # first arg is max value, second arg is step
    numbers = range(1, int(sys.argv[1]), int(sys.argv[2]))

    print "Starting to test timings for function:"
    times_naive = []
    times_russian = []
    tot_numbers = len(numbers)

    s0 = time.time()
    for idx, val in enumerate(numbers):
        if idx % 100 == 0:
            print " -- Working on %i/%i: current time: %.3f seconds" % (idx + 1, tot_numbers, time.time() - s0)
        s1 = time.time()
        naive(val, val)
        times_naive.append(time.time() - s1)
        s2 = time.time()
        russian(val, val)
        times_russian.append(time.time() - s2)
    e0 = time.time()
    print "Time taken: %.3f seconds" % (e0 - s0)

    times = numpy.array([times_naive, times_russian]).transpose()
    numbers = numpy.array([numbers, numbers]).transpose()
    print "opening plot..."
    plot_utils.plot_data(numbers, times)
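# NOTE: `naive` is defined elsewhere in the script; this is only a sketch of
# the repeated-addition baseline it presumably implements, which runs in O(x)
# versus O(log x) for the Russian peasant method above.
def naive(x, y):
    z = 0
    for _ in xrange(x):  # repeated addition
        z = z + y
    return z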
# Group the data by week, and take the count for each week
week_counts = df.resample('W').count()
week_counts.columns = ['adoptions']
commit_week = df_c.resample('W').count()
commit_week.columns = ['commits']

# merge for multi-axis plotting
week_merged = week_counts.merge(commit_week, how='outer',
                                left_index=True, right_index=True)
week_merged.fillna(0, inplace=True)

# plot adoptions per week
plot_utils.plot_data(week_counts.index, week_counts['adoptions'], "time",
                     "number of adoption events per week",
                     "Adoption Events Per Week",
                     filename="results/adop_over_time_week.png")

# double axis plot
plot_utils.plot_two_axes(week_merged.index, week_merged['adoptions'],
                         week_merged['commits'], "time",
                         "number of adoption events per week",
                         "number of import commits per week",
                         "Adoption Events and Import Commits Per Week",
                         filename="results/adop_commit_over_time_week.png")

# Group the data by month, and take the count for each month
month_counts = df.resample('M').count()
month_counts.columns = ['adoptions']
        # if a spike is detected, store the link in reddit.events
        if (tree_score > adjusted_max / 5) or (tree_score < adjusted_min):
            log_event(date, post, tree_score, TOPIC)

    # slide window to the right
    window = window[1:] + [daily_max]

    # find max and adjusted max
    window_max = max(window)
    adjusted_max = (window_max + 6 * adjusted_max + global_max) / 10

    # write to file in case of failure
    f = open(AMAX_TEMP, "w+")
    f.write(str(adjusted_max) + "\n" + str(global_max))
    f.close()

    # find min and adjusted min
    window_min = min(window)
    adjusted_min = (window_min + 6 * adjusted_min + global_min) / 10

    # write to file in case of failure
    f = open(AMIN_TEMP, "w+")
    f.write(str(adjusted_min) + "\n" + str(global_min))
    f.close()

    # write score to file
    append_to_file(date, score, FILENAME)

os.remove(AMAX_TEMP)
os.remove(AMIN_TEMP)
plot_data(TOPIC)
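# NOTE: for intuition, the adjusted-max update blends the window peak, the
# previous adjusted value, and the all-time peak with weights 1, 6 and 1 over
# a divisor of 10, so when all three equal v the estimate decays toward
# 0.8 * v. A small worked example with illustrative numbers:
w_max, adj_max, g_max = 10.0, 8.0, 20.0
new_adj_max = (w_max + 6 * adj_max + g_max) / 10
print(new_adj_max)  # (10 + 48 + 20) / 10 = 7.8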
bdata = load_boston()
df = pd.DataFrame(data=bdata.data, columns=bdata.feature_names)

# X is the percentage of the population in a census tract that is of
# lower economic status. X is a vector of length 506.
# y is the median home value in $10000's. y is a vector of length 506.
X = df.LSTAT
y = bdata.target

# Scatter plot of LSTAT vs. median home value, saved in fig1.pdf
import numpy as np
import plot_utils

print 'Plotting data ...'
plot_utils.plot_data(X, y, 'Percentage of population with lower economic status',
                     'Median home value in $10000s')
plt.savefig('fig1.pdf')

########################################################################
# ============ Part 1: Training a univariate model ====================#
########################################################################

# Predict median home value from percentage of lower economic status
# in a census tract.

# add the column of ones to X to represent the intercept term
XX = np.vstack([np.ones((X.shape[0],)), X]).T

from linear_regressor import LinearRegressor, LinearReg_SquaredLoss

# set up a linear regression model
        repo_comps = sorted(nx.connected_component_subgraphs(R),
                            key=len, reverse=True)
        repo_comp_data.append(len(repo_comps))
        times.append(time)
        print " finished", adoption_index, "events"

# get final component counts
n = len(events_list)
user_comps = sorted(nx.connected_component_subgraphs(U), key=len, reverse=True)
user_comp_data.append(len(user_comps))
repo_comps = sorted(nx.connected_component_subgraphs(R), key=len, reverse=True)
repo_comp_data.append(len(repo_comps))
times.append(events_list[n - 1]["target"]["time"])

# plot number of components over time
plot_utils.plot_data(times, user_comp_data, "Time (UNIX)",
                     "Number of Components in User Graph",
                     "Number of Components in User Graph over Time",
                     filename="results/time_components_user.png",
                     log_scale=False)
print "user component plot saved to results/time_components_user.png"

plot_utils.plot_data(times, repo_comp_data, "Time (UNIX)",
                     "Number of Components in Repo Graph",
                     "Number of Components in Repo Graph over Time",
                     filename="results/time_components_repo.png",
                     log_scale=False)
print "repo component plot saved to results/time_components_repo.png"
# plots use usage counts, adoption counts, and average delta t
use_counts = []
adop_counts = []
avg_delta = []
for lib in usage_counts:
    if lib in lib_adop_counts:
        use_counts.append(usage_counts[lib])
        adop_counts.append(lib_adop_counts[lib])
        avg_delta.append(lib_delta[lib])

# total # of usages for library on x, total # of adoptions for lib on y
plot_utils.plot_data(use_counts, adop_counts, "Number of uses",
                     "Number of adoptions", "Uses vs. Adoptions per Library",
                     filename="results/uses_vs_adoptions.png",
                     scatter=True, log_scale=True)
print "Uses vs. adoptions plot saved to results/uses_vs_adoptions.png"

# frequency distribution of # of adoptions per library
lib_adop_freq, min_lib_adop, max_lib_adop = plot_utils.count_freq(lib_adop_counts)
plot_utils.plot_freq(lib_adop_freq, "library adoption count", "freq",
                     "Frequency of library adoption counts",
                     filename="results/lib_adop_freq.jpg", log_scale=True)
print "lib adop counts: min =", min_lib_adop, ", max =", max_lib_adop
########################################################################
# We start the exercise by first loading and visualizing the dataset.  #
# The following code will load the dataset into your environment and   #
# plot the data.                                                       #
########################################################################

# Load Training Data
print 'Loading and Visualizing Data ...'
X, y, Xtest, ytest, Xval, yval = utils.load_mat('ex2data1.mat')

# Plot training data
plot_utils.plot_data(X, y, 'Change in water level (x)',
                     'Water flowing out of the dam (y)')
plt.savefig('fig6.pdf')

########################################################################
# ========== Part 2: Regularized Linear Regression ====================#
########################################################################

# You should now implement the loss function and gradient of the loss
# function for regularized linear regression in
# reg_linear_regression_multi.py.

# append a column of ones to matrix X
XX = np.vstack([np.ones((X.shape[0],)), X]).T

# Train linear regression with lambda = 0
reglinear_reg1 = RegularizedLinearReg_SquaredLoss()
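# NOTE: a sketch of the regularized squared loss and gradient to be
# implemented in reg_linear_regression_multi.py. The function and variable
# names here are assumptions; excluding the intercept term from the penalty
# is the usual convention.
import numpy as np

def reg_squared_loss(theta, X, y, reg):
    # J = 1/(2m) * ||X.theta - y||^2 + reg/(2m) * ||theta[1:]||^2
    m = X.shape[0]
    residual = X.dot(theta) - y
    J = (residual.dot(residual) + reg * theta[1:].dot(theta[1:])) / (2.0 * m)
    grad = X.T.dot(residual) / m
    grad[1:] += reg * theta[1:] / m
    return J, grad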
        G.add_edge(user, repo)

        commit_index = commit_index + 1
        if commit_index % 100 == 0:
            # get components again, add counts to plot data
            comps = sorted(nx.connected_component_subgraphs(G),
                           key=len, reverse=True)
            comp_data[key] = len(comps)
            print "finished", commit_index, "commits"

# plot # of components over time
plot_utils.plot_data(comp_data, "Time (UNIX)", "Number of Components",
                     "%s: Number of Components over Time" % lib,
                     filename="results/%s_time_components_APPROX.png" % lib,
                     x_max=0, x_min=0, log_scale=False)
print "plot saved to results/%s_time_components_APPROX.png" % lib

'''
SLOW WAY
for user_id in users_list:
    user = str(user_id)
    print user
    for repo in user_to_repos[user]:
        print repo
        # read repo file
        repo_commits = utils.load_json("imports_data/%s.log" % repo)
        print len(repo_commits)
        # loop all commits in this repo
        for commit in commits:
            # each commit is user, time, dictionary of imports