def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train GDA
    model = GDA()
    model.fit(x_train, y_train)

    # Plot data and decision boundary
    util.plot(x_train, y_train, model.theta,
              'output/p01e_{}.png'.format(pred_path[-5]))

    # Save predictions
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)
    y_pred = model.predict(x_eval)
    np.savetxt(pred_path, y_pred > 0.5, fmt='%d')

def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    model = GDA()
    model.fit(x_train, y_train)

    # Plot decision boundary on validation set
    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)
    image_path = save_path[:-3] + "png"
    theta = np.concatenate(model.theta)
    assert theta.shape == (x_val.shape[1] + 1, 1)
    util.plot(x=x_val, y=y_val, theta=theta, save_path=image_path)

    # Use np.savetxt to save outputs from validation set to save_path
    prob_val = model.predict(x_val)
    np.savetxt(save_path, prob_val)

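# The GDA class used above is not defined in these snippets. A minimal sketch,
# assuming the standard closed-form maximum-likelihood estimates and a theta
# laid out as [theta_0, theta_1, ..., theta_n] (hypothetical; each
# assignment's real class may differ in attributes and layout):
import numpy as np

class GDA:
    def __init__(self):
        self.theta = None

    def fit(self, x, y):
        """Fit phi, mu_0, mu_1 and a shared covariance by maximum likelihood,
        then convert them to logistic-form parameters theta."""
        m, n = x.shape
        phi = np.mean(y == 1)
        mu_0 = x[y == 0].mean(axis=0)
        mu_1 = x[y == 1].mean(axis=0)
        # Shared covariance: average outer product of class-centered examples
        centered = x - np.where((y == 1)[:, None], mu_1, mu_0)
        sigma = centered.T @ centered / m
        sigma_inv = np.linalg.inv(sigma)
        # Posterior p(y=1|x) = sigmoid(theta^T x + theta_0)
        theta = sigma_inv @ (mu_1 - mu_0)
        theta_0 = 0.5 * (mu_0 @ sigma_inv @ mu_0 - mu_1 @ sigma_inv @ mu_1) \
            + np.log(phi / (1 - phi))
        self.theta = np.concatenate([[theta_0], theta])
        return self.theta

    def predict(self, x):
        # Accept inputs with or without an intercept column of ones
        if x.shape[1] + 1 == self.theta.shape[0]:
            x = np.hstack([np.ones((x.shape[0], 1)), x])
        return 1 / (1 + np.exp(-x @ self.theta))
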
def fit(self, x, y):
    """Run Newton's Method to minimize J(theta) for logistic regression.

    Args:
        x: Training example inputs. Shape (m, n).
        y: Training example labels. Shape (m,).

    Returns:
        theta: Logistic regression model parameters, including intercept.
    """
    # *** START CODE HERE ***
    m, n = x.shape
    theta = self.theta
    if theta is None:
        theta = np.zeros(n)
    while True:
        loss = self.loss(x, y, theta, m, n)
        theta_new = self.update(x, y, theta, m, n)
        if self.verbose:
            print("Loss: ", loss, " 1-norm: ",
                  np.linalg.norm(theta_new - theta, ord=1))
        if np.linalg.norm(theta_new - theta, ord=1) < self.eps:
            break
        theta = theta_new
    # Keep the final (converged) iterate, not the previous one
    self.theta = theta_new
    util.plot(x, y, self.theta, 'output/p01b')

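# The self.loss and self.update helpers called in fit above are not shown.
# A minimal sketch, assuming the average logistic loss and a standard Newton
# step (hypothetical method bodies matching the call sites; they would live
# on the same class as fit):
import numpy as np

def loss(self, x, y, theta, m, n):
    # Average negative log-likelihood for logistic regression
    h = 1 / (1 + np.exp(-x @ theta))
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

def update(self, x, y, theta, m, n):
    # One Newton step: theta - H^{-1} grad
    h = 1 / (1 + np.exp(-x @ theta))
    grad = x.T @ (h - y) / m
    hess = (x.T * (h * (1 - h))) @ x / m
    return theta - np.linalg.solve(hess, grad)
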
def logistic_regression(X, Y):
    """Train a logistic regression model"""
    m, n = X.shape
    theta = np.zeros(n)
    learning_rate = 1

    i = 0
    while True:
        i += 1
        prev_theta = theta
        grad = calc_grad(X, Y, theta)
        theta = theta - learning_rate * grad
        if i % 10000 == 0:
            DEBUG = False
            if DEBUG:
                from util import plot
                from matplotlib import pyplot as plt
                plot(X, (Y == 1), theta, 'output/{}.png'.format(i))
            print('Finished %d iterations' % i)
            print(np.linalg.norm(prev_theta - theta))
            print(theta)
        if np.linalg.norm(prev_theta - theta) < 1e-15:
            print('Converged in %d iterations' % i)
            break
    return

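# calc_grad is not shown above. A minimal sketch, assuming labels in {-1, 1}
# (consistent with the (Y == 1) conversion in the debug plot call); the real
# helper is not included in this snippet:
import numpy as np

def calc_grad(X, Y, theta):
    """Average gradient of the logistic loss with labels in {-1, 1}."""
    m, _ = X.shape
    margins = Y * X.dot(theta)
    probs = 1. / (1 + np.exp(margins))
    return -(1. / m) * X.T.dot(probs * Y)
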
def main(train_path, valid_path, save_path, plot_path):
    """Problem: Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
        plot_path: Path to save decision-boundary plots.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to save_path
    clf = LogisticRegression()
    clf.fit(x_train, y_train)

    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True)
    pred = clf.predict(x_valid)
    np.savetxt(save_path, pred)
    print(f"ACC_valid : {np.sum((pred > 0.5) == y_valid) / len(y_valid)}")

    util.plot(x_train, y_train, clf.theta,
              plot_path.replace(".png", "_train.png"), correction=1.0)
    util.plot(x_valid, y_valid, clf.theta,
              plot_path.replace(".png", "_valid.png"), correction=1.0)

def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # Get the model
    model = GDA()
    model.fit(x_train, y_train)

    # Predict using the trained model
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=False)
    y_pred = model.predict(x_eval)

    # Plot decision boundary on top of validation set
    theta = list(model.theta)
    theta_0 = [model.theta_0]
    util.plot(x_eval, y_eval, theta_0 + theta,
              'output/{ds}_GDA.pdf'.format(ds=eval_path.split('/')[-1]))

    # Use np.savetxt to save predictions on eval set to pred_path
    np.savetxt(pred_path, y_pred)

def main(lr, train_path, eval_path, save_path):
    """Problem: Poisson regression with gradient ascent.

    Args:
        lr: Learning rate for gradient ascent.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        save_path: Path to save predictions.
    """
    # Load training set
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Fit a Poisson Regression model
    # Run on the validation set, and use np.savetxt to save outputs to save_path
    model = PoissonRegression(lr)
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(eval_path, add_intercept=True)
    util.plot(x_val, y_val, model.theta,
              save_path=save_path.replace(".txt", ".png"))
    np.savetxt(save_path, model.predict(x_val))

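# PoissonRegression is not defined in this snippet. A minimal sketch, assuming
# batch gradient ascent on the Poisson log-likelihood with the canonical
# (exponential) link; only the constructor signature is taken from the call
# above, the rest is an assumption:
import numpy as np

class PoissonRegression:
    def __init__(self, step_size, max_iter=10000, eps=1e-5):
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.theta = None

    def fit(self, x, y):
        m, n = x.shape
        self.theta = np.zeros(n)
        for _ in range(self.max_iter):
            # Gradient of the average log-likelihood: X^T (y - exp(X theta)) / m
            step = self.step_size * x.T @ (y - np.exp(x @ self.theta)) / m
            self.theta += step
            if np.linalg.norm(step, ord=1) < self.eps:
                break

    def predict(self, x):
        # Canonical response: E[y|x] = exp(theta^T x)
        return np.exp(x @ self.theta)
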
def build_c0():
    # data
    tok = Tokenizer()
    tok.fit_on_texts(pnames)
    pnames_vec = tok.texts_to_sequences(pnames)
    maxlen_pd = max(map(len, pnames_vec))  # length of the longest sequence
    pnames_train_data = pad_sequences(pnames_vec, maxlen_pd)  # the finished training data
    print(pnames_train_data[:10])
    pnames_vocab_size = len(tok.word_index) + 1

    # model
    pnames_in = Input(shape=(maxlen_pd,), dtype='int32')
    encoded = Embedding(pnames_vocab_size, 50)(pnames_in)
    encoded = LSTM(100)(encoded)
    out = Dense(c0_labels_len, activation='softmax')(encoded)
    model = Model([pnames_in], out)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    history = model.fit(pnames_train_data,
                        c0_labels,
                        validation_split=0.05,
                        batch_size=32,
                        epochs=5,  # 20
                        verbose=2)
    model.save_weights(directory + "c0.h5")

    # --------------
    # Model performance output
    # --------------
    u.plot(history.history, ('acc', 'val_acc'),
           ' training&validation acc', ('Epoch', 'Acc'))

def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to save_path
    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)

    ### Uncomment to normalize the training and validation sets to improve GDA performance:
    # x_train = (x_train - np.mean(x_train, axis=0)) / np.std(x_train, axis=0)
    # x_val = (x_val - np.mean(x_val, axis=0)) / np.std(x_val, axis=0)
    # x_train = (x_train - np.min(x_train, axis=0)) / (np.max(x_train, axis=0) - np.min(x_train, axis=0))
    # x_val = (x_val - np.min(x_val, axis=0)) / (np.max(x_val, axis=0) - np.min(x_val, axis=0))

    clf = GDA()
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_val)
    np.savetxt(save_path, y_predict)

    # Plot the predicted labels, then the real distribution
    util.plot(x_val, (y_predict >= 0.5), clf.theta,
              save_path[:-4] + "validation_expected")
    util.plot(x_val, y_val, clf.theta,
              save_path[:-4] + "validation_real")

def main(train_path, valid_path, save_path):
    """Problem: Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to save_path
    model = LogisticRegression()
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(valid_path, add_intercept=True)
    util.plot(x_val, y_val, model.theta,
              save_path=save_path.replace(".txt", ".jpg"))
    yhat = model.predict(x_val)
    np.savetxt(save_path, yhat)
    print(f"LogReg acc: {util.compute_accuracy(y_val, yhat)}")
    print(f"LogReg log loss: {util.compute_log_loss(y_val, yhat)}")

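# util.compute_accuracy and util.compute_log_loss are referenced above and
# below but not shown. A minimal sketch of what they plausibly compute
# (hypothetical implementations; the real util module may differ):
import numpy as np

def compute_accuracy(y_true, y_prob, threshold=0.5):
    return np.mean((y_prob >= threshold) == y_true)

def compute_log_loss(y_true, y_prob, eps=1e-12):
    p = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))
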
def main(train_path, eval_path, pred_path):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)

    # Initial guess of parameters (intercept plus two features)
    theta_0 = np.zeros(shape=(3,))

    # Get the model
    model = LogisticRegression(theta_0=theta_0)
    model.fit(x_train, y_train)

    # Predict using the trained model
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)
    y_pred = model.predict(x_eval)

    # Plot decision boundary on top of validation set
    util.plot(x_eval, y_eval, model.theta,
              'output/{ds}_log_reg.pdf'.format(ds=eval_path.split('/')[-1]))

    # Use np.savetxt to save predictions on eval set to pred_path
    np.savetxt(pred_path, y_pred)

def train_exploiting_greedy(self, episodes=1000, max_steps=1000, plot_rewards=True):
    scores = []
    e = 0
    for _ in range(episodes):
        trace = []
        greedy_reversal_sort(self.env.observation_space.sample(), trace)
        for __ in range(3):
            for permutation in trace[::-1]:
                score = self.run_episode(max_steps, forced=permutation)
                scores.append(score)
                print("Episode:", e, " score:", score,
                      " epsilon:", self.epsilon)
                e += 1
            print()
        print()
    self.model.save_weights(FINAL_WEIGHTS_PATH)
    scores = np.array(scores)
    if plot_rewards:
        plot(scores)
        plot_running_avg(scores)

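# plot_running_avg, used in several training loops in this collection, is not
# shown. A plausible sketch (hypothetical implementation): plot the mean of
# the trailing 100 scores at each episode, a common smoothing for RL reward
# curves.
import numpy as np
import matplotlib.pyplot as plt

def plot_running_avg(scores, window=100):
    scores = np.asarray(scores)
    running_avg = np.empty(len(scores))
    for t in range(len(scores)):
        running_avg[t] = scores[max(0, t - window + 1):t + 1].mean()
    plt.plot(running_avg)
    plt.title("Running Average")
    plt.show()
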
def extract_stats(filepaths, image_size, square_function):
    print("Calculating mean, std and var of all images")
    # Running totals over all images (Welford's online algorithm)
    count_so_far = 0
    mean = np.zeros((image_size, image_size))
    M2 = np.zeros((image_size, image_size))
    n = len(filepaths)
    for i, filepath in enumerate(filepaths):
        image = misc.imread(filepath, flatten=1)
        image = process(image, square_function, image_size)

        # Online statistics
        count_so_far = count_so_far + 1
        delta = image - mean
        mean = mean + delta / count_so_far
        M2 = M2 + delta * (image - mean)

        if i % 50 == 0:
            util.update_progress(i / n)
    util.update_progress(1.0)

    mean_image = mean
    variance_image = M2 / (n - 1)
    std_image = np.sqrt(variance_image)

    print("Plotting mean image (only shows afterwards)")
    util.plot(mean_image, invert=True)
    return mean_image, variance_image, std_image

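# The loop above is Welford's online algorithm: after processing k images,
# `mean` holds the running mean and M2 the running sum of squared deviations,
# so M2 / (k - 1) is the sample variance. A self-contained scalar check
# against NumPy (illustrative only, not part of the original pipeline):
import numpy as np

def welford(samples):
    count, mean, M2 = 0, 0.0, 0.0
    for x in samples:
        count += 1
        delta = x - mean
        mean += delta / count
        M2 += delta * (x - mean)
    return mean, M2 / (count - 1)

data = np.random.default_rng(0).normal(size=1000)
m, v = welford(data)
assert np.isclose(m, data.mean()) and np.isclose(v, data.var(ddof=1))
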
def main():
    args = setup_argparser().parse_args()
    filepath = args.file
    num_clusters = args.num_clusters
    data, truth_clusters = import_file(filepath, correct_clusters=True)
    points = [Point(x) for x in data]
    aggclustering = AgglomerativeClustering(num_clusters=num_clusters)
    labels = aggclustering.fit(points)
    logging.info("Labels: {}".format(labels))
    logging.info("Rand score: {}".format(rand_score(truth_clusters, labels)))
    logging.info("Jaccard coefficient: {}".format(
        jaccard_coeff(truth_clusters, labels)))
    # We apply PCA dim reduction to both data and centroids to be able to plot them
    plot(reduce_dimensionality(data), truth_clusters, None,
         suffix="hierarchical_truth")
    plot(reduce_dimensionality(data), labels, None,
         suffix="hierarchical_computed")
    return

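# rand_score and jaccard_coeff compare a computed clustering against ground
# truth by counting agreeing pairs of points. A minimal sketch of the standard
# definitions (hypothetical implementations; the project's own helpers may
# differ in detail):
from itertools import combinations

def _pair_counts(truth, labels):
    ss = sd = ds = dd = 0
    for i, j in combinations(range(len(truth)), 2):
        same_truth = truth[i] == truth[j]
        same_label = labels[i] == labels[j]
        if same_truth and same_label:
            ss += 1  # same cluster in both
        elif same_truth:
            sd += 1  # same in truth, split by computed labels
        elif same_label:
            ds += 1  # split in truth, merged by computed labels
        else:
            dd += 1  # different cluster in both
    return ss, sd, ds, dd

def rand_score(truth, labels):
    ss, sd, ds, dd = _pair_counts(truth, labels)
    return (ss + dd) / (ss + sd + ds + dd)

def jaccard_coeff(truth, labels):
    ss, sd, ds, _ = _pair_counts(truth, labels)
    return ss / (ss + sd + ds)
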
def run_training(model, sess_context_manager, summary_writer):
    """Repeatedly runs training iterations, logging loss to screen and writing summaries"""
    tf.logging.info("starting run_training")
    with sess_context_manager as sess:
        tf.train.start_queue_runners(sess=sess)
        if FLAGS.debug:  # start the tensorflow debugger
            sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        num_batch = 0
        while True:  # repeats until interrupted
            if num_batch % FLAGS.logging_step == 0:
                tf.logging.info('------ number of batches: ' + str(num_batch) + ' ------')
                t0 = time.time()
                model.run_train_step(sess, summary_writer, logging=True)
                t1 = time.time()
                tf.logging.info('seconds for training step: %.3f', t1 - t0)

                tf.logging.info("sampling from the generator")
                sampling_result = model.sample_generator(sess)
                if FLAGS.dataset_id == "mnist":
                    util.plot(sampling_result['g_sample'], num_batch, 1)
                elif FLAGS.dataset_id == "cifar":
                    util.plot(sampling_result['g_sample'], num_batch, 3)
                    print(model.run_eval_step(sess))
            else:  # no logging
                model.run_train_step(sess, summary_writer, logging=False)
            num_batch += 1

def main():
    args = setup_argparser().parse_args()
    filepath = args.file
    min_pts = args.min_points
    eps = args.eps
    logging.info(args)
    data, truth_clusters = import_file(filepath, correct_clusters=False)
    db = DBSCAN(eps=eps, min_points=min_pts)
    db.dbscan(data)
    logging.info("Rand Index: {}".format(rand_score(truth_clusters, db.labels)))
    logging.info("Jaccard Coefficient: {}".format(
        jaccard_coeff(truth_clusters, db.labels)))
    # There's barely any difference between our labels and sklearn's - this looks correct
    # We apply PCA dim reduction to both data and centroids to be able to plot them
    plot(reduce_dimensionality(data), truth_clusters, None, suffix="dbscan_truth")
    plot(reduce_dimensionality(data), db.labels, None, suffix="dbscan_computed")
    return

def fit(self, x, y):
    """Run Newton's Method to minimize J(theta) for logistic regression.

    Args:
        x: Training example inputs. Shape (m, n).
        y: Training example labels. Shape (m,).
    """
    # *** START CODE HERE ***
    self.theta = np.zeros(x.shape[1])
    count = 0
    alpha = self.step_size
    N = self.max_iter
    epsilon = self.eps
    theta_i = self.theta

    # First Newton step (scaled by the step size)
    temp = theta_i
    theta_i = temp - alpha * (np.linalg.inv(self.hessian(temp, x, y)).dot(
        self.gradient(temp, x, y)))
    count += 1
    # Iterate until the change between iterates falls below epsilon
    while count < N and norm(temp, theta_i) >= epsilon:
        temp = theta_i
        theta_i = temp - np.linalg.inv(self.hessian(temp, x, y)).dot(
            self.gradient(temp, x, y))
        count += 1
    util.plot(x, y, theta_i, correction=1.0)
    self.theta = theta_i
    # *** END CODE HERE ***

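# The hessian, gradient, and norm helpers used by fit above are not shown.
# A minimal sketch consistent with the call sites (assumed implementations;
# norm here measures the distance between consecutive iterates):
import numpy as np

def gradient(self, theta, x, y):
    # Average gradient of the logistic loss
    h = 1 / (1 + np.exp(-x.dot(theta)))
    return x.T.dot(h - y) / x.shape[0]

def hessian(self, theta, x, y):
    # X^T diag(h(1-h)) X / m
    h = 1 / (1 + np.exp(-x.dot(theta)))
    return (x.T * (h * (1 - h))).dot(x) / x.shape[0]

def norm(prev_theta, theta):
    return np.linalg.norm(theta - prev_theta, ord=1)
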
def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
    # Initialize target network weights
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    break_flag = 0
    for e in range(episodes):
        score, step, loss = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score,
              " loss:", loss, " time:", time.time() - start)
        # break_flag = break_flag + 1 if step == max_steps else 0
        # if break_flag > 60: break
    saver = tf.train.Saver()
    saver.save(self.session, self.train_path)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean step:", np.mean(steps), " Total steps:", np.sum(steps),
              " total time:", t_time)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size) + '_'
                + str(self.n_neighbors) + "_scores", scores)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size) + '_'
                + str(self.n_neighbors) + "_time", t_time)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size) + '_'
                + str(self.n_neighbors) + "_steps", steps)
        plot(steps)
        plot_running_avg(steps)

def plot(self, images):
    perrow = 5
    num, c, w, h = images.size()
    rows = int(math.ceil(num / perrow))
    means, sigmas, values, _ = self.hyper(images)
    images = images.data

    plt.figure(figsize=(perrow * 3, rows * 3))
    for i in range(num):
        ax = plt.subplot(rows, perrow, i + 1)
        im = np.transpose(images[i, :, :, :].cpu().numpy(), (1, 2, 0))
        im = np.squeeze(im)
        ax.imshow(im, interpolation='nearest',
                  extent=(-0.5, w - 0.5, -0.5, h - 0.5), cmap='gray_r')
        util.plot(means[i, :, 1:].unsqueeze(0),
                  sigmas[i, :, 1:].unsqueeze(0),
                  values[i, :].unsqueeze(0),
                  axes=ax, flip_y=h, alpha_global=0.2)
    plt.gcf()

def committee(solver, solver_name, intervals, reps):
    np.random.seed()
    X, y = util.basic_data()
    polls = util.add_noise(y)
    curr_labels = np.random.choice(range(len(X)), size=4, replace=False)
    X_train = X[curr_labels]
    square_errors = np.zeros([2, len(intervals)])
    for i in range(len(intervals)):
        print("interval: ", intervals[i])
        for j in range(reps):
            while len(curr_labels) <= intervals[i]:
                next_points = next_countys(solver, curr_labels, X, polls)
                curr_labels = np.append(curr_labels, next_points)
            curr_labels = curr_labels[:intervals[i]]
            preds = solver(X, X[curr_labels], polls[curr_labels])
            square_errors[:, i] += util.square_error(y, preds)
        square_errors[:, i] /= reps
    square_errors = np.vstack(
        (square_errors.mean(axis=0),
         util.performance(solver, intervals, reps).mean(axis=0)))
    util.plot("committee", intervals / len(X), square_errors,
              legend=[solver_name, "random"],
              x_label="% counties", y_label="MSE", title="Committee")

def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to save_path
    model = GDA()
    model.fit(x_train, y_train)

    x_val, y_val = util.load_dataset(valid_path, add_intercept=False)
    util.plot(x_val, y_val, model.theta,
              save_path=save_path.replace(".txt", ".jpg"))
    yhat = model.predict(x_val)
    np.savetxt(save_path, yhat)
    print(f"GDA acc: {util.compute_accuracy(y_val, yhat)}")
    print(f"GDA log loss: {util.compute_log_loss(y_val, yhat)}")

def train(self, episodes=1000, max_steps=800, plot_rewards=True):
    # Initialize target network weights
    self.actor.update_target_model(copy=True)
    self.critic.update_target_model(copy=True)
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    for e in range(episodes):
        score, step = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score,
              " time:", time.time() - start)
    ensure_saved_models_dir()
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps),
              " total time:", t_time)
        plot(scores)
        plot_running_avg(scores)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size)
                + str(self.n_neighbors) + "_scores", scores)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size)
                + str(self.n_neighbors) + "_time", t_time)
        np.save("./train_data/ddpg_enc_actions" + str(self.state_size)
                + str(self.n_neighbors) + "_steps", steps)

def main(train_path, eval_path, pred_path):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to pred_path
    initial_theta = np.zeros(x_train.shape[1])
    log_reg = LogisticRegression(step_size=0.2, max_iter=100, eps=1e-5,
                                 theta_0=initial_theta, verbose=True)
    log_reg.fit(x_train, y_train)
    prediction = log_reg.predict(x_eval)
    plot_path = pred_path + ".plot.png"
    util.plot(x_eval, y_eval, log_reg.theta, plot_path, correction=1.0)
    np.savetxt(pred_path, prediction)

def main(train_path, valid_path, save_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predictions using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    clf = GDA()
    clf.fit(x_train, y_train)

    # Plot decision boundary on validation set (loaded with an intercept
    # column so that x matches the shape of clf.theta)
    x_valid, y_valid = util.load_dataset(valid_path, add_intercept=True)
    util.plot(x_valid, y_valid, clf.theta, save_path[:-4], correction=1)

    # Use np.savetxt to save outputs from validation set to save_path;
    # writing the training-set predictions first would only be overwritten
    np.savetxt(save_path, clf.predict(x_valid))

def main(train_path, eval_path, pred_path, k=0):
    """Problem 1(b): Logistic regression with Newton's Method.

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    x_train, y_train = util.load_dataset(train_path, add_intercept=True)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a logistic regression classifier
    # Plot decision boundary on top of validation set
    # Use np.savetxt to save predictions on eval set to pred_path
    clf = LogisticRegression()
    theta = clf.fit(x_train, y_train)
    p = clf.predict(x_eval)
    if k == 0:
        np.savetxt(pred_path, p, delimiter=',')
        sp = 'output/p01b_plot'
        util.plot(x_eval, y_eval, theta, sp)
    elif k == 1:
        # Threshold probabilities into hard 0/1 labels
        p[p < 0.5] = 0
        p[p >= 0.5] = 1
    return theta, p

def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    for e in range(episodes):
        score, step = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score,
              " epsilon:", self.epsilon, " time:", time.time() - start)
        '''if e % 100 == 0:
            ensure_saved_models_dir()
            self.model.save_weights(FINAL_WEIGHTS_PATH)
            print("Weights Saved")'''
    ensure_saved_models_dir()
    self.model.save_weights(FINAL_WEIGHTS_PATH)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps),
              " total time:", t_time)
        plot(scores)
        plot_running_avg(scores)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_scores", scores)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_time", t_time)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_steps", steps)

def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    model = GDA()
    # Fit model to the training data. Define theta
    model.fit(x_train, y_train)
    # Read validation set
    x_val, y_val = util.load_dataset(valid_path, add_intercept=True)
    # Save predictions to save_path
    np.savetxt(save_path, model.predict(x_val))
    # Plot boundaries
    util.plot(x_val, y_val, model.theta, save_path[:-4])

def main(train_path, eval_path, pred_path):
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)
    x_eval, y_eval = util.load_dataset(eval_path, add_intercept=True)

    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to pred_path
    gda = GDA(verbose=True)
    gda.fit(x_train, y_train)
    prediction = gda.predict(x_eval)
    plot_path = pred_path + ".plot.png"
    util.plot(x_eval, y_eval, gda.theta, plot_path, correction=1.0)
    np.savetxt(pred_path, prediction)

def main(args):
    start_epoch = 0
    if args.resume:
        print('Resuming from checkpoint at ckpts/flow.pth.tar...')
        checkpoint = torch.load('ckpts/flow.pth.tar')
        flow.load_state_dict(checkpoint['flow'])
        start_epoch = checkpoint['epoch'] + 1

    for epoch in range(start_epoch, start_epoch + args.epoch):
        for i, x in enumerate(dataloader):
            x = x.to(device)
            optim_flow.zero_grad()
            loss_flow = -flow.log_prob(x).mean()
            loss_flow.backward()
            optim_flow.step()
            print("[Epoch %d/%d] [Batch %d/%d] [Loss: %f]"
                  % (epoch, start_epoch + args.epoch, i, len(dataloader),
                     loss_flow.item()))

        print('Saving flow model to ckpts/flow.pth.tar...')
        state = {
            'flow': flow.state_dict(),
            'value': loss_flow,
            'epoch': epoch,
        }
        os.makedirs('ckpts', exist_ok=True)
        torch.save(state, 'ckpts/flow.pth.tar')

        # visualization
        util.plot(dataset, flow, epoch, device)

def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)
    x_eval, y_eval = util.load_dataset(valid_path, add_intercept=False)

    # *** START CODE HERE ***
    # Train a GDA classifier
    clf = GDA()
    clf.fit(x_train, y_train)
    preds = clf.predict(x_eval)

    # Plot decision boundary on validation set
    theta_ = np.insert(clf.theta, 0, clf.theta_zero)
    # str.strip('.txt') strips characters, not the suffix; slice it off instead
    save_path_ = save_path[:-4]
    util.plot(x_eval, y_eval, theta_, save_path_)

    # Use np.savetxt to save outputs from validation set to save_path
    np.savetxt(save_path, preds)

def analyze_test_info(data: list, title: str, graph_name: str):
    data_str = ' '.join(data)
    total_count = 20
    glossary = GlossaryScraper()
    # Escape glossary terms so they are matched literally, not as regexes
    freq_words = [(w, len(re.findall(re.escape(w), data_str, re.IGNORECASE)))
                  for w in glossary.glossaries]
    sorted_freq_words = sorted(freq_words, key=itemgetter(1), reverse=True)
    fig1 = plot(sorted_freq_words[0:total_count],
                '{} (Test Topics)'.format(title), 'Word Counts')
    fig1.savefig('{}_glossaries.png'.format(graph_name))
    fig2 = plot(get_word_frequencies(data, total_count),
                '{} (Word Frequencies)'.format(title), 'Word Counts')
    fig2.savefig('{}_word_frequencies.png'.format(graph_name))

def main():
    iterations = []
    inv_iterations = []
    dets = []
    traces = []
    total_fails = 0
    for _ in range(200):
        vec = None
        inv_vec = None
        tries = -1
        # Retry with fresh matrices until power iteration converges for both
        # A and its inverse
        while vec is None or inv_vec is None:
            a, a_inv = get_matrix()
            i = 0
            while vec is None and i < len(VECTORS):
                val, vec, n_iter = power(a, VECTORS[i], VECTORS[i], 0.00005, 100)
                i += 1
            i = 0
            while inv_vec is None and i < len(VECTORS):
                inv_val, inv_vec, inv_n_iter = power(a_inv, VECTORS[i], VECTORS[i], 0.00005, 100)
                i += 1
            tries += 1
        total_fails += tries
        iterations.append(n_iter / 100.)
        inv_iterations.append(inv_n_iter / 100.)
        traces.append(trace(a))
        dets.append(determinant_2_2(a))
    util.plot([min(dets) - .1, max(dets) + .1, min(traces) - .1, max(traces) + .1],
              dets, traces, iterations,
              "Determinant", "Trace",
              "Determinant vs Trace by A Power Iteration",
              "writing/3/writing_3_a")
    util.plot([min(dets) - .1, max(dets) + .1, min(traces) - .1, max(traces) + .1],
              dets, traces, inv_iterations,
              "Determinant", "Trace",
              "Determinant vs Trace by A Inverse Power Iteration",
              "writing/3/writing_3_a_inv")
    print("Number of rejected matrices: ", total_fails)

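# The power() helper above is not shown. A minimal power-iteration sketch
# matching its call signature (the second start vector is accepted to mirror
# the call site but unused here; hypothetical implementation that returns
# None for the vector on failure, matching the caller's retry-on-None logic):
import numpy as np

def power(a, v, v_alt, tol, max_iter):
    """Estimate the dominant eigenvalue/eigenvector of a by power iteration."""
    v = np.asarray(v, dtype=float)
    v = v / np.linalg.norm(v)
    for n_iter in range(1, max_iter + 1):
        w = a.dot(v)
        norm_w = np.linalg.norm(w)
        if norm_w == 0:
            return None, None, n_iter  # degenerate start vector
        w = w / norm_w
        if np.linalg.norm(w - v) < tol:
            eigval = w.dot(a.dot(w))  # Rayleigh quotient
            return eigval, w, n_iter
        v = w
    return None, None, max_iter  # did not converge
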
y = "".join([random.choice(z) for _ in range(size)]) # Sequence Alignment v, cost = linear_sequence_alignment(x, y, g, a) out_x, out_y = get_sequence_linear(x, y, g, a) elapsed = (time.clock() - start_time)/60 used_men = memory_usage() - start_mem # Saves chain size result[0].append(size) # Saves time result[1].append(elapsed) # Saves consumed memory result[2].append(used_men) print "i = %d, j = %d\nElapsed Time: %.3f mins\nUsed memory: %.3f MB" % (i, j, elapsed, used_men) # Releasing unused memory del x, y, v, out_x, out_y gc.collect() if elapsed > 15: return result return result if __name__ == '__main__': result = task2_linear() plot(result)