def calibrate_sensor(sensor, measurements, verbose):
    parameters = PARAMETERS[sensor]
    if verbose:
        print("found %d records" % len(measurements))
    flt_meas, flt_idx = utils.filter_meas(measurements,
                                          parameters.noise_window,
                                          parameters.noise_threshold)
    if verbose:
        print("remaining %d after low pass" % len(flt_meas))
    p0 = utils.get_min_max_guess(flt_meas, parameters.sensor_ref)
    cp0, np0 = utils.scale_measurements(flt_meas, p0)
    print("initial guess : avg %f std %f" % (np0.mean(), np0.std()))

    def err_func(p, meas, y):
        cp, np = utils.scale_measurements(meas, p)
        err = y * scipy.ones(len(meas)) - np
        return err

    p1, success = scipy.optimize.leastsq(err_func, p0[:],
                                         args=(flt_meas, parameters.sensor_ref))
    cp1, np1 = utils.scale_measurements(flt_meas, p1)
    print("optimized guess : avg %f std %f" % (np1.mean(), np1.std()))
    utils.print_xml(p1, sensor, parameters.sensor_res)
    print("")
    utils.plot_results(measurements, flt_idx, flt_meas, cp0, np0, cp1, np1,
                       parameters.sensor_ref)

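# A minimal, self-contained sketch of the residual-function pattern that
# calibrate_sensor feeds to scipy.optimize.leastsq, on synthetic data. The
# linear scale/offset model here is an assumption for illustration; the real
# scaling lives in the project-local utils.scale_measurements.
import numpy as np
from scipy.optimize import leastsq

true_scale, true_offset = 2.5, -1.0
meas = np.linspace(0.0, 10.0, 50)
ref = true_scale * meas + true_offset + 0.01 * np.random.randn(50)

def toy_err_func(p, meas, y):
    scale, offset = p
    # residual: reference minus the scaled measurement
    return y - (meas * scale + offset)

p1, success = leastsq(toy_err_func, [1.0, 0.0], args=(meas, ref))
print("fitted scale %.3f offset %.3f" % (p1[0], p1[1]))  # ~2.5, ~-1.0
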
def new_experiment(dataset_filename, network_filename):
    # NOTE: the arguments are immediately overridden by these hard-coded paths.
    dataset_filename = '../reddit-comments-may-2015/TipOfMyTongue_sub.db'
    network_filename = 'TipOfMyTongue_sub_network_Dec_2020.txt'
    graph_engineering.db_to_graph(dataset_filename, network_filename, parenting=False)

    print('Community detection...')
    topological_community, used_authors, numClusters = \
        graph_engineering.community_detection(network_filename)
    print('Used authors : ' + str(used_authors))

    # Feature Extraction
    print('Feature Extraction....')
    feature_file_list = feature_engineering.extract_features(dataset_filename, used_authors)

    # Cluster communities
    # TODO different community detection algorithms
    # TODO number of clusters based on how many communities
    print('Cluster communities...')
    clusters = []
    clusters.append(feature_to_cluster(feature_file_list[1:8], numClusters))

    # Evaluation
    print('Evaluations.....')
    cluster_names = ['K-means']
    for i, cluster in enumerate(clusters):
        evaluations = utils.evaluate_cluster_to_community(topological_community, cluster, 5)
        utils.plot_results(evaluations, cluster_names[i],
                           "results/" + os.path.basename(dataset_filename)[:-3] +
                           "_result_" + cluster_names[i].replace(" ", "_") + ".png")

def main(algorithm, data, cl_labels, min_k, max_k, max_iterations, epsilon):
    results = []
    silhouette, chs, ssws, ssbs, ars, hom, comp = [], [], [], [], [], [], []
    membership, centroids, labels = [], [], []
    for c in range(min_k, max_k + 1):
        if algorithm == 'kmeans':
            labels, centroids = kmeans.kmeans(data, c)
        elif algorithm == 'bisecting_kmeans':
            labels, centroids = bisecting_kmeans.bisecting_kmeans(data, c)
        elif algorithm == 'fuzzy_cmeans':
            membership, centroids = fuzzyCmeans.execute(data, max_iterations, c, epsilon)
            labels = fuzzyCmeans.get_labels(len(data), membership)
        silhouette.append((c, metrics.silhouette_score(data, labels, metric='euclidean')))
        # renamed to calinski_harabasz_score in newer scikit-learn releases
        chs.append((c, metrics.calinski_harabaz_score(data, labels)))
        ssws.append((c, utils.get_ssw(data, centroids, labels)))
        ssbs.append((c, utils.get_ssb(centroids)))
        ars.append((c, metrics.adjusted_rand_score(cl_labels, labels)))
        hom.append((c, metrics.homogeneity_score(cl_labels, labels)))
        comp.append((c, metrics.completeness_score(cl_labels, labels)))
    # zip() returns an iterator in Python 3 and cannot be indexed directly,
    # so unpack each (k, value) series once before building the plot tuples.
    for name, series, subplot, color in [
            ("Silhouette", silhouette, 333, "blue"),
            ("Calinski-Harabaz Index", chs, 334, "blue"),
            ("Intra cluster Variance", ssws, 331, "blue"),
            ("Inter cluster Variance", ssbs, 332, "blue"),
            ("Adjusted Rand Index", ars, 335, "orange"),
            ("Homogeneity", hom, 336, "orange"),
            ("Completeness", comp, 337, "orange")]:
        ks, values = zip(*series)
        results.append((name, "", ks, "", values, subplot, color))
    print(labels)
    utils.plot_results(results, algorithm)

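# Hedged usage sketch for main() above: the kmeans/bisecting_kmeans/fuzzyCmeans
# modules are project-local, so scikit-learn's KMeans on synthetic blobs stands
# in here to show the internal (silhouette) vs. external (ARI) metric loop.
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

data, cl_labels = make_blobs(n_samples=300, centers=4, random_state=0)
for c in range(2, 7):
    labels = KMeans(n_clusters=c, n_init=10, random_state=0).fit_predict(data)
    print(c,
          round(metrics.silhouette_score(data, labels, metric='euclidean'), 3),
          round(metrics.adjusted_rand_score(cl_labels, labels), 3))
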
def train(self):
    self.training_miss_classifications = []
    self.testing_miss_classifications = []
    self.session.run(self.initialize)
    lr = self.config[self.network_type]["initial_learning_rate"]
    x_test, y_test = self.mnist.test.images, self.mnist.test.labels
    for epoch in range(self.config["num_epoch"]):
        miss_classes = []
        mu = U.get_momentum(epoch)
        for itr in range(self.num_itr):
            x_train, y_train = self.mnist.train.next_batch(self.config["batch_size"])
            if self.config[self.network_type]["jitter_images"]:
                x_train = U.jitter_images(x_train)
            _, miss_class, logits_val = self.session.run(
                [self.train_op, self.missclassification_error, self.logits],
                feed_dict={self.x: x_train,
                           self.y: y_train,
                           self.keep_prob_hidden_unit: self.config[self.network_type]["keep_prob_hidden_unit"],
                           self.keep_prob_visible_unit: self.config[self.network_type]["keep_prob_visible_unit"],
                           self.learning_rate: lr,
                           self.momentum: mu,
                           self.max_norm: self.config["max_norm_val"]})
            miss_classes.append(miss_class)
        lr *= self.config["learning_rate_decay"]
        if (epoch + 1) % self.config["show_every"] == 0:
            test_miss_class = self.session.run(self.missclassification_error,
                                               feed_dict={self.x: x_test, self.y: y_test})
            print("epoch: {0}, train_miss_class: {1:0.0f}, test_miss_class: {2:0.0f}"
                  .format(epoch, np.sum(miss_classes), test_miss_class))
            self.training_miss_classifications.append(np.sum(miss_classes))
            self.testing_miss_classifications.append(test_miss_class)
    U.save_data(self.training_miss_classifications,
                self.testing_miss_classifications, self.network_type)
    U.plot_results(self.training_miss_classifications,
                   self.testing_miss_classifications, self.network_type)

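# U.get_momentum above is project-local. A common choice (an assumption here,
# not necessarily what U implements) ramps momentum up over the first epochs
# and then holds it constant:
def get_momentum(epoch, start=0.5, end=0.99, ramp_epochs=500):
    if epoch >= ramp_epochs:
        return end
    return start + (end - start) * epoch / ramp_epochs
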
def main(verbose=False, plot=False, save=False, random_ar_param=True):
    # load configuration dicts. Could be implemented to load from JSON instead.
    data_config, model_config, train_config = load_config(verbose, random_ar_param)
    # loads randomly generated data. Could be implemented to load a specific dataset instead.
    data = load_data(data_config, verbose, plot)
    # runs training and testing.
    results_dar, stats_dar = run_training(data, model_config, train_config, verbose)
    # optional printing
    if verbose:
        print(stats_dar)
    # optional plotting
    if plot:
        utils.plot_loss_curve(losses=results_dar["losses"],
                              test_loss=results_dar["test_mse"],
                              epoch_losses=results_dar["epoch_losses"],
                              show=False, save=save)
        utils.plot_weights(model_config["ar"], results_dar["weights"],
                           data["ar"], model_name="AR-Net", save=save)
        utils.plot_results(results_dar, model_name="AR-Net", save=save)

def SGD(self, training_data, epochs, mini_batch_size, learning_rate,
        test_data=None, full_batch=False):
    if test_data:
        # https://github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/ea229ac6234b7f3373f351f0b18616ca47edb8a1/network.py#L62
        # test_data = list(test_data)
        n_test = len(test_data)
        test_results = []
    # https://github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/ea229ac6234b7f3373f351f0b18616ca47edb8a1/network.py#L58
    # training_data = list(training_data)
    n = len(training_data)
    for t in range(epochs):
        random.shuffle(training_data)
        mini_batches = [training_data[k: k + mini_batch_size]
                        for k in range(0, n, mini_batch_size)]
        for mini_batch in mini_batches:
            # Update parameters
            if full_batch is False:
                # mini-batch: a list of tuples, tuple: (x,y), x,y: arrays
                self.update_mini_batch(mini_batch, learning_rate)
            else:
                # Use full matrix of the mini-batch
                self.update_full_batch(mini_batch, learning_rate)
        if test_data:
            test_result = self.evaluate(test_data)
            test_results.append(test_result)
            print("Epoch {}: {} / {}".format(t, test_result, n_test))
        else:
            print("Epoch {} complete".format(t))
    if test_data:
        plot_results(test_results)

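# The mini-batch split used by SGD above, isolated as a runnable sketch; the
# (x, y) pairs are placeholders for the network's real training tuples.
import random

training_data = [(i, 2 * i) for i in range(10)]
mini_batch_size = 3
random.shuffle(training_data)
mini_batches = [training_data[k: k + mini_batch_size]
                for k in range(0, len(training_data), mini_batch_size)]
print([len(mb) for mb in mini_batches])  # the last batch may be smaller
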
def train(lr=0.0075, nb_epoch=10, batch_size=256, verbose=1):
    X_train, y_train, X_test, y_test = build_training_data()

    model = Sequential()
    lstm = build_partial_lstm_model()
    mlp = build_partial_mlp_model()

    # To SUM you'll have to match the outputs of the partial networks to be the
    # same size, aka 64 as it is now. Also, to be able to SUM/MEAN etc. we need
    # to oversize the LSTM a bit to match the output of the MLP, so if the LSTM
    # overfits a bit at the end, now you know why.
    # model.add(Merge([mlp, lstm], mode='sum'))

    # Concat will work with different sizes
    model.add(Merge([mlp, lstm], mode='concat'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=RMSprop(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit([X_train, X_train], y_train,
                        nb_epoch=nb_epoch, batch_size=batch_size,
                        validation_data=([X_test, X_test], y_test),
                        callbacks=callbacks(), verbose=verbose)
    score = model.evaluate([X_test, X_test], y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    plot_results(history, score[1], 'MLP w/ LSTM')

def test(model, x_test, y_test, opt):
    model = torch.load('model/model.pkl')
    model.eval()
    pred_dat = []
    h, c = model.init_state()
    seq_len = x_test.shape[1]
    for i in range(0, seq_len):
        x = ToVariable(x_test[:, i, :])
        x = x.view(-1, 1, 1)
        pre_out, h, c = model(x, h, c)
        h = h.data
        c = c.data
        if use_cuda:
            pred_dat.append(pre_out.data.cpu().numpy())
        else:
            pred_dat.append(pre_out.data.numpy())
    pred_dat = np.array(pred_dat)
    pred_dat = pred_dat.transpose(1, 0, 2)
    # undo the [-1, 1] scaling: x = (x_norm * (max - min) + (max + min)) / 2
    pred_dat = (pred_dat[:, :, 0] * (opt.max_data - opt.min_data)
                + (opt.max_data + opt.min_data)) / 2
    y_test = (y_test[:, :, 0] * (opt.max_data - opt.min_data)
              + (opt.max_data + opt.min_data)) / 2
    error = np.sum((pred_dat[:, -opt.test_len:] - y_test[:, -opt.test_len:]) ** 2) \
        / (opt.test_len * pred_dat.shape[0])
    print('The mean square error is: %f' % error)
    plot_results(pred_dat[0, -opt.test_len:], y_test[0, -opt.test_len:])

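# Sanity check for the de-scaling used in test() above: if data was mapped to
# [-1, 1] with x_norm = (2*x - (max + min)) / (max - min), then
# x = (x_norm * (max - min) + (max + min)) / 2 recovers it exactly.
import numpy as np

x = np.array([3.0, 7.5, 12.0])
lo, hi = x.min(), x.max()
x_norm = (2 * x - (hi + lo)) / (hi - lo)
assert np.allclose(x, (x_norm * (hi - lo) + (hi + lo)) / 2)
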
def process_results(self, true: list, predicted: list, name=None, **plot_args):
    errs = dict()
    for out, out_name in enumerate(self.out_cols):
        t = true[out]
        p = predicted[out]
        if np.isnan(t).sum() > 0:
            mask = np.invert(np.isnan(t))
            t = t[mask]
            p = p[mask]
        errors = FindErrors(t, p)
        errs[out_name + '_errors'] = errors.calculate_all()
        errs[out_name + '_stats'] = errors.stats()
        plot_results(t, p, name=os.path.join(self.path, name + out_name), **plot_args)
    save_config_file(self.path, errors=errs, name=name)
    return

def train(self, num_epochs, model, saved_dir, device, criterion, optimizer, val_every):
    if criterion is None:
        criterion = torch.nn.BCELoss()
    if optimizer is None:
        optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)
    best_loss = 9999
    avg_val_loss_list = []
    avg_train_loss_list = []
    for epoch in range(num_epochs):
        temp_epoch_loss = []
        for step, (sequence, target) in enumerate(self.train_loader):
            sequence, target = sequence.to(device), target.to(device)
            outputs, _ = model(sequence)
            loss = criterion(outputs, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (step + 1) % 25 == 0:
                print("Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                    epoch + 1, num_epochs, step + 1, len(self.train_loader), loss.item()))
            temp_epoch_loss.append(loss.item())
        avg_train_loss = sum(temp_epoch_loss) / len(temp_epoch_loss)
        if (epoch + 1) % val_every == 0:
            # Compare and save the best model
            avg_loss = self.validation(epoch + 1, model, self.val_loader, criterion, device)
            if avg_loss < best_loss:
                print("Best performance at epoch: {}".format(epoch + 1))
                print("Save model in", saved_dir)
                best_loss = avg_loss
                # save_model(model, optimizer, epoch, best_loss, saved_dir)
                if len(avg_val_loss_list) == 0:
                    save_model(model, optimizer, epoch, avg_train_loss, best_loss, saved_dir)
                else:
                    save_model(model, optimizer, epoch, avg_train_loss_list,
                               avg_val_loss_list, saved_dir)
            avg_train_loss_list.append(avg_train_loss)
            avg_val_loss_list.append(avg_loss)
    # NOTE: the loss lists only grow every val_every epochs, so this x-axis
    # only lines up with them when val_every == 1.
    plot_results(np.arange(0, num_epochs), avg_train_loss_list, avg_val_loss_list)

def exercise_3():
    folder = './data/'
    ext = '.csv'
    file_names = ['balance', 'phoneme', 'sonar']
    classifiers = [
        (KNeighborsClassifier(n_neighbors=5), 'k-NN'),
        (SVC(kernel="linear", C=0.025), 'SVC'),
        (DecisionTreeClassifier(max_depth=5), 'Decision Tree'),
    ]
    data = {}
    for fn in file_names:
        X, y, _ = prepare_data_from_file(folder + fn + ext)
        data[fn] = {}
        for clf, clf_name in classifiers:
            kf = KFold(n_splits=10, shuffle=True)
            learning_times = []
            prediction_times = []
            score_array = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                start = time.time()
                clf.fit(X_train, y_train)
                end = time.time()
                learning_times.append(end - start)
                start = time.time()
                y_pred = clf.predict(X_test)
                end = time.time()
                prediction_times.append(end - start)
                score_array.append(accuracy_score(y_test, y_pred))
            data[fn][clf_name] = [
                np.mean(learning_times),
                np.mean(prediction_times),
                np.mean(score_array),
            ]
    for fn, clfs in data.items():
        classifier_names = [*clfs.keys()]
        values = [*clfs.values()]
        learning_times = [v[0] for v in values]
        prediction_times = [v[1] for v in values]
        scores = [v[2] for v in values]
        plot_results(learning_times, scores, classifier_names, fn)

def set_annotations_and_plot(file_name, anndf, likelihood_column, plot):
    print('Reading results...')
    df = pd.read_csv(file_name, parse_dates=True, index_col='timestamp')
    df['Annotation'] = anndf.Aux
    print('Writing annotated results...')
    df.to_csv(file_name)
    if plot:
        utils.plot_results(df, 'Resp', 'anomaly_score', likelihood_column, '.*[HOXC].*')
    return df

def plot_hogwild():
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    # --- 2. plot hogwild for various values of K
    n_runs = 3
    n_workers = 8
    T = 1000000
    alpha = 0.33
    beta = 0.37
    theta = 0.2
    results = [
        AvgLogger([
            train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test,
                          T=T, alpha=alpha, beta=beta, K=K, theta=theta,
                          n_processes=n_workers, sequential=False, seed=s)[1]
            for s in range(n_runs)
        ]) for K in [3, 10, 50]
    ]

    # --- 1. plot comparison between SGD and hogwild, fixed K
    # n_runs = 3
    # n_workers = 8
    # T = 1000000
    # alpha = 0.33
    # beta = 0.37
    # theta = 0.2
    # results = [AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T,
    #                   alpha=alpha, beta=beta, K=K, theta=theta,
    #                   n_processes=n_workers, sequential=False, seed=s)[1]
    #     for s in range(n_runs)
    # ]) for K in [3]]
    # results.append(AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T,
    #                   alpha=alpha, beta=beta, K=3, theta=theta,
    #                   n_processes=n_workers, sequential=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))
    # results.append(AvgLogger([
    #     train_sgd(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T,
    #               alpha=alpha, return_avg=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))

    plot_results(
        results,
        add_to_title=rf" ($\alpha={alpha}, \beta={beta}, \theta={theta}$, n_runs={n_runs})")

def process_results(self, true, predicted, name=None):
    if np.isnan(true).sum() > 0:
        mask = np.invert(np.isnan(true.reshape(-1,)))
        true = true[mask]
        predicted = predicted[mask]
    errors = FindErrors(true, predicted)
    for er in ['mse', 'rmse', 'r2', 'nse', 'kge', 'rsr', 'percent_bias']:
        print(er, getattr(errors, er)())
    plot_results(true, predicted, name=os.path.join(self.path, name))
    return

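# The NaN-masking pattern from process_results above as a standalone check;
# FindErrors is project-local, so a plain numpy RMSE stands in for it here.
import numpy as np

true = np.array([1.0, np.nan, 3.0, 4.0])
predicted = np.array([1.1, 9.9, 2.9, 4.2])
mask = np.invert(np.isnan(true.reshape(-1,)))
print(np.sqrt(np.mean((true[mask] - predicted[mask]) ** 2)))
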
def generate_markov_model(self):
    # use percent return and try to minimize variance and maximize return
    # TODO: generate model every day
    self.model = mix.GaussianMixture(
        n_components=3,
        covariance_type="full",  # tied
        random_state=7,
        n_init=60)
    if 'date' in self.train.columns:
        self.train = self.train.set_index('date')
    self.model.fit(self.train[['return'] + self.features])
    if 'date' in self.test.columns:
        self.test = self.test.set_index('date')
    # TODO rename state with english text
    self.test['state'] = self.model.predict(self.test[['return'] + self.features])

    # get next day percent change
    self.test['next_day_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
    # self.test['close'] = self.test['close'].shift(-1)
    print(self.test)
    # debugging leftover, disabled:
    # import pdb; pdb.set_trace()
    self.test.to_csv('test.csv')

    # find the best state numbers
    results = []
    for i in range(self.model.n_components):
        results.append([i, self.model.means_[i][0],
                        np.diag(self.model.covariances_[i])[0]])
    result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
    result_df = result_df.set_index('state').sort_values(by=['mean'])
    result_df['state_names'] = ['sell', 'buy', 'strong_buy']
    self.result_df = result_df
    print(self.result_df)
    for i in self.result_df.index:
        group = self.test[self.test['state'] == i]['next_day_change']
        print(i, group.mean(), group.std())
    # for g, group in self.test.groupby(by='state'):
    #     print(g, group['next_day_change'].mean(), group['next_day_change'].std())
    states_used = result_df.index
    self.test['close'] = self.test['close'].shift(-1)
    plot_results(self.test.reset_index(), self.name, result_df.index)

def train(train_sets: tuple, test_sets: tuple, input_shape: tuple = (1, 128, 128, 1),
          model_version="1.0.0", epochs: int = 100, classes: int = 2,
          batch_size: int = 1, verbose=1, out_dir: str = "saved_models"):
    """
    Train the model.

    Parameters:
        train_sets (tuple): A tuple of np.array for train images and train labels.
        test_sets (tuple): A tuple of np.array for test images and test labels.
        input_shape (tuple): Input shape of the model, in the form (1, ..., ...).
        model_version (str): The version of the model in d.d.d format.
        epochs (int): The number of epochs.
        classes (int): The number of classes.
        batch_size (int): The batch size.
        verbose (bool): Whether to show the progress of each epoch.
        out_dir (str): The output dir for saving the model in.
    """
    (x_train, y_train), (x_test, y_test) = train_sets, test_sets
    y_train = keras.utils.to_categorical(y_train, classes)
    y_test = keras.utils.to_categorical(y_test, classes)
    m = get_model(model_version)
    if not m:
        return
    model = m.build_model(input_shape)
    model.compile(loss=BinaryCrossentropy(),
                  optimizer=RMSprop(learning_rate=0.0001),
                  metrics=['accuracy'])
    saver = ModelSaver(out_dir)
    csv_logger = CSVLogger(
        "%s/%s/log.csv" % (out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")),
        append=True, separator=',')
    history = model.fit(x_train, y_train,
                        batch_size=batch_size, epochs=epochs, verbose=verbose,
                        validation_data=(x_test, y_test),
                        callbacks=[saver, csv_logger])
    model.save("%s/%s/final.hd5" % (
        out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")))
    print("Model saved in %s as final.hd5" % out_dir)
    plot_results(history, epochs, out_dir)

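# ModelSaver above is project-local; a minimal stand-in built on the standard
# Keras callback API might look like this (the best.hd5 naming is assumed):
import keras

class ModelSaver(keras.callbacks.Callback):
    def __init__(self, out_dir):
        super().__init__()
        self.out_dir = out_dir
        self.best_loss = float("inf")

    def on_epoch_end(self, epoch, logs=None):
        # save whenever validation loss improves on the best seen so far
        val_loss = (logs or {}).get("val_loss")
        if val_loss is not None and val_loss < self.best_loss:
            self.best_loss = val_loss
            self.model.save("%s/best.hd5" % self.out_dir)
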
def get_results_with_pca(self):
    results = []
    for i in range(self.model.n_components):
        results.append([i, self.model.means_[i][0],
                        np.diag(self.model.covariances_[i])[0]])
    result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
    result_df = result_df.set_index('state').sort_values(by=['mean'])
    print(result_df)
    self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
    # self.test[['date', 'state', 'close', 'next_change']].to_csv('test.csv')
    for state in result_df.index:
        this_group = self.test.loc[self.test['state'] == state, 'next_change']
        print(state, float(this_group.mean()), float(this_group.std()))
    plot_results(self.test, self.name, result_df.index)

def get_results(self):
    results = []
    for i in range(self.model.n_components):
        results.append([i, self.model.means_[i][0],
                        np.diag(self.model.covariances_[i])[0]])
    result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
    result_df = result_df.set_index('state').sort_values(by=['mean'])
    print(result_df)
    self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
    # self.test[['date', 'state', 'close', 'next_change']].to_csv('test.csv')
    for state in result_df.index:
        this_group = self.test.loc[self.test['state'] == state, 'next_change']
        print(state, float(this_group.mean()), float(this_group.std()))
    plot_results(self.test, self.name, result_df.index)
    # Dead code below is kept verbatim from an earlier revision (a stray
    # unmatched triple quote at the end has been removed); it duplicates the
    # tail of generate_markov_model above.
    """
    # get next day percent change
    self.test['next_day_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
    #self.test['close'] = self.test['close'].shift(-1)
    print(self.test)
    import pdb; pdb.set_trace()
    self.test.to_csv('test.csv')

    # find the best state numbers
    results = []
    for i in range(self.model.n_components):
        results.append([i, self.model.means_[i][0],
                        np.diag(self.model.covariances_[i])[0]])
    result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
    result_df = result_df.set_index('state').sort_values(by=['mean'])
    result_df['state_names'] = ['sell', 'buy', 'strong_buy']
    self.result_df = result_df
    print(self.result_df)
    for i in self.result_df.index:
        group = self.test[self.test['state'] == i]['next_day_change']
        print(i, group.mean(), group.std())
    #for g, group in self.test.groupby(by='state'):
    #    print(g, group['next_day_change'].mean(), group['next_day_change'].std())
    states_used = result_df.index
    self.test['close'] = self.test['close'].shift(-1)
    plot_results(self.test.reset_index(), self.name, result_df.index)
    """

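# The "sort hidden states by mean return" step shared by generate_markov_model,
# get_results_with_pca and get_results above, as a self-contained sketch on
# synthetic returns (the sell/buy/strong_buy names mirror the code above):
import numpy as np
import pandas as pd
from sklearn import mixture as mix

rng = np.random.default_rng(7)
returns = np.concatenate([rng.normal(m, 0.005, 100)
                          for m in (-0.01, 0.0, 0.01)]).reshape(-1, 1)
model = mix.GaussianMixture(n_components=3, covariance_type="full",
                            random_state=7).fit(returns)
rows = [[i, model.means_[i][0], np.diag(model.covariances_[i])[0]]
        for i in range(model.n_components)]
result_df = (pd.DataFrame(rows, columns=['state', 'mean', 'var'])
             .set_index('state').sort_values(by=['mean']))
result_df['state_names'] = ['sell', 'buy', 'strong_buy']
print(result_df)
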
def showResults(self):
    if self.solver is None or self.solver.progress < 100:
        return
    self.setPlotOptions()
    self.saveParameters()  # only save parameters if there are plots to open
    self.opened_plots = utils.plot_results(
        pixels=self.solver.pixels,
        shift_x=self.solver.shift_x,
        shift_y=self.solver.shift_y,
        shift_p=self.solver.shift_p,
        shift_x_y_error=self.solver.shift_x_y_error,
        box_shift=self.solver.box_shift,
        fps=self.solver.fps,
        res=self.solver.res,
        input_path=self.fileName,
        output_basepath=self.output_basepath,
        plots_dict=self.plots_dict,
        boxes_dict=self.boxes_dict,
        chop_duration=float(self.lineEdit_chop_sec.text()),
        start_frame=self.solver.start_frame)
    print("%d plots shown." % (len(self.opened_plots)))

def eval(epoch, model, eval_loader, device, writer, height, width, batch_size):
    model.eval()
    running_loss = 0.0
    inputs_epoch, labels_epoch, latent_variables_epoch, reconstructions_epoch = \
        torch.Tensor(), torch.LongTensor(), torch.Tensor(), torch.Tensor()
    with torch.no_grad():
        for data in eval_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            reconstructions, latent_variables = model(inputs)
            loss = model.loss_function(inputs) / batch_size
            running_loss += loss.item()
            inputs_epoch = torch.cat([inputs_epoch, inputs.cpu().detach()], dim=0)
            labels_epoch = torch.cat([labels_epoch, labels.cpu().detach()], dim=0)
            latent_variables_epoch = torch.cat(
                [latent_variables_epoch, latent_variables.cpu().detach()], dim=0)
            reconstructions_epoch = torch.cat(
                [reconstructions_epoch, reconstructions.cpu().detach()], dim=0)
    fig = plot_results(model, inputs, reconstructions, latent_variables, height, width)
    writer.add_figure('results', fig, global_step=epoch)
    project_results(latent_variables_epoch, inputs_epoch, labels_epoch,
                    writer, epoch, height, width)
    eval_loss = running_loss / len(eval_loader)
    return eval_loss

def __init__(self, model_name, accum_rate, decay_rate, trade_states, index):
    self.index = index
    self.model_name = model_name
    self.bank_value = 10000
    self.accum_rate = accum_rate
    self.decay_rate = decay_rate
    self.trade_states = trade_states
    self.current_accum = self.accum_rate
    self.held_shares = {}
    self.held_shares['TQQQ'] = {'num_shares': 0}

    conn = sqlite3.connect('markov_models.db')
    sql = 'select * from trades where name = "%s"' % model_name
    self.trade_days = pd.read_sql(sql, conn)
    print(self.trade_days)
    plot_results(self.trade_days, model_name)
    return  # NOTE: this early return makes everything below unreachable

    # get TQQQ
    self.tqqq = yfinance.Ticker('TQQQ').history(
        period='5y', auto_adjust=False).reset_index()
    self.tqqq.columns = map(str.lower, self.tqqq.columns)
    self.qqq = yfinance.Ticker('QQQ').history(
        period='5y', auto_adjust=False).reset_index()
    self.qqq.columns = map(str.lower, self.qqq.columns)

    # get TQQQ performance
    self.tqqq = self.tqqq[
        (self.tqqq['date'] >= self.trade_days.head(1)['date'].values[0])
        & (self.tqqq['date'] <= self.trade_days.tail(1)['date'].values[0])]
    self.tqqq_start_price = float(self.tqqq.head(1)['close'])
    self.tqqq_performance = float(self.tqqq.tail(1)['close']) / float(
        self.tqqq.head(1)['close'])
    self.qqq = self.qqq[
        (self.qqq['date'] >= self.trade_days.head(1)['date'].values[0])
        & (self.qqq['date'] <= self.trade_days.tail(1)['date'].values[0])]
    self.qqq_start_price = float(self.qqq.head(1)['close'])

    # start trading
    self.run_trades()

def run_different_q_gammas(env_name):
    gammas = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]
    overall_scores = []
    times = []
    for gamma in gammas:
        scores, time = run_q_learning(env_name, 0, 0.6, gamma)
        overall_scores.append(scores)
        times.append([time])
    title = "Mean Score vs Gamma for {}, QLearning".format(env_name)
    file_name = "scores/{}-QLearning-Gamma.png".format(env_name)
    plot_results(overall_scores, "Mean Score of Policy", gammas, "Gamma Value",
                 title, file_name)
    title = "Time Taken (s) vs Gamma for {}, QLearning".format(env_name)
    file_name = "times/{}-QLearning-Gamma.png".format(env_name)
    plot_results(times, "Time Taken (s)", gammas, "Gamma Value", title, file_name)

def run_different_epsilons(env_name):
    epsilons = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05]
    overall_scores = []
    times = []
    for epsilon in epsilons:
        scores, time = run_q_learning(env_name, epsilon)
        overall_scores.append(scores)
        times.append([time])
    title = "Mean Score vs Epsilon for {}, QLearning".format(env_name)
    file_name = "scores/{}-QLearning-Epsilon.png".format(env_name)
    plot_results(overall_scores, "Mean Score of Policy", epsilons, "Epsilon Value",
                 title, file_name)
    title = "Time Taken (s) vs Epsilon for {}, QLearning".format(env_name)
    file_name = "times/{}-QLearning-Epsilon.png".format(env_name)
    plot_results(times, "Time Taken (s)", epsilons, "Epsilon Value", title, file_name)

def run_bench(api, version):
    bench_title = (api + " IOR benchmark - pdwfs " + version + " - "
                   + str(datetime.utcnow()) + " UTC")
    print("Running:", bench_title)

    read = "1"          # 1: perform read benchmark
    numTasks = "2"      # number of parallel processes
    filePerProc = "0"   # 1: write one file per processes
    collective = "1"    # 1: enable collective IO operations (MPIIO, HDF5 only)
    segmentCount = "1"  # see previous schematic
    transferSize = ["512k", "1m", "3m", "5m", "7m", "10m", "25m", "35m", "50m",
                    "60m", "75m", "85m", "100m", "115m", "125m", "150m", "175m",
                    "200m", "225m", "250m"]

    # bench_script was undefined in the original snippet; assigning the return
    # value of build_ior_script here is an assumption.
    bench_script = utils.build_ior_script(api, read, numTasks, filePerProc,
                                          collective, segmentCount, transferSize)
    with open("run/bench.sh", "w") as f:
        f.write(bench_script)
    subprocess.check_call(["bash", "run/bench.sh"])

    print(" Parsing and saving the results in a plot")
    df_disk = utils.parse_ior_results("/output/ior_disk.out")
    df_pdwfs = utils.parse_ior_results("/output/ior_pdwfs.out")
    os.rename("/output/ior_disk.out", "/output/ior_" + api + "_disk.out")
    os.rename("/output/ior_pdwfs.out",
              "/output/ior_" + api + "_pdwfs-" + version + ".out")

    matplotlib.use('Agg')
    for readOrWrite in ["write", "read"]:
        filename = readOrWrite + "_ior_" + api + "_pdwfs-" + version + ".png"
        utils.plot_results(readOrWrite,
                           df_disk[df_disk["Operation"] == readOrWrite],
                           df_pdwfs[df_pdwfs["Operation"] == readOrWrite],
                           title=bench_title,
                           filename="/output/" + filename)
        with open("/output/bench.html", "a") as f:
            f.write("<img src=" + filename + ">\n")

def get_results(self):
    """
    results = []
    for i in range(self.model.n_components):
        results.append([i, self.model.means_[i][0],
                        np.diag(self.model.covariances_[i])[0]])
    result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
    result_df = result_df.set_index('state').sort_values(by=['mean'])
    """
    # print('===')
    # print(result_df)
    self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
    for state in self.test['state'].unique():
        this_group = self.test.loc[self.test['state'] == state, 'next_change']
        print(state, float(this_group.mean()), float(this_group.std()))
    print('===')
    try:
        plot_results(self.test, self.name)
    except Exception as e:
        print(e)

def test(model, x_test, y_test, data_df_combined_clean):
    model = torch.load('model/model.pkl')
    model.eval()
    x_test = ToVariable(x_test).double()
    h1, c1, h2, c2, h3, c3 = model.init_state(x_test.shape[0])
    seq_len = x_test.shape[1]
    pred_dat, h1, c1, h2, c2, h3, c3 = model(x_test, h1, c1, h2, c2, h3, c3)
    pred_dat = np.array(pred_dat.detach().numpy())

    # De-standardize predictions
    preds_unstd = (pred_dat * data_df_combined_clean.iloc[:, -1].std()
                   + data_df_combined_clean.iloc[:, -1].mean())
    y_test_unstd = (y_test * data_df_combined_clean.iloc[:, -1].std()
                    + data_df_combined_clean.iloc[:, -1].mean())

    # NOTE: the element-wise sqrt of a square is the absolute value, so this
    # quantity is really the mean absolute error despite the message below.
    mrse = np.sqrt(((preds_unstd[:, -1, :] - y_test_unstd[:, -1, :]) ** 2)).mean(axis=0)
    print('The mean square error is: %f' % mrse)
    plot_results(preds_unstd[:, -1, :], y_test_unstd[:, -1, :])

def train(lr=0.0075, nb_epoch=10, batch_size=256, verbose=1):
    X_train, y_train, X_test, y_test = build_training_data()

    model = build_partial_mlp_model()
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=RMSprop(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        nb_epoch=nb_epoch, batch_size=batch_size,
                        validation_data=(X_test, y_test),
                        callbacks=callbacks(), verbose=verbose)
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    plot_results(history, score[1], 'mlp')

def run_different_learn_rates(env_name):
    learning_rates = [0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
    overall_scores = []
    times = []
    for learning_rate in learning_rates:
        scores, time = run_q_learning(env_name, 0, learning_rate)
        overall_scores.append(scores)
        times.append([time])
    title = "Mean Score vs Learning Rate for {}, QLearning".format(env_name)
    file_name = "scores/{}-QLearning-Learning-Rate.png".format(env_name)
    plot_results(overall_scores, "Mean Score of Policy", learning_rates,
                 "Learning Rate Value", title, file_name)
    title = "Time Taken (s) vs Learning Rate for {}, QLearning".format(env_name)
    file_name = "times/{}-QLearning-Learning-Rate.png".format(env_name)
    plot_results(times, "Time Taken (s)", learning_rates,
                 "Learning Rate Value", title, file_name)

def run_different_gammas(method, method_name, env_name):
    gammas = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
              0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    overall_scores = []
    times = []
    for gamma in gammas:
        scores, time = method(env_name, gamma)
        overall_scores.append(scores)
        times.append([time])
    title = "Mean Score vs Gamma for {}, {}".format(env_name, method_name)
    file_name = "scores/{}-{}.png".format(env_name, method_name)
    plot_results(overall_scores, "Mean Score of Policy", gammas, "Gamma Value",
                 title, file_name)
    title = "Time Taken (s) vs Gamma for {}, {}".format(env_name, method_name)
    file_name = "times/{}-{}.png".format(env_name, method_name)
    plot_results(times, "Time Taken (s)", gammas, "Gamma Value", title, file_name)

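# The four sweep functions above share one shape; a generic helper like this
# (hypothetical, not in the original module) would remove the duplication,
# e.g.:
#   run_sweep(gammas, "Gamma Value",
#             lambda g: run_q_learning(env_name, 0, 0.6, g),
#             env_name, "QLearning")
def run_sweep(values, value_name, run_fn, env_name, method_name):
    overall_scores, times = [], []
    for v in values:
        scores, t = run_fn(v)
        overall_scores.append(scores)
        times.append([t])
    slug = value_name.replace(" ", "-")
    title = "Mean Score vs {} for {}, {}".format(value_name, env_name, method_name)
    plot_results(overall_scores, "Mean Score of Policy", values, value_name, title,
                 "scores/{}-{}-{}.png".format(env_name, method_name, slug))
    title = "Time Taken (s) vs {} for {}, {}".format(value_name, env_name, method_name)
    plot_results(times, "Time Taken (s)", values, value_name, title,
                 "times/{}-{}-{}.png".format(env_name, method_name, slug))
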
def train_vae(vae, encoder, decoder, x, y, x_train, x_test, y_test,
              label_color_dict, group, additional=False):
    if additional:
        vae.load_weights('vae_' + group + '.h5')
    vae.fit(x_train, epochs=50, batch_size=10, validation_data=(x_test, None))
    vae.save_weights('vae_' + group + '.h5', overwrite=True)
    models = (encoder, decoder)
    data = (x, y)
    plot_results(models, data, label_color_dict, model_name='vae_' + group + '.h5')

def evaluate(opt, model, data_loader, logger, error_threshold=0.05, limit=None, vis=None):
    '''
    Loop through the dataset and calculate evaluation metrics.
    '''
    if model.compare_model is not None:
        logger.print('Comparison: {} ({}), {} ({})'.format(
            model.iterator.name(), model.iterator.n_operations,
            model.compare_model.name(), model.compare_model.n_operations))
    logger.print('Initialization: {}'.format(opt.initialization))
    logger.print('Error threshold: {}'.format(error_threshold))

    metric = utils.Metrics(scale=1, error_threshold=error_threshold)
    images = {'error_curves': [], 'results': []}
    for step, data in enumerate(data_loader):
        bc, gt, x = data['bc'], data['final'], data['x']
        f = None if 'f' not in data else data['f']
        if opt.initialization != 'random':
            # Test time: do not change data if 'random'
            x = utils.initialize(x, bc, opt.initialization)
        results, x = model.evaluate(x, gt, bc, f, opt.n_evaluation_steps)
        # Update metric
        metric.update(results)
        if step % opt.log_every == 0:
            img = utils.plot_error_curves(results, num=4)
            if vis is not None:
                vis.add_image({'errors_avg_init': img}, step)
            images['error_curves'].append(img)
            img = utils.plot_results({'x': x, 'gt': gt})
            if vis is not None:
                vis.add_image({'results': img}, step)
            images['results'].append(img)
        if (step + 1) % opt.log_every == 0:
            print('Step {}'.format(step + 1))
        if limit is not None and (step + 1) == limit:
            break

    # Get results
    results = metric.get_results()
    for key in results:
        logger.print('{}: {}'.format(key, results[key]))
    metric.reset()
    return results, images

def fit_independent(rng=(-5, 5)):
    x, y, yerr = load_data("line_data.txt")
    true_m, true_b, _, _ = load_data("line_true_params.txt")

    # Build the design matrix.
    A = np.vander(x, 2)
    AT = A.T

    # Compute the mean and covariance of the posterior constraint.
    cov = np.linalg.inv(np.dot(AT, A / yerr[:, None] ** 2))
    mu = np.dot(cov, np.dot(AT, y / yerr ** 2))

    # Plot these constraints with the truth and the data points.
    samples = np.random.multivariate_normal(mu, cov, 1000)
    fig = plot_results(x, y, yerr, samples, truth=(true_m, true_b))
    fig.gca().set_title("assuming independent uncertainties")
    fig.savefig(os.path.join("figures", "line_independent.png"))

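# In the notation of fit_independent above, the closed-form posterior for the
# weighted linear model is Sigma = (A^T C^-1 A)^-1 and mu = Sigma A^T C^-1 y
# with C = diag(yerr^2); dividing A's rows by yerr^2 applies C^-1. A quick
# check on synthetic data (the true slope/intercept here are made up):
import numpy as np

x = np.linspace(0, 1, 20)
yerr = np.full(20, 0.1)
y = 2.0 * x - 0.5 + yerr * np.random.randn(20)
A = np.vander(x, 2)
cov = np.linalg.inv(A.T @ (A / yerr[:, None] ** 2))
mu = cov @ (A.T @ (y / yerr ** 2))
print(mu)  # approximately [2.0, -0.5] (slope, intercept)
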
def fit_emcee(rng=(-5, 5)):
    # x, y, yerr and the true parameters were undefined in the original
    # snippet; loading them as in fit_independent is an assumption.
    x, y, yerr = load_data("line_data.txt")
    true_m, true_b, _, _ = load_data("line_true_params.txt")

    # Initialize the walkers.
    ndim, nwalkers = 4, 32
    pos = [np.random.randn(ndim) for i in range(nwalkers)]  # xrange in the Python 2 original

    # Initialize the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))

    # Run a burn-in.
    print("Running burn-in")
    pos, lp, state = sampler.run_mcmc(pos, 1000)
    sampler.reset()

    # Run the production chain.
    print("Running production")
    sampler.run_mcmc(pos, 500)
    print("Done")

    fig = plot_results(x, y, yerr, sampler.flatchain, truth=(true_m, true_b))
    fig.savefig(os.path.join("figures", "line_emcee.png"))

core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

##############################################################################
# Print machine learning metrics
utils.print_dbscan_metrics(X, n_clusters_, labels_true, labels)

##############################################################################
# TODO - Xform Back, compute zone size and centroid
poi_result_set = utils.add_zoas_to_poi_dataset(labels, poi_dataset)

##############################################################################
# Output Results
utils.output_results(poi_result_set, screen=s.ZOA_SUMMARY_TO_SCREEN,
                     outfile=s.OUTPUT_FILE)

##############################################################################
# Plot result using X_prime, a transpose of [[lat, lng]] to [[x=lng, y=lat]].
# If mode is proxy, lookup coordinates.
if s.MATPLOT_ZOA_CLUSTERS:
    if s.MODE == "proxy":
        X_prime = gadm.lat_lng_tpose2(X, poi_dataset)
    else:
        X_prime = gadm.lat_lng_tpose(X)
    utils.plot_results(labels, X_prime, core_samples_mask)

def main(cwd, do_amgng, amgng_file, ma_window, ma_recalc_delay,
         do_cla, cla_file, buffer_len, plot):
    values = inspect.getargvalues(inspect.currentframe())[3]
    print('using parameters: {}'.format(values))

    # NOTE: anndf (the annotations DataFrame) is referenced below but never
    # defined in this snippet; it is assumed to be loaded elsewhere.
    amgng_df = None
    if do_amgng:
        from mgng.amgng import main as amgng_main
        print('Training AMGNG model...')
        out_file = os.path.join(cwd, 'out_amgng_{}'.format(amgng_file))
        full_path = os.path.join(cwd, amgng_file)
        start = datetime.now()
        amgng_main(input_file=full_path, output_file=out_file,
                   buffer_len=buffer_len, index_col='timestamp',
                   skip_rows=[1, 2], ma_window=ma_window,
                   ma_recalc_delay=ma_recalc_delay)
        amgng_time = datetime.now() - start
        print('Reading results...')
        amgng_df = pd.read_csv(out_file, parse_dates=True, index_col='timestamp')
        amgng_df['Annotation'] = anndf.Type
        print('Writing annotated results...')
        amgng_df.to_csv(out_file)
        if plot:
            utils.plot_results(amgng_df, ['narma30-1000_samples'],
                               'anomaly_score', 'anomaly_density', '[rs]')
        print('Time taken: amgng={}'.format(amgng_time))

    cla_df = None
    if do_cla:
        from cla.swarm import swarm
        from cla.cla import main as cla_main
        out_file = os.path.join(cwd, 'out_cla_{}'.format(cla_file))
        print('Training CLA model...')
        full_path = os.path.join(cwd, cla_file)
        SWARM_DESCRIPTION = {
            'includedFields': [
                {'fieldName': 'timestamp', 'fieldType': 'datetime'},
                {'fieldName': 'ECG1', 'fieldType': 'float'},
            ],
            'streamDef': {
                'info': 'chfdbchf13 ECG1',
                'version': 1,
                'streams': [
                    {'info': 'chfdbchf13', 'source': full_path, 'columns': ['*']}
                ]
            },
            'inferenceType': 'TemporalAnomaly',
            'inferenceArgs': {
                'predictionSteps': [1],
                'predictedField': 'ECG1'
            },
            'iterationCount': buffer_len,
            'swarmSize': 'large'
        }
        start = datetime.now()
        swarm(cwd=cwd, input_file=cla_file, swarm_description=SWARM_DESCRIPTION)
        swarm_time = datetime.now() - start
        start = datetime.now()
        cla_main(cwd=cwd, input_file=full_path, output_name=out_file,
                 plot=False, predicted_field='ECG1')
        cla_time = datetime.now() - start
        print('Reading results...')
        cla_df = pd.read_csv(out_file, parse_dates=True, index_col='timestamp')
        cla_df['Annotation'] = anndf.Type
        print('Writing annotated results...')
        cla_df.to_csv(out_file)
        if plot:
            utils.plot_results(cla_df, ['ECG1'], 'anomaly_score',
                               'anomaly_likelihood', '[rs]')
        print('Time taken: swarm={}, cla={}'.format(swarm_time, cla_time))
    return amgng_df, cla_df