def contrast_bn(epochs, lr):
    train_loader_no_bn = DataLoader(64, data_type='train', scale=True)
    valid_loader_no_bn = DataLoader(64, data_type='valid', scale=True)
    test_loader_no_bn = DataLoader(64, data_type='test', scale=True)
    model = Model(4, [28 * 28, 512, 512, 10], initializer='xavier', optimizer='sgd')
    his_no_bn = model.train(train_loader_no_bn, valid_loader_no_bn, epochs, learning_rate=lr)
    pred, label = model.predict(test_loader_no_bn)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_bn_cm, acc {:.3f}'.format(acc))

    train_loader_bn = DataLoader(64, data_type='train', scale=True)
    valid_loader_bn = DataLoader(64, data_type='valid', scale=True)
    test_loader_bn = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10], initializer='xavier', optimizer='sgd')
    his_bn = model2.train_bn(train_loader_bn, valid_loader_bn, epochs, learning_rate=lr)
    pred, label = model2.predict(test_loader_bn, bn=True)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'bn_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_bn, his_bn, 'no bn', 'bn',
                 'bn and no bn training loss', 'bn and no bn validation loss')
def contrast_scale(epochs, lr):
    train_loader_no_scale = DataLoader(64, data_type='train', scale=False)
    valid_loader_no_scale = DataLoader(64, data_type='valid', scale=False)
    test_loader_no_scale = DataLoader(64, data_type='test', scale=False)
    model = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_no_scale = model.train(train_loader_no_scale, valid_loader_no_scale, epochs, learning_rate=lr)
    pred, label = model.predict(test_loader_no_scale)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_scale_cm, acc {:.3f}'.format(acc))

    train_loader_scale = DataLoader(64, data_type='train', scale=True)
    valid_loader_scale = DataLoader(64, data_type='valid', scale=True)
    test_loader_scale = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_scale = model2.train(train_loader_scale, valid_loader_scale, epochs, learning_rate=lr)
    pred, label = model2.predict(test_loader_scale)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'scale_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_scale, his_scale, 'no scale', 'scale',
                 'scale and no scale training loss', 'scale and no scale validation loss')
def hierarchical(encoder, tsne, true_data, true_labels, save_name="hierarchical.png"):
    """
    1. Predicts labels using hierarchical clustering
    2. Prints confusion_matrix
    3. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)

    # Hierarchical clustering
    labels = HierarchicalClustering()
    predictions = labels.draw_dendogram(
        enc_output,
        title='Hierarchical Clustering Dendrogram',
        savetitle=save_name)

    # Confusion matrix of hierarchical clustering
    confusion_matrix(true_labels, predictions,
                     save_name="confusion_matrix_hierarchical.png")

    # Visualize test predictions from hierarchical clustering
    true_data = np.reshape(true_data, (len(true_data), 64, 64))
    visualize_class_predictions(true_data, true_labels, predictions)
def contrast_dropout(epochs, lr):
    train_loader_no_dropout = DataLoader(64, data_type='train', scale=True)
    valid_loader_no_dropout = DataLoader(64, data_type='valid', scale=True)
    test_loader_no_dropout = DataLoader(64, data_type='test', scale=True)
    model = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_no_dropout = model.train(train_loader_no_dropout, valid_loader_no_dropout, epochs, learning_rate=lr)
    pred, label = model.predict(test_loader_no_dropout)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'no_dropout_cm, acc {:.3f}'.format(acc))

    train_loader_dropout = DataLoader(64, data_type='train', scale=True)
    valid_loader_dropout = DataLoader(64, data_type='valid', scale=True)
    test_loader_dropout = DataLoader(64, data_type='test', scale=True)
    model2 = Model(4, [28 * 28, 512, 512, 10], initializer='xavier')
    his_dropout = model2.train(train_loader_dropout, valid_loader_dropout, epochs,
                               learning_rate=lr, dropout_prob=0.3)
    pred, label = model2.predict(test_loader_dropout)
    acc = np.sum(pred == label) / len(pred)
    print('acc', acc)
    cm = confusion_matrix(label.reshape(-1), pred.reshape(-1), 10)
    cm_plot(cm, 'dropout_cm, acc {:.3f}'.format(acc))

    plot_history(his_no_dropout, his_dropout, 'no dropout', 'dropout',
                 'dropout and no dropout training loss', 'dropout and no dropout validation loss')
def test_knn():
    data, _ = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = knn.knn_classifier(data, class_index, predictors, 5, 5)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "KNN Classifier Prediction of Crime Rate")
def test_naive_bayes():
    data, header = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 7]
    results = bayes.naive_bayes_classifier(data, header, 10, class_index,
                                           predictors, [2, 3, 5, 9])
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Naive Bayes Classifier Prediction of Crime Rate")
def test_random_forest():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = rforest.random_forest_classifier(data, header, class_index,
                                               predictors, 100, 25, 3)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Random Forest Classifier Prediction of Crime Rate")
def test_decision_tree():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = dtree.decision_tree_classifier(data, header, class_index, predictors, 30)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Decision Tree Classifier Prediction of Crime Rate")
def logistic(Xtrain, Ytrain, Xdev, Ydev, verbose=False, scoring='f1'):
    """
    Trains a logistic regression model on the provided data, scores it, and
    returns both the model and its dev-set metrics. It also prints the optimal
    hyperparameters. Cross validation is performed to tune the elastic-net
    l1 ratio and C (regularization weight).

    Inputs:
        Xtrain
        Ytrain
        Xdev
        Ydev
    Returns:
        dict: dev-set metrics (F1, accuracy, precision, recall) and the
              selected hyperparameters.
        LogisticRegressionCV: the best trained model.
    """
    print("\n========================\nTraining Logistic Regression\n")
    if scoring == 'f1':
        scoring = metrics.make_scorer(metrics.f1_score, average='binary')
    logit = LogisticRegressionCV(l1_ratios=[.1, .5, .7, .9, .95, .99, 1],
                                 Cs=[0.1, 1, 10],
                                 max_iter=1e4,
                                 solver='saga',
                                 scoring=scoring,
                                 penalty='elasticnet')
    logit.fit(Xtrain, Ytrain)
    best_score = logit.score(Xdev, Ydev)
    Ydev_pred = logit.predict(Xdev)
    num_coeff = len(logit.coef_[logit.coef_ != 0])
    results = {
        "F1": best_score,
        "l1_ratio": logit.l1_ratio_[0],
        "C": logit.C_[0],
        "n_nonzero_weights": num_coeff,
        "accuracy": metrics.accuracy_score(Ydev, Ydev_pred),
        "precision": metrics.precision_score(Ydev, Ydev_pred, average='binary'),
        "recall": metrics.recall_score(Ydev, Ydev_pred, average='binary')
    }
    try:
        print(results)
    except Exception as e:
        print(f"Error occurred printing results: {e}")
    if verbose:
        print(f"There are {num_coeff} non-zero weights in the logistic "
              + "regression model.")
        utils.confusion_matrix(Ydev, Ydev_pred)
        utils.roc_auc(logit, Xdev, Ydev)
        utils.precision_recall(logit, Xdev, Ydev)
    return results, logit
def heat_map(prds_all, msks_all):
    if 'grss' in opt.data_dir:
        y_labels = ['Road', 'Tree', 'Red roof', 'Grey roof',
                    'Concrete\nroof', 'Vegetation']
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))
        fig = plt.figure(figsize=(6, 6))
        ax = sns.heatmap(sr_heatmap, linewidth=0.5, cmap='Blues', annot=True,
                         yticklabels=y_labels, xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')
    elif 'coffee' in opt.data_dir:
        y_labels = ['non-coffee', 'coffee']
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))
        sns.set(font_scale=1.3)
        fig = plt.figure(figsize=(3.5, 3.5))
        ax = sns.heatmap(sr_heatmap, linewidth=0.5, cmap='Blues', annot=True,
                         yticklabels=y_labels, xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')
    elif 'vaihingen' in opt.data_dir or 'task_test' in opt.data_dir:
        y_labels = ['Impervious\nsurfaces', 'Building', 'Low\nvegetation',
                    'Tree', 'Car']
        sr_heatmap = normalize_rows(
            confusion_matrix(prds_all, msks_all, opt.num_classes))
        sr_heatmap = np.delete(sr_heatmap, -1, axis=0)
        sr_heatmap = np.delete(sr_heatmap, -1, axis=1)
        fig = plt.figure(figsize=(5, 5))
        ax = sns.heatmap(sr_heatmap, linewidth=0.5, cmap='Blues', annot=True,
                         yticklabels=y_labels, xticklabels=False)
        fig.savefig('heat_maps/' + exp_name + '/' + 'sr.png')
def kmean(encoder, tsne, true_data, true_label):
    """
    1. Predicts labels using k-means clustering
    2. Prints confusion_matrix
    3. Prints accuracy
    4. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)

    kmean = KMeansClustering()
    kmean.fit(enc_output)
    pred = kmean.predict(enc_output)

    accuracy(true_label, pred)
    confusion_matrix(true_label, pred, save_name="confusion_matrix_kmean.png")
    tsne.tsne_plot(true_data, pred, save_data_dir="kmean", save_name="kmean")
def _pred(self, x_df, y_df=None, data='confusion_matrix', format='tensor'):
    assert isinstance(x_df, pd.DataFrame)
    if y_df is not None:
        assert isinstance(y_df, pd.DataFrame)
    x_tensor = utils.get_tensor(x_df)
    y_tensor = self.df_to_tensor(y_df)
    y_pred_prob = self.model(Variable(x_tensor))
    # y_pred = x_tensor.mm(w1).clamp(min=0).mm(w2)

    if data == 'loss':
        loss = self.loss_fn(y_pred_prob, Variable(y_tensor))
        result = loss.data[0]
        return result
    elif data == 'pred':
        data_tensor = y_pred_prob.data
    else:  # data == 'confusion_matrix'
        y_pred, total_type_num = utils.max_ix(y_pred_prob.data)
        # total_type_num = len(self.ix_to_label)
        data_tensor = utils.confusion_matrix(y_pred, y_tensor, total_type_num)

    if format == 'df':
        result = pd.DataFrame(data_tensor.numpy())
    elif format == 'np':
        result = data_tensor.numpy()
    else:  # format == 'tensor'
        result = data_tensor
    return result
def eval(args):
    device = torch.device(f"cuda:{args.device_id}")
    model = AlexNet(n_cls=100)
    model.to(device)
    model.load_state_dict(torch.load(args.pretrained_path))
    model.eval()

    test_loader = getLoaders(split="eval",
                             batch_size=args.batch_size,
                             num_workers=args.num_workers)

    pred_arr = []
    label_arr = []
    with torch.no_grad():
        for idx, (img, label) in tqdm(enumerate(test_loader), total=len(test_loader)):
            img = img.to(device)
            pred = model.pred(img)
            # mean of softmax prob from 10 different augmentations
            pred = pred.view(-1, 10, 100)
            pred = pred.mean(dim=1)
            pred_arr.append(pred.detach().cpu().numpy())
            label_arr.append(label.detach().numpy())

    pred_np = np.concatenate(pred_arr)
    label_np = np.concatenate(label_arr)
    top_1 = utils.top_k_acc(k=1, pred=pred_np, label=label_np)
    top_5 = utils.top_k_acc(k=5, pred=pred_np, label=label_np)
    confusion = utils.confusion_matrix(100, pred_np, label_np)
    torch.save({
        "top_1": top_1,
        "top_5": top_5,
        "confusion": confusion,
    }, "result.pth")
    print(f"top_1: {top_1*100:.2f}, top_5: {top_5*100:.2f}")
def measure(self, X, y, threshold=0.5):
    y_hat = self.predict(X)
    TP, FP, FN, TN = utils.confusion_matrix(threshold, y_hat, y)
    precision = float(TP) / (TP + FP)
    recall = float(TP) / (TP + FN)
    F1 = 2 * precision * recall / (precision + recall)
    return F1
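# `measure` above assumes that `utils.confusion_matrix(threshold, y_hat, y)` binarizes
# the predicted scores at `threshold` and returns counts in (TP, FP, FN, TN) order.
# That helper is not shown here; the function below is only a minimal sketch of that
# assumed contract, not the project's actual implementation.
import numpy as np

def threshold_confusion_counts(threshold, y_hat, y):
    """Binarize scores at `threshold` and count TP, FP, FN, TN for binary labels."""
    pred = (np.asarray(y_hat) >= threshold).astype(int)
    true = np.asarray(y).astype(int)
    TP = int(np.sum((pred == 1) & (true == 1)))
    FP = int(np.sum((pred == 1) & (true == 0)))
    FN = int(np.sum((pred == 0) & (true == 1)))
    TN = int(np.sum((pred == 0) & (true == 0)))
    return TP, FP, FN, TN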
def fix_dropout(self, new_weights, old_weights, mask_prev, mask_next=None):
    """
    Update the old weights with the new dropout weights.
    The new weights don't have the same shape as the old ones, so updating
    them is tricky.

    :param new_weights: Weights of the net after dropout
    :param old_weights: Weights of the net before dropout (real weights)
    :param mask_prev: Mask used on the previous layer's nodes
    :param mask_next: Mask used on the next layer's nodes
    :return: New weights for the real net
    """
    if mask_next is not None:
        new_w = old_weights
        # Compute the matrix containing the indexes of the weights to update
        conf_matrix = utils.confusion_matrix(mask_next, mask_prev)
        indexes = np.argwhere(conf_matrix)
        c = 0
        for i in range(len(new_weights)):
            for j in range(len(new_weights[i])):
                new_w[indexes[c][0], indexes[c][1]] = new_weights[i, j]
                c += 1
    else:
        # No mask on the next layer: recurse with an all-ones mask
        new_w = self.fix_dropout(new_weights, old_weights, mask_prev,
                                 np.ones(old_weights.shape[1]))
    return new_w
def _decode(self, x, x_, attention_mask, threshold=0.5):
    mask = attention_mask == 1
    y = x.masked_select(mask).cpu().long().numpy()
    y_ = x_.masked_select(mask).cpu().numpy()  # np.array vector of floats
    y_ = np.where(y_ > threshold, 1, 0)
    return confusion_matrix(y, y_)
def valid(self, epoch, val_loader):
    self.G.eval()
    with torch.no_grad():
        # (tn, fp, fn, tp)
        cm = utils.ConfusionMatrix()
        for i, (input_, target_, _) in enumerate(val_loader):
            input_ = input_.to(self.torch_device)
            output_ = self.G(input_)
            target_ = target_.to(self.torch_device)

            ground_truth = target_.int().squeeze(1)
            prediction = torch.argmax(output_, dim=1).int()
            cm.update(utils.confusion_matrix(prediction, ground_truth, reduce=False))

        metric = 1.5 * cm.f2 + cm.accuracy
        if metric > self.best_metric:
            self.best_metric = metric
            self.save(epoch)

        self.logger.write(
            "[Val] epoch: %d accuracy: %f f05: %f f1: %f f2: %f"
            % (epoch, cm.accuracy, cm.f05, cm.f1, cm.f2))
def validate(valloader, train_state):
    model = train_state.ema_model
    criterion = train_state.criterion
    losses = AverageMeter()
    accuracy_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    n_classes = len(train_state.class_names)
    confusion = torch.zeros(n_classes, n_classes)
    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            if constants['use_cuda']:
                inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc = accuracy(outputs, targets)
            batch_confusion = confusion_matrix(outputs, targets)
            confusion += batch_confusion
            losses.update(loss.item(), inputs.size(0))
            accuracy_meter.update(acc.item(), inputs.size(0))
    return (losses.avg, accuracy_meter.avg, confusion)
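# `validate` above accumulates an n_classes x n_classes confusion matrix from a
# helper `confusion_matrix(outputs, targets)` that is defined elsewhere. The function
# below is only a hypothetical sketch of such a per-batch helper, assuming raw logits
# and integer class targets (the class count is passed explicitly here).
import torch

def batch_confusion_matrix(outputs, targets, n_classes):
    """Count (true class, predicted class) pairs for one batch of logits."""
    preds = outputs.argmax(dim=1)
    cm = torch.zeros(n_classes, n_classes)
    for t, p in zip(targets.view(-1), preds.view(-1)):
        cm[t.long(), p.long()] += 1
    return cm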
def main():
    graph = utils.load_graph()
    position = utils.get_positions(graph)
    utils.make_dir('images/louvain')

    true_communities = utils.get_labels(graph, list(graph.nodes))
    utils.plot_communities(graph, position, true_communities, labels=True,
                           title='Butterfly Similarity Network - True Communities',
                           path='images/louvain/communities_true.png')

    communities = utils.group_communities(louvain_clustering(graph))
    utils.plot_communities(graph, position, communities, labels=False,
                           title='Butterfly Similarity Network - Louvain Communities',
                           path='images/louvain/communities_louvain.png')

    graph_nodes = sorted(list(graph.nodes))
    predictions = utils.predict_majority_class(graph, communities)
    preds = [predictions[n] for n in graph_nodes]
    labels = [graph.nodes[n]['label'] for n in graph_nodes]
    utils.accuracy(preds, labels)
    utils.confusion_matrix(preds, labels,
                           'Confusion Matrix - Majority Label Predictions from Louvain Communities',
                           'images/louvain/cm_louvain.png')
def main(args):
    # Load and standardize data.
    embedding_file_path = 'node2vec/embeddings/' + args.input
    X_train, X_test, y_train, y_test = load_splits(embedding_file_path)
    X_train, X_test = standardize_data(X_train, X_test)

    # Train classifier and make predictions.
    optimal_svc = hyperparameter_search(SVC(), X_train, y_train)
    print('Cross Validation Accuracy:', optimal_svc.best_score_)
    print('Optimal parameters:', optimal_svc.best_params_)
    predictions = optimal_svc.predict(X_test)

    # Report results.
    utils.make_dir('images/svc')
    cm_path = 'images/svc/cm_' + args.input[:-4] + '.png'
    utils.accuracy(predictions, y_test)
    utils.confusion_matrix(predictions, y_test, 'Confusion Matrix - SVC', cm_path)
def getDECNetworkResults(dec, enc):
    # Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    big_data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
    big_data, _, _, _ = big_data.load_data()

    # Make save directory
    os.makedirs(os.path.join("dec"), exist_ok=True)
    os.chdir("dec")

    # Predict cluster assignments on the test set
    encoded = enc.predict(test_data)
    q, _ = dec.predict(test_data, verbose=0)
    y_pred = q.argmax(1)
    print(y_pred)
    confusion_matrix(test_label.astype(np.int64), y_pred)

    # Measure prediction time on increasingly large slices of the augmented data
    # (kept separate from y_pred so the test-set predictions are not overwritten)
    for i in range(20):
        iterate = 5000 * (i + 1)
        data = big_data[0:iterate, :]
        print(data.shape)
        print("KMEAN")
        start = time.time()
        q_timed, _ = dec.predict(data, verbose=0)
        y_pred_timed = q_timed.argmax(1)
        end = time.time()
        print(end - start)

    # t-SNE plots of predicted and true labels on the test set
    train_x = np.reshape(test_data, (3720, 64, 64))
    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)
    TSNE.tsne_plot(train_x, y_pred.astype(int), save_name="Pred", save_data_dir="dec")
    TSNE.tsne_plot(train_x, test_label.astype(int), save_name="True", save_data_dir="dec")
def main():
    graph = utils.load_graph()
    position = utils.get_positions(graph)
    utils.make_dir('images/spectral')

    true_communities = utils.get_labels(graph, list(graph.nodes))
    utils.plot_communities(graph, position, true_communities, labels=True,
                           title='Butterfly Similarity Network - True Communities',
                           path='images/spectral/communities_true.png')

    node_assignments = spectral_clustering(graph)
    nodes_to_communities = {k: v for (k, v) in zip(range(len(node_assignments)), node_assignments)}
    communities = utils.group_communities(nodes_to_communities)
    utils.plot_communities(graph, position, communities, labels=False,
                           title='Butterfly Similarity Network - Spectral Communities',
                           path='images/spectral/communities_spectral.png')

    graph_nodes = sorted(list(graph.nodes))
    predictions = utils.predict_majority_class(graph, communities)
    preds = [predictions[n] for n in graph_nodes]
    labels = [graph.nodes[n]['label'] for n in graph_nodes]
    utils.accuracy(preds, labels)
    utils.confusion_matrix(preds, labels,
                           'Confusion Matrix - Spectral Clustering',
                           'images/spectral/cm_spectral.png')
def train_bottomupdnn_v1(epochs=40, init_epochs=10, lr=0.01):
    print("======Train base DNN using clean data======")
    model, train_loader, test_loader, optimizer, criterion = train_init("BaseDNN", lr=lr)
    clean_loader, clean_train_loader, noisy_loader = get_dataloader_bu1()
    model.mode = "BaseModel"

    # Train base DNN model
    for epoch in range(init_epochs):
        train_epoch(model, clean_train_loader, test_loader, optimizer, criterion, epoch)

    # Estimate confusion matrix for the new bottom-up DNN v1 model
    y_clean, y_pred = get_predict_label(model, clean_loader)
    cmatrix_clean = confusion_matrix(y_clean, y_pred)
    y_noisy, y_pred = get_predict_label(model, noisy_loader)
    cmatrix_noisy = confusion_matrix(y_noisy, y_pred)
    rmatrix = compute_rmatrix(cmatrix_clean, cmatrix_noisy)
    cmatrix = compute_estimate_confusion(rmatrix, y_noisy)

    # Initialize the new bottom-up DNN v1 model with the estimated (frozen) confusion layer
    new_model = models.ButtomUpDNN1(model.params)
    new_model.confusion.weight = nn.Parameter(cmatrix)
    new_model.confusion.weight.requires_grad = False
    optimizer = optim.SGD(new_model.parameters(), lr=lr, momentum=0.9)

    # Concatenate all clean datasets and get a data loader
    clean_dataset = torch.utils.data.ConcatDataset(
        [clean_loader.dataset, clean_train_loader.dataset])
    all_clean_loader = torch.utils.data.DataLoader(dataset=clean_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)

    # Train bottom-up DNN v1 model
    for epoch in range(init_epochs, epochs):
        train_epoch_bu1(new_model, all_clean_loader, noisy_loader, test_loader,
                        optimizer, criterion, epoch)

    # Save estimated Q matrix
    plt.matshow(cmatrix)
    plt.colorbar()
    plt.savefig("./imgs/estimated_Q.png")
def evaluate(self, X, y, with_plot=False):
    pred = self.__fit(X, y)
    if self.classification:
        return confusion_matrix(y, pred)
    else:
        if with_plot:
            plt.figure(figsize=(12, 8))
            plt.scatter(y, pred)
            plt.xlabel('y')
            plt.ylabel('y_pred')
        return np.mean((y - pred)**2)
def __model_evaluation(self, arr, p_dna):
    wt_data, mt_data = arr
    df_dna = confusion_matrix(wt=wt_data, mt=mt_data, predicted=p_dna)

    # Convert to amino acids and build the amino-acid confusion matrix
    wt_aa = list(map(dna_to_aa, wt_data))
    mt_aa = list(map(dna_to_aa, mt_data))
    p_aa = list(map(dna_to_aa, p_dna))
    df_aa = confusion_matrix(wt=wt_aa, mt=mt_aa, predicted=p_aa)

    df = pd.concat([df_dna, df_aa], axis=1)
    outfile = 'summary_6nodes_nopostproc.csv'
    df.to_csv(outfile)

    # Show histograms
    df['Accuracy'].hist(bins=20)
    df['TP'].hist(bins=20)
    df['#Mutate Positions'].hist(bins=20)
    plt.show()
def train_model(train_config):
    images, labels = image_load.read_12channel_images(train_config['target_path'],
                                                      train_config['image_resize'])
    kfold_container = image_load.KFoldContainer(images, labels, train_config['Kfold'])
    utils.write_config(train_config, train_config['save_path'] + 'config.csv')

    confuse_matrix = []
    accuracy = []
    duration = []
    for k in range(train_config['Kfold']):
        train_x, train_y, test_x, test_y = kfold_container.get_fold_k(k)
        train_x, train_y = image_load.augimage(train_x, train_y)
        print('Load Training {n:d} images'.format(n=len(train_x)))
        print('Load Testing {n:d} images'.format(n=len(test_x)))

        train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
        train_ds = train_ds.shuffle(buffer_size=train_config['shuffle_buffer_size']) \
                           .repeat().batch(train_config['batch_size'])
        test_ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
        test_ds = test_ds.repeat(1).batch(train_config['batch_size'])

        model = models.model_7_3_12channel(train_config['image_resize'], train_config['l2_factor'])
        model.compile(optimizer=tf.keras.optimizers.Adam(lr=train_config['lr']),
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

        save_name = f'{train_config["save_path"]}{train_config["model_name"]}-{k}'
        cbs = [tf.keras.callbacks.EarlyStopping(patience=train_config['early_stop_patience']),
               tf.keras.callbacks.ModelCheckpoint(monitor='val_sparse_categorical_accuracy',
                                                  filepath=save_name,
                                                  save_best_only=True,
                                                  save_weights_only=True,
                                                  verbose=1),
               utils.HSLRSchedular(train_config['lr'],
                                   watch_value_name=train_config['schedular_watch_name'],
                                   max_reduce_time=train_config['schedular_max_reduce_time'],
                                   reduce_factor=train_config['schedular_reduce_factor'],
                                   restart_factor=train_config['schedular_restart_factor'],
                                   patience=train_config['schedular_patience'],
                                   verbose=0),
               utils.HSTensorboard(log_dir=f'./logs/{save_name}/', embeddings_metadata=test_x)]

        ct = time.time()
        model.fit(train_ds,
                  epochs=train_config['epochs'],
                  steps_per_epoch=train_config['steps_per_epoch'],
                  validation_data=test_ds,
                  callbacks=cbs)
        duration.append(time.time() - ct)

        model.load_weights(save_name)
        logits = model.predict(test_x)
        cm, acc = utils.confusion_matrix(test_y, tf.argmax(logits, axis=1).numpy())
        confuse_matrix.append(cm)
        accuracy.append(acc)
        print(f'finish training. k={k}, accuracy={acc:.2f}')

    sio.savemat(train_config['save_path'] + 'result.mat',
                {'cm': np.array(confuse_matrix),
                 'accuracy': np.array(accuracy),
                 'duration': np.array(duration)})
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()

        train_loss += loss.item()
        conf_mat += utils.confusion_matrix(outputs, targets, NUM_CLASSES)
        acc = sum([conf_mat[i, i] for i in range(conf_mat.shape[0])]) / conf_mat.sum()
        uacc_per_class = [conf_mat[i, i] / conf_mat[i].sum()
                          for i in range(conf_mat.shape[0])]
        unweighted_acc = sum(uacc_per_class) / len(uacc_per_class)
        prec_per_class = [conf_mat[i, i] / conf_mat[:, i].sum()
                          for i in range(conf_mat.shape[0])]
        average_precision = sum(prec_per_class) / len(prec_per_class)

        utils.progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% | unweighted_Acc: %.3f%%'
            % (train_loss / (batch_idx + 1), 100. * acc, 100. * unweighted_acc))

    Train_acc = 100. * acc
def process_dataset(dataset, colors):
    y = np.load("./data/" + dataset + '_labels.npy')
    pred = np.load("./data/" + dataset + '_clasification.npy')
    segments = np.load("./results/" + dataset + '_segments.npy')
    test_mask = np.load("./data/" + dataset + '_test_mask.npy').reshape(y.shape)

    sc_pred = classify_segments(pred, segments)
    sc_score = utils.balanced_score(y[test_mask], sc_pred[test_mask])
    sc_cm = utils.confusion_matrix(y[test_mask], sc_pred[test_mask])
    utils.save_json({"sc": sc_score}, dataset + "_sc_score")
    utils.save_csv(sc_cm, dataset + "_sc_cm")

    color_map = color_true_map(sc_pred, labels_colors=colors)
    save_image(color_map, dataset + "_sc_clasification")
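# `classify_segments` is defined elsewhere in the project. The call above is consistent
# with a segment-level majority vote over the pixel-wise classification; the helper
# below is only a hypothetical sketch of that idea, not the project's implementation.
import numpy as np

def classify_segments_majority(pred, segments):
    """Assign every pixel in a segment the most frequent predicted class of that segment."""
    out = np.zeros_like(pred)
    for seg_id in np.unique(segments):
        mask = segments == seg_id
        values, counts = np.unique(pred[mask], return_counts=True)
        out[mask] = values[np.argmax(counts)]
    return out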
def spectral(encoder, tsne, true_data, true_label):
    """
    1. Predicts labels using spectral clustering
    2. Prints confusion_matrix
    3. Prints accuracy
    4. Prints t-SNE plot of prediction
    """
    enc_output = encoder.predict(true_data)

    model = SpectralClustering(n_clusters=5, affinity='nearest_neighbors',
                               assign_labels='kmeans')
    pred = model.fit_predict(enc_output)

    accuracy(true_label, pred)
    confusion_matrix(true_label, pred, save_name="confusion_matrix_spectral.png")
    tsne.tsne_plot(true_data, pred, save_data_dir="spectral", save_name="spectral")
    tsne.tsne_plot(true_data, true_label, save_data_dir="true_label", save_name="true_label")
def eval(loader, model, is_test=False, confusion_matrix=False, filename=None):
    """
    Evaluate model performance on the data object (graph) in loader.

    :param - loader: torch_geometric DataLoader for BIOSNAP dataset
    :param - model: trained GNN model ready for making predictions
    :param - is_test: boolean indicating whether to evaluate on the test or val split
    :param - confusion_matrix: if True, plot and save a confusion matrix
    :param - filename: suffix used for the saved confusion matrix image
    """
    model.eval()
    data = [data for data in loader][0]
    mask = data.test_mask if is_test else data.val_mask
    with torch.no_grad():
        pred = model(data).max(dim=1)[1][mask]
        label = data.y[mask]

    if confusion_matrix:
        utils.make_dir(gnn_utils.images_dir)
        title = 'Confusion Matrix - GNN'
        path = gnn_utils.images_dir + 'cm_' + filename + '.png'
        utils.confusion_matrix(pred.to('cpu'), label.to('cpu'), title, path)

    correct = pred.eq(label).sum().item()
    total = mask.sum().item()
    return correct / total