def calculateWithK(test_set_by_user=None, k=-1, trainingSet=None):
    """Run a k-NN prediction pass over a test set and print per-sample results.

    Backward compatible with the old signature (test_set_by_user=[], trainingSet=[]):
    the shared mutable-list defaults are replaced by ``None`` sentinels, and the
    old ``-1`` sentinel meaning "fetch the stored test set" is still honoured.

    Side effects: sets the module-level ``neighbors`` and ``accuracy`` globals
    and prints progress to stdout.
    """
    if k == -1:
        # No k supplied: ask the user interactively (Python 2 raw_input).
        k = int(raw_input("Please input K: "))
    global neighbors, accuracy
    # data preparation: fall back to the stored test set when the caller gave
    # none.  (The old code compared the list default against -1, which never
    # matched, so get_test_set() was unreachable -- fixed here.)
    if test_set_by_user is None or test_set_by_user == -1:
        testSet = get_test_set()
    else:
        testSet = test_set_by_user
    # Honour a caller-supplied training set; the old code unconditionally
    # overwrote the parameter.  An empty/missing set still loads the default.
    if not trainingSet:
        trainingSet = get_training_set()
    print('training set --> ' + repr(trainingSet))
    print('testing set --> ' + repr(testSet))
    # predictions: classify each test sample from its k nearest neighbours
    predictions = []
    for sample in testSet:
        neighbors = get_neighbors(trainingSet, sample, k)
        responses_result = get_response(neighbors)
        predictions.append(responses_result)
        print('> predicted = ' + repr(responses_result) + ', actual = ' + repr(sample[-1]))
    # accuracy: percentage of correct predictions (label is the last column)
    accuracy = get_accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
def test():
    """Evaluate the trained network on the full test set and print statistics."""
    # TODO : Test Later
    print('==> Testing network..')
    # Predict a class for every segmented mel spectrogram in X_test.
    predictions = accuracy.predict_class_all(create_segmented_mels(X_test), a_net)
    # Per-class sample counts (row sums), the confusion matrix, then accuracy.
    print(np.sum(accuracy.confusion_matrix(predictions, y_test), axis=1))
    print(accuracy.confusion_matrix(predictions, y_test))
    print(accuracy.get_accuracy(predictions, y_test))
def calculateWithKFofGraph(test_set_by_user=None, trainingSet=None, k=1):
    """k-NN evaluation pass used for graphing: predict every test sample,
    print per-sample results, and return the accuracy percentage.

    The old shared mutable-list defaults (``[]``) are replaced with ``None``
    sentinels; calling with no arguments still behaves like empty lists.

    Side effects: sets the module-level ``neighbors`` and ``accuracy`` globals
    and prints progress to stdout.
    """
    global neighbors, accuracy
    # Preserve old behaviour: a missing argument acts as an empty list.
    testSet = [] if test_set_by_user is None else test_set_by_user
    if trainingSet is None:
        trainingSet = []
    # predictions: classify each test sample from its k nearest neighbours
    predictions = []
    for sample in testSet:
        neighbors = get_neighbors(trainingSet, sample, k)
        responses_result = get_response(neighbors)
        predictions.append(responses_result)
        print('> predicted = ' + repr(responses_result) + ', actual = ' + repr(sample[-1]))
    # accuracy: percentage of correct predictions (label is the last column)
    accuracy = get_accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
    return accuracy
def main():
    """Entry point: extract patch features, compare them, and report top-k accuracy.

    Steps:
      1. Print the parsed command-line arguments.
      2. Compute the network features (or reload the pickled cache) and save them.
      3. Compute pairwise similarity measures and pickle them.
      4. Print top-1 / top-5 accuracy.
    """
    args = get_args()
    print("\n#################")
    print("### Arguments ###")
    print("#################")
    for arg in vars(args):
        print(f"{arg} : {getattr(args, arg)}")
    print("#################\n")

    # get the features -- regenerate when no cache file exists, otherwise
    # reload the pickled cache.  (The old condition carried a debug leftover
    # "or True" that forced regeneration, and its else branch dead-ended with
    # "return", which would have skipped the comparison step entirely.)
    output_features_filename = get_prefix_file() + ".features"
    if not os.path.exists(output_features_filename):
        # NOTE(review): 300 looks like a fixed input/patch size -- confirm.
        results_features = get_features(args.arch, 300, pooling=args.pool)
        with open(output_features_filename, 'wb') as output_file:
            pickle.dump(results_features, output_file)
    else:
        print("## features already generated")
        with open(output_features_filename, 'rb') as input_file:
            results_features = pickle.load(input_file)

    # generate the similarity measures for patch-pair combinations
    results_comparison = compare(results_features, args.distance)
    output_comparison_filename = get_prefix_file() + ".comparison"
    with open(output_comparison_filename, 'wb') as output_file:
        pickle.dump(results_comparison, output_file)

    # generate the top-k accuracy (for k = 1 and 5)
    top1accuracy, top5accuracy = get_accuracy(
        results_comparison, args.distance)
    print("Top-1 and 5 accuracy for size {} : {} and {}\n\n".format(args.size, top1accuracy, top5accuracy))
# Decode the raw wav files in parallel.
X_test = pool.map(get_wav, X_test)
# Convert to MFCC
if DEBUG:
    print('converting to mfcc')
X_train = pool.map(to_mfcc, X_train)
X_test = pool.map(to_mfcc, X_test)
# Create segments from MFCCs.
# NOTE(review): the "validation" split is built from the test data, so the
# validation metrics are not independent of the final test -- confirm intent.
X_train, y_train = make_segments(X_train, y_train)
X_validation, y_validation = make_segments(X_test, y_test)
# Randomize training segments.  test_size=0 keeps every segment; the split is
# used only for its shuffle side effect.  NOTE(review): some scikit-learn
# versions reject test_size=0 -- verify against the pinned version.
X_train, _, y_train, _ = train_test_split(X_train, y_train, test_size=0)
# Train model
model = train_model(np.array(X_train), np.array(y_train), np.array(X_validation), np.array(y_validation))
# Make predictions on full X_test MFCCs
y_predicted = accuracy.predict_class_all(create_segmented_mfccs(X_test), model)
# Print statistics.  (These were Python-2 print statements while the DEBUG
# print above already used the function form; the parenthesized single-arg
# form below prints identically under Python 2 and is valid Python 3.)
print(train_count)
print(test_count)
print(acc_to_beat)
print(np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
print(accuracy.confusion_matrix(y_predicted, y_test))
print(accuracy.get_accuracy(y_predicted, y_test))
# Save model
save_model(model, model_filename)
# Convert raw audio to MFCC features in parallel.
if DEBUG:
    print('Converting to MFCC....')
X_train = pool.map(to_mfcc, X_train)
X_test = pool.map(to_mfcc, X_test)
# Create segments from MFCCs
# NOTE(review): the "validation" split is derived from the test data, so the
# validation metrics are not independent of the final evaluation -- confirm.
X_train, y_train = make_segments(X_train, y_train)
X_validation, y_validation = make_segments(X_test, y_test)
# Randomize training segments.  The 20% "test" portion of this split is
# discarded (bound to _); the call shuffles and subsamples the training set.
X_train, _, y_train, _ = train_test_split(X_train, y_train, test_size=0.2)
# Train model
model = train_model(np.array(X_train), np.array(y_train), np.array(X_validation), np.array(y_validation))
# Make predictions on full X_test MFCCs
y_predicted = accuracy.predict_class_all(create_segmented_mfccs(X_test), model)
# Save model
save_model(model, model_filename)
# Print statistics
print('Training samples:', train_count)
print('Testing samples:', test_count)
print('Accuracy to beat:', acc_to_beat)
# Row sums of the confusion matrix = per-class sample counts.
print('Confusion matrix of total samples:\n', np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
print('Confusion matrix:\n', accuracy.confusion_matrix(y_predicted, y_test))
print('Accuracy:', accuracy.get_accuracy(y_predicted, y_test))
# this part is the main function: train and evaluate a classifier on the
# Pima Indians diabetes dataset.
# NOTE(review): "spilt_data" is the imported module's own (misspelled) name,
# so it cannot be renamed here.
from data_process import load_csv, spilt_data
from feature_extract import summary_by_class
from forecast import get_predict
from accuracy import get_accuracy

if __name__ == '__main__':
    filename = 'pima-indians-diabetes.data.csv'
    splitRatio = 0.67
    dataset = load_csv(filename)
    trainingSet, testSet = spilt_data(dataset, splitRatio)
    # prepare model
    summaries = summary_by_class(trainingSet)
    # test model
    predictions = get_predict(summaries, testSet)
    accuracy = get_accuracy(testSet, predictions)
    # These were Python-2 print statements; the parenthesized single-arg form
    # prints identically under Python 2 and is valid Python 3.
    print(accuracy)
    print(predictions)
import numpy as np
import utils as ut
import accuracy as ac

# Load the train/test splits provided by the utils helper.
data_set_train, data_sets_test = ut.get_data()

# Restrict both splits to the two feature columns of interest.
columns = [30, 53]
data_set_train_selected, data_sets_test_selected = ut.select_data(data_set_train, data_sets_test, columns)

# log(1 + x) transform of the first selected column on the training split.
data_set_train_selected[:, 0] += 1
data_set_train_selected[:, 0] = np.log(data_set_train_selected[:, 0])

# Apply the same in-place transform to every test split.
for test_split in data_sets_test_selected:
    test_split[:, 0] += 1
    test_split[:, 0] = np.log(test_split[:, 0])

ac.get_accuracy(data_set_train_selected, data_sets_test_selected)
def train_model(model, trainds, testds, config, device, writer=None):
    """Distributed training loop with a per-epoch validation pass.

    Args:
        model: torch module to train; moved to ``device`` in place.
        trainds / testds: datasets yielding
            ``(inputs, targets, class_weights, nonzero_mask)`` tuples.
        config: nested dict of run settings (data / training / loss /
            optimizer / lr_schedule / distributed-rank fields).
        device: torch device for the model and every batch.
        writer: optional tensorboard SummaryWriter; only rank 0 writes to it.

    Side effects: saves a state-dict checkpoint at the end of every epoch and
    logs train/validation metrics via ``logger`` and ``writer``.
    """
    batch_size = config['data']['batch_size']
    status = config['training']['status']  # log every `status` train batches
    epochs = config['training']['epochs']
    balanced_loss = config['loss']['balanced']
    # nval = config['nval']
    nval_tests = config['nval_tests']  # -1 means "use the whole validation set"
    nsave = config['nsave']  # NOTE(review): unused in this function
    model_save = config['model_save']  # checkpoint filename prefix
    rank = config['rank']
    nranks = config['nranks']
    hvd = config['hvd']  # horovod module when enabled, falsy otherwise
    num_classes = config['data']['num_classes']

    ## create samplers for these datasets (shard each dataset across ranks)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainds, nranks, rank, shuffle=True, drop_last=True)
    test_sampler = torch.utils.data.distributed.DistributedSampler(
        testds, nranks, rank, shuffle=True, drop_last=True)

    ## create data loaders (shuffle=False: the sampler already shuffles)
    train_loader = torch.utils.data.DataLoader(
        trainds,
        shuffle=False,
        sampler=train_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)
    test_loader = torch.utils.data.DataLoader(
        testds,
        shuffle=False,
        sampler=test_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)

    # per-element loss/accuracy callables; running means via CalcMean helpers
    loss_func = loss.get_loss(config)
    ave_loss = CalcMean.CalcMean()

    acc_func = accuracy.get_accuracy(config)
    ave_acc = CalcMean.CalcMean()

    opt_func = optimizer.get_optimizer(config)
    opt = opt_func(model.parameters(), **config['optimizer']['args'])

    lrsched_func = optimizer.get_learning_rate_scheduler(config)
    lrsched = lrsched_func(opt, **config['lr_schedule']['args'])

    # Add Horovod Distributed Optimizer
    if hvd:
        opt = hvd.DistributedOptimizer(
            opt, named_parameters=model.named_parameters())
        # Broadcast parameters from rank 0 to all other processes.
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    model.to(device)

    for epoch in range(epochs):
        logger.info(' epoch %s of %s', epoch, epochs)
        # re-seed the samplers so each epoch gets a different shard order
        train_sampler.set_epoch(epoch)
        test_sampler.set_epoch(epoch)
        model.to(device)

        for batch_counter, (inputs, targets, class_weights,
                            nonzero_mask) in enumerate(train_loader):
            # move data to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            class_weights = class_weights.to(device)
            nonzero_mask = nonzero_mask.to(device)

            # zero grads
            opt.zero_grad()
            outputs, endpoints = model(inputs)

            # set the weights: either class-balanced weights (rescaled so the
            # loss magnitude matches the unbalanced case) or the nonzero mask
            if balanced_loss:
                weights = class_weights
                nonzero_to_class_scaler = torch.sum(
                    nonzero_mask.type(torch.float32)) / torch.sum(
                        class_weights.type(torch.float32))
            else:
                weights = nonzero_mask
                nonzero_to_class_scaler = torch.ones(1, device=device)

            loss_value = loss_func(outputs, targets.long())
            loss_value = torch.mean(
                loss_value * weights) * nonzero_to_class_scaler

            # backward calc grads
            loss_value.backward()

            # apply grads
            opt.step()

            ave_loss.add_value(float(loss_value.to('cpu')))

            # calc acc
            ave_acc.add_value(
                float(acc_func(outputs, targets, weights).to('cpu')))

            # print statistics every `status` batches, then reset the means
            if batch_counter % status == 0:
                logger.info(
                    '<[%3d of %3d, %5d of %5d]> train loss: %6.4f acc: %6.4f',
                    epoch + 1, epochs, batch_counter,
                    len(trainds) / nranks / batch_size, ave_loss.mean(),
                    ave_acc.mean())
                if writer and rank == 0:
                    # NOTE(review): float batch index (true division) -- confirm
                    global_batch = epoch * len(
                        trainds) / nranks / batch_size + batch_counter
                    writer.add_scalars('loss', {'train': ave_loss.mean()},
                                       global_batch)
                    writer.add_scalars('accuracy', {'train': ave_acc.mean()},
                                       global_batch)
                    #writer.add_histogram('input_trans',endpoints['input_trans'].view(-1),global_batch)
                ave_loss = CalcMean.CalcMean()
                ave_acc = CalcMean.CalcMean()

            # release tensors for memory
            del inputs, targets, weights, endpoints, loss_value

            if config['batch_limiter'] and batch_counter > config[
                    'batch_limiter']:
                logger.info('batch limiter enabled, stop training early')
                break

        # save at end of epoch
        # NOTE(review): every rank writes the same checkpoint path -- confirm
        # this is intended (usually only rank 0 saves).
        torch.save(model.state_dict(),
                   model_save + '_%05d.torch_model_state_dict' % epoch)

        if nval_tests == -1:
            # NOTE(review): true division yields a float batch count -- the
            # comparison below still works, but confirm this is intended.
            nval_tests = len(testds) / nranks / batch_size
        logger.info('epoch %s complete, running validation on %s batches',
                    epoch, nval_tests)

        model.to(device)

        # every epoch, evaluate validation data set
        with torch.no_grad():
            vloss = CalcMean.CalcMean()
            vacc = CalcMean.CalcMean()
            # one running-mean IoU tracker per class
            vious = [CalcMean.CalcMean() for i in range(num_classes)]
            for valid_batch_counter, (inputs, targets, class_weights,
                                      nonzero_mask) in enumerate(test_loader):
                inputs = inputs.to(device)
                targets = targets.to(device)
                class_weights = class_weights.to(device)
                nonzero_mask = nonzero_mask.to(device)

                # set the weights (same scheme as the training loop above)
                if balanced_loss:
                    weights = class_weights
                    nonzero_to_class_scaler = torch.sum(
                        nonzero_mask.type(torch.float32)) / torch.sum(
                            class_weights.type(torch.float32))
                else:
                    weights = nonzero_mask
                    nonzero_to_class_scaler = torch.ones(1, device=device)

                outputs, endpoints = model(inputs)

                loss_value = loss_func(outputs, targets.long())
                loss_value = torch.mean(
                    loss_value * weights) * nonzero_to_class_scaler
                vloss.add_value(float(loss_value.to('cpu')))

                # calc acc
                vacc.add_value(
                    float(acc_func(outputs, targets, weights).to('cpu')))

                # calc ious
                ious = get_ious(outputs, targets, weights, num_classes)
                for i in range(num_classes):
                    vious[i].add_value(float(ious[i]))

                if valid_batch_counter > nval_tests:
                    break

            mean_acc = vacc.mean()
            mean_loss = vloss.mean()
            # if config['hvd'] is not None:
            #     mean_acc = config['hvd'].allreduce(torch.tensor([mean_acc]))
            #     mean_loss = config['hvd'].allreduce(torch.tensor([mean_loss]))

            # mean IoU over all classes
            mious = float(
                torch.sum(torch.FloatTensor([x.mean() for x in vious
                                             ]))) / num_classes
            # NOTE(review): class indices 0/1/2 hard-coded as jet/electron/bkgd
            # -- assumes num_classes >= 3; confirm against the dataset labels.
            ious_out = {
                'jet': vious[0].mean(),
                'electron': vious[1].mean(),
                'bkgd': vious[2].mean(),
                'all': mious
            }

            # add validation to tensorboard
            if writer and rank == 0:
                # NOTE(review): batch_counter is the last *training* batch
                # index here -- confirm that is the intended x-axis value.
                global_batch = epoch * len(
                    trainds) / nranks / batch_size + batch_counter
                writer.add_scalars('loss', {'valid': mean_loss}, global_batch)
                writer.add_scalars('accuracy', {'valid': mean_acc},
                                   global_batch)
                writer.add_scalars('IoU', ious_out, global_batch)

            logger.warning(
                '>[%3d of %3d, %5d of %5d]<<< ave valid loss: %6.4f ave valid acc: %6.4f on %s batches >>>',
                epoch + 1, epochs, batch_counter,
                len(trainds) / nranks / batch_size, mean_loss, mean_acc,
                valid_batch_counter + 1)
            logger.warning(' >> ious: %s', ious_out)

        # update learning rate
        lrsched.step()
#########################################################
## TESTING
#########################################################
# Evaluate the classifier on the entire held-out dataset and report metrics.
x_test = Variable(torch.from_numpy(X_test))
y_real = y_test
output = classifier(x_test.float())
# Predicted class = index of the highest score in each output row.
y_pred = [np.argmax(row.detach().numpy()) for row in output]
accuracy = get_accuracy(y_pred, y_real)
print("Accuracy = {}%".format(accuracy))
print("Confusion matrix : \n", confusion_matrix(y_real, y_pred))
target_names = ['A', 'B', 'C', 'D', 'E']
print(
    "Classification Report : \n",
    classification_report(y_real,
                          y_pred,
                          target_names=target_names,
                          zero_division=0))
# f = open("res.txt", "a")
# f.write("{}%\n".format(accuracy))
# f.close()
def train(config, train_data, encoder, decoder):
    """Jointly train the encoder/decoder for slot filling and intent
    classification, plot loss/accuracy curves, and save the best weights.

    Args:
        config: run settings (learning_rate, epochs, batch_size, model_dir).
        train_data: training samples consumed via ``getBatch``.
        encoder / decoder: torch modules trained in place.

    Side effects: prints progress, writes plot1.png / plot2.png, and saves
    the best encoder/decoder state dicts under ``config.model_dir``.
    """
    # loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    # NOTE(review): these are aliases, not copies -- since the modules are
    # trained in place, best_* always point at the *current* weights; a
    # copy.deepcopy would be needed to truly keep the best epoch.  Confirm.
    best_decoder = decoder
    best_encoder = encoder
    # slot loss ignores padding index 0; intent loss is plain cross-entropy
    loss_function_1 = nn.NLLLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)
    train_loss_over_epochs = []
    val_accuracy_over_epochs_slot = []
    val_accuracy_over_epochs_intent = []
    best_val = 0
    for epoch in range(config.epochs):
        losses = []           # running losses since the last progress print
        losses_overepoch = [] # all losses of this epoch (for the plot)
        count = 0
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            count = count + 1
            x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            tag_target = torch.cat(y_1).view(-1)
            intent_target = torch.cat(y_2)
            # boolean mask marking padding positions (token id == 0)
            x_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s == 0, t.data)))) for t in x])\
                .view(config.batch_size, -1)
            encoder.zero_grad()
            decoder.zero_grad()
            output, hidden_c = encoder(x, x_mask)
            # decoder is primed with a column of start tokens (id 0)
            start_decode = Variable(torch.LongTensor([[0] * config.batch_size
                                                      ])).transpose(1, 0)
            # pdb.set_trace()
            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              x_mask)
            loss_1 = loss_function_1(tag_score, tag_target)
            loss_2 = loss_function_2(intent_score, intent_target)
            #loss = 0.4*loss_1+0.6*loss_2
            loss = loss_1 + loss_2
            losses.append(loss.item())
            losses_overepoch.append(loss.item())
            loss.backward()
            # clip gradients to stabilise RNN training
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)
            enc_optim.step()
            dec_optim.step()
            if i % 10 == 0:
                print("Epoch", epoch, " batch", i, " : ", np.mean(losses))
                losses = []
        # validation accuracies after each epoch
        val_accuracy_slot, val_accuracy_intent = get_accuracy(encoder, decoder)
        print(val_accuracy_slot)
        print(val_accuracy_intent)
        # NOTE(review): resetting the baseline at epoch == 1 discards epoch
        # 0's result -- looks like it may have been meant as epoch == 0.
        if epoch == 1:
            best_val = val_accuracy_slot
        if val_accuracy_slot > best_val:
            best_val = val_accuracy_slot
            best_decoder = decoder
            best_encoder = encoder
        #best_net = net.parameters
        train_loss_over_epochs.append(np.mean(losses_overepoch))
        val_accuracy_over_epochs_slot.append(val_accuracy_slot)
        val_accuracy_over_epochs_intent.append(val_accuracy_intent)
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    # pdb.set_trace()
    # Plot 1: train loss (top) and slot-filling validation accuracy (bottom).
    plt.figure(0)
    plt.subplot(2, 1, 1)
    plt.ylabel('Train loss')
    plt.plot(np.arange(config.epochs), train_loss_over_epochs, 'k-')
    plt.title('train loss and slot filling accuracy on validation set')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(config.epochs), val_accuracy_over_epochs_slot, 'b-')
    plt.ylabel('Slot filling accuracy')
    plt.xlabel('Epochs')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.savefig("plot1.png")
    # Plot 2: train loss (top) and intent-classification accuracy (bottom).
    plt.figure(1)
    plt.subplot(2, 1, 1)
    plt.ylabel('Train loss')
    plt.plot(np.arange(config.epochs), train_loss_over_epochs, 'k-')
    plt.title(
        'train loss and intent classification accuracy on validation set')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(config.epochs), val_accuracy_over_epochs_intent, 'b-')
    plt.ylabel('Intent classification accuracy')
    plt.xlabel('Epochs')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.savefig("plot2.png")
    print('Finished Training')
    torch.save(best_decoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-decoder.pkl'))
    torch.save(best_encoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
import utils as ut
import accuracy as ac

# Fetch the train/test splits and hand them straight to the scorer.
train_split, test_splits = ut.get_data()
ac.get_accuracy(train_split, test_splits)
# print (trainer) # Train model model = train_model(np.array(X_train), np.array(y_train), np.array(X_validation), np.array(y_validation)) # model = load_model("model.h5") # predicted = model.predict (k.create_segmented_mfccs(X_test)) # for i in predicted: # print (i) # Make predictions on full X_test MFCCs y_predicted = accuracy.predict_class_all( k.create_segmented_mfccs(X_test), model) # print (y_predicted) # for i in y_predicted: # print (i) # Print statistics print('Training samples:', train_count) print('Testing samples:', test_count) print('Accuracy to beat:', acc_to_beat) print('Confusion matrix of total samples:\n', np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1)) print('Confusion matrix:\n', accuracy.confusion_matrix(y_predicted, y_test)) print('Accuracy:', accuracy.get_accuracy(y_predicted, y_test)) results.append(accuracy.confusion_matrix(y_predicted, y_test)) acc.append(accuracy.get_accuracy(y_predicted, y_test)) # Save model print(results) print(acc) save_model(model, model_filename)