def validate(m, X, y):
    if args.task == 1:
        y_pred = m.predict(X).tolist()
    else:
        # Compare per-sample log-likelihoods of the two density models;
        # the prediction is whichever model scores the sample higher.
        y_pred1 = m[0].score_samples(X)
        y_pred2 = m[1].score_samples(X)
        y_pred = (y_pred1 > y_pred2).tolist()
    y_true = y.tolist()
    auc, eer = get_metrics(y_pred, y_true)
    return auc, eer
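
# get_metrics is defined elsewhere in this codebase. Below is a minimal sketch
# of a helper with the same (scores, labels) -> (auc, eer) contract, assuming
# binary labels and scikit-learn being available; the EER is approximated at
# the ROC point where false-positive and false-negative rates are closest.
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

def get_metrics(y_pred, y_true):
    auc = roc_auc_score(y_true, y_pred)
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    fnr = 1.0 - tpr
    eer = fpr[np.nanargmin(np.abs(fnr - fpr))]  # threshold where FPR ~= FNR
    return auc, eer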
def validate(test_model, num_samples, writer, Y_test, batch_size=100):
    start_time = timeit.default_timer()
    batch_index = 0
    cost_total, acc_total, loss_total = 0., 0., 0.
    predictions = None
    while True:
        start, end = batch_index * batch_size, min(
            (batch_index + 1) * batch_size, num_samples)
        batch_index += 1
        pred, cost, loss, acc, sim = test_model(start, end, 0)
        if predictions is None:
            predictions = sim
        else:
            predictions = numpy.concatenate((predictions, sim), axis=0)
        # Weight per-batch averages by batch size so the totals are exact means.
        cost_total += cost * (end - start)
        acc_total += acc * (end - start)
        loss_total += loss * (end - start)
        if end >= num_samples:  # all samples consumed
            break
    cost_total /= num_samples
    acc_total /= num_samples
    loss_total /= num_samples
    writer.write('\tTesting\tAccuracy = %.4f\tCost = %f\tLoss = %f\n' %
                 (acc_total, cost_total, loss_total))
    print('\tTesting\tAccuracy = %.4f\tCost = %f\tLoss = %f' %
          (acc_total, cost_total, loss_total))
    end_time = timeit.default_timer()
    # print('Test %.3f seconds' % (end_time - start_time))
    labels = numpy.argmax(Y_test, axis=1)
    roc_auc, ap, top1_accu, top5_accu = get_metrics(predictions, labels)
    print('\tROC-AUC = %.4f\tAP = %.4f\tTop-1 Acc = %.4f\tTop-5 Acc = %.4f' %
          (roc_auc, ap, top1_accu, top5_accu))
    return acc_total
def validate(test_model, writer, Y_test, batch_size=100, seen='seen'):
    num_samples = Y_test.shape[0]
    batch_index = 0
    cost_total, acc_total, loss_total = 0., 0., 0.
    predictions = None
    while True:
        start, end = batch_index * batch_size, min(
            (batch_index + 1) * batch_size, num_samples)
        batch_index += 1
        pred, cost, loss, acc, sim = test_model(start, end, 0)
        if predictions is None:
            predictions = sim
        else:
            predictions = numpy.concatenate((predictions, sim), axis=0)
        cost_total += cost * (end - start)
        acc_total += acc * (end - start)
        loss_total += loss * (end - start)
        if end >= num_samples:  # all samples consumed
            break
    cost_total /= num_samples
    acc_total /= num_samples
    loss_total /= num_samples
    labels = numpy.argmax(Y_test, axis=1)
    roc_auc, pr_auc, top1_accu, top5_accu = get_metrics(predictions, labels)
    print('\t' + seen + '\tROC-AUC = %.4f\tPR-AUC = %.4f\tTop-1 Acc = %.4f\tTop-5 Acc = %.4f'
          % (roc_auc, pr_auc, top1_accu, top5_accu))
    writer.write(
        '\t' + seen + '\tROC-AUC = %.4f\tPR-AUC = %.4f\tTop-1 Acc = %.4f\tTop-5 Acc = %.4f\n'
        % (roc_auc, pr_auc, top1_accu, top5_accu))
    return roc_auc, pr_auc, top1_accu, top5_accu
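
# The multi-metric get_metrics used by the two validate() functions above is
# not shown. A minimal sketch under the assumptions that `predictions` holds
# per-class scores of shape (num_samples, num_classes), `labels` holds integer
# class indices, there are more than two classes, and scikit-learn is available
# (PR-AUC is approximated by macro-averaged average precision).
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
from sklearn.preprocessing import label_binarize

def get_metrics(predictions, labels):
    classes = np.arange(predictions.shape[1])
    y_true = label_binarize(labels, classes=classes)
    roc_auc = roc_auc_score(y_true, predictions, average='macro')
    pr_auc = average_precision_score(y_true, predictions, average='macro')
    ranked = np.argsort(predictions, axis=1)[:, ::-1]  # classes by descending score
    top1 = np.mean(ranked[:, 0] == labels)
    top5 = np.mean([labels[i] in ranked[i, :5] for i in range(len(labels))])
    return roc_auc, pr_auc, top1, top5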
def main():
    dots = "." * 6
    print('loading dataset{}'.format(dots))
    dataset = load_data(DATA_PATH)
    labels = load_labels(LABEL_PATH)
    train_text, test_text, train_labels, test_labels = train_test_split(
        dataset.text, labels.labels, test_size=0.2, random_state=40)

    print('generating embeddings{}'.format(dots))
    print('calling vectorizer api{}'.format(dots))
    matrix_embedding = np.zeros((len(dataset), 400))
    for i in range(len(dataset)):
        text = dataset['text'][i]
        payload = {'text': text}  # request body for the vectorizer call
        response = requests.get('http://vectorizer.host/embed', data=payload)
        vector_embedding = json.loads(response.text)
        matrix_embedding[i] = vector_embedding
    embedding_size = matrix_embedding.shape[1]

    # Embedding size and class count are hard-coded for now; convert to variables.
    print('initializing model{}'.format(dots))
    model = models.keras_model(embedding_size)
    categorical_labels = to_categorical(labels.labels, num_classes=3)

    print('train test split{}'.format(dots))
    # Same test_size and random_state as the text/label split above, so
    # test_labels aligns row-for-row with X_test.
    X_train, X_test, y_train, y_test = train_test_split(
        matrix_embedding, categorical_labels, test_size=0.2, random_state=40)

    # Fit model.
    print('fitting model{}'.format(dots))
    model.fit(X_train, y_train, epochs=5, verbose=0)

    # Evaluate model.
    print('generating predictions{}'.format(dots))
    y_predicted = model.predict_classes(X_test)
    sentiment_names = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    y_string_predict = [sentiment_names[p] for p in y_predicted]
    accuracy, precision, recall, f1 = evaluate.get_metrics(
        test_labels, y_predicted)
    print('accuracy: {} precision: {} recall: {} f1: {}'.format(
        accuracy, precision, recall, f1))

    print('plotting confusion matrix{}'.format(dots))
    cm = confusion_matrix(test_labels, y_predicted)
    print('generated confusion matrix')
    print(cm)
    fig = plt.figure(figsize=(10, 10))
    plot = evaluate.plot_confusion_matrix(
        cm, classes=['Negative', 'Neutral', 'Positive'],
        normalize=False, title='Confusion matrix')
    plt.savefig('tweet_sentiment_class_confusion_matrix.png')
    plt.close()

    print('generating output dataframe{}'.format(dots))
    output_df = pd.DataFrame(test_text)
    output_df.columns = ['Test Tweets']
    output_df['Target Sentiment'] = test_labels
    output_df['Predicted Sentiment'] = y_string_predict
    output_df.to_csv('tweet_sentiment_prediction_table.csv')
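
# evaluate.get_metrics is external to both sentiment pipelines in this section.
# A minimal sketch with the same (y_true, y_pred) -> (accuracy, precision,
# recall, f1) contract, assuming scikit-learn; macro averaging over the classes
# is an assumption, not something the original code confirms.
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def get_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='macro')
    return accuracy, precision, recall, f1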
def dev_prediction():
    """Prediction on the development set."""
    dataset = "wavs/dev"
    # Default arguments.
    sr = 22050
    length = 256
    hop_length = 64
    # A dict: keys are file paths, values are per-frame VAD label sequences.
    label: dict = read_label_from_file(frame_size=length / sr,
                                       frame_shift=hop_length / sr)
    aucs = []
    eers = []
    prec = []
    pbar = tqdm(enumerate(list(label.keys())))
    for i, path in pbar:
        label_y = label[path]
        wav_path = os.path.join(dataset, path + ".wav")
        data, sample_rate = librosa.load(wav_path, sr=sr)
        pred, weights, frame_time = pipeline(data, sample_rate, length, hop_length)
        # The labels do not mark frames after the last voiced segment, so pad with zeros.
        n = len(label_y)
        while n < frame_time.shape[0]:
            label_y.append(0)
            n += 1
        x, y = ROC(weights, label_y)
        pres = precise(pred, label_y)
        prec.append(pres)
        plt.plot(frame_time, pred, 'r')
        plt.legend(["signal", "pred"])
        # plt.show()
        plt.waitforbuttonpress(10)
        plt.close()
        auc, eer = get_metrics(weights, label_y)
        aucs.append(auc)
        eers.append(eer)
        pbar.set_postfix({f'auc of {path}': f"{auc:0.4f}",
                          f"eer of {path}": f"{eer * 100:0.4f}%",
                          f'precision of {path}': f"{pres * 100:0.4f}%"})
    # Report the averaged metrics.
    print(f"average auc is {float(np.mean(aucs)):0.4f}, "
          f"average eer is {float(np.mean(eers)) * 100:0.4f}% "
          f"and average precision is {float(np.mean(prec)) * 100:.4f}%")
    plt.subplot(1, 3, 1)
    plt.plot(range(len(label)), aucs, '-')
    plt.title("auc")
    plt.subplot(1, 3, 2)
    plt.plot(range(len(label)), eers, '-')
    plt.title("eer")
    plt.subplot(1, 3, 3)
    plt.plot(range(len(label)), prec, '-')
    plt.title("precision")
    plt.show()
    plt.waitforbuttonpress(10)
    plt.close()
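
# For reference, the frame parameters in dev_prediction() at sr = 22050 give a
# window of 256 / 22050 ~= 11.6 ms and a hop of 64 / 22050 ~= 2.9 ms; these are
# the frame_size and frame_shift passed to read_label_from_file so that the
# per-frame labels align with frame_time.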
        # cv2.waitKey(0)
        # cv2.imwrite('./results_imgs/{}'.format(img_name), img)
    except Exception as e:
        print("Error with ", img_name)
        print(e)
    if idx % 100 == 0:
        print("{} images done".format(idx))

print("Done with the predictions. Finding the stats now")
if eval_files_list_path:
    with open(os.path.join(output_dir, "predictions.pkl"), "wb") as f:
        pickle.dump(predictions_dict, f)
    with open(bbox_dict_path, "rb") as f:
        bbox_coords_dict = pickle.load(f)
    tp, fp, fn, precision, recall, f_score = get_metrics(bbox_coords_dict,
                                                         predictions_dict,
                                                         threshold=0.1)
    result = "Precision: {}\nRecall: {}\nF_score: {}\nTP: {}, FP: {}, FN: {}".format(
        precision, recall, f_score, tp, fp, fn)
    print(result)
    with open(os.path.join(output_dir, "results.txt"), "w") as f:
        f.write(result)
# print('Elapsed time = {}'.format(time.time() - st))
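
# The detection-style get_metrics above is also external. A minimal sketch,
# assuming both dicts map image name -> list of [x1, y1, x2, y2] boxes and
# `threshold` is the IoU above which a prediction counts as a true positive;
# the greedy one-to-one matching below is an assumption, not the original logic.
def iou(a, b):
    # Intersection-over-union of two [x1, y1, x2, y2] boxes.
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter) if inter > 0 else 0.0

def get_metrics(gt_dict, pred_dict, threshold=0.1):
    tp = fp = fn = 0
    for name, gt_boxes in gt_dict.items():
        matched = set()
        for p in pred_dict.get(name, []):
            # Best-overlapping ground-truth box not yet claimed by a prediction.
            best = max(range(len(gt_boxes)),
                       key=lambda j: iou(p, gt_boxes[j]), default=None)
            if (best is not None and best not in matched
                    and iou(p, gt_boxes[best]) >= threshold):
                matched.add(best)
                tp += 1
            else:
                fp += 1
        fn += len(gt_boxes) - len(matched)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f_score = (2 * precision * recall / (precision + recall)
               if precision + recall else 0.0)
    return tp, fp, fn, precision, recall, f_score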
def main():
    dots = "." * 6
    print('loading dataset{}'.format(dots))
    dataset = load_data(DATA_PATH)
    labels = dataset[0]
    train_text, test_text, train_labels, test_labels = train_test_split(
        dataset[5], labels, stratify=labels, test_size=0.2, random_state=40)

    print('generating embeddings{}'.format(dots))
    print('calling vectorizer api{}'.format(dots))
    matrix_embedding = np.zeros((len(dataset), 300))
    for i in range(len(dataset)):
        text = dataset[5][i]
        payload = {'text': text}  # request body for the vectorizer call
        response = requests.get('http://vectorizer.host/embed', data=payload)
        vector_embedding = json.loads(response.text)
        # Mean-pool the per-token vectors into a single 300-d text embedding.
        vector_embedding = np.mean(vector_embedding, axis=0)
        matrix_embedding[i] = vector_embedding
    embedding_size = matrix_embedding.shape[1]

    # Embedding size and class count are hard-coded for now; convert to variables.
    print('initializing model{}'.format(dots))
    model = models.keras_model(embedding_size)
    categorical_labels = to_categorical(labels, num_classes=2)

    print('train test split{}'.format(dots))
    # Same test_size, stratify, and random_state as the text/label split above,
    # so test_labels aligns row-for-row with X_test.
    X_train, X_test, y_train, y_test = train_test_split(
        matrix_embedding, categorical_labels, stratify=labels,
        test_size=0.2, random_state=40)

    # Fit model.
    print('fitting model{}'.format(dots))
    csv_logger = CSVLogger('log.csv', append=True, separator=';')
    model.fit(X_train, y_train, epochs=5, callbacks=[csv_logger])
    model.save('full_data_epoch5.h5')

    # Evaluate model.
    print('generating predictions{}'.format(dots))
    y_predicted = model.predict_classes(X_test)
    accuracy, precision, recall, f1 = evaluate.get_metrics(
        test_labels, y_predicted)
    with open("metrics_output.txt", "w") as text_file:
        print('accuracy: {} precision: {} recall: {} f1: {}'.format(
            accuracy, precision, recall, f1), file=text_file)

    print('plotting confusion matrix{}'.format(dots))
    cm = confusion_matrix(test_labels, y_predicted)
    print('generated confusion matrix')
    print(cm)
    fig = plt.figure(figsize=(10, 10))
    plot = evaluate.plot_confusion_matrix(
        cm, classes=['Negative', 'Positive'],
        normalize=False, title='Confusion matrix')
    plt.savefig('tweet_sentiment_class_confusion_matrix_full_data.png')
    plt.close()