def train():
    x_train, y_train, x_val, y_val = load_labelled.read_labelled(1)
    # x_train, y_train, x_val, y_val = load_labelled.read_labelled(1, 'r')
    # x_train, y_train, x_val, y_val = load_labelled.read_labelled(1, 'c')
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_val = x_val.reshape(x_val.shape[0], -1)
    clf = svm.SVC(class_weight='balanced', kernel='sigmoid')
    clf.fit(x_train, y_train)
    evaluate.evaluate(clf, x_val, y_val)
def train(use_fisher):
    x, y = loader.load()
    if use_fisher:
        x = preprocessing.to_fisher(x)
    x_train, y_train, x_val, y_val = loader.split_data(x, y)
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_val = x_val.reshape(x_val.shape[0], -1)
    clf = svm.SVC(class_weight='balanced')
    clf.fit(x_train, y_train)
    evaluate.evaluate(clf, x_val, y_val)
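# ---------------------------------------------------------------------------
# Illustration (hypothetical shapes, not from the code above): sklearn's SVC
# only accepts 2-D input, which is why both train() variants flatten each
# sample with reshape(n_samples, -1) before fitting.
import numpy as np

x = np.zeros((100, 30, 68, 3))      # (samples, frames, landmarks, xyz) -- assumed layout
x_flat = x.reshape(x.shape[0], -1)  # -> (100, 30 * 68 * 3) == (100, 6120)
assert x_flat.shape == (100, 6120)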
def train(args):
    x, y = loader.load()
    if args.model == 'mlp_fv':
        x = preprocessing.to_fisher(x)
        nb_samples, nb_landmarks, l = x.shape
        input_shape = (nb_landmarks, l)
    else:
        nb_samples, nb_frames, nb_landmarks, _ = x.shape
        input_shape = (nb_frames, nb_landmarks, 3)
        # x = loader.compact_frames(x, window_size=5, step_size=2)
    x_train, y_train, x_val, y_val = loader.split_data(x, y)
    model = get_model(args.model, input_shape=input_shape)
    print("Input shape: {}".format(x.shape))
    print(model.summary())
    model.compile(optimizer='adam',
                  loss=losses.binary_crossentropy,
                  metrics=['accuracy'])
    checkpointer = callbacks.ModelCheckpoint(filepath="data/weights.hdf5",
                                             verbose=1,
                                             save_best_only=True)
    early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=0,
                                             patience=2)
    class_weights = get_class_weights(y_train)
    model.fit(x_train, y_train,
              epochs=50,
              batch_size=8,
              validation_data=(x_val, y_val),
              callbacks=[checkpointer, early_stopping],
              class_weight=class_weights)

    # Load the best checkpoint saved during training.
    model.load_weights('data/weights.hdf5')

    # Print the evaluation metrics.
    train_score = model.evaluate(x_train, y_train)
    val_score = model.evaluate(x_val, y_val)
    print(model.metrics_names, train_score, val_score)
    evaluate.evaluate(model, x_train, y_train)
    evaluate.evaluate(model, x_val, y_val)
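# ---------------------------------------------------------------------------
# get_class_weights() is not defined in this excerpt. A plausible sketch,
# assuming it mirrors sklearn's 'balanced' heuristic
# (n_samples / (n_classes * count_c)), which Keras accepts as a dict:
import numpy as np

def get_class_weights(y):
    classes, counts = np.unique(y, return_counts=True)
    return {int(c): len(y) / (len(classes) * n) for c, n in zip(classes, counts)}

# Example: with 90 negatives and 10 positives the minority class is weighted
# ~9x higher: get_class_weights(np.array([0] * 90 + [1] * 10)) -> {0: 0.556, 1: 5.0}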
def evaluate_retrieval(model, dataset, params, session):
    print('evaluating retrieval')
    print('computing vectors...')
    validation_labels = np.array(
        [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
    training_labels = np.array(
        [[y] for y, _ in dataset.rows('training', num_epochs=1)])
    training_labels = np.concatenate((training_labels, validation_labels), 0)
    test_labels = np.array(
        [[y] for y, _ in dataset.rows('test', num_epochs=1)])
    validation_vectors = m.vectors(
        model,
        dataset.batches('validation', params.batch_size, num_epochs=1),
        session)
    training_vectors = m.vectors(
        model,
        dataset.batches('training', params.batch_size, num_epochs=1),
        session)
    training_vectors = np.concatenate((training_vectors, validation_vectors), 0)
    test_vectors = m.vectors(
        model,
        dataset.batches('test', params.batch_size, num_epochs=1),
        session)

    print('evaluating...')
    recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
    results = e.evaluate(training_vectors, test_vectors, training_labels,
                         test_labels, recall_values)
    for i, r in enumerate(recall_values):
        print('precision @ {}: {}'.format(r, results[i]))
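# ---------------------------------------------------------------------------
# A minimal sketch of what an IR evaluation like e.evaluate() typically
# computes (an assumption, not the actual implementation): for each test
# vector, rank the training documents by cosine similarity and measure how
# many of the top ceil(r * N) neighbours share the query's label.
import numpy as np

def precision_at_fraction(train_vecs, test_vecs, train_labels, test_labels, r):
    train_norm = train_vecs / np.linalg.norm(train_vecs, axis=1, keepdims=True)
    test_norm = test_vecs / np.linalg.norm(test_vecs, axis=1, keepdims=True)
    sims = test_norm @ train_norm.T                  # (num_test, num_train)
    k = max(1, int(np.ceil(r * train_vecs.shape[0])))
    top_k = np.argsort(-sims, axis=1)[:, :k]         # nearest training docs
    # labels have shape (N, 1) as built above, so compare label columns
    matches = train_labels[top_k][:, :, 0] == test_labels
    return float(matches.mean())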
def reload_evaluation_ir(params, training_vectors, validation_vectors,
                         test_vectors, suffix=""):
    ### Information Retrieval
    dataset = data.Dataset(params['dataset'])
    log_dir = os.path.join(params['model'], 'logs')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    ir_ratio_list = [0.0001, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02,
                     0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 1.0]
    # ir_ratio_list = [0.02]

    training_labels = np.array(
        [[y] for y, _ in dataset.rows('training_docnade', num_epochs=1)])
    test_labels = np.array(
        [[y] for y, _ in dataset.rows('test_docnade', num_epochs=1)])

    test_ir_list = eval.evaluate(
        training_vectors,
        test_vectors,
        training_labels,
        test_labels,
        recall=ir_ratio_list,
        num_classes=params['num_classes'],
        multi_label=params['multi_label'])

    # Logging information.
    with open(os.path.join(log_dir, "reload_info_ir.txt"), "a") as f:
        f.write("\n\nFractions list: %s" % (ir_ratio_list))
        f.write("\nTest IR: %s" % (test_ir_list))
def evaluate(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels), 0)
        test_labels = np.array(
            [[y] for y, _ in dataset.rows('test', num_epochs=1)])
        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model,
            dataset.batches('training', params.batch_size, num_epochs=1),
            session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model,
            dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')
        print("TRAINING VECTORS")
        print(training_vectors[0])
        print("TRAINING LABELS")
        print(training_labels)
        recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)
        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))
N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, _ = train(model, train_loader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_loader, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'logs/tut1-model.pt')

    print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
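# ---------------------------------------------------------------------------
# epoch_time() is not defined in this excerpt; a plausible helper consistent
# with how it is called above (whole minutes plus leftover seconds):
def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    elapsed_mins = int(elapsed / 60)
    elapsed_secs = int(elapsed - elapsed_mins * 60)
    return elapsed_mins, elapsed_secs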
def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_loss = tf.placeholder(tf.float32, [], 'loss_ph')
        tf.summary.scalar('loss', avg_loss)
        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)
        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        losses = []

        # This currently streams from disk. You can set num_epochs=1 and wrap
        # this call with something like itertools.cycle to keep this data in
        # memory.
        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x, seq_lengths = next(training_data)
            _, loss = session.run([model.opt, model.opt_loss],
                                  feed_dict={
                                      model.x: x,
                                      model.seq_lengths: seq_lengths
                                  })
            losses.append(loss)
            if step % params.log_every == 0:
                print('{}: {:.6f}'.format(step, loss))

            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation', params.batch_size,
                                    num_epochs=1), session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training', params.batch_size,
                                    num_epochs=1), session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))
                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.seq_lengths: seq_lengths,
                        validation: val,
                        avg_loss: np.average(losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                losses = []
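# ---------------------------------------------------------------------------
# A minimal sketch of the in-memory variant suggested by the comment in
# train() above: itertools.cycle caches each batch the first time the
# underlying iterator yields it, so after one full pass the data repeats from
# memory instead of streaming from disk. (dataset.batches and its arguments
# are assumed from the code above.)
import itertools

training_data = itertools.cycle(
    dataset.batches('training', params.batch_size, num_epochs=1))
# Same interface as the streaming version:
# _, x, seq_lengths = next(training_data)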
        }
    }  # closes the new_entry = {...} literal built above this excerpt

    if not entry['pred_success']:
        pred_failures += 1
    else:
        seq_embeddings = encoder_use([entry['final_adv'], entry['text']])
        semantic_sim = np.dot(*seq_embeddings)
        new_entry['semantic_sim'] = float(semantic_sim)
        adv_examples.append({k: entry[k] for k in {'label', 'text'}})

    # json.dump(new_entry, open(output_pth, "a"), indent=2)
    output_entries.append(new_entry)

    num_succeeded = sum(
        [output_entry['success'] for output_entry in output_entries])
    total_num = len(output_entries)
    num_failed = total_num - num_succeeded - pred_failures
    desc = f"[Succeeded / Failed / Total] {num_succeeded} / {num_failed} / {total_num}"
    progressbar.set_description(desc=desc, refresh=True)

    if (i + 1) % 100 == 0:
        evaluate(output_entries, pred_failures, eval_pth, params)
        json.dump(output_entries, open(output_pth, "w"), indent=2)
        json.dump(adv_examples, open(adv_set_pth, "w"), indent=2)

print("--- %.2f mins ---" % (int(time.time() - start_time) / 60.0))
evaluate(output_entries, pred_failures, eval_pth, params)
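# ---------------------------------------------------------------------------
# Note on semantic_sim above: np.dot(*seq_embeddings) equals cosine similarity
# only if the encoder returns unit-normalized vectors (the Universal Sentence
# Encoder typically does). An explicit normalization makes that assumption
# safe; this helper is a sketch, not part of the original code:
import numpy as np

def cosine_sim(a, b):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))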
def pretrain(self, dataset, params, session,
             # training_epochs=1000, alternate_epochs=10
             # training_epochs=100, alternate_epochs=10
             # training_epochs=1, alternate_epochs=1
             training_epochs=20, alternate_epochs=10):
    log_dir = os.path.join(params.model, 'logs_nvdm_pretrain')
    model_dir_ir_nvdm = os.path.join(params.model, 'model_ir_nvdm_pretrain')
    model_dir_ppl_nvdm = os.path.join(params.model, 'model_ppl_nvdm_pretrain')
    # model_dir_supervised = os.path.join(params.model, 'model_supervised_nvdm_pretrain')

    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)
    if not os.path.isdir(model_dir_ir_nvdm):
        os.mkdir(model_dir_ir_nvdm)
    if not os.path.isdir(model_dir_ppl_nvdm):
        os.mkdir(model_dir_ppl_nvdm)
    # if not os.path.isdir(model_dir_supervised):
    #     os.mkdir(model_dir_supervised)

    train_url = os.path.join(params.dataset, 'training_nvdm_docs_non_replicated.csv')
    dev_url = os.path.join(params.dataset, 'validation_nvdm_docs_non_replicated.csv')
    test_url = os.path.join(params.dataset, 'test_nvdm_docs_non_replicated.csv')

    train_set, train_count = utils.data_set(train_url)
    test_set, test_count = utils.data_set(test_url)
    dev_set, dev_count = utils.data_set(dev_url)

    # dev_batches = utils.create_batches(len(dev_set), self.batch_size, shuffle=False)
    dev_batches = utils.create_batches(len(dev_set), 512, shuffle=False)
    # test_batches = utils.create_batches(len(test_set), self.batch_size, shuffle=False)
    test_batches = utils.create_batches(len(test_set), 512, shuffle=False)

    training_labels = np.array(
        [[y] for y, _ in dataset.rows('training_nvdm_docs_non_replicated', num_epochs=1)])
    validation_labels = np.array(
        [[y] for y, _ in dataset.rows('validation_nvdm_docs_non_replicated', num_epochs=1)])
    test_labels = np.array(
        [[y] for y, _ in dataset.rows('test_nvdm_docs_non_replicated', num_epochs=1)])

    patience = params.pretrain_patience
    patience_count = 0
    best_dev_ppl = np.inf
    best_test_ppl = np.inf
    best_val_nvdm_IR = -1.0
    best_test_nvdm_IR = -1.0

    enc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='TM_encoder')
    dec_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='TM_decoder')
    pretrain_saver = tf.train.Saver(enc_vars + dec_vars)

    ppl_model = False
    ir_model = False

    for epoch in range(training_epochs):
        epoch_counter = epoch + 1
        # train_batches = utils.create_batches(len(train_set), self.batch_size, shuffle=True)
        train_batches = utils.create_batches(len(train_set), 512, shuffle=True)

        # -------------------------------
        # Train: alternate between updating the decoder and the encoder.
        for switch in range(0, 2):
            if switch == 0:
                optim = self.optim_dec
                print_mode = 'updating decoder'
            else:
                optim = self.optim_enc
                print_mode = 'updating encoder'
            for i in range(alternate_epochs):
                print_ppx, print_ppx_perdoc, print_kld = self.run_epoch(
                    train_batches, train_set, train_count, params, session,
                    optimizer=optim)
                print('| Epoch train: {:d} |'.format(epoch_counter), print_mode,
                      '{:d}'.format(i),
                      '| Corpus Perplexity: {:.5f}'.format(print_ppx),       # perplexity for all docs
                      '| Per doc Perplexity: {:.5f}'.format(print_ppx_perdoc),  # perplexity per doc
                      '| KLD: {:.5}'.format(print_kld))

        # Validation perplexity.
        if epoch_counter >= 1 and epoch_counter % params.nvdm_validation_ppl_freq == 0:
            ppl_model = True
            print_ppx, print_ppx_perdoc, print_kld = self.run_epoch(
                dev_batches, dev_set, dev_count, params, session)
            if print_ppx_perdoc < best_dev_ppl:
                best_dev_ppl = print_ppx_perdoc
                print("Saving best model.")
                pretrain_saver.save(session,
                                    model_dir_ppl_nvdm + '/model_ppl_nvdm_pretrain',
                                    global_step=1)
                patience_count = 0
            else:
                patience_count += 1
            print('| Epoch dev: {:d} |'.format(epoch_counter),
                  '| Corpus Perplexity: {:.9f} |'.format(print_ppx),
                  '| Per doc Perplexity: {:.5f} |'.format(print_ppx_perdoc),
                  '| KLD: {:.5} |'.format(print_kld),
                  '| Best dev PPL: {:.5} |'.format(best_dev_ppl))
            with open(log_dir + "/logs_ppl_nvdm_pretrain.txt", "a") as f:
                f.write('| Epoch Val: {:d} || Val Corpus PPL: {:.9f} || '
                        'Val Per doc PPL: {:.5f} || Best Val PPL: {:.5} || '
                        'KLD Val: {:.5} |\n'.format(
                            epoch + 1, print_ppx, print_ppx_perdoc,
                            best_dev_ppl, print_kld))

        # Validation IR.
        if epoch_counter >= 1 and epoch_counter % params.nvdm_validation_ir_freq == 0:
            ir_model = True
            validation_vectors_nvdm = self.hidden_vectors(
                dataset.batches_nvdm_LM('validation_nvdm_docs_non_replicated',
                                        params.batch_size, params.TM_vocab_length,
                                        num_epochs=1, multilabel=params.multi_label),
                params, session)
            training_vectors_nvdm = self.hidden_vectors(
                dataset.batches_nvdm_LM('training_nvdm_docs_non_replicated',
                                        params.batch_size, params.TM_vocab_length,
                                        num_epochs=1, multilabel=params.multi_label),
                params, session)
            val_nvdm_ir, _ = eval.evaluate(
                training_vectors_nvdm, validation_vectors_nvdm,
                training_labels, validation_labels,
                recall=[0.02], num_classes=params.num_classes,
                multi_label=params.multi_label)
            val_nvdm_ir = val_nvdm_ir[0]

            # Saving model and early stopping on IR.
            if val_nvdm_ir > best_val_nvdm_IR:
                best_val_nvdm_IR = val_nvdm_ir
                print('saving: {}'.format(model_dir_ir_nvdm))
                pretrain_saver.save(session,
                                    model_dir_ir_nvdm + '/model_ir_nvdm_pretrain',
                                    global_step=1)
                # patience_count = 0
            # else:
            #     patience_count += 1
            print("Epoch: %i, Val NVDM IR: %s, best val NVDM IR: %s\n"
                  % (epoch_counter, val_nvdm_ir, best_val_nvdm_IR))
            # Logging information.
            with open(log_dir + "/logs_ir_nvdm_pretrain.txt", "a") as f:
                f.write("Epoch: %i, Val NVDM IR: %s, best val NVDM IR: %s\n"
                        % (epoch_counter, val_nvdm_ir, best_val_nvdm_IR))

        if patience_count > patience:
            print("Early stopping.")
            break

    if ppl_model:
        print("Calculating Test PPL.")
        pretrain_saver.restore(session, tf.train.latest_checkpoint(model_dir_ppl_nvdm))
        print_ppx, print_ppx_perdoc, print_kld = self.run_epoch(
            test_batches, test_set, test_count, params, session)
        print('| Corpus Perplexity: {:.9f}'.format(print_ppx),
              '| Per doc Perplexity: {:.5f}'.format(print_ppx_perdoc),
              '| KLD: {:.5}'.format(print_kld))
        with open(log_dir + "/logs_ppl_nvdm_pretrain.txt", "a") as f:
            f.write('\n\nTest Corpus PPL: {:.9f} || Test Per doc PPL: {:.5f} || '
                    'KLD Test: {:.5} |\n'.format(print_ppx, print_ppx_perdoc, print_kld))

    if ir_model:
        print("Calculating Test IR.")
        pretrain_saver.restore(session, tf.train.latest_checkpoint(model_dir_ir_nvdm))
        test_vectors_nvdm = self.hidden_vectors(
            dataset.batches_nvdm_LM('test_nvdm_docs_non_replicated',
                                    params.batch_size, params.TM_vocab_length,
                                    num_epochs=1, multilabel=params.multi_label),
            params, session)
        test_nvdm_ir, _ = eval.evaluate(
            training_vectors_nvdm, test_vectors_nvdm,
            training_labels, test_labels,
            recall=[0.02], num_classes=params.num_classes,
            multi_label=params.multi_label)
        test_nvdm_ir = test_nvdm_ir[0]
        print("Epoch: %i, Test NVDM IR: %s\n" % (epoch_counter, test_nvdm_ir))
        # Logging information.
        with open(log_dir + "/logs_ir_nvdm_pretrain.txt", "a") as f:
            f.write("Epoch: %i, Test NVDM IR: %s\n" % (epoch_counter, test_nvdm_ir))
#####################################################################################
if opt.do_validation and iteration_number % opt.evaluation_frequency == 0:
    model.eval()
    eval_start = timer()

    # Compute train and validation metrics.
    # We evaluate on approximately 1000 samples.
    num_samples = opt.num_samples_eval
    num_eval_batches = math.ceil(num_samples / opt.batch_size)

    print()
    print("Train evaluation")
    train_losses, train_metrics = evaluate.evaluate(
        model, criterion, train_dataloader, num_eval_batches, "train")

    print()
    print("Val evaluation")
    val_losses, val_metrics = evaluate.evaluate(
        model, criterion, val_dataloader, num_eval_batches, "val")

    train_writer.add_scalar('Loss/Loss', train_losses["total"], iteration_number)
    train_writer.add_scalar('Loss/Flow', train_losses["flow"], iteration_number)
    train_writer.add_scalar('Loss/Graph', train_losses["graph"], iteration_number)
def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_d_loss = tf.placeholder(tf.float32, [], 'd_loss_ph')
        tf.summary.scalar('d_loss', avg_d_loss)
        avg_g_loss = tf.placeholder(tf.float32, [], 'g_loss_ph')
        tf.summary.scalar('g_loss', avg_g_loss)
        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)
        avg_al_loss = tf.placeholder(tf.float32, [], 'al_loss_ph')
        tf.summary.scalar('al_loss', avg_al_loss)

        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        d_losses = []
        g_losses = []
        al_g_losses = []
        df_disc_losses = pd.DataFrame()
        df_gen_losses = pd.DataFrame()
        df_losses = pd.DataFrame()
        df_val_scores = pd.DataFrame()
        df_al_losses = pd.DataFrame()

        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x = next(training_data)

            ###### Update the discriminator.
            d_loss_step = update_disc(model, x, model.D_solver, model.D_loss,
                                      params, session)
            d_losses.append(d_loss_step)
            df_disc_losses = df_disc_losses.append(
                {'step': step, 'disc_loss': d_loss_step}, ignore_index=True)

            ###### Update the generators.
            g_loss_list = []
            for i in range(0, params.num_gen):
                g_loss_i, al_loss = update_gen(model, x, model.G_solver[i],
                                               model.Gen_loss[i], model.Al_solver,
                                               model.Al_gen_loss, params, session)
                g_loss_list.append(g_loss_i)
            al_g_losses.append(al_loss)
            df_al_losses = df_al_losses.append(
                {'step': step, 'alpha_gen_loss': al_loss}, ignore_index=True)
            df_gen_losses = df_gen_losses.append(
                {
                    'step': step,
                    'g_0_loss': g_loss_list[0],
                    'g_1_loss': g_loss_list[1],
                    'g_2_loss': g_loss_list[2],
                    'g_3_loss': g_loss_list[3],
                    'g_4_loss': g_loss_list[4]
                },
                ignore_index=True)
            g_losses.append(g_loss_list)

            ###### Print discriminator and generator losses.
            if step % params.log_every == 0:
                text = '{}: {:.6f} \t'
                g_losses_print = g_losses
                print(text.format(step, d_losses[-1]),
                      g_losses_print[-1], al_g_losses[-1])

            ###### Print best validation scores.
            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation', params.batch_size, num_epochs=1),
                    session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training', params.batch_size, num_epochs=1),
                    session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))
                df_val_scores = df_val_scores.append(
                    {'step': step, 'val_score': val, 'best_val_score': best_val},
                    ignore_index=True)
                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.z: np.random.normal(
                            0, 1, (params.batch_size, params.z_dim)),
                        validation: val,
                        avg_d_loss: np.average(d_losses),
                        avg_al_loss: np.average(al_g_losses),
                        avg_g_loss: np.average(g_losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                d_losses = []
                g_losses = []

        ###### Store and plot discriminator and generator losses.
        df_losses = df_disc_losses.join(df_gen_losses.set_index('step'),
                                        on='step', how='inner')
        df_losses = df_losses.join(df_al_losses.set_index('step'),
                                   on='step', how='inner')
        df_losses.to_csv(log_dir + '_gen_disc_losses.csv', index=False)
        df_val_scores.to_csv(log_dir + '_val_scores.csv', index=False)
        img_loss = df_losses.plot(
            x='step',
            y=['disc_loss', 'alpha_gen_loss'],
            kind='line',
            title='Training Losses: Discriminator & Generator',
            xlabel="Step",
            ylabel="Loss").get_figure()
        img_loss.savefig(log_dir + '_disc_gen_loss.png')
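# ---------------------------------------------------------------------------
# Compatibility note (an assumption about the target pandas version, not part
# of the original code): DataFrame.append, used throughout train() above, was
# removed in pandas 2.0. An equivalent pattern on current pandas is to collect
# row dicts in a list and build the frame once:
import pandas as pd

rows = []
# inside the training loop: rows.append({'step': step, 'disc_loss': d_loss_step})
df_disc_losses = pd.DataFrame(rows, columns=['step', 'disc_loss'])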
from model.train import train
from model.unet import UNet
from model.dataloader import testloader
from model.evaluate import evaluate
import torch

COLAB = True
BATCH_SIZE = 1
PATH = 'unet_augment.pt'
# PATH = '../drive/My Drive/Colab Notebooks/im2height.pt'

test_loader = testloader(colab=COLAB, batch_size=BATCH_SIZE)

net = UNet()
net.load_state_dict(torch.load(PATH))
if torch.cuda.is_available():
    net.cuda()

criterion = torch.nn.L1Loss()
evaluate(net, test_loader, criterion=criterion)
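# ---------------------------------------------------------------------------
# Portability note (an assumption about the checkpoint, not the original
# code): if unet_augment.pt was saved on a GPU, torch.load needs an explicit
# map_location to run on a CPU-only machine. A device-agnostic variant:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.load_state_dict(torch.load(PATH, map_location=device))
net.to(device)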
def evaluate(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')
        recall_values = [
            0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0
        ]
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels), 0)
        test_labels = np.array(
            [[y] for y, _ in dataset.rows('test', num_epochs=1)])
        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model,
            dataset.batches('training', params.batch_size, num_epochs=1),
            session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model,
            dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')
        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)
        df_precision_recall = pd.DataFrame(list(zip(recall_values, results)),
                                           columns=['recall', 'precision'])
        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))

        ###### Plot precision-recall values.
        df_precision_recall.to_csv(log_dir + '_precision_recall_values.csv',
                                   index=False)
        img_precision_recall = df_precision_recall.plot(
            x='recall',
            y='precision',
            kind='line',
            title='Precision vs Recall',
            xlabel="Recall",
            ylabel="Precision").get_figure()
        img_precision_recall.savefig(log_dir + '_precision_recall.png')