Exemplos de evaluate em Python, exemplos de evaluation.evaluation_metrics.evaluate em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: evaluate_cbf.py Projeto: mokarakaya/python-recommender-systems

def evaluate_cbf_models(df, user_key, item_key, rating_key, df_items, dataset):
    """Fit and evaluate every model registered for ``dataset``.

    The ratings frame is split with ``train_test_split`` (default settings),
    both splits are converted to sparse COO rating matrices, and each model
    returned by ``get_models(...)[dataset]`` is fitted on the training frame
    and scored with ``evaluate``. The model key, its results and the elapsed
    wall-clock time are printed; nothing is returned.

    Args:
        df: Ratings frame holding the user, item and rating columns.
        user_key: Column name of the user ids (used as matrix row indices).
        item_key: Column name of the item ids (used as matrix column indices).
        rating_key: Column name of the rating values.
        df_items: Item frame passed through to ``model.fit``.
        dataset: Key selecting which group of models to run.

    Raises:
        KeyError: If ``dataset`` is not a key of the mapping returned by
            ``get_models``.
    """
    train_df, test_df = train_test_split(df)

    # Without an explicit shape each matrix is sized by the largest id that
    # happens to fall into its own split, so train and test could end up with
    # different dimensions. Size both from the full frame instead.
    # Assumes ids are non-negative integers (they are used as indices).
    shape = (int(df[user_key].max()) + 1, int(df[item_key].max()) + 1)

    def to_matrix(frame):
        # Build a (user x item) rating matrix for one split.
        return sparse.coo_matrix(
            (frame[rating_key], (frame[user_key], frame[item_key])),
            shape=shape)

    test = to_matrix(test_df)
    train = to_matrix(train_df)

    models = get_models(df, user_key, item_key, rating_key)[dataset]
    for key, model in models.items():
        # The reported duration covers both fitting and evaluation.
        begin_time = datetime.datetime.now()
        model.fit(train_df, df_items)
        results = evaluate(model, test, train=train)
        print(key)
        print(results)
        print('duration:', datetime.datetime.now() - begin_time)
        print()

Exemplo n.º 2

0

Exibir arquivo

        loss = loss_fn(rnn_tokenized_sentence,
                       target_caption)  # Teacher forced loss
        test_loss.append(loss.data.item())

        rnn_tokenized_sentence_prediction = rnn.sentence_index(
            cnn_feature, vocab)
        rnn_tokenized_sentence_prediction = rnn_tokenized_sentence_prediction.cpu(
        ).data.numpy()
        predicted_words = create_caption_word_format(
            rnn_tokenized_sentence_prediction, vocab, False)

        original_sentence_tokenized = caption.cpu().data.numpy()
        target_words = create_caption_word_format(original_sentence_tokenized,
                                                  vocab, True)

        eval_scores = evaluate(target_words, predicted_words)
        #*print('Target words shape: ' + str(caption.size()))
        #*print('Target words: ' + str(target_words))
        #*print('Predicted words are: ' + str(predicted_words))
        for imgs, tgt, pdt in zip(img_paths, target_words, predicted_words):
            if imgs in target_caption_full.keys():
                target_caption_full[imgs].extend(tgt)
                candidate_caption_full[imgs].extend([pdt])
            else:
                candidate_caption_full[imgs] = []
                target_caption_full[imgs] = tgt
                candidate_caption_full[imgs].append(pdt)

        sf = SmoothingFunction()
        #*bleu1_corpus.append(corpus_bleu(target_words, predicted_words, weights=(1, 0, 0, 0), smoothing_function=sf.method4))
        #*bleu4_corpus.append(corpus_bleu(target_words, predicted_words, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=sf.method4))

Exemplo n.º 3

0

Exibir arquivo

def test_model(cnn, rnn, optimizer, loss_function, data_loader, vocab, params, model_name_load, device, sub_batch_size=-1, beam_size=0):
	'''
	Evaluate a saved encoder/decoder captioning model on a data loader.

	Loads the checkpoint ``<params['output_dir']>/<model_name_load>.ckpt``
	into ``cnn``, ``rnn`` and ``optimizer``, then for each batch computes the
	teacher-forced loss, generates captions with ``rnn.sentence_index`` and
	scores them with ``evaluate``. Running metrics are printed every 100
	batches and at the end. The collected target and candidate captions are
	pickled to ``Target_Words_Dict.pickle`` and ``Candidate_Words_Dict.pickle``
	in the output directory, and a final per-image smoothed BLEU-1..4 average
	is printed. Nothing is returned.

	Args:
		cnn: Encoder module; called on the image batch.
		rnn: Decoder module; called as ``rnn(features, caption, caption_len)``
			and via ``rnn.sentence_index(features, beam_size)``.
		optimizer: Optimizer whose state is restored from the checkpoint.
		loss_function: Callable taking (decoder output, packed target).
		data_loader: Iterable of ``(img_paths, image, caption, caption_len)``.
		vocab: Vocabulary passed to ``create_caption_word_format``.
		params: Mapping that provides ``'output_dir'``.
		model_name_load: Checkpoint file name without the ``.ckpt`` suffix.
		device: Either ``'cpu'`` or ``'gpu'``.
		sub_batch_size: Maximum number of batches to process; ``-1`` means
			``len(data_loader)``.
		beam_size: Forwarded to ``rnn.sentence_index``.

	Raises:
		ValueError: If ``device`` is neither ``'cpu'`` nor ``'gpu'``.
	'''
	# 'gpu' is this function's own label; torch only understands 'cuda', so
	# translate it once and fail early on anything else.
	if device == 'cpu':
		torch_device = torch.device('cpu')
	elif device == 'gpu':
		torch_device = torch.device('cuda')
	else:
		raise ValueError('Please specify a valid device from ["cpu", "gpu"].')

	state_dict = torch.load(os.path.join(params['output_dir'], model_name_load + '.ckpt'), map_location=torch_device)
	cnn.load_state_dict(state_dict['encoder_state_dict'])
	rnn.load_state_dict(state_dict['decoder_state_dict'])
	optimizer.load_state_dict(state_dict['optimizer_state_dict'])
	print("Model loaded.")
	# NOTE(review): the modules are not switched to eval() here; presumably
	# the caller does that. Confirm.

	test_loss = []
	bleu1_corpus = []
	bleu2_corpus = []
	bleu3_corpus = []
	bleu4_corpus = []
	bleu1 = []
	bleu2 = []
	bleu3 = []
	bleu4 = []
	cider = []
	rouge = []
	target_caption_full = {}
	candidate_caption_full = {}

	# Created before the loop so the final BLEU pass can use it even when no
	# batch is processed.
	sf = SmoothingFunction()

	if sub_batch_size == -1:
		sub_batch_size = len(data_loader)

	start_time = time.time()
	print('Testing started.')
	print("Sub-batch size - ", sub_batch_size)
	# Evaluation only: no gradients are needed, so skip building the graph.
	with torch.no_grad():
		for idx, (img_paths, image, caption, caption_len) in enumerate(data_loader):
			if idx == sub_batch_size:
				break
			image = image.to(torch_device)
			caption = caption.to(torch_device)

			target_caption = nn.utils.rnn.pack_padded_sequence(caption, caption_len, batch_first=True)[0]
			cnn_feature = cnn(image)
			rnn_tokenized_sentence = rnn(cnn_feature, caption, caption_len)
			loss = loss_function(rnn_tokenized_sentence, target_caption) # Teacher forced loss
			test_loss.append(loss.item())

			rnn_tokenized_sentence_prediction = rnn.sentence_index(cnn_feature, beam_size)
			rnn_tokenized_sentence_prediction = rnn_tokenized_sentence_prediction.cpu().data.numpy()
			predicted_words = create_caption_word_format(rnn_tokenized_sentence_prediction, vocab, False)

			original_sentence_tokenized = caption.cpu().data.numpy()
			target_words = create_caption_word_format(original_sentence_tokenized, vocab, True)

			eval_scores = evaluate(target_words, predicted_words)
			# Group references and candidates by image path; an image seen
			# more than once accumulates all of its captions.
			for imgs, tgt, pdt in zip(img_paths, target_words, predicted_words):
				if imgs in target_caption_full:
					target_caption_full[imgs].extend(tgt)
					candidate_caption_full[imgs].append(pdt)
				else:
					target_caption_full[imgs] = tgt
					candidate_caption_full[imgs] = [pdt]

			bleu1.append(eval_scores['Bleu_1'])
			bleu2.append(eval_scores['Bleu_2'])
			bleu3.append(eval_scores['Bleu_3'])
			bleu4.append(eval_scores['Bleu_4'])
			cider.append(eval_scores['CIDEr'])
			rouge.append(eval_scores['ROUGE_L'])

			if (idx + 1) % 100 == 0:
				# Report the float stored above rather than the loss tensor.
				print("Step %d - %0.4f test loss, %0.2f time, %.3f BLEU1, %.3f BLEU2, %.3f BLEU3, %.3f BLEU4, %.3f CIDEr, %.3f ROUGE_L." %(idx + 1, test_loss[-1], time.time() - start_time, 
						np.mean(bleu1)*100.0, np.mean(bleu2)*100.0, np.mean(bleu3)*100.0, np.mean(bleu4)*100.0, np.mean(cider)*100.0, np.mean(rouge)*100.0))

	print("%0.4f test loss, %0.2f time, %.3f BLEU1, %.3f BLEU2, %.3f BLEU3, %.3f BLEU4, %.3f CIDEr, %.3f ROUGE_L." %(np.mean(test_loss), time.time() - start_time, 
					np.mean(bleu1)*100.0, np.mean(bleu2)*100.0, np.mean(bleu3)*100.0, np.mean(bleu4)*100.0, np.mean(cider)*100.0, np.mean(rouge)*100.0))
	# Save the outputs to file
	with open(os.path.join(params['output_dir'], 'Target_Words_Dict.pickle'), 'wb') as f:
		pickle.dump(target_caption_full, f)

	with open(os.path.join(params['output_dir'], 'Candidate_Words_Dict.pickle'), 'wb') as f:
		pickle.dump(candidate_caption_full, f)

	# ------ Evaluate the BLEU score -------- #
	# One n-gram weight tuple per BLEU order, paired with its result list.
	bleu_setups = (
		((1.0, 0.0, 0.0, 0.0), bleu1_corpus),
		((0.5, 0.5, 0.0, 0.0), bleu2_corpus),
		((0.34, 0.33, 0.33, 0.0), bleu3_corpus),
		((0.25, 0.25, 0.25, 0.25), bleu4_corpus),
	)
	for img_nm, references in target_caption_full.items():
		candidates = candidate_caption_full[img_nm]
		# Average each BLEU order over all candidates recorded for the image.
		for weights, corpus_scores in bleu_setups:
			total = 0.0
			for candidate in candidates:
				total += corpus_bleu([references], [candidate], weights=weights, smoothing_function=sf.method4)
			corpus_scores.append(total/len(candidates))

	print("%0.4f test loss, %0.2f time, %.3f Final BLEU1, %.3f Final BLEU2, %.3f Final BLEU3, %.3f Final BLEU4" % (np.mean(test_loss), time.time() - start_time, 
					np.mean(np.array(bleu1_corpus))*100.0, np.mean(np.array(bleu2_corpus))*100.0, np.mean(np.array(bleu3_corpus))*100.0, np.mean(np.array(bleu4_corpus))*100.0))
	print('Testing completed.')