Пример #1
0
def get_all_features_ref(reference_answers, student_answer, student_answer_tokens):
	"""Build the feature list comparing a student answer with the reference answers.

	For the cosine measure and for every entry of the module-level ``metrics``
	the value kept is the highest score obtained over all reference answers.

	Args:
		reference_answers: Non-empty sequence of reference answer strings.
		student_answer: The student's answer as a raw string.
		student_answer_tokens: Token representation of the student answer,
			passed through unchanged to ``ex.get_text_similarity``
			(presumably POS-tagged and stop-word filtered like the reference
			tokens built here -- confirm against the caller).

	Returns:
		A list laid out as ``[max cosine, BLEU, <max score per metric>...,
		vec_similarity_ref_answers(reference_answers, student_answer)]``.

	Raises:
		IndexError: If ``reference_answers`` is empty.
	"""
	# BLEU similarity is computed once against the whole set of references.
	bleu_sim = ex.get_bleu_similarity(reference_answers, student_answer)

	# Checked after the BLEU call so an empty list still fails in the same
	# order and with the same exception type as before, just with a message.
	if len(reference_answers) == 0:
		raise IndexError("reference_answers must contain at least one answer")

	# Cosine similarity: keep the best match over all reference answers.
	max_cosine_sim = max(
		get_cosine_similarity(answer, student_answer)
		for answer in reference_answers
	)
	features = [max_cosine_sim, bleu_sim]

	# Wu-Palmer and the other text-similarity measures listed in ``metrics``
	# work on POS-tagged tokens with stop words removed.
	reference_answers_tokens = [
		[
			tagged
			for tagged in nltk.pos_tag(word_tokenize(answer))
			if tagged[0] not in EN_STOPWORDS
		]
		for answer in reference_answers
	]
	for metric in metrics:
		features.append(
			max(
				ex.get_text_similarity(tokens, student_answer_tokens, metric)
				for tokens in reference_answers_tokens
			)
		)

	features.append(vec_similarity_ref_answers(reference_answers, student_answer))
	return features
Пример #2
0
def get_all_features_question(question, student_answer, student_answer_tokens):
	"""Build the feature list comparing a student answer with the question text.

	Args:
		question: The question as a raw string.
		student_answer: The student's answer as a raw string.
		student_answer_tokens: Token representation of the student answer,
			passed through unchanged to ``ex.get_text_similarity``.

	Returns:
		A list laid out as ``[cosine, BLEU, <one score per entry of the
		module-level metrics>..., vec_similarity_sentences(question,
		student_answer)]``.
	"""
	# Whole-string measures first: cosine, then BLEU with the question as
	# the only reference.
	features = [
		get_cosine_similarity(question, student_answer),
		ex.get_bleu_similarity([question], student_answer),
	]

	# POS-tag the question and drop every pair whose word is a stop word.
	tagged_question = nltk.pos_tag(word_tokenize(question))
	content_tokens = [pair for pair in tagged_question if pair[0] not in EN_STOPWORDS]

	# One token-level similarity score per configured metric, in order.
	features.extend(
		ex.get_text_similarity(content_tokens, student_answer_tokens, metric)
		for metric in metrics
	)

	features.append(vec_similarity_sentences(question, student_answer))
	return features