def get_all_features_ref(reference_answers, student_answer, student_answer_tokens):
    """Compute similarity features between the student answer and the reference answers."""
    # BLEU similarity against the full set of reference answers.
    bleu_sim = ex.get_bleu_similarity(reference_answers, student_answer)

    # Cosine similarity: keep the best score over all reference answers.
    max_cosine_sim = max(
        get_cosine_similarity(answer, student_answer) for answer in reference_answers
    )
    features = [max_cosine_sim, bleu_sim]

    # Wu-Palmer and the other text-similarity metrics listed in `metrics`.
    # Tokenize, POS-tag, and drop stopwords from each reference answer.
    reference_answers_tokens = []
    for answer in reference_answers:
        answer_tokens = nltk.pos_tag(word_tokenize(answer))
        answer_tokens = [x for x in answer_tokens if x[0] not in EN_STOPWORDS]
        reference_answers_tokens.append(answer_tokens)

    # For each metric, keep the best score over all reference answers.
    for metric in metrics:
        max_sim = max(
            ex.get_text_similarity(tokens, student_answer_tokens, metric)
            for tokens in reference_answers_tokens
        )
        features.append(max_sim)

    # Word-vector similarity between the student answer and the reference answers.
    features.append(vec_similarity_ref_answers(reference_answers, student_answer))
    return features
def get_all_features_question(question, student_answer, student_answer_tokens):
    """Compute similarity features between the student answer and the question."""
    cosine_sim = get_cosine_similarity(question, student_answer)
    bleu_sim = ex.get_bleu_similarity([question], student_answer)
    features = [cosine_sim, bleu_sim]

    # Tokenize, POS-tag, and drop stopwords from the question.
    question_tokens = nltk.pos_tag(word_tokenize(question))
    question_tokens = [x for x in question_tokens if x[0] not in EN_STOPWORDS]

    # Text-similarity metrics between the question and the student answer.
    for metric in metrics:
        features.append(ex.get_text_similarity(question_tokens, student_answer_tokens, metric))

    # Word-vector similarity between the question and the student answer.
    features.append(vec_similarity_sentences(question, student_answer))
    return features
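

# --- Usage sketch (illustrative, not part of the original module) ---
# Minimal example of how the two feature builders above might be combined into
# a single feature vector. It assumes the module-level names used above
# (ex, metrics, EN_STOPWORDS, word_tokenize, nltk, get_cosine_similarity,
# vec_similarity_ref_answers, vec_similarity_sentences) are already defined or
# imported, and that `student_answer_tokens` is preprocessed the same way as
# the reference-answer and question tokens (POS-tagged, stopwords removed).
# The question, reference answer, and student answer strings are placeholders.
if __name__ == "__main__":
    question = "What does a prototype program do?"                      # placeholder
    reference_answers = ["It simulates the behaviour of the product."]  # placeholder
    student_answer = "A prototype simulates part of the product."       # placeholder

    # Preprocess the student answer once and reuse it for both feature builders.
    student_answer_tokens = nltk.pos_tag(word_tokenize(student_answer))
    student_answer_tokens = [x for x in student_answer_tokens if x[0] not in EN_STOPWORDS]

    feature_vector = (
        get_all_features_ref(reference_answers, student_answer, student_answer_tokens)
        + get_all_features_question(question, student_answer, student_answer_tokens)
    )
    print(feature_vector)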