Example #1
# Assumed module-level imports (model and helper module locations are project-specific):
import codecs
import os
import re
from operator import itemgetter

from django.shortcuts import render

from .models import Comprehension, Dictionary, NamedEntityType, Question, QuestionType
from . import cogm
import sentence_formulation as sf

def testing(request, type_id):
    question_type = QuestionType.objects.get(pk=type_id)

    # Append results for this question type to a per-type text file
    module_dir = os.path.dirname(__file__)  # directory containing this module
    file_path = os.path.join(module_dir, 'results/results_type_' + str(type_id) + '.txt')
    output_file = codecs.open(file_path, 'a', encoding="utf-8")
    output_file.write("\n \n" + question_type.QuestionType + "\n \n")

    # Iterate over all questions of this type
    for question in Question.objects.filter(QuestionTypeID=type_id):
        comprehension = Comprehension.objects.get(pk=question.Comprehension_id)

        # Split Question tags string into individual tags
        tag_parts = question.QuestionTagsOnly.split('\\')

        # Fetch compound words from the dictionary and count their matches against question words
        compound_words = [d.Word for d in Dictionary.objects.filter(CompoundWord=True)]
        question_words = question.QuestionText.split(' ')
        question_compound_words_count = len([w for w in question_words if w in compound_words])

        question_word_tags = [{'word': w, 'tag': t} for w, t in zip(question_words, tag_parts) if t != "RD_PUNC"]

        all_named_entities = [d.Word for d in Dictionary.objects.filter(NamedEntity=True)]
        question_named_entities = [m for m in question_words if m in all_named_entities]

        question_named_entity_types = [ne.name for ne in NamedEntityType.objects.filter(
                dictionary__Word__in=question_named_entities).distinct()]

        question_bag = {'question_word_tags': question_word_tags, 'punctuation_index': 1,
                            'question_compound_words_count': question_compound_words_count,
                            'question_named_entity_types': question_named_entity_types,
                            'question_named_entities': question_named_entities}

        cog_bag = []
        # Generate candidate answer sentences from the comprehension passage
        sentences = sf.formulate(question.QuestionText, comprehension.ComprehensionsText)

        for sentence in sentences['sentences']:
            sentence_words = sentence.split()
            sentence_compound_words_count = len([match for match in sentence_words if match in compound_words])
            sentence_named_entities = [m for m in sentence_words if m in all_named_entities]
            sentence_named_entity_types = [ne.name for ne in NamedEntityType.objects.filter(
                    dictionary__Word__in=sentence_named_entities).distinct()]

            # Drop the trailing sentence terminator token ('|')
            del sentence_words[-1]

            # Look up each sentence word's tag in the dictionary
            sentence_word_tags = []
            for word in sentence_words:
                obj = Dictionary.objects.filter(Word=word).first()
                if obj is not None:
                    sentence_word_tags.append({'word': obj.Word, 'tag': obj.WordType})

            sentence_bag = {'sentence_word_tags': sentence_word_tags, 'punctuation_index': 1,
                                'sentence_compound_words_count': sentence_compound_words_count,
                                'sentence_named_entity_types': sentence_named_entity_types,
                                'sentence_named_entities': sentence_named_entities}
            cog = cogm.calculate_cogm(question_bag, sentence_bag)
            distance_similarity = cogm.euclidean_distance_similarity(cog['sentence_cog']['cogX'], cog['sentence_cog']['cogY'],
                                                             cog['question_cog']['cogX'], cog['question_cog']['cogY'])
            cog['euclidean_distance'] = distance_similarity['euclidean_distance']
            cog['similarity'] = distance_similarity['similarity']
            cog['bonus_value'] = cog['sentence_cog']['bonus_value']
            cog['question_cog']['question'] = question.QuestionText
            cog['sentence_cog']['sentence'] = sentence
            cog['matched_bigrams'] = cog['sentence_cog']['matched_bigrams']

            cog_bag.append(cog)

        # Rank candidates so the sentence with the highest CoG similarity comes first
        cog_bag = sorted(cog_bag, key=itemgetter('similarity'), reverse=True)

        # Append the best-ranked question/sentence pair and its CoG features
        if cog_bag:
            best = cog_bag[0]
            q = best['question_cog']
            s = best['sentence_cog']
            output = re.sub(r'[\'",]', '', q['question']) + \
                ", (%s %s) (%s %s) (%s %s), %s, %s, %s, (%s %s), %s, %s\n" % (
                    q['X1'], q['Y1'], q['X2'], q['Y2'], q['X3'], q['Y3'],
                    q['lexical_density'], q['readability_index'], q['q_lex'],
                    q['cogX'], q['cogY'], best['euclidean_distance'], best['similarity'])
            output += re.sub(r'[\'",]', '', s['sentence']) + \
                ", (%s %s) (%s %s) (%s %s), %s, %s, %s, (%s %s), %s, %s\n" % (
                    s['X1'], s['Y1'], s['X2'], s['Y2'], s['X3'], s['Y3'],
                    s['lexical_density'], s['readability_index'], s['s_lex'],
                    s['cogX'], s['cogY'], best['euclidean_distance'], best['similarity'])
            output_file.write(output)
    output_file.close()


    return render(request, 'qa/testing.html', {})
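
Both examples rank candidate sentences with cogm.euclidean_distance_similarity, which turns the gap between the question's and the sentence's centre of gravity (CoG) into a similarity score. The cogm module itself is not shown on this page, so the following is only a minimal sketch of what such a helper might compute, assuming similarity is modelled as inverse distance (the real module may normalise differently):

import math

# Hypothetical stand-in for cogm.euclidean_distance_similarity (an assumption,
# not the project's actual code); it returns the two keys the views read.
def euclidean_distance_similarity(x1, y1, x2, y2):
    euclidean_distance = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
    # Coincident CoGs score 1.0; the score decays as the points move apart
    similarity = 1.0 / (1.0 + euclidean_distance)
    return {'euclidean_distance': euclidean_distance, 'similarity': similarity}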
Example #2
# Assumed module-level imports (model and helper module locations are project-specific):
from operator import itemgetter

from django.http import Http404
from django.shortcuts import render

from .models import Comprehension, Dictionary, NamedEntityType, Question
from . import cogm
import sentence_formulation as sf

def questiondetail(request, question_id):
    try:
        question = Question.objects.get(pk=question_id)
        question_types = question.QuestionTypeID.all()
        comprehension = Comprehension.objects.get(pk=question.Comprehension_id)

        # Split Question tags string into individual tags
        tag_parts = question.QuestionTagsOnly.split('\\')


        # Fetch compound words from the dictionary and count their matches against question words
        compound_words = [d.Word for d in Dictionary.objects.filter(CompoundWord=True)]
        question_words = question.QuestionText.split(' ')
        question_compound_words_count = len([w for w in question_words if w in compound_words])

        question_word_tags = [{'word': w, 'tag': t} for w, t in zip(question_words, tag_parts) if t != "RD_PUNC"]

        all_named_entities = [d.Word for d in Dictionary.objects.filter(NamedEntity=True)]
        question_named_entities = [m for m in question_words if m in all_named_entities]

        question_named_entity_types = [ne.name for ne in NamedEntityType.objects.filter(dictionary__Word__in=question_named_entities).distinct()]

        question_bag = {'question_word_tags': question_word_tags, 'punctuation_index': 1,
                        'question_compound_words_count': question_compound_words_count,
                        'question_named_entity_types': question_named_entity_types,
                        'question_named_entities': question_named_entities}

        cog_bag = []

        # Generate candidate answer sentences from the comprehension passage
        sentences = sf.formulate(question.QuestionText, comprehension.ComprehensionsText)
        for sentence in sentences['sentences']:
            sentence_words = sentence.split()

            sentence_compound_words_count = len([match for match in sentence_words if match in compound_words])
            sentence_named_entities = [m for m in sentence_words if m in all_named_entities]
            sentence_named_entity_types = [ne.name for ne in NamedEntityType.objects.filter(dictionary__Word__in=sentence_named_entities).distinct()]

            # Drop the trailing sentence terminator token ('|')
            del sentence_words[-1]

            # Look up each sentence word's tag in the dictionary
            sentence_word_tags = []
            for word in sentence_words:
                obj = Dictionary.objects.filter(Word=word).first()
                if obj is not None:
                    sentence_word_tags.append({'word': obj.Word, 'tag': obj.WordType})

            sentence_bag = {'sentence_word_tags': sentence_word_tags, 'punctuation_index': 1,
                                'sentence_compound_words_count': sentence_compound_words_count,
                                'sentence_named_entity_types': sentence_named_entity_types,
                                'sentence_named_entities': sentence_named_entities}
            cog = cogm.calculate_cogm(question_bag, sentence_bag)
            distance_similarity = cogm.euclidean_distance_similarity(cog['sentence_cog']['cogX'], cog['sentence_cog']['cogY'],
                                                         cog['question_cog']['cogX'], cog['question_cog']['cogY'])
            cog['euclidean_distance'] = distance_similarity['euclidean_distance']
            cog['similarity'] = distance_similarity['similarity']
            cog['bonus_value'] = cog['sentence_cog']['bonus_value']
            cog['question_cog']['question'] = question.QuestionText
            cog['sentence_cog']['sentence'] = sentence
            cog['matched_bigrams'] = cog['sentence_cog']['matched_bigrams']

            cog_bag.append(cog)
        # Rank candidates so the sentence with the highest CoG similarity comes first
        cog_bag = sorted(cog_bag, key=itemgetter('similarity'), reverse=True)

    except Question.DoesNotExist:
        raise Http404('Question does not exist')
    return render(request, 'qa/questiondetail.html',
                  {'comprehension': comprehension, 'question': question,
                   'cog_bag': cog_bag, 'question_types': question_types})
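
Both views index the result of sentence_formulation.formulate as sentences['sentences']. That module is not included on this page; below is a hypothetical stub that reproduces just the return shape the views depend on, assuming sentences in the corpus are delimited by the '|' marker that the views later strip as the trailing token (del sentence_words[-1]):

# Hypothetical stand-in for sentence_formulation.formulate (an assumption, not
# the project's actual code). question_text is accepted but unused here; the
# views only rely on a dict with a 'sentences' list of '|'-terminated strings.
def formulate(question_text, comprehension_text):
    sentences = [s.strip() + " |" for s in comprehension_text.split('|') if s.strip()]
    return {'sentences': sentences}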