示例#1
0
    def predication(self):
        """Rank every catalog against each course name and keep the top three.

        For each name in ``self.course_name_list`` that has an entry in
        ``self.sentence_words_dict``:

        * an empty word list sends the course to
          ``self.course_catalog_unknow_list``;
        * otherwise every entry of ``self.catalog_code_dict`` is scored with
          ``self.pred_similarity`` and the three highest-scoring candidates
          are stored in ``self.course_catalogs_good_dict`` when the best
          score is above 0.45, else in ``self.course_catalogs_bad_dict``.

        Course names missing from ``self.sentence_words_dict`` are skipped.
        When ``self.catalog_code_dict`` is empty there is nothing to rank, so
        the course is recorded as unknown instead of raising ``IndexError``.
        """
        for course_name in self.course_name_list:
            if course_name not in self.sentence_words_dict:
                continue

            course_words = self.sentence_words_dict[course_name]
            if len(course_words) == 0:
                self.course_catalog_unknow_list.append(course_name)
                continue

            # Score the course against every catalog entry.  Each catalog
            # tuple carries the code at position 0 and its word list at
            # position 2.
            candidates = []
            catalog_items = self.catalog_code_dict.items()
            for index, (catalog_name, catalog_tuple) in enumerate(catalog_items):
                catalog_code = catalog_tuple[0]
                catalog_words = catalog_tuple[2]
                score = self.pred_similarity(course_words, catalog_words)
                candidates.append(
                    ResultInfo.ResultInfo(index, score, catalog_code,
                                          catalog_name))

            if not candidates:
                # No catalogs to compare against: the course cannot be
                # classified, and indexing the best candidate would fail.
                self.course_catalog_unknow_list.append(course_name)
                continue

            # Highest score first.  The ``cmp`` keyword is deliberately not
            # passed: Python 3 rejects it, and ``key`` alone is sufficient.
            candidates.sort(key=lambda res: res.score, reverse=True)

            # Keep the three best candidates; the top score decides whether
            # the match counts as good or bad.
            best_candidate_list = candidates[:3]
            if best_candidate_list[0].score > 0.45:
                self.course_catalogs_good_dict[
                    course_name] = best_candidate_list
            else:
                self.course_catalogs_bad_dict[
                    course_name] = best_candidate_list
    def getKnowledgeByName(self, name, k):
        """Look up the knowledge entry stored under ``name``.

        On a hit, a ``ResultInfo`` built from index 0, score 1.0, the third
        element of the entry and the name is appended to ``k``.

        :param name: knowledge name to look up; ``None`` never matches.
        :param k: list that receives the result; it is mutated in place.
        :return: ``True`` when an entry with a non-empty word list (element 0
            of the entry) was found and appended, otherwise ``False``.
        """
        if name is None:
            return False

        # Direct lookup replaces a scan over every key that compared each
        # one with ``name``.
        k_tup = self.knowledgeByName.get(name)
        if k_tup is None or len(k_tup[0]) == 0:
            return False

        k.append(ResultInfo.ResultInfo(0, 1.0, k_tup[2], name))
        return True
    def predication(self):
        """Match every exam question to knowledge points and report scores.

        Resets ``self.bad_examquestion_list`` and ``self.course_score`` (the
        latter is initialised with the course of the first entry of
        ``self.course_path_info_list``).  Then, for every question of every
        course in ``self.course_path_info_list``:

        * the question text (content plus answer) is split into words and
          passed through ``self.preprocessor.enlargeVipWords``;
        * each entry of ``self.knowledge`` with a non-empty word list is
          scored with ``self.doc_vec.pred_similarity``;
        * the ranked results are fed to ``self.computeScore``; questions
          flagged by ``self.badExamquestionStatistics`` are stored, with
          their three best results, in ``self.bad_examquestion_list``;
        * formatted result lines are appended to ``self.outputcontentlist``.

        Finally the score summary is appended to ``self.outputcontentlist``
        and printed.

        Returns early, without a summary, when ``self.examquestion_info`` is
        ``None`` or when a course has no entry in its ``examquestion_dict``.

        :raises IndexError: if ``self.course_path_info_list`` is empty.
        """
        self.bad_examquestion_list = []
        self.course_score = CourseInfomation.CourseScore()
        self.course_score.initCourse(self.course_path_info_list[0].course)
        if self.examquestion_info is None:
            return

        examquestion_dict = self.examquestion_info.examquestion_dict
        qindex = 0
        for course_path_info in self.course_path_info_list:
            course = course_path_info.course
            if course not in examquestion_dict:
                # NOTE(review): this aborts the whole run (no summary is
                # produced) as soon as one course has no questions; confirm
                # whether skipping just this course was intended.
                return

            for exam_question in examquestion_dict.get(course):
                k = exam_question.knowledge_list
                q = exam_question.getContentAndAnswer()
                # Questions are numbered even when they are skipped below.
                qindex += 1

                q_words = self.sentence.splitSentenceCanRepeat(q)
                # Find the key words of the question and amplify them.
                q_words = self.preprocessor.enlargeVipWords(q_words, q)
                if len(q_words) == 0:
                    continue

                # Score the question against every knowledge point that has
                # words; the result index counts only the scored entries.
                res_list = []
                for k_key, k_tup in self.knowledge.items():
                    k_words = k_tup[0]
                    if len(k_words) == 0:
                        continue
                    score = self.doc_vec.pred_similarity(q_words, k_words)
                    res_list.append(
                        ResultInfo.ResultInfo(len(res_list), score, k_tup[2],
                                              k_key))

                # Highest score first.  ``cmp`` is not passed because
                # Python 3 rejects it and ``key`` alone is sufficient.
                res_list.sort(key=lambda res: res.score, reverse=True)

                # Accumulate the score statistics.
                self.computeScore(res_list)

                # Remember poorly matched questions with their top three
                # results.  The explicit comparison is kept because the
                # helper's return type is not visible here.
                if self.badExamquestionStatistics(res_list) == True:
                    self.bad_examquestion_list.append(
                        (exam_question, res_list[0:3]))

                # Format the results for output.
                reslist, wordlist = self.formatOutput(res_list, k)
                if len(reslist) > 0:
                    report_lines = (
                        '问题{0}:'.format(qindex) + q,
                        '电脑标识知识点:' + ';'.join(wordlist),
                        '知识点评估指标:' + ';'.join(reslist),
                        '老师标识知识点:',
                    )
                    for ns in report_lines:
                        self.outputcontentlist.append(ns + '\n')
                    self.outputcontentlist.append('\n')

        # Compute the overall statistics and emit the summary.
        self.course_score.compute()
        course_score = self.course_score

        summary = ['试题总数:{}'.format(course_score.score_scope_total)]
        score_bands = (
            ('比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%',
             course_score.score_scope_more60_count,
             course_score.score_scope_more60_rate),
            ('基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%',
             course_score.score_scope_between5060_count,
             course_score.score_scope_between5060_rate),
            ('不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%',
             course_score.score_scope_between4050_count,
             course_score.score_scope_between4050_rate),
            ('不靠谱数(40分以下):{}  ,不靠谱占比:{}%',
             course_score.score_scope_less40_count,
             course_score.score_scope_less40_rate),
        )
        for template, count, rate in score_bands:
            summary.append(template.format(count, round(rate * 100, 2)))

        for ns in summary:
            self.outputcontentlist.append(ns + '\n')
            # Parenthesised form prints the same text on Python 2 and 3.
            print(ns)
    def predication(self):
        """Match the questions of a subject file to knowledge points.

        The file is ``self.input_subject_file``; when that is ``None`` it is
        first set to ``self.doc_vec.train_input_subject_file``.  Each line is
        split at its first ``:`` into the labelled knowledge point (before)
        and the question text (after).  For every question:

        * the text is split into words and passed through
          ``self.preprocessor.enlargeVipWords``;
        * each entry of ``self.knowledge`` with a non-empty word list is
          scored with ``self.doc_vec.pred_similarity``;
        * the ranked results are fed to ``self.computeScore`` and formatted
          result lines are appended to ``self.outputcontentlist``.

        Finally the score summary held by ``self.course_score`` is appended
        to ``self.outputcontentlist`` and printed.

        Blank lines in the file are skipped.

        :raises ValueError: if a non-blank line contains no ``:``.
        """
        if self.input_subject_file is None:
            self.input_subject_file = self.doc_vec.train_input_subject_file

        # Read everything up front and close the file before processing.
        with open(self.input_subject_file, 'r') as question:
            ids_lines = question.readlines()

        qindex = 0
        self.course_score = CourseInfomation.CourseScore()

        for line in ids_lines:
            line = line.strip('\n')
            if not line.strip():
                # A blank line (e.g. at the end of the file) has no question.
                continue

            separator = line.index(':')
            k = line[0:separator]
            q = line[separator + 1:]
            # Questions are numbered even when they are skipped below.
            qindex += 1

            q_words = self.sentence.splitSentenceCanRepeat(q)
            # Find the key words of the question and amplify them.
            q_words = self.preprocessor.enlargeVipWords(q_words, q)
            if len(q_words) == 0:
                continue

            # Score the question against every knowledge point that has
            # words; the result index counts only the scored entries.
            res_list = []
            for k_key, k_tup in self.knowledge.items():
                k_words = k_tup[0]
                if len(k_words) == 0:
                    continue
                score = self.doc_vec.pred_similarity(q_words, k_words)
                res_list.append(
                    ResultInfo.ResultInfo(len(res_list), score,
                                          k_tup[2] + ' ' + k_key))

            # Highest score first.  ``cmp`` is not passed because Python 3
            # rejects it and ``key`` alone is sufficient.
            res_list.sort(key=lambda res: res.score, reverse=True)

            # Accumulate the score statistics.
            self.computeScore(res_list)

            # Format the results for output.
            reslist, wordlist = self.formatOutput(res_list)
            if len(reslist) > 0:
                report_lines = (
                    '问题{0}:'.format(qindex) + q,
                    '电脑标识知识点:' + ';'.join(wordlist),
                    '知识点评估指标:' + ';'.join(reslist),
                    '老师标识知识点:' + k,
                )
                for ns in report_lines:
                    self.outputcontentlist.append(ns + '\n')
                self.outputcontentlist.append('\n')

        # Compute the overall statistics and emit the summary.
        self.course_score.compute()
        course_score = self.course_score

        summary = ['试题总数:{}'.format(course_score.score_scope_total)]
        score_bands = (
            ('比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%',
             course_score.score_scope_more60_count,
             course_score.score_scope_more60_rate),
            ('基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%',
             course_score.score_scope_between5060_count,
             course_score.score_scope_between5060_rate),
            ('不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%',
             course_score.score_scope_between4050_count,
             course_score.score_scope_between4050_rate),
            ('不靠谱数(40分以下):{}  ,不靠谱占比:{}%',
             course_score.score_scope_less40_count,
             course_score.score_scope_less40_rate),
        )
        for template, count, rate in score_bands:
            summary.append(template.format(count, round(rate * 100, 2)))

        for ns in summary:
            self.outputcontentlist.append(ns + '\n')
            # Parenthesised form prints the same text on Python 2 and 3.
            print(ns)