Пример #1
0
    def statistics(self):
        """
        Summarise the results held in ``self.course_score_list`` and print them.

        Two summaries are printed:

        * the four score-bucket counts summed over every course, with the
          totals/rates filled in by ``CourseScore.compute()``;
        * how many courses have more than half of their questions in the
          "50 points or above" buckets, and how many do not.

        :return: None
        """
        # Number of courses on either side of the "mostly 50+" threshold.
        n_course_more50 = 0
        n_course_less50 = 0

        # Accumulator for the bucket counts of all courses together.
        total_score = CourseInfomation.CourseScore()
        for course_score in self.course_score_list:
            total_score.score_scope_more60_count += course_score.score_scope_more60_count
            total_score.score_scope_between5060_count += course_score.score_scope_between5060_count
            total_score.score_scope_between4050_count += course_score.score_scope_between4050_count
            total_score.score_scope_less40_count += course_score.score_scope_less40_count

            # A course counts as "more50" when over 50% of its questions
            # scored 50 or above.  The decision must look at this course's own
            # rates, not at the running totals (which made the test true for
            # practically every course).
            # NOTE(review): assumes the per-course rates were already computed
            # before the course was put into course_score_list -- confirm.
            if (course_score.score_scope_between5060_rate +
                    course_score.score_scope_more60_rate) > 0.5:
                n_course_more50 += 1
            else:
                n_course_less50 += 1

        total_score.compute()
        ns = '试题总数:{}'.format(total_score.score_scope_total)
        print(ns)

        # (template, count, rate) for each score bucket, best bucket first.
        bucket_report = (
            ('比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%',
             total_score.score_scope_more60_count,
             total_score.score_scope_more60_rate),
            ('基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%',
             total_score.score_scope_between5060_count,
             total_score.score_scope_between5060_rate),
            ('不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%',
             total_score.score_scope_between4050_count,
             total_score.score_scope_between4050_rate),
            ('不靠谱数(40分以下):{}  ,不靠谱占比:{}%',
             total_score.score_scope_less40_count,
             total_score.score_scope_less40_rate),
        )
        for template, count, rate in bucket_report:
            ns = template.format(count, round(rate * 100, 2))
            print(ns)

        # Distribution of courses over the two groups.
        n_course_total = n_course_less50 + n_course_more50
        if n_course_total == 0:
            # No courses were loaded: the ratios are undefined, so skip them
            # instead of dividing by zero.
            return
        less_rate = float(n_course_less50) / n_course_total
        more_rate = float(n_course_more50) / n_course_total
        ns = '50%以上的试题得分大于50分的课程数量:{}  占比:{}'.format(n_course_more50,
                                                     more_rate)
        print(ns)
        ns = '50%以上的试题得分小于50分的课程数量:{}  占比:{}'.format(n_course_less50,
                                                     less_rate)
        print(ns)
 def loadProcessedCourse(self, rootpath):
     """
     Reload previously processed courses from ``<rootpath>/statistics-mid.txt``.

     Each line of the file is handed to ``CourseScore.initByString``.  The
     resulting object is stored in ``self.course_processed_dict`` under the
     key ``"<school_code>-<course_code>"`` and appended to
     ``self.course_score_list``.  If the file does not exist nothing happens.

     :param rootpath: directory that contains ``statistics-mid.txt``
     :return: None
     """
     mid_filepath = '{}/statistics-mid.txt'.format(rootpath)
     if not FilePath.fileExist(mid_filepath):
         return
     # ``with`` closes the file even when parsing one of the lines raises.
     with open(mid_filepath, 'r') as fin:
         for one_course_str in fin:
             course_score = CourseInfomation.CourseScore()
             course_score.initByString(one_course_str)
             key = '{}-{}'.format(course_score.school_code,
                                  course_score.course_code)
             self.course_processed_dict[key] = course_score
             self.course_score_list.append(course_score)
Пример #3
0
    def statistics(self, statistics_filepath):
        """
        Summarise ``self.course_score_list`` and write a report file.

        The report contains, in order: the description of every course, the
        description of the aggregate over all courses, the descriptions of
        the "bad" courses (those where the 50-60 and 60+ rates together do
        not exceed 0.5), the unrecognised courses and the out-of-scope
        courses.  The split between good and bad courses is also printed.

        :param statistics_filepath: path of the report file; it is opened in
            ``'w'`` mode, so an existing file is overwritten
        :return: None
        """
        # Courses that fall below the threshold; reported again further down.
        bad_course_list = []
        # Number of courses on either side of the threshold.
        n_course_more50 = 0
        n_course_less50 = 0

        # ``with`` guarantees the report is flushed and closed even when one
        # of the description/write calls raises part-way through.
        with open(statistics_filepath, 'w') as f_stat:
            # Accumulator for the bucket counts of all courses together.
            total_score = CourseInfomation.CourseScore()
            for course_score in self.course_score_list:
                total_score.score_scope_more60_count += course_score.score_scope_more60_count
                total_score.score_scope_between5060_count += course_score.score_scope_between5060_count
                total_score.score_scope_between4050_count += course_score.score_scope_between4050_count
                total_score.score_scope_less40_count += course_score.score_scope_less40_count

                course_descrip = course_score.getDescription()
                f_stat.write('\n'.join(course_descrip))
                f_stat.write('\n\n')

                # More than 50% of the questions scored 50 or above -> good.
                if (course_score.score_scope_between5060_rate +
                        course_score.score_scope_more60_rate) > 0.5:
                    n_course_more50 += 1
                else:
                    n_course_less50 += 1
                    bad_course_list.append(course_score)

            # NOTE(review): compute() is not called on the aggregate before
            # getDescription(); confirm getDescription() does not rely on
            # totals/rates that only compute() fills in.
            f_stat.write('所有课程的汇总统计:')
            course_descrip = total_score.getDescription()
            f_stat.write('\n'.join(course_descrip))
            f_stat.write('\n\n')

            # Distribution of courses over the two groups; skipped when there
            # are no courses because the ratios would be undefined.
            n_course_total = n_course_less50 + n_course_more50
            if n_course_total > 0:
                less_rate = float(n_course_less50) / n_course_total
                more_rate = float(n_course_more50) / n_course_total
                ns = '50%以上的试题得分大于50分的课程数量:{}  占比:{}'.format(
                    n_course_more50, more_rate)
                print(ns)
                ns = '50%以上的试题得分小于50分的课程数量:{}  占比:{}'.format(
                    n_course_less50, less_rate)
                print(ns)

            # Save the bad-course information.
            print('bad course information.')
            for bad_course in bad_course_list:
                course_descrip = bad_course.getDescription()
                f_stat.write('\n'.join(course_descrip))
                f_stat.write('\n')

            # Save the courses that could not be recognised / were out of scope.
            f_stat.write('\n\n')
            f_stat.write('未识别的课程:')
            f_stat.write('\n'.join(self.course_unrecongnized))
            f_stat.write('\n\n')
            f_stat.write('超出范围的课程:')
            f_stat.write('\n'.join(self.course_over_scope))
    def predication(self):
        """
        Match every exam question of the configured courses against the
        knowledge points and record the outcome.

        For each question the words of "content + answer" are compared with
        the words of every entry in ``self.knowledge`` through
        ``self.doc_vec.pred_similarity``.  The results are sorted by score
        (highest first) and then:

        * passed to ``self.computeScore`` for the score statistics,
        * stored (question plus top three results) in
          ``self.bad_examquestion_list`` when
          ``self.badExamquestionStatistics`` flags the question,
        * formatted and appended to ``self.outputcontentlist``.

        Finally the per-bucket totals of ``self.course_score`` are appended
        to ``self.outputcontentlist`` and printed.

        :return: None
        """
        self.bad_examquestion_list = []
        self.course_score = CourseInfomation.CourseScore()
        self.course_score.initCourse(self.course_path_info_list[0].course)
        if self.examquestion_info is None:
            return

        qindex = 0
        examquestion_dict = self.examquestion_info.examquestion_dict
        for course_path_info in self.course_path_info_list:
            # NOTE(review): a course without questions ends the whole run
            # (no summary is produced), it does not just skip that course --
            # confirm this is intended.
            if course_path_info.course not in examquestion_dict:
                return

            for exam_question in examquestion_dict[course_path_info.course]:
                k = exam_question.knowledge_list
                q = exam_question.getContentAndAnswer()
                qindex += 1

                q_words = self.sentence.splitSentenceCanRepeat(q)
                # Find the key words in q and give them more weight.
                q_words = self.preprocessor.enlargeVipWords(q_words, q)
                if len(q_words) == 0:
                    continue

                # Score the question against every non-empty knowledge point.
                # ``index`` numbers only the entries that were scored.
                index = 0
                res_list = []
                for k_key, k_tup in self.knowledge.items():
                    k_words = k_tup[0]
                    if len(k_words) == 0:
                        continue
                    score = self.doc_vec.pred_similarity(q_words, k_words)
                    res_list.append(
                        ResultInfo.ResultInfo(index, score, k_tup[2], k_key))
                    index += 1
                # Best match first.
                res_list.sort(key=lambda x: x.score, reverse=True)

                # Update the score statistics.
                self.computeScore(res_list)

                # Remember questions flagged as bad, with their top 3 matches.
                if self.badExamquestionStatistics(res_list):
                    self.bad_examquestion_list.append(
                        (exam_question, res_list[0:3]))

                # Formatted output for this question.
                reslist, wordlist = self.formatOutput(res_list, k)
                if len(reslist) > 0:
                    ns = '问题{0}:'.format(qindex) + q
                    self.outputcontentlist.append(ns + '\n')
                    ns = '电脑标识知识点:' + ';'.join(wordlist)
                    self.outputcontentlist.append(ns + '\n')
                    ns = '知识点评估指标:' + ';'.join(reslist)
                    self.outputcontentlist.append(ns + '\n')
                    # The teacher's label is left blank in this variant.
                    ns = '老师标识知识点:'
                    self.outputcontentlist.append(ns + '\n')
                    self.outputcontentlist.append('\n')

        # Final summary: total number of questions and the four buckets.
        self.course_score.compute()

        ns = '试题总数:{}'.format(self.course_score.score_scope_total)
        self.outputcontentlist.append(ns + '\n')
        print(ns)

        # (template, count, rate) for each score bucket, best bucket first.
        bucket_report = (
            ('比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%',
             self.course_score.score_scope_more60_count,
             self.course_score.score_scope_more60_rate),
            ('基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%',
             self.course_score.score_scope_between5060_count,
             self.course_score.score_scope_between5060_rate),
            ('不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%',
             self.course_score.score_scope_between4050_count,
             self.course_score.score_scope_between4050_rate),
            ('不靠谱数(40分以下):{}  ,不靠谱占比:{}%',
             self.course_score.score_scope_less40_count,
             self.course_score.score_scope_less40_rate),
        )
        for template, count, rate in bucket_report:
            ns = template.format(count, round(rate * 100, 2))
            self.outputcontentlist.append(ns + '\n')
            print(ns)
    def predication(self):
        """
        Match every question of the subject file against the knowledge points
        and record the outcome.

        The subject file (``self.input_subject_file``, falling back to
        ``self.doc_vec.train_input_subject_file`` when it is ``None``) holds
        one ``<knowledge>:<question>`` pair per line.  For each line the
        question words are compared with the words of every entry in
        ``self.knowledge`` through ``self.doc_vec.pred_similarity``; the
        results are sorted by score (highest first), passed to
        ``self.computeScore`` and, once formatted, appended to
        ``self.outputcontentlist``.

        Finally the per-bucket totals of ``self.course_score`` are appended
        to ``self.outputcontentlist`` and printed.

        :return: None
        :raises ValueError: if a non-blank line contains no ``:`` separator
        """
        if self.input_subject_file is None:
            self.input_subject_file = self.doc_vec.train_input_subject_file

        # ``with`` makes sure the subject file is closed; it used to stay
        # open for the lifetime of the process.
        with open(self.input_subject_file, 'r') as question:
            ids_lines = question.readlines()

        qindex = 0
        self.course_score = CourseInfomation.CourseScore()

        for line in ids_lines:
            line = line.strip('\n')
            # A blank line (typically at the end of the file) carries no
            # question; skip it instead of failing on the missing separator.
            if not line.strip():
                continue
            sep_pos = line.index(':')
            k = line[0:sep_pos]
            q = line[sep_pos + 1:]
            qindex += 1

            q_words = self.sentence.splitSentenceCanRepeat(q)
            # Find the key words in q and give them more weight.
            q_words = self.preprocessor.enlargeVipWords(q_words, q)
            if len(q_words) == 0:
                continue

            # Score the question against every non-empty knowledge point.
            # ``index`` numbers only the entries that were scored.
            index = 0
            res_list = []
            for k_key, k_tup in self.knowledge.items():
                k_words = k_tup[0]
                if len(k_words) == 0:
                    continue
                score = self.doc_vec.pred_similarity(q_words, k_words)
                res_list.append(
                    ResultInfo.ResultInfo(index, score,
                                          k_tup[2] + ' ' + k_key))
                index += 1
            # Best match first.
            res_list.sort(key=lambda x: x.score, reverse=True)

            # Update the score statistics.
            self.computeScore(res_list)

            # Formatted output for this question.
            reslist, wordlist = self.formatOutput(res_list)
            if len(reslist) > 0:
                ns = '问题{0}:'.format(qindex) + q
                self.outputcontentlist.append(ns + '\n')
                ns = '电脑标识知识点:' + ';'.join(wordlist)
                self.outputcontentlist.append(ns + '\n')
                ns = '知识点评估指标:' + ';'.join(reslist)
                self.outputcontentlist.append(ns + '\n')
                ns = '老师标识知识点:' + k
                self.outputcontentlist.append(ns + '\n')
                self.outputcontentlist.append('\n')

        # Final summary: total number of questions and the four buckets.
        self.course_score.compute()

        ns = '试题总数:{}'.format(self.course_score.score_scope_total)
        self.outputcontentlist.append(ns + '\n')
        print(ns)

        # (template, count, rate) for each score bucket, best bucket first.
        bucket_report = (
            ('比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%',
             self.course_score.score_scope_more60_count,
             self.course_score.score_scope_more60_rate),
            ('基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%',
             self.course_score.score_scope_between5060_count,
             self.course_score.score_scope_between5060_rate),
            ('不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%',
             self.course_score.score_scope_between4050_count,
             self.course_score.score_scope_between4050_rate),
            ('不靠谱数(40分以下):{}  ,不靠谱占比:{}%',
             self.course_score.score_scope_less40_count,
             self.course_score.score_scope_less40_rate),
        )
        for template, count, rate in bucket_report:
            ns = template.format(count, round(rate * 100, 2))
            self.outputcontentlist.append(ns + '\n')
            print(ns)