class Report(object): """docstring for Report""" def __init__(self, exercise_id, update_time='2017-11-01 00:00:00'): # do something inits self.db_fetcher = DataBaseFetcher() self.exercise_id = exercise_id self.dict_content_answer = {} # error content self.curr_time = datetime.datetime.now() self.update_time = update_time # update time self.recommend = RecommendQuestion() self.dict_realtion_quesion, self.dict_question_num = self.getRelationQuestion( ) self.dict_parent_quesion = self.getChildQuestion() self.dict_question_diff = self.getRightDifficulty() self.dict_point, self.dict_topic_point = self.getPoints() self.dict_students = self.getStudentId() self.dict_student_score, self.dict_student_records = {}, { x: [] for x in self.dict_students } self.exam_list_records, self.practice_list_records = self.getExerciseRecord( ) #records # dict self.dict_question_types = { 1: '选择题', 2: '填空题', 4: '简答题', 6: '综合题', 17: '计算题', 44: '证明题' } self.dict_question_difficulty = { 1: '易', 2: '中', 3: '难', 4: '极难' } # 1:易 2:中 3:难 4:极难 # feedback self.dict_students_resource = self.getStudentResource() # resource def getExerciseRecord(self): exam_list_records = [] practice_list_records = [] sql = "select a.question_id, a.student_id, a.ret_num, a.submit_answer, a.submit_time, b.question_type from entity_student_exercise a join \ neworiental_v3.entity_question b on b.id = a.question_id where a.submit_time > \'%s\' and a.exercise_source = 6" % ( self.update_time) dict_is_choice = {} rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: question, student_id, ret, answer, submit_time, question_type = row student_id = long(student_id) if answer == 'null' or answer == 'None' or answer == '' or answer == 'NULL' or answer == None or len( answer) == 0: continue if question in self.dict_realtion_quesion: exam_list_records.append([ question, student_id, ret, answer, submit_time, question_type ]) self.dict_student_records[student_id].append([ question, student_id, ret, answer, submit_time, question_type ]) self.dict_content_answer['%s_%s' % (question, student_id)] = answer elif question in self.dict_parent_quesion: dict_content = self.dict_parent_quesion[question] parent_question_id = dict_content['parent_id'] rank = dict_content['rank'] link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ parent_question_id] if question_num >= 17 and question_num <= 21: if rank == 1: exam_list_records.append([ parent_question_id, student_id, ret, answer, submit_time, question_type ]) self.dict_student_records[student_id].append([ parent_question_id, student_id, ret, answer, submit_time, question_type ]) elif rank == 2: self.dict_content_answer['%s_%s' % (parent_question_id, student_id)] = answer elif question_num >= 22 and question_num <= 23: key = '%s_%s' % (parent_question_id, student_id) if rank == 1: if answer == 'A': dict_is_choice[key] = 1 else: dict_is_choice[key] = 0 elif rank == 2: if key in dict_is_choice: if dict_is_choice[key] == 1: exam_list_records.append([ parent_question_id, student_id, ret, answer, submit_time, question_type ]) self.dict_student_records[student_id].append([ parent_question_id, student_id, ret, answer, submit_time, question_type ]) elif rank == 3: self.dict_content_answer[key] = answer else: practice_list_records.append([ question, student_id, ret, answer, submit_time, question_type ]) return exam_list_records, practice_list_records def getStudentId(self, file_name='students.txt'): list_students = [] with open(file_name) as handle_f: for line in handle_f: system_id = long(line.strip()) list_students.append(system_id) set_studends = set(list_students) # students set str_content = ','.join([str(x) for x in set_studends]) sql = "select a.`system_id`, a.`name` from neworiental_user.entity_user a where type = 2 and a.system_id in (%s)" % str_content dict_students = {} rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: student_id, name = row if student_id in set_studends: dict_students[student_id] = name return dict_students def getRightDifficulty(self): dict_question_diff = {} sql = "select b.id, b.difficulty from neworiental_v3.entity_question b join link_question_answer a on b.id = a.link_question_id where a.update_time >= \'%s\'" % self.update_time rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: question_id, difficulty = row dict_question_diff[question_id] = int(difficulty) return dict_question_diff def getChildQuestion(self): dict_parent_list = {} dict_parent_question = {} sql = "select a.id, a.parent_question_id from link_question_answer b join neworiental_v3.entity_question a on a.parent_question_id = b.link_answer_id \ where b.question_num >= 17 and b.update_time >= \'%s\'" % self.update_time rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: qid = row[0] parent_id = row[1] if parent_id not in dict_parent_list: dict_parent_list[parent_id] = [] dict_parent_list[parent_id].append(qid) for parent_id in dict_parent_list: sort_question = sorted(dict_parent_list[parent_id], reverse=False) count = 1 for item in sort_question: dict_parent_question[item] = { 'parent_id': parent_id, 'rank': count } count += 1 return dict_parent_question def getRelationQuestion(self): dict_realtion_quesion = {} dict_point_question_num = {} sql = "select a.link_question_id, a.link_answer_id, a.link_point_id, a.question_num, b.difficulty, a.question_exercise_id, b.question_type from link_question_answer a join neworiental_v3.entity_question b on a.link_question_id = \ b.id where a.update_time >= \'%s\'" % self.update_time rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') dict_parent_quesion = {} link_parent_list = [] for row in rows: link_question_id, link_answer_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = row dict_realtion_quesion[ link_answer_id] = link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type arr_point = link_point_id.strip().split(',') for point in arr_point: if point not in dict_point_question_num: dict_point_question_num[point] = [] dict_point_question_num[point].append([ link_question_id, question_num, difficulty, question_exercise_id, question_type ]) return dict_realtion_quesion, dict_point_question_num def getScore(self, question_num, answer): ascore, score = 0, 0 if question_num >= 1 and question_num <= 16: score = 5 if (answer == 'A' or answer == 'B') else 0 ascore = 5 elif question_num >= 17 and question_num <= 21: if answer == 'A': score = 4 elif answer == 'B': score = 7 elif answer == 'C': score = 9 elif answer == 'D': score = 12 ascore = 12 elif question_num >= 22 and question_num <= 23: if answer == 'A': score = 4 elif answer == 'B': score = 7 elif answer == 'C': score = 9 elif answer == 'D': score = 10 ascore = 10 return ascore, score def statQustionReport(self): dict_question_count, dict_question_right = {}, {} dict_point_count, dict_point_right = {}, {} dict_error_students, dict_right_students = {}, {} for item in self.exam_list_records: question, student_id, ret, answer, submit_time, question_type = item if question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ question] arr_points = link_point_id.strip().split(',') ascore, score = self.getScore(question_num, answer) is_right = (answer == 'A') or (answer == 'B') # 1 point for point in arr_points: if point in self.dict_point: name, ptype, question_type, level, parent_id, link_id = self.dict_point[ point] if name not in dict_point_count: dict_point_count[name] = 0 dict_point_right[name] = 0 dict_point_right[name] += 1 if ( question_num <= 16 and is_right) or ( question_num >= 17 and answer == 'D') else 0 # dict_point_count[name] += 1 if link_question_id not in dict_question_count: dict_question_right[link_question_id] = 0 dict_question_count[link_question_id] = 0 dict_question_right[ link_question_id] += 1 if ret == 1 else 0 # dict_question_count[link_question_id] += 1 # questions if link_question_id not in dict_right_students: dict_right_students[link_question_id] = [] dict_error_students[link_question_id] = [] if (question_num <= 16 and is_right) or (question_num >= 17 and answer == 'D'): dict_right_students[link_question_id].append(student_id) else: dict_error_students[link_question_id].append(student_id) if student_id not in self.dict_student_score: self.dict_student_score[student_id] = 0 self.dict_student_score[student_id] += score dict_point_acc = {} for point_name in dict_point_count: acc = dict_point_right[point_name] / dict_point_count[point_name] dict_point_acc[point_name] = acc #print '%s \t%s' % (point_name, acc) # students dict_students_point_question = self.calStudentPoints(dict_point_acc) return dict_students_point_question def getErrorText(self, answer): if answer == 'C': return '审题时,粗心看错了' elif answer == 'D': return '审题时,题意读不懂' elif answer == 'E': return '析题时,思路模糊不清' elif answer == 'F': return '析题时,思路方向错误' elif answer == 'G': return '解题时,公式记错了' elif answer == 'H': return '解题时,计算出错了' elif answer == 'I': return '未作答(或未完成),时间来不及了' elif answer == 'A' or answer == 'B': return '答题正确' def getQuestionWords(self): dict_question_words = {} str_question = ','.join([ str(self.dict_realtion_quesion[x][0]) for x in self.dict_realtion_quesion ]) str_sql = "select c.id, c.subject_id, t.type_id,t.struct_id, c.json_data from neworiental_v3.entity_question c left join neworiental_v3.entity_question_type t on \ t.type_id=c.question_type_id where c.id in (%s)" % str_question s_rows = self.db_fetcher.get_sql_result(str_sql, 'mysql_logdata') for row in s_rows: question, subject_id, type_id, struct_id, json_data = row if json_data != '' and json_data != 'None' and json_data != None: keywords, res = sq.predict(question, subject_id, type_id, struct_id, json_data) dict_question_words[question] = keywords, res return dict_question_words def calStudentPoints(self, dict_point_acc): dict_coef = {x: 1.0 for x in self.dict_point} # bate = 0.25 dict_student_points = {} dict_students_point_question = {} # student -> error question for student_id in self.dict_students: if student_id not in dict_student_points: dict_student_points[student_id] = {} records = sorted(self.dict_student_records[student_id], key=lambda x: x[-1], reverse=True) dict_error = { 'C': 0.0, 'D': 0.0, 'E': 0.0, 'F': 0.0, 'G': 0.0, 'H': 0.0, 'I': 0.0 } mass_score = 0 dict_question_point = {} for item in records: question, student_id, ret, answer, submit_time, question_type = item if answer == 'answer': continue if question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ question] if question_exercise_id != self.exercise_id: continue if question_type == '单选题' or question_type == '选择题': qtype = 1 elif question_type == '填空题': qtype = 2 else: qtype = 3 difficulty = self.dict_question_diff[link_question_id] arr_points = link_point_id.strip().split(',') a_score, score = self.getScore(question_num, answer) content = '%s_%s' % (question, student_id) if content in self.dict_content_answer: answer = self.dict_content_answer[content] if answer != 'A' and answer != 'B' and answer != 'answer': dict_error[answer] += a_score - score if answer == 'C' or answer == 'G' or answer == 'H': mass_score += a_score - score for point in arr_points: if point not in dict_question_point: dict_question_point[point] = [] dict_question_point[point].append( (link_question_id, qtype, submit_time)) name, ptype, question_type, level, parent_id, link_id = self.dict_point[ point] acc = dict_point_acc[name] if point not in dict_student_points[student_id]: dict_student_points[student_id][point] = 0.0 dict_student_points[student_id][ point] += dict_coef[point] * ( a_score - score ) * acc / math.log(1 + bate * (level + difficulty)) sort_error = sorted(dict_error.items(), key=lambda x: x[1], reverse=True) if len(dict_student_points[student_id]) > 0: sort_point = sorted(dict_student_points[student_id].items(), key=lambda x: x[1], reverse=True) student_name = self.dict_students[student_id] point_name1, ptype1, question_type1, level1, parent_id1, link_id1 = self.dict_point[ point] point_name2, ptype2, question_type2, level2, parent_id2, link_id2 = self.dict_point[ sort_point[1][0]] point_name3, ptype3, question_type3, level3, parent_id2, link_id3 = self.dict_point[ sort_point[2][0]] is_power = 1 if mass_score > 5 else 0 error1, error1score = sort_error[0][0], sort_error[0][1] error2, error2score = sort_error[1][0], sort_error[1][1] error1_name, error2_name = self.getErrorText( error1), self.getErrorText(error2) dict_students_point_question[ student_id] = sort_point, dict_question_point return dict_students_point_question def getPoint2Question(self): dict_point_org_question = {} for answer_id in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ answer_id] arr_point = link_point_id.strip().split(',') for item in arr_point: if item not in dict_point_org_question: dict_point_org_question[item] = [] dict_point_org_question[item].append(link_question_id) return dict_point_org_question def pointsRecQuestion(self, dict_students_point_question, dict_diff, throld=4): # 1,2 choice question 1:easy 2:difficulty 3,4: comprehensive problem 3:easy 4:difficulty # dict_point_org_question = self.getPoint2Question() # org question point dict_question_text = self.getQuestionWords() dict_question_target = {} dict_student_recommend_question = {} print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % ( '学生id', '学生名字', '考点', '考点名称', '原题id', '推荐习题id', '关键词', '难度') for student_id in dict_students_point_question: arr_point, dict_question_point_target = dict_students_point_question[ student_id] student_name = self.dict_students[student_id] haveTime = 12 recommend_questions = [] pre_set = set() for item_target in arr_point: if haveTime <= 0 and len(recommend_questions) % 3 == 0: break item_point = item_target[0] arr_question = sorted(dict_question_point_target[item_point], key=lambda x: x[-1], reverse=True) key = '%s-%s' % (student_id, item_point) rateRight, difficulty = dict_diff[ key] if key in dict_diff else (0.0, 1) pre_set = pre_set | set([x[0] for x in arr_question]) # for base_question, qtype, submit_time in arr_question: if haveTime <= 0 and len(recommend_questions) % 3 == 0: break keywords, text = dict_question_text[base_question] text = text.replace('\n', '').strip('\r').strip() # filter base_question_key = '%s-%s' % (base_question, difficulty) if base_question_key in dict_question_target: target_questions = dict_question_target[ base_question_key] else: target_questions = self.recommend.getEsResult( base_question, text, keywords, difficulty, qtype, pre_set, throld) dict_question_target[ base_question_key] = target_questions for rec_question in target_questions: if haveTime <= 0 and len(recommend_questions) % 3 == 0: break if rec_question in pre_set: continue haveTime += -1 recommend_questions.append( (base_question, keywords, rec_question, item_point, difficulty)) pre_set.add(rec_question) for rec_item in recommend_questions: source_question, keywords, rec_question, item_point, difficulty = rec_item point_name, ptype, question_type, level, parent_id, link_id = self.dict_point[ item_point] print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % ( student_id, student_name, item_point, point_name, source_question, rec_question, keywords, difficulty) def importDefault(self, is_new, dict_question_topic): defult_list = [(10805815, 0), (10806190, 2), (10805815, 4), (10806184, 6), (10806182, 8), (10806169, 10), (10806104, 12), (10806057, 14), (10805815, 16), (10806068, 18)] update_sql = "insert into entity_recommend_question_bytopic(system_id, type,chapter_id,topic_id, question_id, `master`, duration, important, subject_id, score, school_publish, org_id, org_type) values" new_sql = "insert into sync_student_recommend_question(system_id, resource_type, resource_id, subject_id, tag1, tag2, score, type, type_level, type_id) values" db_rows = self.db_fetcher.get_sql_result( "select student_id from entity_student_white_list", "mysql_white_list") is_first = 1 for row in db_rows: student_id = row[0] for item in defult_list: question_id, subject_id = item type_id = 2186 if question_id in dict_question_topic: set_topic = dict_question_topic[question_id] if len(set_topic) > 0: type_id = set_topic.pop() if is_first == 1: update_sql += "(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, question_id, subject_id, 0) if is_new == 1: new_sql += "(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, question_id, subject_id, 0, 1, 3, type_id) is_first = 0 else: update_sql += ",(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, question_id, subject_id, 0) new_sql += ",(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, question_id, subject_id, 0, 1, 3, type_id) if is_new == 1: self.db_fetcher.commit_sql_cmd(new_sql, 'mysql_v3_white_list') else: self.db_fetcher.commit_sql_cmd(update_sql, 'mysql_white_list') def import2DataBase(self, flag, is_new=1, fname='student_rec.txt'): dict_question_topic = self.recommend.getThisQuestionTopic( ) # question topic if is_new == 1: self.db_fetcher.commit_sql_cmd( "delete from sync_student_recommend_question", 'mysql_v3_white_list') # update else: self.db_fetcher.commit_sql_cmd( "delete from entity_recommend_question_bytopic", 'mysql_white_list') # update # self.importDefault(is_new, dict_question_topic) # import if flag == 1: # 1,3 5 recomend update_sql = "insert into entity_recommend_question_bytopic(system_id, type,chapter_id,topic_id, question_id, `master`, duration, important, subject_id, score, school_publish, org_id, org_type) values" insert_sql = "insert into entity_question_recommend(student_id,student_name,point,point_name,question_id,recommend_id,keywords,difficulty) values" new_sql = "insert into sync_student_recommend_question(system_id, resource_type, resource_id, subject_id, tag1, tag2, score, type, type_level, type_id) values" insert_score, is_first = 0, 1 with open(fname) as rec_f: for line in rec_f: if insert_score > 1: arr = line.strip().split('\t') student_id, question_id = arr[0], long(arr[5]) type_id = 2186 if question_id in dict_question_topic: set_topic = dict_question_topic[question_id] if len(set_topic) > 0: type_id = set_topic.pop() if is_first == 1: update_sql += "(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, question_id, 0, insert_score) insert_sql += "(%s,\'%s\',%s, \'%s\', %s, %s, \'%s\',%s)" % ( arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6], arr[7]) if is_new == 1: new_sql += "(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, question_id, 0, insert_score, 1, 3, type_id) is_first = 0 else: update_sql += ",(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, question_id, 0, insert_score) insert_sql += ",(%s,\'%s\',%s, \'%s\', %s, %s, \'%s\',%s)" % ( arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6], arr[7]) if is_new == 1: new_sql += ",(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, question_id, 0, insert_score, 1, 3, type_id) insert_score += 1 # self.db_fetcher.commit_sql_cmd(insert_sql, 'mysql_logdata') if is_new == 1: # update mysql self.db_fetcher.commit_sql_cmd(new_sql, 'mysql_v3_white_list') # new else: self.db_fetcher.commit_sql_cmd(update_sql, 'mysql_white_list') def getPoints(self): dict_point = {} dict_topic_point = {} sql = 'select id, name, type, question_type, level, parent_id, link_id, topics_id from entity_exam_points' # do something rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: point_id, name, ptype, question_type, level, parent_id, link_id, topics_id = row point_id = str(point_id) dict_point[ point_id] = name, ptype, question_type, level, parent_id, link_id if ptype == 3: # arr_topic = [long(x) for x in topics_id.strip().split(',')] for topic in arr_topic: if topic not in dict_topic_point: dict_topic_point[topic] = [] dict_topic_point[topic].append(point_id) return dict_point, dict_topic_point def getStudentResource(self): dict_students_resource = {} str_content = ','.join([str(x) for x in self.dict_students]) sql = "select b.student_id, a.resource_id, b.deadline from neworiental_v3.link_respackage_publish_resource a JOIN neworiental_v3.link_respackage_student b on \ a.publish_id = b.publish_id where a.resource_type = 2 and b.student_id in (%s)" % ( str_content) rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: student_id, resource_id, deadline = row if resource_id > 0: if student_id not in dict_students_resource: dict_students_resource[student_id] = set() dict_students_resource[student_id].add(resource_id) return dict_students_resource def getQuestionBaseInfo(self, question_set): dict_question_difficulty = {} str_content = ','.join([str(x) for x in question_set]) sql = "select b.id, b.difficulty from neworiental_v3.entity_question where b.id in (%s)" % str_content rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: question_id, difficulty = row dict_question_diff[question_id] = int(difficulty) return dict_question_difficulty def updateStuAdapt2Difficult(self, range_throld=10, difficulty_throld=10): dict_exam_question = {} dict_exam_difficulty = {} dict_question_topic = self.recommend.getThisQuestionTopic() # for exam_item in self.exam_list_records: question, student_id, ret, answer, submit_time, question_type = exam_item if question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ question] arr_point = link_point_id.strip().split(',') is_right = 1 if answer == 'A' or answer == 'B' else 0 for point in arr_point: key = '%s-%s' % (student_id, point) if key not in dict_exam_question: dict_exam_question[key] = [] dict_exam_difficulty[key] = {} record = (question, is_right, difficulty, submit_time, question_type) dict_exam_question[key].append(record) if difficulty not in dict_exam_difficulty[key]: dict_exam_difficulty[key][difficulty] = [] dict_exam_difficulty[key][difficulty].append(record) question_set = set([x[0] for x in self.practice_list_records]) dict_question_difficulty = self.getQuestionBaseInfo(question_set) for practice_item in self.practice_list_records: question, student_id, ret, answer, submit_time, question_type = practice_item is_right = 1 if ret == 1 else 0 if question in dict_question_difficulty: difficulty = self.dict_question_difficulty[question] if question in dict_question_topic: topic_set = dict_question_topic[question] for topic in topic_set: if topic in self.dict_topic_point: point = self.dict_topic_point[topic] key = '%s-%s' % (student_id, point) if key not in dict_exam_question: dict_exam_question[key] = [] dict_exam_difficulty[key] = {} if difficulty not in dict_exam_difficulty[key]: dict_exam_difficulty[key][difficulty] = [] record = (question, is_right, difficulty, submit_time, question_type) dict_exam_difficulty[key][difficulty].append( record) dict_exam_question[key].append(record) dict_res = {} self.db_fetcher.commit_sql_cmd('delete from entity_student_feature', 'mysql_logdata') for key in dict_exam_question: arr_content = key.split('-') student_id, point = arr_content[0], arr_content[1] question_records = sorted(dict_exam_question[key], key=lambda x: x[-1], reverse=True) right_cnt = 0 throld = min(len(question_records), range_throld) #if throld < 10: continue for record_item in question_records[:throld]: question, is_right, difficulty, submit_time, question_type = record_item right_cnt += is_right rateRight = right_cnt / range_throld adaDiff, pDiff = 0, -1 for difficulty in dict_exam_difficulty[key]: drecords = sorted(dict_exam_difficulty[key][difficulty], key=lambda x: x[-1], reverse=True) difficulty_throld = min(len(drecords), difficulty_throld) #if difficulty_throld < 4: continue dright_cnt = 0 for diff_item in drecords[:difficulty_throld]: question, is_right, difficulty, submit_time, question_type = diff_item dright_cnt += is_right drateRight = dright_cnt / difficulty_throld if drateRight > pDiff: adaDiff = difficulty pDiff = drateRight point_name, ptype, question_type, level, parent_id, link_id = self.dict_point[ point] insert_sql = "insert into entity_student_feature(student_id, point_id, point_name, adapt_difficulty, master) values(%s, %s, \'%s\', %s, %s)" \ % (student_id, point, point_name, adaDiff, rateRight) insert_id = self.db_fetcher.commit_sql_cmd(insert_sql, 'mysql_logdata') dict_res[key] = rateRight, difficulty return dict_res def getStudentScore(self): dict_student_score = {} for exam_item in self.exam_list_records: exam_question, student_id, ret, answer, submit_time, question_type = exam_item if exam_question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ exam_question] a_score, i_score = self.getScore(question_num, answer) exam_key = '%s-%s' % (student_id, question_exercise_id) if exam_key not in dict_student_score: dict_student_score[exam_key] = 0 dict_student_score[exam_key] += i_score return dict_student_score def getFirstPoint(self): # point dict_relation_point13 = {} sql = "select a.id id3, a.`name` name3, c.id id1, c.`name` name1 from entity_exam_points a join entity_exam_points \ b on b.id = a.parent_id join entity_exam_points c on c.id = b.parent_id where a.type = 3" rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for row in rows: point3, name3, point1, name1 = row dict_relation_point13[point3] = name3, point1, name1 return dict_relation_point13 def getExerciseName(self): dict_exercise_name = {} sql = "select DISTINCT a.question_exercise_id, b.resource_name from link_question_answer a join neworiental_v3.entity_exercise b \ on b.id = a.question_exercise_id where a.update_time >= \'%s\'" % self.update_time exercise_rows = self.db_fetcher.get_sql_result(sql, 'mysql_logdata') for exercise_row in exercise_rows: exercise_id, resource_name = exercise_row dict_exercise_name[exercise_id] = resource_name return dict_exercise_name def getExamData(self, dict_student_score, dict_relation_point13, dict_exercise_name): for exam_item in self.exam_list_records: # exam question, student_id, ret, answer, submit_time, question_type = exam_item if question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ question] arr_points = [int(x) for x in link_point_id.strip().split(',')] exam_key = '%s-%s' % (student_id, question_exercise_id) str_point1, str_name1, str_point3, str_name3 = '', '', '', '' is_first = True for point3 in arr_points: name3, point1, name1 = dict_relation_point13[point3] if is_first == True: str_point3, str_name3, str_point1, str_name1 = str( point3), str(name3), str(point1), str(name1) is_first = False else: str_point3 += ',%d' % point3 str_name3 += ',%s' % name3 str_point1 += ',%d' % point1 str_name1 += ',%s' % name1 resource_name = dict_exercise_name[question_exercise_id] exercise_score = dict_student_score[ exam_key] if exam_key in dict_student_score else 0 a_score, i_score = self.getScore(question_num, answer) error_text = self.getErrorText(answer) print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % ( student_id, link_question_id, question_type, str_point1, str_name1, str_point3, str_name3, submit_time, resource_name, exercise_score, question_exercise_id, a_score, i_score, difficulty, ret, question_num, error_text, 1) def getPracticeData(self, dict_relation_point13, dict_question_topic, dict_question_base_info): for practice_item in self.practice_list_records: # practice question, student_id, ret, answer, submit_time, question_type = practice_item str_point1, str_name1, str_point3, str_name3 = '', '', '', '' is_first = True if question in dict_question_topic: if question in dict_question_topic: topic_set = dict_question_topic[question] for topic in topic_set: if topic in self.dict_topic_point: arr_point3 = [ int(x) for x in self.dict_topic_point[topic] ] for point3 in arr_point3: name3, point1, name1 = dict_relation_point13[ point3] if is_first == True: str_point3, str_name3, str_point1, str_name1 = str( point3), str(name3), str(point1), str( name1) is_first = False else: str_point3 += ',%s' % point3 str_name3 += ',%s' % name3 str_point1 += ',%s' % point1 str_name1 += ',%s' % name1 a_score, i_score = 0, 0 # 单选题 选择题 填空题 if question_type == '单选题' or question_type == '选择题' or question_type == '填空题': a_score = 5 else: a_score = 12 if ret == 1: i_score = a_score elif ret == 5: i_score = 0.5 * a_score difficulty, qtype = 1, 1 if question in dict_question_base_info: difficulty, qtype = dict_question_base_info[question] errorText = '正确' if ret == 1 else '错误' print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % ( student_id, question, question_type, str_point1, str_name1, str_point3, str_name3, submit_time, '推送练习', 0, 0, a_score, i_score, difficulty, ret, 0, errorText, 0) def getData(self): dict_student_score = self.getStudentScore() dict_relation_point13 = self.getFirstPoint() # point dict_exercise_name = self.getExerciseName() dict_question_topic = self.recommend.getThisQuestionTopic( ) # question topic dict_question_base_info = self.recommend.getThisQuestionBaseInfo( ) # question quality print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % \ ('学生id', '题目id', '题目类型', '一级考点id', '一级考点名称', '三级考点id', '三级考点名称', '提交时间', '试卷名称', '成绩', '题集id', '题目满分', '题目得分', '题目难度', '答题结果', '试卷题号', '错误原因', '考练类型') self.getExamData(dict_student_score, dict_relation_point13, dict_exercise_name) # exam self.getPracticeData(dict_relation_point13, dict_question_topic, dict_question_base_info) # practice def recommendMonday(self, is_new=0, throld=500): dict_question_topic = self.recommend.getThisQuestionTopic( ) # question topic d1 = datetime.datetime.now() d3 = d1 + datetime.timedelta(days=-14) str_monday = d3.strftime("%Y-%m-%d 00:00:00") self.db_fetcher.commit_sql_cmd( "delete from entity_recommend_question_bytopic", 'mysql_white_list') # update self.importDefault(0, dict_question_topic) # import is_first, insert_score = 1, 0 new_sql = "insert into sync_student_recommend_question(system_id, resource_type, resource_id, subject_id, tag1, tag2, score, type, type_level, type_id) values" update_sql = "insert into entity_recommend_question_bytopic(system_id, type,chapter_id, topic_id, question_id, `master`, duration, important, subject_id, score, school_publish, org_id, org_type) values" dict_student_cnt = {} records = self.exam_list_records #+ self.practice_list_records for item in records: question, student_id, ret, answer, submit_time, question_type = item if submit_time > str_monday: if question in self.dict_realtion_quesion: link_question_id, link_point_id, question_num, difficulty, question_exercise_id, question_type = self.dict_realtion_quesion[ question] answer_key = '%s_%s' % (question, student_id) if answer_key in self.dict_content_answer: answer = self.dict_content_answer[answer_key] if answer == 'A' or answer == 'B': ret = 1 else: ret = 2 if ret != 1: if student_id not in dict_student_cnt: dict_student_cnt[student_id] = 0 if dict_student_cnt[student_id] < throld: # print '%s\t%s' % (student_id, link_question_id) if is_first == 1: update_sql += "(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, link_question_id, 0, insert_score) if is_new == 1: new_sql += "(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, link_question_id, 0, insert_score, 1, 3, 2186) is_first = 0 else: update_sql += ",(%s, 2, 0, 0, %s, 2, 2, 1, %s, %s, 0, 113, 2)" % ( student_id, link_question_id, 0, insert_score) if is_new == 1: new_sql += "(%s, 1, %s, %s, \'最近新错\', \'重难点\', %s, %s, %s, %s)" % ( student_id, link_question_id, 0, insert_score, 1, 3, 2186) dict_student_cnt[student_id] += 1 insert_score += 1 if is_new == 1: self.db_fetcher.commit_sql_cmd(new_sql, 'mysql_v3_white_list') else: self.db_fetcher.commit_sql_cmd(update_sql, 'mysql_white_list')
is_first = 1 for u in ul: if is_first > 0: # parse lis = u.find_all('li') for li in lis: if li.h3 is not None: second_text = li.h3.text thirds = li.ul.find_all('li') for th in thirds: attribute_url = '%s/%s' % (jquery_host, th.a.attrs['href']) attribute = th.a.text # attribute print attribute retVal, str_summarize, str_parameter, str_example = parse_jquery_attribute( attribute_url) cnt += 1 insert_sql += "(\'%s\', \'%s\', '%s', '%s', '%s', '%s', '%s')" % \ (attribute, first_text, second_text, retVal, str_summarize, str_parameter, str_example) if cnt % 10 == 0: res = db_fetcher.commit_sql_cmd( insert_sql, "mysql_duyi_core") insert_sql = "insert into crawl_jquery(attribute, first_class, second_class, ret_value, summarize, parameter, example) values" else: insert_sql += ',' is_first = -1 else: break insert_sql = insert_sql[:len(insert_sql) - 1] # process last ',' db_fetcher.commit_sql_cmd(insert_sql, "mysql_duyi_core")
if __name__ == "__main__": # update news_report company now_time = datetime.datetime.now() yes_time = now_time + datetime.timedelta(days=-1) yes_date = yes_time.strftime('%Y-%m-%d') db_data = db_fetcher.get_sql_result( 'select id, article_json, publish from kr_articles where publish = \'%s\'' % yes_date, 'mysql_insight') for pos in range(len(db_data)): kid = str(db_data[pos][0]) try: article_json = str(db_data[pos][1]) article_obj = json.loads(article_json, strict=False) url = 'http://36kr.com/p/%s.html' % kid title = article_obj['title'] source = "36Kr" publish_date = '%s 00:00:00' % str(db_data[pos][2]) domain = '36kr.com' content_html = article_obj['content'].encode( 'utf-8' ) if 'content' in article_obj and article_obj['content'] else '' content = re.sub('<[^>]+>', '', content_html) insert_sql = "insert into news_report(url, publish_date, title, content, source, domain, aid) values(\'%s\', \'%s\', \'%s\', \'%s\', \'%s\',\'%s\',%s)" % ( url, publish_date, title, content, source, domain, kid) db_fetcher.commit_sql_cmd(insert_sql, 'mysql_insight') except: print 'the error article id :%s' % kid
import sys,os reload(sys) sys.setdefaultencoding('utf-8') sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '../../'))) import json,math,MySQLdb,datetime,time import similary_question as sq from recommend import RecommendQuestion from common.db_fetcher import DataBaseFetcher recommend = RecommendQuestion() # object db_fetcher = DataBaseFetcher() question_list = [] # set delete_sql = "DELETE FROM error_question_log WHERE question_id IN ( select c.question_id from (SELECT a.question_id FROM error_question_log a JOIN neworiental_v3.entity_question b ON a.question_id = b.id WHERE b.subject_id != 4) c)" db_fetcher.commit_sql_cmd(delete_sql, 'mysql_logdata') # update rows = db_fetcher.get_sql_result("select question_id from error_question_log", 'mysql_logdata') for row in rows: qid = row[0] question_list.append(qid) def getQuestionWords(question_list): # question word dict_question_info = {} lSet = int(len(question_list) / 1000) + 2 start, end = 0, 0 for i in range(1, lSet): end = i * 1000 if end > len(question_list): end = len(question_list) str_question = ','.join([str(x) for x in question_list[start:end] ]) str_sql = "select c.id, c.subject_id, t.type_id,t.struct_id, c.json_data, c.difficulty, c.question_type from neworiental_v3.entity_question c left \
if __name__ == "__main__": # update news_report company now_time = datetime.datetime.now() yes_time = now_time + datetime.timedelta(days=-1) yes_date = yes_time.strftime('%Y-%m-%d') condition_sql = "publish_date >= \'%s 00:00:00\' and publish_date <= '%s 23:59:59' and type = 0" % ( yes_date, yes_date) db_data = db_fetcher.get_sql_result( 'select id, title, content from news_report where %s' % condition_sql, 'mysql_insight') for pos in range(len(db_data)): kid, title, content = str(db_data[pos][0]), str(db_data[pos][1]), str( db_data[pos][2]) try: topic_coms = comlib.extract_report( title, content.encode('utf8', 'ignore'), '-1') reported_coms = ",".join([x[0] for x in topic_coms]) com_id, is_confident = comlib.extract_report( title, content.encode('utf8', 'ignore')) reported_com = 0 if com_id and com_id != "0" and is_confident: reported_com = int(com_id) event_type = get_event_type(title) update_sql = "update news_report set reported_com= %s, reported_coms = \'%s\',event_type = %s where id = %s" % ( reported_com, reported_coms, event_type, kid) db_fetcher.commit_sql_cmd(update_sql, "mysql_insight") except: print 'happend error title:%s' % title