def dump_question_factor(self, QTYPE, qinfo, solution, analysis_info, knowledge_tree):
    """Compute and persist question factors for one question type.

    Joins question metadata with its solutions and analysis steps, builds a
    SciQuestionFactor per (qtype, qid), flushes each factor to redis under
    QUESTION_FACTOR_KEY, and collects the knowledge indices.

    :param QTYPE: question type used to filter ``solution`` / ``analysis_info``
    :param qinfo: DataFrame of question metadata (joined on shared columns)
    :param solution: DataFrame with at least ``qtype``/``qid``/``sub_qid``/``solution_id``
    :param analysis_info: DataFrame with ``qtype``/``analysis_id``/``ktag``/``mtag``
    :param knowledge_tree: passed through to ``get_question_knowledge_index``
    :returns: list of ``(qtype, qid, indices)`` tuples
    """
    def parse_tags(cell):
        # '1|2|3' -> {1, 2, 3}; NaN (no tag recorded) -> None.
        return None if pd.isnull(cell) else set(map(int, cell.split('|')))

    res = []
    df = pd.merge(qinfo, solution[solution.qtype == QTYPE])
    df = pd.merge(df, analysis_info[analysis_info.qtype == QTYPE])
    for (qtype, qid), group1 in df.groupby(['qtype', 'qid']):
        difficulty = group1.difficulty.iloc[0]
        question_analysis = []
        for _, group2 in group1.groupby(['sub_qid', 'solution_id']):
            # NOTE(review): DataFrame.sort(columns=...) is the pre-0.17
            # pandas API (removed in 0.20); newer pandas needs
            # sort_values(by='analysis_id') -- confirm the pinned pandas
            # version before upgrading.
            S = group2.sort(columns='analysis_id')[['ktag', 'mtag']]
            analysis_step = []
            for _, row in S.iterrows():
                ktag, mtag = row
                analysis_step.append((parse_tags(ktag), parse_tags(mtag)))
            question_analysis.append(analysis_step)
        qfactor = SciQuestionFactor(difficulty)
        qfactor.add_factor(question_analysis)
        factor_value = qfactor.dump_factor()
        factor_key = self.QUESTION_FACTOR_KEY % (qtype, qid)
        flush_redis(self.redis_db, factor_key, factor_value)
        indices = self.get_question_knowledge_index(qfactor, knowledge_tree)
        res.append((qtype, qid, indices))
    return res
def dump_question_factor(self, QTYPE, qinfo, solution, analysis_info, knowledge_tree):
    """Build per-question factors from solution/analysis data, flush them
    to redis, and return the (qtype, qid, indices) list."""
    parse = lambda cell: None if pd.isnull(cell) else set(
        map(int, cell.split('|')))
    merged = pd.merge(qinfo, solution[solution.qtype == QTYPE])
    merged = pd.merge(merged, analysis_info[analysis_info.qtype == QTYPE])
    output = []
    for (qtype, qid), question_group in merged.groupby(['qtype', 'qid']):
        difficulty = question_group.difficulty.iloc[0]
        question_analysis = []
        for _, solution_group in question_group.groupby(
                ['sub_qid', 'solution_id']):
            ordered = solution_group.sort(columns='analysis_id')[
                ['ktag', 'mtag']]
            steps = [(parse(ktag), parse(mtag))
                     for _, (ktag, mtag) in ordered.iterrows()]
            question_analysis.append(steps)
        qfactor = SciQuestionFactor(difficulty)
        qfactor.add_factor(question_analysis)
        flush_redis(self.redis_db,
                    self.QUESTION_FACTOR_KEY % (qtype, qid),
                    qfactor.dump_factor())
        indices = self.get_question_knowledge_index(qfactor, knowledge_tree)
        output.append((qtype, qid, indices))
    return output
def dump_question_inv_index(conn_func, redis_db, qinfo): valid_question_set = set() for k, _ in qinfo.iteritems(): qtype, qid, _ = k valid_question_set.add((qtype, qid)) origin = get_origin_info(conn_func) keys = redis_db.hkeys(CET_QUESTION_FEATURE_KEY) res = dict() removed_questions = set() for key in keys: qtype, qid, _ = map(int, key.split(':')) # 可能会因为各种莫名原因导致 redis 中残存有脏数据 # 用本次计算的 qinfo 过滤一遍保证都是已入库题目 if (qtype, qid) not in valid_question_set: removed_questions.add((qtype, qid)) continue exam_type = origin.get(qtype, {}).get(qid, EXAM_TYPE_DEFAULT) if exam_type == EXAM_TYPE_DEFAULT: continue res.setdefault(exam_type, {}).setdefault(qtype, []).append(qid) for exam_type, v in res.iteritems(): for qtype, qids in v.iteritems(): key = CET_QUESTION_INV_INDEX_KEY % (exam_type, qtype) flush_redis(redis_db, key, qids) print 'remove %s questions' % len(removed_questions)
def dump_question_index(redis_db, question_index):
    """Group (exam_kind, qtype, qid) triples by (exam_kind, qtype) and
    flush each qid list to its inverted-index redis key."""
    grouped = {}
    for exam_kind, qtype, qid in question_index:
        bucket = grouped.setdefault((exam_kind, qtype), [])
        bucket.append(qid)
    for (exam_kind, qtype), qids in grouped.iteritems():
        flush_redis(
            redis_db, ENGLISH_QUESTION_INV_INDEX % (exam_kind, qtype), qids)
def dump_question_factor(self, SUB_QTYPE, qinfo, knowledge_info, knowledge_tree):
    """Aggregate knowledge tags per question into an ArtQuestionFactor,
    flush each factor to redis, and return (qtype, qid, indices) tuples."""
    def split_tags(cell):
        # '1|2|3' -> {1, 2, 3}; NaN (missing tag) -> None.
        return None if pd.isnull(cell) else set(map(int, cell.split('|')))

    merged = pd.merge(
        qinfo, knowledge_info[knowledge_info.sub_type == SUB_QTYPE])
    results = []
    for (qtype, qid), group in merged.groupby(['qtype', 'qid']):
        difficulty = group.difficulty.iloc[0]
        ktag, rktag = set(), set()
        for _, row in group[['ktag', 'rktag']].iterrows():
            k_set, rk_set = map(split_tags, row)
            if k_set:
                ktag.update(k_set)
            if rk_set:
                rktag.update(rk_set)
        qfactor = ArtQuestionFactor()
        qfactor.add_factor((ktag, rktag, difficulty))
        flush_redis(self.redis_db,
                    self.QUESTION_FACTOR_KEY % (qtype, qid),
                    qfactor.dump_factor())
        indices = self.get_question_knowledge_index(qfactor, knowledge_tree)
        results.append((qtype, qid, indices))
    return results
def dump_user_factor(self):
    """Convert every user's stored factor to the adaptive format and
    flush it under the adaptive key."""
    for uid in self.get_user_set():
        source_key = self.USER_FACTOR_KEY % uid
        user_factor = convert_user_factor(self.redis_db, source_key)
        # No (convertible) factor stored for this user -- skip silently.
        if user_factor is None:
            continue
        flush_redis(
            self.redis_db, self.ADAPTIVE_USER_FACTOR_KEY % uid, user_factor)
def dump_concept_graph(self):
    """Serialize the concept-graph matrices to JSON and flush both the
    graph and its transpose to redis."""
    matrices = [
        json.dumps(self.knowledge_mat),
        json.dumps(self.method_mat),
        json.dumps(self.knowledge_method_mat),
    ]
    flush_redis(self.redis_db, self.CONCEPT_GRAPH, matrices)
    # NOTE(review): transpose_matrix is fed the JSON *strings* above --
    # presumably it decodes/re-encodes internally; confirm its contract.
    transposed = map(transpose_matrix, matrices)
    flush_redis(self.redis_db, self.CONCEPT_GRAPH_TRANSPOSE, transposed)
def dump_card_inv_index(self):
    """Build the index from concept-graph elements (vertices/edges) to
    cards and flush each bucket to redis."""
    index = {}
    for factor_key in self.get_all_card_factor_keys():
        card_type, card_id = factor_key.split("/")[-2:]
        k_mat = load_card_factor(self.redis_db, factor_key)
        append_item_id("%s:%s" % (card_type, card_id), k_mat, index, "k", "k")
    for k1, by_k2 in index.iteritems():
        for k2, item_ids in by_k2.iteritems():
            flush_redis(
                self.redis_db, self.CONCEPT_CARD_INDEX % (k1, k2), item_ids)
def dump_card_inv_index(self):
    '''Map every concept-graph element (vertex/edge) to the card ids that
    reference it, then flush each mapping to redis.'''
    inverted = {}
    factor_keys = self.get_all_card_factor_keys()
    for factor_key in factor_keys:
        parts = factor_key.split('/')
        item_id = '%s:%s' % (parts[-2], parts[-1])
        k_mat = load_card_factor(self.redis_db, factor_key)
        append_item_id(item_id, k_mat, inverted, 'k', 'k')
    for first, second_level in inverted.iteritems():
        for second, item_ids in second_level.iteritems():
            redis_key = self.CONCEPT_CARD_INDEX % (first, second)
            flush_redis(self.redis_db, redis_key, item_ids)
def dump_question_inv_index(self):
    """Build the index from concept-graph elements (vertices/edges) to
    questions and flush each bucket to redis."""
    index = {}
    for factor_key in self.get_all_question_factor_keys():
        qtype, qid = factor_key.split("/")[-2:]
        item_id = "%s:%s" % (qtype, qid)
        # Difficulty rides along in the stored factor but is not indexed.
        k_mat, m_mat, k_m_mat, _diff = load_question_factor(
            self.redis_db, factor_key)
        append_item_id(item_id, k_mat, index, "k", "k")
        append_item_id(item_id, m_mat, index, "m", "m")
        append_item_id(item_id, k_m_mat, index, "k", "m")
    for k1, by_k2 in index.iteritems():
        for k2, item_ids in by_k2.iteritems():
            flush_redis(
                self.redis_db,
                self.CONCEPT_QUESTION_INDEX % (k1, k2),
                item_ids)
def dump_question_inv_index(self):
    '''Map every concept-graph element (vertex/edge) to the question ids
    that reference it, then flush each mapping to redis.'''
    inverted = {}
    factor_keys = self.get_all_question_factor_keys()
    for factor_key in factor_keys:
        parts = factor_key.split('/')
        item_id = '%s:%s' % (parts[-2], parts[-1])
        qfactor = load_question_factor(self.redis_db, factor_key)
        knowledge_mat, method_mat, cross_mat, _difficulty = qfactor
        append_item_id(item_id, knowledge_mat, inverted, 'k', 'k')
        append_item_id(item_id, method_mat, inverted, 'm', 'm')
        append_item_id(item_id, cross_mat, inverted, 'k', 'm')
    for first, second_level in inverted.iteritems():
        for second, item_ids in second_level.iteritems():
            redis_key = self.CONCEPT_QUESTION_INDEX % (first, second)
            flush_redis(self.redis_db, redis_key, item_ids)
def dump_result(result, redis_db): user_fac, item_fac, w, w0, accuracy = result print 'accuracy: %.4f' % accuracy for uid, fac in user_fac.iteritems(): key = CET_MOCK_TEST_USER_FACTOR_KEY % uid flush_redis(redis_db, key, list(fac)) for qid, fac in item_fac.iteritems(): key = CET_MOCK_TEST_QUESTION_FACTOR_KEY % qid flush_redis(redis_db, key, list(fac)) flush_redis(redis_db, CET_MOCK_TEST_MODEL_WEIGHT_KEY, list(w)) redis_db.set(CET_MOCK_TEST_MODEL_BIAS_WEIGHT_KEY, w0)