예제 #1
0
파일: user_factor.py 프로젝트: z-fork/mmml
def calc_user_factor(conn_func):
    """Compute a per-tag level string for every user from answer records.

    One record frame is fetched through ``get_record_factor`` for each key
    of ``CONF_MAP``; missing or empty frames are skipped.  The remaining
    rows are grouped by ``uid`` and, for every tag seen in a user's rows, a
    vector that starts as a copy of ``population_pri`` is repeatedly
    multiplied by a likelihood read from a column of ``priori``.

    Args:
        conn_func: Passed through unchanged to ``get_record_factor``.

    Returns:
        A ``DataFrame`` with the columns ``uid`` and ``score``.  ``score``
        is a ``'|'``-joined list of ``tag:level`` items, where ``level`` is
        the 1-based argmax of the tag's final vector.  When there is
        nothing to process, an empty frame with those two columns is
        returned.
    """
    columns = ['uid', 'score']

    frames = []
    for parent_kind in CONF_MAP:
        record = get_record_factor(conn_func, parent_kind)
        if record is None or record.empty:
            continue
        frames.append(record)
    if not frames:
        # pd.concat() raises ValueError on an empty list, so bail out early
        # with the same shape that callers get in the normal case.
        return pd.DataFrame(columns=columns)
    records = pd.concat(frames)

    def _calc_bayes(df):
        # Rows are unpacked positionally as (uid, status, score).
        # NOTE(review): this relies on the column order produced by
        # get_record_factor -- confirm against that helper.
        ufac = {}
        for _, row in df.iterrows():
            _uid, status, score = row
            correct = status - 1  # 1/2 -> 0/1, answered wrong/right
            # score looks like "tag:d|tag:d|..."; both parts are integers.
            pairs = [item.split(':') for item in score.split('|')]
            for tag, d in [(int(tag), int(d)) for tag, d in pairs]:
                if tag not in ufac:
                    ufac[tag] = population_pri.copy()
                # d is 1-based; pick the matching column of priori.
                px = np.array(priori[:, d - 1]).T
                # Use px for a right answer and (1 - px) for a wrong one.
                ufac[tag] = ufac[tag] * (
                    correct * px + (1 - correct) * (1 - px))
        return "|".join(
            str(tag) + ':' + str(fac.argmax() + 1)
            for tag, fac in ufac.items())

    user_factor = records.groupby(['uid']).apply(_calc_bayes).reset_index()
    if len(user_factor) == 0:
        # Assigning two column names to a column-less frame would raise,
        # so build the empty result with its columns directly.
        return pd.DataFrame(columns=columns)
    user_factor.columns = columns
    return user_factor
예제 #2
0
def calc_user_factor(conn_func):
    """Compute a per-tag level string for every user from answer records.

    One record frame is fetched through ``get_record_factor`` for each key
    of ``CONF_MAP``; missing or empty frames are skipped.  The remaining
    rows are grouped by ``uid`` and, for every tag seen in a user's rows, a
    vector that starts as a copy of ``population_pri`` is repeatedly
    multiplied by a likelihood read from a column of ``priori``.

    Args:
        conn_func: Passed through unchanged to ``get_record_factor``.

    Returns:
        A ``DataFrame`` with the columns ``uid`` and ``score``.  ``score``
        is a ``'|'``-joined list of ``tag:level`` items, where ``level`` is
        the 1-based argmax of the tag's final vector.  When there is
        nothing to process, an empty frame with those two columns is
        returned.
    """
    columns = ['uid', 'score']

    frames = []
    for parent_kind in CONF_MAP:
        record = get_record_factor(conn_func, parent_kind)
        if record is None or record.empty:
            continue
        frames.append(record)
    if not frames:
        # pd.concat() raises ValueError on an empty list, so bail out early
        # with the same shape that callers get in the normal case.
        return pd.DataFrame(columns=columns)
    records = pd.concat(frames)

    def _calc_bayes(df):
        # Rows are unpacked positionally as (uid, status, score).
        # NOTE(review): this relies on the column order produced by
        # get_record_factor -- confirm against that helper.
        ufac = {}
        for _, row in df.iterrows():
            _uid, status, score = row
            correct = status - 1  # 1/2 -> 0/1, answered wrong/right
            # score looks like "tag:d|tag:d|..."; both parts are integers.
            pairs = [item.split(':') for item in score.split('|')]
            for tag, d in [(int(tag), int(d)) for tag, d in pairs]:
                if tag not in ufac:
                    ufac[tag] = population_pri.copy()
                # d is 1-based; pick the matching column of priori.
                px = np.array(priori[:, d - 1]).T
                # Use px for a right answer and (1 - px) for a wrong one.
                ufac[tag] = ufac[tag] * (
                    correct * px + (1 - correct) * (1 - px))
        return "|".join(
            str(tag) + ':' + str(fac.argmax() + 1)
            for tag, fac in ufac.items())

    user_factor = records.groupby(['uid']).apply(_calc_bayes).reset_index()
    if len(user_factor) == 0:
        # Assigning two column names to a column-less frame would raise,
        # so build the empty result with its columns directly.
        return pd.DataFrame(columns=columns)
    user_factor.columns = columns
    return user_factor
예제 #3
0
def remove_unapproved_questions(conn_func, redis_db):
    """Drop hash fields whose question id is not in the approved set.

    Every field of the ``ENGLISH_QUESTION_FACTOR_KEY`` hash is parsed as
    ``"<qtype>:<qid>"``.  For each question type in ``CONF_MAP`` the ids
    selected from the configured table (filtered by ``question_type`` and
    ``state = K_APPROVED``) form the approved pool; fields of that type
    whose id is missing from the pool are removed with ``hdel``.  The
    number of removed fields is printed at the end.

    Args:
        conn_func: Zero-argument callable returning a DB connection that
            offers ``cursor()`` and ``close()``.
        redis_db: Client offering ``hkeys`` and ``hdel``.
    """
    # Bucket the raw field names by their integer question type.
    fields_by_type = {}
    for field in redis_db.hkeys(ENGLISH_QUESTION_FACTOR_KEY):
        qtype, _ = field.split(':')
        fields_by_type.setdefault(int(qtype), []).append(field)

    count = 0
    conn = conn_func()
    try:
        cursor = conn.cursor()
        for qtype, conf in CONF_MAP.items():
            context_conf = conf['context_conf']
            table = context_conf['table']
            cid = context_conf['cid']
            # Column and table names come from CONF_MAP and cannot be bound
            # as query parameters, hence the string formatting.
            sql = 'select %s from %s where question_type=%s and state=%s'
            cursor.execute(sql % (cid, table, qtype, K_APPROVED))
            approved = {qid for qid, in cursor.fetchall()}
            for field in fields_by_type.get(qtype, []):
                if int(field.split(':')[1]) not in approved:
                    redis_db.hdel(ENGLISH_QUESTION_FACTOR_KEY, field)
                    count += 1
    finally:
        # Release the connection even when a query or Redis call fails.
        conn.close()
    print('remove unapproved english questions: %d' % count)
예제 #4
0
def remove_unapproved_questions(conn_func, redis_db):
    """Drop hash fields whose question id is not in the approved set.

    Every field of the ``ENGLISH_QUESTION_FACTOR_KEY`` hash is parsed as
    ``"<qtype>:<qid>"``.  For each question type in ``CONF_MAP`` the ids
    selected from the configured table (filtered by ``question_type`` and
    ``state = K_APPROVED``) form the approved pool; fields of that type
    whose id is missing from the pool are removed with ``hdel``.  The
    number of removed fields is printed at the end.

    Args:
        conn_func: Zero-argument callable returning a DB connection that
            offers ``cursor()`` and ``close()``.
        redis_db: Client offering ``hkeys`` and ``hdel``.
    """
    # Bucket the raw field names by their integer question type.
    fields_by_type = {}
    for field in redis_db.hkeys(ENGLISH_QUESTION_FACTOR_KEY):
        qtype, _ = field.split(':')
        fields_by_type.setdefault(int(qtype), []).append(field)

    count = 0
    conn = conn_func()
    try:
        cursor = conn.cursor()
        for qtype, conf in CONF_MAP.items():
            context_conf = conf['context_conf']
            table = context_conf['table']
            cid = context_conf['cid']
            # Column and table names come from CONF_MAP and cannot be bound
            # as query parameters, hence the string formatting.
            sql = 'select %s from %s where question_type=%s and state=%s'
            cursor.execute(sql % (cid, table, qtype, K_APPROVED))
            approved = {qid for qid, in cursor.fetchall()}
            for field in fields_by_type.get(qtype, []):
                if int(field.split(':')[1]) not in approved:
                    redis_db.hdel(ENGLISH_QUESTION_FACTOR_KEY, field)
                    count += 1
    finally:
        # Release the connection even when a query or Redis call fails.
        conn.close()
    print('remove unapproved english questions: %d' % count)
예제 #5
0
def remove_unapproved_questions(conn_func, banker_conn_func, redis_db):
    """Delete Redis keys for questions that are not in the approved set.

    For each question type in ``CONF_MAP`` the ids selected from the
    configured table (filtered by ``question_type`` and
    ``state = K_APPROVED``) form the approved pool.  Redis keys matching
    ``ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')`` are then scanned; the
    text after the last ``'/'`` of each key is read as the question id,
    and keys whose id is missing from the pool are deleted.  The number of
    deletions is printed at the end.

    Args:
        conn_func: Zero-argument callable returning a connection with
            ``close()``.
        banker_conn_func: Zero-argument callable returning the connection
            the approval query runs on (``cursor()`` and ``close()``).
        redis_db: Client offering ``keys`` and ``delete``.
    """
    count = 0
    # NOTE(review): nothing is ever executed on this connection; it is
    # still opened and closed so that calling conn_func stays part of the
    # function's observable behaviour -- confirm whether it can be dropped.
    conn = conn_func()
    try:
        banker_conn = banker_conn_func()
        try:
            banker_cursor = banker_conn.cursor()
            for qtype, conf in CONF_MAP.items():
                context_conf = conf['context_conf']
                table = context_conf['table']
                cid = context_conf['cid']
                # Column and table names come from CONF_MAP and cannot be
                # bound as query parameters, hence the string formatting.
                sql = ('select %s from %s '
                       'where question_type=%s and state=%s')
                banker_cursor.execute(
                    sql % (cid, table, qtype, K_APPROVED))
                approved = {qid for qid, in banker_cursor.fetchall()}
                pattern = ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')
                for key in redis_db.keys(pattern):
                    qid = int(key.split('/')[-1])
                    if qid not in approved:
                        redis_db.delete(
                            ENGLISH_QUESTION_FACTOR_KEY % (qtype, qid))
                        count += 1
        finally:
            # Close the query connection even if a query or Redis call
            # raised.
            banker_conn.close()
    finally:
        conn.close()
    print('remove unapproved questions: %d' % count)
예제 #6
0
def remove_unapproved_questions(conn_func, banker_conn_func, redis_db):
    """Delete Redis keys for questions that are not in the approved set.

    For each question type in ``CONF_MAP`` the ids selected from the
    configured table (filtered by ``question_type`` and
    ``state = K_APPROVED``) form the approved pool.  Redis keys matching
    ``ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')`` are then scanned; the
    text after the last ``'/'`` of each key is read as the question id,
    and keys whose id is missing from the pool are deleted.  The number of
    deletions is printed at the end.

    Args:
        conn_func: Zero-argument callable returning a connection with
            ``close()``.
        banker_conn_func: Zero-argument callable returning the connection
            the approval query runs on (``cursor()`` and ``close()``).
        redis_db: Client offering ``keys`` and ``delete``.
    """
    count = 0
    # NOTE(review): nothing is ever executed on this connection; it is
    # still opened and closed so that calling conn_func stays part of the
    # function's observable behaviour -- confirm whether it can be dropped.
    conn = conn_func()
    try:
        banker_conn = banker_conn_func()
        try:
            banker_cursor = banker_conn.cursor()
            for qtype, conf in CONF_MAP.items():
                context_conf = conf['context_conf']
                table = context_conf['table']
                cid = context_conf['cid']
                # Column and table names come from CONF_MAP and cannot be
                # bound as query parameters, hence the string formatting.
                sql = ('select %s from %s '
                       'where question_type=%s and state=%s')
                banker_cursor.execute(
                    sql % (cid, table, qtype, K_APPROVED))
                approved = {qid for qid, in banker_cursor.fetchall()}
                pattern = ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')
                for key in redis_db.keys(pattern):
                    qid = int(key.split('/')[-1])
                    if qid not in approved:
                        redis_db.delete(
                            ENGLISH_QUESTION_FACTOR_KEY % (qtype, qid))
                        count += 1
        finally:
            # Close the query connection even if a query or Redis call
            # raised.
            banker_conn.close()
    finally:
        conn.close()
    print('remove unapproved questions: %d' % count)