Пример #1
0
 def __init__(self):
     """Set up the flow queue, the MySQL/Mongo clients and the cached configs.

     The rule and group-tag lists are loaded last, after the clients exist
     (the loaders are not visible here; presumably they query those clients).
     """
     super().__init__()
     settings = cm.config
     flow_queue_name = settings['queue_flow']['name']
     self.queue_flow = mm.get_queue(flow_queue_name)
     mysql_options = settings['mysql_jd_cl']
     self.mysql_ml = MySql(**mysql_options)
     mongo_options = settings['mongo_jd_cl']
     self.mongo_derivate = Mongo(**mongo_options)
     self.rule_config_list = self.get_rule_list()
     self.group_tag_config_list = self.get_group_tag_list()
 def __init__(self):
     """Open the flow queue and DB clients, then cache model/threshold config."""
     super(JDXMLWithdrawModelServer, self).__init__()
     settings = cm.config
     flow_queue_name = settings['queue_flow']['name']
     self.queue_flow = message_manager.get_queue(flow_queue_name)
     mysql_options = settings['mysql_jd_cl']
     self.mysql_ml = MySql(**mysql_options)
     mongo_options = settings['mongo_jd_cl']
     self.mongo_der = Mongo(**mongo_options)
     # Loaded after the clients exist (loaders are defined elsewhere).
     self.model_config_list = self.get_model_list()
     self.group_tag_threshold = self.get_group_tag_threshold()
Пример #3
0
 def __init__(self):
     """Open the flow queue and DB clients, cache model config, set target."""
     super(JDXMLCreditServer, self).__init__()
     settings = cm.config
     flow_queue_name = settings['queue_flow']['name']
     self.queue_flow = message_manager.get_queue(flow_queue_name)
     mysql_options = settings['mysql_jd_cl']
     self.mysql_ml = MySql(**mysql_options)
     mongo_options = settings['mongo_jd_cl']
     self.mongo_der = Mongo(**mongo_options)
     # Loaded after the clients exist (loader is defined elsewhere).
     self.model_config_list = self.get_model_list()
     self.target = 'jd'
Пример #4
0
 def __init__(self):
     """Open the flow queue and DB clients and cache the quota strategies."""
     super(JDXModifyQuota, self).__init__()
     settings = cm.config
     flow_queue_name = settings['queue_flow']['name']
     self.queue_flow = message_manager.get_queue(flow_queue_name)
     mysql_options = settings['mysql_jd_cl']
     self.mysql_ml = MySql(**mysql_options)
     mongo_options = settings['mongo_jd_cl']
     self.mongo_derivable = Mongo(**mongo_options)
     # Load every quota-change strategy configuration up front.
     strategies = self.get_quota_modify_strategy_list()
     self.quota_modify_strategy_list = strategies
Пример #5
0
def fetch_data_from_db(start_time, end_time):
    """Load documents created in ``[start_time, end_time)`` into a DataFrame.

    Both bounds are passed through ``pd.to_datetime`` before being used in the
    ``create_time`` range filter (``$gte`` start, ``$lt`` end).  The
    ``X_SZR_EntryDate`` field is excluded by the query projection and every
    column whose name contains ``X_JD_ActiveCard`` is dropped afterwards.

    Returns:
        A DataFrame indexed by the lower-cased ``_id`` (the ``_id`` column is
        kept as well).  When no document matches, the empty DataFrame is
        returned as-is instead of failing on the missing ``_id`` column.
    """
    db = Mongo(**cm.config['mongo_config'])
    collection = db.get_collection(cm.config['collection'])
    created_between = {
        'create_time': {
            '$gte': pd.to_datetime(start_time),
            '$lt': pd.to_datetime(end_time)
        }
    }
    documents = list(collection.find(created_between, {'X_SZR_EntryDate': 0}))
    df = pd.DataFrame(documents)
    if df.empty:
        # No rows means no columns either, so df['_id'] below would raise
        # KeyError; hand the empty frame back to the caller instead.
        return df
    bad_cols = [col for col in df.columns if 'X_JD_ActiveCard' in col]
    df = df.drop(bad_cols, axis=1)
    df.loc[:, '_id'] = df['_id'].str.lower()
    df = df.set_index('_id', drop=False)
    return df
Пример #6
0
    def __init__(self):
        """Create the MySQL/Mongo clients and the data-access objects on them."""
        # db clients
        mysql_client = MySql(**cm.config['mysql_jd_cl'])
        self.mysql_mlx_client = mysql_client
        mongo_client = Mongo(**cm.config['mongo_jd_cl'])
        self.mongo_mlx_client = mongo_client

        # db access: results and thresholds use MySQL, raw data uses Mongo
        self.ml_result_access = jdx_data.MachineLearningJdResults(
            client=mysql_client)
        self.original_data_access = jdx_data.DerivativeProdData(
            client=mongo_client)
        self.threshold_access = jdx_data.PrThreshold(client=mysql_client)
Пример #7
0
    def __init__(self, strategy, *args, **argw):
        """Copy the strategy settings onto the instance and open data access.

        Args:
            strategy: mapping read with ``.get``; absent keys become ``None``.
            *args, **argw: accepted but not used here.
        """
        copied_fields = ('name', 'model_path', 'match', 'params', 'output',
                         'prob', 'is_setup', 'priority')
        for field in copied_fields:
            setattr(self, field, strategy.get(field))

        mysql_client = MySql(**cm.config['mysql_jd_cl'])
        self.mysql_mlx_client = mysql_client
        mongo_client = Mongo(**cm.config['mongo_jd_cl'])
        self.mongo_mlx_client = mongo_client
        self.original_data_access = jdx_data.DerivativeProdData(
            client=mongo_client)
        self.strategy_result_access = jdx_data.CreditAmountStragetyResult(
            client=mysql_client)
        # Runs last, once every attribute above is populated.
        self._trans_params()
class JDXMLWithdrawModelServer(message_server.MessageServer):
    """Score withdraw applications and forward the pass/fail decision.

    For each message a model configured for the message's category is picked
    at random (weighted by ``prob``), the application's feature document is
    scored, an ``op_score`` is derived, both results are written to MySQL and
    the decision is sent to the flow queue.
    """

    def __init__(self):
        """Open the queue/DB clients and load model and threshold config."""
        super(JDXMLWithdrawModelServer, self).__init__()
        self.queue_flow = message_manager.get_queue(
            cm.config['queue_flow']['name'])
        self.mysql_ml = MySql(**cm.config['mysql_jd_cl'])
        self.mongo_der = Mongo(**cm.config['mongo_jd_cl'])
        self.model_config_list = self.get_model_list()
        self.group_tag_threshold = self.get_group_tag_threshold()

    def handle_msg(self, msg_dict):
        """Score one application message and publish the outcome.

        Reads ``app_id``, ``user_id``, ``category_id``, ``group_tag`` and
        ``job_name`` from ``msg_dict``.  A missing ``app_id`` is logged and
        then still raises ``KeyError`` on access, as do the other keys.  When
        no model is configured for the category, ``choose_model`` returns
        ``None`` and the model lookups below raise ``TypeError``.
        """
        if 'app_id' not in msg_dict:
            logging.warning('No key in msg called appId!')

        app_id = str(msg_dict['app_id']).lower()
        user_id = msg_dict['user_id']
        category_id = msg_dict['category_id']
        group_tag = msg_dict['group_tag']

        model = self.choose_model(category_id, 'jd')
        feature_data = self.get_der_data(app_id)
        # Missing or falsy amounts are stored as -1.
        raw_credit_line = feature_data.get('X_JD_CreditLine')
        creditLine = float(raw_credit_line) if raw_credit_line else -1
        raw_principal = feature_data.get('X_JD_Principal')
        principal = float(raw_principal) if raw_principal else -1

        # Whether this application is a pre-audit.
        is_preAudit = 1 if feature_data.get(
            'X_WorkFlow') == 'buyApplicationPreAudit' or feature_data.get(
                'X_isPreAudit') == 'preAudit' else 0

        model_score = self.run_model(model, feature_data)
        op_score = self.get_op_score(model_score, model['op_coef'],
                                     model['op_intercept'])
        model_pass = 0 if model_score < model['threshold'] else 1

        # Control group 'B': a model rejection is overridden when op_score
        # reaches the configured threshold.  The threshold is None when no
        # row is configured; comparing against None would raise TypeError,
        # so the override is simply skipped in that case.
        if (group_tag == 'B' and model_pass == 0
                and self.group_tag_threshold is not None
                and op_score >= self.group_tag_threshold):
            model_pass = 1

        self.save_model_result(app_id, user_id, category_id, creditLine,
                               principal, model_score, model['threshold'],
                               model['model_name'], is_preAudit)
        self.save_op_socre_result(app_id, user_id, 'jd', 'withdraw',
                                  category_id, model['model_name'],
                                  model_score, op_score)

        msg = {
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "model_pass": model_pass,
            "op_score": op_score
        }
        self.queue_flow.send_message(msg)

    def choose_model(self, category_id, target):
        """Pick a model for ``category_id``/``target`` by cumulative ``prob``.

        Returns the chosen config row, or ``None`` when nothing matches or
        the random draw falls beyond the summed probabilities.
        """
        models = [
            model for model in self.model_config_list
            if model['category_id'] == category_id
            and model['target'] == target
        ]
        rand = np.random.random()
        model_prod = 0
        for model in models:
            model_prod += model['prob']
            if rand < model_prod:
                return model
        return None

    def get_model_list(self, table='jdx_category_model_relations'):
        """Return all model config rows with ``is_setup=1`` from ``table``."""
        sql = """select category_id,model_name,target,estor_path,threshold,op_coef,op_intercept,prob from {} where is_setup=1;""".format(
            table)
        model_list = self.mysql_ml.query(sql)
        return model_list

    def get_group_tag_threshold(self, table='jdx_proba_tags'):
        """Return the enabled ``flow_type='w'`` threshold as float, or None."""
        sql = """select threshold from {} where flow_type='w' and is_setup=1;""".format(
            table)
        group_tag_threshold = self.mysql_ml.query_one(sql)
        return float(
            group_tag_threshold['threshold']) if group_tag_threshold else None

    def run_model(self, chosed_model, feature_data):
        """Run the chosen model on ``feature_data`` and return its score.

        The score is the second column of ``predict_proba`` for the single
        input row, converted to ``float``.
        """
        model_path = chosed_model['estor_path']

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # estor_path must only ever point at trusted model artefacts.
        with open(model_path, 'rb') as f:
            pk_obj = pickle.load(f)
            model = pk_obj['model']

        fields = model.get_params()['enum'].clean_col_names
        # Features absent from the document are passed to the model as None.
        agg_attr = [feature_data.get(attr) for attr in fields]
        # use pandas to format data, the dimension should be 1 x len(fields)
        result_df = pd.DataFrame(agg_attr, index=fields).T
        assert result_df.ndim == 2, 'result_df should be two dimension'
        # transform data and make prediction
        pred_probas = model.predict_proba(result_df)
        model_score = pred_probas[:, 1][0]
        return float(model_score)

    def get_der_data(self, app_id, collection='derivables'):
        """Fetch the feature document whose ``_id`` is ``app_id`` upper-cased.

        Best effort: any failure (including "no such document") is logged and
        an empty dict is returned so that scoring can continue.
        """
        result = {}
        try:
            result = self.mongo_der.get_collection(collection).find({
                '_id':
                app_id.upper()
            }).next()
            result = {key: value for key, value in result.items()}
        except Exception:
            logging.warning('Could not load feature data for app_id %s',
                            app_id, exc_info=True)
        return result

    def get_op_score(self, model_score, op_coef, op_intercept):
        """Map a model probability onto the 0..1000 op-score scale.

        Returns 0 when either coefficient is missing or zero.  Otherwise the
        base-2 log-odds of ``model_score`` are transformed linearly and the
        result ``60 * x + 300`` is clamped to ``[0, 1000]``.
        """
        if not (op_coef and op_intercept):
            return 0
        # A probability of exactly 0 or 1 has infinite log-odds; computing it
        # directly raises ValueError / ZeroDivisionError, so use +/-inf and
        # let the clamp below turn it into 0 or 1000.
        if model_score <= 0:
            model_ln_odds = float('-inf')
        elif model_score >= 1:
            model_ln_odds = float('inf')
        else:
            model_ln_odds = math.log(
                model_score / (1 - model_score)) / math.log(2)
        ln_real_odds = op_coef * model_ln_odds + op_intercept
        op_score = max(min(60 * ln_real_odds + 300, 1000), 0)
        return op_score

    def save_model_result(self,
                          app_id,
                          user_id,
                          category_id,
                          creditLine,
                          principal,
                          result,
                          threshold,
                          model_name,
                          is_preAudit,
                          table='jdx_withdraw_model_results'):
        """Insert one model result row; return True if one row was written."""
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = "insert into {} " \
              "(app_id, user_id, category_id, creditLine, principal, result, threshold, model, is_preAudit) values " \
              "('{}', '{}', '{}', {}, {}, {}, {}, '{}',{})".format(table, app_id, user_id, category_id, creditLine,
                                                                   principal, result, threshold, model_name,
                                                                   is_preAudit)
        return self.mysql_ml.update(sql) == 1

    def save_op_socre_result(self,
                             app_id,
                             user_id,
                             target,
                             server,
                             category_id,
                             model,
                             model_score,
                             op_score,
                             table='jdx_op_score_results'):
        """Insert one op-score row; return True if one row was written."""
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = "insert into {} " \
              "(app_id, user_id, target, server, category_id, model, model_score, op_score) " \
              "values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {})".format(table, app_id, user_id, target,
                                                                           server, category_id, model, model_score,
                                                                           op_score)
        return self.mysql_ml.update(sql) == 1
Пример #9
0
 def __init__(self):
     """Open the flow queue and DB clients and cache the category configs."""
     super().__init__()
     settings = cm.config
     flow_queue_name = settings['queue_flow']['name']
     self.queue_flow = mm.get_queue(flow_queue_name)
     mysql_options = settings['mysql_jd_cl']
     self.mysql_ml = MySql(**mysql_options)
     mongo_options = settings['mongo_jd_cl']
     self.mongo_derivate = Mongo(**mongo_options)
     # Loaded after the clients exist (loader is defined elsewhere).
     self.category_list = self.get_category_configs()
Пример #10
0
class JdxCategoryService(message_server.MessageServer):
    """Assign a category to each application and forward it to the flow queue.

    The category is chosen by evaluating the configured relation expressions
    against the application's derived data, falling back to the workflow's
    ``<workflow>_default`` category.
    """

    def __init__(self):
        """Open the queue/DB clients and cache the enabled category configs."""
        super().__init__()
        self.queue_flow = mm.get_queue(cm.config['queue_flow']['name'])
        self.mysql_ml = MySql(**cm.config['mysql_jd_cl'])
        self.mongo_derivate = Mongo(**cm.config['mongo_jd_cl'])
        self.category_list = self.get_category_configs()

    def handle_msg(self, msg_dict):
        """Categorise one application, publish the result, then persist it.

        Reads ``app_id``, ``user_id`` and ``job_name`` from ``msg_dict``.
        The queue message is sent before the result row is saved.
        """
        app_id = msg_dict['app_id']
        user_id = msg_dict['user_id']
        derivate_data = self.get_derivate_data(app_id)
        channel = derivate_data.get('X_Origin_source')
        workflow, flow_type = self.get_workflow(derivate_data)
        category_id = self.get_category_id(derivate_data, workflow)
        msg_out = {
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "category_id": category_id,
        }
        self.queue_flow.send_message(msg_out)

        self.save_category_results(app_id, user_id, category_id, channel,
                                   flow_type)

    def get_category_configs(self, table='jdx_category_relations'):
        """Return all ``category_id``/``relation`` rows with ``is_setup=1``."""
        sql = """SELECT category_id,relation from {} WHERE is_setup=1;""".format(
            table)
        category_list = self.mysql_ml.query(sql)
        return category_list

    def get_derivate_data(self, app_id, collection='derivables'):
        """Return the document whose ``_id`` is ``app_id`` upper-cased."""
        der_dict = self.mongo_derivate.get_collection(collection).find_one(
            {'_id': app_id.upper()})
        return der_dict

    def save_category_results(self,
                              app_id,
                              user_id,
                              category_id,
                              channel,
                              flow_type,
                              table='jdx_category_results'):
        """Insert one category result row into ``table``."""
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = """INSERT INTO {} (app_id,user_id,category_id,channel,flow_type) VALUES('{}','{}','{}','{}','{}');""".format(
            table, app_id, user_id, category_id, channel, flow_type)
        self.mysql_ml.update(sql)

    def get_category_id(self, derivate_data, workflow):
        """Return the first category whose relation holds, else the default.

        A relation is a JSON object mapping a feature name to an expression.
        Each expression is evaluated with the feature value bound to
        ``VALUE``; the category matches when every result is truthy.  The row
        whose relation equals ``workflow + '_default'`` supplies the fallback
        category.
        """
        aeval = Interpreter()
        category_id = None
        default_category_id = None
        default_relation = workflow + '_default'
        for category in self.category_list:
            if category['relation'] == default_relation:
                # The default marker is a plain string, never a JSON object,
                # so there is nothing to evaluate for this row.
                default_category_id = category['category_id']
                continue
            try:
                relation_dict = json.loads(category['relation'])
                flag = []
                for k, v in relation_dict.items():
                    aeval.symtable['VALUE'] = derivate_data.get(k)
                    flag.append(aeval(v))
                if all(flag):
                    category_id = category['category_id']
                    break
            except Exception:
                # Rows that cannot be parsed or evaluated (for example other
                # workflows' default markers) are skipped on purpose.
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit still propagate.
                pass
        if not category_id:
            category_id = default_category_id
        return category_id

    def get_workflow(self, derivate_data):
        """Translate ``X_WorkFlow_flag`` into ``(workflow, flow_type)``.

        Raises:
            KeyError: ``X_WorkFlow_flag`` is absent from ``derivate_data``.
            ValueError: the flag is none of ``card``, ``reloan``, ``quota``
                or ``firstloan`` (previously an UnboundLocalError).
        """
        x_workflow = derivate_data['X_WorkFlow_flag']
        mx_data = derivate_data.get('X_MX_RawReport')
        if x_workflow == 'card':
            return 'opencard', 'c'
        if x_workflow == 'reloan':
            # The "_mx" variant is used when X_MX_RawReport is present.
            workflow = 'reloanwithdraw_mx' if mx_data else 'reloanwithdraw'
            return workflow, 'w'
        if x_workflow == 'quota':
            return 'quota', 'q'
        if x_workflow == 'firstloan':
            return 'firstwithdraw', 'f'
        raise ValueError(
            'Unsupported X_WorkFlow_flag: {!r}'.format(x_workflow))
Пример #11
0
class JDXMLCreditServer(message_server.MessageServer):
    """Score open-card applications and forward the scores to the flow queue.

    For each message a model configured for the category is picked at random
    (weighted by ``prob``), the application's feature document is scored, an
    ``op_score`` is derived, both are written to MySQL and then published.
    """

    def __init__(self):
        """Open the queue/DB clients, load model config and set the target."""
        super(JDXMLCreditServer, self).__init__()
        self.queue_flow = message_manager.get_queue(
            cm.config['queue_flow']['name'])
        self.mysql_ml = MySql(**cm.config['mysql_jd_cl'])
        self.mongo_der = Mongo(**cm.config['mongo_jd_cl'])
        self.model_config_list = self.get_model_list()
        self.target = 'jd'

    def handle_msg(self, msg_dict):
        """Score one application message and publish the outcome.

        Reads ``app_id``, ``user_id``, ``category_id`` and ``job_name`` from
        ``msg_dict``.  A missing ``app_id`` is logged and then still raises
        ``KeyError`` on access.  When no model is configured for the
        category, ``choose_model`` returns ``None`` and the model lookups
        below raise ``TypeError``.
        """
        if 'app_id' not in msg_dict:
            logging.warning('No key in msg called appId!')

        app_id = str(msg_dict['app_id']).lower()
        user_id = msg_dict['user_id']
        category_id = msg_dict['category_id']

        model = self.choose_model(category_id, self.target)
        model_score = self.run_model(app_id, model)
        op_score = self.get_op_score(model_score, model['op_coef'],
                                     model['op_intercept'])

        # credit_ml, credit_final and credit_by are stored as -1; the result
        # column holds the score as JSON under the key 500.
        self.save_model_result(app_id, user_id, -1, -1, {500: model_score}, '',
                               1, -1, model['model_name'], category_id)
        self.save_op_socre_result(app_id, user_id, self.target, 'open_card',
                                  category_id, model['model_name'],
                                  model_score, op_score)

        msg = {
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "model_name": model['model_name'],
            "op_score": op_score,
            "model_score": model_score
        }
        self.queue_flow.send_message(msg)

    def choose_model(self, category_id, target):
        """Pick a model for ``category_id``/``target`` by cumulative ``prob``.

        Returns the chosen config row, or ``None`` when nothing matches or
        the random draw falls beyond the summed probabilities.
        """
        models = [
            model for model in self.model_config_list
            if model['category_id'] == category_id
            and model['target'] == target
        ]
        rand = np.random.random()
        model_prod = 0
        for model in models:
            model_prod += model['prob']
            if rand < model_prod:
                return model
        return None

    def get_model_list(self, table='jdx_category_model_relations'):
        """Return all model config rows with ``is_setup=1`` from ``table``."""
        sql = """select category_id,model_name,target,estor_path,op_coef,op_intercept,prob from {} where is_setup=1;""".format(
            table)
        model_list = self.mysql_ml.query(sql)
        return model_list

    def run_model(self, app_id, chosed_model):
        """Load the app's features, run the chosen model, return its score.

        The score is the second column of ``predict_proba`` for the single
        input row, converted to ``float``.
        """
        model_path = chosed_model['estor_path']
        feature_data = self.get_der_data(app_id)

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # estor_path must only ever point at trusted model artefacts.
        with open(model_path, 'rb') as f:
            pk_obj = pickle.load(f)
            model = pk_obj['model']

        fields = model.get_params()['enum'].clean_col_names
        # Features absent from the document are passed to the model as None.
        agg_attr = [feature_data.get(attr) for attr in fields]
        # use pandas to format data, the dimension should be 1 x len(fields)
        result_df = pd.DataFrame(agg_attr, index=fields).T
        assert result_df.ndim == 2, 'result_df should be two dimension'
        # transform data and make prediction
        pred_probas = model.predict_proba(result_df)
        model_score = pred_probas[:, 1][0]
        return float(model_score)

    def get_der_data(self, app_id, collection='derivables'):
        """Fetch the feature document whose ``_id`` is ``app_id`` upper-cased.

        Best effort: any failure (including "no such document") is logged and
        an empty dict is returned so that scoring can continue.
        """
        result = {}
        try:
            result = self.mongo_der.get_collection(collection).find({
                '_id':
                app_id.upper()
            }).next()
            result = {key: value for key, value in result.items()}
        except Exception:
            logging.warning('Could not load feature data for app_id %s',
                            app_id, exc_info=True)
        return result

    def get_op_score(self, model_score, op_coef, op_intercept):
        """Map a model probability onto the 0..1000 op-score scale.

        Returns 0 when either coefficient is missing or zero.  Otherwise the
        base-2 log-odds of ``model_score`` are transformed linearly and the
        result ``60 * x + 300`` is clamped to ``[0, 1000]``.
        """
        if not (op_coef and op_intercept):
            return 0
        # A probability of exactly 0 or 1 has infinite log-odds; computing it
        # directly raises ValueError / ZeroDivisionError, so use +/-inf and
        # let the clamp below turn it into 0 or 1000.
        if model_score <= 0:
            model_ln_odds = float('-inf')
        elif model_score >= 1:
            model_ln_odds = float('inf')
        else:
            model_ln_odds = math.log(
                model_score / (1 - model_score)) / math.log(2)
        ln_real_odds = op_coef * model_ln_odds + op_intercept
        op_score = max(min(60 * ln_real_odds + 300, 1000), 0)
        return op_score

    def save_model_result(self,
                          app_id,
                          user_id,
                          credit_ml,
                          credit_final,
                          result,
                          threshold,
                          judge_by,
                          credit_by,
                          model_name,
                          category_id,
                          table='machine_learning_jdcl_results'):
        """Insert one model result row; return True if one row was written.

        ``result`` is serialised with ``json.dumps`` before being stored.
        """
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = "insert into {} " \
              "(app_id, user_id, credit_ml, credit_final, result, threshold, judge_by, credit_by, model, category_id) " \
              "values ('{}', '{}', {}, {}, '{}', '{}', {}, {}, '{}', '{}')".format(table, app_id, user_id,
                                                                                   credit_ml,
                                                                                   credit_final, json.dumps(result),
                                                                                   threshold, judge_by,
                                                                                   credit_by, model_name, category_id)
        return self.mysql_ml.update(sql) == 1

    def save_op_socre_result(self,
                             app_id,
                             user_id,
                             target,
                             server,
                             category_id,
                             model,
                             model_score,
                             op_score,
                             table='jdx_op_score_results'):
        """Insert one op-score row; return True if one row was written."""
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = "insert into {} " \
              "(app_id, user_id, target, server, category_id, model, model_score, op_score) " \
              "values ('{}', '{}', '{}', '{}', '{}', '{}', {}, {})".format(table, app_id, user_id, target,
                                                                           server, category_id, model, model_score,
                                                                           op_score)
        return self.mysql_ml.update(sql) == 1
Пример #12
0
class JDXModifyQuota(message_server.MessageServer):
    """Decide a user's new credit quota from a model score and a strategy.

    A strategy row is selected by category, score segment and current-quota
    segment; one of its actions is drawn at random and applied to the current
    quota.  The final quota is published to the flow queue and stored in
    MySQL.
    """

    def __init__(self):
        """Open the queue/DB clients and cache the quota strategies."""
        super(JDXModifyQuota, self).__init__()
        self.queue_flow = message_manager.get_queue(
            cm.config['queue_flow']['name'])
        self.mysql_ml = MySql(**cm.config['mysql_jd_cl'])
        self.mongo_derivable = Mongo(**cm.config['mongo_jd_cl'])
        # Load every quota-change strategy configuration up front.
        self.quota_modify_strategy_list = self.get_quota_modify_strategy_list(
        )

    def handle_msg(self, msg_dict):
        """Compute, publish and store the final quota for one application.

        Reads ``app_id``, ``category_id``, ``model_score``, ``model_name``
        and ``job_name`` from ``msg_dict``.  If the quota computation fails
        for any reason (bad user data, no matching strategy, no action
        drawn), the traceback is logged and the current quota is kept.

        The message is sent and the row saved exactly once, after the
        computation.  Previously both ran inside the ``try`` block, so a
        failure after sending (e.g. the DB insert) fell into the fallback
        and published a second, conflicting message.  The fallback itself
        still raises when ``X_JD_CreditLine`` cannot be converted to a
        number.
        """
        app_id = msg_dict['app_id']
        category_id = msg_dict['category_id']
        model_score = msg_dict['model_score']  # predicted model score
        model_name = msg_dict['model_name']  # name of the predicting model
        data = self.get_user_data_by_app_id(app_id)
        user_id = data.get('X_UserId')
        try:
            # user's current quota
            original_quota = int(float(data.get('X_JD_CreditLine')))
            # amount of the user's most recent withdrawal
            original_principal = int(float(data.get('X_pre_app_principal')))
            # number of previous quota adjustments
            quota_modify_count = int(data.get('X_credit_adjustment_times'))
            # choose a strategy from the model score and the current quota
            quota_modify_strategy = self.get_quota_modify_strategy(
                category_id, model_score, original_quota)
            # draw the change tag defined by that strategy
            modify_tag = self.get_modify_tag(quota_modify_strategy)

            # Final quota: a non-negative tag adds a multiple of the last
            # withdrawal, a negative tag scales the current quota down.
            if modify_tag >= 0:
                final_quota = round(original_quota +
                                    original_principal * modify_tag)
            else:
                final_quota = round(original_quota * (1 + modify_tag))

            # Users younger than 22 get half the quota; a missing or
            # unparsable age simply skips the discount.
            try:
                if int(data['X_IdCardAge']) < 22:
                    final_quota = round(final_quota * 0.5)
            except (KeyError, TypeError, ValueError):
                pass

            # change relative to the current quota
            quota_variance = final_quota - original_quota
            modify_name = quota_modify_strategy['modify_name']
            score_segment = quota_modify_strategy['score_segment']
            quota_segment = quota_modify_strategy['quota_segment']
            level = quota_modify_strategy['level']
        except Exception:
            logging.warning(traceback.format_exc())
            # Fallback: keep the current quota and record placeholder values.
            final_quota = int(float(data.get('X_JD_CreditLine')))
            quota_variance = 0
            modify_name = ''
            original_quota = data.get('X_JD_CreditLine')
            original_principal = -1
            score_segment = ''
            quota_segment = ''
            level = 0
            quota_modify_count = data.get('X_credit_adjustment_times')

        logging.info("app_id:{}, final_credit:{}".format(
            app_id, final_quota))
        self.queue_flow.send_message({
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "final_quota": str(final_quota)
        })
        # store the quota change result
        self.save_quota_modify_results(
            app_id, user_id, category_id, modify_name, model_score,
            model_name, original_quota, original_principal, score_segment,
            quota_segment, level, quota_variance, final_quota,
            quota_modify_count)

    def get_quota_modify_strategy(self, category_id, model_score,
                                  original_quota):
        """Return the first strategy matching category, score and quota.

        ``score_segment`` and ``quota_segment`` are JSON pairs used as
        ``Interval`` bounds with ``upper_closed=False``.  Returns ``None``
        when no strategy matches.
        """
        for quota_modify_strategy in self.quota_modify_strategy_list:
            score_segment = json.loads(quota_modify_strategy['score_segment'])
            quota_segment = json.loads(quota_modify_strategy['quota_segment'])
            score_in = model_score in Interval(score_segment[0],
                                               score_segment[1],
                                               upper_closed=False)
            quota_in = original_quota in Interval(quota_segment[0],
                                                  quota_segment[1],
                                                  upper_closed=False)
            if quota_modify_strategy[
                    'category_id'] == category_id and score_in and quota_in:
                return quota_modify_strategy
        return None

    def get_modify_tag(self, quota_modify_strategy):
        """Draw one action from the strategy by cumulative probability.

        ``action_space`` and ``action_prob`` are parallel JSON lists.
        Returns ``None`` when the random draw falls beyond the summed
        probabilities.
        """
        action_space = json.loads(quota_modify_strategy['action_space'])
        action_prob = json.loads(quota_modify_strategy['action_prob'])
        rand = np.random.random()
        modify_prob = 0
        for i, prob in enumerate(action_prob):
            modify_prob += prob
            if rand < modify_prob:
                return action_space[i]
        return None

    def save_quota_modify_results(self,
                                  app_id,
                                  user_id,
                                  category_id,
                                  modify_name,
                                  model_score,
                                  model_name,
                                  original_quota,
                                  original_principal,
                                  score_segment,
                                  quota_segment,
                                  level,
                                  quota_variance,
                                  final_quota,
                                  quota_modify_count,
                                  table='jdx_quota_modify_results'):
        """Insert one quota change result row into ``table``."""
        # NOTE(review): values are interpolated straight into the SQL text.
        # Switch to parameterised queries if the MySql wrapper supports them.
        sql = "insert into {} (app_id,user_id,category_id,modify_name,model_score,model_name,original_quota," \
              "original_principal,score_segment,quota_segment,level,quota_variance,final_quota,quota_modify_count) " \
              "values ('{}', '{}', '{}', '{}', {}, '{}',{},{}, '{}','{}',{},{},{},{})". \
            format(table, app_id, user_id, category_id, modify_name, model_score, model_name, original_quota,
                   original_principal,score_segment, quota_segment,level,quota_variance, final_quota, quota_modify_count)
        self.mysql_ml.update(sql)

    def get_quota_modify_strategy_list(self, table='jdx_quota_modify_configs'):
        """Return all strategy rows with ``is_setup=1`` from ``table``."""
        sql = "select modify_name,category_id,score_segment,quota_segment,level,action_space,action_prob " \
              "from {} where is_setup=1;" \
            .format(table)
        quota_modify_strategy_list = self.mysql_ml.query(sql)
        return quota_modify_strategy_list

    def get_user_data_by_app_id(self, app_id, collection='derivables'):
        """Return the quota-related fields of the document for ``app_id``.

        The lookup uses ``app_id`` upper-cased as ``_id`` and projects only
        the fields listed below.
        """
        key_lists = [
            'X_UserId', 'X_JD_CreditLine', 'X_pre_app_principal',
            'X_credit_adjustment_times', 'X_IdCardAge'
        ]
        fields = {x: 1 for x in key_lists}
        data = self.mongo_derivable.get_collection(collection).find_one(
            {'_id': app_id.upper()}, fields)
        return data
class JDXMLQuotaServer(message_server.MessageServer):
    """Score quota applications and forward the model score.

    For each message a model configured for the category is picked at random
    (weighted by ``prob``), the application's feature document is scored and
    the score is sent to the flow queue.
    """

    def __init__(self):
        """Open the queue/DB clients, load model config and set the target."""
        super(JDXMLQuotaServer, self).__init__()
        self.queue_flow = message_manager.get_queue(
            cm.config['queue_flow']['name'])
        self.mysql_ml = MySql(**cm.config['mysql_jd_cl'])
        self.mongo_derivable = Mongo(**cm.config['mongo_jd_cl'])
        self.model_config_list = self.get_model_list()
        self.target = 'jd'

    def handle_msg(self, msg_dict):
        """Score one application message and publish the model score.

        Reads ``app_id``, ``category_id`` and ``job_name`` from ``msg_dict``.
        A missing ``app_id`` is logged and then still raises ``KeyError`` on
        access.  When no model is configured for the category,
        ``choose_model`` returns ``None`` and the lookups below raise
        ``TypeError``.
        """
        if 'app_id' not in msg_dict:
            logging.warning('No key in msg called appId!')
        app_id = str(msg_dict['app_id']).lower()
        category_id = msg_dict['category_id']

        model = self.choose_model(category_id, self.target)
        model_score = self.run_model(app_id, model)

        msg = {
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "model_name": model['model_name'],
            "model_score": model_score
        }
        self.queue_flow.send_message(msg)

    def choose_model(self, category_id, target):
        """Pick a model for ``category_id``/``target`` by cumulative ``prob``.

        Returns the chosen config row, or ``None`` when nothing matches or
        the random draw falls beyond the summed probabilities.
        """
        models = [
            model for model in self.model_config_list
            if model['category_id'] == category_id
            and model['target'] == target
        ]
        rand = np.random.random()
        model_prod = 0
        for model in models:
            model_prod += model['prob']
            if rand < model_prod:
                return model
        return None

    def get_model_list(self, table='jdx_category_model_relations'):
        """Return all model config rows with ``is_setup=1`` from ``table``."""
        sql = """select category_id,model_name,target,estor_path,op_coef,op_intercept,prob from {} where is_setup=1;""".format(
            table)
        model_list = self.mysql_ml.query(sql)
        return model_list

    def run_model(self, app_id, chosed_model):
        """Load the app's features, run the chosen model, return its score.

        The score is the second column of ``predict_proba`` for the single
        input row, converted to ``float``.
        """
        model_path = chosed_model['estor_path']
        feature_data = self.get_der_data(app_id)

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # estor_path must only ever point at trusted model artefacts.
        with open(model_path, 'rb') as f:
            pk_obj = pickle.load(f)
            model = pk_obj['model']

        fields = model.get_params()['enum'].clean_col_names
        # Features absent from the document are passed to the model as None.
        agg_attr = [feature_data.get(attr) for attr in fields]
        # use pandas to format data, the dimension should be 1 x len(fields)
        result_df = pd.DataFrame(agg_attr, index=fields).T
        assert result_df.ndim == 2, 'result_df should be two dimension'
        # transform data and make prediction
        pred_probas = model.predict_proba(result_df)
        model_score = pred_probas[:, 1][0]
        return float(model_score)

    def get_der_data(self, app_id, collection='derivables'):
        """Fetch the feature document whose ``_id`` is ``app_id`` upper-cased.

        Best effort: any failure (including "no such document") is logged and
        an empty dict is returned so that scoring can continue.
        """
        result = {}
        try:
            result = self.mongo_derivable.get_collection(collection).find({
                '_id':
                app_id.upper()
            }).next()
            result = {key: value for key, value in result.items()}
        except Exception:
            logging.warning('Could not load feature data for app_id %s',
                            app_id, exc_info=True)
        return result
Пример #14
0
class JdxRuler(MessageServer):
    def __init__(self):
        """Set up the flow queue, database handles and cached rule/tag configuration."""
        super().__init__()
        settings = cm.config
        self.queue_flow = mm.get_queue(settings['queue_flow']['name'])
        self.mysql_ml = MySql(**settings['mysql_jd_cl'])
        self.mongo_derivate = Mongo(**settings['mongo_jd_cl'])
        # Both loaders query through self.mysql_ml, so they run after it is set.
        self.rule_config_list = self.get_rule_list()
        self.group_tag_config_list = self.get_group_tag_list()

    def handle_msg(self, msg_dict):
        app_id = str(msg_dict['app_id'])
        user_id = msg_dict.get('user_id', '')
        category_id = msg_dict.get('category_id', '')
        der_data = self.get_der_data(app_id)
        group_tag, flow_type = self.get_group_tag_and_flow_type(der_data)  # 控制组标签
        rule_check_result = self.get_rule_check_result(category_id, flow_type, group_tag, der_data)
        hit_rule_str = "!".join(rule_check_result['hit_rule_code'])
        msg = {
            "app_id": app_id,
            "job_name": msg_dict['job_name'],
            "group_tag": group_tag,
            "rule_pass": rule_check_result['result'],
            "hit_rule_str": hit_rule_str
        }
        self.queue_flow.send_message(msg)
        saved_data = {
            'app_id': app_id,
            'user_id': user_id,
            'category_id': category_id,
            'hit_rule_code': rule_check_result['hit_rule_code'],
            'actual_hit_rule_code': rule_check_result['actual_hit_rule_code'],
            'result': rule_check_result['result'],
            'actual_result': rule_check_result['actual_result'],
            'group_tag': group_tag,
            'flow_type': flow_type
        }
        self.save_rule_result(saved_data)

    def get_rule_list(self, table='jd_ml_rules'):
        """获取所有规则配置信息"""
        sql = """select category_id,rule_code,pass_prob,rule_params,flow_type,group_tag from {} where is_setup=1;""".format(
            table)
        rule_list = self.mysql_ml.query(sql)
        return rule_list

    def get_der_data(self, app_id, collection='derivables'):
        """获取特征数据"""
        result = {}
        try:
            result = self.mongo_derivate.get_collection(collection).find({'_id': app_id.upper()}).next()
            result = {key: value for key, value in result.items()}
        except Exception:
            pass
        return result

    def save_rule_result(self, saved_data, table='jd_ml_rule_check_result'):
        """保存规则检查结果"""
        app_id = saved_data.get('app_id')
        user_id = saved_data.get('user_id', '')
        category_id = saved_data.get('category_id')
        hit_rule_code = json.dumps(saved_data.get('hit_rule_code'))
        actual_hit_rule_code = json.dumps(saved_data.get('actual_hit_rule_code'))
        result = saved_data.get('result')
        actual_result = saved_data.get('actual_result')
        group_tag = saved_data.get('group_tag')
        flow_type = saved_data.get('flow_type')

        sql = "insert into {} (app_id,user_id,category_id,hit_rule_code,actual_hit_rule_code,result,actual_result," \
              "group_tag,rule_type) values ('{}','{}','{}','{}','{}',{},{},'{}','{}')".format(table, app_id, user_id,
                                                                                              category_id,
                                                                                              hit_rule_code,
                                                                                              actual_hit_rule_code,
                                                                                              result, actual_result,
                                                                                              group_tag, flow_type)
        self.mysql_ml.update(sql)

    def get_rule_check_result(self, category_id, flow_type, group_tag, der_data):
        """得到规则检测结果"""
        if category_id not in [rule['category_id'] for rule in self.rule_config_list]:
            if flow_type == 'c':
                category_id = 'default_opencard'
            elif flow_type == 'f':
                category_id = 'default_firstloan'
            elif flow_type == 'w':
                category_id = 'default_reloan'

        rule_list = []
        for rule_config in self.rule_config_list:
            if rule_config['category_id'] == category_id and rule_config['flow_type'] == flow_type and rule_config[
                'group_tag'] == group_tag:
                rule_list.append(rule_config)

        hit_rule_code = []
        actual_hit_rule_code = []
        for rule in rule_list:
            rule_code = rule['rule_code']
            pass_prob = int(rule['pass_prob'])
            rule_params = rule['rule_params']
            rule_params = json.loads(rule_params)

            check_list = []
            for rule_dict in rule_params['con']:
                try:
                    f_value = der_data.get(rule_dict['field'])
                    s_value = rule_dict.get('val')
                    op = rule_dict.get('op')
                    flag = self.__condition_exec(f_value, s_value, op)
                except:
                    flag = False
                check_list.append(flag)

            check_result = False
            if rule_params['log_op'] == 'and':
                check_result = all(check_list)
            if rule_params['log_op'] == 'or':
                check_result = any(check_list)

            if check_result:
                hit_rule_code.append(rule_code)
                actual_hit_rule_code.append(rule_code)

                prob = np.random.random()
                if prob < pass_prob:
                    hit_rule_code.remove(rule_code)
        result = 0 if hit_rule_code else 1
        actual_result = 0 if actual_hit_rule_code else 1
        rule_check_result = {
            'hit_rule_code': hit_rule_code,
            'actual_hit_rule_code': actual_hit_rule_code,
            'result': result,
            'actual_result': actual_result,
        }
        return rule_check_result

    def get_group_tag_list(self, table='jdx_proba_tags'):
        """获取控制组"""
        sql = """select tag,proba,flow_type from {} where is_setup=1;""".format(table)
        group_tag_list = self.mysql_ml.query(sql)
        return group_tag_list

    def get_group_tag_and_flow_type(self,der_data):
        X_WorkFlow_flag = der_data.get('X_WorkFlow_flag')
        if X_WorkFlow_flag == 'card':
            flow_type = 'c'
        elif X_WorkFlow_flag == 'firstloan':
            flow_type = 'f'
        elif X_WorkFlow_flag == 'reloan':
            flow_type = 'w'

        for tag_dict in self.group_tag_config_list:
            if tag_dict['flow_type'] == flow_type:
                tag_config = tag_dict
        prob = np.random.random()
        # B表示为控制组
        if prob < float(tag_config['proba']):
            group_tag = 'B'
        else:
            group_tag = 'A'
        return group_tag, flow_type

    def __condition_exec(self, f_value, s_value, op):
        result = False
        if op == '==':
            result = f_value == s_value
        elif op == '>':
            result = float(f_value) > float(s_value)
        elif op == '>=':
            result = float(f_value) >= float(s_value)
        elif op == '<':
            result = float(f_value) < float(s_value)
        elif op == '<=':
            result = float(f_value) <= float(s_value)
        elif op == '!=':
            result = f_value != s_value
        elif op == 'in':
            result = f_value in s_value
        elif op == 'nin':
            result = f_value not in s_value
        return result