Example #1
    def get_db_result(_id):
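        # load the stored result, resolve per-case attachments from the output
        # directory, and render review comments to HTML before returning JSON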
        user = User(session['user'])
        result = Mongo().result_by_id(_id)
        output_dir = result.output_dir
        review = result.review

        if output_dir:
            for case in result.results:
                try:
                    case_config = result.ref_problem[case.id]
                    if case_config:
                        case.attachments = case_config.get_attachments(
                            user_dir=Env.root.joinpath(output_dir))
                except AttributeError:
                    logger.exception('attachments')
        if review:
            for line, comments in review.items():
                for i, c in enumerate(comments):
                    comments[i]['text'] = markdown.markdown(c['text'])

        mark_as_read = Mongo().mark_as_read(to=user.id,
                                            _id=_id,
                                            event='new-comment')
        logger.info('mark-as-read: {}', mark_as_read)

        return flask.json.dumps(result)
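
A minimal sketch of the `review` structure consumed above, reconstructed from Example #23 (which builds it); the values are illustrative:

    import time
    import markdown

    # hypothetical review document: line number -> list of comments
    review = {
        '12': [dict(user='jane.doe', time=time.time(), text='Nice trick!')],
    }
    for line, comments in review.items():
        for i, c in enumerate(comments):
            comments[i]['text'] = markdown.markdown(c['text'])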
Example #2
    def perma_result(_id):
        user = User(session['user'])
        document = Mongo().result_by_id(_id)
        course = document.ref_course
        problem = document.ref_problem
        breadcrumbs = [
            Link.CoursesBtn(),
            Link.CourseBtn(course),
            Link.ProblemBtn(course, problem)
        ]

        return render_template_ext(
            'view_result.njk',
            user=user,
            notifications=Mongo().load_notifications(user.id),
            results=[document],
            result=None,
            requestReview=False,
            title='Problem %s' % problem.name,
            breadcrumbs=Breadcrumbs.new(*breadcrumbs),
            js=[
                '//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.6/highlight.min.js',
                '/static/js/lib/highlightjs-line-numbers.js'
            ],
            js_no_cache=['sockets.js', 'process.js'])
Example #3
    def __init__(self, config_file=None, product=None):
        self.config = self.set_config(config_file)
        self.mysql_risk = MySql(**self.config['mysql_risk'])
        self.mysql_risk_table = None
        self.mongo_derivable = Mongo(**self.config['mongo_derivable'])
        self.mongo_derivable_table = None
        self.except_handler = DingdingExceptionHandler(self.config['robots'])
        self.product = product
        self.ssh_config = self.config['model_file_remote_ssh']
Example #4
    def request_review():
        user = User(session['user'])
        data = request.json
        _id = data['_id']
        document = Mongo().result_by_id(_id)
        from_user = document.user or user.id

        # request_dt = document.review_request  # type: datetime
        # if request_dt:
        #     return flask.json.dumps(
        #         dict(result='warning', message=f'Request was already sent on {request_dt:%Y-%m-%d %H:%M:%S}')
        #     )

        # notify all teachers
        reviewers = list()
        for reviewer_obj in document.ref_course.teachers:
            if isinstance(reviewer_obj, dict):
                reviewer = str(reviewer_obj.get('id', reviewer_obj))
            else:
                reviewer = reviewer_obj

            event_document = {
                'from': from_user,
                'to': reviewer,
                'course': document.course,
                'problem': document.problem,
                'document': _id,
                'event': 'codereview',
                'title': f'Code review requested by {from_user}',
                'description': f'Student {from_user} has requested code review for the problem {document.ref_problem.id}'
            }

            if Mongo().add_notification(event_document):
                logger.info(f'add-notification: {event_document}')
                reviewers.append(reviewer)
            else:
                logger.warning(
                    f'notification already exists: {event_document}')

        Mongo().update_fields(_id, review_request=datetime.datetime.now())

        if reviewers:
            return flask.json.dumps(dict(result='ok', reviewers=reviewers))
        else:
            return flask.json.dumps(
                dict(result='warning', message='Request was already sent'))
Example #5
    def __init__(self):
        """
            Inicializa variaveis que serao utilizadas no PreProcessamento.

            Exemplo:
                self.train: Registros da collection train
                self.test: Registros da collection test
                self.dict_map: Registro da collection dict_map
        """

        self.train = pd.DataFrame(Mongo().find("train"))
        self.test = pd.DataFrame(Mongo().find("test"))
        self.dict_map = Mongo().find("target_map")
Example #6
    def view_course(course_name, course_year):
        user = User(session['user'])
        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problems: List[Problem] = sorted(
            course.problem_db.find(disabled=(None, False)),
            key=problem_cat_getter)

        languages = Languages.db().find(disabled=(None, False))

        if not user.is_admin():
            problems = [p for p in problems if p.is_visible()]

        cat_problems = OrderedDict()
        for cat, items in groupby(problems, key=problem_cat_getter):
            cat_problems[cat] = list(items)

        return render_template_ext(
            'view_course.njk',
            user=user,
            notifications=Mongo().load_notifications(user.id),
            course=course,
            languages=languages,
            has_categories=len(cat_problems) > 1,
            problems=problems,
            cat_problems=cat_problems,
            title=course.name,
            subtitle=course.year,
            breadcrumbs=Breadcrumbs.new(Link.CoursesBtn()),
            js_no_cache=['solution.js'])
Example #7
    def process_solution(course_name, course_year):
        user = User(session['user'])

        try:
            course = Courses().find_one(name=course_name,
                                        year=course_year,
                                        only_active=False)
            problem = course.problem_db[request.form['prob-id']]
            lang = Languages.db()[request.form['lang-id']]
            solution = request.form['src']
            use_docker = request.form.get('use-docker', 'off') == 'on'

            test_result = crates.TestResult(
                user=user.id,
                problem=problem.id,
                lang=lang.id,
                course=course.id,
                docker=use_docker,
                solution=solution,
                action='solve',
            )
            # save to the db and redirect with _id
            insert_result = Mongo().save_result(test_result.peek())

            return redirect(
                url_for('view_result',
                        course_name=course.name,
                        course_year=course.year,
                        problem_id=problem.id,
                        _id=str(insert_result.inserted_id)))

        except Exception:
            logger.exception('Could not parse data')
Example #8
    def read_notifications():
        user = User(session['user'])
        data = request.json

        return flask.json.dumps(
            dict(notifications=Mongo().read_notifications(user.id,
                                                          n_id=data['_id'])))
Example #9
    def save_model(self, model):
        """
            Proprosito
            ----------
            Salvar o modelo treinado e fazer a insercao dos parametros no MongoDB

            Parametros
            ----------
            model: Modelo treinado

            Retorno
            ----------
            none
        """

        models_params = {
            "name": self.model_name,
            "X": self.X_columns,
            "y": self.y_columns,
            "params": model.get_params()
        }

        Mongo().insert_one("models", models_params)
        pickle.dump(self.model,
                    open(f"{os.environ['model_path']}{self.model_name}", "wb"))
Example #10
    def classify(self, model):
        """
            Proprosito
            ----------
            Faz a classificacao dos registros e salva o resultado no MongoDB

            Parametros
            ----------
            model: Modelo treinado

            Retorno
            ----------
            none
        """

        X = self.test[self.X_columns]
        y_pred = model.predict(X)

        pred = pd.DataFrame({'id': self.test['id'], 'type': y_pred})

        mapped_pred = PreProcessing().target_map(dataframe=pred, invert=True)

        result = {
            "model_name": self.model_name,
            "predict_results": list(json.loads(mapped_pred.T.to_json()).values())
        }

        Mongo().insert_one("results", result)
Example #11
    def view_courses():
        user = User(session['user'])

        courses = list(Courses().find(only_active=not user.is_admin()))

        return render_template_ext(
            'view_courses.njk',
            title='Course list',
            user=user,
            notifications=Mongo().load_notifications(user.id),
            courses=courses)
Example #12
    def perma_result(_id):
        user = User(session['user'])
        document = Mongo().result_by_id(_id)
        course = document.ref_course
        problem = document.ref_problem
        breadcrumbs = [
            Link.CoursesBtn(),
            Link.CourseBtn(course),
            Link.ProblemBtn(course, problem)
        ]

        return render_template_ext(
            'results.njk',
            user=user,
            notifications=Mongo().read_notifications(user.id),
            results=[document],
            result=None,
            requestReview=False,
            title='Problem %s' % problem.name,
            breadcrumbs=Breadcrumbs.new(*breadcrumbs),
        )
Example #13
def _process_solution_by_id(_id, rerun=False):
    document = Mongo().result_by_id(_id)

    if document.result is None or rerun:
        _process_solution(
            User(dict(id=document.user)),
            document.action,
            not document.docker,
            document.problem,
            document.course,
            document.lang,
            document.solution,
            document._id
        )
Example #14
    def student_process_solution(data):
        print(data)
        user = User(session['user'])
        try:
            document = Mongo().result_by_id(data['_id'])
            _process_solution(
                user=user,
                action=document.action,
                skip_docker=not document.docker,
                problem_id=document.problem,
                course_id=document.course,
                lang_id=document.lang,
                src=document.solution,
                _id=data['_id'],
            )
        except Exception:
            logger.exception('Error while processing solution')
Example #15
    def view_result(course_name, course_year, problem_id, _id=None):
        user = User(session['user'])

        if user.is_admin():
            return redirect(
                url_for('admin_problem',
                        course_name=course_name,
                        course_year=course_year,
                        problem_id=problem_id))

        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problem = course.problem_db[problem_id]
        results = list()
        result = None
        breadcrumbs = [Link.CoursesBtn(), Link.CourseBtn(course)]

        # TODO check access
        if _id:
            document = Mongo().result_by_id(_id)
            if document:
                # add to previous solution if already executed
                if document.result:
                    results.append(document.peek())
                else:
                    result = document.peek()
                    breadcrumbs.append(Link.ProblemBtn(course, problem))

        if Env.use_database:
            for prev in Mongo().peek_last_n_results(20, user.id, course.id,
                                                    problem.id):
                # push only valid result
                if prev.get('result') and str(prev['_id']) != str(_id):
                    results.append(prev)

        if _id:
            for r in results:
                if str(r['_id']) == str(_id):
                    r['active'] = 'active'

        results = sorted(results, reverse=True, key=lambda x: x.get('attempt') or 0)

        return render_template_ext(
            'results.njk',
            user=user,
            notifications=Mongo().read_notifications(user.id),
            results=results,
            result=result,
            requestReview=True,
            title='Problem %s' % problem.name,
            breadcrumbs=Breadcrumbs.new(*breadcrumbs),
        )
Example #16
    def view_course(course_name, course_year):
        user = User(session['user'])
        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problems = list(course.problem_db.find(disabled=(None, False)))
        languages = Languages.db().find(disabled=(None, False))

        return render_template_ext(
            'submit.njk',
            user=user,
            notifications=Mongo().read_notifications(user.id),
            course=course,
            languages=languages,
            problems=problems,
            title=course.name,
            subtitle=course.year,
            breadcrumbs=Breadcrumbs.new(Link.CoursesBtn()),
        )
Example #17
    def clear_notification():
        data = request.json
        _id = data['_id']
        result = dict(result="ok", message="ok")
        try:
            delete_many_result = Mongo().events.delete_many(dict(document=_id))
            if delete_many_result.deleted_count > 0:
                result['message'] = (
                    f"Deleted {delete_many_result.deleted_count} "
                    "notification(s) related to this result")
                return result

            if delete_many_result.deleted_count == 0:
                result['result'] = "warning"
                result['message'] = "No notification related to this result found"
                return result
        except Exception:
            result['result'] = 'error'
            result['message'] = "Error while clearing notifications"
        return result
Example #18
    def admin_problem(course_name, course_year, problem_id):
        user = User(session['user'])
        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problems_ids = ','.join([x.id for x in list(course.problem_db.find())])
        problem = course.problem_db[problem_id]
        languages = Languages.db().find(disabled=(None, False))

        return render_template_ext(
            'problem.njk',
            user=user,
            notifications=Mongo().read_notifications(user.id),
            course=course,
            languages=languages,
            problem=problem,
            problems_ids=problems_ids,
            title='Manage problem %s' % problem.name,
            breadcrumbs=Breadcrumbs.new(Link.CoursesBtn(),
                                        Link.CourseBtn(course)),
        )
Example #19
    def get_side_by_side_diff(doc_id, case_id):
        result = Mongo().result_by_id(doc_id)
        output_dir = result.output_dir

        if output_dir:
            try:
                case_config = result.ref_problem[case_id]
                if case_config:
                    attachments = case_config.get_path_to_output_files(
                        user_dir=Env.root.joinpath(output_dir))
                    from utils import comparison
                    result = comparison.line_by_line_diff(
                        Env.root / attachments.reference,
                        Env.root / attachments.generated)
                    return result.html
                else:
                    logger.error(f'Could not find case {case_id}')
                    return f'Could not find case {case_id}'
            except FileNotFoundError:
                logger.exception('Could not find files for comparison')
                return 'Could not find files'
            except Exception:
                logger.exception('Error while comparing')
                return 'Error during comparison'
Example #20
def _process_solution(user,
                      action,
                      skip_docker,
                      problem_id,
                      course_id,
                      lang_id=None,
                      src=None,
                      _id=None):
    if not user.is_admin() and (skip_docker or action
                                in (ProcessRequestType.GENERATE_INPUT,
                                    ProcessRequestType.GENERATE_OUTPUT)):
        Emittor.error('Operation not permitted', [
            'You do not have sufficient privileges to perform action:',
            '    %s (skip docker: %s)' %
            (action, skip_docker), '', 'Please contact [email protected]',
            'if you want to gain the privileges.'
        ])
        return

    request = processing.request.ProcessRequest(
        user=user,
        lang=lang_id,
        problem=problem_id,
        course=course_id,
        src=src,
        type=action,
        docker=not (skip_docker and user.is_admin()),
    )

    if Env.use_database:
        Mongo().save_log(request.get_log_dict())

    # ignore problems which are past due
    if request.problem.time_left < 0:
        return

    Emittor.register_events(request)
    Emittor.queue_status(queue_status())
    queue.append(request)
    Emittor.queue_push(request)

    # put a barrier here so only a certain number of users can process code at once
    # while the others will see the queue list
    with thread_lock:
        try:
            request.process()
        except ConfigurationException as e:
            if user.is_admin():
                logger.exception('[visible to admin only] invalid yaml config')
                Emittor.exception(e)
        except Exception as e:
            logger.exception('process error:')
            Emittor.exception(e)
        finally:
            output_dir, attempt = request.save_result()
            if Env.use_database:
                # replace document instead of creating new one
                Mongo().save_result(
                    request.get_result_dict(),
                    _id=_id,
                    output_dir=output_dir,
                    attempt=attempt,
                )
            request.destroy()

    queue.remove(request)
    Emittor.queue_pop(request)
Example #21
class BaseFeatureMonitor(object):
    def __init__(self, config_file=None, product=None):
        self.config = self.set_config(config_file)
        self.mysql_risk = MySql(**self.config['mysql_risk'])
        self.mysql_risk_table = None
        self.mongo_derivable = Mongo(**self.config['mongo_derivable'])
        self.mongo_derivable_table = None
        self.except_handler = DingdingExceptionHandler(self.config['robots'])
        self.product = product
        self.ssh_config = self.config['model_file_remote_ssh']

    def set_config(self, config_file):
        with open(config_file, 'r') as f:
            config = yaml.safe_load(f)
        return config

    def get_model_path_from_mysql(self, table=None):
        pass

    def get_top_features(self, monitor_flag):
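        # pull model paths from MySQL, fetch each pickled model over SFTP,
        # and keep only its most important features for monitoring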
        model_path_list = self.get_model_path_from_mysql()

        final_features = []
        # connect to the remote server over SSH
        ssh_client = paramiko.Transport(self.ssh_config['hostname'], self.ssh_config['port'])
        ssh_client.connect(username=self.ssh_config['username'], password=self.ssh_config['password'])
        sftp = paramiko.SFTPClient.from_transport(ssh_client)

        for model_dict in [model for model in model_path_list if model['monitor_flag'] == monitor_flag]:
            remote_model_path = model_dict['model_path']
            # make sure the model file exists locally, downloading it over SFTP once if needed
            local_dir = os.path.split(remote_model_path)[0]
            if not os.path.isdir(local_dir):
                os.makedirs(local_dir)
            if not os.path.isfile(remote_model_path):
                sftp.get(remote_model_path, remote_model_path)

            with open(remote_model_path, 'rb') as f:
                model_info = pickle.load(f)
            top_columns = []
            try:
                model = model_info['model']
                enum = model.get_params()['enum']
                mm = model.get_params()['clf']
                # rank (importance, name) pairs and keep the 30 most important names
                ranked = sorted(zip(map(lambda x: round(x, 4), mm.feature_importances_), enum.clean_col_names),
                                reverse=True)
                top_columns = [name for _, name in ranked[:30]]
            except Exception as e:
                logging.error(e)
            final_features.extend(top_columns)
        sftp.close()
        ssh_client.close()
        no_final_features = ['ALPHA_Behavior_submit_date', 'ALPHA_Behavior_submit_hour',
                             'ALPHA_Behavior_submit_weekday', 'X_DNA_Behavior_submit_date',
                             'X_DNA_Behavior_submit_hour', 'X_DNA_Behavior_submit_weekday']  # these are not monitored
        final_features = list(set(final_features) - set(no_final_features))
        logging.info('{}-top_features: {}'.format(self.product, final_features))
        return final_features

    def get_appid_from_mysql(self, start_time, diff_day, diff_hour):
        """获取所需要的11天的所有appid信息"""
        end_time = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') + timedelta(days=diff_day)).strftime(
            "%Y-%m-%d %H:%M:%S")
        start_hour = 0
        end_hour = start_hour + diff_hour
        sql = '''select upper(app_id) as app_id,flow_type,work_flag,date(create_time) as date
                        from {}
                        where create_time >= '{}'
                              and create_time < '{}' 
                              and hour(create_time) >= {}
                              and hour(create_time) <= {}
             '''.format(self.mysql_risk_table, start_time, end_time, start_hour, end_hour)
        res = self.mysql_risk.query(sql)
        return pd.DataFrame(res)

    def get_features(self, df_appid, top_feature):
        appids = list(set(df_appid['app_id'].tolist()))
        qry = {'_id': {'$in': appids}}
        qry1 = {feature: 1 for feature in top_feature}
        res = self.mongo_derivable.get_collection(self.mongo_derivable_table).find(qry, qry1, batch_size=500)
        res_list = list(res)
        return pd.DataFrame(res_list)

    def psi(self, df_feature_1, df_feature_2, feature, bin_num=10):
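        # Population Stability Index of one feature between two samples:
        # bucket the combined values into quantile bins, then sum
        # (a_rate - b_rate) * ln(a_rate / b_rate) over the bins (label 0 = baseline)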
        df_feature_1['label'] = 0
        df_feature_2['label'] = 1
        df_feature = pd.concat([df_feature_1, df_feature_2])
        df_feature = df_feature.replace('null', np.nan)
        df_feature = df_feature.replace('NaN', np.nan)
        df_feature = df_feature.apply(pd.to_numeric, errors='ignore')
        enum = EnumMapper(maximum_enum_num=100)
        enum.fit(df_feature)
        df_feature = enum.transform(df_feature)
        if feature in df_feature.columns.tolist():
            df_psi = df_feature[[feature, 'label']].copy()
            if df_psi[feature].dtype not in ['int', 'float'] and df_psi[feature].unique().shape[0] > 20:
                # print("The unique number of feature is {}".format(df_psi[feature].unique().shape[0]))
                return None, 999
            else:
                if df_psi[feature].unique().shape[0] > 2:
                    df_psi['bins'] = pd.qcut(df_psi[feature], 10, precision=2, duplicates='drop')
                    nan_df = df_psi[df_psi[feature].map(lambda x: pd.isnull(x))].reset_index(drop=True)
                    if not nan_df.empty:
                        df_psi['bins'] = df_psi['bins'].cat.add_categories('-999')
                        df_psi['bins'] = df_psi['bins'].fillna('-999')
                else:
                    df_psi['bins'] = df_psi[feature].map(lambda x: -999 if pd.isnull(x) else x)
                group_df = df_psi.groupby(['bins', 'label']).size().unstack('label')
                group_df = group_df.fillna(0)
                group_df['b_rate'] = group_df[0] / group_df[0].sum()
                group_df['a_rate'] = group_df[1] / group_df[1].sum()
                e = 0.000000000001
                group_df['psi_part'] = group_df.apply(
                    lambda group_df: (group_df['a_rate'] - group_df['b_rate']) * math.log(
                        (group_df['a_rate'] + e) / (group_df['b_rate'] + e)), axis=1)

                return group_df, group_df.psi_part.sum()
        else:
            return None, 99

    def psi_classified(self, start_time, diff_day, diff_hour, timedetail):
        """psi分类监控"""
        ls_top_loss_rate = []  # 发送到钉钉的丢失率监控列表
        ls_top_psi = []  # 发送到钉钉的psi监控列表
        total_appids_df = self.get_appid_from_mysql(start_time, diff_day, diff_hour)  # 获取所需要的11天的所有appid信息
        total_appids_df.date = total_appids_df.date.map(lambda x: str(x))  # 将里面date字段的类型转换为str
        # card-opening pre-check
        top_features = self.get_top_features(monitor_flag='cp')
        cp_ls_top_loss_rate, cp_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='c',
                                                            work_flag='precheck')
        if cp_ls_top_loss_rate:
            ls_top_loss_rate.append('####### card-opening pre-check #######')
            ls_top_loss_rate.extend(cp_ls_top_loss_rate)
        if cp_ls_top_psi:
            ls_top_psi.append('####### card-opening pre-check #######')
            ls_top_psi.extend(cp_ls_top_psi)
        # card-opening final check
        top_features = self.get_top_features(monitor_flag='cf')
        cf_ls_top_loss_rate, cf_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='c',
                                                            work_flag='finalcheck')
        if cf_ls_top_loss_rate:
            ls_top_loss_rate.append('####### card-opening final check #######')
            ls_top_loss_rate.extend(cf_ls_top_loss_rate)
        if cf_ls_top_psi:
            ls_top_psi.append('####### card-opening final check #######')
            ls_top_psi.extend(cf_ls_top_psi)
        # first-loan withdrawal pre-check
        top_features = self.get_top_features(monitor_flag='fp')
        fp_ls_top_loss_rate, fp_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='f',
                                                            work_flag='precheck')
        if fp_ls_top_loss_rate:
            ls_top_loss_rate.append('####### first-loan withdrawal pre-check #######')
            ls_top_loss_rate.extend(fp_ls_top_loss_rate)
        if fp_ls_top_psi:
            ls_top_psi.append('####### first-loan withdrawal pre-check #######')
            ls_top_psi.extend(fp_ls_top_psi)
        # first-loan withdrawal final check
        top_features = self.get_top_features(monitor_flag='ff')
        ff_ls_top_loss_rate, ff_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='f',
                                                            work_flag='finalcheck')
        if ff_ls_top_loss_rate:
            ls_top_loss_rate.append('####### first-loan withdrawal final check #######')
            ls_top_loss_rate.extend(ff_ls_top_loss_rate)
        if ff_ls_top_psi:
            ls_top_psi.append('####### first-loan withdrawal final check #######')
            ls_top_psi.extend(ff_ls_top_psi)
        # repeat-loan pre-check
        top_features = self.get_top_features(monitor_flag='wp')
        wp_ls_top_loss_rate, wp_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='w',
                                                            work_flag='precheck')
        if wp_ls_top_loss_rate:
            ls_top_loss_rate.append('####### repeat-loan pre-check #######')
            ls_top_loss_rate.extend(wp_ls_top_loss_rate)
        if wp_ls_top_psi:
            ls_top_psi.append('####### repeat-loan pre-check #######')
            ls_top_psi.extend(wp_ls_top_psi)
        # repeat-loan final check
        top_features = self.get_top_features(monitor_flag='wf')
        wf_ls_top_loss_rate, wf_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='w',
                                                            work_flag='finalcheck')
        if wf_ls_top_loss_rate:
            ls_top_loss_rate.append('####### repeat-loan final check #######')
            ls_top_loss_rate.extend(wf_ls_top_loss_rate)
        if wf_ls_top_psi:
            ls_top_psi.append('####### repeat-loan final check #######')
            ls_top_psi.extend(wf_ls_top_psi)

        # settlement limit adjustment
        top_features = self.get_top_features(monitor_flag='q')
        q_ls_top_loss_rate, q_ls_top_psi = self.psi_distr(start_time, total_appids_df, top_features, flow_type='q',
                                                          work_flag='finalcheck')
        if q_ls_top_loss_rate:
            ls_top_loss_rate.append('####### settlement limit adjustment #######')
            ls_top_loss_rate.extend(q_ls_top_loss_rate)
        if q_ls_top_psi:
            ls_top_psi.append('####### settlement limit adjustment #######')
            ls_top_psi.extend(q_ls_top_psi)

        if ls_top_loss_rate:
            ls_top_loss_rate.insert(0, '*******{} loss-rate alert*******'.format(self.product))
            ls_top_loss_rate.insert(1, 'Time: {}'.format(datetime.now().strftime('%Y-%m-%d ') + timedetail))
            self.except_handler.handle(msg=ls_top_loss_rate)
        if ls_top_psi:
            ls_top_psi.insert(0, '*******{} psi alert*******'.format(self.product))
            ls_top_psi.insert(1, 'Time: {}'.format(datetime.now().strftime('%Y-%m-%d ') + timedetail))
            self.except_handler.handle(msg=ls_top_psi)

    def psi_distr(self, start_time, total_appids_df, top_features, flow_type, work_flag):
        the_psi_date = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') + timedelta(days=10)).strftime(
            '%Y-%m-%d')  # the date being monitored
        logging.info('Monitored date: {}'.format(the_psi_date))
        # app_ids of the matching type from the ten days before the monitored date
        df_appid1 = total_appids_df.query(
            "flow_type=='{}' and work_flag=='{}' and date!='{}'".format(flow_type, work_flag,
                                                                        the_psi_date)).reset_index(drop=True)
        df_appid1 = df_appid1.sample(min(10000, df_appid1.shape[0]))
        logging.info('flow_type:{} work_flag:{} app_id count over the previous ten days: {}'.format(flow_type, work_flag, len(df_appid1)))

        # app_ids of the matching type on the monitored date
        df_appid2 = total_appids_df.query(
            "flow_type=='{}' and work_flag=='{}' and date=='{}'".format(flow_type, work_flag,
                                                                        the_psi_date)).reset_index(drop=True)
        df_appid2 = df_appid2.sample(min(1000, df_appid2.shape[0]))
        logging.info('flow_type:{} work_flag:{} monitored app_id count: {}'.format(flow_type, work_flag, len(df_appid2)))
        dict_report = {}
        ls_top_psi = []
        ls_top_loss_rate = []
        df_feature_all_1 = self.get_features(df_appid1, top_features)
        df_feature_all_2 = self.get_features(df_appid2, top_features)
        for feature in top_features:
            df_feature_1 = pd.DataFrame(df_feature_all_1, columns=[feature])
            df_feature_2 = pd.DataFrame(df_feature_all_2, columns=[feature])

            # missing-value (loss) rate of the feature on the monitored day
            feature_percent = df_feature_2.iloc[:, 0].isna().tolist().count(True) / df_feature_2.shape[0]
            if feature_percent > 0.7:
                ls_top_loss_rate.append("{}--loss_rate:{}".format(feature, round(feature_percent, 3)))

            dict_report[feature] = self.psi(df_feature_1, df_feature_2, feature, bin_num=10)[1]
            if dict_report[feature] > 0.25:
                ls_top_psi.append("{}--psi:{}".format(feature, round(dict_report[feature], 3)))
        return ls_top_loss_rate, ls_top_psi

    # previous day
    def job1(self):
        """Compare yesterday's top-feature distribution with the distribution over the ten days before yesterday."""
        logging.info('{} start handle feature_monitor job1!'.format(self.product))
        start_time = (datetime.now() - timedelta(days=11)).strftime(
            '%Y-%m-%d') + ' 00:00:00'  # start time covering the monitored day plus the ten comparison days
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 24  # fetch each day's data from hour 0 to hour 24
        self.psi_classified(start_time, diff_day, diff_hour, timedetail='morning')
        logging.info('{} end handle feature_monitor job1!'.format(self.product))

    # same day
    def job2(self):
        """Compare today's (hours 0-15) top-feature distribution with the previous ten days' (hours 0-16)."""
        logging.info('{} start handle feature_monitor job2!'.format(self.product))
        start_time = (datetime.now() - timedelta(days=10)).strftime(
            '%Y-%m-%d') + ' 00:00:00'  # start time covering the monitored day plus the ten comparison days
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 15  # fetch each day's data from hour 0 to hour 15
        self.psi_classified(start_time, diff_day, diff_hour, timedetail='afternoon')
        logging.info('{} end handle feature_monitor job2!'.format(self.product))
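
The PSI arithmetic above reduces to a small formula; a minimal standalone sketch (the bin rates are illustrative):

    import math

    def psi(expected_rates, actual_rates, e=1e-12):
        # sum of (actual - expected) * ln(actual / expected) over shared bins,
        # with an epsilon guard so empty bins do not divide by zero
        return sum((a - b) * math.log((a + e) / (b + e))
                   for b, a in zip(expected_rates, actual_rates))

    print(psi([0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]))  # 0.0, no shift
    print(psi([0.25, 0.25, 0.25, 0.25], [0.05, 0.15, 0.30, 0.50]))  # ~0.56, above the 0.25 alert threshold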
Example #22
import datetime as dt
from threading import Semaphore

from flask_socketio import emit
from loguru import logger

import processing.request
from database.mongo import Mongo
from database.objects import User
from exceptions import ConfigurationException
from processing import ProcessRequestType
from www import socketio
from www.emittor import Emittor

namespace = None
queue = list()
thread_lock_max = 10
thread_lock = Semaphore(value=thread_lock_max)
mongo = Mongo()


def get_datetime(value=None):
    # format a timestamp (default: now) as yymmdd_HHMMSS
    return (value if value else dt.datetime.now()).strftime('%y%m%d_%H%M%S')


def queue_status():
    return dict(items=queue, maximum=thread_lock_max, current=len(queue))


def broadcast_queue_status():
    emit('queue-status', dict(status=200, queue=queue_status()))
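
The module-level `thread_lock` above is what limits concurrent solution processing; a minimal sketch of the same barrier pattern (the worker function is illustrative):

    from threading import Semaphore, Thread

    thread_lock = Semaphore(value=10)  # at most 10 requests processed at once

    def worker(request_id):
        with thread_lock:  # blocks while 10 workers are already inside
            print(f'processing {request_id}')

    threads = [Thread(target=worker, args=(i,)) for i in range(25)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()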
Example #23
    def add_comment():
        data = request.json
        user = User(session['user'])
        # course = Courses()[data['course']]
        # problem = course.problem_db[data['problem']]
        # attempt = data['attempt']

        _id = data['_id']
        document = Mongo().result_by_id(_id)
        review = document.review or dict()
        now = time.time()

        from_user = user.id
        author_user = document.user

        for comment in data['comments']:
            line, text = str(comment['line']), comment['comment']
            review_line = review[line] if line in review else list()
            review_line.append(dict(
                user=user.id,
                time=now,
                text=text,
            ))
            review[line] = review_line
        recipients = {from_user, author_user}
        for cmts in review.values():
            for cmt in cmts:
                recipients.add(cmt['user'])

        for recipient in recipients:
            if recipient == from_user:
                logger.info('Not creating notification for self')
            else:
                event_document = {
                    'from': from_user,
                    'to': recipient,
                    'course': document.course,
                    'problem': document.problem,
                    'document': _id,
                    'event': 'new-comment',
                    'title': f'New comment from {from_user}',
                    'description': f'User {from_user} commented on your code in problem {document.ref_problem.id}'
                }

                if Mongo().add_notification(event_document):
                    logger.info('add-notification: {}', event_document)
                else:
                    logger.warning('notification already exists: {}',
                                   event_document)

        mark_as_read = Mongo().mark_as_read(_id=_id,
                                            event='codereview',
                                            to=None)
        logger.info('mark-as-read: {}', mark_as_read)

        update_one = Mongo().update_fields(_id, review=review)
        logger.info('document-updated: {}', update_one)

        return flask.json.dumps(dict(result='ok'))
Example #24
    def stats():
        data = request.json
        filters = {}

        def add_filter(n, v=None, l=None):
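            # copy one request field into the Mongo filter: dotted names are read
            # from data['filters'], and `l` optionally transforms the value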
            r = data
            if n.find('.') != -1:
                r, n = data['filters'], n.split('.')[1]

            if r.get(n, None):
                val = l(r.get(n)) if l else r.get(n)
                if val is not SKIP:
                    filters[v or n] = val

        def dummy_object_id(period):
            days = {'day': 1, 'week': 7, 'two weeks': 14, 'month': 31}.get(period, 365 * 5)
            gen_time = datetime.datetime.today() - datetime.timedelta(days=days)
            return ObjectId.from_datetime(gen_time)

        # {'course': 'TST-2019', 'problem': 'problem-1', 'filters':
        #   {'daterange': 'week', 'status': 'all', 'limit-per-user': '******', 'has-review-flag': 'no', 'search': 'a'}}
        limit_per_user = data['filters']['limit-per-user']
        if limit_per_user == 'all':
            limit_per_user = 1000
        else:
            limit_per_user = int(limit_per_user)

        has_review_flag = data['filters']['has-review-flag']
        if has_review_flag == 'yes':
            filters['review_request'] = {'$ne': None}
        if has_review_flag == 'no':
            filters['review_request'] = {'$exists': False}

        sort_by_inner = data['filters']['sort-by-inner']
        sort_by_outer = data['filters']['sort-by-outer']
        search = str(data['filters']['search']).strip()

        if search:
            filters['user'] = {'$regex': f".*{search}.*"}

        add_filter('course')
        add_filter('filters.problem', 'problem', skip_if_all)
        # add_filter('filters.course', 'course', skip_if_all)

        add_filter('filters.status', 'result.status', skip_if_all)
        add_filter('filters.daterange', '_id',
                   lambda x: {'$gte': dummy_object_id(x)})
        base_properties = {x: 1 for x in Mongo().base_properties}

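        # aggregation: filter matching documents, keep only the needed fields,
        # sort attempts, then group them per user ($$ROOT keeps whole documents)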
        pipeline = [
            {
                '$match': filters
            },
            {
                '$project': {
                    'review': 1,
                    **base_properties
                }
            },
            {
                '$sort': {
                    sort_by_inner: -1
                }
            },
            {
                '$group': {
                    '_id': '$user',
                    'results': {
                        '$push': '$$ROOT'
                    }  # $$ROOT
                }
            },
        ]
        # print(pipeline, limit_per_user)
        items = list(Mongo().data.aggregate(pipeline))
        try:
            course = Courses()[data['course']]
        except Exception:
            course = None

        if course:
            for key in data['filters'].keys():
                if key.startswith('tag-'):
                    tag = key[4:]
                    value = data['filters'][key]
                    if value == 'all':
                        continue

                    items = [
                        x for x in items
                        if course.student_has_tag(x['_id'], tag, value)
                    ]

        def add_fields(x):
            x['firstname'] = str(x['_id']).split('.')[0]
            x['lastname'] = str(x['_id']).split('.')[-1]
            return x

        items = map(add_fields, items)
        items = sorted(items, key=lambda x: x[sort_by_outer])

        result = list()
        for item in items:
            item_copy = deepcopy(item)
            item_copy['results'] = item_copy['results'][0:limit_per_user]
            for attempt in item_copy['results']:
                attempt['time'] = datetime.datetime.timestamp(
                    attempt['_id'].generation_time)
            # item_copy['results'] = sorted(item_copy['results'], key=lambda x: x['time'], reverse=True)

            if 'results' in item_copy:
                item_copy['results'] = [
                    r for r in item_copy['results'] if 'result' in r
                ]
                result.append(item_copy)

        return flask.json.dumps(result)
Example #25
    def load_notifications():
        user = User(session['user'])
        return flask.json.dumps(
            dict(notifications=Mongo().load_notifications(user.id).peek()))
Example #26
def get_linkedin_profile():
    path = settings.DATA_PATH + "\\linkedin\\"
    mongo = Mongo()
    col = mongo.db['person_profiles']
    index = 0
    res = col.find(skip=index)
    id_map = codecs.open(settings.DATA_PATH + "\\idmap" + str(index) + ".txt",
                         'w',
                         encoding="utf-8")
    for item in res:
        id_map.write(str(index) + ' ' + item['_id'] + '\n')
        index += 1
        out = codecs.open(path +
                          item['_id'].strip().replace('"', ' ').split('?')[0],
                          'w',
                          encoding="utf-8")
        print(str(index))
        try:
            print(item['_id'] + '\n')
        except Exception as e:
            print(e)
        if 'interests' in item:
            out.write(item['interests'] + '\n')
        else:
            print('[DEBUG]No Interests')
        if 'education' in item:
            for e in item['education']:
                out.write(e['name'] + '\n')
                if 'desc' in e:
                    out.write(e['desc'] + '\n')
        else:
            print('[DEBUG]No Education')
        if 'group' in item:
            if 'member' in item['group']:
                out.write(item['group']['member'] + '\n')
            if 'affilition' in item['group']:
                for a in item['group']['affilition']:
                    out.write(a + '\n')
        else:
            print('[DEBUG]No Group')
        out.write(item['name']['family_name'] + ' ' +
                  item['name']['given_name'])
        if 'overview_html' in item:
            soup = BeautifulSoup(item['overview_html'])
            out.write(' '.join(list(soup.strings)) + '\n')
        else:
            print('[DEBUG]No Overview')
        if 'locality' in item:
            out.write(item['locality'] + '\n')
        else:
            print('[DEBUG]No Locality')
        if 'skills' in item:
            for s in item['skills']:
                out.write(s + '\n')
        else:
            print("[DEBUG]No Skills")
        if 'industry' in item:
            out.write(item['industry'] + '\n')
        else:
            print("[DEBUG]No Industry")
        if 'experience' in item:
            for e in item['experience']:
                if 'org' in e:
                    out.write(e['org'] + '\n')
                if 'title' in e:
                    out.write(e['title'] + '\n')
        else:
            print("[DEBUG]No Experience")
        if 'summary' in item:
            out.write(item['summary'] + '\n')
        else:
            print("[DEBUG]No Summary")
        out.write('url')
        if 'specilities' in item:
            out.write(item['specilities'] + '\n')
        else:
            print("[DEBUG]No Specilities")
        if 'homepage' in item:
            for k in item['homepage'].keys():
                for h in item['homepage'][k]:
                    out.write(h + '\n')
        else:
            print("[DEBUG]No Homepage")
        if 'honors' in item:
            for h in item['honors']:
                out.write(h + '\n')
        else:
            print("[DEBUG]No Honors")
        out.close()
    id_map.close()
Example #27
def convert_db(from_db, to_db):
    from database.mongo import Mongo
    from plucky import plucks
    from processing.statuses import Status

    mongo = Mongo()

    def rename(document, old_name, new_name):
        if old_name in document:
            document[new_name] = document[old_name]
            del document[old_name]
        return document

    def delete(document, old_name):
        if old_name in document:
            del document[old_name]
        return document

    def compute_score(statuses):
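        # total score plus counts per status family; the code semantics
        # (100, 101, 2xx) come from processing.statuses.Status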
        return dict(
            score=sum(plucks(statuses, 'score')),
            scores=[
                len([s.score for s in statuses if s.code == 100]),
                len([s.score for s in statuses if s.code == 101]),
                len([s.score for s in statuses if s.code in (200, 201)]),
            ])

    processed_ids = [
        str(x['_id'])
        for x in mongo.db.get_collection(to_db).find({}, {'_id': 1})
    ]
    items = mongo.db.get_collection(from_db).find()
    updated = list()

    for item in items:
        # skip already processed items
        if str(item['_id']) in processed_ids:
            continue

        rename(item, 'language', 'lang')
        rename(item, 'tests', 'results')
        delete(item, 'datetime')
        result = item.get('result', {})

        if 'attempt' not in item:
            item['attempt'] = int('{:%Y%H%M%S}'.format(
                item['_id'].generation_time))

        if item.get('action') == 'solve' and 'score' not in result:
            results = item.get('results', [])
            for r in results:
                if 'score' not in r and 'status' in r:
                    r.update(compute_score([Status[r.get('status')]]))

            statuses = list(map(Status.get, plucks(results, 'status')))
            result.update(compute_score(statuses))

        if 'id' in result:
            if str(result['id']).upper() in ('FINAL RESULT', 'EVALUATION'):
                result['id'] = 'Result'
        updated.append(item)

    if updated:
        ack = mongo.db.get_collection(to_db).insert_many(updated)

        print(ack)
        print(ack.acknowledged)
        print(len(ack.inserted_ids))
Example #28
    def student_submit_solution(data):
        print(data)
        user = User(session['user'])

        try:
            type = str(data['type'])
            action = ProcessRequestType(type)
            skip_docker = not data.get('docker', True)
        except Exception:
            Emittor.error('Unsupported action', [
                'Given action is not supported:',
                '    %s' % data['type'], '', 'Please contact [email protected]',
                'if you think this is a mistake.'
            ])
            return

        if not user.is_admin() and (skip_docker or action
                                    in (ProcessRequestType.GENERATE_INPUT,
                                        ProcessRequestType.GENERATE_OUTPUT)):
            Emittor.error('Operation not permitted', [
                'You do not have sufficient privileges to perform action:',
                '    %s (skip docker: %s)' %
                (action, skip_docker), '', 'Please contact [email protected]',
                'if you want to gain the privileges.'
            ])
            return

        request = processing.request.ProcessRequest(
            user=user,
            lang=data['lang'],
            problem=data['prob'],
            course=data['course'],
            src=data['src'],
            type=action,
            docker=not (skip_docker and user.is_admin()),
        )

        if Env.use_database:
            Mongo().save_log(request.get_log_dict())

        # ignore problems which are past due
        if request.problem.time_left < 0:
            return

        Emittor.register_events(request)
        Emittor.queue_status(queue_status())

        time.sleep(0.1)
        queue.append(request)
        Emittor.queue_push(request)

        time.sleep(0.1)

        # put a barrier here so only a certain number of users can process code at once
        # while the others will see the queue list
        with thread_lock:
            try:
                request.process()
            except ConfigurationException as e:
                if user.is_admin():
                    logger.exception(
                        '[visible to admin only] invalid yaml config')
                    Emittor.exception(e)
            except Exception as e:
                logger.exception('process error:')
                Emittor.exception(e)
            finally:
                output_dir, attempt = request.save_result()
                if Env.use_database:
                    Mongo().save_result(
                        request.get_result_dict(),
                        output_dir=output_dir,
                        attempt=attempt,
                    )
                request.destroy()

        queue.remove(request)
        Emittor.queue_pop(request)
Example #29
    def view_result(course_name, course_year, problem_id, _id=None):
        user = User(session['user'])

        if user.is_admin():
            return redirect(
                url_for('admin_problem',
                        course_name=course_name,
                        course_year=course_year,
                        problem_id=problem_id))

        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problem = course.problem_db[problem_id]
        results = list()
        result = None
        breadcrumbs = [Link.CoursesBtn(), Link.CourseBtn(course)]

        # TODO check access
        if _id:
            document = Mongo().result_by_id(_id)
            if document:
                # add to previous solution if already executed
                if document.result:
                    results.append(document)
                else:
                    result = document
                    breadcrumbs.append(Link.ProblemBtn(course, problem))

        if Env.use_database:
            for prev in Mongo().peek_last_n_results(10, user.id, course.id,
                                                    problem.id):
                # push only valid result
                if prev.result and str(prev._id) != str(_id):
                    results.append(prev)

        if _id:
            for r in results:
                if str(r._id) == str(_id):
                    r.active = 'active'

        def get_attempt(obj):
            try:
                return int(obj.attempt)
            except Exception:
                return 0

        results = sorted(results, reverse=True, key=get_attempt)

        return render_template_ext(
            'view_result.njk',
            user=user,
            notifications=Mongo().load_notifications(user.id),
            results=results,
            result=result,
            requestReview=True,
            title='Problem %s' % problem.name,
            breadcrumbs=Breadcrumbs.new(*breadcrumbs),
            js=[
                '//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.6/highlight.min.js',
                '/static/js/lib/highlightjs-line-numbers.js'
            ],
            js_no_cache=['sockets.js', 'process.js'])
Example #30
class BaseFeatureMonitor(object):
    def __init__(self, config_file=None, product=None):
        self.config = self.set_config(config_file)
        self.mysql_risk = MySql(**self.config['mysql_risk'])
        self.mysql_risk_table = None
        self.mongo_derivable = Mongo(**self.config['mongo_derivable'])
        self.mongo_derivable_table = None
        self.except_handler = DingdingExceptionHandler(
            self.config['robots_psi'])
        self.product = product
        self.ssh_config = self.config['model_file_remote_ssh']

    def set_config(self, config_file):
        with open(config_file, 'r') as f:
            config = yaml.safe_load(f)
        return config

    def get_model_path_from_mysql(self, table=None):
        pass

    def get_top_features(self):
        """Collect the top-30 features for each monitor_flag into a DataFrame."""
        model_path_list = self.get_model_path_from_mysql()
        model_path_df = pd.DataFrame(model_path_list)
        group_df = model_path_df.groupby('monitor_flag').apply(
            lambda x: x.model_path.unique()).rename(
                'model_path_list').reset_index()
        group_df['top_features'] = group_df['model_path_list'].map(
            lambda x: self.top30_features(x))

        return group_df[['monitor_flag', 'top_features']]

    def top30_features(self, model_path):
        final_features = []
        # connect to the remote server over SSH
        ssh_client = paramiko.Transport(self.ssh_config['hostname'],
                                        self.ssh_config['port'])
        ssh_client.connect(username=self.ssh_config['username'],
                           password=self.ssh_config['password'])
        sftp = paramiko.SFTPClient.from_transport(ssh_client)

        for remote_model_path in model_path:
            # make sure the model file exists locally, downloading it over SFTP once if needed
            local_dir = os.path.split(remote_model_path)[0]
            if not os.path.isdir(local_dir):
                os.makedirs(local_dir)
            if not os.path.isfile(remote_model_path):
                sftp.get(remote_model_path, remote_model_path)

            with open(remote_model_path, 'rb') as f:
                model_info = pickle.load(f)
            top_columns = []
            try:
                model = model_info['model']
                enum = model.get_params()['enum']
                mm = model.get_params()['clf']
                # rank (importance, name) pairs and keep the 30 most important names
                ranked = sorted(zip(
                    map(lambda x: round(x, 4), mm.feature_importances_),
                    enum.clean_col_names),
                    reverse=True)
                top_columns = [name for _, name in ranked[:30]]
            except Exception as e:
                logging.error(e)
            final_features.extend(top_columns)
        sftp.close()
        ssh_client.close()
        no_final_features = [
            'ALPHA_Behavior_submit_date', 'ALPHA_Behavior_submit_hour',
            'ALPHA_Behavior_submit_weekday', 'X_DNA_Behavior_submit_date',
            'X_DNA_Behavior_submit_hour', 'X_DNA_Behavior_submit_weekday'
        ]  # these are not monitored
        final_features = list(set(final_features) - set(no_final_features))
        # logging.info('{}-top_features: {}'.format(self.product, final_features))
        return final_features

    def get_appid_from_mysql(self, start_time, diff_day, diff_hour):
        """获取所需要的11天的所有appid信息"""
        end_time = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                    timedelta(days=diff_day)).strftime("%Y-%m-%d %H:%M:%S")
        start_hour = 0
        end_hour = start_hour + diff_hour
        sql = '''select upper(app_id) as app_id,flow_type,work_flag,date(create_time) as date
                        from {}
                        where create_time >= '{}'
                              and create_time < '{}' 
                              and hour(create_time) >= {}
                              and hour(create_time) <= {}
             '''.format(self.mysql_risk_table, start_time, end_time,
                        start_hour, end_hour)
        res = self.mysql_risk.query(sql)
        return pd.DataFrame(res)

    def get_features(self, df_appid, top_feature):
        appids = list(set(df_appid['app_id'].tolist()))
        qry = {'_id': {'$in': appids}}
        qry1 = {feature: 1 for feature in top_feature}
        res = self.mongo_derivable.get_collection(
            self.mongo_derivable_table).find(qry, qry1, batch_size=500)
        res_list = list(res)
        return pd.DataFrame(res_list)

    @staticmethod
    def cal_psi(x, y):
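        # one PSI term: (actual - expected) * ln(actual / expected),
        # epsilon-guarded when either rate is zero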
        e = 0.00001
        if x == 0 or y == 0:
            psi = (x - y) * math.log((x + e) / (y + e))
        else:
            psi = (x - y) * math.log(x / y)
        return round(psi, 3)

    def psi(self, df_feature_1, df_feature_2, feature, bin_num=10):
        """Compute the PSI of `feature` between the baseline frame
        (df_feature_1) and the current frame (df_feature_2).

        Returns (bin table, psi); the sentinel psi codes 99 (feature missing)
        and 999 (unbinnable categorical) are paired with a None table."""
        df_feature_1['label'] = 0  # baseline window
        df_feature_2['label'] = 1  # monitored window
        df_feature = pd.concat([df_feature_1, df_feature_2])
        df_feature = df_feature.replace('null', np.nan)
        df_feature = df_feature.replace('NaN', np.nan)
        df_feature = df_feature.apply(pd.to_numeric, errors='ignore')
        enum = EnumMapper(maximum_enum_num=100)
        enum.fit(df_feature)
        df_feature = enum.transform(df_feature)
        if feature not in df_feature.columns.tolist():
            return None, 99  # feature absent after the enum mapping
        df_psi = df_feature[[feature, 'label']].copy()
        if (df_psi[feature].dtype not in ['int', 'float']
                and df_psi[feature].unique().shape[0] > 20):
            # high-cardinality non-numeric feature: binning is not meaningful
            return None, 999
        if df_psi[feature].unique().shape[0] > 2:
            # quantile-bin the feature; missing values get a sentinel bin
            df_psi['bins'] = pd.qcut(df_psi[feature],
                                     bin_num,
                                     precision=2,
                                     duplicates='drop')
            nan_df = df_psi[df_psi[feature].map(
                lambda x: pd.isnull(x))].reset_index(drop=True)
            if not nan_df.empty:
                df_psi['bins'] = df_psi['bins'].cat.add_categories(
                    '(-999.1, -999]')
                df_psi['bins'] = df_psi['bins'].fillna('(-999.1, -999]')
        else:
            # (near-)binary feature: use the raw values, -999 for NaN
            df_psi['bins'] = df_psi[feature].map(
                lambda x: -999 if pd.isnull(x) else x)
        group_df = df_psi.groupby(['bins', 'label']).size().unstack('label')
        group_df = group_df.fillna(0)
        group_df['b_rate'] = group_df[0] / group_df[0].sum()  # baseline share
        group_df['a_rate'] = group_df[1] / group_df[1].sum()  # current share
        group_df = group_df.round(4)
        group_df['psi_part'] = list(
            map(lambda x, y: self.cal_psi(x, y), group_df.b_rate,
                group_df.a_rate))
        group_df = group_df.round(3)
        group_df = group_df.reset_index()
        return group_df, group_df.psi_part.sum()
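    # A common rule of thumb for the summed PSI: below 0.1 the distribution is
    # considered stable, 0.1-0.25 a moderate shift, and above 0.25 a major
    # shift (the alert threshold psi_distr() applies below). The sentinel
    # codes 99 and 999 are not PSI values and come with df=None, so callers
    # must filter them out rather than compare them against the threshold.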

    @staticmethod
    def define_mf(x, y):
        """Map (flow_type, work_flag) to a monitor-flag code."""
        mapping = {
            ('c', 'precheck'): 'cp',  # card-opening pre-check
            ('c', 'finalcheck'): 'cf',  # card-opening final check
            ('f', 'precheck'): 'fp',  # first-loan withdrawal pre-check
            ('f', 'finalcheck'): 'ff',  # first-loan withdrawal final check
            ('w', 'precheck'): 'wp',  # repeat-loan pre-check
            ('w', 'finalcheck'): 'wf',  # repeat-loan final check
            ('q', 'finalcheck'): 'q',  # settlement credit-line adjustment
        }
        return mapping.get((x, y))
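    # e.g. define_mf('c', 'precheck') -> 'cp'; any (flow_type, work_flag)
    # pair outside these seven stages maps to None.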

    def psi_classified(self, start_time, diff_day, diff_hour, timedetail):
        """Run the PSI monitoring separately for each business stage."""
        # All app_ids in the 11-day window
        total_appids_df = self.get_appid_from_mysql(start_time, diff_day,
                                                    diff_hour)
        # The string-based queries below need the date column as str
        total_appids_df.date = total_appids_df.date.map(lambda x: str(x))
        total_appids_df['monitor_flag'] = list(
            map(lambda x, y: self.define_mf(x, y), total_appids_df.flow_type,
                total_appids_df.work_flag))
        headers = {
            'cp': '======= card-opening pre-check =======',
            'cf': '======= card-opening final check =======',
            'fp': '======= first-loan withdrawal pre-check =======',
            'ff': '======= first-loan withdrawal final check =======',
            'wp': '======= repeat-loan pre-check =======',
            'wf': '======= repeat-loan final check =======',
            'q': '======= settlement credit-line adjustment =======',
        }
        cp_ls_top_psi = []  # PSI alert lines to push to DingTalk
        features_df = self.get_top_features()
        for monitor_flag in total_appids_df.monitor_flag.unique().tolist():
            top_features = features_df.query(
                "monitor_flag=='{}'".format(monitor_flag)).top_features.values
            if len(top_features) > 0:
                top_psi = self.psi_distr(start_time, total_appids_df,
                                         top_features[0], monitor_flag)
                if top_psi and monitor_flag in headers:
                    cp_ls_top_psi.append(headers[monitor_flag])
                cp_ls_top_psi.extend(top_psi)
        logging.info('PSI warning list: {}'.format(cp_ls_top_psi))
        if cp_ls_top_psi:
            cp_ls_top_psi.insert(
                0, '******* {} PSI alert *******'.format(self.product))
            cp_ls_top_psi.insert(
                1, 'Time: {}'.format(datetime.now().strftime('%Y-%m-%d ') +
                                     timedetail))
            self.except_handler.handle(msg=cp_ls_top_psi)

    def psi_distr(self, start_time, total_appids_df, top_features,
                  monitor_flag):
        # The monitored date is the 11th day of the window
        the_psi_date = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                        timedelta(days=10)).strftime('%Y-%m-%d')
        logging.info('monitored date: {}'.format(the_psi_date))
        # Baseline: this stage's app_ids from the 10 days before the monitored date
        df_appid1 = total_appids_df.query(
            "monitor_flag=='{}' and date!='{}'".format(
                monitor_flag, the_psi_date)).reset_index(drop=True)
        df_appid1 = df_appid1.sample(min(10000, df_appid1.shape[0]))
        logging.info('monitor_flag:{} baseline app_id count: {}'.format(
            monitor_flag, len(df_appid1)))

        # Current: this stage's app_ids on the monitored date
        df_appid2 = total_appids_df.query(
            "monitor_flag=='{}' and date=='{}'".format(
                monitor_flag, the_psi_date)).reset_index(drop=True)
        df_appid2 = df_appid2.sample(min(1000, df_appid2.shape[0]))
        logging.info('monitor_flag:{} monitored app_id count: {}'.format(
            monitor_flag, len(df_appid2)))
        ls_top_psi = []
        df_feature_all_1 = self.get_features(df_appid1, top_features)
        df_feature_all_2 = self.get_features(df_appid2, top_features)
        psi_dict = {}
        for feature in top_features:
            df_feature_1 = pd.DataFrame(df_feature_all_1, columns=[feature])
            df_feature_2 = pd.DataFrame(df_feature_all_2, columns=[feature])
            df, psi = self.psi(df_feature_1, df_feature_2, feature,
                               bin_num=10)
            psi_dict.update({feature: psi})
            # Skip the sentinel codes (df is None when psi is 99 or 999)
            if df is not None and psi > 0.25:
                ls_top_psi.append('{}--psi:{}'.format(feature, round(psi, 3)))
                df['bins'] = df['bins'].map(lambda x: str(x))
                # The bin contributing the most to the total PSI
                max_index = df.query("psi_part=={}".format(
                    df.psi_part.max()))['bins'].values[0]
                if str(max_index) == '(-999.1, -999]':
                    str_text = 'cause: shift in the missing-value rate, '
                else:
                    str_text = 'cause: shift in bin {}, '.format(max_index)
                if df.query("bins=='{}'".format(max_index)).a_rate.values[0] > \
                        df.query("bins=='{}'".format(max_index)).b_rate.values[0]:
                    str_text += 'current share exceeds the baseline share.'
                else:
                    str_text += 'current share is below the baseline share.'
                ls_top_psi.append(str_text)
                ls_top_psi.append('==' * 18)
                ls_top_psi.append(str(df))
                ls_top_psi.append('==' * 18)
        logging.info('{} calculate psi done: {}'.format(
            monitor_flag, psi_dict))
        return ls_top_psi
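    # Sampling note: the baseline is capped at 10,000 app_ids and the monitored
    # day at 1,000, which bounds the size of the Mongo $in queries while
    # keeping the binned rate estimates reasonably stable.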

    # Previous day
    def job1(self):
        """Compare yesterday's top-feature distribution with that of the 10
        days before it."""
        logging.info('{} start handle psi_monitor job1!'.format(self.product))
        # Window start: 11 days ago at midnight (10 baseline days + 1 monitored day)
        start_time = (datetime.now() - timedelta(days=11)).strftime(
            '%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 24  # fetch each day's data for hours 0-24
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='previous day 0-24h distribution shift')
        logging.info('{} end handle psi_monitor job1!'.format(self.product))

    # Current day
    def job2(self):
        """Compare today's (hours 0-16) top-feature distribution with that of
        the previous 10 days (hours 0-16)."""
        logging.info('{} start handle psi_monitor job2!'.format(self.product))
        # Window start: 10 days ago at midnight, so today is the monitored day
        start_time = (datetime.now() - timedelta(days=10)).strftime(
            '%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 15  # hours 0-15 inclusive, i.e. data up to 16:00
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='current day 0-16h distribution shift')
        logging.info('{} end handle psi_monitor job2!'.format(self.product))

    # Current day
    def job3(self):
        """Compare today's (hours 0-18) top-feature distribution with that of
        the previous 10 days (hours 0-18)."""
        logging.info('{} start handle psi_monitor job3!'.format(self.product))
        start_time = (datetime.now() -
                      timedelta(days=10)).strftime('%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 17  # hours 0-17 inclusive, i.e. data up to 18:00
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='current day 0-18h distribution shift')
        logging.info('{} end handle psi_monitor job3!'.format(self.product))
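
    # A minimal scheduling sketch (an assumption, not part of the original
    # code): the three jobs are meant to run at different points in the day.
    # `PsiMonitor` is a hypothetical name for this class, the run times are
    # illustrative, and the third-party `schedule` package is just one
    # possible runner:
    #
    #     import time
    #     import schedule
    #
    #     monitor = PsiMonitor(config_file='config.yaml', product='demo')
    #     schedule.every().day.at('01:00').do(monitor.job1)  # previous day, 0-24h
    #     schedule.every().day.at('16:30').do(monitor.job2)  # current day, 0-16h
    #     schedule.every().day.at('18:30').do(monitor.job3)  # current day, 0-18h
    #     while True:
    #         schedule.run_pending()
    #         time.sleep(60)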