Пример #1
0
def start_split_subject_stat_task(self, category, task_dt):
    """
    Split the subject-statistics job for one category into offset-based
    sub-tasks and dispatch them.

    The number of grouped records is counted, existing middle rows of the
    category are deleted, and one ``start_task_subject_statistics`` sub-task
    is queued per ``SKIP_NUM``-sized slice. Nothing is done when the category
    already has an entry in the ``KEY_CACHE_REPORT_CONDITION`` hash.

    NOTE(review): when the count is 0 no sub-task is queued, yet the hash
    field is still set to the in-process status; whether anything else
    resets it cannot be seen from this function.

    :param self: bound task instance; only ``self.request.id`` is read
    :param category: grouping condition; its ``str()`` is the hash field name
    :param task_dt: passed through unchanged to every sub-task
    :return: None
    """
    request_id = self.request.id
    category_key = str(category)
    logger.info('START(%s): Begin split subject_statistics, condition is %s' %
                (request_id, category_key))

    try:
        # Any existing entry means the split is running or already done.
        if RedisCache.hget(KEY_CACHE_REPORT_CONDITION,
                           category_key) is not None:
            logger.warning(
                ' END (%s): DOING or Done split subject_statistics, condition is %s'
                % (request_id, category_key))
            return

        counted = MemberSubjectStatistics.sync_aggregate(
            stage_list=[GroupStage(category), CountStage()]).to_list(1)
        count = counted[0].count if counted else 0
        logger.info('request(%s): SPLIT, count=%s' % (request_id, count))

        full_batches, remainder = divmod(count, SKIP_NUM)

        ReportSubjectStatisticsMiddle.sync_delete_many({'category': category})

        # The trailing partial slice (if any) is queued first, followed by
        # the full slices in ascending offset order.
        if remainder:
            task_num = full_batches + 1
            offsets = [full_batches * SKIP_NUM]
        else:
            task_num = full_batches
            offsets = []
        offsets.extend(batch * SKIP_NUM for batch in range(full_batches))

        for offset in offsets:
            start_task_subject_statistics.delay(category, task_dt, offset,
                                                request_id, task_num)

        RedisCache.hset(KEY_CACHE_REPORT_CONDITION, category_key,
                        STATUS_SUBJECT_STATISTICS_IN_PROCESS)

    except Exception:
        logger.error(traceback.format_exc())

    logger.info(' END (%s): Finish split subject_statistics, condition is %s' %
                (request_id, category_key))
Пример #2
0
def start_task_subject_statistics(self,
                                  group_dict=None,
                                  task_dt=None,
                                  skip_num=None,
                                  parent_task_id=None,
                                  task_num=None):
    """
    Aggregate one slice of subject statistics and store it as middle rows.

    Each aggregated subject becomes a ``ReportSubjectStatisticsMiddle`` row;
    rows are inserted in batches of 5000. Afterwards this task's id is added
    to the Redis set named by ``parent_task_id``, and when the count reported
    by ``get_task_count`` equals ``task_num`` the category is marked as ended
    and the set is deleted. Any exception is logged and swallowed.

    :param self: bound task instance; only ``self.request.id`` is read
    :param group_dict: grouping condition, stored on every row as ``category``
    :param task_dt: stored on every row when truthy
    :param skip_num: passed to ``get_stages`` together with ``group_dict``
    :param parent_task_id: name of the Redis set tracking finished sub-tasks
    :param task_num: number of finished sub-tasks that completes the job
    :return: None
    """
    try:
        request_id = self.request.id
        condition_text = str(group_dict)
        logger.info(
            'START(%s): condition= %s , skip_num=%s, parent_task_id=%s' %
            (request_id, condition_text, skip_num, parent_task_id))
        logger.info('group_dict: %s' % condition_text)

        pipeline = get_stages(group_dict, skip_num)
        cursor = MemberSubjectStatistics.sync_aggregate(
            stage_list=pipeline, allowDiskUse=True).batch_size(32)
        logger.info('request(%s), aggregate has finished. category is %s' %
                    (request_id, condition_text))

        pending = []
        for subject in cursor:
            row = ReportSubjectStatisticsMiddle()
            row.category = group_dict
            row.condition = subject.id

            row.custom_code = subject.custom_code
            row.code = subject.code

            # Options are keyed by the string form of their sort value.
            options = {}
            for opt in subject.option_list:
                options[str(opt.sort)] = {
                    'title': opt.title,
                    'correct': opt.correct
                }
            row.option_dict = options
            row.dimension = subject.dimension

            row.title = subject.title
            row.total = subject.total
            row.correct = subject.correct
            if task_dt:
                row.task_dt = task_dt

            pending.append(row)
            if len(pending) >= 5000:
                # Flush a full batch and start a fresh list.
                ReportSubjectStatisticsMiddle.sync_insert_many(pending)
                pending = []

        if pending:
            ReportSubjectStatisticsMiddle.sync_insert_many(pending)

        # Register this sub-task as finished, then check for job completion.
        RedisCache.sadd(parent_task_id, request_id)
        finished = get_task_count(parent_task_id)
        if finished == task_num:
            RedisCache.hset(KEY_CACHE_REPORT_CONDITION, condition_text,
                            STATUS_SUBJECT_STATISTICS_END)
            RedisCache.delete(parent_task_id)
        logger.info(' EXEC(%s): %s tasks has finished' %
                    (request_id, finished))
        logger.info(' END (%s), parent_task_id=%s' %
                    (request_id, parent_task_id))
    except Exception:
        logger.error(traceback.format_exc())
Пример #3
0
def do_statistics_subject_radar(cache_key, root_dimension_code,
                                m_city_code_list, province_code_list,
                                city_code_list, gender_list, age_group_list,
                                education_list):
    """
    Build the per-dimension correct/total statistics for the radar report
    and store the msgpack-packed result list under ``cache_key``.

    Records are grouped by the ``dimension.<cid>`` field of the root
    dimension, and each group is joined to ``SubjectDimension`` to obtain
    its code, title and order. Empty filter lists are ignored.

    :param cache_key: cache key that receives the in-progress marker and,
        at the end, the packed result
    :param root_dimension_code: code of the active root dimension to group by
    :param m_city_code_list: city codes applied as a separate ``city_code``
        match stage
    :param province_code_list: province codes to filter on
    :param city_code_list: city codes to filter on
    :param gender_list: gender values; each is converted with ``int()``
    :param age_group_list: age-group values; each is converted with ``int()``
    :param education_list: education values; each is converted with ``int()``
    :return: None
    :raises ValueError: when no active dimension matches
        ``root_dimension_code``
    """
    # Mark the report as being computed; the third argument (300) is
    # presumably an expiry in seconds -- TODO confirm against RedisCache.set.
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)
    data = []
    dimension = SubjectDimension.sync_find_one(
        dict(code=root_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    if not dimension:
        raise ValueError(
            'can not find dimension by `root_dimension_code`(%s)' %
            root_dimension_code)

    stage_list = []
    # Only use records updated before the cutoff returned by get_yesterday()
    # (original note: data from before midnight of the previous day).
    time_match = get_yesterday()
    stage_list.append(MatchStage({'updated_dt': {'$lt': time_match}}))
    if m_city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': m_city_code_list}}))

    # Optional member-attribute filters; only non-empty lists contribute.
    query_dict = {}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {
            '$in': [int(s_gender) for s_gender in gender_list]
        }
    if age_group_list:
        query_dict['age_group'] = {
            '$in': [int(s_age_group) for s_age_group in age_group_list]
        }
    if education_list:
        query_dict['education'] = {
            '$in': [int(s_education) for s_education in education_list]
        }

    if query_dict:
        stage_list.append(MatchStage(query_dict))

    # Sum totals and correct answers per value of the root dimension field,
    # then join each group id to its SubjectDimension document.
    stage_list.append(
        GroupStage('dimension.%s' % dimension.cid,
                   total={'$sum': '$total'},
                   correct={'$sum': '$correct'}))
    stage_list.append(
        LookupStage(SubjectDimension, '_id', 'cid', 'dimension_list'))
    stat_result = MemberSubjectStatistics.sync_aggregate(stage_list)
    # Drain the cursor manually; StopIteration signals exhaustion.
    while True:
        try:
            mds = stat_result.next()
            if mds:
                # Defaults used when the lookup produced no dimension.
                code, title, ordered = '', '', 0
                if hasattr(mds, 'dimension_list') and mds.dimension_list:
                    # NOTE: this rebinds `dimension`, shadowing the root
                    # dimension; the group stage that needed the root has
                    # already been built above.
                    dimension = mds.dimension_list[0]
                    if dimension:
                        code = dimension.code
                        title = dimension.title
                        ordered = dimension.ordered
                data.append(
                    dict(code=code,
                         title=title,
                         ordered=ordered,
                         correct=mds.correct,
                         total=mds.total))
        except StopIteration:
            break
    if not data:
        # locals() hands every local variable of this function to the
        # warning helper, so the local names here are part of what it sees.
        early_warning_empty("start_statistics_subject_parameter_radar",
                            cache_key, locals(), '获取维度正确率雷达图统计数据为空,请检查!')
    # Replace the in-progress marker with the packed result (no third
    # argument is passed on this call).
    RedisCache.set(cache_key, msgpack.packb(data))
Пример #4
0
def _city_stat_entry(code, title, correct, total):
    """Build one city result dict; ``data`` is the accuracy in percent, rounded to 2 places."""
    return {
        'code': code,
        'title': title,
        'correct': correct,
        'total': total,
        'data': round(correct / total * 100 if total > 0 else 0, 2)
    }


def do_merge_city_stat_accuracy(province_dict: dict, city_code_list=None):
    """
    Merge per-city accuracy statistics into the province mapping, in place.

    Cities are aggregated from ``MemberSubjectStatistics`` for the provinces
    that are keys of ``province_dict``. Each city is appended to the
    ``city_list`` of the province whose key equals the city's
    ``parent_code``. When that province is not in ``province_dict``, the
    city is accumulated into a new province entry built from the looked-up
    province document, and those new entries are merged into
    ``province_dict`` at the end.

    :param province_dict: mapping of province code to province statistics
        dict; mutated in place. Nothing happens when it is empty.
    :param city_code_list: optional city codes to restrict to; when empty,
        every record with a non-null ``city_code`` is used
    :return: None
    """
    if not province_dict:
        return

    match_query = {
        'province_code': {'$in': list(province_dict)},
        'city_code': {'$in': city_code_list} if city_code_list else {'$ne': None},
    }
    pipeline = [
        MatchStage(match_query),
        GroupStage('city_code', t_total={'$sum': '$total'}, t_correct={'$sum': '$correct'},
                   province_code={'$first': '$province_code'}),
        # Accuracy ratio, guarded against division by zero.
        AddFieldsStage(t_accuracy={
            '$cond': {
                'if': {'$eq': ['$t_total', 0]},
                'then': 0,
                'else': {'$divide': ['$t_correct', '$t_total']}
            }
        }),
        SortStage([('t_accuracy', DESC)]),
        LookupStage(AdministrativeDivision, 'province_code', 'post_code', 'province_list'),
        LookupStage(AdministrativeDivision, '_id', 'post_code', 'city_list'),
    ]
    city_stat_list = MemberSubjectStatistics.sync_aggregate(pipeline)

    # Provinces discovered through the lookup that are not yet in province_dict.
    t_province_dict = {}
    while True:
        # Only the cursor advance can legitimately end the loop.
        try:
            city_stat = city_stat_list.next()
        except StopIteration:
            break
        if not city_stat:
            continue

        city_list = city_stat.city_list
        if not city_list:
            continue
        city = city_list[0]
        if not (city and city.parent_code):
            continue

        total = city_stat.t_total or 0
        correct = city_stat.t_correct or 0
        entry = _city_stat_entry(city_stat.id, city.title, correct, total)

        p_stat = province_dict.get(city.parent_code)
        if p_stat:
            if p_stat.get('city_list') is None:
                p_stat['city_list'] = []
            p_stat['city_list'].append(entry)
            continue

        province_list = city_stat.province_list
        if not province_list:
            continue
        province = province_list[0]
        if not province:
            continue

        t_stat = t_province_dict.get(province.post_code)
        if t_stat is None:
            t_stat = {
                'code': province.post_code,
                'title': province.title.replace('省', '').replace('市', ''),
                'correct': 0,
                'total': 0,
                'data': 0,
                'city_list': []
            }
            t_province_dict[province.post_code] = t_stat
        t_stat['correct'] += correct
        t_stat['total'] += total
        # Fix: the accuracy belongs on the province entry. The previous code
        # wrote it to t_province_dict['data'], which added a bare float to
        # the province mapping and left the entry without a 'data' value.
        t_stat['data'] = round(
            t_stat['correct'] / t_stat['total'] * 100 if t_stat['total'] > 0 else 0, 2)
        t_stat['city_list'].append(entry)

    if t_province_dict:
        province_dict.update(t_province_dict)
Пример #5
0
def do_statistics_accuracy(cache_key, city_code_list, choice_time):
    """
    Learning status report: accuracy per province, with per-city breakdown.

    Aggregates correct/total per ``province_code``, joins each province to
    ``AdministrativeDivision`` for its title, merges city statistics via
    ``do_merge_city_stat_accuracy`` and stores the msgpack-packed list under
    ``cache_key``.

    :param cache_key: cache key that receives the in-progress marker and,
        at the end, the packed result
    :param city_code_list: optional city codes to restrict to
    :param choice_time: optional datetime; when given, only records updated
        from this moment up to (excluding) the following midnight are used,
        otherwise records updated before ``get_yesterday()`` are used
    :return: list of province statistics dicts
    """
    from datetime import timedelta

    # NOTE(review): no third argument is passed here, so this marker stays in
    # place if the function raises before the final set -- confirm intended.
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW)
    if not choice_time:
        # Original note: use data from before midnight of the previous day.
        match_stage = MatchStage({'updated_dt': {'$lt': get_yesterday()}})
    else:
        # Exclusive upper bound at the next midnight. The previous bound of
        # 23:59:59.000999 with `$lt` excluded the final second of the day.
        next_midnight = choice_time.replace(
            hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
        match_stage = MatchStage({'updated_dt': {'$gte': choice_time, '$lt': next_midnight}})
    stage_list = [match_stage]
    if city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': city_code_list}}))

    stage_list.extend([
        GroupStage('province_code', t_total={'$sum': '$total'}, t_correct={'$sum': '$correct'}),
        # Accuracy ratio, guarded against division by zero.
        AddFieldsStage(t_accuracy={
            '$cond': {
                'if': {'$eq': ['$t_total', 0]},
                'then': 0,
                'else': {'$divide': ['$t_correct', '$t_total']}
            }
        }),
        SortStage([('t_accuracy', DESC)]),
        LookupStage(AdministrativeDivision, '_id', 'post_code', 'ad_list'),
    ])
    province_stat_list = MemberSubjectStatistics.sync_aggregate(stage_list)

    province_dict = {}
    while True:
        # Only the cursor advance can legitimately end the loop.
        try:
            province_stat = province_stat_list.next()
        except StopIteration:
            break
        if not province_stat:
            continue

        # Records without a province are collected under a placeholder code.
        province_code = province_stat.id or '000000'
        total = province_stat.t_total or 0
        correct = province_stat.t_correct or 0
        title = 'undefined'
        ad_list = province_stat.ad_list
        if ad_list and ad_list[0]:
            title = ad_list[0].title.replace('省', '').replace('市', '')
        province_dict[province_code] = {
            'code': province_code,
            'title': title,
            'correct': correct,
            'total': total,
            'data': round(correct / total * 100 if total > 0 else 0, 2)
        }

    # Merge the per-city statistics into the province entries.
    do_merge_city_stat_accuracy(province_dict, city_code_list)

    data = list(province_dict.values())
    RedisCache.set(cache_key, msgpack.packb(data))
    if not data:
        early_warning_empty("start_statistics_member_accuracy", cache_key, city_code_list, '学习近况中正确率数据为空,请检查!')
    return data
Пример #6
0
def _build_cross_member_filter(province_code_list, city_code_list, gender_list, age_group_list,
                               education_list):
    """Build the optional member-attribute match filter; empty lists contribute nothing."""
    query_dict = {}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {'$in': [int(s_gender) for s_gender in gender_list]}
    if age_group_list:
        query_dict['age_group'] = {'$in': [int(s_age_group) for s_age_group in age_group_list]}
    if education_list:
        query_dict['education'] = {'$in': [int(s_education) for s_education in education_list]}
    return query_dict


def do_statistics_subject_cross(cache_key, main_dimension_code, second_dimension_code, m_city_code_list,
                                province_code_list, city_code_list, gender_list, age_group_list,
                                education_list):
    """
    Build the cross-dimension correct/total statistics and store the
    msgpack-packed result under ``cache_key``.

    For every sub-dimension of the main dimension, one aggregation is run per
    sub-dimension of the second dimension. The result is a list of main
    entries, each carrying a ``sub`` list with the totals of every pairing.

    :param cache_key: cache key that receives the in-progress marker and,
        at the end, the packed result
    :param main_dimension_code: code of the active main dimension
    :param second_dimension_code: code of the active second dimension
    :param m_city_code_list: city codes added to the per-cell match
    :param province_code_list: province codes to filter on
    :param city_code_list: city codes to filter on
    :param gender_list: gender values; each is converted with ``int()``
    :param age_group_list: age-group values; each is converted with ``int()``
    :param education_list: education values; each is converted with ``int()``
    :return: None
    :raises ValueError: when either dimension code matches no active
        dimension, or a gender/age-group/education value is not an integer
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)
    main_dimension = SubjectDimension.sync_find_one(
        dict(code=main_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    # Fail with a clear message instead of an AttributeError on `.cid` below.
    if not main_dimension:
        raise ValueError(
            'can not find dimension by `main_dimension_code`(%s)' % main_dimension_code)
    main_sub_dimension_list = SubjectDimension.sync_find(dict(parent_cid=main_dimension.cid)).sort(
        [('ordered', ASC)]).to_list(None)

    second_dimension = SubjectDimension.sync_find_one(
        dict(code=second_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    if not second_dimension:
        raise ValueError(
            'can not find dimension by `second_dimension_code`(%s)' % second_dimension_code)
    second_sub_dimension_list = SubjectDimension.sync_find(dict(parent_cid=second_dimension.cid)).sort(
        [('ordered', ASC)]).to_list(None)

    # Loop-invariant inputs are computed once so every cell uses the same
    # cutoff and filter (original note: data from before midnight of the
    # previous day).
    time_match = get_yesterday()
    query_dict = _build_cross_member_filter(province_code_list, city_code_list, gender_list,
                                            age_group_list, education_list)
    main_field = 'dimension.%s' % main_dimension.cid
    second_field = 'dimension.%s' % second_dimension.cid

    data = []
    for index, m_dimen in enumerate(main_sub_dimension_list):
        sub_data_list = []
        for s_dimen in second_sub_dimension_list:
            match_dict = {main_field: m_dimen.cid, second_field: s_dimen.cid}
            if m_city_code_list:
                match_dict['city_code'] = {'$in': m_city_code_list}

            # Fresh stage objects per query; only the plain filter data is shared.
            stage_list = [
                MatchStage({'updated_dt': {'$lt': time_match}}),
                MatchStage(match_dict),
            ]
            if query_dict:
                stage_list.append(MatchStage(query_dict))
            # Group everything that matched into a single total row.
            stage_list.append(GroupStage(None, total={'$sum': '$total'}, correct={'$sum': '$correct'}))

            stat_result = MemberSubjectStatistics.sync_aggregate(
                stage_list).to_list(None)
            sub_data_list.append({
                'code': s_dimen.code,
                'title': s_dimen.title,
                'ordered': s_dimen.ordered,
                'correct': stat_result[0].correct if stat_result else 0,
                'total': stat_result[0].total if stat_result else 0
            })
        # Entries are appended with ascending `ordered` (index + 1), so the
        # list is already in its final order and needs no extra sort.
        data.append({
            'code': str(index + 1),
            'title': m_dimen.title,
            'ordered': index + 1,
            'sub': sub_data_list
        })

    if not data:
        # locals() passes this function's local variables to the warning
        # helper as context.
        early_warning_empty("start_statistics_subject_parameter_cross", cache_key, locals(), '获取维度正确率统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))