def do_statistics_member_quantity(cache_key, city_code_list, choice_time):
    """Count active members per province and cache the packed result.

    The cache key is first set to the in-progress marker (5 minute TTL), then
    overwritten with the msgpack-packed list of province entries once the
    aggregation over ``Member`` has finished.

    :param cache_key: cache key receiving the marker and, finally, the result.
    :param city_code_list: optional city codes; when non-empty only members
        whose ``city_code`` is in the list are counted.
    :param choice_time: optional datetime; when given, only members whose
        ``updated_dt`` lies between it and the end of the same calendar day
        are counted, otherwise everything before ``get_yesterday()``.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    stage_list = []
    if city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': city_code_list}}))

    if choice_time:
        # End of the selected day.  ``microsecond`` ranges up to 999999; the
        # previous value of 999 cut off nearly the whole last second.
        day_end = choice_time.replace(hour=23, minute=59, second=59, microsecond=999999)
        time_match = MatchStage({'updated_dt': {'$gte': choice_time, '$lt': day_end}})
    else:
        # No day selected: use everything before the get_yesterday() boundary.
        time_match = MatchStage({'updated_dt': {'$lt': get_yesterday()}})
    stage_list.append(time_match)
    stage_list.append(MatchStage({'status': STATUS_USER_ACTIVE}))

    stage_list += [
        GroupStage('province_code', quantity={'$sum': 1}),
        LookupStage(AdministrativeDivision, '_id', 'post_code', 'ad_list'),
        SortStage([('quantity', DESC)]),
    ]

    province_cursor = Member.sync_aggregate(stage_list)
    province_dict = {}
    while True:
        # Only the cursor advance is expected to signal exhaustion.
        try:
            province_stat = province_cursor.next()
        except StopIteration:
            break
        if not province_stat:
            continue

        # Members without a province are collected under the '000000' bucket.
        province_code = province_stat.id or '000000'
        title = 'undefined'
        ad_list = province_stat.ad_list
        if ad_list and ad_list[0]:
            # Strip the "province"/"city" suffix characters from the title.
            title = ad_list[0].title.replace('省', '').replace('市', '')
        province_dict[province_code] = {
            'code': province_code,
            'title': title,
            'data': province_stat.quantity
        }

    # Merge the city-level statistics into the province entries.
    do_merge_city_stat_member_quantity(province_dict, choice_time, city_code_list)

    data = list(province_dict.values())
    if not data:
        early_warning_empty("start_statistics_member_quantity", cache_key, city_code_list,
                            '学习近况中人数数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_subject_parameter(cache_key, m_province_code_list, m_city_code_list,
                                    s_province_code_list, s_city_code_list, s_gender_list,
                                    s_age_group_list, s_education_list):
    """Collect the per-quantity statistics row and cache it as a packed dict.

    The largest ``quantity`` found in ``SubjectChoiceRules`` bounds the range
    of attribute names (``'0'`` .. ``str(max)``) read from the first row of
    the ``MemberDailyStatistics`` aggregation built by ``do_create_query``.
    The cache key holds the in-progress marker (5 minute TTL) while running
    and the msgpack-packed result afterwards.

    :param cache_key: cache key receiving the marker and, finally, the result.
    :param m_province_code_list: forwarded unchanged to ``do_create_query``.
    :param m_city_code_list: forwarded unchanged to ``do_create_query``.
    :param s_province_code_list: forwarded unchanged to ``do_create_query``.
    :param s_city_code_list: forwarded unchanged to ``do_create_query``.
    :param s_gender_list: forwarded unchanged to ``do_create_query``.
    :param s_age_group_list: forwarded unchanged to ``do_create_query``.
    :param s_education_list: forwarded unchanged to ``do_create_query``.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    data = {}
    rule_rows = SubjectChoiceRules.sync_aggregate(
        [GroupStage('max', max={'$max': '$quantity'})]).to_list(1)
    top_rule = rule_rows[0] if rule_rows else None

    if top_rule and top_rule.max > 0:
        stage_list = do_create_query(top_rule.max + 1, m_province_code_list, m_city_code_list,
                                     s_province_code_list, s_city_code_list, s_gender_list,
                                     s_age_group_list, s_education_list)
        stat_rows = None
        if stage_list:
            stat_rows = MemberDailyStatistics.sync_aggregate(stage_list).to_list(1)
        stat_row = stat_rows[0] if stat_rows else None
        if stat_row:
            # Copy every numbered attribute the aggregated row actually carries.
            for index in range(top_rule.max + 1):
                attr = str(index)
                if hasattr(stat_row, attr):
                    data[attr] = getattr(stat_row, attr, 0)

    if not data:
        # Report the full set of request parameters alongside the warning.
        request_params = {
            'cache_key': cache_key,
            'm_province_code_list': m_province_code_list,
            'm_city_code_list': m_city_code_list,
            's_province_code_list': s_province_code_list,
            's_city_code_list': s_city_code_list,
            's_gender_list': s_gender_list,
            's_age_group_list': s_age_group_list,
            's_education_list': s_education_list
        }
        early_warning_empty("start_statistics_subject_quantity", cache_key,
                            str(request_params), '学习趋势统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_member_time(cache_key, city_code_list, choice_time):
    """Merge the per-city counts of both history sources and cache them.

    ``do_stat_in_history`` is run for ``MemberGameHistory`` and
    ``MemberCheckPointHistory``; the two ``{province_code: {city: count}}``
    results are summed, rolled up per province and stored msgpack-packed
    under ``cache_key``.  Provinces and their cities are ordered by
    descending count.

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the result.
    :param city_code_list: forwarded to ``do_stat_in_history``.
    :param choice_time: forwarded to ``do_stat_in_history``.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    ad_map = {}
    game_data, ad_map = do_stat_in_history(MemberGameHistory, city_code_list, choice_time, ad_map)
    ckpt_data, ad_map = do_stat_in_history(MemberCheckPointHistory, city_code_list, choice_time, ad_map)

    # Fold the game history counts into the checkpoint history counts.
    for prov_code, city_dict in game_data.items():
        if prov_code not in ckpt_data:
            ckpt_data[prov_code] = city_dict
            continue
        merged_cities = ckpt_data[prov_code]
        for c_name, c_data in city_dict.items():
            try:
                merged_cities[c_name] += c_data
            except KeyError:
                merged_cities[c_name] = c_data

    ret_data = []
    for prov_code, city_data in ckpt_data.items():
        prov = ad_map.get(prov_code)
        if not prov:
            prov = AdministrativeDivision.sync_find_one({'code': prov_code, 'parent_code': None})
        # A province known to neither the map nor the collection used to
        # crash on ``prov.title``; label it 'undefined' instead.
        if prov:
            title = prov.title.replace('省', '').replace('市', '')
        else:
            title = 'undefined'

        city_list = [{'title': name, 'data': count} for name, count in city_data.items()]
        city_list.sort(key=lambda item: item['data'], reverse=True)
        ret_data.append({
            'title': title,
            'data': sum(city_data.values()),
            'city_list': city_list
        })
    ret_data.sort(key=lambda item: item['data'], reverse=True)

    if not ret_data:
        early_warning_empty("start_statistics_member_time", cache_key, city_code_list,
                            '学习近况中次数数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(ret_data))
def do_statistics_subject_radar(cache_key, root_dimension_code, m_city_code_list,
                                province_code_list, city_code_list, gender_list,
                                age_group_list, education_list):
    """Aggregate answer totals per sub-dimension of a root dimension and cache them.

    ``MemberSubjectStatistics`` rows older than the ``get_yesterday()``
    boundary are filtered, grouped by ``dimension.<root cid>`` and joined
    with ``SubjectDimension``.  Each group yields a dict with ``code``,
    ``title``, ``ordered``, ``correct`` and ``total``; the list is stored
    msgpack-packed under ``cache_key``.

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the result.
    :param root_dimension_code: code of the active root dimension to group by.
    :param m_city_code_list: optional city codes applied as a separate match.
    :param province_code_list: optional province code filter.
    :param city_code_list: optional city code filter.
    :param gender_list: optional gender filter; values are converted to int.
    :param age_group_list: optional age group filter; values are converted to int.
    :param education_list: optional education filter; values are converted to int.
    :return: None
    :raises ValueError: when no active dimension matches ``root_dimension_code``.
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    data = []
    dimension = SubjectDimension.sync_find_one(
        dict(code=root_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    if not dimension:
        raise ValueError(
            'can not find dimension by `root_dimension_code`(%s)' % root_dimension_code)

    # Only rows updated before the get_yesterday() boundary are considered.
    # NOTE: names bound before the cursor loop are passed on via locals()
    # in the empty-result warning below.
    stage_list = []
    time_match = get_yesterday()
    stage_list.append(MatchStage({'updated_dt': {'$lt': time_match}}))
    if m_city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': m_city_code_list}}))

    query_dict = {}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {'$in': list(map(int, gender_list))}
    if age_group_list:
        query_dict['age_group'] = {'$in': list(map(int, age_group_list))}
    if education_list:
        query_dict['education'] = {'$in': list(map(int, education_list))}
    if query_dict:
        stage_list.append(MatchStage(query_dict))

    stage_list.append(
        GroupStage('dimension.%s' % dimension.cid,
                   total={'$sum': '$total'},
                   correct={'$sum': '$correct'}))
    stage_list.append(LookupStage(SubjectDimension, '_id', 'cid', 'dimension_list'))

    stat_result = MemberSubjectStatistics.sync_aggregate(stage_list)
    while True:
        try:
            mds = stat_result.next()
        except StopIteration:
            break
        if not mds:
            continue

        # Defaults apply when the lookup found no matching dimension.
        code, title, ordered = '', '', 0
        joined = getattr(mds, 'dimension_list', None)
        matched = joined[0] if joined else None
        if matched:
            code, title, ordered = matched.code, matched.title, matched.ordered
        data.append({
            'code': code,
            'title': title,
            'ordered': ordered,
            'correct': mds.correct,
            'total': mds.total
        })

    if not data:
        early_warning_empty("start_statistics_subject_parameter_radar", cache_key, locals(),
                            '获取维度正确率雷达图统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_accuracy(cache_key, city_code_list, choice_time):
    """Compute the answer accuracy per province, cache it and return it.

    ``MemberSubjectStatistics`` rows are grouped by ``province_code``; every
    entry carries ``code``, ``title``, ``correct``, ``total`` and ``data``
    (accuracy in percent, rounded to two decimals).

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the msgpack-packed result.
    :param city_code_list: optional city codes; when non-empty only rows
        whose ``city_code`` is in the list are aggregated.
    :param choice_time: optional datetime; when given, only rows whose
        ``updated_dt`` lies between it and the end of the same calendar day
        are aggregated, otherwise everything before ``get_yesterday()``.
    :return: list of province entries (after the city-level merge).
    """
    # The marker now carries the same 5 minute TTL the sibling statistics
    # use, so a crash cannot leave the key marked "in progress".
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    if choice_time:
        # End of the selected day.  ``microsecond`` ranges up to 999999; the
        # previous value of 999 cut off nearly the whole last second.
        day_end = choice_time.replace(hour=23, minute=59, second=59, microsecond=999999)
        match_stage = MatchStage({'updated_dt': {'$gte': choice_time, '$lt': day_end}})
    else:
        # No day selected: use everything before the get_yesterday() boundary.
        match_stage = MatchStage({'updated_dt': {'$lt': get_yesterday()}})

    stage_list = [match_stage]
    if city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': city_code_list}}))

    # Guard the division so provinces without any answers get accuracy 0.
    accuracy_expr = {
        '$cond': {
            'if': {'$eq': ['$t_total', 0]},
            'then': 0,
            'else': {'$divide': ['$t_correct', '$t_total']}
        }
    }
    stage_list.extend([
        GroupStage('province_code', t_total={'$sum': '$total'}, t_correct={'$sum': '$correct'}),
        AddFieldsStage(t_accuracy=accuracy_expr),
        SortStage([('t_accuracy', DESC)]),
        LookupStage(AdministrativeDivision, '_id', 'post_code', 'ad_list'),
    ])

    province_stat_list = MemberSubjectStatistics.sync_aggregate(stage_list)
    province_dict = {}
    while True:
        # Only the cursor advance is expected to signal exhaustion.
        try:
            province_stat = province_stat_list.next()
        except StopIteration:
            break
        if not province_stat:
            continue

        # Rows without a province are collected under the '000000' bucket.
        province_code = province_stat.id or '000000'
        total = province_stat.t_total or 0
        correct = province_stat.t_correct or 0
        title = 'undefined'
        ad_list = province_stat.ad_list
        if ad_list and ad_list[0]:
            # Strip the "province"/"city" suffix characters from the title.
            title = ad_list[0].title.replace('省', '').replace('市', '')
        province_dict[province_code] = {
            'code': province_code,
            'title': title,
            'correct': correct,
            'total': total,
            'data': round(correct / total * 100 if total > 0 else 0, 2)
        }

    # Merge the city-level statistics into the province entries.
    do_merge_city_stat_accuracy(province_dict, city_code_list)

    data = list(province_dict.values())
    RedisCache.set(cache_key, msgpack.packb(data))
    if not data:
        early_warning_empty("start_statistics_member_accuracy", cache_key, city_code_list,
                            '学习近况中正确率数据为空,请检查!')
    return data
def do_statistics_subject_cross(cache_key, main_dimension_code, second_dimension_code,
                                m_city_code_list, province_code_list, city_code_list,
                                gender_list, age_group_list, education_list):
    """Cross two dimensions and cache the answer totals of every combination.

    For each child of the main dimension a ``sub`` list is built holding, for
    each child of the second dimension, the summed ``total`` and ``correct``
    of the matching ``MemberSubjectStatistics`` rows.

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the msgpack-packed result.
    :param main_dimension_code: code of the active dimension forming the
        outer axis.
    :param second_dimension_code: code of the active dimension forming the
        inner axis.
    :param m_city_code_list: optional city codes merged into the dimension
        match.
    :param province_code_list: optional province code filter.
    :param city_code_list: optional city code filter.
    :param gender_list: optional gender filter; values are converted to int.
    :param age_group_list: optional age group filter; values are converted to int.
    :param education_list: optional education filter; values are converted to int.
    :return: None
    :raises ValueError: when either dimension code matches no active dimension.
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    main_dimension = SubjectDimension.sync_find_one(
        dict(code=main_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    if not main_dimension:
        # Fail with a clear message instead of an AttributeError on ``.cid``.
        raise ValueError(
            'can not find dimension by `main_dimension_code`(%s)' % main_dimension_code)
    main_sub_dimension_list = SubjectDimension.sync_find(
        dict(parent_cid=main_dimension.cid)).sort([('ordered', ASC)]).to_list(None)

    second_dimension = SubjectDimension.sync_find_one(
        dict(code=second_dimension_code, status=STATUS_SUBJECT_DIMENSION_ACTIVE))
    if not second_dimension:
        raise ValueError(
            'can not find dimension by `second_dimension_code`(%s)' % second_dimension_code)
    second_sub_dimension_list = SubjectDimension.sync_find(
        dict(parent_cid=second_dimension.cid)).sort([('ordered', ASC)]).to_list(None)

    # Everything below up to the loop is identical for every combination, so
    # it is computed once; this also keeps the time boundary stable for the
    # whole run.
    time_match = get_yesterday()
    query_dict = {}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {'$in': [int(s_gender) for s_gender in gender_list]}
    if age_group_list:
        query_dict['age_group'] = {'$in': [int(s_age_group) for s_age_group in age_group_list]}
    if education_list:
        query_dict['education'] = {'$in': [int(s_education) for s_education in education_list]}

    main_key = 'dimension.%s' % main_dimension.cid
    second_key = 'dimension.%s' % second_dimension.cid

    data = []
    for position, m_dimen in enumerate(main_sub_dimension_list, start=1):
        sub_data_list = []
        for s_dimen in second_sub_dimension_list:
            match_dict = {main_key: m_dimen.cid, second_key: s_dimen.cid}
            if m_city_code_list:
                match_dict['city_code'] = {'$in': m_city_code_list}

            stage_list = [
                MatchStage({'updated_dt': {'$lt': time_match}}),
                MatchStage(match_dict),
            ]
            if query_dict:
                # Hand every stage its own copy of the shared filter.
                stage_list.append(MatchStage(dict(query_dict)))
            # Group everything into one row holding the two sums.
            stage_list.append(
                GroupStage(None, total={'$sum': '$total'}, correct={'$sum': '$correct'}))

            stat_result = MemberSubjectStatistics.sync_aggregate(stage_list).to_list(None)
            summary = stat_result[0] if stat_result else None
            sub_data_list.append({
                'code': s_dimen.code,
                'title': s_dimen.title,
                'ordered': s_dimen.ordered,
                'correct': summary.correct if summary else 0,
                'total': summary.total if summary else 0
            })

        data.append({
            'code': str(position),
            'title': m_dimen.title,
            'ordered': position,
            'sub': sub_data_list
        })

    data.sort(key=lambda item: item.get('ordered', 0))
    if not data:
        early_warning_empty("start_statistics_subject_parameter_cross", cache_key, locals(),
                            '获取维度正确率统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_member_top_n(cache_key, m_city_code_list, stat_type, top_n, time_range):
    """Compute the daily top-N regions by participating members and cache them.

    ``MemberDailyStatistics`` rows are first grouped per day, member and
    region, then per day and region, counting members (``quantity``) and
    summing ``learn_times`` (``times``).  The result is a list with one
    inner list per day, each holding at most ``top_n`` entries ordered by
    descending member count.

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the msgpack-packed result.
    :param m_city_code_list: optional city codes; when non-empty only rows
        whose ``city_code`` is in the list are aggregated.
    :param stat_type: ``1`` groups by province; any other value groups by
        province and city.
    :param top_n: maximum number of entries kept per day.
    :param time_range: optional range such as ``'7D'``, ``'3M'`` or ``'1Y'``
        (a month counts as 30 days, a year as 365); an unrecognised suffix
        leaves the day range unrestricted.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    stage_list = []
    # Only rows updated before the get_yesterday() boundary are considered.
    time_match = get_yesterday()
    stage_list.append(MatchStage({'updated_dt': {'$lt': time_match}}))
    if m_city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': m_city_code_list}}))

    s_code = ''
    e_code = ''
    if time_range:
        suffix = time_range[-1:]
        range_num = int(time_range.replace(suffix, ''))
        unit = suffix.upper()
        delta = None
        if unit == 'D':
            delta = datetime.timedelta(days=range_num)
        elif unit == 'M':
            delta = datetime.timedelta(days=range_num * 30)
        elif unit == 'Y':
            delta = datetime.timedelta(days=range_num * 365)
        if delta:
            # Read the clock once so both ends derive from the same instant.
            now = datetime.datetime.now()
            s_code = datetime2str(now - delta, date_format='%Y%m%d000000')
            e_code = datetime2str(now, date_format='%Y%m%d000000')
    if s_code and e_code:
        stage_list.append(MatchStage({'daily_code': {'$gte': s_code, '$lte': e_code}}))

    by_province = stat_type == 1
    member_group_id = {
        'daily_code': '$daily_code',
        'member_cid': '$member_cid',
        'province_code': '$province_code'
    }
    region_group_id = {
        'daily_code': '$_id.daily_code',
        'province_code': '$_id.province_code'
    }
    if not by_province:
        member_group_id['city_code'] = '$city_code'
        region_group_id['city_code'] = '$_id.city_code'

    stage_list.extend([
        # One row per (day, member, region) ...
        GroupStage(member_group_id, learn_times={'$sum': '$learn_times'}),
        # ... then count members and sum learn times per (day, region).
        GroupStage(region_group_id, count={'$sum': 1}, times={'$sum': '$learn_times'}),
        LookupStage(AdministrativeDivision, '_id.province_code', 'post_code', 'province_list'),
        LookupStage(AdministrativeDivision, '_id.city_code', 'post_code', 'city_list'),
        ProjectStage(**{
            '_id': False,
            'daily_code': '$_id.daily_code',
            'count': '$count',
            'times': '$times',
            'province_code': '$_id.province_code',
            'province_title': '$province_list.title',
            'ad_code': '$_id.province_code' if by_province else '$_id.city_code',
            'ad_title': '$province_list.title' if by_province else '$city_list.title'
        }),
        SortStage([('daily_code', ASC), ('count', DESC)])
    ])

    data = []
    stat_cursor = MemberDailyStatistics.sync_aggregate(stage_list)
    t_code, t_list = None, None
    while True:
        # Only the cursor advance is expected to signal exhaustion.
        try:
            daily_stat = stat_cursor.next()
        except StopIteration:
            break
        if not daily_stat:
            continue

        daily_code = daily_stat.daily_code
        if t_list is None or daily_code != t_code:
            # A new day starts: register its list right away.  Previously a
            # day was stored only once a (top_n + 1)-th row showed up, which
            # dropped every day having top_n rows or fewer.
            t_code = daily_code
            t_list = []
            data.append(t_list)
        if len(t_list) >= top_n:
            # Rows arrive ordered by descending count; the day is full.
            continue

        province_title = ''
        if daily_stat.province_title and stat_type == 2:
            province_title = daily_stat.province_title[0]
        t_list.append({
            'date': daily_code[:8],
            'ad_code': daily_stat.ad_code,
            'province_title': province_title,
            'title': daily_stat.ad_title[0] if daily_stat.ad_title else 'undefined',
            'quantity': daily_stat.count,
            'times': daily_stat.times
        })

    if not data:
        early_warning_empty("start_statistics_member_top_n", cache_key, locals(),
                            '每日参与TOP5统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_member_active(cache_key, m_city_code_list, province_code_list, city_code_list,
                                gender_list, age_group_list, education_list):
    """Count members per ``learning_code`` and cache the distribution.

    ``MemberLearningDayStatistics`` rows are grouped by ``learning_code``;
    the first 8 groups in ascending key order are kept.  The cached value is
    a dict with ``data_list`` (``{'days', 'quantity'}`` entries) and
    ``all_members`` (the sum of the listed quantities).

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the msgpack-packed result.
    :param m_city_code_list: optional city codes applied as a separate match.
    :param province_code_list: optional province code filter.
    :param city_code_list: optional city code filter.
    :param gender_list: optional gender filter; values are converted to int.
    :param age_group_list: optional age group filter; values are converted to int.
    :param education_list: optional education filter; values are converted to int.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    stage_list = []
    if m_city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': m_city_code_list}}))
    # Only rows updated before the get_yesterday() boundary are considered.
    time_match = get_yesterday()
    stage_list.append(MatchStage({'updated_dt': {'$lt': time_match}}))

    query_dict = {}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {'$in': [int(s_gender) for s_gender in gender_list]}
    if age_group_list:
        query_dict['age_group'] = {'$in': [int(s_age_group) for s_age_group in age_group_list]}
    if education_list:
        query_dict['education'] = {'$in': [int(s_education) for s_education in education_list]}
    if query_dict:
        stage_list.append(MatchStage(query_dict))

    stage_list.extend([
        GroupStage('learning_code', quantity={'$sum': 1}),
        SortStage([('_id', ASC)]),
        LimitStage(8)
    ])

    mld_stat_cursor = MemberLearningDayStatistics.sync_aggregate(stage_list)
    data = []
    while True:
        # Only the cursor advance is expected to signal exhaustion.
        try:
            mld_stat = mld_stat_cursor.next()
        except StopIteration:
            break
        if mld_stat:
            data.append({'days': mld_stat.id, 'quantity': mld_stat.quantity})

    all_members = sum(d.get('quantity') for d in data)
    data_dict = {'data_list': data, 'all_members': all_members}

    # Test the row list, not the wrapper dict: the wrapper always has two
    # keys, so the previous check could never trigger the warning.
    if not data:
        early_warning_empty("start_statistics_member_active", cache_key, locals(),
                            '答题活跃度统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data_dict))
def do_statistics_quiz_trends(cache_key, stat_type, m_city_code_list, province_code_list,
                              city_code_list, gender_list, age_group_list, education_list,
                              time_range):
    """Merge the trend counts of both history sources and cache them.

    ``do_stat_in_history`` is run for ``MemberGameHistory`` and
    ``MemberCheckPointHistory`` with the same filters; values sharing a key
    are added.  The cached value is a list of single-entry ``{key: value}``
    dicts, one per key of the game history result.

    :param cache_key: cache key receiving the in-progress marker (20 minute
        TTL) and, finally, the msgpack-packed result.
    :param stat_type: forwarded unchanged to ``do_stat_in_history``.
    :param m_city_code_list: optional city codes applied as a match stage.
    :param province_code_list: optional province code filter.
    :param city_code_list: optional city code filter.
    :param gender_list: optional filter on the ``sex`` field.
    :param age_group_list: optional age group filter.
    :param education_list: optional education filter.
    :param time_range: optional range such as ``'7D'``, ``'3M'`` or ``'1Y'``
        (a month counts as 30 days, a year as 365) counted back from the
        ``get_yesterday()`` boundary; an unrecognised suffix keeps the
        default "up to yesterday" filter.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 20 * 60)

    basic_stages = []
    if m_city_code_list:
        basic_stages.append(MatchStage({'city_code': {'$in': m_city_code_list}}))
    if province_code_list:
        basic_stages.append(MatchStage({'province_code': {'$in': province_code_list}}))
    if city_code_list:
        basic_stages.append(MatchStage({'city_code': {'$in': city_code_list}}))
    if gender_list:
        basic_stages.append(MatchStage({'sex': {"$in": gender_list}}))
    if age_group_list:
        basic_stages.append(MatchStage({'age_group': {'$in': age_group_list}}))
    if education_list:
        basic_stages.append(MatchStage({'education': {'$in': education_list}}))

    yesterday = get_yesterday()
    time_match = MatchStage({'created_dt': {'$lte': yesterday}})
    if time_range:
        suffix = time_range[-1:]
        range_num = int(time_range.replace(suffix, ''))
        unit = suffix.upper()
        delta = None
        if unit == 'D':
            delta = datetime.timedelta(days=range_num)
        elif unit == 'M':
            delta = datetime.timedelta(days=range_num * 30)
        elif unit == 'Y':
            delta = datetime.timedelta(days=range_num * 365)
        # An unknown suffix leaves ``delta`` unset; subtracting it used to
        # raise TypeError.  Keep the default filter in that case.
        if delta:
            start_dt = yesterday - delta
            time_match = MatchStage({'created_dt': {'$gt': start_dt, '$lt': yesterday}})

    data = []
    game_data = do_stat_in_history(MemberGameHistory, time_match, basic_stages, stat_type)
    ckpt_data = do_stat_in_history(MemberCheckPointHistory, time_match, basic_stages, stat_type)
    # NOTE(review): only keys of ``game_data`` are emitted, so a key present
    # solely in ``ckpt_data`` never reaches the result -- confirm whether
    # that is intended before changing the output order.
    for k, v in game_data.items():
        try:
            ckpt_data[k] += v
        except KeyError:
            ckpt_data[k] = v
        data.append({k: ckpt_data[k]})

    if not data:
        early_warning_empty("start_statistics_quiz_trends", cache_key, locals(),
                            '答题趋势统计数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))
def do_statistics_learning_situation(cache_key, chart_type=None, m_city_code_list=None,
                                     gender_list=None, province_code_list=None,
                                     city_code_list=None, age_group_list=None,
                                     education_list=None, dimension=None, time_range=None,
                                     dimension_code=None):
    """Aggregate answered/correct subject totals per day or learning day and cache them.

    With ``chart_type == 1`` the rows of ``MemberDailyDimensionStatistics``
    are grouped by ``daily_code`` (keys are cut to their first 8
    characters); otherwise ``MemberLearningDayDimensionStatistics`` is
    grouped by ``learning_code``.  The cached value is a list of
    ``{key: {'total', 'correct'}}`` dicts in ascending key order.

    :param cache_key: cache key receiving the in-progress marker (5 minute
        TTL) and, finally, the msgpack-packed result.
    :param chart_type: ``1`` for the per-day chart; ``2`` additionally keeps
        only groups whose key is ``<= 20``.
    :param m_city_code_list: optional city codes applied as a separate match.
    :param gender_list: optional gender filter; values are converted to int.
    :param province_code_list: optional province code filter.
    :param city_code_list: optional city code filter.
    :param age_group_list: optional age group filter; values are converted to int.
    :param education_list: optional education filter; values are converted to int.
    :param dimension: optional JSON object string mapping a dimension key to
        the list of accepted values; unparsable input is ignored.
    :param time_range: passed to ``get_daily_code_range`` when
        ``chart_type == 1``.
    :param dimension_code: passed to ``get_dimension`` to resolve the
        parent/child dimension pair to match on.
    :return: None
    """
    RedisCache.set(cache_key, KEY_CACHE_REPORT_DOING_NOW, 5 * 60)

    stage_list = []
    # Only rows created before the get_yesterday() boundary are considered.
    time_match = get_yesterday()
    stage_list.append(MatchStage({'created_dt': {'$lt': time_match}}))

    s_code, e_code = '', ''
    if chart_type == 1:
        s_code, e_code = get_daily_code_range(time_range)

    # Gate the filter on the list it actually uses.  It used to test
    # ``city_code_list``, which either matched on ``$in: None`` or skipped
    # the ``m_city_code_list`` restriction altogether.
    if m_city_code_list:
        stage_list.append(MatchStage({'city_code': {'$in': m_city_code_list}}))

    parent_dimension_cid, dimension_cid = get_dimension(dimension_code)
    if parent_dimension_cid and dimension_cid:
        stage_list.append(
            MatchStage({'dimension.%s' % parent_dimension_cid: dimension_cid}))

    query_dict = {}
    if s_code and e_code:
        query_dict['daily_code'] = {'$gte': s_code, '$lte': e_code}
    if province_code_list:
        query_dict['province_code'] = {'$in': province_code_list}
    if city_code_list:
        query_dict['city_code'] = {'$in': city_code_list}
    if gender_list:
        query_dict['gender'] = {'$in': [int(s_gender) for s_gender in gender_list]}
    if age_group_list:
        query_dict['age_group'] = {'$in': [int(s_age_group) for s_age_group in age_group_list]}
    if education_list:
        query_dict['education'] = {'$in': [int(s_education) for s_education in education_list]}
    if dimension:
        # Best effort: a malformed filter is skipped instead of failing the
        # whole statistic.  Only the errors parsing can produce are caught.
        try:
            s_dimension = json.loads(dimension)
            for k, v in s_dimension.items():
                query_dict['dimension.%s' % k] = {'$in': v}
        except (ValueError, TypeError, AttributeError):
            pass
    if query_dict:
        stage_list.append(MatchStage(query_dict))

    stage_list.append(
        GroupStage('daily_code' if chart_type == 1 else 'learning_code',
                   total={'$sum': '$subject_total_quantity'},
                   correct={'$sum': '$subject_correct_quantity'}))
    stage_list.append(SortStage([('_id', ASC)]))
    if chart_type == 2:
        stage_list.append(MatchStage({'_id': {'$lte': 20}}))

    data = []
    if chart_type == 1:
        stat_cursor = MemberDailyDimensionStatistics.sync_aggregate(stage_list)
    else:
        stat_cursor = MemberLearningDayDimensionStatistics.sync_aggregate(stage_list)
    while True:
        # Only the cursor advance is expected to signal exhaustion.
        try:
            md_stat = stat_cursor.next()
        except StopIteration:
            break
        if not md_stat:
            continue
        key = md_stat.id[:8] if chart_type == 1 else md_stat.id
        data.append({key: {'total': md_stat.total, 'correct': md_stat.correct}})

    if not data:
        early_warning_empty("start_statistics_learning_situation", cache_key, locals(),
                            '学习效果中数据为空,请检查!')
    RedisCache.set(cache_key, msgpack.packb(data))