Пример #1
0
 def download_cat_property(cls):
     '''
     Download the properties of every category and store them in MongoDB.

     For each ``_id`` in ``cat_coll`` this calls ``get_cat_property(cat_id, i)``
     with ``i`` = 1 and 2, merges the properties found in both responses,
     upserts one ``cat_prop_val_coll`` document per (cat_id, pid) and finally
     writes the list of property ids (first-seen order) to the category
     document.
     '''
     cat_id_list = [cat['_id'] for cat in cat_coll.find({}, {'_id': 1})]
     for cat_id in cat_id_list:
         # property_list: pids in first-seen order across BOTH calls below.
         # prop_meta_dict: pid -> name/must/multi of the first occurrence.
         # prop_value_dict: pid -> {vid: value document}.
         # Metadata and values live in separate dicts so the pid type never
         # matters (previously int and str(pid) keys shared a single dict).
         property_list, prop_meta_dict, prop_value_dict = [], {}, {}
         for i in range(1, 3):
             result = get_cat_property(cat_id, i)
             # Either level may be missing from the response; treat that as
             # "no properties" for this call.
             item_props = getattr(result, 'item_props', None)
             item_prop = getattr(item_props, 'item_prop', None)
             for prop in item_prop or ():
                 pid = prop.pid
                 if pid not in prop_meta_dict:
                     # Do not reset property_list per call: pids returned by
                     # the first call must stay in the category's list.
                     property_list.append(pid)
                     prop_meta_dict[pid] = {
                         'name': prop.name,
                         'must': prop.must,
                         'multi': prop.multi
                     }
                 value_dict = prop_value_dict.setdefault(pid, {})
                 if hasattr(prop, 'prop_values'):
                     for prp_val in prop.prop_values.prop_value:
                         value_dict[prp_val.vid] = {
                             'vid': prp_val.vid,
                             'name': prp_val.name,
                             'is_parent': getattr(prp_val, 'is_parent', False)
                         }
         for pid in property_list:
             property_dict = prop_meta_dict[pid]
             cat_prop_val_coll.update({
                 'cat_id': cat_id,
                 'pid': pid
             }, {
                 '$set': {
                     'name': property_dict['name'],
                     'must': property_dict['must'],
                     'multi': property_dict['multi'],
                     # list(...) so a real list is stored on any Python version
                     'prop_value_list': list(prop_value_dict[pid].values())
                 }
             },
                                      upsert=True)
         cat_coll.update({'_id': cat_id},
                         {'$set': {
                             'property_list': property_list
                         }})
Пример #2
0
 def get_cat_top_words(cls):
     '''
     Fetch the top words of every category.

     Category ids come from the 'cat_set' members of WordCat.r_wckeyword; when
     that set is empty or holds fewer than 10000 ids, every _id in cat_coll is
     used instead. get_cat_top.delay is called once per category id and the
     collected return values are handed to cls.monitor_result.
     '''
     category_ids = WordCat.r_wckeyword.smembers('cat_set')
     if not (category_ids and len(category_ids) >= 10000):
         id_docs = cat_coll.find({}, {'_id': 1})
         category_ids = [doc['_id'] for doc in id_docs]
     pending_results = [
         get_cat_top.delay(category_id) for category_id in category_ids
     ]
     cls.monitor_result('add cat top words now len = %s and total len = %s',
                        pending_results)
Пример #3
0
    def update_all_cat(cls):
        '''
        Synchronise cat_coll with the categories returned by get_catinfo_new.

        Starts from get_catinfo_new(0), then repeatedly asks
        get_catinfo_new(2, ids) for the next level until an empty result comes
        back. Categories already stored are updated, unknown ones are bulk
        inserted, stored ids that were not returned (except _id 0) are removed,
        and finally Cat.compute_child_list() is called.

        NOTE(review): if get_catinfo_new returns nothing, every stored category
        except _id 0 is removed - confirm this is acceptable when the upstream
        call fails.
        '''
        # Local import: the file's own import block is not visible from here.
        import logging

        all_cat_list = []
        cat_dict = get_catinfo_new(0)
        all_cat_list.extend(cat_dict.values())

        def get_sub_cats_new(cat_id_list):
            # Collect the categories one level below cat_id_list, then recurse
            # on the ids just returned until nothing more comes back.
            cat_sub_dict = get_catinfo_new(
                2, [str(cat_id) for cat_id in cat_id_list])
            if cat_sub_dict:
                all_cat_list.extend(cat_sub_dict.values())
                get_sub_cats_new(cat_sub_dict.keys())

        get_sub_cats_new(cat_dict.keys())

        # Sets give O(1) membership tests inside the loop below.
        old_cat_id_set = set(
            cat['_id'] for cat in cat_coll.find({}, {'_id': 1}))
        new_cat_id_set, insert_list = set(), []
        for cat in all_cat_list:
            cat_id = cat['cat_id']
            new_cat_id_set.add(cat_id)
            fields = {
                'cat_name': cat['cat_name'],
                'parent_cat_id': cat['parent_cat_id'],
                'cat_level': cat['cat_level'],
                'cat_path_name': cat['cat_path_name'],
                'cat_path_id': cat['cat_path_id'],
                'last_sync_time': cat['last_sync_time']
            }
            if cat_id in old_cat_id_set:
                cat_coll.update({'_id': cat_id}, {'$set': fields})
            else:
                fields['_id'] = cat_id
                insert_list.append(fields)

        # _id 0 is never removed, even when it was not returned.
        remove_list = list(old_cat_id_set - new_cat_id_set - {0})

        if insert_list:
            try:
                cat_coll.insert(insert_list, continue_on_error=True, safe=True)
            except Exception:
                # Best effort, as before: a failed bulk insert must not stop
                # the removal / child-list steps. It is logged instead of
                # being silently dropped, and KeyboardInterrupt / SystemExit
                # are no longer swallowed.
                logging.getLogger(__name__).exception(
                    'bulk insert of new categories failed')
        if remove_list:
            cat_coll.remove({'_id': {'$in': remove_list}})
        Cat.compute_child_list()
Пример #4
0
def _sum_adgroups_by_cat_path(result):
    """Sum adgroup totals for every prefix of each aggregated category path.

    Each row's 'cat_path' is converted to a string and split on whitespace
    into category ids; the row's 'total' is added to every '>'-joined prefix
    of that id list.

    Returns a ``(cat_mapping, cat_id_set)`` tuple: ``cat_mapping`` maps the
    '>'-joined id path to its summed total, ``cat_id_set`` holds the last id
    of every path as an int.
    """
    cat_mapping = {}
    cat_id_set = set()
    for temp in result:
        total = temp['total']
        if 'cat_path' not in temp:
            # In theory this should not happen.
            continue
        path_parts = []
        for cat_id_str in str(temp['cat_path']).split():
            path_parts.append(cat_id_str)
            cat = '>'.join(path_parts)
            # str(None) == 'None': rows whose category_ids is None are skipped.
            if not cat or cat == 'None':
                continue
            if cat not in cat_mapping:
                cat_mapping[cat] = 0
                cat_id_set.add(int(cat_id_str))
            cat_mapping[cat] += total
    return cat_mapping, cat_id_set


def _build_cat_statistics(cat_mapping, cat_name_mapping):
    """Turn path totals into a dict of per-category entries keyed by int id.

    Every id in a path is translated to its name through ``cat_name_mapping``;
    ids without a name are logged and left out of the joined name.
    """
    cat_data = {}
    for cat_all_path, total in cat_mapping.items():
        cat_id_list = cat_all_path.split('>')
        # Translate the ids of the path into category names.
        cat_all_name = []
        for cat_id in cat_id_list:
            cat_name = cat_name_mapping.get(int(cat_id), '')
            if cat_name:
                cat_all_name.append(cat_name)
            else:
                # In theory this should not happen.
                log.error(
                    'error : it should not happen, the program is wrong if it appear, cat_id=%s'
                    % (cat_id))
        cat_data[int(cat_id_list[-1])] = {
            'cat_id': cat_id_list[-1],
            'cat_name': '>'.join(cat_all_name),
            'adgroup_total': total
        }
    return cat_data


def get_statictics_category_info():
    """Rebuild the cached per-category adgroup statistics when missing.

    If ``CacheKey.CRM_CAT_ADG_STATISTICS`` is already cached nothing is done.
    Otherwise adgroups are aggregated by ``category_ids``, totals are rolled
    up along each category path, ids are resolved to names via ``cat_coll``
    and the zlib-compressed JSON result is cached for 10 days.

    A lock key (10 hour expiry) keeps concurrent callers from repeating the
    aggregation; it is released when this call finishes, including on error.

    Returns:
        ``{}`` when another caller holds the lock or the aggregation fails,
        ``None`` otherwise.
        NOTE(review): the statistics themselves are never returned - callers
        presumably read the cache key; confirm.
    """
    key = CacheKey.CRM_CAT_ADG_STATISTICS
    if crm_cache.get(key):
        return None

    lock_key = CacheKey.CRM_CAT_ADG_STATISTICS_LOCK
    # The lock used to be deleted right before this check, which made the
    # check always pass; it is now only released in the ``finally`` below.
    # NOTE(review): get-then-set is not atomic - confirm whether crm_cache
    # offers an atomic "add" if strict exclusion is required.
    if crm_cache.get(lock_key):
        return {}
    crm_cache.set(lock_key, True, 10 * 60 * 60)

    try:
        aggr_pipeline = [
            {
                '$group': {
                    '_id': {
                        'category_ids': '$category_ids'
                    },
                    'adgroup_total': {
                        '$sum': 1
                    }
                }
            },
            # A '$match' stage keeping only groups with adgroup_total >= 5000
            # used to sit here; it is currently disabled.
            {
                '$project': {
                    '_id': 0,
                    'cat_path': "$_id.category_ids",
                    'total': '$adgroup_total'
                }
            },
            {
                '$sort': {
                    'cat_path': 1,
                    'total': -1
                }
            }
        ]

        try:
            result = adg_coll.aggregate(aggr_pipeline)['result']
        except Exception as e:
            log.error('aggregate adgroup by category error, e=%s' % e)
            return {}

        cat_mapping, cat_id_set = _sum_adgroups_by_cat_path(result)
        cat_name_mapping = {
            cat['_id']: cat['cat_name']
            for cat in cat_coll.find({'_id': {
                "$in": list(cat_id_set)
            }}, {'cat_name': 1})
        }
        cat_data = _build_cat_statistics(cat_mapping, cat_name_mapping)

        data = zlib.compress(json.dumps(cat_data), 5)
        crm_cache.set(key, data, 10 * 24 * 60 * 60)
    finally:
        # Always release the lock so a failure cannot block rebuilds until
        # the lock expires.
        crm_cache.delete(lock_key)
Пример #5
0
 def load_cat_prop_dict(cls):
     '''
     Call cls.load_single_cat_prop once for every category _id in cat_coll.

     All ids are read into a list before the first call is made.
     '''
     id_docs = cat_coll.find({}, {'_id': 1})
     all_cat_ids = [doc['_id'] for doc in id_docs]
     for current_id in all_cat_ids:
         cls.load_single_cat_prop(current_id)