Пример #1
0
def deal_quan_category(city,date):
    '''
    处理店圈的菜品类信息,一级品类,二级品类信息
    :param city:
    :param date:
    :return:
    '''
    sql = """
        select DISTINCT(t1.category_id_level1),t1.category_name_level1,0 pid         
        from t_e_rest_category_city_1712 t1
        where t1.city = '%s'
        and t1.date = '%s'
        union 
        select DISTINCT(t1.category_id_level2),t1.category_name_level2,t1.category_id_level1 pid         
        from t_e_rest_category_city_1712 t1
        where t1.city = '%s'
        and t1.date = '%s'
        
          """%(city,date,city,date)
    result = db2.queryForList(sql, None)
    print(result)

    sql2 = "insert into quan_category values"
    batch = BatchSql(sql2)

    for item in result:
        quan_category = []
        quan_category.extend([0,item[1],date,item[0],item[2],time,time])
        print(quan_category)
        batch.addBatch(quan_category)
        if batch.getSize() > 10000:
            db1.update(batch)
            batch.cleanBatch()
    db1.update(batch)
Пример #2
0
def deal_comment_data(account,date,coupon_name,coupon_content):
    coupon_type = []
    """
    门店新用户立减:首
    满减活动:减
    折扣商品:折
    下单返券:返
    新用户立减活动:首
    商家优惠券:卷
    """
    for item in coupon_name:
        if item == '限量抢购-9.9元晚餐':
            coupon_type.append('抢')
        if item == '限量抢购-19.9元晚餐':
            coupon_type.append('抢')
        if item == '新用户立减':
            coupon_type.append('首')
    for i in range(len(coupon_name)):
        coupon_data = []
        name = coupon_name[i]
        type = coupon_type[i]
        content = coupon_content[i]
        coupon_data.extend([0, str(account), date, str(1), str(type), str(name), str(content)])
        print(coupon_data)

        batch = BatchSql(sql3)
        batch.addBatch(coupon_data)
        db.update(batch)
Пример #3
0
def deal_quan_monitor_activity_data(city,date,all_shopid_list):
    '''
    处理店圈店铺的活动信息
    :param city: 所属城市
    :param date: 数据日期
    :param all_shopid_list: 店圈店铺的ID列表
    :return:
    '''
    sql="""
        select t1.rest_id,t1.description,t1.active_type
        from t_e_rest_active_city_1712 t1
        where t1.city  = '%s'
        and t1.date = '%s'
        and t1.rest_id IN %s
        """%(city,date,all_shopid_list)
    sql2 = "insert into quan_monitor_activity_data values"
    batch = BatchSql(sql2)
    result = db2.queryForList(sql, None)
    print(result)

    for shoplist in all_shopid_list:
        all_activity = []
        quan_monitor_activity_data = []
        for item in result:
            activity = dict(type=item[2],content=item[1])
            if item[0] == shoplist:
                all_activity.append(activity)
        all_activity = json.dumps(all_activity)
        quan_monitor_activity_data.extend([0,date,shoplist,str(all_activity),time,time])
        print(quan_monitor_activity_data)
        batch.addBatch(quan_monitor_activity_data)
        if batch.getSize() > 10000:
            db1.update(batch)
            batch.cleanBatch()
    db1.update(batch)
Пример #4
0
def rate_analyze(id, comment_shopid, date, comment_content):
    '''
    对评论数据进行分词识别出相应的类别,然后写入数据库中
    :param id: 评论ID
    :param date: 当前日期
    :param comment_content:评论的内容
    :return:
    '''
    time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    key_vaule = get_config()
    db2 = DBUtils(('116.62.70.68', 3306, 'bigdata', 'gisfood20171220@nj',
                   'big_data', 'utf8mb4'))
    sql2 = "insert into comment_flag VALUES"
    seg_list = jieba.cut(comment_content)  #利用jieba分词,取出句子中所有的分词内容
    print("评论内容:", comment_content)
    flag = 0  #标志位,0:分词中没有差评信息  1:分词中存在差评信息
    keywords = ' '
    for item in seg_list:  #对一条评论的分词结果进行匹配
        for j in range(len(key_vaule)):  #在词库中进行配对
            if key_vaule.iloc[j, 0] == item:
                print("有差评内容:", item)
                keywords = item
                comment_class = int(key_vaule.iloc[j, 1])  #将评论的标签置为相应的值
                flag = 1
                break
        if flag == 0:
            keywords = r' '
            comment_class = 0  #没有差评的时候标签置为0

    print("评论ID:" + str(id), "所属分类:" + str(comment_class))
    batch = BatchSql(sql2)  #写入数据库
    batch.addBatch(
        [0, id, comment_shopid, date, comment_class, keywords, time, None])
    db2.update(batch)
Пример #5
0
def deal_quan_shop(city,date,all_shopid_list,cate_list):
    '''
    处理店圈店铺的基本数据
    :param city:  店圈所属城市
    :param date:  数据分析日期
    :param all_shopid_list:  店圈数据的店铺列表的数据
    :param cate_list: 店圈数据的品类列表
    :return:
    '''
    sql = """
          select t1.rest_id,t1.rest_name,t1.delivery_id,t1.longitude,t1.latitude,t1.is_new
          from t_e_rest_list_city_1712 t1
          where t1.city = '%s'
          and t1.date = '%s'
          and t1.rest_id IN %s
          """%(city,date,all_shopid_list)

    sql2 = "insert into quan_shop values"
    batch = BatchSql(sql2)

    result = db2.queryForList(sql, None)
    print(result)
    for item in result:
        for cate in cate_list:
            if item[0] == cate[0]:
                quan_shop = []
                quan_shop.extend([0,item[0],1,date,item[1],item[2],item[3],item[4],item[5],cate[1],time,time])
                print(quan_shop)
                batch.addBatch(quan_shop)

        if batch.getSize() > 1000:
            db1.update(batch)
            batch.cleanBatch()
    db1.update(batch)
    def deal_meituan_customer_prefer_activity(self):
        '''
       处理同行顾客喜欢的活动:id,店铺id,店铺名称,日期,
                           同行顾客喜欢的活动,活动类型

       :return:
       '''
        customer_prefer_activity = self.customerPerfer['data']['pubCusAct']
        for item in customer_prefer_activity:
            customer_prefer_activity = []
            customer_prefer_activity_name = item[0]
            customer_prefer_activity_pic = item[1]
            customer_prefer_activity_type = self.deal_customer_prefer_activity_type(
                customer_prefer_activity_pic)
            customer_prefer_activity.extend([
                0, self.shopid, self.shopname,
                str(self.yesterday), customer_prefer_activity_name,
                customer_prefer_activity_type
            ])
            print("顾客喜欢的活动:", customer_prefer_activity)

            sql = "insert into meituan_customer_prefer_activity VALUES"
            batch = BatchSql(sql)
            batch.addBatch(customer_prefer_activity)
            self.db2.update(batch)
Пример #7
0
def run():
    # rest_id,rest_name = get_shopid()
    rest_name = ['义务', '明发', '同曦', '殷象']
    rest_id = [160279990, 161341608, 161378073, 161313783]
    for item in rest_id:
        revenue = []
        print("店铺%s的12月份营收总金额为:" % item)
        shop_money_off = get_shop_money_off(item)
        shop_menu_order = get_shop_menu_order(item)
        all_single_menu_amount = deal_single_menu(shop_menu_order)
        all_activity_menu_amount = deal_activity_menu(shop_menu_order)
        all_common_menu_amount = deal_common_menu(shop_money_off,
                                                  shop_menu_order)

        all_amount = all_single_menu_amount + all_activity_menu_amount + all_common_menu_amount
        print("营收总金额:", all_amount)
        revenue.extend(
            [item, rest_name[rest_id.index(item)], '2017-12-31', all_amount])
        print(revenue)
        print("------------------------------")
        print(rest_id.index(item))

        batch = BatchSql(sql)
        batch.addBatch(revenue)
        db.update(batch)
Пример #8
0
def deal_score_data(score):
    '''
    处理商家评分信息
    :param score:
    :return:
    '''
    print("处理数据", score)
    batch = BatchSql(sql2)
    batch.addBatch(score)
    db.update(batch)
Пример #9
0
class ParseDeliveymode(object):
    def __init__(self,resource_path, city, date,save_date_short,db):
        '''
        :param resource_path: 城市数据文件路径
        :param city: 城市拼音
        :param date: 保存日期
        :param save_date_short:保存日期简写
        :param db: 数据库配置信息
        '''
        self.resource_path = resource_path
        self.city = city
        self.date = date
        self.save_date_short = save_date_short
        self.db = db

        self.sql = "insert into t_e_delivery_mode_city_pre_%s values" % save_date_short
        self.batch = BatchSql(self.sql)

    def parse_data(self, data):
        try:
            rest_id = str(data['param'])
            if len(rest_id) <= 11:
                data = data['data'].get('delivery_mode')
                delever_id = getMapValue(data,"id")
                delever_text = getMapValue(data, "text")
                param = [self.city, self.date,rest_id,delever_text,delever_id]
                self.batch.addBatch(param)
                if self.batch.getSize() > 100000:
                    self.db.update(self.batch)
                    self.batch.cleanBatch()
            else:
                print("错误数据", data)
        except Exception as e:
            print(traceback.print_exc())
            print('解析数据报错\n错误数据:{}'.format(data))

    def run(self):
        f_name = os.path.join(self.resource_path, 'rest_info.pickle')
        print("解析文件:", f_name)
        with open(f_name, "rb+") as f:
            while True:
                try:
                    data = pickle.load(f)
                    self.parse_data(data)
                except EOFError as e:
                    print("文件读取完成", e)
                    break
                except Exception as e2:
                    print("报错", e2)
                    break

        if self.batch.getSize() > 0:
            self.db.update(self.batch)
            self.batch.cleanBatch()
    def deal_meituan_daily_operational_data(self):
        '''
       运营日报数据入库:id,店铺id,店铺名称,日期,
                      日有效订单量,订单收入,
                      曝光量,进店量,下单量,进店转化率,下单转化绿
                      新客人数,新客占比,旧客人数,旧客占比
                      推广资金,(推广点击次数,单词点击费用)
       :return:
       '''
        meituan_daily_operational_data = []
        effectiveOrders = self.effectiveOrders['data']['effectiveOrders']
        settleAcc = self.effectiveOrders['data']['settleAcc']

        exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][
            'exposureNum']
        visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum']
        xiadanNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum']
        visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate']
        orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate']

        newOrderNum = self.customerAnalysis['data']['newOrderNum']
        oldOrderNum = self.customerAnalysis['data']['oldOrderNum']
        orderNum = self.customerAnalysis['data']['orderNum']
        if orderNum == 0:
            newOrderNum_percent = 0
            oldOrderNum_percent = 0
        else:
            newOrderNum_percent = newOrderNum / orderNum
            oldOrderNum_percent = oldOrderNum / orderNum

        history_consume_list = self.history_consume['data']['flowVoList']
        popularize_amount = 0
        for item in history_consume_list:
            history_consume_list_date = str(item['date']).split(' ')[0]
            history_consume_list_reason = item['reason']
            if history_consume_list_date == str(
                    self.yesterday) and history_consume_list_reason == '推广消费':
                popularize_amount += abs(item['amount'])
                # print(popularize_amount)

        meituan_daily_operational_data.extend([
            0, self.shopid, self.shopname,
            str(self.yesterday), effectiveOrders, settleAcc, exposureNum,
            visitNum, xiadanNum, visitRate, orderRate, newOrderNum,
            newOrderNum_percent, oldOrderNum, oldOrderNum_percent,
            popularize_amount
        ])
        print("美团运营日报数据:", meituan_daily_operational_data)
        sql = "insert into meituan_daily_operational_data VALUES"
        batch = BatchSql(sql)
        batch.addBatch(meituan_daily_operational_data)
        self.db2.update(batch)
Пример #11
0
def deal_score_data(base_data,score):
    # 用户评论信息表的内容
    rating_data = []
    rating_data.append(str(base_data[0]))  # 店铺id
    rating_data.append(base_data[1])  # 日期
    rating_data.append(str('1'))  # 饿了么平台标签
    rating_data.append(str(score[0]))  # 用户评论
    print(rating_data)

    # 用户评分数据入库表
    batch = BatchSql(sql2)
    batch.addBatch(rating_data)
    db.update(batch)
Пример #12
0
def deal_coupon_data(account, date, coupon_name, coupon_content):
    '''
    处理店铺活动信息
    :param account:
    :param date:
    :param coupon_name:
    :param coupon_content:
    :return:
    '''
    coupon_type = []
    """
    门店新用户立减:首
    满减活动:减
    折扣商品:折
    下单返券:返
    新用户立减活动:首
    商家优惠券:卷
    """
    for item in coupon_name:
        if item == '折扣商品':
            coupon_type.append('折')
        if item == '下单返券':
            coupon_type.append('返')
        if item == '门店新用户立减':
            coupon_type.append('首')
        if item == '商家优惠券':
            coupon_type.append('卷')
        if item == '满减活动':
            coupon_type.append('减')
        if item == '新用户立减活动':
            coupon_type.append('首')
    for i in range(len(coupon_name)):
        coupon_data = []
        name = coupon_name[i]
        type = coupon_type[i]
        content = coupon_content[i]
        coupon_data.extend([
            0,
            str(account), date,
            str(2),
            str(type),
            str(name),
            str(content)
        ])
        print(coupon_data)

        batch = BatchSql(sql3)
        batch.addBatch(coupon_data)
        db.update(batch)
    def deal_meituan_Operational_data_analysis(self):
        '''
       处理商家后台数据分析的数据:id,店铺id,店铺名称,日期,
                              有效订单,订单收入,
                              有效订单打败同行,订单收入打败同行,
                              下单人数,新客人数,老客人数,
                              曝光人数,访问人数,下单人数,访问转化率,下单转化率,
                              商家评分,口味评分,包装评分,配送评分,
       :return:
       '''
        meituan_Operational_data_analysis = []
        effectiveOrders = self.effectiveOrders['data']['effectiveOrders']
        settleAcc = self.effectiveOrders['data']['settleAcc']

        effectiveOrdersRankPercent = self.PeersCompareAnalysis['data'][
            'effectiveOrdersRankPercent']
        settleAccRankPercent = self.PeersCompareAnalysis['data'][
            'settleAccRankPercent']

        newOrderNum = self.customerAnalysis['data']['newOrderNum']
        oldOrderNum = self.customerAnalysis['data']['oldOrderNum']
        orderNum = self.customerAnalysis['data']['orderNum']

        exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][
            'exposureNum']
        visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum']
        xiadanNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum']
        visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate']
        orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate']

        bizScore = self.commentSummary['data']['bizScore']
        tasteScore = self.commentSummary['data']['tasteScore']
        packScore = self.commentSummary['data']['packScore']
        deliveryScore = self.commentSummary['data']['deliveryScore']

        meituan_Operational_data_analysis.extend([
            0, self.shopid, self.shopname,
            str(self.yesterday), effectiveOrders, settleAcc,
            effectiveOrdersRankPercent, settleAccRankPercent, orderNum,
            newOrderNum, oldOrderNum, exposureNum, visitNum, xiadanNum,
            visitRate, orderRate, bizScore, tasteScore, packScore,
            deliveryScore
        ])
        print('商家后台数据分析:', meituan_Operational_data_analysis)

        sql = "insert into meituan_Operational_data_analysis VALUES"
        batch = BatchSql(sql)
        batch.addBatch(meituan_Operational_data_analysis)
        self.db2.update(batch)
    def deal_commentSummary(self):
        '''
        处理顾客评价的信息:店铺ID、时间、平台、评分、评分日比
        :return:
        '''
        commentSummary_content = []
        bizScore = self.commentSummary['data']['bizScore']
        commentSummary_content.extend(
            [self.shopid, self.date, self.platform, bizScore])
        print('店铺评分信息:', commentSummary_content)

        sql = "insert into rating_score VALUES "
        batch = BatchSql(sql)
        batch.addBatch(commentSummary_content)
        self.db.update(batch)
Пример #15
0
def deal_base_data(account, date, result):
    '''
    处理基础的几条信息
    :param account:
    :param date:
    :param result:
    :return:
    '''
    base_data = []
    base_data.extend([
        account, date, result[4], result[6], result[8], result[10], result[11]
    ])
    print("处理数据:", base_data)
    batch = BatchSql(sql)
    batch.addBatch(base_data)
    db.update(batch)
Пример #16
0
def deal_quan_monitor_hot_sku_data(city,date,all_shopid_list):
    '''
    处理店圈sku数据,9个店铺大约110万条数据
    :param city: 所属城市
    :param date: 数据日期
    :param all_shopid_list: 店圈店铺ID列表
    :return:
    '''
    sql="""
        select t1.rest_id,t1.food_name,t1.price,t1.food_id,t1.food_month_sales,t1.has_activity,t2.delivery_fee
        from t_e_rest_menu_level2_unique_city_nj_1712 t1,
             t_e_rest_list_city_1712 t2   
        where t1.city = '%s'
        and t1.date = '%s'
        and t1.rest_id IN %s
        and t1.food_month_sales != 0
        and t1.city = t2.city
        and t1.date = t2.date
        and t1.rest_id = t2.rest_id
        """%(city,date,all_shopid_list)
    sql2 = 'insert into quan_monitor_hot_sku_data values'
    batch = BatchSql(sql2)
    result = db2.queryForList(sql, None) 
    print(result)

    for shop_list in all_shopid_list:
        for item in result:
            quan_monitor_hot_sku_data = []
            if shop_list == item[0]:
                if item[5] == 1:   #特价菜
                    quan_monitor_hot_sku_data.extend([0,date,shop_list,item[1],item[2],item[2],item[4],time,time])
                    print(quan_monitor_hot_sku_data)
                    batch.addBatch(quan_monitor_hot_sku_data)
                elif item[5] == 0:  #满减菜品
                    sub_price = get_sub_price(city,date,shop_list,item[2])
                    if sub_price == None:
                        sub_price = 0
                    true_price = item[2]-sub_price
                    quan_monitor_hot_sku_data.extend([0,date,shop_list,item[1],item[2],true_price,item[4],time,time])
                    print(quan_monitor_hot_sku_data)
                    batch.addBatch(quan_monitor_hot_sku_data)
        if batch.getSize() > 10000:
            db1.update(batch)
            batch.cleanBatch()
    db1.update(batch)
    def deal_TrafficStats(self):
        '''
       处理数据流量分析的信息:店铺id、时间、曝光人数,访问人数,下单人数,访问转化率,下单转化率
       :return:
       '''
        TrafficStats = []
        exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][
            'exposureNum']
        visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum']
        orderNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum']
        visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate']
        orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate']
        TrafficStats.extend([
            self.shopid, self.date, exposureNum, visitNum, orderNum, visitRate,
            orderRate
        ])

        print('流量分析信息:', TrafficStats)
        sql = "insert into meituan_backstage VALUES "
        batch = BatchSql(sql)
        batch.addBatch(TrafficStats)
        self.db.update(batch)
    def deal_meituan_customer_precision_operation(self):
        '''
       处理精准营销的数据:id,店铺id,店铺名称,日期,
                        营销名称,营销活动人数
       :return:
       '''
        customer_precision_labelList = self.getCouponLabel['data']['labelList']
        for item in customer_precision_labelList:
            customer_precision_operation = []
            precision_operation_name = item['title']
            precision_operation_numbers = item['nums']

            customer_precision_operation.extend([
                0, self.shopid, self.shopname,
                str(self.yesterday), precision_operation_name,
                precision_operation_numbers
            ])
            print('精准营销的活动信息:', customer_precision_operation)

            sql = "insert into meituan_customer_precision_operation VALUES"
            batch = BatchSql(sql)
            batch.addBatch(customer_precision_operation)
            self.db2.update(batch)
    def deal_meituan_customer_prefer_analysis(self):
        '''
       处理同行顾客爱好:id,店铺id,店铺名称,日期
                      同行顾客爱好的菜品名,该菜品的同行百分比
       :return:
       '''
        customer_prefer_analysi = self.customerPerfer['data']['pubCusEat']
        for item in customer_prefer_analysi:
            meituan_customer_prefer_analysi = []
            customer_prefer_name = item[0]
            customer_prefer_percent = item[1]

            meituan_customer_prefer_analysi.extend([
                0, self.shopid, self.shopname,
                str(self.yesterday), customer_prefer_name,
                float(customer_prefer_percent)
            ])
            print('同行顾客爱好:', meituan_customer_prefer_analysi)

            sql = "insert into meituan_customer_prefer_analysis VALUES"
            batch = BatchSql(sql)
            batch.addBatch(meituan_customer_prefer_analysi)
            self.db2.update(batch)
Пример #20
0
def deal_quan_monitor_shop_data(city,date,all_shopid_list):
    '''
    处理店圈的配送费用和配送时间的信息
    :param city: 店圈所属城市
    :param date: 爬取数据的日期
    :param all_shopid_list: 所有店圈的店铺列表信息
    :return:
    '''
    sql = """
          select t1.rest_id,t1.order_month_sales,t2.deliver_time,t1.delivery_fee,t1.min_delivery_price,t2.overall_score
          from t_e_rest_list_city_1712 t1,
               t_e_rest_score_city_1712 t2
          where t1.city  = '%s'
          and t1.date = '%s'
          and t1.rest_id IN %s
          and t1.rest_id = t2.rest_id
          and t1.city = t2.city
          and t1.date = t2.date
          """%(city,date,all_shopid_list)

    sql2 = "insert into quan_monitor_shop_data values"
    batch = BatchSql(sql2)

    result = db2.queryForList(sql, None)

    for item in result:
        quan_monitor_shop_data = []
        score = float('%.1f'%float(item[5]))
        quan_monitor_shop_data.extend([0,date,item[0],item[1],item[2],item[3],item[4],score,time,time])
        print(quan_monitor_shop_data)
        batch.addBatch(quan_monitor_shop_data)

        if batch.getSize() > 10000:
            db1.update(batch)
            batch.cleanBatch()
    db1.update(batch)
    def deal_CustomerReminder(self):
        '''
       处理店铺催单信息:id,店铺id,日期,时间,平台名称,催单次数,催单时间
       :return:
       '''
        CustomerReminder_Info = self.customer_reminderInfo['wmOrderList']
        CustomerReminder_Data = self.customer_reminder['data']
        if CustomerReminder_Info != None:
            for item in CustomerReminder_Info:  #获取另一个催单账号信息借口获取催单的ID
                CustomerReminder_ID = item['wm_order_id_view']
                try:
                    for item in CustomerReminder_Data[str(
                            CustomerReminder_ID)]:  #根据催单ID获取相应的催单的时间信息
                        times = len(CustomerReminder_Data[str(
                            CustomerReminder_ID)])  #催单的次数
                        CustomerReminder = []
                        reminder_time_fmt = item['reminder_time_fmt']
                        response_time_fmt = item['response_time_fmt']
                        reminder_time_fmt = str(reminder_time_fmt).split(
                            ':')[1]
                        response_time_fmt = str(response_time_fmt).split(
                            ':')[1]
                        reminder_time = int(response_time_fmt) - int(
                            reminder_time_fmt)  #商家后台恢复催单信息的时长
                        CustomerReminder.extend([
                            0, self.shopid, self.date, self.time,
                            CustomerReminder_ID, 2, times, reminder_time
                        ])
                        print('客户催单信息:', CustomerReminder)

                        sql = "insert into daily_customer_reminder VALUES"
                        batch = BatchSql(sql)
                        batch.addBatch(CustomerReminder)
                        self.db.update(batch)
                except Exception:
                    pass
    def deal_Activitylist(self):
        '''
       处理店铺活动的信息:id,店铺id,活动id,时间,平台名称,活动类型,活动名称,活动内容
       :return:
       '''
        Activity_onGoingActs = self.activitylist['data']['onGoingActs']
        for item in Activity_onGoingActs:
            Activitylist = []
            actId = item['actId']
            actName = item['actName']
            poiPolicy = item['poiPolicy']

            type = item['type']
            Activity_type = self.deal_Activity_type(type)
            Activitylist.extend([
                0, self.shopid, actId, self.date, 2, Activity_type, actName,
                poiPolicy
            ])

            print('店铺活动信息:', Activitylist)
            sql = "insert into daily_shop_activity_data VALUES"
            batch = BatchSql(sql)
            batch.addBatch(Activitylist)
            self.db3.update(batch)
Пример #23
0
def deal_base_data(base_data):
    # 基本信息数据入库
    batch = BatchSql(sql)
    batch.addBatch(base_data)
    db.update(batch)
Пример #24
0
class ParseHotword(object):
    def __init__(self, resource_path, city, date, save_date_short,
                 err_rest_ids, db):
        '''
        :param resource_path: 城市数据文件路径
        :param city: 城市拼音
        :param date: 保存日期
        :param save_date_short:保存日期简写
        :param err_rest_ids: 错误店铺ID
        :param db: 数据库配置信息
        '''
        self.resource_path = resource_path
        self.city = city
        self.date = date
        self.save_date_short = save_date_short
        self.err_rest_ids = err_rest_ids
        self.db = db

        self.sql = "insert into t_e_hot_search_word_city_%s values" % save_date_short
        self.batch = BatchSql(self.sql)

    def parse_data(self, data):
        try:
            rest_id = str(data["param"][0])
            if rest_id in self.err_rest_ids:
                return
            if len(rest_id) <= 11:
                la = data["param"][1]
                lo = data["param"][2]
                for item in data["data"]:
                    search_word = getMapValue(item, "search_word")
                    if search_word != '-999':
                        self.batch.addBatch([
                            self.city, self.date, rest_id, la, lo, search_word
                        ])
                    if self.batch.getSize() > 100000:
                        self.db.update(self.batch)
                        self.batch.cleanBatch()
            else:
                print("错误数据", data)
                print('错误数据\n{}'.format(data))
        except Exception as e:
            print("解析数据报错", e)
            print('解析数据报错\n错误数据:{}'.format(data))

    def run(self):
        f_name = os.path.join(self.resource_path, 'hot_word.pickle')
        print("解析文件:", f_name)
        with open(f_name, "rb+") as f:
            while True:
                try:
                    data = pickle.load(f)
                    self.parse_data(data)
                except EOFError as e:
                    print("文件读取完成", e)
                    break
                except Exception as e2:
                    print("报错", e2)
                    break

        if self.batch.getSize() > 0:
            self.db.update(self.batch)
            self.batch.cleanBatch()
Пример #25
0
class ParseScore(object):
    def __init__(self,resource_path, city, date,save_date_short, err_rest_ids,db):
        '''
        :param resource_path: 城市数据文件路径
        :param city: 城市拼音
        :param date: 保存日期
        :param save_date_short:保存日期简写
        :param err_rest_ids: 错误店铺ID
        :param db: 数据库配置文件
        '''
        self.resource_path = resource_path
        self.city = city
        self.date = date
        self.save_date_short = save_date_short
        self.err_rest_ids = err_rest_ids
        self.db = db

        self.sql = "insert into t_e_rest_score_city_%s values" %save_date_short
        self.batch = BatchSql(self.sql)

    def parse_data(self,data):
        try:
            rest_id = data['param']
            if rest_id in self.err_rest_ids:
                return
            if len(rest_id) <= 11:
                self.batch.addBatch([self.city, self.date, rest_id,
                                getMapValue(data['data'], "compare_rating"),
                                getMapValue(data['data'], "deliver_time"),
                                getMapValue(data['data'], "food_score"),
                                getMapValue(data['data'], "overall_score"),
                                getMapValue(data['data'], "service_score")])
                if self.batch.getSize() > 100000:
                    self.db.update(self.batch)
                    self.batch.cleanBatch()
            else:
                print("错误数据", data)
                print('错误数据\n{}'.format(data))
        except Exception as e:
            print("解析数据报错", e)
            print('解析数据报错\n错误数据:{}'.format(data))


    def run(self):
        f_name = os.path.join(self.resource_path, 'score.pickle')
        print("解析文件:", f_name)
        with open(f_name, "rb+") as f:
            while True:
                try:
                    data = pickle.load(f)
                    self.parse_data(data)
                except EOFError as e:
                    print("文件读取完成", e)
                    break
                except Exception as e2:
                    print("报错", e2)
                    break

        if self.batch.getSize() > 0:
            self.db.update(self.batch)
            self.batch.cleanBatch()
Пример #26
0
class ParseCategory(object):
    def __init__(self, resource_path, city, city_cn, date, date_short, db):
        '''
        :param resource_path: 城市数据文件路径
        :param city: 城市拼音
        :param city_cn: 城市中文
        :param date: 保存日期
        :param date_short: 保存日期简写
        :param db: 数据配置
        :return:
        '''
        self.resource_path = resource_path
        self.city = city
        self.city_cn = city_cn
        self.date = date
        self.date_short = date_short
        self.db = db

        self.sql1 = "insert into t_e_rest_list_city_pre_%s values" % date_short
        self.sql2 = "insert into t_e_rest_active_city_%s values" % date_short
        self.sql3 = "insert into t_e_rest_category_city_%s values" % date_short
        self.sql4 = "insert into t_e_rest_open_time_city_%s values" % date_short
        self.sql5 = "insert into t_e_rest_money_off_city_%s values" % date_short
        self.sql6 = "insert into t_e_rest_money_off_avg_city_%s values" % date_short

        self.batch1 = BatchSql(self.sql1)
        self.batch2 = BatchSql(self.sql2)
        self.batch3 = BatchSql(self.sql3)
        self.batch4 = BatchSql(self.sql4)
        self.batch5 = BatchSql(self.sql5)
        self.batch6 = BatchSql(self.sql6)

        self.err_rest_ids = set()
        self.rest_category = defaultdict(set)
        self.category_level1_list = []
        self.category_level2_list = []

    def get_data_by_level2_id(self, category_level2_id):
        category_level1_id, category_level1_name, category_level2_name = '-999', '-999', '-999'
        for item in self.category_level2_list:
            if category_level2_id == str(item.get('id2')):
                category_level2_name = str(item.get('name2'))
                category_level1_id = str(item.get('id1'))
                category_level1_name = str(item.get('name'))
                break
        # for item in category_level1_list:
        #     if category_level1_id == str(item[0]):
        #         category_level1_name = str(item[1])
        #         break
        return category_level1_id, category_level1_name, category_level2_id, category_level2_name

    def parse_data(self, data, m):
        '''
        :param data: 城市数据
        :param city: 城市拼音
        :param date: 保存日期
        :param m:    城市地图信息
        :param date_short: 保存日期简写
        :param db:   数据库配置信息
        :return:
        '''
        try:
            category_item = data['param'][2]
            category_level2_id = str(category_item.get("id2"))
            item = data["data"]
            rest_id = getMapValue(item, "id")
            lat = float(getMapValue(item, "latitude"))
            lng = float(getMapValue(item, "longitude"))
            if len(rest_id) > 11:
                print('错误店铺id\nrest_id: {}'.format(rest_id))
            elif not (m.is_in_city(lng, lat)):
                self.err_rest_ids.add(rest_id)
                print('错误地址\nrest_id: {}, lat: {}, lng: {}, name: {}\n'
                      'address: {}'.format(rest_id, lat, lng,
                                           getMapValue(item, "name"),
                                           getMapValue(item, "address")))
            else:
                # 店铺去重
                if not self.rest_category[rest_id]:
                    self.batch1.addBatch([
                        self.city, self.date, rest_id,
                        getMapValue(item, "name"),
                        getMapValue(item, "phone", '/'),
                        getMapValue(item, "address"),
                        getMapValue(item, "average_cost"),
                        [1, 0][getMapValue(item, "delivery_mode") == '-999'],
                        getMapValue(item, "float_delivery_fee"),
                        getMapValue(item, "float_minimum_order_amount"),
                        [0, 1][getMapValue(item, "is_new") == 'True'],
                        [0, 1][getMapValue(item, "is_premium") == 'True'], lat,
                        lng,
                        getMapValue(item, "recent_order_num"), '-999'
                    ])
                    if self.batch1.getSize() > 10000:
                        self.db.update(self.batch1)
                        self.batch1.cleanBatch()
                    opening_hours = getMapValue(item, "opening_hours")
                    arr = json.loads(opening_hours.replace("\'", "\""))
                    if not isinstance(arr, list):
                        print('错误数据:', rest_id, opening_hours)
                        print('错误数据\nrest_id: {}, opening_hours: {}'.format(
                            rest_id, opening_hours))
                    else:
                        for s in arr:
                            times = s.split("/")
                            self.batch4.addBatch(
                                [self.city, self.date, rest_id, *times])
                            if self.batch4.getSize() > 100000:
                                self.db.update(self.batch4)
                                self.batch4.cleanBatch()
                    # TODO 平均满减折扣有问题
                    discount_rate_list = []
                    for active in item.get('activities'):
                        attribute = getMapValue(active, "attribute")
                        active_type = getMapValue(active, "icon_name")
                        self.batch2.addBatch([
                            self.city, self.date, rest_id,
                            getMapValue(active, "description"), active_type
                        ])
                        if self.batch2.getSize() > 100000:
                            self.db.update(self.batch2)
                            self.batch2.cleanBatch()
                        if active_type == '减' and attribute != '-999':
                            arr = json.loads(attribute)
                            for key, value in arr.items():
                                if isinstance(value, dict):
                                    sub_price = value.get("1")
                                    if sub_price == 0:
                                        sub_price = value.get("0")
                                else:
                                    sub_price = value
                                key = round(float(key), 2)
                                sub_price = round(float(sub_price), 2)
                                discount_rate = round(sub_price / key, 4)
                                discount_rate_list.append(discount_rate)
                                self.batch5.addBatch([
                                    self.city, self.date, rest_id, key,
                                    sub_price, discount_rate
                                ])
                                if self.batch5.getSize() > 100000:
                                    self.db.update(self.batch5)
                                    self.batch5.cleanBatch()

                    # TODO 平均满减折扣有问题
                    if len(discount_rate_list):
                        avg_discount_rate = sum(discount_rate_list) / len(
                            discount_rate_list)
                        avg_discount_rate = round(avg_discount_rate, 4)
                        self.batch6.addBatch(
                            [self.city, self.date, rest_id, avg_discount_rate])
                        if self.batch6.getSize() > 100000:
                            self.db.update(self.batch6)
                            self.batch6.cleanBatch()

                # 店铺对应品类去重
                if category_level2_id not in self.rest_category[rest_id]:
                    self.rest_category[rest_id].add(category_level2_id)
                    #TODO 取消了单独的品类文件,品类信息和店铺信息保存一起,可直接获取该店铺对应品类信息
                    # category_level1_id, category_level1_name, category_level2_id, category_level2_name = get_data_by_level2_id(category_level2_id)

                    category_level1_id = category_item.get('id1')
                    category_level1_name = category_item.get('name')
                    category_level2_name = category_item.get('name2')
                    self.batch3.addBatch([
                        self.city, self.date, category_level1_id,
                        category_level1_name, category_level2_id,
                        category_level2_name, rest_id,
                        getMapValue(item, "name")
                    ])
                    if self.batch3.getSize() > 100000:
                        self.db.update(self.batch3)
                        self.batch3.cleanBatch()
        except Exception as e:
            print("解析数据报错", traceback.format_exc())
            print('解析数据报错\n错误数据:{}'.format(data))

    def run(self):
        from ShopMonitor.elm.ElmWebParse.ElmMapUtil import ElmMapUtil
        m = ElmMapUtil(self.city_cn)
        f_name = os.path.join(self.resource_path, 'category.pickle')
        print("解析rest文件:", f_name)
        with open(f_name, "rb+") as f:
            while 1:
                try:
                    data = pickle.load(f)
                    self.parse_data(data, m)
                    # parse_data(data, city, date,m,date_short,db)
                except EOFError as e:
                    print("rest文件读取完成", e)
                    break
                except Exception as e2:
                    print("报错", e2)
                    break

        if self.batch1.getSize() > 0:
            self.db.update(self.batch1)
            self.batch1.cleanBatch()
        if self.batch2.getSize() > 0:
            self.db.update(self.batch2)
            self.batch2.cleanBatch()
        if self.batch3.getSize() > 0:
            self.db.update(self.batch3)
            self.batch3.cleanBatch()
        if self.batch4.getSize() > 0:
            self.db.update(self.batch4)
            self.batch4.cleanBatch()
        if self.batch5.getSize() > 0:
            self.db.update(self.batch5)
            self.batch5.cleanBatch()
        if self.batch6.getSize() > 0:
            self.db.update(self.batch6)
            self.batch6.cleanBatch()

        print('rest_list数:', len(self.rest_category.keys()))
        print('rest_category数:',
              sum([len(x) for x in self.rest_category.values()]))

        print(self.city_cn, "错误店铺数:", len(self.err_rest_ids))
        # with open("err_rest_ids_" + city_cn + ".txt", "w") as f:
        #     for item in err_rest_ids:
        #         f.write(str(item) + '\n')
        return self.err_rest_ids
Пример #27
0
class ParseMenu(object):
    def __init__(self, resource_path, city, date, city_short, save_date_short,
                 err_rest_ids, db):
        '''
        :param resource_path: 数据文件路径
        :param city: 城市拼音
        :param date: 保存日期
        :param city_short: 城市简写
        :param save_date_short: 保存日期简写
        :param err_rest_ids: 错误城市id
        :param db: 数据库配置信息
        '''
        self.resource_path = resource_path
        self.city = city
        self.date = date
        self.city_short = city_short
        self.save_date_short = save_date_short
        self.err_rest_ids = err_rest_ids
        self.db = db

        self.sql1 = "insert into t_e_rest_menu_level1_city_%s values" % save_date_short
        self.sql2 = "insert into t_e_rest_menu_level2_city_%s_%s values" % (
            city_short, save_date_short)
        self.sql3 = "insert into t_e_rest_menu_level2_unique_city_%s_%s values" % (
            city_short, save_date_short)
        self.sql4 = "insert into t_e_rest_menu_discount_city_%s values" % save_date_short

        self.batch1 = BatchSql(self.sql1)
        self.batch2 = BatchSql(self.sql2)
        self.batch3 = BatchSql(self.sql3)
        self.batch4 = BatchSql(self.sql4)

        self.rest_food = defaultdict(set)

    def parse_data(self, data):
        try:
            rest_id = getMapValue(data, 'param')
            if rest_id in self.err_rest_ids:
                return
            for item in data["data"]:
                # 验证是否是错误数据
                try:
                    foods = item['foods'][::-1]
                except KeyError:
                    print('错误数据\n{}'.format(item))
                    continue
                menu_name = getMapValue(item, 'name')
                self.batch1.addBatch(
                    [self.city, self.date, rest_id, menu_name])
                if self.batch1.getSize() > 100000:
                    self.db.update(self.batch1)
                    self.batch1.cleanBatch()
                for item2 in foods:
                    food_id = getMapValue(item2, 'virtual_food_id')
                    food_price_current = getMapValue(item2["specfoods"][0],
                                                     'price')
                    food_price_primary = getMapValue(item2["specfoods"][0],
                                                     'original_price')
                    has_activity = '0'
                    if food_price_primary not in ['-999', '0']:
                        has_activity = '1'
                    self.batch2.addBatch([
                        self.city, self.date, rest_id, food_id,
                        getMapValue(item2, 'name'),
                        getMapValue(item2, 'month_sales'),
                        getMapValue(item2, 'rating'),
                        getMapValue(item2, 'satisfy_count'),
                        food_price_current, has_activity, menu_name
                    ])
                    if self.batch2.getSize() > 100000:
                        self.db.update(self.batch2)
                        self.batch2.cleanBatch()
                    if food_id not in self.rest_food[rest_id]:
                        self.rest_food[rest_id].add(food_id)
                        self.batch3.addBatch([
                            self.city, self.date, rest_id, food_id,
                            getMapValue(item2, 'name'),
                            getMapValue(item2, 'month_sales'),
                            getMapValue(item2, 'rating'),
                            getMapValue(item2, 'satisfy_count'),
                            food_price_current, has_activity, menu_name
                        ])
                        if self.batch3.getSize() > 100000:
                            self.db.update(self.batch3)
                            self.batch3.cleanBatch()
                        if food_price_primary not in ['-999', '0']:
                            discount = '%.6f' % (float(food_price_current) /
                                                 float(food_price_primary))
                            self.batch4.addBatch([
                                self.city, self.date, rest_id, food_id,
                                food_price_primary, food_price_current,
                                discount
                            ])
                            if self.batch4.getSize() > 100000:
                                self.db.update(self.batch4)
                                self.batch4.cleanBatch()
        except Exception as e:
            print("解析数据报错", e)
            print('解析数据报错\n错误数据:{}'.format(data))

    def run(self):
        f_name = os.path.join(self.resource_path, 'menu.pickle')
        print("解析文件:", f_name)
        with open(f_name, "rb+") as f:
            while 1:
                try:
                    data = pickle.load(f)
                    self.parse_data(data)
                    # break
                except EOFError as e:
                    print("文件读取完成", e)
                    break
                except Exception as e2:
                    print("报错", e2)
                    break
        if self.batch1.getSize() > 0:
            self.db.update(self.batch1)
            self.batch1.cleanBatch()
        if self.batch2.getSize() > 0:
            self.db.update(self.batch2)
            self.batch2.cleanBatch()
        if self.batch3.getSize() > 0:
            self.db.update(self.batch3)
            self.batch3.cleanBatch()
        if self.batch4.getSize() > 0:
            self.db.update(self.batch4)
            self.batch4.cleanBatch()
Пример #28
0
def deal_quan_position():
    '''
    处理店圈的店铺列表信息,获取1.5,3,5公里的店圈id列表信息,并写入数据表quan_position中
    :return:
    '''
    for i in range(len(area_shop_id)):          #遍历所有店铺列表
        for j in range(0,3):                    #获取三种店圈半径类型的数据
            sql = """
                  SELECT t1.rest_id
                    FROM t_e_rest_list_city_1712 t1
                    where t1.longitude IN
                    (SELECT t1.longitude
                     FROM
                     t_e_rest_list_city_1712 t1
                    WHERE
                     t1.city = 'nanjing'
                    AND round(
                     6378.138 * 2 * asin(
                      sqrt(
                       pow(
                        sin(
                         (
                          t1.latitude * pi() / 180 - %s * pi() / 180
                         ) / 2
                        ),
                        2
                       ) + cos(t1.latitude * pi() / 180) * cos(%s * pi() / 180) * pow(
                        sin(
                         (
                          t1.longitude * pi() / 180 - %s * pi() / 180
                         ) / 2
                        ),
                        2
                       )
                      )
                     ) * 1000
                    ) <= %s)
                    and t1.latitude IN
                    (SELECT t1.latitude
                     FROM
                     t_e_rest_list_city_1712 t1
                    WHERE
                     t1.city = 'nanjing'
                    AND round(
                     6378.138 * 2 * asin(
                      sqrt(
                       pow(
                        sin(
                         (
                          t1.latitude * pi() / 180 - %s * pi() / 180
                         ) / 2
                        ),
                        2
                       ) + cos(t1.latitude * pi() / 180) * cos(%s * pi() / 180) * pow(
                        sin(
                         (
                          t1.longitude * pi() / 180 - %s * pi() / 180
                         ) / 2
                        ),
                        2
                       )
                      )
                     ) * 1000
                    ) <= %s)
            
                  """%(shop_latitude[i],shop_latitude[i],shop_longitude[i],type[j],shop_latitude[i],shop_latitude[i],shop_longitude[i],type[j])
            # print(sql)
            area_shop_list = db2.queryForList(sql,None)   #获取店圈的店铺id信息
            all_area_shop_list = []
            for item in area_shop_list:
                all_area_shop_list.append(item[0])

            all_str_shop_ids = str(all_area_shop_list)          #店铺列表信息的简单处理,将[]去除
            all_str_shop_ids = all_str_shop_ids.split('[')[1]
            all_str_shop_ids = all_str_shop_ids.split(']')[0]

            sql2 = 'insert into quan_position values'
            quan_position = []
            quan_position.extend([0,'2017-12-31',area_shop_id[i],area_shop_name[i],j+1,all_str_shop_ids,time,time])
            print("店圈列表信息:",quan_position)

            batch = BatchSql(sql2)
            batch.addBatch(quan_position)
            db1.update(batch)