def deal_quan_category(city,date): ''' 处理店圈的菜品类信息,一级品类,二级品类信息 :param city: :param date: :return: ''' sql = """ select DISTINCT(t1.category_id_level1),t1.category_name_level1,0 pid from t_e_rest_category_city_1712 t1 where t1.city = '%s' and t1.date = '%s' union select DISTINCT(t1.category_id_level2),t1.category_name_level2,t1.category_id_level1 pid from t_e_rest_category_city_1712 t1 where t1.city = '%s' and t1.date = '%s' """%(city,date,city,date) result = db2.queryForList(sql, None) print(result) sql2 = "insert into quan_category values" batch = BatchSql(sql2) for item in result: quan_category = [] quan_category.extend([0,item[1],date,item[0],item[2],time,time]) print(quan_category) batch.addBatch(quan_category) if batch.getSize() > 10000: db1.update(batch) batch.cleanBatch() db1.update(batch)
def deal_comment_data(account,date,coupon_name,coupon_content): coupon_type = [] """ 门店新用户立减:首 满减活动:减 折扣商品:折 下单返券:返 新用户立减活动:首 商家优惠券:卷 """ for item in coupon_name: if item == '限量抢购-9.9元晚餐': coupon_type.append('抢') if item == '限量抢购-19.9元晚餐': coupon_type.append('抢') if item == '新用户立减': coupon_type.append('首') for i in range(len(coupon_name)): coupon_data = [] name = coupon_name[i] type = coupon_type[i] content = coupon_content[i] coupon_data.extend([0, str(account), date, str(1), str(type), str(name), str(content)]) print(coupon_data) batch = BatchSql(sql3) batch.addBatch(coupon_data) db.update(batch)
def deal_quan_monitor_activity_data(city,date,all_shopid_list): ''' 处理店圈店铺的活动信息 :param city: 所属城市 :param date: 数据日期 :param all_shopid_list: 店圈店铺的ID列表 :return: ''' sql=""" select t1.rest_id,t1.description,t1.active_type from t_e_rest_active_city_1712 t1 where t1.city = '%s' and t1.date = '%s' and t1.rest_id IN %s """%(city,date,all_shopid_list) sql2 = "insert into quan_monitor_activity_data values" batch = BatchSql(sql2) result = db2.queryForList(sql, None) print(result) for shoplist in all_shopid_list: all_activity = [] quan_monitor_activity_data = [] for item in result: activity = dict(type=item[2],content=item[1]) if item[0] == shoplist: all_activity.append(activity) all_activity = json.dumps(all_activity) quan_monitor_activity_data.extend([0,date,shoplist,str(all_activity),time,time]) print(quan_monitor_activity_data) batch.addBatch(quan_monitor_activity_data) if batch.getSize() > 10000: db1.update(batch) batch.cleanBatch() db1.update(batch)
def rate_analyze(id, comment_shopid, date, comment_content): ''' 对评论数据进行分词识别出相应的类别,然后写入数据库中 :param id: 评论ID :param date: 当前日期 :param comment_content:评论的内容 :return: ''' time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') key_vaule = get_config() db2 = DBUtils(('116.62.70.68', 3306, 'bigdata', 'gisfood20171220@nj', 'big_data', 'utf8mb4')) sql2 = "insert into comment_flag VALUES" seg_list = jieba.cut(comment_content) #利用jieba分词,取出句子中所有的分词内容 print("评论内容:", comment_content) flag = 0 #标志位,0:分词中没有差评信息 1:分词中存在差评信息 keywords = ' ' for item in seg_list: #对一条评论的分词结果进行匹配 for j in range(len(key_vaule)): #在词库中进行配对 if key_vaule.iloc[j, 0] == item: print("有差评内容:", item) keywords = item comment_class = int(key_vaule.iloc[j, 1]) #将评论的标签置为相应的值 flag = 1 break if flag == 0: keywords = r' ' comment_class = 0 #没有差评的时候标签置为0 print("评论ID:" + str(id), "所属分类:" + str(comment_class)) batch = BatchSql(sql2) #写入数据库 batch.addBatch( [0, id, comment_shopid, date, comment_class, keywords, time, None]) db2.update(batch)
def deal_quan_shop(city,date,all_shopid_list,cate_list): ''' 处理店圈店铺的基本数据 :param city: 店圈所属城市 :param date: 数据分析日期 :param all_shopid_list: 店圈数据的店铺列表的数据 :param cate_list: 店圈数据的品类列表 :return: ''' sql = """ select t1.rest_id,t1.rest_name,t1.delivery_id,t1.longitude,t1.latitude,t1.is_new from t_e_rest_list_city_1712 t1 where t1.city = '%s' and t1.date = '%s' and t1.rest_id IN %s """%(city,date,all_shopid_list) sql2 = "insert into quan_shop values" batch = BatchSql(sql2) result = db2.queryForList(sql, None) print(result) for item in result: for cate in cate_list: if item[0] == cate[0]: quan_shop = [] quan_shop.extend([0,item[0],1,date,item[1],item[2],item[3],item[4],item[5],cate[1],time,time]) print(quan_shop) batch.addBatch(quan_shop) if batch.getSize() > 1000: db1.update(batch) batch.cleanBatch() db1.update(batch)
def deal_meituan_customer_prefer_activity(self): ''' 处理同行顾客喜欢的活动:id,店铺id,店铺名称,日期, 同行顾客喜欢的活动,活动类型 :return: ''' customer_prefer_activity = self.customerPerfer['data']['pubCusAct'] for item in customer_prefer_activity: customer_prefer_activity = [] customer_prefer_activity_name = item[0] customer_prefer_activity_pic = item[1] customer_prefer_activity_type = self.deal_customer_prefer_activity_type( customer_prefer_activity_pic) customer_prefer_activity.extend([ 0, self.shopid, self.shopname, str(self.yesterday), customer_prefer_activity_name, customer_prefer_activity_type ]) print("顾客喜欢的活动:", customer_prefer_activity) sql = "insert into meituan_customer_prefer_activity VALUES" batch = BatchSql(sql) batch.addBatch(customer_prefer_activity) self.db2.update(batch)
def run(): # rest_id,rest_name = get_shopid() rest_name = ['义务', '明发', '同曦', '殷象'] rest_id = [160279990, 161341608, 161378073, 161313783] for item in rest_id: revenue = [] print("店铺%s的12月份营收总金额为:" % item) shop_money_off = get_shop_money_off(item) shop_menu_order = get_shop_menu_order(item) all_single_menu_amount = deal_single_menu(shop_menu_order) all_activity_menu_amount = deal_activity_menu(shop_menu_order) all_common_menu_amount = deal_common_menu(shop_money_off, shop_menu_order) all_amount = all_single_menu_amount + all_activity_menu_amount + all_common_menu_amount print("营收总金额:", all_amount) revenue.extend( [item, rest_name[rest_id.index(item)], '2017-12-31', all_amount]) print(revenue) print("------------------------------") print(rest_id.index(item)) batch = BatchSql(sql) batch.addBatch(revenue) db.update(batch)
def deal_score_data(score): ''' 处理商家评分信息 :param score: :return: ''' print("处理数据", score) batch = BatchSql(sql2) batch.addBatch(score) db.update(batch)
class ParseDeliveymode(object): def __init__(self,resource_path, city, date,save_date_short,db): ''' :param resource_path: 城市数据文件路径 :param city: 城市拼音 :param date: 保存日期 :param save_date_short:保存日期简写 :param db: 数据库配置信息 ''' self.resource_path = resource_path self.city = city self.date = date self.save_date_short = save_date_short self.db = db self.sql = "insert into t_e_delivery_mode_city_pre_%s values" % save_date_short self.batch = BatchSql(self.sql) def parse_data(self, data): try: rest_id = str(data['param']) if len(rest_id) <= 11: data = data['data'].get('delivery_mode') delever_id = getMapValue(data,"id") delever_text = getMapValue(data, "text") param = [self.city, self.date,rest_id,delever_text,delever_id] self.batch.addBatch(param) if self.batch.getSize() > 100000: self.db.update(self.batch) self.batch.cleanBatch() else: print("错误数据", data) except Exception as e: print(traceback.print_exc()) print('解析数据报错\n错误数据:{}'.format(data)) def run(self): f_name = os.path.join(self.resource_path, 'rest_info.pickle') print("解析文件:", f_name) with open(f_name, "rb+") as f: while True: try: data = pickle.load(f) self.parse_data(data) except EOFError as e: print("文件读取完成", e) break except Exception as e2: print("报错", e2) break if self.batch.getSize() > 0: self.db.update(self.batch) self.batch.cleanBatch()
def deal_meituan_daily_operational_data(self): ''' 运营日报数据入库:id,店铺id,店铺名称,日期, 日有效订单量,订单收入, 曝光量,进店量,下单量,进店转化率,下单转化绿 新客人数,新客占比,旧客人数,旧客占比 推广资金,(推广点击次数,单词点击费用) :return: ''' meituan_daily_operational_data = [] effectiveOrders = self.effectiveOrders['data']['effectiveOrders'] settleAcc = self.effectiveOrders['data']['settleAcc'] exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][ 'exposureNum'] visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum'] xiadanNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum'] visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate'] orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate'] newOrderNum = self.customerAnalysis['data']['newOrderNum'] oldOrderNum = self.customerAnalysis['data']['oldOrderNum'] orderNum = self.customerAnalysis['data']['orderNum'] if orderNum == 0: newOrderNum_percent = 0 oldOrderNum_percent = 0 else: newOrderNum_percent = newOrderNum / orderNum oldOrderNum_percent = oldOrderNum / orderNum history_consume_list = self.history_consume['data']['flowVoList'] popularize_amount = 0 for item in history_consume_list: history_consume_list_date = str(item['date']).split(' ')[0] history_consume_list_reason = item['reason'] if history_consume_list_date == str( self.yesterday) and history_consume_list_reason == '推广消费': popularize_amount += abs(item['amount']) # print(popularize_amount) meituan_daily_operational_data.extend([ 0, self.shopid, self.shopname, str(self.yesterday), effectiveOrders, settleAcc, exposureNum, visitNum, xiadanNum, visitRate, orderRate, newOrderNum, newOrderNum_percent, oldOrderNum, oldOrderNum_percent, popularize_amount ]) print("美团运营日报数据:", meituan_daily_operational_data) sql = "insert into meituan_daily_operational_data VALUES" batch = BatchSql(sql) batch.addBatch(meituan_daily_operational_data) self.db2.update(batch)
def deal_score_data(base_data,score): # 用户评论信息表的内容 rating_data = [] rating_data.append(str(base_data[0])) # 店铺id rating_data.append(base_data[1]) # 日期 rating_data.append(str('1')) # 饿了么平台标签 rating_data.append(str(score[0])) # 用户评论 print(rating_data) # 用户评分数据入库表 batch = BatchSql(sql2) batch.addBatch(rating_data) db.update(batch)
def deal_coupon_data(account, date, coupon_name, coupon_content): ''' 处理店铺活动信息 :param account: :param date: :param coupon_name: :param coupon_content: :return: ''' coupon_type = [] """ 门店新用户立减:首 满减活动:减 折扣商品:折 下单返券:返 新用户立减活动:首 商家优惠券:卷 """ for item in coupon_name: if item == '折扣商品': coupon_type.append('折') if item == '下单返券': coupon_type.append('返') if item == '门店新用户立减': coupon_type.append('首') if item == '商家优惠券': coupon_type.append('卷') if item == '满减活动': coupon_type.append('减') if item == '新用户立减活动': coupon_type.append('首') for i in range(len(coupon_name)): coupon_data = [] name = coupon_name[i] type = coupon_type[i] content = coupon_content[i] coupon_data.extend([ 0, str(account), date, str(2), str(type), str(name), str(content) ]) print(coupon_data) batch = BatchSql(sql3) batch.addBatch(coupon_data) db.update(batch)
def deal_meituan_Operational_data_analysis(self): ''' 处理商家后台数据分析的数据:id,店铺id,店铺名称,日期, 有效订单,订单收入, 有效订单打败同行,订单收入打败同行, 下单人数,新客人数,老客人数, 曝光人数,访问人数,下单人数,访问转化率,下单转化率, 商家评分,口味评分,包装评分,配送评分, :return: ''' meituan_Operational_data_analysis = [] effectiveOrders = self.effectiveOrders['data']['effectiveOrders'] settleAcc = self.effectiveOrders['data']['settleAcc'] effectiveOrdersRankPercent = self.PeersCompareAnalysis['data'][ 'effectiveOrdersRankPercent'] settleAccRankPercent = self.PeersCompareAnalysis['data'][ 'settleAccRankPercent'] newOrderNum = self.customerAnalysis['data']['newOrderNum'] oldOrderNum = self.customerAnalysis['data']['oldOrderNum'] orderNum = self.customerAnalysis['data']['orderNum'] exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][ 'exposureNum'] visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum'] xiadanNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum'] visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate'] orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate'] bizScore = self.commentSummary['data']['bizScore'] tasteScore = self.commentSummary['data']['tasteScore'] packScore = self.commentSummary['data']['packScore'] deliveryScore = self.commentSummary['data']['deliveryScore'] meituan_Operational_data_analysis.extend([ 0, self.shopid, self.shopname, str(self.yesterday), effectiveOrders, settleAcc, effectiveOrdersRankPercent, settleAccRankPercent, orderNum, newOrderNum, oldOrderNum, exposureNum, visitNum, xiadanNum, visitRate, orderRate, bizScore, tasteScore, packScore, deliveryScore ]) print('商家后台数据分析:', meituan_Operational_data_analysis) sql = "insert into meituan_Operational_data_analysis VALUES" batch = BatchSql(sql) batch.addBatch(meituan_Operational_data_analysis) self.db2.update(batch)
def deal_commentSummary(self): ''' 处理顾客评价的信息:店铺ID、时间、平台、评分、评分日比 :return: ''' commentSummary_content = [] bizScore = self.commentSummary['data']['bizScore'] commentSummary_content.extend( [self.shopid, self.date, self.platform, bizScore]) print('店铺评分信息:', commentSummary_content) sql = "insert into rating_score VALUES " batch = BatchSql(sql) batch.addBatch(commentSummary_content) self.db.update(batch)
def deal_base_data(account, date, result): ''' 处理基础的几条信息 :param account: :param date: :param result: :return: ''' base_data = [] base_data.extend([ account, date, result[4], result[6], result[8], result[10], result[11] ]) print("处理数据:", base_data) batch = BatchSql(sql) batch.addBatch(base_data) db.update(batch)
def deal_quan_monitor_hot_sku_data(city,date,all_shopid_list): ''' 处理店圈sku数据,9个店铺大约110万条数据 :param city: 所属城市 :param date: 数据日期 :param all_shopid_list: 店圈店铺ID列表 :return: ''' sql=""" select t1.rest_id,t1.food_name,t1.price,t1.food_id,t1.food_month_sales,t1.has_activity,t2.delivery_fee from t_e_rest_menu_level2_unique_city_nj_1712 t1, t_e_rest_list_city_1712 t2 where t1.city = '%s' and t1.date = '%s' and t1.rest_id IN %s and t1.food_month_sales != 0 and t1.city = t2.city and t1.date = t2.date and t1.rest_id = t2.rest_id """%(city,date,all_shopid_list) sql2 = 'insert into quan_monitor_hot_sku_data values' batch = BatchSql(sql2) result = db2.queryForList(sql, None) print(result) for shop_list in all_shopid_list: for item in result: quan_monitor_hot_sku_data = [] if shop_list == item[0]: if item[5] == 1: #特价菜 quan_monitor_hot_sku_data.extend([0,date,shop_list,item[1],item[2],item[2],item[4],time,time]) print(quan_monitor_hot_sku_data) batch.addBatch(quan_monitor_hot_sku_data) elif item[5] == 0: #满减菜品 sub_price = get_sub_price(city,date,shop_list,item[2]) if sub_price == None: sub_price = 0 true_price = item[2]-sub_price quan_monitor_hot_sku_data.extend([0,date,shop_list,item[1],item[2],true_price,item[4],time,time]) print(quan_monitor_hot_sku_data) batch.addBatch(quan_monitor_hot_sku_data) if batch.getSize() > 10000: db1.update(batch) batch.cleanBatch() db1.update(batch)
def deal_TrafficStats(self): ''' 处理数据流量分析的信息:店铺id、时间、曝光人数,访问人数,下单人数,访问转化率,下单转化率 :return: ''' TrafficStats = [] exposureNum = self.TrafficStats['data']['flowGeneralInfoVo'][ 'exposureNum'] visitNum = self.TrafficStats['data']['flowGeneralInfoVo']['visitNum'] orderNum = self.TrafficStats['data']['flowGeneralInfoVo']['orderNum'] visitRate = self.TrafficStats['data']['flowGeneralInfoVo']['visitRate'] orderRate = self.TrafficStats['data']['flowGeneralInfoVo']['orderRate'] TrafficStats.extend([ self.shopid, self.date, exposureNum, visitNum, orderNum, visitRate, orderRate ]) print('流量分析信息:', TrafficStats) sql = "insert into meituan_backstage VALUES " batch = BatchSql(sql) batch.addBatch(TrafficStats) self.db.update(batch)
def deal_meituan_customer_precision_operation(self): ''' 处理精准营销的数据:id,店铺id,店铺名称,日期, 营销名称,营销活动人数 :return: ''' customer_precision_labelList = self.getCouponLabel['data']['labelList'] for item in customer_precision_labelList: customer_precision_operation = [] precision_operation_name = item['title'] precision_operation_numbers = item['nums'] customer_precision_operation.extend([ 0, self.shopid, self.shopname, str(self.yesterday), precision_operation_name, precision_operation_numbers ]) print('精准营销的活动信息:', customer_precision_operation) sql = "insert into meituan_customer_precision_operation VALUES" batch = BatchSql(sql) batch.addBatch(customer_precision_operation) self.db2.update(batch)
def deal_meituan_customer_prefer_analysis(self): ''' 处理同行顾客爱好:id,店铺id,店铺名称,日期 同行顾客爱好的菜品名,该菜品的同行百分比 :return: ''' customer_prefer_analysi = self.customerPerfer['data']['pubCusEat'] for item in customer_prefer_analysi: meituan_customer_prefer_analysi = [] customer_prefer_name = item[0] customer_prefer_percent = item[1] meituan_customer_prefer_analysi.extend([ 0, self.shopid, self.shopname, str(self.yesterday), customer_prefer_name, float(customer_prefer_percent) ]) print('同行顾客爱好:', meituan_customer_prefer_analysi) sql = "insert into meituan_customer_prefer_analysis VALUES" batch = BatchSql(sql) batch.addBatch(meituan_customer_prefer_analysi) self.db2.update(batch)
def deal_quan_monitor_shop_data(city,date,all_shopid_list): ''' 处理店圈的配送费用和配送时间的信息 :param city: 店圈所属城市 :param date: 爬取数据的日期 :param all_shopid_list: 所有店圈的店铺列表信息 :return: ''' sql = """ select t1.rest_id,t1.order_month_sales,t2.deliver_time,t1.delivery_fee,t1.min_delivery_price,t2.overall_score from t_e_rest_list_city_1712 t1, t_e_rest_score_city_1712 t2 where t1.city = '%s' and t1.date = '%s' and t1.rest_id IN %s and t1.rest_id = t2.rest_id and t1.city = t2.city and t1.date = t2.date """%(city,date,all_shopid_list) sql2 = "insert into quan_monitor_shop_data values" batch = BatchSql(sql2) result = db2.queryForList(sql, None) for item in result: quan_monitor_shop_data = [] score = float('%.1f'%float(item[5])) quan_monitor_shop_data.extend([0,date,item[0],item[1],item[2],item[3],item[4],score,time,time]) print(quan_monitor_shop_data) batch.addBatch(quan_monitor_shop_data) if batch.getSize() > 10000: db1.update(batch) batch.cleanBatch() db1.update(batch)
def deal_CustomerReminder(self): ''' 处理店铺催单信息:id,店铺id,日期,时间,平台名称,催单次数,催单时间 :return: ''' CustomerReminder_Info = self.customer_reminderInfo['wmOrderList'] CustomerReminder_Data = self.customer_reminder['data'] if CustomerReminder_Info != None: for item in CustomerReminder_Info: #获取另一个催单账号信息借口获取催单的ID CustomerReminder_ID = item['wm_order_id_view'] try: for item in CustomerReminder_Data[str( CustomerReminder_ID)]: #根据催单ID获取相应的催单的时间信息 times = len(CustomerReminder_Data[str( CustomerReminder_ID)]) #催单的次数 CustomerReminder = [] reminder_time_fmt = item['reminder_time_fmt'] response_time_fmt = item['response_time_fmt'] reminder_time_fmt = str(reminder_time_fmt).split( ':')[1] response_time_fmt = str(response_time_fmt).split( ':')[1] reminder_time = int(response_time_fmt) - int( reminder_time_fmt) #商家后台恢复催单信息的时长 CustomerReminder.extend([ 0, self.shopid, self.date, self.time, CustomerReminder_ID, 2, times, reminder_time ]) print('客户催单信息:', CustomerReminder) sql = "insert into daily_customer_reminder VALUES" batch = BatchSql(sql) batch.addBatch(CustomerReminder) self.db.update(batch) except Exception: pass
def deal_Activitylist(self): ''' 处理店铺活动的信息:id,店铺id,活动id,时间,平台名称,活动类型,活动名称,活动内容 :return: ''' Activity_onGoingActs = self.activitylist['data']['onGoingActs'] for item in Activity_onGoingActs: Activitylist = [] actId = item['actId'] actName = item['actName'] poiPolicy = item['poiPolicy'] type = item['type'] Activity_type = self.deal_Activity_type(type) Activitylist.extend([ 0, self.shopid, actId, self.date, 2, Activity_type, actName, poiPolicy ]) print('店铺活动信息:', Activitylist) sql = "insert into daily_shop_activity_data VALUES" batch = BatchSql(sql) batch.addBatch(Activitylist) self.db3.update(batch)
def deal_base_data(base_data): # 基本信息数据入库 batch = BatchSql(sql) batch.addBatch(base_data) db.update(batch)
class ParseHotword(object): def __init__(self, resource_path, city, date, save_date_short, err_rest_ids, db): ''' :param resource_path: 城市数据文件路径 :param city: 城市拼音 :param date: 保存日期 :param save_date_short:保存日期简写 :param err_rest_ids: 错误店铺ID :param db: 数据库配置信息 ''' self.resource_path = resource_path self.city = city self.date = date self.save_date_short = save_date_short self.err_rest_ids = err_rest_ids self.db = db self.sql = "insert into t_e_hot_search_word_city_%s values" % save_date_short self.batch = BatchSql(self.sql) def parse_data(self, data): try: rest_id = str(data["param"][0]) if rest_id in self.err_rest_ids: return if len(rest_id) <= 11: la = data["param"][1] lo = data["param"][2] for item in data["data"]: search_word = getMapValue(item, "search_word") if search_word != '-999': self.batch.addBatch([ self.city, self.date, rest_id, la, lo, search_word ]) if self.batch.getSize() > 100000: self.db.update(self.batch) self.batch.cleanBatch() else: print("错误数据", data) print('错误数据\n{}'.format(data)) except Exception as e: print("解析数据报错", e) print('解析数据报错\n错误数据:{}'.format(data)) def run(self): f_name = os.path.join(self.resource_path, 'hot_word.pickle') print("解析文件:", f_name) with open(f_name, "rb+") as f: while True: try: data = pickle.load(f) self.parse_data(data) except EOFError as e: print("文件读取完成", e) break except Exception as e2: print("报错", e2) break if self.batch.getSize() > 0: self.db.update(self.batch) self.batch.cleanBatch()
class ParseScore(object): def __init__(self,resource_path, city, date,save_date_short, err_rest_ids,db): ''' :param resource_path: 城市数据文件路径 :param city: 城市拼音 :param date: 保存日期 :param save_date_short:保存日期简写 :param err_rest_ids: 错误店铺ID :param db: 数据库配置文件 ''' self.resource_path = resource_path self.city = city self.date = date self.save_date_short = save_date_short self.err_rest_ids = err_rest_ids self.db = db self.sql = "insert into t_e_rest_score_city_%s values" %save_date_short self.batch = BatchSql(self.sql) def parse_data(self,data): try: rest_id = data['param'] if rest_id in self.err_rest_ids: return if len(rest_id) <= 11: self.batch.addBatch([self.city, self.date, rest_id, getMapValue(data['data'], "compare_rating"), getMapValue(data['data'], "deliver_time"), getMapValue(data['data'], "food_score"), getMapValue(data['data'], "overall_score"), getMapValue(data['data'], "service_score")]) if self.batch.getSize() > 100000: self.db.update(self.batch) self.batch.cleanBatch() else: print("错误数据", data) print('错误数据\n{}'.format(data)) except Exception as e: print("解析数据报错", e) print('解析数据报错\n错误数据:{}'.format(data)) def run(self): f_name = os.path.join(self.resource_path, 'score.pickle') print("解析文件:", f_name) with open(f_name, "rb+") as f: while True: try: data = pickle.load(f) self.parse_data(data) except EOFError as e: print("文件读取完成", e) break except Exception as e2: print("报错", e2) break if self.batch.getSize() > 0: self.db.update(self.batch) self.batch.cleanBatch()
class ParseCategory(object): def __init__(self, resource_path, city, city_cn, date, date_short, db): ''' :param resource_path: 城市数据文件路径 :param city: 城市拼音 :param city_cn: 城市中文 :param date: 保存日期 :param date_short: 保存日期简写 :param db: 数据配置 :return: ''' self.resource_path = resource_path self.city = city self.city_cn = city_cn self.date = date self.date_short = date_short self.db = db self.sql1 = "insert into t_e_rest_list_city_pre_%s values" % date_short self.sql2 = "insert into t_e_rest_active_city_%s values" % date_short self.sql3 = "insert into t_e_rest_category_city_%s values" % date_short self.sql4 = "insert into t_e_rest_open_time_city_%s values" % date_short self.sql5 = "insert into t_e_rest_money_off_city_%s values" % date_short self.sql6 = "insert into t_e_rest_money_off_avg_city_%s values" % date_short self.batch1 = BatchSql(self.sql1) self.batch2 = BatchSql(self.sql2) self.batch3 = BatchSql(self.sql3) self.batch4 = BatchSql(self.sql4) self.batch5 = BatchSql(self.sql5) self.batch6 = BatchSql(self.sql6) self.err_rest_ids = set() self.rest_category = defaultdict(set) self.category_level1_list = [] self.category_level2_list = [] def get_data_by_level2_id(self, category_level2_id): category_level1_id, category_level1_name, category_level2_name = '-999', '-999', '-999' for item in self.category_level2_list: if category_level2_id == str(item.get('id2')): category_level2_name = str(item.get('name2')) category_level1_id = str(item.get('id1')) category_level1_name = str(item.get('name')) break # for item in category_level1_list: # if category_level1_id == str(item[0]): # category_level1_name = str(item[1]) # break return category_level1_id, category_level1_name, category_level2_id, category_level2_name def parse_data(self, data, m): ''' :param data: 城市数据 :param city: 城市拼音 :param date: 保存日期 :param m: 城市地图信息 :param date_short: 保存日期简写 :param db: 数据库配置信息 :return: ''' try: category_item = data['param'][2] category_level2_id = str(category_item.get("id2")) item = data["data"] rest_id = getMapValue(item, "id") lat = float(getMapValue(item, "latitude")) lng = float(getMapValue(item, "longitude")) if len(rest_id) > 11: print('错误店铺id\nrest_id: {}'.format(rest_id)) elif not (m.is_in_city(lng, lat)): self.err_rest_ids.add(rest_id) print('错误地址\nrest_id: {}, lat: {}, lng: {}, name: {}\n' 'address: {}'.format(rest_id, lat, lng, getMapValue(item, "name"), getMapValue(item, "address"))) else: # 店铺去重 if not self.rest_category[rest_id]: self.batch1.addBatch([ self.city, self.date, rest_id, getMapValue(item, "name"), getMapValue(item, "phone", '/'), getMapValue(item, "address"), getMapValue(item, "average_cost"), [1, 0][getMapValue(item, "delivery_mode") == '-999'], getMapValue(item, "float_delivery_fee"), getMapValue(item, "float_minimum_order_amount"), [0, 1][getMapValue(item, "is_new") == 'True'], [0, 1][getMapValue(item, "is_premium") == 'True'], lat, lng, getMapValue(item, "recent_order_num"), '-999' ]) if self.batch1.getSize() > 10000: self.db.update(self.batch1) self.batch1.cleanBatch() opening_hours = getMapValue(item, "opening_hours") arr = json.loads(opening_hours.replace("\'", "\"")) if not isinstance(arr, list): print('错误数据:', rest_id, opening_hours) print('错误数据\nrest_id: {}, opening_hours: {}'.format( rest_id, opening_hours)) else: for s in arr: times = s.split("/") self.batch4.addBatch( [self.city, self.date, rest_id, *times]) if self.batch4.getSize() > 100000: self.db.update(self.batch4) self.batch4.cleanBatch() # TODO 平均满减折扣有问题 discount_rate_list = [] for active in item.get('activities'): attribute = getMapValue(active, "attribute") active_type = getMapValue(active, "icon_name") self.batch2.addBatch([ self.city, self.date, rest_id, getMapValue(active, "description"), active_type ]) if self.batch2.getSize() > 100000: self.db.update(self.batch2) self.batch2.cleanBatch() if active_type == '减' and attribute != '-999': arr = json.loads(attribute) for key, value in arr.items(): if isinstance(value, dict): sub_price = value.get("1") if sub_price == 0: sub_price = value.get("0") else: sub_price = value key = round(float(key), 2) sub_price = round(float(sub_price), 2) discount_rate = round(sub_price / key, 4) discount_rate_list.append(discount_rate) self.batch5.addBatch([ self.city, self.date, rest_id, key, sub_price, discount_rate ]) if self.batch5.getSize() > 100000: self.db.update(self.batch5) self.batch5.cleanBatch() # TODO 平均满减折扣有问题 if len(discount_rate_list): avg_discount_rate = sum(discount_rate_list) / len( discount_rate_list) avg_discount_rate = round(avg_discount_rate, 4) self.batch6.addBatch( [self.city, self.date, rest_id, avg_discount_rate]) if self.batch6.getSize() > 100000: self.db.update(self.batch6) self.batch6.cleanBatch() # 店铺对应品类去重 if category_level2_id not in self.rest_category[rest_id]: self.rest_category[rest_id].add(category_level2_id) #TODO 取消了单独的品类文件,品类信息和店铺信息保存一起,可直接获取该店铺对应品类信息 # category_level1_id, category_level1_name, category_level2_id, category_level2_name = get_data_by_level2_id(category_level2_id) category_level1_id = category_item.get('id1') category_level1_name = category_item.get('name') category_level2_name = category_item.get('name2') self.batch3.addBatch([ self.city, self.date, category_level1_id, category_level1_name, category_level2_id, category_level2_name, rest_id, getMapValue(item, "name") ]) if self.batch3.getSize() > 100000: self.db.update(self.batch3) self.batch3.cleanBatch() except Exception as e: print("解析数据报错", traceback.format_exc()) print('解析数据报错\n错误数据:{}'.format(data)) def run(self): from ShopMonitor.elm.ElmWebParse.ElmMapUtil import ElmMapUtil m = ElmMapUtil(self.city_cn) f_name = os.path.join(self.resource_path, 'category.pickle') print("解析rest文件:", f_name) with open(f_name, "rb+") as f: while 1: try: data = pickle.load(f) self.parse_data(data, m) # parse_data(data, city, date,m,date_short,db) except EOFError as e: print("rest文件读取完成", e) break except Exception as e2: print("报错", e2) break if self.batch1.getSize() > 0: self.db.update(self.batch1) self.batch1.cleanBatch() if self.batch2.getSize() > 0: self.db.update(self.batch2) self.batch2.cleanBatch() if self.batch3.getSize() > 0: self.db.update(self.batch3) self.batch3.cleanBatch() if self.batch4.getSize() > 0: self.db.update(self.batch4) self.batch4.cleanBatch() if self.batch5.getSize() > 0: self.db.update(self.batch5) self.batch5.cleanBatch() if self.batch6.getSize() > 0: self.db.update(self.batch6) self.batch6.cleanBatch() print('rest_list数:', len(self.rest_category.keys())) print('rest_category数:', sum([len(x) for x in self.rest_category.values()])) print(self.city_cn, "错误店铺数:", len(self.err_rest_ids)) # with open("err_rest_ids_" + city_cn + ".txt", "w") as f: # for item in err_rest_ids: # f.write(str(item) + '\n') return self.err_rest_ids
class ParseMenu(object): def __init__(self, resource_path, city, date, city_short, save_date_short, err_rest_ids, db): ''' :param resource_path: 数据文件路径 :param city: 城市拼音 :param date: 保存日期 :param city_short: 城市简写 :param save_date_short: 保存日期简写 :param err_rest_ids: 错误城市id :param db: 数据库配置信息 ''' self.resource_path = resource_path self.city = city self.date = date self.city_short = city_short self.save_date_short = save_date_short self.err_rest_ids = err_rest_ids self.db = db self.sql1 = "insert into t_e_rest_menu_level1_city_%s values" % save_date_short self.sql2 = "insert into t_e_rest_menu_level2_city_%s_%s values" % ( city_short, save_date_short) self.sql3 = "insert into t_e_rest_menu_level2_unique_city_%s_%s values" % ( city_short, save_date_short) self.sql4 = "insert into t_e_rest_menu_discount_city_%s values" % save_date_short self.batch1 = BatchSql(self.sql1) self.batch2 = BatchSql(self.sql2) self.batch3 = BatchSql(self.sql3) self.batch4 = BatchSql(self.sql4) self.rest_food = defaultdict(set) def parse_data(self, data): try: rest_id = getMapValue(data, 'param') if rest_id in self.err_rest_ids: return for item in data["data"]: # 验证是否是错误数据 try: foods = item['foods'][::-1] except KeyError: print('错误数据\n{}'.format(item)) continue menu_name = getMapValue(item, 'name') self.batch1.addBatch( [self.city, self.date, rest_id, menu_name]) if self.batch1.getSize() > 100000: self.db.update(self.batch1) self.batch1.cleanBatch() for item2 in foods: food_id = getMapValue(item2, 'virtual_food_id') food_price_current = getMapValue(item2["specfoods"][0], 'price') food_price_primary = getMapValue(item2["specfoods"][0], 'original_price') has_activity = '0' if food_price_primary not in ['-999', '0']: has_activity = '1' self.batch2.addBatch([ self.city, self.date, rest_id, food_id, getMapValue(item2, 'name'), getMapValue(item2, 'month_sales'), getMapValue(item2, 'rating'), getMapValue(item2, 'satisfy_count'), food_price_current, has_activity, menu_name ]) if self.batch2.getSize() > 100000: self.db.update(self.batch2) self.batch2.cleanBatch() if food_id not in self.rest_food[rest_id]: self.rest_food[rest_id].add(food_id) self.batch3.addBatch([ self.city, self.date, rest_id, food_id, getMapValue(item2, 'name'), getMapValue(item2, 'month_sales'), getMapValue(item2, 'rating'), getMapValue(item2, 'satisfy_count'), food_price_current, has_activity, menu_name ]) if self.batch3.getSize() > 100000: self.db.update(self.batch3) self.batch3.cleanBatch() if food_price_primary not in ['-999', '0']: discount = '%.6f' % (float(food_price_current) / float(food_price_primary)) self.batch4.addBatch([ self.city, self.date, rest_id, food_id, food_price_primary, food_price_current, discount ]) if self.batch4.getSize() > 100000: self.db.update(self.batch4) self.batch4.cleanBatch() except Exception as e: print("解析数据报错", e) print('解析数据报错\n错误数据:{}'.format(data)) def run(self): f_name = os.path.join(self.resource_path, 'menu.pickle') print("解析文件:", f_name) with open(f_name, "rb+") as f: while 1: try: data = pickle.load(f) self.parse_data(data) # break except EOFError as e: print("文件读取完成", e) break except Exception as e2: print("报错", e2) break if self.batch1.getSize() > 0: self.db.update(self.batch1) self.batch1.cleanBatch() if self.batch2.getSize() > 0: self.db.update(self.batch2) self.batch2.cleanBatch() if self.batch3.getSize() > 0: self.db.update(self.batch3) self.batch3.cleanBatch() if self.batch4.getSize() > 0: self.db.update(self.batch4) self.batch4.cleanBatch()
def deal_quan_position(): ''' 处理店圈的店铺列表信息,获取1.5,3,5公里的店圈id列表信息,并写入数据表quan_position中 :return: ''' for i in range(len(area_shop_id)): #遍历所有店铺列表 for j in range(0,3): #获取三种店圈半径类型的数据 sql = """ SELECT t1.rest_id FROM t_e_rest_list_city_1712 t1 where t1.longitude IN (SELECT t1.longitude FROM t_e_rest_list_city_1712 t1 WHERE t1.city = 'nanjing' AND round( 6378.138 * 2 * asin( sqrt( pow( sin( ( t1.latitude * pi() / 180 - %s * pi() / 180 ) / 2 ), 2 ) + cos(t1.latitude * pi() / 180) * cos(%s * pi() / 180) * pow( sin( ( t1.longitude * pi() / 180 - %s * pi() / 180 ) / 2 ), 2 ) ) ) * 1000 ) <= %s) and t1.latitude IN (SELECT t1.latitude FROM t_e_rest_list_city_1712 t1 WHERE t1.city = 'nanjing' AND round( 6378.138 * 2 * asin( sqrt( pow( sin( ( t1.latitude * pi() / 180 - %s * pi() / 180 ) / 2 ), 2 ) + cos(t1.latitude * pi() / 180) * cos(%s * pi() / 180) * pow( sin( ( t1.longitude * pi() / 180 - %s * pi() / 180 ) / 2 ), 2 ) ) ) * 1000 ) <= %s) """%(shop_latitude[i],shop_latitude[i],shop_longitude[i],type[j],shop_latitude[i],shop_latitude[i],shop_longitude[i],type[j]) # print(sql) area_shop_list = db2.queryForList(sql,None) #获取店圈的店铺id信息 all_area_shop_list = [] for item in area_shop_list: all_area_shop_list.append(item[0]) all_str_shop_ids = str(all_area_shop_list) #店铺列表信息的简单处理,将[]去除 all_str_shop_ids = all_str_shop_ids.split('[')[1] all_str_shop_ids = all_str_shop_ids.split(']')[0] sql2 = 'insert into quan_position values' quan_position = [] quan_position.extend([0,'2017-12-31',area_shop_id[i],area_shop_name[i],j+1,all_str_shop_ids,time,time]) print("店圈列表信息:",quan_position) batch = BatchSql(sql2) batch.addBatch(quan_position) db1.update(batch)