def get_basic_hidden_info(shop_id): """ 获取基础隐藏信息(名称、地址、电话号、cityid) @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \ 'shopId=' + str(shop_id) + '&_token=' + str(get_token( shop_url)) + '&tcv=ck9rmnrofg&uuid=6ca1f51a-7653-b987-3cd6-95f3aadb13b8.1619854599&platform=1' \ '&partner=150&optimusCode=10&originUrl=' + str(shop_url) r = requests_util.get_requests(url, request_type='json') r_text = requests_util.replace_json_text(r.text, get_font_msg()) r_json = json.loads(r_text) # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] logger.warning('处理验证码,按任意键继续:', verify_page_url) input() elif r_json['code'] == 200: msg = r_json['msg']['shopInfo'] shop_name = msg['shopName'] shop_address = BeautifulSoup(msg['address'], 'lxml').text + BeautifulSoup( msg['crossRoad'], 'lxml').text shop_number = BeautifulSoup(msg['phoneNo'], 'lxml').text + BeautifulSoup( msg['phoneNo2'], 'lxml').text return [shop_name, shop_address, shop_number] else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')
def get_promo_info(shop_id): """ 优惠券信息 @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'ttp://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str( shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token( shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \ '&originUrl=' + shop_url r = requests_util.get_requests(url, request_type='json') r_text = requests_util.replace_json_text(r.text, get_font_msg()) r_json = json.loads(r_text) # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] logger.warning('处理验证码,按任意键继续:', verify_page_url) input() elif r_json['code'] == 200: msg = r_json['msg']['shopInfo'] shop_name = msg['shopName'] shop_address = BeautifulSoup(msg['address'], 'lxml').text + BeautifulSoup( msg['crossRoad'], 'lxml').text shop_number = BeautifulSoup(msg['phoneNo'], 'lxml').text + BeautifulSoup( msg['phoneNo2'], 'lxml').text return [shop_name, shop_address, shop_number] else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')
def get_review_and_star(shop_id): """ 获取评分、人均,评论数 @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str( shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token( shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \ '&originUrl=' + shop_url r = requests_util.get_requests(url, request_type='json') r_text = requests_util.replace_json_text(r.text, get_font_msg()) r_json = json.loads(r_text) # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] logger.warning('处理验证码,按任意键继续:', verify_page_url) input() elif r_json['code'] == 200: shop_base_score = r_json['fiveScore'] score_title_list = r_json['shopScoreTitleList'] avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text score_list = [] for each in r_json['shopRefinedScoreValueList']: score_list.append(BeautifulSoup(each, 'lxml').text) scores = '' for i, score in enumerate(score_list): scores = scores + ' ' + score_title_list[i] + score_list[i] return [shop_base_score, scores, avg_price, review_count] else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')
def get_basic_hidden_info(shop_id): """ 获取基础隐藏信息(名称、地址、电话号、cityid) @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \ 'shopId=' + str(shop_id) + \ '&_token=' + str(get_token(shop_url)) + \ '&tcv=' + str(spider_config.TCV) + \ '&uuid=' + str(spider_config.UUID) + \ '&platform=1' \ '&partner=150' \ '&optimusCode=10' \ '&originUrl=' + str(shop_url) # 这里处理解决请求会异常的问题 retry_time = 5 while True: retry_time -= 1 r = requests_util.get_requests(url, request_type='proxy, no cookie') r_text = requests_util.replace_json_text(r.text, get_font_msg()) try: r_json = json.loads(r_text) # 前置验证码过滤 if r_json['code'] == 200: break if retry_time == 0: logger.warning('替换tsv和uuid') exit() except: pass # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] print('处理验证码,按任意键回车后继续:', verify_page_url) input() elif r_json['code'] == 200: msg = r_json['msg']['shopInfo'] shop_name = msg['shopName'] shop_address = BeautifulSoup(msg['address'], 'lxml').text if msg['address'] is not None else '' + \ BeautifulSoup(msg[ 'crossRoad'], 'lxml').text if \ msg['crossRoad'] is not None else '' shop_number = BeautifulSoup(msg['phoneNo'], 'lxml').text if msg['phoneNo'] is not None else '' + ', ' + \ BeautifulSoup( msg['phoneNo2'], 'lxml').text if \ msg['phoneNo2'] is not None else '' return { '店铺id': shop_id, '店铺名': shop_name, '店铺地址': shop_address, '店铺电话': shop_number } else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')
def get_review_and_star(shop_id): """ 获取评分、人均,评论数 @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?' \ 'shopId=' + str(shop_id) + \ '&cityId=19' \ '&mainCategoryId=2821' \ '&_token=' + str(get_token(shop_url)) + \ '&uuid=' + str(spider_config.UUID) + \ '&platform=1' \ '&partner=150' \ '&optimusCode=10' \ '&originUrl=' + shop_url # 这里处理解决请求会异常的问题 while True: r = requests_util.get_requests(url, request_type='proxy, no cookie') r_text = requests_util.replace_json_text(r.text, get_font_msg()) try: r_json = json.loads(r_text) # 前置验证码过滤 if r_json['code'] == 200: break except: pass # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] print('处理验证码,按任意键回车后继续:', verify_page_url) input() elif r_json['code'] == 200: shop_base_score = r_json['fiveScore'] score_title_list = r_json['shopScoreTitleList'] avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text score_list = [] for each in r_json['shopRefinedScoreValueList']: score_list.append(BeautifulSoup(each, 'lxml').text) # scores = '' # for i, score in enumerate(score_list): # scores = scores + ' ' + score_title_list[i] + score_list[i] scores = {} for i, score in enumerate(score_list): scores[score_title_list[i]] = score_list[i] # return [shop_base_score, scores, avg_price, review_count] return { '店铺id': shop_id, '店铺总分': shop_base_score, '店铺评分': scores, '人均价格': avg_price, '评论总数': review_count } else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')
def get_basic_review(shop_id): """ 获取评分、人均,评论数 @param shop_id: @return: """ assert len(shop_id) == len('H2noKWCDigM0H9c1') shop_url = get_shop_url(shop_id) url = 'http://www.dianping.com/ajax/json/shopDynamic/allReview?' \ 'shopId=' + str(shop_id) + \ '&cityId=19' \ '&shopType=10' \ '&tcv=' + str(spider_config.TCV) + \ '&_token=' + str(get_token(shop_url)) + \ '&uuid=' + str(spider_config.UUID) + \ '&platform=1' \ '&partner=150' \ '&optimusCode=10' \ '&originUrl=' + shop_url # 这里处理解决请求会异常的问题 while True: r = requests_util.get_requests(url, request_type='proxy, no cookie') r_text = requests_util.replace_json_text(r.text, get_font_msg()) try: r_json = json.loads(r_text) # 前置验证码过滤 if r_json['code'] == 200: break except: pass # 验证码处理 if r_json['code'] == 406: verify_page_url = r_json['customData']['verifyPageUrl'] print('处理验证码,按任意键回车后继续:', verify_page_url) input() get_basic_review(shop_id) elif r_json['code'] == 200: # 获取评论的标签以及每个标签的个数 summaries = [] for summary in r_json['summarys']: summaries.append({ '描述': summary['summaryString'], '个数': summary['summaryCount'] }) # 获取评论数量信息 all_review_count = r_json['reviewCountAll'] review_with_pic_count = r_json['reviewCountPic'] good_review_count = r_json['reviewCountGood'] mid_review_count = r_json['reviewCountCommon'] bad_review_count = r_json['reviewCountBad'] # 获取精选评论详情信息 reviews = [] for review in r_json['reviewAllDOList']: # 基础评论信息 review_info = review['reviewDataVO'] review_id = review_info['reviewData']['reviewId'] review_star = review_info['reviewData']['star'] review_body = BeautifulSoup(review_info['reviewData']['reviewBody'], 'lxml').text review_vote_count = review_info['reviewData']['voteCount'] review_reply_count = review_info['reviewData']['replyCount'] review_view_count = review_info['reviewData']['viewCount'] # 喜欢的菜 if review_info['reviewData']['extInfoList'] is not None: review_like_dish = review_info['reviewData']['extInfoList'][0]['values'] else: review_like_dish = [] review_avg_price = review_info['reviewData']['avgPrice'] review_publish_time = review_info['addTimeVO'] # 商家回复 review_merchant_reply = review_info['followNoteString'] # 用户评论图片 if review['picList'] is not None: review_pic_list = [] for each_pic in review['picList']: review_pic_list.append(each_pic['bigPicture']) else: review_pic_list = [] # 获取用户相关信息 review_username = review['user']['userNickName'] user_id = review['user']['userId'] # each_review = [shop_id, review_id, user_id, review_username, review_star, review_body, review_vote_count, # review_reply_count, review_view_count, review_avg_price, review_like_dish, # review_publish_time, review_merchant_reply, review_pic_list] each_review = { '店铺id': shop_id, '评论id': review_id, '用户id': user_id, '用户名': review_username, '用户打分': review_star, '评论内容': review_body, '点赞个数': review_vote_count, '回复个数': review_reply_count, '浏览次数': review_view_count, '人均价格': review_avg_price, '喜欢的菜': review_like_dish, '发布时间': review_publish_time, '商家回复': review_merchant_reply, '评论图片': review_pic_list, } reviews.append(each_review) # 推荐菜 dish_tag_list = r_json['dishTagStrList'] # return [summaries, all_review_count, good_review_count, mid_review_count, bad_review_count, # review_with_pic_count, reviews, dish_tag_list] return { '店铺id': shop_id, '评论摘要': summaries, '评论总数': all_review_count, '好评个数': good_review_count, '中评个数': mid_review_count, '差评个数': bad_review_count, '带图评论个数': review_with_pic_count, '精选评论': reviews, '推荐菜': dish_tag_list, } else: logger.warning('json响应码异常,尝试更改提pr,或者提issue')