def get_hotelids(url):
    """Walk the paged hotel list behind ``url`` and process every hotel found.

    ``url`` is a format template with one positional ``{}`` placeholder that
    receives the paging offset.  The offset starts at 0 and is advanced by 20
    after every request.  Each response is decoded as JSON and its
    ``"ct_pois"`` entry is read; for every item in it the ``"poiid"`` value is
    substituted into the module-level ``hotel_detail`` template and the
    resulting URL is handed to ``get_hotel_detail``.

    The loop stops after three consecutive pages without items.  A failure
    while processing a single hotel is reported (with its traceback) and the
    crawl continues with the next hotel.  Errors raised by the list request
    itself or by decoding its JSON are not caught and propagate to the caller.

    NOTE(review): a second ``get_hotelids`` definition follows in this source;
    if both live in the same module the later one is the one in effect.
    """
    session = create_meituan_session()
    session.headers["Accept"] = "application/json, text/plain, */*"
    session.headers["Origin"] = "https://hotel.meituan.com"
    session.headers["Referer"] = "https://hotel.meituan.com/chongqing/"

    temp = 0   # paging offset substituted into ``url``
    count = 0  # number of consecutive pages that came back without items
    while True:
        # Random pause between list requests.
        time.sleep(random.uniform(0.5, 2))
        page_url = url.format(temp)
        r = session.get(page_url)
        print(page_url)
        items = r.json().get("ct_pois")
        temp = temp + 20

        if not items:
            count = count + 1
            if count >= 3:
                print("酒店爬取完毕 {}".format(temp))
                break
            continue

        count = 0
        for item in items:
            url1 = hotel_detail.format(item.get("poiid"))
            try:
                get_hotel_detail(url1)
            except Exception:
                # format_exc() returns the traceback text; print_exc() returns
                # None, which made the old message always read "None".
                # Catching Exception (not everything) keeps Ctrl-C working.
                print("error hotel url wrong {}:{}".format(
                    traceback.format_exc(), url1))
def get_hotelids(url):
    """Walk the paged hotel list behind ``url`` and process every hotel found.

    ``url`` is a format template with one positional ``{}`` placeholder that
    receives the paging offset.  The offset starts at 0 and is advanced by 20
    after every request.  Requests are sent with ``Host`` set to
    ``ihotel.meituan.com``; no ``Cookie`` header is set here.  Each response
    is decoded as JSON and its ``"ct_pois"`` entry is read; for every item in
    it the ``"poiid"`` value is substituted into the module-level
    ``hotel_detail`` template and the resulting URL is handed to
    ``get_hotel_detail``.

    The loop stops after three consecutive pages without items.  A failure
    while processing a single hotel is reported (with its traceback) and the
    crawl continues with the next hotel.  Errors raised by the list request
    itself or by decoding its JSON are not caught and propagate to the caller.
    """
    session = create_meituan_session()
    session.headers["Accept"] = "application/json, text/plain, */*"
    session.headers["Origin"] = "https://hotel.meituan.com"
    session.headers["Referer"] = "https://hotel.meituan.com/chongqing/"
    session.headers["Host"] = "ihotel.meituan.com"

    temp = 0   # paging offset substituted into ``url``
    count = 0  # number of consecutive pages that came back without items
    while True:
        # Random pause between list requests.
        time.sleep(random.uniform(1, 2))
        r = session.get(url.format(temp))
        items = r.json().get("ct_pois")
        temp = temp + 20

        if not items:
            count = count + 1
            if count >= 3:
                print("酒店爬取完毕 {}".format(temp))
                break
            continue

        count = 0
        for item in items:
            url1 = hotel_detail.format(item.get("poiid"))
            try:
                get_hotel_detail(url1)
            except Exception:
                # format_exc() returns the traceback text; print_exc() returns
                # None, which made the old message always read "None".
                # Catching Exception (not everything) keeps Ctrl-C working.
                print("error hotel url wrong {}:{}".format(
                    traceback.format_exc(), url1))
def tt():
    """Send one hard-coded test POST to the waimai ``poilist`` endpoint.

    Builds a session with ``create_meituan_session``, installs a fixed set of
    request headers (including a hard-coded ``Cookie``), posts a fixed form
    to a URL carrying a hard-coded ``_token`` without following redirects,
    and prints the response text.  Takes no arguments and returns ``None``.

    The form is sent with ``data=`` so that the body really is
    ``application/x-www-form-urlencoded``, matching the ``Content-Type``
    header set below.  No ``Content-Length`` header is set by hand; the HTTP
    library derives it from the encoded body.

    NOTE(review): the cookie, ``X-FOR-WITH`` value and ``_token`` look like
    values captured from a live browser session and have presumably expired;
    confirm before relying on this request.
    """
    home_url = "https://waimai.meituan.com/home/wm7c4e547jzc"

    session = create_meituan_session()
    session.headers["Cookie"] = (
        '_ga=GA1.2.1825814531.1546998833; '
        '_lxsdk_cuid=16bd023931bc8-066f6cebecafc4-e343166-1fa400-16bd023931bc8; '
        '_hc.v=3c8a39ae-eab5-7e72-9895-1e0cefa4d0eb.1562565129; '
        'w_utmz="utm_campaign=(direct)&utm_source=(direct)&utm_medium=(none)&utm_content=(none)&utm_term=(none)"; '
        'w_uuid=sc1hcPvIyaPZBE0QsSkAhFqwX2HqN6zQxRCc74e_VCWvu6Ocy45g8S2kTudMAuwR; '
        '_ga=GA1.3.1825814531.1546998833; '
        'iuuid=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; '
        '_lxsdk=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; '
        'lsu=; webp=1; a2h=4; '
        'uuid=28a5322f-73aa-4105-9f73-e89a9f269c19; '
        'lat=29.379629; lng=106.508968; '
        '_lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; '
        '_gid=GA1.3.94908691.1563514021; '
        'waddrname="%E6%B8%9D%E5%8C%97%E5%8C%BA"; '
        'w_geoid=wm7c4e547jzc; w_cid=500112; w_cpy=yubeiqu; '
        'w_cpy_cn="%E6%B8%9D%E5%8C%97%E5%8C%BA"; '
        'w_ah="29.72392799332738,106.63755979388952,%E6%B8%9D%E5%8C%97%E5%8C%BA|29.547192882746458,106.46446477621794,%E6%B2%99%E5%9D%AA%E5%9D%9D%E5%8C%BA"; '
        'wm_order_channel=default; utm_source=; au_trace_key_net=default; '
        'openh5_uuid=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; '
        'cssVersion=e7b07c0d; '
        'w_visitid=a6e81603-aad3-4196-bbb4-1b5e142f1a3d; '
        '__mta=19355143.1562652960491.1563514446993.1563519385490.10; '
        'JSESSIONID=1fuhw9hnatu1kwuka3fzu9vpz; '
        'IJSESSIONID=15mjbuk0aqekc17d0vnnrqakfu; '
        '__utma=74597006.1825814531.1546998833.1562738511.1563519924.3; '
        '__utmc=74597006; '
        '__utmz=74597006.1563519924.3.2.utmcsr=blog.csdn.net|utmccn=(referral)|utmcmd=referral|utmcct=/xing851483876/article/details/81842329; '
        'ci3=1; rvct=30%2C1%2C45; '
        'latlng=29.607883,106.289549,1563519957917; '
        'ci=45; cityname=%E9%87%8D%E5%BA%86; '
        'i_extend=C189913015384320739764905118182476349850_b1_c0_e153957522001196166114GimthomepageallcateH__a100001__b3; '
        '__utmb=74597006.11.9.1563520047564; _gat=1; '
        '_lxsdk_s=16c08b298fb-479-720-5d5%7C%7C95'
    )
    session.headers["Host"] = "waimai.meituan.com"
    session.headers["Origin"] = "https://waimai.meituan.com"
    session.headers["Referer"] = home_url
    session.headers["X-FOR-WITH"] = "+lHk/N6Q9uaY3Tzpeuo9ROcfGvsgcl8Buo7Vs+MGKtxoqTqnAHsH4F+b8mv5Umzg8ft7p0LHh8p577Es5MYJ6WN1zgK0n8D4fISWvoap57EYNFDH2Iu9qtZNEpXYEZLN1ZNmecB/MNbW7PZP/r7IIg=="
    session.headers["X-Requested-With"] = "XMLHttpRequest"
    session.headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
    session.headers["Accept-Language"] = "zh-CN,zh;q=0.9,ja-JP;q=0.8,ja;q=0.7,en-US;q=0.6,en;q=0.5"
    session.headers["Accept-Encoding"] = "gzip, deflate, br"
    session.headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"

    test_url = "https://waimai.meituan.com/ajax/poilist?_token=eJx90ltvokAUAOD/Mq8S5wIDjEkfREuLd7BodbMPqKggAwoI6mb/+w7ttrNPS0j4cm45OeEXyJ0d6GAkHkMBVZiDDsBt1NaBAspCZKiuUoI0hFSdKmD7b4xiZmgK2OSLPuj8wAwRRWX4ZxPxREBECFIwMtFP5ctUmGjibaocUQSOZXkuOhDWQcSDqM3DqLwGaXubcXjMeAhrbmy1kGpG/NiKnf5TDsRM/tbMZIQo2CSGqP9L+kHcUJNUJYkklkTfxEzSlDQk9Q+ihpoklkTfRB/DMGtoSOqSVFKTJJL4iwZjkqYklSSSss2UbebnDmbDzza9odoc9NQcVHyD78OqOlP6zqJJll/JsfhrRGMRHVKhcHCbxEUyrR9d39u3kuh+30yes0109eyL36u7/ayOVjhRB/uT70Nc3Kbz/B5mz9f8ml6mj6s1PVuWvZy5Lda9r+1uT/fnqXugAx8f/CJSMyctnVUZtW45X4bu0jExT8JqaU17WatAhutdDovx5GXo1D5JMEkOkyDYjZbbnjkcma/87eh1jd3C86zVLC5ize0WD7Sp65WVeuHQ7w3eMc+I7dVpGuCkh7M9fF2rhvuYl6/r8Sgj94ltW+HwpfBfxLJsOB0/u60xHd49DoOjTasZZ2Uxh1V8yU/GzoMUhsl5vLzBHS/XdtHf4ysr3/uPUXWO3xewMluWM4v1amXMA7jhye1UPz2B338A6rr1bA=="

    # Form fields.  Two values differ from the previous dict on purpose:
    #  * the key is "page_size" (it was misspelled "page-size");
    #  * "originUrl" is the plain URL, because form-encoding percent-escapes
    #    it on the wire; passing the already-escaped text would escape it twice.
    data = {
        "classify_type": "cate_all",
        "sort_type": "0",
        "price_type": "0",
        "support_online_pay": "0",
        "support_invoice": "0",
        "page_offset": "21",
        "page_size": "20",
        "mtsi_font_css_version": "940b920e",
        "uuid": "sc1hcPvIyaPZBE0QsSkAhFqwX2HqN6zQxRCc74e_VCWvu6Ocy45g8S2kTudMAuwR",
        "platform": "1",
        "partner": "4",
        "originUrl": home_url,
    }

    # data= (not json=) so the body encoding agrees with the Content-Type above.
    r = session.post(test_url, data=data, allow_redirects=False)
    print(r.text)
from utils.make_sessions import create_meituan_session import json import re from bs4 import BeautifulSoup import time import random from utils.models import MeiTuanShop from utils.esbackends import es_search, EsBackends session = create_meituan_session() from utils.sqlbackends import session_scope cookies = "__mta=146208011.1562725971505.1562742466866.1562806821246.4; _lxsdk_cuid=16bd9b99c9ec8-035ab9a6954478-36664c08-1fa400-16bd9b99ca0c8; client-id=047f9384-30b4-4cce-aedb-773f7a31fd8a; mtcdn=K; uuid={}; _lx_utm=utm_source%3Dso.com%26utm_medium%3Dorganic; ci=45; rvct=45%2C114; __mta=146208011.1562725971505.1562729909942.1562742466866.3; IJSESSIONID=vguqtn4rvu70q8m6y7wyaoli; iuuid=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; cityname=%E9%87%8D%E5%BA%86; _lxsdk=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; _hc.v=10962146-cd2f-a7a9-c15a-d942a6e12989.1562744821; lat=29.551617; lng=106.460599; _lxsdk_s=16bde89f341-2f4-c1e-768%7C%7C11" def parse_jiehun_item(session, url): ess = es_search("meituan", url) if ess[0] and ess[1]: pass else: time.sleep(random.uniform(1, 3)) print("pase jiehun url {}".format(url)) resu = {} jiehun_url = "https://www.meituan.com/jiehun/{}/" # session.headers[ # "Cookie"] = "__mta=146208011.1562725971505.1562821920182.1562822162903.6; _lxsdk_cuid=16bd9b99c9ec8-035ab9a6954478-36664c08-1fa400-16bd9b99ca0c8; client-id=047f9384-30b4-4cce-aedb-773f7a31fd8a; mtcdn=K; uuid=3b49df191ddb4094bc3c.1562729907.1.0.0; _lx_utm=utm_source%3Dso.com%26utm_medium%3Dorganic; ci=45; rvct=45%2C114; IJSESSIONID=vguqtn4rvu70q8m6y7wyaoli; iuuid=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; cityname=%E9%87%8D%E5%BA%86; _lxsdk=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; _hc.v=10962146-cd2f-a7a9-c15a-d942a6e12989.1562744821; __mta=146208011.1562725971505.1562742466866.1562812609604.4; lat=29.535538; lng=106.512486; _lxsdk_s=16bdf56dfcd-a48-5e0-95b%7C%7C18" r = session.get(url, timeout=5) rule 
= r'window.AppData = (.+?);</script>' slotList = re.findall(rule, r.text) if slotList: res = json.loads(slotList[0]) # print(res)