Exemplo n.º 1
0
def get_hotelids(url):
    """Walk a paginated hotel listing and fetch the detail page of each hotel.

    Args:
        url: Format string with one positional placeholder that receives the
            paging offset (0, 20, 40, ...). Each response is parsed as JSON
            and its ``ct_pois`` entry is read as the list of hotels.

    For every listed item, the module-level ``hotel_detail`` format string is
    filled with the item's ``poiid`` and passed to ``get_hotel_detail``. A
    failure on one hotel is reported and the crawl moves on to the next one.
    The loop ends after three consecutive pages that contain no items.

    Returns:
        None.
    """
    session = create_meituan_session()
    session.headers["Accept"] = "application/json, text/plain, */*"
    session.headers["Origin"] = "https://hotel.meituan.com"
    session.headers["Referer"] = "https://hotel.meituan.com/chongqing/"
    offset = 0
    empty_pages = 0  # consecutive pages without any items
    while True:
        # Random pause between listing requests.
        time.sleep(random.uniform(0.5, 2))
        page_url = url.format(offset)
        r = session.get(page_url)
        print(page_url)
        items = r.json().get("ct_pois")
        offset += 20
        if not items:
            empty_pages += 1
            if empty_pages >= 3:
                # Message reads "hotel crawl finished"; the offset printed is
                # the one that would have been requested next.
                print("酒店爬取完毕 {}".format(offset))
                break
            continue
        empty_pages = 0
        for item in items:
            url1 = hotel_detail.format(item.get("poiid"))
            try:
                get_hotel_detail(url1)
            except Exception:
                # format_exc() returns the traceback text; print_exc() would
                # return None and the message would literally say "None".
                # Catching Exception (not a bare except) keeps Ctrl-C working.
                print("error hotel url wrong {}:{}".format(
                    traceback.format_exc(), url1))
Exemplo n.º 2
0
def get_hotelids(url):
    """Walk a paginated hotel listing and fetch the detail page of each hotel.

    Args:
        url: Format string with one positional placeholder that receives the
            paging offset (0, 20, 40, ...). Each response is parsed as JSON
            and its ``ct_pois`` entry is read as the list of hotels.

    For every listed item, the module-level ``hotel_detail`` format string is
    filled with the item's ``poiid`` and passed to ``get_hotel_detail``. A
    failure on one hotel is reported and the crawl moves on to the next one.
    The loop ends after three consecutive pages that contain no items.

    Returns:
        None.
    """
    session = create_meituan_session()
    session.headers["Accept"] = "application/json, text/plain, */*"
    session.headers["Origin"] = "https://hotel.meituan.com"
    session.headers["Referer"] = "https://hotel.meituan.com/chongqing/"
    session.headers["Host"] = "ihotel.meituan.com"
    # No Cookie header is set here; a hard-coded one was previously
    # commented out.
    offset = 0
    empty_pages = 0  # consecutive pages without any items
    while True:
        # Random pause between listing requests.
        time.sleep(random.uniform(1, 2))
        r = session.get(url.format(offset))
        items = r.json().get("ct_pois")
        offset += 20
        if not items:
            empty_pages += 1
            if empty_pages >= 3:
                # Message reads "hotel crawl finished"; the offset printed is
                # the one that would have been requested next.
                print("酒店爬取完毕 {}".format(offset))
                break
            continue
        empty_pages = 0
        for item in items:
            url1 = hotel_detail.format(item.get("poiid"))
            try:
                get_hotel_detail(url1)
            except Exception:
                # format_exc() returns the traceback text; print_exc() would
                # return None and the message would literally say "None".
                # Catching Exception (not a bare except) keeps Ctrl-C working.
                print("error hotel url wrong {}:{}".format(traceback.format_exc(), url1))
Exemplo n.º 3
0
def tt():
    """Send one hard-coded test POST to the waimai ``poilist`` endpoint.

    Builds a session with ``create_meituan_session``, overrides a set of
    request headers (including a captured Cookie and ``X-FOR-WITH`` value),
    posts a fixed payload to a fixed, pre-tokenised URL without following
    redirects, and prints the response text.

    Takes no arguments and returns None. All values are literals captured
    from a single browser session, so the request is only a manual probe.
    """
    session = create_meituan_session()
    session.headers["Cookie"] = (
        """_ga=GA1.2.1825814531.1546998833; _lxsdk_cuid=16bd023931bc8-066f6cebecafc4-e343166-1fa400-16bd023931bc8; _hc.v=3c8a39ae-eab5-7e72-9895-1e0cefa4d0eb.1562565129; w_utmz="utm_campaign=(direct)&utm_source=(direct)&utm_medium=(none)&utm_content=(none)&utm_term=(none)"; w_uuid=sc1hcPvIyaPZBE0QsSkAhFqwX2HqN6zQxRCc74e_VCWvu6Ocy45g8S2kTudMAuwR; _ga=GA1.3.1825814531.1546998833; iuuid=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; _lxsdk=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; lsu=; webp=1; a2h=4; uuid=28a5322f-73aa-4105-9f73-e89a9f269c19; lat=29.379629; lng=106.508968; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _gid=GA1.3.94908691.1563514021; waddrname="%E6%B8%9D%E5%8C%97%E5%8C%BA"; w_geoid=wm7c4e547jzc; w_cid=500112; w_cpy=yubeiqu; w_cpy_cn="%E6%B8%9D%E5%8C%97%E5%8C%BA"; w_ah="29.72392799332738,106.63755979388952,%E6%B8%9D%E5%8C%97%E5%8C%BA|29.547192882746458,106.46446477621794,%E6%B2%99%E5%9D%AA%E5%9D%9D%E5%8C%BA"; wm_order_channel=default; utm_source=; au_trace_key_net=default; openh5_uuid=C1EA418E59192BE72919EF4468CFA088AFC416E2D10120BB18440DA3BF854258; cssVersion=e7b07c0d; w_visitid=a6e81603-aad3-4196-bbb4-1b5e142f1a3d; __mta=19355143.1562652960491.1563514446993.1563519385490.10; JSESSIONID=1fuhw9hnatu1kwuka3fzu9vpz; IJSESSIONID=15mjbuk0aqekc17d0vnnrqakfu; __utma=74597006.1825814531.1546998833.1562738511.1563519924.3; __utmc=74597006; __utmz=74597006.1563519924.3.2.utmcsr=blog.csdn.net|utmccn=(referral)|utmcmd=referral|utmcct=/xing851483876/article/details/81842329; ci3=1; rvct=30%2C1%2C45; latlng=29.607883,106.289549,1563519957917; ci=45; cityname=%E9%87%8D%E5%BA%86; i_extend=C189913015384320739764905118182476349850_b1_c0_e153957522001196166114GimthomepageallcateH__a100001__b3; __utmb=74597006.11.9.1563520047564; _gat=1; _lxsdk_s=16c08b298fb-479-720-5d5%7C%7C95"""
    )
    session.headers["Host"] = "waimai.meituan.com"
    session.headers["Origin"] = "https://waimai.meituan.com"
    session.headers["Referer"] = "https://waimai.meituan.com/home/wm7c4e547jzc"
    session.headers["X-FOR-WITH"] = (
        "+lHk/N6Q9uaY3Tzpeuo9ROcfGvsgcl8Buo7Vs+MGKtxoqTqnAHsH4F+b8mv5Umzg8ft7p0LHh8p577Es5MYJ6WN1zgK0n8D4fISWvoap57EYNFDH2Iu9qtZNEpXYEZLN1ZNmecB/MNbW7PZP/r7IIg=="
    )
    session.headers["X-Requested-With"] = "XMLHttpRequest"
    session.headers["Accept"] = "application/json, text/javascript, */*; q=0.01"
    session.headers["Accept-Language"] = (
        "zh-CN,zh;q=0.9,ja-JP;q=0.8,ja;q=0.7,en-US;q=0.6,en;q=0.5"
    )
    session.headers["Accept-Encoding"] = "gzip, deflate, br"
    session.headers["Content-Type"] = (
        "application/x-www-form-urlencoded; charset=UTF-8"
    )
    # NOTE(review): hard-coded length; confirm it is still wanted, since the
    # HTTP client normally derives Content-Length from the actual body.
    session.headers["Content-Length"] = "330"

    test_url = "https://waimai.meituan.com/ajax/poilist?_token=eJx90ltvokAUAOD/Mq8S5wIDjEkfREuLd7BodbMPqKggAwoI6mb/+w7ttrNPS0j4cm45OeEXyJ0d6GAkHkMBVZiDDsBt1NaBAspCZKiuUoI0hFSdKmD7b4xiZmgK2OSLPuj8wAwRRWX4ZxPxREBECFIwMtFP5ctUmGjibaocUQSOZXkuOhDWQcSDqM3DqLwGaXubcXjMeAhrbmy1kGpG/NiKnf5TDsRM/tbMZIQo2CSGqP9L+kHcUJNUJYkklkTfxEzSlDQk9Q+ihpoklkTfRB/DMGtoSOqSVFKTJJL4iwZjkqYklSSSss2UbebnDmbDzza9odoc9NQcVHyD78OqOlP6zqJJll/JsfhrRGMRHVKhcHCbxEUyrR9d39u3kuh+30yes0109eyL36u7/ayOVjhRB/uT70Nc3Kbz/B5mz9f8ml6mj6s1PVuWvZy5Lda9r+1uT/fnqXugAx8f/CJSMyctnVUZtW45X4bu0jExT8JqaU17WatAhutdDovx5GXo1D5JMEkOkyDYjZbbnjkcma/87eh1jd3C86zVLC5ize0WD7Sp65WVeuHQ7w3eMc+I7dVpGuCkh7M9fF2rhvuYl6/r8Sgj94ltW+HwpfBfxLJsOB0/u60xHd49DoOjTasZZ2Uxh1V8yU/GzoMUhsl5vLzBHS/XdtHf4ysr3/uPUXWO3xewMluWM4v1amXMA7jhye1UPz2B338A6rr1bA=="

    data = {
        "classify_type": "cate_all",
        "sort_type": "0",
        "price_type": "0",
        "support_online_pay": "0",
        "support_invoice": "0",
        "page_offset": "21",
        "page-size": "20",
        "mtsi_font_css_version": "940b920e",
        "uuid":
        "sc1hcPvIyaPZBE0QsSkAhFqwX2HqN6zQxRCc74e_VCWvu6Ocy45g8S2kTudMAuwR",
        "platform": "1",
        "partner": "4",
        "originUrl": "https%3A%2F%2Fwaimai.meituan.com%2Fhome%2Fwm7c4e547jzc",
    }
    # NOTE(review): the payload is passed via ``json=`` although the
    # Content-Type header above declares a form-urlencoded body, and the
    # "page-size" key uses a hyphen unlike its sibling keys. Confirm what
    # the endpoint expects before relying on this probe.
    r = session.post(test_url, json=data, allow_redirects=False)
    print(r.text)
Exemplo n.º 4
0
import json
import random
import re
import time

from bs4 import BeautifulSoup

from utils.make_sessions import create_meituan_session
from utils.models import MeiTuanShop
from utils.esbackends import es_search, EsBackends
from utils.sqlbackends import session_scope

# Module-level session, created once when this module is imported.
session = create_meituan_session()

# Raw Cookie header text. The ``uuid={}`` field is a format placeholder
# (presumably filled with str.format by a caller; usage not visible here).
cookies = "__mta=146208011.1562725971505.1562742466866.1562806821246.4; _lxsdk_cuid=16bd9b99c9ec8-035ab9a6954478-36664c08-1fa400-16bd9b99ca0c8; client-id=047f9384-30b4-4cce-aedb-773f7a31fd8a; mtcdn=K; uuid={}; _lx_utm=utm_source%3Dso.com%26utm_medium%3Dorganic; ci=45; rvct=45%2C114; __mta=146208011.1562725971505.1562729909942.1562742466866.3; IJSESSIONID=vguqtn4rvu70q8m6y7wyaoli; iuuid=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; cityname=%E9%87%8D%E5%BA%86; _lxsdk=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; _hc.v=10962146-cd2f-a7a9-c15a-d942a6e12989.1562744821; lat=29.551617; lng=106.460599; _lxsdk_s=16bde89f341-2f4-c1e-768%7C%7C11"


def parse_jiehun_item(session, url):
    ess = es_search("meituan", url)
    if ess[0] and ess[1]:
        pass
    else:
        time.sleep(random.uniform(1, 3))
        print("pase jiehun url {}".format(url))
        resu = {}
        jiehun_url = "https://www.meituan.com/jiehun/{}/"
        # session.headers[
        #     "Cookie"] = "__mta=146208011.1562725971505.1562821920182.1562822162903.6; _lxsdk_cuid=16bd9b99c9ec8-035ab9a6954478-36664c08-1fa400-16bd9b99ca0c8; client-id=047f9384-30b4-4cce-aedb-773f7a31fd8a; mtcdn=K; uuid=3b49df191ddb4094bc3c.1562729907.1.0.0; _lx_utm=utm_source%3Dso.com%26utm_medium%3Dorganic; ci=45; rvct=45%2C114; IJSESSIONID=vguqtn4rvu70q8m6y7wyaoli; iuuid=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; cityname=%E9%87%8D%E5%BA%86; _lxsdk=8369B0074906E31235D094B1D10CB5398B04DC92AAFDBADB7477CB96EEFF986E; _hc.v=10962146-cd2f-a7a9-c15a-d942a6e12989.1562744821; __mta=146208011.1562725971505.1562742466866.1562812609604.4; lat=29.535538; lng=106.512486; _lxsdk_s=16bdf56dfcd-a48-5e0-95b%7C%7C18"
        r = session.get(url, timeout=5)
        rule = r'window.AppData = (.+?);</script>'
        slotList = re.findall(rule, r.text)
        if slotList:
            res = json.loads(slotList[0])
            # print(res)