Example #1
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])  # stringify the record id for serialization
    # Unpack the queued URL record.
    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    # Fetch the listing JSON and collect every nested video_info node.
    html = tools.get_json_by_requests(root_url, headers=headers)
    data_info = jsonpath.jsonpath(html, '$..video_info')
    for data in data_info:
        title = data.get('title')
        video_url = data.get('play_url')
        img_url = data.get('cover_url')
        release_time = stamp_to_date(data.get('upline_time'))

        # data.get('play_url') yields None when the key is absent, so test
        # truthiness rather than comparing against ''.
        if video_url:
            info_type = 1
        else:
            info_type = 2

        base_parser.save_info('content_info', site_id=SITE_ID, url=video_url, title=title, site_name=NAME,
                              content='', release_time=release_time, image_url=img_url,
                              video_url=video_url, is_out_link=1, download_image=False, is_debug=False,
                              info_type=info_type)

    base_parser.update_url('urls', root_url, Constance.DONE)
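All of these snippets rely on module-level names (SITE_ID, NAME, headers, tools, base_parser, jsonpath, Constance, stamp_to_date) defined elsewhere in the spider project. A minimal sketch of that assumed context, with placeholder values and hypothetical import paths, just so Example #1 can be read standalone:

import jsonpath
from spider import tools, base_parser, Constance              # hypothetical module paths
from spider.tools import timestamp_to_date as stamp_to_date   # assumed helper

SITE_ID = 1                              # placeholder site id
NAME = 'example-site'                    # placeholder site name
headers = {'User-Agent': 'Mozilla/5.0'}  # generic request headers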
Example #2
def parser(url_info):
    # url  = 'http://user.xiaoyouzb.net/v3/vod/small_recommend?nwtime=1571816563&sign=883f96aee2655d8885e7815de3423df7&type=1&cateId=13&pageNum=0&isFirst=N&_u=edac2c15598946bd9ba7bda78a83489c&version=4.7.0&platform=android&appx=yuntu&apppn=org.fungo.fungolive&enterprise=0&channel=tencent&market=32&os_version=8.0.0&device_model=MIX%25202&device_code=780493075490198&udid=77e2cb72797f20afdcaaa6265872cea9&androidId=220240afd2e0e640&source=android'
    root_url = url_info['url']
    cname = url_info['remark']['category_name']
    headers = {
        "User-Agent": "yuntutv/4.7.0 (Android 8.0.0)",
        "Host": "user.xiaoyouzb.net"
    }
    json_data = tools.get_json_by_requests(root_url, headers=headers)
    data_infos = json_data["data"]
    for data_info in data_infos:
        # publishTime is a millisecond epoch; trim the last three digits
        # down to seconds before formatting.
        publish_time = data_info["publishTime"]
        release_time = tools.timestamp_to_date(str(publish_time)[:-3])
        title = data_info["content"]
        content = data_info["content"]
        video_url = data_info["videoUrl"]
        img_url = data_info["coverUrl"]
        base_parser.save_info('content_info', site_id=SITE_ID, url=video_url, title=title, site_name=NAME,
                              content=content, release_time=release_time, image_url=img_url,
                              video_url=video_url, is_out_link=1, download_image=False, is_debug=False,
                              )
    base_parser.update_url('urls', root_url, Constance.DONE)
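The [:-3] trim works because publishTime is a millisecond epoch timestamp and tools.timestamp_to_date evidently expects seconds. A standard-library sketch of the same conversion (the sample value reuses the nwtime parameter from the commented URL above, with milliseconds appended):

from datetime import datetime

publish_time_ms = 1571816563000              # example millisecond timestamp
seconds = int(str(publish_time_ms)[:-3])     # 1571816563
print(datetime.fromtimestamp(seconds))       # local time for 2019-10-23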
Example #3
def parser(url_info):
    root_url = url_info['url']
    para = url_info["remark"]["para_template"]
    headers = url_info["remark"]["header_template"]
    response = requests.get(root_url, params=para, headers=headers)
    time.sleep(2)  # throttle between requests
    json_info = response.json()
    cate = url_info["remark"]["cate_name"]
    data_jsons = jsonpath(json_info, "$..items..data")
    if cate != '':
        for data_info in data_jsons:
            # Each item's data field is itself a JSON-encoded string.
            data_json = json.loads(data_info)
            title = jsonpath(data_json, "$..title")[0]
            # coverUrl embeds another JSON string; take the first URL under "L".
            img_str = glom(data_json, "coverUrl")
            img_json = json.loads(img_str)
            img_url = img_json["L"][0]
            content = jsonpath(data_json, "$..summary")[0]
            update_time = jsonpath(data_json, "$..updateTime")[0]
            # videoUrl embeds the stream sources; pick the HD variant.
            video_str = glom(data_json, "videoUrl")
            video_json = json.loads(video_str)
            video_url = video_json["source"]["hd"]
            # updateTime is a millisecond epoch; trim to seconds.
            release_time = tools.timestamp_to_date(str(update_time)[:-3])
            base_parser.save_info(
                'content_info',
                site_id=SITE_ID,
                url=video_url,
                title=title,
                site_name=NAME,
                content=content,
                release_time=release_time,
                image_url=img_url,
                video_url=video_url,
                is_out_link=1,
                download_image=False,
                is_debug=False,
            )

    base_parser.update_url('urls', root_url, Constance.DONE)
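Example #3 mixes two lookup styles: jsonpath expressions like $..title always return a list of matches, while glom takes a dotted path and returns the value itself. The API also double-encodes coverUrl and videoUrl as JSON strings, hence the extra json.loads calls. A toy illustration with invented data:

import json
from jsonpath import jsonpath
from glom import glom

item = {
    'title': 't',
    'coverUrl': json.dumps({'L': ['http://img/cover.jpg']}),      # invented sample
    'videoUrl': json.dumps({'source': {'hd': 'http://v/hd.mp4'}}),
}

print(jsonpath(item, '$..title'))    # ['t'] -- a list, hence the [0] above
print(glom(item, 'coverUrl'))        # the raw JSON string, decoded with json.loads
print(json.loads(glom(item, 'videoUrl'))['source']['hd'])  # 'http://v/hd.mp4'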
Example #4
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    data = tools.get_json_by_requests(root_url)
    # Default to an empty container so a malformed response doesn't raise.
    data_info = data.get("returnData", {}).get('news', [])
    for info in data_info:
        url = info['url']
        release_time = info['publishDate']
        title = info['title']
        # jsonpath returns False (not []) when nothing matches, so guard
        # before indexing.
        match = jsonpath.jsonpath(info['video'], '$..relativeUrl')
        video_url = match[0] if match else ''
        img_url = info['logo']

        # info_type 1 marks an entry that carries a video URL.
        if video_url:
            info_type = 1
        else:
            info_type = 2

        base_parser.save_info('content_info',
                              site_id=SITE_ID,
                              url=url,
                              title=title,
                              site_name=NAME,
                              content='',
                              release_time=release_time,
                              image_url=img_url,
                              video_url=video_url,
                              is_out_link=1,
                              download_image=False,
                              is_debug=False,
                              info_type=info_type)

    base_parser.update_url('urls', root_url, Constance.DONE)
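The guard around the jsonpath match above matters because jsonpath.jsonpath returns False, not an empty list, when the expression matches nothing; indexing the result blindly raises a TypeError. A quick demonstration:

import jsonpath

match = jsonpath.jsonpath({'video': {}}, '$..relativeUrl')
print(match)                           # False -- no match found
video_url = match[0] if match else ''  # safe fallback used in the example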
Example #5
def parser(url_info):
    url_info['_id'] = str(url_info['_id'])
    root_url = url_info['url']
    depth = url_info['depth']
    site_id = url_info['site_id']
    remark = url_info['remark']
    response = requests.get(root_url, headers=headers, verify=False)
    time.sleep(1)
    response.encoding = 'utf-8'
    html = response.json()
    # Each "articles" match is a dict of article records.
    data_info = jsonpath.jsonpath(html, "$..articles")
    for datas in data_info:
        for key, data in datas.items():
            title = data.get('title')
            url = data.get('url')
            ums_id_url = data.get("zzd_url")
            img_url = glom(data, "videos")[0]['poster']['url']
            release_time = stamp_to_date(glom(data, 'grab_time'))

            # Collect the identifiers the video endpoint needs.
            ums_id = ''.join(re.findall('ums_id=(.*?)&', ums_id_url))
            wm_id = ''.join(jsonpath.jsonpath(data, '$..wm_id'))
            wm_cid = ''.join(jsonpath.jsonpath(data, '$..outer_id'))
            share_url = glom(data, "share_url")

            token = get_token.get_cookies(url)
            headers2 = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-CN,zh;q=0.9",
                "Cache-Control": "max-age=0",
                "Connection": "keep-alive",
                "Host": "mparticle.uc.cn",
                "Sec-Fetch-Mode": "navigate",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-User": "******",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
            }
            # Resolve the playable video URL through the vps endpoint.
            video_url_info_url = f'https://mparticle.uc.cn/api/vps?token={token}&ums_id={ums_id}&wm_cid={wm_cid}&wm_id={wm_id}&resolution=high'
            video_url_info = tools.get_json_by_requests(video_url_info_url,
                                                        headers=headers2)
            time.sleep(1)
            # glom raises if the path is missing, so fall back to ''.
            video_url = glom(video_url_info, 'data.url', default='')

            # info_type 1 marks an entry with a resolvable video URL.
            if video_url:
                info_type = 1
            else:
                info_type = 2

            base_parser.save_info('content_info',
                                  site_id=SITE_ID,
                                  url=url,
                                  title=title,
                                  site_name=NAME,
                                  content='',
                                  release_time=release_time,
                                  image_url=img_url,
                                  video_url=video_url,
                                  is_out_link=1,
                                  download_image=False,
                                  is_debug=False,
                                  info_type=info_type)

    base_parser.update_url('urls', root_url, Constance.DONE)
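Reading data.url with default='' (as above) matters because glom raises PathAccessError when a path segment is missing, which would otherwise abort the whole crawl loop on one bad API response. A minimal illustration:

from glom import glom, PathAccessError

resp = {'data': {}}                              # response missing the 'url' key
try:
    glom(resp, 'data.url')                       # raises without a default
except PathAccessError:
    pass
print(repr(glom(resp, 'data.url', default='')))  # '' -- safe fallback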