Exemplo n.º 1
0
    def download_video(self, aweme_id):
        """Download the raw content of the video identified by ``aweme_id``.

        The video-detail URL is signed through ``gen_url``, the detail
        payload is requested, and the first entry of
        ``aweme_detail.video.play_addr.url_list`` is then downloaded.

        :param aweme_id: identifier of the video, sent both as a query
            parameter and in the request body.
        :return: the downloaded content, or ``None`` when the play address
            cannot be extracted or the download fails.
        """
        # Build a fresh dict instead of aliasing self.common_params: writing
        # 'aweme_id' into the shared dict would leak it into every later
        # request that merges self.common_params on top of its own params.
        params = {**self.common_params, 'aweme_id': aweme_id}

        real_url = gen_url(self.token, URL.video_detail_url(), params)

        post_data = {"aweme_id": aweme_id}

        # Copy the cookie template so the shared dict is left untouched.
        cookies = {
            **self.__COOKIES,
            'install_id': str(self.__device["install_id"]),
        }

        resp = fetch(real_url,
                     data=post_data,
                     cookies=cookies,
                     headers=self.__HEADERS,
                     timeout=3).json()

        try:
            play_addr = resp['aweme_detail']['video']['play_addr']['url_list'][0]
            print(play_addr)
            content = fetch(play_addr).content
        except Exception:
            # Best effort: any extraction/download failure yields None, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            self.logger.warning("提取视频信息失败...")
            content = None

        return content
Exemplo n.º 2
0
def gen_url(token, raw_url, query):
    """Return the signed form of ``raw_url`` with ``query`` attached.

    A dict ``query`` is first serialised with ``params2str``. The unsigned
    URL is POSTed as JSON to the endpoint given by ``URL.api_sign(token)``
    and the ``'url'`` field of the JSON reply is returned.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_response = requests.post(URL.api_sign(token), json={"url": unsigned_url})
    payload = sign_response.json()
    return payload['url']
Exemplo n.º 3
0
    def download_video(self, aweme_id):
        """Download the raw content of the video identified by ``aweme_id``.

        The common query parameters plus ``aweme_id`` are signed with
        ``getSign``, the video-detail endpoint is queried, and the first
        entry of ``aweme_detail.video.play_addr.url_list`` is downloaded.

        :param aweme_id: identifier of the video, sent both as a query
            parameter and in the request body.
        :return: the downloaded bytes, or ``None`` when the play address
            cannot be extracted or the download fails.
        """
        # Build a fresh dict instead of aliasing self.common_params: writing
        # 'aweme_id' into the shared dict would leak it into every later
        # request that merges self.common_params on top of its own params.
        query_params = {**self.common_params, 'aweme_id': aweme_id}

        sign = getSign(self.__get_token(), query_params)
        params = {**query_params, **sign}

        post_data = {"aweme_id": aweme_id}

        resp = requests.get(URL.video_detail_url(),
                            params=params,
                            data=post_data,
                            verify=False,
                            headers=self.__HEADERS)
        resp_result = resp.json()

        try:
            play_addr = resp_result['aweme_detail']['video']['play_addr'][
                'url_list'][0]
            content = requests.get(play_addr).content
        except Exception:
            # Best effort: any extraction/download failure yields None, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            self.logger.warning("提取视频信息失败...")
            content = None

        return content
Exemplo n.º 4
0
def get_device(token):
    """Return the decoded JSON body of a GET to ``URL.api_device(token)``.

    The request uses a 10 second timeout.
    """
    return requests.get(URL.api_device(token), timeout=10).json()
Exemplo n.º 5
0
def gen_url(token, raw_url, query):
    """Return the signed form of ``raw_url`` with ``query`` attached.

    A dict ``query`` is first serialised with ``params2str``. The unsigned
    URL is POSTed as JSON to the endpoint given by ``URL.api_sign(token)``.
    On a 200 reply the ``'url'`` field of the JSON body is returned; on any
    other status a message is printed (it attributes the failure to the
    daily API quota being used up) and ``None`` is returned.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_response = requests.post(URL.api_sign(token), json={"url": unsigned_url})

    if sign_response.status_code == 200:
        return sign_response.json()['url']

    print("您当日的 api 次数已经使用完毕, 请明日再来爬取吧...")
    return None
Exemplo n.º 6
0
def gen_real_url(token, raw_url, query):
    """Return the signed form of ``raw_url`` with ``query`` attached.

    A dict ``query`` is first serialised with ``params2str``. The unsigned
    URL is POSTed as JSON, via ``fetch``, to the endpoint given by
    ``URL.api_sign(token)``, and the ``'url'`` field of the JSON reply is
    returned.

    :raises BaseException: when ``fetch`` yields a falsy result; a notice is
        printed first.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_response = fetch(URL.api_sign(token),
                          json={"url": unsigned_url},
                          method="post")

    if sign_response:
        return sign_response.json()['url']

    print("你的当日 token 次数已经用完,请明天再来尝试吧...")
    raise BaseException("you have run out of token.Please try tomorrow")
Exemplo n.º 7
0
    def grab_video(self, user_id, action, content, max_cursor=0):
        """Fetch one page of a user's videos and download each entry.

        Two crawl types are supported: videos the user has liked
        (``action == "USER_LIKE"``) and, for any other ``action``, the
        videos the user has published.

        :param user_id: id of the user whose videos are listed.
        :param action: ``"USER_LIKE"`` selects the favourites endpoint;
            anything else selects the posts endpoint.
        :param content: when equal to ``"-m"`` the music URI of each video
            is also passed on to ``download_user_video``.
        :param max_cursor: paging cursor sent with the request.
        :return: ``(has_more, max_cursor)`` as reported by the response.
        """
        url = URL.favorite_url() if action == "USER_LIKE" else URL.post_url()

        favorite_params = copy.deepcopy(self.__USER_VIDEO_PARAMS)
        favorite_params['user_id'] = user_id
        favorite_params['max_cursor'] = max_cursor
        query_params = {**favorite_params, **self.common_params}
        real_url = gen_url(self.token, url, query_params)

        # Copy the cookie template so the shared dict is left untouched.
        cookies = {
            **self.__COOKIES,
            'install_id': str(self.__device["install_id"]),
        }

        resp = requests.get(real_url,
                            verify=False,
                            cookies=cookies,
                            headers={"User-Agent": "okhttp/3.10.0.1"},
                            timeout=3)

        favorite_info = json.loads(resp.content.decode("utf-8"))

        hasmore = favorite_info.get('has_more')
        max_cursor = favorite_info.get('max_cursor')
        # 'aweme_list' may be absent or null; treat that as an empty page
        # instead of crashing with "NoneType is not iterable".
        video_infos = favorite_info.get('aweme_list') or []

        for per_video in video_infos:
            author = per_video['author']
            music_id = (per_video['music']['play_url'].get('uri')
                        if content == "-m" else None)
            download_item = {
                "author_nick_name": author.get("nickname"),
                "video_desc": per_video.get('desc'),
                "author_uid": author.get('uid'),
                "music_id": music_id,
            }
            self.download_user_video(per_video.get("aweme_id"), **download_item)
            # Pause between downloads (presumably to throttle requests).
            time.sleep(5)

        return hasmore, max_cursor
Exemplo n.º 8
0
    def download_music(self, music_id):
        """Return the raw bytes fetched from ``URL.music_url(music_id)``.

        A falsy ``music_id`` short-circuits and yields ``None`` without
        issuing any request.
        """
        if music_id:
            response = requests.get(URL.music_url(music_id),
                                    headers=self.__HEADERS,
                                    verify=False)
            return response.content

        return None
Exemplo n.º 9
0
    def grab_video(self, user_id, action, content, max_cursor=0):
        """Fetch one page of a user's videos and download each entry.

        Two crawl types are supported: videos the user has liked
        (``action == "USER_LIKE"``) and, for any other ``action``, the
        videos the user has published.

        :param user_id: id of the user whose videos are listed.
        :param action: ``"USER_LIKE"`` selects the favourites endpoint;
            anything else selects the posts endpoint.
        :param content: when equal to ``"-m"`` the music URI of each video
            is also passed on to ``download_user_video``.
        :param max_cursor: paging cursor sent with the request.
        :return: ``(has_more, max_cursor)`` as reported by the response.
        """
        favorite_params = copy.deepcopy(self.__USER_VIDEO_PARAMS)
        favorite_params['user_id'] = user_id
        favorite_params['max_cursor'] = max_cursor
        query_params = {**favorite_params, **self.common_params}
        sign = getSign(self.__get_token(), query_params)
        params = {**query_params, **sign}

        url = URL.favorite_url() if action == "USER_LIKE" else URL.post_url()
        resp = requests.get(url,
                            params=params,
                            verify=False,
                            headers=self.__HEADERS)

        favorite_info = resp.json()

        hasmore = favorite_info.get('has_more')
        max_cursor = favorite_info.get('max_cursor')
        # 'aweme_list' may be absent or null; treat that as an empty page
        # instead of crashing with "NoneType is not iterable".
        video_infos = favorite_info.get('aweme_list') or []

        for per_video in video_infos:
            author = per_video['author']
            music_id = (per_video['music']['play_url'].get('uri')
                        if content == "-m" else None)
            download_item = {
                "author_nick_name": author.get("nickname"),
                "video_desc": per_video.get('desc'),
                "author_uid": author.get('uid'),
                "music_id": music_id,
            }
            self.download_user_video(per_video.get("aweme_id"), **download_item)
            # Pause between downloads (presumably to throttle requests).
            time.sleep(5)

        return hasmore, max_cursor
Exemplo n.º 10
0
    def __grab_comment(self, aweme_id, cursor, upvote_bound=10):
        """Fetch one page of comments for a video and store each of them.

        For every entry, the first element of ``reply_comment`` is used when
        that field is non-empty; otherwise the comment itself is used.

        :param aweme_id: id of the video whose comments are listed.
        :param cursor: paging cursor sent with the request.
        :param upvote_bound: minimum ``digg_count`` a comment must have.
        :return: ``-2`` as soon as a comment with fewer than
            ``upvote_bound`` upvotes is met (earlier comments on the page
            have already been stored); otherwise the response's ``hasmore``
            value as an int (``0`` when the field is missing).
        """
        url = URL.comment_url()
        comment_params = copy.deepcopy(self.__COMMENT_LIST_PARAMS)
        comment_params['aweme_id'] = aweme_id
        comment_params['cursor'] = cursor
        params = {**comment_params, **self.common_params}
        real_url = gen_url(self.token, url, params)

        # Copy the cookie template so the shared dict is left untouched.
        cookies = {
            **self.__COOKIES,
            'install_id': str(self.__device["install_id"]),
        }

        resp = requests.get(real_url,
                            verify=False,
                            cookies=cookies,
                            headers=self.__HEADERS)

        comment_content = json.loads(resp.content.decode("utf-8"))

        # 'comments' may be absent or null; treat that as an empty page
        # instead of crashing with "NoneType is not iterable".
        comments = comment_content.get("comments") or []

        for per_comment in comments:
            replies = per_comment.get("reply_comment")
            source = replies[0] if replies else per_comment

            upvote_count = source.get("digg_count")
            comment_info = {
                "text": source.get("text"),
                "upvote_count": upvote_count,
                "nick_name": source['user'].get("nickname"),
                "user_id": source['user'].get("uid"),
            }

            # A missing count is treated as zero rather than raising on
            # int(None).
            if int(upvote_count or 0) < upvote_bound:
                return -2

            self.download_comment(aweme_id, **comment_info)

        return int(comment_content.get("hasmore") or 0)
Exemplo n.º 11
0
    def __grab_comment(self, aweme_id, cursor, upvote_bound=10):
        """Fetch one page of comments for a video and store each of them.

        For every entry, the first element of ``reply_comment`` is used when
        that field is non-empty; otherwise the comment itself is used.

        :param aweme_id: id of the video whose comments are listed.
        :param cursor: paging cursor sent with the request.
        :param upvote_bound: minimum ``digg_count`` a comment must have.
        :return: ``-2`` as soon as a comment with fewer than
            ``upvote_bound`` upvotes is met (earlier comments on the page
            have already been stored); otherwise the response's ``hasmore``
            value as an int (``0`` when the field is missing).
        """
        comment_params = copy.deepcopy(self.__COMMENT_LIST_PARAMS)
        comment_params['aweme_id'] = aweme_id
        comment_params['cursor'] = cursor
        query_params = {**comment_params, **self.common_params}
        sign = getSign(self.__get_token(), query_params)
        params = {**query_params, **sign}
        resp = requests.get(URL.comment_url(),
                            params=params,
                            verify=False,
                            headers=self.__HEADERS)
        comment_content = resp.json()

        # 'comments' may be absent or null; treat that as an empty page
        # instead of crashing with "NoneType is not iterable".
        comments = comment_content.get("comments") or []

        for per_comment in comments:
            replies = per_comment.get("reply_comment")
            source = replies[0] if replies else per_comment

            upvote_count = source.get("digg_count")
            comment_info = {
                "text": source.get("text"),
                "upvote_count": upvote_count,
                "nick_name": source['user'].get("nickname"),
                "user_id": source['user'].get("uid"),
            }

            print(upvote_count)

            # A missing count is treated as zero rather than raising on
            # int(None).
            if int(upvote_count or 0) < upvote_bound:
                return -2

            self.download_comment(aweme_id, **comment_info)

        return int(comment_content.get("hasmore") or 0)
Exemplo n.º 12
0
    def like_video(self, aweme_id):
        """POST a like request for ``aweme_id`` and print the JSON reply.

        The query string is ``pass-region=1`` plus the common parameters,
        signed with ``getSign``. The request goes through ``self.__request``,
        whose cookies are printed beforehand. Nothing is returned.
        """
        base_query = {"pass-region": "1", **self.common_params}
        signature = getSign(self.__get_token(), base_query)
        signed_query = {**base_query, **signature}

        body = {"aweme_id": aweme_id, "type": 1}

        # Work on a private copy so the shared header template stays intact.
        request_headers = copy.deepcopy(self.__HEADERS)
        extra_headers = (
            ("sdk-version", '1'),
            ("Accept-Encoding", 'br, gzip, deflate'),
        )
        for header_name, header_value in extra_headers:
            request_headers[header_name] = header_value

        print(self.__request.cookies)

        response = self.__request.post(
            URL.like_video_url(),
            params=signed_query,
            data=body,
            verify=False,
            headers=request_headers,
        )

        print(response.json())
0
def gen_device(token):
    """Return the decoded JSON body fetched from ``URL.api_device(token)``.

    The request is made through ``fetch`` with a 10 second timeout.
    """
    device_endpoint = URL.api_device(token)
    device_response = fetch(device_endpoint, timeout=10)
    return device_response.json()