Пример #1
0
    def subscribers_parse(self, response, variables):
        """Parse one page of followers and schedule exactly one follow-up request.

        Yields a loaded ``InstaparserItem`` for every edge under
        ``data.user.edge_followed_by``, then a single request: the next
        followers page while ``has_next_page`` is set, otherwise the first
        request handled by ``subscriptions_parse``.

        Args:
            response: Response whose text is the JSON body of the query.
            variables: Query variables of the current request. ``after`` is
                overwritten in place when another followers page exists.
        """
        followed_by = json.loads(response.text)['data']['user']['edge_followed_by']
        page_info = followed_by['page_info']

        for edge in followed_by['edges']:
            node = edge['node']
            loader = ItemLoader(item=InstaparserItem())
            loader.add_value('_id', node['id'])
            loader.add_value('username', node['username'])
            loader.add_value('full_name', node['full_name'])
            loader.add_value('is_private', node['is_private'])
            loader.add_value('profile_pic_url', node['profile_pic_url'])
            loader.add_value('type_user', 'subscribers')
            yield loader.load_item()

        # The follow-up request is decided once per page. Doing it inside the
        # loop re-issued the same request for every follower and issued none
        # at all for a page without edges.
        if page_info.get('has_next_page'):
            variables['after'] = page_info['end_cursor']
            next_url = f'{self.graphql_link}query_hash={self.hash_followers}&{urlencode(variables)}'
            yield response.follow(
                next_url,
                callback=self.subscribers_parse,
                cb_kwargs={'variables': deepcopy(variables)},
            )
        else:
            # A cursor taken from the followers listing must not leak into the
            # first subscriptions request, so ``after`` is dropped here.
            first_page = {key: value for key, value in variables.items() if key != 'after'}
            first_url = f'{self.graphql_link}query_hash={self.hash_following}&{urlencode(first_page)}'
            yield response.follow(
                first_url,
                callback=self.subscriptions_parse,
                cb_kwargs={'variables': deepcopy(first_page)},
            )
Пример #2
0
    def user_posts_parse(self, response: HtmlResponse, username, user_id, variables, followed_by=False):
        """Parse one page of a user's follow edges and emit an item per account.

        Reads ``edge_followed_by`` when ``followed_by`` is true and
        ``edge_follow`` otherwise. If another page exists, the request for it
        is yielded first; then one ``InstaparserItem`` is yielded per edge.
        Nothing is yielded when the selected edge collection is absent.

        Args:
            response: Response whose text is the JSON body of the query.
            username: Forwarded unchanged to the next page's callback.
            user_id: Stored in ``is_followed_by`` or ``follows`` of each item.
            variables: Query variables; ``after`` is updated in place.
            followed_by: Selects which edge collection is read.
        """
        edge_key = 'edge_followed_by' if followed_by else 'edge_follow'
        connection = json.loads(response.text).get('data').get('user').get(edge_key)
        if connection is None:
            return

        page_info = connection.get('page_info') or {}
        if page_info.get('has_next_page'):
            variables['after'] = page_info['end_cursor']
            # NOTE(review): the same query hash is used for both directions —
            # confirm it matches the edge selected by ``followed_by``.
            url_posts = f'{self.graphql_url}query_hash={self.user_followers_hash}&{urlencode(variables)}'
            yield response.follow(
                url_posts,
                callback=self.user_posts_parse,
                cb_kwargs={
                    'username': username,
                    'user_id': user_id,
                    # Snapshot, so the in-place ``after`` update of a later
                    # page cannot alter an already queued request.
                    'variables': dict(variables),
                    # Without this the next page fell back to the default
                    # and read the other edge collection.
                    'followed_by': followed_by,
                },
            )

        for edge in connection.get('edges') or []:
            node = edge.get('node')
            yield InstaparserItem(
                user_id=node.get('id'),
                user_name=node.get('username'),
                full_name=node.get('full_name'),
                photo=node.get('profile_pic_url'),
                is_followed_by=user_id if followed_by else None,
                follows=None if followed_by else user_id,
            )
Пример #3
0
 def parse_following(self, response: HtmlResponse, username, user_id,
                     variables):
     """Collect one page of accounts the user follows into a single item.

     If the page reports ``has_next_page``, a request for the next page is
     yielded first. Then one ``InstaparserItem`` is yielded whose
     ``followings`` field lists a summary dict for every edge on this page.

     Args:
         response: Response whose JSON body carries ``data.user.edge_follow``.
         username: Stored on the item and forwarded to the next page.
         user_id: Stored on the item and forwarded to the next page.
         variables: Query variables; ``after`` is updated in place.
     """
     # Function-scope import: the original method did not use ``json``.
     import json

     edge_follow = response.json()["data"]["user"]["edge_follow"]
     # Tolerate a missing or null ``page_info`` instead of raising TypeError.
     page_info = edge_follow.get("page_info") or {}
     if page_info.get("has_next_page"):
         variables["after"] = page_info["end_cursor"]
         # Serialise with json.dumps rather than patching ``str(dict)``: the
         # old approach stripped spaces inside values and emitted Python
         # literals (True/False/None) that are not valid JSON.
         str_variables = quote(
             json.dumps(variables, separators=(",", ":"), ensure_ascii=False))
         url = self.graphql_url + f"query_hash={self.following_hash}&variables={str_variables}"
         yield response.follow(url,
                               callback=self.parse_following,
                               cb_kwargs={
                                   "username": username,
                                   "user_id": user_id,
                                   "variables": deepcopy(variables)
                               })

     followings_summary = [
         {
             'username': edge['node']['username'],
             'user_id': edge['node']['id'],
             'photo': edge['node']['profile_pic_url'],
             'is_private': edge['node']['is_private'],
         }
         for edge in edge_follow["edges"]
     ]
     yield InstaparserItem(followings=followings_summary,
                           user_id=user_id,
                           username=username)
Пример #4
0
    def user_data_subscribers_parse(self, response: HtmlResponse, username,
                                    user_id, variables):
        """Process one ``edge_follow`` page of the user's subscriptions.

        When the page reports ``has_next_page``, the request for the following
        page is yielded first; afterwards one ``InstaparserItem`` tagged
        ``data_type='subscribe'`` is yielded per edge.

        Args:
            response: Response whose text is the JSON body of the query.
            username: Forwarded unchanged to the next page's callback.
            user_id: Stored as ``subscriber_owner_id`` and forwarded.
            variables: Query variables; ``after`` is updated in place.
        """
        edge_follow = json.loads(response.text)['data']['user']['edge_follow']
        paging = edge_follow['page_info']

        if paging['has_next_page']:
            variables['after'] = paging['end_cursor']
            next_page = f'{self.graphql_url}query_hash={self.query_subscriber_hash}&{urlencode(variables)}'
            yield response.follow(
                next_page,
                callback=self.user_data_subscribers_parse,
                cb_kwargs={
                    'username': username,
                    'user_id': user_id,
                    # Copy so each queued request keeps its own cursor.
                    'variables': deepcopy(variables),
                },
            )

        for edge in edge_follow['edges']:
            node = edge['node']
            yield InstaparserItem(
                subscriber_username=node['username'],
                subscriber_fullname=node['full_name'],
                subscriber_id=node['id'],
                subscriber_profile_pic_url=node['profile_pic_url'],
                subscriber_data=edge,
                subscriber_owner_id=user_id,
                data_type='subscribe',
            )
Пример #5
0
 def user_data_followers(
         self, response: HtmlResponse, username, user_id, variables
 ):  # Receives the response plus the values forwarded through cb_kwargs
     """Emit one item per follower on this page, then queue the next page.

     Every edge under ``data.user.edge_followed_by`` becomes an
     ``InstaparserItem`` with ``user_type='followers'``. After the items, a
     request for the next page is yielded if ``has_next_page`` is set.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     followed_by = json.loads(response.text).get('data').get('user').get(
         'edge_followed_by')

     for edge in followed_by.get('edges'):
         node = edge['node']
         yield InstaparserItem(
             user_id=user_id,
             user_type='followers',
             user_fid=node['id'],
             user_name=node['username'],
             user_full_name=node['full_name'],
             user_userpic=node['profile_pic_url'])

     paging = followed_by.get('page_info')
     if not paging.get('has_next_page'):
         return

     # The cursor of this page becomes the ``after`` of the next request.
     variables['after'] = paging['end_cursor']
     next_page = f'{self.graphql_url}query_hash={self.query_hash_followers}&{urlencode(variables)}'
     yield response.follow(next_page,
                           callback=self.user_data_followers,
                           cb_kwargs={
                               'username': username,
                               'user_id': user_id,
                               'variables': deepcopy(variables)
                           })
Пример #6
0
 def user_subscribers_parse(self, response: HtmlResponse, username, user_id,
                            variables):
     """Handle one ``edge_followed_by`` page: queue the next page, emit items.

     Args:
         response: Response whose JSON body carries
             ``data.user.edge_followed_by``.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored as ``subscribe_user_id`` on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     followed_by = response.json().get('data').get('user').get(
         'edge_followed_by')
     paging = followed_by.get('page_info')

     if paging.get('has_next_page'):
         variables['after'] = paging.get('end_cursor')
         next_page = f'{self.graphql_url}query_hash={self.subscriber_hash}&{urlencode(variables)}'
         yield response.follow(next_page,
                               callback=self.user_subscribers_parse,
                               cb_kwargs={
                                   'username': username,
                                   'user_id': user_id,
                                   'variables': deepcopy(variables)
                               })

     for edge in followed_by.get('edges'):
         node = edge.get('node')
         yield InstaparserItem(
             subscribe_user_id=user_id,
             photo=node.get('profile_pic_url'),
             user_id=node.get('id'),
             user_name=node.get('username'),
             user_data=node)
Пример #7
0
    def user_subscription_parse(self, response: HtmlResponse, username,
                                user_id, variables):
        """Handle one ``edge_follow`` page: queue the next page, emit items.

        Each edge yields an ``InstaparserItem`` that pairs the subscribing
        user (``user_id`` / ``username``) with the account found in the edge's
        ``node``.

        Args:
            response: Response whose JSON body carries ``data.user.edge_follow``.
            username: Stored as ``subscriber_username`` and forwarded.
            user_id: Stored as ``user_id`` and ``subscriber_id`` and forwarded.
            variables: Query variables; ``after`` is updated in place.
        """
        edge_follow = response.json().get('data').get('user').get('edge_follow')
        paging = edge_follow.get('page_info')

        if paging.get('has_next_page'):
            variables['after'] = paging.get('end_cursor')
            next_page = f'{self.graphql_url}query_hash={self.subscription_hash}&{urlencode(variables)}'
            yield response.follow(next_page,
                                  callback=self.user_subscription_parse,
                                  cb_kwargs={
                                      'username': username,
                                      'user_id': user_id,
                                      'variables': deepcopy(variables)
                                  })

        for edge in edge_follow.get('edges'):
            node = edge.get('node')
            yield InstaparserItem(
                user_id=user_id,
                subscription_id=node.get('id'),
                subscription_username=node.get('username'),
                subscriber_id=user_id,
                subscriber_username=username,
                full_name=node.get('full_name'),
                profile_pic_url=node.get('profile_pic_url'),
                post_data=node)
Пример #8
0
 def user_subscriptions_continue(
         self, response: HtmlResponse, username, user_id, variables
 ):  # Receives the response plus the values forwarded through cb_kwargs
     """Handle one ``edge_follow`` page: queue the next page, emit every item.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     edge_follow = json.loads(response.text).get('data').get('user').get(
         'edge_follow')
     page_info = edge_follow.get('page_info')
     if page_info.get('has_next_page'):  # Another page exists
         # The cursor of this page becomes the ``after`` of the next request.
         variables['after'] = page_info['end_cursor']
         url_subscriptions = f'{self.graphql_url}query_hash={self.subscriptions_hash}&{urlencode(variables)}'
         yield response.follow(url_subscriptions,
                               callback=self.user_subscriptions_continue,
                               cb_kwargs={
                                   'username': username,
                                   'user_id': user_id,
                                   'variables': deepcopy(variables)
                               })

     for subscription in edge_follow.get('edges'):
         node = subscription['node']
         # Yield inside the loop: yielding after it emitted only the last
         # subscription and raised UnboundLocalError on an empty page.
         yield InstaparserItem(
             user_id=user_id,
             subscription_id=node['id'],
             subscription_name=node['username'],
             subscription_photo=node['profile_pic_url'],
             subscription=node)
Пример #9
0
    def users_parse(self, response: HtmlResponse, target_username, type,
                    variables):
        """Handle one page of followers or followings and emit every account.

        ``type == 'followers'`` selects ``edge_followed_by``; any other value
        selects ``edge_follow``. If another page exists its request is yielded
        first, then one ``InstaparserItem`` per edge of the current page.

        Args:
            response: Response whose text is the JSON body of the query.
            target_username: Used to build ``insert_to_collection``.
            type: Listing kind; also part of ``insert_to_collection``.
            variables: Query variables; ``after`` is updated in place.
        """
        type_field = 'edge_followed_by' if type == 'followers' else 'edge_follow'
        connection = json.loads(response.text).get('data').get('user').get(type_field)

        page_info = connection.get('page_info')
        if page_info['has_next_page']:
            variables['after'] = page_info['end_cursor']

            # Keep everything before the first '&' of the current URL.
            # partition() leaves the URL intact when there is no '&', whereas
            # slicing with find() == -1 silently dropped its last character.
            base_url = response.url.partition('&')[0]
            url = f"{base_url}&{urlencode(variables)}"
            yield response.follow(url,
                                  callback=self.users_parse,
                                  cb_kwargs={
                                      'target_username': target_username,
                                      'type': type,
                                      'variables': deepcopy(variables)
                                  })

        # Items are emitted for every page. They used to be nested under the
        # has_next_page branch, so the final page was never emitted.
        for edge in connection.get('edges'):
            node = edge.get('node')
            yield InstaparserItem(
                _id=node.get('id'),
                user_name=node.get('username'),
                full_name=node.get('full_name'),
                photo=node.get('profile_pic_url'),
                insert_to_collection=f'{target_username}_{type}')
Пример #10
0
 def user_posts_parse(
         self, response: HtmlResponse, username, user_id, variables
 ):  # Receives the response plus the values forwarded through cb_kwargs
     """Handle one ``edge_owner_to_timeline_media`` page and emit every post.

     If another page exists its request is yielded first, then one
     ``InstaparserItem`` per edge of the current page.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     timeline = json.loads(response.text).get('data').get('user').get(
         'edge_owner_to_timeline_media')
     page_info = timeline.get('page_info')
     if page_info.get('has_next_page'):  # Another page exists
         # The cursor of this page becomes the ``after`` of the next request.
         variables['after'] = page_info['end_cursor']
         url_posts = f'{self.graphql_url}query_hash={self.posts_hash}&{urlencode(variables)}'
         yield response.follow(url_posts,
                               callback=self.user_posts_parse,
                               cb_kwargs={
                                   'username': username,
                                   'user_id': user_id,
                                   'variables': deepcopy(variables)
                               })

     for post in timeline.get('edges'):
         node = post['node']
         # Yield inside the loop: yielding after it emitted only the last
         # post and raised UnboundLocalError on an empty page.
         yield InstaparserItem(
             user_id=user_id,
             photo=node['display_url'],
             likes=node['edge_media_preview_like']['count'],
             post=node)
Пример #11
0
 def user_follower_parse(self, response: HtmlResponse, username, user_id,
                         variables):
     """Handle one ``edge_followed_by`` page: queue the next page, emit items.

     Each edge yields an ``InstaparserItem`` with the follower's id and name;
     ``following_id`` and ``following_name`` are always ``None``.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     followed_by = json.loads(response.text).get('data').get('user').get(
         'edge_followed_by')
     paging = followed_by.get('page_info')

     if paging.get('has_next_page'):
         variables['after'] = paging['end_cursor']
         # NOTE(review): a followers page is continued with ``following_hash``
         # — confirm this attribute holds the intended query hash.
         next_page = f'{self.graphql_url}query_hash={self.following_hash}&{urlencode(variables)}'
         yield response.follow(next_page,
                               callback=self.user_follower_parse,
                               cb_kwargs={
                                   'username': username,
                                   'user_id': user_id,
                                   'variables': deepcopy(variables)
                               })

     for edge in followed_by.get('edges'):
         node = edge['node']
         yield InstaparserItem(user_id=user_id,
                               following_id=None,
                               following_name=None,
                               follower_id=node['id'],
                               follower_name=node['username'],
                               node=node)
Пример #12
0
    def users_parse(self, response: HtmlResponse, target_username, flag, variables):
        """Handle one page of followers or followings and emit every account.

        ``flag == 'followers'`` selects ``edge_followed_by``; any other value
        selects ``edge_follow``. If another page exists its request is yielded
        first, then one ``InstaparserItem`` per edge of the current page.

        Args:
            response: Response whose text is the JSON body of the query.
            target_username: Used to build ``insert_to_collection``.
            flag: Listing kind; also part of ``insert_to_collection``.
            variables: Query variables; ``after`` is updated in place.
        """
        # Pick the edge collection that matches the requested listing.
        type_field = 'edge_followed_by' if flag == 'followers' else 'edge_follow'
        connection = json.loads(response.text).get('data').get('user').get(type_field)

        page_info = connection.get('page_info')
        # When another page exists, take its cursor and re-enter this callback.
        if page_info['has_next_page']:
            variables['after'] = page_info['end_cursor']

            # Keep everything before the first '&' of the current URL.
            # partition() leaves the URL intact when there is no '&', whereas
            # slicing with find() == -1 silently dropped its last character.
            base_url = response.url.partition('&')[0]
            url = f"{base_url}&{urlencode(variables)}"
            yield response.follow(
                url,
                callback=self.users_parse,
                cb_kwargs={'target_username': target_username, 'flag': flag, 'variables': deepcopy(variables)}
            )

        # Items are emitted for every page. They used to be nested under the
        # has_next_page branch, so the final page was never emitted.
        for edge in connection.get('edges'):
            node = edge.get('node')
            yield InstaparserItem(
                _id=node.get('id'),
                user_name=node.get('username'),
                full_name=node.get('full_name'),
                photo=node.get('profile_pic_url'),
                insert_to_collection=f'{target_username}_{flag}'
            )
 def user_subscrib_parse(self, response: HtmlResponse, username, user_id, variables, s_hash, page_info_get, status):
     """Handle one page of the edge collection named by ``page_info_get``.

     If another page exists its request is yielded first, then one
     ``InstaparserItem`` per edge of the current page.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Forwarded unchanged to the next page's callback.
         user_id: Stored on every item and prefixed to the item's ``_id``.
         variables: Query variables; ``after`` is updated in place.
         s_hash: Query hash placed into the next page's URL.
         page_info_get: Key under ``data.user`` that holds the edge collection.
         status: Stored as ``subscriber_status`` on every item.
     """
     connection = json.loads(response.text).get('data').get('user').get(page_info_get)
     page_info = connection.get('page_info')
     if page_info.get('has_next_page'):  # Another page exists
         # The cursor of this page becomes the ``after`` of the next request.
         variables['after'] = page_info['end_cursor']
         url_subscrib = f'{self.graphql_url}query_hash={s_hash}&{urlencode(variables)}'
         yield response.follow(
             url_subscrib,
             callback=self.user_subscrib_parse,
             cb_kwargs={'username': username,
                        's_hash': s_hash,
                        'page_info_get': page_info_get,
                        'status': status,
                        'user_id': user_id,
                        'variables': deepcopy(variables)}
         )

     for subscrib in connection.get('edges'):
         node = subscrib['node']
         # Yield inside the loop: yielding after it emitted only the last
         # account and raised UnboundLocalError on an empty page.
         yield InstaparserItem(
             user_id=user_id,
             subscriber_status=status,
             id=node['id'],
             name=node['username'],
             photo=node['profile_pic_url'],
             full_info=node,
             _id=user_id + node['id']
         )
Пример #14
0
    def follows_parse(self, response, info, variables):
        """Emit one item per followed account on this page, then queue the next.

        Every edge under ``data.user.edge_follow`` becomes an
        ``InstaparserItem`` that pairs the follower described by ``info`` with
        the followed account from the edge's ``node``. Missing levels of the
        JSON body are treated as empty.

        Args:
            response: Response whose text is the JSON body of the query.
            info: Mapping describing the follower; the keys read here are
                ``user_id``, ``user``, ``full_name``, ``pic_url`` and
                ``is_private``.
            variables: Query variables; ``after`` is updated in place.
        """
        edge_follow = json.loads(response.text).get('data', {}).get('user', {}).get('edge_follow', {})
        page_info = edge_follow.get('page_info', {})

        for foll in edge_follow.get('edges', []):
            node = foll['node']
            yield InstaparserItem(
                _id=f"{node['id']}_{info['user_id']}",
                follower_id=info['user_id'],
                follower_name=info['user'],
                # full_name / is_private used to be read from each other's key.
                follower_full_name=info['full_name'],
                follower_pic_url=info['pic_url'],
                follower_is_private=info['is_private'],
                follow_id=node['id'],
                follow_name=node['username'],
                follow_full_name=node['full_name'],
                follow_pic_url=node['profile_pic_url'],
                follow_is_private=node['is_private'])

        if page_info.get('has_next_page'):
            variables['after'] = page_info['end_cursor']

            url_posts = f'{self.graphql_url}query_hash={self.hash_follows}&{urlencode(variables)}'

            yield response.follow(url_posts,
                                  callback=self.follows_parse,
                                  cb_kwargs={
                                      'info': info,
                                      'variables': deepcopy(variables)
                                  })
Пример #15
0
    def parse_user(
            self, response: HtmlResponse, username, user_id, variables,
            query_hash
    ):  # Receives the response plus the values forwarded through cb_kwargs
        """Handle one page of followers or followings, chosen by ``query_hash``.

        ``query_hash == self.subscribers_hash`` selects ``edge_followed_by``
        (``target_type='followed_by'``); any other hash selects ``edge_follow``
        (``target_type='follow'``). If another page exists its request is
        yielded first, then one ``InstaparserItem`` per edge.

        Args:
            response: Response whose text is the JSON body of the query.
            username: Stored on every item and forwarded.
            user_id: Stored as ``id`` on every item and forwarded.
            variables: Query variables; ``after`` is updated in place.
            query_hash: Hash of the query being paginated; forwarded.
        """
        if query_hash == self.subscribers_hash:
            target_type = 'followed_by'
        else:
            target_type = 'follow'
        connection = json.loads(response.text).get('data').get('user').get(
            f'edge_{target_type}')

        paging = connection.get('page_info')
        if paging.get('has_next_page'):
            # The cursor of this page becomes the ``after`` of the next request.
            variables['after'] = paging['end_cursor']
            next_page = get_graphql_url(self.graphql_url, query_hash, variables)
            yield response.follow(next_page,
                                  callback=self.parse_user,
                                  cb_kwargs={
                                      'username': username,
                                      'user_id': user_id,
                                      'variables': deepcopy(variables),
                                      'query_hash': query_hash
                                  })

        for edge in connection.get('edges'):
            node = edge['node']
            # NOTE(review): ``id`` carries the parsed user's id while photo and
            # name come from the edge — confirm this pairing is intended.
            yield InstaparserItem(username=username,
                                  target_type=target_type,
                                  id=user_id,
                                  photo=node['profile_pic_url'],
                                  name=node['username'])
Пример #16
0
 def parse_user_followings(self, response: HtmlResponse, username, user_id, variables):
     """Handle one ``edge_follow`` page: queue the next page, emit items.

     Each edge yields an ``InstaparserItem`` with ``type_field='following'``.

     Args:
         response: Response whose text is the JSON body of the query.
         username: Stored as ``user_name`` on every item and forwarded.
         user_id: Stored on every item and forwarded.
         variables: Query variables; ``after`` is updated in place.
     """
     edge_follow = json.loads(response.text).get('data').get('user').get('edge_follow')
     paging = edge_follow.get('page_info')

     if paging['has_next_page']:
         variables['after'] = paging['end_cursor']
         next_page = f'{self.graphql_url}query_hash={self.followings_hash}&{urlencode(variables)}'
         forwarded = {
             'username': username,
             'user_id': user_id,
             # Copy so each queued request keeps its own cursor.
             'variables': deepcopy(variables),
         }
         yield response.follow(
             next_page,
             callback=self.parse_user_followings,
             cb_kwargs=forwarded,
         )

     for edge in edge_follow.get('edges'):
         node = edge['node']
         yield InstaparserItem(
             user_name=username,
             user_id=user_id,
             type_field='following',
             follow_name=node['username'],
             follow_id=node['id'],
             follow_photo=node['profile_pic_url'],
         )
Пример #17
0
 def user_subscribers_parse(self, response: HtmlResponse, variables, username, user_id):
     """Handle one ``edge_follow`` page: queue the next page, emit items.

     Each edge yields an ``InstaparserItem`` with ``status='subscriptions'``.

     Args:
         response: Response whose text is the JSON body of the query.
         variables: Query variables; ``after`` is updated in place.
         username: Stored as ``parse_user`` on every item and forwarded.
         user_id: Stored on every item and forwarded.
     """
     edge_follow = json.loads(response.text).get('data').get('user').get('edge_follow')
     paging = edge_follow.get('page_info')

     if paging.get('has_next_page'):
         variables['after'] = paging.get('end_cursor')
         next_page = f'{self.graphql_url}?query_hash={self.subscribers_hash}&{urlencode(variables)}'
         forwarded = {
             # Copy so each queued request keeps its own cursor.
             'variables': deepcopy(variables),
             'username': username,
             'user_id': user_id,
         }
         yield response.follow(
             next_page,
             callback=self.user_subscribers_parse,
             cb_kwargs=forwarded,
         )

     for edge in edge_follow.get('edges'):
         node = edge.get('node')
         yield InstaparserItem(
             parse_user=username,
             status='subscriptions',
             user_id=user_id,
             insta_id=node.get('id'),
             insta_name=node.get('username'),
             photo=node.get('profile_pic_url'),
         )
Пример #18
0
    def posts_parse(self,response,user_id, variables):
        """Handle one ``edge_owner_to_timeline_media`` page of a user's posts.

        If another page exists its request is yielded first, then one
        ``InstaparserItem`` per edge of the current page.

        Args:
            response: Response whose text is the JSON body of the query.
            user_id: Stored on every item and forwarded to the next page.
            variables: Query variables; ``after`` is updated in place.
        """
        timeline = json.loads(response.text).get('data').get('user').get('edge_owner_to_timeline_media')
        paging = timeline.get('page_info')

        if paging['has_next_page']:
            variables['after'] = paging['end_cursor']
            next_page = f'{self.graphql_url}query_hash={self.hash_posts}&{urlencode(variables)}'
            yield response.follow(
                next_page,
                callback=self.posts_parse,
                cb_kwargs={
                    'user_id': user_id,
                    # Copy so each queued request keeps its own cursor.
                    'variables': deepcopy(variables),
                },
            )

        for edge in timeline.get('edges'):
            node = edge['node']
            yield InstaparserItem(
                user_id=user_id,
                photo=node['display_url'],
                likes=node['edge_media_preview_like']['count'],
                post_data=node,
            )