def subscribers_parse(self, response, variables):
    """Parse one page of the ``edge_followed_by`` GraphQL listing.

    Yields one loaded ``InstaparserItem`` per follower edge. Afterwards it
    yields either the request for the next followers page or, when there is
    no next page, the first request of the following query, which is handled
    by ``self.subscriptions_parse``.

    :param response: response whose ``text`` is the GraphQL JSON payload.
    :param variables: dict of query variables; url-encoded into the next URL
        and forwarded (as a copy) to the next callback.
    """
    j_body = json.loads(response.text)
    followed_by = j_body['data']['user']['edge_followed_by']
    page_info = followed_by['page_info']

    for edge in followed_by['edges']:
        node = edge['node']
        loader = ItemLoader(item=InstaparserItem())
        loader.add_value('_id', node['id'])
        loader.add_value('username', node['username'])
        loader.add_value('full_name', node['full_name'])
        loader.add_value('is_private', node['is_private'])
        loader.add_value('profile_pic_url', node['profile_pic_url'])
        loader.add_value('type_user', 'subscribers')
        yield loader.load_item()

    if page_info.get('has_next_page'):
        variables['after'] = page_info['end_cursor']
        url_next = f'{self.graphql_link}query_hash={self.hash_followers}&{urlencode(variables)}'
        yield response.follow(
            url_next,
            callback=self.subscribers_parse,
            cb_kwargs={'variables': deepcopy(variables)},
        )
    else:
        # 'after' (if present) is a cursor into the followers listing. It must
        # not leak into the following query, which has to start at page one.
        following_variables = {
            key: value for key, value in variables.items() if key != 'after'
        }
        url_following = (
            f'{self.graphql_link}query_hash={self.hash_following}'
            f'&{urlencode(following_variables)}'
        )
        yield response.follow(
            url_following,
            callback=self.subscriptions_parse,
            cb_kwargs={'variables': deepcopy(following_variables)},
        )
def user_posts_parse(self, response: HtmlResponse, username, user_id, variables, followed_by=False):
    """Parse one page of a user's follow / followed-by GraphQL listing.

    Reads ``edge_followed_by`` when ``followed_by`` is true, otherwise
    ``edge_follow``. Yields the request for the next page (if any) followed
    by one ``InstaparserItem`` per edge of the current page.

    :param response: response whose ``text`` is the GraphQL JSON payload.
    :param username: forwarded unchanged to the next callback.
    :param user_id: stored on each item as ``is_followed_by`` or ``follows``.
    :param variables: dict of query variables; gets ``after`` set for paging.
    :param followed_by: selects which edge of the payload is read.
    """
    j_data = json.loads(response.text)
    edge_key = 'edge_followed_by' if followed_by else 'edge_follow'
    edge = j_data.get('data').get('user').get(edge_key)
    if edge is None:
        return

    # A payload without 'page_info' is treated as "no further pages" instead
    # of crashing on None.
    page_info = edge.get('page_info') or {}
    if page_info.get('has_next_page'):
        variables['after'] = page_info['end_cursor']
        # NOTE(review): the next-page URL always uses self.user_followers_hash,
        # even when followed_by is False — confirm this is the intended hash.
        url_posts = f'{self.graphql_url}query_hash={self.user_followers_hash}&{urlencode(variables)}'
        yield response.follow(
            url_posts,
            callback=self.user_posts_parse,
            cb_kwargs={
                'username': username,
                'user_id': user_id,
                # Copy so later mutation here cannot change a queued request.
                'variables': dict(variables),
                # Without this, page 2+ would fall back to followed_by=False
                # and read the wrong edge.
                'followed_by': followed_by,
            },
        )

    for user in edge.get('edges') or []:
        node = user.get('node')
        yield InstaparserItem(
            user_id=node.get('id'),
            user_name=node.get('username'),
            full_name=node.get('full_name'),
            photo=node.get('profile_pic_url'),
            is_followed_by=user_id if followed_by else None,
            follows=None if followed_by else user_id,
        )
def parse_following(self, response: HtmlResponse, username, user_id, variables):
    """Parse one page of the ``edge_follow`` GraphQL listing.

    Yields the request for the next page (if any), then a single
    ``InstaparserItem`` carrying summaries of every edge on this page.

    :param response: response exposing ``json()`` with the GraphQL payload.
    :param username: stored on the item and forwarded to the next callback.
    :param user_id: stored on the item and forwarded to the next callback.
    :param variables: dict of query variables; serialized as JSON into the
        ``variables=`` query parameter of the next-page URL.
    """
    # Local import keeps this block self-contained: the original did not use
    # the json module directly.
    import json

    data = response.json()["data"]["user"]["edge_follow"]
    page_info = data.get("page_info") or {}

    if page_info.get("has_next_page"):
        variables["after"] = page_info["end_cursor"]
        # json.dumps produces real JSON (true/false/null, escaped quotes) and
        # leaves spaces inside values intact, unlike str(dict).replace(...).
        str_variables = quote(json.dumps(variables, separators=(",", ":")))
        url = f"{self.graphql_url}query_hash={self.following_hash}&variables={str_variables}"
        yield response.follow(
            url,
            callback=self.parse_following,
            cb_kwargs={
                "username": username,
                "user_id": user_id,
                "variables": deepcopy(variables),
            },
        )

    followings_summary = [
        {
            'username': following['node']['username'],
            'user_id': following['node']['id'],
            'photo': following['node']['profile_pic_url'],
            'is_private': following['node']['is_private'],
        }
        for following in data["edges"]
    ]
    yield InstaparserItem(followings=followings_summary, user_id=user_id, username=username)
def user_data_subscribers_parse(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_follow`` GraphQL listing.

    When the payload reports ``has_next_page``, the next-page request is
    yielded first; then one ``InstaparserItem`` is yielded per edge of the
    current page, tagged with ``data_type='subscribe'`` and ``user_id`` as
    the owner.
    """
    payload = json.loads(response.text)
    follow_edge = payload['data']['user']['edge_follow']
    paging = follow_edge['page_info']

    if paging['has_next_page']:
        # Move the cursor forward; the callback receives its own copy.
        variables['after'] = paging['end_cursor']
        next_kwargs = {
            'username': username,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            f'{self.graphql_url}query_hash={self.query_subscriber_hash}&{urlencode(variables)}',
            callback=self.user_data_subscribers_parse,
            cb_kwargs=next_kwargs,
        )

    for edge in follow_edge['edges']:
        node = edge['node']
        yield InstaparserItem(
            subscriber_username=node['username'],
            subscriber_fullname=node['full_name'],
            subscriber_id=node['id'],
            subscriber_profile_pic_url=node['profile_pic_url'],
            subscriber_data=edge,  # the whole edge, not just its node
            subscriber_owner_id=user_id,
            data_type='subscribe',
        )
def user_data_followers(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_followed_by`` GraphQL listing.

    Yields one ``InstaparserItem`` (``user_type='followers'``) per edge, and
    afterwards the request for the next page when ``has_next_page`` is set.
    ``username``, ``user_id`` and ``variables`` arrive via ``cb_kwargs``.
    """
    payload = json.loads(response.text)
    followed_by = payload.get('data').get('user').get('edge_followed_by')

    # Items of the current page come first.
    for edge in followed_by.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            user_type='followers',
            user_fid=node['id'],
            user_name=node['username'],
            user_full_name=node['full_name'],
            user_userpic=node['profile_pic_url'],
        )

    paging = followed_by.get('page_info')
    if paging.get('has_next_page'):
        # The end cursor becomes the 'after' variable of the next request.
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={self.query_hash_followers}&{urlencode(variables)}'
        next_kwargs = {
            'username': username,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            next_url,
            callback=self.user_data_followers,
            cb_kwargs=next_kwargs,
        )
def user_subscribers_parse(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_followed_by`` GraphQL listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per edge, with ``user_id`` recorded as
    ``subscribe_user_id``.
    """
    followed_by = response.json().get('data').get('user').get('edge_followed_by')
    paging = followed_by.get('page_info')

    if paging.get('has_next_page'):
        variables['after'] = paging.get('end_cursor')
        next_url = f'{self.graphql_url}query_hash={self.subscriber_hash}&{urlencode(variables)}'
        yield response.follow(
            next_url,
            callback=self.user_subscribers_parse,
            cb_kwargs={
                'username': username,
                'user_id': user_id,
                'variables': deepcopy(variables),
            },
        )

    for edge in followed_by.get('edges'):
        node = edge.get('node')
        yield InstaparserItem(
            subscribe_user_id=user_id,
            photo=node.get('profile_pic_url'),
            user_id=node.get('id'),
            user_name=node.get('username'),
            user_data=node,
        )
def user_subscription_parse(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_follow`` GraphQL listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per edge. Each item records the edge's node as the
    subscription and ``user_id`` / ``username`` as the subscriber.
    """
    follow_edge = response.json().get('data').get('user').get('edge_follow')
    paging = follow_edge.get('page_info')

    if paging.get('has_next_page'):
        variables['after'] = paging.get('end_cursor')
        next_url = f'{self.graphql_url}query_hash={self.subscription_hash}&{urlencode(variables)}'
        next_kwargs = {
            'username': username,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            next_url,
            callback=self.user_subscription_parse,
            cb_kwargs=next_kwargs,
        )

    for edge in follow_edge.get('edges'):
        node = edge.get('node')
        yield InstaparserItem(
            user_id=user_id,
            subscription_id=node.get('id'),
            subscription_username=node.get('username'),
            subscriber_id=user_id,
            subscriber_username=username,
            full_name=node.get('full_name'),
            profile_pic_url=node.get('profile_pic_url'),
            post_data=node,
        )
def user_subscriptions_continue(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_follow`` GraphQL listing.

    ``username``, ``user_id`` and ``variables`` arrive via ``cb_kwargs``.
    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per edge of the current page.
    """
    payload = json.loads(response.text)
    follow_edge = payload.get('data').get('user').get('edge_follow')
    paging = follow_edge.get('page_info')

    if paging.get('has_next_page'):
        # The end cursor becomes the 'after' variable of the next request.
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={self.subscriptions_hash}&{urlencode(variables)}'
        yield response.follow(
            next_url,
            callback=self.user_subscriptions_continue,
            cb_kwargs={
                'username': username,
                'user_id': user_id,
                'variables': deepcopy(variables),
            },
        )

    # Emit the entries of the current page to the pipeline.
    for edge in follow_edge.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            subscription_id=node['id'],
            subscription_name=node['username'],
            subscription_photo=node['profile_pic_url'],
            subscription=node,
        )
def users_parse(self, response: HtmlResponse, target_username, type, variables):
    """Parse one page of a followers / following GraphQL listing.

    Reads ``edge_followed_by`` when ``type == 'followers'``, otherwise
    ``edge_follow``. Yields the next-page request (if any), then one
    ``InstaparserItem`` per edge of the current page.

    :param response: response whose ``text`` is the GraphQL JSON payload and
        whose ``url`` is reused (up to the first ``&``) for the next page.
    :param target_username: used to build ``insert_to_collection``.
    :param type: listing kind; shadows the builtin, but the name is kept
        because it is passed by keyword through ``cb_kwargs``.
    :param variables: dict of query variables; gets ``after`` set for paging.
    """
    j_data = json.loads(response.text)
    type_field = 'edge_followed_by' if type == 'followers' else 'edge_follow'
    edge = j_data.get('data').get('user').get(type_field)
    page_info = edge.get('page_info')

    if page_info['has_next_page']:
        variables['after'] = page_info['end_cursor']
        # partition() keeps the whole URL when it contains no '&'; slicing
        # with find() would cut off the last character (find returns -1).
        base_url = response.url.partition('&')[0]
        url = f"{base_url}&{urlencode(variables)}"
        yield response.follow(
            url,
            callback=self.users_parse,
            cb_kwargs={
                'target_username': target_username,
                'type': type,
                'variables': deepcopy(variables),
            },
        )

    for user in edge.get('edges'):
        node = user.get('node')
        yield InstaparserItem(
            _id=node.get('id'),
            user_name=node.get('username'),
            full_name=node.get('full_name'),
            photo=node.get('profile_pic_url'),
            insert_to_collection=f'{target_username}_{type}',
        )
def user_posts_parse(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_owner_to_timeline_media`` listing.

    ``username``, ``user_id`` and ``variables`` arrive via ``cb_kwargs``.
    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per post edge of the current page.
    """
    payload = json.loads(response.text)
    timeline = payload.get('data').get('user').get('edge_owner_to_timeline_media')
    paging = timeline.get('page_info')

    if paging.get('has_next_page'):
        # The end cursor becomes the 'after' variable of the next request.
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={self.posts_hash}&{urlencode(variables)}'
        next_kwargs = {
            'username': username,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            next_url,
            callback=self.user_posts_parse,
            cb_kwargs=next_kwargs,
        )

    # Emit the posts of the current page to the pipeline.
    for edge in timeline.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            photo=node['display_url'],
            likes=node['edge_media_preview_like']['count'],
            post=node,
        )
def user_follower_parse(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_followed_by`` GraphQL listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per edge with the follower fields filled in and the
    ``following_*`` fields set to ``None``.
    """
    payload = json.loads(response.text)
    followed_by = payload.get('data').get('user').get('edge_followed_by')
    paging = followed_by.get('page_info')

    if paging.get('has_next_page'):
        variables['after'] = paging['end_cursor']
        # NOTE(review): this reads edge_followed_by but paginates with
        # self.following_hash — confirm that is the intended query hash.
        next_url = f'{self.graphql_url}query_hash={self.following_hash}&{urlencode(variables)}'
        yield response.follow(
            next_url,
            callback=self.user_follower_parse,
            cb_kwargs={
                'username': username,
                'user_id': user_id,
                'variables': deepcopy(variables),
            },
        )

    for edge in followed_by.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            following_id=None,
            following_name=None,
            follower_id=node['id'],
            follower_name=node['username'],
            node=node,
        )
def users_parse(self, response: HtmlResponse, target_username, flag, variables):
    """Parse one page of a followers / following GraphQL listing.

    Reads ``edge_followed_by`` when ``flag == 'followers'``, otherwise
    ``edge_follow``. Yields the next-page request (if any), then one
    ``InstaparserItem`` per edge of the current page.

    :param response: response whose ``text`` is the GraphQL JSON payload and
        whose ``url`` is reused (up to the first ``&``) for the next page.
    :param target_username: used to build ``insert_to_collection``.
    :param flag: listing kind, forwarded to the next callback.
    :param variables: dict of query variables; gets ``after`` set for paging.
    """
    j_data = json.loads(response.text)
    # The flag set by the calling method decides which edge is read.
    type_field = 'edge_followed_by' if flag == 'followers' else 'edge_follow'
    edge = j_data.get('data').get('user').get(type_field)
    page_info = edge.get('page_info')

    # If another page exists, take its cursor and schedule this same callback.
    if page_info['has_next_page']:
        variables['after'] = page_info['end_cursor']
        # partition() keeps the whole URL when it contains no '&'; slicing
        # with find() would cut off the last character (find returns -1).
        base_url = response.url.partition('&')[0]
        url = f"{base_url}&{urlencode(variables)}"
        yield response.follow(
            url,
            callback=self.users_parse,
            cb_kwargs={
                'target_username': target_username,
                'flag': flag,
                'variables': deepcopy(variables),
            },
        )

    # Turn the edges of the current page into items.
    for user in edge.get('edges'):
        node = user.get('node')
        yield InstaparserItem(
            _id=node.get('id'),
            user_name=node.get('username'),
            full_name=node.get('full_name'),
            photo=node.get('profile_pic_url'),
            insert_to_collection=f'{target_username}_{flag}',
        )
def user_subscrib_parse(self, response: HtmlResponse, username, user_id, variables, s_hash, page_info_get, status):
    """Handle one page of a GraphQL user listing chosen by the caller.

    ``page_info_get`` names the edge under ``data.user`` to read, ``s_hash``
    is the query hash used for the next page, and ``status`` is stored on
    every item as ``subscriber_status``. Yields the next-page request when
    ``has_next_page`` is set, then one ``InstaparserItem`` per edge.
    """
    payload = json.loads(response.text)
    listing = payload.get('data').get('user').get(page_info_get)
    paging = listing.get('page_info')

    if paging.get('has_next_page'):
        # The end cursor becomes the 'after' variable of the next request.
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={s_hash}&{urlencode(variables)}'
        next_kwargs = {
            'username': username,
            's_hash': s_hash,
            'page_info_get': page_info_get,
            'status': status,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            next_url,
            callback=self.user_subscrib_parse,
            cb_kwargs=next_kwargs,
        )

    # Emit the entries of the current page to the pipeline.
    for edge in listing.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            subscriber_status=status,
            id=node['id'],
            name=node['username'],
            photo=node['profile_pic_url'],
            full_info=node,
            _id=user_id + node['id'],  # composite key: owner id + listed user id
        )
def follows_parse(self, response, info, variables):
    """Parse one page of the ``edge_follow`` GraphQL listing.

    Yields one ``InstaparserItem`` per edge, pairing the follower described
    by ``info`` with the followed account from the edge's node, and then the
    next-page request when ``has_next_page`` is set. Missing levels of the
    payload are treated as empty.

    :param response: response whose ``text`` is the GraphQL JSON payload.
    :param info: dict with the follower's ``user_id``, ``user``,
        ``full_name``, ``is_private`` and ``pic_url``.
    :param variables: dict of query variables; gets ``after`` set for paging.
    """
    j_body = json.loads(response.text)
    edge_follow = j_body.get('data', {}).get('user', {}).get('edge_follow', {})
    page_info = edge_follow.get('page_info', {})

    for foll in edge_follow.get('edges', []):
        node = foll['node']
        yield InstaparserItem(
            _id=f"{node['id']}_{info['user_id']}",
            follower_id=info['user_id'],
            follower_name=info['user'],
            # These two were previously assigned from each other's keys.
            follower_full_name=info['full_name'],
            follower_pic_url=info['pic_url'],
            follower_is_private=info['is_private'],
            follow_id=node['id'],
            follow_name=node['username'],
            follow_full_name=node['full_name'],
            follow_pic_url=node['profile_pic_url'],
            follow_is_private=node['is_private'],
        )

    if page_info.get('has_next_page'):
        variables['after'] = page_info['end_cursor']
        url_posts = f'{self.graphql_url}query_hash={self.hash_follows}&{urlencode(variables)}'
        yield response.follow(
            url_posts,
            callback=self.follows_parse,
            cb_kwargs={'info': info, 'variables': deepcopy(variables)},
        )
def parse_user(self, response: HtmlResponse, username, user_id, variables, query_hash):
    """Handle one page of a followers / following GraphQL listing.

    The edge is chosen from ``query_hash``: ``edge_followed_by`` when it
    equals ``self.subscribers_hash``, otherwise ``edge_follow``. Yields the
    next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per edge. Arguments arrive via ``cb_kwargs``.
    """
    payload = json.loads(response.text)
    if query_hash == self.subscribers_hash:
        target_type = 'followed_by'
    else:
        target_type = 'follow'
    # The payload key is simply the target type prefixed with 'edge_'.
    data = payload.get('data').get('user').get(f'edge_{target_type}')
    paging = data.get('page_info')

    if paging.get('has_next_page'):
        # The end cursor becomes the 'after' variable of the next request.
        variables['after'] = paging['end_cursor']
        next_url = get_graphql_url(self.graphql_url, query_hash, variables)
        yield response.follow(
            next_url,
            callback=self.parse_user,
            cb_kwargs={
                'username': username,
                'user_id': user_id,
                'variables': deepcopy(variables),
                'query_hash': query_hash,
            },
        )

    # Emit the entries of the current page to the pipeline.
    for edge in data.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            username=username,
            target_type=target_type,
            id=user_id,
            photo=node['profile_pic_url'],
            name=node['username'],
        )
def parse_user_followings(self, response: HtmlResponse, username, user_id, variables):
    """Handle one page of the ``edge_follow`` GraphQL listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` (``type_field='following'``) per edge.
    """
    payload = json.loads(response.text)
    follow_edge = payload.get('data').get('user').get('edge_follow')
    paging = follow_edge.get('page_info')

    if paging['has_next_page']:
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={self.followings_hash}&{urlencode(variables)}'
        next_kwargs = {
            'username': username,
            'user_id': user_id,
            'variables': deepcopy(variables),
        }
        yield response.follow(
            next_url,
            callback=self.parse_user_followings,
            cb_kwargs=next_kwargs,
        )

    for edge in follow_edge.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_name=username,
            user_id=user_id,
            type_field='following',
            follow_name=node['username'],
            follow_id=node['id'],
            follow_photo=node['profile_pic_url'],
        )
def user_subscribers_parse(self, response: HtmlResponse, variables, username, user_id):
    """Handle one page of the ``edge_follow`` GraphQL listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` (``status='subscriptions'``) per edge.
    """
    payload = json.loads(response.text)
    follow_edge = payload.get('data').get('user').get('edge_follow')
    paging = follow_edge.get('page_info')

    if paging.get('has_next_page'):
        variables['after'] = paging.get('end_cursor')
        # NOTE(review): this URL inserts '?' after self.graphql_url, so the
        # base is presumably stored without a trailing '?' — verify.
        next_url = f'{self.graphql_url}?query_hash={self.subscribers_hash}&{urlencode(variables)}'
        next_kwargs = {
            'variables': deepcopy(variables),
            'username': username,
            'user_id': user_id,
        }
        yield response.follow(
            next_url,
            callback=self.user_subscribers_parse,
            cb_kwargs=next_kwargs,
        )

    for edge in follow_edge.get('edges'):
        node = edge.get('node')
        yield InstaparserItem(
            parse_user=username,
            status='subscriptions',
            user_id=user_id,
            insta_id=node.get('id'),
            insta_name=node.get('username'),
            photo=node.get('profile_pic_url'),
        )
def posts_parse(self, response, user_id, variables):
    """Handle one page of the ``edge_owner_to_timeline_media`` listing.

    Yields the next-page request when ``has_next_page`` is set, then one
    ``InstaparserItem`` per post edge of the current page.
    """
    payload = json.loads(response.text)
    timeline = payload.get('data').get('user').get('edge_owner_to_timeline_media')
    paging = timeline.get('page_info')

    if paging['has_next_page']:
        variables['after'] = paging['end_cursor']
        next_url = f'{self.graphql_url}query_hash={self.hash_posts}&{urlencode(variables)}'
        yield response.follow(
            next_url,
            callback=self.posts_parse,
            cb_kwargs={'user_id': user_id, 'variables': deepcopy(variables)},
        )

    for edge in timeline.get('edges'):
        node = edge['node']
        yield InstaparserItem(
            user_id=user_id,
            photo=node['display_url'],
            likes=node['edge_media_preview_like']['count'],
            post_data=node,
        )