示例#1
0
	def logout(self):
		"""Send the logout request for a logged-in session.

		Nothing happens when ``self.logined`` is falsy. After the request a
		success message is logged if the session cookies no longer contain a
		``sessionid`` entry.
		"""
		if not self.logined:
			return
		request_options = {
			'method': 'get',
			'session': self.session,
			'headers': self.logined_headers,
			'proxy': PROXY_GLOBAL,
		}
		send_request(API_LOGOUT, **request_options)
		remaining_cookies = self.session.cookies.get_dict()
		if remaining_cookies.get('sessionid'):
			# The session id cookie is still present: no success message.
			return
		logger.info('Logout successfully~')
示例#2
0
 def wrapper(self, username=None, **kwargs):
     """Send the POST request for operation ``opt`` and then run ``func``.

     ``opt``, ``mode``, ``_data`` and ``func`` are free variables provided by
     an enclosing scope that is not part of this block.

     The decoded JSON response is exposed as ``self.__opt__`` while ``func``
     runs and is reset to ``None`` afterwards.

     Args:
         username: Target name. When it is a non-empty string, the endpoint
             template is formatted with the user's id (``mode == 'user'``)
             or with the name itself as a tag (``mode == 'tag'``).
         **kwargs: For ``opt == 'set_filter'`` the keys ``default`` and
             ``keywords`` select the payload; when ``username`` is ``None``
             all keyword arguments are forwarded to ``func``.

     Returns:
         Whatever ``func`` returns.
     """
     API = APIS[opt]
     # NOTE(review): if ``logined_headers`` is a plain attribute, the csrf
     # token below is written into the shared dict; confirm this is intended.
     headers = self.logined_headers
     cookies = self.session.cookies.get_dict()
     headers['x-csrftoken'] = cookies['csrftoken']
     data = _data
     # Start from the unformatted endpoint so ``url`` is always bound, even
     # when ``mode`` is neither 'user' nor 'tag'.
     url = API
     if username and isinstance(username, str):
         if mode == 'user':
             target = self.get_user(username)
             url = API.format(userid=target.Id)
         elif mode == 'tag':
             url = API.format(tag=username)
     if opt == 'set_filter':
         if kwargs.get('default'):
             data = {'config_value': '1'}
             logger.debug('set comments filter to default.')
         elif kwargs.get('keywords'):
             url = API_SET_COMMENT_FILTER_kEYWORDS
             k = kwargs['keywords']
             # A single keyword is sent with a trailing comma; several are
             # comma-joined.
             if len(k) > 1:
                 keyword_csv = ','.join([str(i) for i in k])
             else:
                 keyword_csv = str(k[0]) + ','
             data = {'keywords': keyword_csv}
             logger.debug(f'set comments filter keywords to {k}.')
             # Set the filter config value to '0' before the keyword list
             # itself is posted by the request further below.
             send_request(API_SET_COMMENT_FILTER,
                          session=self.session,
                          headers=headers,
                          method='post',
                          data={'config_value': '0'},
                          proxy=PROXY_GLOBAL)
         else:
             data = {'config_value': '0'}
             logger.debug(
                 'set comments filter keywords not in default mode.')
     response = send_request(url,
                             session=self.session,
                             headers=headers,
                             method='post',
                             data=data,
                             proxy=PROXY_GLOBAL)
     self.__opt__ = response.json()
     try:
         if username is not None:
             ret = func(self, username)
         else:
             ret = func(self, **kwargs)
     finally:
         # Never leave a stale response behind, even if ``func`` raises.
         self.__opt__ = None
     return ret
示例#3
0
	def get_homepage(self,reget=False):
		"""Return the text of the page at ``self.url``.

		The text is kept on ``self._homepage``; a request is only sent when
		that value is falsy or when ``reget`` is truthy.
		"""
		if not reget and self._homepage:
			return self._homepage
		page = send_request(self.url,
			headers=COMMON_HEADERS,
			proxy=PROXY_GLOBAL)
		self._homepage = page.text
		return self._homepage
示例#4
0
	def get_page_comments(self,shortcode,delay=DELAY,count=-1,save=False,path=None,tname=None):
		"""Collect the comment edges of the post identified by ``shortcode``.

		The first batch is taken from ``self.get_page_info(shortcode)``; further
		batches are requested page by page with the previous page's
		``end_cursor`` until there is no next page or ``count`` is reached.

		Args:
			shortcode: Shortcode of the post whose comments are collected.
			delay: Forwarded to ``send_request`` for every paged request.
			count: Maximum number of comments to return; values <= 0 mean no limit.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			path: Not used by this method.
			tname: Forwarded to ``self.db.save`` as ``tname``.

		Returns:
			list: The collected comment edges in the order they were received.
		"""
		results = []
		_count = 0
		page = self.get_page_info(shortcode)
		comment_card = page['graphql']['shortcode_media']['edge_media_to_comment']
		total = comment_card['count']
		page_info = comment_card['page_info']
		top_comments = comment_card['edges']
		end_cursor = page_info['end_cursor']
		has_next = page_info['has_next_page']
		# Work on a private copy: writing into COMMON_HEADERS itself would leak
		# this post's referer and x-instagram-gis values into unrelated requests.
		headers = copy.deepcopy(COMMON_HEADERS)
		headers['x-ig-app-id']=self.app_id
		headers['referer'] = API_PICTURE_PAGE.format(shortcode=shortcode)
		# Denominator for the progress percentage in the log output.
		_check = count if count > 0 else total
		for i in top_comments:
			if save:
				self.db.save(i,tname=tname)
			results.append(i)
			_count += 1
			if (_count >= count or _count >= total) and (count > 0):
				logger.info(f'[Done]Get crawled comments of page:"{shortcode}":{len(results)}.[Total({total})]')
				return results
		if not has_next:
			logger.info(f'[Done]Get crawled comments of page:"{shortcode}":{len(results)}.[Total({total})]')
			return results
		while 1:
			if not end_cursor:
				# Without a cursor there is nothing left to page through.
				logger.info(f'[Done]Get crawled comments of page:"{shortcode}":{len(results)}.[Total({total})]')
				break
			params = copy.deepcopy(COMMENTS_PARAMS)
			params['query_hash']=self.comment_hash
			# In the variables template '$' stands for the cursor and '%' for the shortcode.
			params['variables']=params['variables'].replace('$',end_cursor).replace('%',shortcode)
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
				params=params,
				headers=headers,
				delay=delay,
				proxy=PROXY_GLOBAL,
				json=True)
			json_data = response.json()
			data = json_data['data']['shortcode_media']['edge_media_to_comment']['edges']
			page_info = json_data['data']['shortcode_media']['edge_media_to_comment']['page_info']
			for i in data:
				if save:
					self.db.save(i,tname=tname)
				results.append(i)
				_count += 1
				if (_count >= count or _count >= total) and (count > 0):
					logger.info(f'[Done]Get crawled comments of page:"{shortcode}"'
						f':{len(results)}.[Total({total})]')
					return results
			logger.info(f'Current crawled comments of page "{shortcode}"'
				f':{len(results)}.[{round(len(results)/_check,4)*100  if _check else 0}%]')
			end_cursor = page_info['end_cursor']
			if not page_info['has_next_page']:
				logger.info(f'[Done]Get crawled comments of page:"{shortcode}"'
					f':{len(results)}.[Total({total})]')
				break
		return results
示例#5
0
	def get_posts(self,delay=DELAY,count=-1,save=False,path=None,tname=None):
		"""Collect the timeline post edges of this user.

		The first batch comes from ``self.info['edge_owner_to_timeline_media']``;
		further batches are requested page by page with the previous page's
		``end_cursor`` until there is no next page or ``count`` is reached.

		Args:
			delay: Forwarded to ``send_request`` for every paged request.
			count: Maximum number of posts to return; values <= 0 mean no limit.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			path: Not used by this method.
			tname: Forwarded to ``self.db.save`` as ``tname``.

		Returns:
			list: The collected post edges in the order they were received.
		"""
		_count = 0
		results = []
		# Denominator for the progress percentage in the log output.
		_check = count if count > 0 else self.posts_count
		top_posts_card = self.info['edge_owner_to_timeline_media']
		top_posts = top_posts_card['edges']
		end_cursor = top_posts_card['page_info']['end_cursor']
		posts_query_id = self.queryIds[2]
		# Work on a private copy: writing into COMMON_HEADERS itself would leak
		# this user's x-instagram-gis value into unrelated requests.
		headers = dict(COMMON_HEADERS)
		headers['x-ig-app-id']=self.app_id
		for i in top_posts:
			if save:
				self.db.save(i,tname=tname)
			_count += 1
			results.append(i)
			if (_count >= count or _count >= self.posts_count) and (count > 0):
				logger.info(f'[Done]The length of crawled data of user "{self.name}"'
					f':{len(results)}.[Total({self.posts_count})]')
				return results
		logger.info(f'Total posts of user "{self.name}":{self.posts_count}.')
		while 1:
			if not end_cursor:
				# Without a cursor there is nothing left to page through.
				logger.info(f'[Done]The length of crawled data of user "{self.name}"'
					f':{len(results)}.[Total({self.posts_count})]')
				break
			params = {}
			params['query_hash']=posts_query_id
			params['variables']=r'{"id":"'+self.Id+'","first":"'+\
			str(USER_POSTS_MAX)+'","after":"'+end_cursor+'"}'
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
				params=params,
				headers=headers,
				delay=delay,
				json=True,
				proxy=PROXY_GLOBAL)
			json_data = response.json()
			data = json_data['data']['user']\
			['edge_owner_to_timeline_media']['edges']
			page_info = json_data['data']['user']\
			['edge_owner_to_timeline_media']['page_info']
			for i in data:
				if save:
					self.db.save(i,tname=tname)
				results.append(i)
				_count += 1
				if (_count >= count or _count >= self.posts_count) and (count > 0):
					logger.info(f'[Done]The length of crawled data of user "{self.name}"'
						f':{len(results)}.[Total({self.posts_count})]')
					return results
			logger.info(f'Current amount of posts of user "{self.name}"'
				f':{len(results)}.[{round(len(results)/_check,4)*100 if _check else 0}%]')
			end_cursor = page_info['end_cursor']
			if not page_info['has_next_page']:
				logger.info(f'[Done]The length of crawled data of user "{self.name}"'
					f':{len(results)}.[Total({self.posts_count})]')
				break
		return results
示例#6
0
	def get_channel_posts(self,delay=DELAY,count=-1,save=False,path=None,tname=None):
		"""Collect the channel post edges of this user.

		The first batch comes from ``self.info['edge_felix_video_timeline']``;
		further batches are requested page by page with the previous page's
		``end_cursor`` until there is no next page or ``count`` is reached.

		Args:
			delay: Forwarded to ``send_request`` for every paged request.
			count: Maximum number of posts to return; values <= 0 mean no limit.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			path: Not used by this method.
			tname: Forwarded to ``self.db.save`` as ``tname``.

		Returns:
			list: The collected channel post edges in the order they were received.
		"""
		_count = 0
		results = []
		# Denominator for the progress percentage in the log output.
		_check = count if count > 0 else self.channel_posts_count
		top_posts_card = self.info['edge_felix_video_timeline']
		top_posts = top_posts_card['edges']
		end_cursor = top_posts_card['page_info']['end_cursor']
		# Work on a private copy: writing into COMMON_HEADERS itself would leak
		# this user's x-instagram-gis value into unrelated requests.
		headers = copy.deepcopy(COMMON_HEADERS)
		headers['x-ig-app-id'] = self.app_id
		for i in top_posts:
			if save:
				self.db.save(i,tname=tname)
			_count += 1
			results.append(i)
			if (_count >= count or _count >= self.channel_posts_count) and (count > 0):
				logger.info(f'[Done]The amount of crawled channel posts data of user "{self.name}":{len(results)}.'
					f'[Total({self.channel_posts_count})]')
				return results
		logger.info(f'Total channel posts of user "{self.name}":{self.channel_posts_count}.')
		while 1:
			if not end_cursor:
				# Without a cursor there is nothing left to page through.
				logger.info(f'[Done]The amount of crawled channel posts data of user "{self.name}":{len(results)}.'
					f'[Total({self.channel_posts_count})]')
				break
			params = copy.deepcopy(CHANNEL_PARAMS)
			# In the variables template '%' stands for the user id and '$' for the cursor.
			params['variables'] = params['variables'].replace('%',self.Id).replace('$',end_cursor)
			params['query_hash'] = self.channel_hash
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
				session=self.instagram.session,
				params=params,
				headers=headers,
				delay=delay,
				json=True,
				proxy=PROXY_GLOBAL)
			json_data = response.json()
			posts = json_data['data']['user']['edge_felix_video_timeline']['edges']
			page_info = json_data['data']['user']['edge_felix_video_timeline']['page_info']
			has_next_page = page_info['has_next_page']
			end_cursor = page_info['end_cursor']
			for i in posts:
				if save:
					self.db.save(i,tname=tname)
				results.append(i)
				_count += 1
				if (_count >= count or _count >= self.channel_posts_count) and (count > 0):
					logger.info(f'[Done]The amount of crawled channel posts data of user "{self.name}"'
						f':{len(results)}.[Total({self.channel_posts_count})]')
					return results
			logger.info(f'Current amount of crawled channel posts data of user "{self.name}"'
				f':{len(results)}.[{round(len(results)/_check,4)*100 if _check else 0}%]')
			if not has_next_page:
				logger.info(f'[Done]The amount of crawled channel posts data of user "{self.name}"'
					f':{len(results)}.[Total({self.channel_posts_count})]')
				break
		return results
示例#7
0
	def get_query_hashs(self):
		"""Extract the query hashes from the script referenced by the homepage.

		The script path is matched in ``self.homepage`` with ``PATTERN_QUERY_JS``
		and requested relative to ``HOST``. The ``PATTERN_FANS_FOLLOW`` matches
		come first in ``self._queryHashs``, followed by the matches of the
		picture-page, posts, hashtag and liker patterns, in that order.

		Returns:
			list: ``self._queryHashs`` after it has been rebuilt.
		"""
		script_url = HOST+from_pattern(self.homepage,PATTERN_QUERY_JS)
		script_text = send_request(script_url,proxy=PROXY_GLOBAL).text
		self._queryHashs = list(from_pattern(script_text,PATTERN_FANS_FOLLOW))
		picture_page = list(from_pattern(script_text,PATTERN_PICTURE_PAGE))
		posts = from_pattern(script_text,PATTERN_POSTS,allget=True)
		hashtag = from_pattern(script_text,PATTERN_HASHTAG,allget=True)
		liker = from_pattern(script_text,PATTERN_LIKER,allget=True)
		extra_hashes = picture_page+posts+hashtag+liker
		self._queryHashs.extend(extra_hashes)
		return self._queryHashs
示例#8
0
文件: obj.py 项目: xuan2261/InsBot
	def to_binary(self):
		"""Return the raw bytes that ``self.string`` refers to.

		An existing file path is read from disk in binary mode; a URL is
		requested and the response content returned.

		Raises:
			TypeError: If ``self.string`` is neither an existing file nor a URL.
		"""
		source = self.string
		if os.path.isfile(source):
			with open(source,'rb') as handle:
				return handle.read()
		if w3lib.url.is_url(source):
			return send_request(source).content
		raise TypeError(f'Expected a url or disk path,got "{self.string}".')
示例#9
0
	def get_media_likers(self,short_code,save=False,count=-1,delay=DELAY,tname=None,path=None):
		"""Collect the ``edge_liked_by`` edges of the media ``short_code``.

		Pages are requested one after another, starting with an empty cursor,
		until there is no next page or ``count`` is reached.

		Args:
			short_code: Shortcode of the media whose likers are collected.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			count: Maximum number of likers to return; values <= 0 mean no limit.
			delay: Forwarded to ``send_request`` for every request.
			tname: Forwarded to ``self.db.save`` as ``tname``.
			path: Not used by this method.

		Returns:
			list: The collected liker edges in the order they were received.
		"""
		_count = 0
		results = []
		end_cursor = ''
		total = 0
		_check = 0
		while 1:
			params = copy.deepcopy(MEDIA_LIKER_PARAMS)
			headers = copy.deepcopy(COMMON_HEADERS)
			params['query_hash']=self.liker_hash
			# In the variables template '$' stands for the shortcode and '%' for the cursor.
			params['variables'] = params['variables'].replace('$', short_code).replace('%', end_cursor)
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
									json=True,
									delay=delay,
									headers=headers,
									params=params)
			data = response.json()
			liker_card = data['data']['shortcode_media']['edge_liked_by']
			if _count==0:
				total = liker_card['count']
				_check = count if count >0 else total
				logger.info(f'Total amount of users who liked media({short_code}) : {total}')
			likers = liker_card['edges']
			page_info = liker_card['page_info']
			end_cursor = page_info['end_cursor']
			has_next_page = page_info['has_next_page']
			# Guard the percentage: a media without likes reports a total of 0.
			logger.info(f'Current grabbed users who liked media({short_code}):{len(likers)}.'
				f'[{round(len(results)/_check,4)*100 if _check else 0}%]')
			for i in likers:
				# Persist before the limit check so the edge that completes the
				# quota is saved as well as returned.
				if save:
					self.db.save(i, tname=tname)
				_count += 1
				results.append(i)
				if (_count >= count or _count >= total) and (count > 0):
					logger.info(f'[Done]Total crawled users who liked media({short_code}) :{len(results)}')
					return results
			# An empty cursor cannot be used for the next page: stop instead of
			# re-requesting the first page or failing in str.replace.
			if not has_next_page or not end_cursor:
				logger.info(f'[Done]Total crawled users who liked media({short_code}) :{len(results)}')
				return results
示例#10
0
	def get_tagged_posts(self,delay=DELAY,count=-1,save=False,path=None,tname=None):
		"""Collect the ``edge_user_to_photos_of_you`` edges of this user.

		Pages are requested one after another, starting with an empty cursor,
		using ``CHANNEL_PARAMS`` as the variables template and
		``self.marked_id`` as the query hash, until there is no next page or
		``count`` is reached.

		Args:
			delay: Forwarded to ``send_request`` for every request.
			count: Maximum number of posts to return; values <= 0 mean no limit.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			path: Not used by this method.
			tname: Forwarded to ``self.db.save`` as ``tname``.

		Returns:
			list: The collected post edges in the order they were received.
		"""
		_count = 0
		results = []
		end_cursor = ''
		# Work on a private copy: writing into COMMON_HEADERS itself would leak
		# this user's x-instagram-gis value into unrelated requests.
		headers = copy.deepcopy(COMMON_HEADERS)
		headers['x-ig-app-id'] = self.app_id
		while 1:
			params = copy.deepcopy(CHANNEL_PARAMS)
			# In the variables template '%' stands for the user id and '$' for the cursor.
			params['variables'] = params['variables'].replace('%',self.Id).replace('$',end_cursor)
			params['query_hash'] = self.marked_id
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
				params=params,
				headers=headers,
				delay=delay,
				json=True,
				proxy=PROXY_GLOBAL)
			json_data = response.json()
			posts = json_data['data']['user']['edge_user_to_photos_of_you']['edges']
			page_info = json_data['data']['user']['edge_user_to_photos_of_you']['page_info']
			has_next_page = page_info['has_next_page']
			end_cursor = page_info['end_cursor']
			for i in posts:
				if save:
					self.db.save(i,tname=tname)
				results.append(i)
				_count += 1
				if _count >= count and count > 0:
					logger.info(f'[Done]The amount of crawled tagged posts by user "{self.name}":{len(results)}.')
					return results
			logger.info(f'Current amount of crawled tagged posts by user "{self.name}":{len(results)}.')
			# An empty cursor cannot be used for the next page: stop instead of
			# re-requesting the first page or failing in str.replace.
			if not has_next_page or not end_cursor:
				logger.info(f'[Done]The amount of crawled tagged posts by user "{self.name}":{len(results)}.')
				break
		return results
示例#11
0
	def get_comment_likers(self,comment_id,save=False,count=-1,delay=DELAY,tname=None,path=None):
		"""Collect the ``edge_liked_by`` edges of the comment ``comment_id``.

		Pages are requested one after another, starting with an empty cursor,
		until there is no next page or ``count`` is reached.

		Args:
			comment_id: Id of the comment (a string; it is substituted into the
				variables template).
			save: When truthy, every collected edge is passed to ``self.db.save``.
			count: Maximum number of likers to return; values <= 0 mean no limit.
			delay: Forwarded to ``send_request`` for every request.
			tname: Forwarded to ``self.db.save`` as ``tname``.
			path: Not used by this method.

		Returns:
			list: The collected liker edges in the order they were received.
		"""
		_count = 0
		results = []
		end_cursor = ''
		while 1:
			params = copy.deepcopy(COMMENT_LIKER_PARAMS)
			headers = copy.deepcopy(COMMON_HEADERS)
			params['query_hash'] = self.comment_liker_hash
			# In the variables template '$' stands for the comment id and '%' for the cursor.
			params['variables'] = params['variables'].replace('$', comment_id).replace('%', end_cursor)
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis'] = md5ed
			response = send_request(API_USER_POSTS,
									session=self.session,
									json=True,
									delay=delay,
									headers=headers,
									params=params)
			data = response.json()
			liker_card = data['data']['comment']['edge_liked_by']
			likers = liker_card['edges']
			page_info = liker_card['page_info']
			end_cursor = page_info['end_cursor']
			has_next_page = page_info['has_next_page']
			logger.info(
				f'Current grabbed users who liked comment({comment_id}):{len(likers)}.')
			for i in likers:
				# Persist before the limit check so the edge that completes the
				# quota is saved as well as returned.
				if save:
					self.db.save(i, tname=tname)
				_count += 1
				results.append(i)
				if _count >= count  and (count > 0):
					logger.info(f'[Done]Total crawled users who liked comment({comment_id}) :{len(results)}')
					return results
			# An empty cursor cannot be used for the next page: stop instead of
			# re-requesting the first page or failing in str.replace.
			if not has_next_page or not end_cursor:
				logger.info(f'[Done]Total crawled users who liked comment({comment_id}) :{len(results)}')
				return results
示例#12
0
	def get_channel_hash(self):
		"""Extract the channel hash from the script referenced by the homepage.

		The script path is matched in ``self.homepage`` with
		``PATTERN_APP_ID_JS`` and requested relative to ``HOST``. The
		``PATTERN_CHANNEL`` match in the script text is stored on
		``self._channel_hash`` and returned.
		"""
		script_path = from_pattern(self.homepage,PATTERN_APP_ID_JS)
		script = send_request(HOST+script_path,proxy=PROXY_GLOBAL)
		channel_hash = from_pattern(script.text,PATTERN_CHANNEL)
		self._channel_hash = channel_hash
		return self._channel_hash
示例#13
0
	def get_liker_hash(self):
		"""Extract the liker hash from the script referenced by the homepage.

		The script path is matched in ``self.homepage`` with
		``PATTERN_APP_ID_JS`` and requested relative to ``HOST``. The
		``PATTERN_LIKER`` match in the script text is stored on
		``self._liker_hash`` and returned.
		"""
		script_path = from_pattern(self.homepage, PATTERN_APP_ID_JS)
		script = send_request(HOST + script_path, proxy=PROXY_GLOBAL)
		liker_hash = from_pattern(script.text,PATTERN_LIKER)
		self._liker_hash = liker_hash
		return self._liker_hash
示例#14
0
	def get_web_app_id(self):
		"""Extract the web app id from the script referenced by the homepage.

		The script path is matched in ``self.homepage`` with
		``PATTERN_APP_ID_JS`` and requested relative to ``HOST``. The
		``PATTERN_WEB_APP_ID`` match in the script text is stored on
		``self._web_app_id`` and returned.
		"""
		script_path = from_pattern(self.homepage,PATTERN_APP_ID_JS)
		script = send_request(HOST+script_path,proxy=PROXY_GLOBAL)
		app_id = from_pattern(script.text,PATTERN_WEB_APP_ID)
		self._web_app_id = app_id
		return self._web_app_id
示例#15
0
	def get_posts_by_tag(self,tag,delay=DELAY,top_only=True,count=-1,save=False,tname=None,path=None):
		"""Collect post edges for the hashtag ``tag``.

		The tag page is requested first. With ``top_only`` the top-post edges of
		that page are returned directly; otherwise the media edges are collected
		page by page until there is no next page or ``count`` is reached.

		Args:
			tag: Hashtag name substituted into the tag URL and query variables.
			delay: Forwarded to ``send_request`` for every paged request.
			top_only: When truthy, return only the top posts of the tag page.
			count: Maximum number of posts to return; values <= 0 mean no limit.
				Not applied to the top-posts result.
			save: When truthy, every collected edge is passed to ``self.db.save``.
			tname: Forwarded to ``self.db.save`` as ``tname``.
			path: Not used by this method.

		Returns:
			list: The top-post edges, or the collected media edges in the order
			they were received.
		"""
		url = API_TAG_POSTS.format(tag=tag)
		response = send_request(url,json=True)
		data = response.json()
		hashtags = data['graphql']['hashtag']
		media_posts = hashtags['edge_hashtag_to_media']
		top_posts = hashtags['edge_hashtag_to_top_posts']['edges']
		total = media_posts['count']
		current_posts = media_posts['edges']
		page_info = media_posts['page_info']
		end_cursor = page_info['end_cursor']
		has_next_page = page_info['has_next_page']
		results = []
		_count = 0
		# Denominator for the progress percentage in the log output.
		_check = count if count > 0 else total
		# Work on a private copy: writing into COMMON_HEADERS itself would leak
		# this tag's x-instagram-gis value into unrelated requests.
		headers = copy.deepcopy(COMMON_HEADERS)
		headers['x-ig-app-id']=self.app_id
		logger.info(f'Total posts of tag "{tag}":{total}')
		if top_only:
			for i in top_posts:
				if save:
					self.db.save(i,tname=tname)
			return top_posts
		else:
			for i in current_posts:
				# Persist before the limit check so the edge that completes the
				# quota is saved as well as returned.
				if save:
					self.db.save(i,tname=tname)
				_count+=1
				results.append(i)
				if (_count>=count or _count>=total) and (count>0):
					logger.info(f'[Done]Total crawled posts of tag "{tag}":{len(results)}')
					return results
		while 1:
			# An empty cursor cannot be used for the next page: stop instead of
			# failing in str.replace.
			if not has_next_page or not end_cursor:
				return results
			params = copy.deepcopy(TAG_PARAMS)
			params['query_hash']=self.tag_hash
			# In the variables template '$' stands for the tag and '%' for the cursor.
			params['variables']=params['variables'].replace('$',tag).replace('%',end_cursor)
			md5ed = md5(self.rhx_gis + ":" + params['variables'])
			headers['x-instagram-gis']=md5ed
			response = send_request(API_USER_POSTS,
				params=params,
				delay=delay,
				headers=headers,
				json=True)
			data = response.json()
			hashtags = data['data']['hashtag']
			media_posts = hashtags['edge_hashtag_to_media']
			current_posts = media_posts['edges']
			page_info = media_posts['page_info']
			end_cursor = page_info['end_cursor']
			has_next_page = page_info['has_next_page']
			logger.info(f'Amount of current crawled posts of tag "{tag}"'
				f':{len(results)}.[{round(len(results)/_check,4)*100 if _check else 0}%]')
			for i in current_posts:
				if save:
					self.db.save(i,tname=tname)
				_count+=1
				results.append(i)
				if (_count>=count or _count>=total) and (count>0):
					logger.info(f'[Done]Total crawled posts of tag "{tag}":{len(results)}')
					return results
示例#16
0
	def get_query_ids(self):
		"""Extract the query ids from the script referenced by the homepage.

		The script path is matched in ``self.homepage`` with
		``PATTERN_POSTS_JS`` and requested relative to ``HOST``. All
		``PATTERN_POSTS`` matches in the script text are stored on
		``self._queryIds`` and returned.
		"""
		script_path = from_pattern(self.homepage,PATTERN_POSTS_JS)
		script = send_request(HOST+script_path,proxy=PROXY_GLOBAL)
		query_ids = from_pattern(script.text,PATTERN_POSTS,allget=True)
		self._queryIds = query_ids
		return self._queryIds
示例#17
0
文件: obj.py 项目: xuan2261/InsBot
 def download_image(self,url,path=None):
 	"""Download ``url`` and write the response body to ``path``.

 	The download happens before the destination is opened, so a failed
 	request neither creates an empty file nor truncates an existing one.

 	Args:
 		url: Address passed to ``send_request``.
 		path: Destination file path; required despite the ``None`` default.

 	Raises:
 		TypeError: If ``path`` is ``None``.
 	"""
 	if path is None:
 		# Same exception type ``open(None, 'wb')`` raises, but before any network I/O.
 		raise TypeError('download_image() needs a destination path, got None.')
 	content = send_request(url).content
 	with open(path,'wb') as f:
 		f.write(content)
示例#18
0
 def wrapper(self, *args, **kwargs):
     """Perform the request for operation ``opt`` around the wrapped ``func``.

     ``opt``, ``api``, ``login``, ``produce``, ``method``, ``data``,
     ``params``, ``callback``, ``out`` and ``func`` are free variables
     provided by an enclosing scope that is not part of this block.

     Two modes:

     * ``produce`` falsy: the request is sent first, its decoded JSON is
       exposed as ``self.__opt__`` while ``func`` runs, and ``func``'s
       return value is the result.
     * ``produce`` truthy: ``func`` returns ``(data_dict, tips)``.
       ``data_dict`` may override the url and supply ``headers``,
       ``params``, ``data``, ``http_kwargs`` and ``cb_kwargs``; ``tips``
       holds the ``'ok'`` / ``'failed'`` log messages.

     Returns:
         The decoded response when ``out`` is truthy, otherwise ``func``'s
         result (non-produce mode) or a success flag (produce mode). In
         produce mode it returns early with ``None`` when no response was
         received, with the callback's result when ``callback`` is callable,
         with the upload id for a successful ``upload_pic`` and with the
         decoded response for ``create_post``.
     """
     url = APIS[opt] if api is None else api
     if login:
         # NOTE(review): if ``logined_headers`` is a plain attribute, the
         # updates below modify the shared dict; confirm this is intended.
         headers = self.logined_headers
         cookies = self.session.cookies.get_dict()
         headers['x-csrftoken'] = cookies['csrftoken']
     else:
         headers = copy.deepcopy(COMMON_HEADERS)
     if not produce:
         response = send_request(url,
                                 session=self.session,
                                 headers=headers,
                                 method=method,
                                 data=data,
                                 params=params,
                                 json=True,
                                 delay=DELAY,
                                 proxy=PROXY_GLOBAL)
         res = response.json()
         self.__opt__ = res
         try:
             ret = func(self, *args, **kwargs)
         finally:
             # Never leave a stale response behind, even if ``func`` raises.
             self.__opt__ = None
     else:
         ret = True
         data_dict, tips = func(self, *args, **kwargs)
         url = data_dict.get('url') or url
         headers.update(data_dict.get('headers', {}))
         if opt == 'create_post':
             # Tolerate headers that are already absent instead of aborting
             # the request with a KeyError.
             headers.pop('Content-Type', None)
             headers.pop('content-length', None)
         response = send_request(url,
                                 session=self.session,
                                 headers=headers,
                                 method=method,
                                 params=data_dict.get('params', None),
                                 data=data_dict.get('data', None),
                                 proxy=PROXY_GLOBAL,
                                 json=True,
                                 delay=DELAY,
                                 **data_dict.get('http_kwargs', {}))
         if response is None:
             return
         res = response.json()
         if callback and callable(callback):
             cb_args = data_dict.get('cb_kwargs', {})
             return callback(self, res, **cb_args)
         if res and (res.get('status', '') == 'ok'
                     or res.get('graphql')):
             if opt == 'reset_password':
                 self.pwd = data_dict['data']['new_password1']
             if opt == 'upload_pic':
                 if not res.get('has_profile_pic'):
                     if data_dict['data']:
                         if not res.get('upload_id'):
                             logger.info(tips['failed'])
                         else:
                             logger.info(tips['ok'])
                             return res.get('upload_id')
                     else:
                         logger.info(tips['ok'])
                     return ret
             logger.info(tips['ok'])
             if opt == 'create_post':
                 # ``media`` may be missing from the response; log None
                 # rather than failing on the attribute access.
                 media = res.get('media') or {}
                 logger.info(
                     f"Posted media id:{media.get('pk')}")
                 return res
         else:
             logger.info(tips['failed'])
             # This branch is also reached with an empty response or one
             # without a 'message' key; do not crash while reporting it.
             message = res.get('message') if res else None
             logger.info(f"error:{message}")
             ret = False
     self.__opt__ = None
     if out:
         return res
     return ret