def search(timestamp, tag='photoslicebot'):
    '''
    Search for multi-image ("panorama") posts published under a hashtag.

    The hashtag page is fetched as JSON, posts newer than ``timestamp`` are
    selected (videos are skipped and logged), and every remaining post is
    looked up individually with ``ie.media``.  A ``Post`` is built only for
    posts that have a caption block and more than one sidecar child.

    :param timestamp: only posts whose ``taken_at_timestamp`` is strictly
        greater than this value are considered
    :param tag: hashtag to search, without the leading ``#``
    :return: list of ``Post`` objects built from the matching posts
    '''
    posts = []
    url = "https://www.instagram.com/explore/tags/%s/" % tag
    payload = {'__a': '1'}
    res = requests.get(url, params=payload).json()
    edges = res['graphql']['hashtag']['edge_hashtag_to_media']['edges']

    codes = []
    for edge in edges:  # posts here are already sorted by date
        node = edge['node']
        if node['taken_at_timestamp'] <= timestamp:
            continue
        if node['is_video']:
            log.info('CRAWLER: There is a video {}'.format(
                node['shortcode']))
            continue
        codes.append(node['shortcode'])

    for code in codes:
        image = ie.media(code)
        try:
            info = image.data
            # Posts without a caption block are skipped silently.
            if 'edge_media_to_caption' not in info:
                continue
            if 'edges' not in info['edge_media_to_caption']:
                continue

            slides = info['edge_sidecar_to_children']['edges']
            if len(slides) <= 1:
                log.warning('CRAWLER: Post {} isn\'t a panorama'.format(code))
                continue

            posts.append(Post(
                username=info['owner']['username'],
                caption=info['edge_media_to_caption']
                ['edges'][0]['node']['text'],
                location=info['location'],
                urls=[x['node']['display_url'] for x in slides],
                date=info['taken_at_timestamp'],
                code=code))
        # "except Exception, err" is Python 2-only syntax; the "as" form is
        # accepted by Python 2.6+ and Python 3 alike.
        except Exception as err:
            # Best effort: one malformed post must not abort the whole crawl.
            log.error('CRAWLER: Get post {0} info error {1}'.format(code, err))

    # The collected posts were previously built but never handed back.
    return posts
def Search(Query_List, Task_ID, Type, **kwargs):
    """Look up Instagram posts for each query and record the new post URLs.

    For ``Type`` "User", "Tag" or "Location" the matching explore call is
    made, the raw response is dumped to a main ``.json`` file, and up to
    ``Limit`` post URLs that are neither in the existing cache nor already
    seen during this call are sent to ``General.Connections(...).Output``.
    For ``Type`` "Media" the query itself is treated as the post shortcode.
    Any other ``Type`` only produces an "Invalid type provided." warning.

    Args:
        Query_List: One query or several; normalised through
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Type: One of "User", "Tag", "Location" or "Media".
        **kwargs: Optional ``Limit``; a truthy value whose ``int()`` is
            positive caps the number of new posts per query (default 10).

    Returns:
        None.  New URLs are written with ``General.Write_Cache``, in append
        mode when cached data already existed and in write mode otherwise.
    """
    Data_to_Cache = []

    # Only a truthy, positive "Limit" overrides the default of 10.
    Limit = 10
    if kwargs.get('Limit') and int(kwargs["Limit"]) > 0:
        Limit = kwargs["Limit"]

    Directory = General.Make_Directory(Plugin_Name.lower())

    # NOTE(review): a new FileHandler is attached to the root logger on every
    # call and never removed; kept as-is because other code may rely on it.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    Log_File = General.Logging(Directory, Plugin_Name.lower())
    handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    Cached_Data = General.Get_Cache(Directory, Plugin_Name)
    if not Cached_Data:
        Cached_Data = []

    Query_List = General.Convert_to_List(Query_List)

    # str.strip('plugins.') removes any of those *characters* from both ends
    # of the name; the intent is to drop the package prefix only.
    Module_Name = __name__
    if Module_Name.startswith('plugins.'):
        Module_Name = Module_Name[len('plugins.'):]

    # Feed types differ only in the explore call and the key holding the
    # post edges.  Lambdas keep the name lookups lazy, as in the original
    # per-branch code.
    Feeds = {
        "User": (lambda Item: instagram_explore.user(Item), "edge_owner_to_timeline_media"),
        "Tag": (lambda Item: instagram_explore.tag(Item), "edge_hashtag_to_media"),
        "Location": (lambda Item: location(Item), "edge_location_to_media"),
    }
    Feed = Feeds.get(Type) if isinstance(Type, str) else None

    for Query in Query_List:

        if Feed is not None:
            Fetch, Edge_Key = Feed
            Local_Plugin_Name = Plugin_Name + "-" + Type
            CSE_Response = Fetch(Query)
            CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
            Output_file = General.Main_File_Create(Directory, Local_Plugin_Name, CSE_JSON_Output_Response, Query, ".json")
            Posts = CSE_Response[0][Edge_Key]["edges"]
            Output_Connections = General.Connections(Query, Local_Plugin_Name, "instagram.com", "Data Leakage", Task_ID, Local_Plugin_Name.lower())
            Current_Step = 0

            for Post in Posts:
                Shortcode = Post["node"]["shortcode"]
                URL = "https://www.instagram.com/p/" + Shortcode + "/"

                # Skip known URLs and anything beyond the per-query limit.
                if URL in Cached_Data or URL in Data_to_Cache or Current_Step >= int(Limit):
                    continue

                if Output_file:
                    Output_Connections.Output(Output_file, URL, General.Get_Title(URL))

                Data_to_Cache.append(URL)
                Current_Step += 1

        elif Type == "Media":
            Local_Plugin_Name = Plugin_Name + "-" + Type
            CSE_Response = instagram_explore.media(Query)

            if CSE_Response:
                CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
                Output_file = General.Main_File_Create(Directory, Local_Plugin_Name, CSE_JSON_Output_Response, Query, ".json")
                # For media lookups the query is the post shortcode.
                URL = "https://www.instagram.com/p/" + Query + "/"

                if URL not in Cached_Data and URL not in Data_to_Cache:

                    if Output_file:
                        Output_Connections = General.Connections(Query, Local_Plugin_Name, "instagram.com", "Data Leakage", Task_ID, Local_Plugin_Name.lower())
                        Output_Connections.Output(Output_file, URL, General.Get_Title(URL))

                    Data_to_Cache.append(URL)

            else:
                logging.warning(General.Date() + " - " + Module_Name + " - Invalid response.")

        else:
            logging.warning(General.Date() + " - " + Module_Name + " - Invalid type provided.")

    # Append to an existing cache, otherwise start a new one.
    General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "a" if Cached_Data else "w")
def Search(Query_List, Task_ID, Type, **kwargs):
    """Look up Instagram posts for each query and store every new post page.

    For ``Type`` "User", "Tag" or "Location" the matching explore call is
    made, the raw response is dumped to a main file, and for up to ``Limit``
    post URLs that are neither cached nor already seen during this call the
    post page is downloaded, written to a per-result output file and sent to
    ``General.Connections(...).Output``.  For ``Type`` "Media" the query
    itself is treated as the post shortcode.  Any other ``Type`` only
    produces an "Invalid type provided." warning.

    Args:
        Query_List: One query or several; normalised through
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Type: One of "User", "Tag", "Location" or "Media".
        **kwargs: Passed to ``General.Get_Limit`` to obtain the per-query
            cap on new posts.

    Returns:
        None.  New URLs are written with ``General.Write_Cache``, in append
        mode when cached data already existed and in write mode otherwise.
        Any exception raised while processing is logged as a warning and is
        not propagated.
    """
    # str.strip('plugins.') removes any of those *characters* from both ends
    # of the name; the intent is to drop the package prefix only.
    Module_Name = __name__
    if Module_Name.startswith('plugins.'):
        Module_Name = Module_Name[len('plugins.'):]

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())

        # NOTE(review): a new FileHandler is attached to the root logger on
        # every call and never removed; kept as-is because other code may
        # rely on it.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        Cached_Data = General.Get_Cache(Directory, Plugin_Name)
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(kwargs)

        # Feed types differ only in the explore call, the key holding the
        # post edges and the connection category.  Lambdas keep the name
        # lookups lazy, as in the original per-branch code.
        Feeds = {
            "User": (lambda Item: instagram_explore.user(Item), "edge_owner_to_timeline_media", "Social Media - Person"),
            "Tag": (lambda Item: instagram_explore.tag(Item), "edge_hashtag_to_media", "Social Media - Person"),
            "Location": (lambda Item: location(Item), "edge_location_to_media", "Social Media - Place"),
        }
        Feed = Feeds.get(Type) if isinstance(Type, str) else None

        for Query in Query_List:

            if Feed is not None:
                Fetch, Edge_Key, Category = Feed
                Local_Plugin_Name = Plugin_Name + "-" + Type
                CSE_Response = Fetch(Query)
                CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
                Main_File = General.Main_File_Create(
                    Directory, Local_Plugin_Name, CSE_JSON_Output_Response,
                    Query, The_File_Extensions["Main"])
                Posts = CSE_Response[0][Edge_Key]["edges"]
                Output_Connections = General.Connections(
                    Query, Local_Plugin_Name, "instagram.com", Category,
                    Task_ID, Local_Plugin_Name.lower())
                Current_Step = 0

                for Post in Posts:
                    Shortcode = Post["node"]["shortcode"]
                    URL = f"https://www.instagram.com/p/{Shortcode}/"

                    # Skip known URLs and anything beyond the per-query limit.
                    if URL in Cached_Data or URL in Data_to_Cache or Current_Step >= int(Limit):
                        continue

                    # Resolve the title only for posts that will be stored.
                    Title = "IG | " + General.Get_Title(URL)
                    Response = requests.get(URL, headers=headers).text
                    Output_file = General.Create_Query_Results_Output_File(
                        Directory, Query, Local_Plugin_Name, Response,
                        Shortcode, The_File_Extensions["Query"])

                    if Output_file:
                        Output_Connections.Output([Main_File, Output_file], URL, Title, Plugin_Name.lower())
                        Data_to_Cache.append(URL)

                    else:
                        logging.warning(
                            f"{General.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                        )

                    Current_Step += 1

            elif Type == "Media":
                Local_Plugin_Name = Plugin_Name + "-" + Type
                CSE_Response = instagram_explore.media(Query)

                if CSE_Response:
                    CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
                    Main_File = General.Main_File_Create(
                        Directory, Local_Plugin_Name, CSE_JSON_Output_Response,
                        Query, The_File_Extensions["Main"])
                    URL = f"https://www.instagram.com/p/{Query}/"

                    if URL not in Cached_Data and URL not in Data_to_Cache:
                        Title = "IG | " + General.Get_Title(URL)
                        Response = requests.get(URL, headers=headers).text
                        # The query is the shortcode here; the former code
                        # passed an undefined "Shortcode" and always failed
                        # with NameError in this branch.
                        Output_file = General.Create_Query_Results_Output_File(
                            Directory, Query, Local_Plugin_Name, Response,
                            Query, The_File_Extensions["Query"])

                        if Output_file:
                            Output_Connections = General.Connections(
                                Query, Local_Plugin_Name, "instagram.com",
                                "Social Media - Media", Task_ID,
                                Local_Plugin_Name.lower())
                            Output_Connections.Output([Main_File, Output_file], URL, Title, Plugin_Name.lower())
                            Data_to_Cache.append(URL)

                        else:
                            logging.warning(
                                f"{General.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                            )

                else:
                    logging.warning(
                        f"{General.Date()} - {Module_Name} - Invalid response."
                    )

            else:
                logging.warning(
                    f"{General.Date()} - {Module_Name} - Invalid type provided."
                )

        # Append to an existing cache, otherwise start a new one.
        General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "a" if Cached_Data else "w")

    except Exception as e:
        # Top-level boundary of the plugin: report the failure, do not crash.
        logging.warning(
            f"{General.Date()} - {Module_Name} - {str(e)}")
# Usage examples; each section below performs live lookups.
# NOTE(review): `ie` is used on the next statement before the first
# `import instagram_explore as ie` visible in this chunk — presumably an
# earlier import exists above this point; confirm.

# Image only (tag lookup for 'cat')
images = ie.tag_images('cat').data

import instagram_explore as ie

# Search by location id
res = ie.location('213163910')
print(res.data)

# Next page: pass the cursor of the previous result as the second argument
data, cursor = ie.location('213163910', res.cursor)

# Image only
images = ie.location_images('213163910').data

import instagram_explore as ie

# Search by media code
res = ie.media('BFRO_5WBQfc')
print(res.data)

# Image only
image = ie.media_image('BFRO_5WBQfc').data

# NOTE(review): `log` and `HashTagSearchExample` are defined outside this
# chunk; `log` looks like an alias of the logging module — confirm.
log.basicConfig(level=log.INFO)
HashTagSearchExample().extract_recent_tag("palmeiras")
def test_media():
    """Check the result shape of a media lookup for a known shortcode.

    The result must be a tuple exposing ``data`` (a dict) and ``cursor``
    (``None`` for this lookup).
    """
    media_result = ie.media('BFRO_5WBQfc')

    assert isinstance(media_result, tuple)
    assert isinstance(media_result.data, dict)
    assert media_result.cursor is None