def Search(Query_List, Task_ID, Limit=10):

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())

        # Route this plugin's log output to its own file inside the plugin directory.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        Craigslist_Location = Load_Configuration()
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(Limit)

        for Query in Query_List:

            if Craigslist_Location:
                # Craigslist exposes search results as an RSS feed, one entry per listing.
                Main_URL = f"https://{Craigslist_Location.lower()}.craigslist.org/search/sss?format=rss&query={Query}"
                Craigslist_Response = feedparser.parse(Main_URL)
                Craigslist_Items = Craigslist_Response["items"]
                Current_Step = 0

                for Item in Craigslist_Items:
                    Item_URL = Item["link"]

                    # Only process listings that are new, up to the configured result limit.
                    if Item_URL not in Cached_Data and Item_URL not in Data_to_Cache and Current_Step < int(Limit):
                        Craigslist_Responses = Common.Request_Handler(Item_URL, Filter=True, Host=f"https://{Craigslist_Location.lower()}.craigslist.org")
                        Craigslist_Response = Craigslist_Responses["Filtered"]
                        Local_URL = f"https://{Craigslist_Location.lower()}.craigslist.org/"
                        Local_Domain = f"{Craigslist_Location.lower()}.craigslist.org"

                        # Derive a filesystem-safe filename from the listing's URL path.
                        Filename = Item_URL.replace(Local_URL, "")
                        Filename = Filename.replace(".html/", "")
                        Filename = Filename.replace(".html", "")
                        Filename = Filename.replace("/", "-")
                        Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, Craigslist_Response, Filename, The_File_Extension)

                        if Output_file:
                            Output_Connections = General.Connections(Query, Plugin_Name, Local_Domain, "Search Result", Task_ID, Plugin_Name.lower())
                            Output_Connections.Output([Output_file], Item_URL, General.Get_Title(Item_URL), Plugin_Name.lower())
                            Data_to_Cache.append(Item_URL)

                        else:
                            logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

                        Current_Step += 1

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - {str(e)}")
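
# A minimal standalone sketch (not part of the plugin) of the RSS technique used above:
# appending format=rss to a Craigslist search URL returns a feed that feedparser parses
# into entries exposing "link" and "title" keys. The Location and Query defaults here
# are hypothetical examples, not values this plugin uses.
def _Demo_Craigslist_RSS(Location="seattle", Query="bicycle"):
    import feedparser

    Feed_URL = f"https://{Location}.craigslist.org/search/sss?format=rss&query={Query}"
    Feed = feedparser.parse(Feed_URL)

    # Each entry's "link" is what Search() checks against the cache and writes out.
    return [(Entry["link"], Entry.get("title", "")) for Entry in Feed["items"]]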
def Search(Query_List, Task_ID, Type, Limit=10):

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Concat_Plugin_Name)

        # Plugin-specific log file setup.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Concat_Plugin_Name)
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)

        for Query in Query_List:

            try:

                if Type == "UKBN":
                    Authorization_Key = Load_Configuration()

                    if Authorization_Key:
                        Authorization_Key = "Basic " + Authorization_Key.decode('ascii')
                        headers_auth = {"Authorization": Authorization_Key}
                        Main_URL = f'https://api.{Domain}/company/{Query}'
                        Response = Common.Request_Handler(Main_URL, Optional_Headers=headers_auth)
                        JSON_Object = Common.JSON_Handler(Response)
                        JSON_Response = JSON_Object.To_JSON_Loads()
                        Indented_JSON_Response = JSON_Object.Dump_JSON()

                        try:
                            # UK business numbers are numeric; a non-numeric query raises here.
                            Query = str(int(Query))

                            if Response and '{"errors":[{"error":"company-profile-not-found","type":"ch:service"}]}' not in Response:

                                if Main_URL not in Cached_Data and Main_URL not in Data_to_Cache:
                                    Current_Company_Number = str(JSON_Response["company_number"])
                                    Result_URL = f'https://beta.{Domain}/company/{Current_Company_Number}'
                                    Result_Responses = Common.Request_Handler(Result_URL, Filter=True, Host=f"https://beta.{Domain}")
                                    Result_Response = Result_Responses["Filtered"]
                                    UKCN = str(JSON_Response["company_name"])
                                    Main_Output_File = General.Main_File_Create(Directory, Plugin_Name, Indented_JSON_Response, Query, The_File_Extensions["Main"])
                                    Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, Result_Response, UKCN, The_File_Extensions["Query"])

                                    if Output_file:
                                        Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Company Details", Task_ID, Plugin_Name)
                                        Output_Connections.Output([Main_Output_File, Output_file], Result_URL, f"UK Business Number {Query}", Concat_Plugin_Name)
                                        Data_to_Cache.append(Main_URL)

                                    else:
                                        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

                        except Exception:
                            logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Invalid query provided for UKBN Search.")

                    else:
                        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to retrieve API key.")

                elif Type == "UKCN":
                    Authorization_Key = Load_Configuration()

                    if Authorization_Key:
                        Authorization_Key = "Basic " + Authorization_Key.decode('ascii')
                        Limit = General.Get_Limit(Limit)

                        try:
                            Main_URL = f'https://api.{Domain}/search/companies?q={Query}&items_per_page={Limit}'
                            headers_auth = {"Authorization": Authorization_Key}
                            Response = Common.Request_Handler(Main_URL, Optional_Headers=headers_auth)
                            JSON_Object = Common.JSON_Handler(Response)
                            JSON_Response = JSON_Object.To_JSON_Loads()
                            Indented_JSON_Response = JSON_Object.Dump_JSON()

                            try:

                                if JSON_Response['total_results'] > 0:
                                    Main_Output_File = General.Main_File_Create(Directory, Plugin_Name, Indented_JSON_Response, Query, The_File_Extensions["Main"])
                                    Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Company Details", Task_ID, Plugin_Name)

                                    for Item in JSON_Response['items']:
                                        UKBN_URL = Item['links']['self']
                                        Full_UKBN_URL = f'https://beta.{Domain}{str(UKBN_URL)}'
                                        UKBN = UKBN_URL.replace("/company/", "")

                                        if Full_UKBN_URL not in Cached_Data and Full_UKBN_URL not in Data_to_Cache:
                                            UKCN = Item['title']
                                            Current_Responses = Common.Request_Handler(Full_UKBN_URL, Filter=True, Host=f"https://beta.{Domain}")
                                            Current_Response = Current_Responses["Filtered"]
                                            Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, str(Current_Response), UKCN, The_File_Extensions["Query"])

                                            if Output_file:
                                                Output_Connections.Output([Main_Output_File, Output_file], Full_UKBN_URL, f"UK Business Number {UKBN} for Query {Query}", Concat_Plugin_Name)
                                                Data_to_Cache.append(Full_UKBN_URL)

                                            else:
                                                logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

                            except Exception:
                                logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Error during UKCN Search, perhaps the rate limit has been exceeded.")

                        except Exception:
                            logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Invalid query provided for UKCN Search.")

                    else:
                        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to retrieve API key.")

                else:
                    logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Invalid request type.")

            except Exception:
                logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to make request.")

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - {str(e)}")
def Search(Query_List, Task_ID, Limit=10):

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Concat_Plugin_Name)

        # Plugin-specific log file setup.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Concat_Plugin_Name)
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(Limit)

        for Query in Query_List:
            # The query can be either a title or an ISBN; Library Genesis accepts both.
            Main_URL = f"http://{Domain}/search.php?req={Query}&lg_topic=libgen&open=0&view=simple&res=100&phrase=1&column=def"
            Lib_Gen_Response = Common.Request_Handler(Main_URL)
            Main_File = General.Main_File_Create(Directory, Plugin_Name, Lib_Gen_Response, Query, The_File_Extension)

            # Each result links to a book page identified by a 32-character MD5 hash.
            Lib_Gen_Regex = Common.Regex_Handler(Lib_Gen_Response, Custom_Regex=r"book\/index\.php\?md5=[A-Fa-f0-9]{32}", Findall=True)

            if Lib_Gen_Regex:
                Current_Step = 0

                for Regex in Lib_Gen_Regex:
                    Item_URL = f"http://{Domain}/{Regex}"
                    Title = General.Get_Title(Item_URL).replace("Genesis:", "Genesis |")
                    Lib_Item_Responses = Common.Request_Handler(Item_URL, Filter=True, Host=f"http://{Domain}")
                    Lib_Item_Response = Lib_Item_Responses["Filtered"]

                    if Item_URL not in Cached_Data and Item_URL not in Data_to_Cache and Current_Step < int(Limit):
                        Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, Lib_Item_Response, Regex, The_File_Extension)

                        if Output_file:
                            Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Publication", Task_ID, Concat_Plugin_Name)
                            Output_Connections.Output([Main_File, Output_file], Item_URL, Title, Concat_Plugin_Name)
                            Data_to_Cache.append(Item_URL)

                        else:
                            logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

                        Current_Step += 1

            else:
                logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to match regular expression.")

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - {str(e)}")
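
# A minimal standalone sketch (not part of the plugin) of the scraping step above:
# Library Genesis search pages embed per-book links of the form
# "book/index.php?md5=<32 hex chars>", which the plugin extracts with re.findall and
# turns into absolute item URLs. The Domain default and sample HTML are illustrative.
def _Demo_LibGen_Regex(Domain="libgen.rs"):
    import re

    Sample_HTML = '<a href="book/index.php?md5=0123456789ABCDEF0123456789ABCDEF">Title</a>'
    Matches = re.findall(r"book\/index\.php\?md5=[A-Fa-f0-9]{32}", Sample_HTML)

    # Mirror the plugin's URL construction for each matched relative link.
    return [f"http://{Domain}/{Match}" for Match in Matches]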
def Search(Query_List, Task_ID):

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())

        # Plugin-specific log file setup.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)

        for Query in Query_List:
            Main_URL = f"http://{Domain}/{Query}"
            Responses = Common.Request_Handler(Main_URL, Filter=True, Host=f"https://www.{Domain}")
            Response = Responses["Regular"]
            Filtered_Response = Responses["Filtered"]

            # A valid profile page carries the display name in an <h1> followed by the
            # queried username in an <h2>.
            Kik_Item_Regex = Common.Regex_Handler(Response, Custom_Regex=rf"\<h1\sclass\=\"display\-name\"\>(.+)\<\/h1>\s+\<h2\sclass\=\"username\"\>{Query}\<\/h2\>")

            if Kik_Item_Regex:

                if Kik_Item_Regex.group(1) != " ":
                    Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Social Media - Person", Task_ID, Plugin_Name.lower())
                    Title = f"Kik | {Kik_Item_Regex.group(1)}"

                    if Main_URL not in Cached_Data and Main_URL not in Data_to_Cache:
                        Output_file = General.Main_File_Create(Directory, Plugin_Name, Filtered_Response, Query, The_File_Extension)

                        if Output_file:
                            Output_Connections.Output([Output_file], Main_URL, Title, Plugin_Name.lower())
                            Data_to_Cache.append(Main_URL)

                        else:
                            logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

            else:
                logging.info(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Query didn't match regex pattern.")

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - {str(e)}")
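
# A minimal standalone sketch (not part of the plugin) of the profile check above: the
# plugin looks for the display name in an <h1 class="display-name"> tag immediately
# followed by the queried username in an <h2 class="username"> tag, and treats a
# non-blank group(1) as a hit. The Username default and sample HTML are illustrative.
def _Demo_Kik_Profile_Regex(Username="example_user"):
    import re

    Sample_HTML = f'<h1 class="display-name">Example Name</h1>\n<h2 class="username">{Username}</h2>'
    Match = re.search(rf"\<h1\sclass\=\"display\-name\"\>(.+)\<\/h1>\s+\<h2\sclass\=\"username\"\>{Username}\<\/h2\>", Sample_HTML)

    # Returns the display name on a match, which Search() uses to build the title.
    return Match.group(1) if Match else None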
def Search(Query_List, Task_ID, Limit=10):

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())

        # Plugin-specific log file setup.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        YouTube_Details = Load_Configuration()
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(Limit)

        for Query in Query_List:
            # Build a YouTube Data API client from the configured (API key, service
            # name, version) details and run a video search for the query.
            YouTube_Handler = discovery.build(YouTube_Details[1], YouTube_Details[2], developerKey=YouTube_Details[0], cache_discovery=False)
            Search_Response = YouTube_Handler.search().list(q=Query, type='video', part='id,snippet', maxResults=Limit).execute()
            JSON_Object = Common.JSON_Handler(Search_Response.get('items', []))
            JSON_Output_Response = JSON_Object.Dump_JSON()
            Main_File = General.Main_File_Create(Directory, Plugin_Name, JSON_Output_Response, Query, The_File_Extensions["Main"])
            Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Social Media - Media", Task_ID, Plugin_Name.lower())

            for Search_Result in Search_Response.get('items', []):
                Full_Video_URL = f"https://www.{Domain}/watch?v=" + Search_Result['id']['videoId']
                Search_Video_Responses = Common.Request_Handler(Full_Video_URL, Filter=True, Host=f"https://www.{Domain}")
                Search_Video_Response = Search_Video_Responses["Filtered"]
                Title = "YouTube | " + Search_Result['snippet']['title']

                if Full_Video_URL not in Cached_Data and Full_Video_URL not in Data_to_Cache:
                    Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, Search_Video_Response, Search_Result['id']['videoId'], The_File_Extensions["Query"])

                    if Output_file:
                        Output_Connections.Output([Main_File, Output_file], Full_Video_URL, Title, Plugin_Name.lower())
                        Data_to_Cache.append(Full_Video_URL)

                    else:
                        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - Failed to create output file. File may already exist.")

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {__name__.replace('plugins.', '')} - {str(e)}")
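
# A minimal standalone sketch (not part of the plugin) of the API call above, using the
# google-api-python-client library's discovery.build() with the YouTube Data API v3.
# The API_Key, Query, and Limit defaults are placeholders; the plugin instead reads its
# service name, version, and key from configuration via Load_Configuration().
def _Demo_YouTube_Search(API_Key="YOUR_API_KEY", Query="example", Limit=5):
    from googleapiclient import discovery

    Handler = discovery.build("youtube", "v3", developerKey=API_Key, cache_discovery=False)
    Response = Handler.search().list(q=Query, type="video", part="id,snippet", maxResults=Limit).execute()

    # Each item carries the video ID used to build the watch URL, as in Search().
    return [f"https://www.youtube.com/watch?v={Item['id']['videoId']}" for Item in Response.get("items", [])]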