Пример #1
0
def Search(Query_List, Task_ID, Limit=10):
    """Search the configured Craigslist site's RSS feed and record new listings.

    For each query the RSS search feed of the configured Craigslist location
    is parsed. Listings whose URL is not already cached are fetched, written
    to an output file and registered through ``General.Connections``. The
    URLs recorded during this run are passed to the plugin cache at the end.

    Args:
        Query_List: One or more search terms; normalised with
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Limit: Maximum number of listings processed per query; normalised
            with ``General.Get_Limit``.

    Returns:
        None. Any exception raised during the run is caught and logged as a
        warning.
    """
    # str.strip() removes a *set of characters*, not a prefix, so the package
    # prefix is dropped explicitly to avoid mangling the module name.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__
    logger = logging.getLogger()
    handler = None

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        logger.addHandler(handler)
        Craigslist_Location = Load_Configuration()
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = int(General.Get_Limit(Limit))

        if Craigslist_Location:
            # These values only depend on the configured location, so they are
            # computed once instead of once per listing.
            Local_Domain = f"{Craigslist_Location.lower()}.craigslist.org"
            Local_Host = f"https://{Local_Domain}"
            Local_URL = f"{Local_Host}/"

            for Query in Query_List:
                Main_URL = f"{Local_Host}/search/sss?format=rss&query={Query}"
                Craigslist_Items = feedparser.parse(Main_URL)["items"]
                Current_Step = 0

                for Item in Craigslist_Items:

                    if Current_Step >= Limit:
                        # Nothing further would be processed for this query.
                        break

                    Item_URL = Item["link"]

                    if Item_URL in Cached_Data or Item_URL in Data_to_Cache:
                        continue

                    Item_Responses = Common.Request_Handler(
                        Item_URL, Filter=True, Host=Local_Host)
                    Item_Response = Item_Responses["Filtered"]

                    # Turn the listing path into a flat file name.
                    Filename = Item_URL.replace(Local_URL, "")
                    Filename = Filename.replace(".html/", "")
                    Filename = Filename.replace(".html", "")
                    Filename = Filename.replace("/", "-")
                    Output_file = General.Create_Query_Results_Output_File(
                        Directory, Query, Plugin_Name, Item_Response,
                        Filename, The_File_Extension)

                    if Output_file:
                        Output_Connections = General.Connections(
                            Query, Plugin_Name, Local_Domain,
                            "Search Result", Task_ID, Plugin_Name.lower())
                        Output_Connections.Output(
                            [Output_file], Item_URL,
                            General.Get_Title(Item_URL),
                            Plugin_Name.lower())
                        Data_to_Cache.append(Item_URL)

                    else:
                        logging.warning(
                            f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                        )

                    # Failed attempts count towards the limit as well.
                    Current_Step += 1

        else:
            # Previously a missing location was skipped without any trace.
            logging.warning(
                f"{Common.Date()} - {Module_Name} - Failed to load location details."
            )

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {Module_Name} - {str(e)}")

    finally:

        if handler is not None:
            # Detach and close the per-run file handler so repeated calls do
            # not accumulate handlers (and open files) on the root logger.
            logger.removeHandler(handler)
            handler.close()
Пример #2
0
def Search(Query_List, Task_ID, Type, Limit=10):
    """Look up UK companies by business number or by name and record new results.

    Two request types are supported:

    * ``"UKBN"`` - each query must be an integer company number; the company
      profile is requested from ``https://api.{Domain}/company/{Query}``.
    * ``"UKCN"`` - each query is sent to the company search endpoint and
      every returned item that is not already cached is recorded.

    For every new result the matching ``https://beta.{Domain}`` page is
    fetched, written to an output file and registered through
    ``General.Connections``. Recorded URLs are passed to the plugin cache at
    the end of the run.

    Args:
        Query_List: One or more queries; normalised with
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Type: Either ``"UKBN"`` or ``"UKCN"``; anything else is logged as an
            invalid request type.
        Limit: Used as ``items_per_page`` for ``"UKCN"`` searches after being
            normalised with ``General.Get_Limit``.

    Returns:
        None. Failures are logged as warnings rather than raised.
    """
    # str.strip() removes a *set of characters*, not a prefix, so the package
    # prefix is dropped explicitly to avoid mangling the module name.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__
    Company_Prefix = "/company/"
    Not_Found_Body = '{"errors":[{"error":"company-profile-not-found","type":"ch:service"}]}'
    logger = logging.getLogger()
    handler = None

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Concat_Plugin_Name)
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Concat_Plugin_Name)
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        logger.addHandler(handler)
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)

        for Query in Query_List:

            try:

                if Type not in ("UKBN", "UKCN"):
                    logging.warning(f"{Common.Date()} - {Module_Name} - Invalid request type.")
                    continue

                Authorization_Key = Load_Configuration()

                if not Authorization_Key:
                    logging.warning(f"{Common.Date()} - {Module_Name} - Failed to retrieve API key.")
                    continue

                headers_auth = {"Authorization": "Basic " + Authorization_Key.decode('ascii')}

                if Type == "UKBN":

                    try:
                        # Validate before the request so an invalid query
                        # does not cost an API call.
                        Normalised_Query = str(int(Query))

                    except (TypeError, ValueError):
                        logging.warning(f"{Common.Date()} - {Module_Name} - Invalid query provided for UKBN Search.")
                        continue

                    # The URL keeps the query exactly as supplied; only file
                    # names and titles use the normalised form.
                    Main_URL = f'https://api.{Domain}/company/{Query}'
                    Response = Common.Request_Handler(Main_URL, Optional_Headers=headers_auth)
                    JSON_Object = Common.JSON_Handler(Response)
                    JSON_Response = JSON_Object.To_JSON_Loads()
                    Indented_JSON_Response = JSON_Object.Dump_JSON()

                    try:
                        Is_New = Main_URL not in Cached_Data and Main_URL not in Data_to_Cache

                        if Response and Not_Found_Body not in Response and Is_New:
                            Current_Company_Number = str(JSON_Response["company_number"])
                            Result_URL = f'https://beta.{Domain}/company/{Current_Company_Number}'
                            Result_Responses = Common.Request_Handler(Result_URL, Filter=True, Host=f"https://beta.{Domain}")
                            Result_Response = Result_Responses["Filtered"]
                            UKCN = str(JSON_Response["company_name"])
                            Main_Output_File = General.Main_File_Create(Directory, Plugin_Name, Indented_JSON_Response, Normalised_Query, The_File_Extensions["Main"])
                            Output_file = General.Create_Query_Results_Output_File(Directory, Normalised_Query, Plugin_Name, Result_Response, UKCN, The_File_Extensions["Query"])

                            if Output_file:
                                Output_Connections = General.Connections(Normalised_Query, Plugin_Name, Domain, "Company Details", Task_ID, Plugin_Name)
                                Output_Connections.Output([Main_Output_File, Output_file], Result_URL, f"UK Business Number {Normalised_Query}", Concat_Plugin_Name)
                                Data_to_Cache.append(Main_URL)

                            else:
                                logging.warning(f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist.")

                    except Exception:
                        # "except Exception" (not a bare except) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        logging.warning(f"{Common.Date()} - {Module_Name} - Invalid query provided for UKBN Search.")

                else:
                    Limit = General.Get_Limit(Limit)

                    try:
                        Main_URL = f'https://api.{Domain}/search/companies?q={Query}&items_per_page={Limit}'
                        Response = Common.Request_Handler(Main_URL, Optional_Headers=headers_auth)
                        JSON_Object = Common.JSON_Handler(Response)
                        JSON_Response = JSON_Object.To_JSON_Loads()
                        Indented_JSON_Response = JSON_Object.Dump_JSON()

                        try:

                            if JSON_Response['total_results'] > 0:
                                Main_Output_File = General.Main_File_Create(Directory, Plugin_Name, Indented_JSON_Response, Query, The_File_Extensions["Main"])
                                Output_Connections = General.Connections(Query, Plugin_Name, Domain, "Company Details", Task_ID, Plugin_Name)

                                for Item in JSON_Response['items']:
                                    UKBN_URL = Item['links']['self']
                                    Full_UKBN_URL = f'https://beta.{Domain}{str(UKBN_URL)}'

                                    # Remove the "/company/" prefix itself;
                                    # str.strip("/company/") would strip any
                                    # of those characters from both ends.
                                    UKBN = str(UKBN_URL)

                                    if UKBN.startswith(Company_Prefix):
                                        UKBN = UKBN[len(Company_Prefix):]

                                    UKBN = UKBN.strip("/")

                                    if Full_UKBN_URL in Cached_Data or Full_UKBN_URL in Data_to_Cache:
                                        continue

                                    UKCN = Item['title']
                                    Current_Responses = Common.Request_Handler(Full_UKBN_URL, Filter=True, Host=f"https://beta.{Domain}")
                                    Current_Response = Current_Responses["Filtered"]
                                    Output_file = General.Create_Query_Results_Output_File(Directory, Query, Plugin_Name, str(Current_Response), UKCN, The_File_Extensions["Query"])

                                    if Output_file:
                                        Output_Connections.Output([Main_Output_File, Output_file], Full_UKBN_URL, f"UK Business Number {UKBN} for Query {Query}", Concat_Plugin_Name)
                                        Data_to_Cache.append(Full_UKBN_URL)

                                    else:
                                        logging.warning(f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist.")

                        except Exception:
                            logging.warning(f"{Common.Date()} - {Module_Name} - Error during UKCN Search, perhaps the rate limit has been exceeded.")

                    except Exception:
                        logging.warning(f"{Common.Date()} - {Module_Name} - Invalid query provided for UKCN Search.")

            except Exception:
                logging.warning(f"{Common.Date()} - {Module_Name} - Failed to make request.")

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {Module_Name} - {str(e)}")

    finally:

        if handler is not None:
            # Detach and close the per-run file handler so repeated calls do
            # not accumulate handlers (and open files) on the root logger.
            logger.removeHandler(handler)
            handler.close()
Пример #3
0
def Search(Query_List, Task_ID, Limit=10):
    """Search Library Genesis for each query and record new publication pages.

    The search results page for each query is fetched and saved as the main
    file, then scanned for ``book/index.php?md5=...`` links. Each link that
    is not already cached is fetched, written to an output file and
    registered through ``General.Connections``. Recorded URLs are passed to
    the plugin cache at the end of the run.

    Args:
        Query_List: One or more titles or ISBNs; normalised with
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Limit: Maximum number of publications processed per query;
            normalised with ``General.Get_Limit``.

    Returns:
        None. Any exception raised during the run is caught and logged as a
        warning.
    """
    # str.strip() removes a *set of characters*, not a prefix, so the package
    # prefix is dropped explicitly to avoid mangling the module name.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__
    logger = logging.getLogger()
    handler = None

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Concat_Plugin_Name)
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Concat_Plugin_Name)
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        logger.addHandler(handler)
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = int(General.Get_Limit(Limit))

        for Query in Query_List:
            # Query can be Title or ISBN
            Main_URL = f"http://{Domain}/search.php?req={Query}&lg_topic=libgen&open=0&view=simple&res=100&phrase=1&column=def"
            Lib_Gen_Response = Common.Request_Handler(Main_URL)
            Main_File = General.Main_File_Create(Directory, Plugin_Name,
                                                 Lib_Gen_Response, Query,
                                                 The_File_Extension)
            Lib_Gen_Regex = Common.Regex_Handler(
                Lib_Gen_Response,
                Custom_Regex=r"book\/index\.php\?md5=[A-Fa-f0-9]{32}",
                Findall=True)

            if not Lib_Gen_Regex:
                logging.warning(
                    f"{Common.Date()} - {Module_Name} - Failed to match regular expression."
                )
                continue

            Current_Step = 0

            for Regex in Lib_Gen_Regex:

                if Current_Step >= Limit:
                    # Nothing further would be processed for this query.
                    break

                Item_URL = f"http://{Domain}/{Regex}"

                if Item_URL in Cached_Data or Item_URL in Data_to_Cache:
                    continue

                # Only fetch the title and page for items that will actually
                # be recorded; previously both requests were made for every
                # match, including cached ones and those beyond the limit.
                Title = General.Get_Title(Item_URL).replace(
                    "Genesis:", "Genesis |")
                Lib_Item_Responses = Common.Request_Handler(
                    Item_URL, Filter=True, Host=f"http://{Domain}")
                Lib_Item_Response = Lib_Item_Responses["Filtered"]
                Output_file = General.Create_Query_Results_Output_File(
                    Directory, Query, Plugin_Name, Lib_Item_Response,
                    Regex, The_File_Extension)

                if Output_file:
                    Output_Connections = General.Connections(
                        Query, Plugin_Name, Domain, "Publication",
                        Task_ID, Concat_Plugin_Name)
                    Output_Connections.Output([Main_File, Output_file],
                                              Item_URL, Title,
                                              Concat_Plugin_Name)
                    Data_to_Cache.append(Item_URL)

                else:
                    logging.warning(
                        f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                    )

                # Failed attempts count towards the limit as well.
                Current_Step += 1

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {Module_Name} - {str(e)}")

    finally:

        if handler is not None:
            # Detach and close the per-run file handler so repeated calls do
            # not accumulate handlers (and open files) on the root logger.
            logger.removeHandler(handler)
            handler.close()
Пример #4
0
def Search(Query_List, Task_ID):
    """Look up each query as a Kik username and record profiles that are found.

    The page at ``http://{Domain}/{Query}`` is fetched and searched for a
    display-name heading followed by a username heading equal to the query.
    When a display name other than a single space is found and the URL is not
    already cached, the filtered page is written to an output file and
    registered through ``General.Connections``. Recorded URLs are passed to
    the plugin cache at the end of the run.

    Args:
        Query_List: One or more usernames; normalised with
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.

    Returns:
        None. Any exception raised during the run is caught and logged as a
        warning.
    """
    import re

    # str.strip() removes a *set of characters*, not a prefix, so the package
    # prefix is dropped explicitly to avoid mangling the module name.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__
    logger = logging.getLogger()
    handler = None

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        logger.addHandler(handler)
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)

        for Query in Query_List:
            Main_URL = f"http://{Domain}/{Query}"
            Responses = Common.Request_Handler(Main_URL,
                                               Filter=True,
                                               Host=f"https://www.{Domain}")
            Response = Responses["Regular"]
            Filtered_Response = Responses["Filtered"]

            # Escape the query so characters such as "." or "+" are matched
            # literally instead of being interpreted as regex syntax.
            Escaped_Query = re.escape(str(Query))
            Kik_Item_Regex = Common.Regex_Handler(
                Response,
                Custom_Regex=
                rf"\<h1\sclass\=\"display\-name\"\>(.+)\<\/h1>\s+\<h2\sclass\=\"username\"\>{Escaped_Query}\<\/h2\>"
            )

            # A missing match and a blank display name are both reported;
            # previously a missing match produced no log entry at all.
            if not Kik_Item_Regex or Kik_Item_Regex.group(1) == " ":
                logging.info(
                    f"{Common.Date()} - {Module_Name} - Query didn't match regex pattern."
                )
                continue

            Output_Connections = General.Connections(
                Query, Plugin_Name, Domain, "Social Media - Person",
                Task_ID, Plugin_Name.lower())
            Title = f"Kik | {Kik_Item_Regex.group(1)}"

            if Main_URL in Cached_Data or Main_URL in Data_to_Cache:
                continue

            Output_file = General.Main_File_Create(
                Directory, Plugin_Name, Filtered_Response, Query,
                The_File_Extension)

            if Output_file:
                Output_Connections.Output([Output_file],
                                          Main_URL, Title,
                                          Plugin_Name.lower())
                Data_to_Cache.append(Main_URL)

            else:
                logging.warning(
                    f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                )

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {Module_Name} - {str(e)}")

    finally:

        if handler is not None:
            # Detach and close the per-run file handler so repeated calls do
            # not accumulate handlers (and open files) on the root logger.
            logger.removeHandler(handler)
            handler.close()
Пример #5
0
def Search(Query_List, Task_ID, Limit=10):
    """Search YouTube for videos matching each query and record new results.

    Each query is sent to the search endpoint of the API client built from
    the loaded configuration. The returned items are saved as the main file,
    and every video whose watch URL is not already cached is fetched, written
    to an output file and registered through ``General.Connections``.
    Recorded URLs are passed to the plugin cache at the end of the run.

    Args:
        Query_List: One or more search terms; normalised with
            ``General.Convert_to_List``.
        Task_ID: Identifier passed through to ``General.Connections``.
        Limit: Passed to the API as ``maxResults`` after being normalised
            with ``General.Get_Limit``.

    Returns:
        None. Any exception raised during the run is caught and logged as a
        warning.
    """
    # str.strip() removes a *set of characters*, not a prefix, so the package
    # prefix is dropped explicitly to avoid mangling the module name.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__
    logger = logging.getLogger()
    handler = None

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        logger.addHandler(handler)
        YouTube_Details = Load_Configuration()
        Cached_Data_Object = General.Cache(Directory, Plugin_Name)
        Cached_Data = Cached_Data_Object.Get_Cache()
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(Limit)
        YouTube_Handler = None

        for Query in Query_List:

            if YouTube_Handler is None:
                # Build the API client once, on first use, and reuse it for
                # the remaining queries instead of rebuilding it every time.
                YouTube_Handler = discovery.build(YouTube_Details[1],
                                                  YouTube_Details[2],
                                                  developerKey=YouTube_Details[0],
                                                  cache_discovery=False)

            Search_Response = YouTube_Handler.search().list(
                q=Query,
                type='video',
                part='id,snippet',
                maxResults=Limit,
            ).execute()
            Search_Items = Search_Response.get('items', [])
            JSON_Object = Common.JSON_Handler(Search_Items)
            JSON_Output_Response = JSON_Object.Dump_JSON()
            Main_File = General.Main_File_Create(Directory, Plugin_Name,
                                                 JSON_Output_Response, Query,
                                                 The_File_Extensions["Main"])
            Output_Connections = General.Connections(Query, Plugin_Name,
                                                     Domain,
                                                     "Social Media - Media",
                                                     Task_ID,
                                                     Plugin_Name.lower())

            for Search_Result in Search_Items:
                Video_ID = Search_Result['id']['videoId']
                Full_Video_URL = f"https://www.{Domain}/watch?v=" + Video_ID

                if Full_Video_URL in Cached_Data or Full_Video_URL in Data_to_Cache:
                    continue

                # Only fetch the video page for results that will be
                # recorded; previously cached videos were fetched too.
                Search_Video_Responses = Common.Request_Handler(
                    Full_Video_URL, Filter=True, Host=f"https://www.{Domain}")
                Search_Video_Response = Search_Video_Responses["Filtered"]
                Title = "YouTube | " + Search_Result['snippet']['title']
                Output_file = General.Create_Query_Results_Output_File(
                    Directory, Query, Plugin_Name, Search_Video_Response,
                    Video_ID, The_File_Extensions["Query"])

                if Output_file:
                    Output_Connections.Output([Main_File, Output_file],
                                              Full_Video_URL, Title,
                                              Plugin_Name.lower())
                    Data_to_Cache.append(Full_Video_URL)

                else:
                    logging.warning(
                        f"{Common.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                    )

        Cached_Data_Object.Write_Cache(Data_to_Cache)

    except Exception as e:
        logging.warning(f"{Common.Date()} - {Module_Name} - {str(e)}")

    finally:

        if handler is not None:
            # Detach and close the per-run file handler so repeated calls do
            # not accumulate handlers (and open files) on the root logger.
            logger.removeHandler(handler)
            handler.close()