Пример #1
0
def get_soup_impressum(website_links_list):
    """Fetch the first link that looks like an imprint/contact page.

    The keywords are tried in priority order ("impressum" first,
    "contact" last). For each keyword the links are scanned in list
    order, and the first link whose lower-cased text contains the keyword
    is printed and fetched via ``get_soup``.

    Args:
        website_links_list: Re-iterable collection of URL strings (it is
            scanned once per keyword).

    Returns:
        Whatever ``get_soup`` returns for the matching link, or ``None``
        when no link contains any of the keywords.
    """
    impressumlist = (
        "impressum", "legal", "imprint", "disclaimer", "kontakt", "contact"
    )
    for impressum in impressumlist:
        for link in website_links_list:
            if impressum in link.lower():
                print(link)
                # Returning here ends both loops; no break is needed.
                return get_soup(link)
    return None
Пример #2
0
def get_soup_northdata(firma):
    """Fetch the northdata.de page for a company name.

    The search URL is the base URL followed by the company name with
    every single space replaced by "+".

    Args:
        firma: Company name as a string.

    Returns:
        The result of passing the fetched page (from ``get_soup``) through
        ``test_northdata``.
    """
    base_url = "https://www.northdata.de/"
    # Split on a literal space (not arbitrary whitespace) so that
    # consecutive spaces still produce consecutive "+" characters.
    search_url = base_url + "+".join(firma.split(" "))
    soup = get_soup(search_url)
    return test_northdata(soup)
Пример #3
0
def GetLinks(url):
    """Collect the distinct link targets of all ``<a>`` tags on a page.

    The page is fetched with ``get_soup`` (assumed to return a parsed
    document exposing ``find_all`` -- TODO confirm against its definition).
    Anchors without an ``href`` and hrefs shorter than four characters
    are skipped. Absolute http(s) links are kept verbatim; every other
    href is resolved against ``url``.

    Args:
        url: Address of the page to scan; also the base for resolving
            relative links.

    Returns:
        A list of unique link strings in arbitrary order.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import urljoin

    soup = get_soup(url)
    unique_links = set()
    for link_html in soup.find_all('a'):
        href = link_html.get('href')
        if href is None:
            # An <a> without href used to be stringified to "None" and
            # returned as a bogus link; skip it instead.
            continue
        link = str(href)
        if len(link) < 4:
            continue
        if not link.startswith(('http://', 'https://')):
            # Standard relative-reference resolution handles "/path",
            # "page.html", "//host/path" and "mailto:" correctly, which
            # plain string concatenation did not.
            link = urljoin(url, link)
        unique_links.add(link)
    return list(unique_links)