示例#1
0
def _add_to_urls_to_urllist(account: Account, current_list: UrlList,
                            urls: List[str]) -> Dict[str, Any]:
    """Attach every entry of ``urls`` to ``current_list`` and report what happened.

    For each entry:

    * if the list (restricted to ``account``) already holds a url that matches
      case-insensitively, the entry is only counted as ``already_in_list``;
    * otherwise an existing ``Url`` with exactly that url is reused, or a new
      one is made through ``Url.add`` and a discover task is dispatched for it;
      the ``Url`` is then added to the list and counted as ``added_to_list``.

    :param account: account the list is filtered on.
    :param current_list: list that receives the urls.
    :param urls: url strings to attach.
    :return: dict with the keys ``added_to_list`` and ``already_in_list``.
    """
    added = 0
    skipped = 0

    for address in urls:
        # Nothing to store when the list already holds this address.
        in_list = UrlList.objects.all().filter(
            account=account, id=current_list.id,
            urls__url__iexact=address).exists()
        if in_list:
            skipped += 1
            continue

        # Reuse the database record when there is one; create it otherwise.
        url_record = Url.objects.all().filter(url=address).first()
        if not url_record:
            url_record = Url.add(address)

            # always try to find a few dns endpoints...
            compose_discover_task(urls_filter={'pk': url_record.id}).apply_async()

        current_list.urls.add(url_record)
        added += 1

    return {'added_to_list': added, 'already_in_list': skipped}
示例#2
0
def add_urls_to_organizations(organizations: List[Organization], urls: List[str]) -> None:
    """Register the hostnames found in ``urls`` and link them to every organization.

    Each entry of ``urls`` is parsed with ``tldextract`` to make the API easier
    to use: extensive urls such as ``https://www.apple.com:80/yolo/swag`` are
    accepted. Per entry:

    * when a subdomain is present, the full hostname (subdomain, domain and
      suffix) is passed to ``Url.add`` and linked to the organization;
    * when a domain is present, the bare domain (domain and suffix) is passed
      to ``Url.add`` and linked as well.

    Entries in which no domain is found are skipped silently.

    :param organizations: organizations every resulting url is linked to.
    :param urls: hostnames or complete urls.
    """
    if not organizations:
        return

    # Parsing does not depend on the organization, so it is done once per
    # input instead of once per (organization, url) pair.
    hostnames = []
    for url in urls:
        extract = tldextract.extract(url)

        # The suffix is empty for ip addresses and for names without a known
        # public suffix. Joining only the non-empty parts avoids a hostname
        # with a trailing dot (such as "localhost.") in that case.
        domain = ".".join(part for part in (extract.domain, extract.suffix) if part)

        if extract.subdomain:
            hostnames.append(f"{extract.subdomain}.{domain}")

        if extract.domain:
            hostnames.append(domain)

    for organization in organizations:
        for hostname in hostnames:
            Url.add(hostname).organization.add(organization)
示例#3
0
def get_url(new_url_string: str):
    """Fetch the ``Url`` for ``new_url_string``, creating it when it is missing.

    :param new_url_string: url to look up by exact match.
    :return: tuple ``(url, created)``; ``created`` is ``True`` only when no
        matching ``Url`` was found and ``Url.add`` was called.
    """
    found = Url.objects.all().filter(url=new_url_string).first()
    if not found:
        # Nothing stored under this url yet: create it.
        return Url.add(new_url_string), True

    return found, False