def _add_to_urls_to_urllist(account: Account, current_list: UrlList, urls: List[str]) -> Dict[str, Any]:
    """Attach every url in ``urls`` to ``current_list`` and tally the outcome.

    A url is skipped when a ``UrlList`` with this ``account`` and the id of
    ``current_list`` already contains it (case-insensitive match). Otherwise
    the matching ``Url`` row is reused when one exists (exact match); when
    none exists ``Url.add`` is called and a discover task is dispatched for
    the new url before it is attached to the list.

    :param account: account used to scope the "already in list" lookup.
    :param current_list: list the urls are attached to.
    :param urls: url strings to process, in order.
    :return: dict with the keys ``added_to_list`` and ``already_in_list``.
    """
    tally: Dict[str, int] = {'added_to_list': 0, 'already_in_list': 0}

    for url in urls:
        # Urls this list already holds do not need to be saved again.
        # NOTE(review): this check is case-insensitive (iexact) while the
        # database lookup below is exact -- confirm that is intended.
        is_listed = UrlList.objects.all().filter(
            account=account, id=current_list.id, urls__url__iexact=url).exists()
        if is_listed:
            tally['already_in_list'] += 1
            continue

        # Reuse a url that is already in the database; only add a new one
        # when the lookup comes back empty.
        url_object = Url.objects.all().filter(url=url).first()
        if not url_object:
            url_object = Url.add(url)

            # always try to find a few dns endpoints...
            compose_discover_task(urls_filter={'pk': url_object.id}).apply_async()

        current_list.urls.add(url_object)
        tally['added_to_list'] += 1

    return tally
def add_urls_to_organizations(organizations: List[Organization], urls: List[str]) -> None:
    """Add the hostnames found in ``urls`` to each of the ``organizations``.

    Every url is parsed with ``tldextract`` so callers may pass extensive
    urls such as https://www.apple.com:80/yolo/swag. When the url has a
    subdomain, ``subdomain.domain.suffix`` is added; when it has a domain,
    ``domain.suffix`` is added as well. Each added url is linked to the
    organization being processed.

    :param organizations: organizations that receive the urls.
    :param urls: url strings to parse and add.
    """
    for organization in organizations:
        for url in urls:
            parts = tldextract.extract(url)

            # Collect the hostnames first (subdomain form before the bare
            # domain), then add them in that order.
            hostnames = []
            if parts.subdomain:
                hostnames.append(f"{parts.subdomain}.{parts.domain}.{parts.suffix}")
            if parts.domain:
                hostnames.append(f"{parts.domain}.{parts.suffix}")

            for hostname in hostnames:
                added_url = Url.add(hostname)
                added_url.organization.add(organization)
def get_url(new_url_string: str):
    """Return the ``Url`` for ``new_url_string`` plus a "was added" flag.

    The first ``Url`` whose ``url`` field equals ``new_url_string`` is looked
    up; when none is found ``Url.add`` is called with the string instead.

    :param new_url_string: url string to look up or add.
    :return: tuple ``(url, created)``; ``created`` is False when an existing
        row was found and True when ``Url.add`` supplied the url.
    """
    found = Url.objects.all().filter(url=new_url_string).first()
    if not found:
        # Nothing stored under this exact string yet, so add it.
        return Url.add(new_url_string), True

    return found, False