示例#1
0
def clean_urls(urls: List[str]) -> Dict[str, List]:
    """
    Sort the given urls into a 'correct' and an 'incorrect' bucket.

    Every url is lowercased first, because all urls in the system must be lowercase (where that applies to the
    characters used). The lowercased value is then judged by ``Url.is_valid_url``. Rejected urls are returned for
    informational purposes only and may contain utter garbage; they let the editor of the urls see what was
    entered wrongly and correct it.

    :param urls: the url strings to check.
    :return: a dict with the keys 'incorrect' and 'correct', each holding the lowercased urls in input order.
    """

    accepted: List[str] = []
    rejected: List[str] = []

    for raw_url in urls:
        lowered = raw_url.lower()
        # Pick the destination list first, then append once.
        bucket = accepted if Url.is_valid_url(lowered) else rejected
        bucket.append(lowered)

    return {'incorrect': rejected, 'correct': accepted}
示例#2
0
def add_urls(organization_id, urls: str):
    """
    Add every valid url found in a blob of free-form text to an organization.

    ``urls`` is basically garbage input: multiple lines, spaces, commas and all kinds of unicode. It is broken up
    into small pieces of text; each piece is checked with ``Url.is_valid_url``. Valid pieces are added to the
    organization through ``organization.add_url``; the other pieces are reported back to the caller.

    :param organization_id: id used to look up the Organization.
    :param urls: free-form text containing urls separated by commas and/or whitespace.
    :return: the result of ``operation_response``: an error when the organization does not exist or when no valid
        url was found, otherwise a success. When some pieces were not valid, the success response carries them in
        ``data['invalid_domains']``.
    """
    # todo: how does it behave with urls with protocol?

    # Commas count as separators. ``str.split()`` without arguments splits on any run of whitespace (spaces,
    # tabs, "\r", "\n", ...) and never yields empty strings, so repeated separators or blank lines do not show up
    # as empty entries in 'invalid_domains'. ``or ""`` makes a missing value behave like empty text.
    pieces = (urls or "").replace(",", " ").split()

    valid = []
    not_valid = []
    for piece in pieces:
        if Url.is_valid_url(piece):
            valid.append(piece)
        else:
            not_valid.append(piece)

    # A single query both checks existence and fetches the record.
    organization = Organization.objects.all().filter(
        id=organization_id).first()
    if organization is None:
        return operation_response(error=True,
                                  message="Organization could not be found.")

    if not valid:
        return operation_response(error=True, message="No valid url found.")

    for url in valid:
        organization.add_url(url)

    message = f"{len(valid)} urls have been added."
    if not_valid:
        return operation_response(
            success=True,
            message=message,
            data={"invalid_domains": not_valid})

    # No ``data`` argument at all when there is nothing to report.
    return operation_response(success=True, message=message)
示例#3
0
def alter_url_in_urllist(account, data) -> Dict[str, Any]:
    """
    Replace one url in a url list of the given account with another url.

    ``data`` is expected to look like:
    ``{'list_id': list.id, 'url_id': url.id, 'new_url_string': url.url}``

    The old url record is never deleted; it is only removed from the list. The new url is fetched through
    ``get_url``, which also reports whether it was newly created.

    :param account: account the url list must belong to.
    :param data: dict with the keys 'list_id', 'url_id' and 'new_url_string'.
    :return: the result of ``operation_response``. Errors are returned when ``check_keys`` flags the data, when
        the old url or the list cannot be found, or when the new url string is not valid. When the url string is
        unchanged a plain success is returned. Otherwise the success response carries a 'created' and a 'removed'
        description of the new and the old url.
    """

    def has_live_endpoint(url, protocol):
        # True when a non-dead endpoint with this protocol exists for the url.
        return Endpoint.objects.all().filter(
            url=url, is_dead=False, protocol=protocol).exists()

    def describe(url, fragments, has_mail, has_web):
        # Build the per-url part of the response payload.
        return {
            'id': url.id,
            'url': url.url,
            'created_on': url.created_on,
            'has_mail_endpoint': has_mail,
            'has_web_endpoint': has_web,
            'subdomain': fragments.subdomain,
            'domain': fragments.domain,
            'suffix': fragments.suffix
        }

    if check_keys(['list_id', 'url_id', 'new_url_string'], data):
        return operation_response(error=True, message="Missing keys in data.")

    # Which url is being replaced?
    previous = Url.objects.all().filter(pk=data['url_id']).first()
    if not previous:
        return operation_response(error=True,
                                  message="The old url does not exist.")

    requested = data['new_url_string']
    if previous.url == requested:
        # Nothing changed, so there is nothing to do.
        return operation_response(success=True, message="Saved.")

    # The list must exist and belong to this account.
    target_list = UrlList.objects.all().filter(account=account,
                                               pk=data['list_id']).first()
    if not target_list:
        return operation_response(error=True, message="List does not exist.")

    if not Url.is_valid_url(requested):
        return operation_response(
            error=True, message="New url does not have the correct format.")

    # Fetch the url, or create it if it doesn't exist yet.
    replacement, created = get_url(requested)

    # Only detach the old url from the list: calling delete would remove the record itself.
    target_list.urls.remove(previous)
    # Save after the removal, so adding the same url again will not cause a foreign key error.
    target_list.save()

    target_list.urls.add(replacement)
    target_list.save()

    # Up to four queries is somewhat inefficient, yet good enough.
    previous_mail = has_live_endpoint(previous, 'dns_soa')
    previous_web = has_live_endpoint(previous, 'dns_a_aaa')

    if created:
        replacement_mail = 'unknown'
        replacement_web = 'unknown'
    else:
        replacement_mail = has_live_endpoint(replacement, 'dns_soa')
        replacement_web = has_live_endpoint(replacement, 'dns_a_aaa')

    replacement_fragments = tldextract.extract(replacement.url)
    previous_fragments = tldextract.extract(previous.url)

    payload = {
        'created': describe(replacement, replacement_fragments,
                            replacement_mail, replacement_web),
        'removed': describe(previous, previous_fragments,
                            previous_mail, previous_web),
    }
    return operation_response(success=True, message="Saved.", data=payload)