예제 #1
0
def apply_disavow(disavow_entries, urls_list):
    """Split a list of URLs into those a disavow file would and would not cover.

    A URL counts as disavowed when its normalized form equals one of the
    normalized disavow links, or when ``subdomain(url)`` is among the values
    that ``subdomains()`` produces for the normalized domain entries.

    Args:
        disavow_entries: Mapping that may hold a ``'urls'`` key (individual
            links to disavow) and/or a ``'domains'`` key (domain entries to
            disavow). Either key may be absent.
        urls_list: URLs to test against the disavow entries.

    Returns:
        A dict with:
          * ``'disavowed'`` / ``'non_disavowed'``: the normalized, deduped
            test URLs split by whether they matched.
          * ``'total_disavowed_links'`` / ``'total_remaining_links'``: the
            lengths of those two lists.
          * ``'urls_entered_to_test'`` / ``'unique_urls_entered_to_test'``:
            counters reported by the normalizer for ``urls_list``.
          * ``'domains_entered'`` / ``'unique_domains_entered'``: counters
            reported by the normalizer for the domain entries, or ``0`` when
            no ``'domains'`` key was supplied.
          * ``'disavow_links_entered'`` / ``'unique_disavow_links_entered'``:
            only present when a ``'urls'`` key was supplied.
    """
    output_dict = {}

    # Sets give O(1) membership tests in the classification loop below.
    disavow_links = set()
    disavow_domains = set()

    # Defaults keep the domain counters defined when 'domains' is absent;
    # previously that case raised UnboundLocalError while building the result.
    domains_entered = 0
    unique_domains_entered = 0

    if 'urls' in disavow_entries:
        link_details = normalize_and_dedupe_with_counts(
            disavow_entries['urls'])
        disavow_links = set(link_details.clean_urls)
        output_dict['disavow_links_entered'] = link_details.urls_entered
        output_dict['unique_disavow_links_entered'] = (
            link_details.unique_urls_entered)

    if 'domains' in disavow_entries:
        domain_details = normalize_and_dedupe_with_counts(
            disavow_entries['domains'])
        disavow_domains = set(subdomains(domain_details.clean_urls))
        domains_entered = domain_details.urls_entered
        unique_domains_entered = domain_details.unique_urls_entered

    urls_to_test_details = normalize_and_dedupe_with_counts(urls_list)

    disavowed_urls = []
    non_disavowed_urls = []
    for url in urls_to_test_details.clean_urls:
        if url in disavow_links or subdomain(url) in disavow_domains:
            disavowed_urls.append(url)
        else:
            non_disavowed_urls.append(url)

    output_dict.update({
        'disavowed': disavowed_urls,
        'non_disavowed': non_disavowed_urls,
        'domains_entered': domains_entered,
        'unique_domains_entered': unique_domains_entered,
        'urls_entered_to_test': urls_to_test_details.urls_entered,
        'unique_urls_entered_to_test':
        urls_to_test_details.unique_urls_entered,
        'total_disavowed_links': len(disavowed_urls),
        'total_remaining_links': len(non_disavowed_urls),
    })

    return output_dict
예제 #2
0
def apply_disavow(disavow_entries, urls_list):
    """Test which of the given URLs a set of disavow entries would disavow.

    A URL is treated as disavowed if, after normalization, it is one of the
    normalized disavow links, or if ``subdomain(url)`` appears in the result
    of ``subdomains()`` applied to the normalized domain entries.

    Args:
        disavow_entries: Mapping with optional ``'urls'`` and ``'domains'`` keys
            holding the link entries and domain entries of a disavow file.
        urls_list: The URLs to check.

    Returns:
        dict containing the two URL lists (``'disavowed'``, ``'non_disavowed'``),
        their sizes (``'total_disavowed_links'``, ``'total_remaining_links'``),
        and the normalizer's counters for the test URLs and the domain entries.
        The domain counters are ``0`` when ``'domains'`` is not supplied. The
        ``'disavow_links_entered'`` and ``'unique_disavow_links_entered'`` keys
        are only included when ``'urls'`` is supplied.
    """
    output_dict = {}
    disavow_links = set()
    disavow_domains = set()

    # Start the domain counters at zero so the result can be built even when the
    # caller passes no 'domains' key (that path used to hit an unbound local).
    domain_counters = {'domains_entered': 0, 'unique_domains_entered': 0}

    if 'urls' in disavow_entries:
        links_info = normalize_and_dedupe_with_counts(disavow_entries['urls'])
        # A set keeps the per-URL membership check below constant-time.
        disavow_links = set(links_info.clean_urls)
        output_dict['disavow_links_entered'] = links_info.urls_entered
        output_dict['unique_disavow_links_entered'] = links_info.unique_urls_entered

    if 'domains' in disavow_entries:
        domains_info = normalize_and_dedupe_with_counts(disavow_entries['domains'])
        disavow_domains = set(subdomains(domains_info.clean_urls))
        domain_counters['domains_entered'] = domains_info.urls_entered
        domain_counters['unique_domains_entered'] = domains_info.unique_urls_entered

    test_info = normalize_and_dedupe_with_counts(urls_list)

    disavowed_urls = []
    non_disavowed_urls = []
    for url in test_info.clean_urls:
        is_disavowed = url in disavow_links or subdomain(url) in disavow_domains
        (disavowed_urls if is_disavowed else non_disavowed_urls).append(url)

    output_dict.update({
        'disavowed': disavowed_urls,
        'non_disavowed': non_disavowed_urls,
        'domains_entered': domain_counters['domains_entered'],
        'unique_domains_entered': domain_counters['unique_domains_entered'],
        'urls_entered_to_test': test_info.urls_entered,
        'unique_urls_entered_to_test': test_info.unique_urls_entered,
        'total_disavowed_links': len(disavowed_urls),
        'total_remaining_links': len(non_disavowed_urls),
    })

    return output_dict
예제 #3
0
def disavow_file_to_dict(file_contents, domain_limit=False):
    """Parse disavow file contents into domain entries, link entries and counts.

    The contents are parsed with ``import_from_file_contents`` and the
    resulting ``'urls'`` and ``'domains'`` lists are normalized and deduped.
    When domain entries exist, the link entries are replaced by the links
    that ``apply_disavow`` reports as not covered by those domains. When
    ``domain_limit`` is truthy, ``apply_domain_limit`` is applied to the
    remaining links and the domains it returns are added to the output.

    Args:
        file_contents: Raw disavow file contents.
        domain_limit: Passed through to ``apply_domain_limit``; a falsy value
            (the default) skips that step.

    Returns:
        dict with ``'domain_entries'``, ``'url_entries'``,
        ``'urls_entered_count'``, ``'urls_disavowed_count'``,
        ``'unique_urls_entered_count'`` and
        ``'domain_entries_entered_count'``.
    """
    parsed = import_from_file_contents(file_contents)
    raw_urls = parsed['urls']
    raw_domains = parsed['domains']

    url_details = normalize_and_dedupe_with_counts(raw_urls)
    domain_details = normalize_and_dedupe_with_counts(raw_domains)
    remaining_links = url_details.clean_urls
    existing_domains = subdomains(domain_details.clean_urls)

    if existing_domains:
        # Drop individual links that the domain entries already cover.
        remaining_links = apply_disavow(
            {"domains": raw_domains}, raw_urls)['non_disavowed']

    all_domains = set(existing_domains)

    if domain_limit:
        remaining_links, added_domains = apply_domain_limit(
            remaining_links, domain_limit)
        all_domains.update(added_domains)

        if existing_domains:
            # NOTE(review): the values returned here are not used in the
            # returned dict; the call is kept so behavior is unchanged.
            # Confirm whether the pruned lists were meant to feed
            # 'domain_entries'.
            existing_domains, added_domains = remove_redundant_domains(
                existing_domains, added_domains)

    remaining_count = len(remaining_links)

    return {
        'domain_entries': list(all_domains),
        'url_entries': remaining_links,
        'urls_entered_count': url_details.urls_entered,
        'urls_disavowed_count': remaining_count,
        'unique_urls_entered_count': url_details.unique_urls_entered,
        'domain_entries_entered_count': domain_details.urls_entered,
    }
예제 #4
0
def disavow_file_to_dict(file_contents, domain_limit=False):
    """Turn the contents of a disavow file into a summary dictionary.

    Steps, in order:
      1. Parse ``file_contents`` via ``import_from_file_contents`` and normalize/dedupe
         its ``'urls'`` and ``'domains'`` entries.
      2. If any domain entries exist, keep only the links that ``apply_disavow`` lists
         under ``'non_disavowed'`` for those domains.
      3. If ``domain_limit`` is truthy, run ``apply_domain_limit`` on the kept links and
         merge the domains it returns into the output domain set.

    Args:
        file_contents: Raw contents of the disavow file.
        domain_limit: Forwarded to ``apply_domain_limit``; falsy (default) disables it.

    Returns:
        dict with the final domain entries (as a list), the remaining link entries, and
        the counters ``'urls_entered_count'``, ``'urls_disavowed_count'``,
        ``'unique_urls_entered_count'`` and ``'domain_entries_entered_count'``.
    """
    entries = import_from_file_contents(file_contents)

    links_info = normalize_and_dedupe_with_counts(entries['urls'])
    links = links_info.clean_urls

    domains_info = normalize_and_dedupe_with_counts(entries['domains'])
    domains = subdomains(domains_info.clean_urls)

    if domains:
        # Links already covered by a domain entry do not need their own line.
        disavow_result = apply_disavow({"domains": entries['domains']}, entries['urls'])
        links = disavow_result['non_disavowed']

    merged_domains = set()
    merged_domains.update(domains)

    if domain_limit:
        links, limit_domains = apply_domain_limit(links, domain_limit)
        merged_domains.update(limit_domains)

    if domains and domain_limit:
        # NOTE(review): the pruned values returned below never reach the result dict;
        # the call is retained to keep behavior identical. Confirm the intent.
        domains, limit_domains = remove_redundant_domains(domains, limit_domains)

    result = {
        'domain_entries': list(merged_domains),
        'url_entries': links,
    }
    result['urls_entered_count'] = links_info.urls_entered
    result['urls_disavowed_count'] = len(links)
    result['unique_urls_entered_count'] = links_info.unique_urls_entered
    result['domain_entries_entered_count'] = domains_info.urls_entered
    return result