def rate_urllist_on_moment(urllist: UrlList,
                           when: datetime = None,
                           prevent_duplicates: bool = True):
    """
    Aggregate the latest url ratings of a url list into a single UrlListReport.

    :param urllist: the url list to report on.
    :param when: moment in time over which data is aggregated; defaults to now (UTC).
    :param prevent_duplicates: if the previous report holds identical data, do not
        save a new report but return the previous one.
    :return: the saved UrlListReport, or the pre-existing/unchanged report.
    """
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info("Creating report for urllist %s on %s" % (
        urllist,
        when,
    ))

    # Query once: if a report for this exact moment already exists, return it
    # instead of overwriting it. (Previously returned None, dropping the report.)
    existing_report = UrlListReport.objects.all().filter(
        urllist=urllist, at_when=when).first()
    if existing_report:
        log.debug(
            "UrllistReport already exists for %s on %s. Not overwriting." %
            (urllist, when))
        return existing_report

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    all_url_ratings = get_latest_urlratings_fast(urls, when)
    calculation = aggegrate_url_rating_scores(
        all_url_ratings,
        only_include_issues=urllist_report_content[urllist.scan_type])

    try:
        last = UrlListReport.objects.filter(
            urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        # DeepDiff returns an empty (falsy) diff when nothing changed.
        if not DeepDiff(last.calculation,
                        calculation,
                        ignore_order=True,
                        report_repetition=True):
            log.warning(
                "The report for %s on %s is the same as the report from %s. Not saving."
                % (urllist, when, last.at_when))
            return last

    log.info(
        "The calculation for %s on %s has changed, so we're saving this rating."
        % (urllist, when))

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del init_scores['name']
    del init_scores['urls']

    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(
        calculation)
    report.calculation = calculation
    report.save()
    return report
def rate_urllist_on_moment(urllist: UrlList, when: datetime = None, prevent_duplicates: bool = True):
    """
    :param urllist:
    :param when: A moment in time of which data should be aggregated
    :param prevent_duplicates: If the last report had the same data, don't save a new report but return the last report
    instead.
    :return: UrlListReport
    """
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info("Creating report for urllist %s on %s" % (urllist, when, ))

    # One query instead of exists() + first(): fetch the report (or None) directly.
    existing_report = UrlListReport.objects.all().filter(urllist=urllist, at_when=when).first()
    if existing_report:
        log.debug("UrllistReport already exists for %s on %s. Not overwriting." % (urllist, when))
        return existing_report

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    all_url_ratings = get_latest_urlratings_fast(urls, when)

    # Some endpoint types use the same ratings, such as dns_soa and dns_mx... This means that not
    # all endpoints will be removed for internet.nl. We need the following endpoints per scan:
    # -> note: urllist stores web/mail, they mean: web and mail_dashboard.
    # Loop-invariant: hoisted out of the per-urlrating loop below.
    endpoint_types_per_scan = {"web": "dns_a_aaaa", "mail": "dns_soa"}

    # Clean the url_ratings to only include the content we need, only the content (being removed)
    # and only the endpoint types
    for urlrating in all_url_ratings:
        calculation = remove_issues_from_calculation(urlrating.calculation, urllist_report_content[urllist.scan_type])
        calculation = only_include_endpoint_protocols(calculation, [endpoint_types_per_scan[urllist.scan_type]])

        # This already overrides endpoint statistics, use the calculation you get from this.
        calculation, amount_of_issues = statistics_over_url_calculation(calculation)
        # overwrite the rest of the statistics.
        calculation = add_statistics_to_calculation(calculation, amount_of_issues)

        urlrating.calculation = calculation

    calculation = aggegrate_url_rating_scores(all_url_ratings)

    try:
        last = UrlListReport.objects.filter(urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        # DeepDiff returns an empty (falsy) diff when nothing changed.
        if not DeepDiff(last.calculation, calculation, ignore_order=True, report_repetition=True):
            log.warning("The report for %s on %s is the same as the report from %s. Not saving." % (
                urllist, when, last.at_when))
            return last

    log.info("The calculation for %s on %s has changed, so we're saving this rating." % (urllist, when))

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del init_scores['name']
    del init_scores['urls']

    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(calculation)
    report.calculation = calculation
    report.save()
    return report
def rate_urllist_on_moment(urllist: UrlList,
                           when: datetime = None,
                           prevent_duplicates: bool = True,
                           scan_type: str = "web") -> int:
    """
    :param urllist:
    :param when: A moment in time of which data should be aggregated
    :param prevent_duplicates: If the last report had the same data, don't save a new report but return the last report
    instead.
    :param scan_type: one of "web", "mail" or "mail_dashboard"; selects which calculation is built and is
    mapped to the externally visible report_type.
    :return: UrlListReport id
    """
    # If there is no time slicing, then it's today.
    if not when:
        when = datetime.now(pytz.utc)

    log.info(f"Creating report for urllist {urllist} on {when}")

    # One query instead of exists() + first(): fetch the report (or None) directly.
    existing_report = UrlListReport.objects.all().filter(
        urllist=urllist, at_when=when).first()
    if existing_report:
        log.debug(
            f"UrllistReport already exists for {urllist} on {when}. Not overwriting."
        )
        return int(existing_report.id)

    urls = relevant_urls_at_timepoint_urllist(urllist=urllist, when=when)
    log.debug(f'Found {len(urls)} to be relevant at this moment.')

    calculation = create_calculation_on_urls(urls, when, scan_type=scan_type)

    try:
        last = UrlListReport.objects.filter(
            urllist=urllist, at_when__lte=when).latest('at_when')
    except UrlListReport.DoesNotExist:
        last = UrlListReport()  # create a dummy one for comparison

    calculation['name'] = urllist.name

    if prevent_duplicates:
        # DeepDiff returns an empty (falsy) diff when nothing changed.
        if not DeepDiff(last.calculation,
                        calculation,
                        ignore_order=True,
                        report_repetition=True):
            log.info(
                f"The report for {urllist} on {when} is the same as the report from {last.at_when}. Not saving."
            )
            return int(last.id)

    log.info(
        f"The calculation for {urllist} on {when} has changed, so we're saving this rating."
    )

    # remove urls and name from scores object, so it can be used as initialization parameters (saves lines)
    # this is by reference, meaning that the calculation will be affected if we don't work on a clone.
    init_scores = deepcopy(calculation)
    del init_scores['name']
    del init_scores['urls']

    # Internally "mail" and "mail_dashboard" are distinct scan types, but externally both are
    # reported as "mail".
    external_scan_type = {
        "web": "web",
        "mail": "mail",
        "mail_dashboard": "mail"
    }
    report = UrlListReport(**init_scores)
    report.urllist = urllist
    report.report_type = external_scan_type[scan_type]
    report.at_when = when
    report.average_internet_nl_score = sum_internet_nl_scores_over_rating(
        calculation)
    report.calculation = calculation
    report.save()
    return int(report.id)