def get_lastest_map_health_data(country: str = "NL", organization_type: str = "municipality") -> Dict[str, Any]:
    """Return the detailed report of the most recent MapHealthReport for a country/layer.

    Falls back to a sentinel report (all values -1, no per_scan data) when there is no
    map configuration for the combination, or when no health report has been stored yet.

    NOTE: the typo in the function name ("lastest") is kept for backward compatibility
    with existing callers.

    :param country: country code, resolved via get_country.
    :param organization_type: layer name, resolved via get_organization_type.
    :return: the detailed_report dict of the latest MapHealthReport, or the sentinel.
    """
    # One shared fallback for both failure modes; previously this literal was
    # duplicated verbatim in two places.
    no_health_data = {
        "outdate_period_in_hours": -1,
        "percentage_up_to_date": -1,
        "percentage_out_of_date": -1,
        "amount_up_to_date": -1,
        "amount_out_of_date": -1,
        "per_scan": [],
    }

    map_configuration = (
        Configuration.objects.all()
        .filter(country=get_country(country), organization_type=get_organization_type(organization_type))
        .first()
    )
    if not map_configuration:
        return no_health_data

    try:
        mh = MapHealthReport.objects.all().filter(map_configuration=map_configuration).latest("at_when")
    except MapHealthReport.DoesNotExist:
        # Configuration exists but no health report has been generated for it yet.
        return no_health_data

    return mh.detailed_report
def get_cached_map_data(country: str = "NL", organization_type: str = "municipality", days_back: int = 0, filters: List[str] = None):
    """
    Retrieve a cached map dataset, or False when no cache entry exists for this moment.

    The lookup is deliberately split into two queries. MySQL cannot deal with text
    fields efficiently: any filtering query that has a Textfield in its result set
    becomes very slow, even when the textfield itself is not filtered on. As an
    impression: a single combined query took 7 seconds with just 20.000 records
    (760 matching the key); split into two queries the total is <0.5 seconds.
    """
    # Guard against the mutable-default pitfall; None means "no filter" => "all".
    filters = filters or ["all"]

    cache_moment = datetime.now(pytz.utc) - relativedelta(days=int(days_back))

    # Query 1: locate the cache row without touching the (medium)text dataset column.
    cache_entry = (
        MapDataCache.objects.all()
        .filter(
            country=country,
            organization_type=get_organization_type(organization_type),
            at_when=cache_moment,
            filters=filters,
        )
        .defer("dataset")
        .first()
    )
    if not cache_entry:
        return False

    # Query 2: fetch only the dataset by primary key, which is fast.
    entry_with_dataset = MapDataCache.objects.only("id", "dataset").get(id=cache_entry.id)
    if not entry_with_dataset:
        return False

    return entry_with_dataset.dataset
def get_stats_outdated(country, organization_type, weeks_back):
    """
    Stats are calculated using websecmap calculate_high_level_statistics

    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """
    timeframes = {
        "now": 0,
        "7 days ago": 0,
        "2 weeks ago": 0,
        "3 weeks ago": 0,
        "1 months ago": 0,
        "2 months ago": 0,
        "3 months ago": 0,
    }

    reports = {}
    # Seven queries, but each one resolves to the closest statistic at or before
    # the requested moment, so a result is __never__ missing while data exists.
    for timeframe in timeframes:
        cutoff = stats_determine_when(timeframe, weeks_back).date()
        statistic = (
            HighLevelStatistic.objects.all()
            .filter(
                country=country,
                organization_type=get_organization_type(organization_type),
                at_when__lte=cutoff,
            )
            .order_by("-at_when")
            .first()
        )
        # No stats before a certain date, or the database is empty: skip the key.
        if statistic:
            reports[timeframe] = statistic.report

    return reports
def get_top_fail_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    """Return the worst-scoring organizations (having high or medium findings) at a moment.

    Uses a raw cursor query; see get_top_win_data for why %-interpolation is used
    instead of driver parameter binding. The interpolated values come from get_when /
    get_organization_type / get_country, not from raw user input.

    Fix: the ORDER BY clause contained a duplicated "medium DESC" term; the duplicate
    is redundant and has been removed (sort order is unchanged).

    :param country:
    :param organization_type:
    :param weeks_back:
    :return: list of dicts via rows_to_dataset.
    """
    when = get_when(weeks_back)
    cursor = connection.cursor()

    # NOTE: "low" is selected twice on purpose-looking grounds: rows_to_dataset reads
    # columns by position, so the column order must not change.
    sql = """
        SELECT
            low,
            organization.name,
            organizations_organizationtype.name,
            organization.id,
            at_when,
            organization.twitter_handle,
            high,
            medium,
            low,
            total_urls,
            total_endpoints
        FROM map_organizationreport
        INNER JOIN organization on organization.id = map_organizationreport.organization_id
        INNER JOIN organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN coordinate ON coordinate.organization_id = organization.id
        INNER JOIN (
            SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
            INNER JOIN organization as org2 ON org2.id = or2.organization_id
            WHERE at_when <= '%(when)s'
            AND org2.country = '%(country)s'
            AND org2.type_id = '%(OrganizationTypeId)s'
            GROUP BY organization_id
        ) as stacked_organization_report
        ON stacked_organization_report.id2 = map_organizationreport.id
        WHERE
            (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
              AND organization.is_dead = 1)
             OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0))
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        GROUP BY organization.name
        HAVING high > 0 or medium > 0
        ORDER BY high DESC, medium DESC, organization.name ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }
    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
def get_organization_report_by_name(
    country: str = DEFAULT_COUNTRY, organization_type=DEFAULT_LAYER, organization_name=None, weeks_back=0
):
    """Resolve a living organization by its slugified name, then delegate to the
    id-based report function. Returns {} when no organization matches."""
    matched_organization = Organization.objects.filter(
        computed_name_slug=slugify(organization_name),
        country=get_country(country),
        type=get_organization_type(organization_type),
        is_dead=False,
    ).first()

    log.debug("- %s %s %s " % (organization_name, get_country(country), get_organization_type(organization_type)))
    log.debug(matched_organization)

    if not matched_organization:
        return {}

    return get_organization_report_by_id(matched_organization.pk, weeks_back)
def export_coordinates(country: str = "NL", organization_type="municipality"):
    """Export all living coordinates of organizations in a country/layer."""
    matching_organizations = Organization.objects.all().filter(
        country=get_country(country), type=get_organization_type(organization_type)
    )

    export_columns = ["id", "organization", "geojsontype", "area"]
    coordinate_rows = (
        Coordinate.objects.all()
        .filter(organization__in=list(matching_organizations), is_dead=False)
        .values_list("id", "organization", "geojsontype", "area")
    )

    return generic_export(coordinate_rows, export_columns)
def export_urls(country: str = "NL", organization_type="municipality"):
    """Export all living, resolvable urls of organizations in a country/layer."""
    matching_organizations = Organization.objects.all().filter(
        country=get_country(country), type=get_organization_type(organization_type)
    )

    export_columns = ["id", "url", "organization"]
    url_rows = (
        Url.objects.all()
        .filter(organization__in=list(matching_organizations), is_dead=False, not_resolvable=False)
        .values_list("id", "url", "organization")
    )

    return generic_export(url_rows, export_columns)
def get_2ndlevel_domains(country, layer):
    """
    This is a starting point for SIDN to upload information for domains. This is basically a list
    of all 2nd level domains for a layer. Data for this is uploaded.
    """
    # A url counts as a second-level domain when it has no computed subdomain at all
    # (NULL or empty string).
    second_level = Url.objects.all().filter(
        Q(computed_subdomain__isnull=True) | Q(computed_subdomain=""),
        organization__country=get_country(country),
        organization__type=get_organization_type(layer),
    ).values_list("url", flat=True)

    # Deduplicate; a url can be attached to multiple organizations.
    return list(set(second_level))
def get_stats(country, organization_type, weeks_back):
    """
    Stats are calculated using websecmap calculate_high_level_statistics

    :param country:
    :param organization_type:
    :param weeks_back:
    :return: dict with per-day organization/url series plus the newest endpoint stats.
    """
    when = datetime.now(pytz.utc) - relativedelta(days=int(weeks_back * 7))

    # Up to a year of statistics, newest first. Materialized once with list():
    # the previous code iterated the sliced queryset AND then called .first() on it,
    # which issued a second, identical database query.
    stats = list(
        HighLevelStatistic.objects.all()
        .filter(
            country=country,
            organization_type=get_organization_type(organization_type),
            at_when__lte=when,
        )
        .order_by("-at_when")[0:366]
    )

    reports = {
        "organizations": [],
        "urls": [],
        "explained": {},
        "endpoints_now": 0,
        "endpoint": [],
    }

    for stat in stats:
        r = stat.report
        reports["organizations"].append(
            {"high": r["high"], "medium": r["medium"], "good": r["good"], "date": stat.at_when.isoformat()}
        )
        reports["urls"].append(
            {
                "high": r["high_urls"],
                "medium": r["medium_urls"],
                "good": r["good_urls"],
                "date": stat.at_when.isoformat(),
            }
        )

    # Endpoint details only make sense for the most recent statistic.
    if stats:
        r = stats[0].report
        reports["endpoint"] = r["endpoint"]
        reports["explained"] = r["explained"]
        reports["endpoints_now"] = r["endpoints"]

    return reports
def export_urls_only(country: str = "NL", organization_type="municipality"):
    """Export living, resolvable urls of living organizations in a country/layer,
    including the computed domain parts."""
    # how to reuse the column definition as both headers
    field_names = [
        "id", "url", "not_resolvable", "is_dead", "computed_subdomain", "computed_domain", "computed_suffix"
    ]

    url_rows = (
        Url.objects.all()
        .filter(
            is_dead=False,
            not_resolvable=False,
            organization__is_dead=False,
            organization__country=get_country(country),
            organization__type=get_organization_type(organization_type),
        )
        .values_list(*field_names)
    )

    return generic_export(url_rows, field_names)
def get_vulnerability_graph(country, organization_type, weeks_back):
    """Return one year of per-scan-type vulnerability statistics leading up to a moment.

    Desired output shape:
        "security_headers_x_frame_options": [
            {"date": "2018-07-17", "high": 0, "medium": 3950, "low": 0},
            {"date": "2018-07-24", "high": 0, "medium": 2940, "low": 0},
            ...
        ], ...
    """
    layer_id = get_organization_type(organization_type)
    country = get_country(country)
    moment = timezone.now() - relativedelta(weeks=int(weeks_back))
    year_before = moment - timedelta(days=365)

    statistics = (
        VulnerabilityStatistic.objects.all()
        .filter(
            organization_type=layer_id,
            country=country,
            at_when__lte=moment,
            at_when__gte=year_before,
        )
        .order_by("scan_type", "at_when")
    )

    per_scan_type = {}
    for statistic in statistics:
        per_scan_type.setdefault(statistic.scan_type, []).append(
            {
                "high": statistic.high,
                "medium": statistic.medium,
                "low": statistic.low,
                "date": statistic.at_when.isoformat(),
                "urls": statistic.urls,
                "ok_urls": statistic.ok_urls,
                "endpoints": statistic.endpoints,
                "ok_endpoints": statistic.ok_endpoints,
                "ok": statistic.ok,
            }
        )

    return per_scan_type
def what_to_improve(country: str, organization_type: str, issue_type: str):
    """Return improvement suggestions for an issue type on a country/layer, or []
    when no policy is configured for that issue type."""
    # todo: check if the layer is published.
    policy = POLICY.get(issue_type, None)
    if not policy:
        log.debug(f"No policy found for {issue_type}")
        return []

    resolved_country = get_country(country)
    resolved_layer = get_organization_type(organization_type)

    # Url-level and endpoint-level scan types are looked up through different helpers.
    if issue_type in URL_SCAN_TYPES:
        return what_to_improve_ugs(resolved_country, resolved_layer, issue_type, policy)

    return what_to_improve_epgs(resolved_country, resolved_layer, issue_type, policy)
def get_organization_vulnerability_timeline_via_name(
    organization_name: str, organization_type: str = "", country: str = ""
):
    """Look up a living organization by exact name and return its vulnerability
    timeline. Falls back to the default layer/country when none are given;
    returns {} when the organization cannot be found."""
    if organization_type:
        layer = get_organization_type(organization_type)
    else:
        layer = get_default_layer()

    if country:
        resolved_country = get_country(code=country)
    else:
        resolved_country = get_default_country()

    organization = (
        Organization.objects.all()
        .filter(country=resolved_country, type=layer, name=organization_name, is_dead=False)
        .first()
    )

    if not organization:
        return {}

    return get_organization_vulnerability_timeline(organization.id)
def export_organizations(country: str = "NL", organization_type="municipality"):
    """Export all living organizations of a country/layer."""
    export_columns = ["id", "name", "type", "wikidata", "wikipedia", "twitter_handle"]

    organization_rows = (
        Organization.objects.all()
        .filter(
            country=get_country(country),
            type=get_organization_type(organization_type),
            is_dead=False,
        )
        .values_list("id", "name", "type", "wikidata", "wikipedia", "twitter_handle")
    )

    # NOTE(history): import_export's modelresource_factory was evaluated and rejected:
    # values() no longer restricts which columns end up in the export, the factory
    # exports all fields, and its documentation is effectively nonexistent. Columns
    # are therefore passed explicitly to generic_export.
    return generic_export(organization_rows, export_columns)
def get_top_win_data(country: str = "NL", organization_type="municipality", weeks_back=0):
    """
    Return the best-scoring organizations (no high and no medium findings) at a moment,
    via rows_to_dataset.

    Can't use the object.raw syntax of django. OperationalError: near "%": syntax error...
    Probably not supported, although the manual says so. Maybe because we use a multiline string?
    https://code.djangoproject.com/ticket/10070
    Dictionary params are not supported with the SQLite backend; with this backend, you must pass
    parameters as a list. When doing it right, the exception happens: format requires a mapping.
    Then just rely on the checks we do.

    :param country:
    :param organization_type:
    :param weeks_back:
    :return:
    """
    when = get_when(weeks_back)
    cursor = connection.cursor()
    # Interpolated values come from get_when / get_organization_type / get_country,
    # not raw user input (see docstring for why driver parameter binding is not used).
    # The inner "stacked_organization_report" join selects the latest report per
    # organization at the cut-off moment; the outer WHERE keeps organizations that
    # were alive (or died after) that moment. "low" is selected twice — presumably
    # rows_to_dataset reads columns positionally; confirm before changing column order.
    sql = """
        SELECT
            low,
            organization.name,
            organizations_organizationtype.name,
            organization.id,
            at_when,
            organization.twitter_handle,
            high,
            medium,
            low,
            total_urls,
            total_endpoints,
            organization.is_dead
        FROM map_organizationreport
        INNER JOIN organization on organization.id = map_organizationreport.organization_id
        INNER JOIN organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN coordinate ON coordinate.organization_id = organization.id
        INNER JOIN (
            SELECT MAX(or2.id) as id2 FROM map_organizationreport or2
            INNER JOIN organization as org2 ON org2.id = or2.organization_id
            WHERE at_when <= '%(when)s'
            AND org2.country = '%(country)s'
            AND org2.type_id = '%(OrganizationTypeId)s'
            GROUP BY organization_id
        ) as stacked_organization_report
        ON stacked_organization_report.id2 = map_organizationreport.id
        WHERE
            (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
              AND organization.is_dead = 1)
             OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0))
            AND organization.type_id = '%(OrganizationTypeId)s'
            AND organization.country = '%(country)s'
            AND total_urls > 0
        GROUP BY organization.name
        HAVING high = 0 AND medium = 0
        ORDER BY low ASC, total_endpoints DESC, organization.name ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }
    # log.debug(sql)
    cursor.execute(sql)
    rows = cursor.fetchall()
    return rows_to_dataset(rows, when)
def get_map_data(country: str = "NL", organization_type: str = "municipality", days_back: int = 0,
                 displayed_issue: str = None):
    """
    Returns a json structure containing all current map data: a GeoJSON-style
    FeatureCollection with per-organization severity metadata. This is used by the
    client to render the map.

    Renditions of this dataset might be pushed to gitlab automatically.

    :param country: country code (a Country object is tolerated, see workaround below).
    :param organization_type: layer name.
    :param days_back: how many days in the past to render.
    :param displayed_issue: optional single scan type to filter on; None means all.
    :return: dict with "metadata", "crs" and "features" keys.
    """
    # A bug in the live version (possibly due to |safe) gives a Country(code='NL')} instead of
    # the country code of the country. Here this is worked around, but something is wrong and should be fixed.
    if hasattr(country, "code"):
        country = country.code

    when = datetime.now(pytz.utc) - relativedelta(days=int(days_back))

    # Narrow down to a single issue when one was requested; a scan type is either a
    # url-level or an endpoint-level type, never both.
    desired_url_scans = []
    desired_endpoint_scans = []

    if displayed_issue in URL_SCAN_TYPES:
        desired_url_scans += [displayed_issue]

    if displayed_issue in ENDPOINT_SCAN_TYPES:
        desired_endpoint_scans += [displayed_issue]

    # fallback if no data is "all", which is the default.
    if not desired_url_scans and not desired_endpoint_scans:
        desired_url_scans = URL_SCAN_TYPES
        desired_endpoint_scans = ENDPOINT_SCAN_TYPES
        filters = ["all"]
    else:
        filters = desired_url_scans + desired_endpoint_scans

    # Building this dataset is expensive: serve from cache whenever possible.
    cached = get_cached_map_data(country, organization_type, days_back, filters)
    if cached:
        return cached

    data = {
        "metadata": {
            "type": "FeatureCollection",
            "render_date": datetime.now(pytz.utc).isoformat(),
            "data_from_time": when.isoformat(),
            "remark": remark,
            "applied filter": displayed_issue,
            "layer": organization_type,
            "country": country,
        },
        "crs": {
            "type": "name",
            "properties": {
                "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
            }
        },
        "features": [],
    }

    cursor = connection.cursor()

    # Sept 2019: MySQL has an issue with mediumtext fields. When joined, and the query is not
    # optimized, the result will take 2 minutes to complete. Would you not select the mediumtext
    # field, the query finishes in a second. That is why there are two queries to retrieve map
    # data from the database (this one for the rows, get_reports_by_ids for the calculations).
    #
    # The three stacked subqueries select, per organization at the cut-off moment:
    # organizations alive at that time, their newest coordinate, and their newest report.
    sql = """
        SELECT
            map_organizationreport.low,
            organization.name,
            organizations_organizationtype.name,
            coordinate_stack.area,
            coordinate_stack.geoJsonType,
            organization.id,
            map_organizationreport.id as organization_report_id,
            map_organizationreport.high,
            map_organizationreport.medium,
            map_organizationreport.low,
            map_organizationreport.total_issues,
            map_organizationreport.total_urls,
            map_organizationreport.high_urls,
            map_organizationreport.medium_urls,
            map_organizationreport.low_urls,
            coordinate_stack.stacked_coordinate_id
        FROM map_organizationreport
        INNER JOIN
            (SELECT stacked_organization.id as stacked_organization_id
            FROM organization stacked_organization
            WHERE (
                stacked_organization.created_on <= '%(when)s'
                AND stacked_organization.is_dead = 0
                AND stacked_organization.type_id=%(OrganizationTypeId)s
                AND stacked_organization.country='%(country)s'
            ) OR (
                '%(when)s' BETWEEN stacked_organization.created_on AND stacked_organization.is_dead_since
                AND stacked_organization.is_dead = 1
                AND stacked_organization.type_id=%(OrganizationTypeId)s
                AND stacked_organization.country='%(country)s'
            )) as organization_stack
            ON organization_stack.stacked_organization_id = map_organizationreport.organization_id
        INNER JOIN
            organization on organization.id = stacked_organization_id
        INNER JOIN
            organizations_organizationtype on organizations_organizationtype.id = organization.type_id
        INNER JOIN
            (SELECT MAX(stacked_coordinate.id) as stacked_coordinate_id, area, geoJsonType, organization_id
            FROM coordinate stacked_coordinate
            INNER JOIN organization filter_organization
                ON (stacked_coordinate.organization_id = filter_organization.id)
            WHERE (
                stacked_coordinate.created_on <= '%(when)s'
                AND stacked_coordinate.is_dead = 0
                AND filter_organization.country='%(country)s'
                AND filter_organization.type_id=%(OrganizationTypeId)s
            ) OR
            (
                '%(when)s' BETWEEN stacked_coordinate.created_on AND stacked_coordinate.is_dead_since
                AND stacked_coordinate.is_dead = 1
                AND filter_organization.country='%(country)s'
                AND filter_organization.type_id=%(OrganizationTypeId)s
            ) GROUP BY area, organization_id
            ) as coordinate_stack
            ON coordinate_stack.organization_id = map_organizationreport.organization_id
        INNER JOIN
            (SELECT MAX(map_organizationreport.id) as stacked_organizationrating_id
            FROM map_organizationreport
            INNER JOIN organization filter_organization2
                ON (filter_organization2.id = map_organizationreport.organization_id)
            WHERE at_when <= '%(when)s'
            AND filter_organization2.country='%(country)s'
            AND filter_organization2.type_id=%(OrganizationTypeId)s
            GROUP BY organization_id
            ) as stacked_organizationrating
            ON stacked_organizationrating.stacked_organizationrating_id = map_organizationreport.id
        WHERE organization.type_id = '%(OrganizationTypeId)s' AND organization.country= '%(country)s'
        GROUP BY coordinate_stack.area, organization.name
        ORDER BY map_organizationreport.at_when ASC
        """ % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    }

    cursor.execute(sql)
    rows = cursor.fetchall()

    # Column 6 is organization_report_id; collect the ids for the second query.
    needed_reports = []
    for i in rows:
        # prevent sequence item 0: expected str instance, int found
        needed_reports.append(str(i[6]))

    reports = get_reports_by_ids(needed_reports)

    # todo: http://www.gadzmo.com/python/using-pythons-dictcursor-in-mysql-to-return-a-dict-with-keys/
    # unfortunately numbered results are used. There is no decent solution for sqlite and the column to dict
    # translation is somewhat hairy. A rawquery would probably be better if possible.
    for i in rows:
        # Here we're going to do something stupid: to rebuild the high, medium, low classifcation based on
        # scan_types. It's somewhat insane to do it like this, but it's also insane to keep adding columns
        # for each vulnerability that's added to the system. This solution will be a bit slow, but given the
        # caching and such it wouldn't hurt too much.
        # Also: we've optimized for calculation in the past, but we're not even using it until now. So that
        # part of this code is pretty optimized :)
        # This feature is created to give an instant overview of what issues are where. This will lead more
        # clicks to reports.
        # The caching of this url should be decent, as people want to click fast. Filtering on the client
        # would be possible using the calculation field. Perhaps that should be the way. Yet then we have to
        # do filtering with javascript, which is error prone (todo: this will be done in the future, as it
        # responds faster but it will also mean an enormous increase of data sent to the client.)
        # It's actually reasonably fast.
        high, medium, low, ok = 0, 0, 0, 0

        calculation = json.loads(reports[i[6]])

        # Only count ratings of the desired scan types, skipping explained (comply-or-explain) findings.
        for url in calculation["organization"]["urls"]:
            for url_rating in url["ratings"]:
                if (url_rating["type"] in desired_url_scans and url_rating.get(
                        "comply_or_explain_valid_at_time_of_report", False) is False):
                    high += url_rating["high"]
                    medium += url_rating["medium"]
                    low += url_rating["low"]
                    ok += url_rating["ok"]

            # it's possible the url doesn't have ratings.
            for endpoint in url["endpoints"]:
                for endpoint_rating in endpoint["ratings"]:
                    if (endpoint_rating["type"] in desired_endpoint_scans and endpoint_rating.get(
                            "comply_or_explain_valid_at_time_of_report", False) is False):
                        high += endpoint_rating["high"]
                        medium += endpoint_rating["medium"]
                        low += endpoint_rating["low"]
                        ok += endpoint_rating["ok"]

        # figure out if red, orange or green:
        # #162, only make things red if there is a critical issue.
        # removed json parsing of the calculation. This saves time.
        # no contents, no endpoint ever mentioned in any url (which is a standard attribute)
        if "total_urls" not in calculation["organization"] or not calculation[
                "organization"]["total_urls"]:
            severity = "unknown"
        else:
            # things have to be OK in order to be colored. If it's all empty... then it's not OK.
            severity = "high" if high else "medium" if medium else "low" if low else "good" if ok else "unknown"

        dataset = {
            "type": "Feature",
            "properties": {
                "organization_id": i[5],
                "organization_type": i[2],
                "organization_name": i[1],
                "organization_name_lowercase": i[1].lower(),
                "organization_slug": slugify(i[1]),
                "additional_keywords": extract_domains(calculation),
                "high": high,
                "medium": medium,
                "low": low,
                "data_from": when.isoformat(),
                "severity": severity,
                "total_urls": i[11],  # = 100%
                "high_urls": i[12],
                "medium_urls": i[13],
                "low_urls": i[14],
            },
            "geometry": {
                # the coordinate ID makes it easy to check if the geometry has changed shape/location.
                "coordinate_id": i[15],
                "type": i[4],
                # Sometimes the data is a string, sometimes it's a list. The admin
                # interface might influence this. The fastest would be to use a string, instead of
                # loading some json.
                "coordinates": proper_coordinate(i[3], i[4]),
            },
        }

        # calculate some statistics, so the frontends do not have to...
        # prevent division by zero
        if i[11]:
            total_urls = int(i[11])
            high_urls = int(i[12])
            medium_urls = int(i[13])
            low_urls = int(i[14])
            # NOTE(review): rounding to 2 decimals *before* multiplying by 100 yields
            # whole-percent steps (with float noise) — presumably intended for display;
            # confirm before "fixing" the operand order.
            dataset["properties"]["percentages"] = {
                "high_urls": round(high_urls / total_urls, 2) * 100,
                "medium_urls": round(medium_urls / total_urls, 2) * 100,
                "low_urls": round(low_urls / total_urls, 2) * 100,
                "good_urls": round((total_urls - (high_urls + medium_urls + low_urls)) / total_urls, 2) * 100,
            }
        else:
            dataset["properties"]["percentages"] = {
                "high_urls": 0,
                "medium_urls": 0,
                "low_urls": 0,
                "good_urls": 0,
            }

        data["features"].append(dataset)

    return data
def _sum_ratings_per_scan_type(urlratings, measurement, scan_types):
    """Accumulate high/medium/low totals per scan type into `measurement` (in place).

    Counts url-level ratings as well as endpoint-level ratings. Every newly seen scan
    type is appended to `scan_types` (also in place) in discovery order, so callers
    can later diff two measurements over the same set of types. Extracted because the
    identical accumulation code was duplicated for the newest and oldest reports.
    """
    for urlrating in urlratings:
        # url level; some reports have no url ratings at all.
        if "ratings" in urlrating.calculation:
            for rating in urlrating.calculation["ratings"]:
                if rating["type"] not in measurement:
                    measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}
                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])
                measurement[rating["type"]]["high"] += rating["high"]
                measurement[rating["type"]]["medium"] += rating["medium"]
                measurement[rating["type"]]["low"] += rating["low"]

        if "endpoints" not in urlrating.calculation:
            continue

        for endpoint in urlrating.calculation["endpoints"]:
            for rating in endpoint["ratings"]:
                if rating["type"] not in measurement:
                    measurement[rating["type"]] = {"high": 0, "medium": 0, "low": 0}
                if rating["type"] not in scan_types:
                    scan_types.append(rating["type"])
                measurement[rating["type"]]["high"] += rating["high"]
                measurement[rating["type"]]["medium"] += rating["medium"]
                measurement[rating["type"]]["low"] += rating["low"]


def get_improvements(country, organization_type, weeks_back, weeks_duration):
    """Compare the newest url reports against those of `weeks_duration` weeks earlier
    and return per-scan-type (plus "overall") old/new/improvement counts.

    :param country:
    :param organization_type:
    :param weeks_back: how far in the past the "new" moment lies.
    :param weeks_duration: size of the comparison window; defaults to 1 week when 0.
    :return: dict keyed by scan type (and "overall") with "old", "new", "improvements".
    """
    # todo: adjustable timespan
    # todo: adjustable weeks_back
    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)
    if not weeks_duration:
        weeks_duration = 1

    when = get_when(weeks_back)

    # Latest report per url at a given moment; the same query is used for both moments,
    # only the cut-off date differs. Urls that don't exist (anymore) are not included.
    sql_template = """
        SELECT reporting_urlreport.id as id, calculation FROM reporting_urlreport
        INNER JOIN (
            SELECT MAX(id) as id2 FROM reporting_urlreport or2
            WHERE at_when <= '%(when)s' GROUP BY url_id
        ) as x ON x.id2 = reporting_urlreport.id
        INNER JOIN url ON reporting_urlreport.url_id = url.id
        INNER JOIN url_organization on url.id = url_organization.url_id
        INNER JOIN organization ON url_organization.organization_id = organization.id
        WHERE organization.type_id = '%(OrganizationTypeId)s'
        AND organization.country = '%(country)s'
        """

    newest_urlratings = UrlReport.objects.raw(sql_template % {
        "when": when,
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    })

    # this of course doesn't work with the first day, as then we didn't measure
    # everything (and the ratings for several issues are 0...
    oldest_urlratings = UrlReport.objects.raw(sql_template % {
        "when": when - timedelta(days=(weeks_duration * 7)),
        "OrganizationTypeId": get_organization_type(organization_type),
        "country": get_country(country),
    })

    old_measurement = {}
    new_measurement = {}
    scan_types = []

    # Newest first so scan_types keeps the same discovery order as before.
    _sum_ratings_per_scan_type(newest_urlratings, new_measurement, scan_types)
    _sum_ratings_per_scan_type(oldest_urlratings, old_measurement, scan_types)

    # and now do some magic to see the changes in this timespan:
    changes = {}
    for scan_type in scan_types:
        if scan_type not in changes:
            changes[scan_type] = {}
        if scan_type not in old_measurement:
            old_measurement[scan_type] = {}
        if scan_type not in new_measurement:
            new_measurement[scan_type] = {}

        changes[scan_type] = {
            "old": {
                "date": timezone.now() - timedelta(days=(weeks_duration * 7)),
                "high": old_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0),
            },
            "new": {
                "date": when,
                "high": new_measurement[scan_type].get("high", 0),
                "medium": new_measurement[scan_type].get("medium", 0),
                "low": new_measurement[scan_type].get("low", 0),
            },
            "improvements": {
                "high": old_measurement[scan_type].get("high", 0) - new_measurement[scan_type].get("high", 0),
                "medium": old_measurement[scan_type].get("medium", 0) - new_measurement[scan_type].get("medium", 0),
                "low": old_measurement[scan_type].get("low", 0) - new_measurement[scan_type].get("low", 0),
            },
        }

    # and now for overall changes, what everyone is coming for...
    for scan_type in scan_types:
        changes["overall"] = {
            "old": {
                "high": changes.get("overall", {}).get("old", {}).get("high", 0) + changes[scan_type]["old"]["high"],
                "medium":
                    changes.get("overall", {}).get("old", {}).get("medium", 0) + changes[scan_type]["old"]["medium"],
                "low": changes.get("overall", {}).get("old", {}).get("low", 0) + changes[scan_type]["old"]["low"],
            },
            "new": {
                "high": changes.get("overall", {}).get("new", {}).get("high", 0) + changes[scan_type]["new"]["high"],
                "medium":
                    changes.get("overall", {}).get("new", {}).get("medium", 0) + changes[scan_type]["new"]["medium"],
                "low": changes.get("overall", {}).get("new", {}).get("low", 0) + changes[scan_type]["new"]["low"],
            },
            "improvements": {
                "high": changes.get("overall", {}).get("improvements", {}).get(
                    "high", 0) + changes[scan_type]["improvements"]["high"],
                "medium": changes.get("overall", {}).get("improvements", {}).get(
                    "medium", 0) + changes[scan_type]["improvements"]["medium"],
                "low": changes.get("overall", {}).get("improvements", {}).get(
                    "low", 0) + changes[scan_type]["improvements"]["low"],
            },
        }

    return changes
def get_all_latest_scans(country, organization_type):
    """Return the latest scan results (up to six per scan type) for a country/layer.

    Fix: removed a leftover debug `print(scans.query)` that wrote the raw SQL of the
    endpoint query to stdout on every call.

    :param country:
    :param organization_type:
    :return: dict with "scans" (per scan type lists), "render_date" and "remark".
    """
    dataset = {
        "scans": defaultdict(list),
        "render_date": timezone.now().isoformat(),
        "remark": remark,
    }

    filtered_organization_type = get_organization_type(organization_type)
    filtered_country = get_country(country)

    # Really get the latest, without double results that apply for multiple organizations.
    # Do not show anything that is dead, on any level.
    for scan_type in PUBLISHED_ENDPOINT_SCAN_TYPES:
        scans = (
            EndpointGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "endpoint",
                    filter=Q(
                        endpoint__is_dead=False,
                        endpoint__url__not_resolvable=False,
                        endpoint__url__is_dead=False,
                        endpoint__url__organization__is_dead=False,
                        endpoint__url__organization__country=filtered_country,
                        endpoint__url__organization__type_id=filtered_organization_type,
                    ),
                )
            )
            .filter(n_urls__gte=1)
            .order_by("-rating_determined_on")[0:6]
        )

        for scan in scans:
            calculation = get_severity(scan)
            dataset["scans"][scan_type].append(
                {
                    "url": scan.endpoint.url.url,
                    "service": f"{scan.endpoint.protocol}/{scan.endpoint.port} (IPv{scan.endpoint.ip_version})",
                    "protocol": scan.endpoint.protocol,
                    "port": scan.endpoint.port,
                    "ip_version": scan.endpoint.ip_version,
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    for scan_type in PUBLISHED_URL_SCAN_TYPES:
        scans = (
            UrlGenericScan.objects.filter(
                type=scan_type,
                is_the_latest_scan=True,
            )
            .annotate(
                n_urls=Count(
                    "url",
                    filter=Q(
                        url__organization__is_dead=False,
                        url__organization__country=filtered_country,
                        url__organization__type_id=filtered_organization_type,
                        url__is_dead=False,
                        url__not_resolvable=False,
                    ),
                )
            )
            # n_urls counts a single foreign key, so it is always 0 or 1: use >= 1
            # for consistency with the endpoint query above (previously `n_urls=1`,
            # which is equivalent here).
            .filter(n_urls__gte=1)
            .order_by("-rating_determined_on")[0:6]
        )

        for scan in scans:
            calculation = get_severity(scan)

            # url scans
            dataset["scans"][scan_type].append(
                {
                    "url": scan.url.url,
                    "service": f"{scan.url.url}",
                    "protocol": scan_type,
                    "port": "-",
                    "ip_version": "-",
                    "explanation": calculation.get("explanation", ""),
                    "high": calculation.get("high", 0),
                    "medium": calculation.get("medium", 0),
                    "low": calculation.get("low", 0),
                    "last_scan_humanized": naturaltime(scan.last_scan_moment),
                    "last_scan_moment": scan.last_scan_moment.isoformat(),
                }
            )

    return dataset
def get_ticker_data(
    country: str = "NL", organization_type: str = "municipality", weeks_back: int = 0, weeks_duration: int = 0
):
    """Return news-ticker style deltas per organization.

    Compares the newest organization report (at ``weeks_back`` weeks ago) with
    the report from ``weeks_duration`` weeks before that, e.g.:
    Amsterdam 42 +1, 32 +2, 12 -, Zutphen 12 -3, 32 -1, 3 +3, ...

    Organizations without an older report get "-" for the "then" values and
    their current counts as the change.

    Fix: the identical raw SQL query was duplicated verbatim (only the date
    differed); it is now built once by a private helper.

    :param country: country code, e.g. "NL"
    :param organization_type: organization type name, e.g. "municipality"
    :param weeks_back: how many weeks ago the "now" moment lies
    :param weeks_duration: comparison window in weeks (defaults to 10)
    :return: {"changes": [...], "slogan": config.TICKER_SLOGAN}
    """
    weeks_back = int(weeks_back)
    weeks_duration = int(weeks_duration)
    if not weeks_duration:
        weeks_duration = 10

    when = get_when(weeks_back)

    def latest_reports_at(moment):
        """Fetch, per living organization, the newest report at ``moment``."""
        # NOTE(review): values are %-interpolated into the SQL string rather
        # than passed as query parameters. They come from get_when /
        # get_country / get_organization_type, which presumably return vetted
        # values — confirm before routing any new user input through here.
        sql = """
            SELECT map_organizationreport.id as id, name, high, medium, low
            FROM map_organizationreport
            INNER JOIN (
                SELECT MAX(or2.id) as id2
                FROM map_organizationreport or2
                INNER JOIN organization as filter_organization
                    ON (filter_organization.id = or2.organization_id)
                WHERE at_when <= '%(when)s'
                    AND filter_organization.country='%(country)s'
                    AND filter_organization.type_id=%(OrganizationTypeId)s
                GROUP BY organization_id
            ) as stacked_organizationreport
                ON stacked_organizationreport.id2 = map_organizationreport.id
            INNER JOIN organization
                ON map_organizationreport.organization_id = organization.id
            WHERE
                (('%(when)s' BETWEEN organization.created_on AND organization.is_dead_since
                    AND organization.is_dead = 1)
                OR (organization.created_on <= '%(when)s' AND organization.is_dead = 0))
                AND organization.type_id = '%(OrganizationTypeId)s'
                AND organization.country = '%(country)s'
                AND total_urls > 0
        """ % {
            "when": moment,
            "OrganizationTypeId": get_organization_type(organization_type),
            "country": get_country(country),
        }
        return list(OrganizationReport.objects.raw(sql))

    newest_urlratings = latest_reports_at(when)
    # This of course doesn't work with the first day, as then we didn't
    # measure everything (and the ratings for several issues are 0).
    oldest_urlratings = latest_reports_at(when - timedelta(days=weeks_duration * 7))

    # Key the old ratings by organization name so matching (and handling of
    # missing old ratings) is a simple lookup.
    oldest_urlratings_dict = {rating.name: rating for rating in oldest_urlratings}

    # Unsuccessful rebuild? Or not enough organizations?
    if not newest_urlratings:
        return {"changes": {}, "slogan": config.TICKER_SLOGAN}

    changes = []
    for newest_urlrating in newest_urlratings:
        matching_oldest = oldest_urlratings_dict.get(newest_urlrating.name)

        if not matching_oldest:
            # No historical report: show the full current counts as the change.
            high_then = medium_then = low_then = "-"
            high_changes = newest_urlrating.high
            medium_changes = newest_urlrating.medium
            low_changes = newest_urlrating.low
        else:
            high_then = matching_oldest.high
            medium_then = matching_oldest.medium
            low_then = matching_oldest.low
            high_changes = newest_urlrating.high - matching_oldest.high
            medium_changes = newest_urlrating.medium - matching_oldest.medium
            low_changes = newest_urlrating.low - matching_oldest.low

        changes.append(
            {
                "organization": newest_urlrating.name,
                "high_now": newest_urlrating.high,
                "medium_now": newest_urlrating.medium,
                "low_now": newest_urlrating.low,
                "high_then": high_then,
                "medium_then": medium_then,
                "low_then": low_then,
                "high_changes": high_changes,
                "medium_changes": medium_changes,
                "low_changes": int(low_changes),
            }
        )

    return {"changes": changes, "slogan": config.TICKER_SLOGAN}
def get_all_explains(country, organization_type, limit=0):
    """Collect the latest explained scan findings for a country/type.

    Gathers both url-level and endpoint-level scans that carry a
    comply-or-explain explanation, restricted to living organizations of the
    requested country and organization type, and returns them newest-first.

    :param country: country code, e.g. "NL"
    :param organization_type: organization type name, e.g. "municipality"
    :param limit: maximum results per queryset; 0 means "effectively all"
    :return: list of explanation dicts, sorted by explained_on descending
    """
    if limit == 0:
        limit = 999999

    resolved_country = get_country(country)
    resolved_type = get_organization_type(organization_type)

    # Some urls are used by a lot of organizations; we only need the distinct
    # explanation — the organizations are matched later.
    # https://stackoverflow.com/questions/30752268/how-to-filter-objects-for-count-annotation-in-django
    #
    # History is not taken into account here: only what is alive _NOW_ counts.
    # Supporting a point in time would need extra Q conditions checking
    # created_on / is_dead_since against that date.
    url_scans = (
        UrlGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "url",
                filter=Q(
                    url__organization__is_dead=False,
                    url__organization__country=resolved_country,
                    url__organization__type_id=resolved_type,
                    url__is_dead=False,
                    url__not_resolvable=False,
                ),
            )
        )
        .filter(n_urls=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    endpoint_scans = (
        EndpointGenericScan.objects.all()
        .filter(
            comply_or_explain_is_explained=True,
            is_the_latest_scan=True,
        )
        .annotate(
            n_urls=Count(
                "endpoint",
                filter=Q(
                    endpoint__is_dead=False,
                    endpoint__url__not_resolvable=False,
                    endpoint__url__is_dead=False,
                    endpoint__url__organization__is_dead=False,
                    endpoint__url__organization__country=resolved_country,
                    endpoint__url__organization__type_id=resolved_type,
                ),
            )
        )
        .filter(n_urls__gte=1)
        .order_by("-comply_or_explain_explained_on")[0:limit]
    )

    explains = [get_explanation("url", scan) for scan in url_scans]
    explains += [get_explanation("endpoint", scan) for scan in endpoint_scans]

    # Newest explanations first.
    explains.sort(key=lambda explain: explain["explained_on"], reverse=True)
    return explains