Пример #1
0
def test_parse_eli_geometry(in_geojson: Dict[str, Any], expected_wkt: str):
    """Check that an "ELI" geometry is parsed into the expected geometry.

    The parsed geometry is compared through its WKT representation.
    """
    parsed_geometry = parse_eli_geometry(in_geojson)
    actual_wkt = parsed_geometry.wkt  # type: ignore
    assert actual_wkt == expected_wkt
Пример #2
0
def check_wms(source, info_msgs, warning_msgs, error_msgs):
    """
    Check WMS source

    The GetMap url template of the source is validated and compared with the
    GetCapabilities document returned by the server. All findings are appended
    to the given message lists; nothing is returned.

    Parameters
    ----------
    source : dict
        Source dictionary
    info_msgs : list
        Good messages
    warning_msgs : list
        Warning messages
    error_msgs : list
        Error Messages
    """

    wms_url = source["properties"]["url"]
    source_headers = get_http_headers(source)

    params = ["{proj}", "{bbox}", "{width}", "{height}"]
    missingparams = [p for p in params if p not in wms_url]
    if missingparams:
        error_msgs.append(
            f"The following values are missing in the URL: {','.join(missingparams)}"
        )

    u = urlparse(wms_url)
    url_parts = list(u)
    # Query parameter names are compared case-insensitively; for duplicated names the last value wins
    wms_args = {
        k.lower(): v
        for k, v in parse_qsl(u.query, keep_blank_values=True)
    }

    def validate_wms_getmap_url():
        """
        Layers and styles can contain whitespaces. Ignore them here. They are checked against GetCapabilities later.
        """
        url_parts_without_layers = "&".join([
            f"{key}={value}" for key, value in wms_args.items()
            if key not in {"layers", "styles"}
        ])
        parts = url_parts.copy()
        # Index 4 of the urlparse result is the query string
        parts[4] = url_parts_without_layers
        # Strip the braces of the template placeholders before validating
        url = urlunparse(parts).replace("{", "").replace("}", "")
        return validators.url(url)

    if not validate_wms_getmap_url():
        error_msgs.append(f"URL validation error: {wms_url}")

    # Check mandatory WMS GetMap parameters (Table 8, Section 7.3.2, WMS 1.3.0 specification)
    # A url without a 'request' parameter is treated as an ESRI REST export url
    is_esri = "request" not in wms_args
    if is_esri:
        required_parameters = [
            "f", "bbox", "size", "imageSR", "bboxSR", "format"
        ]
    else:
        required_parameters = [
            "version",
            "request",
            "layers",
            "bbox",
            "width",
            "height",
            "format",
        ]
    missing_request_parameters = {
        request_parameter
        for request_parameter in required_parameters
        if request_parameter.lower() not in wms_args
    }

    # Nothing more to do for esri rest api, but report the missing parameters
    # that were collected above instead of silently discarding them
    if is_esri:
        if missing_request_parameters:
            missing_request_parameters_str = ",".join(
                sorted(missing_request_parameters))
            error_msgs.append(
                f"Parameter '{missing_request_parameters_str}' is missing in url."
            )
        return

    # WMS 1.3.0 uses CRS, all other versions use SRS to specify the projection
    version = wms_args.get("version")
    if version is not None:
        if version == "1.3.0":
            if "crs" not in wms_args:
                missing_request_parameters.add("crs")
            if "srs" in wms_args:
                error_msgs.append(
                    f"WMS {version} urls should not contain SRS parameter.")
        else:
            if "srs" not in wms_args:
                missing_request_parameters.add("srs")
            if "crs" in wms_args:
                error_msgs.append(
                    f"WMS {version} urls should not contain CRS parameter.")

    if missing_request_parameters:
        # Sorted to get a deterministic message
        missing_request_parameters_str = ",".join(
            sorted(missing_request_parameters))
        error_msgs.append(
            f"Parameter '{missing_request_parameters_str}' is missing in url.")
        return

    # Styles is mandatory according to the WMS specification, but some WMS servers seems not to care
    if "styles" not in wms_args:
        warning_msgs.append(
            "Parameter 'styles' is missing in url. 'STYLES=' can be used to request default style."
        )

    # We first send a service=WMS&request=GetCapabilities request to server
    # According to the WMS Specification Section 6.2 Version numbering and negotiation, the server should return
    # the GetCapabilities XML with the highest version the server supports.
    # If this fails, it is tried to explicitly specify a WMS version
    exceptions = []
    wms = None
    for wmsversion in [None, "1.3.0", "1.1.1", "1.1.0", "1.0.0"]:
        try:
            getcapabilities_url = wmshelper.get_getcapabilities_url(
                wms_url, wmsversion)
            # NOTE(review): no timeout is passed, an unresponsive server blocks this check indefinitely
            r = requests.get(getcapabilities_url, headers=source_headers)
            wms = wmshelper.parse_wms(r.text)
            if wms is not None:
                break
        except Exception as e:
            wmsversion_str = "-" if wmsversion is None else wmsversion
            exceptions.append(f"WMS {wmsversion_str}: Error: {e}")

    if wms is None:
        error_msgs.extend(exceptions)
        return

    for access_constraint in wms["AccessConstraints"]:
        info_msgs.append(f"AccessConstraints: {access_constraint}")
    for fee in wms["Fees"]:
        info_msgs.append(f"Fee: {fee}")

    if source["geometry"] is None:
        geom = None
    else:
        geom = eliutils.parse_eli_geometry(source["geometry"])

    # Check layers
    if "layers" in wms_args:
        layers = wms_args["layers"].split(",")
        not_found_layers = [
            layer_name for layer_name in layers
            if layer_name not in wms["layers"]
        ]
        if not_found_layers:
            error_msgs.append(
                f"Layers '{','.join(not_found_layers)}' not advertised by WMS GetCapabilities "
                "request.")

        # Check source geometry against layer bounding box
        # Regardless of its projection, each layer should advertise an approximated bounding box in lon/lat.
        # See WMS 1.3.0 Specification Section 7.2.4.6.6 EX_GeographicBoundingBox
        # A geometry without area is skipped, the share outside of the bounding box is undefined for it
        if geom is not None and geom.is_valid and geom.area > 0:
            max_outside = 0.0
            for layer_name in layers:
                if layer_name in wms["layers"]:
                    bbox = wms["layers"][layer_name]["BBOX"]
                    geom_bbox = box(*bbox)
                    geom_outside_bbox = geom.difference(geom_bbox)
                    area_outside_bbox = geom_outside_bbox.area / geom.area * 100.0
                    max_outside = max(max_outside, area_outside_bbox)
            # 5% is an arbitrary chosen value and should be adapted as needed
            if max_outside > 5.0:
                # Report the value that triggered the error, not the value of the last layer
                error_msgs.append(
                    f"{round(max_outside, 2)}% of geometry is outside of the layers bounding box. "
                    "Geometry should be checked")

        # Check styles
        if "styles" in wms_args:
            style = wms_args["styles"]
            # default style needs not to be advertised by the server
            # N layers are separated by N - 1 commas, so this is the list of N empty (default) styles
            all_default_styles = "," * (len(layers) - 1)
            if style not in ("default", "", all_default_styles):
                styles = style.split(",")
                if len(styles) != len(layers):
                    error_msgs.append(
                        "Not the same number of styles and layers.")
                else:
                    for layer_name, layer_style in zip(layers, styles):
                        if (len(layer_style) > 0
                                and layer_style != "default"
                                and layer_name in wms["layers"]
                                and layer_style
                                not in wms["layers"][layer_name]["Styles"]):
                            error_msgs.append(
                                f"Layer '{layer_name}' does not support style '{layer_style}'"
                            )

        # Check CRS
        crs_should_included_if_available = {"EPSG:4326", "EPSG:3857", "CRS:84"}
        if "available_projections" not in source["properties"]:
            error_msgs.append(
                "source is missing 'available_projections' element.")
        else:
            available_projections = source["properties"][
                "available_projections"]
            for layer_name in layers:
                if layer_name not in wms["layers"]:
                    continue
                layer_crs = wms["layers"][layer_name]["CRS"]

                # Projections listed by the source but not advertised by the server
                not_supported_crs = {
                    crs for crs in available_projections
                    if crs.upper() not in layer_crs
                }
                if not_supported_crs:
                    supported_crs_str = ",".join(layer_crs)
                    not_supported_crs_str = ",".join(not_supported_crs)
                    warning_msgs.append(
                        f"Layer '{layer_name}': CRS '{not_supported_crs_str}' not in: {supported_crs_str}. Some server support CRS which are not advertised."
                    )

                # Common projections advertised by the server but not listed by the source
                supported_but_not_included = {
                    crs for crs in crs_should_included_if_available
                    if crs not in available_projections and crs in layer_crs
                }
                if supported_but_not_included:
                    supported_but_not_included_str = ",".join(
                        supported_but_not_included)
                    warning_msgs.append(
                        f"Layer '{layer_name}': CRS '{supported_but_not_included_str}' not included in available_projections but "
                        "supported by server.")

    # Versions are compared as strings, which orders the known WMS versions (1.0.0 - 1.3.0) correctly
    if wms_args["version"] < wms["version"]:
        warning_msgs.append(
            f"Query requests WMS version '{wms_args['version']}', server supports '{wms['version']}'"
        )

    # Check formats
    imagery_format = wms_args["format"]
    imagery_formats_str = "', '".join(wms["formats"])
    if imagery_format not in wms["formats"]:
        error_msgs.append(
            f"Format '{imagery_format}' not in '{imagery_formats_str}'.")

    if ("category" in source["properties"]
            and "photo" in source["properties"]["category"]):
        if "jpeg" not in imagery_format and "jpeg" in imagery_formats_str:
            warning_msgs.append(
                f"Server supports JPEG, but '{imagery_format}' is used. "
                "JPEG is typically preferred for photo sources, but might not be always "
                "the best choice. "
                f"(Server supports: '{imagery_formats_str}')")
Пример #3
0
async def process_source(filename, session: ClientSession):
    """
    Update the WMS url and the available projections of one source file.

    The source is read from `filename`. A reference image is requested with the
    current url, then the url is updated through `update_wms` and the image is
    requested again. Only when both images are similar, the projections are
    filtered and tested and the file is rewritten if the url or the projections
    changed.

    Results are recorded in the module-level collections `processed_sources`,
    `ignored_sources`, `added_projections` and `removed_projections`.
    Exceptions are logged and not propagated.

    Parameters
    ----------
    filename : str
        Path of the source file (JSON)
    session : ClientSession
        Session passed on to `get_image` and `update_wms`
    """

    def proj_sort_key(proj):
        """Sort key: authority name first, then the numeric code"""
        parts = proj.split(":")
        return parts[0], int(parts[1])

    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # check if it is esri rest and not wms
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return
        processed_sources.add(filename)

        category = source["properties"].get("category", None)

        # Sources without geometry are tested with a fixed point
        if source["geometry"] is None:
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        # Clamp the default zoom level to the zoom range of the source
        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"],
                                  test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"],
                                  test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages = []
        status, image_hash = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if status != ImageHashStatus.SUCCESS or image_hash is None:
            ignored_sources[
                filename] = "Not possible to download reference image"
            # We are finished if it was not possible to get the image
            return

        # NOTE(review): presumably 16 means that the hash consists of a single repeated character - confirm in max_count
        if max_count(str(image_hash)) == 16:

            if category is not None and "photo" in category:
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename}: has category {category} but image hash is {image_hash}:\n\t{msgs}"
                )

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"{filename}: Image hash {image_hash} not useful ({category}): \n\t{error_msgs}"
            )
            ignored_sources[
                filename] = f"Image hash {image_hash} not useful ({category})"
            return

        # Update wms
        wms_messages = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(
                f"{filename}: Not possible to update wms url:\n\t{error_msgs}")
            ignored_sources[filename] = "Not possible to update wms url"
            return
        new_url = result["url"]
        # The set operations below require this to be a set
        new_projections = result["available_projections"]
        del result

        # Download image for updated url
        new_img_messages = []
        new_status, new_image_hash = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )

        if new_status != ImageHashStatus.SUCCESS or new_image_hash is None:
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"{filename}: Could not download image with updated url: {new_status}\n\t{error_msgs}"
            )
            ignored_sources[
                filename] = "Could not download image with updated url"
            return

        # Only sources are updated where the new query returns the same image
        if not image_similar(image_hash, new_image_hash, test_zoom_level):
            hash_diff = image_hash - new_image_hash
            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"{filename}: ImageHash not the same for: {filename}: {image_hash} - {new_image_hash}: {hash_diff}\n\t{error_original_img_messages} \n\t{error_new_img_messages}"
            )
            ignored_sources[
                filename] = f"ImageHash for reference image and image with updated url differs: {image_hash} - {new_image_hash}: {hash_diff}"
            return

        # Test if selected projections work despite not being advertised
        # A tuple is used to test the projections in a fixed order
        for candidate in ("EPSG:3857", "EPSG:4326"):
            if candidate in new_projections:
                continue

            epsg_check_messages = []
            epsg_image_status, epsg_image_hash = await get_image(
                url=new_url,
                available_projections=[candidate],
                lon=pt.x,
                lat=pt.y,
                zoom=test_zoom_level,
                session=session,
                messages=epsg_check_messages,
            )

            epsg_check_messages_str = "\n\t".join(epsg_check_messages)
            logging.info(
                f"{filename}: Test if projection {candidate} works despite not advertised:\n\t{epsg_check_messages_str}"
            )

            if epsg_image_status == ImageHashStatus.NETWORK_ERROR:
                # Keep a previously listed projection if the test was not conclusive
                if candidate in old_projections:
                    new_projections.add(candidate)
                    added_projections[filename][
                        "Network error, but projection was previously included."].append(
                            candidate)

            elif epsg_image_status == ImageHashStatus.SUCCESS:
                # The hash has to be checked before it is used to calculate the difference
                if epsg_image_hash is None:
                    logging.info(
                        f"{filename}: Do not add {candidate} No image returned."
                    )
                    continue

                hash_diff = image_hash - epsg_image_hash
                if image_similar(image_hash, epsg_image_hash,
                                 test_zoom_level):
                    new_projections.add(candidate)
                    added_projections[filename][
                        "Projection returns similar image despite not advertised."].append(
                            candidate)
                    logging.info(
                        f"{filename}: Add {candidate} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )
                else:
                    logging.info(
                        f"{filename}: Do not add {candidate} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )

        # Servers might support projections that are not used in the area covered by a source
        # Keep only EPSG codes that are used in the area covered by the sources geometry
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in new_projections:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    # A projection that can not be checked is kept
                    logging.exception(
                        f"{filename}: Could not check area of use for projection {epsg}: {e}"
                    )
                    continue

            if len(epsg_outside_area_of_use) > 0:
                # Only an error if the filter actually removes something
                if len(new_projections) == len(epsg_outside_area_of_use):
                    logging.error(
                        f"{filename}: epsg_outside_area_of_use filter removes all EPSG"
                    )

                # Report at most 10 removed projections
                reported = list(epsg_outside_area_of_use)
                if len(reported) > 10:
                    reported = reported[:10] + [
                        "...",
                        f"+ {len(epsg_outside_area_of_use)-10} more"
                    ]
                removed_projections[filename][
                    "EPSG outside area of use"].extend(reported)

            new_projections -= epsg_outside_area_of_use

        # Servers that report a lot of projection may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(new_projections) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in new_projections:
            included_alias_projections = new_projections.intersection(
                wmshelper.epsg_3857_alias)
            if len(included_alias_projections) > 0:
                removed_projections[filename]["Alias projections"].extend(
                    list(included_alias_projections))
                new_projections -= included_alias_projections
        else:
            # if EPSG:3857 not present but alias, keep only alias with highest number to be consistent
            result_epsg_3857_alias = new_projections & wmshelper.epsg_3857_alias
            result_epsg_3857_alias_sorted = sorted(
                result_epsg_3857_alias,
                key=proj_sort_key,
                reverse=True,
            )
            if len(result_epsg_3857_alias_sorted) > 1:
                removed_projections[filename]["Alias projections"].extend(
                    list(result_epsg_3857_alias_sorted[1:]))
            new_projections -= set(result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        deprecated_projections = new_projections - wmshelper.valid_epsgs
        if len(deprecated_projections) > 0:
            removed_projections[filename]["Deprecated projections"].extend(
                list(deprecated_projections))
        new_projections.intersection_update(wmshelper.valid_epsgs)

        # Check if projections are supported by server
        not_supported_projections = set()
        image_hashes = {}
        for proj in new_projections:
            proj_messages = []
            proj_status, proj_image_hash = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,
                lat=pt.y,
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )
            image_hashes[proj] = {
                "status": proj_status,
                "hash": proj_image_hash,
                "logs": proj_messages,
            }

            msgs = "\n\t".join(proj_messages)
            logging.info(
                f"{filename} Projection check: {proj}: {proj_status}:\n\t{msgs}"
            )

            if proj_status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                removed_projections[filename][
                    "Projection check: does not return an image"].append(proj)
            elif proj_status == ImageHashStatus.NETWORK_ERROR:
                # If the request was not successful, keep the projection only if it was previously included
                if proj not in old_projections:
                    removed_projections[filename][
                        "Projection check: network error and previously not included"].append(
                            proj)
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes and
                image_hashes["EPSG:3857"]["status"] == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"]["status"]
                == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"]["hash"]
            img_hash_4326 = image_hashes["EPSG:4326"]["hash"]
            # Both hashes are needed to compare the images and to calculate the difference
            if (img_hash_3857 is not None and img_hash_4326 is not None
                    and not image_similar(img_hash_3857, img_hash_4326,
                                          test_zoom_level)):
                msgs = "\n\t".join(image_hashes["EPSG:3857"]["logs"] +
                                   image_hashes["EPSG:4326"]["logs"])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similiar: {img_hash_3857} - {img_hash_4326}: {img_hash_3857-img_hash_4326}:\n\t{msgs}"
                )

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"],
                                           new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            source["properties"]["available_projections"] = sorted(
                new_projections, key=proj_sort_key)

        if url_has_changed or projections_have_changed:
            # Serialize before the file is opened for writing, so that a
            # serialization error does not leave a truncated file behind
            serialized = json.dumps(source,
                                    indent=4,
                                    sort_keys=False,
                                    ensure_ascii=False)
            # Written with aiofiles to not block the event loop
            async with aiofiles.open(filename, mode="w",
                                     encoding="utf-8") as out:
                await out.write(serialized + "\n")
    except Exception as e:
        logging.exception(
            f"{filename}: Error occured while processing source: {e}")
Пример #4
0
async def process_source(filename: str, session: ClientSession):
    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # check if it is esri rest and not wms
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return
        processed_sources.add(filename)

        category = source["properties"].get("category", None)

        if source["geometry"] is None:
            geom: MultiPolygon | Polygon = box(-180, -90, 180, 90)
            pt: Point = Point(7.44, 46.56)
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])
            pt: Point = geom.representative_point()  # type: ignore

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"],
                                  test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"],
                                  test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages: List[str] = []
        original_image_result = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,  # type: ignore
            lat=pt.y,  # type: ignore
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if not original_image_result.status == ImageHashStatus.SUCCESS or original_image_result.image_hash is None:  # type: ignore
            ignored_sources[
                filename] = "Not possible to download reference image"
            # We are finished if it was not possible to get the image
            return

        if max_count(str(
                original_image_result.image_hash)) == 16:  # type: ignore
            if "category" in source["properties"] and "photo" in source[
                    "properties"]["category"]:
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename}: has category {category} but image hash is {original_image_result.image_hash}:\n\t{msgs}"
                )  # type: ignore

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"{filename}: Image hash {original_image_result.image_hash} not useful ({category}): \n\t{error_msgs}"
            )  # type: ignore
            ignored_sources[
                filename] = f"Image hash {original_image_result.image_hash} not useful ({category})"  # type: ignore
            return

        # Update wms
        wms_messages: List[str] = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(
                f"{filename}: Not possible to update wms url:\n\t{error_msgs}")
            ignored_sources[filename] = "Not possible to update wms url"
            return

        new_url, new_projections = result
        del result

        # Servers that report a lot of projection may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(new_projections) > 15:
            filtered_projs: Set[str] = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Download image for updated url
        new_img_messages: List[str] = []
        updated_image_result = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,  # type: ignore
            lat=pt.y,  # type: ignore
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )

        if not updated_image_result.status == ImageHashStatus.SUCCESS or updated_image_result.image_hash is None:  # type: ignore
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"{filename}: Could not download image with updated url: {updated_image_result.status}\n\t{error_msgs}"
            )
            ignored_sources[
                filename] = "Could not download image with updated url"
            return

        # Only sources are updated where the new query returns the same image
        if not image_similar(original_image_result.image_hash,
                             updated_image_result.image_hash,
                             test_zoom_level):  # type: ignore

            original_hash = original_image_result.image_hash  # type: ignore
            new_hash = updated_image_result.image_hash  # type: ignore
            hash_diff = original_hash - new_hash  # type: ignore

            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"{filename}: ImageHash not the same for: {filename}: {original_hash} - {new_hash}: {hash_diff}\n\t{error_original_img_messages} \n\t{error_new_img_messages}"
            )
            ignored_sources[
                filename] = f"ImageHash for reference image and image with updated url differs: {original_hash} - {new_hash}: {new_hash}"
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in new_projections:
                epsg_check_messages: List[str] = []
                epsg_image_result = await get_image(
                    url=new_url,
                    available_projections=[EPSG],
                    lon=pt.x,  # type: ignore
                    lat=pt.y,  # type: ignore
                    zoom=test_zoom_level,
                    session=session,
                    messages=epsg_check_messages,
                )

                epsg_check_messages_str = "\n\t".join(epsg_check_messages)
                logging.info(
                    f"{filename}: Test if projection {EPSG} works despite not advertised:\n\t{epsg_check_messages_str}"
                )

                if epsg_image_result.status == ImageHashStatus.NETWORK_ERROR:
                    if EPSG in old_projections and EPSG not in new_projections:
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Network error, but projection was previously included."].append(
                                EPSG)

                elif epsg_image_result.status == ImageHashStatus.SUCCESS:

                    epsg_image_hash = epsg_image_result.image_hash  # type: ignore
                    original_image_hash = original_image_result.image_hash  # type: ignore

                    # Relax similarity constraint to account for differences due to loss of quality due to re-projection
                    hash_diff = original_image_result.image_hash - epsg_image_result.image_hash  # type: ignore
                    if image_similar(original_image_result.image_hash,
                                     epsg_image_result.image_hash,
                                     test_zoom_level):  # type: ignore
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Projection returns similar image despite not advertised."].append(
                                EPSG)
                        logging.info(
                            f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {original_image_hash}: {hash_diff}"
                        )
                    elif epsg_image_hash is not None:
                        logging.info(
                            f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {original_image_hash}: {hash_diff}"
                        )
                    else:
                        logging.info(
                            f"{filename}: Do not add {EPSG} No image returned."
                        )

        # Check if projections are supported by server
        not_supported_projections: Set[str] = set()
        image_hashes: Dict[str, Tuple[ImageResult, List[str]]] = {}
        for proj in new_projections:
            proj_messages: List[str] = []
            epsg_image_result = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,  # type: ignore
                lat=pt.y,  # type: ignore
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )

            image_hashes[proj] = (epsg_image_result, proj_messages)

            msgs = "\n\t".join(proj_messages)
            logging.info(
                f"{filename} Projection check: {proj}: {epsg_image_result.status}:\n\t{msgs}"
            )

            if epsg_image_result.status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                removed_projections[filename][
                    "Projection check: does not return an image"].append(proj)
            elif epsg_image_result.status == ImageHashStatus.NETWORK_ERROR:
                # If not successfully status do not add if not previously added
                if proj not in old_projections:
                    removed_projections[filename][
                        "Projection check: network error and previously not included"].append(
                            proj)
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes and
                image_hashes["EPSG:3857"][0].status == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"][0].status
                == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"][
                0].image_hash  # type: ignore
            img_hash_4326 = image_hashes["EPSG:4326"][
                0].image_hash  # type: ignore
            diff_hash = img_hash_3857 - img_hash_4326  # type: ignore
            if not image_similar(img_hash_3857, img_hash_4326,
                                 test_zoom_level):
                msgs = "\n\t".join(image_hashes["EPSG:3857"][1] +
                                   image_hashes["EPSG:4326"][1])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similar: {img_hash_3857} - {img_hash_4326}: {diff_hash}:\n\t{msgs}"
                )

        # Check projections again to filter out EPSG:3857 alias
        new_projections = eliutils.clean_projections(new_projections)

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"],
                                           new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            source["properties"]["available_projections"] = list(
                sorted(
                    new_projections,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source,
                          out,
                          indent=4,
                          sort_keys=False,
                          ensure_ascii=False)
                out.write("\n")
    except Exception as e:
        logging.exception(
            f"{filename}: Error occurred while processing source: {e}")
Пример #5
0
def check_tms(source: Dict[str, Any], messages: List[Message]) -> None:
    """Check TMS source

    The check validates the tile URL template, compares the configured zoom
    range and geometry with the server's TileMap resource (when one can be
    fetched) and finally requests one tile per zoom level at a point taken
    from the source geometry. All findings are appended to ``messages``; an
    unexpected exception is reported as a single ERROR message instead of
    being raised.

    Parameters
    ----------
    source : Dict[str, Any]
        The source
    messages : List[Message]
        The list to add messages to
    """

    def report_failed_zooms(
            failures: List[Tuple[int, str, int, Optional[str]]]) -> None:
        """Append one WARNING per zoom level whose tile request failed."""
        for level, failed_url, http_code, mime_type in failures:
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=
                    f"URL for zoom level {level} returned HTTP Code {http_code}: {failed_url} MIME type: {mime_type}",
                ))

    try:
        url = source["properties"]["url"]
        source_headers = get_http_headers(source)

        if source["geometry"] is None:
            geom = None
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])

        # Validate URL. The placeholders are stripped of their braces first so
        # that the template looks like a plain URL to the validator.
        _url = re.sub(r"switch:?([^}]*)", "switch",
                      url).replace("{", "").replace("}", "")
        # validators.url() does not raise on an invalid URL: it returns a
        # falsy failure object, so the result has to be tested explicitly.
        validation_result = validators.url(_url)  # type: ignore
        if not validation_result:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    f"URL validation error {validation_result} / {url}"))

        # Values substituted into the URL template when requests are made
        parameters: Dict[str, Any] = {}

        # {z} instead of {zoom}
        if "{z}" in source["properties"]["url"]:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    f"Parameter {{z}} is used instead of {{zoom}} in tile url: {url}"
                ))
            return

        # We can't test sources that have an apikey, that is unknown to ELI
        if "{apikey}" in url:
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=
                    f"Not possible to check URL, apikey is required: {url}"))
            return

        # If URL contains a {switch:a,b,c} parameters, use the first for tests
        match = re.search(r"switch:?([^}]*)", url)
        if match is not None:
            switches = match.group(1).split(",")
            url = url.replace(match.group(0), "switch")
            parameters["switch"] = switches[0]

        # Check zoom levels; these defaults apply when the source does not
        # declare its own range
        min_zoom = 0
        max_zoom = 22
        if "min_zoom" in source["properties"]:
            min_zoom = int(source["properties"]["min_zoom"])
        if "max_zoom" in source["properties"]:
            max_zoom = int(source["properties"]["max_zoom"])

        # Check if we find a TileMap Resource to check for zoom levels
        # While there is a typical location for metadata, there is no requirement
        # that the metadata need to be located there.
        tms_url = tmshelper.TMSURL(url=url)
        tilemap_resource_url = tms_url.get_tilemap_resource_url()

        if tilemap_resource_url is not None:
            for tilemap_url in [
                    tilemap_resource_url,
                    tilemap_resource_url + "/tilemapresource.xml",
            ]:
                try:
                    # Use the per-source headers, as the tile requests below
                    # do. The previous code referenced a name that is not
                    # defined in this function.
                    r, xml = get_text_encoded(tilemap_url.format(**parameters),
                                              headers=source_headers)
                    if r.status_code == 200 and xml is not None:
                        try:
                            tilemap_resource = tmshelper.TileMapResource(xml)
                        except Exception:
                            # Not all TMS server provide TileMap resources.
                            continue

                        if tilemap_resource.tile_map is None:
                            continue

                        # Check zoom levels against TileMapResource
                        tilemap_minzoom, tilemap_maxzoom = tilemap_resource.get_min_max_zoom_level(
                        )
                        # Warn only when the values actually differ
                        if min_zoom != tilemap_minzoom:
                            messages.append(
                                Message(
                                    level=MessageLevel.WARNING,
                                    message=
                                    f"min_zoom level '{min_zoom}' not the same as specified in TileMap: '{tilemap_minzoom}': {tilemap_url}. "
                                    "Caution: this might be intentional as some server timeout for low zoom levels.",
                                ))
                        if max_zoom != tilemap_maxzoom:
                            messages.append(
                                Message(
                                    level=MessageLevel.WARNING,
                                    message=
                                    f"max_zoom level '{max_zoom}' not the same as specified in TileMap: '{tilemap_maxzoom}': {tilemap_url}",
                                ))

                        # Check geometry within bbox
                        if geom is not None and tilemap_resource.tile_map.bbox84 is not None:
                            max_area_outside = max_area_outside_bbox(
                                geom, tilemap_resource.tile_map.bbox84)
                            # 5% is an arbitrary chosen value and should be adapted as needed
                            if max_area_outside > 5.0:
                                messages.append(
                                    Message(
                                        level=MessageLevel.ERROR,
                                        message=
                                        f"{round(max_area_outside, 2)}% of geometry is outside of the layers bounding box. Geometry should be checked",
                                    ))
                        # A usable TileMap resource was evaluated; do not try
                        # the alternative location.
                        break

                except Exception as e:
                    # Best effort: the TileMap resource is optional metadata,
                    # so a failure here must not abort the tile checks.
                    print(f"Error fetching TMS: {e}: {url}")

        # Test zoom levels by accessing tiles for a point within the geometry
        if geom is not None:
            centroid: Point = geom.representative_point()  # type: ignore
        else:
            # Fixed fallback location for sources without a geometry
            centroid = Point(6.1, 49.6)
        centroid_x: float = centroid.x  # type: ignore
        centroid_y: float = centroid.y  # type: ignore

        zoom_failures: List[Tuple[int, str, int, Optional[str]]] = []
        zoom_success: List[int] = []
        tested_zooms: Set[int] = set()

        def test_zoom(zoom: int) -> None:
            """Request the tile covering the test point at ``zoom`` and record the outcome."""
            tested_zooms.add(zoom)
            tile: mercantile.Tile = mercantile.tile(centroid_x, centroid_y,
                                                    zoom)  # type: ignore

            tile_x: int = tile.x  # type: ignore
            tile_y: int = tile.y  # type: ignore

            # The y placeholder comes in three flavours; the two flipped
            # variants are resolved here, before str.format() fills in the
            # remaining parameters.
            query_url = url
            if "{-y}" in url:
                y = 2**zoom - 1 - tile_y
                query_url = query_url.replace("{-y}", str(y))
            elif "{!y}" in url:
                y = 2**(zoom - 1) - 1 - tile_y
                query_url = query_url.replace("{!y}", str(y))
            else:
                query_url = query_url.replace("{y}", str(tile_y))

            parameters["x"] = tile_x
            parameters["zoom"] = zoom
            query_url = query_url.format(**parameters)

            url_is_good, http_code, mime = test_image(query_url,
                                                      source_headers)
            if url_is_good:
                zoom_success.append(zoom)
            else:
                zoom_failures.append((zoom, query_url, http_code, mime))

        # Test zoom levels
        for zoom in range(min_zoom, max_zoom + 1):
            test_zoom(zoom)

        tested_str = ",".join(map(str, sorted(tested_zooms)))
        sorted_failures = sorted(zoom_failures, key=lambda x: x[0])

        if not zoom_failures and zoom_success:
            messages.append(
                Message(
                    level=MessageLevel.INFO,
                    message=f"Zoom levels reachable. (Tested: {tested_str})"))
        elif zoom_failures and zoom_success:
            not_found_str = ",".join(
                str(level) for level, _, _, _ in sorted_failures)
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=
                    f"Zoom level {not_found_str} not reachable. (Tested: {tested_str}) Tiles might not be present at tested location: {centroid_x},{centroid_y}",
                ))
            report_failed_zooms(sorted_failures)
        else:
            # Nothing succeeded (this also covers an empty zoom range)
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    f"No zoom level reachable. (Tested: {tested_str}) Tiles might not be present at tested location: {centroid_x},{centroid_y}",
                ))
            report_failed_zooms(sorted_failures)

    except Exception as e:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=f"Failed testing TMS source: Exception: {e}",
            ))
Пример #6
0
def check_wms(source: Dict[str, Any], messages: List[Message]) -> None:
    """Check WMS source

    The GetMap URL template is checked for the required placeholders and
    request parameters. For genuine WMS URLs (as opposed to ESRI REST URLs,
    recognised by the absence of a ``request`` parameter) the server's
    GetCapabilities document is then fetched and the configured layers,
    styles, projections, WMS version and image format are compared with what
    the server advertises. All findings are appended to ``messages``.

    Parameters
    ----------
    source : Dict[str, Any]
        The source
    messages : List[Message]
        The list to add messages to
    """

    url = source["properties"]["url"]
    wms_url = wmshelper.WMSURL(url)
    source_headers = get_http_headers(source)

    params = ["{proj}", "{bbox}", "{width}", "{height}"]
    missingparams = [p for p in params if p not in url]
    if missingparams:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=
                f"The following values are missing in the URL: {','.join(missingparams)}",
            ))

    try:
        validation_result = wms_url.is_valid_getmap_url()
        # NOTE(review): the helper presumably hands back the result of a
        # validators-style check, which signals failure by *returning* a falsy
        # object rather than raising - confirm against wmshelper. A ``None``
        # result is treated as "no verdict" so a helper that only raises keeps
        # working.
        if validation_result is not None and not validation_result:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    f"URL validation error {validation_result} for {url}"))
    except validators.utils.ValidationFailure as e:
        messages.append(
            Message(level=MessageLevel.ERROR,
                    message=f"URL validation error {e} for {url}"))

    # Check mandatory WMS GetMap parameters (Table 8, Section 7.3.2, WMS 1.3.0 specification)
    # Normalize parameter names to lower case
    wms_args = {key.lower(): value for key, value in wms_url.get_parameters()}

    # Check if it is actually a ESRI Rest url and not a WMS url
    is_esri = "request" not in wms_args

    # Check if required parameters are missing
    missing_request_parameters: Set[str] = set()
    if is_esri:
        required_parameters = [
            "f", "bbox", "size", "imageSR", "bboxSR", "format"
        ]
    else:
        required_parameters = [
            "version",
            "request",
            "layers",
            "bbox",
            "width",
            "height",
            "format",
        ]
    for request_parameter in required_parameters:
        if request_parameter.lower() not in wms_args:
            missing_request_parameters.add(request_parameter)

    if not is_esri:
        # WMS 1.3.0 names the projection parameter CRS, older versions SRS
        if "version" in wms_args and wms_args["version"] == "1.3.0":
            if "crs" not in wms_args:
                missing_request_parameters.add("crs")
            if "srs" in wms_args:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=
                        f"WMS {wms_args['version']} URLs should not contain SRS parameter: {url}",
                    ))
        elif "version" in wms_args and not wms_args["version"] == "1.3.0":
            if "srs" not in wms_args:
                missing_request_parameters.add("srs")
            if "crs" in wms_args:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=
                        f"WMS {wms_args['version']} URLs should not contain CRS parameter: {url}",
                    ))
    if missing_request_parameters:
        # Sorted so that the message is stable between runs
        missing_request_parameters_str = ",".join(
            sorted(missing_request_parameters))
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=
                f"Parameter '{missing_request_parameters_str}' is missing in URL: {url}.",
            ))
        return

    # Nothing more to do for ESRI Rest API
    if is_esri:
        return

    # Styles is mandatory according to the WMS specification, but some WMS servers seems not to care
    if "styles" not in wms_args:
        messages.append(
            Message(
                level=MessageLevel.WARNING,
                message=
                f"Parameter 'styles' is missing in url. 'STYLES=' can be used to request default style.: {url}",
            ))

    # We first send a service=WMS&request=GetCapabilities request to server
    # According to the WMS Specification Section 6.2 Version numbering and negotiation, the server should return
    # the GetCapabilities XML with the highest version the server supports.
    # If this fails, it is tried to explicitly specify a WMS version
    exceptions: List[str] = []
    wms = None
    for wms_version in [None, "1.3.0", "1.1.1", "1.1.0", "1.0.0"]:
        wms_version_str = "-" if wms_version is None else wms_version

        wms_getcapabilities_url = None
        try:
            wms_getcapabilities_url = wms_url.get_capabilities_url(
                wms_version=wms_version)
            _, xml = get_text_encoded(wms_getcapabilities_url,
                                      headers=source_headers)
            if xml is None:
                # No document came back: record it and fall through to the
                # next version instead of giving up without any message.
                exceptions.append(
                    f"WMS {wms_version_str}: Error: no content returned {wms_getcapabilities_url}"
                )
                continue
            wms = wmshelper.WMSCapabilities(xml)
            break
        except Exception as e:
            exceptions.append(
                f"WMS {wms_version_str}: Error: {e} {wms_getcapabilities_url}")
            continue

    # Check if it was possible to parse the WMS GetCapability response
    # If not, there is nothing left to check
    if wms is None:
        for msg in exceptions:
            messages.append(Message(
                level=MessageLevel.ERROR,
                message=msg,
            ))
        return

    # Log access constraints and fees metadata
    for access_constraint in wms.access_constraints:
        messages.append(
            Message(
                level=MessageLevel.INFO,
                message=f"AccessConstraints: {access_constraint}",
            ))
    for fee in wms.fees:
        messages.append(
            Message(
                level=MessageLevel.INFO,
                message=f"Fee: {fee}",
            ))

    if source["geometry"] is None:
        geom = None
    else:
        geom = eliutils.parse_eli_geometry(source["geometry"])

    # Check layers
    if "layers" in wms_args:

        layers = wms_args["layers"].split(",")

        # Check if layers in WMS GetMap URL are advertised by WMS server.
        not_found_layers = [
            layer_name for layer_name in layers if layer_name not in wms.layers
        ]
        if not_found_layers:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    f"Layers '{','.join(not_found_layers)}' not advertised by WMS GetCapabilities request (Some server do not advertise layers, but they are very rare).: {url}",
                ))

        # Check source geometry against layer bounding box
        # Regardless of its projection, each layer should advertise an approximated bounding box in lon/lat.
        # See WMS 1.3.0 Specification Section 7.2.4.6.6 EX_GeographicBoundingBox
        if geom is not None and geom.is_valid:  # type: ignore

            # The truthiness test already drops layers without a bbox
            bboxs = [
                wms.layers[layer_name].bbox for layer_name in layers
                if layer_name in wms.layers and wms.layers[layer_name].bbox
            ]
            max_area_outside = max_area_outside_bbox(geom, bboxs)

            # 5% is an arbitrary chosen value and should be adapted as needed
            if max_area_outside > 5.0:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=
                        f"{round(max_area_outside, 2)}% of geometry is outside of the layers bounding box. Geometry should be checked",
                    ))

        # Check styles
        if "styles" in wms_args:

            style_parameter = wms_args["styles"]

            # default style needs not to be advertised by the server.
            # A list of n empty (default) styles is written with n - 1 commas.
            all_default_styles = "," * (len(layers) - 1)
            if style_parameter not in ("default", "", all_default_styles):
                styles = style_parameter.split(",")
                if len(styles) != len(layers):
                    messages.append(
                        Message(
                            level=MessageLevel.ERROR,
                            message=
                            f"Not the same number of styles and layers. {len(styles)} vs {len(layers)}",
                        ))
                else:
                    for layer_name, style_name in zip(layers, styles):
                        # Empty and "default" entries request the default
                        # style; unknown layers were reported above already.
                        if (len(style_name) > 0 and not style_name == "default"
                                and layer_name in wms.layers and style_name
                                not in wms.layers[layer_name].styles):
                            messages.append(
                                Message(
                                    level=MessageLevel.ERROR,
                                    message=
                                    f"Layer '{layer_name}' does not support style '{style_name}'",
                                ))

        # Check CRS
        if "available_projections" not in source["properties"]:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=
                    "Sources of type wms must include the 'available_projections' element.",
                ))
        else:

            # A WMS server can include many CRS. Some of them are frequently used by editors. We require them to be included if they are supported by the WMS server.
            crs_should_included_if_available = {
                "EPSG:4326", "EPSG:3857", "CRS:84"
            }
            available_projections: List[str] = source["properties"][
                "available_projections"]

            for layer_name in layers:
                if layer_name in wms.layers:
                    layer_crs = wms.layers[layer_name].crs

                    # Check for CRS in available_projections that are not advertised by the WMS server
                    not_supported_crs: Set[str] = {
                        crs for crs in available_projections
                        if crs.upper() not in layer_crs
                    }

                    if not_supported_crs:
                        supported_crs_str = ",".join(layer_crs)
                        not_supported_crs_str = ",".join(
                            sorted(not_supported_crs))
                        messages.append(
                            Message(
                                level=MessageLevel.WARNING,
                                message=
                                f"Layer '{layer_name}': CRS '{not_supported_crs_str}' not in: {supported_crs_str}. Some server support CRS which are not advertised.",
                            ))

                    # Check for CRS supported by the WMS server but not in available_projections
                    supported_but_not_included: Set[str] = {
                        crs for crs in crs_should_included_if_available
                        if crs not in available_projections
                        and crs in layer_crs
                    }

                    if supported_but_not_included:
                        supported_but_not_included_str = ",".join(
                            sorted(supported_but_not_included))
                        messages.append(
                            Message(
                                level=MessageLevel.WARNING,
                                message=
                                f"Layer '{layer_name}': CRS '{supported_but_not_included_str}' not included in available_projections but supported by server.",
                            ))

    # Check if server supports a newer WMS version as in url
    # NOTE(review): this is a plain comparison of the two version values; it
    # orders the version strings tried above correctly but is not a general
    # version comparison.
    if wms_args["version"] < wms.version:
        messages.append(
            Message(
                level=MessageLevel.WARNING,
                message=
                f"Query requests WMS version '{wms_args['version']}', server supports '{wms.version}'",
            ))

    # Check image formats
    request_imagery_format = wms_args["format"]
    wms_advertised_formats_str = "', '".join(wms.formats)
    if request_imagery_format not in wms.formats:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=
                f"Format '{request_imagery_format}' not in '{wms_advertised_formats_str}': {url}.",
            ))

    # For photo sources it is recommended to use jpeg format, if it is available
    if "category" in source["properties"] and "photo" in source["properties"][
            "category"]:
        # The advertised formats are compared with the full request format
        # above (e.g. an image MIME type), so look for "jpeg" inside each
        # entry instead of requiring an entry that is exactly "jpeg".
        server_supports_jpeg = any("jpeg" in fmt for fmt in wms.formats)
        if "jpeg" not in request_imagery_format and server_supports_jpeg:
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=
                    f"Server supports JPEG, but '{request_imagery_format}' is used. "
                    f"JPEG is typically preferred for photo sources, but might not be always "
                    f"the best choice. (Server supports: '{wms_advertised_formats_str}')",
                ))