Example #1
import logging

import pandas as pd

logger = logging.getLogger(__name__)

# CLASS_SHORTNAME (the map of supported observation classes to their
# short names) is defined elsewhere in the fmuobs module.


def validate_internal_dframe(obs_df: pd.DataFrame) -> bool:
    """Validate the internal dataframe format for observations.

    Will log warnings and/or errors if anything found.

    Args:
        obs_df: Dataframe to validate

    Returns:
        True if everything is ok (or empty)
    """
    failed = False
    if obs_df.empty:
        logger.warning("Observation dataframe empty")
        return True
    if "CLASS" not in obs_df:
        logger.error("CLASS is not in dataframe - not valid")
        failed = True
    if "LABEL" not in obs_df:
        logger.error("LABEL is not in dataframe - not valid")
        failed = True
    if failed:
        # The remaining checks require the CLASS and LABEL columns,
        # so give up early when either is missing.
        return False
    non_supported_classes = set(obs_df["CLASS"]) - set(CLASS_SHORTNAME.keys())
    if non_supported_classes:
        logger.error("Unsupported observation classes: %s",
                     str(non_supported_classes))
        failed = True

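    # Detect repeated observations: index the frame by the identifying
    # columns that are present; duplicated(keep=False) then marks every
    # row sharing its key with at least one other row.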
    index = {"CLASS", "LABEL", "OBS",
             "SEGMENT"}.intersection(set(obs_df.columns))
    repeated_rows = obs_df[obs_df.set_index(
        list(index)).index.duplicated(keep=False)]
    if not repeated_rows.empty:
        logger.error("Non-unique observation classes and labels")
        logger.error("\n%s",
                     str(repeated_rows.dropna(axis="columns", how="all")))
        failed = True

    # Possibilities for further validation:
    #  * Check that segment has start and end if not default.
    #  * SUMMARY_OBSERVATION requires four arguments (also for resinsight?)
    #  * BLOCK_OBSERVATION requires two global, and i, j, k, value, error
    #    for each subunit.
    #  * block requires label
    #  * general requires data, restart, obs_file, index_list, index_file,
    #  * error_covariance is optional.

    logger.info("Observation dataframe validated")
    return not failed
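
For reference, a minimal usage sketch (hypothetical, not part of the
fmuobs module); it assumes SUMMARY_OBSERVATION is among the keys of
CLASS_SHORTNAME:

obs_df = pd.DataFrame(
    [
        {"CLASS": "SUMMARY_OBSERVATION", "LABEL": "WOPR:OP1", "VALUE": 100.0},
        {"CLASS": "SUMMARY_OBSERVATION", "LABEL": "WOPR:OP2", "VALUE": 200.0},
    ]
)
assert validate_internal_dframe(obs_df)

# A dataframe without the required CLASS/LABEL columns fails validation:
assert not validate_internal_dframe(pd.DataFrame([{"VALUE": 1.0}]))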
Example #2
import logging

import pandas as pd
import yaml

logger = logging.getLogger(__name__)

# CLASS_SHORTNAME and the converters resinsight_df2df, obsdict2df and
# ertobs2df are defined elsewhere in the fmuobs module.


def autoparse_file(filename):
    """Detects the observation file format for a given filename. This
    is done by attempting to parse its content and giving up on
    exceptions.

    NB: In case of ERT file formats, the include statements are
    interpreted relative to current working directory. Thus it
    is recommended to reparse with the correct cwd after detecting the
    ERT file format. The correct cwd for include statements is the
    directory of the ERT config file, which is outside the context of fmuobs.

    Args:
        filename (str): Path to the observation file to be parsed.

    Returns:
        tuple: First element is one of "resinsight", "csv", "yaml" or "ert"
        (or None if no supported format was detected), second element is a
        dataframe, or a dict if the input was YAML.
    """
    try:
        dframe = pd.read_csv(filename, sep=";")
        if {"DATE", "VECTOR", "VALUE", "ERROR"}.issubset(
            set(dframe.columns)
        ) and not dframe.empty:
            logger.info("Parsed %s as a ResInsight observation file", filename)
            return ("resinsight", resinsight_df2df(dframe))
    except ValueError:
        pass

    try:
        dframe = pd.read_csv(filename, sep=",")
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            logger.info(
                "Parsed %s as a CSV (internal dataframe format for ertobs) file",
                filename,
            )
            if "DATE" in dframe:
                dframe["DATE"] = pd.to_datetime(dframe["DATE"])
            return ("csv", dframe)
    except ValueError:
        pass

    try:
        with open(filename) as f_handle:
            obsdict = yaml.safe_load(f_handle.read())
        if isinstance(obsdict, dict):
            if obsdict.get("smry", None) or obsdict.get("rft", None):
                logger.info("Parsed %s as a YAML file with observations", filename)
                return ("yaml", obsdict2df(obsdict))
    except yaml.scanner.ScannerError as exception:
        # This occurs if there are tabs in the file, which is not
        # allowed in a YAML file (but it can be present in ERT observation files)
        logger.debug("ScannerError while attempting yaml-parsing")
        logger.debug(str(exception))
    except ValueError:
        pass

    try:
        with open(filename) as f_handle:
            # This function does not have information on include file paths.
            # Accept a FileNotFoundError while parsing; if we encounter one,
            # the file is most likely an ERT file that needs additional
            # hints on where include files are located.
            try:
                dframe = ertobs2df(f_handle.read())
            except FileNotFoundError:
                logger.info(
                    "Parsed %s as an ERT observation file, with include statements",
                    filename,
                )
                return ("ert", pd.DataFrame())
        if {"CLASS", "LABEL"}.issubset(dframe.columns) and not dframe.empty:
            if set(dframe["CLASS"]).intersection(set(CLASS_SHORTNAME.keys())):
                logger.info("Parsed %s as an ERT observation file", filename)
                return ("ert", dframe)
    except ValueError:
        pass

    logger.error(
        "Unable to parse %s as any supported observation file format", filename
    )
    return (None, pd.DataFrame())
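
A minimal usage sketch (hypothetical filename and data): the file written
below matches the internal CSV format, so the detector is expected to
settle on "csv":

pd.DataFrame(
    [{"CLASS": "SUMMARY_OBSERVATION", "LABEL": "WOPR:OP1", "VALUE": 100.0}]
).to_csv("observations.csv", index=False)

filetype, dframe = autoparse_file("observations.csv")
assert filetype == "csv"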