def test_create_from_aggregated_csv_file_smry_csv(testdata_folder: Path,
                                                  tmp_path: Path) -> None:
    """Build a provider set from an aggregated summary csv and verify it."""
    csv_path = (testdata_folder / "reek_test_data" / "aggregated_data" /
                "smry.csv")
    factory = EnsembleTableProviderFactory(tmp_path,
                                           backing_type=BACKING_TYPE_TO_TEST,
                                           allow_storage_writes=True)
    providerset = factory.create_provider_set_from_aggregated_csv_file(
        csv_path)

    assert providerset.ensemble_names() == ["iter-0"]
    provider = providerset.ensemble_provider("iter-0")

    column_names = provider.column_names()
    assert len(column_names) == 17
    assert column_names[0] == "DATE"
    assert column_names[16] == "YEARS"

    assert len(provider.realizations()) == 40

    # All realizations
    df = provider.get_column_data(["YEARS"])
    assert len(df.columns) == 2
    assert df.columns[0] == "REAL"
    assert df.columns[1] == "YEARS"
    assert df["REAL"].nunique() == 40

    # Explicit subset of realizations
    df = provider.get_column_data(["YEARS"], [0, 39, 10])
    assert len(df.columns) == 2
    assert df.columns[0] == "REAL"
    assert df.columns[1] == "YEARS"
    assert df["REAL"].nunique() == 3
def test_create_from_per_realization_csv_file(testdata_folder: Path,
                                              tmp_path: Path) -> None:
    """Build a provider set from per-realization csv files and verify it."""
    ensembles: Dict[str, str] = {
        "iter-0": str(testdata_folder / "01_drogon_ahm/realization-*/iter-0"),
        "iter-3": str(testdata_folder / "01_drogon_ahm/realization-*/iter-3"),
    }

    csvfile = "share/results/tables/rft.csv"

    factory = EnsembleTableProviderFactory(tmp_path,
                                           backing_type=BACKING_TYPE_TO_TEST,
                                           allow_storage_writes=True)
    providerset = factory.create_provider_set_from_per_realization_csv_file(
        ensembles, csvfile)

    assert providerset.ensemble_names() == ["iter-0", "iter-3"]
    provider = providerset.ensemble_provider("iter-0")

    all_column_names = provider.column_names()
    assert len(all_column_names) == 13

    assert len(provider.realizations()) == 100

    valdf = provider.get_column_data(["CONIDX"], [2])
    assert valdf.shape == (218, 2)
    assert valdf.columns[0] == "REAL"
    assert valdf.columns[1] == "CONIDX"
    # Compare as a plain list: asserting directly on the numpy array returned
    # by unique() is an elementwise comparison that only works by accident for
    # a single element and raises ValueError ("truth value ... ambiguous") if
    # more than one realization were to slip through the filter.
    assert valdf["REAL"].unique().tolist() == [2]
    assert valdf["CONIDX"].nunique() == 24
    assert sorted(valdf["CONIDX"].unique()) == list(range(1, 25))
# Example #3
    def __init__(
        self,
        app: Dash,
        webviz_settings: WebvizSettings,
        ensembles: Optional[list] = None,
        rel_file_pattern: str = "share/results/unsmry/*.arrow",
        statistics_file: str = "share/results/tables/gridpropstatistics.csv",
        surface_renaming: Optional[dict] = None,
        time_index: str = "monthly",
        column_keys: Optional[list] = None,
        csvfile_statistics: Optional[Path] = None,
        csvfile_smry: Optional[Path] = None,
    ):
        """Load property statistics (and optionally summary vectors), either
        from the ensembles configured in ``webviz_settings`` or from
        pre-aggregated csv files, then build the plugin's models and widgets.

        Raises:
            ValueError: if neither ``ensembles`` nor ``csvfile_statistics``
                is given, or if arrow files are missing for a non-portable
                run.
        """
        super().__init__()
        self.theme: WebvizConfigTheme = webviz_settings.theme
        self.ensembles = ensembles
        self._surface_folders: Union[dict, None] = None
        self._vmodel: Optional[Union[SimulationTimeSeriesModel,
                                     ProviderTimeSeriesDataModel]] = None
        run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
        table_provider = EnsembleTableProviderFactory.instance()

        if ensembles is not None:
            # Resolve ensemble names to their scratch paths from the shared
            # settings.
            ensemble_paths = {
                ensemble_name:
                webviz_settings.shared_settings["scratch_ensembles"]
                [ensemble_name]
                for ensemble_name in ensembles
            }

            resampling_frequency = Frequency(time_index)
            provider_factory = EnsembleSummaryProviderFactory.instance()

            try:
                # Preferred path: presampled arrow summary files plus
                # per-realization statistics csv files.
                provider_set = {
                    ens: provider_factory.create_from_arrow_unsmry_presampled(
                        str(ens_path), rel_file_pattern, resampling_frequency)
                    for ens, ens_path in ensemble_paths.items()
                }
                self._vmodel = ProviderTimeSeriesDataModel(
                    provider_set=provider_set, column_keys=column_keys)
                property_df = create_df_from_table_provider(
                    table_provider.
                    create_provider_set_from_per_realization_csv_file(
                        ensemble_paths, statistics_file))
            except ValueError as error:
                message = (
                    f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n"
                    "If no arrow files have been generated with `ERT` using `ECL2CSV`, "
                    "the commandline tool `smry2arrow_batch` can be used to generate arrow "
                    "files for an ensemble")
                # Only a portable run is allowed to fall through to the
                # legacy loading below; otherwise this is a hard error.
                if not run_mode_portable:
                    raise ValueError(message) from error

                # NOTE: this part below is to ensure backwards compatibility for portable app's
                # created before the arrow support. It should be removed in the future.
                emodel: EnsembleSetModel = (
                    caching_ensemble_set_model_factory.get_or_create_model(
                        ensemble_paths=ensemble_paths,
                        time_index=time_index,
                        column_keys=column_keys,
                    ))
                self._vmodel = SimulationTimeSeriesModel(
                    dataframe=emodel.get_or_load_smry_cached())
                property_df = emodel.load_csv(csv_file=Path(statistics_file))

            # Surface folders are assumed to live next to the realizations:
            # <path up to "realization">/share/results/maps/<ensemble name>
            self._surface_folders = {
                ens: Path(ens_path.split("realization")[0]) /
                "share/results/maps" / ens
                for ens, ens_path in ensemble_paths.items()
            }

        else:
            if csvfile_statistics is None:
                raise ValueError(
                    "If not 'ensembles', then csvfile_statistics must be provided"
                )
            # NOTE: the try/except is for backwards compatibility with existing portable app's.
            # It should be removed in the future together with the support of aggregated csv-files
            try:
                property_df = create_df_from_table_provider(
                    table_provider.
                    create_provider_set_from_aggregated_csv_file(
                        csvfile_statistics))
            except FileNotFoundError:
                if not run_mode_portable:
                    raise
                property_df = read_csv(csvfile_statistics)

            if csvfile_smry is not None:
                try:
                    smry_df = create_df_from_table_provider(
                        table_provider.
                        create_provider_set_from_aggregated_csv_file(
                            csvfile_smry))
                except FileNotFoundError:
                    if not run_mode_portable:
                        raise
                    smry_df = read_csv(csvfile_smry)

                self._vmodel = SimulationTimeSeriesModel(dataframe=smry_df)

        self._pmodel = PropertyStatisticsModel(dataframe=property_df,
                                               theme=self.theme)

        self._surface_renaming = surface_renaming if surface_renaming else {}
        self._surface_table = generate_surface_table(
            statistics_dframe=self._pmodel.dataframe,
            ensembles=self._pmodel.ensembles,
            surface_folders=self._surface_folders,
            surface_renaming=self._surface_renaming,
        )
        self.set_callbacks(app)
# Example #4
    def __init__(
        self,
        app: Dash,
        webviz_settings: WebvizSettings,
        csvfile: str = None,
        ensembles: list = None,
        aggregated_csvfile: Path = None,
        aggregated_parameterfile: Path = None,
        observation_file: Path = None,
        observation_group: str = "general",
        remap_observation_keys: Dict[str, str] = None,
        remap_observation_values: Dict[str, str] = None,
        colors: Dict = None,
        initial_data: Dict = None,
        initial_layout: Dict = None,
    ):
        """Build table and parameter provider sets — either from
        per-realization files of the given ensembles, or from aggregated
        csv files — and wire up the parameter filter, observation model,
        colors and callbacks.

        Raises:
            ValueError: if neither (``ensembles`` and ``csvfile``) nor
                (``aggregated_csvfile`` and ``aggregated_parameterfile``)
                is provided.
        """
        super().__init__()

        provider = EnsembleTableProviderFactory.instance()
        self._initial_data = initial_data if initial_data else {}
        self._initial_layout = initial_layout if initial_layout else {}
        if ensembles is not None and csvfile is not None:
            # Per-realization input: resolve ensemble names to scratch paths.
            ensembles_dict: Dict[str, str] = {
                ens_name:
                webviz_settings.shared_settings["scratch_ensembles"][ens_name]
                for ens_name in ensembles
            }
            self._parameterproviderset = (
                provider.
                create_provider_set_from_per_realization_parameter_file(
                    ensembles_dict))
            self._tableproviderset = (
                provider.create_provider_set_from_per_realization_csv_file(
                    ensembles_dict, csvfile))
            self._ensemble_names = ensembles
        elif aggregated_csvfile and aggregated_parameterfile is not None:
            # Aggregated csv input: ensemble names come from the file itself.
            self._tableproviderset = (
                provider.create_provider_set_from_aggregated_csv_file(
                    aggregated_csvfile))
            self._parameterproviderset = (
                provider.create_provider_set_from_aggregated_csv_file(
                    aggregated_parameterfile))
            self._ensemble_names = self._tableproviderset.ensemble_names()
        else:
            raise ValueError(
                "Specify either ensembles and csvfile or aggregated_csvfile "
                "and aggregated_parameterfile")
        # Union of parameter/data column names across all ensembles.
        all_parameters: list = [
            self._parameterproviderset.ensemble_provider(ens).column_names()
            for ens in self._ensemble_names
        ]
        self._parameter_names: list = list(set().union(*all_parameters))
        all_data_columns: list = [
            self._tableproviderset.ensemble_provider(ens).column_names()
            for ens in self._ensemble_names
        ]
        self._data_column_names: list = list(set().union(*all_data_columns))
        # Concatenate all parameter data into one dataframe tagged by
        # ENSEMBLE, used for the realization list and the parameter filter.
        dfs = []
        for ens in self._ensemble_names:
            df = self._parameterproviderset.ensemble_provider(
                ens).get_column_data(column_names=self._parameterproviderset.
                                     ensemble_provider(ens).column_names())
            df["ENSEMBLE"] = ens
            dfs.append(df)
        parameterdf = pd.concat(dfs)
        self._realizations = sorted(list(parameterdf["REAL"].unique()))
        self._parameter_filter = ParameterFilter(self.uuid("parameter-filter"),
                                                 parameterdf)
        self._observationfile = observation_file
        # Observations are optional; only build the model when a file is set.
        self._observationmodel = (ObservationModel(
            get_path(self._observationfile),
            observation_group,
            remap_observation_keys,
            remap_observation_values,
        ) if self._observationfile else None)
        WEBVIZ_ASSETS.add(
            Path(webviz_subsurface.__file__).parent / "_assets" / "js" /
            "clientside_functions.js")

        # Theme colors per ensemble, optionally overridden by the user.
        self._colors: Dict = unique_colors(self._ensemble_names,
                                           webviz_settings.theme)
        if colors is not None:
            self._colors.update(colors)

        self.set_callbacks(app)
    def __init__(
        self,
        app,
        webviz_settings: WebvizSettings,
        parameter_csv: Path = None,
        response_csv: Path = None,
        ensembles: list = None,
        rel_file_pattern: str = "share/results/unsmry/*.arrow",
        response_file: str = None,
        response_filters: dict = None,
        response_ignore: list = None,
        response_include: list = None,
        column_keys: list = None,
        sampling: str = "monthly",
        aggregation: str = "sum",
        corr_method: str = "pearson",
    ):
        """Load parameter and response data — either from aggregated csv
        files or from the configured ensembles — and set up the
        parameter/response correlation models and callbacks.

        Raises:
            ValueError: if both ``response_ignore`` and ``response_include``
                are given, if csv files are combined with
                ``ensembles``/``response_file``, or if neither csv files nor
                ensembles are provided.
        """

        super().__init__()

        self.parameter_csv = parameter_csv if parameter_csv else None
        self.response_csv = response_csv if response_csv else None
        self.response_file = response_file if response_file else None
        self.response_filters = response_filters if response_filters else {}
        self.column_keys = column_keys
        self._sampling = Frequency(sampling)
        self.corr_method = corr_method
        self.aggregation = aggregation
        # "response_include" and "response_ignore" are mutually exclusive.
        # ("Incorrent" typo in the original message fixed here.)
        if response_ignore and response_include:
            raise ValueError(
                'Incorrect argument. Either provide "response_include", '
                '"response_ignore" or neither')
        if parameter_csv and response_csv:
            # Aggregated csv input: must not be combined with ensembles.
            if ensembles or response_file:
                raise ValueError(
                    'Incorrect arguments. Either provide "csv files" or '
                    '"ensembles and response_file".')
            parameterdf = read_csv(self.parameter_csv)
            self.responsedf = read_csv(self.response_csv)

        elif ensembles:
            # Resolve ensemble names to scratch paths from shared settings.
            self.ens_paths = {
                ens: webviz_settings.shared_settings["scratch_ensembles"][ens]
                for ens in ensembles
            }
            table_provider_factory = EnsembleTableProviderFactory.instance()
            parameterdf = create_df_from_table_provider(
                table_provider_factory.
                create_provider_set_from_per_realization_parameter_file(
                    self.ens_paths))
            if self.response_file:
                # Responses from per-realization csv files.
                self.responsedf = load_csv(
                    ensemble_paths=self.ens_paths,
                    csv_file=response_file,
                    ensemble_set_name="EnsembleSet",
                )
            else:
                # Responses from presampled arrow summary files.
                smry_provider_factory = (
                    EnsembleSummaryProviderFactory.instance())
                provider_set = {
                    ens_name:
                    smry_provider_factory.create_from_arrow_unsmry_presampled(
                        ens_path, rel_file_pattern, self._sampling)
                    for ens_name, ens_path in self.ens_paths.items()
                }
                self.response_filters["DATE"] = "single"
                self.responsedf = create_df_from_summary_provider(
                    provider_set,
                    self.column_keys,
                )
        else:
            raise ValueError(
                'Incorrect arguments. Either provide "csv files" or "ensembles and response_file".'
            )
        pmodel = ParametersModel(dataframe=parameterdf,
                                 keep_numeric_only=True,
                                 drop_constants=True)
        self.parameterdf = pmodel.dataframe
        self.parameter_columns = pmodel.parameters

        # Sanity-check that parameters and responses cover the same runs.
        parresp.check_runs(self.parameterdf, self.responsedf)
        parresp.check_response_filters(self.responsedf, self.response_filters)

        # Only select numerical responses
        self.response_columns = parresp.filter_numerical_columns(
            df=self.responsedf,
            column_ignore=response_ignore,
            column_include=response_include,
            filter_columns=self.response_filters.keys(),
        )

        self.theme = webviz_settings.theme
        self.set_callbacks(app)
# Example #6
    def __init__(
        self,
        app: Dash,
        webviz_settings: WebvizSettings,
        csvfile: str = None,
        ensemble: str = None,
        aggregated_csvfile: Path = None,
        aggregated_parameterfile: Path = None,
        initial_response: str = None,
        single_value_selectors: List[str] = None,
        multi_value_selectors: List[str] = None,
    ):
        """Build single-ensemble table and parameter provider sets — from
        either per-realization files or aggregated csv files — and set up the
        tornado widget, response list and callbacks.

        Raises:
            ValueError: if neither (``ensemble`` and ``csvfile``) nor
                (``aggregated_csvfile`` and ``aggregated_parameterfile``)
                is provided, or if the aggregated csv contains more than
                one ensemble.
            KeyError: if the parameter data lacks the SENSNAME/SENSCASE
                columns needed for tornado plots.
        """
        super().__init__()
        self._single_filters = single_value_selectors if single_value_selectors else []
        self._multi_filters = multi_value_selectors if multi_value_selectors else []
        provider = EnsembleTableProviderFactory.instance()

        if ensemble is not None and csvfile is not None:
            # Per-realization input: resolve the ensemble name to its path.
            ensemble_dict: Dict[str, str] = {
                ensemble:
                webviz_settings.shared_settings["scratch_ensembles"][ensemble]
            }
            self._parameterproviderset = (
                provider.
                create_provider_set_from_per_realization_parameter_file(
                    ensemble_dict))
            self._tableproviderset = (
                provider.create_provider_set_from_per_realization_csv_file(
                    ensemble_dict, csvfile))
            self._ensemble_name = ensemble
        elif aggregated_csvfile and aggregated_parameterfile is not None:
            # Aggregated csv input: the plugin supports exactly one ensemble.
            self._tableproviderset = (
                provider.create_provider_set_from_aggregated_csv_file(
                    aggregated_csvfile))
            self._parameterproviderset = (
                provider.create_provider_set_from_aggregated_csv_file(
                    aggregated_parameterfile))
            if len(self._tableproviderset.ensemble_names()) != 1:
                raise ValueError("Csv file has multiple ensembles. "
                                 "This plugin only supports a single ensemble")
            self._ensemble_name = self._tableproviderset.ensemble_names()[0]
        else:
            raise ValueError(
                "Specify either ensembles and csvfile or aggregated_csvfile "
                "and aggregated_parameterfile")

        # SENSNAME/SENSCASE columns define the design matrix for tornado
        # plots; fail with a clear message if they are absent.
        try:
            design_matrix_df = self._parameterproviderset.ensemble_provider(
                self._ensemble_name).get_column_data(
                    column_names=["SENSNAME", "SENSCASE"])
        except KeyError as exc:
            raise KeyError(
                "Required columns 'SENSNAME' and 'SENSCASE' is missing "
                f"from {self._ensemble_name}. Cannot calculate tornado plots"
            ) from exc
        design_matrix_df["ENSEMBLE"] = self._ensemble_name
        design_matrix_df["SENSTYPE"] = design_matrix_df.apply(
            lambda row: find_sens_type(row.SENSCASE), axis=1)
        self._tornado_widget = TornadoWidget(realizations=design_matrix_df,
                                             app=app,
                                             webviz_settings=webviz_settings)
        # All table columns are candidate responses, minus any columns that
        # are used as filters.
        self._responses: List[str] = self._tableproviderset.ensemble_provider(
            self._ensemble_name).column_names()
        if self._single_filters:
            self._responses = [
                response for response in self._responses
                if response not in self._single_filters
            ]
        if self._multi_filters:
            self._responses = [
                response for response in self._responses
                if response not in self._multi_filters
            ]
        # Default to the first remaining response when none was requested.
        self._initial_response: str = (initial_response if initial_response
                                       else self._responses[0])
        self.set_callbacks(app)