def test_qcdata():
    """Testing getting data with _QCForwardData class"""
    qcd = QCData()
    qcd.parse(data=DATA1)

    # the instance and its parsed members shall have the expected XTGeo types
    assert isinstance(qcd, QCData)
    assert isinstance(qcd.grid, xtgeo.Grid)
    assert isinstance(qcd.gridprops, xtgeo.GridProperties)
    assert isinstance(qcd.wells, xtgeo.Wells)

    # running from files, hence no RMS project shall be attached
    assert qcd._project is None
    assert qcd.grid.ncol == 40

    # the zone property shall be retrievable by name and carry expected stats
    zoneprop = qcd.gridprops.get_prop_by_name(ZONENAME)
    assert isinstance(zoneprop, xtgeo.GridProperty)
    assert zoneprop.name == ZONENAME
    assert zoneprop.values.mean() == pytest.approx(1.92773, abs=0.01)
    assert zoneprop.ncol == 40

    # the zone log shall be present in the well dataframe
    onewell = qcd.wells.get_well("OP_1")
    assert ZONELOGNAME in onewell.dataframe.columns
class GridQuality(QCForward):
    def run(self, data, reuse=False, project=None):
        """Main routine for evaluating grid quality and stop/warn if too bad

        The routine depends on existing XTGeo functions for this purpose.

        Args:
            data (dict or str): The input data either as a Python dictionary or
                a path to a YAML file
            reuse (bool or list): Reusing some "timeconsuming to read" data in the
                instance. If True, then grid and gridprops will be reused as default.
                Alternatively it can be a list for more fine grained control, e.g.
                ["grid", "gridprops", "wells"]
            project (Union[object, str]): For usage inside RMS, None if running files
        """
        self._data = self.handle_data(data, project)
        self._validate_input(self._data, project)

        QCC.verbosity = self._data.get("verbosity", 0)

        # parse data that are special for this check; use the resolved dictionary
        # from handle_data (the raw `data` may be a YAML file path), consistent
        # with the other QCForward subclasses
        QCC.print_info("Parsing additional data...")
        self.ldata = _LocalData()
        self.ldata.parse_data(self._data)

        if isinstance(self.gdata, QCData):
            self.gdata.parse(data=self._data, reuse=reuse, project=project)
        else:
            self.gdata = QCData()
            self.gdata.parse(self._data)

        dfr = self.check_gridquality()
        QCC.print_debug(f"Results: \n{dfr}")

        self.evaluate_qcreport(dfr, "grid quality")

    def check_gridquality(self):
        """
        Given data, do check of gridquality via XTGeo

        Final result will be a table like this::

                                  WARNRULE       WARN%  STOPRULE      STOP% STATUS
            GRIDQUALITY
            minangle_top_base[0]  all>10%ifx<60  13.44  all>0%ifx<40  2.32  WARN
            collapsed[0]          all>15%        12.25  allcells>30%  0.0   OK

        Returns:
            pd.DataFrame: The report dataframe, indexed on GRIDQUALITY.
        """
        # get properties via XTGeo method get_gridquality_properties()
        gqc = self.gdata.grid.get_gridquality_properties()

        actions = self.ldata.actions
        if actions is None:
            raise ValueError("No actions are defined for grid quality")

        result = OrderedDict(
            [
                ("GRIDQUALITY", []),
                ("WARNRULE", []),
                ("WARN%", []),
                ("STOPRULE", []),
                ("STOP%", []),
                ("STATUS", []),
            ]
        )

        for prop in gqc.props:
            # gqc.props is a list of all gridquality properties, but not all of these
            # are defined in input actions.
            therules = actions.get(prop.name, None)

            if self.data["project"] and self.ldata.writeicon and therules is not None:
                QCC.print_info(f"Write icon in RMS for {prop.name}")
                prop.to_roxar(self.data["project"], self.data["grid"], prop.name)

            if therules is None:
                continue

            for numrule, therule in enumerate(therules):
                warnrule = ActionsParser(
                    therule.get("warn", None), mode="warn", verbosity=QCC.verbosity
                )
                stoprule = ActionsParser(
                    therule.get("stop", None), mode="stop", verbosity=QCC.verbosity
                )

                QCC.print_debug(f"WARN RULE {warnrule.status}")
                QCC.print_debug(f"STOP RULE {stoprule.status}")

                result["GRIDQUALITY"].append(f"{prop.name}[{numrule}]")

                # evaluate warn first, then stop; a stop condition shall win
                status = "OK"
                for issue in [warnrule, stoprule]:
                    status, result = self._evaluate_allcells(
                        issue, result, prop, status
                    )
                result["STATUS"].append(status)

        dfr = self.make_report(
            result, reportfile=self.ldata.reportfile, nametag=self.ldata.nametag
        )
        dfr.set_index("GRIDQUALITY", inplace=True)
        return dfr

    @staticmethod
    def _evaluate_allcells(issue, inresult, prop, instatus):
        """Evaluation of all cells per issue (warn or stop) given the criteria.

        Args:
            issue (ActionsParser): Parsed warn or stop rule.
            inresult (OrderedDict): Result table so far (not mutated; a copy is
                returned).
            prop: XTGeo grid quality property being evaluated.
            instatus (str): Status from previous evaluations ("OK" or "WARN").

        Returns:
            tuple: (status, updated result dict)
        """
        result = deepcopy(inresult)

        if issue.status is None:
            # no rule of this mode is defined; record UNDEF but keep the status
            # from previous evaluations (a WARN already raised must not be lost)
            result[issue.mode.upper() + "%"].append(UNDEF)
            result[issue.mode.upper() + "RULE"].append(UNDEF)
            return instatus, result

        ncell = prop.values.count()
        if issue.given == "<":
            nbyrule = (prop.values < issue.criteria).sum()
        elif issue.given == ">":
            nbyrule = (prop.values > issue.criteria).sum()
        else:
            # e.g. discrete qual parameters such as 'faulted' have only 0 or 1 values
            nbyrule = (prop.values > 0).sum()

        actualpercent = 100.0 * nbyrule / ncell

        result[issue.mode.upper() + "%"].append(actualpercent)
        result[issue.mode.upper() + "RULE"].append(issue.expression)

        if issue.compare == ">" and actualpercent > issue.limit:
            status = issue.mode.upper()
        elif issue.compare == "<" and actualpercent < issue.limit:
            status = issue.mode.upper()
        else:
            status = instatus

        return status, result

    @staticmethod
    def _validate_input(data, project):
        """Validate data against JSON schemas, TODO complete schemas"""
        spath = Path(fmu.tools.__file__).parent / "qcforward" / "_schemas"

        schemafile = "gridquality_asfile.json"
        if project:
            schemafile = "gridquality_asroxapi.json"

        # explicit encoding, consistent with the other schema validators
        with open((spath / schemafile), "r", encoding="utf8") as thisschema:
            schema = json.load(thisschema)

        validate(instance=data, schema=schema)
class QCProperties:
    """
    The QCProperties class consists of a set of methods for extracting
    property statistics from 3D Grids, Raw and Blocked wells.

    The methods for statistics extraction can be run individually, or a
    yaml-configuration file can be used to enable an automatic run of the
    methods. See the method 'from_yaml'.

    When several methods of statistics extraction has been run within the
    instance, a merged dataframe is available through the 'dataframe' property.

    All methods can be run from either RMS python, or from files.

    XTGeo is being utilized to get a dataframe from the input parameter data.
    XTGeo data is reused in the instance to increase performance.

    Methods for extracting statistics from 3D Grids, Raw and Blocked wells:

    Args:
        data (dict): The input data as a Python dictionary (see description of
            valid argument keys in documentation)
        reuse (bool or list): If True, then grid and gridprops will be reused
            as default. Alternatively it can be a list for more fine grained
            control, e.g. ["grid", "gridprops", "wells"]
        project (obj or str): For usage inside RMS

    Returns:
        A PropStat() instance
    """

    def __init__(self):
        self._propstats = []  # list of PropStat() instances
        self._dataframe = pd.DataFrame()  # merged dataframe with continuous stats
        self._dataframe_disc = pd.DataFrame()  # merged dataframe with discrete stats
        self._xtgdata = QCData()  # QCData instance, general XTGeo data

    # Properties:
    # ==================================================================================

    @property
    def dataframe(self):
        """A merged dataframe from all the PropStat() instances"""
        self._dataframe = self._create_dataframe(self._dataframe)
        return self._dataframe

    @property
    def dataframe_disc(self):
        """A merged dataframe from all the PropStat() instances"""
        self._dataframe_disc = self._create_dataframe(
            self._dataframe_disc, discrete=True
        )
        return self._dataframe_disc

    @property
    def xtgdata(self):
        """The QCData instance"""
        return self._xtgdata

    # Hidden methods:
    # ==================================================================================

    def _input_preparations(self, project, data, reuse, dtype, qcdata=None):
        """
        Prepare the input parameter data for use with a PropStat() instance.
        Parameters are loaded to XTGeo and can be reused in the instance.
        """
        data = data.copy()
        data["dtype"] = dtype
        data["project"] = project
        if dtype == "bwells":
            # PropStat/QCData expect blocked wells under the "bwells" key
            data["bwells"] = data.pop("wells")

        pdata = PropStatParameterData(
            properties=data["properties"],
            selectors=data.get("selectors", {}),
            filters=data.get("filters", None),
            verbosity=data.get("verbosity", 0),
        )

        if dtype == "grid":
            # map parameter names to their property files, when given
            pfiles = {}
            for elem in ["properties", "selectors", "filters"]:
                if elem in data and isinstance(data[elem], dict):
                    for values in data[elem].values():
                        if "pfile" in values:
                            pfiles[values["name"]] = values["pfile"]

            data["gridprops"] = [
                [param, pfiles[param]] if param in pfiles else ["unknown", param]
                for param in pdata.params
            ]

        if qcdata is not None:
            self._xtgdata = qcdata

        self._xtgdata.parse(
            project=data["project"],
            data=data,
            reuse=reuse,
            wells_settings=None
            if dtype == "grid"
            else {
                "lognames": pdata.params,
            },
        )

        return pdata, data

    def _dataload_and_calculation(self, project, data, reuse, dtype, qcdata=None):
        """Load data to XTGeo and xtract statistics. Can be"""

        # create PropStatParameterData() instance and load parameters to xtgeo
        pdata, data = self._input_preparations(project, data, reuse, dtype, qcdata)

        QCC.print_info("Extracting property statistics...")

        # compute statistics
        propstat = PropStat(parameter_data=pdata, xtgeo_data=self._xtgdata, data=data)

        self._propstats.append(propstat)
        return propstat

    def _extract_statistics(self, project, data, reuse, dtype, qcdata):
        """
        Single statistics extraction, or multiple if multiple filters are defined.
        All PropStat() instances will be appended to the self._propstats list and
        are used to create a merged dataframe for the instance.

        Returns: A single PropStat() instance or a list of PropStat() intances
            if multiple filters are used.
        """
        QCC.verbosity = data.get("verbosity", 0)

        if "multiple_filters" in data:
            propstats = []
            for name, filters in data["multiple_filters"].items():
                QCC.print_info(
                    f"Starting run with name '{name}', " f"using filters {filters}"
                )
                usedata = data.copy()
                usedata["filters"] = filters
                usedata["name"] = name
                # reuse=True: grid data is identical between the filter runs
                pstat = self._dataload_and_calculation(
                    project, data=usedata, reuse=True, dtype=dtype, qcdata=qcdata
                )
                propstats.append(pstat)
            return propstats
        else:
            return self._dataload_and_calculation(project, data, reuse, dtype, qcdata)

    def _initiate_from_config(self, cfg, project=None, reuse=False):
        """Run methods for statistics extraction based on entries in yaml-config"""
        with open(cfg, "r") as stream:
            data = yaml.safe_load(stream)

        if "grid" in data:
            for item in data["grid"]:
                self.get_grid_statistics(data=item, project=project, reuse=reuse)

        if "wells" in data:
            for item in data["wells"]:
                self.get_well_statistics(data=item, project=project, reuse=reuse)

        if "blockedwells" in data:
            for item in data["blockedwells"]:
                self.get_bwell_statistics(data=item, project=project, reuse=reuse)

    def _create_dataframe(self, dframe, discrete=False):
        """
        Combine dataframe from all PropStat() instances.
        Update dataframe if out of sync with self._propstats
        """
        # guard: with no PropStat() instances there is nothing to combine, and
        # dframe["ID"] would raise KeyError on the initial (column-less) dataframe
        if not self._propstats:
            return dframe
        if dframe.empty or len(self._propstats) != len(dframe["ID"].unique()):
            dframe = combine_property_statistics(
                self._propstats, discrete=discrete, verbosity=QCC.verbosity
            )
        return dframe

    # QC methods:
    # ==================================================================================

    def get_grid_statistics(
        self,
        data: dict,
        project: object = None,
        reuse: bool = False,
        qcdata: QCData = None,
    ) -> PropStat:
        """Extract property statistics from 3D Grid"""
        return self._extract_statistics(
            project, data, reuse, dtype="grid", qcdata=qcdata
        )

    def get_well_statistics(
        self,
        data: dict,
        project: object = None,
        reuse: bool = False,
        qcdata: QCData = None,
    ) -> PropStat:
        """Extract property statistics from wells"""
        return self._extract_statistics(
            project, data, reuse, dtype="wells", qcdata=qcdata
        )

    def get_bwell_statistics(
        self,
        data: dict,
        project: object = None,
        reuse: bool = False,
        qcdata: QCData = None,
    ) -> PropStat:
        """Extract property statistics from blocked wells"""
        return self._extract_statistics(
            project, data, reuse, dtype="bwells", qcdata=qcdata
        )

    def from_yaml(self, cfg: str, project: object = None, reuse: bool = False):
        """Use yaml-configuration file to run the statistics extractions methods."""
        self._initiate_from_config(cfg, project, reuse)

    def to_csv(self, csvfile: str, disc: bool = False):
        """Write combined dataframe to csv"""
        dframe = self.dataframe if not disc else self.dataframe_disc
        dframe.to_csv(csvfile, index=False)
        QCC.print_info(f"Dataframe with {'discrete' if disc else 'continous'} ")
        QCC.print_info(f"property statistics written to {csvfile}")
class BlockedWellsVsGridProperties(QCForward):
    def run(
        self,
        data: Union[dict, str],
        reuse: Optional[bool] = False,
        project: Optional[Any] = None,
    ):
        """Main routine for evaluating blockedwells vs gridproperties

        The routine depends on existing XTGeo functions for this purpose.

        Args:
            data (dict or str): The input data either as a Python dictionary or
                a path to a YAML file
            reuse (bool or list): Reusing some "timeconsuming to read" data in the
                instance. If True, then grid and gridprops will be reused as default.
                Alternatively it can be a list for more fine grained control, e.g.
                ["grid", "gridprops", "bwells"]
            project (Union[object, str]): For usage inside RMS, None if running files
        """
        self._data = self.handle_data(data, project)
        self._validate_input(self._data, project)

        QCC.verbosity = self._data.get("verbosity", 0)

        # parse data that are special for this check
        QCC.print_info("Parsing additional data...")
        self.ldata = _LocalData()
        self.ldata.parse_data(self._data)

        # now need to retrieve blocked properties and grid properties from the
        # "compare" dictionary:
        wsettings = {"lognames": list(self.ldata.compare.keys())}

        if project:
            # inside RMS, get gridprops implicitly from compare values
            self._data["gridprops"] = list(self.ldata.compare.values())

        if not isinstance(self.gdata, QCData):
            self.gdata = QCData()

        self.gdata.parse(
            data=self._data, reuse=reuse, project=project, wells_settings=wsettings
        )

        dfr, comb = self.compare_bw_props()
        QCC.print_debug(f"Results: \n{dfr}")

        status = self.evaluate_qcreport(
            dfr, "blocked wells vs grid props", stopaction=False
        )

        self._show_data(dfr, comb)

        if status == "STOP":
            QCC.force_stop("STOP criteria is found!")

    def _show_data(self, dfr, comb):
        """Optionally print the underlying dataframe per the 'show_data' setting.

        Either selected wells (e.g. the failing; if 'fail' only lines containing
        FAIL are shown) or all wells.
        """
        show = self.ldata.show_data
        if show is None or show is False:
            pass
        elif isinstance(show, dict):
            if "lines" not in show or "wellstatus" not in show:
                raise ValueError(
                    f"The 'showdata' entry is in an invalid form or format: {show}"
                )

            lines = show["lines"].upper()
            wstatus = show["wellstatus"].upper()

            print(
                f"\n** Key 'show_data' is active, here showing lines with {lines} "
                f"for wells classified as {wstatus} **"
            )
            # filter out all line with word FAIL or WARN or ... , h/t HAVB
            fcomb = comb[comb.astype(str).agg("".join, axis=1).str.contains(lines)]
            if len(fcomb) > 0:
                mask = dfr["STATUS"] == wstatus
                wells = [well for well in dfr[mask]["WELL"].unique() if well != "all"]
                if wells:
                    print(f"Wells within {wstatus} criteria are: {wells}:\n")
                    print(fcomb[fcomb["WELLNAME"].isin(wells)].to_string())
                else:
                    print(f"No wells within {wstatus} criteria")
            else:
                print(f"No lines are matching {lines}. Wrong input?:\n")
        else:
            print("Show all well cells for all wells:")
            if len(comb) > 0:
                print(comb.to_string())

    def compare_bw_props(self) -> pd.DataFrame:
        """Given data, do a comparison of blcked wells cells vs props, via XTGeo.

        Returns:
            tuple: (report dataframe, combined blocked-well/model dataframe)
        """
        # dataframe for the blocked wells
        dfbw = self.gdata.bwells.get_dataframe()
        if self.gdata.project is not None:
            # when parsing blocked wells from RMS, cell indices starts from 0, not 1
            dfbw["I_INDEX"] += 1
            dfbw["J_INDEX"] += 1
            dfbw["K_INDEX"] += 1

        # filtering on depth tvd_range:
        if self.ldata.tvd_range and isinstance(self.ldata.tvd_range, list):
            zmin = self.ldata.tvd_range[0]
            zmax = self.ldata.tvd_range[1]
            if zmin >= zmax:
                raise ValueError("The zmin value >= zmax in 'tvd_range'")
            dfbw = dfbw[dfbw["Z_TVDSS"] >= zmin]
            dfbw = dfbw[dfbw["Z_TVDSS"] <= zmax]
            if dfbw.empty:
                raise RuntimeError(
                    f"No wells left after tvd_range: {self.ldata.tvd_range}"
                )

        # dataframe for the properties, need some processing (column names)
        dfprops = self.gdata.gridprops.get_dataframe(ijk=True, grid=self.gdata.grid)
        dfprops = dfprops.rename(
            columns={"IX": "I_INDEX", "JY": "J_INDEX", "KZ": "K_INDEX"}
        )

        # merge the dataframe on I J K index
        comb = pd.merge(
            dfbw,
            dfprops,
            how="inner",
            on=["I_INDEX", "J_INDEX", "K_INDEX"],
            suffixes=("__bw", "__model"),  # in case the names are equal -> add suffix
        )
        QCC.print_debug("Made a combined dataframe!")
        QCC.print_debug(f"\n {comb}")

        diffs = {}
        # compare the relevant properties
        for bwprop, modelprop in self.ldata.compare.items():
            usebwprop = bwprop if bwprop != modelprop else bwprop + "__bw"
            usemodelprop = modelprop if bwprop != modelprop else modelprop + "__model"
            dname = bwprop + ":" + modelprop
            dnameflag = dname + "_flag"
            comb = self._eval_tolerance(comb, usebwprop, usemodelprop, dname, dnameflag)
            diffs[dname] = dnameflag

        return self._evaluate_diffs(comb, diffs), comb

    def _eval_tolerance(self, df_in, bwprop, modelprop, diffname, diffnameflag):
        """Make a flag log for diffs based on tolerance input."""
        comb = df_in.copy()

        tol = self.ldata.tolerance
        relative = isinstance(tol, dict) and "rel" in tol
        tolerance = tol if isinstance(tol, float) else list(tol.values())[0]

        comb[diffname] = comb[bwprop] - comb[modelprop]
        comb[diffnameflag] = "MATCH"
        if relative:
            # adjust relative to be weighted on mean() value
            comb[bwprop + "_mean"] = comb[bwprop].mean()
            comb[diffname + "_rel"] = comb[diffname] / comb[bwprop + "_mean"]
            comb.loc[abs(comb[diffname + "_rel"]) > tolerance, diffnameflag] = "FAIL"
        else:
            comb.loc[abs(comb[diffname]) > tolerance, diffnameflag] = "FAIL"

        return comb

    def _evaluate_diffs(self, comb, diffs) -> pd.DataFrame:
        """Evaluate match percentage per well (and 'all') against warn/stop rules."""
        result: OrderedDict = OrderedDict(
            [
                ("WELL", []),
                ("COMPARE(BW:MODEL)", []),
                ("WARNRULE", []),
                ("STOPRULE", []),
                ("MATCH%", []),
                ("STATUS", []),
            ]
        )

        wells = list(comb["WELLNAME"].unique())
        wells.append("all")

        QCC.print_info("Compare per well...")
        for wname in wells:
            subset = comb[comb["WELLNAME"] == wname]
            for diff, flag in diffs.items():
                result["WELL"].append(wname)
                result["COMPARE(BW:MODEL)"].append(diff)

                # Series.get avoids KeyError when a well has no matching cells at
                # all (i.e. every row is FAIL) -> match is then 0.0 percent
                frame = subset if wname != "all" else comb
                match = (
                    frame[flag].value_counts(normalize=True).get("MATCH", 0.0) * 100.0
                )
                result["MATCH%"].append(match)

                # NOTE(review): assumes exactly one warn/stop rule pair applies to
                # each well scope; otherwise the RULE columns would get out of sync
                # with the WELL column -- TODO confirm against ActionsParser
                status = "OK"
                for therule in self.ldata.actions:
                    warnrule = ActionsParser(
                        therule.get("warn", None), mode="warn", verbosity=QCC.verbosity
                    )
                    stoprule = ActionsParser(
                        therule.get("stop", None), mode="stop", verbosity=QCC.verbosity
                    )

                    for issue in [warnrule, stoprule]:
                        if wname != "all" and not issue.all:
                            rulename = issue.mode.upper() + "RULE"
                            result[rulename].append(issue.expression)
                            if issue.compare == "<" and match < issue.limit:
                                status = issue.mode.upper()
                        elif wname == "all" and issue.all:
                            rulename = issue.mode.upper() + "RULE"
                            result[rulename].append(issue.expression)
                            if issue.compare == "<" and match < issue.limit:
                                status = issue.mode.upper()

                result["STATUS"].append(status)

        dfr = self.make_report(
            result, reportfile=self.ldata.reportfile, nametag=self.ldata.nametag
        )
        QCC.print_info("Dataframe is created")
        return dfr

    @staticmethod
    def _validate_input(data, project):
        """Validate data against JSON schemas, TODO complete schemas"""
        spath = Path(fmu.tools.__file__).parent / "qcforward" / "_schemas"

        schemafile = "bw_vs_gridprops_asfile.json"
        if project:
            schemafile = "bw_vs_gridprops_asroxapi.json"

        with open((spath / schemafile), "r", encoding="utf8") as thisschema:
            schema = json.load(thisschema)

        validate(instance=data, schema=schema)