def test_injectsatnumcount():
    """Test that the requested keyword and value always end up in the string.

    Both TABDIMS and EQLDIMS are exercised: injection into an empty string,
    injection into a string already holding the keyword, and injection of
    a recognizable value (99).
    """
    # NOTE(review): test_inject_dimcount expects a dimvalue of 0 to trip an
    # assertion; confirm that the dimvalue=0 calls below are still valid.
    assert "TABDIMS" in inferdims.inject_dimcount("", "TABDIMS", 0, 0)
    assert "TABDIMS" in inferdims.inject_dimcount("", "TABDIMS", 0, 1)
    assert "TABDIMS" in inferdims.inject_dimcount("TABDIMS", "TABDIMS", 0, 1)
    assert "99" in inferdims.inject_dimcount("", "TABDIMS", 0, 99)
    # Injecting into item 1 is expected to leave item 0 defaulted ("1*"),
    # while injecting into item 0 must not produce any defaulted items.
    assert " 1* " in inferdims.inject_dimcount("", "TABDIMS", 1, 99)
    assert "*" not in inferdims.inject_dimcount("", "TABDIMS", 0, 99)

    assert "EQLDIMS" in inferdims.inject_dimcount("", "EQLDIMS", 0, 0)
    assert "EQLDIMS" in inferdims.inject_dimcount("", "EQLDIMS", 0, 1)
    assert "EQLDIMS" in inferdims.inject_dimcount("EQLDIMS", "EQLDIMS", 0, 1)
    # This used to repeat the TABDIMS check above verbatim, leaving the
    # EQLDIMS value injection untested.
    assert "99" in inferdims.inject_dimcount("", "EQLDIMS", 0, 99)
def test_inject_dimcount():
    """Check that inject_dimcount rejects unsupported arguments.

    Each case lists the expected exception type, the pattern its message
    must match, and the positional arguments handed to inject_dimcount.
    """
    swof_deck = "SWOF\n0/\n0/\n"
    equil_deck = "EQUIL\n0/\n0/\n"

    failing_calls = [
        (
            ValueError,
            "Only supports TABDIMS and EQLDIMS",
            (swof_deck, "WELLDIMS", 0, 1),
        ),
        (
            ValueError,
            "Only support item 0 and 1 in TABDIMS",
            (swof_deck, "TABDIMS", 2, 1),
        ),
        (
            ValueError,
            "Only item 0 in EQLDIMS can be injected",
            (equil_deck, "EQLDIMS", 1, 1),
        ),
        (
            AssertionError,
            "dimvalue must be larger than zero",
            (swof_deck, "TABDIMS", 1, 0, 0),
        ),
    ]

    for expected_error, message_pattern, call_args in failing_calls:
        with pytest.raises(expected_error, match=message_pattern):
            inferdims.inject_dimcount(*call_args)
def deck2df(deck, satnumcount=None):
    """Extract the data in the saturation function keywords as a Pandas
    DataFrame.

    Data for all saturation functions are merged into one dataframe.
    The two first columns in the dataframe are 'KEYWORD' (which can be
    SWOF, SGOF, etc.), and then SATNUM which is an index counter from 1 and
    onwards. Then follows the data for each individual keyword that
    is found in the deck.

    SATNUM data can only be parsed correctly if TABDIMS is present
    and stating how many saturation functions there should be.
    If you have a string with TABDIMS missing, you must supply it as a
    string to this function, and not as a parsed deck, as the default
    parser in EclFiles is very permissive (and only returning the first
    function by default).

    Arguments:
        deck (sunbeam.deck or str): Incoming data deck. Always
            supply as a string if you don't know TABDIMS-NTSFUN.
        satnumcount (int): Number of SATNUMs defined in the deck, only
            needed if TABDIMS with NTSFUN is not found in the deck.
            If not supplied (or None) and NTSFUN is not defined,
            it will be attempted inferred.

    Return:
        pd.DataFrame, columns 'KEYWORD', 'SATNUM', followed by the data
        columns of each keyword ('SW', 'KRW', 'KROW', 'PC', ..). An empty
        dataframe is returned if no saturation data is found or if the
        data length of a record does not match its column count.
    """
    if "TABDIMS" not in deck:
        if not isinstance(deck, str):
            logging.critical(
                "Will not be able to guess NTSFUN from a parsed deck without TABDIMS."
            )
            logging.critical(
                "Only data for first SATNUM will be returned. "
                "Instead, supply string to deck2df()"
            )
            # Extra tables may already have been dropped by the parser, so
            # one table is all that can be trusted here.
            satnumcount = 1
        # If TABDIMS is in the deck, NTSFUN always has a value. It will
        # be set to 1 if defaulted.
        if not satnumcount:
            logging.warning(
                "TABDIMS+NTSFUN or satnumcount not supplied. Will be guessed."
            )
            satnumcount = inferdims.guess_dim(deck, "TABDIMS", 0)
        augmented_strdeck = inferdims.inject_dimcount(
            str(deck), "TABDIMS", 0, satnumcount
        )
        # Re-parse the modified deck:
        deck = EclFiles.str2deck(augmented_strdeck)

    # A string that already contains TABDIMS has not been parsed yet, and
    # the record lookup below needs a parsed deck.
    if isinstance(deck, str):
        deck = EclFiles.str2deck(deck)

    frames = []
    for keyword in KEYWORD_COLUMNS:
        if keyword not in deck:
            continue
        columns = KEYWORD_COLUMNS[keyword]
        column_count = len(columns)
        for satnum, deckrecord in enumerate(deck[keyword], start=1):
            # All data for an entire SATNUM is returned in one list
            data = np.array(deckrecord[0])
            # The flat list must split evenly into the keyword's columns
            if len(data) % column_count:
                logging.error("Inconsistent data length or bug")
                return pd.DataFrame()
            satpoints = len(data) // column_count
            dframe = pd.DataFrame(
                columns=columns, data=data.reshape(satpoints, column_count)
            )
            dframe["SATNUM"] = satnum
            dframe["KEYWORD"] = keyword
            frames.append(dframe[["KEYWORD", "SATNUM"] + columns])

    nonempty_frames = [frame for frame in frames if not frame.empty]
    if nonempty_frames:
        return pd.concat(nonempty_frames, axis=0, sort=False)
    logging.warning("No saturation data found in deck")
    return pd.DataFrame()
def deck2df(deck, ntequl=None):
    """Extract the data in the EQUIL keyword as a Pandas DataFrame.

    How each data value in the EQUIL records are to be interpreted
    depends on the phase configuration in the deck, which means
    that we need more than the EQUIL section alone to determine the
    dataframe.

    If ntequl is not supplied and EQLDIMS is not in the deck, the
    equil data is not well defined in terms of sunbeam. This means
    that we have to infer the correct number of EQUIL lines from what
    gives us successful parsing from sunbeam. In those cases, the
    deck must be supplied as a string, if not, extra EQUIL lines
    are possibly already removed by the sunbeam parser in
    eclfiles.str2deck().

    Arguments:
        deck (sunbeam.deck or str): Eclipse deck or string with deck. If
            not string, EQLDIMS must be present in the deck.
        ntequl (int): If not None, should state the NTEQUL in EQLDIMS. If
            None and EQLDIMS is not present, it will be inferred.

    Return:
        pd.DataFrame, one row per EQUIL record. Columns depend on the
        phases present in the deck; placeholder (IGNORE*) columns are
        removed. An empty dataframe is returned if EQUIL is not in the deck.

    Raises:
        ValueError: If the combination of OIL, GAS and WATER found in the
            deck is not supported (e.g. no phases at all).
    """
    if "EQLDIMS" not in deck:
        if not isinstance(deck, str):
            logging.critical(
                "Will not be able to guess NTEQUL from a parsed deck without EQLDIMS."
            )
            logging.critical(
                "Only data for the first EQUIL will be returned. "
                "Instead, supply string to deck2df()"
            )
            ntequl = 1
        if not ntequl:
            logging.warning("EQLDIMS+NTEQUL or ntequl not supplied. Will be guessed")
            ntequl = inferdims.guess_dim(deck, "EQLDIMS", 0)
        # str() so that a parsed deck is also handed over as text
        augmented_strdeck = inferdims.inject_dimcount(str(deck), "EQLDIMS", 0, ntequl)
        deck = EclFiles.str2deck(augmented_strdeck)

    # A string that already contains EQLDIMS has not been parsed yet
    if isinstance(deck, str):
        deck = EclFiles.str2deck(deck)

    # Column layout keyed by presence of (OIL, GAS, WATER)
    columns_by_phases = {
        (True, True, True): [  # oil-water-gas
            "DATUM",
            "PRESSURE",
            "OWC",
            "PCOWC",
            "GOC",
            "PCGOC",
            "INITRS",
            "INITRV",
            "ACCURACY",
        ],
        (False, True, True): [  # gas-water
            "DATUM",
            "PRESSURE",
            "GWC",
            "PCGWC",
            "IGNORE1",
            "IGNORE2",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ],
        (True, False, True): [  # oil-water
            "DATUM",
            "PRESSURE",
            "OWC",
            "PCOWC",
            "IGNORE1",
            "IGNORE2",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ],
        (True, True, False): [  # oil-gas
            "DATUM",
            "PRESSURE",
            "IGNORE1",
            "IGNORE2",
            "GOC",
            "PCGOC",
            "IGNORE3",
            "IGNORE4",
            "ACCURACY",
        ],
    }
    phases = tuple(phase in deck for phase in ("OIL", "GAS", "WATER"))
    if sum(phases) == 1:
        columnnames = ["DATUM", "PRESSURE"]
    else:
        # None when no phase is present, so that the check below raises
        # ValueError instead of failing on an unbound name.
        columnnames = columns_by_phases.get(phases)
    if not columnnames:
        raise ValueError("Unsupported phase configuration")

    if "EQUIL" not in deck:
        return pd.DataFrame()

    records = []
    for rec in deck["EQUIL"]:
        rowlist = [x[0] for x in rec]
        if len(rowlist) > len(columnnames):
            rowlist = rowlist[: len(columnnames)]
            logging.warning(
                "Something wrong with columnnames or EQUIL-data, data is chopped!"
            )
        records.append(rowlist)

    dataframe = pd.DataFrame(columns=columnnames, data=records)
    # Placeholder columns carry no information for this phase configuration
    ignored = [col for col in dataframe.columns if "IGNORE" in col]
    return dataframe.drop(columns=ignored)