def get(
    self,
    field: str = invalidstr,
    method: str = "m",
    meanlength: int = invalidint,
) -> np.ndarray:
    """Return the saved result data named 'field'.

    Valid fields are listed in self.datanames.
    If no field is provided, the full dataset is returned.

    'method' can be set to:

    - 'm', the average value over all processes is returned (default).
    - 's', the standard deviation of the values over all processes is returned.
    - '+X', where X is a numerical value, the returned value is 'mean + X·std'.
    - '-X', where X is a numerical value, the returned value is 'mean - X·std'.
    - '*', all individual values from each process are returned.
    - 'pX', where X is an integer, the values from process number X are returned.
    - 'sum', the values of all processes are summed together.

    If meanlength is set, a running mean of the corresponding length is returned.

    @param field: data field (Default value = invalidstr)
    @type field: str
    @param method: method for processing data over processes (Default value = 'm')
    @type method: str
    @param meanlength: running mean length (Default value = invalidint)
    @type meanlength: int
    @return: the processed set of data
    @rtype: ndarray
    """
    loc = self._loc(field) if isvalid(field) else slice(None, None, None)
    if method == "*":
        return self.data[:, loc]
    if method[0] == "p":
        res = self.data[int(method[1:]), loc]
    # Compare the full string here: method[0] is a single character and can
    # never equal "sum", so the original 'method[0] == "sum"' branch was dead.
    elif method == "sum":
        res = np.nansum(self.data[:, loc, :], axis=0)
    elif method[0] in ["m", "s", "+", "-"]:
        mean = np.nanmean(self.data[:, loc, :], axis=0)
        if method == "m":
            res = mean
        else:
            std = np.nanstd(self.data[:, loc, :], axis=0)
            if method == "s":
                res = std
            else:
                res = mean + float(method) * std
    else:
        raise ValueError(f"'method'={method} is invalid")
    return (
        # Running mean taken from
        # https://stackoverflow.com/questions/13728392/moving-average-or-running-mean
        np.convolve(res, np.ones((meanlength,)) / meanlength, mode="valid")
        if isvalid(meanlength)
        else res
    )
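# Usage sketch (illustrative only): how 'method' and 'meanlength' combine,
# assuming `reader` is an object exposing the get() method above and that a
# field named "ptime" was recorded (the field name is hypothetical).
def _example_get_usage(reader) -> None:
    mean = reader.get("ptime")                   # average over processes
    upper = reader.get("ptime", method="+1.5")   # mean + 1.5·std envelope
    per_proc = reader.get("ptime", method="*")   # raw values, one row per process
    total = reader.get("ptime", method="sum")    # sum over all processes
    smooth = reader.get("ptime", meanlength=20)  # 20-point running mean of the average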
def connect(self, filename: str = "") -> None:
    """Direct logging to text file 'filename'; if empty, log to standard output.

    @param filename: log file name
    @type filename: str
    """
    self.filename = filename if filename else invalidstr
    dest = filename if isvalid(filename) else "stream"
    if self.connected:
        self.disconnect("Disconnecting old handler before connection to new one.")
    self._handler = (
        FileHandler(self.filename) if isvalid(self.filename) else StreamHandler()
    )
    self._logger.addHandler(self._handler)
    self.debug(f"Connected to {self.filename}; reason: Logger directed to {dest}")
    self.connected = True
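# Usage sketch (illustrative only): switching log destinations, assuming
# `logger` exposes the connect()/disconnect() methods above; the file name
# "run.log" is hypothetical.
def _example_connect_usage(logger) -> None:
    logger.connect("run.log")  # route log records to 'run.log' through a FileHandler
    logger.connect()           # back to standard output through a StreamHandler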
def fromhdf5(
    cls,
    filename: str,
    snapnum: int = invalidint,
    snapstep: int = -1,
    **kwd: Any,
) -> "System":
    """Create a System object from a result hdf5 file.

    Additional parameters can be passed; they will override the ones defined
    in the hdf5 file.

    If a 'snapnum' is provided, initial conditions will be set from the
    snapshot recorded by the corresponding thread number at the corresponding
    'snapstep' (defaulting to the final snapshot).

    @param filename: name of the result hdf5 file
    @type filename: str
    @param snapnum: snapshot number to start from (Default value = invalidint)
    @type snapnum: int
    @param snapstep: snapshot step to start from (Default value = -1)
    @type snapstep: int
    @param kwd: additional parameters
    @return: newly created System object
    @rtype: System
    """
    res = ResultReader(filename)
    param = res.parameters
    param.set_param(**kwd)
    if isvalid(snapnum):
        if snapnum < 0:
            snapnum = MPI_STATUS.rank % res.size
        param.set_param(init=res.getsnap_comp(snapnum, snapstep))
    return cls(param)
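# Usage sketch (illustrative only): restarting a run from a previous result
# file, assuming "prev.hdf5" exists and that 'tend' is a valid parameter name
# to override (both names are hypothetical).
def _example_fromhdf5_usage() -> "System":
    # A negative snapnum picks the snapshot matching the current MPI rank;
    # snapstep keeps its default of -1, i.e. the final recorded snapshot.
    return System.fromhdf5("prev.hdf5", snapnum=-1, tend=200.0)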
def table(
    self,
    maps: str = invalidstr,
    method: str = "m",
    meanlength: int = invalidint,
) -> DataFrame:
    """Return the requested data formatted as a pandas DataFrame.

    If 'maps' is set, a dataframe of the corresponding map field is returned;
    valid values are listed in self.mapnames. Otherwise, a dataframe with all
    datanames is returned.

    The 'method' and 'meanlength' parameters are as defined in the getmap and
    get methods.

    @param maps: map field name (Default value = invalidstr)
    @type maps: str
    @param method: method for processing data over processes (Default value = 'm')
    @type method: str
    @param meanlength: running mean length (Default value = invalidint)
    @type meanlength: int
    @return: the processed set of data
    @rtype: DataFrame
    """
    if isvalid(maps):
        data = self.getmap(field=maps, method=method, meanlength=meanlength)
        index = self.categories(maps)
    else:
        data = self.get(method=method, meanlength=meanlength)
        index = self.datanames
    return DataFrame(data, index=index)
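# Usage sketch (illustrative only): assembling DataFrames, assuming `reader`
# exposes the table() method above and records a map named "lengths"
# (hypothetical name).
def _example_table_usage(reader) -> None:
    all_means = reader.table()                          # all datanames, averaged over processes
    spread = reader.table(maps="lengths", method="s")   # std of the "lengths" map, indexed by category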
def getmap(
    self, field: str, method: str = "m", meanlength: int = invalidint
) -> np.ndarray:
    """Return the saved property map data named 'field'.

    Valid fields are listed in self.mapnames.

    'method' can be set to:

    - 'm', the average value over all processes is returned (default).
    - 's', the standard deviation of the values over all processes is returned.
    - '+X', where X is a numerical value, the returned value is 'mean + X·std'.
    - '-X', where X is a numerical value, the returned value is 'mean - X·std'.
    - '*', all individual values from each process are returned.
    - 'pX', where X is an integer, the values from process number X are returned.

    If meanlength is set, a running mean of the corresponding length is returned.

    @param field: data field
    @type field: str
    @param method: method for processing data over processes (Default value = 'm')
    @type method: str
    @param meanlength: running mean length (Default value = invalidint)
    @type meanlength: int
    @return: the processed set of data
    @rtype: ndarray
    """
    try:
        data = self.maps[field][:, :, 1:]
    except KeyError:
        raise ValueError(f"{field} is not a recorded map name") from None
    if method == "*":
        return data
    if method[0] == "p":
        res = data[int(method[1:])]
    elif method[0] in ["m", "s", "+", "-"]:
        mean = np.nanmean(data, axis=0)
        if method == "m":
            res = mean
        else:
            std = np.nanstd(data, axis=0)
            if method == "s":
                res = std
            else:
                res = mean + float(method) * std
    else:
        raise ValueError(f"'method'={method} is invalid")
    return (
        # Running mean taken from
        # https://stackoverflow.com/questions/13728392/moving-average-or-running-mean
        np.convolve(res, np.ones((meanlength,)) / meanlength, mode="valid")
        if isvalid(meanlength)
        else res
    )
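# Usage sketch (illustrative only): reading property maps, assuming `reader`
# exposes the getmap() method above and that "lengths" is a recorded map name
# (hypothetical).
def _example_getmap_usage(reader) -> None:
    proc0 = reader.getmap("lengths", method="p0")                 # values from process 0 only
    lower = reader.getmap("lengths", method="-2")                 # mean - 2·std envelope
    smooth = reader.getmap("lengths", method="m", meanlength=10)  # 10-point running mean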
def unregister(self, proba_pos: int) -> None:
    """Unregister the object located at the given storage index.

    @param proba_pos: storage index of the object
    @type proba_pos: int
    """
    if isvalid(proba_pos):
        self.probtot -= self._problist[proba_pos]
        self._problist[proba_pos] = 0.0
        self._mapobj[proba_pos] = None
        self._queue.append(proba_pos)
def data_resize(self, nbcol: float = invalidfloat) -> None:
    """Resize the hdf5 datasets to the given number of data columns.

    @param nbcol: number of columns to resize to (if invalid, trim unused
        columns) (Default value = invalidfloat)
    @type nbcol: float
    """
    if not isvalid(nbcol):
        nbcol = MPI_STATUS.max(self.currentcol)
    self.data.resize(nbcol, axis=2)
    for datamap in self.maps.values():
        datamap.resize(nbcol + 1, axis=2)
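# Usage sketch (illustrative only): resizing the result datasets, assuming
# `writer` exposes the data_resize() method above.
def _example_resize_usage(writer) -> None:
    writer.data_resize(500)  # grow every dataset to 500 data columns (maps keep one extra)
    writer.data_resize()     # trim to the largest column actually written by any process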
def update(self, proba_pos: int, proba: float) -> None:
    """Update the probability of an object.

    @param proba_pos: storage index of the object
    @type proba_pos: int
    @param proba: new probability
    @type proba: float
    """
    # This assertion may greatly reduce performance; run Python with -O to skip it!
    assert isvalid(proba_pos)
    # Get the probability change induced by the event
    delta = proba - self._problist[proba_pos]
    # Set the new probability of the event
    self._problist[proba_pos] = proba
    # Update the total probability accordingly
    self.probtot += delta
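# Usage sketch (illustrative only): keeping the total probability in sync with
# per-object changes, assuming `plist` exposes the update()/unregister()
# methods above and that index 3 holds a registered object (hypothetical setup).
def _example_problist_usage(plist) -> None:
    plist.update(3, 0.25)  # probtot shifts by the delta from the old probability
    plist.unregister(3)    # zero the slot, recycle its index, decrease probtot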
def __init__(
    self,
    filename: str,
    maxstrlen: int = 256,
    lengrow: int = 10,
    timeformat: str = "%H:%M:%S, %d/%m/%y",
) -> None:
    """Open the hdf5 result file.

    @param filename: name of the hdf5 file
    @type filename: str
    @param maxstrlen: maximum length of strings stored in the file (Default value = 256)
    @type maxstrlen: int
    @param lengrow: maximal remaining data space left empty before adding more space (Default value = 10)
    @type lengrow: int
    @param timeformat: time/date formatting string
    @type timeformat: str
    """
    if not isvalid(filename) or filename == "":
        raise FileNotFoundError("Please enter a valid output file name")
    self.filename: str = MPI_STATUS.bcast(filename)
    """name of hdf5 file"""
    self.maxstrlen: int = maxstrlen
    """maximum length of strings stored in the file"""
    self.lengrow: int = lengrow
    """maximal remaining data space left empty before adding more space"""
    self.timeformat: str = timeformat
    """time/date formatting string"""
    self.h5file: File
    """hdf5 file object"""
    try:
        if MPI_STATUS.ismpi:
            self.h5file = File(filename, "w", driver="mpio", comm=MPI_STATUS.comm)
        else:
            self.h5file = File(filename, "w")
    except OSError as err:
        raise FileCreationError(f"'{filename}': {err}") from err
    except ValueError as err:
        raise FileNotFoundError(f"Couldn't find file {filename}: {err}") from err
    # result data
    self._init_stat: bool = False
    """flag indicating whether the writer is initialized"""
    self.nbcol: int
    """Available number of columns for writing data"""
    self.dcol: int
    """Number of columns to add when space goes missing"""
    self.run: Group
    """hdf5 Group 'Run' for storing generic run information"""
    self.params: Group
    """hdf5 Group 'Parameters' for storing run parameters"""
    self.statparam: Group
    """hdf5 Group 'Stats' for storing statistics parameters"""
    self.mapparam: Group
    """hdf5 Group 'Maps' for storing maps parameters"""
    self.ruleparam: Group
    """hdf5 Group 'Rules' for storing ruleset parameters"""
    self.dataset: Group
    """hdf5 Group 'Dataset' for storing result data"""
    self.data: Dataset
    """hdf5 Dataset 'Dataset/results' (recording results)"""
    self.end: Dataset
    """hdf5 Dataset 'Dataset/end' (recording end messages)"""
    self.snapshots: Group
    """hdf5 Group 'Snapshots' for storing snapshots"""
    self.timesnap: Dataset
    """hdf5 Dataset 'Snapshots/time' (times of snapshots)"""
    self.compsnap: Dataset
    """hdf5 Dataset 'Snapshots/compounds' (compounds snapshots)"""
    self.reacsnap: Dataset
    """hdf5 Dataset 'Snapshots/reactions' (reactions snapshots)"""
    self.reacsnapsaved: Dataset
    """hdf5 Dataset 'Snapshots/reactions_saved' (were reactions snapshotted?)"""
    self._snapsized: bool
    """If True, file space has been correctly sized for storing snapshots"""
    self.maps: Group
    """hdf5 Group 'Maps' for storing maps statistics"""
    self.currentcol: int
    """Current column to save data to"""
    # Logging data
    self._init_log: bool = False
    """flag indicating whether logging into the writer is initialized"""
    self.maxlog: int
    """maximum number of log lines to be recorded in the file"""
    self.dlog: int
    """number of log lines to add in the file when space goes missing"""
    self.logging: Group
    """hdf5 Group 'Logging' for storing log data"""
    self.logcount: Dataset
    """hdf5 Dataset 'Logging/count' (number of recorded log lines)"""
    self.logs: Dataset
    """hdf5 Dataset 'Logging/logs' (recorded log lines)"""
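# Usage sketch (illustrative only): opening a result file for writing,
# assuming the enclosing class is named ResultWriter (a hypothetical name, its
# constructor is shown above) and "out.hdf5" is a writable path.
def _example_writer_usage() -> "ResultWriter":
    # Under MPI the file is opened collectively with the 'mpio' driver;
    # otherwise a plain h5py File is created.
    return ResultWriter("out.hdf5", maxstrlen=128, lengrow=10)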