def save_statistical_features():
    """Compute statistics of every lifted ROM variable and save them.

    For each variable in config.ROM_VARIABLES, the minimum, maximum, sum,
    standard deviation, and mean are computed along axis 0 of the lifted
    variable data (presumably the spatial axis, giving one value per time
    step -- confirm against dproc.getvar()). The results are written to the
    HDF5 file at config.statistical_features_path() as the datasets
    "{var}_min", "{var}_max", "{var}_sum", "{var}_std", and "{var}_mean",
    together with the time array under "time".

    This only needs to be done once.
    """
    # Read the complete GEMS data set and its time domain.
    snapshots, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted = dproc.lift(snapshots)

    # Gather the statistics of each variable, keyed by dataset suffix.
    with utils.timed_block("Computing statistical features of variables"):
        features = {}
        for var in config.ROM_VARIABLES:
            values = dproc.getvar(var, lifted)
            stats = {
                "min": values.min(axis=0),
                "max": values.max(axis=0),
                "sum": values.sum(axis=0),
                "std": values.std(axis=0),
            }
            # Reuse the sum rather than reducing the data a second time.
            stats["mean"] = stats["sum"] / values.shape[0]
            features[var] = stats

    # Write one dataset per (variable, statistic) pair, then the time array.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for suffix, array in features[var].items():
                    hf.create_dataset(f"{var}_{suffix}", data=array)
            hf.create_dataset("time", data=t)
    logging.info(f"Statistical features saved to {data_path}")
def load_statistical_features(keys, k=None):
    """Load statistical features of the lifted data, computed over the
    spatial domain at each point in time.

    Parameters
    ----------
    keys : str, list(str), or None
        Which data set(s) to load. Options:
        * {var}_min : minimum of variable var
        * {var}_max : maximum of variable var
        * {var}_sum : sum (integral) of variable var
        * {var}_std : standard deviation of variable var
        * {var}_mean : mean of variable var
        Here var is a member of config.ROM_VARIABLES. Examples:
        * "T_mean" -> mean temperature
        * "vx_min" -> minimum x-velocity
        * "CH4_sum" -> methane molar concentration integral
        A single string is treated as a one-element list. If None, load
        "T_mean" and "{spc}_sum" for every spc in config.SPECIES.

    k : int, slice, or one-dimensional ndarray of sorted integer indices
        Number of time steps of data to load (default all). A scalar k
        selects the first k entries; a slice or index array is used as is.

    Returns
    -------
    features : dict(str -> (k,) ndarray) or (k,) ndarray
        Dictionary of statistical feature arrays with keys `keys`.
        If only one key is given, return the actual array, not a dict.

    t : (k,) ndarray
        Time domain corresponding to the statistical features.
    """
    # Locate the data.
    data_path = _checkexists(config.statistical_features_path())

    # Parse arguments.
    if isinstance(keys, str):
        keys = [keys]
    elif keys is None:
        # Integrals are stored under the "_sum" suffix (see the options
        # above), so the defaults must use that suffix to be loadable.
        # NOTE(review): assumes every entry of config.SPECIES is also a
        # member of config.ROM_VARIABLES -- confirm.
        keys = ["T_mean"] + [f"{spc}_sum" for spc in config.SPECIES]
    if np.isscalar(k) or k is None:
        # A scalar (or None) is the stop of a slice: the first k entries.
        k = slice(None, k)

    # Extract the data.
    with timed_block(f"Loading statistical features from {data_path}"):
        with h5py.File(data_path, 'r') as hf:
            if len(keys) == 1:
                return hf[keys[0]][k], hf["time"][k]
            return {key: hf[key][k] for key in keys}, hf["time"][k]
def save_statistical_features():
    """Compute spatial and temporal statistics of each lifted ROM variable
    and save them for later. This only needs to be done once.

    For every variable in config.ROM_VARIABLES, the minimum, maximum, sum,
    standard deviation, and mean are computed twice: along axis 0 (labeled
    "space") and along axis 1 (labeled "time"). The "space" mean divides
    the sum by the length of axis 0; the "time" mean divides by the number
    of entries in the time array.

    Results are written to the HDF5 file at
    config.statistical_features_path() as "{label}/{var}_{stat}", where
    label is "space" or "time" and stat is one of min, max, sum, std, mean.
    The time array is stored as the dataset "t".
    """
    # Read the complete GEMS data set and its time domain.
    raw_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted = dproc.lift(raw_data)

    # Compute the statistics, reporting progress on stdout as we go.
    suffixes = ("min", "max", "sum", "std", "mean")
    with utils.timed_block("Computing statistical features of variables"):
        # One lookup table per statistic, keyed by "{label}/{var}".
        tables = {suffix: {} for suffix in suffixes}
        for var in config.ROM_VARIABLES:
            values = dproc.getvar(var, lifted)
            for axis, label in enumerate(("space", "time")):
                name = f"{label}/{var}"
                print(f"\n\tmin_{label}({var})", end='..', flush=True)
                tables["min"][name] = values.min(axis=axis)
                print(f"max_{label}({var})", end='..', flush=True)
                tables["max"][name] = values.max(axis=axis)
                print(f"sum_{label}({var})", end='..', flush=True)
                tables["sum"][name] = values.sum(axis=axis)
                print(f"std_{label}({var})", end='..', flush=True)
                tables["std"][name] = values.std(axis=axis)
            # Means reuse the sums instead of reducing the data again.
            over_space, over_time = f"space/{var}", f"time/{var}"
            tables["mean"][over_space] = (
                tables["sum"][over_space] / values.shape[0]
            )
            tables["mean"][over_time] = tables["sum"][over_time] / t.size

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for prefix in ("space", "time"):
                    name = f"{prefix}/{var}"
                    for suffix in suffixes:
                        hf.create_dataset(f"{name}_{suffix}",
                                          data=tables[suffix][name])
            hf.create_dataset("t", data=t)
    logging.info(f"Statistical features saved to {data_path}")
def main(trainsize, r, regs, elems=None, plotPointTrace=False,
         plotRelativeErrors=False, plotSpatialStatistics=False):
    """Make the indicated visualization(s).

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute time traces.
        Only passed on to point_traces().

    plotPointTrace : bool
        If True, call point_traces(trainsize, r, regs, elems).

    plotRelativeErrors : bool
        If True, call errors_in_time(trainsize, r, regs).

    plotSpatialStatistics : bool
        If True, call spatial_statistics(trainsize, r, regs), first
        running save_statistical_features() if the file at
        config.statistical_features_path() does not exist yet.
    """
    utils.reset_logger(trainsize)

    # Point traces in time.
    if plotPointTrace:
        logging.info("POINT TRACES")
        point_traces(trainsize, r, regs, elems)

    # Relative projection / prediction errors in time.
    if plotRelativeErrors:
        logging.info("ERRORS IN TIME")
        errors_in_time(trainsize, r, regs)

    # Spatial statistics in time.
    if not plotSpatialStatistics:
        return
    logging.info("SPATIAL STATISTICS")
    # The GEMS features are computed only if they are not on disk already.
    features_exist = os.path.isfile(config.statistical_features_path())
    if not features_exist:
        save_statistical_features()
    spatial_statistics(trainsize, r, regs)
def main(trainsize, r, reg, elems,
         plotTimeTrace=False, plotStatisticalFeatures=False):
    """Make the indicated visualization(s).

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM. This is also the number of retained POD modes
        (left singular vectors) used to project the training data.

    reg : float
        The regularization parameters used to train each ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute time traces.
        Only passed on to time_traces().

    plotTimeTrace : bool
        If True, call time_traces(trainsize, r, reg, elems).

    plotStatisticalFeatures : bool
        If True, call statistical_features(trainsize, r, reg), first
        running save_statistical_features() if the file at
        config.statistical_features_path() does not exist yet.
    """
    utils.reset_logger(trainsize)

    # Time traces (single ROM, several monitoring locations).
    if plotTimeTrace:
        logging.info("TIME TRACES")
        time_traces(trainsize, r, reg, elems)

    # Statistical features (single ROM, several features).
    if not plotStatisticalFeatures:
        return
    logging.info("STATISTICAL FEATURES")
    # The GEMS features are computed only if they are not on disk already.
    features_exist = os.path.isfile(config.statistical_features_path())
    if not features_exist:
        save_statistical_features()
    statistical_features(trainsize, r, reg)
def load_temporal_statistics(keys):
    """Load statistical features of the lifted data, computed over the
    temporal domain at each spatial point.

    Each requested key is read from the "time/" group of the HDF5 file at
    config.statistical_features_path(), i.e., key "T_mean" is read from the
    dataset "time/T_mean".

    Parameters
    ----------
    keys : str or list(str)
        Which data set(s) to load. Options:
        * {var}_min : minimum of variable var
        * {var}_max : maximum of variable var
        * {var}_sum : sum (integral) of variable var
        * {var}_std : standard deviation of variable var
        * {var}_mean : mean of variable var
        Here var is a member of config.ROM_VARIABLES. Examples:
        * "T_mean" -> time-averaged temperature
        * "vx_min" -> minimum x-velocity
        * "CH4_sum" -> methane molar concentration time integral
        A single string is treated as a one-element list.

    Returns
    -------
    features : dict(str -> (N,) ndarray) or (N,) ndarray
        Dictionary of statistical feature arrays with keys `keys`.
        If only one key is given, return the actual array, not a dict.
    """
    # Locate the data.
    data_path = _checkexists(config.statistical_features_path())

    # Normalize a lone key into a list of keys.
    names = [keys] if isinstance(keys, str) else keys

    # Extract the data from the "time" group of the file.
    with timed_block(f"Loading statistical features from {data_path}"), \
            h5py.File(data_path, 'r') as hf:
        if len(names) != 1:
            return {name: hf[f"time/{name}"][:] for name in names}
        only = names[0]
        return hf[f"time/{only}"][:]