def get_feature(key, data, V=None, scales=None):
    """Reconstruct a spatial statistical feature from data, unprojecting and
    unscaling if needed.

    Parameters
    ----------
    key : str
        Which statistical feature to calculate, written "<variable>_<action>"
        (T_mean, CH4_sum, etc.). The action is the name of the reduction
        method that is looked up on the extracted array and called with
        ``axis=0``.

    data : (r,nt) or (config.DOF*config.NUM_ROMVARS,nt) ndarray
        Data from which to extract the features, either the output of a ROM
        or a high-dimensional data set.

    V : (config.DOF*config.NUM_ROMVARS,r) ndarray or None
        Rank-r POD basis. Only needed if `data` is low-dimensional ROM output.

    scales : (config.NUM_ROMVARS,2) ndarray or None
        Information for how the data was scaled (see data_processing.scale()).
        Only needed if `data` is low-dimensional ROM output.

    Returns
    -------
    feature : (nt,) ndarray
        The specified statistical feature.

    Raises
    ------
    ValueError
        If `key` does not contain exactly one underscore.
    AttributeError
        If the action named in `key` is not an attribute of the extracted
        array.

    Notes
    -----
    The data is unprojected and unscaled only when BOTH `V` and `scales` are
    given; otherwise `data` is treated as high-dimensional.
    """
    var, action = key.split('_')
    print(f"{action}({var})", end='..', flush=True)
    if V is not None and scales is not None:
        # Low-dimensional input: lift through the basis rows of this
        # variable, then undo the scaling.
        variable = dproc.unscale(dproc.getvar(var, V) @ data, scales, var)
    else:
        variable = dproc.getvar(var, data)
    # Resolve the reduction by attribute name instead of eval()-ing a code
    # string assembled from `key`, so `key` can never execute arbitrary code.
    return getattr(variable, action)(axis=0)
def save_statistical_features():
    """Compute statistical features of each ROM variable on the full time
    domain and save them for later. This only needs to be done once.

    For every variable in config.ROM_VARIABLES, the minimum, maximum, sum,
    standard deviation, and mean are taken along axis 0 of that variable's
    lifted data. The results are written as "<variable>_<statistic>" data
    sets, together with the time array ("time"), to the HDF5 file at
    config.statistical_features_path().
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features: one dictionary per statistic, keyed by
    # variable name. Insertion order fixes the order of the saved data sets.
    with utils.timed_block("Computing statistical features of variables"):
        stats = {"min": {}, "max": {}, "sum": {}, "std": {}, "mean": {}}
        for var in config.ROM_VARIABLES:
            block = dproc.getvar(var, lifted_data)
            stats["min"][var] = block.min(axis=0)
            stats["max"][var] = block.max(axis=0)
            stats["sum"][var] = block.sum(axis=0)
            stats["std"][var] = block.std(axis=0)
            # The mean reuses the sum instead of reducing a second time.
            stats["mean"][var] = stats["sum"][var] / block.shape[0]

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for stat, values in stats.items():
                    hf.create_dataset(f"{var}_{stat}", data=values[var])
            hf.create_dataset("time", data=t)
    logging.info(f"Statistical features saved to {data_path}")
def save_statistical_features():
    """Compute the spatial and temporal statistics (min, max, sum, std, mean)
    for each variable and save them for later. This only needs to be done
    once.

    Statistics labeled "space" reduce along axis 0 of a variable's lifted
    data; those labeled "time" reduce along axis 1. Results are written as
    "<space|time>/<variable>_<statistic>" data sets, together with the time
    array ("t"), to the HDF5 file at config.statistical_features_path().
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    reductions = ("min", "max", "sum", "std")
    with utils.timed_block("Computing statistical features of variables"):
        stats = {stat: {} for stat in reductions + ("mean",)}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            for axis, label in enumerate(("space", "time")):
                name = f"{label}/{var}"
                for stat in reductions:
                    # Each (label, variable) pair starts a new progress line.
                    lead = "\n\t" if stat == "min" else ""
                    print(f"{lead}{stat}_{label}({var})",
                          end='..', flush=True)
                    stats[stat][name] = getattr(val, stat)(axis=axis)
            # Means are derived from the sums: the spatial mean divides by
            # the number of rows, the temporal mean by the number of times.
            space, time = f"space/{var}", f"time/{var}"
            stats["mean"][space] = stats["sum"][space] / val.shape[0]
            stats["mean"][time] = stats["sum"][time] / t.size

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for prefix in ("space", "time"):
                    name = f"{prefix}/{var}"
                    for stat, values in stats.items():
                        hf.create_dataset(f"{name}_{stat}", data=values[name])
            hf.create_dataset("t", data=t)
    logging.info(f"Statistical features saved to {data_path}")
def test_scalers(lifted_data):
    """Test data_processing.scale() and data_processing.unscale(), including
    checking that they are inverses.

    Parameters
    ----------
    lifted_data : ndarray
        Lifted data whose i-th block of config.DOF rows is indexed as the
        i-th variable of config.ROM_VARIABLES. The array itself is never
        modified; copies are passed to scale().
    """
    # Scale a copy of the data, learning the scaling at the same time.
    with utils.timed_block("Scaling lifted test data"):
        scaled, scales = dproc.scale(lifted_data.copy())
        assert np.allclose(scales[:, -2:], config.SCALE_TO)

    # Verify the scales and that the shift worked for each variable.
    dof = config.DOF
    with utils.timed_block("Verifying shift results with scales"):
        for i, name in enumerate(config.ROM_VARIABLES):
            rows = slice(i * dof, (i + 1) * dof)
            if name in ("vx", "vy"):
                # Velocities: the recorded bounds are +/- the largest
                # magnitude, and the scaled data peaks at magnitude 1.
                assert -scales[i, 0] == scales[i, 1]
                assert scales[i, 1] == np.abs(lifted_data[rows]).max()
                assert np.isclose(np.abs(scaled[rows]).max(), 1)
                continue
            # Other variables: the recorded bounds are the data extremes,
            # which map onto the target interval in columns 2 and 3.
            assert lifted_data[rows].min() == scales[i, 0]
            assert lifted_data[rows].max() == scales[i, 1]
            assert np.isclose(scaled[rows].min(), scales[i, 2])
            assert np.isclose(scaled[rows].max(), scales[i, 3])

    # Redo the shift with the given scales and compare the results.
    with utils.timed_block("Verifying repeat shift with given scales"):
        rescaled, _ = dproc.scale(lifted_data.copy(), scales)
        assert np.allclose(rescaled, scaled)

    # Undo the shift and compare the results.
    with utils.timed_block("Verifying inverse scaling"):
        restored = dproc.unscale(scaled, scales)
        assert np.allclose(restored, lifted_data)

    # Check the inverse property for a subset of the variables.
    with utils.timed_block("Repeating experiment with nontrivial varindices"):
        chosen = np.random.choice(config.ROM_VARIABLES,
                                  size=4, replace=False)
        stacked = np.vstack([dproc.getvar(v, lifted_data) for v in chosen])
        scaled_subset, _ = dproc.scale(stacked.copy(), scales, chosen)
        round_trip = dproc.unscale(scaled_subset, scales, chosen)
        assert np.allclose(round_trip, stacked)
def errors_in_time(trainsize, r, regs, cutoff=60000):
    """Plot spatially averaged errors, and the projection error, in time.

    For each variable, the plotted error at each time is the mean over
    axis 0 of the absolute difference from the GEMS data, divided by the
    maximum over axis 0 of the absolute GEMS data at that time. A vertical
    line marks t[trainsize]. The figure is saved through utils.save_figure().

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to plot.
    """
    def _scaled_mean_abs_error(approx, truth, denom):
        """Column-wise mean absolute error, normalized by `denom`."""
        return np.mean(np.abs(approx - truth), axis=0) / denom

    # Load and simulate the ROM.
    t, V, scales, q_rom = simulate_rom(trainsize, r, regs, cutoff)

    # Load and lift the true results.
    raw, _ = utils.load_gems_data(cols=cutoff)
    with utils.timed_block("Lifting GEMS data"):
        data_gems = dproc.lift(raw[:, :cutoff])
    del raw

    # Shift and project the data (unscaling done later by chunk). The scaled
    # copy is a temporary that is never bound to a name.
    with utils.timed_block("Projecting GEMS data to POD subspace"):
        data_proj = V.T @ dproc.scale(data_gems.copy(), scales)[0]

    # Initialize the figure.
    fig, axes = plt.subplots(3, 3, figsize=(12, 6), sharex=True)

    # Compute and plot errors in each variable.
    for var, ax in zip(config.ROM_VARIABLES, axes.flat):
        with utils.timed_block(f"Reconstructing results for {var}"):
            basis_rows = dproc.getvar(var, V)
            truth = dproc.getvar(var, data_gems)
            projected = dproc.unscale(basis_rows @ data_proj, scales, var)
            predicted = dproc.unscale(basis_rows @ q_rom, scales, var)

        with utils.timed_block(f"Calculating error in {var}"):
            denom = np.abs(truth).max(axis=0)
            proj_error = _scaled_mean_abs_error(projected, truth, denom)
            pred_error = _scaled_mean_abs_error(predicted, truth, denom)

        # Plot results.
        curves = [
            (proj_error, "Projection Error", config.GEMS_STYLE),
            (pred_error, "ROM Error", config.ROM_STYLE),
        ]
        for errors, label, style in curves:
            ax.plot(t, errors, '-', lw=1, label=label, c=style['color'])
        ax.axvline(t[trainsize], color='k')
        ax.set_ylabel(config.VARTITLES[var])

    # Format the figure (only the bottom row carries the time axis labels).
    for ax in axes[-1, :]:
        ax.set_xlim(t[0], t[-1])
        ax.set_xticks(np.arange(t[0], t[-1] + .001, .002))
        ax.set_xlabel("Time [s]", fontsize=12)

    # Make legend centered below the subplots.
    fig.tight_layout(rect=[0, .1, 1, 1])
    legend = axes[0, 0].legend(ncol=2, fontsize=14, loc="lower center",
                               bbox_to_anchor=(.5, 0),
                               bbox_transform=fig.transFigure)
    for handle in legend.get_lines():
        handle.set_linestyle('-')
        handle.set_linewidth(5)

    # Save the figure.
    utils.save_figure(f"errors_{config.TRNFMT(trainsize)}"
                      f"_{config.DIMFMT(r)}_{config.REGFMT(regs)}.pdf")
def main(timeindices, variables=None, snaptype=("gems", "rom", "error"),
         trainsize=None, r=None, reg=None):
    """Export selected snapshots to .dat files that match the format of
    grid.dat.

    GEMS snapshots are written to `config.gems_snapshot_path(tindex)`; ROM
    and error snapshots are written to "<snaptype>_<tindex>.dat" inside the
    folder `config.rom_snapshot_path(trainsize, r, reg)`.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices in the full time domain of the snapshots to save.
        NOTE(review): these were documented as one-based, but they are used
        directly as array indices into the time domain and the ROM output;
        confirm the intended convention.

    variables : str or list(str)
        The variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.

    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Number of retained modes in the ROM.

    reg : float
        Regularization factor used to train the ROM.

    Raises
    ------
    ValueError
        If `snaptype` contains an unrecognized entry, or if the ROM output
        contains NaNs or does not reach the last requested time index.
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    def _rows(tokens):
        """Join string tokens into newline-terminated rows of NCOLS each."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Parse the snapshot types.
    if isinstance(snaptype, str):
        snaptype = [snaptype]
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")
    need_gems = ("gems" in snaptype) or ("error" in snaptype)
    need_rom = ("rom" in snaptype) or ("error" in snaptype)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information. The third block of num_nodes entries
        # is skipped because it is not written to the output files.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Extract full-order data if needed.
    if need_gems:
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])

    # Simulate ROM if needed.
    if need_rom:
        # Load the SVD data.
        # NOTE(review): basis() unpacks three values from utils.load_basis()
        # while two are unpacked here; confirm the helper's return arity.
        V, _ = utils.load_basis(trainsize, r)

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            # Column `simtime` is indexed below, so the output needs at least
            # simtime + 1 columns; anything shorter means the integration
            # stopped early.
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] <= simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate([dproc.getvar(v, x_rec) for v in variables])

    # Collect the requested data sets, one column per requested time index.
    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    # The geometry and connectivity rows are identical in every output file,
    # so build them once.
    xy_rows = _rows(x) + _rows(y)
    connectivity_rows = _rows(connectivity)

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):
        header = HEADER.format(varnames, tindex, t[tindex], num_nodes,
                               config.DOF, len(variables) + 2,
                               "SINGLE " * len(variables))
        for label, dset in dsets.items():
            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            else:                                   # "rom" or "error"
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    outfile.writelines(xy_rows)

                    # Write the data for each variable.
                    values = [f"{v:.9E}" for v in dset[:, j]]
                    outfile.writelines(_rows(values))

                    # Write connectivity information.
                    outfile.writelines(connectivity_rows)
def test_getvar(lifted_data):
    """Test data_processing.getvar().

    Checks that, for the i-th variable of config.ROM_VARIABLES, getvar()
    returns values equal to the i-th block of config.DOF rows of
    `lifted_data`.
    """
    dof = config.DOF
    with utils.timed_block("Verifying variable extraction"):
        start = 0
        for name in config.ROM_VARIABLES:
            extracted = dproc.getvar(name, lifted_data)
            expected = lifted_data[start:start + dof]
            assert np.all(extracted == expected)
            start += dof
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    One file "vec_<j>.dat" (numbered from 1) is written per basis vector into
    the folder returned by
    `config._makefolder(config.tecplot_path(), "basis", <trainsize label>)`.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.

    r : int
        Number of basis vectors to save.

    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    Raises
    ------
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    def _rows(tokens):
        """Join string tokens into newline-terminated rows of NCOLS each."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information. The third block of num_nodes entries
        # is skipped because it is not written to the output files.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables.
    V, _, _ = utils.load_basis(trainsize, r)
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # The output folder and the geometry / connectivity rows do not depend on
    # the basis vector, so resolve them once. Doing this before the loop also
    # keeps `save_folder` defined for the final message when r == 0.
    save_folder = config._makefolder(config.tecplot_path(),
                                     "basis", config.TRNFMT(trainsize))
    xy_rows = _rows(x) + _rows(y)
    connectivity_rows = _rows(connectivity)

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2,
                               "DOUBLE " * len(variables))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                outfile.writelines(xy_rows)

                # Write the data for each variable.
                values = [f"{v:.9E}" for v in V[:, j]]
                outfile.writelines(_rows(values))

                # Write connectivity information.
                outfile.writelines(connectivity_rows)

    print(f"Basis info exported to {save_folder}/*.dat.")
def temperature_average(trainsize, r, reg, cutoff=60000):
    """Get the average-in-time temperature profile for the GEMS data and a
    specific ROM.

    Both profiles are written in Tecplot format as "temperature_average.dat":
    the GEMS profile under the "gems" subfolder of `config.tecplot_path()`,
    the ROM profile in `config.rom_snapshot_path(trainsize, r, reg)`.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    reg : float
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to average over.

    Raises
    ------
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    def _rows(tokens):
        """Join string tokens into newline-terminated rows of NCOLS each."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    utils.reset_logger(trainsize)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information (the third block of num_nodes entries
        # is not needed here).
        tokens = grid[headersize:].split()
        x = tokens[:num_nodes]
        y = tokens[num_nodes:2 * num_nodes]
        connectivity = tokens[3 * num_nodes:]

    # Compute full-order time-averaged temperature from GEMS data. Only the
    # block of config.DOF rows belonging to "T" is requested.
    first_row = config.DOF * config.GEMS_VARIABLES.index("T")
    gems_data, _ = utils.load_gems_data(
        rows=slice(first_row, first_row + config.DOF), cols=cutoff)
    with utils.timed_block("Computing time-averaged GEMS temperature"):
        T_gems = gems_data.mean(axis=1)
        assert T_gems.shape == (config.DOF, )

    # Simulate ROM and compute the time-averaged temperature.
    _, V, scales, q_rom = step4.simulate_rom(trainsize, r, reg, steps=cutoff)
    with utils.timed_block("Reconstructing ROM simulation results"):
        T_rom = dproc.unscale(dproc.getvar("T", V) @ q_rom,
                              scales, "T").mean(axis=1)
        assert T_rom.shape == (config.DOF, )

    # NOTE(review): the other exporters pass one type token per exported
    # variable as the last HEADER argument; three are passed here for the
    # single variable "T". Confirm against the HEADER template.
    header = HEADER.format('"T"', 0, 0, num_nodes, config.DOF,
                           3, "DOUBLE " * 3)
    # With a single variable the range collapses to "[3-3]"; write it as "[3]".
    header = header.replace("VARLOCATION=([3-3]", "VARLOCATION=([3]")

    # Geometry and connectivity rows are shared by both output files.
    xy_rows = _rows(x) + _rows(y)
    connectivity_rows = _rows(connectivity)

    for label, profile in (("gems", T_gems), ("rom", T_rom)):
        if label == "gems":
            save_path = os.path.join(config.tecplot_path(),
                                     "gems", "temperature_average.dat")
        else:
            save_path = os.path.join(
                config.rom_snapshot_path(trainsize, r, reg),
                "temperature_average.dat")
        with utils.timed_block(f"Writing {label} temperature average"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                outfile.writelines(xy_rows)

                # Write the data for each variable.
                outfile.writelines(_rows([f"{v:.9E}" for v in profile]))

                # Write connectivity information.
                outfile.writelines(connectivity_rows)