def main(timeindices, variables=None, snaptype=("gems", "rom", "error"),
         trainsize=None, r=None, reg=None):
    """Write selected snapshots as Tecplot .dat files laid out like grid.dat.

    GEMS snapshots are written to `config.gems_snapshot_path(index)`; ROM
    and error snapshots are written into the folder
    `config.rom_snapshot_path(trainsize, r, reg)` with file names
    `<snaptype>_<index>.dat`.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices in the full time domain of the snapshots to save. They are
        sorted and then used directly to index the time array and the
        columns of the ROM output.
    variables : str or list(str)
        The variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.
    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.
    trainsize : int
        Number of snapshots used to train the ROM.
    r : int
        Number of retained modes in the ROM.
    reg : float
        Regularization factor used to train the ROM.

    Raises
    ------
    ValueError
        If no time indices are given, if `snaptype` contains an entry other
        than "gems", "rom", or "error", or if the ROM output contains NaNs
        or has too few columns to cover the largest requested index.
    RuntimeError
        If the element count in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    if timeindices.size == 0:
        raise ValueError("no time indices specified")
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    if isinstance(snaptype, str):
        snaptype = [snaptype]
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")
    need_gems = ("gems" in snaptype) or ("error" in snaptype)
    need_rom = ("rom" in snaptype) or ("error" in snaptype)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information. The third run of num_nodes tokens
        # (between y and the connectivity) is not written out, so skip it.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Extract full-order data if needed.
    if need_gems:
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])

    # Simulate ROM if needed.
    if need_rom:
        # Only the basis matrix (the first returned item) is used here, so
        # index it rather than unpack a fixed number of return values.
        V = utils.load_basis(trainsize, r)[0]

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            # Column `simtime` is indexed below, so at least simtime + 1
            # columns are required.
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] <= simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate(
                [dproc.getvar(v, x_rec) for v in variables])

    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    def _token_lines(tokens):
        """Join string tokens into newline-terminated rows of NCOLS items."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    # The geometry and connectivity text is the same in every output file,
    # so build it once.
    geometry_lines = _token_lines(x) + _token_lines(y)
    connectivity_lines = _token_lines(connectivity)

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):
        header = HEADER.format(varnames, tindex, t[tindex],
                               num_nodes, config.DOF,
                               len(variables) + 2,
                               "SINGLE " * len(variables))
        for label, dset in dsets.items():
            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            else:                                   # "rom" or "error"
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    outfile.writelines(geometry_lines)

                    # Write the data for each variable.
                    column = dset[:, j]
                    outfile.writelines(
                        ' '.join(f"{v:.9E}" for v in column[i:i + NCOLS])
                        + '\n'
                        for i in range(0, column.shape[0], NCOLS))

                    # Write connectivity information.
                    outfile.writelines(connectivity_lines)
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    One file per basis vector is written, named `vec_<j>.dat` (numbered
    from 1), in a "basis" subfolder of `config.tecplot_path()` obtained
    from `config._makefolder()`.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.
    r : int
        Number of basis vectors to save.
    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    Raises
    ------
    RuntimeError
        If the element count in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information. The third run of num_nodes tokens
        # (between y and the connectivity) is not written out, so skip it.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables. Only the basis matrix
    # (the first returned item) is used, so index it rather than unpack a
    # fixed number of return values.
    V = utils.load_basis(trainsize, r)[0]
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # The output folder does not depend on the vector index; resolve it once
    # so it is also defined for the final message when r == 0.
    save_folder = config._makefolder(config.tecplot_path(),
                                     "basis", config.TRNFMT(trainsize))

    def _token_lines(tokens):
        """Join string tokens into newline-terminated rows of NCOLS items."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    # The geometry and connectivity text is the same in every output file,
    # so build it once.
    geometry_lines = _token_lines(x) + _token_lines(y)
    connectivity_lines = _token_lines(connectivity)

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2,
                               "DOUBLE " * len(variables))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                outfile.writelines(geometry_lines)

                # Write the data for each variable.
                column = V[:, j]
                outfile.writelines(
                    ' '.join(f"{v:.9E}" for v in column[i:i + NCOLS]) + '\n'
                    for i in range(0, column.shape[0], NCOLS))

                # Write connectivity information.
                outfile.writelines(connectivity_lines)

    print(f"Basis info exported to {save_folder}/*.dat.")
def main(data_folder, overwrite=False, serial=False):
    """Extract snapshot data from the .tar files in the specified folder
    of the form Data_<first-snapshot>to<last-snapshot>.tar and store it in
    a single HDF5 file at `config.gems_data_path()`.

    Parameters
    ----------
    data_folder : str
        Path to the folder that contains the raw GEMS .tar data files,
        preferably as an absolute path (e.g., /path/to/folder).
    overwrite : bool
        If False and the snapshot matrix file exists, raise an error.
        If True, overwrite the existing snapshot matrix file if it exists.
    serial : bool
        If True, unpack the .tar files one after another.
        If False, unpack the .tar files in a multiprocessing pool.

    Raises
    ------
    FileNotFoundError
        If no file in `data_folder` matches Data_*to*.tar.
    ValueError
        If a file name does not follow the naming convention, or if the
        snapshot ranges of consecutive files are not contiguous.
    FileExistsError
        If the output file already exists and `overwrite` is False.
    """
    utils.reset_logger()

    # If it exists, copy the grid file to the Tecplot data directory.
    source = os.path.join(data_folder, config.GRID_FILE)
    if os.path.isfile(source):
        target = config.grid_data_path()
        with utils.timed_block(f"Copying {source} to {target}"):
            shutil.copy(source, target)
    else:
        logging.warning(f"Grid file {source} not found!")

    # Locate raw .tar files.
    target_pattern = os.path.join(data_folder, "Data_*to*.tar")
    tarfiles = sorted(glob.glob(target_pattern))
    if not tarfiles:
        raise FileNotFoundError(target_pattern)

    # Get the snapshot indices corresponding to each file from the file
    # names. Match against the base name only so that directory names in
    # the path cannot be mistaken for the snapshot range.
    ranges = []
    for tfile in tarfiles:
        matches = re.findall(r"Data_(\d+)to(\d+)\.tar",
                             os.path.basename(tfile))
        if not matches:
            raise ValueError(f"file {tfile} not named with convention "
                             "Data_<first-snapshot>to<last-snapshot>.tar")
        first, last = (int(d) for d in matches[0])
        ranges.append((first, last, tfile))

    # Order the files by snapshot index rather than by name: a plain string
    # sort misorders indices with different numbers of digits.
    ranges.sort()

    # Shift all indices so that the first snapshot sits in column 0.
    offset = ranges[0][0]
    tarfiles, starts, stops = [], [], []
    for first, last, tfile in ranges:
        start = first - offset
        if stops and stops[-1] != start:
            raise ValueError(f"file {tfile} not continuous from previous set")
        tarfiles.append(tfile)
        starts.append(start)
        stops.append(last + 1 - offset)
    num_snapshots = stops[-1]

    # Create an empty HDF5 file of appropriate size for the data.
    save_path = config.gems_data_path()
    if os.path.isfile(save_path) and not overwrite:
        raise FileExistsError(f"{save_path} (use --overwrite to overwrite)")
    with utils.timed_block("Initializing HDF5 file for data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data",
                              shape=(config.DOF * config.NUM_GEMSVARS,
                                     num_snapshots),
                              dtype=np.float64)
            hf.create_dataset("time", shape=(num_snapshots,),
                              dtype=np.float64)
    logging.info(f"Data file initialized as {save_path}.")

    # Read the files in chunks.
    args = zip(tarfiles, starts, stops)
    if serial:
        # Read the files serially (sequentially).
        for tf, start, stop in args:
            _read_tar_and_save_data(tf, start, stop, parallel=False)
    else:
        # Read the files in parallel; each worker receives the shared lock
        # through the pool initializer.
        with mp.Pool(initializer=_globalize_lock, initargs=(mp.Lock(),),
                     processes=min(len(tarfiles), mp.cpu_count())) as pool:
            pool.starmap(_read_tar_and_save_data, args)
def temperature_average(trainsize, r, reg, cutoff=60000):
    """Get the average-in-time temperature profile for the GEMS data and a
    specific ROM, and write each one to a Tecplot .dat file.

    The GEMS average is written to
    `<config.tecplot_path()>/gems/temperature_average.dat` and the ROM
    average to `temperature_average.dat` inside
    `config.rom_snapshot_path(trainsize, r, reg)`.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.
    r : int
        Dimension of the ROM.
    reg : float
        Regularization hyperparameters used to train the ROM.
    cutoff : int
        Number of time steps to average over.

    Raises
    ------
    RuntimeError
        If the element count in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information. The third run of num_nodes tokens
        # (between y and the connectivity) is not written out, so skip it.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Compute full-order time-averaged temperature from GEMS data.
    _s = config.DOF * config.GEMS_VARIABLES.index("T")
    gems_data, _ = utils.load_gems_data(rows=slice(_s, _s + config.DOF),
                                        cols=cutoff)
    with utils.timed_block("Computing time-averaged GEMS temperature"):
        T_gems = gems_data.mean(axis=1)
        assert T_gems.shape == (config.DOF, )

    # Simulate ROM and compute the time-averaged temperature.
    # (The returned time domain is not needed here.)
    _, V, scales, q_rom = step4.simulate_rom(trainsize, r, reg, steps=cutoff)
    with utils.timed_block("Reconstructing ROM simulation results"):
        T_rom = dproc.unscale(dproc.getvar("T", V) @ q_rom, scales, "T")
        T_rom = T_rom.mean(axis=1)
        assert T_rom.shape == (config.DOF, )

    # NOTE(review): three type tokens are passed as the last argument here,
    # whereas the other exporters in this file pass one token per data
    # variable -- confirm against HEADER that this is intended.
    header = HEADER.format('"T"', 0, 0, num_nodes, config.DOF,
                           3, "DOUBLE " * 3)
    # With a single data variable the formatted range collapses to "3-3".
    header = header.replace("VARLOCATION=([3-3]", "VARLOCATION=([3]")

    def _token_lines(tokens):
        """Join string tokens into newline-terminated rows of NCOLS items."""
        return [' '.join(tokens[i:i + NCOLS]) + '\n'
                for i in range(0, len(tokens), NCOLS)]

    # The geometry and connectivity text is the same in both output files,
    # so build it once.
    geometry_lines = _token_lines(x) + _token_lines(y)
    connectivity_lines = _token_lines(connectivity)

    for label, dset in zip(["gems", "rom"], [T_gems, T_rom]):
        if label == "gems":
            save_path = os.path.join(config.tecplot_path(), "gems",
                                     "temperature_average.dat")
        else:                                       # label == "rom"
            folder = config.rom_snapshot_path(trainsize, r, reg)
            save_path = os.path.join(folder, "temperature_average.dat")

        # Make sure the destination folder exists before opening the file.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        with utils.timed_block(f"Writing {label} temperature average"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                outfile.writelines(geometry_lines)

                # Write the data for each variable.
                outfile.writelines(
                    ' '.join(f"{v:.9E}" for v in dset[i:i + NCOLS]) + '\n'
                    for i in range(0, dset.shape[0], NCOLS))

                # Write connectivity information.
                outfile.writelines(connectivity_lines)