def main(timeindices,
         variables=None,
         snaptype=("gems", "rom", "error"),
         trainsize=None,
         r=None,
         reg=None):
    """Export selected snapshots as Tecplot .dat files laid out like grid.dat
    (header, x/y coordinates, variable data, connectivity).

    One file is written per requested time index and snapshot type:
    * "gems" snapshots are written to `config.gems_snapshot_path(tindex)`;
    * "rom" and "error" snapshots are written to
      `<config.rom_snapshot_path(trainsize, r, reg)>/<type>_<tindex>.dat`.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices in the full time domain of the snapshots to save.
        NOTE(review): previously documented as one-based, but the values are
        used directly as array indices (`t[tindex]`, `x_rom[:, timeindices]`);
        confirm the intended convention.

    variables : str or list(str)
        The variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.

    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Number of retained modes in the ROM.

    reg : float
        Regularization factor used to train the ROM.

    Raises
    ------
    ValueError
        If an entry of `snaptype` is not one of "gems", "rom", "error", or
        if the ROM simulation produces NaNs or stops before the last
        requested time index.
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Parse the snapshot types (the default is an immutable tuple).
    snaptype = [snaptype] if isinstance(snaptype, str) else list(snaptype)
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")
    need_gems = ("gems" in snaptype) or ("error" in snaptype)
    need_rom = ("rom" in snaptype) or ("error" in snaptype)

    def _wrap(tokens):
        """Join string tokens into lines of at most NCOLS entries each."""
        return ''.join(' '.join(tokens[i:i + NCOLS]) + '\n'
                       for i in range(0, len(tokens), NCOLS))

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information (kept as strings, copied verbatim).
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2 * num_nodes:3 * num_nodes]  (not exported)
        connectivity = grid_data[3 * num_nodes:]

    # The geometry and connectivity text is identical in every output file,
    # so format it once instead of once per file.
    geometry_text = _wrap(x) + _wrap(y)
    connectivity_text = _wrap(connectivity)

    # Extract full-order data if needed.
    if need_gems:
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])

    # Simulate ROM if needed.
    if need_rom:
        # Load the SVD data.
        V, _ = utils.load_basis(trainsize, r)

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            # Column `simtime` is indexed below, so at least simtime + 1
            # columns are required (`<=`, not `<`).
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] <= simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate([dproc.getvar(v, x_rec) for v in variables])

    # Collect the requested data sets; column j corresponds to timeindices[j].
    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):

        header = HEADER.format(varnames, tindex, t[tindex], num_nodes,
                               config.DOF,
                               len(variables) + 2, "SINGLE " * len(variables))
        for label, dset in dsets.items():

            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            else:   # "rom" or "error"
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    outfile.write(geometry_text)

                    # Write the data for each variable.
                    for i in range(0, dset.shape[0], NCOLS):
                        row = ' '.join(f"{v:.9E}"
                                       for v in dset[i:i + NCOLS, j])
                        outfile.write(row + '\n')

                    # Write connectivity information.
                    outfile.write(connectivity_text)
Пример #2
0
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    Basis vector j (counting from 1) is written to
    `<tecplot_path>/basis/<config.TRNFMT(trainsize)>/vec_<j>.dat`, with the
    same layout as grid.dat (header, x/y coordinates, data, connectivity).

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.

    r : int
        Number of basis vectors to save.

    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    Raises
    ------
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information (kept as strings, copied verbatim).
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables.
    V, _, _ = utils.load_basis(trainsize, r)
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # The output folder is the same for every vector: resolve it once, so it
    # is also defined for the final message when r == 0.
    save_folder = config._makefolder(config.tecplot_path(), "basis",
                                     config.TRNFMT(trainsize))

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        # The zero-based index j fills both the second and third header
        # fields; the file name uses the one-based index j + 1.
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2, "DOUBLE " * len(variables))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, V.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in V[i:i + NCOLS, j])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')
    print(f"Basis info exported to {save_folder}/*.dat.")
def main(data_folder, overwrite=False, serial=False):
    """Extract snapshot data from the .tar files in the specified folder of
    the form Data_<first-snapshot>to<last-snapshot>.tar and store it in a
    single HDF5 file at `config.gems_data_path()`.

    Parameters
    ----------
    data_folder : str
        Path to the folder that contains the raw GEMS .tar data files,
        preferably as an absolute path (e.g., /path/to/folder).

    overwrite : bool
        If False and the snapshot matrix file exists, raise an error.
        If True, overwrite the existing snapshot matrix file if it exists.

    serial : bool
        If True, unpack the .tar files sequentially, one after another.
        If False, unpack the .tar files in parallel with a process pool.

    Raises
    ------
    FileNotFoundError
        If no file in `data_folder` matches Data_*to*.tar.
    ValueError
        If a file name does not follow the naming convention, or if the
        snapshot ranges of the files are not contiguous.
    FileExistsError
        If the output file exists and `overwrite` is False.
    """
    utils.reset_logger()

    # If it exists, copy the grid file to the Tecplot data directory.
    source = os.path.join(data_folder, config.GRID_FILE)
    if os.path.isfile(source):
        target = config.grid_data_path()
        with utils.timed_block(f"Copying {source} to {target}"):
            shutil.copy(source, target)
    else:
        logging.warning(f"Grid file {source} not found!")

    # Locate raw .tar files.
    target_pattern = os.path.join(data_folder, "Data_*to*.tar")
    candidates = sorted(glob.glob(target_pattern))
    if not candidates:
        raise FileNotFoundError(target_pattern)

    # Get the snapshot indices corresponding to each file from the file names.
    # Only the base name is parsed so directory names cannot produce a match.
    ranges = []
    for tfile in candidates:
        match = re.search(r"Data_(\d+)to(\d+)\.tar", os.path.basename(tfile))
        if match is None:
            raise ValueError(f"file {tfile} not named with convention "
                             "Data_<first-snapshot>to<last-snapshot>.tar")
        ranges.append((int(match.group(1)), int(match.group(2)), tfile))

    # Order the files by their first snapshot index. Lexicographic order is
    # wrong when the indices have different numbers of digits
    # (e.g., "Data_100000to..." sorts before "Data_10000to...").
    ranges.sort(key=lambda item: item[0])

    # Shift the indices so that the first snapshot lands in column 0, and
    # check that consecutive files cover contiguous ranges.
    start0 = ranges[0][0]
    tarfiles, starts, stops = [], [], []
    for start, stop, tfile in ranges:
        if stops and stops[-1] != start - start0:
            raise ValueError(f"file {tfile} not continuous from previous set")
        tarfiles.append(tfile)
        starts.append(start - start0)
        stops.append(stop + 1 - start0)     # Exclusive upper bound.
    num_snapshots = stops[-1]

    # Create an empty HDF5 file of appropriate size for the data.
    save_path = config.gems_data_path()
    if os.path.isfile(save_path) and not overwrite:
        raise FileExistsError(f"{save_path} (use --overwrite to overwrite)")
    with utils.timed_block("Initializing HDF5 file for data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data",
                              shape=(config.DOF * config.NUM_GEMSVARS,
                                     num_snapshots),
                              dtype=np.float64)
            hf.create_dataset("time", shape=(num_snapshots,),
                              dtype=np.float64)
    logging.info(f"Data file initialized as {save_path}.")

    # Read the files in chunks (one chunk per .tar file).
    args = zip(tarfiles, starts, stops)
    if serial:       # Read the files serially (sequentially).
        for tf, start, stop in args:
            _read_tar_and_save_data(tf, start, stop, parallel=False)
    else:            # Read the files in parallel.
        # Each worker receives the same lock through the pool initializer.
        with mp.Pool(initializer=_globalize_lock, initargs=(mp.Lock(),),
                     processes=min(len(tarfiles), mp.cpu_count())) as pool:
            pool.starmap(_read_tar_and_save_data, args)
Пример #4
0
def temperature_average(trainsize, r, reg, cutoff=60000):
    """Write the time-averaged temperature field of the GEMS data and of a
    specific ROM to Tecplot .dat files named temperature_average.dat.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    reg : float
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to average over (passed as `cols` when loading
        the GEMS data and as `steps` when simulating the ROM).

    Raises
    ------
    RuntimeError
        If the number of elements in the grid file differs from config.DOF.
    """
    utils.reset_logger(trainsize)

    def _token_lines(tokens):
        """Yield the string tokens as lines of at most NCOLS entries."""
        for k in range(0, len(tokens), NCOLS):
            yield ' '.join(tokens[k:k + NCOLS]) + '\n'

    # Load the grid: validate the header, then split the body into tokens.
    with utils.timed_block("Reading Tecplot grid data"):
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid_text = infile.read()
        num_elements = int(re.findall(r"Elements=(\d+)", grid_text)[0])
        if num_elements != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid_text)[0])
        header_tail = re.findall(r"DT=.*?\n", grid_text)[0]
        body_start = grid_text.find(header_tail) + len(header_tail)

        # Token layout: x, then y, then a third block of num_nodes entries
        # that is not exported, then the connectivity.
        tokens = grid_text[body_start:].split()
        xcoords = tokens[:num_nodes]
        ycoords = tokens[num_nodes:2 * num_nodes]
        connectivity = tokens[3 * num_nodes:]

    # Full-order data: load only the rows of the temperature variable.
    first_row = config.DOF * config.GEMS_VARIABLES.index("T")
    temperature_rows = slice(first_row, first_row + config.DOF)
    gems_data, _ = utils.load_gems_data(rows=temperature_rows, cols=cutoff)
    with utils.timed_block("Computing time-averaged GEMS temperature"):
        T_gems = gems_data.mean(axis=1)
        assert T_gems.shape == (config.DOF, )

    # Reduced-order data: simulate, reconstruct the temperature, average.
    _, V, scales, q_rom = step4.simulate_rom(trainsize, r, reg, steps=cutoff)
    with utils.timed_block("Reconstructing ROM simulation results"):
        T_rom = dproc.unscale(dproc.getvar("T", V) @ q_rom, scales, "T")
        T_rom = T_rom.mean(axis=1)
        assert T_rom.shape == (config.DOF, )

    # Build the header shared by both files.
    header = HEADER.format('"T"', 0, 0, num_nodes, config.DOF, 3,
                           "DOUBLE " * 3)
    header = header.replace("VARLOCATION=([3-3]", "VARLOCATION=([3]")

    for label, averaged in (("gems", T_gems), ("rom", T_rom)):
        if label == "rom":
            save_path = os.path.join(
                config.rom_snapshot_path(trainsize, r, reg),
                "temperature_average.dat")
        else:
            save_path = os.path.join(config.tecplot_path(), "gems",
                                     "temperature_average.dat")

        with utils.timed_block(f"Writing {label} temperature average"):
            with open(save_path, 'w') as outfile:
                outfile.write(header)

                # Geometry (x, then y coordinates), copied verbatim.
                outfile.writelines(_token_lines(xcoords))
                outfile.writelines(_token_lines(ycoords))

                # Averaged temperature, NCOLS values per line.
                outfile.writelines(
                    ' '.join(f"{v:.9E}" for v in averaged[k:k + NCOLS]) + '\n'
                    for k in range(0, averaged.shape[0], NCOLS))

                # Connectivity, copied verbatim.
                outfile.writelines(_token_lines(connectivity))