Пример #1
0
def get_feature(key, data, V=None, scales=None):
    """Reconstruct a spatial statistical feature from data, unprojecting and
    unscaling if needed.

    Parameters
    ----------
    key : str
        Which statistical feature to calculate (T_mean, CH4_sum, etc.)

    data : (r,nt) or (config.DOF*config.NUM_ROMVARS,nt) ndarray
        Data from which to extract the features, either the output of a ROM
        or a high-dimensional data set.

    V : (config.DOF*config.NUM_ROMVARS,r) ndarray or None
        Rank-r POD basis. Only needed if data is low-dimensional ROM output.

    scales : (config.NUM_ROMVARS,2) ndarray or None
        Information for how the data was scaled (see data_processing.scale()).
        Only needed if `data` is low-dimensional ROM output.

    Returns
    -------
    feature : (nt,) ndarray
        The specified statistical feature.
    """
    var, action = key.split('_')
    print(f"{action}({var})", end='..', flush=True)
    if V is not None and scales is not None:
        variable = dproc.unscale(dproc.getvar(var, V) @ data, scales, var)
    else:
        variable = dproc.getvar(var, data)
    return eval(f"variable.{action}(axis=0)")
def save_statistical_features():
    """Compute the (spatial) mean temperatures on the full time domain and
    save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            mins[var] = val.min(axis=0)
            maxs[var] = val.max(axis=0)
            sums[var] = val.sum(axis=0)
            stds[var] = val.std(axis=0)
            means[var] = sums[var] / val.shape[0]

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                hf.create_dataset(f"{var}_min", data=mins[var])
                hf.create_dataset(f"{var}_max", data=maxs[var])
                hf.create_dataset(f"{var}_sum", data=sums[var])
                hf.create_dataset(f"{var}_std", data=stds[var])
                hf.create_dataset(f"{var}_mean", data=means[var])
            hf.create_dataset("time", data=t)
    logging.info(f"Statistical features saved to {data_path}")
Пример #3
0
def save_statistical_features():
    """Compute the spatial and temporal statistics (min, max, mean, etc.)
    for each variable and save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            for axis, label in enumerate(["space", "time"]):
                name = f"{label}/{var}"
                print(f"\n\tmin_{label}({var})", end='..', flush=True)
                mins[name] = val.min(axis=axis)
                print(f"max_{label}({var})", end='..', flush=True)
                maxs[name] = val.max(axis=axis)
                print(f"sum_{label}({var})", end='..', flush=True)
                sums[name] = val.sum(axis=axis)
                print(f"std_{label}({var})", end='..', flush=True)
                stds[name] = val.std(axis=axis)
            means[f"space/{var}"] = sums[f"space/{var}"] / val.shape[0]
            means[f"time/{var}"] = sums[f"time/{var}"] / t.size

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for prefix in ["space", "time"]:
                    name = f"{prefix}/{var}"
                    hf.create_dataset(f"{name}_min", data=mins[name])
                    hf.create_dataset(f"{name}_max", data=maxs[name])
                    hf.create_dataset(f"{name}_sum", data=sums[name])
                    hf.create_dataset(f"{name}_std", data=stds[name])
                    hf.create_dataset(f"{name}_mean", data=means[name])
            hf.create_dataset("t", data=t)
    logging.info(f"Statistical features saved to {data_path}")
Пример #4
0
def test_scalers(lifted_data):
    """Test data_processing.scale() and data_processing.unscale(),
    including checking that they are inverses.
    """
    # Shift the test data (learning the scaling simultaneously).
    with utils.timed_block("Scaling lifted test data"):
        shifted_data, scales = dproc.scale(lifted_data.copy())
        assert np.allclose(scales[:, -2:], config.SCALE_TO)

    # Verify the scales and that the shift worked for each variable.
    with utils.timed_block("Verifying shift results with scales"):
        for i, v in enumerate(config.ROM_VARIABLES):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            if v in ["vx", "vy"]:
                assert -scales[i, 0] == scales[i, 1]
                assert scales[i, 1] == np.abs(lifted_data[s]).max()
                assert np.isclose(np.abs(shifted_data[s]).max(), 1)
            else:
                assert lifted_data[s].min() == scales[i, 0]
                assert lifted_data[s].max() == scales[i, 1]
                assert np.isclose(shifted_data[s].min(), scales[i, 2])
                assert np.isclose(shifted_data[s].max(), scales[i, 3])

    # Redo the shift with the given scales and compare the results.
    with utils.timed_block("Verifying repeat shift with given scales"):
        shifted_data2, _ = dproc.scale(lifted_data.copy(), scales)
        assert np.allclose(shifted_data2, shifted_data)

    # Undo the shift and compare the results.
    with utils.timed_block("Verifying inverse scaling"):
        unshifted_data = dproc.unscale(shifted_data, scales)
        assert np.allclose(unshifted_data, lifted_data)

    # Check the inverse property for a subset of the variables.
    with utils.timed_block("Repeating experiment with nontrivial varindices"):
        variables = np.random.choice(config.ROM_VARIABLES,
                                     size=4,
                                     replace=False)
        subset = np.vstack([dproc.getvar(v, lifted_data) for v in variables])
        shifted_subset, _ = dproc.scale(subset.copy(), scales, variables)
        unshifted_subset = dproc.unscale(shifted_subset, scales, variables)
        assert np.allclose(unshifted_subset, subset)
Пример #5
0
def errors_in_time(trainsize, r, regs, cutoff=60000):
    """Plot spatially averaged errors, and the projection error, in time.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Numer of time steps to plot.
    """
    # Load and simulate the ROM.
    t, V, scales, q_rom = simulate_rom(trainsize, r, regs, cutoff)

    # Load and lift the true results.
    data, _ = utils.load_gems_data(cols=cutoff)
    with utils.timed_block("Lifting GEMS data"):
        data_gems = dproc.lift(data[:, :cutoff])
    del data

    # Shift and project the data (unscaling done later by chunk).
    with utils.timed_block("Projecting GEMS data to POD subspace"):
        data_shifted, _ = dproc.scale(data_gems.copy(), scales)
        data_proj = V.T @ data_shifted
        del data_shifted

    # Initialize the figure.
    fig, axes = plt.subplots(3, 3, figsize=(12, 6), sharex=True)

    # Compute and plot errors in each variable.
    for var, ax in zip(config.ROM_VARIABLES, axes.flat):

        with utils.timed_block(f"Reconstructing results for {var}"):
            Vvar = dproc.getvar(var, V)
            gems_var = dproc.getvar(var, data_gems)
            proj_var = dproc.unscale(Vvar @ data_proj, scales, var)
            pred_var = dproc.unscale(Vvar @ q_rom, scales, var)

        with utils.timed_block(f"Calculating error in {var}"):
            denom = np.abs(gems_var).max(axis=0)
            proj_error = np.mean(np.abs(proj_var - gems_var), axis=0) / denom
            pred_error = np.mean(np.abs(pred_var - gems_var), axis=0) / denom

        # Plot results.
        ax.plot(t,
                proj_error,
                '-',
                lw=1,
                label="Projection Error",
                c=config.GEMS_STYLE['color'])
        ax.plot(t,
                pred_error,
                '-',
                lw=1,
                label="ROM Error",
                c=config.ROM_STYLE['color'])
        ax.axvline(t[trainsize], color='k')
        ax.set_ylabel(config.VARTITLES[var])

    # Format the figure.
    for ax in axes[-1, :]:
        ax.set_xlim(t[0], t[-1])
        ax.set_xticks(np.arange(t[0], t[-1] + .001, .002))
        ax.set_xlabel("Time [s]", fontsize=12)

    # Make legend centered below the subplots.
    fig.tight_layout(rect=[0, .1, 1, 1])
    leg = axes[0, 0].legend(ncol=2,
                            fontsize=14,
                            loc="lower center",
                            bbox_to_anchor=(.5, 0),
                            bbox_transform=fig.transFigure)
    for line in leg.get_lines():
        line.set_linestyle('-')
        line.set_linewidth(5)

    # Save the figure.
    utils.save_figure(f"errors"
                      f"_{config.TRNFMT(trainsize)}"
                      f"_{config.DIMFMT(r)}"
                      f"_{config.REGFMT(regs)}.pdf")
def main(timeindices,
         variables=None,
         snaptype=["gems", "rom", "error"],
         trainsize=None,
         r=None,
         reg=None):
    """Convert a snapshot in .h5 format to a .dat file that matches the format
    of grid.dat. The new file is saved in `config.tecplot_path()` with the same
    filename and the new file extension .dat.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices (one-based) in the full time domain of the snapshots to save.

    variables : str or list(str)
        The variables to scale, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.

    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Number of retained modes in the ROM.

    reg : float
        Regularization factor used to train the ROM.
    """
    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    if isinstance(snaptype, str):
        snaptype = [snaptype]
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        cell_volume = grid_data[2 * num_nodes:3 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Extract full-order data if needed.
    if ("gems" in snaptype) or ("error" in snaptype):
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])
    # Simulate ROM if needed.
    if ("rom" in snaptype) or ("error" in snaptype):
        # Load the SVD data.
        V, _ = utils.load_basis(trainsize, r)

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] < simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate([dproc.getvar(v, x_rec) for v in variables])

    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):

        header = HEADER.format(varnames, tindex, t[tindex], num_nodes,
                               config.DOF,
                               len(variables) + 2, "SINGLE " * len(variables))
        for label, dset in dsets.items():

            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            if label in ("rom", "error"):
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    for i in range(0, len(x), NCOLS):
                        outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                    for i in range(0, len(y), NCOLS):
                        outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                    # Write the data for each variable.
                    for i in range(0, dset.shape[0], NCOLS):
                        row = ' '.join(f"{v:.9E}"
                                       for v in dset[i:i + NCOLS, j])
                        outfile.write(row + '\n')

                    # Write connectivity information.
                    for i in range(0, len(connectivity), NCOLS):
                        outfile.write(' '.join(connectivity[i:i + NCOLS]) +
                                      '\n')
Пример #7
0
def test_getvar(lifted_data):
    """Test data_processing.getvar()."""
    with utils.timed_block("Verifying variable extraction"):
        for i, v in enumerate(config.ROM_VARIABLES):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            assert np.all(dproc.getvar(v, lifted_data) == lifted_data[s])
Пример #8
0
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.

    r : int
        Number of basis vectors to save.

    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.
    """
    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables.
    V, _, _ = utils.load_basis(trainsize, r)
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2, "DOUBLE " * len(variables))
        save_folder = config._makefolder(config.tecplot_path(), "basis",
                                         config.TRNFMT(trainsize))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, V.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in V[i:i + NCOLS, j])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')
    print(f"Basis info exported to {save_folder}/*.dat.")
Пример #9
0
def temperature_average(trainsize, r, reg, cutoff=60000):
    """Get the average-in-time temperature profile for the GEMS data and a
    specific ROM.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    reg : float
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to average over.
    """
    utils.reset_logger(trainsize)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Compute full-order time-averaged temperature from GEMS data.
    _s = config.DOF * config.GEMS_VARIABLES.index("T")
    gems_data, _ = utils.load_gems_data(rows=slice(_s, _s + config.DOF),
                                        cols=cutoff)
    with utils.timed_block("Computing time-averaged GEMS temperature"):
        T_gems = gems_data.mean(axis=1)
        assert T_gems.shape == (config.DOF, )

    # Simulate ROM and compute the time-averaged temperature.
    t, V, scales, q_rom = step4.simulate_rom(trainsize, r, reg, steps=cutoff)
    with utils.timed_block("Reconstructing ROM simulation results"):
        T_rom = dproc.unscale(dproc.getvar("T", V) @ q_rom, scales, "T")
        T_rom = T_rom.mean(axis=1)
        assert T_rom.shape == (config.DOF, )

    header = HEADER.format('"T"', 0, 0, num_nodes, config.DOF, 3,
                           "DOUBLE " * 3)
    header = header.replace("VARLOCATION=([3-3]", "VARLOCATION=([3]")
    for label, dset in zip(["gems", "rom"], [T_gems, T_rom]):
        if label == "gems":
            save_path = os.path.join(config.tecplot_path(), "gems",
                                     "temperature_average.dat")
        elif label == "rom":
            folder = config.rom_snapshot_path(trainsize, r, reg)
            save_path = os.path.join(folder, "temperature_average.dat")
        with utils.timed_block(f"Writing {label} temperature average"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, dset.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in dset[i:i + NCOLS])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')