def scale_and_save_data(trainsize, lifted_data, time_domain):
    """Scale lifted snapshots (by variable) and save the scaled snapshots.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to scale and save.

    lifted_data : (NUM_ROMVARS*DOF, k>trainsize) ndarray
        Lifted snapshots to scale and then save.

    time_domain : (k>trainsize,) ndarray
        The time domain corresponding to the lifted snapshots.
    """
    # Scale the learning variables to the bounds in config.SCALE_TO.
    with utils.timed_block(f"Scaling {trainsize:d} lifted snapshots"):
        scaled_data, scales = dproc.scale(lifted_data[:, :trainsize].copy())

    # Save the lifted, scaled training data.
    save_path = config.scaled_data_path(trainsize)
    with utils.timed_block("Saving scaled, lifted training data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=scaled_data)
            hf.create_dataset("time", data=time_domain[:trainsize])
            hf.create_dataset("scales", data=scales)
    logging.info(f"Scaled data saved as {save_path}.\n")

    return scaled_data, scales
def make_pred(X, scaler_file, model_hdf5):
    """Scale the input features and predict with a previously trained Keras model."""
    # Apply the saved scaler to the raw input features.
    X_scaled = scale(X, scaler_file)
    # Restore the trained Keras model (architecture and weights) from disk.
    model = load_model(model_hdf5)
    print("Keras model restored.")
    # Predict on the scaled inputs.
    y_pred = model.predict(X_scaled)
    return y_pred
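# Hedged usage sketch for make_pred(): assumes a scaler file and a trained Keras
# model have already been saved to disk (as done by the training code later in
# this file). The sample array and file names below are illustrative
# placeholders, not part of the original pipeline.
import numpy as np

new_samples = np.array([[5.1, 3.5, 1.4, 0.2],
                        [6.7, 3.0, 5.2, 2.3]])       # Hypothetical iris-style rows.
y_new = make_pred(new_samples,
                  scaler_file="X_scaler.save",            # Assumed file name.
                  model_hdf5="keras_neural_network.h5")   # Assumed file name.
print(y_new)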
def test_scalers(lifted_data):
    """Test data_processing.scale() and data_processing.unscale(),
    including checking that they are inverses.
    """
    # Shift the test data (learning the scaling simultaneously).
    with utils.timed_block("Scaling lifted test data"):
        shifted_data, scales = dproc.scale(lifted_data.copy())
        assert np.allclose(scales[:, -2:], config.SCALE_TO)

    # Verify the scales and that the shift worked for each variable.
    with utils.timed_block("Verifying shift results with scales"):
        for i, v in enumerate(config.ROM_VARIABLES):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            if v in ["vx", "vy"]:
                assert -scales[i, 0] == scales[i, 1]
                assert scales[i, 1] == np.abs(lifted_data[s]).max()
                assert np.isclose(np.abs(shifted_data[s]).max(), 1)
            else:
                assert lifted_data[s].min() == scales[i, 0]
                assert lifted_data[s].max() == scales[i, 1]
                assert np.isclose(shifted_data[s].min(), scales[i, 2])
                assert np.isclose(shifted_data[s].max(), scales[i, 3])

    # Redo the shift with the given scales and compare the results.
    with utils.timed_block("Verifying repeat shift with given scales"):
        shifted_data2, _ = dproc.scale(lifted_data.copy(), scales)
        assert np.allclose(shifted_data2, shifted_data)

    # Undo the shift and compare the results.
    with utils.timed_block("Verifying inverse scaling"):
        unshifted_data = dproc.unscale(shifted_data, scales)
        assert np.allclose(unshifted_data, lifted_data)

    # Check the inverse property for a subset of the variables.
    with utils.timed_block("Repeating experiment with nontrivial varindices"):
        variables = np.random.choice(config.ROM_VARIABLES,
                                     size=4, replace=False)
        subset = np.vstack([dproc.getvar(v, lifted_data) for v in variables])
        shifted_subset, _ = dproc.scale(subset.copy(), scales, variables)
        unshifted_subset = dproc.unscale(shifted_subset, scales, variables)
        assert np.allclose(unshifted_subset, subset)
def scale_and_save_data(trainsize, lifted_data, time_domain, center=False):
    """Scale lifted snapshots (by variable) and save the scaled snapshots.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to scale and save.

    lifted_data : (NUM_ROMVARS*DOF, k>trainsize) ndarray
        Lifted snapshots to scale and then save.

    time_domain : (k>trainsize,) ndarray
        The time domain corresponding to the lifted snapshots.

    center : bool
        If True, center the scaled snapshots by the mean scaled snapshot
        before computing the POD basis. Default False (no shift).

    Returns
    -------
    training_data : (NUM_ROMVARS*DOF, trainsize) ndarray
        Scaled, shifted snapshots to use as training data for the basis.

    qbar : (NUM_ROMVARS*DOF,) ndarray
        Mean snapshot of the scaled training data. All zeros if center=False.

    scales : (NUM_ROMVARS, 2) ndarray
        Info on how the snapshot data was scaled.
    """
    # Scale the learning variables to the bounds in config.SCALE_TO.
    with utils.timed_block(f"Scaling {trainsize:d} lifted snapshots"):
        training_data, scales = dproc.scale(lifted_data[:, :trainsize].copy())

    # Shift the scaled data by the mean snapshot.
    if center:
        with utils.timed_block("Shifting scaled snapshots by mean"):
            qbar = np.mean(training_data, axis=1)    # Compute mean snapshot.
            training_data -= qbar.reshape((-1, 1))   # Shift columns by mean.
    else:
        qbar = np.zeros(training_data.shape[0])

    # Save the lifted, scaled training data.
    save_path = config.scaled_data_path(trainsize)
    with utils.timed_block("Saving scaled, lifted training data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=training_data)
            hf.create_dataset("time", data=time_domain[:trainsize])
            hf.create_dataset("mean", data=qbar)
            hf.create_dataset("scales", data=scales)
    logging.info(f"Processed data saved to {save_path}.\n")

    return training_data, qbar, scales
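# A minimal sketch of how scale_and_save_data() might be invoked, using the GEMS
# loading and lifting utilities that appear in errors_in_time() below. It assumes
# utils.load_gems_data() also returns the matching time domain as its second
# output; the trainsize value is illustrative only.
trainsize = 20000
gems_data, time_domain = utils.load_gems_data(cols=trainsize)
with utils.timed_block(f"Lifting {trainsize:d} GEMS snapshots"):
    lifted_data = dproc.lift(gems_data)
training_data, qbar, scales = scale_and_save_data(trainsize, lifted_data,
                                                  time_domain, center=True)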
def errors_in_time(trainsize, r, regs, cutoff=60000):
    """Plot spatially averaged errors, and the projection error, in time.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to plot.
    """
    # Load and simulate the ROM.
    t, V, scales, q_rom = simulate_rom(trainsize, r, regs, cutoff)

    # Load and lift the true results.
    data, _ = utils.load_gems_data(cols=cutoff)
    with utils.timed_block("Lifting GEMS data"):
        data_gems = dproc.lift(data[:, :cutoff])
    del data

    # Shift and project the data (unscaling done later by chunk).
    with utils.timed_block("Projecting GEMS data to POD subspace"):
        data_shifted, _ = dproc.scale(data_gems.copy(), scales)
        data_proj = V.T @ data_shifted
        del data_shifted

    # Initialize the figure.
    fig, axes = plt.subplots(3, 3, figsize=(12, 6), sharex=True)

    # Compute and plot errors in each variable.
    for var, ax in zip(config.ROM_VARIABLES, axes.flat):

        with utils.timed_block(f"Reconstructing results for {var}"):
            Vvar = dproc.getvar(var, V)
            gems_var = dproc.getvar(var, data_gems)
            proj_var = dproc.unscale(Vvar @ data_proj, scales, var)
            pred_var = dproc.unscale(Vvar @ q_rom, scales, var)

        with utils.timed_block(f"Calculating error in {var}"):
            denom = np.abs(gems_var).max(axis=0)
            proj_error = np.mean(np.abs(proj_var - gems_var), axis=0) / denom
            pred_error = np.mean(np.abs(pred_var - gems_var), axis=0) / denom

        # Plot results.
        ax.plot(t, proj_error, '-', lw=1, label="Projection Error",
                c=config.GEMS_STYLE['color'])
        ax.plot(t, pred_error, '-', lw=1, label="ROM Error",
                c=config.ROM_STYLE['color'])
        ax.axvline(t[trainsize], color='k')
        ax.set_ylabel(config.VARTITLES[var])

    # Format the figure.
    for ax in axes[-1, :]:
        ax.set_xlim(t[0], t[-1])
        ax.set_xticks(np.arange(t[0], t[-1] + .001, .002))
        ax.set_xlabel("Time [s]", fontsize=12)

    # Make legend centered below the subplots.
    fig.tight_layout(rect=[0, .1, 1, 1])
    leg = axes[0, 0].legend(ncol=2, fontsize=14, loc="lower center",
                            bbox_to_anchor=(.5, 0),
                            bbox_transform=fig.transFigure)
    for line in leg.get_lines():
        line.set_linestyle('-')
        line.set_linewidth(5)

    # Save the figure.
    utils.save_figure(f"errors"
                      f"_{config.TRNFMT(trainsize)}"
                      f"_{config.DIMFMT(r)}"
                      f"_{config.REGFMT(regs)}.pdf")
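# Illustrative invocation of errors_in_time(); the training size, ROM dimension,
# and regularization hyperparameters below are placeholder values, not settings
# taken from the study.
errors_in_time(trainsize=20000, r=40, regs=(200, 40000), cutoff=60000)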
if not os.path.isdir(X_scaler_folder):
    os.makedirs(X_scaler_folder)
X_scaler_file = os.path.join(X_scaler_folder, "X_scaler.save")

now = datetime.datetime.now()
tensorboard_log_folder = os.path.join(
    trained_model_folder,
    "tensorboard_log_folder",
    "run-" + datetime.datetime.strftime(now, format="%Y_%m_%d_%H%M"))
if not os.path.isdir(tensorboard_log_folder):
    os.makedirs(tensorboard_log_folder)

hdf5_model = os.path.join(trained_model_folder,
                          "keras_neural_network" + ".h5")
score_model = os.path.join(trained_model_folder,
                           "eval_keras_neural_network" + ".csv")

# Scaling the data and saving the scaler at the right location.
make_scaler(X_train, X_scaler_file)
X_train_scaled = scale(X_train, X_scaler_file)
X_validation_scaled = scale(X_validation, X_scaler_file)

# Training a model with the selected hyperparameters.
trained_model = train_model(
    X_train_scaled, y_train,
    X_validation_scaled, y_validation,
    hdf5_model, score_model,
    tensorboard_log_folder,
    nb_additional_layers=nb_additional_layers,
    learning_rate=lr,
    nb_epochs=500,
    minibatch_size=8)
make_train_validation_test_split = True
if make_train_validation_test_split:
    iris_dataset_df = pd.read_csv(IRIS_DATASET_FILE)
    train_validation_test_split(iris_dataset_df)

train_set_df = pd.read_csv(
    os.path.join(TRAIN_VALIDATION_TEST_FOLDER, "train_set.csv"), sep=";")
X_train, y_train = X_y_extraction(train_set_df)

validation_set_df = pd.read_csv(
    os.path.join(TRAIN_VALIDATION_TEST_FOLDER, "validation_set.csv"), sep=";")
X_validation, y_validation = X_y_extraction(validation_set_df)

# Scaling the input data.
make_scaler(X_train, X_SCALER_FILE)
X_train_scaled = scale(X_train, X_SCALER_FILE)
X_validation_scaled = scale(X_validation, X_SCALER_FILE)

# Training.
trained_model = train_model(
    X_train_scaled, y_train,
    X_validation_scaled, y_validation,
    HDF5_MODEL, SCORE_MODEL,
    tensorboard_log_folder,
    nb_additional_layers=1,
    learning_rate=0.01,
    nb_epochs=500,
    minibatch_size=8)
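# Hedged follow-up sketch: evaluate the trained network on the held-out test
# split using make_pred() from above. This assumes train_validation_test_split()
# also writes a "test_set.csv" with the same ';' separator; that file name is an
# assumption, not confirmed by the code above.
test_set_df = pd.read_csv(os.path.join(TRAIN_VALIDATION_TEST_FOLDER,
                                       "test_set.csv"), sep=";")
X_test, y_test = X_y_extraction(test_set_df)
y_test_pred = make_pred(X_test, X_SCALER_FILE, HDF5_MODEL)
print("Test predictions shape:", y_test_pred.shape)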