def get_predictions(
    plevels: List[float],
    filepath: Union[
        os.PathLike,
        str] = "/data/cees/zespinos/runs/feature_experiments/40_levels/year_three/evaluate/gwfu/data_avail",
):
    # Averaging-window boundaries: 60 timesteps each across the 1440-timestep record
    months = [60 * i for i in range(24 * 1 + 1)]
    data_avail = ["three", "six", "nine", "full_features"]
    data_predictions = []
    for avail in data_avail:
        avail_metrics = from_pickle(
            os.path.join(filepath, avail, "metrics.pkl"))
        avail_pred = from_pickle(
            os.path.join(filepath, avail, "predictions.pkl"))
        avail_truth = avail_pred["targets"].T
        # (levels, time, lat, lon) -> (time, levels, lat, lon), keep only the levels of interest
        avail_truth = avail_truth.reshape(33, 1440, 64, 128).swapaxes(1, 0)
        avail_truth = avail_truth[:, LOWEST_PLEVEL:LAST_PLEVEL, :, :]

        avail_pred = avail_pred["predictions"].T
        avail_pred = avail_pred.reshape(33, 1440, 64, 128).swapaxes(1, 0)
        avail_pred = avail_pred[:, LOWEST_PLEVEL:LAST_PLEVEL, :, :]

        avail_pred = generate_monthly_averages(avail_pred, months)
        data_predictions.append(avail_pred)
        print(f"{avail}: ", avail_metrics["r_squared"])

    avail_pred = None  # release the last prediction array
    # Append the truth field (taken from the last run loaded above)
    avail_truth = generate_monthly_averages(avail_truth, months)
    data_predictions.append(avail_truth)
    data_predictions = np.concatenate(data_predictions, axis=1)

    return data_predictions
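
# Usage sketch (not from the original source): calls get_predictions with its
# default filepath. The plevels values are illustrative placeholders;
# LOWEST_PLEVEL, LAST_PLEVEL, from_pickle, and generate_monthly_averages are
# assumed to be available in this module, as in the function above.
if __name__ == "__main__":
    example_plevels = [1.0, 3.0, 10.0, 30.0]  # hypothetical pressure levels (hPa)
    preds = get_predictions(plevels=example_plevels)
    # Monthly averages for the four data-availability runs plus the truth field,
    # concatenated along axis 1
    print(preds.shape)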
Example #2
def predicted_qbo(
    plevels: List[float],
    filepath: Union[
        os.PathLike,
        str] = "/data/cees/zespinos/runs/feature_experiments/40_levels",
):

    years = ["year_two", "year_one", "year_three", "year_four", "year_five"]
    year_predictions = []
    for year in years:
        year_metrics = from_pickle(
            os.path.join(filepath,
                         f"{year}/evaluate/gwfu/full_features/metrics.pkl"))
        year_data = from_pickle(
            os.path.join(
                filepath,
                f"{year}/evaluate/gwfu/full_features/predictions.pkl"))
        year_data = year_data["predictions"].T
        year_data = year_data.reshape(33, 1440, 64, 128).swapaxes(1, 0)
        year_predictions.append(year_data[:, LOWEST_PLEVEL:LAST_PLEVEL, :, :])
        print(f"{year}: ", year_metrics["r_squared"])

    year_data = None  # release the last per-year array
    # Averaging-window boundaries: 60 timesteps per window across all concatenated years
    months = [60 * i for i in range(24 * len(year_predictions) + 1)]

    year_predictions = np.concatenate(year_predictions, axis=0)

    year_predictions = generate_monthly_averages(year_predictions, months)

    return year_predictions
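
# Usage sketch (illustrative, not from the original source): builds the
# multi-year prediction record and reduces it to a global mean per averaging
# window. The (window, level, lat, lon) axis layout is an assumption based on
# the reshape/swapaxes pattern above.
if __name__ == "__main__":
    import numpy as np

    qbo = predicted_qbo(plevels=[1.0, 3.0, 10.0, 30.0])  # placeholder plevels
    global_mean = qbo.mean(axis=(-2, -1))  # average over the lat/lon grid
    print(qbo.shape, global_mean.shape)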
Example #3
    def __init__(self, scaler: Dict[str, Union[str, bool]],
                 save_path: Union[os.PathLike, str]):
        self.save_path = save_path
        if scaler["load"]:
            # load previously fitted scalers from disk
            self.tensors_scaler = from_pickle(
                os.path.join(scaler["path"], TENSORS_SCALER_FN))
            self.gwfu_scaler = from_pickle(
                os.path.join(scaler["path"], GWFU_SCALER_FN))
            self.gwfv_scaler = from_pickle(
                os.path.join(scaler["path"], GWFV_SCALER_FN))
        else:
            # otherwise start with fresh, unfitted scalers
            self.tensors_scaler = StandardScaler()
            self.gwfu_scaler = StandardScaler()
            self.gwfv_scaler = StandardScaler()
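
# Sketch (illustrative) of producing the pickled scaler that the "load" branch
# above expects. Assumes to_pickle from lrgwd.utils.io and the TENSORS_SCALER_FN
# constant used above; the path and stand-in training data are placeholders.
import numpy as np
from sklearn.preprocessing import StandardScaler

fitted = StandardScaler().fit(np.random.randn(100, 33))  # stand-in training data
to_pickle(path=os.path.join("/tmp/scalers", TENSORS_SCALER_FN), obj=fitted)
# A config of {"load": True, "path": "/tmp/scalers"} would then load it back.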
def get_num_samples(source_path: Union[str, os.PathLike]) -> int:
    """
    Finds the number of samples in source_path/tensors.csv
    Subtract one to remove header from count
    """
    metadata = from_pickle(os.path.join(source_path, "metadata.pkl"))
    return metadata["total_samples"]
Example #5
def get_metadata(source_path: Union[os.PathLike, str]):
    """
    Gets metadata from source_path
    """
    metadata_fn = os.path.join(source_path, "metadata.pkl")
    metadata = from_pickle(metadata_fn)
    return metadata
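
# Sketch (illustrative) of the metadata.pkl fields the surrounding code relies
# on: "total_samples" (get_num_samples, the data generators) and
# "input_shape"/"output_shape" (model construction in main). Values and the
# path are placeholders; to_pickle is assumed from lrgwd.utils.io.
example_metadata = {
    "total_samples": 1_000_000,
    "input_shape": 123,
    "output_shape": 33,
}
to_pickle(path=os.path.join("/tmp/run_in", "metadata.pkl"), obj=example_metadata)
print(get_metadata("/tmp/run_in")["total_samples"])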
def get_eval(path):
    """Loads a predictions.pkl and returns (targets, predictions), each reshaped
    to (time, level, lat, lon)."""
    data = from_pickle(path)
    pred = data["predictions"].T
    pred = pred.reshape(33, 1440, 64, 128).swapaxes(1, 0)

    targets = data["targets"].T
    targets = targets.reshape(33, 1440, 64, 128).swapaxes(1, 0)

    return targets, pred
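
# Usage sketch (illustrative path): load a predictions.pkl with get_eval and
# compute a per-level mean absolute error, assuming the (time, level, lat, lon)
# layout noted in the docstring above.
import numpy as np

targets, pred = get_eval("/tmp/evaluate/gwfu/full_features/predictions.pkl")
per_level_mae = np.abs(pred - targets).mean(axis=(0, 2, 3))  # one value per level
print(per_level_mae.shape)  # (33,)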
Example #7
def save_metadata(
    save_path: Union[os.PathLike, str],
    source_path: Union[os.PathLike, str],
    metadata: Any,
):
    prev_metadata = from_pickle(os.path.join(source_path, "metadata.pkl"))
    # Shallow Merge
    metadata = {**prev_metadata, **metadata}
    to_pickle(path=os.path.join(save_path, "metadata.pkl"), obj=metadata)
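
# Usage sketch (illustrative paths): save_metadata does a shallow merge, so keys
# in the new dict override those already stored in source_path/metadata.pkl and
# every other key is carried over unchanged.
save_metadata(
    save_path="/tmp/run_out",
    source_path="/tmp/run_in",
    metadata={"total_samples": 2_000_000, "notes": "illustrative only"},
)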
Example #8
def aggregate_experiment_results(metrics_path, experiments):
    u_experiments, v_experiments = defaultdict(dict), defaultdict(dict)
    for experiment, label in experiments.items():
        if experiment not in ["vtemp", "vhght", "vlatlon"]:
            metrics = from_pickle(
                os.path.join(metrics_path, "gwfu", experiment, "metrics.pkl"))
            u_experiments[label]["maes"] = metrics["maes"]
            u_experiments[label]["rmse"] = metrics["rmse"]
            u_experiments[label]["r_squared"] = metrics["r_squared"]

        if experiment not in ["utemp", "uhght", "ulatlon"]:
            metrics = from_pickle(
                os.path.join(metrics_path, "gwfv", experiment, "metrics.pkl"))
            v_experiments[label]["maes"] = metrics["maes"]
            v_experiments[label]["rmse"] = metrics["rmse"]
            v_experiments[label]["r_squared"] = metrics["r_squared"]

    return u_experiments, v_experiments
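
# Usage sketch (illustrative): `experiments` maps an experiment directory name to
# the label used as the key in the aggregated results; metrics_path is a
# placeholder. The u-only/v-only names mirror the skip lists in the function.
experiments = {
    "full_features": "All features",
    "utemp": "u and T",   # zonal-wind experiment, skipped for gwfv
    "vtemp": "v and T",   # meridional-wind experiment, skipped for gwfu
}
u_results, v_results = aggregate_experiment_results("/tmp/metrics", experiments)
print(dict(u_results).keys(), dict(v_results).keys())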
Example #9
    def __init__(self,
        source_path: Union[os.PathLike, str],
        scaler_path: Union[os.PathLike, str],
        num_samples: Union[None, int],
        target: str,
        save_path: Union[os.PathLike, str],
        model,
    ) -> None:

        X_fp = os.path.join(source_path, "tensors.csv")
        Y_fp = os.path.join(source_path, f"{target}.csv")

        # Get Scalers
        X_scaler_fp = os.path.join(scaler_path, "tensors_scaler.pkl")
        self.X_scaler = from_pickle(X_scaler_fp)

        Y_scaler_fp = os.path.join(scaler_path, f"{target}_scaler.pkl")
        self.Y_scaler = from_pickle(Y_scaler_fp)

        # Read a single chunk of num_samples rows; the early return below stops
        # after the first pass through the loop
        for X, Y in zip(
                pd.read_csv(X_fp, header=None, chunksize=num_samples),
                pd.read_csv(Y_fp, header=None, chunksize=num_samples)
        ):

            # Tensors
            self.X_raw = X.to_numpy()
            self.X = self.X_scaler.transform(self.X_raw)

            # Targets
            self.Y_raw = Y.to_numpy()
            self.Y = self.Y_scaler.transform(self.Y_raw)

            # Predictions
            self.Y_pred = self.predict(model)

            return
Example #10
def main(**params):
    """
    Train Model
    """
    with tracking(
        experiment="train",
        params=params,
        local_dir=params["save_path"],
        tracking=params["tracking"]
    ):
        target = params["target"]
        os.makedirs(params["save_path"], exist_ok=True)
        metadata = get_metadata(params["source_path"][0])

        # Get Model
        if params["model_path"] is None:
            logger.info("Training new model")
            Model = get_model(params["model"])
            model = Model.build((metadata["input_shape"],),
                                metadata["output_shape"],
                                params["learning_rate"])
        else:
            model_path = params["model_path"]
            logger.info(f"Training model from {model_path}")
            model = load_model(params["model_path"], params["learning_rate"])
            model.summary()


        # Get scalers
        tensors_scaler = from_pickle(os.path.join(params["scaler_path"], "tensors_scaler.pkl"))
        target_scaler = from_pickle(os.path.join(params["scaler_path"], f"{target}_scaler.pkl"))


        # Create data generators
        train_generator = DataGenerator(
            tensors_filepath=[os.path.join(path, "train_tensors.csv") for path in params["source_path"]],
            target_filepath=[os.path.join(path, f"train_{target}.csv") for path in params["source_path"]],
            batch_size=params["batch_size"],
            chunk_size=params["chunk_size"],
            num_samples=metadata["total_samples"]*len(params["source_path"]),
            tensors_scaler=tensors_scaler,
            target_scaler=target_scaler,
            name="train",
            train_with_random=params["train_with_random"],
        )

        val_generator = DataGenerator(
            tensors_filepath=[os.path.join(path, "val_tensors.csv") for path in params["source_path"]],
            target_filepath=[os.path.join(path, f"val_{target}.csv") for path in params["source_path"]],
            batch_size=params["batch_size"],
            chunk_size=params["chunk_size"],
            num_samples=metadata["total_samples"]*len(params["source_path"]),
            tensors_scaler=tensors_scaler,
            target_scaler=target_scaler,
            name="val",
            train_with_random=params["train_with_random"],
        )

        # Fit Model
        callbacks = get_callbacks(params["save_path"], params["model"])
        # model.run_eagerly = True
        history = model.fit(
            x=train_generator,
            validation_data=val_generator,
            steps_per_epoch=params["steps_per_epoch"],
            validation_steps=params["validation_steps"],
            epochs=params["epochs"],
            verbose=params["verbose"],
            callbacks=callbacks,
            use_multiprocessing=params["use_multiprocessing"],
        )
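
# Invocation sketch (illustrative values): the keys below are exactly those read
# inside main(); the paths, sizes, and model name are placeholders rather than
# values from the original project.
example_params = {
    "save_path": "/tmp/train_run",
    "tracking": False,
    "target": "gwfu",
    "source_path": ["/tmp/split_data"],
    "scaler_path": "/tmp/scalers",
    "model_path": None,          # train a new model rather than resume from disk
    "model": "baseline",         # hypothetical name passed to get_model()
    "learning_rate": 1e-4,
    "batch_size": 1024,
    "chunk_size": 100_000,
    "train_with_random": False,
    "steps_per_epoch": None,
    "validation_steps": None,
    "epochs": 10,
    "verbose": 1,
    "use_multiprocessing": False,
}
main(**example_params)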
Example #11
    def __init__(
        self,
        source_path: Union[os.PathLike, str],
        scaler_path: Union[os.PathLike, str],
        num_samples: Union[None, float],
        target: str,
        remove_outliers: Union[str, float],
        save_path: Union[os.PathLike, str],
        model,
        evaluate_with_random: bool = False,
    ) -> None:

        test_tensors_fp = os.path.join(source_path, "tensors.csv")
        test_targets_fp = os.path.join(source_path, f"{target}.csv")

        # Get Scalers
        tensors_scaler_fp = os.path.join(scaler_path, "tensors_scaler.pkl")
        tensors_scaler = from_pickle(tensors_scaler_fp)

        target_scaler_fp = os.path.join(scaler_path, f"{target}_scaler.pkl")
        target_scaler = from_pickle(target_scaler_fp)

        self.predictions = []
        self.targets = []
        chunksize = 100000
        num_total_predictions = 0
        if num_samples is not None and int(num_samples) < chunksize:
            num_samples = int(num_samples)
            chunksize = num_samples

        for test_tensors, test_targets in tqdm(
                zip(
                    pd.read_csv(test_tensors_fp,
                                header=None,
                                chunksize=chunksize),
                    pd.read_csv(test_targets_fp,
                                header=None,
                                chunksize=chunksize),
                ), "Load test data"):
            if num_samples is not None and num_total_predictions >= int(
                    num_samples):
                break

            test_tensors = test_tensors.to_numpy()
            test_targets = test_targets.to_numpy()

            # Transform Targets
            test_tensors = tensors_scaler.transform(test_tensors)
            if evaluate_with_random:
                test_tensors = np.random.normal(loc=0.0,
                                                scale=1.0,
                                                size=test_tensors.shape)

            self.targets.append(test_targets)
            self.predictions.append(
                self.predict(
                    model=model,
                    tensors=test_tensors,
                    target_scaler=target_scaler,
                ))

            num_total_predictions += chunksize

        # Concatenate the list of per-chunk arrays directly; wrapping it in
        # np.array first can fail when the final chunk is smaller than the rest
        self.predictions = np.concatenate(self.predictions, axis=0)
        self.targets = np.concatenate(self.targets, axis=0)

        # Removes outliers and returns dictionary keyed on each pressure level
        self.plevel_predictions, self.plevel_targets = self.split_predictions_on_plevel(
            predictions=self.predictions,
            targets=self.targets,
            outliers=remove_outliers,
        )

        # Save unaltered predictions and targets
        to_pickle(path=os.path.join(save_path, "predictions.pkl"),
                  obj={
                      "predictions": self.predictions,
                      "targets": self.targets,
                  })
Example #12
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lrgwd.utils.io import from_pickle

gwfu_scaler = from_pickle("../runs/massive/split/gwfu_scaler.pkl")

for gwfu_chunk, tensors_chunk in zip(
        pd.read_csv("../runs/massive/split/train_gwfu.csv", chunksize=100000),
        pd.read_csv("../runs/massive/split/train_tensors.csv",
                    chunksize=100000)):
    gwfu_chunk = gwfu_chunk.to_numpy()
    tensors_chunk = tensors_chunk.to_numpy()
    break  # only the first chunk is needed for this exploratory histogram

scaled_gwfu_chunk = gwfu_scaler.transform(gwfu_chunk)
plevels = gwfu_chunk[0].shape[0]

for plevel in reversed(range(plevels)):

    # raw_gwfu = gwfu_chunk[:,plevel]
    scaled_gwfu = scaled_gwfu_chunk[:, plevel]

    fig = plt.figure(figsize=(8, 6))
    plt.hist([scaled_gwfu], bins=1000, label=["scaled_gwfu"])
    plt.xlabel("scaled gwfu (standardized, unitless)", size=14)
    plt.ylabel("Count", size=14)
    plt.title(f"Histogram of scaled_gwfu for Plevel {plevel}")
    plt.legend(loc="upper right")
    plt.show()  # render each figure; swap for plt.savefig(...) to write to disk