Example #1
 def select_obs(settings, vec):
     """Selects and return a subset of observations and their indexes
     from vec according to a user selected mode"""
     npoints = VDAInit.__get_npoints_from_shape(vec.shape)
     if settings.OBS_MODE == "rand":
         # Define observations as a random subset of the control state.
         if hasattr(settings, "NOBS"):
             nobs = settings.NOBS
         else:
             nobs = int(settings.OBS_FRAC * npoints)  # number of observations
         assert nobs <= npoints, "You can't select more observations than are in the state space"
         if nobs == npoints:  # then we are selecting all points
             settings.OBS_MODE = "all"
             return VDAInit.__select_all_obs(vec)
         # set seeds so that the selected subset is the same every time
         ML_utils.set_seeds(seed=settings.SEED)
         obs_idx = random.sample(range(npoints), nobs)  # select nobs integers w/o replacement
         observations = np.take(vec, obs_idx)
     elif settings.OBS_MODE == "single_max":
         nobs = 1
         obs_idx = np.argmax(vec)
         obs_idx = [obs_idx]
         observations = np.take(vec, obs_idx)
     elif settings.OBS_MODE == "all":
         observations, obs_idx, nobs = VDAInit.__select_all_obs(vec)
     else:
         raise ValueError("OBS_MODE = {} is not allowed.".format(
             settings.OBS_MODE))
     assert nobs == len(obs_idx)
     return observations, obs_idx, nobs
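A minimal usage sketch, assuming select_obs is exposed as a static method on VDAInit; the Settings stub below is hypothetical and carries only the attributes the function reads:

import numpy as np

class Settings:  # hypothetical stub; the real code uses a settings.config.Config class
    OBS_MODE = "rand"  # one of "rand", "single_max", "all"
    OBS_FRAC = 0.1     # fraction of the state observed (used when NOBS is absent)
    SEED = 42          # forwarded to ML_utils.set_seeds for reproducibility

vec = np.random.randn(1000)
observations, obs_idx, nobs = VDAInit.select_obs(Settings(), vec)
assert nobs == len(obs_idx) == 100  # int(0.1 * 1000)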
Example #2
 def test_set_seeds_normal(self):
     seed = 42
     ML_utils.set_seeds(seed)
     a = np.random.randn(45)
     ML_utils.set_seeds(seed)
     b = np.random.randn(45)
     c = np.random.randn(45)
     assert np.allclose(a, b)
     assert not np.allclose(b, c)
Example #3
    def __init__(self,
                 AE_settings,
                 expdir,
                 batch_sz=BATCH,
                 model=None,
                 start_epoch=None):
        """Initilaizes the AE training class.

        ::AE_settings - a settings.config.Config class with the DA settings
        ::expdir - a directory of form `experiments/<possible_path>` to keep logs
        ::calc_DA_MAE - boolean. If True, training will evaluate DA Mean Absolute Error
            during the training cycle. Note: this is *MUCH* slower
        """

        self.settings = AE_settings

        err_msg = """AE_settings must be an AE configuration class"""
        assert self.settings.COMPRESSION_METHOD == "AE", err_msg

        if model is not None:  # retraining an existing model
            assert start_epoch is not None, "If you are re-training a model you must pass start_epoch"
            assert start_epoch >= 0
            self.start_epoch = start_epoch
            self.model = model
            print("Loaded model, ", end="")
        else:
            self.start_epoch = 0
            self.model = ML_utils.load_model_from_settings(AE_settings)
            print("Initialized model, ", end="")

        print("Number of parameters:",
              sum(p.numel() for p in self.model.parameters()))

        self.batch_sz = batch_sz
        self.settings.batch_sz = batch_sz

        self.expdir = init_expdir(expdir)
        self.settings_fp = self.expdir + "settings.txt"

        if self.settings.SAVE:
            with open(self.settings_fp, "wb") as f:
                pickle.dump(self.settings, f)

        ML_utils.set_seeds()  # set seeds for reproducibility of everything that follows

        self.device = ML_utils.get_device()
        self.columns = [
            "epoch", "reconstruction_err", "DA_MAE", "DA_ratio_improve_MAE",
            "time_DA(s)", "time_epoch(s)"
        ]
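A hedged construction sketch; TrainAE as the enclosing class name and CAEConfig as the settings class are assumptions from context, not confirmed by this listing:

settings = CAEConfig()  # assumption: some Config subclass with COMPRESSION_METHOD = "AE"
trainer = TrainAE(settings, expdir="experiments/CAE_run/")  # hypothetical experiment dir

# Retraining instead requires both a model and the epoch to resume from:
# trainer = TrainAE(settings, "experiments/CAE_run/", model=prev_model, start_epoch=10)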
Example #4
    def train_test_DA_split_maybe_normalize(X, settings):
        """Returns non-overlapping train/test and DA control state data.
        This function also handles normalization (ensuring that only the
        training data is used to compute the normalization mean and std)"""

        M, n = SplitData.get_dim_X(X, settings)

        hist_idx = int(M * settings.HIST_FRAC)
        # select historical data (i.e. the training set in ML terminology)
        # that will be used for normalization
        hist_X = X[:hist_idx]

        #use only the training set to calculate mean and std
        mean = np.mean(hist_X, axis=0)
        std = np.std(hist_X, axis=0)

        # Some std values are zero: set them to 1 so those features are zero post-normalization
        std = np.where(std <= 0., 1, std)

        if settings.NORMALIZE:
            X = (X - mean) / std

        # Split X into historical and present data. We will
        # assimilate "observations" at a single timestep t_DA
        # which corresponds to the control state u_c
        # We will take initial condition u_0, as mean of historical data

        t_DA = M - (settings.TDA_IDX_FROM_END + 1)  # index of Data Assimilation
        assert t_DA > hist_idx, (
            "Cannot select an observation from historical data, and the test set "
            "cannot have zero size.\n"
            "Reduce HIST_FRAC or TDA_IDX_FROM_END to prevent overlap.\n"
            "t_DA = {} and hist_idx = {}".format(t_DA, hist_idx))

        train_X = X[:hist_idx]
        test_X = X[hist_idx:t_DA]
        u_c = X[t_DA]  #control state (for DA)

        if settings.SHUFFLE_DATA:
            ML_utils.set_seeds()
            np.random.shuffle(train_X)
            np.random.shuffle(test_X)

        return train_X, test_X, u_c, X, mean, std
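A small synthetic check of the split, assuming the function is a static method of SplitData and that get_dim_X returns (n_timesteps, n_features) for a 2-D array; the Stub settings class is hypothetical:

import numpy as np

class Stub:  # hypothetical settings with only the attributes the function reads
    HIST_FRAC = 0.8        # first 80% of timesteps are "historical"
    TDA_IDX_FROM_END = 2   # t_DA = M - 3
    NORMALIZE = True
    SHUFFLE_DATA = False   # avoid the set_seeds call in this sketch

X = np.random.randn(100, 5)
train_X, test_X, u_c, X_norm, mean, std = \
    SplitData.train_test_DA_split_maybe_normalize(X, Stub())

assert train_X.shape == (80, 5) and test_X.shape == (17, 5)
assert np.allclose(mean, X[:80].mean(axis=0))  # stats come from the training slice only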
Example #5
    def training_loop_AE(self,
                         device=None,
                         print_every=2,
                         test_every=5,
                         save_every=5,
                         model_dir=None):
        """Runs a torch AE model training loop.
        NOTE: Ensure that the loss_fn is in mode "sum"
        """
        model = self.model
        self.model_dir = model_dir

        if device is None:
            device = ML_utils.get_device()
        self.device = device

        ML_utils.set_seeds()
        train_losses = []
        test_losses = []

        self.start = self.num_epochs_cv + self.start_epoch
        self.end = self.start_epoch + self.num_epoch
        epoch = self.end - 1  #for case where no training occurs

        for epoch in range(self.start, self.end):

            self.epoch = epoch

            train_loss, test_loss = self.train_one_epoch(
                epoch, print_every, test_every)
            train_losses.append(train_loss)
            if test_loss:
                test_losses.append(test_loss)

        if epoch % save_every != 0 and self.model_dir is not None:
            #Save model (if new model hasn't just been saved)
            model_fp_new = "{}{}.pth".format(self.model_dir, epoch)
            torch.save(model.state_dict(), model_fp_new)

        return train_losses, test_losses
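A hedged call sketch, reusing the trainer from the earlier construction sketch; it assumes num_epoch and num_epochs_cv have already been set (e.g. by the cross-validation helper below) and that the loss function is configured in "sum" mode elsewhere:

train_losses, test_losses = trainer.training_loop_AE(
    print_every=2,
    test_every=5,
    save_every=5,
    model_dir="experiments/CAE_run/models/",  # hypothetical; enables the final checkpoint
)
# train_losses has one entry per epoch; test_losses only has entries for the
# epochs on which train_one_epoch evaluated the test set (every test_every epochs).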
Example #6
    def __maybe_cross_val_lr(self, test_every, num_epochs_cv=8):
        if not num_epochs_cv:
            self.num_epochs_cv = 0
            return self.learning_rate
        elif self.num_epoch < num_epochs_cv:
            self.num_epochs_cv = self.num_epoch
        else:
            self.num_epochs_cv = num_epochs_cv

        mult = 1
        if self.settings.BATCH_NORM:  #i.e. generally larger learning_rate with BN
            mult = 5

        mult *= BATCH_MULT  #linear multiply by size of batch: https://arxiv.org/abs/1706.02677

        lrs_base = [0.0001, 0.0003, 0.001]
        lrs = [mult * x for x in lrs_base]

        res = []
        optimizers = []

        for idx, lr in enumerate(lrs):

            ML_utils.set_seeds()  #set seeds before init model
            self.model = ML_utils.load_model_from_settings(self.settings)
            self.optimizer = optim.Adam(self.model.parameters(), lr)
            test_losses = []
            train_losses = []
            print("learning rate:", lr)
            for epoch in range(self.start_epoch,
                               self.num_epochs_cv + self.start_epoch):
                self.epoch = epoch
                train, test = self.train_one_epoch(epoch, self.print_every,
                                                   test_every,
                                                   self.num_epochs_cv)
                if test:
                    test_losses.append(test)
                train_losses.append(train)

            df = pd.DataFrame(train_losses, columns=self.columns)
            train_final = df.tail(1).reconstruction_err

            res.append(train_final.values[0])
            optimizers.append(self.optimizer)

            #save model if best so far

            if res[-1] == min(res):
                best_test = test_losses
                best_train = train_losses
                best_idx = idx
                model_fp_new = "{}{}-{}.pth".format(self.model_dir, epoch, lr)
                torch.save(self.model.state_dict(), model_fp_new)
                best_model = self.model

        self.learning_rate = lrs[best_idx] * 0.8
        self.optimizer = optimizers[best_idx]
        self.model = best_model
        return self.learning_rate, best_train, best_test
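The candidate learning rates scale by 5x under batch norm and linearly with batch size (per the linked paper); a quick worked example with a hypothetical BATCH_MULT of 2:

mult = 5   # BATCH_NORM enabled
mult *= 2  # hypothetical BATCH_MULT of 2
lrs = [mult * x for x in [0.0001, 0.0003, 0.001]]
# -> [0.001, 0.003, 0.01]; the winning lr is then damped by 0.8 for the main loop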
Example #7
 def test_set_seeds_raiseNameError(self):
     os.environ.pop("SEED", None)  # ensure no fallback seed is available
     with pytest.raises(NameError):
         ML_utils.set_seeds()
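Taken together, the two set_seeds tests in this listing pin down a contract: an explicit seed makes subsequent np.random draws reproducible, and a call with no argument and no SEED environment variable raises NameError. A minimal sketch consistent with that contract; seeding torch as well is an assumption based on the torch usage elsewhere in this listing:

import os
import random

import numpy as np
import torch


def set_seeds(seed=None):
    """Seed all RNGs in use, falling back to the SEED environment variable."""
    if seed is None:
        env_seed = os.environ.get("SEED")
        if env_seed is None:
            raise NameError("No seed given and no SEED environment variable set")
        seed = int(env_seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)  # assumption: the torch RNG is seeded too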