Example #1
0
    def _set_network(self):
        """Instantiate the TabNet network and the explain (reducing) matrix."""
        # Gather all architecture hyper-parameters in one place, then build.
        network_kwargs = dict(
            n_d=self.n_d,
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            cat_idxs=self.cat_idxs,
            cat_dims=self.cat_dims,
            cat_emb_dim=self.cat_emb_dim,
            n_independent=self.n_independent,
            n_shared=self.n_shared,
            epsilon=self.epsilon,
            virtual_batch_size=self.virtual_batch_size,
            momentum=self.momentum,
            mask_type=self.mask_type,
        )
        model = tab_network.TabNet(self.input_dim, self.output_dim,
                                   **network_kwargs)
        self.network = model.to(self.device)

        # Maps post-embedding importances back onto the raw input features.
        self.reducing_matrix = create_explain_matrix(
            self.network.input_dim,
            self.network.cat_emb_dim,
            self.network.cat_idxs,
            self.network.post_embed_dim,
        )
Example #2
0
    def init_network(
        self,
        input_dim,
        output_dim,
        n_d,
        n_a,
        n_steps,
        gamma,
        cat_idxs,
        cat_dims,
        cat_emb_dim,
        n_independent,
        n_shared,
        epsilon,
        virtual_batch_size,
        momentum,
        device_name,
        mask_type,
    ):
        """Build the TabNet network on self.device and its explain matrix."""
        model = tab_network.TabNet(
            input_dim,
            output_dim,
            n_d=n_d,
            n_a=n_a,
            n_steps=n_steps,
            gamma=gamma,
            cat_idxs=cat_idxs,
            cat_dims=cat_dims,
            cat_emb_dim=cat_emb_dim,
            n_independent=n_independent,
            n_shared=n_shared,
            epsilon=epsilon,
            virtual_batch_size=virtual_batch_size,
            momentum=momentum,
            device_name=device_name,
            mask_type=mask_type,
        )
        self.network = model.to(self.device)

        # Projects post-embedding feature importances back to input columns.
        self.reducing_matrix = create_explain_matrix(
            self.network.input_dim,
            self.network.cat_emb_dim,
            self.network.cat_idxs,
            self.network.post_embed_dim,
        )
Example #3
0
    def fit(self,
            X_train,
            y_train,
            X_valid=None,
            y_valid=None,
            loss_fn=None,
            weights=0,
            max_epochs=100,
            patience=10,
            batch_size=1024,
            virtual_batch_size=128,
            num_workers=0,
            drop_last=False):
        """Train a neural network stored in self.network
        Using train_dataloader for training data and
        valid_dataloader for validation.

        Parameters
        ----------
            X_train: np.ndarray
                Train set
            y_train : np.array
                Train targets
            X_valid : np.ndarray
                Validation set used for early stopping
            y_valid : np.array
                Validation targets
            loss_fn : callable or None
                Training loss function; None keeps the model's default
            weights : bool or dictionnary
                0 for no balancing
                1 for automated balancing
                dict for custom weights per class
            max_epochs : int
                Maximum number of epochs during training
            patience : int
                Number of consecutive non improving epoch before early stopping
            batch_size : int
                Training batch size
            virtual_batch_size : int
                Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size)
            num_workers : int
                Number of workers used in torch.utils.data.DataLoader
            drop_last : bool
                Whether to drop last batch during training
        """
        # update model name

        self.update_fit_params(X_train, y_train, X_valid, y_valid, loss_fn,
                               weights, max_epochs, patience, batch_size,
                               virtual_batch_size, num_workers, drop_last)

        train_dataloader, valid_dataloader = self.construct_loaders(
            X_train, y_train, X_valid, y_valid, self.updated_weights,
            self.batch_size, self.num_workers, self.drop_last)

        self.network = tab_network.TabNet(
            self.input_dim,
            self.output_dim,
            n_d=self.n_d,
            # BUGFIX: was n_a=self.n_d, silently ignoring the configured n_a.
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            cat_idxs=self.cat_idxs,
            cat_dims=self.cat_dims,
            cat_emb_dim=self.cat_emb_dim,
            n_independent=self.n_independent,
            n_shared=self.n_shared,
            epsilon=self.epsilon,
            virtual_batch_size=self.virtual_batch_size,
            momentum=self.momentum,
            device_name=self.device_name).to(self.device)

        # Matrix mapping post-embedding importances back to input features.
        self.reducing_matrix = create_explain_matrix(
            self.network.input_dim, self.network.cat_emb_dim,
            self.network.cat_idxs, self.network.post_embed_dim)

        self.optimizer = self.optimizer_fn(self.network.parameters(),
                                           lr=self.lr)

        if self.scheduler_fn:
            self.scheduler = self.scheduler_fn(self.optimizer,
                                               **self.scheduler_params)
        else:
            self.scheduler = None

        losses_train = []
        losses_valid = []

        metrics_train = []
        metrics_valid = []

        if self.verbose > 0:
            print("Will train until validation stopping metric",
                  f"hasn't improved in {self.patience} rounds.")
            msg_epoch = f'| EPOCH |  train  |   valid  | total time (s)'
            print('---------------------------------------')
            print(msg_epoch)

        total_time = 0
        # Train until max_epochs or until the validation metric stops
        # improving for `patience` consecutive epochs.
        while (self.epoch < self.max_epochs
               and self.patience_counter < self.patience):
            starting_time = time.time()
            fit_metrics = self.fit_epoch(train_dataloader, valid_dataloader)

            # leaving it here, may be used for callbacks later
            losses_train.append(fit_metrics['train']['loss_avg'])
            losses_valid.append(fit_metrics['valid']['total_loss'])
            metrics_train.append(fit_metrics['train']['stopping_loss'])
            metrics_valid.append(fit_metrics['valid']['stopping_loss'])

            stopping_loss = fit_metrics['valid']['stopping_loss']
            if stopping_loss < self.best_cost:
                self.best_cost = stopping_loss
                self.patience_counter = 0
                # Saving model
                self.best_network = copy.deepcopy(self.network)
                # Updating feature_importances_
                self.feature_importances_ = fit_metrics['train'][
                    'feature_importances_']
            else:
                self.patience_counter += 1

            self.epoch += 1
            total_time += time.time() - starting_time
            if self.verbose > 0:
                if self.epoch % self.verbose == 0:
                    separator = "|"
                    msg_epoch = f"| {self.epoch:<5} | "
                    # Metrics are stored negated; flip the sign for display.
                    msg_epoch += f"{-fit_metrics['train']['stopping_loss']:.5f}"
                    msg_epoch += f' {separator:<2} '
                    msg_epoch += f"{-fit_metrics['valid']['stopping_loss']:.5f}"
                    msg_epoch += f' {separator:<2} '
                    msg_epoch += f" {np.round(total_time, 1):<10}"
                    print(msg_epoch)

        if self.verbose > 0:
            if self.patience_counter == self.patience:
                print(f"Early stopping occured at epoch {self.epoch}")
            print(f"Training done in {total_time:.3f} seconds.")
            print('---------------------------------------')

        self.history = {
            "train": {
                "loss": losses_train,
                "metric": metrics_train
            },
            "valid": {
                "loss": losses_valid,
                "metric": metrics_valid
            }
        }
        # load best models post training
        self.load_best_model()
Example #4
0
    def fit(self,
            X_train,
            y_train,
            X_valid=None,
            y_valid=None,
            loss_fn=None,
            weights=0,
            max_epochs=100,
            patience=10,
            batch_size=1024,
            virtual_batch_size=128):
        """Train a neural network stored in self.network
        Using train_dataloader for training data and
        valid_dataloader for validation.

        Parameters
        ----------
            X_train: np.ndarray
                Train set
            y_train : np.array
                Train targets
            X_valid : np.ndarray
                Validation set used for early stopping
            y_valid : np.array
                Validation targets
            loss_fn : callable or None
                Training loss function; None keeps the model's default
            weights : bool or dictionnary
                0 for no balancing
                1 for automated balancing
                dict for custom weights per class
            max_epochs : int
                Maximum number of epochs during training
            patience : int
                Number of consecutive non improving epoch before early stopping
            batch_size : int
                Training batch size
            virtual_batch_size : int
                Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size)
        """

        self.update_fit_params(X_train, y_train, X_valid, y_valid, loss_fn,
                               weights, max_epochs, patience, batch_size,
                               virtual_batch_size)

        train_dataloader, valid_dataloader = self.construct_loaders(
            X_train, y_train, X_valid, y_valid, self.updated_weights,
            self.batch_size)

        self.network = tab_network.TabNet(
            self.input_dim,
            self.output_dim,
            n_d=self.n_d,
            # BUGFIX: was n_a=self.n_d, silently ignoring the configured n_a.
            n_a=self.n_a,
            n_steps=self.n_steps,
            gamma=self.gamma,
            cat_idxs=self.cat_idxs,
            cat_dims=self.cat_dims,
            cat_emb_dim=self.cat_emb_dim,
            n_independent=self.n_independent,
            n_shared=self.n_shared,
            epsilon=self.epsilon,
            virtual_batch_size=self.virtual_batch_size,
            momentum=self.momentum,
            device_name=self.device_name).to(self.device)

        self.optimizer = self.optimizer_fn(self.network.parameters(),
                                           **self.opt_params)

        if self.scheduler_fn:
            self.scheduler = self.scheduler_fn(self.optimizer,
                                               **self.scheduler_params)
        else:
            self.scheduler = None

        losses_train = []
        losses_valid = []

        metrics_train = []
        metrics_valid = []

        # Train until max_epochs or until the validation metric stops
        # improving for `patience` consecutive epochs.
        while (self.epoch < self.max_epochs
               and self.patience_counter < self.patience):
            print(f"EPOCH : {self.epoch}")
            fit_metrics = self.fit_epoch(train_dataloader, valid_dataloader)
            losses_train.append(fit_metrics['train']['loss_avg'])
            losses_valid.append(fit_metrics['valid']['total_loss'])
            metrics_train.append(fit_metrics['train']['stopping_loss'])
            metrics_valid.append(fit_metrics['valid']['stopping_loss'])

            stopping_loss = fit_metrics['valid']['stopping_loss']
            if stopping_loss < self.best_cost:
                self.best_cost = stopping_loss
                self.patience_counter = 0
                # Saving model
                torch.save(self.network,
                           self.saving_path + f"{self.model_name}.pt")
                # Updating feature_importances_
                self.feature_importances_ = fit_metrics['train'][
                    'feature_importances_']
            else:
                self.patience_counter += 1

            print("Best metric valid: ", self.best_cost)
            self.epoch += 1

            # BUGFIX: guard against verbose == 0, which previously raised
            # ZeroDivisionError in `self.epoch % self.verbose`.
            if self.verbose > 0 and self.epoch % self.verbose == 0:
                plot_losses(losses_train, losses_valid, metrics_train,
                            metrics_valid)

        # load best models post training
        self.load_best_model()
    def __init__(self, context: PyTorchTrialContext):
        """Download the Rossmann-style CSVs, build train/valid datasets and
        the TabNet model, optimizer and LR scheduler for a Determined trial.
        """
        super().__init__(context)

        self.context = context
        # Gradient clipping is currently disabled (clip_value hard-wired to
        # None); set it to a float to enable clip_grad_norm_.
        clip_value = None
        if clip_value:
            self.clip_grads = lambda params: nn.utils.clip_grad_norm_(
                params, clip_value)
        else:
            self.clip_grads = None

        path_train = f"https://{S3_BUCKET}.s3-us-west-2.amazonaws.com/{S3_KEY}/{TRAIN_CSV}"
        path_valid = f"https://{S3_BUCKET}.s3-us-west-2.amazonaws.com/{S3_KEY}/{VAL_CSV}"
        path_store = f"https://{S3_BUCKET}.s3-us-west-2.amazonaws.com/{S3_KEY}/{STORE_CSV}"

        print("Downloading data")
        urllib.request.urlretrieve(path_train, TRAIN_CSV)
        urllib.request.urlretrieve(path_valid, VAL_CSV)
        urllib.request.urlretrieve(path_store, STORE_CSV)
        print("Done downloading data")

        def _join_store(df_data, df_store):
            # Left-join store features onto the data frame; missing store
            # rows become 0 (works for both pandas and cudf frames).
            return df_data.join(df_store,
                                how='left',
                                on='store_id',
                                rsuffix='store').fillna(0)

        def _split_xy(joined):
            # Features = all columns except 12 and 13; target = column 12.
            # NOTE(review): column positions are assumed stable — verify
            # against the CSV schema.
            cols = joined.columns.tolist()
            X = joined[cols[:12] + cols[14:]].values.astype(np.float32)
            y = joined[cols[12]].values.astype(np.float32)
            return X, y

        # CUDF
        if self.context.get_hparam("cudf"):
            print("Reading CSVs with cudf")
            df_train = cudf.read_csv(TRAIN_CSV)
            df_valid = cudf.read_csv(VAL_CSV)
            df_store = cudf.read_csv(STORE_CSV)

            print("Joining dataframes")
            df_train_joined = _join_store(df_train, df_store)
            df_val_joined = _join_store(df_valid, df_store)
            print("Done joining")

            X_train, y_train = _split_xy(df_train_joined)
            X_valid, y_valid = _split_xy(df_val_joined)
            print("Done loading data")
            # cudf produces device (cupy) arrays; move them back to host.
            self.train_dataset = TorchDataset(cupy.asnumpy(X_train),
                                              cupy.asnumpy(y_train))
            self.valid_dataset = TorchDataset(cupy.asnumpy(X_valid),
                                              cupy.asnumpy(y_valid))
        else:
            print("Reading CSVs with pandas")
            df_train = pd.read_csv(TRAIN_CSV)
            df_valid = pd.read_csv(VAL_CSV)
            df_store = pd.read_csv(STORE_CSV)

            print("Joining dataframes")
            df_train_joined = _join_store(df_train, df_store)
            df_val_joined = _join_store(df_valid, df_store)
            print("Done joining")

            X_train, y_train = _split_xy(df_train_joined)
            X_valid, y_valid = _split_xy(df_val_joined)

            print("Done loading data")
            self.train_dataset = TorchDataset(X_train, y_train)
            self.valid_dataset = TorchDataset(X_valid, y_valid)

        # Hyper-parameter is the exponent: lambda_sparse = 10 ** -hparam.
        self.lambda_sparse = 10**(-self.context.get_hparam("lambda_sparse"))
        self.loss_fn = nn.functional.mse_loss
        self.optimizer_params = {
            "lr": self.context.get_hparam("learning_rate"),
        }
        self.model = tab_network.TabNet(
            input_dim=22,
            output_dim=1,
            n_d=self.context.get_hparam("n_d"),
            n_a=self.context.get_hparam("n_a"),
            n_steps=self.context.get_hparam("n_steps"),
            gamma=self.context.get_hparam("gamma"),
            cat_idxs=[],
            cat_dims=[],
            cat_emb_dim=1,
            n_independent=2,
            n_shared=2,
            epsilon=1e-15,
            # Hyper-parameter is a multiplier on a base of 256.
            virtual_batch_size=256 *
            self.context.get_hparam("virtual_batch_size"),
            momentum=self.context.get_hparam("momentum"),
            mask_type="sparsemax")
        self.model = self.context.wrap_model(self.model)
        self.optimizer = self.context.wrap_optimizer(
            opt.Adam(self.model.parameters(), **self.optimizer_params))
        # Multiplicative decay: lr is scaled by lr_decay every manual step.
        lmbda = lambda epoch: self.context.get_hparam("lr_decay")
        self.lr_scheduler = self.context.wrap_lr_scheduler(
            opt.lr_scheduler.MultiplicativeLR(self.optimizer, lr_lambda=lmbda),
            step_mode=LRScheduler.StepMode.MANUAL_STEP)