Пример #1
0
    def fit(
        self,
        dataset: DatasetH,
        evals_result=dict(),
        save_path=None,
    ):
        label_train, label_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["label"],
            data_key=DataHandlerLP.DK_R,
        )
        self.fit_thresh(label_train)
        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        df_train = self.gen_market_label(df_train, label_train)
        df_valid = self.gen_market_label(df_valid, label_valid)

        x_train, y_train, m_train = df_train["feature"], df_train["label"], df_train["market_return"]
        x_valid, y_valid, m_valid = df_valid["feature"], df_valid["label"], df_valid["market_return"]

        evals_result["train"] = []
        evals_result["valid"] = []
        # load pretrained base_model

        if self.base_model == "LSTM":
            pretrained_model = LSTMModel()
        elif self.base_model == "GRU":
            pretrained_model = GRUModel()
        else:
            raise ValueError("unknown base model name `%s`" % self.base_model)

        if self.model_path is not None:
            self.logger.info("Loading pretrained model...")
            pretrained_model.load_state_dict(torch.load(self.model_path, map_location=self.device))

            model_dict = self.ADD_model.enc_excess.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_model.rnn.state_dict().items() if k in model_dict}
            model_dict.update(pretrained_dict)
            self.ADD_model.enc_excess.load_state_dict(model_dict)
            model_dict = self.ADD_model.enc_market.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_model.rnn.state_dict().items() if k in model_dict}
            model_dict.update(pretrained_dict)
            self.ADD_model.enc_market.load_state_dict(model_dict)
            self.logger.info("Loading pretrained model Done...")

        self.bootstrap_fit(x_train, y_train, m_train, x_valid, y_valid, m_valid)

        best_param = copy.deepcopy(self.ADD_model.state_dict())
        save_path = get_or_create_path(save_path)
        torch.save(best_param, save_path)
        if self.use_gpu:
            torch.cuda.empty_cache()
Пример #2
0
    def fit(
            self,
            dataset: DatasetH,
            evals_result=dict(),
            save_path=None,
    ):

        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        #  splits = ['2011-06-30']
        days = df_train.index.get_level_values(level=0).unique()
        train_splits = np.array_split(days, self.n_splits)
        train_splits = [df_train[s[0]:s[-1]] for s in train_splits]
        train_loader_list = [
            get_stock_loader(df, self.batch_size) for df in train_splits
        ]

        save_path = get_or_create_path(save_path)
        stop_steps = 0
        best_score = -np.inf
        best_epoch = 0
        evals_result["train"] = []
        evals_result["valid"] = []

        # train
        self.logger.info("training...")
        self.fitted = True
        best_score = -np.inf
        best_epoch = 0
        weight_mat, dist_mat = None, None

        for step in range(self.n_epochs):
            self.logger.info("Epoch%d:", step)
            self.logger.info("training...")
            weight_mat, dist_mat = self.train_AdaRNN(train_loader_list, step,
                                                     dist_mat, weight_mat)
            self.logger.info("evaluating...")
            train_metrics = self.test_epoch(df_train)
            valid_metrics = self.test_epoch(df_valid)
            self.log_metrics("train: ", train_metrics)
            self.log_metrics("valid: ", valid_metrics)

            valid_score = valid_metrics[self.metric]
            train_score = train_metrics[self.metric]
            evals_result["train"].append(train_score)
            evals_result["valid"].append(valid_score)
            if valid_score > best_score:
                best_score = valid_score
                stop_steps = 0
                best_epoch = step
                best_param = copy.deepcopy(self.model.state_dict())
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        self.model.load_state_dict(best_param)
        torch.save(best_param, save_path)

        if self.use_gpu:
            torch.cuda.empty_cache()
        return best_score
Пример #3
0
    def fit(
        self,
        dataset: DatasetH,
        save_dir: Optional[Text] = None,
    ):
        def _prepare_dataset(df_data):
            return th_data.TensorDataset(
                torch.from_numpy(df_data["feature"].values).float(),
                torch.from_numpy(df_data["label"].values).squeeze().float(),
            )

        def _prepare_loader(dataset, shuffle):
            return th_data.DataLoader(
                dataset,
                batch_size=self.opt_config["batch_size"],
                drop_last=False,
                pin_memory=True,
                num_workers=self.opt_config["num_workers"],
                shuffle=shuffle,
            )

        df_train, df_valid, df_test = dataset.prepare(
            ["train", "valid", "test"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        train_dataset, valid_dataset, test_dataset = (
            _prepare_dataset(df_train),
            _prepare_dataset(df_valid),
            _prepare_dataset(df_test),
        )
        train_loader, valid_loader, test_loader = (
            _prepare_loader(train_dataset, True),
            _prepare_loader(valid_dataset, False),
            _prepare_loader(test_dataset, False),
        )

        save_dir = get_or_create_path(save_dir, return_dir=True)
        self.logger.info("Fit procedure for [{:}] with save path={:}".format(
            self.__class__.__name__, save_dir))

        def _internal_test(ckp_epoch=None, results_dict=None):
            with torch.no_grad():
                train_loss, train_score = self.train_or_test_epoch(
                    train_loader, self.model, self.loss_fn, self.metric_fn,
                    False, None)
                valid_loss, valid_score = self.train_or_test_epoch(
                    valid_loader, self.model, self.loss_fn, self.metric_fn,
                    False, None)
                test_loss, test_score = self.train_or_test_epoch(
                    test_loader, self.model, self.loss_fn, self.metric_fn,
                    False, None)
                xstr = (
                    "train-score={:.6f}, valid-score={:.6f}, test-score={:.6f}"
                    .format(train_score, valid_score, test_score))
                if ckp_epoch is not None and isinstance(results_dict, dict):
                    results_dict["train"][ckp_epoch] = train_score
                    results_dict["valid"][ckp_epoch] = valid_score
                    results_dict["test"][ckp_epoch] = test_score
                return dict(train=train_score,
                            valid=valid_score,
                            test=test_score), xstr

        # Pre-fetch the potential checkpoints
        ckp_path = os.path.join(save_dir,
                                "{:}.pth".format(self.__class__.__name__))
        if os.path.exists(ckp_path):
            ckp_data = torch.load(ckp_path, map_location=self.device)
            stop_steps, best_score, best_epoch = (
                ckp_data["stop_steps"],
                ckp_data["best_score"],
                ckp_data["best_epoch"],
            )
            start_epoch, best_param = ckp_data["start_epoch"], ckp_data[
                "best_param"]
            results_dict = ckp_data["results_dict"]
            self.model.load_state_dict(ckp_data["net_state_dict"])
            self.train_optimizer.load_state_dict(ckp_data["opt_state_dict"])
            self.logger.info(
                "Resume from existing checkpoint: {:}".format(ckp_path))
        else:
            stop_steps, best_score, best_epoch = 0, -np.inf, -1
            start_epoch, best_param = 0, None
            results_dict = dict(train=OrderedDict(),
                                valid=OrderedDict(),
                                test=OrderedDict())
            _, eval_str = _internal_test(-1, results_dict)
            self.logger.info(
                "Training from scratch, metrics@start: {:}".format(eval_str))

        for iepoch in range(start_epoch, self.opt_config["epochs"]):
            self.logger.info(
                "Epoch={:03d}/{:03d} ::==>> Best valid @{:03d} ({:.6f})".
                format(iepoch, self.opt_config["epochs"], best_epoch,
                       best_score))
            train_loss, train_score = self.train_or_test_epoch(
                train_loader,
                self.model,
                self.loss_fn,
                self.metric_fn,
                True,
                self.train_optimizer,
            )
            self.logger.info("Training :: loss={:.6f}, score={:.6f}".format(
                train_loss, train_score))

            current_eval_scores, eval_str = _internal_test(
                iepoch, results_dict)
            self.logger.info("Evaluating :: {:}".format(eval_str))

            if current_eval_scores["valid"] > best_score:
                stop_steps, best_epoch, best_score = (
                    0,
                    iepoch,
                    current_eval_scores["valid"],
                )
                best_param = copy.deepcopy(self.model.state_dict())
            else:
                stop_steps += 1
                if stop_steps >= self.opt_config["early_stop"]:
                    self.logger.info(
                        "early stop at {:}-th epoch, where the best is @{:}".
                        format(iepoch, best_epoch))
                    break
            save_info = dict(
                net_config=self.net_config,
                opt_config=self.opt_config,
                net_state_dict=self.model.state_dict(),
                opt_state_dict=self.train_optimizer.state_dict(),
                best_param=best_param,
                stop_steps=stop_steps,
                best_score=best_score,
                best_epoch=best_epoch,
                results_dict=results_dict,
                start_epoch=iepoch + 1,
            )
            torch.save(save_info, ckp_path)
        self.logger.info("The best score: {:.6f} @ {:02d}-th epoch".format(
            best_score, best_epoch))
        self.model.load_state_dict(best_param)
        _, eval_str = _internal_test("final", results_dict)
        self.logger.info("Reload the best parameter :: {:}".format(eval_str))

        if self.use_gpu:
            torch.cuda.empty_cache()
        self.fitted = True