Example No. 1
def preprocess(fe_cfg: Config):
    fe_name = fe_cfg.basic.name
    target_col = fe_cfg.column.target
    train_path = f"{DataPath.interim.train}.jbl"
    test_path = f"{DataPath.interim.test}.jbl"

    for path, is_train in zip([train_path, test_path], [True, False]):
        df = Jbl.load(path)
        if "frame" in fe_cfg.__annotations__:
            if "window" in fe_cfg.frame.__annotations__:
                frame_column = fe_cfg.frame.column
                frame_window = fe_cfg.frame.window
                df = _filter_frame_window(df, frame_column, frame_window)
            else:
                frame_start_q = fe_cfg.frame.start
                frame_end_q = fe_cfg.frame.end
                df = _filter_frame(df, frame_start_q, frame_end_q)
        df_processed = _build_features(df, is_train, fe_cfg)
        if is_train:
            X = df_processed.drop(target_col, axis=1)
            y = df_processed[target_col]
        else:
            X = df_processed.copy()
            y = None
        X_save_path = (f"{DataPath.processed.X_train}_{fe_name}.jbl"
                       if is_train else
                       f"{DataPath.processed.X_test}_{fe_name}.jbl")
        Jbl.save(X, X_save_path)
        if is_train:
            y_save_path = f"{DataPath.processed.y_train}_{fe_name}.jbl"
            Jbl.save(y, y_save_path)
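
The Jbl helper used throughout these examples is not shown on this page. A minimal sketch consistent with the Jbl.save(obj, path) / Jbl.load(path) call sites, assuming it is a thin wrapper over joblib, would be:

import joblib


class Jbl:
    """Hypothetical joblib wrapper matching the call sites above."""

    @staticmethod
    def save(obj, path: str) -> None:
        joblib.dump(obj, path)

    @staticmethod
    def load(path: str):
        return joblib.load(path)
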
Example No. 2
def join_data():
    train_files = os.listdir(DataPath.raw.train_dir)
    test_files = os.listdir(DataPath.raw.test_dir)

    for files, is_train in zip([train_files, test_files], [True, False]):
        dfs = []
        for f in files:
            if is_train:
                path = f"{DataPath.raw.train_dir}/{f}"
            else:
                path = f"{DataPath.raw.test_dir}/{f}"
            df_tmp = pd.read_csv(path)
            stem = os.path.splitext(f)[0]
            if int(stem) < 400:
                df_tmp["is_screen_play"] = 1
            else:
                df_tmp["is_screen_play"] = 0
            df_tmp["filename"] = stem
            dfs.append(df_tmp)
        df = pd.concat(dfs, axis=0, ignore_index=True)
        df = df.sort_values(["filename", "frame"]).reset_index(drop=True)
        if not is_train:
            df = df.drop("is_screen_play", axis=1)
        del dfs
        gc.collect()

        save_path = (f"{DataPath.interim.train}.jbl"
                     if is_train else f"{DataPath.interim.test}.jbl")
        Jbl.save(df, save_path)
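
DataPath is likewise defined elsewhere in the repository. A hypothetical sketch consistent with the attributes used in these examples; the concrete directory names are assumptions:

from types import SimpleNamespace

# Hypothetical path constants; the attribute names come from the examples
# above, the concrete locations do not.
DataPath = SimpleNamespace(
    raw=SimpleNamespace(train_dir="data/raw/train",
                        test_dir="data/raw/test"),
    interim=SimpleNamespace(train="data/interim/train",
                            test="data/interim/test"),
    processed=SimpleNamespace(prefix="data/processed",
                              X_train="data/processed/X_train",
                              y_train="data/processed/y_train",
                              X_test="data/processed/X_test"),
)
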
Example No. 3
 def save_model(self, model_path: str) -> None:
     """モデルの保存を行う
     :param path: モデルの保存先パス
     """
     model_path_dir = os.path.dirname(model_path)
     mkdir(model_path_dir)
     Jbl.save(self.model, model_path)
Example No. 4
    def __init__(self, cfgs: Dict[str, Config], logger):
        super().__init__(cfgs, logger)
        self.X_train = Jbl.load(
            f"{DataPath.processed.X_train}_{self.fe_name}.jbl")
        self.y_train = Jbl.load(
            f"{DataPath.processed.y_train}_{self.fe_name}.jbl")
        self.X_test = Jbl.load(
            f"{DataPath.processed.X_test}_{self.fe_name}.jbl")

        self.best_threshold = 0.0
Example No. 5
    def __init__(self, cfgs: Dict[str, Config], logger: logging.Logger):
        blend_cfg = cfgs["blend"]

        self.description = blend_cfg.basic.description
        self.exp_name = blend_cfg.basic.exp_name
        self.run_name = blend_cfg.basic.name
        self.run_id = None
        self.fe_name = blend_cfg.basic.fe_name
        self.run_cfg = blend_cfg
        self.params = blend_cfg.params
        self.cv = generate_cv(blend_cfg)
        self.column = blend_cfg.column
        self.cat_cols = (blend_cfg.column.categorical if "categorical"
                         in blend_cfg.column.__annotations__ else None)
        self.kfold = blend_cfg.kfold
        self.evaluation_metric = blend_cfg.model.eval_metric
        self.logger = logger

        @dataclass
        class advanced:
            PseudoRunner: PseudoRunner = None
            ResRunner: ResRunner = None
            AdversarialValidation: AdversarialValidation = None
            Selector: Selector = None

        self.advanced = advanced

        if blend_cfg.model.name in models_map:
            self.model_cls = models_map[blend_cfg.model.name]
        else:
            raise ValueError(f"model_name {blend_cfg.model.name} not found")

        trs = []
        tes = []
        for run_name, _ in blend_cfg.result.__annotations__.items():
            tr = Jbl.load(f"{ModelPath.prediction}/{run_name}-train.jbl")
            te = Jbl.load(f"{ModelPath.prediction}/{run_name}-test.jbl")
            trs.append(tr)
            tes.append(te)
        train = pd.DataFrame(trs).T
        train.columns = list(blend_cfg.result.__annotations__.keys())
        test = pd.DataFrame(tes).T
        test.columns = list(blend_cfg.result.__annotations__.keys())
        target = [1] * 400 + [0] * (1528 - 400)
        train["y"] = target
        self.X_train = train.drop("y", axis=1)
        self.y_train = train["y"]
        self.X_test = test.copy()

        self.best_threshold = 0.0
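
In the loading loop above, pd.DataFrame(trs).T turns the list of per-run OOF prediction vectors into an (n_samples, n_runs) feature matrix for the blender. A tiny self-contained demonstration:

import pandas as pd

# Two hypothetical OOF prediction vectors from earlier runs.
trs = [[0.1, 0.8, 0.3], [0.2, 0.9, 0.4]]
train = pd.DataFrame(trs).T          # one column per run, one row per sample
train.columns = ["run_a", "run_b"]   # hypothetical run names
print(train)
#    run_a  run_b
# 0    0.1    0.2
# 1    0.8    0.9
# 2    0.3    0.4
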
Example No. 6
 def submission(self):
     if self.advanced and "separate" in self.advanced.__annotations__:
         sub = Jbl.load(
             f"{DataPath.processed.prefix}/X_test_{self.fe_name}.jbl"
         ).loc[:, [self.separate_col]]
         separate_col_uniques = sub[self.separate_col].unique()
         results = {}
         for separate_col_val in separate_col_uniques:
             pred = Jbl.load(
                 f"{ModelPath.prediction}/{self.run_name}-{separate_col_val}-test.jbl"
             )
             sub_separate_idx = sub[sub[self.separate_col] ==
                                    separate_col_val].index
             result = {
                 idx_: [p_]
                 for idx_, p_ in zip(sub_separate_idx, pred)
             }
             results.update(result)
         sub = (pd.DataFrame(results).T.reset_index().rename(
             columns={
                 "index": "id",
                 0: self.column.target
             }).sort_values("id").reset_index(drop=True))
         sub.loc[:, "id"] = (
             Jbl.load(f"{DataPath.interim.test}").loc[:, ["id"]].values)
         pred = sub[self.column.target].values
     else:
         # sub = Jbl.load(f"{DataPath.interim.test}").loc[:, ["id"]]
         # pred = Jbl.load(f"{ModelPath.prediction}/{self.run_name}-test.jbl")
         sub = pd.DataFrame()
         pred = Jbl.load(
             f"{ModelPath.prediction}/{self.run_name}-test-binarized.jbl")
     pred = pred.reshape(-1)
     if self.advanced and "predict_exp" in self.advanced.__annotations__:
         sub[self.column.target] = np.exp(pred)
     else:
         sub[self.column.target] = pred
     # sub.to_csv(
     #     f"{DataPath.submission}/submission_{self.run_name}.csv", index=False,
     # )
     sub.to_csv(
         f"{ModelPath.submission}/submission_{self.run_name}.csv",
         index=False,
         header=None,
     )
Example No. 7
 def submission(self):
     pred = Jbl.load(
         f"{PATH['prefix']['prediction']}/{self.run_name}-test.jbl")
     sub = Loader().load_test().loc[:, ["id"]]
     if self.advanced and "predict_exp" in self.advanced:
         sub[self.cols_definition["target_col"]] = np.exp(pred)
     else:
         sub[self.cols_definition["target_col"]] = pred
     sub.to_csv(
         f"{PATH['prefix']['submission']}/submission_{self.run_name}.csv",
         index=False,
     )
Example No. 8
    def __init__(self, config: dict, cv):
        self.exp_name = config["exp_name"]
        self.run_name = config["run_name"]
        self.run_id = None
        self.fe_name = config["fe_name"]
        self.X_train = Jbl.load(
            f"{PATH['prefix']['processed']}/X_train_{config['fe_name']}.jbl")
        self.y_train = Jbl.load(
            f"{PATH['prefix']['processed']}/y_train_{config['fe_name']}.jbl")
        self.X_test = Jbl.load(
            f"{PATH['prefix']['processed']}/X_test_{config['fe_name']}.jbl")
        self.evaluation_metric = config["evaluation_metric"]
        self.params = config["params"]
        self.cols_definition = config["cols_definition"]
        self.kfold = config["kfold"]["method"]
        self.cv = cv
        self.description = config["description"]
        self.advanced = config["advanced"] if "advanced" in config else None

        if config["model_name"] in models_map.keys():
            self.model_cls = models_map[config["model_name"]]
        else:
            raise ValueError
Example No. 9
    def train_fold(self, i_fold: int):
        """クロスバリデーションでのfoldを指定して学習・評価を行う
        他のメソッドから呼び出すほか、単体でも確認やパラメータ調整に用いる
        :param i_fold: foldの番号(すべてのときには'all'とする)
        :return: (モデルのインスタンス、レコードのインデックス、予測値、評価によるスコア)のタプル
        """
        # Load the training data
        X_train = self.X_train.copy()
        y_train = self.y_train.copy()

        # Set up residuals
        if self.advanced and "ResRunner" in self.advanced:
            oof = Jbl.load(self.advanced["ResRunner"]["oof"])
            X_train["res"] = (y_train - oof).abs()

        # Split into training and validation data
        tr_idx, va_idx = self.load_index_fold(i_fold)
        X_tr, y_tr = X_train.iloc[tr_idx], y_train.iloc[tr_idx]
        X_val, y_val = X_train.iloc[va_idx], y_train.iloc[va_idx]

        # Downsample by residual
        if self.advanced and "ResRunner" in self.advanced:
            # Compute the mask once so X_tr and y_tr stay aligned
            res_mask = (X_tr["res"] <
                        self.advanced["ResRunner"]["res_threshold"]).values
            X_tr = X_tr.loc[res_mask]
            y_tr = y_tr.loc[res_mask]
            print(X_tr.shape)
            X_tr = X_tr.drop("res", axis=1)
            X_val = X_val.drop("res", axis=1)

        # Pseudo labeling
        if self.advanced and "PseudoRunner" in self.advanced:
            pl_cfg = self.advanced["PseudoRunner"]
            y_test_pred = Jbl.load(pl_cfg["y_test_pred"])
            if "pl_threshold" in pl_cfg:
                # Keep only confident test predictions (symmetric threshold)
                mask = ((y_test_pred < pl_cfg["pl_threshold"])
                        | (y_test_pred > 1 - pl_cfg["pl_threshold"]))
                X_add = self.X_test.loc[mask]
                y_add = pd.DataFrame(y_test_pred).loc[mask]
                y_add = pd.DataFrame([1 if ya > 0.5 else 0 for ya in y_add[0]])
            elif "pl_threshold_neg" in pl_cfg:
                # Separate thresholds for the negative and positive classes
                mask = ((y_test_pred < pl_cfg["pl_threshold_neg"])
                        | (y_test_pred > pl_cfg["pl_threshold_pos"]))
                X_add = self.X_test.loc[mask]
                y_add = pd.DataFrame(y_test_pred).loc[mask]
                y_add = pd.DataFrame([1 if ya > 0.5 else 0 for ya in y_add[0]])
            else:
                X_add = self.X_test
                y_add = pd.DataFrame(y_test_pred)
            print(f"added X_test: {len(X_add)}")
            X_tr = pd.concat([X_tr, X_add])
            y_tr = pd.concat([y_tr, y_add])

        # Train the model
        model = self.build_model(i_fold)
        model.train(X_tr, y_tr, X_val, y_val, self.X_test)

        # Predict and evaluate on the validation data
        pred_val = model.predict(X_val)

        # Post-processing
        pred_val = postprocess(pred_val)

        score = self.evaluate(y_val.values, pred_val)

        # Return the model, indices, predictions, and score
        return model, va_idx, pred_val, score
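
For concreteness, the symmetric pl_threshold branch above keeps only test rows whose predictions are confidently close to 0 or 1. A standalone demonstration with a hypothetical threshold of 0.1:

import numpy as np
import pandas as pd

y_test_pred = np.array([0.02, 0.45, 0.97, 0.60, 0.08])
pl_threshold = 0.1   # hypothetical value
mask = (y_test_pred < pl_threshold) | (y_test_pred > 1 - pl_threshold)
print(mask)          # [ True False  True False  True]
pseudo = pd.DataFrame({"pred": y_test_pred[mask]})
pseudo["label"] = (pseudo["pred"] > 0.5).astype(int)
print(pseudo["label"].tolist())   # [0, 1, 0]
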
Example No. 10
    def run_predict_cv(self) -> None:
        """クロスバリデーションで学習した各foldのモデルの平均により、テストデータの予測を行う
        あらかじめrun_train_cvを実行しておく必要がある
        """

        logger.info(f"{self.run_name} - start prediction cv")
        X_test = self.X_test
        preds = []

        show_feature_importance = "LGBM" in str(self.model_cls)
        if show_feature_importance:
            feature_importances = pd.DataFrame()

        # Predict with each fold's model
        for i_fold in range(self.cv.n_splits):
            logger.info(f"{self.run_name} - start prediction fold:{i_fold}")
            model = self.build_model(i_fold)
            model.load_model()
            pred = model.predict(X_test)
            preds.append(pred)
            logger.info(f"{self.run_name} - end prediction fold:{i_fold}")
            if show_feature_importance:
                feature_importances = pd.concat(
                    [feature_importances,
                     model.feature_importance(X_test)],
                    axis=0)

        # Average the fold predictions
        pred_avg = np.mean(preds, axis=0)

        # Save the predictions
        Jbl.save(pred_avg,
                 f"{PATH['prefix']['prediction']}/{self.run_name}-test.jbl")

        logger.info(f"{self.run_name} - end prediction cv")

        # Feature importance
        if show_feature_importance:
            aggs = (feature_importances.groupby("Feature").mean().sort_values(
                by="importance", ascending=False))
            cols = aggs[:200].index
            pd.DataFrame(aggs.index).to_csv(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.csv",
                index=False)

            best_features = feature_importances.loc[
                feature_importances.Feature.isin(cols)]
            plt.figure(figsize=(14, 26))
            sns.barplot(
                x="importance",
                y="Feature",
                data=best_features.sort_values(by="importance",
                                               ascending=False),
            )
            plt.title("LightGBM Features (averaged over folds)")
            plt.tight_layout()
            plt.savefig(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.png")
            plt.show()

            # mlflow
            mlflow.start_run(run_id=self.run_id)
            log_artifact(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.png")
            mlflow.end_run()
Example No. 11
    def run_train_cv(self) -> None:
        """クロスバリデーションでの学習・評価を行う
        学習・評価とともに、各foldのモデルの保存、スコアのログ出力についても行う
        """
        # mlflow
        mlflow.set_experiment(self.exp_name)
        mlflow.start_run(run_name=self.run_name)
        logger.info(f"{self.run_name} - start training cv")

        scores = []
        va_idxes = []
        preds = []

        # Adversarial validation
        if self.advanced and "adversarial_validation" in self.advanced:
            X_train = self.X_train
            X_test = self.X_test
            X_train["target"] = 0
            X_test["target"] = 1
            X_train = pd.concat([X_train, X_test],
                                sort=False).reset_index(drop=True)
            y_train = X_train["target"]
            X_train.drop("target", axis=1, inplace=True)
            X_test.drop("target", axis=1, inplace=True)
            self.X_train = X_train
            self.y_train = y_train

        # Train on each fold
        for i_fold in range(self.cv.n_splits):
            # Train
            logger.info(f"{self.run_name} fold {i_fold} - start training")
            model, va_idx, va_pred, score = self.train_fold(i_fold)
            logger.info(
                f"{self.run_name} fold {i_fold} - end training - score {score}\tbest_iteration: {model.model.best_iteration}"
            )

            # Save the model
            model.save_model()

            # Keep the results
            va_idxes.append(va_idx)
            scores.append(score)
            preds.append(va_pred)

        # Combine the results from all folds
        va_idxes = np.concatenate(va_idxes)
        order = np.argsort(va_idxes)
        preds = np.concatenate(preds, axis=0)
        preds = preds[order]

        cv_score = self.evaluate(self.y_train.values, preds)

        logger.info(
            f"{self.run_name} - end training cv - score {cv_score}\tbest_iteration: {model.model.best_iteration}"
        )

        # Save the predictions
        Jbl.save(preds,
                 f"{PATH['prefix']['prediction']}/{self.run_name}-train.jbl")

        # mlflow
        self.run_id = mlflow.active_run().info.run_id
        log_param("model_name", self.model_cls.__class__.__name__)
        log_param("fe_name", self.fe_name)
        log_param("train_params", self.params)
        log_param("cv_strategy", str(self.cv))
        log_param("evaluation_metric", self.evaluation_metric)
        log_metric("cv_score", cv_score)
        log_param(
            "fold_scores",
            dict(
                zip(
                    [f"fold_{i}" for i in range(len(scores))],
                    [round(s, 4) for s in scores],
                )),
        )
        log_param("cols_definition", self.cols_definition)
        log_param("description", self.description)
        mlflow.end_run()
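
These runner methods are typically chained from a small driver script. A hypothetical sketch, assuming the Runner(config, cv) signature from Example No. 8; the import path, config file, and CV choice are all assumptions:

import yaml
from sklearn.model_selection import KFold

from runner import Runner   # hypothetical import path for the class above

with open("configs/run001.yml") as f:   # hypothetical config file
    config = yaml.safe_load(f)

cv = KFold(n_splits=5, shuffle=True, random_state=42)

runner = Runner(config, cv)   # signature as in Example No. 8
runner.run_train_cv()         # train each fold, save models and OOF preds
runner.run_predict_cv()       # average the fold models over the test set
runner.submission()           # write submission_{run_name}.csv
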
Example No. 12
 def load_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     self.model = Jbl.load(model_path)
Example No. 13
 def save_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     os.makedirs(os.path.dirname(model_path), exist_ok=True)
     Jbl.save(self.model, model_path)
Example No. 14
 def load_model(self, model_path: str) -> None:
     """モデルの読み込みを行う
     :param path: モデルの読み込み先パス
     """
     self.model = Jbl.load(model_path)
Example No. 15
    def run_train_cv(self) -> None:
        """クロスバリデーションでの学習・評価を行う
        学習・評価とともに、各foldのモデルの保存、スコアのログ出力についても行う
        """
        # mlflow
        mlflow.set_experiment(self.exp_name)
        mlflow.start_run(run_name=self.run_name)
        self.logger.info(f"{self.run_name} - start training cv")

        scores = []
        va_idxes = []
        preds = []

        # Adversarial validation
        if self.advanced and self.advanced.AdversarialValidation is not None:
            X_train = self.X_train.copy()
            X_test = self.X_test.copy()
            X_train["target"] = 0
            X_test["target"] = 1
            X_train = pd.concat([X_train, X_test],
                                sort=False).reset_index(drop=True)
            y_train = X_train["target"]
            X_train.drop("target", axis=1, inplace=True)
            X_test.drop("target", axis=1, inplace=True)
            self.X_train = X_train
            self.y_train = y_train

        # Feature selection
        if self.advanced and self.advanced.Selector is not None:
            self.logger.info(f"{self.run_name} - start feature_selection")
            self.logger.info(
                f"{self.run_name} - #features before selection: {len(self.X_train.columns.tolist())}"
            )
            selector_params = dataclasses.asdict(self.advanced.Selector)
            selector_name = selector_params.pop("name")
            selector = None
            if selector_name == "GBDTFeatureSelector":
                selector = GBDTFeatureSelector(
                    input_cols=self.X_train.columns.tolist(),
                    target_col=self.column.target,
                    **selector_params,
                )
            else:
                raise ValueError(f"{selector_name} is not implemented")
            self.X_train = selector.fit_transform(
                pd.concat([self.X_train, self.y_train], axis=1))
            self.X_test = selector.transform(self.X_test)
            self.logger.info(
                f"{self.run_name} - #features after selection: {len(self.X_train.columns.tolist())}"
            )
            self.logger.info(f"{self.run_name} - end feature_selection")

            os.makedirs(f"{ModelPath.selector}", exist_ok=True)
            Jbl.save(selector,
                     f"{ModelPath.selector}/{self.run_name}.selector")

        # Train on each fold
        for i_fold in range(self.cv.n_splits):
            # Train
            self.logger.info(f"{self.run_name} fold {i_fold} - start training")
            model, va_idx, va_pred, score = self.train_fold(i_fold)
            fold_score_log = (
                f"{self.run_name} fold {i_fold} - end training - score {score}"
            )
            if hasattr(model.model, "best_iteration"):
                fold_score_log += f"\tbest_iteration: {model.model.best_iteration}"
            self.logger.info(fold_score_log)
            self.logger.info(
                f"{self.run_name} fold {i_fold} - best threshold - {self.best_threshold}"
            )

            # Save the model
            model.save_model()

            # Keep the results
            va_idxes.append(va_idx)
            scores.append(score)
            preds.append(va_pred)

        # Combine the results from all folds
        va_idxes = np.concatenate(va_idxes)
        order = np.argsort(va_idxes)
        preds = np.concatenate(preds, axis=0)
        preds = preds[order]

        cv_score = self.evaluate(self.y_train.values, preds)
        preds_binarized = np.where(preds > self.best_threshold, 1, 0)

        self.logger.info(
            f"{self.run_name} - end training cv - score {cv_score}")
        self.logger.info(
            f"{self.run_name} - best threshold - {self.best_threshold}")

        # Save the predictions
        Jbl.save(preds, f"{ModelPath.prediction}/{self.run_name}-train.jbl")
        Jbl.save(
            preds_binarized,
            f"{ModelPath.prediction}/{self.run_name}-train-binarized.jbl",
        )
        Jbl.save(
            self.best_threshold,
            f"{ModelPath.prediction}/{self.run_name}-best-threshold.jbl",
        )

        # mlflow
        self.mlflow(cv_score, scores)
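
GBDTFeatureSelector is not shown on this page. A minimal sketch consistent with how it is called above (fit_transform on a frame that includes the target column, transform on the test features); the cumulative-importance cutoff is purely an assumption about its internals:

import lightgbm as lgb
import pandas as pd


class GBDTFeatureSelector:
    """Hypothetical selector; only the interface is taken from Example No. 15."""

    def __init__(self, input_cols, target_col, threshold=0.9):
        self.input_cols = input_cols
        self.target_col = target_col
        self.threshold = threshold   # assumed cumulative-importance cutoff
        self.selected_cols = None

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        X, y = df[self.input_cols], df[self.target_col]
        model = lgb.LGBMClassifier().fit(X, y)
        imp = pd.Series(model.feature_importances_,
                        index=self.input_cols).sort_values(ascending=False)
        keep = imp.cumsum() / imp.sum() <= self.threshold
        self.selected_cols = imp[keep].index.tolist()
        return df[self.selected_cols]

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df[self.selected_cols]
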
Example No. 16
 def load_model(self):
     model_path = os.path.join(f"{ModelPath.model}",
                               f"{self.run_fold_name}.model")
     self.model = Jbl.load(model_path)
Example No. 17
 def save_model(self):
     model_path = os.path.join(f"{ModelPath.model}",
                               f"{self.run_fold_name}.model")
     os.makedirs(os.path.dirname(model_path), exist_ok=True)
     Jbl.save(self.model, model_path)
Example No. 18
 def save_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     Jbl.save(self.model, model_path)
     print(f"{model_path} is saved")
Example No. 19
    def run_predict_cv(self) -> None:
        """クロスバリデーションで学習した各foldのモデルの平均により、テストデータの予測を行う
        あらかじめrun_train_cvを実行しておく必要がある
        """

        self.logger.info(f"{self.run_name} - start prediction cv")
        X_test = self.X_test.copy()

        if self.advanced and self.advanced.Selector is not None:
            selector = Jbl.load(
                f"{ModelPath.selector}/{self.run_name}.selector")
            X_test = selector.transform(X_test)

        preds = []
        show_feature_importance = "LGBM" in str(self.model_cls)
        feature_importances = pd.DataFrame()

        # Predict with each fold's model
        for i_fold in range(self.cv.n_splits):
            self.logger.info(
                f"{self.run_name} - start prediction fold:{i_fold}")
            if self.pretrain is None:
                model = self.build_model(i_fold)
            else:
                model = self.model_cls(f"{self.pretrain.run_name}-{i_fold}",
                                       self.run_cfg, self.cat_cols)
            model.load_model()
            pred = model.predict(X_test)
            # Post-processing
            pred = postprocess_prediction(pred)
            preds.append(pred)
            self.logger.info(f"{self.run_name} - end prediction fold:{i_fold}")
            if show_feature_importance:
                feature_importances = pd.concat(
                    [feature_importances,
                     model.feature_importance(X_test)],
                    axis=0)

        # Average the fold predictions
        pred_avg = np.mean(preds, axis=0)

        # Binarize with the best threshold
        if self.pretrain is None:
            best_threshold = Jbl.load(
                f"{ModelPath.prediction}/{self.run_name}-best-threshold.jbl")
        else:
            best_threshold = Jbl.load(
                f"{ModelPath.prediction}/{self.pretrain.run_name}-best-threshold.jbl"
            )
        pred_avg_binarized = np.where(pred_avg > best_threshold, 1, 0)

        # Save the predictions
        Jbl.save(pred_avg, f"{ModelPath.prediction}/{self.run_name}-test.jbl")
        Jbl.save(
            pred_avg_binarized,
            f"{ModelPath.prediction}/{self.run_name}-test-binarized.jbl",
        )

        self.logger.info(f"{self.run_name} - end prediction cv")

        # Feature importance
        if show_feature_importance:
            aggs = (feature_importances.groupby("Feature").mean().sort_values(
                by="importance", ascending=False))
            cols = aggs[:200].index
            pd.DataFrame(aggs.index).to_csv(
                f"{ModelPath.importance}/{self.run_name}-fi.csv", index=False)

            best_features = feature_importances.loc[
                feature_importances.Feature.isin(cols)]
            plt.figure(figsize=(14, 26))
            sns.barplot(
                x="importance",
                y="Feature",
                data=best_features.sort_values(by="importance",
                                               ascending=False),
            )
            plt.title("LightGBM Features (averaged over folds)")
            plt.tight_layout()
            plt.savefig(f"{ModelPath.importance}/{self.run_name}-fi.png")
            plt.show()

            # mlflow
            mlflow.start_run(run_id=self.run_id)
            log_artifact(f"{ModelPath.importance}/{self.run_name}-fi.png")
            mlflow.end_run()