Example No. 1
def preprocess(fe_cfg: Config):
    fe_name = fe_cfg.basic.name
    target_col = fe_cfg.column.target
    train_path = f"{DataPath.interim.train}.jbl"
    test_path = f"{DataPath.interim.test}.jbl"

    for path, is_train in zip([train_path, test_path], [True, False]):
        df = Jbl.load(path)
        if "frame" in fe_cfg.__annotations__:
            if "window" in fe_cfg.frame.__annotations__:
                frame_column = fe_cfg.frame.column
                frame_window = fe_cfg.frame.window
                df = _filter_frame_window(df, frame_column, frame_window)
            else:
                frame_start_q = fe_cfg.frame.start
                frame_end_q = fe_cfg.frame.end
                df = _filter_frame(df, frame_start_q, frame_end_q)
        df_processed = _build_features(df, is_train, fe_cfg)
        if is_train:
            X = df_processed.drop(target_col, axis=1)
            y = df_processed[target_col]
        else:
            X = df_processed.copy()
            y = None
        X_save_path = (f"{DataPath.processed.X_train}_{fe_name}.jbl"
                       if is_train else
                       f"{DataPath.processed.X_test}_{fe_name}.jbl")
        Jbl.save(X, X_save_path)
        if is_train:
            y_save_path = f"{DataPath.processed.y_train}_{fe_name}.jbl"
            Jbl.save(y, y_save_path)
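
The Jbl helper used throughout these examples is not shown on this page. A minimal sketch consistent with the Jbl.save(obj, path) / Jbl.load(path) call sites, assuming it is a thin wrapper over joblib, would be:

import joblib


class Jbl:
    """Hypothetical joblib wrapper matching the call sites above."""

    @staticmethod
    def save(obj, path: str) -> None:
        joblib.dump(obj, path)

    @staticmethod
    def load(path: str):
        return joblib.load(path)
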
Example No. 2
def join_data():
    train_files = os.listdir(DataPath.raw.train_dir)
    test_files = os.listdir(DataPath.raw.test_dir)

    for files, is_train in zip([train_files, test_files], [True, False]):
        dfs = []
        for f in files:
            if is_train:
                path = f"{DataPath.raw.train_dir}/{f}"
            else:
                path = f"{DataPath.raw.test_dir}/{f}"
            df_tmp = pd.read_csv(path)
            stem = os.path.splitext(f)[0]
            if int(stem) < 400:
                df_tmp["is_screen_play"] = 1
            else:
                df_tmp["is_screen_play"] = 0
            df_tmp["filename"] = stem
            dfs.append(df_tmp)
        df = pd.concat(dfs, axis=0, ignore_index=True)
        df = df.sort_values(["filename", "frame"]).reset_index(drop=True)
        if not is_train:
            df = df.drop("is_screen_play", axis=1)
        del dfs
        gc.collect()

        save_path = (f"{DataPath.interim.train}.jbl"
                     if is_train else f"{DataPath.interim.test}.jbl")
        Jbl.save(df, save_path)
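
DataPath is likewise defined elsewhere in the repository. A hypothetical sketch consistent with the attributes used in these examples; the concrete directory names are assumptions:

from types import SimpleNamespace

# Hypothetical path constants; the attribute names come from the examples
# above, the concrete locations do not.
DataPath = SimpleNamespace(
    raw=SimpleNamespace(train_dir="data/raw/train",
                        test_dir="data/raw/test"),
    interim=SimpleNamespace(train="data/interim/train",
                            test="data/interim/test"),
    processed=SimpleNamespace(prefix="data/processed",
                              X_train="data/processed/X_train",
                              y_train="data/processed/y_train",
                              X_test="data/processed/X_test"),
)
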
Example No. 3
 def save_model(self, model_path: str) -> None:
     """モデルの保存を行う
     :param path: モデルの保存先パス
     """
     model_path_dir = os.path.dirname(model_path)
     mkdir(model_path_dir)
     Jbl.save(self.model, model_path)
Example No. 4
    def __init__(self, cfgs: Dict[str, Config], logger):
        super().__init__(cfgs, logger)
        self.X_train = Jbl.load(
            f"{DataPath.processed.X_train}_{self.fe_name}.jbl")
        self.y_train = Jbl.load(
            f"{DataPath.processed.y_train}_{self.fe_name}.jbl")
        self.X_test = Jbl.load(
            f"{DataPath.processed.X_test}_{self.fe_name}.jbl")

        self.best_threshold = 0.0
Example No. 5
    def __init__(self, cfgs: Dict[str, Config], logger: logging.Logger):
        blend_cfg = cfgs["blend"]

        self.description = blend_cfg.basic.description
        self.exp_name = blend_cfg.basic.exp_name
        self.run_name = blend_cfg.basic.name
        self.run_id = None
        self.fe_name = blend_cfg.basic.fe_name
        self.run_cfg = blend_cfg
        self.params = blend_cfg.params
        self.cv = generate_cv(blend_cfg)
        self.column = blend_cfg.column
        self.cat_cols = (blend_cfg.column.categorical if "categorical"
                         in blend_cfg.column.__annotations__ else None)
        self.kfold = blend_cfg.kfold
        self.evaluation_metric = blend_cfg.model.eval_metric
        self.logger = logger

        @dataclass
        class advanced:
            PseudoRunner: PseudoRunner = None
            ResRunner: ResRunner = None
            AdversarialValidation: AdversarialValidation = None
            Selector: Selector = None

        self.advanced = advanced

        if blend_cfg.model.name in models_map:
            self.model_cls = models_map[blend_cfg.model.name]
        else:
            raise ValueError(f"model_name {blend_cfg.model.name} not found")

        trs = []
        tes = []
        for run_name, _ in blend_cfg.result.__annotations__.items():
            tr = Jbl.load(f"{ModelPath.prediction}/{run_name}-train.jbl")
            te = Jbl.load(f"{ModelPath.prediction}/{run_name}-test.jbl")
            trs.append(tr)
            tes.append(te)
        train = pd.DataFrame(trs).T
        train.columns = list(blend_cfg.result.__annotations__.keys())
        test = pd.DataFrame(tes).T
        test.columns = list(blend_cfg.result.__annotations__.keys())
        target = [1] * 400 + [0] * (1528 - 400)
        train["y"] = target
        self.X_train = train.drop("y", axis=1)
        self.y_train = train["y"]
        self.X_test = test.copy()

        self.best_threshold = 0.0
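
In the loading loop above, pd.DataFrame(trs).T turns the list of per-run OOF prediction vectors into an (n_samples, n_runs) feature matrix for the blender. A tiny self-contained demonstration:

import pandas as pd

# Two hypothetical OOF prediction vectors from earlier runs.
trs = [[0.1, 0.8, 0.3], [0.2, 0.9, 0.4]]
train = pd.DataFrame(trs).T          # one column per run, one row per sample
train.columns = ["run_a", "run_b"]   # hypothetical run names
print(train)
#    run_a  run_b
# 0    0.1    0.2
# 1    0.8    0.9
# 2    0.3    0.4
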
Example No. 6
 def submission(self):
     if self.advanced and "separate" in self.advanced.__annotations__:
         sub = Jbl.load(
             f"{DataPath.processed.prefix}/X_test_{self.fe_name}.jbl"
         ).loc[:, [self.separate_col]]
         separate_col_uniques = sub[self.separate_col].unique()
         results = {}
         for separate_col_val in separate_col_uniques:
             pred = Jbl.load(
                 f"{ModelPath.prediction}/{self.run_name}-{separate_col_val}-test.jbl"
             )
             sub_separate_idx = sub[sub[self.separate_col] ==
                                    separate_col_val].index
             result = {
                 idx_: [p_]
                 for idx_, p_ in zip(sub_separate_idx, pred)
             }
             results.update(result)
         sub = (pd.DataFrame(results).T.reset_index().rename(
             columns={
                 "index": "id",
                 0: self.column.target
             }).sort_values("id").reset_index(drop=True))
         sub.loc[:, "id"] = (
             Jbl.load(f"{DataPath.interim.test}").loc[:, ["id"]].values)
         pred = sub[self.column.target].values
     else:
         # sub = Jbl.load(f"{DataPath.interim.test}").loc[:, ["id"]]
         # pred = Jbl.load(f"{ModelPath.prediction}/{self.run_name}-test.jbl")
         sub = pd.DataFrame()
         pred = Jbl.load(
             f"{ModelPath.prediction}/{self.run_name}-test-binarized.jbl")
     pred = pred.reshape(-1)
     if self.advanced and "predict_exp" in self.advanced.__annotations__:
         sub[self.column.target] = np.exp(pred)
     else:
         sub[self.column.target] = pred
     # sub.to_csv(
     #     f"{DataPath.submission}/submission_{self.run_name}.csv", index=False,
     # )
     sub.to_csv(
         f"{ModelPath.submission}/submission_{self.run_name}.csv",
         index=False,
         header=None,
     )
Example No. 7
 def submission(self):
     pred = Jbl.load(
         f"{PATH['prefix']['prediction']}/{self.run_name}-test.jbl")
     sub = Loader().load_test().loc[:, ["id"]]
     if self.advanced and "predict_exp" in self.advanced:
         sub[self.cols_definition["target_col"]] = np.exp(pred)
     else:
         sub[self.cols_definition["target_col"]] = pred
     sub.to_csv(
         f"{PATH['prefix']['submission']}/submission_{self.run_name}.csv",
         index=False,
     )
Example No. 8
    def __init__(self, config: dict, cv):
        self.exp_name = config["exp_name"]
        self.run_name = config["run_name"]
        self.run_id = None
        self.fe_name = config["fe_name"]
        self.X_train = Jbl.load(
            f"{PATH['prefix']['processed']}/X_train_{config['fe_name']}.jbl")
        self.y_train = Jbl.load(
            f"{PATH['prefix']['processed']}/y_train_{config['fe_name']}.jbl")
        self.X_test = Jbl.load(
            f"{PATH['prefix']['processed']}/X_test_{config['fe_name']}.jbl")
        self.evaluation_metric = config["evaluation_metric"]
        self.params = config["params"]
        self.cols_definition = config["cols_definition"]
        self.kfold = config["kfold"]["method"]
        self.cv = cv
        self.description = config["description"]
        self.advanced = config["advanced"] if "advanced" in config else None

        if config["model_name"] in models_map.keys():
            self.model_cls = models_map[config["model_name"]]
        else:
            raise ValueError
Example No. 9
    def train_fold(self, i_fold: int):
        """クロスバリデーションでのfoldを指定して学習・評価を行う
        他のメソッドから呼び出すほか、単体でも確認やパラメータ調整に用いる
        :param i_fold: foldの番号(すべてのときには'all'とする)
        :return: (モデルのインスタンス、レコードのインデックス、予測値、評価によるスコア)のタプル
        """
        # Load the training data
        X_train = self.X_train.copy()
        y_train = self.y_train.copy()

        # Set up residuals
        if self.advanced and "ResRunner" in self.advanced:
            oof = Jbl.load(self.advanced["ResRunner"]["oof"])
            X_train["res"] = (y_train - oof).abs()

        # Split into training and validation data
        tr_idx, va_idx = self.load_index_fold(i_fold)
        X_tr, y_tr = X_train.iloc[tr_idx], y_train.iloc[tr_idx]
        X_val, y_val = X_train.iloc[va_idx], y_train.iloc[va_idx]

        # Downsample by residual
        if self.advanced and "ResRunner" in self.advanced:
            # Compute the mask once so X_tr and y_tr stay aligned
            res_mask = (X_tr["res"] <
                        self.advanced["ResRunner"]["res_threshold"]).values
            X_tr = X_tr.loc[res_mask]
            y_tr = y_tr.loc[res_mask]
            print(X_tr.shape)
            X_tr = X_tr.drop("res", axis=1)
            X_val = X_val.drop("res", axis=1)

        # Pseudo labeling
        if self.advanced and "PseudoRunner" in self.advanced:
            pl_cfg = self.advanced["PseudoRunner"]
            y_test_pred = Jbl.load(pl_cfg["y_test_pred"])
            if "pl_threshold" in pl_cfg:
                # Keep only confident test predictions (symmetric threshold)
                mask = ((y_test_pred < pl_cfg["pl_threshold"])
                        | (y_test_pred > 1 - pl_cfg["pl_threshold"]))
                X_add = self.X_test.loc[mask]
                y_add = pd.DataFrame(y_test_pred).loc[mask]
                y_add = pd.DataFrame([1 if ya > 0.5 else 0 for ya in y_add[0]])
            elif "pl_threshold_neg" in pl_cfg:
                # Separate thresholds for the negative and positive classes
                mask = ((y_test_pred < pl_cfg["pl_threshold_neg"])
                        | (y_test_pred > pl_cfg["pl_threshold_pos"]))
                X_add = self.X_test.loc[mask]
                y_add = pd.DataFrame(y_test_pred).loc[mask]
                y_add = pd.DataFrame([1 if ya > 0.5 else 0 for ya in y_add[0]])
            else:
                X_add = self.X_test
                y_add = pd.DataFrame(y_test_pred)
            print(f"added X_test: {len(X_add)}")
            X_tr = pd.concat([X_tr, X_add])
            y_tr = pd.concat([y_tr, y_add])

        # Train the model
        model = self.build_model(i_fold)
        model.train(X_tr, y_tr, X_val, y_val, self.X_test)

        # Predict and evaluate on the validation data
        pred_val = model.predict(X_val)

        # Post-processing
        pred_val = postprocess(pred_val)

        score = self.evaluate(y_val.values, pred_val)

        # Return the model, indices, predictions, and score
        return model, va_idx, pred_val, score
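
For concreteness, the symmetric pl_threshold branch above keeps only test rows whose predictions are confidently close to 0 or 1. A standalone demonstration with a hypothetical threshold of 0.1:

import numpy as np
import pandas as pd

y_test_pred = np.array([0.02, 0.45, 0.97, 0.60, 0.08])
pl_threshold = 0.1   # hypothetical value
mask = (y_test_pred < pl_threshold) | (y_test_pred > 1 - pl_threshold)
print(mask)          # [ True False  True False  True]
pseudo = pd.DataFrame({"pred": y_test_pred[mask]})
pseudo["label"] = (pseudo["pred"] > 0.5).astype(int)
print(pseudo["label"].tolist())   # [0, 1, 0]
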
Example No. 10
    def run_predict_cv(self) -> None:
        """クロスバリデーションで学習した各foldのモデルの平均により、テストデータの予測を行う
        あらかじめrun_train_cvを実行しておく必要がある
        """

        logger.info(f"{self.run_name} - start prediction cv")
        X_test = self.X_test
        preds = []

        show_feature_importance = "LGBM" in str(self.model_cls)
        if show_feature_importance:
            feature_importances = pd.DataFrame()

        # Predict with each fold's model
        for i_fold in range(self.cv.n_splits):
            logger.info(f"{self.run_name} - start prediction fold:{i_fold}")
            model = self.build_model(i_fold)
            model.load_model()
            pred = model.predict(X_test)
            preds.append(pred)
            logger.info(f"{self.run_name} - end prediction fold:{i_fold}")
            if show_feature_importance:
                feature_importances = pd.concat(
                    [feature_importances,
                     model.feature_importance(X_test)],
                    axis=0)

        # Average the fold predictions
        pred_avg = np.mean(preds, axis=0)

        # Save the predictions
        Jbl.save(pred_avg,
                 f"{PATH['prefix']['prediction']}/{self.run_name}-test.jbl")

        logger.info(f"{self.run_name} - end prediction cv")

        # Feature importance
        if show_feature_importance:
            aggs = (feature_importances.groupby("Feature").mean().sort_values(
                by="importance", ascending=False))
            cols = aggs[:200].index
            pd.DataFrame(aggs.index).to_csv(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.csv",
                index=False)

            best_features = feature_importances.loc[
                feature_importances.Feature.isin(cols)]
            plt.figure(figsize=(14, 26))
            sns.barplot(
                x="importance",
                y="Feature",
                data=best_features.sort_values(by="importance",
                                               ascending=False),
            )
            plt.title("LightGBM Features (averaged over folds)")
            plt.tight_layout()
            plt.savefig(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.png")
            plt.show()

            # mlflow
            mlflow.start_run(run_id=self.run_id)
            log_artifact(
                f"{PATH['prefix']['importance']}/{self.run_name}-fi.png")
            mlflow.end_run()
Example No. 11
    def run_train_cv(self) -> None:
        """クロスバリデーションでの学習・評価を行う
        学習・評価とともに、各foldのモデルの保存、スコアのログ出力についても行う
        """
        # mlflow
        mlflow.set_experiment(self.exp_name)
        mlflow.start_run(run_name=self.run_name)
        logger.info(f"{self.run_name} - start training cv")

        scores = []
        va_idxes = []
        preds = []

        # Adversarial validation
        if self.advanced and "adversarial_validation" in self.advanced:
            X_train = self.X_train
            X_test = self.X_test
            X_train["target"] = 0
            X_test["target"] = 1
            X_train = pd.concat([X_train, X_test],
                                sort=False).reset_index(drop=True)
            y_train = X_train["target"]
            X_train.drop("target", axis=1, inplace=True)
            X_test.drop("target", axis=1, inplace=True)
            self.X_train = X_train
            self.y_train = y_train

        # Train on each fold
        for i_fold in range(self.cv.n_splits):
            # Train
            logger.info(f"{self.run_name} fold {i_fold} - start training")
            model, va_idx, va_pred, score = self.train_fold(i_fold)
            logger.info(
                f"{self.run_name} fold {i_fold} - end training - score {score}\tbest_iteration: {model.model.best_iteration}"
            )

            # Save the model
            model.save_model()

            # Keep the results
            va_idxes.append(va_idx)
            scores.append(score)
            preds.append(va_pred)

        # Combine the results from all folds
        va_idxes = np.concatenate(va_idxes)
        order = np.argsort(va_idxes)
        preds = np.concatenate(preds, axis=0)
        preds = preds[order]

        cv_score = self.evaluate(self.y_train.values, preds)

        logger.info(
            f"{self.run_name} - end training cv - score {cv_score}\tbest_iteration: {model.model.best_iteration}"
        )

        # Save the predictions
        Jbl.save(preds,
                 f"{PATH['prefix']['prediction']}/{self.run_name}-train.jbl")

        # mlflow
        self.run_id = mlflow.active_run().info.run_id
        log_param("model_name", self.model_cls.__class__.__name__)
        log_param("fe_name", self.fe_name)
        log_param("train_params", self.params)
        log_param("cv_strategy", str(self.cv))
        log_param("evaluation_metric", self.evaluation_metric)
        log_metric("cv_score", cv_score)
        log_param(
            "fold_scores",
            dict(
                zip(
                    [f"fold_{i}" for i in range(len(scores))],
                    [round(s, 4) for s in scores],
                )),
        )
        log_param("cols_definition", self.cols_definition)
        log_param("description", self.description)
        mlflow.end_run()
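
These runner methods are typically chained from a small driver script. A hypothetical sketch, assuming the Runner(config, cv) signature from Example No. 8; the import path, config file, and CV choice are all assumptions:

import yaml
from sklearn.model_selection import KFold

from runner import Runner   # hypothetical import path for the class above

with open("configs/run001.yml") as f:   # hypothetical config file
    config = yaml.safe_load(f)

cv = KFold(n_splits=5, shuffle=True, random_state=42)

runner = Runner(config, cv)   # signature as in Example No. 8
runner.run_train_cv()         # train each fold, save models and OOF preds
runner.run_predict_cv()       # average the fold models over the test set
runner.submission()           # write submission_{run_name}.csv
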
Example No. 12
 def load_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     self.model = Jbl.load(model_path)
Example No. 13
 def save_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     os.makedirs(os.path.dirname(model_path), exist_ok=True)
     Jbl.save(self.model, model_path)
Example No. 14
 def load_model(self, model_path: str) -> None:
     """モデルの読み込みを行う
     :param path: モデルの読み込み先パス
     """
     self.model = Jbl.load(model_path)
Example No. 15
    def run_train_cv(self) -> None:
        """クロスバリデーションでの学習・評価を行う
        学習・評価とともに、各foldのモデルの保存、スコアのログ出力についても行う
        """
        # mlflow
        mlflow.set_experiment(self.exp_name)
        mlflow.start_run(run_name=self.run_name)
        self.logger.info(f"{self.run_name} - start training cv")

        scores = []
        va_idxes = []
        preds = []

        # Adversarial validation
        if self.advanced and self.advanced.AdversarialValidation is not None:
            X_train = self.X_train.copy()
            X_test = self.X_test.copy()
            X_train["target"] = 0
            X_test["target"] = 1
            X_train = pd.concat([X_train, X_test],
                                sort=False).reset_index(drop=True)
            y_train = X_train["target"]
            X_train.drop("target", axis=1, inplace=True)
            X_test.drop("target", axis=1, inplace=True)
            self.X_train = X_train
            self.y_train = y_train

        # Feature selection
        if self.advanced and self.advanced.Selector is not None:
            self.logger.info(f"{self.run_name} - start feature_selection")
            self.logger.info(
                f"{self.run_name} - #features before selection: {len(self.X_train.columns.tolist())}"
            )
            selector_params = dataclasses.asdict(self.advanced.Selector)
            selector_name = selector_params.pop("name")
            selector = None
            if selector_name == "GBDTFeatureSelector":
                selector = GBDTFeatureSelector(
                    input_cols=self.X_train.columns.tolist(),
                    target_col=self.column.target,
                    **selector_params,
                )
            else:
                raise ValueError(f"{selector_name} is not implemented")
            self.X_train = selector.fit_transform(
                pd.concat([self.X_train, self.y_train], axis=1))
            self.X_test = selector.transform(self.X_test)
            self.logger.info(
                f"{self.run_name} - #features after selection: {len(self.X_train.columns.tolist())}"
            )
            self.logger.info(f"{self.run_name} - end feature_selection")

            os.makedirs(f"{ModelPath.selector}", exist_ok=True)
            Jbl.save(selector,
                     f"{ModelPath.selector}/{self.run_name}.selector")

        # Train on each fold
        for i_fold in range(self.cv.n_splits):
            # Train
            self.logger.info(f"{self.run_name} fold {i_fold} - start training")
            model, va_idx, va_pred, score = self.train_fold(i_fold)
            fold_score_log = (
                f"{self.run_name} fold {i_fold} - end training - score {score}"
            )
            if hasattr(model.model, "best_iteration"):
                fold_score_log += f"\tbest_iteration: {model.model.best_iteration}"
            self.logger.info(fold_score_log)
            self.logger.info(
                f"{self.run_name} fold {i_fold} - best threshold - {self.best_threshold}"
            )

            # Save the model
            model.save_model()

            # Keep the results
            va_idxes.append(va_idx)
            scores.append(score)
            preds.append(va_pred)

        # Combine the results from all folds
        va_idxes = np.concatenate(va_idxes)
        order = np.argsort(va_idxes)
        preds = np.concatenate(preds, axis=0)
        preds = preds[order]

        cv_score = self.evaluate(self.y_train.values, preds)
        preds_binarized = np.where(preds > self.best_threshold, 1, 0)

        self.logger.info(
            f"{self.run_name} - end training cv - score {cv_score}")
        self.logger.info(
            f"{self.run_name} - best threshold - {self.best_threshold}")

        # Save the predictions
        Jbl.save(preds, f"{ModelPath.prediction}/{self.run_name}-train.jbl")
        Jbl.save(
            preds_binarized,
            f"{ModelPath.prediction}/{self.run_name}-train-binarized.jbl",
        )
        Jbl.save(
            self.best_threshold,
            f"{ModelPath.prediction}/{self.run_name}-best-threshold.jbl",
        )

        # mlflow
        self.mlflow(cv_score, scores)
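
GBDTFeatureSelector is not shown on this page. A minimal sketch consistent with how it is called above (fit_transform on a frame that includes the target column, transform on the test features); the cumulative-importance cutoff is purely an assumption about its internals:

import lightgbm as lgb
import pandas as pd


class GBDTFeatureSelector:
    """Hypothetical selector; only the interface is taken from Example No. 15."""

    def __init__(self, input_cols, target_col, threshold=0.9):
        self.input_cols = input_cols
        self.target_col = target_col
        self.threshold = threshold   # assumed cumulative-importance cutoff
        self.selected_cols = None

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        X, y = df[self.input_cols], df[self.target_col]
        model = lgb.LGBMClassifier().fit(X, y)
        imp = pd.Series(model.feature_importances_,
                        index=self.input_cols).sort_values(ascending=False)
        keep = imp.cumsum() / imp.sum() <= self.threshold
        self.selected_cols = imp[keep].index.tolist()
        return df[self.selected_cols]

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        return df[self.selected_cols]
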
Example No. 16
 def load_model(self):
     model_path = os.path.join(f"{ModelPath.model}",
                               f"{self.run_fold_name}.model")
     self.model = Jbl.load(model_path)
Example No. 17
 def save_model(self):
     model_path = os.path.join(f"{ModelPath.model}",
                               f"{self.run_fold_name}.model")
     os.makedirs(os.path.dirname(model_path), exist_ok=True)
     Jbl.save(self.model, model_path)
Example No. 18
 def save_model(self, path: str = "models/model"):
     model_path = os.path.join(path, f"{self.run_fold_name}.model")
     Jbl.save(self.model, model_path)
     print(f"{model_path} is saved")
Example No. 19
    def run_predict_cv(self) -> None:
        """クロスバリデーションで学習した各foldのモデルの平均により、テストデータの予測を行う
        あらかじめrun_train_cvを実行しておく必要がある
        """

        self.logger.info(f"{self.run_name} - start prediction cv")
        X_test = self.X_test.copy()

        if self.advanced and self.advanced.Selector is not None:
            selector = Jbl.load(
                f"{ModelPath.selector}/{self.run_name}.selector")
            X_test = selector.transform(X_test)

        preds = []
        show_feature_importance = "LGBM" in str(self.model_cls)
        feature_importances = pd.DataFrame()

        # Predict with each fold's model
        for i_fold in range(self.cv.n_splits):
            self.logger.info(
                f"{self.run_name} - start prediction fold:{i_fold}")
            if self.pretrain is None:
                model = self.build_model(i_fold)
            else:
                model = self.model_cls(f"{self.pretrain.run_name}-{i_fold}",
                                       self.run_cfg, self.cat_cols)
            model.load_model()
            pred = model.predict(X_test)
            # Post-processing
            pred = postprocess_prediction(pred)
            preds.append(pred)
            self.logger.info(f"{self.run_name} - end prediction fold:{i_fold}")
            if show_feature_importance:
                feature_importances = pd.concat(
                    [feature_importances,
                     model.feature_importance(X_test)],
                    axis=0)

        # Average the fold predictions
        pred_avg = np.mean(preds, axis=0)

        # Binarize with the best threshold
        if self.pretrain is None:
            best_threshold = Jbl.load(
                f"{ModelPath.prediction}/{self.run_name}-best-threshold.jbl")
        else:
            best_threshold = Jbl.load(
                f"{ModelPath.prediction}/{self.pretrain.run_name}-best-threshold.jbl"
            )
        pred_avg_binarized = np.where(pred_avg > best_threshold, 1, 0)

        # Save the predictions
        Jbl.save(pred_avg, f"{ModelPath.prediction}/{self.run_name}-test.jbl")
        Jbl.save(
            pred_avg_binarized,
            f"{ModelPath.prediction}/{self.run_name}-test-binarized.jbl",
        )

        self.logger.info(f"{self.run_name} - end prediction cv")

        # Feature importance
        if show_feature_importance:
            aggs = (feature_importances.groupby("Feature").mean().sort_values(
                by="importance", ascending=False))
            cols = aggs[:200].index
            pd.DataFrame(aggs.index).to_csv(
                f"{ModelPath.importance}/{self.run_name}-fi.csv", index=False)

            best_features = feature_importances.loc[
                feature_importances.Feature.isin(cols)]
            plt.figure(figsize=(14, 26))
            sns.barplot(
                x="importance",
                y="Feature",
                data=best_features.sort_values(by="importance",
                                               ascending=False),
            )
            plt.title("LightGBM Features (averaged over folds)")
            plt.tight_layout()
            plt.savefig(f"{ModelPath.importance}/{self.run_name}-fi.png")
            plt.show()

            # mlflow
            mlflow.start_run(run_id=self.run_id)
            log_artifact(f"{ModelPath.importance}/{self.run_name}-fi.png")
            mlflow.end_run()