Example #1

# Module-level imports assumed by this snippet (project-local names such as
# `utils`, `logger`, and `LitWheatModel` come from the surrounding repository):
import hydra
import omegaconf
import pytorch_lightning as pl
import torch

def run_model(cfg: omegaconf.DictConfig) -> None:
    logger.info(f"Config: {omegaconf.OmegaConf.to_yaml(cfg)}")
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    earlystopping_callback = hydra.utils.instantiate(cfg.callbacks.early_stopping)
    checkpoint_callback = hydra.utils.instantiate(cfg.callbacks.model_checkpoint)
    tb_logger = hydra.utils.instantiate(cfg.callbacks.tensorboard)
    lr_logger = hydra.utils.instantiate(cfg.callbacks.lr_logger)

    if cfg.training.pretrain_dir != "":
        logger.info(f"Loading the pre-trained model from: {cfg.training.pretrain_dir}")
        pretrain_path = utils.get_single_model_path(cfg.training.pretrain_dir)
        model = LitWheatModel.load_from_checkpoint(pretrain_path, hydra_cfg=cfg)

        # The number of classes in the bad labels does not equal the number of
        # classes in the good labels, so the classification head is replaced
        fc_layer_name = (
            "_fc"
            if cfg.model.architecture_name.startswith("efficientnet")
            else "_classifier"
        )
        if (
            getattr(model.model, fc_layer_name).out_features
            != cfg.data_mode.num_classes
        ):
            fc = torch.nn.Linear(
                getattr(model.model, fc_layer_name).in_features,
                cfg.data_mode.num_classes,
            )
            setattr(model.model, fc_layer_name, fc)
    else:
        logger.info("Training the model from scratch")
        model = LitWheatModel(hydra_cfg=cfg)

    trainer = pl.Trainer(
        max_epochs=cfg.training.max_epochs,
        min_epochs=cfg.training.max_epochs,  # pinned to max_epochs, which effectively disables early stopping
        logger=[tb_logger],
        early_stop_callback=earlystopping_callback,
        checkpoint_callback=checkpoint_callback,
        callbacks=[lr_logger],
        gradient_clip_val=0.5,
        gpus=cfg.general.gpu_list,
        fast_dev_run=False,
        distributed_backend="dp",
        precision=32,
        weights_summary=None,
        progress_bar_refresh_rate=50,
        deterministic=True,
    )

    # model.setup()
    # # Run learning rate finder
    # lr_finder = trainer.lr_find(model)
    # fig = lr_finder.plot(suggest=True)
    # fig.savefig("/data/ybabakhin/data/zindi_wheat/zindi_wheat_growth/lrfinder.png")

    logger.info("Start fitting the model...")
    trainer.fit(model)
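
For context, entrypoints like run_model are typically wrapped with Hydra's @hydra.main decorator, which composes the DictConfig and injects it; a minimal sketch, assuming Hydra 1.0+ and placeholder config path/name not taken from this snippet:

import hydra
import omegaconf

# "conf" and "config" are hypothetical values for this project's config
# directory and primary config file
@hydra.main(config_path="conf", config_name="config")
def run_model(cfg: omegaconf.DictConfig) -> None:
    ...

if __name__ == "__main__":
    run_model()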
Example #2

# Module-level imports assumed by this snippet (project-local names such as
# `utils`, `logger`, and `TuniziDialectClassifier` come from the surrounding
# repository; `Bar` is assumed to be the dataloader progress wrapper from the
# `barbar` package):
import gc
import glob
import os
import sys

import numpy as np
import omegaconf
import pandas as pd
import torch


def run_inference(cfg: omegaconf.DictConfig) -> None:

    logger.info(" .. Testing Will Be Starting in few seconds .. ")

    test_df = pd.read_csv(cfg.testing.test_csv)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoints = glob.glob(
        os.path.join(
            cfg.general.logs_dir, "checkpoints",
            f"{cfg.model.architecture_name}{cfg.classifiermode.num_classes}",
            "*.ckpt"))
    num_models = len(checkpoints)

    if num_models == 0:
        logger.info("No checkpoints found, exiting")
        sys.exit()
    if cfg.classifiermode.num_classes == 1:
        test_preds = np.zeros(len(test_df))
    else:
        test_preds = np.zeros((len(test_df), cfg.classifiermode.num_classes))

    for checkpoint_id, checkpoint_path in enumerate(checkpoints):

        # Output name comes from the checkpoint's parent directory; the seed is
        # parsed from the checkpoint filename (e.g. "model_42.ckpt" -> 42)
        output_name = os.path.basename(os.path.dirname(checkpoint_path))
        seed = int(
            os.path.splitext(os.path.basename(checkpoint_path))[0].split("_")[1])
        utils.setup_environment(random_seed=seed,
                                gpu_list=cfg.general.gpu_list)
        model = TuniziDialectClassifier.load_from_checkpoint(checkpoint_path,
                                                             hydra_config=cfg)
        model.eval().to(device)
        test_predictions = []
        with torch.no_grad():
            for batch_idx, batch in enumerate(Bar(model.test_dataloader())):
                input_ids = batch["input_ids"]
                attention_mask = batch["attention_mask"]

                input_ids = input_ids.to(device, dtype=torch.long)
                attention_mask = attention_mask.to(device, dtype=torch.long)
                outputs = model(input_ids, attention_mask=attention_mask)

                # Binary head: sigmoid over the single logit; multi-class
                # head: softmax over the class dimension
                if cfg.classifiermode.num_classes == 1:
                    outputs = torch.sigmoid(outputs)
                else:
                    outputs = torch.softmax(outputs, dim=1)
                test_predictions.append(outputs.detach().cpu().numpy())

        test_predictions = np.concatenate(test_predictions, axis=0)
        if cfg.classifiermode.num_classes == 1:
            test_predictions = test_predictions.reshape(-1)
        gc.collect()
        torch.cuda.empty_cache()
        utils.create_submission(test_df, output_name + str(seed),
                                test_predictions,
                                cfg.classifiermode.num_classes)
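
For reference, a toy illustration of the two prediction shapes handled above (values are made up):

import torch

# A binary head emits one logit per sample; sigmoid plus the reshape above
# yields a (N,) probability vector. A multi-class head emits one logit per
# class; softmax yields an (N, num_classes) matrix whose rows sum to 1.
binary_logits = torch.randn(4, 1)
binary_probs = torch.sigmoid(binary_logits).numpy().reshape(-1)  # shape (4,)

multi_logits = torch.randn(4, 3)
multi_probs = torch.softmax(multi_logits, dim=1).numpy()         # shape (4, 3)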
Example #3

# Module-level imports assumed by this snippet (project-local names such as
# `utils`, `logger`, and `TuniziDialectClassifier` come from the surrounding
# repository):
import hydra
import omegaconf
import pytorch_lightning as pl

def train_model(cfg: omegaconf.DictConfig) -> None:
    logger.info(f"Config: {omegaconf.OmegaConf.to_yaml(cfg)}")
    utils.setup_environment(random_seed=cfg.general.seed,
                            gpu_list=cfg.general.gpu_list)
    tensorboard_logger = hydra.utils.instantiate(cfg.callbacks.tensorboard)
    model = TuniziDialectClassifier(hydra_config=cfg)

    trainer = pl.Trainer(max_epochs=cfg.training.max_epochs,
                         min_epochs=cfg.training.min_epochs,
                         logger=[tensorboard_logger],
                         gpus=cfg.general.gpu_list,
                         fast_dev_run=False,
                         precision=32,
                         progress_bar_refresh_rate=1,
                         deterministic=True)
    logger.info(".. Shake your Hands Training Will Begin .. ")
    trainer.fit(model)
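
hydra.utils.instantiate builds the logger from a `_target_` entry in the config node; a minimal sketch of the mechanism, assuming Hydra 1.0+ (the TensorBoardLogger target and save_dir are illustrative, not taken from this config):

import hydra.utils
from omegaconf import OmegaConf

# _target_ names the class to build; the remaining keys become constructor
# kwargs, so this is equivalent to TensorBoardLogger(save_dir="logs/")
node = OmegaConf.create({
    "_target_": "pytorch_lightning.loggers.TensorBoardLogger",
    "save_dir": "logs/",
})
tb_logger = hydra.utils.instantiate(node)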
Example #4

# Module-level imports assumed by this snippet (project-local modules such as
# `dataset`, `lightning_models`, `tta`, `utils`, and `logger` come from the
# surrounding repository):
import gc
import glob
import os

import numpy as np
import omegaconf
import pandas as pd
import torch
import tqdm
from sklearn import metrics
from torch.utils import data as torch_data

def run_model(cfg: omegaconf.DictConfig) -> None:
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    if cfg.testing.mode == "valid":
        test = pd.read_csv(cfg.data_mode.train_csv)
        test = test[test.label_quality == 2].reset_index(drop=True)
    else:
        test = pd.read_csv(cfg.testing.test_csv)

    test = utils.preprocess_df(test, data_dir=cfg.data_mode.data_dir)
    logger.info(f"Length of the test data: {len(test)}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    df_list = []
    pred_list = []

    for fold in cfg.testing.folds:
        if cfg.testing.mode == "valid":
            df_test = test[test.fold == fold].reset_index(drop=True)
        else:
            df_test = test

        checkpoints = glob.glob(
            os.path.join(
                cfg.general.logs_dir, f"model_{cfg.model.model_id}/fold_{fold}/*.ckpt"
            )
        )
        fold_predictions = np.zeros(
            (len(df_test), cfg.data_mode.num_classes, len(checkpoints))
        )

        for checkpoint_id, checkpoint_path in enumerate(checkpoints):
            model = lightning_models.LitWheatModel.load_from_checkpoint(
                checkpoint_path, hydra_cfg=cfg
            )
            model.eval().to(device)

            test_dataset = dataset.ZindiWheatDataset(
                images=df_test.path.values,
                labels=None,
                preprocess_function=model.preprocess,
                augmentations=None,
                input_shape=(cfg.model.input_size[0], cfg.model.input_size[1], 3),
                crop_method=cfg.model.crop_method,
            )

            test_loader = torch_data.DataLoader(
                test_dataset,
                batch_size=cfg.training.batch_size,
                num_workers=cfg.general.num_workers,
                shuffle=False,
                pin_memory=True,
            )

            if cfg.testing.tta:
                model = tta.get_tta_model(
                    model,
                    crop_method=cfg.model.crop_method,
                    input_size=cfg.model.input_size,
                )

            if torch.cuda.is_available() and torch.cuda.device_count() > 1:
                model = torch.nn.DataParallel(model)

            with torch.no_grad():
                tq = tqdm.tqdm(test_loader, total=len(test_loader))
                for idx, data in enumerate(tq):
                    images = data["image"]
                    images = images.to(device)

                    preds = model(images)
                    if not cfg.model.regression:
                        preds = torch.softmax(preds, dim=1)
                    preds = preds.cpu().detach().numpy()

                    batch_slice = slice(
                        idx * cfg.training.batch_size,
                        (idx + 1) * cfg.training.batch_size,
                    )
                    fold_predictions[batch_slice, :, checkpoint_id] = preds

        gc.collect()
        torch.cuda.empty_cache()
        fold_predictions = np.mean(fold_predictions, axis=-1)

        # OOF predictions for validation and pseudolabels
        if cfg.testing.mode == "valid" or cfg.testing.mode == "pseudo":
            df_list.append(df_test)

        pred_list.append(fold_predictions)

    multipliers = np.array(cfg.data_mode.rmse_multipliers)

    if cfg.testing.mode == "valid":
        test = pd.concat(df_list)
        probs = np.vstack(pred_list)
        filename = "validation_probs.pkl"

    elif cfg.testing.mode == "pseudo":
        for fold, df_test, probs in zip(cfg.testing.folds, df_list, pred_list):
            predictions = np.argmax(probs, axis=1)
            predictions = [multipliers[x] for x in predictions]
            df_test["growth_stage"] = predictions
            save_path = os.path.join(
                cfg.general.logs_dir,
                f"model_{cfg.model.model_id}/pseudo_fold_{fold}.csv",
            )
            logger.info(f"Saving pseudolabels to {save_path}")
            df_test[["UID", "growth_stage"]].to_csv(save_path, index=False)
        return

    else:
        probs = np.stack(pred_list)
        probs = np.mean(probs, axis=0)
        filename = "test_probs.pkl"

    ensemble_probs = dict(zip(test.UID.values, probs))
    utils.save_in_file_fast(
        ensemble_probs,
        file_name=os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/{filename}"
        ),
    )

    if not cfg.model.regression:
        probs = np.sum(probs * multipliers, axis=-1)
    predictions = np.clip(probs, min(multipliers), max(multipliers))

    if cfg.testing.mode == "valid":
        rmse = np.sqrt(
            metrics.mean_squared_error(predictions, test.growth_stage.values)
        )
        logger.info(f"OOF VALIDATION SCORE: {rmse:.5f}")

        test["pred"] = predictions
        save_path = os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/valid_preds.csv"
        )
        logger.info(f"Saving validation predictions to {save_path}")
        test[["UID", "pred"]].to_csv(save_path, index=False)
    else:
        test["growth_stage"] = predictions
        save_path = os.path.join(
            cfg.general.logs_dir, f"model_{cfg.model.model_id}/test_preds.csv"
        )
        logger.info(f"Saving test predictions to {save_path}")
        test[["UID", "growth_stage"]].to_csv(save_path, index=False)
Example #5

# Module-level imports assumed by this snippet (project-local names such as
# `utils` and `logger` come from the surrounding repository):
import os

import numpy as np
import omegaconf
import pandas as pd
from sklearn import metrics

def make_ensemble(cfg: omegaconf.DictConfig) -> None:
    utils.setup_environment(seed=cfg.general.seed,
                            gpu_list=cfg.general.gpu_list)

    if cfg.testing.mode == "valid":
        train = pd.read_csv(cfg.data_mode.train_csv)

        predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="valid_preds.csv",
            output_colname="pred",
        )
        predictions = predictions.merge(train)

        if cfg.ensemble.postprocessing:
            # Snap predictions that land within 0.03 of a class value to that
            # value
            for mult in cfg.data_mode.rmse_multipliers:
                mask = ((predictions["pred"] > mult - 0.03)
                        & (predictions["pred"] < mult + 0.03))
                predictions.loc[mask, "pred"] = mult

        rmse = np.sqrt(
            metrics.mean_squared_error(predictions.growth_stage,
                                       predictions.pred))
        logger.info(f"OOF ENSEMBLE VALIDATION SCORE: {rmse:.5f}")
    elif cfg.testing.mode == "pseudo":
        for fold in cfg.testing.folds:
            test_predictions = utils.combine_dataframes(
                models_list=cfg.ensemble.model_ids,
                logs_dir=cfg.general.logs_dir,
                filename=f"pseudo_fold_{fold}.csv",
                agg_func="mode",
            )

            save_path = os.path.join(
                cfg.general.logs_dir,
                f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_pseudo_fold_{fold}.csv",
            )
            logger.info(f"Saving pseudo predictions to {save_path}")
            test_predictions[["UID", "growth_stage"]].to_csv(save_path,
                                                             index=False)
    else:
        test_predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="test_preds.csv",
        )

        if cfg.ensemble.postprocessing:
            for mult in cfg.data_mode.rmse_multipliers:
                mask = ((test_predictions["growth_stage"] > mult - 0.03)
                        & (test_predictions["growth_stage"] < mult + 0.03))
                test_predictions.loc[mask, "growth_stage"] = mult

        save_path = os.path.join(
            cfg.general.logs_dir,
            f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_ens.csv",
        )
        logger.info(f"Saving test predictions to {save_path}")
        test_predictions[["UID", "growth_stage"]].to_csv(save_path,
                                                         index=False)
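
A toy run of the snapping rule above (class values and predictions are made up):

import pandas as pd

preds = pd.DataFrame({"pred": [2.02, 3.30, 4.99]})
for mult in [2, 3, 4, 5]:  # hypothetical rmse_multipliers
    mask = (preds["pred"] > mult - 0.03) & (preds["pred"] < mult + 0.03)
    preds.loc[mask, "pred"] = mult
# preds["pred"] is now [2.0, 3.3, 5.0]: only values within 0.03 of a class snap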
Example #6

# Module-level imports assumed by this snippet (project-local names such as
# `utils` and `logger` come from the surrounding repository):
import os

import lightgbm
import numpy as np
import omegaconf
import pandas as pd
from sklearn import metrics

def make_ensemble(cfg: omegaconf.DictConfig) -> None:
    utils.setup_environment(seed=cfg.general.seed, gpu_list=cfg.general.gpu_list)

    if cfg.testing.mode == "valid":
        train = pd.read_csv(cfg.data_mode.train_csv, index_col="UID")
        train = train[["growth_stage", "fold"]]
        feature_columns = list(range(len(cfg.ensemble.model_ids)))

        predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="valid_preds.csv",
            agg_func=None,
        )
        predictions.columns = feature_columns
        train = train.join(predictions, how="inner")

        lightgbm_params = {
            "boosting_type": "gbdt",
            "objective": "regression",
            "metric": "rmse",
            "num_leaves": 2,
            "learning_rate": 0.05,
            "feature_fraction": 0.6,
            "bagging_fraction": 0.9,
            "bagging_freq": 5,
            "verbose": 1,
        }

        train["pred"] = -1
        multipliers = np.array(cfg.data_mode.rmse_multipliers)
        model_name = "_".join([str(x) for x in cfg.ensemble.model_ids])

        for fold in cfg.testing.folds:
            train_folds = [f for f in cfg.testing.folds if f != fold]

            x_train = train.loc[train.fold.isin(train_folds), feature_columns].values
            y_train = train.loc[train.fold.isin(train_folds), "growth_stage"].values

            x_test = train.loc[train.fold == fold, feature_columns].values
            y_test = train.loc[train.fold == fold, "growth_stage"].values

            train_data = lightgbm.Dataset(x_train, label=y_train)
            test_data = lightgbm.Dataset(x_test, label=y_test)

            gbm = lightgbm.train(
                lightgbm_params,
                train_data,
                valid_sets=test_data,
                num_boost_round=5000,
                # the early_stopping_rounds keyword was removed in LightGBM 4.0;
                # newer versions use callbacks=[lightgbm.early_stopping(100)]
                early_stopping_rounds=100,
            )

            preds = gbm.predict(x_test)
            preds = np.clip(preds, min(multipliers), max(multipliers))
            train.loc[train.fold == fold, "pred"] = preds

            gbm.save_model(
                os.path.join(
                    cfg.general.logs_dir, f"{model_name}_stacking_fold_{fold}.txt"
                ),
                num_iteration=gbm.best_iteration,
            )

        rmse = np.sqrt(metrics.mean_squared_error(train.growth_stage, train.pred))
        logger.info(f"STACKING VALIDATION SCORE: {rmse:.5f}")
    else:
        test_predictions = utils.combine_dataframes(
            models_list=cfg.ensemble.model_ids,
            logs_dir=cfg.general.logs_dir,
            filename="test_preds.csv",
            agg_func=None,
        )
        feature_columns = list(range(len(cfg.ensemble.model_ids)))
        test_predictions.columns = feature_columns

        model_name = "_".join([str(x) for x in cfg.ensemble.model_ids])
        test_predictions["growth_stage"] = 0
        multipliers = np.array(cfg.data_mode.rmse_multipliers)

        for fold in cfg.testing.folds:
            gbm = lightgbm.Booster(
                model_file=os.path.join(
                    cfg.general.logs_dir, f"{model_name}_stacking_fold_{fold}.txt"
                )
            )

            preds = gbm.predict(test_predictions[feature_columns].values)
            preds = np.clip(preds, min(multipliers), max(multipliers))
            test_predictions["growth_stage"] += preds / len(cfg.testing.folds)

        save_path = os.path.join(
            cfg.general.logs_dir,
            f"{'_'.join([str(x) for x in cfg.ensemble.model_ids])}_ens.csv",
        )
        logger.info(f"Saving test predictions to {save_path}")
        test_predictions.reset_index()[["UID", "growth_stage"]].to_csv(save_path, index=False)
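
For reference, a minimal self-contained sketch of the out-of-fold stacking scheme used above, on toy data (feature count, fold layout, and parameters are illustrative):

import lightgbm
import numpy as np

X = np.random.rand(100, 3)    # base-model predictions as stacking features
y = np.random.rand(100)       # regression target
folds = np.arange(100) % 5    # toy fold assignment

oof = np.zeros(100)
for fold in range(5):
    train_idx, test_idx = folds != fold, folds == fold
    # One booster per fold, trained on the other folds
    booster = lightgbm.train(
        {"objective": "regression", "metric": "rmse", "verbose": -1},
        lightgbm.Dataset(X[train_idx], label=y[train_idx]),
        num_boost_round=50,
    )
    oof[test_idx] = booster.predict(X[test_idx])  # held-out predictions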