def main(data_path="data/split/", feature_path="data/features/", out_path="data/pca/"):
    """Fit a PCA on the training split, write transformed features, and log the run.

    Args:
        data_path: directory consumed by ``read_data`` holding the train/test split.
        feature_path: directory where ``train.csv`` / ``test.csv`` features are written.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    # PCA hyper-parameters come from the "pca" section of params.yaml.
    params = read_params("params.yaml", "pca")
    pca = PCA(**params).fit(X_train)

    # Transform both splits with the PCA fitted on the training data only,
    # then re-attach the labels as a "class" column.
    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature["class"] = y_train
    test_feature["class"] = y_test

    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race the isdir/mkdir pair had.
    os.makedirs(feature_path, exist_ok=True)
    train_feature.to_csv(f"{feature_path}train.csv", index=False)
    test_feature.to_csv(f"{feature_path}test.csv", index=False)
    save_results(out_path, pca, None)

    print("Finished Feature Engineering:\nStats:")
    print(f"\tExplained Variance: {pca.explained_variance_}")
    print(f"\tExplained Variance Ratio: {pca.explained_variance_ratio_}")

    # Also log the hyper-parameters, matching the other pipeline stages.
    log_experiment(
        out_path,
        params=params,
        metrics=dict(
            explained_variance_=pca.explained_variance_,
            explained_variance_ratio_=pca.explained_variance_ratio_,
        ),
    )
예제 #2
0
def main(data_path='data/split/',
         feature_path='data/features/',
         out_path='data/pca/'):
    """Fit a PCA on the training split, write transformed features, and log the run.

    Args:
        data_path: directory consumed by ``read_data`` holding the train/test split.
        feature_path: directory where ``train.csv`` / ``test.csv`` features are written.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    # PCA hyper-parameters come from the "pca" section of params.yaml.
    params = read_params('params.yaml', 'pca')
    pca = PCA(**params).fit(X_train)

    # Transform both splits with the PCA fitted on the training data only,
    # then re-attach the labels as a "class" column.
    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature['class'] = y_train
    test_feature['class'] = y_test

    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race the isdir/mkdir pair had.
    os.makedirs(feature_path, exist_ok=True)
    train_feature.to_csv(f'{feature_path}train.csv', index=False)
    test_feature.to_csv(f'{feature_path}test.csv', index=False)
    save_results(out_path, pca, None)

    print('Finished Feature Engineering:\nStats:')
    print(f'\tExplained Variance: {pca.explained_variance_}')
    print(f'\tExplained Variance Ratio: {pca.explained_variance_ratio_}')

    log_experiment(
        out_path,
        params=params,
        metrics=dict(explained_variance_=pca.explained_variance_,
                     explained_variance_ratio_=pca.explained_variance_ratio_))
예제 #3
0
def main(config):
    """Run the full pipeline: normalization, patch extraction, CNN training, logging."""
    experiment_name, current_time = setup_experiment(config.title, config)

    # Normalization step (creates the t1_landmarks.npy file).
    create_normalization_file(
        use_controls=config.use_controls,
        use_nofcd=config.use_ae,
        mods=config.nb_of_modalities,
    )
    print('Normalization is finished')

    # Patch extraction step.
    patch_kwargs = dict(
        use_controls=config.use_controls,
        use_fcd=config.use_ae,
        use_coronal=config.use_coronal,
        use_sagital=config.use_sagital,
        augment=config.augment,
        h=config.height,
        w=config.width,
        hard_labeling=config.hard_labeling,
        mods=config.nb_of_modalities,
        batch_size=config.batch_size,
    )
    get_patch_list(**patch_kwargs)
    print('Patch extraction is finished')

    # CNN model training.
    train_kwargs = dict(
        mods=config.nb_of_modalities,
        use_ae=config.use_ae,
        h=config.height,
        w=config.width,
        use_coronal=config.use_coronal,
        use_sagital=config.use_sagital,
        use_controls=config.use_controls,
        latent_dim=config.latent_size,
        batch_size=config.batch_size,
        lr=config.lr,
        weight_decay=config.weight_decay,
        weight_of_class=config.weight_of_class,
        n_epochs=config.nb_epochs,
        n_epochs_ae=config.nb_epochs_ae,
        p=config.dropout_rate,
        save_masks=config.save_masks,
        parallel=config.parallel,
        experiment_name=experiment_name,
        temporal_division=config.temporal_division,
        seed=config.seed,
    )
    top_k_scores = train_model(**train_kwargs)

    print(top_k_scores)
    print('LOO mean top-k score:', top_k_scores.mean())

    # Log the fraction of LOO folds with a strictly positive top-k score.
    log_experiment(config, current_time, (top_k_scores > 0).mean())
예제 #4
0
def main(data_path="data/features/", out_path="data/models/svc/"):
    """Train a LinearSVC on the feature split, evaluate it, and log the run.

    Args:
        data_path: directory consumed by ``read_data`` holding the features.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = "LinearSVC"
    # SVC hyper-parameters come from the "svc" section of params.yaml.
    params = read_params("params.yaml", "svc")
    model = LinearSVC(**params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    # Also log the hyper-parameters, matching the other model stages.
    # NOTE: "confusion_matrics" key kept as-is — downstream readers depend on it.
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
예제 #5
0
def main(data_path='data/features/', out_path='data/models/logistic/'):
    """Train a LogisticRegression on the feature split, evaluate it, and log the run.

    Args:
        data_path: directory consumed by ``read_data`` holding the features.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    model_name = 'LogisticRegression'
    # Hyper-parameters come from the "logistic" section of params.yaml.
    hyper_params = read_params('params.yaml', 'logistic')
    classifier = LogisticRegression(**hyper_params).fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(classifier, X_test, y_test)
    print_results(accuracy, c_matrix, model_name)

    save_results(out_path, classifier, fig)
    log_experiment(out_path,
                   params=hyper_params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
예제 #6
0
def main(data_path='data/features/',
         out_path='data/models/r_forrest/',
         n_estimators=10,
         max_samples=30):
    """Train a RandomForestClassifier on the feature split, evaluate it, and log the run.

    Args:
        data_path: directory consumed by ``read_data`` holding the features.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
        n_estimators: NOTE(review): currently unused — hyper-parameters are read
            from params.yaml instead; kept for interface compatibility.
        max_samples: NOTE(review): currently unused, see ``n_estimators``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    model_name = 'RandomForrest'
    # Hyper-parameters come from the "forrest" section of params.yaml.
    hyper_params = read_params('params.yaml', 'forrest')
    classifier = RandomForestClassifier(**hyper_params).fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(classifier, X_test, y_test)
    print_results(accuracy, c_matrix, model_name)

    save_results(out_path, classifier, fig)
    log_experiment(out_path,
                   params=hyper_params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
예제 #7
0
def main(data_path='data/features/',
         model_path='data/models/',
         out_path='data/models/ensemble/'):
    """Train a soft/hard voting ensemble over the previously trained models.

    Args:
        data_path: directory consumed by ``read_data`` holding the features.
        model_path: root directory containing the saved base models.
        out_path: directory handed to ``save_results`` and ``log_experiment``.
    """
    X_train, X_test, y_train, y_test = read_data(data_path)

    name = 'Ensemble'
    # Ensemble hyper-parameters come from the "ensemble" section of params.yaml.
    params = read_params('params.yaml', 'ensemble')
    cl1 = load_model(f'{model_path}/logistic/')
    cl2 = load_model(f'{model_path}/svc/')
    cl3 = load_model(f'{model_path}/r_forrest/')
    estimators = [('l_regression', cl1), ('l_svc', cl2), ('r_forrest', cl3)]

    model = VotingClassifier(estimators, **params)
    model.fit(X_train, y_train)

    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)

    save_results(out_path, model, fig)
    # Also log the hyper-parameters, matching the other model stages.
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
예제 #8
0
    def __call__(
        self,
        read_old_model=True,
        model='TransE',
        num_epochs=2,
        train_batch_size=512,
        evaluation_batch_size=128,
        model_location=MODEL_SAVE_DIR
    ):
        """Load a previously saved model or train a fresh one, then optionally log the run.

        Args:
            read_old_model: when True, load ``trained_model.pkl`` from
                ``model_location`` instead of training.
            model: embedding model name forwarded to the training routine.
            num_epochs: number of training epochs.
            train_batch_size: batch size used during training.
            evaluation_batch_size: batch size used during evaluation.
            model_location: directory holding / receiving the model file.

        Raises:
            FileNotFoundError: ``read_old_model`` is True but no saved model
                exists at ``model_location``.
        """
        if read_old_model:
            saved_model_path = os.path.join(model_location, 'trained_model.pkl')
            if os.path.exists(saved_model_path):
                # NOTE(review): torch.load unpickles the file — only load
                # model files from trusted locations.
                self.__model = torch.load(saved_model_path)
            else:
                raise FileNotFoundError(
                    f"Old model not found at {model_location}. "
                    f"Please check the path provided"
                )
        else:
            self.__fit(
                model_text=model,
                num_epochs=num_epochs,
                train_batch_size=train_batch_size,
                eval_batch_size=evaluation_batch_size,
                model_location=model_location
            )

        # If experiment tags were configured, log the run details (mlflow).
        if self.__exp_tags:
            log_experiment(
                params={
                    "num_epochs": num_epochs,
                    "train_batch_size": train_batch_size,
                    "eval_batch_size": evaluation_batch_size
                },
                tags=self.__exp_tags,
            )