def main(data_path="data/split/", feature_path="data/features/", out_path="data/pca/"): X_train, X_test, y_train, y_test = read_data(data_path) params = read_params("params.yaml", "pca") pca = PCA(**params).fit(X_train) train_feature = pd.DataFrame(pca.transform(X_train)) test_feature = pd.DataFrame(pca.transform(X_test)) train_feature["class"] = y_train test_feature["class"] = y_test if not os.path.isdir(feature_path): os.mkdir(feature_path) train_feature.to_csv(f"{feature_path}train.csv", index=False) test_feature.to_csv(f"{feature_path}test.csv", index=False) save_results(out_path, pca, None) print(f"Finished Feature Engineering:\nStats:") print(f"\tExplained Variance: {pca.explained_variance_}") print(f"\tExplained Variance Ratio: {pca.explained_variance_ratio_}") log_experiment( out_path, metrics=dict( explained_variance_=pca.explained_variance_, explained_variance_ratio_=pca.explained_variance_ratio_, ), )
# Variant of the PCA stage above that additionally logs the PCA hyperparameters
# alongside the explained-variance metrics.
def main(data_path='data/split/', feature_path='data/features/', out_path='data/pca/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    params = read_params('params.yaml', 'pca')
    pca = PCA(**params).fit(X_train)
    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature['class'] = y_train
    test_feature['class'] = y_test
    if not os.path.isdir(feature_path):
        os.mkdir(feature_path)
    train_feature.to_csv(f'{feature_path}train.csv', index=False)
    test_feature.to_csv(f'{feature_path}test.csv', index=False)
    save_results(out_path, pca, None)
    print('Finished Feature Engineering:\nStats:')
    print(f'\tExplained Variance: {pca.explained_variance_}')
    print(f'\tExplained Variance Ratio: {pca.explained_variance_ratio_}')
    log_experiment(
        out_path,
        params=params,
        metrics=dict(
            explained_variance_=pca.explained_variance_,
            explained_variance_ratio_=pca.explained_variance_ratio_,
        ),
    )

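# The helpers used throughout these stages (read_data, read_params, save_results,
# log_experiment, load_model, evaluate_model, print_results) are project-local and
# not shown. A minimal sketch of the two loaders, assuming the split CSVs carry a
# 'class' label column and params.yaml holds one top-level section per stage (both
# assumptions, not confirmed by the source):

import os

import pandas as pd
import yaml


def read_data(data_path):
    # hypothetical loader: expects the train.csv/test.csv written by an earlier stage
    train = pd.read_csv(os.path.join(data_path, 'train.csv'))
    test = pd.read_csv(os.path.join(data_path, 'test.csv'))
    return (train.drop(columns=['class']), test.drop(columns=['class']),
            train['class'], test['class'])


def read_params(params_file, section):
    # hypothetical loader: returns one section of params.yaml as a kwargs dict
    with open(params_file) as f:
        return yaml.safe_load(f)[section]
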
def main(config):
    experiment_name, current_time = setup_experiment(config.title, config)

    # normalization (creates the t1_landmarks.npy file)
    create_normalization_file(
        use_controls=config.use_controls,
        use_nofcd=config.use_ae,
        mods=config.nb_of_modalities,
    )
    print('Normalization is finished')

    # patch extraction
    get_patch_list(
        use_controls=config.use_controls,
        use_fcd=config.use_ae,
        use_coronal=config.use_coronal,
        use_sagital=config.use_sagital,
        augment=config.augment,
        h=config.height,
        w=config.width,
        hard_labeling=config.hard_labeling,
        mods=config.nb_of_modalities,
        batch_size=config.batch_size,
    )
    print('Patch extraction is finished')

    # CNN model training with leave-one-out evaluation
    top_k_scores = train_model(
        mods=config.nb_of_modalities,
        use_ae=config.use_ae,
        h=config.height,
        w=config.width,
        use_coronal=config.use_coronal,
        use_sagital=config.use_sagital,
        use_controls=config.use_controls,
        latent_dim=config.latent_size,
        batch_size=config.batch_size,
        lr=config.lr,
        weight_decay=config.weight_decay,
        weight_of_class=config.weight_of_class,
        n_epochs=config.nb_epochs,
        n_epochs_ae=config.nb_epochs_ae,
        p=config.dropout_rate,
        save_masks=config.save_masks,
        parallel=config.parallel,
        experiment_name=experiment_name,
        temporal_division=config.temporal_division,
        seed=config.seed,
    )
    print(top_k_scores)
    print('LOO mean top-k score:', top_k_scores.mean())

    # logging: record the fraction of folds with a nonzero top-k score
    log_experiment(config, current_time, (top_k_scores > 0).mean())

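# A hedged usage sketch for the FCD pipeline above: `config` is only known to
# expose the attributes accessed in main(), so a plain namespace with assumed
# values (every value below is hypothetical) is enough to drive it.

from types import SimpleNamespace

config = SimpleNamespace(
    title='fcd-baseline',                      # assumed experiment title
    use_controls=True, use_ae=True,
    use_coronal=False, use_sagital=False,
    augment=True, hard_labeling=False,
    height=64, width=64,                       # assumed patch size
    nb_of_modalities=2,
    latent_size=128, batch_size=32,
    lr=1e-3, weight_decay=1e-4, weight_of_class=1.0,
    nb_epochs=20, nb_epochs_ae=10, dropout_rate=0.5,
    save_masks=False, parallel=False,
    temporal_division=False, seed=42,
)
main(config)
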
def main(data_path="data/features/", out_path="data/models/svc/"): X_train, X_test, y_train, y_test = read_data(data_path) name = "LinearSVC" params = read_params("params.yaml", "svc") model = LinearSVC(**params) model.fit(X_train, y_train) accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test) print_results(accuracy, c_matrix, name) save_results(out_path, model, fig) log_experiment(out_path, metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
# Logistic-regression training stage.
def main(data_path='data/features/', out_path='data/models/logistic/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'LogisticRegression'
    params = read_params('params.yaml', 'logistic')
    model = LogisticRegression(**params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path, params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))

# Random-forest training stage.
def main(data_path='data/features/', out_path='data/models/r_forrest/',
         n_estimators=10, max_samples=30):
    # note: n_estimators and max_samples are never used below; the forest's
    # hyperparameters come entirely from the 'forrest' section of params.yaml
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'RandomForrest'
    params = read_params('params.yaml', 'forrest')
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path, params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))

# Voting ensemble over the three previously trained base models.
def main(data_path='data/features/', model_path='data/models/', out_path='data/models/ensemble/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'Ensemble'
    params = read_params('params.yaml', 'ensemble')
    # load the base models persisted by the individual training scripts
    # (model_path already ends with a slash, so no separator is added here)
    cl1 = load_model(f'{model_path}logistic/')
    cl2 = load_model(f'{model_path}svc/')
    cl3 = load_model(f'{model_path}r_forrest/')
    estimators = [('l_regression', cl1), ('l_svc', cl2), ('r_forrest', cl3)]
    model = VotingClassifier(estimators, **params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path, metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))

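# save_results and load_model persist fitted estimators between stages; the
# storage format is not shown in the source. A sketch assuming joblib pickles
# named model.joblib inside each stage directory (both names hypothetical):

import os

import joblib


def save_results(out_path, model, fig):
    # hypothetical implementation: pickle the model, save the figure if given
    os.makedirs(out_path, exist_ok=True)
    joblib.dump(model, os.path.join(out_path, 'model.joblib'))
    if fig is not None:
        fig.savefig(os.path.join(out_path, 'confusion_matrix.png'))


def load_model(model_path):
    return joblib.load(os.path.join(model_path, 'model.joblib'))
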
def __call__(
    self,
    read_old_model=True,
    model='TransE',
    num_epochs=2,
    train_batch_size=512,
    evaluation_batch_size=128,
    model_location=MODEL_SAVE_DIR,  # module-level constant for the save directory
):
    # Reuse a previously trained model if requested; otherwise train from scratch.
    if read_old_model:
        if os.path.exists(os.path.join(model_location, 'trained_model.pkl')):
            # load the saved model
            self.__model = torch.load(os.path.join(model_location, 'trained_model.pkl'))
        else:
            # fail loudly if no saved model exists rather than silently retraining
            raise FileNotFoundError(
                f"Old model not found at {model_location}. "
                f"Please check the path provided"
            )
    else:
        # train a new model from scratch
        self.__fit(
            model_text=model,
            num_epochs=num_epochs,
            train_batch_size=train_batch_size,
            eval_batch_size=evaluation_batch_size,
            model_location=model_location,
        )
    # if versioning has been enabled (experiment tags set), log the run details to MLflow
    if len(self.__exp_tags):
        log_experiment(
            params={
                "num_epochs": num_epochs,
                "train_batch_size": train_batch_size,
                "eval_batch_size": evaluation_batch_size,
            },
            tags=self.__exp_tags,
        )

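# A hedged usage sketch for the trainer above. The enclosing class is not shown
# in the source; `KGTrainer` and its no-argument constructor are assumed names.

trainer = KGTrainer()                  # hypothetical constructor
trainer(read_old_model=False,          # train from scratch instead of loading
        model='TransE',
        num_epochs=10,
        train_batch_size=512,
        evaluation_batch_size=128)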