def config():
    """Sacred config: trace-norm model, Archi target with HCP helper.

    Every local defined here is captured by sacred as an experiment
    parameter and injected into the experiment's main function.
    """
    datasets = ['archi', 'hcp']  # target dataset first, helper dataset after
    reduced_dir = join(get_output_dir(), 'reduced')   # projected data location
    unmask_dir = join(get_output_dir(), 'unmasked')   # voxel-space data location
    source = 'hcp_rs_concat'  # resting-state dictionary used for the reduction
    n_subjects = None  # None -> use all available subjects
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5}
    # Training subject fraction, per dataset
    train_size = {'hcp': .9, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                  'la5c': .5}
    alpha = 0  # regularization parameter (semantics live in the estimator)
    beta = 0   # secondary regularization parameter
    model = 'trace'  # trace-norm estimator
    max_iter = 2000
    verbose = 10
    with_std = False   # no per-feature scaling
    with_mean = False  # no per-feature centering
    per_dataset = False  # standardization scope: global, not per dataset
    split_loss = True
def config():
    """Sacred config: logistic model, CamCAN target with HCP helper.

    Locals are captured by sacred as experiment parameters.
    """
    datasets = ['camcan', 'hcp']  # target dataset first, helper dataset after
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    source = 'hcp_rs_concat'  # resting-state dictionary used for the reduction
    n_subjects = None  # None -> use all available subjects
    # Held-out subject *fraction*, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5}
    # Training subject *count*, per dataset; None -> all remaining subjects
    # NOTE(review): train_size is a fraction in some sibling configs and a
    # count here — presumably split_folds handles both; confirm.
    train_size = dict(hcp=None, archi=30, la5c=50, brainomics=30,
                      camcan=100, human_voice=None)
    alpha = 0
    beta = 0
    model = 'logistic'
    max_iter = 400
    n_components = 50  # latent-space dimension
    latent_dropout_rate = 0.
    input_dropout_rate = 0.0
    source_init = None  # no warm start for the projection
    optimizer = 'adam'
    step_size = 1e-3
    verbose = 10
    with_std = True
    with_mean = True
    per_dataset = True  # standardize each dataset separately
def config():
    """Sacred config: shared defaults.

    No ``datasets`` entry here — presumably supplied by a named config or
    the command line; confirm against the experiment's main function.
    """
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5}
    # Training subject count, per dataset; None -> all remaining subjects
    train_size = dict(hcp=None, archi=30, la5c=50, brainomics=30,
                      camcan=100, human_voice=None)
    max_iter = 1000
    verbose = 10
    seed = 10
    with_std = True
    with_mean = True
    per_dataset = True  # standardize each dataset separately
    split_loss = True
    # Factored only
    n_components = 75
    alpha = 0.
    latent_dropout_rate = 0.5
    input_dropout_rate = 0.25
    batch_size = 128
    optimizer = 'adam'
    step_size = 1e-3
def config():
    """Sacred config: factored model, Brainomics target with HCP helper."""
    datasets = ['brainomics', 'hcp']  # target first, helper after
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    source = 'hcp_rs_positive_single'  # single-scale positive dictionary
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5, 'full': .5}
    # Training subject count, per dataset; None -> all remaining subjects
    train_size = dict(hcp=None, archi=30, la5c=50, brainomics=30,
                      camcan=100, human_voice=None)
    dataset_weights = {'brainomics': 1, 'archi': 1, 'hcp': 1}
    model = 'factored'
    # NOTE(review): a full log-grid is assigned here, not a scalar —
    # presumably swept downstream; confirm the consumer handles arrays.
    alpha = np.logspace(-6, -1, 12)
    max_iter = 200
    verbose = 10
    seed = 20
    with_std = True
    with_mean = True
    per_dataset = True  # standardize each dataset separately
    split_loss = True
    # Factored only
    n_components = 200
    latent_dropout_rate = 0.9
    input_dropout_rate = 0.25
    batch_size = 128
    optimizer = 'adam'
    step_size = 1e-3
def config():
    """Sacred config: factored model over four datasets, with CV grids.

    The plural ``*_rates`` / ``alphas`` entries are candidate grids for an
    outer cross-validation loop (``n_splits`` folds).
    """
    datasets = ['archi', 'brainomics', 'camcan', 'hcp']
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    # source = 'mix'
    source = 'hcp_new_big'  # multi-scale (16/64/512) positive dictionary
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5, 'full': .5}
    # Training subject count, per dataset; None -> all remaining subjects
    train_size = dict(hcp=None, archi=20, la5c=None, brainomics=None,
                      camcan=None, human_voice=None)
    dataset_weights = {'brainomics': 1, 'archi': 1, 'hcp': 1}
    model = 'factored'
    max_iter = 50
    verbose = 10
    seed = 100
    with_std = True
    with_mean = True
    per_dataset = True  # standardize each dataset separately
    # Factored only
    n_components = 100
    batch_size = 256
    optimizer = 'adam'
    step_size = 1e-3
    # Hyper-parameter grids for the outer CV loop
    alphas = [5e-4]  # np.logspace(-6, -1, 12)
    latent_dropout_rates = [0.75]
    input_dropout_rates = [0.25]
    dataset_weights_helpers = [[1, 1, 1]]
    n_splits = 10
    n_jobs = 1
def unmask(dataset, output_dir=None, n_jobs=1, batch_size=1000):
    """Unmask the contrast images of `dataset` onto the common brain mask.

    Fetches the dataset's contrast images, applies the shared mask and
    writes the unmasked (voxel-space) data under
    ``<output_dir>/unmasked/<dataset>``.

    Parameters
    ----------
    dataset : str
        One of 'hcp', 'archi', 'brainomics', 'la5c', 'human_voice',
        'camcan', 'brainpedia'.
    output_dir : str or None
        Base output directory; resolved through ``get_output_dir``.
    n_jobs : int
        Number of parallel jobs used when unmasking.
    batch_size : int
        Number of images processed per batch.

    Raises
    ------
    ValueError
        If ``dataset`` is not a supported dataset name.
    """
    if dataset == 'hcp':
        fetch_data = fetch_hcp
    elif dataset == 'archi':
        fetch_data = fetch_archi
    elif dataset == 'brainomics':
        fetch_data = fetch_brainomics
    elif dataset == 'la5c':
        fetch_data = fetch_la5c
    elif dataset == 'human_voice':
        fetch_data = fetch_human_voice
    elif dataset == 'camcan':
        fetch_data = fetch_camcan
    elif dataset == 'brainpedia':
        fetch_data = fetch_brainpedia
    else:
        # Fix: the original raised a bare ValueError with no message.
        raise ValueError('Unknown dataset: %r' % dataset)
    imgs = fetch_data()
    if dataset == 'hcp':
        # The HCP fetcher returns an object whose contrasts live in
        # `.contrasts`; the other fetchers return the images directly.
        imgs = imgs.contrasts
    mask = fetch_mask()
    artifact_dir = join(get_output_dir(output_dir), 'unmasked', dataset)
    create_raw_contrast_data(imgs, mask, artifact_dir, n_jobs=n_jobs,
                             batch_size=batch_size)
def main(datasets, source, reduced_dir, unmask_dir, test_size, train_size,
         _run, _seed):
    """Fit a transfer model on the given datasets and record its scores.

    Sacred entry point: every parameter except ``_run``/``_seed`` is
    injected from the experiment config. Writes predictions, the fitted
    estimator/transformer and per-fold accuracies into the run's artifact
    directory, and stores scores and the coefficient rank in ``_run.info``.
    """
    artifact_dir = join(_run.observers[0].basedir, str(_run._id))
    # Fix: remember the raw source *before* it is rewritten below. The
    # original compared ``source == 'initial_reduction'`` after rewriting
    # it to 'hcp_rs_positive', which made that branch unreachable.
    initial_reduction = source == 'initial_reduction'
    single = False
    if source in ['hcp_rs_positive_single', 'initial_reduction']:
        source = 'hcp_rs_positive'
        single = True
    df = make_data_frame(datasets, source,
                         reduced_dir=reduced_dir,
                         unmask_dir=unmask_dir)
    if single:
        # Keep only the last 512 columns (finest dictionary scale)
        df = df.iloc[:, -512:]
    if initial_reduction:
        # Project the data through a previously fitted estimator's
        # coefficients. (A dead SVD/rank computation whose results were
        # never used has been removed from this branch.)
        estimator = load(join(get_output_dir(), 'estimator.pkl'))
        coef = estimator.coef_
        projected_df = df.values[:, -512:].dot(coef)
        df = pd.DataFrame(data=projected_df, index=df.index)
    df_train, df_test = split_folds(df, test_size=test_size,
                                    train_size=train_size,
                                    random_state=_seed)
    pred_df_train, pred_df_test, estimator, transformer \
        = fit_model(df_train, df_test)
    pred_contrasts = pd.concat([pred_df_test, pred_df_train],
                               keys=['test', 'train'], names=['fold'],
                               axis=0)
    true_contrasts = pred_contrasts.index.get_level_values('contrast').values
    res = pd.DataFrame({'pred_contrast': pred_contrasts,
                        'true_contrast': true_contrasts})
    res.to_csv(join(artifact_dir, 'prediction.csv'))
    match = res['pred_contrast'] == res['true_contrast']
    # Accuracy per (fold, dataset) and averaged per fold
    score = match.groupby(level=['fold', 'dataset']).aggregate('mean')
    score_mean = match.groupby(level=['fold']).aggregate('mean')
    score_dict = {}
    # Fix: .items() instead of .iteritems(), which pandas 2.0 removed.
    for fold, this_score in score_mean.items():
        score_dict['%s_mean' % fold] = this_score
    for (fold, dataset), this_score in score.items():
        score_dict['%s_%s' % (fold, dataset)] = this_score
    _run.info['score'] = score_dict
    rank = np.linalg.matrix_rank(estimator.coef_)
    try:
        dump(estimator, join(artifact_dir, 'estimator.pkl'))
    except TypeError:
        # Best effort: some estimators do not pickle; keep the run alive.
        pass
    _run.info['rank'] = rank
    dump(transformer, join(artifact_dir, 'transformer.pkl'))
    print('rank', rank)
    print(score)
    print(score_mean)
def compute_rec():
    """Compute and cache the benchmark reconstruction matrix.

    Masks the multi-scale positive dictionary components, builds the
    projection matrices and pickles the reconstruction matrix under
    ``<output_dir>/benchmark/rec.pkl``.
    """
    masker = MultiNiftiMasker(mask_img=fetch_mask()).fit()
    modl_atlas = fetch_atlas_modl()
    dictionaries = masker.transform([
        modl_atlas.positive_new_components16,
        modl_atlas.positive_new_components64,
        modl_atlas.positive_new_components512,
    ])
    _, _, rec = make_projection_matrix(dictionaries, scale_bases=True)
    rec_path = join(get_output_dir(), 'benchmark', 'rec.pkl')
    dump(rec, rec_path)
def summarize():
    """Aggregate benchmark run results into a per-dataset score table.

    Walks the sacred run directories selected by ``basedir_ids``, merges
    each run's config with its recorded scores, averages over runs, and
    writes the resulting table to ``results_128.csv``.

    NOTE(review): the output path uses a free name ``output_dir`` —
    confirm it is defined at module level where this function lives.
    """
    # NIPS final
    # basedir_ids = [31]
    # basedirs = [join(get_output_dir(), 'multi_nested', str(_id), 'run') for _id in basedir_ids]
    # Current figure nips final
    # basedir_ids = [6, 9, 15, 23]
    # 12 unmasked
    # 25 hcp_new_big / hcp_new_big_single
    # 28, 29 hcp_new hcp_new_single
    basedir_ids = [28, 29]
    basedirs = [
        join(get_output_dir(), 'benchmark', str(_id), 'run')
        for _id in basedir_ids
    ]
    res_list = []
    for basedir in basedirs:
        for exp_dir in os.listdir(basedir):
            exp_dir = join(basedir, exp_dir)
            try:
                config = json.load(open(join(exp_dir, 'config.json'), 'r'))
                info = json.load(open(join(exp_dir, 'info.json'), 'r'))
            except (JSONDecodeError, FileNotFoundError):
                # Skip runs with missing or corrupt metadata
                continue
            datasets = config['datasets']
            # Convention: first dataset is the target, the rest are helpers
            dataset = datasets[0]
            if len(datasets) > 1:
                helper_datasets = '__'.join(datasets[1:])
            else:
                helper_datasets = 'none'
            config['dataset'] = dataset
            config['helper_datasets'] = helper_datasets
            score = info.pop('score')
            # One flat record per run: config + info + individual scores
            res = dict(**config, **info)
            for key, value in score.items():
                res[key] = value
            res_list.append(res)
    res = pd.DataFrame(res_list)
    # mean/std/count across runs for each experimental condition
    df_agg = res.groupby(
        by=['dataset', 'source', 'model', 'with_std', 'helper_datasets'
            ]).aggregate(['mean', 'std', 'count'])
    df_agg = df_agg.fillna(0)
    results = {}
    for dataset in ['archi', 'brainomics', 'camcan']:
        # Test-fold accuracy column for each target dataset
        results[dataset] = df_agg.loc[dataset]['test_%s' % dataset]
    results = pd.concat(results, names=['dataset'])
    print(results)
    results.to_csv(join(output_dir, 'results_128.csv'))
def config():
    """Sacred config: L-BFGS run over Archi + HCP + Brainomics.

    NOTE(review): ``model`` is the empty string here — presumably
    overridden by a named config or the command line; confirm.
    """
    datasets = ['archi', 'hcp', 'brainomics']
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    source = 'hcp_rs_positive_single'  # single-scale positive dictionary
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5, 'full': .5}
    # None -> use all remaining subjects for training
    train_size = dict(hcp=None, archi=None, la5c=None, brainomics=None,
                      camcan=None, human_voice=None)
    dataset_weights = {'brainomics': 1, 'archi': 1, 'hcp': 1}
    model = ''
    alpha = 7e-4
    max_iter = 100
    verbose = 10
    seed = 10
    with_std = False
    with_mean = False
    per_dataset = False
    split_loss = True
    # Factored only
    n_components = 'auto'
    latent_dropout_rate = 0.
    input_dropout_rate = 0.
    batch_size = 128
    optimizer = 'lbfgs'
    step_size = 1
def config():
    """Sacred config: logistic model on Archi + Brainomics (no HCP helper)."""
    datasets = ['archi', 'brainomics']
    reduced_dir = join(get_output_dir(), 'reduced')
    unmask_dir = join(get_output_dir(), 'unmasked')
    source = 'hcp_rs_positive_single'  # single-scale positive dictionary
    n_subjects = None  # None -> use all available subjects
    # Held-out subject fraction, per dataset
    test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                 'la5c': .5}
    # Training subject fraction, per dataset
    train_size = {'hcp': .9, 'archi': .5, 'brainomics': .5, 'camcan': .5,
                  'la5c': .5}
    alpha = 0
    model = 'logistic'
    max_iter = 600
    n_components = 50  # latent-space dimension
    latent_dropout_rate = 0.
    input_dropout_rate = 0.25
    source_init = None  # no warm start for the projection
    optimizer = 'adam'
    step_size = 1e-3
    verbose = 10
    with_std = True
    with_mean = True
    row_standardize = False
fig = plt.figure() vmax = np.max(np.abs(single_map.get_data())) cut_coords = find_xyz_cut_coords(single_map, activation_threshold=0.33 * vmax) plot_stat_map(single_map, title=str(title), figure=fig, cut_coords=cut_coords, threshold=0.) plt.savefig(join(analysis_dir, '%s.png' % title)) plt.close(fig) memory = Memory(cachedir=get_cache_dirs()[0], verbose=2) artifact_dir = join(get_output_dir(), 'predict', str(n_exp)) analysis_dir = join(artifact_dir, 'analysis') if not os.path.exists(analysis_dir): os.makedirs(analysis_dir) config = json.load(open(join(artifact_dir, 'config.json'), 'r')) model = load(join(artifact_dir, 'estimator.pkl')) maps = model.coef_ source = config['source'] if source == 'craddock': components = fetch_craddock_parcellation().parcellate400 data = np.ones_like(check_niimg(components).get_data()) mask = new_img_like(components, data)
from json import JSONDecodeError from math import sqrt from os.path import join from matplotlib import colors import matplotlib.pyplot as plt import numpy as np import pandas as pd from cogspaces.pipeline import get_output_dir basedir_ids = [53] basedirs = [ join(get_output_dir(), 'predict_multi', str(_id), 'run') for _id in basedir_ids ] res_list = [] for basedir in basedirs: for exp_dir in os.listdir(basedir): exp_dir = join(basedir, exp_dir) try: config = json.load(open(join(exp_dir, 'config.json'), 'r')) info = json.load(open(join(exp_dir, 'info.json'), 'r')) except (JSONDecodeError, FileNotFoundError): continue cat_datasets = config['datasets'] cat_datasets = '__'.join(cat_datasets) weights = config['dataset_weights'] seed = config['seed']
from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.externals.joblib import Parallel from sklearn.externals.joblib import delayed from sklearn.utils import check_random_state from cogspaces.pipeline import get_output_dir print(path.dirname(path.dirname(path.abspath(__file__)))) # Add examples to known modules sys.path.append( path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) from exps.old.exp_predict import exp as single_exp exp = Experiment('nips') basedir = join(get_output_dir(), 'nips') if not os.path.exists(basedir): os.makedirs(basedir) exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): n_jobs = 24 n_seeds = 10 seed = 100 @single_exp.config def config(): reduced_dir = join(get_output_dir(), 'reduced')
from cogspaces.input_data.fixes import monkey_patch_nifti_image
from cogspaces.pipeline import get_output_dir
from cogspaces.input_data.base import unmask, reduce

# Patch nifti image handling before any imaging data is touched.
monkey_patch_nifti_image()

output_dir = get_output_dir()

n_jobs = 30
batch_size = 1200

# Reduce each dataset onto the 208-component dictionary. The unmask step
# is commented out — presumably already done in a previous run.
for dataset in ['archi', 'brainomics', 'camcan', 'la5c']:
    # unmask(dataset, output_dir=output_dir,
    #        n_jobs=n_jobs, batch_size=batch_size)
    reduce(dataset, output_dir=output_dir, source='hcp_new_208',
           direct=False)
import numpy as np import pandas as pd from cogspaces.model.trace import TraceNormEstimator from cogspaces.pipeline import get_output_dir, make_data_frame, split_folds, \ MultiDatasetTransformer from joblib import load from sacred import Experiment from sacred.observers import FileStorageObserver from scipy.linalg import svd from sklearn.externals.joblib import dump idx = pd.IndexSlice exp = Experiment('predict') basedir = join(get_output_dir(), 'predict') exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): datasets = ['archi'] reduced_dir = join(get_output_dir(), 'reduced') unmask_dir = join(get_output_dir(), 'unmasked') source = 'hcp_rs_concat' test_size = { 'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5, 'la5c': .5,
def reduce(dataset, output_dir=None, direct=False, source='hcp_rs_concat'):
    """Create a reduced version of a given dataset.

    ``unmask`` must be called beforehand: the voxel-space data is read
    from ``<output_dir>/unmasked/<dataset>``, projected onto the requested
    component dictionary and written under
    ``<output_dir>/reduced/<source>/<dataset>``.

    Parameters
    ----------
    dataset : str
        Dataset name (e.g. 'archi', 'hcp', ...).
    output_dir : str or None
        Base output directory; resolved through ``get_output_dir``.
    direct : bool
        If True, use the transposed pseudo-inverse as the projection and
        suffix the stored source name with '_direct'.
    source : str
        Name of the component dictionary to project onto.

    Raises
    ------
    ValueError
        If ``source`` does not name a known dictionary.
    """
    memory = Memory(cachedir=get_cache_dirs()[0], verbose=2)
    print('Fetch data')
    this_dataset_dir = join(get_output_dir(output_dir), 'unmasked', dataset)
    masker, X = get_raw_contrast_data(this_dataset_dir)
    print('Retrieve components')
    if source == 'craddock':
        # Label-based reduction: average the signal within each parcel.
        components = fetch_craddock_parcellation().parcellate400
        niimgs = masker.inverse_transform(X.values)
        label_masker = NiftiLabelsMasker(labels_img=components,
                                         smoothing_fwhm=0,
                                         mask_img=masker.mask_img_).fit()
        # components = label_masker.inverse_transform(np.eye(400))
        print('Transform and fit data')
        Xt = label_masker.transform(niimgs)
    else:
        if source == 'msdl':
            components = [fetch_atlas_msdl()['maps']]
        else:
            data = fetch_atlas_modl()
            if source == 'hcp_rs':
                components_imgs = [data.nips2017_components256]
            elif source == 'hcp_rs_concat':
                components_imgs = [data.nips2017_components16,
                                   data.nips2017_components64,
                                   data.nips2017_components256]
            elif source == 'hcp_336':
                components_imgs = [data.nips2017_components336]
            elif source == 'hcp_new':
                components_imgs = [data.positive_new_components16,
                                   data.positive_new_components64,
                                   data.positive_new_components128]
            elif source == 'hcp_new_big':
                components_imgs = [data.positive_new_components16,
                                   data.positive_new_components64,
                                   data.positive_new_components512]
            elif source == 'hcp_rs_positive_concat':
                components_imgs = [data.positive_components16,
                                   data.positive_components64,
                                   data.positive_components512]
            elif source == 'hcp_new_208':
                components_imgs = [data.positive_new_components208]
            else:
                # Fix: unknown sources previously fell through to a
                # NameError on components_imgs; fail explicitly instead.
                raise ValueError('Unknown source: %r' % source)
            components = masker.transform(components_imgs)
        print('Transform and fit data')
        proj, proj_inv, _ = memory.cache(make_projection_matrix)(
            components, scale_bases=True)
        if direct:
            proj = proj_inv.T
        Xt = X.dot(proj)
        Xt = pd.DataFrame(data=Xt, index=X.index)
    this_source = source
    if direct:
        this_source += '_direct'
    this_output_dir = join(get_output_dir(output_dir), 'reduced',
                           this_source, dataset)
    if not os.path.exists(this_output_dir):
        os.makedirs(this_output_dir)
    print(join(this_output_dir, 'Xt.pkl'))
    Xt.to_pickle(join(this_output_dir, 'Xt.pkl'))
    dump(masker, join(this_output_dir, 'masker.pkl'))
    # Fix: the original joined the raw ``output_dir`` argument, which
    # crashes (TypeError) when left at its default of None; resolve it
    # through get_output_dir like every other path in this function.
    np.save(join(get_output_dir(output_dir), 'components'), components)
components_imgs = [ atlas.positive_new_components16, atlas.positive_new_components64, atlas.positive_new_components512 ] components = masker.transform(components_imgs) proj, proj_inv, rec = make_projection_matrix(components, scale_bases=True) dump(rec, join(get_output_dir(), 'benchmark', 'rec.pkl')) def load_rec(): return load(join(get_output_dir(), 'benchmark', 'rec.pkl')) # compute_rec() exp_dirs = join(get_output_dir(), 'single_exp', '8') models = [] rec = load_rec() mask_img = fetch_mask() masker = MultiNiftiMasker(mask_img=mask_img).fit() for exp_dir in [exp_dirs]: estimator = load(join(exp_dirs, 'estimator.pkl')) transformer = load(join(exp_dirs, 'transformer.pkl')) print([(dataset, this_class) for dataset, lbin in transformer.lbins_.items() for this_class in lbin.classes_]) coef = estimator.coef_ coef_rec = coef.dot(rec) print(join(exp_dirs, 'maps.nii.gz')) imgs = masker.inverse_transform(coef_rec)
from os.path import join

from cogspaces.pipeline import get_output_dir, make_data_frame

# Report how many distinct subjects each dataset contributes once reduced
# onto the 'hcp_rs_positive' dictionary.
reduced_dir = join(get_output_dir(), 'reduced')
unmask_dir = join(get_output_dir(), 'unmasked')

datasets = ['archi', 'brainomics', 'camcan', 'la5c']
df = make_data_frame(datasets, 'hcp_rs_positive',
                     reduced_dir=reduced_dir,
                     unmask_dir=unmask_dir)
for dataset in datasets:
    subjects = df.loc[dataset].index.get_level_values('subject').unique()
    print(len(subjects))
def load_rec():
    """Load the cached benchmark reconstruction matrix from disk."""
    rec_path = join(get_output_dir(), 'benchmark', 'rec.pkl')
    return load(rec_path)
import numpy as np from cogspaces.pipeline import get_output_dir from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.externals.joblib import Parallel from sklearn.externals.joblib import delayed from sklearn.utils import check_random_state print(path.dirname(path.dirname(path.abspath(__file__)))) # Add examples to known modules sys.path.append( path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) from exps.single import exp as single_exp exp = Experiment('benchmark_trainsize') basedir = join(get_output_dir(), 'benchmark_trainsize') if not os.path.exists(basedir): os.makedirs(basedir) exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): n_jobs = 20 n_seeds = 20 seed = 1000 @single_exp.config def config(): datasets = ['archi', 'hcp']
from cogspaces.pipeline import get_output_dir monkey_patch_nifti_image() from sklearn.model_selection import train_test_split from modl.input_data.fmri.rest import get_raw_rest_data from modl.decomposition.fmri import fMRIDictFact, rfMRIDictionaryScorer from modl.plotting.fmri import display_maps from modl.utils.system import get_output_dir as modl_get_output_dir from sacred import Experiment exp = Experiment('decompose') base_artifact_dir = join(get_output_dir(), 'decompose') exp.observers.append(FileStorageObserver.create(basedir=base_artifact_dir)) @exp.config def config(): n_components = 128 batch_size = 200 learning_rate = 0.92 method = 'masked' reduction = 12 alpha = 1e-5 n_epochs = 1 verbose = 15 n_jobs = 5 smoothing_fwhm = 4 positive = True
import numpy as np from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.externals.joblib import Parallel from sklearn.externals.joblib import delayed from sklearn.utils import check_random_state from cogspaces.pipeline import get_output_dir # Add examples to known modules sys.path.append( path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) from exps.old.exp_predict import exp as single_exp exp = Experiment('predict_multi') basedir = join(get_output_dir(), 'predict_multi') if not os.path.exists(basedir): os.makedirs(basedir) exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): n_jobs = 24 n_seeds = 20 seed = 2 @single_exp.config def config(): datasets = ['archi', 'hcp', 'brainomics']
import numpy as np import json import os from json import JSONDecodeError from os.path import join import pandas as pd from cogspaces.pipeline import get_output_dir output_dir = join(get_output_dir(), 'benchmark') def summarize(): # NIPS final # basedir_ids = [31] # basedirs = [join(get_output_dir(), 'multi_nested', str(_id), 'run') for _id in basedir_ids] # Current figure nips final # basedir_ids = [6, 9, 15, 23] # 12 unmasked # 25 hcp_new_big / hcp_new_big_single # 28, 29 hcp_new hcp_new_single basedir_ids = [28, 29] basedirs = [ join(get_output_dir(), 'benchmark', str(_id), 'run') for _id in basedir_ids ] res_list = [] for basedir in basedirs: for exp_dir in os.listdir(basedir):
import pandas as pd import json import os from json import JSONDecodeError from os.path import join import numpy as np from nilearn.input_data import NiftiMasker from cogspaces.datasets import fetch_mask from cogspaces.pipeline import get_output_dir basedir = join(get_output_dir(), 'multi_decompose', '3', 'run') mask = fetch_mask() masker = NiftiMasker(mask_img=mask).fit() res = [] for exp_dir in os.listdir(basedir): print(basedir) try: id_exp = int(exp_dir) except: continue exp_dir = join(basedir, exp_dir) # Loosy decompose.py artifact_dir = join(get_output_dir(), 'decompose', str(id_exp), 'artifacts') try: config = json.load(open(join(exp_dir, 'config.json'), 'r')) info = json.load(open(join(exp_dir, 'info.json'), 'r'))
import pandas as pd import torch from joblib import dump from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.linear_model import LogisticRegressionCV, LogisticRegression from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit from cogspaces.model.non_convex_pytorch import TransferEstimator from cogspaces.pipeline import get_output_dir, make_data_frame, split_folds, \ MultiDatasetTransformer idx = pd.IndexSlice exp = Experiment('single_exp') basedir = join(get_output_dir(), 'single_exp') exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): datasets = ['archi', 'brainomics', 'camcan', 'hcp'] reduced_dir = join(get_output_dir(), 'reduced') unmask_dir = join(get_output_dir(), 'unmasked') # source = 'mix' source = 'hcp_new_big' test_size = {'hcp': .1, 'archi': .5, 'brainomics': .5, 'camcan': .5, 'la5c': .5, 'full': .5} train_size = dict(hcp=None, archi=20, la5c=None, brainomics=None, camcan=None, human_voice=None)
from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.externals.joblib import Parallel from sklearn.externals.joblib import delayed from sklearn.utils import check_random_state from cogspaces.pipeline import get_output_dir print(path.dirname(path.dirname(path.abspath(__file__)))) # Add examples to known modules sys.path.append( path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) from exps.old.exp_predict import exp as single_exp exp = Experiment('nips_multinomial') basedir = join(get_output_dir(), 'nips_multinomial') if not os.path.exists(basedir): os.makedirs(basedir) exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): n_jobs = 24 n_seeds = 20 seed = 2 @single_exp.config def config(): datasets = ['camcan', 'hcp']
import numpy as np from cogspaces.pipeline import get_output_dir from sacred import Experiment from sacred.observers import FileStorageObserver from sklearn.externals.joblib import Parallel from sklearn.externals.joblib import delayed from sklearn.utils import check_random_state print(path.dirname(path.dirname(path.abspath(__file__)))) # Add examples to known modules sys.path.append( path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) from exps.pipelining.decompose import exp as single_exp exp = Experiment('multi_decompose') basedir = join(get_output_dir(), 'multi_decompose') if not os.path.exists(basedir): os.makedirs(basedir) exp.observers.append(FileStorageObserver.create(basedir=basedir)) @exp.config def config(): n_jobs = 7 seed = 1000 @single_exp.config def config(): n_components = 128 batch_size = 200
import pandas as pd
import os
from os.path import join

from sklearn.externals.joblib import load

from cogspaces.pipeline import get_output_dir

# Re-pickle every stored data frame in place (refreshes the on-disk
# pickle format without changing its content).
reduced_dir = join(get_output_dir(), 'reduced')
# unmasked_dir = join(get_output_dir(), 'unmasked')

for base_dir in [reduced_dir]:
    for root, dirs, files in os.walk(base_dir):
        for filename in files:
            if filename in ('Xt.pkl', 'imgs.pkl'):
                target = join(root, filename)
                print(target)
                frame = load(target)
                frame.to_pickle(target)