def count_num_subjects(self):
    """Count the number of subjects in the experiment.

    The first source that is set is used, checked in this order: the
    RadiomiX feature file, ``_images_train``, ``_features_train``,
    ``images_train`` and ``features_train``. For the train sources only
    the first entry (index 0) is counted.

    The count is stored in ``self._num_subjects``; nothing is returned.

    Raises:
        WORCValueError: If none of the sources above is set.
    """
    if self._radiomix_feature_file:
        # The patient IDs are taken from the fifth column of the sheet.
        tocount = pd.read_excel(self._radiomix_feature_file).values[:, 4]
    elif self._images_train:
        tocount = self._images_train[0]
    elif self._features_train:
        tocount = self._features_train[0]
    elif self.images_train:
        tocount = self.images_train[0]
    elif self.features_train:
        tocount = self.features_train[0]
    else:
        message = ('No features or images given, cannot count number '
                   'of subjects. Make sure you input at least one of these '
                   'as source.')
        raise WORCValueError(message)

    # len() of a dict is its number of keys, so dicts and sequences can be
    # counted the same way. The former ``type(tocount) == dict()`` check
    # compared a type with a dict instance and therefore never matched.
    self._num_subjects = len(tocount)
def find_exampledatadir():
    """Return the path of the WORC example data folder.

    The folder is ``exampledata`` inside the WORC base directory reported
    by ``WORCDirectoryDetector``. The path is only composed here; it is not
    checked for existence.

    Raises:
        WORCValueError: If no WORC base directory is detected.
    """
    worc_root = WORCDirectoryDetector().do_detection()
    if worc_root:
        return os.path.join(worc_root, 'exampledata')

    raise WORCValueError('WORC installation directory not detected!')
def test_combat():
    """Run ComBat harmonization on the example features (train + test).

    Exactly seven ``examplefeatures_Patient*.hdf5`` files must be present
    in the example data folder: the first four found are used as training
    set, the remaining ones as test set.

    Raises:
        WORCValueError: If the number of example feature files is not seven.
    """
    data_dir = th.find_exampledatadir()

    # Collect the example feature files and verify their number
    feature_pattern = os.path.join(data_dir, 'examplefeatures_Patient*.hdf5')
    features = glob.glob(feature_pattern)

    rerun_hint = ('Run the create_example_data script from the WORC '
                  'exampledata directory!')
    n_found = len(features)
    if n_found < 7:
        raise WORCValueError(
            'Too few example features for ComBat testing not found! ' +
            rerun_hint)
    if n_found > 7:
        raise WORCValueError(
            'Too many example features for ComBat testing not found! ' +
            rerun_hint)

    objectlabels = os.path.join(data_dir, 'objectlabels.csv')

    # Configuration for the Python ComBat implementation
    config = os.path.join(data_dir, 'ComBatConfig_python.ini')

    # Every input file gets a harmonized counterpart with a marked name
    harmonized = []
    for path in features:
        harmonized.append(
            path.replace('examplefeatures_', 'examplefeatures_ComBat_python_'))

    # Harmonize with a train (first four) and a test (remainder) set
    ComBat(features_train_in=features[:4],
           labels_train=objectlabels,
           config=config,
           features_train_out=harmonized[:4],
           features_test_in=features[4:],
           labels_test=objectlabels,
           features_test_out=harmonized[4:])
def test_iccthreshold():
    """Test ICC thresholding on the example features.

    Exactly six ``examplefeatures_Patient*.hdf5`` files must be present in
    the example data folder. Three "observers" are simulated by shuffling
    the file list three times. The ICC values are written to
    ``ICCValues.csv`` in the example data folder and the generated feature
    files are removed afterwards.

    Raises:
        WORCValueError: If the number of example feature files is not six.
    """
    data_dir = th.find_exampledatadir()

    # Collect the example feature files and verify their number
    features = glob.glob(
        os.path.join(data_dir, 'examplefeatures_Patient*.hdf5'))

    rerun_hint = ('Run the create_example_data script from the WORC '
                  'exampledata directory!')
    n_found = len(features)
    if n_found < 6:
        raise WORCValueError(
            'Too few example features for ICC testing not found!' +
            rerun_hint)
    if n_found > 6:
        raise WORCValueError(
            'Too many example features for ICC testing not found!' +
            rerun_hint)

    # Pretend that features are from three observers: each observer gets
    # its own copy of the (in-place) reshuffled file list
    features_multi = []
    for _ in range(3):
        shuffle(features)
        features_multi.append(list(features))

    # Output names follow the order left by the last shuffle; the very same
    # list object is handed over once per observer
    icc_names = [
        path.replace('examplefeatures_', 'examplefeatures_ICC_')
        for path in features
    ]
    features_out = [icc_names] * 3

    # CSV to save ICC values to
    csv_out = os.path.join(data_dir, 'ICCValues.csv')

    convert_features_ICC_threshold(features_in=features_multi,
                                   csv_out=csv_out,
                                   features_out=features_out)

    # Remove the feature files created by this test
    leftovers = glob.glob(
        os.path.join(data_dir, '*examplefeatures_ICC_*.hdf5'))
    for leftover in leftovers:
        os.remove(leftover)
def find_testdatadir():
    """Find the WORC test data folder, creating it when needed.

    The folder is ``test/tmp`` inside the WORC base directory reported by
    ``WORCDirectoryDetector``.

    Returns:
        The path of the test data folder.

    Raises:
        WORCValueError: If no WORC base directory is detected.
    """
    base_dir = WORCDirectoryDetector().do_detection()
    if not base_dir:
        raise WORCValueError('WORC installation directory not detected!')

    testdatadir = os.path.join(base_dir, 'test', 'tmp')

    # makedirs also creates a missing parent ('test') and, with exist_ok,
    # does not fail when the folder already exists or is created
    # concurrently, unlike a separate exists() check followed by mkdir().
    os.makedirs(testdatadir, exist_ok=True)
    return testdatadir
def ComBatMatlab(dat, batch, command, mod=None, par=1, per_feature='true',
                 timeout=None):
    """Run the ComBat Matlab script through a shell command.

    The inputs are saved to a ``.mat`` file in the system temporary
    directory, Matlab is started with ``combatmatlab('<input file>')`` from
    the folder of this module, and the result is read back from a second
    ``.mat`` file.

    Args:
        dat: Data to harmonize, passed to Matlab as ``datvar``.
        batch: Batch assignment, passed to Matlab as ``batchvar``.
        command: Matlab executable (path) to call.
        mod: Optional covariates, passed as ``modvar``; defaults to an
            empty list.
        par: Passed as ``parvar``; par = 0 is non-parametric.
        per_feature: Passed to Matlab as ``per_feature``.
        timeout: Optional maximum number of seconds to wait for the Matlab
            output file. ``None`` (default) waits indefinitely.

    Returns:
        The transposed ``data_harmonized`` array from the Matlab output.

    Raises:
        WORCValueError: If Matlab reports an error message, or if no output
            arrives within ``timeout`` seconds.
    """
    # Local import: only needed for the polling loop below.
    import time

    # Mod: default argument is empty list
    if mod is None:
        mod = []

    # TODO: Add check whether matlab executable is found
    # Save the features in a .mat MatLab Compatible format
    # NOTE: the temporary file names are fixed, so concurrent runs on the
    # same machine would overwrite each other's files.
    this_folder = os.path.dirname(os.path.realpath(__file__))
    tempdir = tempfile.gettempdir()
    tempfile_in = os.path.join(tempdir, 'combat_input.mat')
    tempfile_out = os.path.join(tempdir, 'combat_output.mat')
    ComBatFolder = os.path.join(os.path.dirname(this_folder),
                                'external',
                                'ComBatHarmonization',
                                'Matlab',
                                'scripts')

    matlab_input = {
        'output': tempfile_out,
        'ComBatFolder': ComBatFolder,
        'datvar': dat,
        'batchvar': batch,
        'modvar': mod,
        'parvar': par,
        'per_feature': per_feature,
    }
    sio.savemat(tempfile_in, matlab_input)

    # Make sure there is no tempfile out from the previous run
    if os.path.exists(tempfile_out):
        os.remove(tempfile_out)

    # Run ComBat
    currentdir = os.getcwd()

    # Only the Windows shell needs '&'; every other platform (linux, but
    # also e.g. darwin, which previously left the separator undefined)
    # uses ';'.
    if platform == "win32":
        commandseparator = ' & '
    else:
        commandseparator = ' ; '

    # BIGR Cluster: /cm/shared/apps/matlab/R2015b/bin/matlab
    # NOTE(review): the command is built as a shell string; ``command``
    # must come from a trusted source.
    regcommand = ('cd "' + this_folder + '"' + commandseparator +
                  '"' + command + '" -nodesktop -nosplash -nojvm -r "combatmatlab(' +
                  "'" + str(tempfile_in) + "'" + ')"' + commandseparator +
                  'cd "' + currentdir + '"')
    print(f'Executing ComBat in Matlab through command: {regcommand}.')
    proc = subprocess.Popen(
        regcommand,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # communicate() drains the pipe while waiting. Calling wait() first can
    # deadlock once the child fills the stdout pipe buffer.
    stdout_value, stderr_value = proc.communicate()

    # BUG: Waiting does not work, just wait for output to arrive, either with
    # the actual output or an error message
    poll_interval = 0.5
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        mat_dict = None
        if os.path.exists(tempfile_out):
            try:
                mat_dict = sio.loadmat(tempfile_out)
            except (sio.matlab.miobase.MatReadError, ValueError):
                # Presumably the file is still being written; retry.
                mat_dict = None

        if mat_dict is not None:
            if 'data_harmonized' in mat_dict:
                data_harmonized = mat_dict['data_harmonized']
                break

            if 'message' in mat_dict:
                # Raised outside the try block above, so that it can never
                # be swallowed by the ValueError handler.
                message = mat_dict['message']
                raise WORCValueError(
                    f'Error in Matlab ComBat execution: {message}.')

        if deadline is not None and time.monotonic() > deadline:
            raise WORCValueError(
                f'Error in Matlab ComBat execution: command: {regcommand}, stdout: {stdout_value}, stderr: {stderr_value}'
            )

        # Sleep between polls instead of spinning at full CPU load.
        time.sleep(poll_interval)

    data_harmonized = np.transpose(data_harmonized)

    # Remove temporary files
    os.remove(tempfile_out)
    os.remove(tempfile_in)
    return data_harmonized
def ComBatPython(dat, batch, mod=None, par=1, eb=1, per_feature=False,
                 plotting=False):
    """Run ComBat through the Python ``neuroCombat`` function.

    Args:
        dat: Data to harmonize; indexed as (features, samples).
        batch: Batch assignment, used as the ``batch`` covariate column.
        mod: Optional 2D array of covariates; every column is passed to
            neuroCombat as a categorical covariate named ``mod_<index>``.
        par: 1 for parametric, 0 for non-parametric.
        eb: 0 or 1, forwarded to neuroCombat as boolean ``eb``.
        per_feature: 0/False to harmonize all features in one call,
            1/True to harmonize each feature separately.
        plotting: If True, show a before/after scatter plot per feature
            (only in per-feature mode).

    Returns:
        The harmonized data: the neuroCombat output, or in per-feature mode
        an array with the same shape as ``dat``.

    Raises:
        WORCValueError: If ``par``, ``eb`` or ``per_feature`` is not 0 or 1.
    """
    def _as_flag(name, value):
        """Translate a 0/1 setting into a boolean, rejecting other values."""
        if value == 0:
            return False
        if value == 1:
            return True
        raise WORCValueError(f'{name} should be 0 or 1, now {value}.')

    # Convert inputs to neuroCombat format.
    covariates = {'batch': batch}
    categorical_cols = []
    if mod is not None:
        for column in range(mod.shape[1]):
            label = f'mod_{column}'
            covariates[label] = list(mod[:, column])
            categorical_cols.append(label)

    covars = pd.DataFrame(covariates)
    batch_col = 'batch'

    parametric = _as_flag('Par', par)
    eb = _as_flag('eb', eb)
    per_feature = _as_flag('per_feature', per_feature)

    # Execute ComBat
    if not per_feature:
        data_harmonized = neuroCombat(dat=dat,
                                      covars=covars,
                                      batch_col=batch_col,
                                      categorical_cols=categorical_cols,
                                      eb=eb,
                                      parametric=parametric)
    elif per_feature:
        print('\t Executing ComBat per feature.')
        data_harmonized = np.zeros(dat.shape)

        # Shape: (features, samples)
        for row in range(dat.shape[0]):
            feature = dat[row, :]
            if eb:
                # With eb, the feature is paired with a noisy copy of itself
                noise = np.random.rand(feature.shape[0])
                stacked = np.asarray([feature, feature + noise])
            else:
                # Just use the single feature
                stacked = np.asarray([feature])

            harmonized = neuroCombat(dat=stacked,
                                     covars=covars,
                                     batch_col=batch_col,
                                     categorical_cols=categorical_cols,
                                     eb=eb,
                                     parametric=parametric)

            # Only the first row is the actual feature
            data_harmonized[row, :] = harmonized[0, :]

            if plotting:
                before = dat[row, :]
                after = data_harmonized[row, :]
                print(len(before))

                # Split the values over the batches labelled 1.0 and 2.0
                before_b1 = [
                    value for value, label in zip(before, batch[0])
                    if label == 1.0
                ]
                before_b2 = [
                    value for value, label in zip(before, batch[0])
                    if label == 2.0
                ]
                print(len(before_b1))
                print(len(before_b2))
                after_b1 = [
                    value for value, label in zip(after, batch[0])
                    if label == 1.0
                ]
                after_b2 = [
                    value for value, label in zip(after, batch[0])
                    if label == 2.0
                ]

                # Batch 1 is drawn at x=1 (red), batch 2 at x=2 (blue)
                x_b1 = np.ones((len(before_b1)))
                x_b2 = np.ones((len(before_b2))) + 1

                plt.figure()
                ax = plt.subplot(2, 1, 1)
                ax.scatter(x_b1, before_b1, color='red')
                ax.scatter(x_b2, before_b2, color='blue')
                plt.title('Before Combat')

                ax = plt.subplot(2, 1, 2)
                ax.scatter(x_b1, after_b1, color='red')
                ax.scatter(x_b2, after_b2, color='blue')
                plt.title('After Combat')

                plt.show()
    else:
        raise WORCValueError(
            f'per_feature should be False or True, now {per_feature}.')

    return data_harmonized
def test_combat_fastr():
    """Test ComBat feature harmonization when run as a fastr network.

    Exactly six ``examplefeatures_Patient*.hdf5`` files must be present in
    the example data folder. The same feature source is linked twice to the
    ComBat node (as ``MR_0`` and ``MR_1``). The network is executed with the
    ``LinearExecution`` plugin, after which ``*features_ComBat*.hdf5`` files
    in the example data folder are removed.

    Raises:
        WORCValueError: If the number of example feature files is not six.
    """
    data_dir = th.find_exampledatadir()

    # Collect the example feature files and verify their number
    features = glob.glob(
        os.path.join(data_dir, 'examplefeatures_Patient*.hdf5'))

    rerun_hint = ('Run the create_example_data script from the WORC '
                  'exampledata directory!')
    n_found = len(features)
    if n_found < 6:
        raise WORCValueError(
            'Too few example features for ComBat testing not found!' +
            rerun_hint)
    if n_found > 6:
        raise WORCValueError(
            'Too many example features for ComBat testing not found!' +
            rerun_hint)

    objectlabels = os.path.join(data_dir, 'objectlabels.csv')

    # Configuration for the Python ComBat implementation
    config = os.path.join(data_dir, 'ComBatConfig_python.ini')

    # Create the fastr network: sources, sink and the ComBat node
    experiment = fastr.create_network('test_ComBat')

    source_features = experiment.create_source(
        'HDF5', id='features_in', node_group='features')
    source_labels = experiment.create_source(
        'PatientInfoFile', id='labels', node_group='pctrain')
    source_config = experiment.create_source(
        'ParameterFile', id='config', node_group='conf')

    sink_features = experiment.create_sink('HDF5', id='features_out')

    node_combat = experiment.create_node('combat/ComBat:1.0',
                                         tool_version='1.0',
                                         id='ComBat')

    # Configuration and labels
    config_link = experiment.create_link(source_config.output,
                                         node_combat.inputs['config'])
    labels_link = experiment.create_link(
        source_labels.output, node_combat.inputs['patientclass_train'])
    config_link.collapse = 'conf'
    labels_link.collapse = 'pctrain'

    # Mimic using two feature toolboxes: the same source feeds both inputs
    for toolbox in ('MR_0', 'MR_1'):
        train_input = node_combat.inputs['features_train'][toolbox]
        train_link = train_input << source_features.output
        train_link.collapse = 'features'

    out_link = sink_features.input << node_combat.outputs['features_train_out']
    out_link.collapse = 'ComBat'

    # Provide source and sink data
    source_data = {
        'features_in': features,
        'labels': objectlabels,
        'config': config,
    }
    sink_data = {
        'features_out':
        "vfs://output/test_ComBat/ComBat/features_ComBat_{{sample_id}}_{{cardinality}}{{ext}}",
    }

    # Execute
    experiment.execute(source_data,
                       sink_data,
                       execution_plugin='LinearExecution')

    # Remove the feature files
    leftovers = glob.glob(os.path.join(data_dir, '*features_ComBat*.hdf5'))
    for leftover in leftovers:
        os.remove(leftover)