def count_num_subjects(self):
        """Count the number of subjects in the experiment.

        The first source that is set is used, checked in this order: the
        radiomix feature file, ``_images_train``, ``_features_train``,
        ``images_train`` and ``features_train``. For the image and feature
        sources only the first entry (index 0) is counted. The result is
        stored in ``self._num_subjects``; nothing is returned.

        Raises:
            WORCValueError: If none of the sources is set.
        """
        if self._radiomix_feature_file:
            # One row per subject is assumed; column index 4 was named
            # ``pids`` in the original code.
            # NOTE(review): confirm the sheet layout against the file format.
            f = pd.read_excel(self._radiomix_feature_file)
            tocount = f.values[:, 4]
        elif self._images_train:
            tocount = self._images_train[0]
        elif self._features_train:
            tocount = self._features_train[0]
        elif self.images_train:
            tocount = self.images_train[0]
        elif self.features_train:
            tocount = self.features_train[0]
        else:
            message = 'No features or images given, cannot count number ' +\
                'of subjects. Make sure you input at least one of these ' +\
                'as source.'
            raise WORCValueError(message)

        # len() of a dict is its number of keys, so dicts and sequences can
        # be counted the same way. The previous ``type(tocount) == dict()``
        # check compared a type with an empty dict instance and never matched.
        self._num_subjects = len(tocount)
示例#2
0
def find_exampledatadir():
    """Return the path of the WORC example data folder.

    The folder is ``exampledata`` inside the detected WORC installation
    directory. The path is built but not checked for existence.

    Raises:
        WORCValueError: If the installation directory cannot be detected.
    """
    worc_root = WORCDirectoryDetector().do_detection()
    if worc_root:
        return os.path.join(worc_root, 'exampledata')

    raise WORCValueError('WORC installation directory not detected!')
def test_combat():
    """Test ComBat feature harmonization with a train and a test set.

    Requires exactly seven ``examplefeatures_Patient*.hdf5`` files in the
    WORC example data directory. The first four (in sorted order) are used
    as training set and the remaining three as test set.

    Raises:
        WORCValueError: If the number of example feature files is not seven.
    """
    example_data_dir = th.find_exampledatadir()

    # glob returns matches in arbitrary order; sort them so that the
    # train/test split below is the same on every run and platform.
    features = sorted(glob.glob(
        os.path.join(example_data_dir, 'examplefeatures_Patient*.hdf5')))

    n_expected = 7
    if len(features) != n_expected:
        amount = 'few' if len(features) < n_expected else 'many'
        raise WORCValueError(
            f'Too {amount} example features for ComBat testing found! '
            'Run the create_example_data script from the WORC exampledata '
            'directory!')

    objectlabels = os.path.join(example_data_dir, 'objectlabels.csv')

    # Use the Python implementation of ComBat
    config = os.path.join(example_data_dir, 'ComBatConfig_python.ini')
    features_out = [
        f.replace('examplefeatures_', 'examplefeatures_ComBat_python_')
        for f in features
    ]

    # Run ComBat on train + test data. A training-only run would pass all
    # features as features_train_in and leave out the *_test arguments.
    n_train = 4
    ComBat(features_train_in=features[:n_train],
           labels_train=objectlabels,
           config=config,
           features_train_out=features_out[:n_train],
           features_test_in=features[n_train:],
           labels_test=objectlabels,
           features_test_out=features_out[n_train:])
def test_iccthreshold():
    """Test ICC thresholding computation on the example features.

    Requires exactly six ``examplefeatures_Patient*.hdf5`` files in the
    WORC example data directory. The same files are shuffled three times to
    mimic three observers, the ICC threshold conversion is run on them, and
    the feature files it produced are removed afterwards.

    Raises:
        WORCValueError: If the number of example feature files is not six.
    """
    example_data_dir = th.find_exampledatadir()

    features = glob.glob(
        os.path.join(example_data_dir, 'examplefeatures_Patient*.hdf5'))

    n_expected = 6
    if len(features) != n_expected:
        amount = 'few' if len(features) < n_expected else 'many'
        raise WORCValueError(
            f'Too {amount} example features for ICC testing found! '
            'Run the create_example_data script from the WORC exampledata '
            'directory!')

    # Pretend that the features are from three observers: each observer
    # gets its own copy of the list in a different (random) order.
    features_multi = []
    for _ in range(3):
        shuffle(features)
        features_multi.append(features[:])

    # The output names follow the order left behind by the last shuffle;
    # all three observers share the same list of output names.
    features_out = [
        f.replace('examplefeatures_', 'examplefeatures_ICC_') for f in features
    ]
    features_out = [features_out, features_out, features_out]

    # CSV to save ICC values to
    csv_out = os.path.join(example_data_dir, 'ICCValues.csv')

    # Run the ICC threshold function
    convert_features_ICC_threshold(features_in=features_multi,
                                   csv_out=csv_out,
                                   features_out=features_out)

    # Remove the feature files created by this test
    for created in glob.glob(
            os.path.join(example_data_dir, '*examplefeatures_ICC_*.hdf5')):
        os.remove(created)
示例#5
0
def find_testdatadir():
    """Return the path of the WORC test data folder, creating it if needed.

    The folder is ``test/tmp`` inside the detected WORC installation
    directory.

    Raises:
        WORCValueError: If the installation directory cannot be detected.
    """
    base_dir = WORCDirectoryDetector().do_detection()
    if not base_dir:
        raise WORCValueError('WORC installation directory not detected!')

    testdatadir = os.path.join(base_dir, 'test', 'tmp')
    # makedirs also creates a missing 'test' parent, and exist_ok avoids the
    # race between checking for the folder and creating it.
    os.makedirs(testdatadir, exist_ok=True)
    return testdatadir
示例#6
0
def ComBatMatlab(dat, batch, command, mod=None, par=1, per_feature='true',
                 timeout=None):
    """Run the ComBat function through the Matlab script.

    The inputs are saved to a ``.mat`` file in the system temporary
    directory, Matlab is started through the shell to run ``combatmatlab``
    on that file, and the result is read back from the ``.mat`` file that
    the script writes.

    Args:
        dat: Data to harmonize; saved as ``datvar``.
        batch: Batch variable; saved as ``batchvar``.
        command: Matlab executable to call.
        mod: Moderation variable, saved as ``modvar``; ``None`` becomes an
            empty list.
        par: Saved as ``parvar``; 0 is non-parametric.
        per_feature: Saved as ``per_feature`` for the Matlab script.
        timeout: Maximum number of seconds to wait for the Matlab output
            file after the shell command has returned. ``None`` (default)
            waits indefinitely.

    Returns:
        The transposed ``data_harmonized`` array from the Matlab output.

    Raises:
        WORCValueError: If Matlab reports an error message in its output
            file, or if no output arrives within ``timeout`` seconds.
    """
    import time  # local import: only needed for polling the output file

    # Mod: default argument is empty list
    if mod is None:
        mod = []

    # TODO: Add check whether matlab executable is found

    # Save the features in a .mat MatLab Compatible format
    # NOTE(review): the file names are fixed, so concurrent runs on the same
    # machine share these files; consider unique temporary names.
    this_folder = os.path.dirname(os.path.realpath(__file__))
    tempdir = tempfile.gettempdir()
    tempfile_in = os.path.join(tempdir, 'combat_input.mat')
    tempfile_out = os.path.join(tempdir, 'combat_output.mat')
    ComBatFolder = os.path.join(os.path.dirname(this_folder), 'external',
                                'ComBatHarmonization', 'Matlab', 'scripts')

    matlab_input = {
        'output': tempfile_out,
        'ComBatFolder': ComBatFolder,
        'datvar': dat,
        'batchvar': batch,
        'modvar': mod,
        'parvar': par,
        'per_feature': per_feature
    }
    sio.savemat(tempfile_in, matlab_input)

    # Make sure there is no tempfile out from the previous run
    if os.path.exists(tempfile_out):
        os.remove(tempfile_out)

    # Run ComBat
    currentdir = os.getcwd()
    # '&' chains commands on Windows; all other platforms are assumed to
    # provide a POSIX shell, where ';' does the same.
    commandseparator = ' & ' if platform == "win32" else ' ; '

    # BIGR Cluster: /cm/shared/apps/matlab/R2015b/bin/matlab
    # NOTE: the command is executed as a shell string (shell=True), so
    # ``command`` and the paths in it must come from a trusted source.
    regcommand = ('cd "' + this_folder + '"' + commandseparator + '"' +
                  command + '" -nodesktop -nosplash -nojvm -r "combatmatlab(' +
                  "'" + str(tempfile_in) + "'" + ')"' + commandseparator +
                  'cd "' + currentdir + '"')
    print(f'Executing ComBat in Matlab through command: {regcommand}.')
    proc = subprocess.Popen(
        regcommand,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # communicate() drains the pipe while waiting for the process; calling
    # wait() first can deadlock once the pipe buffer is full.
    # stderr is merged into stdout, so stderr_value is always None.
    stdout_value, stderr_value = proc.communicate()

    # The shell command may return before Matlab has written its output, so
    # poll for the output file: it holds either the harmonized data or an
    # error message.
    poll_interval = 0.5
    deadline = None if timeout is None else time.monotonic() + timeout
    data_harmonized = None
    while data_harmonized is None:
        mat_dict = {}
        if os.path.exists(tempfile_out):
            try:
                mat_dict = sio.loadmat(tempfile_out)
            except (sio.matlab.miobase.MatReadError, ValueError):
                # Presumably the file is still being written: retry later.
                mat_dict = {}

        if 'data_harmonized' in mat_dict:
            data_harmonized = mat_dict['data_harmonized']
        elif 'message' in mat_dict:
            # Raised outside the try block above, so the error cannot be
            # swallowed by the ValueError handler.
            message = mat_dict['message']
            raise WORCValueError(
                f'Error in Matlab ComBat execution: {message}.')
        elif deadline is not None and time.monotonic() >= deadline:
            raise WORCValueError(
                f'Error in Matlab ComBat execution: command: {regcommand}, stdout: {stdout_value}, stderr: {stderr_value}'
            )
        else:
            # Sleep instead of spinning at full CPU load while waiting.
            time.sleep(poll_interval)

    data_harmonized = np.transpose(data_harmonized)

    # Remove temporary files
    os.remove(tempfile_out)
    os.remove(tempfile_in)

    return data_harmonized
示例#7
0
def _combat_flag(value, label):
    """Convert a 0/1 setting to a bool; raise WORCValueError otherwise."""
    if value == 0:
        return False
    if value == 1:
        return True
    raise WORCValueError(f'{label} should be 0 or 1, now {value}.')


def _plot_combat_feature(feat, feat_harm, batch):
    """Scatter one feature for batch labels 1.0 and 2.0, before and after ComBat."""
    print(len(feat))

    # NOTE(review): batch[0] is used as the per-sample batch labels, which
    # presumes batch is nested (e.g. shape (1, samples)) — confirm.
    labels = batch[0]
    feat_b1 = [f for f, b in zip(feat, labels) if b == 1.0]
    feat_b2 = [f for f, b in zip(feat, labels) if b == 2.0]
    print(len(feat_b1))
    print(len(feat_b2))

    feat_harm_b1 = [f for f, b in zip(feat_harm, labels) if b == 1.0]
    feat_harm_b2 = [f for f, b in zip(feat_harm, labels) if b == 2.0]

    # Batch 1 is drawn at x=1 (red), batch 2 at x=2 (blue)
    x_b1 = np.ones((len(feat_b1)))
    x_b2 = np.ones((len(feat_b2))) + 1

    plt.figure()
    ax = plt.subplot(2, 1, 1)
    ax.scatter(x_b1, feat_b1, color='red')
    ax.scatter(x_b2, feat_b2, color='blue')
    plt.title('Before Combat')

    ax = plt.subplot(2, 1, 2)
    ax.scatter(x_b1, feat_harm_b1, color='red')
    ax.scatter(x_b2, feat_harm_b2, color='blue')
    plt.title('After Combat')

    plt.show()


def ComBatPython(dat,
                 batch,
                 mod=None,
                 par=1,
                 eb=1,
                 per_feature=False,
                 plotting=False):
    """Run the ComBat function through the neuroCombat Python package.

    Args:
        dat: Data to harmonize, indexed as (features, samples).
        batch: Batch variable; becomes the ``batch`` covariate column.
        mod: Optional 2D array of moderation variables. Each column becomes
            a categorical covariate named ``mod_<index>``.
        par: 1 for parametric, 0 for non-parametric.
        eb: 1 to use empirical Bayes, 0 to disable it.
        per_feature: 1/True to harmonize each feature separately, 0/False to
            harmonize all features in one call.
        plotting: If True, show a before/after scatter plot for each
            feature. Only used when harmonizing per feature.

    Returns:
        The harmonized data: the neuroCombat result as-is when all features
        are harmonized together, or an array with the shape of ``dat`` when
        harmonizing per feature.

    Raises:
        WORCValueError: If ``par``, ``eb`` or ``per_feature`` is not 0 or 1.
    """
    # convert inputs to neuroCombat format.
    covars = {'batch': batch}
    categorical_cols = []
    if mod is not None:
        for i_mod in range(mod.shape[1]):
            label = f'mod_{i_mod}'
            covars[label] = list(mod[:, i_mod])
            categorical_cols.append(label)

    covars = pd.DataFrame(covars)
    batch_col = 'batch'

    parametric = _combat_flag(par, 'Par')
    eb = _combat_flag(eb, 'eb')
    per_feature = _combat_flag(per_feature, 'per_feature')

    # execute ComBat on all features at once
    if not per_feature:
        return neuroCombat(dat=dat,
                           covars=covars,
                           batch_col=batch_col,
                           categorical_cols=categorical_cols,
                           eb=eb,
                           parametric=parametric)

    print('\t Executing ComBat per feature.')
    data_harmonized = np.zeros(dat.shape)
    # Shape: (features, samples)
    for i in range(dat.shape[0]):
        feature = dat[i, :]
        if eb:
            # Pair the feature with a noisy copy of itself, so two rows are
            # passed to neuroCombat; presumably empirical Bayes cannot be
            # run on a single feature (TODO confirm).
            random_feature = np.random.rand(feature.shape[0])
            feat_temp = np.asarray([feature, feature + random_feature])
        else:
            # Just use the single feature
            feat_temp = np.asarray([feature])

        feat_temp = neuroCombat(dat=feat_temp,
                                covars=covars,
                                batch_col=batch_col,
                                categorical_cols=categorical_cols,
                                eb=eb,
                                parametric=parametric)
        # Only the first row is the actual feature; drop the noisy copy.
        data_harmonized[i, :] = feat_temp[0, :]

        if plotting:
            _plot_combat_feature(feature, data_harmonized[i, :], batch)

    return data_harmonized
def test_combat_fastr():
    """Test ComBat feature harmonization when run as a fastr network.

    Requires exactly six ``examplefeatures_Patient*.hdf5`` files in the
    WORC example data directory. A small fastr network with a single ComBat
    node is built, fed the same features twice to mimic two feature
    toolboxes, and executed with the ``LinearExecution`` plugin.

    Raises:
        WORCValueError: If the number of example feature files is not six.
    """
    example_data_dir = th.find_exampledatadir()

    features = glob.glob(
        os.path.join(example_data_dir, 'examplefeatures_Patient*.hdf5'))

    n_expected = 6
    if len(features) != n_expected:
        amount = 'few' if len(features) < n_expected else 'many'
        raise WORCValueError(
            f'Too {amount} example features for ComBat testing found! '
            'Run the create_example_data script from the WORC exampledata '
            'directory!')

    objectlabels = os.path.join(example_data_dir, 'objectlabels.csv')

    # Use the Python implementation of ComBat
    config = os.path.join(example_data_dir, 'ComBatConfig_python.ini')

    # Create the fastr network
    experiment = fastr.create_network('test_ComBat')

    source_features = experiment.create_source('HDF5',
                                               id='features_in',
                                               node_group='features')
    source_labels = experiment.create_source('PatientInfoFile',
                                             id='labels',
                                             node_group='pctrain')
    source_config = experiment.create_source('ParameterFile',
                                             id='config',
                                             node_group='conf')

    sink_features = experiment.create_sink('HDF5', id='features_out')

    node_combat = experiment.create_node(
        'combat/ComBat:1.0',
        tool_version='1.0',
        id='ComBat',
    )

    # Config and labels are collapsed over their node groups
    link_combat_1 = experiment.create_link(source_config.output,
                                           node_combat.inputs['config'])
    link_combat_2 = experiment.create_link(
        source_labels.output, node_combat.inputs['patientclass_train'])
    link_combat_1.collapse = 'conf'
    link_combat_2.collapse = 'pctrain'

    # Mimic using two feature toolboxes by linking the same source twice
    links_Combat1_train = node_combat.inputs['features_train'][
        'MR_0'] << source_features.output
    links_Combat1_train.collapse = 'features'

    links_Combat2_train = node_combat.inputs['features_train'][
        'MR_1'] << source_features.output
    links_Combat2_train.collapse = 'features'

    links_Combat_out_train = sink_features.input << node_combat.outputs[
        'features_train_out']
    links_Combat_out_train.collapse = 'ComBat'

    # Provide source and sink data
    source_data = {
        'features_in': features,
        'labels': objectlabels,
        'config': config,
    }

    # NOTE(review): this is a plain (non-f) string, so the doubled braces
    # are handed to fastr literally — confirm this is the intended template.
    sink_data = {
        'features_out':
        "vfs://output/test_ComBat/ComBat/features_ComBat_{{sample_id}}_{{cardinality}}{{ext}}"
    }

    # Execute
    experiment.execute(source_data,
                       sink_data,
                       execution_plugin='LinearExecution')

    # Remove the ComBat feature files found in the example data directory
    for created in glob.glob(
            os.path.join(example_data_dir, '*features_ComBat*.hdf5')):
        os.remove(created)