Пример #1
0
def _run_cross_validation(dataset_reduced, n_times_num, accuracies,
                          proportions, label):
    """Train and test a fresh classifier ``n_times_num`` times.

    Parameters
    ----------
    dataset_reduced
        Value returned by ``feature_reduction``; handed unchanged to
        ``split_data``.
    n_times_num : int
        Number of repetitions. The repetition index is passed to
        ``split_data`` as its second argument.
    accuracies, proportions : numpy.ndarray
        One-dimensional array views with ``n_times_num`` slots. They are
        filled in place: ``accuracies`` with the accuracy of each repetition,
        ``proportions`` with ``test_data[1].sum() / test_data[1].shape[0]``.
    label : str
        Prefix of the progress lines printed to stdout.

    Returns
    -------
    float
        Mean of ``accuracies`` after all repetitions.
    """
    for n_time in range(n_times_num):
        # create Classifier specified in Classifier tab
        cls = create_classifier()

        # split dataset using Classifier/Performance settings
        training_data, test_data = split_data(dataset_reduced, n_time)
        proportions[n_time] = \
            test_data[1].sum() / float(test_data[1].shape[0])

        # train and test classifier, then release it before the next round
        cls = train_and_test_classifier(cls, training_data, test_data)
        accuracy = get_accuracy(cls)
        del cls

        accuracies[n_time] = accuracy
        print('%s, %d ==> %0.2f' % (label, n_time, accuracy))

    mean_accuracy = accuracies.mean()
    print('%s <mean> ==> %0.2f' % (label, mean_accuracy))
    return mean_accuracy


def perform_classification():
    """Run the classification for every subject, hand and (optional) ROI.

    All settings are read from module-level variable objects through their
    ``.get()`` method (``subjects_names``, ``hands_names``, ``rois_names``,
    ``var_rois_apply``, ``var_n_times``, ``var_base_dir``, ``schema`` and
    ``var_output_dir``).

    For each subject/hand pair the dataset is loaded with ``load_data``,
    reduced with ``feature_reduction`` (once per ROI when ROIs are enabled)
    and evaluated ``var_n_times`` times. Summary statistics are printed and
    the result arrays are stored with ``numpy.save`` in the output directory
    under a ``%Y%m%d%H%M`` timestamp prefix. The per-ROI summary array is
    only written when ROIs are enabled.

    Returns
    -------
    numpy.ndarray
        Accuracies with shape ``(subjects, hands, rois, n_times)`` when ROIs
        are enabled, otherwise ``(subjects, hands, n_times)``.
    """
    import datetime
    from scipy import stats

    # get list of subjects (subjects list specified or get from the pattern)
    subjects_text = subjects_names.get()
    if '*' in subjects_text and len(subjects_text.split(' ')) == 1:
        subjects_list = get_subject_names(
            var_base_dir.get(), subjects_text.replace('*', '')
            )
    else:
        subjects_list = subjects_text.split(' ')
    subs_num = len(subjects_list)

    # get list of hands (can be only one hand); empty entry means both hands
    hands_list = hands_names.get().split(' ')
    if not hands_list[0]:
        hands_list = ['Left', 'Right']
    hands_num = len(hands_list)

    n_times_num = var_n_times.get()
    rois_enabled = var_rois_apply.get()

    # arrays storing the classification performance
    if rois_enabled:
        rois_list = rois_names.get().split(' ')
        results = np.zeros(
            shape=(subs_num, hands_num, len(rois_list), n_times_num)
            )
        proportions_mean = np.zeros(results.shape[:-2])
    else:
        rois_list = []
        results = np.zeros(shape=(subs_num, hands_num, n_times_num))
        proportions_mean = np.zeros(results.shape[:-1])
    results_subjects = np.zeros(results.shape[:-1])
    proportions_test_dataset = np.zeros(shape=results.shape)

    # Main loop of the program
    for sub, subject in enumerate(subjects_list):
        for hand, hand_name in enumerate(hands_list):
            # Directory containing bold signal file and the target for the
            # current subject and hand. Built by plain concatenation, so the
            # base directory and schema must carry their own separators.
            mvpa_directory = \
                var_base_dir.get() + schema.get() % (subject, hand_name)
            print(mvpa_directory)

            # Load dataset using variables from load_data frame
            dataset = load_data(mvpa_directory)
            label = '%s, %s' % (subject, hand_name)

            if rois_enabled:
                for roi, roi_name in enumerate(rois_list):
                    roi_path = mvpa_directory + 'ROIs/' + roi_name + '.nii.gz'

                    # reduce number of features in the dataset
                    dataset_reduced = feature_reduction(dataset, roi_path)

                    results_subjects[sub, hand, roi] = _run_cross_validation(
                        dataset_reduced, n_times_num,
                        results[sub, hand, roi],
                        proportions_test_dataset[sub, hand, roi],
                        '%s, %s' % (label, roi_name)
                        )
                    # NOTE(review): overwritten for every ROI, so only the
                    # last ROI's proportion is kept - confirm this is wanted.
                    proportions_mean[sub, hand] = \
                        proportions_test_dataset[sub, hand, roi].mean()
            else:
                dataset_reduced = feature_reduction(dataset)

                results_subjects[sub, hand] = _run_cross_validation(
                    dataset_reduced, n_times_num,
                    results[sub, hand],
                    proportions_test_dataset[sub, hand],
                    label
                    )
                proportions_mean[sub, hand] = \
                    proportions_test_dataset[sub, hand].mean()

    print('RESULTS MEAN: %f' % results.mean())
    if rois_enabled:
        for i, roi_name in enumerate(rois_list):
            print(
                'ROI 00%s: %f (%s)' % (i, results[:, :, i].mean(), roi_name)
                )

    # Statistical significance vs prior chance level.
    # The t-test gets the flattened subject/hand means so that it returns a
    # single p-value (a 2-D sample yields one value per column, a scalar
    # sample yields NaN).
    chance_level = proportions_mean.mean()
    results_rois = None
    if rois_enabled:
        for roi, roi_name in enumerate(rois_list):
            print('\n%s statistical difference vs prior chance' % roi_name)
            print(
                'p_value = %f' %
                stats.ttest_1samp(
                    results_subjects[..., roi].flatten(), chance_level
                    )[1]
                )
        # one row per ROI: mean, standard error of the mean, std
        results_rois = np.array(
            [
                [
                    results_subjects[..., i].mean(),
                    stats.sem(results_subjects[..., i].flatten()),
                    results_subjects[..., i].std()
                ]
                for i in range(results_subjects.shape[-1])
            ]
            )
        print('results for particular ROIs: %s' % results_rois.T[0].flatten())
    else:
        print('\nstatistical difference vs prior chance')
        print(
            'p_value = %f' %
            stats.ttest_1samp(results_subjects.flatten(), chance_level)[1]
            )

    results_output_filename = datetime.datetime.now().strftime("%Y%m%d%H%M")

    output_dir = var_output_dir.get()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    arrays_to_save = [
        ('_results', results),
        ('_results_subjects', results_subjects),
    ]
    if results_rois is not None:
        # only exists when ROIs were applied
        arrays_to_save.append(('_results_rois', results_rois))
    arrays_to_save.append(('_proportions_mean', proportions_mean))

    for suffix, array in arrays_to_save:
        np.save(
            os.path.join(output_dir, results_output_filename + suffix), array
            )

    return results
Пример #2
0
import nipype.interfaces.utility as util

from pymri.utils.paths_dirs_info import get_subject_names

def _identity_node(field, node_name):
    """Return a Node wrapping an IdentityInterface with a single field."""
    return Node(
        interface=util.IdentityInterface(fields=[field]),
        name=node_name
        )


flirt_apply_all_subs = Workflow(name='flirt_apply_all_subs')

# Subject source node: iterates over the names returned by
# get_subject_names(). A fixed list can be used instead, e.g.
# ('sub', ['GK011RZJA', 'GK012OHPA']).
inputsub = _identity_node('sub', 'inputsub')
inputsub.iterables = (
    'sub', get_subject_names(base_directory, subject_template)
    )

# Hand source node: iterates over both hands.
inputhand = _identity_node('hand', 'inputhand')
inputhand.iterables = ('hand', ['Left', 'Right'])


inputnode = Node(
    interface=util.IdentityInterface(
        fields=['in_sub', 'in_hand']
        ),
Пример #3
0
def _run_cross_validation(dataset_reduced, n_times_num, accuracies,
                          proportions, label):
    """Train and test a fresh classifier ``n_times_num`` times.

    Parameters
    ----------
    dataset_reduced
        Value returned by ``feature_reduction``; handed unchanged to
        ``split_data``.
    n_times_num : int
        Number of repetitions. The repetition index is passed to
        ``split_data`` as its second argument.
    accuracies, proportions : numpy.ndarray
        One-dimensional array views with ``n_times_num`` slots. They are
        filled in place: ``accuracies`` with the accuracy of each repetition,
        ``proportions`` with ``test_data[1].sum() / test_data[1].shape[0]``.
    label : str
        Prefix of the progress lines printed to stdout.

    Returns
    -------
    float
        Mean of ``accuracies`` after all repetitions.
    """
    for n_time in range(n_times_num):
        # create Classifier specified in Classifier tab
        cls = create_classifier()

        # split dataset using Classifier/Performance settings
        training_data, test_data = split_data(dataset_reduced, n_time)
        proportions[n_time] = \
            test_data[1].sum() / float(test_data[1].shape[0])

        # train and test classifier, then release it before the next round
        cls = train_and_test_classifier(cls, training_data, test_data)
        accuracy = get_accuracy(cls)
        del cls

        accuracies[n_time] = accuracy
        print('%s, %d ==> %0.2f' % (label, n_time, accuracy))

    mean_accuracy = accuracies.mean()
    print('%s <mean> ==> %0.2f' % (label, mean_accuracy))
    return mean_accuracy


def perform_classification():
    """Run the classification for every subject, hand and (optional) ROI.

    All settings are read from module-level variable objects through their
    ``.get()`` method (``subjects_names``, ``hands_names``, ``rois_names``,
    ``var_rois_apply``, ``var_n_times``, ``var_base_dir``, ``schema`` and
    ``var_output_dir``).

    For each subject/hand pair the dataset is loaded with ``load_data``,
    reduced with ``feature_reduction`` (once per ROI when ROIs are enabled)
    and evaluated ``var_n_times`` times. Summary statistics are printed and
    the result arrays are stored with ``numpy.save`` in the output directory
    under a ``%Y%m%d%H%M`` timestamp prefix. The per-ROI summary array is
    only written when ROIs are enabled.

    Returns
    -------
    numpy.ndarray
        Accuracies with shape ``(subjects, hands, rois, n_times)`` when ROIs
        are enabled, otherwise ``(subjects, hands, n_times)``.
    """
    import datetime

    import numpy as np
    from scipy import stats

    # get list of subjects (subjects list specified or get from the pattern)
    subjects_text = subjects_names.get()
    if '*' in subjects_text and len(subjects_text.split(' ')) == 1:
        from pymri.utils.paths_dirs_info import get_subject_names
        subjects_list = get_subject_names(
            var_base_dir.get(), subjects_text.replace('*', '')
            )
    else:
        subjects_list = subjects_text.split(' ')
    subs_num = len(subjects_list)

    # get list of hands (can be only one hand); empty entry means both hands
    hands_list = hands_names.get().split(' ')
    if not hands_list[0]:
        hands_list = ['Left', 'Right']
    hands_num = len(hands_list)

    n_times_num = var_n_times.get()
    rois_enabled = var_rois_apply.get()

    # arrays storing the classification performance
    if rois_enabled:
        rois_list = rois_names.get().split(' ')
        results = np.zeros(
            shape=(subs_num, hands_num, len(rois_list), n_times_num)
            )
        proportions_mean = np.zeros(results.shape[:-2])
    else:
        rois_list = []
        results = np.zeros(shape=(subs_num, hands_num, n_times_num))
        proportions_mean = np.zeros(results.shape[:-1])
    results_subjects = np.zeros(results.shape[:-1])
    proportions_test_dataset = np.zeros(shape=results.shape)

    # for subject in number of subjects, etc.
    for sub, subject in enumerate(subjects_list):
        for hand, hand_name in enumerate(hands_list):
            # Built by plain concatenation, so the base directory and schema
            # must carry their own path separators.
            mvpa_directory = \
                var_base_dir.get() + schema.get() % (subject, hand_name)
            print(mvpa_directory)

            # load dataset using variables from load_data frame (load_data tab)
            dataset = load_data(mvpa_directory)
            label = '%s, %s' % (subject, hand_name)

            # if any rois to apply first do it, else classify once
            if rois_enabled:
                for roi, roi_name in enumerate(rois_list):
                    roi_path = mvpa_directory + 'ROIs/' + roi_name + '.nii.gz'
                    dataset_reduced = feature_reduction(dataset, roi_path)

                    results_subjects[sub, hand, roi] = _run_cross_validation(
                        dataset_reduced, n_times_num,
                        results[sub, hand, roi],
                        proportions_test_dataset[sub, hand, roi],
                        '%s, %s' % (label, roi_name)
                        )
                    # NOTE(review): overwritten for every ROI, so only the
                    # last ROI's proportion is kept - confirm this is wanted.
                    proportions_mean[sub, hand] = \
                        proportions_test_dataset[sub, hand, roi].mean()
            else:
                dataset_reduced = feature_reduction(dataset)

                results_subjects[sub, hand] = _run_cross_validation(
                    dataset_reduced, n_times_num,
                    results[sub, hand],
                    proportions_test_dataset[sub, hand],
                    label
                    )
                proportions_mean[sub, hand] = \
                    proportions_test_dataset[sub, hand].mean()

    print('RESULTS MEAN: %f' % results.mean())
    if rois_enabled:
        for i, roi_name in enumerate(rois_list):
            print(
                'ROI 00%s: %f (%s)' % (i, results[:, :, i].mean(), roi_name)
                )

    # Statistical significance vs prior chance level.
    # The t-test gets the flattened subject/hand means so that it returns a
    # single p-value (a 2-D sample yields one value per column, a scalar
    # sample yields NaN).
    chance_level = proportions_mean.mean()
    results_rois = None
    if rois_enabled:
        for roi, roi_name in enumerate(rois_list):
            print('\n%s statistical difference vs prior chance' % roi_name)
            print(
                'p_value = %f' %
                stats.ttest_1samp(
                    results_subjects[..., roi].flatten(), chance_level
                    )[1]
                )
        # one row per ROI: mean, standard error of the mean, std
        results_rois = np.array(
            [
                [
                    results_subjects[..., i].mean(),
                    stats.sem(results_subjects[..., i].flatten()),
                    results_subjects[..., i].std()
                ]
                for i in range(results_subjects.shape[-1])
            ]
            )
        print('results for particular ROIs: %s' % results_rois.T[0].flatten())
    else:
        print('\nstatistical difference vs prior chance')
        print(
            'p_value = %f' %
            stats.ttest_1samp(results_subjects.flatten(), chance_level)[1]
            )

    results_output_filename = datetime.datetime.now().strftime("%Y%m%d%H%M")

    output_dir = var_output_dir.get()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    arrays_to_save = [
        ('_results', results),
        ('_results_subjects', results_subjects),
    ]
    if results_rois is not None:
        # only exists when ROIs were applied
        arrays_to_save.append(('_results_rois', results_rois))
    arrays_to_save.append(('_proportions_mean', proportions_mean))

    for suffix, array in arrays_to_save:
        np.save(
            os.path.join(output_dir, results_output_filename + suffix), array
            )

    return results