Example #1
    def get_x(self):
        """
        Returns: a numpy 2d-array.
        """
        if self._x is not None:
            return self._x

        cprint(f"Loading {len(self.get_images())} subjects")
        self._x = rbio.load_data(self._images, self._subjects)
        cprint("Subjects loaded")

        return self._x
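Example #1 lazily loads the feature matrix once and caches it on the instance, so later calls return the already-loaded array. A minimal, self-contained sketch of the same caching pattern, with a hypothetical loader standing in for rbio.load_data:

import numpy as np

class FeatureCache:
    """Illustrative only: caches a 2d feature array after the first load."""

    def __init__(self, loader):
        self._loader = loader   # hypothetical callable returning a numpy 2d-array
        self._x = None

    def get_x(self):
        if self._x is not None:      # already loaded: return the cached array
            return self._x
        self._x = self._loader()     # the expensive load runs only once
        return self._x

cache = FeatureCache(lambda: np.random.rand(5, 3))
assert cache.get_x() is cache.get_x()   # the second call reuses the cached array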
Example #2
    def get_x(self):
        """

        Returns: a numpy 2d-array.

        """
        if self._x is not None:
            return self._x

        print('Loading ' + str(len(self.get_images())) + ' subjects')
        self._x = rbio.load_data(self._images, self._subjects)
        print('Subjects loaded')

        return self._x
Example #3
    def get_x(self):
        """

        Returns: a numpy 2d-array.

        """
        if self._x is not None:
            return self._x

        print('Loading ' + str(len(self.get_images())) + ' subjects')
        x = rbio.load_data(self._images, self._subjects)  # load the TSV data into an array: rows are subjects, columns are ROI measures
        self._x = x[:, 1:]  # drop the first column
        print('Subjects loaded')

        return self._x
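Example #3 differs from #2 only in dropping the first column of the loaded array before caching it. A short numpy illustration of that slicing step, with made-up values:

import numpy as np

x = np.array([[101, 0.5, 0.7],   # made-up data; column 0 plays the role of an identifier
              [102, 0.6, 0.8]])
features = x[:, 1:]              # keep every row, drop the first column
print(features.shape)            # (2, 2)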
Example #4
def svm_binary_classification(input_image_atlas,
                              subjects_visits_tsv,
                              image_list,
                              diagnosis_list,
                              output_directory,
                              kernel_function=None,
                              existing_gram_matrix=None,
                              mask_zeros=True,
                              scale_data=False,
                              balanced=False,
                              outer_folds=10,
                              inner_folds=10,
                              n_threads=10,
                              c_range=np.logspace(-10, 2, 1000),
                              save_gram_matrix=False,
                              save_subject_classification=False,
                              save_dual_coefficients=False,
                              scaler=None,
                              data_mask=None,
                              save_original_weights=False,
                              save_features_image=True):

    if (kernel_function is None and existing_gram_matrix is None) or (
            kernel_function is not None and existing_gram_matrix is not None):
        raise ValueError(
            'Exactly one of kernel_function and existing_gram_matrix must be provided.'
        )

    results = dict()
    dx_filter = np.unique(diagnosis_list)

    print('Loading ' + str(len(image_list)) + ' subjects')
    x0 = load_data(image_list, subjects_visits_tsv)
    print('Subjects loaded')
    if scale_data:
        x_all = scale(x0)
    else:
        x_all = x0

    if existing_gram_matrix is None:
        if kernel_function is not None:
            print('Calculating Gram matrix')
            gram_matrix = kernel_function(x_all)
            print('Gram matrix calculated')
        else:
            raise ValueError(
                'If a Gram matrix is not provided a function to calculate it (kernel_function) is a required input.'
            )
    else:
        gram_matrix = existing_gram_matrix
        if (gram_matrix.shape[0] != gram_matrix.shape[1]) or (
                gram_matrix.shape[0] != len(image_list)):
            raise ValueError(
                'The existing Gram matrix must be a square matrix with number of rows and columns equal to the number of images.'
            )

    if save_gram_matrix:
        np.savetxt(join(output_directory, 'gram_matrix.txt'), gram_matrix)

    shared_x = sharedmem.copy(x_all)
    x_all = None
    gc.collect()

    for i in range(len(dx_filter)):
        for j in range(i + 1, len(dx_filter)):
            print(j)
            dx1 = dx_filter[i]
            dx2 = dx_filter[j]

            ind1 = []
            ind2 = []
            for k in range(len(diagnosis_list)):
                if diagnosis_list[k] == dx1:
                    ind1.append(k)
                if diagnosis_list[k] == dx2:
                    ind2.append(k)

            indices = ind1 + ind2

            current_subjects = [image_list[k] for k in indices]
            current_diagnosis = [diagnosis_list[k] for k in indices]

            y = np.array([0] * len(ind1) + [1] * len(ind2))
            gm = gram_matrix[indices, :][:, indices]

            classification_str = dx1 + '_vs_' + dx2 + ('_balanced' if balanced
                                                       else '_not_balanced')
            print('Running ' + dx1 + ' vs ' + dx2 + ' classification')

            y_hat, dual_coefficients, sv_indices, intersect, c, auc = cv_svm(
                gm,
                shared_x,
                np.array(indices),
                y,
                c_range,
                balanced=balanced,
                outer_folds=outer_folds,
                inner_folds=inner_folds,
                n_threads=n_threads)

            evaluation = evaluate_prediction(y, y_hat)
            evaluation['auc'] = auc

            print('\nTrue positive %d' % len(evaluation['predictions'][0]))
            print('True negative %d' % len(evaluation['predictions'][1]))
            print('False positive %d' % len(evaluation['predictions'][2]))
            print('False negative %d' % len(evaluation['predictions'][3]))

            print('AUC %0.2f' % auc)
            print('Accuracy %0.2f' % evaluation['accuracy'])
            print('Balanced accuracy %0.2f' % evaluation['balanced_accuracy'])
            print('Sensitivity %0.2f' % evaluation['sensitivity'])
            print('Specificity %0.2f' % evaluation['specificity'])
            print('Positive predictive value %0.2f' % evaluation['ppv'])
            print('Negative predictive value %0.2f\n' % evaluation['npv'])

            if save_dual_coefficients:
                np.save(
                    join(output_directory,
                         classification_str + '__dual_coefficients'),
                    dual_coefficients[0])
                np.save(
                    join(output_directory,
                         classification_str + '__sv_indices'), sv_indices)
                np.save(
                    join(output_directory, classification_str + '__intersect'),
                    intersect)

            if save_original_weights or save_features_image:
                weights_orig = features_weights(current_subjects,
                                                dual_coefficients[0],
                                                sv_indices, scaler, data_mask)

            if save_original_weights:
                np.save(
                    join(output_directory, classification_str + '__weights'),
                    weights_orig)

            if save_features_image:
                output_image = weights_to_nifti(input_image_atlas,
                                                weights_orig)
                output_image.to_filename(
                    join(output_directory,
                         classification_str + '__weights.nii'))

            if save_subject_classification:
                save_subjects_prediction(
                    current_subjects, current_diagnosis, y, y_hat,
                    join(output_directory,
                         classification_str + '__subjects.tsv'))

            results[(dx1, dx2)] = evaluation  # evaluate_prediction(y, y_hat)

    results_to_tsv(
        results, dx_filter,
        join(
            output_directory, 'resume' +
            ('_balanced' if balanced else '_not_balanced') + '.tsv'))
    shared_x = None
    gc.collect()
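For every pair of diagnoses, the loop in Example #4 collects the indices of matching subjects, builds binary labels, and extracts the corresponding Gram sub-matrix with gram_matrix[indices, :][:, indices]. A small self-contained sketch of that selection step, using made-up labels and a random Gram matrix:

import numpy as np

diagnosis_list = ['AD', 'CN', 'AD', 'MCI', 'CN']   # made-up labels
gram_matrix = np.random.rand(5, 5)                 # stands in for the precomputed Gram matrix

dx1, dx2 = 'AD', 'CN'
ind1 = [k for k, d in enumerate(diagnosis_list) if d == dx1]
ind2 = [k for k, d in enumerate(diagnosis_list) if d == dx2]
indices = ind1 + ind2

y = np.array([0] * len(ind1) + [1] * len(ind2))    # 0 for dx1 subjects, 1 for dx2 subjects
gm = gram_matrix[indices, :][:, indices]           # Gram sub-matrix restricted to this pair
print(gm.shape, y)                                 # (4, 4) [0 0 1 1]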
Example #5
    def run_command(self, args):
        from clinica.pipelines.machine_learning.region_based_svm import svm_binary_classification
        from clinica.pipelines.machine_learning.region_based_io import get_caps_pet_list, get_caps_t1_list, load_data
        from clinica.pipelines.machine_learning.svm_utils import gram_matrix_linear
        from numpy import logspace
        import pandas
        from os.path import join, split, realpath

        output_directory = join(
            self.absolute_path(args.caps_directory),
            'group-' + args.group_id + '/machine_learning/region_based_svm/',
            'space' + args.atlas_id,
            args.image_type)

        if args.subjects_visits_tsv is None:
            subjects_visits_tsv = ()  # TODO where it's saved for t1 and pet
        else:
            subjects_visits_tsv = pandas.io.parsers.read_csv(
                self.absolute_path(args.participants_sessions_tsv),
                sep='\t')

        if args.image_type == 't1':

            image_list = get_caps_t1_list(self.absolute_path(args.caps_directory),
                                          subjects_visits_tsv,
                                          args.group_id,
                                          args.atlas_id)
        else:

            image_list = get_caps_pet_list(self.absolute_path(args.caps_directory),
                                           subjects_visits_tsv,
                                           args.group_id,
                                           args.atlas_id)

        data = load_data(image_list, subjects_visits_tsv)
        input_image_atlas = join(split(realpath(__file__))[0], '../resources/atlases_spm', args.atlas_id + '.nii')
        subjects_diagnosis = pandas.io.parsers.read_csv(args.diagnosis_tsv, sep='\t')
        if list(subjects_diagnosis.columns.values) != ['participant_id', 'diagnosis']:
            raise Exception('Diagnosis file is not in the correct format.')
        diagnosis_list = list(subjects_diagnosis.diagnosis)
        gram_matrix = gram_matrix_linear(data)
        c_range = logspace(args.c_range_logspace[0],
                           args.c_range_logspace[1],
                           args.c_range_logspace[2])

        svm_binary_classification(
            input_image_atlas,
            subjects_visits_tsv,
            image_list,
            diagnosis_list,
            output_directory,
            kernel_function=None, existing_gram_matrix=gram_matrix,
            mask_zeros=True,
            scale_data=False, balanced=False,
            outer_folds=args.cv_folds,
            inner_folds=args.folds_c,
            n_threads=args.n_procs,
            c_range=c_range,
            save_gram_matrix=args.save_gram_matrix,
            save_subject_classification=args.save_subject_classification,
            save_dual_coefficients=args.save_dual_coefficients,
            scaler=None, data_mask=None,
            save_original_weights=args.save_original_weights,
            save_features_image=args.save_features_image
        )
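Example #5 validates the columns of the diagnosis TSV before using it and builds the SVM C grid with numpy's logspace. A minimal sketch of those two steps, with an in-memory DataFrame standing in for the real file and logspace bounds borrowed from the c_range default in Example #4:

import numpy as np
import pandas as pd

# In-memory stand-in for the file normally read from args.diagnosis_tsv.
subjects_diagnosis = pd.DataFrame({'participant_id': ['sub-01', 'sub-02'],
                                   'diagnosis': ['AD', 'CN']})
if list(subjects_diagnosis.columns.values) != ['participant_id', 'diagnosis']:
    raise ValueError('Diagnosis file is not in the correct format.')
diagnosis_list = list(subjects_diagnosis.diagnosis)

# Candidate C values spread logarithmically, as in c_range=np.logspace(-10, 2, 1000).
c_range = np.logspace(-10, 2, 1000)
print(diagnosis_list, c_range[0], c_range[-1])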