Example No. 1
def collect_image_paths(data_dir):
    image_keys = [
        structure.BrainImageTypes.T1w, structure.BrainImageTypes.GroundTruth
    ]

    class MyFilePathGenerator(futil.FilePathGenerator):
        @staticmethod
        def get_full_file_path(id_: str, root_dir: str, file_key,
                               file_extension: str) -> str:
            if file_key == structure.BrainImageTypes.T1w:
                file_name = 'T1native'
            elif file_key == structure.BrainImageTypes.GroundTruth:
                file_name = 'labels_native'
            else:
                raise ValueError('Unknown key')
            return os.path.join(root_dir, file_name + file_extension)

    dir_filter = futil.DataDirectoryFilter()

    # create an instance of futil.FileSystemDataCrawler with the corresponding arguments
    crawler = futil.FileSystemDataCrawler('../data/exercise/', image_keys,
                                          MyFilePathGenerator(), dir_filter,
                                          '.nii.gz')

    return crawler
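
A minimal, self-contained sketch of the path-generation contract that MyFilePathGenerator implements (the FILE_NAMES dictionary and full_file_path helper are hypothetical stand-ins, not part of the project):

import os

# hypothetical stand-in for the key-to-filename mapping used above
FILE_NAMES = {'T1w': 'T1native', 'GroundTruth': 'labels_native'}

def full_file_path(root_dir: str, key: str, extension: str = '.nii.gz') -> str:
    if key not in FILE_NAMES:
        raise ValueError('Unknown key')
    return os.path.join(root_dir, FILE_NAMES[key] + extension)

print(full_file_path('../data/exercise/subject_1', 'T1w'))
# -> ../data/exercise/subject_1/T1native.nii.gz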
Example No. 2
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'hog_feature': True,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    # store the pre-processed feature matrix and label vector to file
    with open('data_train.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)
    with open('labels_train.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)
    print('-' * 5, 'Preprocessed images stored')
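
To reuse the stored arrays in a later run, they can be loaded back with pickle; a short sketch, assuming the two .pckl files written above exist:

import pickle

with open('data_train.pckl', 'rb') as file_id:
    data_train = pickle.load(file_id)
with open('labels_train.pckl', 'rb') as file_id:
    labels_train = pickle.load(file_id)

print(data_train.shape, labels_train.shape)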
Example No. 3
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str, tmp_result_dir: str):
    """Brain tissue segmentation using decision forests.

    Section of the original main routine. Executes the post-processing part of the medical image analysis pipeline:

        Must be done separately in advance:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images

        Carried out in this section of the pipeline:
        - Loading of temporary data
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    # print('-' * 5, 'Training...')
    #
    # # crawl the training image directories
    # crawler = futil.FileSystemDataCrawler(data_train_dir,
    #                                       LOADING_KEYS,
    #                                       futil.BrainImageFilePathGenerator(),
    #                                       futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load necessary data to perform post processing
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    # load the predictions of the test images (segmented images)
    images_prediction, images_probabilities = putil.load_prediction_images(
        images_test, tmp_result_dir, '2020-10-30-18-31-15')  # timestamp of the run that produced them
    # evaluate images without post-processing
    for i, img in enumerate(images_test):
        evaluator.evaluate(images_prediction[i],
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {
        'simple_post': True,
        'variance': 1.0,
        'preserve_background': False
    }
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=False)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i],
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(
            images_post_processed[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # use two writers to report the results
    os.makedirs(result_dir, exist_ok=True)  # generate result directory, if it does not exist
    result_file = os.path.join(result_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file,
                               functions=functions).write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(
        evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
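
The CSVStatisticsWriter call above aggregates the per-subject metrics with the given functions; a toy stand-in of that aggregation (the Dice values are hypothetical, not project results):

import numpy as np

dice_per_subject = np.array([0.81, 0.78, 0.84, 0.80])  # hypothetical values
functions = {'MEAN': np.mean, 'STD': np.std}
for name, fn in functions.items():
    print(name, float(fn(dice_per_subject)))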
Example No. 4
def main(_):
    """Ensemble using results from various algorithms
    """

    # load results from various previous runs
    all_probabilities = None
    for r in RESULTS:
        p = np.load(os.path.join(r, 'all_probabilities.npy'))
        if all_probabilities is None:
            all_probabilities = p
        else:
            if p.shape != all_probabilities.shape:
                print('Error: shapes of all_probabilities.npy do not match: ' +
                      str(p.shape) + ' vs. ' + str(all_probabilities.shape) +
                      ' for ' + r)
                sys.exit(1)

            if ENSEMBLE_MAX:
                all_probabilities = np.maximum(all_probabilities, p)
            else:
                all_probabilities = all_probabilities + p

    if not ENSEMBLE_MAX:
        # average the summed probabilities over the number of runs
        all_probabilities = all_probabilities / len(RESULTS)

    # convert back to float32
    all_probabilities = all_probabilities.astype(np.float32)

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    index = 0
    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()

            probabilities = all_probabilities[index, :, :]
            index = index + 1
            predictions = LABEL_CLASSES[probabilities.argmax(axis=1)]

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'),
                True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Ensemble from ' + str(RESULTS), file=summary_file)
        print('ENSEMBLE_MAX ' + str(ENSEMBLE_MAX), file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
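
The ensembling at the top of this example reduces to an element-wise maximum or a mean over the per-run probability arrays; a toy illustration with two hypothetical 2-class probability maps:

import numpy as np

p1 = np.array([[0.7, 0.3], [0.2, 0.8]])
p2 = np.array([[0.6, 0.4], [0.5, 0.5]])

prob_max = np.maximum(p1, p2)  # ENSEMBLE_MAX = True
prob_mean = (p1 + p2) / 2      # ENSEMBLE_MAX = False: divide the sum by the number of runs
print(prob_max)
print(prob_mean)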
Example No. 5
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """
    seed = 42
    random.seed(seed)
    np.random.seed(seed)

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)
    #atlas_creation()
    #putil.load_atlas_custom_images(data_train_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    # warnings.warn('Random forest parameters not properly set.')
    # the forest below uses 5 trees with a maximum depth of 10;
    # note, however, that these settings might not be the optimal ones...
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=5,
        max_depth=10)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i],
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(
            images_post_processed[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
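
The classifier step in this example is plain scikit-learn; a self-contained toy version of the fit/predict/predict_proba round trip (random data, same hyperparameters as above):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(42)
X = rng.random((100, 3))          # toy feature matrix
y = rng.integers(0, 4, size=100)  # toy labels for four classes
forest = RandomForestClassifier(max_features=3, n_estimators=5, max_depth=10)
forest.fit(X, y)
print(forest.predict(X[:2]))
print(forest.predict_proba(X[:2]))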
Example No. 6
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
    }  #[0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    print('-' * 5, 'Testing...')

    # load the pre-trained classifier and the feature scaler
    with open('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        svm_rbf_classifier = pickle.load(file_id)

    with open('scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        scaler = pickle.load(file_id)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)
        scaled_features, s = util.scale_features(img.feature_matrix[0], scaler)
        start_time = timeit.default_timer()
        predictions = svm_rbf_classifier.predict(scaled_features)
        #probabilities = svm_classifier.predict_proba(img.feature_matrix[0])
        #predictions = forest.predict(img.feature_matrix[0])
        #probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        #image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        #images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    #post_process_params = {'crf_post': False}
    #images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
    #                                                post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        #    evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
        #                       img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(
                result_dir, images_test[i].id_ +
                '_SEG_SVM_fullset_C15-_G5_lotofpointspersample.mha'), True)
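
util.scale_features is not shown in this example; a plausible stand-in (an assumption, not the project's code) that fits a scaler on the training data and reuses it at test time:

import numpy as np
from sklearn.preprocessing import StandardScaler

def scale_features(features, scaler=None):
    # fit a StandardScaler if none is given; otherwise reuse the fitted one
    if scaler is None:
        scaler = StandardScaler().fit(features)
    return scaler.transform(features), scaler

train = np.random.rand(10, 3)
test = np.random.rand(4, 3)
train_scaled, scaler = scale_features(train)
test_scaled, _ = scale_features(test, scaler)  # same transform as training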
Example No. 7
def main(_):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a timestamped model directory (the timestamp is also used to name the result directory)
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,  # 1 feature
        'coordinates_feature': False,  # 3 features
        'intensity_feature': True,  # 1 feature
        'gradient_intensity_feature': True  # 2 features
    }

    start_time_total_train = timeit.default_timer()

    n_neighbors = 20

    batch_data = dict(data_items)
    # load images for training and pre-process
    images = putil.pre_process_batch(batch_data,
                                     pre_process_params,
                                     multi_process=True)
    print('pre-processing done')

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images])

    if NORMALIZE_FEATURES:
        # normalize data (mean 0, std 1)
        data_train = scipy_stats.zscore(data_train)

    start_time = timeit.default_timer()
    neigh = KNeighborsClassifier(n_neighbors=n_neighbors,
                                 weights='distance',
                                 algorithm='auto').fit(data_train,
                                                       labels_train[:, 0])
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    all_probabilities = None

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            # probabilities, predictions = forest.predict(img.feature_matrix[0])
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                features = scipy_stats.zscore(features)

            predictions = neigh.predict(features)
            probabilities = neigh.predict_proba(features)

            if all_probabilities is None:
                all_probabilities = np.array([probabilities])
            else:
                all_probabilities = np.concatenate(
                    (all_probabilities, [probabilities]), axis=0)

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)

            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'),
                True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        print('kNN', file=summary_file)
        print('n_neighbors: {}'.format(n_neighbors), file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
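
A self-contained toy version of the kNN step above (random data; distance weighting as in the example):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)
X = rng.random((200, 5))
y = rng.integers(0, 4, size=200)
neigh = KNeighborsClassifier(n_neighbors=20, weights='distance',
                             algorithm='auto').fit(X, y)
print(neigh.predict(X[:2]))
print(neigh.predict_proba(X[:2]))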
Example No. 8
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')
    # visualization(images)
    print(np.shape(images[0].feature_matrix[0]))

    error_rate = []
    for num_estimators in range(1, 10):
        forest = sk_ensemble.RandomForestClassifier(
            max_features=images[0].feature_matrix[0].shape[1],
            n_estimators=num_estimators,
            max_depth=10,
            oob_score=True)

        # start_time = timeit.default_timer()
        forest.fit(data_train, labels_train)

        oob_error = 1 - forest.oob_score_
        print('OOB score:', forest.oob_score_)
        error_rate.append(oob_error)

    plt.plot(range(1, 10), error_rate)
    plt.show()
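
The loop above refits the forest from scratch for every tree count; scikit-learn's warm_start grows the same forest incrementally, which is a cheaper way to trace the OOB error curve (a sketch on random toy data):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X = rng.random((200, 4))
y = rng.integers(0, 3, size=200)

forest = RandomForestClassifier(max_depth=10, oob_score=True, warm_start=True)
error_rate = []
for num_estimators in range(1, 10):
    forest.set_params(n_estimators=num_estimators)  # adds trees, keeps the old ones
    forest.fit(X, y)
    error_rate.append(1 - forest.oob_score_)
print(error_rate)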
Example No. 9
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, ml_method: str, verbose: bool):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training ' + ml_method + '...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    if verbose:
        util.print_class_count(labels_train)

    start_time = timeit.default_timer()
    if ml_method == 'random_forest':
        classifier = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                    n_estimators=20,
                                                    max_depth=25)
        data_train_scaled = data_train # do not scale features to keep original RF
    elif ml_method == 'svm_linear':
        # probability=True is required for predict_proba during testing
        classifier = svm.SVC(kernel='linear', C=1, class_weight='balanced', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'svm_rbf':
        classifier = svm.SVC(kernel='rbf', C=15, gamma=5, class_weight='balanced',
                             decision_function_shape='ovo', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)

    elif ml_method == 'logistic_regression':
        classifier = linear_model.LogisticRegression(class_weight='balanced')
        data_train_scaled, scaler = util.scale_features(data_train)
    else:
        raise ValueError('No valid segmentation algorithm selected in argument ml_method')

    classifier.fit(data_train_scaled, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # print and plot feature importance for each structure
    if verbose:
        if ml_method == 'svm_linear':
            util.print_feature_importance(classifier.coef_)
            util.plot_feature_importance(classifier.coef_, result_dir)
        if ml_method == 'random_forest':
            util.print_feature_importance(classifier.feature_importances_)
            util.plot_feature_importance(classifier.feature_importances_, result_dir)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=True)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        if ml_method == 'random_forest':
            scaled_features = img.feature_matrix[0]
        else:
            scaled_features, s = util.scale_features(img.feature_matrix[0], scaler)

        predictions = classifier.predict(scaled_features)

        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)

        probabilities = classifier.predict_proba(scaled_features)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)
        images_probabilities.append(image_probabilities)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)


    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
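
The if/elif chain above can also be written as a lookup table; a sketch with the same settings (make_classifier and n_features are illustrative names, and probability=True mirrors the requirement of predict_proba during testing):

from sklearn import ensemble as sk_ensemble, linear_model, svm

def make_classifier(ml_method: str, n_features: int):
    builders = {
        'random_forest': lambda: sk_ensemble.RandomForestClassifier(
            max_features=n_features, n_estimators=20, max_depth=25),
        'svm_linear': lambda: svm.SVC(kernel='linear', C=1,
                                      class_weight='balanced', probability=True),
        'svm_rbf': lambda: svm.SVC(kernel='rbf', C=15, gamma=5,
                                   class_weight='balanced',
                                   decision_function_shape='ovo', probability=True),
        'logistic_regression': lambda: linear_model.LogisticRegression(
            class_weight='balanced'),
    }
    if ml_method not in builders:
        raise ValueError('No valid ml_method: ' + ml_method)
    return builders[ml_method]()

classifier = make_classifier('svm_rbf', n_features=8)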
Example No. 10
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': False,
        'intensity_feature': False,
        'gradient_intensity_feature': False,
        'hog_feature': False,
        'canny_feature': False,
        'secondOrder_feature': True,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=20,
        max_depth=25)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
    print(forest.feature_importances_)

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        # debug output: count NaN/inf entries in the feature matrix
        print(np.sum(np.isnan(img.feature_matrix[0]), axis=0))
        print(img.feature_matrix[0].shape)
        print(np.sum(np.isnan(img.feature_matrix[0]), axis=1))
        print(np.sum(np.isinf(img.feature_matrix[0]), axis=0))
        print(img.feature_matrix[0].shape)
        print(np.sum(np.isinf(img.feature_matrix[0]), axis=1))
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint64), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i],
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(
            images_post_processed[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
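
The debug prints in the test loop check the feature matrix for NaN/inf entries; the same check as a small helper (check_features is an illustrative name, not project code):

import numpy as np

def check_features(features: np.ndarray, repair: bool = False) -> np.ndarray:
    # report NaN/inf counts and optionally replace them with finite values
    print('NaN entries:', int(np.isnan(features).sum()),
          '| inf entries:', int(np.isinf(features).sum()))
    return np.nan_to_num(features) if repair else features

print(check_features(np.array([[1.0, np.nan], [np.inf, 2.0]]), repair=True))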
Example No. 11
def main(_):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a model directory (use datetime to ensure that the directory is empty)
    # we need an empty directory because TensorFlow will continue training an existing model if it is not empty
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = 160
    df_params.max_nodes = 3000
    df_params.model_dir = model_dir
    forest = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(
                script_dir, './mia-cache/batch-' + str(batch_index) + '-' +
                str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'
        cache_file_labels = cache_file_prefix + '-data_labels.npy'
        if USE_PREPROCESS_CACHE and os.path.exists(cache_file_train):
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
            labels_train = np.load(cache_file_labels)
        else:
            # slicing manages out of range; no need to worry
            batch_data = dict(data_items[batch_index:batch_index +
                                         TRAIN_BATCH_SIZE])
            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data,
                                             pre_process_params,
                                             multi_process=True)
            print('pre-processing done')

            # generate feature matrix and label vector
            data_train = np.concatenate(
                [img.feature_matrix[0] for img in images])
            labels_train = np.concatenate(
                [img.feature_matrix[1] for img in images])

            if NORMALIZE_FEATURES:
                # z-score the non-coordinate features; scale the coordinate
                # features from [0, 255] to [-1, 1]
                # data_train = scipy_stats.zscore(data_train)
                non_coord = scipy_stats.zscore(data_train[:, 3:8])
                coord = data_train[:, 0:3] / 255 * 2 - 1
                data_train = np.concatenate((coord, non_coord), axis=1)
            if USE_PREPROCESS_CACHE:
                print('Writing cache')
                os.makedirs(os.path.dirname(cache_file_prefix), exist_ok=True)
                data_train.dump(cache_file_train)
                labels_train.dump(cache_file_labels)

        if forest is None:
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    all_probabilities = None

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing manages out of range; no need to worry
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            features = img.feature_matrix[0]

            if NORMALIZE_FEATURES:
                # apply the same normalization as during training
                # features = scipy_stats.zscore(features)
                non_coord = scipy_stats.zscore(features[:, 3:8])
                coord = features[:, 0:3] / 255 * 2 - 1
                features = np.concatenate((coord, non_coord), axis=1)

            probabilities, predictions = forest.predict(features)

            if all_probabilities is None:
                all_probabilities = np.array([probabilities])
            else:
                all_probabilities = np.concatenate(
                    (all_probabilities, [probabilities]), axis=0)

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'),
                True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
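
The NORMALIZE_FEATURES branch above z-scores the non-coordinate columns and maps the coordinate columns to [-1, 1]; a standalone version on random toy data (the [0, 255] coordinate range is taken from the code above):

import numpy as np
from scipy import stats as scipy_stats

rng = np.random.default_rng(1)
features = rng.random((10, 8)) * 255  # toy matrix: 3 coordinate + 5 other columns

coord = features[:, 0:3] / 255 * 2 - 1            # coordinates -> [-1, 1]
non_coord = scipy_stats.zscore(features[:, 3:8])  # z-score the rest
features_norm = np.concatenate((coord, non_coord), axis=1)
print(features_norm.shape)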
Example No. 12
def main(_):
    """Brain tissue segmentation using SVM.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - SVM model building
        - Segmentation using the SVM model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # SVM cannot deal with the default mask (too much data). Reduce it by a factor of 10
    putil.FeatureExtractor.VOXEL_MASK_FLT = [0.00003, 0.0004, 0.0003, 0.0004]

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a timestamped model directory (the timestamp is also used to name the result directory)
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    start_time_total_train = timeit.default_timer()

    batch_data = dict(data_items)
    # load images for training and pre-process
    images = putil.pre_process_batch(batch_data,
                                     pre_process_params,
                                     multi_process=True)
    print('pre-processing done')

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images])

    if NORMALIZE_FEATURES:
        # normalize data (mean 0, std 1)
        data_train = scipy_stats.zscore(data_train)

    print('Start training SVM')

    # Training
    # SVM does not support online/incremental training. Need to fit all in one go!
    # Note: Very slow with large training set!
    start_time = timeit.default_timer()
    # to limit the number of iterations: max_iter=1000000000

    # Enable for grid search of best hyperparameters
    if False:
        C_range = [300, 350, 400, 450, 500, 550, 600, 800, 1000, 1200, 1500]
        gamma_range = [
            0.00001, 0.00003, 0.00004, 0.00005, 0.00006, 0.00008, 0.0001,
            0.0005, 0.001, 0.005, 0.01, 0.1, 0.2
        ]

        # wider search ranges (these override the ranges above)
        C_range = [
            0.001, 0.01, 0.1, 0.5, 1, 3, 5, 10, 20, 50, 100, 200, 250, 300,
            1000, 2000, 5000, 10000, 20000, 50000, 100000, 120000, 150000
        ]
        gamma_range = [
            0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.0005, 0.001,
            0.005, 0.01, 0.1, 0.2, 0.5, 1, 5, 10
        ]

        #C_range = [1, 10, 100, 500, 1000, 5000, 10000, 15000, 20000, 22000, 25000, 30000, 35000]
        #gamma_range = [0.00000001, 0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 0.2, 0.5]

        params = [{
            'kernel': ['rbf'],
            'C': C_range,
            'gamma': gamma_range,
        }]
        #'C': [0.001, 0.01, 0.1, 0.5, 1, 3, 5, 10, 20, 50, 100, 200, 250, 300, 1000],
        #'gamma': [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 0.2, 0.5, 1, 5, 10, 20, 100, 10

        clf = GridSearchCV(SVC(probability=True, cache_size=2000),
                           params,
                           cv=2,
                           n_jobs=8,
                           verbose=3)
        clf.fit(data_train, labels_train[:, 0])
        print('best param: ' + str(clf.best_params_))
        scores = clf.cv_results_['mean_test_score'].reshape(
            len(C_range), len(gamma_range))
        plt.figure(figsize=(8, 6))
        plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
        plt.imshow(scores,
                   interpolation='nearest',
                   cmap=plt.cm.hot,
                   norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
        plt.xlabel('gamma')
        plt.ylabel('C')
        plt.colorbar()
        plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
        plt.yticks(np.arange(len(C_range)), C_range)
        plt.title('Validation accuracy')
        plt.savefig('svm_params.png')
        #plt.show()

        scipy.io.savemat('svm_params.mat',
                         mdict={
                             'C': C_range,
                             'gamma': gamma_range,
                             'score': scores
                         })

    #svm = SVC(probability=True, kernel='rbf', C=clf.best_params_['C'], gamma=clf.best_params_['gamma'], cache_size=2000, verbose=False)

    svm = SVC(probability=True,
              kernel='rbf',
              C=500,
              gamma=0.00005,
              cache_size=2000,
              verbose=False)

    svm.fit(data_train, labels_train[:, 0])
    print('\n Time elapsed:', timeit.default_timer() - start_time, 's')
    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    all_probabilities = None

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing handles out-of-range indices, so no bounds check is needed
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            #probabilities, predictions = forest.predict(img.feature_matrix[0])
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                features = scipy_stats.zscore(features)
            probabilities = np.array(svm.predict_proba(features))
            print('probabilities: ' + str(probabilities.shape))
            predictions = svm.classes_[probabilities.argmax(axis=1)]

            if all_probabilities is None:
                all_probabilities = np.array([probabilities])
            else:
                all_probabilities = np.concatenate(
                    (all_probabilities, [probabilities]), axis=0)
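            # note: np.concatenate copies all accumulated probabilities on
            # every iteration; appending to a Python list and stacking once
            # after the loop would scale better with many test images.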

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'),
                True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('SVM', file=summary_file)
        print('SVM params: {}'.format(svm.get_params()), file=summary_file)
        print('pre-process-params: {}'.format(pre_process_params),
              file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        #print('SVM best parameters', file=summary_file)
        #print(clf.best_params_, file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
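
    # (Sketch, mirroring the SGD variant later in this document.) The
    # accumulated all_probabilities array is never persisted here; it could be
    # saved for later post-processing experiments with:
    #   all_probabilities.astype(np.float16).dump(
    #       os.path.join(result_dir, 'all_probabilities.npy'))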
Exemplo n.º 13
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {'skullstrip_pre': True,
                          'normalization_pre': True,
                          'registration_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')
    # visualization(images)
    print(np.shape(images[0].feature_matrix[0]))
    aggregated_results = []

    print('-' * 5, 'Testing...')
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)


    for num_estimator in [10]:
        forest = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                    n_estimators=num_estimator,
                                                    max_depth=10)
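        # note: max_features equal to the full feature count disables the
        # random feature subsampling that normally decorrelates the trees;
        # a common alternative is the square root of the feature count.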

        start_time = timeit.default_timer()
        forest.fit(data_train, labels_train)

        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # create a result directory with timestamp; keep the original
        # result_dir intact so repeated loop iterations do not nest directories
        t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        run_result_dir = os.path.join(result_dir, t)
        os.makedirs(run_result_dir, exist_ok=True)

        print('-' * 5, 'Testing...')

        # initialize evaluator
        evaluator = putil.init_evaluator()

        images_prediction = []
        images_probabilities = []


        for img in images_test:
            print('-' * 10, 'Testing', img.id_)


            start_time = timeit.default_timer()
            predictions = forest.predict(img.feature_matrix[0])
            probabilities = forest.predict_proba(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                            img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        results = evaluator.results
        labels = sorted({result.label for result in results})
        metrics = sorted({result.metric for result in results})

        # functions = {'MEAN': np.mean, 'STD': np.std}
        functions = {'MEAN': np.mean}
        for label in labels:
            for metric in metrics:
                # search for results
                values = [r.value for r in results if r.label == label and r.metric == metric]

                for fn_id, fn in functions.items():
                    aggregated_results.append(
                        [num_estimator, label, metric, float(fn(values))])

        # clear results such that the evaluator is ready for the next evaluation
        evaluator.clear()
    df = pd.DataFrame(aggregated_results,
                      columns=['n_estimators', 'label', 'metric', 'value'])
    return df

    # unreachable exploratory plotting, kept for reference:
    # xdf = df[df.label == 'WhiteMatter']
    # del xdf['label']
    # my_df = pd.pivot_table(df, index=['label'], columns='metric', values='value')
    # my_df.plot()
    # print(my_df)
Exemplo n.º 14
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')
    # visualization(images)
    print(np.shape(images[0].feature_matrix[0]))
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=10,
        max_depth=10)

    # note: this forest is constructed but never fitted or used below; the
    # ROC analysis relies on the OneVsRestClassifier defined further down
    # forest.fit(data_train, labels_train)

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)

    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    data_test = np.concatenate([img.feature_matrix[0] for img in images_test])
    labels_test = np.concatenate(
        [img.feature_matrix[1] for img in images_test]).squeeze()

    random_state = np.random.RandomState(0)  # used by the commented-out SVC variant below
    X = np.concatenate((data_train, data_test))
    y = np.concatenate((labels_train, labels_test))
    y = label_binarize(y, classes=[0, 1, 2, 3, 4, 5])
    n_classes = y.shape[1]
    n_samples, n_features = X.shape

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.5,
                                                        random_state=0)
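    # note: re-splitting the pooled train+test voxels places voxels from the
    # same subject on both sides of the split, so the ROC estimates below are
    # optimistic compared to a subject-wise split.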

    # classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
    #                                          random_state=random_state))

    classifier = OneVsRestClassifier(
        sk_ensemble.RandomForestClassifier(
            max_features=images[0].feature_matrix[0].shape[1],
            n_estimators=10,
            max_depth=5))

    # use class probabilities rather than hard 0/1 predictions so the ROC
    # curves are traced over many thresholds
    y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])  # scipy.interp was removed; np.interp is equivalent

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    plt.figure()
    lw = 2
    plt.plot(fpr[2],
             tpr[2],
             color='darkorange',
             lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc[2])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)

    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Multi-class extension of the receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()


    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
Exemplo n.º 15
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    Section of the original main routine. Executes the pre-processing and prediction part of the medical
    image analysis pipeline and saves the temporary data:

        Is carried out in this section of the pipeline
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Save prediction data

    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    warnings.warn('Random forest parameters not properly set.')
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=10,
        max_depth=10)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # save all data used for post-processing
    for i, img in enumerate(images_test):
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(
            images_probabilities[i],
            os.path.join(result_dir, images_test[i].id_ + '_PROB.mha'), True)

    evaluator.clear()
Exemplo n.º 16
0
def main(_):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # SGD needs the "original" value of 0.04 for the ventricles
    putil.FeatureExtractor.VOXEL_MASK_FLT = [0.0003, 0.004, 0.003, 0.04]

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a model directory (use datetime to ensure that the directory is empty)
    # we need an empty directory because TensorFlow will continue training an existing model if it is not empty
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # initialize decision forest parameters (vestigial here; the SGD classifier below is what is actually trained)
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = 20
    df_params.max_nodes = 1000
    df_params.model_dir = model_dir
    forest = None
    clf = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(
                script_dir, './mia-cache/batch-' + str(batch_index) + '-' +
                str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'
        cache_file_labels = cache_file_prefix + '-data_labels.npy'
        if USE_PREPROCESS_CACHE and os.path.exists(cache_file_train):
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
            labels_train = np.load(cache_file_labels)
        else:
            # slicing handles out-of-range indices, so no bounds check is needed
            batch_data = dict(data_items[batch_index:batch_index +
                                         TRAIN_BATCH_SIZE])
            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data,
                                             pre_process_params,
                                             multi_process=True)
            print('pre-processing done')

            # generate feature matrix and label vector
            data_train = np.concatenate(
                [img.feature_matrix[0] for img in images])
            labels_train = np.concatenate(
                [img.feature_matrix[1] for img in images])

            if NORMALIZE_FEATURES:
                # normalize data (mean 0, std 1)
                data_train = scipy_stats.zscore(data_train)

            if USE_PREPROCESS_CACHE:
                print('Writing cache')
                if not os.path.exists(os.path.dirname(cache_file_prefix)):
                    os.mkdir(os.path.dirname(cache_file_prefix))
                data_train.dump(cache_file_train)
                labels_train.dump(cache_file_labels)

        if clf is None:
            # cross validation to find best parameter
            param = [
                {
                    "eta0": [0.5, 0.1, 0.01, 0.001, 0.0001, 0.00001],
                    "alpha": [0.5, 0.1, 0.01, 0.001, 0.0001, 0.00001],
                    "learning_rate": ['optimal', 'constant'],
                    "loss": ['log', 'modified_huber']
                    #"max_iter": [10000, 100000]
                },
            ]
            # Best params:
            #{'alpha': 0.01, 'eta0': 0.5, 'learning_rate': 'optimal', 'loss': 'modified_huber'}

            # max_iter must be an integer number of epochs
            n_iter = max(1, 300000 // len(data_items))
            sgd = SGDClassifier(learning_rate='optimal',
                                eta0=0.5,
                                alpha=0.01,
                                loss='modified_huber',
                                penalty="l2",
                                max_iter=n_iter,
                                n_jobs=8,
                                shuffle=False)
            clf = sgd
            # Note: shuffle=True gives "RuntimeWarning: overflow encountered
            # in exp" from np.exp(prob, prob)

            # to try several parameters with grid search
            #clf = GridSearchCV(sgd, param, cv=2, n_jobs=4, verbose=3)

        start_time = timeit.default_timer()

        clf.fit(data_train, labels_train[:, 0])
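        # note: SGDClassifier.fit re-initializes the model on every call, so
        # looping fit() over batches keeps only the last batch's model;
        # partial_fit (with classes= on the first call) would accumulate
        # all batches.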
        #print('Best params: ')
        #print(clf.best_params_)
        print('\n Training time elapsed:',
              timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    start_time_total_test = timeit.default_timer()
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    all_probabilities = None

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing handles out-of-range indices, so no bounds check is needed
        batch_data = dict(data_items[batch_index:batch_index +
                                     TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data,
                                              pre_process_params,
                                              multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            #probabilities, predictions = forest.predict(img.feature_matrix[0])
            features = img.feature_matrix[0]
            if NORMALIZE_FEATURES:
                features = scipy_stats.zscore(features)
            probabilities = np.array(clf.predict_proba(features))
            print('probabilities: ' + str(probabilities.shape))
            predictions = clf.classes_[probabilities.argmax(axis=1)]

            if all_probabilities is None:
                all_probabilities = np.array([probabilities])
            else:
                all_probabilities = np.concatenate(
                    (all_probabilities, [probabilities]), axis=0)

            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(
                predictions.astype(np.uint8), img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
                probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(
                image_prediction,
                img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')

            # save results
            sitk.WriteImage(
                images_prediction[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'),
                True)
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

    time_total_test = timeit.default_timer() - start_time_total_test

    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Result dir: {}'.format(result_dir))
        print('Result dir: {}'.format(result_dir), file=summary_file)
        print('Training data size: {}'.format(train_data_size),
              file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train),
              file=summary_file)
        print('Total testing time: {:.1f}s'.format(time_total_test),
              file=summary_file)
        print('Voxel Filter Mask: {}'.format(
            putil.FeatureExtractor.VOXEL_MASK_FLT),
              file=summary_file)
        print('Normalize Features: {}'.format(NORMALIZE_FEATURES),
              file=summary_file)
        print('SGD', file=summary_file)
        #print(clf.best_params_, file=summary_file)
        stats = statistics.gather_statistics(
            os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)

    all_probabilities.astype(np.float16).dump(
        os.path.join(result_dir, 'all_probabilities.npy'))
Exemplo n.º 17
0
def main(_):
    # generate a model directory (use datetime to ensure that the directory is empty)
    # we need an empty directory because TensorFlow will continue training an existing model if it is not empty
    t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S')
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    pre_process_params = {
        'zscore_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        cache_file_prefix = os.path.normpath(
            os.path.join(
                script_dir, './mia-cache/batch-' + str(batch_index) + '-' +
                str(TRAIN_BATCH_SIZE)))
        cache_file_train = cache_file_prefix + '-data_train.npy'
        cache_file_labels = cache_file_prefix + '-data_labels.npy'
        if USE_PREPROCESS_CACHE and os.path.exists(cache_file_train):
            print('Using cache from ', cache_file_train)
            data_train = np.load(cache_file_train)
            labels_train = np.load(cache_file_labels)
        else:
            # slicing handles out-of-range indices, so no bounds check is needed
            batch_data = dict(data_items[batch_index:batch_index +
                                         TRAIN_BATCH_SIZE])
            # load images for training and pre-process
            images = putil.pre_process_batch(batch_data,
                                             pre_process_params,
                                             multi_process=True)

            # generate feature matrix and label vector
            data_train = np.concatenate(
                [img.feature_matrix[0] for img in images])
            labels_train = np.concatenate(
                [img.feature_matrix[1] for img in images])
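
    # note: data_train is overwritten on every batch iteration above, so the
    # scatter matrix below visualizes only the final training batch.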

    # Scatter matrix plot of the training data

    data = pd.DataFrame(data_train,
                        columns=[
                            'Feat. 1', 'Feat. 2', 'Feat. 3', 'Feat. 4',
                            'Feat. 5', 'Feat. 6', 'Feat. 7'
                        ])
    # pd.scatter_matrix and DataFrame.as_matrix were removed in newer pandas;
    # use pd.plotting.scatter_matrix and DataFrame.to_numpy instead
    axes = pd.plotting.scatter_matrix(data, alpha=0.2, diagonal='hist')
    corr = data.corr().to_numpy()
    for i, j in zip(*np.triu_indices_from(axes, k=1)):
        axes[i, j].annotate("%.2f" % corr[i, j], (0.99, 0.98),
                            size=23,
                            xycoords='axes fraction',
                            ha='right',
                            va='top')

    n = len(data.columns)
    for x in range(n):
        for y in range(n):
            # get the axes of the subplot
            ax = axes[x, y]
            # keep the x axis label horizontal and move it below the plot
            ax.xaxis.label.set_rotation(0)
            ax.xaxis.label.set_size(17)
            ax.xaxis.set_label_coords(0.5, -0.3)
            # keep the y axis label horizontal
            ax.yaxis.label.set_rotation(0)
            ax.yaxis.label.set_size(17)
            ax.yaxis.set_label_coords(-0.3, 0.5)
            # make sure the y axis labels are outside the plot area
            ax.yaxis.labelpad = 50

    # plt.title('Scatter Plot Matrix', fontsize=17, y=7.1, x=-2.5)
    plt.show()
Exemplo n.º 18
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    Section of the original main routine. Executes a grid search over the parameters of the probabilistic keyhole filling (PKF) post-processing method:

        Must be done separately in advance:
        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images

        Is carried out in this section of the pipeline
        - Loading of temporary data
        - Grid search of PKF parameter of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    #warnings.warn('Random forest parameters not properly set.')
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=20,
        max_depth=85)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # save results without post-processing
    name = 'no_PP'
    sub_dir = os.path.join(result_dir, name)
    os.makedirs(sub_dir, exist_ok=True)

    for i, img in enumerate(images_test):
        sitk.WriteImage(images_prediction[i],
                        os.path.join(sub_dir, images_test[i].id_ + '_SEG.mha'),
                        True)

    result_file = os.path.join(sub_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(sub_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file,
                               functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()

    # define parameters for the grid search
    post_process_param_list = []
    variance = np.arange(1, 2)
    preserve_background = np.asarray([False])

    # wider search grid used previously:
    # variance = np.arange(0.5, 4.0, 0.5)
    # preserve_background = np.asarray([False, True])

    for bg in preserve_background:
        for var in variance:
            # cast numpy scalars to built-in types so json.dump can serialize them
            post_process_param_list.append({
                'simple_post': True,
                'variance': float(var),
                'preserve_background': bool(bg)
            })

    # execute post-processing with the defined parameters
    for post_process_params in post_process_param_list:

        # create sub-directory for results
        name = 'PP-V-'+ str(post_process_params.get('variance')).replace('.','_') +\
               '-BG-' + str(post_process_params.get('preserve_background'))
        sub_dir = os.path.join(result_dir, name)
        os.makedirs(sub_dir, exist_ok=True)

        # write the used parameters into a text file stored in the result folder
        param_file_path = os.path.join(sub_dir, 'parameter.txt')
        with open(param_file_path, 'w') as param_file:
            json.dump(post_process_params, param_file)

        # post-process segmentation and evaluate with post-processing
        images_post_processed = putil.post_process_batch(images_test,
                                                         images_prediction,
                                                         images_probabilities,
                                                         post_process_params,
                                                         multi_process=False)

        for i, img in enumerate(images_test):
            evaluator.evaluate(
                images_post_processed[i],
                img.images[structure.BrainImageTypes.GroundTruth],
                img.id_ + '-PP')
            # save results
            sitk.WriteImage(
                images_post_processed[i],
                os.path.join(sub_dir, images_test[i].id_ + '_SEG-PP.mha'),
                True)

        # save all results in csv file
        result_file = os.path.join(sub_dir, 'results.csv')
        writer.CSVWriter(result_file).write(evaluator.results)

        print('\nSubject-wise results...')
        writer.ConsoleWriter().write(evaluator.results)

        # report also mean and standard deviation among all subjects
        result_summary_file = os.path.join(sub_dir, 'results_summary.csv')
        functions = {'MEAN': np.mean, 'STD': np.std}
        writer.CSVStatisticsWriter(
            result_summary_file, functions=functions).write(evaluator.results)
        print('\nAggregated statistic results...')
        writer.ConsoleStatisticsWriter(functions=functions).write(
            evaluator.results)

        # clear results such that the evaluator is ready for the next evaluation
        evaluator.clear()
Exemplo n.º 19
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())
    pre_process_params = {
        'skullstrip_pre': True,
        'normalization_pre': True,
        'registration_pre': True,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True
    }

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    #warnings.warn('Random forest parameters not properly set.')
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],
        n_estimators=10,
        max_depth=10)

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir, LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(
            probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test,
                                                     images_prediction,
                                                     images_probabilities,
                                                     post_process_params,
                                                     multi_process=False)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i],
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(
            images_post_processed[i],
            os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # use two writers to report the results
    os.makedirs(result_dir, exist_ok=True)  # ensure the result directory exists
    result_file = os.path.join(result_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file,
                               functions=functions).write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(
        evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
Exemplo n.º 20
0
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction

    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,  # (sic) key name kept as the pipeline expects it
        'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]
    }  # previously used label percentages: [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data,
                                     pre_process_params,
                                     multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1]
                                   for img in images]).squeeze()

    # compute the sum over all ground-truth images for one label
    label = putil.LabelImageTypes.AMYGDALA
    img_summed = util.compute_label_dist(images, label)
    # save the result
    sitk.WriteImage(
        img_summed,
        os.path.join(result_dir, 'groundtruth_sum_' + label.name + '.mha'),
        True)

    # store the pre-processed feature matrix and label vector to file
    with open('data_train_reduced2.pckl', 'wb') as file_id:
        pickle.dump(data_train, file_id)
    with open('labels_train_reduced.pckl', 'wb') as file_id:
        pickle.dump(labels_train, file_id)
    print('-' * 5, 'Preprocessed images stored')
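
    # (Sketch.) A later pipeline stage could reload the stored matrices with:
    #   with open('data_train_reduced2.pckl', 'rb') as f:
    #       data_train = pickle.load(f)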

    # idea (left disabled): visualize img_summed as a contour plot, e.g. with
    # matplotlib on sitk.GetArrayFromImage(img_summed)

    # print how many samples of each label class were selected by the mask
    util.print_class_count(labels_train)
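
# Aside: a minimal sketch of what a class-count helper like
# util.print_class_count could do; the output format is an assumption.
import numpy as np

def print_class_count_sketch(labels):
    classes, counts = np.unique(labels, return_counts=True)
    for cls, cnt in zip(classes, counts):
        print('label {}: {} samples ({:.2%})'.format(cls, cnt, cnt / labels.size))

# print_class_count_sketch(np.array([0, 0, 1, 2, 2, 2]))
# label 0: 2 samples (33.33%)
# label 1: 1 samples (16.67%)
# label 2: 3 samples (50.00%)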
Example No. 21
def main(FLAGS, trees, nodes):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # generate a model directory named after the forest parameters
    # (the directory must be empty: TensorFlow continues training an existing model otherwise)
    t = 'DF_trees_' + str(trees) + '_nodes_' + str(nodes)
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    pre_process_params = {'zscore_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = trees
    df_params.max_nodes = nodes
    df_params.model_dir = model_dir
    forest = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        # list slicing clips at the sequence end, so the final partial batch is handled automatically
        batch_data = dict(data_items[batch_index: batch_index + TRAIN_BATCH_SIZE])

        # load images for training and pre-process
        images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)
        print('pre-processing done')

        # generate feature matrix and label vector
        data_train = np.concatenate([img.feature_matrix[0] for img in images])
        labels_train = np.concatenate([img.feature_matrix[1] for img in images])

        if forest is None:
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train
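
    # aside (illustrative): the batching above relies on Python slice clipping,
    # e.g. list(range(10))[8:12] == [8, 9], so no index checks are needed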
    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # list slicing clips at the sequence end, so the final partial batch is handled automatically
        batch_data = dict(data_items[batch_index: batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            probabilities, predictions = forest.predict(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                            img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                         post_process_params, multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)


    # write summary of parameters to results dir
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Training data size: {}'.format(len(data_items)), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
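
# Aside: an illustrative stand-in for statistics.gather_statistics; the
# results.csv layout (one header row, numeric metric columns) is an assumption.
import csv
from collections import defaultdict
import numpy as np

def gather_statistics_sketch(csv_path):
    columns = defaultdict(list)
    with open(csv_path, newline='') as f:
        for row in csv.DictReader(f):
            for name, value in row.items():
                try:
                    columns[name].append(float(value))
                except (TypeError, ValueError):
                    pass  # skip non-numeric columns such as subject IDs
    return {name: {'MEAN': np.mean(vals), 'STD': np.std(vals)}
            for name, vals in columns.items()}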
Example No. 22
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, parameters_file: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """
    start_main = timeit.default_timer()
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    fof_parameters = {'10Percentile': True,
                      '90Percentile': True,
                      'Energy': True,
                      'Entropy': True,
                      'InterquartileRange': True,
                      'Kurtosis': True,
                      'Maximum': True,
                      'MeanAbsoluteDeviation': True,
                      'Mean': True,
                      'Median': True,
                      'Minimum': True,
                      'Range': True,
                      'RobustMeanAbsoluteDeviation': True,
                      'RootMeanSquared': True,
                      'Skewness': True,
                      'TotalEnergy': True,
                      'Uniformity': True,
                      'Variance': True}

    glcm_parameters = {'Autocorrelation': True,
                       'ClusterProminence': True,
                       'ClusterShade': True,
                       'ClusterTendency': True,
                       'Contrast': True,
                       'Correlation': True,
                       'DifferenceAverage': True,
                       'DifferenceEntropy': True,
                       'DifferenceVariance': True,
                       'Id': True,
                       'Idm': True,
                       'Idmn': True,
                       'Idn': True,
                       'Imc1': True,
                       'Imc2': True,
                       'InverseVariance': True,
                       'JointAverage': True,
                       'JointEnergy': True,
                       'JointEntropy': True,
                       'MCC': True,
                       'MaximumProbability': True,
                       'SumAverage': True,
                       'SumEntropy': True,
                       'SumSquares': True}

    pre_process_params = {'skullstrip_pre': True,
                          'normalization_pre': True,
                          'registration_pre': True,
                          'save_features': False,
                          'coordinates_feature': True,
                          'intensity_feature': False,
                          'gradient_intensity_feature': False,
                          'first_order_feature': False,
                          'first_order_feature_parameters': fof_parameters,
                          'HOG_feature': False,
                          'GLCM_features': False,
                          'GLCM_features_parameters': glcm_parameters,
                          'n_estimators': 50,
                          'max_depth': 60,
                          'experiment_name': 'default'
                          }

    # an optional parameters file can override the defaults above
    with open(parameters_file, 'r') as f:
        parameters = json.load(f)
    if parameters:
        pre_process_params = parameters
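
    # illustrative parameters file (hypothetical values); note that the
    # override replaces the defaults wholesale rather than merging key by key:
    #
    #     {"skullstrip_pre": true,
    #      "normalization_pre": true,
    #      "registration_pre": true,
    #      "coordinates_feature": true,
    #      "intensity_feature": true,
    #      "gradient_intensity_feature": true,
    #      "n_estimators": 100,
    #      "max_depth": 10,
    #      "experiment_name": "rf_100trees_depth10"}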

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()
    # debugging: record where NaNs occur before they are replaced
    nan_data_idx = np.argwhere(np.isnan(data_train))
    np.savez('data_train.npz', data_train)
    np.save('data_nan.npy', nan_data_idx)

    # replace NaNs in-place so the classifier receives finite values
    np.nan_to_num(data_train, copy=False)

    # note: max_features equal to the full feature count disables the random
    # feature subsampling that usually decorrelates the trees
    forest = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                n_estimators=pre_process_params['n_estimators'],
                                                max_depth=pre_process_params['max_depth'])

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory named after the experiment
    result_dir = os.path.join(result_dir, pre_process_params['experiment_name'])
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(np.nan_to_num(img.feature_matrix[0], copy=False))
        probabilities = forest.predict_proba(np.nan_to_num(img.feature_matrix[0], copy=False))
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=False)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # use two writers to report the results
    os.makedirs(result_dir, exist_ok=True)  # create the result directory if it does not exist
    result_file = os.path.join(result_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file, functions=functions).write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
    end_main = timeit.default_timer()
    main_time = end_main - start_main

    # writing information on a txt file
    reporter.feature_writer(result_dir, pre_process_params, main_time, 'feature_report')
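
# Aside: a minimal sketch of what a report helper like reporter.feature_writer
# could write (file name and layout are assumptions):
import os

def feature_writer_sketch(result_dir, params, elapsed_seconds, name):
    with open(os.path.join(result_dir, name + '.txt'), 'w') as f:
        f.write('total runtime: {:.1f}s\n'.format(elapsed_seconds))
        for key, value in sorted(params.items()):
            f.write('{}: {}\n'.format(key, value))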