def run_postprocessing(self):
        """Post-process every predicted segmentation, then drop the folder.

        Each NIfTI path found under ``predictions/`` is paired by position
        with a sub-directory name of ``self.input_dir`` and handed to
        ``self.remove_small_objects``.  Once all predictions have been
        processed, the ``predictions/`` folder is removed from disk.
        """
        print('Post-processing of segmented images...')
        print('Clipping images... ')

        predictions_dir = 'predictions/'
        prediction_paths = util.get_nifti_filepaths(predictions_dir)
        sample_names = util.get_sub_dirs(self.input_dir)

        # Prediction k is matched with sub-directory k, so both listings are
        # indexed with the same counter.
        for idx in tqdm(range(len(prediction_paths))):
            prediction_path = prediction_paths[idx]
            sample_name = sample_names[idx]
            self.remove_small_objects(prediction_path, sample_name,
                                      self.input_dir)

        # Remove the folder created by miscnn
        shutil.rmtree(predictions_dir)
Exemplo n.º 2
0
    def run(self):
        """Cluster the coordinates of every segmentation in ``self.input_dir``.

        For each path returned by
        ``util.get_paths_from_tree(self.input_dir, 'segmentation')``:

        1. the image is loaded and turned into a coordinate matrix with
           ``get_coordinates_from_segmentation``;
        2. ``self.num_clusters`` centroids are drawn from the coordinates
           (with a fixed seed) and refined for ``self.num_iters`` iterations:
           points are assigned with ``get_closest_centroid`` and every
           centroid is replaced by the mean of its members;
        3. each coordinate is mapped back to a voxel with ``invf`` and the
           voxel receives the 1-based cluster index (voxels that are not
           part of the coordinate matrix stay 0);
        4. the label map is stored through ``self.save_clustered_image``.
        """
        # Get filenames
        filenames = util.get_paths_from_tree(self.input_dir, 'segmentation')
        print('\n\nRunning clustering on samples...\n\n')

        # Loop over images
        for i, filename in enumerate(tqdm(filenames)):
            img = nib.load(filename)
            # ``img.get_data()`` is deprecated (removed in nibabel 5);
            # ``np.asanyarray(img.dataobj)`` is its documented equivalent.
            img_data = np.asanyarray(img.dataobj)

            hd = img.header

            # Linear part and translation of the voxel -> world affine
            M = img.affine[:3, :3]
            iM = np.linalg.inv(M)
            abc = img.affine[:3, 3]

            # Get coordinate data of image
            print('\n\nGet coordinate matrix from segmentation.. ')
            data = get_coordinates_from_segmentation(img_data, M, iM, abc)

            # Set random seed for reproducibility
            random.seed(0)

            # Initialise centroids with randomly chosen data points; the
            # fancy index returns a copy, so ``data`` is never modified.
            centroids = data[random.sample(range(data.shape[0]),
                                           self.num_clusters)]

            # Centroid index assigned to each data point
            assigned_centroids = np.zeros(len(data), dtype=np.int32)

            # SSE per iteration (collected for inspection; not used further
            # inside this method)
            sse_list = []

            # Start clustering time
            tic = time.time()

            # Main Loop
            print('\nStart clustering image...')
            for _ in tqdm(range(self.num_iters), leave=False):
                # Get closest centroids to each data point
                assigned_centroids = get_closest_centroid(
                    data[:, None, :], centroids[None, :, :])

                # Compute new centroids.  ``centroids`` holds one centroid
                # per row, so the loop must run over axis 0 (the number of
                # clusters), not over axis 1 (the coordinate dimensions).
                for c in range(centroids.shape[0]):
                    # Get data points belonging to this cluster
                    cluster_members = data[assigned_centroids == c]

                    # An empty cluster has no mean (it would become NaN);
                    # keep its previous centroid instead.
                    if len(cluster_members) == 0:
                        continue

                    # Update the centroid with the mean of its members
                    centroids[c] = cluster_members.mean(axis=0)

                # Compute SSE
                sse = compute_sse(data.squeeze(), centroids.squeeze(),
                                  assigned_centroids)
                sse_list.append(sse)
            print('\tFinished clustering image.')

            # End clustering time
            toc = time.time()

            # Print output
            print("Image clustering-time: " + str(round(toc - tic, 2)))
            print("Image clustering-time per iteration: " +
                  str(round(toc - tic, 2) / self.num_iters))

            print('\nSaving clustered image to nifti label-map... ')
            # Start saving time
            tic = time.time()

            # Save clustering to nifti label-map
            label_map = np.zeros_like(img_data)

            # Group the coordinates by their assigned centroid
            merged = defaultdict(list)
            for a, b in zip(assigned_centroids, data):
                merged[a].append(b)

            for key, cords in merged.items():
                # Labels are 1-based so that 0 remains "no cluster"
                cluster = key + 1
                for cord in cords:
                    ijk = invf(cord[0], cord[1], cord[2], iM, abc)
                    label_map[int(ijk[0]), int(ijk[1]), int(ijk[2])] = cluster

            # End saving time
            toc = time.time()

            # Print output
            print("Time spent saving image to labelmap: " +
                  str(round(toc - tic, 2)))

            folder_name = util.get_sub_dirs(self.input_dir)[i]
            self.save_clustered_image(img, label_map, hd, folder_name)

            print('File saved.')
Exemplo n.º 3
0
def evaluate(model,
             config,
             experiment,
             validation_directory,
             file_identifier=''):
    """Evaluate ``model`` on a labelled validation split and on a test folder.

    The validation directory must contain one sub-directory per class; the
    sorted sub-directory names define the class indices.  Every image is
    classified, a confusion table is accumulated, per-class metrics are
    derived from it and written out through the ``log_*`` helpers.  After
    that every file in ``config['evaluation']['path']`` is classified and
    the prediction, its score and the prediction time are logged.  Finally
    the four result files are attached to ``experiment``.

    Args:
        model: object with a ``predict(batch)`` method; the first row of the
            result is used as the per-class score vector.
        config: nested mapping; reads ``['dataset']['number_of_classes']``,
            ``['image_processing']['image_width' | 'image_height' |
            'image_channels']`` and ``['evaluation']['path']``.
        experiment: object with an ``add_artifact(path)`` method.
        validation_directory: directory holding one sub-directory per class.
        file_identifier: prefix used for the names of all result files.

    Returns:
        dict mapping 'f1', 'rec', 'acc', 'prec', 'spec' and 'mcc' to the mean
        of the respective per-class metric.
    """
    misclassified = {}

    # get number of classes in model
    number_of_classes = config['dataset']['number_of_classes']

    # image dimensions
    image_width = config['image_processing']['image_width']
    image_height = config['image_processing']['image_height']
    image_channels = config['image_processing']['image_channels']

    # get class directory names from validation directory
    class_names = get_sub_dirs(validation_directory)
    class_names.sort()

    # class name <-> class index lookups
    label_index = {
        class_name: index
        for index, class_name in enumerate(class_names)
    }
    index_label = dict(enumerate(class_names))

    # confusion table, indexed as confusion[predicted][actual]
    confusion = np.zeros((number_of_classes, number_of_classes))

    # iterate over each class name
    for class_name in class_names:
        print(f'Starting {class_name}')

        class_dir = os.path.join(validation_directory, class_name)
        true_index = label_index[class_name]

        # iterate over each image in class directory
        for file_name in os.listdir(class_dir):
            image = _load_image_batch(os.path.join(class_dir, file_name),
                                      image_width, image_height,
                                      image_channels)

            with tf.get_default_graph().as_default():
                predictions = model.predict(image)[0]
                prediction = np.argmax(predictions)

            # the ground truth is the name of the class directory
            if prediction != true_index:
                misclassified.setdefault(class_name, {})[file_name] = {
                    'prediction': index_label[prediction],
                    'predictions': {
                        index_label[class_index]: pred
                        for class_index, pred in enumerate(predictions)
                    }
                }

            # update confusion table
            confusion[prediction][true_index] += 1

    # Per-class counts.  Rows of ``confusion`` are predictions and columns
    # are ground truth, therefore:
    #   row sum    - diagonal = predicted as the class but actually another
    #                           class (false positives)
    #   column sum - diagonal = actually the class but predicted as another
    #                           class (false negatives)
    TP = np.diag(confusion)
    FP = confusion.sum(axis=1) - TP
    FN = confusion.sum(axis=0) - TP
    TN = confusion.sum() - (FP + FN + TP)

    print(f"True Positives: { TP }")
    print(f"True Negatives: { TN }")
    print(f"False Positives: { FP }")
    print(f"False Negatives: { FN }")

    # calculate metrics based on FP, FN, TP and TN; NaNs (e.g. from empty
    # classes) are replaced by 0
    f1 = np.nan_to_num(f1score(TP, TN, FP, FN))
    rec = np.nan_to_num(recall(TP, TN, FP, FN))
    acc = np.nan_to_num(accuracy(TP, TN, FP, FN))
    prec = np.nan_to_num(precision(TP, TN, FP, FN))
    spec = np.nan_to_num(specificity(TP, TN, FP, FN))
    mcc = np.nan_to_num(matthews_correlation_coefficient(TP, TN, FP, FN))

    # bundle metrics into dictionary
    metrics = {
        'FP': FP,
        'FN': FN,
        'TP': TP,
        'TN': TN,
        'f1': f1,
        'rec': rec,
        'acc': acc,
        'prec': prec,
        'spec': spec,
        'mcc': mcc
    }

    summary_file = f'{file_identifier}_split_evaluation_summary.txt'

    # save misclassified images to file together with class
    for class_name in misclassified:
        log_misclassifications(
            f'{file_identifier}_class_misclassifications.txt',
            misclassified[class_name], class_name, index_label)

    # write class legend to results file
    log_class_legend(summary_file, class_names)

    # write confusion table to results file
    log_confusion_table(summary_file, confusion)

    # write model summary to results file
    log_model_results(summary_file, metrics, file_identifier)

    # write summaries for each class
    for class_name in class_names:
        class_index = label_index[class_name]
        class_metrics = {
            key: value[class_index]
            for key, value in metrics.items()
        }

        # write class summary to results file
        log_class_results(f'{file_identifier}_class_results.txt',
                          class_metrics, class_name, class_index)

    evaluation_path = config['evaluation']['path']

    print("starting test validation...")

    for file_name in os.listdir(evaluation_path):
        image = _load_image_batch(os.path.join(evaluation_path, file_name),
                                  image_width, image_height, image_channels)

        # only the forward pass is timed
        with tf.get_default_graph().as_default():
            start_time = time.time()
            prediction = model.predict(image)[0]
            prediction_time = time.time() - start_time

        prediction_index = np.argmax(prediction)
        prediction_label = index_label[prediction_index]

        log_file_evaluation(f'{file_identifier}_test_evaluation_results.txt',
                            file_name, prediction_label,
                            prediction[prediction_index], prediction_time)

    # add evaluation files to experiment
    # NOTE(review): presumably the log_* helpers write below '../tmp/' --
    # confirm, and confirm the misclassification file exists when nothing
    # was misclassified.
    experiment.add_artifact(
        f'../tmp/{file_identifier}_split_evaluation_summary.txt')
    experiment.add_artifact(
        f'../tmp/{file_identifier}_class_misclassifications.txt')
    experiment.add_artifact(f'../tmp/{file_identifier}_class_results.txt')
    experiment.add_artifact(
        f'../tmp/{file_identifier}_test_evaluation_results.txt')

    # return evaluation metrics
    return {
        'f1': np.mean(f1),
        'rec': np.mean(rec),
        'acc': np.mean(acc),
        'prec': np.mean(prec),
        'spec': np.mean(spec),
        'mcc': np.mean(mcc)
    }


def _load_image_batch(path, image_width, image_height, image_channels):
    """Read the image at ``path`` as RGB and return it as a batch of one.

    The image is resized, reshaped to
    ``(1, image_width, image_height, image_channels)`` and divided by 255.
    """
    image = imread(path, mode='RGB')
    image = imresize(image, (image_width, image_height, image_channels))
    image = image.reshape(1, image_width, image_height, image_channels)
    return np.true_divide(image, 255.)
    def run(self, batch_size):
        """Extract one feature per cluster for every image in the input tree.

        For each image / cluster-map pair the method samples grid patches
        with ``extract_patches.patch_sampler``, loads them through two
        ``DataGenerator`` instances, runs ``self.encoder.predict`` on the
        image patches and keeps, per cluster label, the largest standard
        deviation of the encoder output among the patches assigned to that
        cluster.  The resulting values (labels ``1..self.num_clusters``, in
        order) are written with ``util.save_features``.

        A patch is assigned to a cluster according to
        ``self.cluster_selection``:

        * ``'center'``: the label at the centre voxel of the cluster patch;
        * ``'highest_share'``: the most frequent label in the cluster patch.

        Patches assigned to label 0 are ignored.

        Args:
            batch_size: batch size for the generators and for
                ``self.encoder.predict``.

        Raises:
            ValueError: if ``self.cluster_selection`` is not one of the two
                supported values (previously this surfaced as an unbound
                ``cluster`` variable inside the patch loop).
        """
        if self.cluster_selection not in ('center', 'highest_share'):
            raise ValueError(
                "cluster_selection must be 'center' or 'highest_share', "
                "got {!r}".format(self.cluster_selection))

        # Get filenames
        img_filenames = util.get_paths_from_tree(self.input_dir, 'imaging')
        clus_filenames = util.get_paths_from_tree(self.input_dir, 'cluster')
        folder_names = util.get_sub_dirs(self.input_dir)

        # A leading patch dimension of 1 marks 2-D patches; their spatial
        # extent is then given by the second and third entry.
        is_2d = self.patch_size[0] == 1
        if is_2d:
            spatial_sizes = self.patch_size[1:3]
        else:
            spatial_sizes = self.patch_size[:3]
        num_spatial_dims = len(spatial_sizes)

        # Index of the centre voxel inside one patch.  ``(n - 1) // 2`` equals
        # the former ``int(n / 2 - 1)`` for even sizes and is the true centre
        # for odd sizes.  The trailing 0 selects the single channel so that
        # the lookup returns a scalar for 2-D and 3-D patches alike.
        center_index = tuple(
            (int(size) - 1) // 2 for size in spatial_sizes) + (0, )

        def merge_batches(generator):
            """Stack all batches of ``generator`` into one channel-last array."""
            # The first element of every generator item is the batch
            batches = np.array([item[0] for item in generator])
            return batches.reshape(
                batches.shape[0] * batches.shape[1],
                *batches.shape[2:2 + num_spatial_dims], 1)

        # Stays None when there is nothing to process, so the final clean-up
        # can be skipped instead of failing on an unbound name.
        save_dir = None

        print('\n\nRunning feature extraction on samples.. \n\n')
        for i in tqdm(range(len(img_filenames))):
            # Create out dir for patches
            save_dir = util.create_fe_patch_dir(self.patch_size)

            # The sampler expects lists of filenames
            img_ids, label_ids = extract_patches.patch_sampler(
                img_filenames=[img_filenames[i]],
                labelmap_filenames=[clus_filenames[i]],
                patch_size=self.patch_size,
                out_dir=save_dir,
                sampler_type='grid',
                voxel_spacing=self.resample,
                patch_overlap=self.patch_overlap,
                save_patches=True,
                inference=True)

            # Save info
            info = {
                'patch_size': self.patch_size,
                'cluster_selection': self.cluster_selection,
                'num_clusters': self.num_clusters,
                'patch_overlap': self.patch_overlap
            }
            util.save_dict(info, self.out_dir, 'info.csv')

            # Create generators (unshuffled, so image and cluster patches
            # are read in the same order)
            image_generator = DataGenerator(img_ids,
                                            data_dir=save_dir,
                                            shuffle=False,
                                            batch_size=batch_size)
            cluster_generator = DataGenerator(label_ids,
                                              data_dir=save_dir,
                                              shuffle=False,
                                              batch_size=batch_size)

            # Load all data from the generators
            image_data = merge_batches(image_generator)
            cluster_data = merge_batches(cluster_generator)
            print(image_data.shape)

            # Make predictions
            pred = self.encoder.predict(image_data,
                                        verbose=1,
                                        batch_size=batch_size)

            # Start feature extraction time
            tic = time.time()
            feature = dict.fromkeys(range(1, self.num_clusters + 1), 0)
            print('\nStart extracting features from image...')
            for j in range(len(image_data)):
                if self.cluster_selection == 'center':
                    # Label of the centre voxel
                    cluster = int(cluster_data[(j, ) + center_index])
                else:
                    # 'highest_share': most frequent label in the patch
                    cluster_patch = cluster_data[j].astype(int)
                    cluster = np.argmax(np.bincount(cluster_patch.flat))

                # Keep the largest std seen for each non-zero cluster
                if cluster != 0:
                    std = np.std(pred[j])
                    max_std = feature.get(cluster)
                    if std > max_std:
                        feature[cluster] = std

            # End feature extraction time
            toc = time.time()
            print("Image feature-extraction-time: " + str(round(toc - tic, 2)))

            # Save features to disk
            util.save_features(list(feature.values()), folder_names[i],
                               self.out_dir)

        # Delete last img patches
        if save_dir is not None:
            shutil.rmtree(save_dir)