Exemplo n.º 1
0
    def fit(self,
            n=None,
            metric='l2',
            normalization='None',
            epoch=None,
            label=None):
        """Run a nearest-neighbour query of the embedding against itself.

        Args:
            n: number of neighbours to keep per sample. When None, it is set
                to the number of selected embedding rows minus one.
            metric: distance metric forwarded to NearestNeighbors.
            normalization: method name forwarded to rating_normalize.
            epoch: epoch whose embedding should be used; must be given when
                the instance is flagged as multi-epoch.
            label: when not None, only the rows where ``self.labels == label``
                are used.

        Raises:
            IndexError: if ``epoch`` does not occur in ``self.epochs``.

        Side effects:
            Stores ``self.n``, ``self.indices`` and ``self.distances``.
        """
        self.n = n

        # NOTE: the attribute name `multi_epcch` (sic) is defined outside this
        # method and is therefore kept as-is.
        if self.multi_epcch:
            assert epoch is not None, 'epoch is required for multi-epoch data'
            assert self.epochs is not None, 'self.epochs is not set'
            matches = np.argwhere(epoch == self.epochs)
            if len(matches) == 0:
                # Same exception type the bare [0][0] lookup used to raise,
                # but with a message that names the actual problem.
                raise IndexError(
                    'epoch {} not found in self.epochs'.format(epoch))
            epoch_idx = matches[0][0]
            embedding = self.embedding[epoch_idx]
        else:
            embedding = self.embedding

        if label is not None:
            embedding = embedding[self.labels == label]

        if self.n is None:
            self.n = embedding.shape[0] - 1

        # Normalize once and reuse the result for both fitting and querying.
        normalized = rating_normalize(embedding, normalization)
        nbrs = NearestNeighbors(n_neighbors=(self.n + 1),
                                algorithm='auto',
                                metric=metric).fit(normalized)
        distances, indices = nbrs.kneighbors(normalized)

        # One extra neighbour is requested and the first column discarded --
        # presumably because each sample's closest match is itself.
        self.indices = indices[:, 1:]
        self.distances = distances[:, 1:]
Exemplo n.º 2
0
    def evaluate_rating_space(self, norm='none', ignore_labels=False):
        """Recompute ``self.rating`` and invalidate the cached distance matrix.

        Ratings are derived from ``self.meta_data`` (via calc_rating with
        method='raw') when the concatenated labels are one-dimensional or when
        ``ignore_labels`` is set; otherwise the labels themselves are used.
        Either way each item is passed through rating_normalize with
        ``method=norm``.
        """
        labels_are_flat = np.concatenate(self.labels).ndim == 1
        if not (labels_are_flat or ignore_labels):
            print('calc_from_labels')
            ratings = []
            for lbl in self.labels:
                ratings.append(rating_normalize(lbl, method=norm))
        else:
            print('calc_from_meta')
            ratings = []
            for meta in self.meta_data:
                raw_rating = calc_rating(meta, method='raw')
                ratings.append(rating_normalize(raw_rating, method=norm))
        self.rating = ratings

        # The distance matrix depends on the ratings, so drop the stale one.
        self.rating_distance_matrix = None
Exemplo n.º 3
0
    def fit(self, n=None, metric='l1', normalization='None'):
        """Run a nearest-neighbour query of ``self.embedding`` against itself.

        Args:
            n: number of neighbours to keep per sample. When None, it is set
                to the number of embedding rows minus one.
            metric: distance metric forwarded to NearestNeighbors.
            normalization: method name forwarded to rating_normalize.

        Side effects:
            Stores ``self.n``, ``self.indices`` and ``self.distances``.
        """
        self.n = self.embedding.shape[0] - 1 if n is None else n

        # Normalize once and reuse the result for both fitting and querying.
        normalized = rating_normalize(self.embedding, normalization)
        nbrs = NearestNeighbors(n_neighbors=(self.n + 1),
                                algorithm='auto',
                                metric=metric).fit(normalized)
        distances, indices = nbrs.kneighbors(normalized)

        # One extra neighbour is requested and the first column discarded --
        # presumably because each sample's closest match is itself.
        self.indices = indices[:, 1:]
        self.distances = distances[:, 1:]
def prepare_data(data,
                 rating_format='raw',
                 reshuffle=False,
                 verbose=0,
                 scaling="none"):
    """Unpack a list of dataset entries into per-field collections.

    Every entry is an indexable record laid out as:
        0 'patch'
        1 'mask'
        2 'class'
        3 'info'
        4 'size'
        5 'rating'
        6 'rating_weights'
        7 'z'

    Args:
        data: non-empty sequence of entries in the layout above.
        rating_format: 'raw' keeps the ratings of each entry as they are (and
            also returns the rating weights), 'mean' averages them over
            axis 0, 'w_mean' averages them weighted by entry[6].
        reshuffle: when true, one random permutation is applied to all
            returned collections.
        verbose: when truthy, print a short summary of the data.
        scaling: method name forwarded to rating_normalize.

    Returns:
        Tuple ``(images, ratings, classes, masks, meta, conf, nodule_size,
        rating_weights, z)``. ``rating_weights`` is None unless
        ``rating_format == 'raw'``.

    Raises:
        ValueError: if ``rating_format`` is not one of the supported values.
    """

    def weighted_mean(R, W):
        # Rows of R scaled by their weight W, normalized by the total weight.
        return np.sum(np.diag(W).dot(R) / np.sum(W), axis=0)

    N = len(data)
    old_size = data[0][0].shape

    # ============================
    #   data: images and masks
    # ============================
    if data[0][0].ndim == 2:
        # 2-D patches get a trailing singleton axis
        images = [np.expand_dims(entry[0], axis=-1) for entry in data]
        masks = [np.expand_dims(entry[1], axis=-1) for entry in data]
    else:
        images = [np.array(entry[0]) for entry in data]
        masks = [np.array(entry[1]) for entry in data]

    if verbose:
        print('prepare_data:')
        print("\tImage size changed from {} to {}".format(
            old_size, images[0].shape))
        # Ranges are reported as [min, max]
        print("\tImage Range = [{:.1f}, {:.1f}]".format(
            np.min(images[0]), np.max(images[0])))
        print("\tMasks Range = [{}, {}]".format(np.min(masks[0]),
                                                np.max(masks[0])))

    # ============================
    #   labels: classes and ratings
    # ============================

    classes = np.array([entry[2] for entry in data]).reshape(N, 1)

    rating_weights = None
    if rating_format == 'raw':
        ratings = np.array(
            [rating_normalize(entry[5], scaling) for entry in data])
        rating_weights = np.array([entry[6] for entry in data])
    elif rating_format == 'mean':
        ratings = np.array([
            rating_normalize(np.mean(entry[5], axis=0), scaling)
            for entry in data
        ]).reshape(N, 9)
    elif rating_format == 'w_mean':
        ratings = np.array([
            rating_normalize(weighted_mean(entry[5], entry[6]), scaling)
            for entry in data
        ]).reshape(N, 9)
    else:
        # Raise explicitly: an assert would be stripped under `python -O`
        # and the function would fail later with an unrelated error.
        raise ValueError(
            "Illegal rating_format given ({})".format(rating_format))

    if verbose:
        print("benign:{}, malignant: {}, unknown: {}".format(
            np.count_nonzero(classes == 0), np.count_nonzero(classes == 1),
            np.count_nonzero(classes == 2)))

    # ============================
    #   meta: meta, nodule-size, slice confidence and z-value
    # ============================

    # for nodule-size use the rescaled mask area
    #
    # nodule_size = np.array([entry[4] for entry in data]).reshape(N, 1)
    # sorted_size = np.sort(nodule_size, axis=0).flatten()
    # L = len(sorted_size)
    # tresh = sorted_size[range(0, L, L//5)]
    #
    # Non-zero mask pixels times 0.5 * 0.5 (presumably the per-pixel area at
    # 0.5 resolution -- TODO confirm), then binned by fixed thresholds.
    nodule_size = np.array([np.count_nonzero(q)
                            for q in masks]).reshape(N, 1) * 0.5 * 0.5
    tresh = [0, 15, 30, 60, 120]
    nodule_size = np.digitize(nodule_size, tresh)

    z = np.array([entry[7] for entry in data]).reshape(N, 1)

    # confidence
    # only relevant for full dataset and should first be reconsidered
    # conf = np.array([np.min(entry[6]) for entry in data])
    # mean rating based objective
    conf = 1 - .5 * np.array([
        rating_normalize(np.std(entry[5], axis=0).mean(), scaling)
        for entry in data
    ])

    meta = [entry[3] for entry in data]

    if reshuffle:
        # One shared permutation keeps all collections aligned.
        new_order = np.random.permutation(N)
        # print('permutation: {}'.format(new_order[:20]))
        images = reorder(images, new_order)
        masks = reorder(masks, new_order)
        classes = classes[new_order]
        ratings = ratings[new_order]
        rating_weights = rating_weights[
            new_order] if rating_weights is not None else None
        meta = reorder(meta, new_order)
        nodule_size = nodule_size[new_order]
        z = z[new_order]
        conf = conf[new_order]

    return images, ratings, classes, masks, meta, conf, nodule_size, rating_weights, z

# Load the pre-split nodule patches.
# NOTE(review): pickle.load can execute arbitrary code -- only use it on
# trusted, locally generated files.
filename = 'LIDC/NodulePatches144-0.5-IByMalignancy.p'
with open(filename, 'br') as pickle_file:  # close the handle deterministically
    M, B, U = pickle.load(pickle_file)

# Build (patch, mean rating, label, info, size) tuples from the raw dataset.
# The mask term is weighted by 0.0, so for finite mask values the patch is
# passed to crop_center unchanged; only element [0] of its result is kept.
raw_dataset = load_nodule_raw_dataset(size=144, res=0.5, sample='Normal')[0]
dataset = [(crop_center(entry['patch'] * (1.0 + 0.0 * entry['mask']),
                        entry['mask'], 128)[0], np.mean(entry['rating'],
                                                        axis=0),
            entry['label'], entry['info'], entry['size'])
           for entry in raw_dataset]
#dataset += [(normalize(entry['patch'], mean_, std_, [-1000, 400])*(1.0+0.0*entry['mask']), np.mean(entry['rating'], axis=0), -1, entry['info'], entry['size']) for entry in U[:len(U)//5]]

# Split the tuples into per-field lists.
images = [scale(entry[0]) for entry in dataset]
rating = [entry[1] for entry in dataset]
n_rating = [rating_normalize(entry[1], 'Norm') for entry in dataset]
malig_map = [entry[2] for entry in dataset]
meta_data = [entry[3] for entry in dataset]
size_arr = np.array([entry[-1] for entry in dataset])

# Per-feature minimum and maximum of the normalized ratings.
print(np.min(n_rating, axis=0))
print(np.max(n_rating, axis=0))

# 1) Select References
# ======================

# For every malignancy label, group its entries by size and count the
# members of each size group.
size_map = {}
for mal in np.unique(malig_map):
    size_map[mal] = split_by_size(size_arr[np.array(malig_map) == mal])
    for s in np.unique(size_map[mal]):
        # NOTE(review): cnt is not used in the visible code -- the snippet
        # may continue beyond this point.
        cnt = np.count_nonzero((size_map[mal] == s).astype('uint'))
Exemplo n.º 6
0
 def evaluate_rating_space(self, norm='none'):
     """Recompute ``self.rating`` from the meta data and drop the cached
     rating distance matrix.

     Each item of ``self.meta_data`` is passed to calc_rating with
     method='mean' and the result to rating_normalize with ``method=norm``.
     """
     ratings = []
     for meta in self.meta_data:
         mean_rating = calc_rating(meta, method='mean')
         ratings.append(rating_normalize(mean_rating, method=norm))
     self.rating = ratings

     # The distance matrix depends on the ratings, so drop the stale one.
     self.rating_distance_matrix = None
Exemplo n.º 7
0
    idx_symmetry = np.zeros(len(metrics))
    idx_symmetry_std = np.zeros(len(metrics))
    idx_concentration = np.zeros(len(metrics))
    idx_concentration_std = np.zeros(len(metrics))
    idx_contrast = np.zeros(len(metrics))
    idx_contrast_std = np.zeros(len(metrics))
    idx_kummar = np.zeros(len(metrics))

    rating = []
    for entry in data:
        rating.append(np.mean(entry['rating'], axis=0))
    rating = np.vstack(rating)

    plt.figure()
    plt.subplot(211)
    plt.hist(calc_distance_matrix(rating_normalize(rating, 'Scale'),
                                  method='l2').flatten(),
             bins=500)
    plt.title('L2')
    plt.ylabel('Scaled')
    plt.subplot(212)
    plt.hist(calc_distance_matrix(rating_normalize(rating, 'Norm'),
                                  method='l2').flatten(),
             bins=500)
    #plt.title('l2-norm')
    plt.ylabel('Normalized')

    for metric, m, in zip(metrics, range(len(metrics))):
        plt.figure()
        norm = 'None'
        if len(metric) > 5 and metric[-4:] == 'Norm':
Exemplo n.º 8
0
#metrics =   [ 'euclidean'    # L2
#            , 'chebyshev'    # L-inf
#            , 'cosine'
#            , 'hamming'
#            , 'manhaten'
#            ]

#metrics =   ['cityblock', 'euclidean', 'seuclidean', 'minkowski3', 'chebyshev', 'cosine', 'correlation', 'hamming', 'mahalanobis', 'braycurtis', 'canberra', 'jaccard', 'emd']
# Distance metrics to compare and the rating normalization to apply.
metrics = ['cityblock', 'euclidean', 'chebyshev', 'cosine', 'canberra']
normalization = 'Scale'  # 'None', 'Scale', 'Normal'

# NOTE(review): pickle.load can execute arbitrary code -- only use it on
# trusted, locally generated files.
with open('LIDC/NodulePatches128-0.5.p', 'br') as pickle_file:
    # The context manager closes the file handle once loading finishes.
    dataset = pickle.load(pickle_file)
print("Loaded {} entries".format(len(dataset)))

# Stack the normalized ratings of all entries along the first axis.
Ratings = np.concatenate([
    rating_normalize(entry['rating'], method=normalization)
    for entry in dataset
])
print("Ratings speard over {} annotations".format(Ratings.shape))

# Report the two results test_rules returns for every metric.
for metric in metrics[:]:
    t1, t2 = test_rules(metric)
    print("Metric: {} -\t{}\t{}".format(metric, t1, t2))

for metric in metrics[:]:

    # intra
    # =======
    intra = []
    for entry in dataset: