Example #1
    def config_filenames(self, net_type, use_core, keep_spatial_dim=False):
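        # Resolves the weights file manager and the embedding/prediction file
        # manager(s) for the given network type; 'dirRS' returns a dict with
        # separate 'R' (rating) and 'S' (size) prediction files.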
        # init file managers
        Weights = File.Weights(net_type, output_dir=input_dir)
        if use_core:
            if keep_spatial_dim:
                Embed = File.Embed('SP_' + net_type, output_dir=output_dir)
            else:
                Embed = File.Embed(net_type, output_dir=output_dir)
        else:
            if net_type == 'dir':
                Embed = File.Pred(type='malig', pre='dir', output_dir=output_dir)
            elif net_type == 'dirR':
                Embed = File.Pred(type='rating', pre='dirR', output_dir=output_dir)
            elif net_type == 'dirS':
                Embed = File.Pred(type='size', pre='dirS', output_dir=output_dir)
            elif net_type == 'dirRS':
                # assert False # save rating and size in separate files
                Embed = {}
                Embed['R'] = File.Pred(type='rating', pre='dirRS', output_dir=output_dir)
                Embed['S'] = File.Pred(type='size', pre='dirRS', output_dir=output_dir)
            else:
                print('{} not recognized'.format(net_type))
                assert False

        return Weights, Embed
Example #2
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
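    # For each epoch, correlate embedding-space distances with rating-space
    # distances and plot the Pearson and Kendall correlation curves.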
    pear_corr = []
    kend_corr = []
    for e in epochs:
        # pred, labels_test, meta = pickle.load(open(loader.pred_filename(run, epoch=e, post=post), 'br'))
        file = FileManager.Embed(network_type)
        Reg = RatingCorrelator(file.name(run=run, epoch=e, dset=post))

        Reg.evaluate_embed_distance_matrix(method='euclidean',
                                           round=(rating_norm == 'Round'))

        Reg.evaluate_rating_space(norm=rating_norm)
        Reg.evaluate_rating_distance_matrix(method='euclidean')

        Reg.linear_regression()
        # Reg.scatter('embed', 'rating', xMethod="euclidean", yMethod='euclidean', sub=False)
        p, s, k = Reg.correlate_retrieval('embed', 'rating')
        pear_corr.append(p)
        kend_corr.append(k)

    epochs = np.array(epochs)
    pear_corr = np.array(pear_corr)
    kend_corr = np.array(kend_corr)

    plt.figure()
    plt.plot(epochs, pear_corr)
    plt.plot(epochs, kend_corr)
    plt.grid(which='major', axis='y')
    plt.title('embed_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
Example #3
def dir_rating_accuracy(run, post, net_type, epochs, n_groups=5):
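    # Per-property rating accuracy per epoch, averaged over the n_groups
    # cross-validation configurations; reference labels are the per-nodule
    # mean over annotators.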
    #images, predict, meta_data, labels, masks = pred_loader.load(run, epochs[-1], post)
    rating_property = [
        'Subtlety', 'Internalstructure', 'Calcification', 'Sphericity',
        'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'
    ]
    PredFile = FileManager.Pred(type='rating', pre=net_type)
    acc = np.zeros([len(epochs), n_groups, len(rating_property)])
    for c, run_config in enumerate(
        [run + 'c{}'.format(config) for config in range(n_groups)]):
        predict, valid_epochs, images, meta_data, classes, labels, masks = PredFile.load(
            run=run_config, dset=post)
        labels = np.array([np.mean(t, axis=0) for t in labels])
        for i, e in enumerate(epochs):
            try:
                idx = int(np.argwhere(valid_epochs == e))
            except:
                print('skip epoch {}'.format(e))
                continue
            for ridx, r in enumerate(rating_property):
                acc[i, c, ridx] = accuracy(labels[:, ridx], predict[idx, :,
                                                                    ridx])
    acc = np.mean(acc, axis=1)
    plt.figure()
    plt.title('Rating Acc')
    plt.plot(epochs, acc)
    plt.legend(rating_property)

    return acc
Example #4
def dir_rating_view(run, post, epochs, net_type='dirR', factor=1.0):
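    # Show selected nodules from configuration 'c0' with their reference (L) and
    # predicted (P) rating vectors; the y-label reports the L2 distance of each
    # vector to that of the first selected nodule.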
    # load
    #images, predict, meta_data, labels, masks = pred_loader.load(run, epochs[-1], post)
    PredFile = FileManager.Pred(type='rating', pre=net_type)

    predict, epochs, meta_data, images, classes, labels, masks, _, _, _ = PredFile.load(
        run=run + 'c0', dset=post)
    # prepare
    images = np.squeeze(images)
    labels = np.array([np.mean(l, axis=0) for l in labels])
    labels = np.round(factor * labels).astype('int')
    predict = np.round(factor * predict[-1]).astype('int')
    #plot
    select = [5, 23, 27, 51]
    plt.figure('view_' + run + '_' + post)
    for pid, i in enumerate(select):
        plt.subplot(2, 2, pid + 1)
        plt.imshow(images[i])
        plt.title(
            np.array2string(labels[i], prefix='L') + '\n' +
            np.array2string(predict[i], prefix='P'))
        plt.xticks([])
        plt.yticks([])
        if pid >= 0:
            dl = l2(labels[i], labels[select[0]])
            dp = l2(predict[i], predict[select[0]])
            plt.ylabel("{:.1f}\n{:.1f}".format(dl, dp))
Example #5
    def __init__(self, network = 'dir', pooling='max', categorize=False):
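        # Store the weights/embedding file managers and the default data and
        # network hyper-parameters; the model itself is built later.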
        self.network = network
        self.Weights = FileManager.Weights(network)
        self.Embed = FileManager.Embed(network)

        self.data_size = 144
        self.data_res = '0.5I'  # 'Legacy'
        self.data_sample = 'Normal'

        self.net_in_size = 128
        self.net_input_shape = (self.net_in_size, self.net_in_size, 1)
        self.net_out_size = 128
        self.net_normalize = True
        self.net_pool = pooling
        self.categorize = categorize

        self.model = None
Example #6
def eval_classification(run,
                        net_type,
                        metric,
                        epochs,
                        dset,
                        NN=[7, 11, 17],
                        cross_validation=False,
                        n_groups=5):
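    # kNN classification accuracy over the embedding space: 10-fold
    # (cross_validation=True, averaged over the n_groups splits) or
    # leave-one-out, evaluated for each neighborhood size in NN and each epoch.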
    Embed = FileManager.Embed(net_type)
    Pred_L1O = [[] for i in range(n_groups)]
    valid_epochs = [[] for i in range(n_groups)]
    if cross_validation:
        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding([source], multi_epcch=True)
            for E in epochs:
                # Calc
                pred_l1o = []
                try:
                    for N in NN:
                        pred_l1o.append(
                            Ret.classify_kfold(epoch=E,
                                               n=N,
                                               k_fold=10,
                                               metric=metric))
                    Pred_L1O[i].append(np.array(pred_l1o))
                    valid_epochs[i].append(E)
                except:
                    print("Epoch {} - no calculated embedding".format(E))
            Pred_L1O[i] = np.array(Pred_L1O[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = merge_epochs(valid_epochs,
                                       min_element=max(n_groups - 1, 1))
        P, P_std = mean_cross_validated_index_with_std(Pred_L1O, valid_epochs,
                                                       combined_epochs)

    else:
        for E in epochs:
            # Load
            embed_source = Embed(run, E, dset)
            Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
            Ret.load_embedding(embed_source)
            # Calc
            pred_l1o = []
            for N in NN:
                pred_l1o.append(Ret.classify_leave1out(n=N, metric=metric)[1])
            Pred_L1O.append(np.array(pred_l1o))
            P, P_std = np.mean(Pred_L1O, axis=-1), np.std(Pred_L1O, axis=-1)
        combined_epochs = np.array(epochs)

    return P, P_std, combined_epochs
Example #7
    wRuns = [
        '813c0'
    ]  # ['512cc0', '251c0']  #['064X', '078X', '026'] #['064X', '071' (is actually 071X), '078X', '081', '082']
    wRunsNet = ['dirR']  # ['dirRS', 'dirR']  #, 'dir']
    run_metrics = ['l2']

    select = 0

    rating_normalizaion = 'None'  # 'None', 'Normal', 'Scale'

    doRatingRet = True
    doPCA = False

    if doRatingRet:
        Embed = FileManager.Embed(wRunsNet[select])
        #N = 5
        #testData, validData, trainData = load_nodule_raw_dataset(size=160, res=0.5, sample='Normal')
        ##if dset is 'Train':  data = trainData
        #if dset is 'Test':   data = testData
        #if dset is 'Valid':  data = validData

        #Ret = Retriever(title='Ratings', dset=set)
        #Ret.load_rating(data)
        #et.fit(N)

        #info, nod_ids = Ret.show_ret(15)
        #info, nod_ids = Ret.show_ret(135)
        #info, nod_ids = Ret.show_ret(135)
        #anns = getAnnotation(info, nodule_ids=nod_ids, return_all=True)
        #pickle.dump(anns, open('tmp.p', 'bw'))
Example #8
in_size = 128
out_size = 128
normalize = True

load = False
evaluate = False
force = False

# 0     Test
# 1     Validation
# 2     Training
DataSubSet = 2

run = '000'
epoch = 5
WeightsFile = FileManager.Weights('siamR').name(run, epoch=epoch)

pred_file_format = './output/embed/pred_siam{}_E{}_{}.p'


def pred_filename(run, epoch, post):
    return pred_file_format.format(run, epoch, post)


## ========================= ##
## ======= Load Data ======= ##
## ========================= ##

if DataSubSet == 0:
    post = "Test"
elif DataSubSet == 1:
Example #9
import numpy as np
import matplotlib.pyplot as plt
from Network import FileManager
from Analysis import Retriever
from Analysis.metric_space_indexes import k_occurrences

net_type = 'dirD'
config = 0
dset = 'Valid'
K = 2

res = {}
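# For each run ('821' Pearson-loss, '822' KL-loss), fit the retriever on the
# epoch-60 embedding and report the three strongest hubs by k-occurrence.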
for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print(run + ': ' + label + '\n' + '*' * 20)
    embed_source = FileManager.Embed(net_type)(run + 'c{}'.format(config),
                                               dset)

    Ret = Retriever(title='{}'.format(''), dset=dset)
    Ret.load_embedding(embed_source, multi_epcch=True)
    Ret.fit(metric='euclidean', epoch=60)
    indices, distances = Ret.ret_nbrs()

    # get Hubs
    k_occ = k_occurrences(indices, K)
    hubs_indices = np.argsort(k_occ)[-3:]
    res[run] = hubs_indices, indices
    print([(a, b) for a, b in zip(hubs_indices, k_occ[hubs_indices])])

for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print(run + ': ' + label + '\n' + '*' * 20)
Example #10
def dir_rating_correlate(run,
                         post,
                         epochs,
                         rating_norm='none',
                         clustered_rating_distance=True,
                         n_groups=5):
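    # Correlate predicted-rating distances with reference-rating distances
    # (Pearson and Kendall) per epoch, averaged over the n_groups
    # configurations, and plot the smoothed curves with dashed +/- bands;
    # result caching is currently disabled ('SKIPPING' / 'NO DUMP').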
    pear_corr = [[] for i in range(n_groups)]
    kend_corr = [[] for i in range(n_groups)]
    plot_data_filename = './Plots/Data/rating_correlation_{}{}.p'.format(
        'dirR', run)
    try:
        print('SKIPPING')
        assert False
        pear_corr, kend_corr = pickle.load(open(plot_data_filename, 'br'))
        print("Loaded results for {}".format(run))
    except:
        print("Evaluating Rating Correlation for {}".format(run))
        for c, run_config in enumerate(
            [run + 'c{}'.format(config) for config in range(n_groups)]):
            PredFile = FileManager.Pred(type='rating', pre='dirR')
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                                   multi_epoch=True)
            for e in epochs:
                Reg.evaluate_embed_distance_matrix(
                    method='euclidean',
                    epoch=e,
                    round=(rating_norm == 'Round'))
                Reg.evaluate_rating_space(norm=rating_norm)
                Reg.evaluate_rating_distance_matrix(
                    method='euclidean',
                    clustered_rating_distance=clustered_rating_distance)

                Reg.linear_regression()
                # Reg.scatter('embed', 'rating', xMethod="euclidean", yMethod='euclidean', sub=False)
                p, s, k = Reg.correlate_retrieval(
                    'embed',
                    'rating',
                    round=(rating_norm == 'Round'),
                    verbose=False)
                pear_corr[c].append(p)
                kend_corr[c].append(k)

            pear_corr[c] = np.array(pear_corr[c])
            kend_corr[c] = np.array(kend_corr[c])

        pear_corr = np.mean(pear_corr, axis=0)
        kend_corr = np.mean(kend_corr, axis=0)
        print('NO DUMP')
        #pickle.dump((pear_corr, kend_corr), open(plot_data_filename, 'bw'))

    pear_corr = smooth(pear_corr[:, 0]), smooth(pear_corr[:, 1])
    kend_corr = smooth(kend_corr[:, 0]), smooth(kend_corr[:, 1])
    epochs = np.array(epochs)

    plt.figure('Rating2Rating:' + run + '-' + post)
    q = plt.plot(epochs, pear_corr[0])
    plt.plot(epochs,
             pear_corr[0] + pear_corr[1],
             color=q[0].get_color(),
             ls='--',
             alpha=alpha)
    plt.plot(epochs,
             pear_corr[0] - pear_corr[1],
             color=q[0].get_color(),
             ls='--',
             alpha=alpha)

    q = plt.plot(epochs, kend_corr[0])
    plt.plot(epochs,
             kend_corr[0] + kend_corr[1],
             color=q[0].get_color(),
             ls='--',
             alpha=alpha)
    plt.plot(epochs,
             kend_corr[0] - kend_corr[1],
             color=q[0].get_color(),
             ls='--',
             alpha=alpha)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', '', '', 'kendall', '', ''])
Example #11
def dir_size_rmse(run,
                  post,
                  epochs,
                  net_type,
                  dist='RMSE',
                  weighted=False,
                  n_groups=5):
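    # Per-epoch RMSE between predicted and reference nodule size, averaged over
    # the n_groups configurations, then smoothed and plotted; the assert below
    # forces re-evaluation instead of loading the cached result.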

    plot_data_filename = './Plots/Data/size{}_{}{}.p'.format(
        dist, net_type, run)
    try:
        assert False
        R = pickle.load(open(plot_data_filename, 'br'))
        print("Loaded results for {}".format(run))
    except:
        print("Evaluating Size RMSE for {}".format(run))
        PredFile = FileManager.Pred(type='size', pre=net_type)
        R = np.zeros([len(epochs), n_groups])

        for c, run_config in enumerate(
            [run + 'c{}'.format(config) for config in range(n_groups)]):
            predict, valid_epochs, images, meta_data, classes, labels, masks = PredFile.load(
                run=run_config, dset=post)
            labels = np.array(labels)
            for i, e in enumerate(epochs):
                print(" Epoch {}:".format(e))
                try:
                    idx = int(np.argwhere(valid_epochs == e))
                except:
                    print('skip epoch {}'.format(e))
                    continue
                pred = predict[idx]
                '''
                W = np.ones(labels.shape[0])
                if weighted:
                    assert False
                    w = np.histogram(labels[:, r], bins=np.array(range(64))+0.5)[0]
                    w = 1 - w / np.sum(w)
                    pred_w = np.minimum(np.maximum(pred[:, r], 1.0), max_val)
                    W = w[np.round(pred_w - 1).astype('int')]
                if dist=='RMSE':
                    err = W.dot((pred - labels)**2)
                    err = np.sqrt(err/np.sum(W))
                elif dist=='ABS':
                    err = W.dot(np.abs(pred - labels)) / np.sum(W)
                else:
                    print('{} unrecognized distance'.format(dist))
                    assert False
                '''
                rmse = np.sqrt(np.mean(np.sum((pred - labels)**2, axis=1)))
                R[i, c] = rmse
        R = np.mean(R, axis=1)
        pickle.dump(R, open(plot_data_filename, 'bw'))

    # smooth
    R = smooth(R)

    plt.figure(dist + ' ' + net_type + run + '-' + post)
    plt.title('Size ' + dist)
    plt.plot(epochs, R)
    #plt.legend(rating_property+['Overall'])
    plt.grid(True, axis='y')

    return R
Example #12
def eval_embed_space(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5):
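    # Metric-space analysis of the embedding per epoch and cross-validation
    # split: hubness, symmetry, Kumar index, concentration/contrast and
    # feature/sample correlations. Only hubness and symmetry are currently
    # averaged across splits; the remaining indexes are returned as zero
    # placeholders.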
    # init
    Embed = FileManager.Embed(net_type)
    embed_source = [
        Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
    ]
    idx_hubness, idx_symmetry, idx_concentration, idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr \
        = [[] for i in range(n_groups)], [[] for i in range(n_groups)], [[] for i in range(n_groups)], \
          [[] for i in range(n_groups)], [[] for i in range(n_groups)], [[] for i in range(n_groups)], \
          [[] for i in range(n_groups)]
    valid_epochs = [[] for i in range(n_groups)]
    # calculate
    Ret = Retriever(title='{}'.format(run), dset=dset)
    for i, source in enumerate(embed_source):
        embd, epoch_mask = Ret.load_embedding(source, multi_epcch=True)
        for e in epochs:
            try:
                epoch_idx = np.argwhere(e == epoch_mask)[0][0]
                Ret.fit(metric=metric, epoch=e)
                indices, distances = Ret.ret_nbrs()
                # hubness
                idx_hubness[i].append(calc_hubness(indices))
                #   symmetry
                idx_symmetry[i].append(calc_symmetry(indices))
                # kumar index
                tau, l_e = kumar(distances, res=0.01)
                idx_kummar[i].append(tau)
                # concentration & contrast
                idx_concentration[i].append(concentration(distances))
                idx_contrast[i].append(relative_contrast_imp(distances))
                valid_epochs[i].append(e)
                # correlation
                idx_featCorr[i].append(features_correlation(embd[epoch_idx]))
                idx_sampCorr[i].append(samples_correlation(embd[epoch_idx]))
            except:
                print("Epoch {} - no calculated embedding".format(e))
        valid_epochs[i] = np.array(valid_epochs[i])
        idx_hubness[i] = np.array(list(zip(*idx_hubness[i])))
        idx_symmetry[i] = np.array(list(zip(*idx_symmetry[i])))
        idx_concentration[i] = np.array(list(zip(*idx_concentration[i])))
        idx_contrast[i] = np.array(list(zip(*idx_contrast[i])))
        idx_kummar[i] = np.array([idx_kummar[i]])
        idx_featCorr[i] = np.array([idx_featCorr[i]])
        idx_sampCorr[i] = np.array([idx_sampCorr[i]])

    combined_epochs = [
        i for i, c in enumerate(np.bincount(np.concatenate(valid_epochs)))
        if c > 3
    ]

    idx_hubness = mean_cross_validated_index(idx_hubness, valid_epochs,
                                             combined_epochs)
    idx_symmetry = mean_cross_validated_index(idx_symmetry, valid_epochs,
                                              combined_epochs)
    idx_concentration = np.zeros_like(
        idx_hubness
    )  #mean_cross_validated_index(idx_concentration, valid_epochs, combined_epochs)
    idx_contrast = np.zeros_like(
        idx_hubness
    )  # mean_cross_validated_index(idx_contrast, valid_epochs, combined_epochs)
    idx_kummar = np.zeros_like(
        idx_hubness
    )  # mean_cross_validated_index(idx_kummar, valid_epochs, combined_epochs)
    idx_featCorr = np.zeros_like(
        idx_hubness
    )  # mean_cross_validated_index(idx_featCorr, valid_epochs, combined_epochs)
    idx_sampCorr = np.zeros_like(
        idx_hubness
    )  # mean_cross_validated_index(idx_sampCorr, valid_epochs, combined_epochs)

    return combined_epochs, idx_hubness, idx_symmetry, idx_concentration, idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr
Example #13
from Network import FileManager
from experiments import CrossValidationManager
import numpy as np
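# Sanity check: report the size and label distribution (benign/malignant/unknown)
# of each dataset group, then compare expected and actual set sizes for every
# cross-validation run.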

run = '888'

DataGroups = [
    FileManager.Dataset('Primary', i, './Dataset').load(size=160, res=0.5)
    for i in range(5)
]
expected_datast_size = [len(d) for d in DataGroups]
label_stats = [
    np.bincount([element['label'] for element in DataGroups[i]])
    for i in range(5)
]
[
    print('group id {} => total:{}, benign:{}, malig:{}, unknown:{}'.format(
        i, expected_datast_size[i], label_stats[i][0], label_stats[i][1],
        label_stats[i][2])) for i in range(5)
]

cv = CrossValidationManager('RET')

for i in range(10):  # conf in conf_names:
    conf = cv.get_run_id(i)
    #dataset_size = len(FileManager.DatasetFromPredication().load(run='{}c{}'.format(run, conf), goal='Test', epoch=70))
    dataset_size = len(
        FileManager.Embed(pre='dirRD').load(run='{}c{}'.format(run, conf),
                                            dset='Valid'))
    group_id = cv.get_test(i)
    print('#{} ({})- expected: {}, actual: {} (group id = {})'.format(
Example #14
def dir_rating_params_correlate(run,
                                post,
                                epochs,
                                net_type,
                                rating_norm='none',
                                configurations=list(range(5)),
                                USE_CACHE=True,
                                DUMP=True):
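    # Per-property Pearson correlation of the predictions with the reference
    # ratings (Internalstructure and Calcification are masked out), averaged
    # over the configurations, smoothed and plotted against fixed reference
    # values (dashed lines).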

    reference = [0.7567, 0.5945, 0.7394, 0.5777, 0.6155, 0.7445,
                 0.6481]  # 0, 0,
    rating_property = [
        'Subtlety', 'Sphericity', 'Margin', 'Lobulation', 'Spiculation',
        'Texture', 'Malignancy'
    ]  # 'Internalstructure', 'Calcification',
    mask = [True, False, False, True, True, True, True, True, True]

    pear_corr = [[] for i in configurations]
    plot_data_filename = './Plots/Data/rating_params_correlation_{}{}.p'.format(
        net_type, run)
    try:
        if USE_CACHE is False:
            print('SKIPPING')
            assert False
        pear_corr = pickle.load(open(plot_data_filename, 'br'))
        print("Loaded results for {}".format(run))
    except:
        print("Evaluating Rating Correlation for {}".format(run))
        for c, run_config in enumerate(
            [run + 'c{}'.format(config) for config in configurations]):
            PredFile = FileManager.Pred(type='rating', pre=net_type)
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                                   multi_epoch=True,
                                   conf=c)
            Reg.evaluate_rating_space(norm=rating_norm)
            #valid_epochs = []
            for e in epochs:
                p = Reg.correlate_to_ratings(epoch=e,
                                             round=(rating_norm == 'Round'))
                if not np.all(np.isfinite(p[mask])):
                    print('nan at: conf={}, epoch={}'.format(c, e))
                pear_corr[c].append(p[mask])
                #valid_epochs.append(e)

            pear_corr[c] = np.array(pear_corr[c])

        pear_corr = np.mean(pear_corr, axis=0)
        if DUMP:
            pickle.dump(pear_corr, open(plot_data_filename, 'bw'))
        else:
            print('NO DUMP')

    for i, e in enumerate(epochs):
        print("=" * 20)
        print(" Epoch {}:".format(e))
        print("-" * 20)
        for p, property in enumerate(rating_property):
            print("\t{}: \t{:.2f}".format(property, pear_corr[i, p]))
        #print("\t" + ("-" * 10))
        #print("\toverall: \t{:.2f}".format(R[i, 9]))

    for p in range(pear_corr.shape[1]):
        pear_corr[:, p] = smooth(pear_corr[:, p], window_length=5, polyorder=2)
    epochs = np.array(epochs)

    plt.figure('RatingParams2Rating:' + run + '-' + post)
    q = plt.plot(epochs, pear_corr, linewidth=2.5)
    for line, ref in zip(q, reference):
        plt.plot(epochs,
                 ref * np.ones_like(epochs),
                 color=line.get_color(),
                 ls='--',
                 linewidth=4,
                 alpha=0.6)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(rating_property)
Example #15
def dir_rating_rmse(run,
                    post,
                    epochs,
                    net_type,
                    dist='RMSE',
                    weighted=False,
                    configurations=list(range(5)),
                    USE_CACHE=True,
                    DUMP=True):
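    # Per-property rating error (RMSE or weighted ABS) per epoch, averaged over
    # the configurations; column 9 holds the overall RMSE across all nine
    # properties. Results are cached, smoothed and plotted.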
    #images, predict, meta_data, labels, masks = pred_loader.load(run, epochs[-1], post)
    rating_property = [
        'Subtlety', 'Internalstructure', 'Calcification', 'Sphericity',
        'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'
    ]

    plot_data_filename = './Plots/Data/{}_{}{}.p'.format(dist, net_type, run)
    try:
        if USE_CACHE is False:
            print("skipping...")
            assert False
        R = pickle.load(open(plot_data_filename, 'br'))
        print("Loaded results for {}".format(run))
    except:
        print("Evaluating RMSE for {}".format(run))
        PredFile = FileManager.Pred(type='rating', pre=net_type)
        R = np.zeros([len(epochs), 10, len(configurations)])

        for c, run_config in enumerate(
            [run + 'c{}'.format(config) for config in configurations]):
            predict, valid_epochs, images, meta_data, classes, labels, masks, conf, rating_weights, z = PredFile.load(
                run=run_config, dset=post)
            labels = np.array([np.mean(l, axis=0) for l in labels])
            for i, e in enumerate(epochs):
                #print("=" * 20)
                #print(" Epoch {}:".format(e))
                #print("-" * 20)
                try:
                    idx = int(np.argwhere(valid_epochs == e))
                except:
                    print('skip epoch {}'.format(e))
                    continue
                pred = predict[idx]

                for r, max_val in zip(range(9), [5, 5, 6, 5, 5, 5, 5, 5, 5]):
                    #print("{}:".format(rating_property[r]))
                    W = np.ones(labels.shape[0])
                    if weighted:
                        w = np.histogram(labels[:, r],
                                         bins=np.array(range(max_val + 1)) +
                                         0.5)[0]
                        #print("\tcounts - {}".format(w))
                        w = 1 - w / np.sum(w)
                        w /= (len(w) - 1)
                        assert np.abs(w.sum() - 1) < 1e-6
                        #print("\tweighted by {}".format(w))
                        #pred_w = np.minimum(np.maximum(pred[:, r], 1.0), max_val)
                        W = w[np.round(labels[:, r] - 1).astype('int')]
                    if dist == 'RMSE':
                        err = W.dot((pred[:, r] - labels[:, r])**2)
                        err = np.sqrt(err / np.sum(W))
                    elif dist == 'ABS':
                        err = W.dot(
                            np.abs(pred[:, r] - labels[:, r])) / np.sum(W)
                    else:
                        print('{} unrecognized distance'.format(dist))
                        assert False
                    #print("rmse: \t{:.2f}".format(err))
                    R[i, r, c] = err
                rmse = np.sqrt(np.mean(np.sum((pred - labels)**2, axis=1)))
                #print("=" * 20)
                #print("overall: \t{:.2f}".format(rmse))
                R[i, 9, c] = rmse
        R = np.mean(R, axis=2)
        for i, e in enumerate(epochs):
            print("=" * 20)
            print(" Epoch {}:".format(e))
            print("-" * 20)
            for p, property in enumerate(rating_property):
                print("\t{}: \t{:.2f}".format(property, R[i, p]))
            print("\t" + ("-" * 10))
            print("\toverall: \t{:.2f}".format(R[i, 9]))

        if DUMP:
            pickle.dump(R, open(plot_data_filename, 'bw'))
        else:
            print("No Dump")

    # smooth
    for r in range(9):
        R[:, r] = smooth(R[:, r])
    plt.figure(dist + ' ' + run + '-' + post)
    plt.title('Rating ' + dist)
    plt.plot(epochs, R)
    plt.legend(rating_property + ['Overall'])
    plt.grid(True, axis='y')

    return R
Example #16
def eval_retrieval(run,
                   net_type,
                   metric,
                   epochs,
                   dset,
                   NN=[7, 11, 17],
                   cross_validation=False,
                   n_groups=5):
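    # Retrieval precision and an F1-style score (harmonic mean of the benign and
    # malignant precisions) for each neighborhood size in NN and each epoch,
    # optionally averaged over the cross-validation groups.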
    Embed = FileManager.Embed(net_type)
    Prec, Prec_b, Prec_m = [[] for i in range(n_groups)
                            ], [[] for i in range(n_groups)
                                ], [[] for i in range(n_groups)]
    valid_epochs = [[] for i in range(n_groups)]
    if cross_validation:
        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding(source, multi_epcch=True)
            for E in epochs:
                # Calc
                prec, prec_b, prec_m = [], [], []
                try:
                    Ret.fit(np.max(NN), metric=metric, epoch=E)
                except:
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                for N in NN:
                    p, pb, pm = Ret.evaluate_precision(n=N)
                    prec.append(p)
                    prec_b.append(pb)
                    prec_m.append(pm)
                Prec[i].append(np.array(prec))
                Prec_b[i].append(np.array(prec_b))
                Prec_m[i].append(np.array(prec_m))
                valid_epochs[i].append(E)

            Prec[i] = np.array(Prec[i])
            Prec_b[i] = np.array(Prec_b[i])
            Prec_m[i] = np.array(Prec_m[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = epochs  # merge_epochs(valid_epochs)
        P, P_std = mean_cross_validated_index_with_std(Prec, valid_epochs,
                                                       combined_epochs)
        #P, P_std = np.mean(np.mean(Prec, axis=-1), axis=0), np.mean(np.std(Prec, axis=-1), axis=0)
        combined = 2 * np.array(Prec_b) * np.array(Prec_m) / (
            np.array(Prec_b) + np.array(Prec_m))
        #F1, F1_std = np.mean(np.mean(combined, axis=-1), axis=0), np.mean(np.std(combined, axis=-1), axis=0)
        F1, F1_std = mean_cross_validated_index_with_std(
            combined, valid_epochs, combined_epochs)

    else:
        for E in epochs:
            Ret = Retriever(title='', dset='')
            if cross_validation:
                embed_source = [
                    Embed(run + 'c{}'.format(c), E, dset)
                    for c in range(n_groups)
                ]
            else:
                embed_source = Embed(run, E, dset)
            Ret.load_embedding(embed_source)

            prec, prec_b, prec_m = [], [], []
            Ret.fit(np.max(NN), metric=metric)
            for N in NN:
                p, pm, pb = Ret.evaluate_precision(n=N)
                prec.append(p)
                prec_b.append(pb)
                prec_m.append(pm)
            Prec.append(np.array(prec))
            Prec_b.append(np.array(prec_b))
            Prec_m.append(np.array(prec_m))

        Prec = np.array(Prec)
        Prec_m = np.array(Prec_m)
        Prec_b = np.array(Prec_b)
        f1 = 2 * Prec_b * Prec_m / (Prec_b + Prec_m)
        P, P_std = np.mean(Prec, axis=-1), np.std(Prec, axis=-1)
        F1, F1_std = np.mean(f1, axis=-1), np.std(f1, axis=-1)

    return P, P_std, F1, F1_std, valid_epochs
Example #17
def run(choose_model="DIR",
        epochs=200,
        config=0,
        skip_validation=False,
        no_training=False,
        config_name='LEGACY',
        load_data_from_predications=False):
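    # Build and train (or, when no_training is set, only configure) the selected
    # architecture: DIR / DIR_RATING (direct), SIAM / SIAM_RATING (siamese) or
    # TRIPLET, using either a data generator or preloaded arrays.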

    np.random.seed(1337)
    random.seed(1337)
    tf.set_random_seed(1234)
    K.set_session(tf.Session(graph=tf.get_default_graph()))

    ## --------------------------------------- ##
    ## ------- General Setup ----------------- ##
    ## --------------------------------------- ##

    #data
    dataset_type = 'Primary'
    data_size = 160
    if no_training:
        data_size = 160
    res = 0.5  # 'Legacy' #0.7 #0.5 #'0.5I'
    sample = 'Normal'  # 'UniformNC' #'Normal' #'Uniform'
    data_run = '813'
    data_epoch = 70
    return_predicted_ratings = not no_training
    use_gen = True
    #model
    model_size = 128
    input_shape = (model_size, model_size, 1)
    normalize = True
    out_size = 128
    do_augment = True
    if no_training:
        do_augment = False
    preload_weight = None

    print("-" * 30)
    print("Running {} for --** {} **-- model, with #{} configuration".format(
        "training" if not no_training else "validation", choose_model, config))
    if load_data_from_predications:
        print(
            "\tdata_run = {}, \n\tdata_epoch = {}, return_predicted_ratings = {}"
            .format(data_run, data_epoch, return_predicted_ratings))
    else:
        print(
            "\tdata_size = {},\n\tmodel_size = {},\n\tres = {},\n\tdo_augment = {}"
            .format(data_size, model_size, res, do_augment))
        print("\tdataset_type = {}".format(dataset_type))
    print("-" * 30)

    model = None

    data_augment_params = {
        'max_angle': 30,
        'flip_ratio': 0.5,
        'crop_stdev': 0.15,
        'epoch': 0
    }

    data_loader = build_loader(
        size=data_size,
        res=res,
        sample=sample,
        dataset_type=dataset_type,
        config_name=config_name,
        configuration=config,
        run=data_run,
        epoch=data_epoch,
        load_data_from_predictions=load_data_from_predications,
        return_predicted_ratings=return_predicted_ratings)

    ## --------------------------------------- ##
    ## ------- Prepare Direct Architecture ------- ##
    ## --------------------------------------- ##

    if choose_model == "DIR":
        # run = '300'  # SPIE avg-pool (data-aug, balanced=False,class_weight=True)
        # run = '301'  # SPIE max-pool (data-aug, balanced=False,class_weight=True)
        # run = '302'  # SPIE rmac-pool (data-aug, balanced=False,class_weight=True)

        # run = 'zzz'

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           normalize=normalize,
                           pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                balanced=False,
                data_size=data_size,
                model_size=model_size,
                batch_size=32,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=True,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample)
            images_train, labels_train, class_train, masks_train, _ = prepare_data_direct(
                dataset[2], num_of_classes=2)
            images_valid, labels_valid, class_valid, masks_valid, _ = prepare_data_direct(
                dataset[1], num_of_classes=2)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=32)

    if choose_model == "DIR_RATING":

        ### CLEAN SET
        # run = '800'  # rmac conf:size
        # run = '801'  # rmac conf:none
        # run = '802'  # rmac conf:rating-std
        # run = '803'  # max conf:none

        ### PRIMARY SET
        # run = '810'  # rmac conf:size
        # run = '811'  # rmac conf:none
        # run = '812'  # rmac conf:rating-std
        # run = '813'  # max conf:none
        # run = '814'  # max separated_prediction

        # run = '820'  # dirD, max, logcoh-loss
        # run = '821'  # dirD, max, pearson-loss
        # run = '822'  # dirD, max, KL-rank-loss
        # run = '823'  # dirD, max, poisson-rank-loss
        # run = '824'  # dirD, max, categorical-cross-entropy-loss
        # run = '825'  # dirD, max, ranked-pearson-loss
        # run = '826'  # dirD, max, KL-normalized-rank-loss
        # run = '827'  # dirD, max, KL-normalized-rank-loss (local-scaled) softmax
        # run = '828'  # dirD, max, KL-normalized-rank-loss (local-scaled) l2
        # run = '829'  # dirD, max, ranked-pearson-loss (local-scaled)

        # run = '830'  # dirD, rmac, logcoh-loss
        # run = '831'  # dirD, rmac, pearson-loss
        # run = '832'  # dirD, rmac, KL-rank-loss
        # run = '833'  # dirD, rmac, poisson-rank-loss
        # run = '834'  # dirD, rmac, categorical-cross-entropy-loss
        # run = '835'  # dirD, rmac, ranked-pearson-loss
        # run = '836'  # dirD, rmac, KL-normalized-rank-loss

        # run = '841'  # dirD, max, pearson-loss    pre:dirR813-50
        # run = '842b'  # dirD, max, KL-rank-loss    pre:dirR813-50  (b:lr-4)
        # run = '846'  # dirD, max, KL-norm-loss    pre:dirR813-50

        # run = '851'  # dirD, rmac, pearson-loss   pre:dirR813-50
        # run = '852'  # dirD, rmac, KL-rank-loss   pre:dirR813-50
        # run = '856'  # dirD, rmac, KL-norm-loss   pre:dirR813-50

        # run = '860'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:7)
        # run = '861'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:17)
        # run = '862'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:28)
        # run = '863'  # dirD, max, KL-loss    pre:dirR813-50  (b:lr-4, freeze:39)

        # run = '870'  # dirRD, max, KL-loss    schd: 00
        # run = '871'  # dirRD, max, KL-loss    schd: 01
        # run = '872'  # dirRD, max, KL-loss    schd: 02
        # run = '873'  # dirRD, max, KL-loss    schd: 03
        # run = '874'  # dirRD, max, KL-loss    schd: 04
        # run = '875'  # dirRD, max, KL-loss    schd: 05
        # run = '876'  # dirRD, max, KL-loss    schd: 06
        # run = '877b'  # dirRD, max, KL-loss    schd: 07b
        # run = '878'  # dirRD, max, KL-loss    schd: 08
        # run = '879'  # dirRD, max, KL-loss    schd: 09

        # run = '888'  # dirRD, max, KL-loss    schd: 08, on partial data SUP
        # run = '882'  # dirRD, max, KL-loss    schd:

        run = '898b'  # dirRD, max, KL-loss    schd: 08, on partial data UNSUP
        # run = '890b'  # dirR
        # run = '892b'  # dirRD, max, KL-loss

        # run = 'ccc'

        obj = 'rating_distance-matrix'  # 'distance-matrix' 'rating' 'rating-size'

        rating_scale = 'none'
        reg_loss = None  # {'SampleCorrelation': 0.0}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'
        batch_size = 32

        epoch_pre = 50
        preload_weight = None
        # FileManager.Weights('dirR', output_dir=input_dir).name(run='813c{}'.format(config), epoch=epoch_pre)
        # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=epoch_pre)

        model = DirectArch(miniXception_loader,
                           input_shape,
                           output_size=out_size,
                           objective=obj,
                           separated_prediction=False,
                           normalize=normalize,
                           pooling='max',
                           l1_regularization=None,
                           regularization_loss=reg_loss,
                           batch_size=batch_size)

        if preload_weight is not None:
            model.load_core_weights(preload_weight, 39)
            # 7:    freeze 1 blocks
            # 17:   freeze 2 blocks
            # 28:   freeze 3 blocks
            # 39:   freeze 4 blocks

        model.model.summary()

        should_use_scheduale = (reg_loss is not None) or (obj in [
            'rating_size', 'rating_distance-matrix'
        ])

        # scheduale 00:     870
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 80, 'weights': [0.1, 0.9]}] \
        #    if should_use_scheduale else []

        # scheduale 01:     871
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #         {'epoch': 50, 'weights': [0.0, 1.0]}] \
        #    if should_use_scheduale else []

        # scheduale 02:     872
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #       {'epoch': 50, 'weights': [0.1, 0.9]}] \
        #   if should_use_scheduale else []

        # scheduale 03:     873
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #        {'epoch': 50, 'weights': [0.5, 0.5]},
        #         {'epoch': 100, 'weights': [0.1, 0.9]}] \
        #    if should_use_scheduale else []

        # scheduale 04:     874
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #        {'epoch': 50, 'weights': [0.0, 0.1]}] \
        #   if should_use_scheduale else []

        # scheduale 05:     875
        # sched = [{'epoch': 00, 'weights': [1.0, 0.0]},
        #        {'epoch': 50, 'weights': [0.0, 1.0]},
        #         {'epoch': 100, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale 06:     876
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 60, 'weights': [0.1, 0.1]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]},
        #         {'epoch': 100, 'weights': [0.0, 0.05]}] \
        #    if should_use_scheduale else []

        # scheduale 07b:     877b
        # sched = [{'epoch': 00,  'weights': [1.0, 0.0]},
        #         {'epoch': 50,  'weights': [0.0, 1.0]},
        #         {'epoch': 80,  'weights': [0.0, 0.1]},
        #         {'epoch': 100, 'weights': [0.0, 0.05]}] \
        #    if should_use_scheduale else []

        # scheduale 08b:     878
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale 09:     879
        # sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
        #         {'epoch': 20, 'weights': [0.7, 0.3]},
        #         {'epoch': 40, 'weights': [0.5, 0.5]},
        #         {'epoch': 60, 'weights': [0.3, 0.3]},
        #         {'epoch': 80, 'weights': [0.0, 0.1]}] \
        #    if should_use_scheduale else []

        # scheduale      892/882
        sched = [{'epoch': 00, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [0.5, 0.5]},
                 {'epoch': 120, 'weights': [0.0, 0.1]}] \
            if should_use_scheduale else []

        loss = dict()
        loss['predictions'] = 'logcosh'
        loss['predictions_size'] = 'logcosh'
        loss['distance_matrix'] = distance_matrix_rank_loss_adapter(
            K_losses.kullback_leibler_divergence, 'KL')
        # distance_matrix_logcosh
        # pearson_correlation
        # distance_matrix_rank_loss_adapter(K_losses.kullback_leibler_divergence, 'KL')
        # distance_matrix_rank_loss_adapter(K_losses.poisson, 'poisson')
        # distance_matrix_rank_loss_adapter(K_losses.categorical_crossentropy, 'entropy')
        model.compile(
            learning_rate=1e-3 if (preload_weight is None) else 1e-4,
            loss=loss,
            scheduale=sched
        )  # mean_squared_logarithmic_error, binary_crossentropy, logcosh

        if use_gen:
            generator = DataGeneratorDir(
                data_loader,
                val_factor=0 if skip_validation else 1,
                data_size=data_size,
                model_size=model_size,
                batch_size=batch_size,
                objective=obj,
                rating_scale=rating_scale,
                weighted_rating=('distance-matrix' in obj),
                balanced=False,
                do_augment=do_augment,
                augment=data_augment_params,
                use_class_weight=False,
                use_confidence=False)
            model.load_generator(generator)
        else:
            dataset = load_nodule_dataset(size=data_size,
                                          res=res,
                                          sample=sample,
                                          dataset_type=dataset_type)
            images_train, labels_train, masks_train = prepare_data_direct(
                dataset[2], objective='rating', rating_scale=rating_scale)
            images_valid, labels_valid, masks_valid = prepare_data_direct(
                dataset[1], objective='rating', rating_scale=rating_scale)
            images_train = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_train, masks_train)
            ])
            images_valid = np.array([
                crop_center(im, msk, size=model_size)[0]
                for im, msk in zip(images_valid, masks_valid)
            ])
            model.load_data(images_train,
                            labels_train,
                            images_valid,
                            labels_valid,
                            batch_size=batch_size)

    ## --------------------------------------- ##
    ## ------- Prepare Siamese Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model == "SIAM":
        # run = '300'  # l1, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '301'  # l1, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '302'  # l1, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '310'  # l2, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '311'  # l2, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '312'  # l2, rmac-pool (data-aug, balanced=True, class_weight=False)
        # run = '320'  # cos, avg-pool (data-aug, balanced=True, class_weight=False)
        # run = '321'  # cos, max-pool (data-aug, balanced=True, class_weight=False)
        # run = '322b'  # cos, rmac-pool (data-aug, balanced=True, class_weight=False)

        # b/c - changed margin-loss params
        # run = '313c'  # l2, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '314c'  # l2, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '323c'  # cos, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)
        # run = '324c'  # cos, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False)

        # run = 'zzz'

        batch_size = 64 if local else 128

        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=True,
                                      objective="malignancy",
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         batch_size=batch_size,
                         distance='l2',
                         normalize=normalize,
                         pooling='msrmac')
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)
        if use_gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    if choose_model == "SIAM_RATING":
        ### clean set
        # run = '400'  # l2-rmac no-conf
        # run = '401'  # cosine-rmac no-conf
        # run = '402'  # l2-rmac conf
        # run = '403'  # cosine-rmac conf
        # run = '404'  # l2-max no-conf
        # run = '405'  # cosine-max no-conf

        ### primary set
        # run = '410'  # l2-rmac no-conf
        # run = '411'  # cosine-rmac no-conf
        # run = '412'  # l2-rmac conf
        # run = '413'  # cosine-rmac conf
        # run = '414'  # l2-max no-conf
        # run = '415'  # cosine-max no-conf

        run = 'zzz'

        obj = 'rating'  # rating / size / rating_size
        batch_size = 16 if local else 64
        reg_loss = None  # {'SampleCorrating_clusters_distance_and_stdrelation': 0.1}  # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation'

        epoch_pre = 60
        preload_weight = None  # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=70)

        should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size')
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 30, 'weights': [0.4, 0.6]},
                 {'epoch': 60, 'weights': [0.6, 0.4]},
                 {'epoch': 80, 'weights': [0.9, 0.1]},
                 {'epoch': 100, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        '''
        sched = [{'epoch': 00, 'weights': [0.1, 0.9]},
                 {'epoch': 20, 'weights': [0.4, 0.6]},
                 {'epoch': 30, 'weights': [0.6, 0.4]},
                 {'epoch': 50, 'weights': [0.9, 0.1]},
                 {'epoch': 80, 'weights': [1.0, 0.0]}] \
            if should_use_scheduale else []
        # model
        generator = DataGeneratorSiam(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      train_facotr=2,
                                      val_factor=0 if skip_validation else 3,
                                      balanced=False,
                                      objective=obj,
                                      weighted_rating=True,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)

        model = SiamArch(miniXception_loader,
                         input_shape,
                         output_size=out_size,
                         objective=obj,
                         batch_size=batch_size,
                         distance='cosine',
                         normalize=normalize,
                         pooling='rmac',
                         regularization_loss=reg_loss,
                         l1_regularization=False)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3,
                      decay=0,
                      loss='logcosh',
                      scheduale=sched)  # mean_squared_error, logcosh
        model.load_generator(generator)

    ## --------------------------------------- ##
    ## ------- Prepare Triplet Architecture ------ ##
    ## --------------------------------------- ##

    if choose_model == "TRIPLET":

        # run = '000'  # rmac softplus, b16
        # run = '001'  # rmac hinge, b16, pre:dirR813-50
        # run = '002'  # rmac hinge, b32, pre:dirR813-50
        # run = '003'  # rmac hinge, b64, pre:dirR813-50
        # run = '004'  # rmac hinge, b128, pre:dirR813-50
        # run = '005'  # rmac hinge, b64, pre:dirR813-50
        run = '006'  # rmac rank, b64, pre:dirR813-50

        # run = 'zzz'

        objective = 'rating'
        use_rank_loss = True

        batch_size = 16 if local else 64

        gen = True
        epoch_pre = 50
        preload_weight = FileManager.Weights(
            'dirR', output_dir=input_dir).name(run='813c{}'.format(config),
                                               epoch=epoch_pre)

        # model
        model = TripArch(miniXception_loader,
                         input_shape,
                         objective=objective,
                         output_size=out_size,
                         distance='l2',
                         normalize=True,
                         pooling='msrmac',
                         categorize=use_rank_loss)

        if preload_weight is not None:
            model.load_core_weights(preload_weight)
        model.model.summary()
        model.compile(learning_rate=1e-3, decay=0)

        generator = DataGeneratorTrip(data_loader,
                                      data_size=data_size,
                                      model_size=model_size,
                                      batch_size=batch_size,
                                      objective=objective,
                                      balanced=(objective == 'malignancy'),
                                      categorize=use_rank_loss,
                                      val_factor=0 if skip_validation else 1,
                                      train_factor=2,
                                      do_augment=do_augment,
                                      augment=data_augment_params,
                                      use_class_weight=False,
                                      use_confidence=False)
        if gen:
            model.load_generator(generator)
        else:
            imgs_trn, lbl_trn = generator.next_train().__next__()
            imgs_val, lbl_val = generator.next_val().__next__()
            model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val)

    ## --------------------------------------- ##
    ## -------      RUN             ------ ##
    ## --------------------------------------- ##

    cnf_id = config if config_name == 'LEGACY' else CrossValidationManager(
        config_name).get_run_id(config)
    run_name = '{}{}c{}'.format('', run, cnf_id)
    print('Current Run: {}'.format(run_name))
    if no_training:
        model.last_epoch = epochs
        model.run = run_name
    else:
        model.train(run=run_name,
                    epoch=(0 if preload_weight is None else epoch_pre),
                    n_epoch=epochs,
                    gen=use_gen,
                    do_graph=False)

    return model
Example #18
def eval_correlation(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     objective='rating',
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5,
                     seq=False):
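    # For each cross-validation split and epoch, correlate embedding distances
    # with malignancy (or size, per `objective`) and with the full rating
    # vectors, aggregating Pearson/Kendall statistics over the merged epochs;
    # rating distance matrices are cached under output/.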

    Embed = FileManager.Embed(net_type)

    if cross_validation:
        # Load
        if n_groups > 1:
            embed_source = [
                Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
            ]
        else:
            embed_source = [Embed(run + 'c{}'.format(c), dset) for c in [1]]

        valid_epochs = [[] for i in range(n_groups)]
        Pm, Km, Pr, Kr = [[] for i in range(n_groups)
                          ], [[] for i in range(n_groups)
                              ], [[] for i in range(n_groups)
                                  ], [[] for i in range(n_groups)]
        PmStd, KmStd, PrStd, KrStd = [[] for i in range(n_groups)
                                      ], [[] for i in range(n_groups)
                                          ], [[] for i in range(n_groups)
                                              ], [[] for i in range(n_groups)]

        for c_idx, source in enumerate(embed_source):
            Reg = RatingCorrelator(source,
                                   conf=c_idx,
                                   multi_epoch=True,
                                   seq=seq)

            # load rating data
            cache_filename = 'output/cached_{}_{}_{}.p'.format(
                objective,
                source.split('/')[-1][6:-2], c_idx)
            if not Reg.load_cached_rating_distance(cache_filename):
                print('evaluating rating distance matrix...')
                Reg.evaluate_rating_space(norm=rating_norm,
                                          ignore_labels=False)
                Reg.evaluate_rating_distance_matrix(
                    method=rating_metric,
                    clustered_rating_distance=True,
                    weighted=True)
                Reg.dump_rating_distance_to_cache(cache_filename)
                #print('\tno dump for rating distance matrix...')

            if objective == 'size':
                print('evaluating size distance matrix...')
                Reg.evaluate_size_distance_matrix()

            for E in epochs:
                # Calc
                try:
                    Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
                except Exception:
                    # no calculated embedding stored for this epoch; skip it
                    continue

                pm, _, km = Reg.correlate_retrieval(
                    'embed',
                    'malig' if objective == 'rating' else 'size',
                    verbose=False)
                pr, _, kr = Reg.correlate_retrieval('embed',
                                                    'rating',
                                                    verbose=False)
                valid_epochs[c_idx].append(E)

                Pm[c_idx].append(pm[0])
                Km[c_idx].append(km[0])
                Pr[c_idx].append(pr[0])
                Kr[c_idx].append(kr[0])
                PmStd[c_idx].append(pm[1])
                KmStd[c_idx].append(km[1])
                PrStd[c_idx].append(pr[1])
                KrStd[c_idx].append(kr[1])

            Pm[c_idx] = np.expand_dims(Pm[c_idx], axis=0)
            Km[c_idx] = np.expand_dims(Km[c_idx], axis=0)
            Pr[c_idx] = np.expand_dims(Pr[c_idx], axis=0)
            Kr[c_idx] = np.expand_dims(Kr[c_idx], axis=0)
            PmStd[c_idx] = np.expand_dims(PmStd[c_idx], axis=0)
            KmStd[c_idx] = np.expand_dims(KmStd[c_idx], axis=0)
            PrStd[c_idx] = np.expand_dims(PrStd[c_idx], axis=0)
            KrStd[c_idx] = np.expand_dims(KrStd[c_idx], axis=0)

    else:
        # The non-cross-validation path is disabled (assert False); the code
        # below relies on NN / Prec lists defined elsewhere in the original module.
        assert False
        for E in epochs:
            Ret = Retriever(title='', dset='')
            if cross_validation:
                embed_source = [
                    Embed(run + 'c{}'.format(c), E, dset)
                    for c in range(n_groups)
                ]
            else:
                embed_source = Embed(run, E, dset)
            Ret.load_embedding(embed_source)

            prec, prec_b, prec_m = [], [], []
            Ret.fit(np.max(NN), metric=metric)
            for N in NN:
                p, pm, pb = Ret.evaluate_precision(n=N)
                prec.append(p)
                prec_b.append(pb)
                prec_m.append(pm)
            Prec.append(np.array(prec))
            Prec_b.append(np.array(prec_b))
            Prec_m.append(np.array(prec_m))

    merged_epochs = merge_epochs(valid_epochs,
                                 min_element=max(n_groups - 1, 1))
    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return (np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km), np.squeeze(KmStd),
            np.squeeze(Pr), np.squeeze(PrStd), np.squeeze(Kr), np.squeeze(KrStd),
            np.array(merged_epochs))
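merge_epochs and mean_cross_validated_index / std_cross_validated_index are helpers defined elsewhere in the project; the sketch below is only a guess at the aggregation they perform, based on how they are called above: keep epochs reported by at least min_element folds, then average each fold's value at those epochs (the std variant would be analogous).

import numpy as np

def merge_epochs_sketch(valid_epochs, min_element=1):
    # epochs reported by at least `min_element` of the folds
    all_epochs = np.unique(np.concatenate([np.array(v) for v in valid_epochs]))
    return [int(e) for e in all_epochs
            if sum(e in fold for fold in valid_epochs) >= min_element]

def mean_cross_validated_index_sketch(values, valid_epochs, merged_epochs):
    # values[c] has shape (1, len(valid_epochs[c])); average across the folds
    # that actually reported the given epoch
    means = []
    for e in merged_epochs:
        per_fold = [values[c][0][valid_epochs[c].index(e)]
                    for c in range(len(values)) if e in valid_epochs[c]]
        means.append(np.mean(per_fold))
    return np.array(means)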
Example #19
            run=run,
            net_type=net_type,
            dset=dset,
            metric=metric,
            epochs=epochs,
            cross_validation=True)

        data[run_id, 0] = acc
        data[run_id, 1] = prec
        data[run_id, 2] = index

        dataStd[run_id, 0] = acc_std
        dataStd[run_id, 1] = prec_std
        dataStd[run_id, 2] = index_std

        Embed = FileManager.Embed(net_type)
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in configurations
        ]

        pm, pm_std, km, km_std, pr, pr_std, kr, kr_std, _ = eval_correlation(
            embed_source,
            metric=metric,
            rating_metric='euclidean',
            rating_norm=rating_norm,
            epochs=epochs)

        data[run_id, 3] = pm
        data[run_id, 4] = pr
        #data[run_id, 5] = km
        #data[run_id, 6] = kr
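The allocation of data and dataStd is truncated out of this fragment; a plausible layout (an assumption, not taken from the source) is one row per run and one column per metric slot written above:

import numpy as np

n_runs = 4           # illustrative number of runs being compared
n_metric_slots = 7   # acc, prec, index, pm, pr, km, kr (assumed ordering)

data = np.zeros([n_runs, n_metric_slots])
dataStd = np.zeros([n_runs, n_metric_slots])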
Example #20
    for conf in [1]:  # range(n_groups):
        run = run_id + 'c{}'.format(conf)
        if True:  # toggle: predict ratings (True) or malignancy (False)
            print("Predicting Rating for " + run)
            PredRating = PredictRating(pooling=pooling)
            PredRating.load_dataset(data_subset_id=DataSubSet,
                                    full=full,
                                    include_unknown=include_unknown,
                                    size=128,
                                    rating_scale=rating_scale,
                                    configuration=conf)
            preds = []
            valid_epochs = []
            for e in epochs:
                WeightsFile = FileManager.Weights('dirR').name(run, epoch=e)
                PredFile = FileManager.Pred(type='rating', pre='dirR')
                out_file = PredFile(run=run, dset=post)

                data, out_filename = PredRating.predict_rating(
                    WeightsFile, out_file)
                images_test, pred, meta_test, classes_test, labels_test, masks_test = data
                preds.append(np.expand_dims(pred, axis=0))
            preds = np.concatenate(preds, axis=0)
            pickle.dump((preds, np.array(epochs), meta_test, images_test,
                         classes_test, labels_test, masks_test),
                        open(out_filename, 'bw'))
        else:
            print("Predicting Malignancy for " + run)
            PredMal = PredictMal(pooling=pooling)
            for e in epochs:
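A minimal illustration of the stacking pattern used in the rating branch above: per-epoch predictions of shape (n_samples, n_outputs) get a leading axis and are concatenated into a single (n_epochs, n_samples, n_outputs) array before being pickled; the sizes below are illustrative.

import numpy as np

epoch_preds = [np.random.rand(10, 9) for _ in range(3)]  # 3 epochs, 10 nodules, 9 rating properties
stacked = np.concatenate([np.expand_dims(p, axis=0) for p in epoch_preds], axis=0)
print(stacked.shape)  # (3, 10, 9)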
Example #21
if __name__ == "__main__":
    #
    # Current Metrics:
    #   'chebyshev'
    #   'euclidean'
    #   'cosine'
    #   'correlation'
    #
    # To evaluate the similarity of two distance matrices
    # (a sketch follows this block):
    #   Kendall tau distance
    #   Spearman's rank correlation
    #   Distance Correlation
    from Network import FileManager

    Embed = FileManager.Embed('siam')

    Reg = RatingCorrelator(Embed(run='064X', epoch=30, dset='Valid'))

    Reg.evaluate_embed_distance_matrix(method='euclidean')

    Reg.evaluate_rating_space()
    Reg.evaluate_rating_distance_matrix(method='euclidean')

    Reg.linear_regression()
    Reg.scatter('embed', 'rating', xMethod="euclidean", yMethod='euclidean', sub=True)
    #Reg.scatter('malig', 'rating', yMethod='euclidean', sub=True)
    #Reg.scatter('embed', 'malig', sub=True)
    #Reg.malig_regression(method='euclidean')

    Reg.correlate('malig', 'rating')
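As a self-contained illustration of the comparison listed in the comment block above (not part of the original script): compute two condensed distance matrices with SciPy and correlate them with Spearman's rank correlation and Kendall's tau.

import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import spearmanr, kendalltau

rng = np.random.RandomState(0)
embed = rng.rand(20, 16)    # toy embedding vectors
rating = rng.rand(20, 9)    # toy rating vectors

d_embed = pdist(embed, metric='euclidean')     # condensed pairwise distances
d_rating = pdist(rating, metric='euclidean')

rho, _ = spearmanr(d_embed, d_rating)
tau, _ = kendalltau(d_embed, d_rating)
print('Spearman rho: {:.3f}, Kendall tau: {:.3f}'.format(rho, tau))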
# ===================

inp_size = 144
net_size = 128
out_size = 128
input_shape = (net_size, net_size, 1)
res     = 'Legacy'
sample  = 'Normal' #'UniformNC'

# 0     Test
# 1     Validation
# 2     Training
DataSubSet = 1
dsets = ['Test', 'Valid', 'Train']

Weights = FileManager.Weights('siam')

wRuns = ['078X']
wEpchs= [24]

run = wRuns[0]
epoch = wEpchs[0]

# Load Data
# =================

images, labels, masks, meta = \
                    prepare_data(load_nodule_dataset(size=inp_size, res=res, sample=sample)[DataSubSet],
                                 categorize=False,
                                 reshuffle=False,
                                 return_meta=True,