Exemplo n.º 1
0
def predict_RF(train_pats, test_pats, fscores=None, do_plot_predictions=False,
               stratified=False):
    """
    Train a random forest on the training patients and predict tumor voxels
    for the given test patients.

    A single model is trained with train_RF_model (30 trees). Each test
    patient is then loaded and scored separately with predict_and_evaluate;
    finally the overall confusion matrix is printed and dice_scores is called
    on the concatenated labels/predictions of all test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results
                   (forwarded to dice_scores).
        do_plot_predictions -- if True, a per-patient image file name under
                   'plots' is passed to predict_and_evaluate; otherwise None
                   is passed.
        stratified -- forwarded to dp.load_patients. If True, every training
                   sample is additionally weighted by the ratio between the
                   frequency of its class in the whole dataset
                   (dp.class_counts) and its frequency in the loaded sample.

    Nothing is returned; results are printed and handed to dice_scores.
    """
    # Only the features and the labels of the training set are needed here.
    xtr, ytr, _, _, _ = dp.load_patients(train_pats, stratified)

    if stratified:
        # Class frequencies in the whole dataset
        class_freqs = dp.class_counts / float(sum(dp.class_counts))
        print("Class frequencies: %s" % (class_freqs * 100,))
        # Class frequencies in the sample (labels 0..4, one bin per label)
        sample_counts = np.histogram(ytr, range(6))[0]
        sample_freqs = sample_counts / float(sum(sample_counts))
        print("Sample frequencies: %s" % (sample_freqs * 100,))
        # Re-weight each sample so that the weighted sample follows the
        # class distribution of the whole dataset.
        weights = np.ones(len(ytr))
        for i in range(5):
            weights[ytr == i] = class_freqs[i] / sample_freqs[i]
    else:
        weights = None
    model = train_RF_model(xtr, ytr, n_trees=30, sample_weight=weights)

    print("\n----------------------------------\n")

    # Per-patient results are collected in lists and joined once after the
    # loop; concatenating inside the loop would copy all previous voxels on
    # every iteration. The empty float arrays keep the dtype of the joined
    # arrays identical to incremental concatenation onto np.zeros(0).
    y_chunks = [np.zeros(0)]
    pred_chunks = [np.zeros(0)]
    # patient_idxs_te[k] is the offset of the k-th test patient's first
    # voxel in the joined arrays; the last entry is the total voxel count.
    patient_idxs_te = [0]
    print("Test users:")
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None)

        if do_plot_predictions:
            pif = os.path.join('plots', 'pat%d_slices_0_RF.png' % te_pat)
        else:
            pif = None
        pred = predict_and_evaluate(
                model, x, y, coord=coord, dim_list=[dim], plot_confmat=False,
                ret_probs=False, patient_idxs=None,
                pred_img_fname=pif)

        y_chunks.append(y)
        pred_chunks.append(pred)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))

    yte = np.concatenate(y_chunks)
    predte = np.concatenate(pred_chunks)

    print("\nOverall confusion matrix:")
    cm = confusion_matrix(yte, predte)
    print(cm)

    dice_scores(yte, predte, patient_idxs=patient_idxs_te,
                label='Overall dice scores (RF):', fscores=fscores)
Exemplo n.º 2
0
def predict_RF(train_pats,
               test_pats,
               fscores=None,
               do_plot_predictions=False,
               stratified=False):
    """
    Train a random forest on the training patients and predict tumor voxels
    for the given test patients.

    A single model is trained with train_RF_model (30 trees). Each test
    patient is then loaded and scored separately with predict_and_evaluate;
    finally the overall confusion matrix is printed and dice_scores is called
    on the concatenated labels/predictions of all test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results
                   (forwarded to dice_scores).
        do_plot_predictions -- if True, a per-patient image file name under
                   'plots' is passed to predict_and_evaluate; otherwise None
                   is passed.
        stratified -- forwarded to dp.load_patients. If True, every training
                   sample is additionally weighted by the ratio between the
                   frequency of its class in the whole dataset
                   (dp.class_counts) and its frequency in the loaded sample.

    Nothing is returned; results are printed and handed to dice_scores.
    """
    # Only the features and the labels of the training set are needed here.
    xtr, ytr, _, _, _ = dp.load_patients(train_pats, stratified)

    if stratified:
        # Class frequencies in the whole dataset
        class_freqs = dp.class_counts / float(sum(dp.class_counts))
        print("Class frequencies: %s" % (class_freqs * 100,))
        # Class frequencies in the sample (labels 0..4, one bin per label)
        sample_counts = np.histogram(ytr, range(6))[0]
        sample_freqs = sample_counts / float(sum(sample_counts))
        print("Sample frequencies: %s" % (sample_freqs * 100,))
        # Re-weight each sample so that the weighted sample follows the
        # class distribution of the whole dataset.
        weights = np.ones(len(ytr))
        for i in range(5):
            weights[ytr == i] = class_freqs[i] / sample_freqs[i]
    else:
        weights = None
    model = train_RF_model(xtr, ytr, n_trees=30, sample_weight=weights)

    print("\n----------------------------------\n")

    # Per-patient results are collected in lists and joined once after the
    # loop; concatenating inside the loop would copy all previous voxels on
    # every iteration. The empty float arrays keep the dtype of the joined
    # arrays identical to incremental concatenation onto np.zeros(0).
    y_chunks = [np.zeros(0)]
    pred_chunks = [np.zeros(0)]
    # patient_idxs_te[k] is the offset of the k-th test patient's first
    # voxel in the joined arrays; the last entry is the total voxel count.
    patient_idxs_te = [0]
    print("Test users:")
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None)

        if do_plot_predictions:
            pif = os.path.join('plots', 'pat%d_slices_0_RF.png' % te_pat)
        else:
            pif = None
        pred = predict_and_evaluate(model,
                                    x,
                                    y,
                                    coord=coord,
                                    dim_list=[dim],
                                    plot_confmat=False,
                                    ret_probs=False,
                                    patient_idxs=None,
                                    pred_img_fname=pif)

        y_chunks.append(y)
        pred_chunks.append(pred)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))

    yte = np.concatenate(y_chunks)
    predte = np.concatenate(pred_chunks)

    print("\nOverall confusion matrix:")
    cm = confusion_matrix(yte, predte)
    print(cm)

    dice_scores(yte,
                predte,
                patient_idxs=patient_idxs_te,
                label='Overall dice scores (RF):',
                fscores=fscores)
Exemplo n.º 3
0
def predict_two_stage(train_pats, test_pats, fscores=None,
                      do_plot_predictions=False, stratified=False, n_trees=30,
                      dev_pats=None, use_mrf=True, resolution=1, n_voxels=30000,
                      mat_dir=None, fresh_models=False, load_hog=False):
    """
    Predict tumor voxels for given test patients with a two-stage model.

    The first model separates tumor (labels > 0) from non-tumor voxels; the
    second model is trained on tumor voxels only and assigns one of the tumor
    labels. For every test patient the first-stage mask is post-processed
    with dp.post_process, the second model labels the voxels inside the mask,
    and the labels are then smoothed either with dp.mrf or with
    dp.post_process. Confusion matrices are printed and dice_scores is called
    per patient and over all test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results
                   (forwarded to dice_scores and to the optimize_* helpers).
        do_plot_predictions -- if True, pp.plot_predictions is called for
                   every test patient with an image name under 'results'.
        stratified -- forwarded to dp.load_patients and the optimize_*
                   helpers. If True, training samples are weighted by the
                   ratio of whole-dataset class frequency (dp.class_counts)
                   to sample class frequency.
        n_trees -- number of trees passed to train_RF_model.
        dev_pats -- list of patient IDs used to tune the MRF potential, the
                   closing radius and the first-stage threshold. If empty or
                   None, fixed default values are used instead.
        use_mrf -- if True, smooth second-stage labels with dp.mrf; otherwise
                   with dp.post_process.
        resolution -- forwarded to the dp loaders and the optimize_* helpers.
        n_voxels -- forwarded to dp.load_patients for the training set (test
                   patients are always loaded with n_voxels=None).
        mat_dir -- if not None, directory of the per-patient '.mat' file name
                   passed to pp.plot_predictions.
        fresh_models -- if True, train new models even when both model files
                   already exist under 'models'.
        load_hog -- forwarded to the dp loaders and the optimize_* helpers.

    Nothing is returned; results are printed and handed to dice_scores.
    """
    if dev_pats is None:
        dev_pats = []

    # Lower bound on the size of a predicted tumor; if the thresholded
    # first-stage prediction is smaller, the most probable voxels are used.
    min_voxels = 3000

    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    # NOTE(review): the file names do not encode `stratified`, so models
    # loaded from disk may have been trained with a different setting --
    # confirm whether that is intended.
    fname_args = (seed, len(train_pats), len(test_pats), n_trees, n_voxels,
                  model_str)
    model1_fname = os.path.join(
        'models',
        'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)
    model2_fname = os.path.join(
        'models',
        'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)

    # Load models if available
    if not fresh_models and os.path.isfile(model1_fname) and \
            os.path.isfile(model2_fname):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
    else:
        # Only the features and the labels of the training set are needed.
        xtr, ytr, _, _, _ = dp.load_patients(
                train_pats, stratified, resolution=resolution,
                n_voxels=n_voxels, load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1 > 0] = 1
        if stratified:
            # Class frequencies in the whole dataset (non-tumor vs. tumor)
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 1): %s" % (class_freqs * 100,))
            # Class frequencies in the sample: bins [0, 1) and [1, 5]
            sample_counts = np.histogram(ytr, [0, 1, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr))
            for i in range(2):
                # Index with the binarized labels: the tumor weight must be
                # applied to every tumor voxel, not only to label 1.
                weights[ytr1 == i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr, ytr1, n_trees=n_trees,
                                sample_weight=weights, fname=model1_fname)

        print("Minimum number of voxels in a tumor: %d" % min_voxels)

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs, :])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset (tumor classes only)
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 2): %s" % (class_freqs * 100,))
            # Class frequencies in the sample; label 0 falls outside the
            # bins and is therefore ignored.
            sample_counts = np.histogram(ytr, range(1, 6))[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2 == i + 1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2, ytr2, n_trees=n_trees,
                                sample_weight=weights, fname=model2_fname)

    print("\n----------------------------------\n")

    if len(dev_pats) > 0:
        best_potential = optimize_potential(
                dev_pats, model1, model2, stratified, fscores,
                do_plot_predictions, resolution=resolution, load_hog=load_hog)
        best_radius = optimize_closing(dev_pats, model1, stratified, fscores,
                                       resolution=resolution, load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        # Fixed defaults used when no development patients are given.
        best_radius = 6
        best_th = 0.6
        best_potential = np.array([[0.04, 0.03555556, 0.03555556, 0.02222222],
                                   [0.03555556, 0.04, 0.02222222, 0.],
                                   [0.03555556, 0.02222222, 0.04, 0.03555556],
                                   [0.02222222, 0., 0.03555556, 0.04]])

    # Per-patient results are collected in lists and joined once after the
    # loop; concatenating inside the loop would copy all previous voxels on
    # every iteration. The empty float arrays keep the dtype of the joined
    # arrays identical to incremental concatenation onto np.zeros(0).
    y_chunks = [np.zeros(0)]
    pred_chunks = [np.zeros(0)]
    pred_no_pp_chunks = [np.zeros(0)]
    # patient_idxs_te[k] is the offset of the k-th test patient's first
    # voxel in the joined arrays; the last entry is the total voxel count.
    patient_idxs_te = [0]
    print("Test users:")
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        # First stage: threshold the tumor probability.
        pred_probs = model1.predict_proba(x)
        pred = pred_probs[:, 1] >= best_th
        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        n_predicted = np.count_nonzero(pred)
        if n_predicted < min_voxels:
            print("Patient having too few voxels (%d < %d)" % (n_predicted,
                                                               min_voxels))
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:, 1])[-min_voxels:]
            pred[new_idxs] = 1
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True,
                                  radius=best_radius)

        # Second stage: label the voxels inside the post-processed mask.
        tumor_idxs = pp_pred > 0
        has_tumor = np.count_nonzero(tumor_idxs) > 0
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2

        # Snapshot of the prediction before the second-stage smoothing.
        pp_pred15 = np.array(pp_pred)
        print("\nConfusion matrix:")
        cm = confusion_matrix(y, pp_pred15)
        print(cm)
        dice_scores(y, pp_pred15, label='Dice scores:')

        if use_mrf:
            # MRF post processing
            if has_tumor:
                edges = dp.create_graph(coord[tumor_idxs, :])
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=best_potential) + 1
            method = 'MRF'
        else:
            # Closing post processing
            if has_tumor:
                pp_pred[tumor_idxs] = dp.post_process(coord[tumor_idxs, :], dim,
                                                      pred2, remove_components=False,
                                                      radius=best_radius)
            method = 'closing'

        print("\nConfusion matrix (pp):")
        cm = confusion_matrix(y, pp_pred)
        print(cm)

        y_chunks.append(y)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))
        pred_chunks.append(pp_pred)
        pred_no_pp_chunks.append(pp_pred15)

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results', 'pat%d_slices_2S_%s.png' % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred, fname=pif,
                                fmat=fmat)

    yte = np.concatenate(y_chunks)
    predte = np.concatenate(pred_chunks)
    predte_no_pp = np.concatenate(pred_no_pp_chunks)

    print("\nOverall confusion matrix:")
    cm = confusion_matrix(yte, predte)
    print(cm)

    dice_scores(yte, predte_no_pp, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):', fscores=fscores)

    dice_scores(yte, predte, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):', fscores=fscores)
Exemplo n.º 4
0
def predict_two_stage(train_pats,
                      test_pats,
                      fscores=None,
                      do_plot_predictions=False,
                      stratified=False,
                      n_trees=30,
                      dev_pats=None,
                      use_mrf=True,
                      resolution=1,
                      n_voxels=30000,
                      mat_dir=None,
                      fresh_models=False,
                      load_hog=False):
    """
    Predict tumor voxels for given test patients with a two-stage model.

    The first model separates tumor (labels > 0) from non-tumor voxels; the
    second model is trained on tumor voxels only and assigns one of the tumor
    labels. For every test patient the first-stage mask is post-processed
    with dp.post_process, the second model labels the voxels inside the mask,
    and the labels are then smoothed either with dp.mrf or with
    dp.post_process. Confusion matrices are printed and dice_scores is called
    per patient and over all test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results
                   (forwarded to dice_scores and to the optimize_* helpers).
        do_plot_predictions -- if True, pp.plot_predictions is called for
                   every test patient with an image name under 'results'.
        stratified -- forwarded to dp.load_patients and the optimize_*
                   helpers. If True, training samples are weighted by the
                   ratio of whole-dataset class frequency (dp.class_counts)
                   to sample class frequency.
        n_trees -- number of trees passed to train_RF_model.
        dev_pats -- list of patient IDs used to tune the MRF potential, the
                   closing radius and the first-stage threshold. If empty or
                   None, fixed default values are used instead.
        use_mrf -- if True, smooth second-stage labels with dp.mrf; otherwise
                   with dp.post_process.
        resolution -- forwarded to the dp loaders and the optimize_* helpers.
        n_voxels -- forwarded to dp.load_patients for the training set (test
                   patients are always loaded with n_voxels=None).
        mat_dir -- if not None, directory of the per-patient '.mat' file name
                   passed to pp.plot_predictions.
        fresh_models -- if True, train new models even when both model files
                   already exist under 'models'.
        load_hog -- forwarded to the dp loaders and the optimize_* helpers.

    Nothing is returned; results are printed and handed to dice_scores.
    """
    if dev_pats is None:
        dev_pats = []

    # Lower bound on the size of a predicted tumor; if the thresholded
    # first-stage prediction is smaller, the most probable voxels are used.
    min_voxels = 3000

    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    # NOTE(review): the file names do not encode `stratified`, so models
    # loaded from disk may have been trained with a different setting --
    # confirm whether that is intended.
    fname_args = (seed, len(train_pats), len(test_pats), n_trees, n_voxels,
                  model_str)
    model1_fname = os.path.join(
        'models',
        'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)
    model2_fname = os.path.join(
        'models',
        'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)

    # Load models if available
    if not fresh_models and os.path.isfile(model1_fname) and \
            os.path.isfile(model2_fname):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
    else:
        # Only the features and the labels of the training set are needed.
        xtr, ytr, _, _, _ = dp.load_patients(
            train_pats,
            stratified,
            resolution=resolution,
            n_voxels=n_voxels,
            load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1 > 0] = 1
        if stratified:
            # Class frequencies in the whole dataset (non-tumor vs. tumor)
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 1): %s" % (class_freqs * 100,))
            # Class frequencies in the sample: bins [0, 1) and [1, 5]
            sample_counts = np.histogram(ytr, [0, 1, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr))
            for i in range(2):
                # Index with the binarized labels: the tumor weight must be
                # applied to every tumor voxel, not only to label 1.
                weights[ytr1 == i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr,
                                ytr1,
                                n_trees=n_trees,
                                sample_weight=weights,
                                fname=model1_fname)

        print("Minimum number of voxels in a tumor: %d" % min_voxels)

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs, :])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset (tumor classes only)
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 2): %s" % (class_freqs * 100,))
            # Class frequencies in the sample; label 0 falls outside the
            # bins and is therefore ignored.
            sample_counts = np.histogram(ytr, range(1, 6))[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2 == i + 1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2,
                                ytr2,
                                n_trees=n_trees,
                                sample_weight=weights,
                                fname=model2_fname)

    print("\n----------------------------------\n")

    if len(dev_pats) > 0:
        best_potential = optimize_potential(dev_pats,
                                            model1,
                                            model2,
                                            stratified,
                                            fscores,
                                            do_plot_predictions,
                                            resolution=resolution,
                                            load_hog=load_hog)
        best_radius = optimize_closing(dev_pats,
                                       model1,
                                       stratified,
                                       fscores,
                                       resolution=resolution,
                                       load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        # Fixed defaults used when no development patients are given.
        best_radius = 6
        best_th = 0.6
        best_potential = np.array([[0.04, 0.03555556, 0.03555556, 0.02222222],
                                   [0.03555556, 0.04, 0.02222222, 0.],
                                   [0.03555556, 0.02222222, 0.04, 0.03555556],
                                   [0.02222222, 0., 0.03555556, 0.04]])

    # Per-patient results are collected in lists and joined once after the
    # loop; concatenating inside the loop would copy all previous voxels on
    # every iteration. The empty float arrays keep the dtype of the joined
    # arrays identical to incremental concatenation onto np.zeros(0).
    y_chunks = [np.zeros(0)]
    pred_chunks = [np.zeros(0)]
    pred_no_pp_chunks = [np.zeros(0)]
    # patient_idxs_te[k] is the offset of the k-th test patient's first
    # voxel in the joined arrays; the last entry is the total voxel count.
    patient_idxs_te = [0]
    print("Test users:")
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat,
                                           n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        # First stage: threshold the tumor probability.
        pred_probs = model1.predict_proba(x)
        pred = pred_probs[:, 1] >= best_th
        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        n_predicted = np.count_nonzero(pred)
        if n_predicted < min_voxels:
            print("Patient having too few voxels (%d < %d)" % (n_predicted,
                                                               min_voxels))
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:, 1])[-min_voxels:]
            pred[new_idxs] = 1
        pp_pred = dp.post_process(coord,
                                  dim,
                                  pred,
                                  binary_closing=True,
                                  radius=best_radius)

        # Second stage: label the voxels inside the post-processed mask.
        tumor_idxs = pp_pred > 0
        has_tumor = np.count_nonzero(tumor_idxs) > 0
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2

        # Snapshot of the prediction before the second-stage smoothing.
        pp_pred15 = np.array(pp_pred)
        print("\nConfusion matrix:")
        cm = confusion_matrix(y, pp_pred15)
        print(cm)
        dice_scores(y, pp_pred15, label='Dice scores:')

        if use_mrf:
            # MRF post processing
            if has_tumor:
                edges = dp.create_graph(coord[tumor_idxs, :])
                pp_pred[tumor_idxs] = dp.mrf(
                    pred_probs2, edges, potential=best_potential) + 1
            method = 'MRF'
        else:
            # Closing post processing
            if has_tumor:
                pp_pred[tumor_idxs] = dp.post_process(coord[tumor_idxs, :],
                                                      dim,
                                                      pred2,
                                                      remove_components=False,
                                                      radius=best_radius)
            method = 'closing'

        print("\nConfusion matrix (pp):")
        cm = confusion_matrix(y, pp_pred)
        print(cm)

        y_chunks.append(y)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))
        pred_chunks.append(pp_pred)
        pred_no_pp_chunks.append(pp_pred15)

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results',
                               'pat%d_slices_2S_%s.png' % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord,
                                dim,
                                pp_pred15,
                                y,
                                pp_pred,
                                fname=pif,
                                fmat=fmat)

    yte = np.concatenate(y_chunks)
    predte = np.concatenate(pred_chunks)
    predte_no_pp = np.concatenate(pred_no_pp_chunks)

    print("\nOverall confusion matrix:")
    cm = confusion_matrix(yte, predte)
    print(cm)

    dice_scores(yte,
                predte_no_pp,
                patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):',
                fscores=fscores)

    dice_scores(yte,
                predte,
                patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):',
                fscores=fscores)