Example #1
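    # Two-class softmax over the raw network outputs for the whole test volume;
    # the class-0 counterpart (test_pred_full_volume_softmax0, used below) is
    # assumed to be computed the same way earlier in the full script.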
    test_pred_full_volume_softmax1 = np.exp(test_pred_full_volume[
        0, 1, :, :, :]) / (np.exp(test_pred_full_volume[0, 0, :, :, :]) +
                           np.exp(test_pred_full_volume[0, 1, :, :, :]))

    uniqueStatsPath = '/raida/apezeshk/lung_dicom_dir/p0614/20000101/s30983/uniqueStats_p0614_20000101_s30983.mat'
    uniqueStatsData = sio.loadmat(uniqueStatsPath)
    noduleMask = uniqueStatsData['allMaxRadiologistMsk']
    if chopVolumeFlag == 1:
        noduleMaskCrop = noduleMask[300:512, 256:500, 200:300]
    else:
        noduleMaskCrop = noduleMask
#    noduleMaskResize = np.zeros((test_pred_full_volume_softmax0.shape[0], test_pred_full_volume_softmax0.shape[1], test_pred_full_volume_softmax0.shape[2]))
#    for i in range(0, test_pred_full_volume_softmax0.shape[2]):
#        noduleMaskResize[:,:,i] = scipy.misc.imresize(noduleMaskCrop[:,:,i], (test_pred_full_volume_softmax0.shape[0], test_pred_full_volume_softmax0.shape[1]))
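    # Per-axis zoom factors that resize the radiologist nodule mask to the
    # shape of the predicted-probability volume.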
    dsfactor = [
        w / float(g) for w, g in zip(test_pred_full_volume_softmax0.shape,
                                     noduleMaskCrop.shape)
    ]
    noduleMaskResize = nd.interpolation.zoom(noduleMaskCrop.astype('float'),
                                             zoom=dsfactor)

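    # Flatten the per-voxel class scores, and binarize the resized mask (>0.5)
    # into integer voxel labels before computing the AUC.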
    vol_scores0 = test_pred_full_volume_softmax0.flatten()
    vol_scores1 = test_pred_full_volume_softmax1.flatten()
    vol_labels = noduleMaskResize.flatten()
    vol_labels = vol_labels > 0.5
    vol_labels = vol_labels.astype('int')

    vol_scores_all = np.hstack((vol_scores0.reshape(
        (len(vol_scores0), 1)), vol_scores1.reshape((len(vol_scores1), 1))))
    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(vol_scores_all, vol_labels)
Example #2
def Main(inputParamsConfig):
    #input_shape,learning_rate, momentum, num_epochs, batchsize, data_path, train_set_size, test_set_size,
#         positive_set_ratio, dropout,nonlinearityToUse,augmentation
    experiment_id = str(time.strftime("%Y%m%d%H%M%S"))
    input_shape = inputParamsConfig['input_shape']
    learning_rate = inputParamsConfig['learning_rate']
    momentum = inputParamsConfig['momentum']
    num_epochs = inputParamsConfig['num_epochs']
    batch_size = inputParamsConfig['batch_size']
    noduleCaseFilterParams = inputParamsConfig['noduleCaseFilterParams']
    data_path = inputParamsConfig['data_path']
    train_set_size = inputParamsConfig['train_set_size']
    test_set_size = inputParamsConfig['test_set_size']
    positive_set_ratio = inputParamsConfig['positive_set_ratio']
    dropout = inputParamsConfig['dropout']
    nonlinearityToUse = inputParamsConfig['nonlinearityToUse']
    nonlinearityToUseFC = inputParamsConfig['nonlinearityToUseFC']
    numberOfLayers = inputParamsConfig['numberOfLayers']
    numberOfFCUnits = inputParamsConfig['numberOfFCUnits']
    numberOfFCLayers = inputParamsConfig['numberOfFCLayers']
    numberOfConvFilts = inputParamsConfig['numberOfConvFilts']
    filterSizeTable = inputParamsConfig['filterSizeTable']
    augmentationRegularFlag = inputParamsConfig['augmentationRegularFlag']
    augmentationTransformFlag = inputParamsConfig['augmentationTransformFlag']
    weightInitToUse = inputParamsConfig['weightInitToUse']
    lrDecayFlag = inputParamsConfig['lrDecayFlag']
    biasInitVal = inputParamsConfig['biasInitVal']
    fp_per_case = inputParamsConfig['fp_per_case']
    phase = inputParamsConfig['phase']
    discrim_shape = inputParamsConfig['discrim_shape']
    
    pos_test_size = inputParamsConfig['pos_test_size']
    fp_model_to_use = inputParamsConfig['fp_model_to_use']
    
    print(
        " Learning rate: '%s' , momentum: '%s',  num_epochs: '%s'  ,batch size: '%s'  ,data_path: '%s',Train Set Size: '%s' ,Test set Size: '%s' ,Positive set Ratio '%s' , dropout: '%s', nonlinearityToUse: '%s',augmentationRegularFlag: '%s',augmentationTransformFlag: '%s',number of layers: '%s', pos_test_size: '%s'" % (
        str(learning_rate), str(momentum), str(num_epochs), str(batch_size), data_path, str(train_set_size),
        str(test_set_size), str(positive_set_ratio), str(dropout), str(nonlinearityToUse), str(augmentationRegularFlag),
        str(augmentationTransformFlag), str(numberOfLayers), str(pos_test_size)))
    print(" Phase: '%s', Num FC Layers: '%s', Num FC Units: '%s', Number of ConvFilters: '%s'" % (str(phase), str(numberOfFCLayers), str(numberOfFCUnits), str(numberOfConvFilts)))
    num_epochs=int(num_epochs)
    batch_size=int(batch_size)
    
    if phase == 'screen':
        if noduleCaseFilterParams == '':
            if fp_per_case == '0': #make different filenames per fp_per_case s.t. original & supplemented train set are maintained
                training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)+'.hdf5')
            else:
                training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)
                    +'_fp' + fp_model_to_use + '_' + fp_per_case +'.hdf5')
        else:
            if fp_per_case == '0': #make different filenames per fp_per_case s.t. original & supplemented train set are maintained
                training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)+'_filt.hdf5')
            else:
                training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)
                    +'_fp' + fp_model_to_use + '_' + fp_per_case +'_filt.hdf5')
    elif phase == 'discrim':
        if noduleCaseFilterParams == '':
            training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)
                +'_discrim' + discrim_shape.replace(' ','').replace(',','') + '_' + fp_model_to_use + '.hdf5')
        else: 
            training_filename = os.path.join('./',input_shape+'_'+str(augmentationRegularFlag)+str(augmentationTransformFlag)+'_'+str(positive_set_ratio)
                +'_discrim' + discrim_shape.replace(' ','').replace(',','') + '_' + fp_model_to_use + '_filt.hdf5')
    # We save the created train and test sets of size X and positive ratio r to reduce the overhead of running the pipeline
    if os.path.exists(training_filename):
        print ("Training file already exists, reading it...")
#        with h5py.File(training_filename, 'r') as data_set:
#            tmp_train_set = data_set.get('train_set')  # Reading list of patients and test file paths
#            train_set = np.array(tmp_train_set)
#            tmp_train_label = data_set.get('train_label')  # Reading list of patients and test file paths
#            train_label = np.array(tmp_train_label)
#            tmp_test_set = data_set.get('test_set')  # Reading list of patients and test file paths
#            test_set = np.array(tmp_test_set)
#            tmp_test_label = data_set.get('test_label')  # Reading list of patients and test file paths
#            test_label = np.array(tmp_test_label)
#            tmp_val_set = data_set.get('val_set')  # Reading list of patients and test file paths
#            val_set = np.array(tmp_val_set)
#            tmp_val_label = data_set.get('val_label')  # Reading list of patients and test file paths
#            val_label = np.array(tmp_val_label)
#            tr_len_pos = len(np.where(train_label==1)[0])
#            tr_len_neg = len(np.where(train_label==0)[0])
    else:
        inputParamsLoadData = {}
        inputParamsLoadData['data_path'] = data_path
        inputParamsLoadData['input_shape'] = input_shape
        inputParamsLoadData['train_set_size'] = int(train_set_size)
        inputParamsLoadData['test_set_size'] = int(test_set_size)
        inputParamsLoadData['augmentationRegularFlag'] = int(augmentationRegularFlag)
        inputParamsLoadData['augmentationTransformFlag'] = int(augmentationTransformFlag)
        inputParamsLoadData['fp_per_case'] = int(fp_per_case)
        inputParamsLoadData['pos_test_size'] = int(pos_test_size)
        inputParamsLoadData['positive_set_ratio'] = float(positive_set_ratio)
        inputParamsLoadData['fp_model_to_use'] = fp_model_to_use
        inputParamsLoadData['phase'] = phase
        inputParamsLoadData['discrim_shape'] = discrim_shape
        inputParamsLoadData['noduleCaseFilterParams'] = noduleCaseFilterParams
        inputParamsLoadData['training_filename'] = training_filename
        
        tr_len_pos,tr_len_neg, ts_len_pos,ts_len_neg = SupportFuncs.load_data(inputParamsLoadData)    	    
       # train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(data_path, int(train_set_size),
        #                                                                                      int(test_set_size),
         #                                                                                     int(augmentationFlag),float(positive_set_ratio))
#        with h5py.File(training_filename, 'w') as data_set:
#            #Write the dataset to a h5py file
#            data_set.create_dataset('train_set', data=train_set)
#            data_set.create_dataset('train_label', data=train_label)
#            data_set.create_dataset('test_set', data=test_set)
#            data_set.create_dataset('test_label', data=test_label)
#            data_set.create_dataset('val_set', data=val_set)
#            data_set.create_dataset('val_label', data=val_label)
                                     
    training_file_handle = tables.open_file(training_filename, mode='r')   #file closed after both train/test done                                 
    train_set = training_file_handle.root.train_set
    train_label = training_file_handle.root.train_label
    test_set = training_file_handle.root.test_set
    test_label = training_file_handle.root.test_label
    val_set = training_file_handle.root.val_set
    val_label = training_file_handle.root.val_label

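    # Recompute the positive/negative counts directly from the labels stored in the training file.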
    tr_len_pos = len(np.where(train_label[:]==1)[0]); ts_len_pos = len(np.where(test_label[:]==1)[0])
    tr_len_neg = train_set.shape[0] - tr_len_pos; ts_len_neg = test_set.shape[0] - ts_len_pos
    print("Train set number of positives:" + str(tr_len_pos))
    print("Train set number of negatives:" + str(tr_len_neg))
    if nonlinearityToUse == 'relu':
        nonLinearity = lasagne.nonlinearities.rectify        
    elif nonlinearityToUse == 'tanh':
        nonLinearity = lasagne.nonlinearities.tanh        
    elif nonlinearityToUse == 'sigmoid':
        nonLinearity = lasagne.nonlinearities.sigmoid        
    else:
        raise Exception(
            'nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, retry with a supported one!')

    if nonlinearityToUseFC == 'relu':
        nonLinearityFC = lasagne.nonlinearities.rectify        
    elif nonlinearityToUseFC == 'tanh':
        nonLinearityFC = lasagne.nonlinearities.tanh        
    elif nonlinearityToUseFC == 'sigmoid':
        nonLinearityFC = lasagne.nonlinearities.sigmoid        
    else:
        raise Exception(
            'nonlinearityToUseFC: Unsupported nonlinearity type has been selected for the network, retry with a supported one!')
    
    biasInit = lasagne.init.Constant(biasInitVal) #for relu use biasInit=1 s.t. inputs to relu are positive in beginning

    if weightInitToUse == 'normal': #according to documentation, different gains should be used depending on nonlinearity
        weight_init = lasagne.init.Normal()
    elif weightInitToUse == 'He':
        if nonlinearityToUse=='relu':
            gainToUse = np.sqrt(2)            
        else:
            gainToUse = 1
        
        weight_init = lasagne.init.HeNormal(gain=gainToUse)
    else:
        raise Exception(
            'weightInitToUse: Unsupported weight initialization type has been selected, retry with a supported one!')
            
    if lrDecayFlag==1: #if learning rate should be updated, then it has to be a shared variable
        learning_rate = theano.shared(np.array(learning_rate, dtype=theano.config.floatX))
        decayRate = 0.5
    else:
        learning_rate = float(learning_rate)
            
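    # 5D float32 tensor type for batches of 3D patches (batch, channel, and three spatial axes).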
    dtensor5 = T.TensorType('float32', (False,) * 5)
    input_var = dtensor5('inputs')
    target_var = T.ivector('targets')

    inputParamsNetwork = dict(n_layer=numberOfLayers, shape=input_shape,dropout=float(dropout), nonLinearity=nonLinearity,
                              biasInit = biasInit, filterSizeTable = filterSizeTable, numberOfFCLayers=numberOfFCLayers,
                              numberOfFCUnits=numberOfFCUnits, numberOfConvFilts=numberOfConvFilts, 
                              nonLinearityFC=nonLinearityFC)
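    # Build_3dcnn (defined elsewhere in this project) is assumed to assemble the
    # 3D CNN from these parameters and the symbolic input variable.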
    network = Build_3dcnn(weight_init, inputParamsNetwork, input_var)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # loss=np.mean(loss)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=learning_rate, momentum=float(momentum))

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    # test_loss = test_loss.mean()
    test_loss = test_loss.mean()

    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)  # mode='DebugMode'

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_prediction])  # ,mode='DebugMode')

    # Finally, launch the training loop.
    print("Starting training...")
    # Per-epoch bookkeeping (used for the summary plots and the saved samples file):
    epoch_det = {}
    training_loss = []
    all_val_loss = []
    all_val_accuracy = []
    all_val_AUC = []
    # We iterate over epochs:
    for epoch in range(int(num_epochs)):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in Iterate_minibatches(train_set, train_label, int(batch_size), shuffle=True):
            inputs, targets = batch
            inputs = np.float32(inputs)
            train_err += train_fn(inputs, targets)
            train_batches += 1
            
        if lrDecayFlag == 1: #only update learning_rate if lrDecayFlag==1; it is a shared variable in that case
            print('learning_rate = ' + str(learning_rate.get_value()))
            if ((epoch+1) % 12) == 0:
                learning_rate.set_value(decayRate * learning_rate.get_value())
        else:
            print('learning_rate = ' + str(learning_rate))
            
        

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        all_val_pred = np.empty((0, 2),
                                dtype=float)  # initialize; array n_samplesx2 for the 2 class predictions for all validation samples
        all_val_labels = np.empty((0, 1),
                                  dtype=float)  # initialize; array n_samplesx1 for labels of all validation samples
        for batch in Iterate_minibatches(val_set, val_label, int(batch_size), shuffle=False):
            inputs, targets = batch
            inputs = np.float32(inputs)
            err, acc, val_pred = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
            all_val_pred = np.vstack((all_val_pred, val_pred))
            all_val_labels = np.append(all_val_labels, targets)

        val_AUC, val_varAUC = SupportFuncs.Pred2AUC(all_val_pred, all_val_labels)
        # Then we print the results for this epoch:

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
        print("  validation AUC: " + str(val_AUC) + ", std: " + str(np.sqrt(val_varAUC)))
        epoch_det[epoch + 1] = {'all_val_accuracy': (val_acc / val_batches), "all_val_loss": (val_err / val_batches),
                                "training_loss": (train_err / train_batches)}
        all_val_accuracy.append(val_acc / val_batches)
        all_val_loss.append(val_err / val_batches)
        all_val_AUC.append(val_AUC)
        training_loss.append(train_err / train_batches)

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    all_test_pred = np.empty((0, 2),
                             dtype=float)  # initialize; array n_samplesx2 for the 2 class predictions for all test samples
    all_test_labels = np.empty((0, 1), dtype=float)  # initialize; array n_samplesx1 for labels of all test samples
    for batch in Iterate_minibatches(test_set, test_label, int(batch_size), shuffle=False):
        inputs, targets = batch
        inputs = np.float32(inputs)
        err, acc, test_pred = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
        all_test_pred = np.vstack((all_test_pred, test_pred))
        all_test_labels = np.append(all_test_labels, targets)

    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(all_test_pred, all_test_labels)
    
    ##########################################
    training_file_handle.close()
    ##########################################

    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
    print("test AUC: " + str(test_AUC) + ", std: " + str(np.sqrt(test_varAUC)))
    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    tmp = input_shape.replace(' ','') #get rid of space and comma in input shape
    tmp = tmp.replace(',','')    
    filenameModel = os.path.join(model_path, 'cnn_' + tmp + '_' + experiment_id)
    filenameSamples = filenameModel + '_samples'
    np.savez(filenameModel, *lasagne.layers.get_all_param_values(network))
    np.savez(filenameSamples, inputs=inputs, targets=targets, err=err, acc=acc, training_loss=training_loss,
             test_pred=test_pred, all_test_pred=all_test_pred,all_test_labels=all_test_labels,
             inputParamsConfig=inputParamsConfig, tr_len_pos = tr_len_pos, tr_len_neg = tr_len_neg,
             all_val_loss=all_val_loss, all_val_accuracy=all_val_accuracy, test_AUC=test_AUC, test_varAUC=test_varAUC)
    if not os.path.exists('./figures'):
        os.makedirs('./figures')
    fig = plt.figure()
    # plt.plot(training_loss,'r',val_accuracy,'g',all_val_loss,'b')
    plt.plot(training_loss, 'r', label='Training_loss=' + str("%.6f" % training_loss[num_epochs - 1]))
    plt.plot(all_val_loss, 'r--', label='Val_loss=' + str("%.3f" % all_val_loss[num_epochs - 1]))
    plt.plot(all_val_accuracy, 'g', label='Val_accuracy=' + str("%.3f" % all_val_accuracy[num_epochs - 1]))
    plt.annotate(str("%.3f" % all_val_accuracy[num_epochs - 1]), xy=(num_epochs - 1, all_val_accuracy[num_epochs - 1]),
                 xytext=(num_epochs - 70, 0.6),
                 arrowprops=dict(facecolor='black', shrink=0.05))
    plt.annotate(str("%.6f" % training_loss[num_epochs - 1]), xy=(num_epochs - 1, training_loss[num_epochs - 1]),
                 xytext=(num_epochs - 70, 0.3),
                 arrowprops=dict(facecolor='black', shrink=0.05))
    plt.ylabel('Training loss and Validation accuracy')
    plt.xlabel('Number of Epochs')
    plt.title('Accuracy and Loss Changes')
    plt.legend(fontsize=13, loc=10)
    try:
        fig.savefig(os.path.join(figures_path, experiment_id))  # save the figure to file
    except IOError:  # figures_path may not exist yet
        Make_sure_path_exists(figures_path)
        fig.savefig(os.path.join(figures_path, experiment_id))  # retry after creating the directory

    # save_model(result_path, experiment_id, str(input_shape), n_layers, int(batchsize), num_epochs, momentum, learning_rate, 2
    #            , len(train_set), len(test_set), (test_err / test_batches), (test_acc / test_batches), test_AUC[0],
    #            np.sqrt(test_varAUC),augmentation)

    plt.close(fig)
    # plt.show()
    Save_model(result_path, experiment_id, str(input_shape), numberOfLayers, int(batch_size), num_epochs, momentum,
               inputParamsConfig['learning_rate'], 2
               , tr_len_pos+tr_len_neg, ts_len_pos+ts_len_neg, (test_err / test_batches), (test_acc / test_batches), test_AUC[0],
               np.sqrt(test_varAUC), augmentationRegularFlag, augmentationTransformFlag, nonlinearityToUse, dropout,tr_len_pos,tr_len_neg)
Example #3
                    noduleMaskResize = nd.interpolation.zoom(
                        noduleMaskCrop.astype('float'), zoom=dsfactor)

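                    # Binarize the resized mask (>0.5) so each voxel gets an integer label.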
                    noduleMaskResize = noduleMaskResize > 0.5
                    noduleMaskResize = noduleMaskResize.astype('int')
                    print(len(np.where(noduleMaskResize == 1)[0]))
                    vol_scores_zero = test_pred_full_volume_softmax0.flatten()
                    vol_scores_one = test_pred_full_volume_softmax1.flatten()
                    vol_scores_zero = vol_scores_zero.reshape(
                        len(vol_scores_zero), 1)
                    vol_scores_one = vol_scores_one.reshape(
                        len(vol_scores_one), 1)
                    if case_flag == 1:
                        vol_score = np.concatenate(
                            (vol_scores_zero, vol_scores_one), axis=1)
                        total_vol_score_tmp = np.vstack(
                            (total_vol_score_tmp, vol_score))
                        vol_scores_zoronone = np.zeros
                        vol_labels = noduleMaskResize.flatten()
                        vol_labels_tmp = np.concatenate(
                            (vol_labels_tmp, vol_labels), axis=0)
                    else:
                        total_vol_score_tmp = np.concatenate(
                            (vol_scores_zero, vol_scores_one), axis=1)
                        vol_scores_zoronone = np.zeros
                        vol_labels_tmp = noduleMaskResize.flatten()
                        case_flag = 1

    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(total_vol_score_tmp,
                                                  vol_labels_tmp.astype('int'))
    print(test_AUC)
Example #4
def Main(inputParamsConfig):
    #input_shape,learning_rate, momentum, num_epochs, batchsize, data_path, train_set_size, test_set_size,
    #         positive_set_ratio, dropout,nonlinearityToUse,augmentation
    experiment_id = str(time.strftime("%Y%m%d%H%M%S"))
    input_shape = inputParamsConfig['input_shape']
    learning_rate = inputParamsConfig['learning_rate']
    momentum = inputParamsConfig['momentum']
    num_epochs = inputParamsConfig['num_epochs']
    batch_size = inputParamsConfig['batch_size']
    data_path = inputParamsConfig['data_path']
    train_set_size = inputParamsConfig['train_set_size']
    test_set_size = inputParamsConfig['test_set_size']
    positive_set_ratio = inputParamsConfig['positive_set_ratio']
    dropout = inputParamsConfig['dropout']
    nonlinearityToUse = inputParamsConfig['nonlinearityToUse']
    numberOfLayers = inputParamsConfig['numberOfLayers']
    augmentationFlag = inputParamsConfig['augmentationFlag']

    print(
        " Learning rate: '%s' , momentum: '%s',  num_epochs: '%s'  ,batch size: '%s'  ,data_path: '%s' ,Train Set Size: '%s' ,Test set Size: '%s' ,Positive set Ratio '%s' , dropout: '%s', nonlinearityToUse: '%s',augmentationFlag: '%s',number of layers: '%s'"
        % (str(learning_rate), str(momentum), str(num_epochs), str(batch_size),
           data_path, str(train_set_size), str(test_set_size),
           str(positive_set_ratio), str(dropout), str(nonlinearityToUse),
           str(augmentationFlag), str(numberOfLayers)))
    num_epochs = int(num_epochs)
    batch_size = int(batch_size)
    # We save the created train and test sets of size X and positive ratio r to reduce the overhead of running the pipeline

    ps = []
    ng = []

    patient_id = []
    with h5py.File(os.path.join('/diskStation/temp/test_500_0.3_28288 .hdf5'),
                   'r') as hf:
        print('List of arrays in this file: \n', hf.keys())
        tmp_test_paths = hf.get(
            'Test_set')  # Reading list of patients and test file paths
        ps = np.array(tmp_test_paths)  #full paths to all positive test patches
        tmp_test_paths = hf.get('neg_test_set')
        ng = np.array(tmp_test_paths)

    # path_to_pos_tmp='/home/shamidian/Summer2016/DeepMed/28288/pos_28288'
    # path_to_neg_tmp='/home/shamidian/Summer2016/DeepMed/28288/neg_smp_0_28288'
    # for item in os.listdir(path_to_pos_tmp):
    #     ps.append(os.path.join(path_to_pos_tmp,item))
    # for items in os.listdir(path_to_neg_tmp):
    #     ng.append(os.path.join(path_to_neg_tmp,items))

    test_set, test_label = SupportFuncs.mat_generate_from_path(ps, ng)

    #
    # train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(data_path, int(train_set_size),
    #                                                                                           int(test_set_size),
    #                                                                                           int(augmentationFlag),float(positive_set_ratio))

    if nonlinearityToUse == 'relu':
        nonLinearity = lasagne.nonlinearities.rectify
    elif nonlinearityToUse == 'tanh':
        nonLinearity = lasagne.nonlinearities.tanh
    elif nonlinearityToUse == 'sigmoid':
        nonLinearity = lasagne.nonlinearities.sigmoid
    else:
        raise Exception(
            'nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, retry with a supported one!'
        )
    dtensor5 = T.TensorType('float32', (False, ) * 5)
    input_var = dtensor5('inputs')
    target_var = T.ivector('targets')

    inputParamsNetwork = dict(n_layer=numberOfLayers,
                              shape=input_shape,
                              dropout=float(dropout),
                              nonLinearity=nonLinearity)
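    # weight_init and param_values are assumed to be defined earlier in the full
    # script (e.g., loaded from a previously trained model's .npz file); this
    # snippet only rebuilds the network and restores those weights.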
    network = Build_3dcnn(weight_init, inputParamsNetwork, input_var)
    lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # loss=np.mean(loss)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss,
        params,
        learning_rate=float(learning_rate),
        momentum=float(momentum))

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    # test_loss = test_loss.mean()
    test_loss = test_loss.mean()

    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss,
                               updates=updates)  # mode='DebugMode'

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(
        [input_var, target_var],
        [test_loss, test_acc, test_prediction])  # ,mode='DebugMode')

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    all_test_pred = np.empty(
        (0, 2), dtype=float
    )  # initialize; array n_samplesx2 for the 2 class predictions for all test samples
    all_test_labels = np.empty(
        (0, 1), dtype=float
    )  # initialize; array n_samplesx1 for labels of all test samples
    for batch in Iterate_minibatches(test_set,
                                     test_label,
                                     int(batch_size),
                                     shuffle=False):
        inputs, targets = batch
        inputs = np.float32(inputs)
        err, acc, test_pred = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
        all_test_pred = np.vstack((all_test_pred, test_pred))
        all_test_labels = np.append(all_test_labels, targets)

    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(all_test_pred,
                                                  all_test_labels)
Example #5
test_set_size = inputParamsConfigAll['test_set_size']
positive_set_ratio = inputParamsConfigAll['positive_set_ratio']
dropout = inputParamsConfigAll['dropout']
nonlinearityToUse = inputParamsConfigAll['nonlinearityToUse']
augmentationFlag = inputParamsConfigAll['augmentationFlag']

print(
    " Learning rate: '%s' , momentum: '%s',  num_epochs: '%s'  ,batch size: '%s'  ,data_path: '%s' ,Train Set Size: '%s' ,Test set Size: '%s' ,Positive set Ratio '%s' , dropout: '%s', nonlinearityToUse: '%s',augmentationFlag: '%s'"
    % (str(learning_rate), str(momentum), str(num_epochs), str(batch_size),
       data_path, str(train_set_size), str(test_set_size),
       str(positive_set_ratio), str(dropout), str(nonlinearityToUse),
       str(augmentationFlag)))
num_epochs = int(num_epochs)
batch_size = int(batch_size)
train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(
    data_path, int(train_set_size), int(test_set_size), int(augmentationFlag),
    float(positive_set_ratio))
if nonlinearityToUse == 'relu':
    nonLinearity = lasagne.nonlinearities.rectify
elif nonlinearityToUse == 'tanh':
    nonLinearity = lasagne.nonlinearities.tanh
elif nonlinearityToUse == 'sigmoid':
    nonLinearity = lasagne.nonlinearities.sigmoid
else:
    raise Exception(
        'nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, retry with a supported one!'
    )
dtensor5 = T.TensorType('float32', (False, ) * 5)
input_var = dtensor5('inputs')
target_var = T.ivector('targets')
Example #6
savedSamplesFile = np.load(pathSavedSamples)
inputParamsConfigLocal = savedSamplesFile['inputParamsConfig'].item()  #saved dict becomes an object, so need to turn back into dict!

screeningModelForPatches = inputParamsConfigLocal['fp_model_to_use']
masterPatchFolder = '/diskStation/LIDC/36368/screenPatches'
test_pos_paths = os.listdir(os.path.join(masterPatchFolder, 'pos'))
test_neg_paths = os.listdir(os.path.join(masterPatchFolder, 'neg'))
test_pos_paths = [
    os.path.join(masterPatchFolder, 'pos', x) for x in test_pos_paths
]
test_neg_paths = [
    os.path.join(masterPatchFolder, 'neg', x) for x in test_neg_paths
]
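# Build the test-set arrays, labels, and positive/negative counts from the screening-patch file paths.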
test_samples, test_labels, test_len_pos, test_len_neg = SupportFuncs.mat_generate_from_path(
    test_pos_paths, test_neg_paths)
#test_samples = savedSamplesFile['inputs'] #the test samples/labels were saved in a particular order, so they can be loaded in same order
#test_labels = savedSamplesFile['targets']
#test_pred_orig = savedSamplesFile['test_pred']
########################
######Input Params######
#inputParamsConfigLocal = {}
#inputParamsConfigLocal['input_shape'] = '36, 36, 8'
#inputParamsConfigLocal['learning_rate'] = '0.04'
#inputParamsConfigLocal['momentum'] = '0.9'
#inputParamsConfigLocal['num_epochs'] = '35'
#inputParamsConfigLocal['batch_size'] = '100'
#inputParamsConfigLocal['noduleCaseFilterParams'] = 'NumberOfObservers,>=,2;IURatio,>=,0.2;SliceThicknessDicom,>=,1.5;SliceThicknessDicom,<=,3'
#inputParamsConfigLocal['train_set_size'] = '150000'
#inputParamsConfigLocal['test_set_size'] = '500'
#inputParamsConfigLocal['positive_set_ratio'] = '0.5'
Example #7
                filepath = folderpath + '/' + i
                with open(filepath, 'rb') as csvfile:
                    reader = csv.reader(csvfile)
                    for line in reader:
                        train_x.append(line[0].replace('/','_'))
                        train_y.append(line[1])

        # print the num of training/testing samples
        numTrainSamples = len(train_x)
        print('numTrainSamples=' + str(numTrainSamples))
        numTestSamples = len(test_x)
        print('numTestSamples=' + str(numTestSamples))

        # read corresponding training set
        # set the name for finding the data
        tr_len_pos, tr_len_neg, ts_len_pos, ts_len_neg = SupportFuncs.load_data(inputParamsLoadData)

        # make sure only with those in the 9 folds
        # training with tr_len_pos, tr_len_neg, ts_len_pos, ts_len_neg
        training_file_handle = tables.open_file(training_filename, mode='r')   #file closed after both train/test done
        train_set = training_file_handle.root.train_set
        train_label = training_file_handle.root.train_label
        test_set = training_file_handle.root.test_set
        test_label = training_file_handle.root.test_label
        val_set = training_file_handle.root.val_set
        val_label = training_file_handle.root.val_label

        tr_len_pos = len(np.where(train_label[:]==1)[0]); ts_len_pos = len(np.where(test_label[:]==1)[0])
        tr_len_neg = train_set.shape[0] - tr_len_pos; ts_len_neg = test_set.shape[0] - ts_len_pos
        print("Train set number of positives:" + str(tr_len_pos))
        print("Train set number of negatives:" + str(tr_len_neg))