test_pred_full_volume_softmax1 = np.exp(test_pred_full_volume[0, 1, :, :, :]) / (
    np.exp(test_pred_full_volume[0, 0, :, :, :]) + np.exp(test_pred_full_volume[0, 1, :, :, :]))
uniqueStatsPath = '/raida/apezeshk/lung_dicom_dir/p0614/20000101/s30983/uniqueStats_p0614_20000101_s30983.mat'
uniqueStatsData = sio.loadmat(uniqueStatsPath)
noduleMask = uniqueStatsData['allMaxRadiologistMsk']
if chopVolumeFlag == 1:
    noduleMaskCrop = noduleMask[300:512, 256:500, 200:300]
else:
    noduleMaskCrop = noduleMask
# noduleMaskResize = np.zeros((test_pred_full_volume_softmax0.shape[0], test_pred_full_volume_softmax0.shape[1],
#                              test_pred_full_volume_softmax0.shape[2]))
# for i in range(0, test_pred_full_volume_softmax0.shape[2]):
#     noduleMaskResize[:, :, i] = scipy.misc.imresize(noduleMaskCrop[:, :, i],
#                                                     (test_pred_full_volume_softmax0.shape[0],
#                                                      test_pred_full_volume_softmax0.shape[1]))
dsfactor = [w / float(g) for w, g in zip(test_pred_full_volume_softmax0.shape, noduleMaskCrop.shape)]
noduleMaskResize = nd.interpolation.zoom(noduleMaskCrop.astype('float'), zoom=dsfactor)
vol_scores0 = test_pred_full_volume_softmax0.flatten()
vol_scores1 = test_pred_full_volume_softmax1.flatten()
vol_labels = noduleMaskResize.flatten()
vol_labels = vol_labels > 0.5
vol_labels = vol_labels.astype('int')
vol_scores_all = np.hstack((vol_scores0.reshape((len(vol_scores0), 1)),
                            vol_scores1.reshape((len(vol_scores1), 1))))
test_AUC, test_varAUC = SupportFuncs.Pred2AUC(vol_scores_all, vol_labels)
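# The softmax maps above are computed with raw np.exp, which can overflow for large network
# outputs. Below is a minimal sketch (not part of the original pipeline) of a numerically
# stable two-class softmax; it assumes test_pred_full_volume holds the raw class scores with
# shape (1, 2, D, H, W), as implied by the indexing above.
def two_class_softmax_stable(pred_volume):
    # pred_volume: raw scores, shape (1, 2, D, H, W); returns (p_class0, p_class1) maps
    logits = pred_volume[0]                                # shape (2, D, H, W)
    logits = logits - logits.max(axis=0, keepdims=True)    # per-voxel max subtraction for stability
    exps = np.exp(logits)
    probs = exps / exps.sum(axis=0, keepdims=True)
    return probs[0], probs[1]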
def Main(inputParamsConfig):
    # input_shape, learning_rate, momentum, num_epochs, batchsize, data_path, train_set_size, test_set_size,
    # positive_set_ratio, dropout, nonlinearityToUse, augmentation
    experiment_id = str(time.strftime("%Y%m%d%H%M%S"))
    input_shape = inputParamsConfig['input_shape']
    learning_rate = inputParamsConfig['learning_rate']
    momentum = inputParamsConfig['momentum']
    num_epochs = inputParamsConfig['num_epochs']
    batch_size = inputParamsConfig['batch_size']
    noduleCaseFilterParams = inputParamsConfig['noduleCaseFilterParams']
    data_path = inputParamsConfig['data_path']
    train_set_size = inputParamsConfig['train_set_size']
    test_set_size = inputParamsConfig['test_set_size']
    positive_set_ratio = inputParamsConfig['positive_set_ratio']
    dropout = inputParamsConfig['dropout']
    nonlinearityToUse = inputParamsConfig['nonlinearityToUse']
    nonlinearityToUseFC = inputParamsConfig['nonlinearityToUseFC']
    numberOfLayers = inputParamsConfig['numberOfLayers']
    numberOfFCUnits = inputParamsConfig['numberOfFCUnits']
    numberOfFCLayers = inputParamsConfig['numberOfFCLayers']
    numberOfConvFilts = inputParamsConfig['numberOfConvFilts']
    filterSizeTable = inputParamsConfig['filterSizeTable']
    augmentationRegularFlag = inputParamsConfig['augmentationRegularFlag']
    augmentationTransformFlag = inputParamsConfig['augmentationTransformFlag']
    weightInitToUse = inputParamsConfig['weightInitToUse']
    lrDecayFlag = inputParamsConfig['lrDecayFlag']
    biasInitVal = inputParamsConfig['biasInitVal']
    fp_per_case = inputParamsConfig['fp_per_case']
    phase = inputParamsConfig['phase']
    discrim_shape = inputParamsConfig['discrim_shape']  # was inputParamsConfigLocal, which is undefined inside Main
    pos_test_size = inputParamsConfig['pos_test_size']
    fp_model_to_use = inputParamsConfig['fp_model_to_use']

    print("Learning rate: '%s', momentum: '%s', num_epochs: '%s', batch size: '%s', data_path: '%s', "
          "Train Set Size: '%s', Test Set Size: '%s', Positive Set Ratio: '%s', dropout: '%s', "
          "nonlinearityToUse: '%s', augmentationRegularFlag: '%s', augmentationTransformFlag: '%s', "
          "number of layers: '%s', pos_test_size: '%s'"
          % (str(learning_rate), str(momentum), str(num_epochs), str(batch_size), data_path,
             str(train_set_size), str(test_set_size), str(positive_set_ratio), str(dropout),
             str(nonlinearityToUse), str(augmentationRegularFlag), str(augmentationTransformFlag),
             str(numberOfLayers), str(pos_test_size)))
    print("Phase: '%s', Num FC Layers: '%s', Num FC Units: '%s', Number of ConvFilters: '%s'"
          % (str(phase), str(numberOfFCLayers), str(numberOfFCUnits), str(numberOfConvFilts)))

    num_epochs = int(num_epochs)
    batch_size = int(batch_size)

    # Make different filenames per fp_per_case s.t. original & supplemented train sets are maintained.
    if phase == 'screen':
        if noduleCaseFilterParams == '':
            if fp_per_case == '0':
                training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                                 + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                                 + '.hdf5')
            else:
                training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                                 + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                                 + '_fp' + fp_model_to_use + '_' + fp_per_case + '.hdf5')
        else:
            if fp_per_case == '0':
                training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                                 + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                                 + '_filt.hdf5')
            else:
                training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                                 + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                                 + '_fp' + fp_model_to_use + '_' + fp_per_case + '_filt.hdf5')
    elif phase == 'discrim':
        if noduleCaseFilterParams == '':
            training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                             + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                             + '_discrim' + discrim_shape.replace(' ', '').replace(',', '')
                                             + '_' + fp_model_to_use + '.hdf5')
        else:
            training_filename = os.path.join('./', input_shape + '_' + str(augmentationRegularFlag)
                                             + str(augmentationTransformFlag) + '_' + str(positive_set_ratio)
                                             + '_discrim' + discrim_shape.replace(' ', '').replace(',', '')
                                             + '_' + fp_model_to_use + '_filt.hdf5')

    # We save the created train and test set of size X and positive ratio r to reduce the overhead
    # in running the pipeline.
    if os.path.exists(training_filename):
        print("Training file already exists, reading it...")
        # with h5py.File(training_filename, 'r') as data_set:
        #     tmp_train_set = data_set.get('train_set')  # Reading list of patients and test file paths
        #     train_set = np.array(tmp_train_set)
        #     tmp_train_label = data_set.get('train_label')
        #     train_label = np.array(tmp_train_label)
        #     tmp_test_set = data_set.get('test_set')
        #     test_set = np.array(tmp_test_set)
        #     tmp_test_label = data_set.get('test_label')
        #     test_label = np.array(tmp_test_label)
        #     tmp_val_set = data_set.get('val_set')
        #     val_set = np.array(tmp_val_set)
        #     tmp_val_label = data_set.get('val_label')
        #     val_label = np.array(tmp_val_label)
        # tr_len_pos = len(np.where(train_label == 1)[0])
        # tr_len_neg = len(np.where(train_label == 0)[0])
    else:
        inputParamsLoadData = {}
        inputParamsLoadData['data_path'] = data_path
        inputParamsLoadData['input_shape'] = input_shape
        inputParamsLoadData['train_set_size'] = int(train_set_size)
        inputParamsLoadData['test_set_size'] = int(test_set_size)
        inputParamsLoadData['augmentationRegularFlag'] = int(augmentationRegularFlag)
        inputParamsLoadData['augmentationTransformFlag'] = int(augmentationTransformFlag)
        inputParamsLoadData['fp_per_case'] = int(fp_per_case)
        inputParamsLoadData['pos_test_size'] = int(pos_test_size)
        inputParamsLoadData['positive_set_ratio'] = float(positive_set_ratio)
        inputParamsLoadData['fp_model_to_use'] = fp_model_to_use
        inputParamsLoadData['phase'] = phase
        inputParamsLoadData['discrim_shape'] = discrim_shape
        inputParamsLoadData['noduleCaseFilterParams'] = noduleCaseFilterParams
        inputParamsLoadData['training_filename'] = training_filename
        tr_len_pos, tr_len_neg, ts_len_pos, ts_len_neg = SupportFuncs.load_data(inputParamsLoadData)
        # train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(
        #     data_path, int(train_set_size), int(test_set_size), int(augmentationFlag), float(positive_set_ratio))
        # with h5py.File(training_filename, 'w') as data_set:
        #     # Write the dataset to a h5py file
        #     data_set.create_dataset('train_set', data=train_set)
        #     data_set.create_dataset('train_label', data=train_label)
        #     data_set.create_dataset('test_set', data=test_set)
        #     data_set.create_dataset('test_label', data=test_label)
        #     data_set.create_dataset('val_set', data=val_set)
        #     data_set.create_dataset('val_label', data=val_label)

    training_file_handle = tables.open_file(training_filename, mode='r')  # file closed after both train/test done
    train_set = training_file_handle.root.train_set
    train_label = training_file_handle.root.train_label
    test_set = training_file_handle.root.test_set
    test_label = training_file_handle.root.test_label
    val_set = training_file_handle.root.val_set
    val_label = training_file_handle.root.val_label
    tr_len_pos = len(np.where(train_label[:] == 1)[0])
    ts_len_pos = len(np.where(test_label[:] == 1)[0])
    tr_len_neg = train_set.shape[0] - tr_len_pos
    ts_len_neg = test_set.shape[0] - ts_len_pos
    print("Train set number of positives: " + str(tr_len_pos))
    print("Train set number of negatives: " + str(tr_len_neg))

    if nonlinearityToUse == 'relu':
        nonLinearity = lasagne.nonlinearities.rectify
    elif nonlinearityToUse == 'tanh':
        nonLinearity = lasagne.nonlinearities.tanh
    elif nonlinearityToUse == 'sigmoid':
        nonLinearity = lasagne.nonlinearities.sigmoid
    else:
        raise Exception('nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, '
                        'retry with a supported one!')

    if nonlinearityToUseFC == 'relu':
        nonLinearityFC = lasagne.nonlinearities.rectify
    elif nonlinearityToUseFC == 'tanh':
        nonLinearityFC = lasagne.nonlinearities.tanh
    elif nonlinearityToUseFC == 'sigmoid':
        nonLinearityFC = lasagne.nonlinearities.sigmoid
    else:
        raise Exception('nonlinearityToUseFC: Unsupported nonlinearity type has been selected for the network, '
                        'retry with a supported one!')

    biasInit = lasagne.init.Constant(biasInitVal)  # for relu use biasInit=1 s.t. inputs to relu are positive at start

    if weightInitToUse == 'normal':  # per documentation, different gains should be used depending on nonlinearity
        weight_init = lasagne.init.Normal()
    elif weightInitToUse == 'He':
        if nonlinearityToUse == 'relu':
            gainToUse = np.sqrt(2)
        else:
            gainToUse = 1
        weight_init = lasagne.init.HeNormal(gain=gainToUse)
    else:
        raise Exception('weightInitToUse: Unsupported weight initialization type has been selected, '
                        'retry with a supported one!')

    if lrDecayFlag == 1:  # if the learning rate should be updated, it has to be a shared variable
        learning_rate = theano.shared(np.array(learning_rate, dtype=theano.config.floatX))
        decayRate = 0.5
    else:
        learning_rate = float(learning_rate)

    dtensor5 = T.TensorType('float32', (False,) * 5)
    input_var = dtensor5('inputs')
    target_var = T.ivector('targets')
    inputParamsNetwork = dict(n_layer=numberOfLayers, shape=input_shape, dropout=float(dropout),
                              nonLinearity=nonLinearity, biasInit=biasInit, filterSizeTable=filterSizeTable,
                              numberOfFCLayers=numberOfFCLayers, numberOfFCUnits=numberOfFCUnits,
                              numberOfConvFilts=numberOfConvFilts, nonLinearityFC=nonLinearityFC)
    network = Build_3dcnn(weight_init, inputParamsNetwork, input_var)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # loss = np.mean(loss)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate,
                                                momentum=float(momentum))

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)  # mode='DebugMode'

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_prediction])  # mode='DebugMode'

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(int(num_epochs)):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in Iterate_minibatches(train_set, train_label, int(batch_size), shuffle=True):
            inputs, targets = batch
            inputs = np.float32(inputs)
            train_err += train_fn(inputs, targets)
            train_batches += 1

        if lrDecayFlag == 1:  # learning_rate is a shared variable (and is decayed) only when lrDecayFlag == 1
            print('learning_rate = ' + str(learning_rate.get_value()))
            if ((epoch + 1) % 12) == 0:
                learning_rate.set_value(decayRate * learning_rate.get_value())

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        all_val_pred = np.empty((0, 2), dtype=float)  # n_samples x 2 class predictions for all validation samples
        all_val_labels = np.empty((0, 1), dtype=float)  # n_samples x 1 labels for all validation samples
        for batch in Iterate_minibatches(val_set, val_label, int(batch_size), shuffle=False):
            inputs, targets = batch
            inputs = np.float32(inputs)
            err, acc, val_pred = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
            all_val_pred = np.vstack((all_val_pred, val_pred))
            all_val_labels = np.append(all_val_labels, targets)
        val_AUC, val_varAUC = SupportFuncs.Pred2AUC(all_val_pred, all_val_labels)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
        print("  validation AUC: " + str(val_AUC) + ", std: " + str(np.sqrt(val_varAUC)))
        # epoch_det and the accumulator lists below are assumed to be initialized earlier in the script
        epoch_det[epoch + 1] = {'all_val_accuracy': (val_acc / val_batches),
                                'all_val_loss': (val_err / val_batches),
                                'training_loss': (train_err / train_batches)}
        all_val_accuracy.append(val_acc / val_batches)
        all_val_loss.append(val_err / val_batches)
        all_val_AUC.append(val_AUC)
        training_loss.append(train_err / train_batches)

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    all_test_pred = np.empty((0, 2), dtype=float)  # n_samples x 2 class predictions for all test samples
    all_test_labels = np.empty((0, 1), dtype=float)  # n_samples x 1 labels for all test samples
    for batch in Iterate_minibatches(test_set, test_label, int(batch_size), shuffle=False):
        inputs, targets = batch
        inputs = np.float32(inputs)
        err, acc, test_pred = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
        all_test_pred = np.vstack((all_test_pred, test_pred))
        all_test_labels = np.append(all_test_labels, targets)
    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(all_test_pred, all_test_labels)

    ##########################################
    training_file_handle.close()
    ##########################################

    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))
    print("test AUC: " + str(test_AUC) + ", std: " + str(np.sqrt(test_varAUC)))

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    tmp = input_shape.replace(' ', '')  # get rid of space and comma in input shape
    tmp = tmp.replace(',', '')
    filenameModel = os.path.join(model_path, 'cnn_' + tmp + '_' + experiment_id)
    filenameSamples = filenameModel + '_samples'
    np.savez(filenameModel, *lasagne.layers.get_all_param_values(network))
    np.savez(filenameSamples, inputs=inputs, targets=targets, err=err, acc=acc, training_loss=training_loss,
             test_pred=test_pred, all_test_pred=all_test_pred, all_test_labels=all_test_labels,
             inputParamsConfig=inputParamsConfig, tr_len_pos=tr_len_pos, tr_len_neg=tr_len_neg,
             all_val_loss=all_val_loss, all_val_accuracy=all_val_accuracy,
             test_AUC=test_AUC, test_varAUC=test_varAUC)

    if not os.path.exists('./figures'):
        os.makedirs('./figures')
    fig = plt.figure()
    # plt.plot(training_loss, 'r', val_accuracy, 'g', all_val_loss, 'b')
    plt.plot(training_loss, 'r', label='Training_loss=' + str("%.6f" % training_loss[num_epochs - 1]))
    plt.plot(all_val_loss, 'r--', label='Val_loss=' + str("%.3f" % all_val_loss[num_epochs - 1]))
    plt.plot(all_val_accuracy, 'g', label='Val_accuracy=' + str("%.3f" % all_val_accuracy[num_epochs - 1]))
    plt.annotate(str("%.3f" % all_val_accuracy[num_epochs - 1]),
                 xy=(num_epochs - 1, all_val_accuracy[num_epochs - 1]),
                 xytext=(num_epochs - 70, 0.6),
                 arrowprops=dict(facecolor='black', shrink=0.05))
    plt.annotate(str("%.6f" % training_loss[num_epochs - 1]),
                 xy=(num_epochs - 1, training_loss[num_epochs - 1]),
                 xytext=(num_epochs - 70, 0.3),
                 arrowprops=dict(facecolor='black', shrink=0.05))
    plt.ylabel('Training loss and Validation accuracy')
    plt.xlabel('Number of Epochs')
    plt.title('Accuracy and Loss Changes')
    plt.legend(fontsize=13, loc=10)
    try:
        fig.savefig(os.path.join(figures_path, experiment_id))  # save the figure to file
    except:
        Make_sure_path_exists(figures_path)
    plt.close(fig)
    plt.show()
    # save_model(result_path, experiment_id, str(input_shape), n_layers, int(batchsize), num_epochs, momentum,
    #            learning_rate, 2, len(train_set), len(test_set), (test_err / test_batches),
    #            (test_acc / test_batches), test_AUC[0], np.sqrt(test_varAUC), augmentation)
    plt.close(fig)
    # plt.show()
    Save_model(result_path, experiment_id, str(input_shape), numberOfLayers, int(batch_size), num_epochs, momentum,
               inputParamsConfig['learning_rate'], 2, tr_len_pos + tr_len_neg, ts_len_pos + ts_len_neg,
               (test_err / test_batches), (test_acc / test_batches), test_AUC[0], np.sqrt(test_varAUC),
               augmentationRegularFlag, augmentationTransformFlag, nonlinearityToUse,
               dropout, tr_len_pos, tr_len_neg)
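    # A minimal sketch (not in the original script) of how the weights written by np.savez above
    # could be restored later, mirroring the "load them again later" comment and the
    # set_all_param_values() usage in the test-only script further below. np.savez appends '.npz'
    # to filenameModel, and `network` is assumed to be a freshly built Build_3dcnn with the same
    # architecture/config:
    #
    # with np.load(filenameModel + '.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)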
noduleMaskCrop.astype('float'), zoom=dsfactor)
noduleMaskResize = noduleMaskResize > 0.5
noduleMaskResize = noduleMaskResize.astype('int')
print(len(np.where(noduleMaskResize == 1)[0]))
vol_scores_zero = test_pred_full_volume_softmax0.flatten()
vol_scores_one = test_pred_full_volume_softmax1.flatten()
vol_scores_zero = vol_scores_zero.reshape(len(vol_scores_zero), 1)
vol_scores_one = vol_scores_one.reshape(len(vol_scores_one), 1)
if case_flag == 1:
    vol_score = np.concatenate((vol_scores_zero, vol_scores_one), axis=1)
    total_vol_score_tmp = np.vstack((total_vol_score_tmp, vol_score))
    vol_scores_zoronone = np.zeros  # note: binds the function itself without calling it; appears to be unused leftover
    vol_labels = noduleMaskResize.flatten()
    vol_labels_tmp = np.concatenate((vol_labels_tmp, vol_labels), axis=0)
else:
    total_vol_score_tmp = np.concatenate((vol_scores_zero, vol_scores_one), axis=1)
    vol_scores_zoronone = np.zeros  # same unused leftover as above
    vol_labels_tmp = noduleMaskResize.flatten()
    case_flag = 1
test_AUC, test_varAUC = SupportFuncs.Pred2AUC(total_vol_score_tmp, vol_labels_tmp.astype('int'))
print(test_AUC)
def Main(inputParamsConfig):
    # input_shape, learning_rate, momentum, num_epochs, batchsize, data_path, train_set_size, test_set_size,
    # positive_set_ratio, dropout, nonlinearityToUse, augmentation
    experiment_id = str(time.strftime("%Y%m%d%H%M%S"))
    input_shape = inputParamsConfig['input_shape']
    learning_rate = inputParamsConfig['learning_rate']
    momentum = inputParamsConfig['momentum']
    num_epochs = inputParamsConfig['num_epochs']
    batch_size = inputParamsConfig['batch_size']
    data_path = inputParamsConfig['data_path']
    train_set_size = inputParamsConfig['train_set_size']
    test_set_size = inputParamsConfig['test_set_size']
    positive_set_ratio = inputParamsConfig['positive_set_ratio']
    dropout = inputParamsConfig['dropout']
    nonlinearityToUse = inputParamsConfig['nonlinearityToUse']
    numberOfLayers = inputParamsConfig['numberOfLayers']
    augmentationFlag = inputParamsConfig['augmentationFlag']

    print("Learning rate: '%s', momentum: '%s', num_epochs: '%s', batch size: '%s', data_path: '%s', "
          "Train Set Size: '%s', Test Set Size: '%s', Positive Set Ratio: '%s', dropout: '%s', "
          "nonlinearityToUse: '%s', augmentationFlag: '%s', number of layers: '%s'"
          % (str(learning_rate), str(momentum), str(num_epochs), str(batch_size), data_path,
             str(train_set_size), str(test_set_size), str(positive_set_ratio), str(dropout),
             str(nonlinearityToUse), str(augmentationFlag), str(numberOfLayers)))

    num_epochs = int(num_epochs)
    batch_size = int(batch_size)

    # We save the created train and test set of size X and positive ratio r to reduce the overhead
    # in running the pipeline.
    ps = []
    ng = []
    patient_id = []
    with h5py.File(os.path.join('/diskStation/temp/test_500_0.3_28288 .hdf5'), 'r') as hf:
        print('List of arrays in this file: \n', hf.keys())
        tmp_test_paths = hf.get('Test_set')  # Reading list of patients and test file paths
        ps = np.array(tmp_test_paths)  # full paths to all positive test patches
        tmp_test_paths = hf.get('neg_test_set')
        ng = np.array(tmp_test_paths)
    # path_to_pos_tmp = '/home/shamidian/Summer2016/DeepMed/28288/pos_28288'
    # path_to_neg_tmp = '/home/shamidian/Summer2016/DeepMed/28288/neg_smp_0_28288'
    # for item in os.listdir(path_to_pos_tmp):
    #     ps.append(os.path.join(path_to_pos_tmp, item))
    # for items in os.listdir(path_to_neg_tmp):
    #     ng.append(os.path.join(path_to_neg_tmp, items))
    test_set, test_label = SupportFuncs.mat_generate_from_path(ps, ng)
    # train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(
    #     data_path, int(train_set_size), int(test_set_size), int(augmentationFlag), float(positive_set_ratio))

    if nonlinearityToUse == 'relu':
        nonLinearity = lasagne.nonlinearities.rectify
    elif nonlinearityToUse == 'tanh':
        nonLinearity = lasagne.nonlinearities.tanh
    elif nonlinearityToUse == 'sigmoid':
        nonLinearity = lasagne.nonlinearities.sigmoid
    else:
        raise Exception('nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, '
                        'retry with a supported one!')

    dtensor5 = T.TensorType('float32', (False,) * 5)
    input_var = dtensor5('inputs')
    target_var = T.ivector('targets')
    inputParamsNetwork = dict(n_layer=numberOfLayers, shape=input_shape, dropout=float(dropout),
                              nonLinearity=nonLinearity)
    # weight_init and param_values are assumed to be defined earlier in this script
    # (e.g., param_values loaded from a previously saved model file).
    network = Build_3dcnn(weight_init, inputParamsNetwork, input_var)
    lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # loss = np.mean(loss)
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=float(learning_rate),
                                                momentum=float(momentum))

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)  # mode='DebugMode'

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_prediction])  # mode='DebugMode'

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    all_test_pred = np.empty((0, 2), dtype=float)  # n_samples x 2 class predictions for all test samples
    all_test_labels = np.empty((0, 1), dtype=float)  # n_samples x 1 labels for all test samples
    for batch in Iterate_minibatches(test_set, test_label, int(batch_size), shuffle=False):
        inputs, targets = batch
        inputs = np.float32(inputs)
        err, acc, test_pred = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
        all_test_pred = np.vstack((all_test_pred, test_pred))
        all_test_labels = np.append(all_test_labels, targets)
    test_AUC, test_varAUC = SupportFuncs.Pred2AUC(all_test_pred, all_test_labels)
test_set_size = inputParamsConfigAll['test_set_size']
positive_set_ratio = inputParamsConfigAll['positive_set_ratio']
dropout = inputParamsConfigAll['dropout']
nonlinearityToUse = inputParamsConfigAll['nonlinearityToUse']
augmentationFlag = inputParamsConfigAll['augmentationFlag']

print("Learning rate: '%s', momentum: '%s', num_epochs: '%s', batch size: '%s', data_path: '%s', "
      "Train Set Size: '%s', Test Set Size: '%s', Positive Set Ratio: '%s', dropout: '%s', "
      "nonlinearityToUse: '%s', augmentationFlag: '%s'"
      % (str(learning_rate), str(momentum), str(num_epochs), str(batch_size), data_path,
         str(train_set_size), str(test_set_size), str(positive_set_ratio), str(dropout),
         str(nonlinearityToUse), str(augmentationFlag)))

num_epochs = int(num_epochs)
batch_size = int(batch_size)

train_set, train_label, test_set, test_label, val_set, val_label = SupportFuncs.load_data(
    data_path, int(train_set_size), int(test_set_size), int(augmentationFlag), float(positive_set_ratio))

if nonlinearityToUse == 'relu':
    nonLinearity = lasagne.nonlinearities.rectify
elif nonlinearityToUse == 'tanh':
    nonLinearity = lasagne.nonlinearities.tanh
elif nonlinearityToUse == 'sigmoid':
    nonLinearity = lasagne.nonlinearities.sigmoid
else:
    raise Exception('nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, '
                    'retry with a supported one!')

dtensor5 = T.TensorType('float32', (False,) * 5)
input_var = dtensor5('inputs')
target_var = T.ivector('targets')
savedSamplesFile = np.load(pathSavedSamples)
inputParamsConfigLocal = savedSamplesFile['inputParamsConfig'].item()  # saved dict becomes an object, so need to turn back into dict!
screeningModelForPatches = inputParamsConfigLocal['fp_model_to_use']
masterPatchFolder = 'diskStation/LIDC/36368/screenPatches'
test_pos_paths = os.listdir(os.path.join(masterPatchFolder, 'pos'))
test_neg_paths = os.listdir(os.path.join(masterPatchFolder, 'neg'))
test_pos_paths = [os.path.join(masterPatchFolder, 'pos', x) for x in test_pos_paths]
test_neg_paths = [os.path.join(masterPatchFolder, 'neg', x) for x in test_neg_paths]
test_samples, test_labels, test_len_pos, test_len_neg = SupportFuncs.mat_generate_from_path(
    test_pos_paths, test_neg_paths)
# test_samples = savedSamplesFile['inputs']  # the test samples/labels were saved in a particular order,
# test_labels = savedSamplesFile['targets']  # so they can be loaded in the same order
# test_pred_orig = savedSamplesFile['test_pred']

########################
###### Input Params ######
# inputParamsConfigLocal = {}
# inputParamsConfigLocal['input_shape'] = '36, 36, 8'
# inputParamsConfigLocal['learning_rate'] = '0.04'
# inputParamsConfigLocal['momentum'] = '0.9'
# inputParamsConfigLocal['num_epochs'] = '35'
# inputParamsConfigLocal['batch_size'] = '100'
# inputParamsConfigLocal['noduleCaseFilterParams'] = 'NumberOfObservers,>=,2;IURatio,>=,0.2;SliceThicknessDicom,>=,1.5;SliceThicknessDicom,<=,3'
# inputParamsConfigLocal['train_set_size'] = '150000'
# inputParamsConfigLocal['test_set_size'] = '500'
# inputParamsConfigLocal['positive_set_ratio'] = '0.5'
    filepath = folderpath + '/' + i
    with open(filepath, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            train_x.append(line[0].replace('/', '_'))
            train_y.append(line[1])

# print the number of training/testing samples
numTrainSamples = len(train_x)
print('numTrainSamples=' + str(numTrainSamples))
numTestSamples = len(test_x)
print('numTestSamples=' + str(numTestSamples))

# read the corresponding training set
# (set the name for finding the data)
tr_len_pos, tr_len_neg, ts_len_pos, ts_len_neg = SupportFuncs.load_data(inputParamsLoadData)
# make sure to use only those in the 9 folds
# training with tr_len_pos, tr_len_neg, ts_len_pos, ts_len_neg
training_file_handle = tables.open_file(training_filename, mode='r')  # file closed after both train/test done
train_set = training_file_handle.root.train_set
train_label = training_file_handle.root.train_label
test_set = training_file_handle.root.test_set
test_label = training_file_handle.root.test_label
val_set = training_file_handle.root.val_set
val_label = training_file_handle.root.val_label
tr_len_pos = len(np.where(train_label[:] == 1)[0])
ts_len_pos = len(np.where(test_label[:] == 1)[0])
tr_len_neg = train_set.shape[0] - tr_len_pos
ts_len_neg = test_set.shape[0] - ts_len_pos
print("Train set number of positives: " + str(tr_len_pos))
print("Train set number of negatives: " + str(tr_len_neg))