import os
import glob
import time

from radio import dataset as ds  # RadIO's bundled dataset module
from radio.dataset import FilesIndex, Pipeline
from CTImagesCustomBatch import CTImagesCustomBatch
import CTsliceViewer as slices

save_path = 'C:/Users/linde/Documents/PreprocessedImages_CS_PE/'
if not os.path.exists(save_path):
    os.makedirs(save_path)

for string in ['PE']:  # still do PE
    path_cs = "C:/Users/linde/Documents/CS_PE_seperated/" + string + "/*"
    cs_index = FilesIndex(path=path_cs, dirs=True, sort=True)
    cs_dataset = ds.Dataset(index=cs_index, batch_class=CTImagesCustomBatch)

    # load and normalize these images
    load_and_normalize = (Pipeline()
                          .load(fmt='blosc',
                                components=['spacing', 'origin', 'images'])
                          .unify_spacing(shape=(400, 512, 512),
                                         spacing=(2.0, 1.0, 1.0),
                                         padding='constant')  # equalizes the spacings of both images and mask
                          .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them, values from grt team
    # .apply_lung_mask(padding=170)

Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/0*'
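# A minimal sketch (not part of the original fragment) of running the pipeline
# above and dumping the preprocessed components to save_path, mirroring the
# dump/next_batch pattern used elsewhere in this repo.
preprocess_line = (cs_dataset >> load_and_normalize
                   .dump(dst=save_path,
                         components=['spacing', 'origin', 'images']))
batch = preprocess_line.next_batch(batch_size=1, shuffle=False, n_epochs=1)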
import os

import pandas as pd
from tqdm import tqdm

config_update = {
    'preprocessing': {
        'mask_mode': args.mask_mode,
        'unify_spacing': {
            'padding': (args.padding_mode
                        if args.padding_mode != "zeros" else "constant")
        }
    }
}
config = get_config(config_update)

scans_index = ds.FilesIndex(path=args.dataset,
                            dirs=(args.fmt != 'raw'),
                            no_ext=(args.fmt == 'raw'))
scans_dataset = ds.Dataset(scans_index, batch_class=CTImagesMaskedBatch)

pbar = tqdm(total=len(scans_dataset), desc='Scans processed')

pipeline = (ds.Pipeline()
            .init_variable('pbar', pbar)
            .load(fmt=args.fmt)
            .unify_spacing(**config.preprocessing.unify_spacing))

if args.annotation:
    pipeline = (pipeline
                .fetch_nodules_info(nodules=pd.read_csv(args.annotation))
                .create_mask(mask_mode=config.preprocessing.mask_mode))

# masks and nodules are dumped only when annotations are provided
pipeline = (pipeline
            .dump(dst=os.path.join(RADIO_DATASETS_PATH, args.output),
                  components=['origin', 'spacing', 'images']
                  + (['masks', 'nodules'] if args.annotation else []))
            .update_variable('pbar', B('size'), mode='u'))

(scans_dataset >> pipeline).run(batch_size=args.batch_size)
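# The args.* fields above imply a command-line interface roughly like the
# sketch below; flag names and defaults are assumptions, only the attribute
# names (dataset, fmt, mask_mode, padding_mode, annotation, output,
# batch_size) are taken from the code above.
import argparse

parser = argparse.ArgumentParser(description='Preprocess CT scans')
parser.add_argument('--dataset', required=True, help='glob path to the input scans')
parser.add_argument('--fmt', default='raw', choices=['raw', 'blosc', 'dicom'])
parser.add_argument('--mask_mode', default='ratio', help='passed to create_mask')
parser.add_argument('--padding_mode', default='zeros', help='unify_spacing padding')
parser.add_argument('--annotation', default=None, help='CSV with nodule annotations')
parser.add_argument('--output', required=True, help='output folder under RADIO_DATASETS_PATH')
parser.add_argument('--batch_size', type=int, default=4)
args = parser.parse_args()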
# Define data folder (LUNA_mask)
LUNA_MASK = 'C:/Users/s120116/Documents/Allfolders/' + subset + '/*.mhd'  # set glob-mask for scans from Luna-dataset here

# folders for all savings
LUNA_val = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/validate'
LUNA_train = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/training'

luna_index = FilesIndex(path=LUNA_MASK, no_ext=True)
ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.228511122591230092662900221600'])
fix_ds = ds.Dataset(index=luna_index.create_subset(ixs), batch_class=CTImagesCustomBatch)

# make pipeline to load and segment; saves segmentations in masks
load_and_segment = (Pipeline()
                    .load(fmt='raw')
                    .get_lung_mask(rad=15))
                    # .unify_spacing_withmask(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0), padding='constant')  # equalizes the spacings of both images and mask
                    # .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them, values from grt team
                    # .apply_lung_mask(padding=170)

# pass training dataset through pipeline
lunaline_train = (fix_ds >> load_and_segment)  # .dump(dst=LUNA_train, components=['spacing', 'origin', 'images', 'segmentation'])
batch = lunaline_train.next_batch(batch_size=1, shuffle=False, n_epochs=1)
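# A quick visual sanity check of the result, assuming the custom get_lung_mask
# action fills the 'segmentation' component (the dump call above lists that
# component); CTsliceViewer is used the same way in other fragments of this repo.
import CTsliceViewer as slices

slices.multi_slice_viewer(batch.images)
slices.multi_slice_viewer(batch.segmentation)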
LUNA_train = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/training'
LUNA_test = 'C:/Users/s120116/Documents/Preprocessed_Images/validationData/'

if not os.path.exists(LUNA_test):
    os.makedirs(LUNA_test)

# if not os.path.exists(LUNA_val):
#     os.makedirs(LUNA_val)
#
# if not os.path.exists(LUNA_train):
#     os.makedirs(LUNA_train)

# set up dataset structure
luna_index = FilesIndex(path=LUNA_MASK, no_ext=True)  # preparing indexing structure
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

# Split dataset in training and validation parts ----------------------------------------------
# define path to save or load index files
# if Split:
#     # If dataset has already been split: make two subsets from indices for testing vs training
#     path = 'C:/Users/s120116/Documents/' + subset + ' - split/'
#
#     index_train = np.load(os.path.join(path, 'trainindex.npy'))
#     luna_index_train = luna_index.create_subset(index_train)
#     dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTImagesCustomBatch)
#
#     index_val = np.load(os.path.join(path, 'testindex.npy'))
#     luna_index_val = luna_index.create_subset(index_val)
#     dataset_val = ds.Dataset(index=luna_index_val, batch_class=CTImagesCustomBatch)
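# A sketch of how the trainindex.npy / testindex.npy files loaded in the
# commented block above could have been produced; the 80/20 ratio and the use
# of np.random.permutation are assumptions, not the original split procedure.
split_path = 'C:/Users/s120116/Documents/' + subset + ' - split/'
all_ids = luna_dataset.indices
perm = np.random.permutation(len(all_ids))
n_train = int(0.8 * len(all_ids))  # assumed split ratio
np.save(os.path.join(split_path, 'trainindex.npy'), all_ids[perm[:n_train]])
np.save(os.path.join(split_path, 'testindex.npy'), all_ids[perm[n_train:]])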
path = 'C:/Users/s120116/Documents/Preprocessed_Images/'
SaveFolder = 'Crops(16x32x32)CompleteDataset'

for sub in sublist:
    print(sub)

    # define folders in which validation and training data are
    LUNA_val = path + sub + ' - split/validate/*'
    LUNA_train = path + sub + ' - split/training/*'

    # set up dataset structure
    luna_index_val = FilesIndex(path=LUNA_val, dirs=True)  # preparing indexing structure
    luna_dataset_val = ds.Dataset(index=luna_index_val, batch_class=CTICB)

    luna_index_train = FilesIndex(path=LUNA_train, dirs=True)  # preparing indexing structure
    luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)


def make_folder(folderlist=[]):
    for folder in folderlist:
        if not os.path.exists(folder):
            os.makedirs(folder)


def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc',
                      components=['spacing', 'origin', 'images',
                                  'segmentation']))
    return pipeline
for sub in sublist:
    # LUNA_test = 'C:/Users/s120116/Documents/subset* - split/testing/*'
    # LUNA_train = 'C:/Users/s120116/Documents/subset* - split/training/*'

    LUNA_test = '/home/lshesse/' + sub + ' - split/testing/*'
    LUNA_train = '/home/lshesse/' + sub + ' - split/training/*'

    nodules_df = pd.read_csv(
        'C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/annotations.csv')
    # nodules_df = pd.read_csv('/home/lshesse/annotations.csv')

    luna_index_test = ds.FilesIndex(path=LUNA_test, dirs=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    luna_index_train = ds.FilesIndex(path=LUNA_train, dirs=True)  # preparing indexing structure
    luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)

    # save folders
    path = '/home/lshesse/'
    SaveFolder = 'preprocessed_files/'

    test_folder = path + SaveFolder + sub + 'testing'
    train_folder = path + SaveFolder + sub + 'training'
    folderlist = [test_folder, train_folder]
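    # Create the save folders with the helper defined earlier, then sample
    # nodule-centred crops. The sample_nodules call is a sketch: RadIO provides
    # such an action, but the share value and dumped components here are
    # placeholders, not the settings used for the Crops(16x32x32) dataset.
    make_folder(folderlist)

    crop_pipeline = (Pipeline()
                     .load(fmt='blosc',
                           components=['spacing', 'origin', 'images', 'segmentation'])
                     .fetch_nodules_info(nodules_df)
                     .create_mask()
                     .sample_nodules(batch_size=20, nodule_size=(16, 32, 32), share=0.5)
                     .dump(dst=train_folder, components=['images', 'masks']))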
    if os.path.isdir(path) == True:
        fileList_060.append(path)  # + '/' + os.listdir(path)[0]

    path = 'D:/DATA20181008/' + number + '/' + '190'
    if os.path.isdir(path) == True:
        filelist_190.append(path)  # + '/' + os.listdir(path)[0]

LUNA_pre = 'C:/Users/linde/Documents/CS_PE_seperatedtest'
if not os.path.exists(LUNA_pre):
    os.makedirs(LUNA_pre)

# set up dataset structure
luna_index_low = FilesIndex(path=fileList_060, sort=True, dirs=True)  # preparing indexing structure
luna_dataset_low = ds.Dataset(index=luna_index_low, batch_class=CTICB)

luna_index_high = FilesIndex(path=filelist_190, sort=True, dirs=True)  # preparing indexing structure
luna_dataset_high = ds.Dataset(index=luna_index_high, batch_class=CTICB)

cancer_cropline = load_pipeline()
line_low = luna_dataset_low >> cancer_cropline
line_high = luna_dataset_high >> cancer_cropline

# the low- and high-dose datasets must be paired scan-for-scan
for i in range(len(luna_dataset_low)):
    if luna_dataset_high.index.indices[i] != luna_dataset_low.index.indices[i]:
        print('error!' + ' high :' + luna_dataset_high.index.indices[i]
              + ' low: ' + luna_dataset_low.index.indices[i])
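# A sketch of stepping both pipeline lines in lockstep so each low-dose scan
# is processed together with its high-dose counterpart (the index check above
# assumes the two sorted datasets are paired one-to-one).
for _ in range(len(luna_dataset_low)):
    batch_low = line_low.next_batch(batch_size=1, shuffle=False)
    batch_high = line_high.next_batch(batch_size=1, shuffle=False)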
def make_dataset(folder):
    index = ds.FilesIndex(path=folder, dirs=True)
    dataset = ds.Dataset(index=index, batch_class=CTICB)
    return dataset
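# Example usage of the helper with one of the folder globs defined above
# (the specific folder is illustrative):
dataset_test = make_dataset(test_folder + '/*')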
                  components=['spacing', 'origin', 'images', 'segmentation', 'masks'])
    print(i)
    print(np.round((time.time() - start_time) / 60, 2))

slices.multi_slice_viewer(batchnew.images)

# get segmented images
Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/SpacingNew/incorrect/*'

pipeline_loadblosc = (Pipeline()
                      .load(fmt='blosc',
                            components=['spacing', 'origin', 'images', 'segmentation', 'masks']))

im_index = FilesIndex(path=Path, dirs=True)
batch_size = 1

ixs = np.array(['000274_IM000001'])
observed_scans = ds.Dataset(index=im_index.create_subset(ixs), batch_class=CTImagesCustomBatch)
observed_scans = ds.Dataset(index=im_index, batch_class=CTImagesCustomBatch)  # the full index overrides the single-scan subset above

lunaline_segm = (observed_scans >> pipeline_loadblosc)
batch_segm = lunaline_segm.next_batch(batch_size=batch_size, shuffle=False,
                                      n_epochs=1, drop_last=False)

slices.multi_slice_viewer(batch_segm.masks)
slices.multi_slice_viewer(batch_segm.images)

fileList = []
for i in range(12, 13):  # from 1 to number of scans
    number = str(i).zfill(6)
nodules_path = 'C:/Users/linde/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/AnnotatiesPim/nodule_data_adapted.xlsx'

# Preprocessing Images --------------------------------------------------------------------------------------------
savepath_preprocess = '../../../ResultingData/PreprocessedImages'

# makes folder for all savings
if not os.path.exists(savepath_preprocess):
    os.makedirs(savepath_preprocess)

# create a FilesIndex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# to check the index / dataset use: luna_index.indices or scan_dataset.index.indices
# these should contain one folder name per scan, and the names should differ per scan

# make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (Pipeline()
                       .load(fmt='dicom')  # loads all slices from folder in dataset
                       .unify_spacing(shape=(400, 512, 512),
                                      spacing=(2.0, 1.0, 1.0),
                                      padding='constant')  # equalizes the spacings
                       .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them

# pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess
                          .dump(dst=savepath_preprocess,
                                components=['images', 'spacing', 'origin']))
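# Execute the preprocessing over the whole dataset in a single pass; one scan
# per batch keeps memory bounded (these run parameters are a reasonable
# default, not taken from the original script).
preprocessing_pipeline.run(batch_size=1, shuffle=False, n_epochs=1)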
def eval_on_images(path, cnn, nodules_df,
                   crop_size=np.array([16, 32, 32]),
                   step_size=np.array([8, 8, 8]),
                   saveImages=False, savepath='path'):
    # get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True, sort=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline()
                     .load(fmt='blosc', components=['spacing', 'origin', 'images'])
                     .fetch_nodules_info_Utrecht(nodules_df)
                     .create_mask())
    preprocess_line = (luna_dataset_test >> preprocessing)

    # possible thresholds
    treshold_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95,
                     0.97, 0.99, 0.995, 0.998, 0.999, 0.9995, 0.9998, 0.9999, 1]

    FalsePositiveList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    SensitivityList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    TrueDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    MissedDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])

    # crop size and step size come from the arguments; batch size in which predictions should be made
    batch_size = 20

    folder = savepath + 'Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)

    folder_files = savepath + 'Image_evaluation'
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)

    index_list = []
    for k in range(len(luna_dataset_test)):
        start_time = time.perf_counter()
        batch = preprocess_line.next_batch(batch_size=1, shuffle=False)

        if os.path.isdir(batch.index.get_fullpath(batch.index.indices[0]) + '/segmentation'):
            print(batch.index.get_fullpath(batch.index.indices[0]) + '/segmentation')
            batch.load(fmt='blosc', components=['segmentation'])

        im_index = batch.indices
        index_list.append(str(im_index))

        # crop images to the bounding box of the segmentation (if present) to avoid classifying too much
        if batch.segmentation is not None:
            zmin, zmax, ymin, ymax, xmin, xmax = bbox2_3D(batch.segmentation)
            segmentation = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]  # extract segmentation
            bounding_im = batch.images[zmin:zmax, ymin:ymax, xmin:xmax]
            bounding_mask = batch.masks[zmin:zmax, ymin:ymax, xmin:xmax]
        else:
            bounding_im = batch.images
            bounding_mask = batch.masks
            segmentation = None

        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad, bounding_mask_pad = pad_for_Prediction(
            bounding_im, bounding_mask, crop_size, step_size)

        # make empty array for prediction
        size = bounding_im.shape
        prediction_size = np.ceil(size / step_size).astype(int)  # make sure all pixels get a mini-box
        prediction_map = np.zeros(prediction_size)

        start_pred_time = time.perf_counter()

        # get prediction map of image
        prediction_map = get_prediction_map(cnn, bounding_im_pad, prediction_map,
                                            step_size, crop_size, batch_size)

        # cast prediction map to same size as prediction image
        prediction_im = get_prediction_image(bounding_im, prediction_map, step_size)
        if segmentation is not None:
            prediction_im = prediction_im * segmentation  # predictions outside the segmentation are not relevant

        # save predicted images
        if saveImages == True:
            np.save(folder + '/' + 'prediction_im' + str(k), prediction_im)
            np.save(folder + '/' + 'bounding_im' + str(k), bounding_im)
            np.save(folder + '/' + 'bounding_mask' + str(k), bounding_mask)
            # np.save(folder + '/' + 'bounding_irrel' + str(k), bounding_segm)

        # determine whether predictions are correct
        for i in range(len(treshold_list)):
            treshold = treshold_list[i]
            TrueDetected, MissedDetected, FalsePositive = verify_predictions(
                prediction_im, bounding_mask, treshold, FP_correction=False)
            Sensitivity = calc_Sensitivity(TrueDetected, MissedDetected)

            SensitivityList[k, i] = Sensitivity
            FalsePositiveList[k, i] = FalsePositive
            TrueDetectedList[k, i] = TrueDetected
            MissedDetectedList[k, i] = MissedDetected

        print((time.perf_counter() - start_pred_time) / 60)
        print("--- %s minutes ---" % ((time.perf_counter() - start_time) / 60))

    # determine number of detected / missed nodules per threshold
    cor_tresh = np.sum(TrueDetectedList, 0)
    mis_tresh = np.sum(MissedDetectedList, 0)
    sens_tresh = cor_tresh / (cor_tresh + mis_tresh)
    fp_tresh = np.mean(FalsePositiveList, 0)

    # save all files
    np.save(folder_files + '/FPlist', FalsePositiveList)
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected', TrueDetectedList)
    np.save(folder_files + '/MissedDetected', MissedDetectedList)
    np.save(folder_files + '/sens_tresh', sens_tresh)
    np.save(folder_files + '/fp_tresh', fp_tresh)

    # write seriesuid to excel file
    df = pd.DataFrame({'series': index_list})
    writer = pd.ExcelWriter(folder_files + '/seriesUID.xlsx', engine='xlsxwriter')
    df.to_excel(writer, index=False, header=True)
    writer.save()

    # write FROC data to excel file
    df_2 = pd.DataFrame({'treshold': treshold_list,
                         'sensitivity': sens_tresh,
                         'false positives': fp_tresh})
    writer = pd.ExcelWriter(folder_files + '/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False, header=True)
    writer.save()

    # calculate overall sensitivity and fp_rate
    total_correct_detected = np.sum(TrueDetectedList, 0)
    total_nodules = total_correct_detected + np.sum(MissedDetectedList, 0)
    Sensitivity = np.divide(total_correct_detected, total_nodules)
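# bbox2_3D is used above but not defined in these fragments; a common numpy
# implementation that returns the tight bounding box of a binary volume is
# sketched below (an assumption about the original helper). The max indices
# are returned exclusive so the result works directly in slices like
# img[zmin:zmax, ymin:ymax, xmin:xmax].
def bbox2_3D(img):
    z = np.any(img, axis=(1, 2))  # collapse to per-slice occupancy
    y = np.any(img, axis=(0, 2))
    x = np.any(img, axis=(0, 1))
    zmin, zmax = np.where(z)[0][[0, -1]]
    ymin, ymax = np.where(y)[0][[0, -1]]
    xmin, xmax = np.where(x)[0][[0, -1]]
    return zmin, zmax + 1, ymin, ymax + 1, xmin, xmax + 1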
def eval_on_images(path, cnn, nodules_df, nodules_eval,
                   crop_size=np.array([16, 32, 32]),
                   step_size=np.array([8, 8, 8]),
                   FPminingNeed=False, saveImages=False):
    # get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    # replace negative diameters in irrelevant findings with a small diameter of 3 mm
    nodules_eval['diameter_mm'] = nodules_eval['diameter_mm'].replace(-1, 3)

    # this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline()
                     .load(fmt='blosc',
                           components=['spacing', 'origin', 'images', 'segmentation'])
                     .fetch_nodules_info(nodules_df)
                     .create_mask()
                     .fetch_nodules_info(nodules_eval, update=True))
    preprocess_line = (luna_dataset_test >> preprocessing)

    # possible thresholds
    treshold_list = [0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95, 0.97, 0.99, 0.995,
                     0.998, 0.999, 0.9995, 0.9998, 0.9999, 1]

    FalsePositiveList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    SensitivityList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    TrueDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    MissedDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])

    FPminingNeed = False  # mining is disabled here regardless of the argument
    FPminingList = []

    # define batch size in which predictions should be made
    batch_size = 20

    folder = 'Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)

    folder_files = 'Image_evaluation'
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)

    index_list = []
    for k in range(len(luna_dataset_test)):
        start_time = time.perf_counter()
        batch = preprocess_line.next_batch(batch_size=1)

        im_index = batch.indices
        index_list.append(str(im_index))

        # crop images to the bounding box of the segmentation to avoid classifying too much
        zmin, zmax, ymin, ymax, xmin, xmax = bbox2_3D(batch.segmentation)
        segmentation = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]  # extract segmentation

        batch.create_mask_irrelevant()  # when the segmentation is no longer needed, put irrelevant findings into this component

        # cut all images to the bounding box of the segmentation to reduce computational load
        bounding_im = batch.images[zmin:zmax, ymin:ymax, xmin:xmax]
        bounding_mask = batch.masks[zmin:zmax, ymin:ymax, xmin:xmax]
        bounding_segm = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]

        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad, bounding_mask_pad, bounding_segm_pad = pad_for_Prediction(
            bounding_im, bounding_mask, bounding_segm, crop_size, step_size)

        # make empty array for prediction
        size = bounding_im.shape
        prediction_size = np.ceil(size / step_size).astype(int)  # make sure all pixels get a mini-box
        prediction_map = np.zeros(prediction_size)

        start_pred_time = time.perf_counter()

        # get prediction map of image
        prediction_map = get_prediction_map(cnn, bounding_im_pad, prediction_map,
                                            step_size, crop_size, batch_size)

        # cast prediction map to same size as prediction image
        prediction_im = get_prediction_image(bounding_im, prediction_map, step_size)
        prediction_im = prediction_im * segmentation  # predictions outside the segmentation are not relevant

        # save predicted images
        if saveImages == True:
            np.save(folder + '/' + 'prediction_im' + str(k), prediction_im)
            np.save(folder + '/' + 'bounding_im' + str(k), bounding_im)
            np.save(folder + '/' + 'bounding_mask' + str(k), bounding_mask)
            np.save(folder + '/' + 'bounding_irrel' + str(k), bounding_segm)

        # determine whether predictions are correct
        for i in range(len(treshold_list)):
            treshold = treshold_list[i]
            TrueDetected, MissedDetected, FalsePositive = verify_predictions(
                prediction_im, bounding_mask, bounding_segm, treshold)
            Sensitivity = calc_Sensitivity(TrueDetected, MissedDetected)

            SensitivityList[k, i] = Sensitivity
            FalsePositiveList[k, i] = FalsePositive
            TrueDetectedList[k, i] = TrueDetected
            MissedDetectedList[k, i] = MissedDetected

            # do false positive mining if wanted
            # if FPminingNeed == True:
            #     detections = measure.regionprops(label_prediction)
            #     FPminingList = FPmining(detections, correct_labels, batch, zmin, ymin, xmin, FPminingList)

        print((time.perf_counter() - start_pred_time) / 60)
        print("--- %s minutes ---" % ((time.perf_counter() - start_time) / 60))

    # determine number of detected / missed nodules per threshold
    cor_tresh = np.sum(TrueDetectedList, 0)
    mis_tresh = np.sum(MissedDetectedList, 0)
    sens_tresh = cor_tresh / (cor_tresh + mis_tresh)
    fp_tresh = np.mean(FalsePositiveList, 0)

    # save all files
    np.save(folder_files + '/FPlist', FalsePositiveList)
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected', TrueDetectedList)
    np.save(folder_files + '/MissedDetected', MissedDetectedList)
    np.save(folder_files + '/sens_tresh', sens_tresh)
    np.save(folder_files + '/fp_tresh', fp_tresh)

    # write seriesuid to excel file
    df = pd.DataFrame({'series': index_list})
    writer = pd.ExcelWriter(folder_files + '/seriesUID.xlsx', engine='xlsxwriter')
    df.to_excel(writer, index=False, header=True)
    writer.save()

    # write FROC data to excel file
    df_2 = pd.DataFrame({'treshold': treshold_list,
                         'sensitivity': sens_tresh,
                         'false positives': fp_tresh})
    writer = pd.ExcelWriter(folder_files + '/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False, header=True)
    writer.save()

    # calculate overall sensitivity and fp_rate
    total_correct_detected = np.sum(TrueDetectedList, 0)
    total_nodules = total_correct_detected + np.sum(MissedDetectedList, 0)
    Sensitivity = np.divide(total_correct_detected, total_nodules)
    fp_rate = np.mean(FalsePositiveList, 0)

    # save sensitivity to txt file
    names = np.array(['Sensitivity:', 'FalsePositives:'])
    floats = np.array([Sensitivity[8], fp_rate[8]])

    ab = np.zeros(names.size, dtype=[('var1', 'U20'), ('var2', float)])
    ab['var1'] = names
    ab['var2'] = floats

    np.savetxt(folder_files + '/accuracy.txt', ab, fmt="%18s %10.3f")

    # write false positive list to file
    if FPminingNeed == True:
        a = pd.DataFrame(FPminingList)
        writer = pd.ExcelWriter(folder_files + '/FalsePositiveMiningList.xlsx', engine='xlsxwriter')
        a.columns = ["seriesuid", "coordX", "coordY", "coordZ"]
        a.to_excel(writer, index=False, header=True)
        writer.save()
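# calc_Sensitivity is referenced but not defined in these fragments; its use
# implies the standard recall ratio, sketched here with a zero-division guard
# (the guard is an assumption):
def calc_Sensitivity(TrueDetected, MissedDetected):
    total = TrueDetected + MissedDetected  # all ground-truth nodules
    return TrueDetected / total if total > 0 else np.nan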
LUNA_MASK = 'C:/Users/s120116/Documents/LUNAsubsets/' + subset + '/*.mhd'
path = 'C:/Users/s120116/Documents/LUNAsubsets/subset*/*.mhd'
path = 'C:/Users/s120116/Documents/Preprocessed_Images/subset2 - split/training/*'
sub = 'subset0'

luna_index_train = FilesIndex(path=path, no_ext=True)  # preparing indexing structure
ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.750792629100457382099842515038'])
two_scans_dataset = ds.Dataset(index=luna_index_train.create_subset(ixs), batch_class=CTICB)
luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)

nodules_malignancy = pd.read_excel('C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/all_info_averaged_observer_corrected2.xlsx')

pipeline = (Pipeline()
            .load(fmt='raw')
            .fetch_nodules_info(nodules_df_2)
            .create_mask())


def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc',
                      components=['spacing', 'origin', 'images', 'segmentation'])