def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc',
                      components=['spacing', 'origin', 'images', 'segmentation'])
                .fetch_nodules_info_malignancy(nodules_df)
                .create_mask())  # creates mask component with nodules
    return pipeline
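# Hedged usage sketch (not part of the original script): attach load_pipeline to a blosc
# crop dataset, mirroring the FilesIndex/Dataset pattern used elsewhere in this repo.
# The crop folder path, batch class and the nodules_malignancy table are placeholders.
crop_index = FilesIndex(path='./crops/*', dirs=True)
crop_dataset = Dataset(index=crop_index, batch_class=CTImagesMaskedBatch)

crop_line = crop_dataset >> load_pipeline(nodules_malignancy)
batch = crop_line.next_batch(batch_size=1, shuffle=False)  # images, segmentation and masks are now filled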
def nodules(dicom_dataset):
    pipeline = dicom_dataset >> (Pipeline()
                                 .init_variable('nodules_list', [])
                                 .load(fmt='dicom')
                                 .update_variable('nodules_list', F(generate_nodules), mode='a'))
    pipeline.run(batch_size=2)
    all_nodules = pd.concat([df for df in pipeline.get_variable('nodules_list') if len(df) > 0])
    return all_nodules
def test_combine_datasets(self, crops_datasets, batch_sizes, components):
    pipeline = (Pipeline()
                .load(fmt='blosc', components=components)
                .normalize_hu())
    combine_pipeline = combine_datasets(crops_datasets, batch_sizes, pipeline)
    _ = combine_pipeline.next_batch(4)  # noqa: F841
def test_create_crops(dicom_dataset, nodules):
    create_crops(dicom_dataset, 'dicom', nodules, None, './test_crops',
                 config=get_config(config))
    cancer_idx = FilesIndex(path='./test_crops/original/cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./test_crops/original/ncancer/*', dirs=True)
    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)
    assert len(cancer_set) != 0 and len(ncancer_set) != 0
    _ = (Pipeline(dataset=cancer_set).load(fmt='blosc', sync=True).next_batch(2))
    _ = (Pipeline(dataset=ncancer_set).load(fmt='blosc', sync=True).next_batch(2))
    shutil.rmtree('./test_crops')
def batch_gen(dicom_dataset):
    dicom_index = FilesIndex(path='./dicom/*', dirs=True)
    dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)
    create_blosc_dataset = dicom_dataset >> (
        Pipeline()
        .load(fmt='dicom')
        .dump(dst='./blosc', fmt='blosc',
              components=('images', 'origin', 'spacing'))
    )
    create_blosc_dataset.run(4)
    blosc_index = FilesIndex(path='./blosc/*', dirs=True)
    blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
    yield blosc_dataset.gen_batch(2, n_epochs=None)
    print("Cleaning up generated blosc data...")
    shutil.rmtree('./blosc')
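# Hedged usage sketch (assumption, not in the original code): if batch_gen is registered as a
# pytest fixture, a test can consume the generator it yields one 2-scan batch at a time.
# The test name and the loaded components are illustrative only.
def test_blosc_load(batch_gen):
    batch = next(batch_gen)  # a 2-scan batch over the generated ./blosc folders
    batch = batch.load(fmt='blosc', components=['images', 'spacing', 'origin'])
    assert len(batch) == 2
    assert batch.images is not None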
def __init__(self, cf, classifier, segmentator):
    self.cf = cf
    self.full_pipe = (Pipeline()
                      .init_variable('segm_mask')
                      .init_variable('conf_mask')
                      .load(fmt='dicom')
                      .unify_spacing(shape=(500, 300, 300), spacing=(1.0, 1.0, 1.0),
                                     method='pil-simd', padding='constant')
                      # .call(crop_img)
                      .normalize_hu()
                      .call(check_metrics)
                      .predict_on_scan(
                          model=lambda x: torch.nn.functional.softmax(
                              torch.from_numpy(classifier.predict(x)))[..., 1],
                          crop_shape=[64, 64, 64],
                          strides=[55, 55, 55],
                          batch_size=4,
                          data_format="channels_first",
                          model_type="callable",
                          targets_mode="classification",
                          show_progress=False)
                      # .binarize_mask(threshold=0.7)
                      .update_variable('conf_mask', B('masks'))
                      .predict_on_scan(
                          model=segmentator.predict,
                          crop_shape=(64, 64, 64),
                          strides=(55, 55, 55),
                          batch_size=4,
                          data_format="channels_first",
                          model_type="callable",
                          targets_mode="segmentation",
                          show_progress=False)
                      .binarize_mask(threshold=0.1)
                      .update_variable('segm_mask', B('masks'))
                      .load(fmt='ndarray', masks=V('segm_mask') * V('conf_mask'))
                      .fetch_nodules_from_mask()
                      .call(check_overlap)
                      .call(process_nodules))
flatten = cnn.get_layer(name="flatten_1")
inputs = [K.learning_phase()] + cnn.inputs
_flat_out = K.function(inputs, [flatten.output])


def flat_out_f(X):
    # The [0] is to disable the training phase flag
    return _flat_out([0] + [X])


# define names for folders
crops_folder = '../../../ResultingData/NoduleCrops'  # server path
pre_savepath = '../../../ResultingData/NoduleFeatures'  # change these lines into pe

# make dataset, and give the dataset to the pipeline
pipeline_load = Pipeline().load(fmt='blosc', components=['spacing', 'origin', 'images'])
dataset = make_dataset(os.path.join(crops_folder, '*'))
sample_line = (dataset >> pipeline_load)

# for each scan in the batch, load the scan and compute features from it; each batch is then saved
for i in range(int(np.ceil(len(dataset) / 5))):
    cbatch = sample_line.next_batch(batch_size=5, drop_last=False, shuffle=True)
    cim = cbatch.unpack(component='images', data_format='channels_last')
    features = flat_out_f(cim)
    for j in range(len(cim)):
        feat = features[0][j]
        totalpath = cbatch.index.get_fullpath(cbatch.indices[j])
        splits = totalpath.split(os.sep)
        savepath = pre_savepath + '/' + splits[-1]
def load_pipeline():
    pipeline = (Pipeline().load(fmt='dicom'))
    return pipeline
plt.title(testname)
plt.ylim((0, 0.5))
plt.savefig(savepath + '/Losses.png')

# create datasets for cancer/noncancer and training/testing
cancer_testset = make_dataset(val_cancer_folder)
cancer_trainset = make_dataset(cancer_folder)

# make lists for the losses
losslist = []
test_losslist = []

# create pipeline to load images and give dataset structures to pipeline
pipeline_load = (Pipeline()
                 .load(fmt='blosc', components=['spacing', 'origin', 'images', 'masks'])
                 .loadMalignancy())

# get training and testing pipelines with data
sample_cancer_train = (cancer_trainset >> pipeline_load)
sample_cancer_test = (cancer_testset >> pipeline_load)

# use a separate pipeline for evaluation so that all images are still used for the training itself
sample_cancer_train_eval = (cancer_trainset >> pipeline_load)

# training parameters
n_epochs = 2  # number of epochs for the cancer training set; the others continue until this one has finished
cancer_batchsize = 20
ncancer_batchsize = 20  # total batch size is cancer + ncancer batch size
    return loss


def save_model(batch, pipeline, model='net'):
    """ Function for saving model. """
    model = pipeline.get_model_by_name(model)
    name = model.__class__.__name__
    model.save(MODELS_DIR + name)


# root, train, test pipelines
root_pipeline = (Pipeline()
                 .load(fmt='blosc', components=['images', 'spacing', 'origin'])
                 .fetch_nodules_info(nodules=nodules)
                 .create_mask()
                 .run(batch_size=4, shuffle=True, n_epochs=None, prefetch=3, lazy=True))

train_pipeline = (Pipeline()
                  .init_variables(['loss', 'predictions'])
                  .init_model('dynamic', C('model'), 'net', C('model_config'))
                  .call(train))

test_pipeline = (Pipeline()
                 .init_variables(['loss', 'predictions'])
                 .call(save_model, pipeline=C('train_pipeline'))
                 .import_model('net', C('train_pipeline'))
                 .predict_model('net', fetches='output_sigmoid',
                                feed_dict={
                                    'images': B('nimages'),
                                    'masks': B('nmasks'),
# np.save(os.path.join(path, 'trainindex.npy'), luna_dataset.train.indices)
# np.save(os.path.join(path, 'testindex.npy'), luna_dataset.test.indices)
#
# # give them separate names
# dataset_val = luna_dataset.test
# dataset_train = luna_dataset.train
# # -----------------------------------------------------------------------

# make pipeline to load and segment; saves segmentations in masks
load_and_segment = (Pipeline()
                    .load(fmt='raw')
                    .get_lung_mask(rad=15)
                    .unify_spacing_withmask(shape=(400, 512, 512),
                                            spacing=(2.0, 1.0, 1.0),
                                            padding='constant')  # equalizes the spacings of both images and mask
                    .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them, values from grt team
                    .apply_lung_mask(padding=170))

# uncomment this for validation data
lunaline_test = (luna_dataset >> load_and_segment.dump(
    dst=LUNA_test, components=['spacing', 'origin', 'images', 'segmentation']))

batch_size = 1
for i in range(np.ceil(len(luna_dataset) / batch_size).astype(int)):
    batch = lunaline_test.next_batch(batch_size=batch_size, shuffle=False, n_epochs=1)
from CTImagesCustomBatch import CTImagesCustomBatch as CTICB  # custom batch class
import os
import CTsliceViewer as slices

nodules_path = 'C:/Users/linde/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/AnnotatiesPim/nodule_data_adapted.xlsx'
data_path = 'D:/OnlyConv'

# get nodule info; the dtype preserves the leading zeros so that the folder names and this name match
# (if plain numbers are used this is not necessary)
nodules_utrecht = pd.read_excel(nodules_path, dtype={'PatientID': str})

# make pipeline to load, and get the annotations
load_and_preprocess = (Pipeline()
                       .load(fmt='dicom')
                       .fetch_nodules_info_general(nodules_utrecht)  # loads nodule information into batch
                       .create_mask()
                       .sample_nodules(batch_size=None, nodule_size=(16, 32, 32),
                                       share=(1.0), variance=(0, 0, 0), data='Utrecht'))

# create filesindex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# give the dataset to the pipeline and get a batch through it; for the next batch, run the second command again
line = (scan_dataset >> load_and_preprocess)
batch = line.next_batch(batch_size=1)
slices.multi_slice_viewer(batch.images)
def load_line():
    return Pipeline().load(fmt='blosc', components=['spacing', 'origin', 'images'])
train_folder = path + SaveFolder + sub + 'training'
folderlist = [test_folder, train_folder]
for folder in folderlist:
    if not os.path.exists(folder):
        os.makedirs(folder)

# this pipeline does the preprocessing and gets the ground truth for the image
preprocessing = (Pipeline()
                 .load(fmt='blosc', components=['spacing', 'origin', 'images', 'masks'])
                 .unify_spacing_withmask(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                                         padding='constant')  # equalizes the spacings
                 .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them, values from grt team
                 .apply_lung_mask(padding=170)  # possibly also remove bone
                 .fetch_nodules_info(nodules_df)
                 .create_mask())
# .predict_on_scans(cnn, strides=(8, 16, 16), crop_shape=(16, 32, 32), targets_mode='classification', model_type='keras'))
# .create_mask()

preprocess_line_test = (luna_dataset_test >> preprocessing.dump(dst=test_folder))
preprocess_line_train = (luna_dataset_train >> preprocessing.dump(dst=train_folder))

for i in range(len(luna_dataset_test)):
from radio import dataset as ds
import CTsliceViewer as slice
from radio.dataset import Pipeline
# from memory_profiler import profile
# import gc
import numpy as np
from CTImagesCustomBatch import CTImagesCustomBatch as CTICB

# load data
Path = 'C:/Users/linde/Documents/PreprocessedImages1008/Spacing(2x1x1)/*'
luna_index = ds.FilesIndex(path=Path, dirs=True, sort=True)
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTICB)

# create pipeline to load images, and spacing & origin information
pipeline_load = (Pipeline()
                 .load(fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation']))

# give the dataset to the pipeline and run it per batch
load_line = luna_dataset >> pipeline_load

# create lists for middle slices of masks and images, and a list for index numbers
list_of_masks = []
list_of_im = []
list_of_indices = []

# obtain for the whole batch the middle slice of image and mask, and the index in the list
batch_size = 1
for i in range(int(np.ceil(len(luna_dataset) / batch_size))):
    batch = load_line.next_batch(batch_size=batch_size, shuffle=False)
    arrayIm, arrayMask = batch.get_middle_slices()  # function returns middle slices of batch
def eval_on_images(path, cnn, nodules_df, crop_size=np.array([16, 32, 32]),
                   step_size=np.array([8, 8, 8]), saveImages=False, savepath='path'):
    # get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True, sort=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline()
                     .load(fmt='blosc', components=['spacing', 'origin', 'images'])
                     .fetch_nodules_info_Utrecht(nodules_df)
                     .create_mask())
    preprocess_line = (luna_dataset_test >> preprocessing)

    # possible thresholds
    treshold_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95, 0.97,
                     0.99, 0.995, 0.998, 0.999, 0.9995, 0.9998, 0.9999, 1]

    FalsePositiveList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    SensitivityList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    TrueDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    MissedDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])

    # define crop and step size, and the batch size in which the prediction should be made
    batch_size = 20

    folder = savepath + 'Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)
    folder_files = savepath + 'Image_evaluation'
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)

    index_list = []
    for k in range(len(luna_dataset_test)):
        start_time = time.clock()
        batch = preprocess_line.next_batch(batch_size=1, shuffle=False)

        if os.path.isdir(batch.index.get_fullpath(batch.index.indices[0]) + '/segmentation'):
            print(batch.index.get_fullpath(batch.index.indices[0]) + '/segmentation')
            batch.load(fmt='blosc', components=['segmentation'])

        im_index = batch.indices
        index_list.append(str(im_index))

        # crop images to the bounding box to avoid classifying too much, if a segmentation is present
        if batch.segmentation is not None:
            zmin, zmax, ymin, ymax, xmin, xmax = bbox2_3D(batch.segmentation)
            segmentation = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]  # extract segmentation
            bounding_im = batch.images[zmin:zmax, ymin:ymax, xmin:xmax]
            bounding_mask = batch.masks[zmin:zmax, ymin:ymax, xmin:xmax]
        else:
            bounding_im = batch.images
            bounding_mask = batch.masks
            segmentation = None

        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad, bounding_mask_pad = pad_for_Prediction(bounding_im, bounding_mask,
                                                                crop_size, step_size)

        # make empty array for prediction
        size = bounding_im.shape
        prediction_size = np.ceil(size / step_size).astype(int)  # make sure all pixels got a mini-box
        prediction_map = np.zeros(prediction_size)

        start_pred_time = time.clock()

        # get prediction map of image
        prediction_map = get_prediction_map(cnn, bounding_im_pad, prediction_map,
                                            step_size, crop_size, batch_size)

        # cast prediction map to the same size as the prediction image
        prediction_im = get_prediction_image(bounding_im, prediction_map, step_size)
        if segmentation is not None:
            prediction_im = prediction_im * segmentation  # all predictions outside the segmentation are not relevant

        # save predicted images
        if saveImages == True:
            np.save(folder + '/' + 'prediction_im' + str(k), prediction_im)
            np.save(folder + '/' + 'bounding_im' + str(k), bounding_im)
            np.save(folder + '/' + 'bounding_mask' + str(k), bounding_mask)
            # np.save(folder + '/' + 'bounding_irrel' + str(k), bounding_segm)

        # determine whether predictions are correct
        for i in range(len(treshold_list)):
            treshold = treshold_list[i]
            TrueDetected, MissedDetected, FalsePositive = verify_predictions(
                prediction_im, bounding_mask, treshold, FP_correction=False)
            Sensitivity = calc_Sensitivity(TrueDetected, MissedDetected)

            SensitivityList[k, i] = Sensitivity
            FalsePositiveList[k, i] = FalsePositive
            TrueDetectedList[k, i] = TrueDetected
            MissedDetectedList[k, i] = MissedDetected

        print((time.clock() - start_pred_time) / 60)
        print("--- %s minutes ---" % ((time.clock() - start_time) / 60))

    cor_tresh = np.sum(TrueDetectedList, 0)
    mis_tresh = np.sum(MissedDetectedList, 0)
    sens_tresh = cor_tresh / (cor_tresh + mis_tresh)
    fp_tresh = np.mean(FalsePositiveList, 0)

    # save all files
    np.save(folder_files + '/FPlist', FalsePositiveList)
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected', TrueDetectedList)
    np.save(folder_files + '/MissedDetected', MissedDetectedList)
    np.save(folder_files + '/sens_tresh', sens_tresh)
    np.save(folder_files + '/fp_tresh', fp_tresh)

    # write seriesuid to excel file
    df = pd.DataFrame({'series': index_list})
    writer = pd.ExcelWriter(folder_files + '/seriesUID.xlsx', engine='xlsxwriter')
    df.to_excel(writer, index=False, header=True)

    # write FROC values (threshold, sensitivity, false positives) to excel file
    df_2 = pd.DataFrame({'treshold': treshold_list,
                         'sensitivity': sens_tresh,
                         'false positives': fp_tresh})
    writer = pd.ExcelWriter(folder_files + '/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False, header=True)

    # calculate overall sensitivity and fp_rate
    total_correct_detected = np.sum(TrueDetectedList, 0)
    total_nodules = total_correct_detected + np.sum(MissedDetectedList, 0)
    Sensitivity = np.divide(total_correct_detected, total_nodules)
def eval_on_images(path, cnn, nodules_df, nodules_eval, crop_size=np.array([16, 32, 32]),
                   step_size=np.array([8, 8, 8]), FPminingNeed=False, saveImages=False):
    # get data
    luna_index_test = ds.FilesIndex(path=path, dirs=True)  # preparing indexing structure
    luna_dataset_test = ds.Dataset(index=luna_index_test, batch_class=CTICB)

    # replace negative diameters in irrelevant findings with a small diameter of 3 mm
    nodules_eval['diameter_mm'] = nodules_eval['diameter_mm'].replace(-1, 3)

    # this pipeline does the preprocessing and gets the ground truth for the image
    preprocessing = (Pipeline()
                     .load(fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation'])
                     .fetch_nodules_info(nodules_df)
                     .create_mask()
                     .fetch_nodules_info(nodules_eval, update=True))
    preprocess_line = (luna_dataset_test >> preprocessing)

    # possible thresholds
    treshold_list = [0.5, 0.7, 0.8, 0.85, 0.9, 0.93, 0.95, 0.97, 0.99, 0.995, 0.998, 0.999,
                     0.9995, 0.9998, 0.9999, 1]

    FalsePositiveList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    SensitivityList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    TrueDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])
    MissedDetectedList = np.zeros([len(luna_dataset_test), len(treshold_list)])

    FPminingNeed = False
    FPminingList = []

    # define the batch size in which the prediction should be made
    batch_size = 20

    folder = 'Image_Data'
    if not os.path.exists(folder):
        os.makedirs(folder)
    folder_files = 'Image_evaluation'
    if not os.path.exists(folder_files):
        os.makedirs(folder_files)

    index_list = []
    for k in range(len(luna_dataset_test)):
        start_time = time.clock()
        batch = preprocess_line.next_batch(batch_size=1)
        im_index = batch.indices
        index_list.append(str(im_index))

        # crop images to the bounding box to avoid classifying too much
        zmin, zmax, ymin, ymax, xmin, xmax = bbox2_3D(batch.segmentation)
        segmentation = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]  # extract segmentation

        batch.create_mask_irrelevant()  # when the segmentation is no longer needed, put irrelevant findings into this component

        # cut all images to the bounding box of the segmentation to reduce computational load
        bounding_im = batch.images[zmin:zmax, ymin:ymax, xmin:xmax]
        bounding_mask = batch.masks[zmin:zmax, ymin:ymax, xmin:xmax]
        bounding_segm = batch.segmentation[zmin:zmax, ymin:ymax, xmin:xmax]

        # pad the images to ensure correct patch extraction at boundaries
        bounding_im_pad, bounding_mask_pad, bounding_segm_pad = pad_for_Prediction(
            bounding_im, bounding_mask, bounding_segm, crop_size, step_size)

        # make empty array for prediction
        size = bounding_im.shape
        prediction_size = np.ceil(size / step_size).astype(int)  # make sure all pixels got a mini-box
        prediction_map = np.zeros(prediction_size)

        start_pred_time = time.clock()

        # get prediction map of image
        prediction_map = get_prediction_map(cnn, bounding_im_pad, prediction_map,
                                            step_size, crop_size, batch_size)

        # cast prediction map to the same size as the prediction image
        prediction_im = get_prediction_image(bounding_im, prediction_map, step_size)
        prediction_im = prediction_im * segmentation  # all predictions outside the segmentation are not relevant

        # save predicted images
        if saveImages == True:
            np.save(folder + '/' + 'prediction_im' + str(k), prediction_im)
            np.save(folder + '/' + 'bounding_im' + str(k), bounding_im)
            np.save(folder + '/' + 'bounding_mask' + str(k), bounding_mask)
            np.save(folder + '/' + 'bounding_irrel' + str(k), bounding_segm)

        # determine whether predictions are correct
        for i in range(len(treshold_list)):
            treshold = treshold_list[i]
            TrueDetected, MissedDetected, FalsePositive = verify_predictions(
                prediction_im, bounding_mask, bounding_segm, treshold)
            Sensitivity = calc_Sensitivity(TrueDetected, MissedDetected)

            SensitivityList[k, i] = Sensitivity
            FalsePositiveList[k, i] = FalsePositive
            TrueDetectedList[k, i] = TrueDetected
            MissedDetectedList[k, i] = MissedDetected

        # do false positive mining if wanted
        # if FPminingNeed == True:
        #     detections = measure.regionprops(label_prediction)
        #     FPminingList = FPmining(detections, correct_labels, batch, zmin, ymin, xmin, FPminingList)

        print((time.clock() - start_pred_time) / 60)
        print("--- %s minutes ---" % ((time.clock() - start_time) / 60))

    # determine number of detected / missed nodules
    cor_tresh = np.sum(TrueDetectedList, 0)
    mis_tresh = np.sum(MissedDetectedList, 0)
    sens_tresh = cor_tresh / (cor_tresh + mis_tresh)
    fp_tresh = np.mean(FalsePositiveList, 0)

    # save all files
    np.save(folder_files + '/FPlist', FalsePositiveList)
    np.save(folder_files + '/SensList', SensitivityList)
    np.save(folder_files + '/TrueDetected', TrueDetectedList)
    np.save(folder_files + '/MissedDetected', MissedDetectedList)
    np.save(folder_files + '/sens_tresh', sens_tresh)
    np.save(folder_files + '/fp_tresh', fp_tresh)

    # write seriesuid to excel file
    df = pd.DataFrame({'series': index_list})
    writer = pd.ExcelWriter(folder_files + '/seriesUID.xlsx', engine='xlsxwriter')
    df.to_excel(writer, index=False, header=True)

    # write FROC values (threshold, sensitivity, false positives) to excel file
    df_2 = pd.DataFrame({'treshold': treshold_list,
                         'sensitivity': sens_tresh,
                         'false positives': fp_tresh})
    writer = pd.ExcelWriter(folder_files + '/FROC.xlsx', engine='xlsxwriter')
    df_2.to_excel(writer, index=False, header=True)

    # calculate overall sensitivity and fp_rate
    total_correct_detected = np.sum(TrueDetectedList, 0)
    total_nodules = total_correct_detected + np.sum(MissedDetectedList, 0)
    Sensitivity = np.divide(total_correct_detected, total_nodules)
    fp_rate = np.mean(FalsePositiveList, 0)

    # save sensitivity to txt file
    names = np.array(['Sensitivity:', 'FalsePositives:'])
    floats = np.array([Sensitivity[8], fp_rate[8]])
    ab = np.zeros(names.size, dtype=[('var1', 'U20'), ('var2', float)])
    ab['var1'] = names
    ab['var2'] = floats
    np.savetxt(folder_files + '/accuracy.txt', ab, fmt="%18s %10.3f")

    # write false positive list to file
    if FPminingNeed == True:
        a = pd.DataFrame(FPminingList)
        writer = pd.ExcelWriter(folder_files + '/FalsePositiveMiningList.xlsx', engine='xlsxwriter')
        a.columns = ["seriesuid", "coordX", "coordY", "coordZ"]
        a.to_excel(writer, index=False, header=True)
luna_index_train = FilesIndex(path=path, no_ext=True)  # preparing indexing structure

ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.750792629100457382099842515038'])
two_scans_dataset = ds.Dataset(index=luna_index_train.create_subset(ixs), batch_class=CTICB)
luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)

nodules_malignancy = pd.read_excel('C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/all_info_averaged_observer_corrected2.xlsx')

pipeline = (Pipeline()
            .load(fmt='raw')
            .fetch_nodules_info(nodules_df_2)
            .create_mask())


def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation'])
                .fetch_nodules_info_malignancy(nodules_df)
                .create_mask())  # creates mask component with nodules
    return pipeline


cancer_cropline = load_pipeline(nodules_malignancy)
cancer_train = (two_scans_dataset >> load_and_segment)
import numpy as np
import pandas as pd
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset, Pipeline, F
from radio.models import DilatedNoduleNet
from radio.models.tf.losses import tversky_loss

nodules_df = pd.read_csv('/path/to/annotations.csv')
luna_index = FilesIndex(path='/path/to/LunaDataset/*.mhd', no_ext=True)
luna_dataset = Dataset(index=luna_index, batch_class=CTImagesMaskedBatch)

preprocessing = (Pipeline()
                 .load(fmt='raw')
                 .unify_spacing(shape=(384, 512, 512), spacing=(3.5, 2.0, 2.0))
                 .fetch_nodules_info(nodules_df)
                 .create_mask()
                 .normalize_hu())

spacing_randomizer = lambda *args: 0.2 * np.random.uniform(size=3) + [3.5, 2.0, 2.0]
augmentation = (Pipeline()
                .sample_nodules(nodule_size=(48, 76, 76))
                .rotate(random=True, angle=30, mask=True)
                .unify_spacing(spacing=F(spacing_randomizer), shape=(32, 64, 64)))

vnet_config = {'loss': tversky_loss,
               'inputs': dict(images={'shape': (32, 64, 64, 1)},
                              labels={'name': 'targets', 'shape': (32, 64, 64, 1)})}
vnet_config['input_block/inputs'] = 'images'

model_training = (Pipeline()
                  .init_model(name='vnet', model_class=DilatedNoduleNet, config=vnet_config)
                  .train_model(name='vnet',
                               feed_dict={'images': F(CTImagesMaskedBatch.unpack, component='images'),
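# Hedged sketch (assumption, not taken from the snippet above): the three pipelines are
# typically concatenated and attached to the dataset before running; the batch size and
# epoch count below are placeholders.
full_train = preprocessing + augmentation + model_training
(luna_dataset >> full_train).run(batch_size=4, shuffle=True, n_epochs=10)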
# from each dicom folder, add one file to the filesindex. This makes sure that with the next batch the next
# dicom scan is loaded and not the next slice (file)
fileList = []
for i in range(1, 3):  # from 1 to number of scans
    number = '00' + str(i)
    path = 'C:/Users/linde/Documents/DAta/DATA/Use/' + number + '/conventional'
    fileList.append(path + '/' + os.listdir(path)[0])

# set up dataset structure
luna_index = FilesIndex(path=fileList, no_ext=False, sort=True)  # preparing indexing structure
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

# load pipeline
load_LUNA = (Pipeline()
             .load(fmt='dicom')
             .get_lung_mask(rad=10))

lunaline = luna_dataset >> load_LUNA.dump(dst=save_folder,
                                          components=['spacing', 'origin', 'images', 'masks'])

# get next batch
list_int = []
i = 0
while True:
    try:
        batch = lunaline.next_batch(batch_size=1, shuffle=False, n_epochs=1)
        im_array = batch.images
        [values, count] = np.unique(im_array, return_counts=True)
        list_int.append([batch.index.indices, values, count])
        i = i + 1
def load_pipeline(nodules_df):
    pipeline = (Pipeline()
                .load(fmt='blosc', components=['spacing', 'origin', 'images', 'segmentation'])
                .fetch_nodules_info_malignancy(nodules_df)
                .create_mask())  # creates mask component with nodules
    return pipeline
# makes folder for all savings
LUNA_val = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/validate'
LUNA_train = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/training'

luna_index = FilesIndex(path=LUNA_MASK, no_ext=True)
ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.228511122591230092662900221600'])
fix_ds = ds.Dataset(index=luna_index.create_subset(ixs), batch_class=CTImagesCustomBatch)

# make pipeline to load and segment; saves segmentations in masks
load_and_segment = (Pipeline()
                    .load(fmt='raw')
                    .get_lung_mask(rad=15))
# .unify_spacing_withmask(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0), padding='constant')  # equalizes the spacings of both images and mask
# .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them, values from grt team
# .apply_lung_mask(padding=170))

# pass training dataset through pipeline
lunaline_train = (fix_ds >> load_and_segment)  # .dump(dst=LUNA_train, components=['spacing', 'origin', 'images', 'segmentation']))
batch = lunaline_train.next_batch(batch_size=1, shuffle=False, n_epochs=1)
batch.dump(dst='C:/Users/s120116/Documents/Preprocessed_Images/subset1 - split',
           components=['spacing', 'origin', 'images', 'segmentation'])
if not os.path.exists(savepath):
    os.makedirs(savepath)

# create filesindex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# to check index / dataset use: luna_index.indices or scan_dataset.index.indices
# should contain a list of folder names (one folder per scan); names should differ for each scan

# make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (Pipeline()
                       .load(fmt='dicom')  # loads all slices from folder in dataset
                       .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                                      padding='constant')  # equalizes the spacings
                       .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them

# pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess.dump(
    dst=savepath, components=['images', 'spacing', 'origin']))

# get scans one by one through the pipeline
for i in range(len(scan_dataset)):
    print('preprocessing scan nr: ' + str(i))
    batch = preprocessing_pipeline.next_batch(batch_size=1, shuffle=False,
                                              n_epochs=1, drop_last=False)
if not os.path.exists(save_path):
    os.makedirs(save_path)

for string in ['PE']:  # still do PE
    path_cs = "C:/Users/linde/Documents/CS_PE_seperated/" + string + "/*"
    cs_index = FilesIndex(path=path_cs, dirs=True, sort=True)
    cs_dataset = ds.Dataset(index=cs_index, batch_class=CTImagesCustomBatch)

    # load and normalize these images
    load_and_normalize = (Pipeline()
                          .load(fmt='blosc', components=['spacing', 'origin', 'images'])
                          .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                                         padding='constant')  # equalizes the spacings of both images and mask
                          .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them, values from grt team
    # .apply_lung_mask(paddi

    Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/0*'
    loadSegm = (Pipeline().load(fmt='blosc', components=['segmentation', 'masks']))
    im_index = FilesIndex(path=Path, dirs=True)

    lunaline_train = (cs_dataset >> load_and_normalize)
    for i in range(len(cs_dataset)):
# makes folder for all savings
if not os.path.exists(savepath_preprocess):
    os.makedirs(savepath_preprocess)

# create filesindex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# to check index / dataset use: luna_index.indices or scan_dataset.index.indices
# should contain a list of folder names (one folder per scan); names should differ for each scan

# make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (Pipeline()
                       .load(fmt='dicom')  # loads all slices from folder in dataset
                       .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                                      padding='constant')  # equalizes the spacings
                       .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them

# pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess.dump(
    dst=savepath_preprocess, components=['images', 'spacing', 'origin']))

# get scans one by one through the pipeline
for i in range(len(scan_dataset)):
    print('preprocessing scan nr: ' + str(i))
    batch = preprocessing_pipeline.next_batch(batch_size=1, shuffle=False,
                                              n_epochs=1, drop_last=False)