def batch_gen(dicom_dataset):
    dicom_index = FilesIndex(path='./dicom/*', dirs=True)
    dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)
    create_blosc_dataset = dicom_dataset >> (
        Pipeline()
        .load(fmt='dicom')
        .dump(dst='./blosc', fmt='blosc',
              components=('images', 'origin', 'spacing'))
    )
    create_blosc_dataset.run(4)
    blosc_index = FilesIndex(path='./blosc/*', dirs=True)
    blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
    yield blosc_dataset.gen_batch(2, n_epochs=None)
    print("Cleaning up generated blosc data...")
    shutil.rmtree('./blosc')
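# A minimal usage sketch (not part of the original test suite, assuming the
# fixtures above are registered with pytest): a test can pull batches from the
# generator yielded by batch_gen and load them back from the blosc dump.
def test_load_from_blosc_smoke(batch_gen):
    batch = next(batch_gen).load(fmt='blosc')
    assert len(batch) == 2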
def dicom_dataset():
    if os.path.exists('./dicom'):
        shutil.rmtree('./dicom')
    generate_dicom_scans('./dicom')

    index = FilesIndex(path='./dicom/*', dirs=True)
    dataset = Dataset(index, batch_class=CTImagesMaskedBatch)
    yield dataset
    print("Cleaning up generated dicom data...")
    shutil.rmtree('./dicom')
def test_split_dump(self, dicom_dataset, nodules, histo):
    pipeline = split_dump('./temp_cancer', './temp_ncancer', nodules, histo,
                          fmt='dicom', spacing=(1.7, 1.0, 1.0),
                          shape=(128, 128, 128), order=3,
                          padding='reflect', crop_size=(32, 64, 64))
    pipeline = (dicom_dataset >> pipeline)
    pipeline.next_batch(2)
    pipeline.next_batch(2)

    cancer_idx = FilesIndex(path='./temp_cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./temp_ncancer/*', dirs=True)
    assert len(cancer_idx) > 0
    assert len(ncancer_idx) > 0

    shutil.rmtree('./temp_cancer')
    shutil.rmtree('./temp_ncancer')
def crops_datasets(dicom_dataset, nodules, histo):
    pipeline = split_dump('./cancer', './ncancer', nodules, histo,
                          fmt='dicom', spacing=(1.7, 1.0, 1.0),
                          shape=(128, 128, 128), order=3,
                          padding='reflect', crop_size=(32, 64, 64))
    pipeline = (dicom_dataset >> pipeline)
    pipeline.next_batch(2)

    cancer_idx = FilesIndex(path='./cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./ncancer/*', dirs=True)
    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)
    yield cancer_set, ncancer_set

    shutil.rmtree('./cancer')
    shutil.rmtree('./ncancer')
def test_blosc_dump_sync(self, batch_with_nodules_and_masks, sync):
    _ = batch_with_nodules_and_masks.dump(dst='./dumped_blosc',  # noqa: F841
                                          fmt='blosc', sync=sync)
    blosc_index = FilesIndex(path='./dumped_blosc/*', dirs=True)
    blosc_dataset = Dataset(blosc_index, batch_class=CTImagesMaskedBatch)
    assert len(blosc_dataset) == len(batch_with_nodules_and_masks)

    batch = (  # noqa: F841
        blosc_dataset
        .next_batch(len(batch_with_nodules_and_masks))
        .load(fmt='blosc', sync=sync)
    )
    shutil.rmtree('./dumped_blosc')
def test_dicom_dump(self, batch_with_nodules_and_masks):
    _ = batch_with_nodules_and_masks.dump(dst='./dumped_dicoms',  # noqa: F841
                                          fmt='dicom')
    dicom_index = FilesIndex(path='./dumped_dicoms/*', dirs=True)
    dicom_dataset = Dataset(dicom_index, batch_class=CTImagesMaskedBatch)
    assert len(dicom_dataset) == len(batch_with_nodules_and_masks)

    batch = (  # noqa: F841
        dicom_dataset
        .next_batch(len(batch_with_nodules_and_masks))
        .load(fmt='dicom')
    )
    shutil.rmtree('./dumped_dicoms')
def test_create_crops(dicom_dataset, nodules):
    create_crops(dicom_dataset, 'dicom', nodules, None, './test_crops',
                 config=get_config(config))

    cancer_idx = FilesIndex(path='./test_crops/original/cancer/*', dirs=True)
    ncancer_idx = FilesIndex(path='./test_crops/original/ncancer/*', dirs=True)
    cancer_set = Dataset(cancer_idx, batch_class=CTImagesMaskedBatch)
    ncancer_set = Dataset(ncancer_idx, batch_class=CTImagesMaskedBatch)
    assert len(cancer_set) != 0 and len(ncancer_set) != 0

    _ = (Pipeline(dataset=cancer_set).load(fmt='blosc', sync=True).next_batch(2))
    _ = (Pipeline(dataset=ncancer_set).load(fmt='blosc', sync=True).next_batch(2))

    shutil.rmtree('./test_crops')
def _check_scan(self, item):
    if item.fmt == 'raw':
        index = FilesIndex(path=item.scan_path, no_ext=False, dirs=False)
        if len(index.indices) == 0:
            raise FileNotFoundError("File with given path does not exist")
        if any('.mhd' not in str(p) for p in index.indices):
            raise ValueError("File must have '.mhd' extension.")
    if item.fmt == 'dicom':
        if not (os.path.exists(item.scan_path) and os.path.isdir(item.scan_path)):
            raise FileNotFoundError("DICOM-directory "
                                    + "with given path does not exist.")
        for name in os.listdir(item.scan_path):
            path = os.path.join(item.scan_path, name)
            try:
                _ = read_file(path)  # noqa: F841
            except Exception:
                raise ValueError("Scans path must be"
                                 + " directory containing dicom files.")
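# Hypothetical usage sketch (names below are illustrative, not from the source):
# _check_scan only relies on an item exposing `fmt` and `scan_path` attributes,
# so a simple namedtuple is enough to exercise both validation branches.
from collections import namedtuple

ScanItem = namedtuple('ScanItem', ['fmt', 'scan_path'])
# `validator` is assumed to be an instance of the class that defines _check_scan
validator._check_scan(ScanItem(fmt='raw', scan_path='./scans/scan_0.mhd'))
validator._check_scan(ScanItem(fmt='dicom', scan_path='./scans/dicom_folder'))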
number = str(i).zfill(6)
path = 'D:/DATA20181008//' + number + '/conv'
if os.path.isdir(path):
    fileList.append(path + '/' + os.listdir(path)[0])

pathlist = glob.glob(path)
for i in range(len(pathlist)):
    path = pathlist[i]
    path1, im = os.path.split(path)
    path2, conv = os.path.split(path1)
    path3, patient = os.path.split(path2)
    os.rename(path, path2 + '/' + conv + '/' + patient + '_' + im)

#ixs = np.array([['1.3.6.1.4.1.14519.5.2.1.6279.6001.312127933722985204808706697221']])
luna_index = FilesIndex(path=fileList)
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

try:
    import pydicom as dicom  # pydicom library was renamed in v1.0
except ImportError:
    import dicom

indexlist = np.zeros(214)
contrast = []
for i in range(214):
    os_, index = os.path.split(fileList[i])
    list_of_dicoms = dicom.dcmread(fileList[i])
    print(i)
    indexlist[i] = int(index[:6])
if not os.path.exists(LUNA_pre):
    os.makedirs(LUNA_pre)

string = ['080', '120', '190', 'conv']
for num in string:  # start with 1 energy
    fileList = []
    for i in range(0, 300):  # from 1 to number of scans
        number = str(i).zfill(6)
        path = 'D:/DATA20181008/' + number + '/' + num
        if os.path.isdir(path):
            fileList.append(path + '/' + os.listdir(path)[0])

    luna_index = FilesIndex(path=fileList, sort=True)
    luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

    # load and normalize these images
    load_and_normalize = (
        Pipeline()
        .load(fmt='dicom')
        .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                       padding='constant')  # equalizes the spacings from both images and mask
        .normalize_hu(min_hu=-1200, max_hu=600)
    )  # clips the HU values and linearly rescales them, values from grt team
    # .apply_lung_mask(paddi

Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/0*'
loadblosc = (Pipeline().load(fmt='blosc',
                             n_epochs=1, drop_last=False)
batch.dump(dst=LUNA_pre,
           components=['spacing', 'origin', 'images', 'segmentation', 'masks'])
print(i)
print(np.round((time.time() - start_time) / 60, 2))

slices.multi_slice_viewer(batchnew.images)

# get segmented images
Path = 'C:/Users/linde/Documents/PreprocessedImages1008CorrectConvs/Spacing(2x1x1)/SpacingNew/incorrect/*'
pipeline_loadblosc = (Pipeline().load(
    fmt='blosc',
    components=['spacing', 'origin', 'images', 'segmentation', 'masks']))

im_index = FilesIndex(path=Path, dirs=True)
batch_size = 1
ixs = np.array(['000274_IM000001'])
observed_scans = ds.Dataset(index=im_index.create_subset(ixs),
                            batch_class=CTImagesCustomBatch)
observed_scans = ds.Dataset(index=im_index, batch_class=CTImagesCustomBatch)

lunaline_segm = (observed_scans >> pipeline_loadblosc)
batch_segm = lunaline_segm.next_batch(batch_size=batch_size, shuffle=False,
                                      n_epochs=1, drop_last=False)
slices.multi_slice_viewer(batch_segm.masks)
slices.multi_slice_viewer(batch_segm.images)
import numpy as np
import pandas as pd
from radio import CTImagesMaskedBatch
from radio.dataset import FilesIndex, Dataset, Pipeline, F
from radio.models import DilatedNoduleNet
from radio.models.tf.losses import tversky_loss

nodules_df = pd.read_csv('/path/to/annotations.csv')
luna_index = FilesIndex(path='/path/to/LunaDataset/*.mhd', no_ext=True)
luna_dataset = Dataset(index=luna_index, batch_class=CTImagesMaskedBatch)

preprocessing = (Pipeline()
                 .load(fmt='raw')
                 .unify_spacing(shape=(384, 512, 512), spacing=(3.5, 2.0, 2.0))
                 .fetch_nodules_info(nodules_df)
                 .create_mask()
                 .normalize_hu())

spacing_randomizer = lambda *args: 0.2 * np.random.uniform(size=3) + [3.5, 2.0, 2.0]
augmentation = (Pipeline()
                .sample_nodules(nodule_size=(48, 76, 76))
                .rotate(random=True, angle=30, mask=True)
                .unify_spacing(spacing=F(spacing_randomizer), shape=(32, 64, 64)))

vnet_config = {'loss': tversky_loss,
               'inputs': dict(images={'shape': (32, 64, 64, 1)},
                              labels={'name': 'targets', 'shape': (32, 64, 64, 1)})}
vnet_config['input_block/inputs'] = 'images'

model_training = (Pipeline()
                  .init_model(name='vnet', model_class=DilatedNoduleNet, config=vnet_config)
                  .train_model(name='vnet',
                               feed_dict={'images': F(CTImagesMaskedBatch.unpack, component='images'),
# makes folder for all savings
LUNA_val = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/validate'
LUNA_train = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/training'
LUNA_test = 'C:/Users/s120116/Documents/Preprocessed_Images/validationData/'

if not os.path.exists(LUNA_test):
    os.makedirs(LUNA_test)

# if not os.path.exists(LUNA_val):
#     os.makedirs(LUNA_val)
#
# if not os.path.exists(LUNA_train):
#     os.makedirs(LUNA_train)

# set up dataset structure
luna_index = FilesIndex(path=LUNA_MASK, no_ext=True)  # preparing indexing structure
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

# Split dataset in training and validation part ----------------------------------------------
# define path to save or load index files

# if Split:
#     # If dataset has already been split: make two subsets from indices for testing vs training
#     path = 'C:/Users/s120116/Documents/' + subset + ' - split/'
#
#     index_train = np.load(os.path.join(path, 'trainindex.npy'))
#     luna_index_train = luna_index.create_subset(index_train)
#     dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTImagesCustomBatch)
#
#     index_val = np.load(os.path.join(path, 'testindex.npy'))
import CTsliceViewer as slices

save_folder = 'C:/Users/linde/Documents/testImages'
if not os.path.exists(save_folder):
    os.makedirs(save_folder)

# from each dicom folder, add one file to the FilesIndex. This makes sure that with the next
# batch the next dicom scan is loaded and not the next slice (file)
fileList = []
for i in range(1, 3):  # from 1 to number of scans
    number = '00' + str(i)
    path = 'C:/Users/linde/Documents/DAta/DATA/Use/' + number + '/conventional'
    fileList.append(path + '/' + os.listdir(path)[0])

# set up dataset structure
luna_index = FilesIndex(path=fileList, no_ext=False, sort=True)  # preparing indexing structure
luna_dataset = ds.Dataset(index=luna_index, batch_class=CTImagesCustomBatch)

# load pipeline
load_LUNA = (Pipeline().load(fmt='dicom').get_lung_mask(rad=10))

lunaline = luna_dataset >> load_LUNA.dump(
    dst=save_folder, components=['spacing', 'origin', 'images', 'masks'])

# get next batch
list_int = []
i = 0
while True:
    try:
        batch = lunaline.next_batch(batch_size=1, shuffle=False, n_epochs=1)
number = str(i).zfill(6)
path = 'D:/DATA20181008/' + number + '/' + '060'
if os.path.isdir(path):
    fileList_060.append(path)  # + '/' + os.listdir(path)[0])
path = 'D:/DATA20181008/' + number + '/' + '190'
if os.path.isdir(path):
    filelist_190.append(path)  # + '/' + os.listdir(path)[0])

LUNA_pre = 'C:/Users/linde/Documents/CS_PE_seperatedtest'
if not os.path.exists(LUNA_pre):
    os.makedirs(LUNA_pre)

# set up dataset structure
luna_index_low = FilesIndex(path=fileList_060, sort=True, dirs=True)  # preparing indexing structure
luna_dataset_low = ds.Dataset(index=luna_index_low, batch_class=CTICB)

luna_index_high = FilesIndex(path=filelist_190, sort=True, dirs=True)  # preparing indexing structure
luna_dataset_high = ds.Dataset(index=luna_index_high, batch_class=CTICB)

cancer_cropline = load_pipeline()
line_low = luna_dataset_low >> cancer_cropline
line_high = luna_dataset_high >> cancer_cropline

for i in range(len(luna_dataset_low)):
    if luna_dataset_high.index.indices[i] != luna_dataset_low.index.indices[i]:
        print('error!' + ' high :' + luna_dataset_high.index.indices[i]
              + ' low: ' + luna_dataset_low.index.indices[i])
MERGED_NODULES_PATH = './merged_nodules.pkl'
HISTO_PATH = './histo.pkl'
NODULE_CONFIDENCE_THRESHOLD = 0.02
TRAIN_SHARE = 0.9
CANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/cancerous'
NONCANCEROUS_CROPS_PATH = '/notebooks/data/CT/npcmr_crops/train/noncancerous'

# read df containing info about nodules on scans
dataset_info = (radio.annotation.read_dataset_info(NPCMR_GLOB, index_col='seriesid',
                                                   filter_by_min_spacing=True,
                                                   load_origin=False))

# set up Index and Dataset for npcmr
ct_index = FilesIndex(dataset_info.index.values,
                      paths=dict(dataset_info.loc[:, 'ScanPath']), dirs=True)
ct_dataset = Dataset(ct_index, batch_class=CTImagesMaskedBatch)

# read dumped annots
with open(MERGED_NODULES_PATH, 'rb') as file:
    merged = pkl.load(file)

# filter nodules by confidences
filtered = merged[merged.confidence > NODULE_CONFIDENCE_THRESHOLD]

ct_dataset.cv_split(TRAIN_SHARE)

# read histo of nodules locs
with open(HISTO_PATH, 'rb') as file:
    histo = pkl.load(file)
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import CTsliceViewer as slices
import scipy

nodules_df_2 = pd.read_csv('C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/annotations.csv')

LUNA_MASK = 'C:/Users/s120116/Documents/LUNAsubsets/' + subset + '/*.mhd'
path = 'C:/Users/s120116/Documents/LUNAsubsets/subset*/*.mhd'
path = 'C:/Users/s120116/Documents/Preprocessed_Images/subset2 - split/training/*'
sub = 'subset0'

luna_index_train = FilesIndex(path=path, no_ext=True)  # preparing indexing structure

ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.750792629100457382099842515038'])
two_scans_dataset = ds.Dataset(index=luna_index_train.create_subset(ixs), batch_class=CTICB)
luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)

nodules_malignancy = pd.read_excel('C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/all_info_averaged_observer_corrected2.xlsx')

pipeline = (Pipeline()
nodules_malignancy = pd.read_excel(
    'C:/Users/s120116/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/all_info_averaged_observer_corrected2.xlsx'
)

path = 'C:/Users/s120116/Documents/Preprocessed_Images/'
SaveFolder = 'Crops(16x32x32)CompleteDataset'

for sub in sublist:
    print(sub)

    # define folders in which the validation and training data are
    LUNA_val = path + sub + ' - split/validate/*'
    LUNA_train = path + sub + ' - split/training/*'

    # set up dataset structure
    luna_index_val = FilesIndex(path=LUNA_val, dirs=True)  # preparing indexing structure
    luna_dataset_val = ds.Dataset(index=luna_index_val, batch_class=CTICB)

    luna_index_train = FilesIndex(path=LUNA_train, dirs=True)  # preparing indexing structure
    luna_dataset_train = ds.Dataset(index=luna_index_train, batch_class=CTICB)


def make_folder(folderlist=[]):
    for folder in folderlist:
        if not os.path.exists(folder):
            os.makedirs(folder)


def load_pipeline(nodules_df):
    pipeline = (Pipeline().load(
        fmt='blosc',
        components=[
# input --------------------------------------------------------------------------------
# put here the path with the data
data_path = 'D:/OnlyConv/'  # folder containing for each dicom scan a folder with all slices (files)
# --------------------------------------------------------------------------------------------

savepath = '../../../ResultingData/PreprocessedImages'

# makes folder for all savings
if not os.path.exists(savepath):
    os.makedirs(savepath)

# create FilesIndex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# to check the index / dataset use: scan_index.indices or scan_dataset.index.indices
# they should contain a list of folder names (one folder per scan); names should differ per scan

# make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (
    Pipeline()
    .load(fmt='dicom')  # loads all slices from a folder in the dataset
    .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                   padding='constant')  # equalizes the spacings
    .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them
)
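# A minimal continuation sketch (assumed usage, mirroring the dump pattern used
# elsewhere in these scripts): attach the dataset to the pipeline, dump the
# preprocessed components and run the whole line batch by batch.
preprocess_line = scan_dataset >> load_and_preprocess.dump(
    dst=savepath, components=['images', 'spacing', 'origin'])
preprocess_line.run(batch_size=1, shuffle=False, n_epochs=1)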
# 1.3.6.1.4.1.14519.5.2.1.6279.6001.339546614783708685476232944897
# 1.3.6.1.4.1.14519.5.2.1.6279.6001.228511122591230092662900221600 - validate

sublist = ['subset1', 'subset2', 'subset3', 'subset4', 'subset5', 'subset6', 'subset7']
subset = 'subset4'

# Define data folder (LUNA_mask)
LUNA_MASK = 'C:/Users/s120116/Documents/Allfolders/' + subset + '/*.mhd'  # set glob-mask for scans from Luna-dataset here

# makes folder for all savings
LUNA_val = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/validate'
LUNA_train = 'C:/Users/s120116/Documents/Preprocessed_Images/' + subset + ' - split/training'

luna_index = FilesIndex(path=LUNA_MASK, no_ext=True)

ixs = np.array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.228511122591230092662900221600'])
fix_ds = ds.Dataset(index=luna_index.create_subset(ixs), batch_class=CTImagesCustomBatch)

# make pipeline to load and segment, saves segmentations in masks
load_and_segment = (Pipeline()
                    .load(fmt='raw')
                    .get_lung_mask(rad=15))
# .unify_spacing_withmask(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0), padding='constant')  # equalizes the spacings from both images and mask
# .normalize_hu(min_hu=-1200, max_hu=600)  # clips the HU values and linearly rescales them, values from grt team
# .apply_lung_mask(padding=170))
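# Assumed continuation (a sketch, not the original script): run the segmentation
# line on the single-scan subset and dump images plus lung masks, following the
# dump pattern used in the other scripts; the dst path is only a placeholder.
segment_line = fix_ds >> load_and_segment.dump(
    dst=LUNA_val, components=['images', 'masks', 'spacing', 'origin'])
segment_line.run(batch_size=1, shuffle=False, n_epochs=1)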
data_path = 'D:/OnlyConv/'  # folder containing for each dicom scan a folder with all slices (files)
nodules_path = 'C:/Users/linde/OneDrive - TU Eindhoven/TUE/Afstuderen/CSVFILES/AnnotatiesPim/nodule_data_adapted.xlsx'

# Preprocessing Images --------------------------------------------------------------------------------------------
savepath_preprocess = '../../../ResultingData/PreprocessedImages'

# makes folder for all savings
if not os.path.exists(savepath_preprocess):
    os.makedirs(savepath_preprocess)

# create FilesIndex to iterate over all files
folder_path = os.path.join(data_path, '*')
scan_index = FilesIndex(path=folder_path, dirs=True)
scan_dataset = ds.Dataset(index=scan_index, batch_class=CTICB)

# to check the index / dataset use: scan_index.indices or scan_dataset.index.indices
# they should contain a list of folder names (one folder per scan); names should differ per scan

# make pipeline to load, equalize spacing and normalize the data
load_and_preprocess = (Pipeline()
                       .load(fmt='dicom')  # loads all slices from a folder in the dataset
                       .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                                      padding='constant')  # equalizes the spacings
                       .normalize_hu(min_hu=-1200, max_hu=600))  # clips the HU values and linearly rescales them

# pass training dataset through pipeline
preprocessing_pipeline = (scan_dataset >> load_and_preprocess.dump(
    dst=savepath_preprocess, components=['images', 'spacing', 'origin']))
import os
from CTImagesCustomBatch import CTImagesCustomBatch
import CTsliceViewer as slices
import glob
import time

save_path = 'C:/Users/linde/Documents/PreprocessedImages_CS_PE/'
if not os.path.exists(save_path):
    os.makedirs(save_path)

for string in ['PE']:  # still do PE
    path_cs = "C:/Users/linde/Documents/CS_PE_seperated/" + string + "/*"
    cs_index = FilesIndex(path=path_cs, dirs=True, sort=True)
    cs_dataset = ds.Dataset(index=cs_index, batch_class=CTImagesCustomBatch)

    # load and normalize these images
    load_and_normalize = (
        Pipeline()
        .load(fmt='blosc', components=['spacing', 'origin', 'images'])
        .unify_spacing(shape=(400, 512, 512), spacing=(2.0, 1.0, 1.0),
                       padding='constant')  # equalizes the spacings from both images and mask
        .normalize_hu(min_hu=-1200, max_hu=600)
    )  # clips the HU values and linearly rescales them, values from grt team
    # .apply_lung_mask(paddi
sys.path.append('../../')

from radio import CTImagesMaskedBatch as CTIMB
from radio.dataset import Dataset, Pipeline, FilesIndex, F, V, B, C, Config, L
from radio.dataset.research import Research, Option, KV
from radio.dataset.models.tf import UNet, VNet

# paths to scans in blosc and annotations-table
PATH_SCANS = './blosc/*'
PATH_ANNOTS = './annotations.csv'

# directory for saving models
MODELS_DIR = './trained_models/'

# dataset and annotations-table
index = FilesIndex(path=PATH_SCANS, dirs=True, no_ext=False)
dataset = Dataset(index=index, batch_class=CTIMB)
dataset.split(0.9, shuffle=120)
nodules = pd.read_csv(PATH_ANNOTS)


def train(batch, model='net', minibatch_size=8, mode='max',
          depth=6, stride=2, start=0, channels=3):
    """ Custom train method.

    Train a model in minibatches of xips, fetch loss and prediction.
    """