def create_network():
    """Build and serialise a fastr network that registers a moving image
    onto a fixed image with elastix and warps the moving image with the
    resulting transform via transformix.
    """
    # Import the fastr environment and set it up
    import fastr

    net = fastr.Network(id_="elastix_test")

    # Data sources: the two images and the elastix parameter file.
    fixed_src = net.create_source('ITKImageFile', id_='fixed_img')
    moving_src = net.create_source('ITKImageFile', id_='moving_img')
    param_src = net.create_source('ElastixParameterFile', id_='param_file')

    # Registration node.
    elastix_node = net.create_node('elastix_dev', id_='elastix')
    elastix_node.inputs['fixed_image'] = fixed_src.output
    elastix_node.inputs['moving_image'] = moving_src.output

    param_link = net.create_link(param_src.output,
                                 elastix_node.inputs['parameters'])
    # NOTE(review): 'converge' is not a documented fastr Link attribute
    # ('collapse'/'expand' are) -- confirm this assignment is intentional.
    param_link.converge = 0

    # Sink for the estimated transform(s).
    trans_sink = net.create_sink('ElastixTransformFile', id_='sink_trans')
    trans_sink.inputs['input'] = elastix_node.outputs['transform']

    # Apply the final transform to the moving image.
    transformix_node = net.create_node('transformix_dev', id_='transformix')
    transformix_node.inputs['image'] = moving_src.output
    transformix_node.inputs['transform'] = elastix_node.outputs['transform'][-1]

    image_sink = net.create_sink('ITKImageFile', id_='sink_image')
    image_sink.inputs['input'] = transformix_node.outputs['image']

    # Dump a drawing and a json description of the network for inspection.
    net.draw_network(img_format='svg')
    net.dumpf('{}.json'.format(net.id), indent=2)

    return net
def create_network():
    """Build a fastr network that runs the Segmentix tool on an input
    segmentation, using a mask and a parameter file, and saves the result.
    """
    # Import the fastr environment and set it up
    import fastr

    # Create a new network
    network = fastr.Network(id_='Segmentix_test')

    # Sources for the segmentation, the mask and the tool parameters.
    src_segmentation = network.create_source('ITKImageFile', id_='segmentation_in')
    src_mask = network.create_source('ITKImageFile', id_='mask')
    src_parameters = network.create_source('ParameterFile', id_='parameters')

    # The processing node, taken from the tool list.
    segmentix = network.create_node('Segmentix', id_="segmentix")
    segmentix.inputs['segmentation_in'] = src_segmentation.output
    segmentix.inputs['mask'] = src_mask.output
    segmentix.inputs['parameters'] = src_parameters.output

    # Sink that stores the processed segmentation.
    sink_segmentation = network.create_sink('ITKImageFile', id_='segmentation_out')
    sink_segmentation.input = segmentix.outputs['segmentation_out']

    return network
def create_network():
    """Build a fastr network that runs the CalcFeatures tool on an
    image/segmentation pair (with dicom metadata and a parameter file)
    and stores the resulting features in an HDF5 sink.
    """
    # Import the fastr environment and set it up
    import fastr

    # Create a new network
    network = fastr.Network(id_='CalcFeatures_test')

    # Sources: segmentation, image, dicom metadata and tool parameters.
    src_segmentation = network.create_source('ITKImageFile', id_='segmentation')
    src_image = network.create_source('ITKImageFile', id_='image')
    src_metadata = network.create_source('DicomImageFile', id_='metadata')
    src_parameters = network.create_source('ParameterFile', id_='parameters')

    # Feature-calculation node, taken from the tool list.
    calcfeatures = network.create_node('CalcFeatures', id_="calcfeatures")
    calcfeatures.inputs['segmentation'] = src_segmentation.output
    calcfeatures.inputs['image'] = src_image.output
    calcfeatures.inputs['metadata'] = src_metadata.output
    calcfeatures.inputs['parameters'] = src_parameters.output

    # Sink that stores the computed features.
    sink_features = network.create_sink('HDF5', id_='features')
    sink_features.input = calcfeatures.outputs['features']

    return network
def create_network(self):
    """Assemble the transformix network: warp a moving image with a given
    elastix transform file and store the warped image in a sink.

    Sets the ``network``, ``MovingImageSource``, ``ParameterMapSource``,
    ``transformix_node`` and ``outimage`` attributes on ``self``.
    """
    network = fastr.Network(id_="transformix")
    self.network = network

    # Sources: the image to warp and the transform parameters.
    self.MovingImageSource = network.create_source('ITKImageFile',
                                                   id_='MovingImage')
    self.ParameterMapSource = network.create_source('ElastixTransformFile',
                                                    id_='ParameterFile')

    # Transformix node applies the transform to the image.
    self.transformix_node = network.create_node('transformix_dev',
                                                id_='transformix')
    self.transformix_node.inputs['image'] = self.MovingImageSource.output
    self.transformix_node.inputs['transform'] = self.ParameterMapSource.output

    # Sink for the warped image.
    self.outimage = network.create_sink('ITKImageFile', id_='sink_image')
    self.outimage.inputs['input'] = self.transformix_node.outputs['image']

    # Export a drawing and a json description of the network for inspection.
    network.draw_network(img_format='svg')
    network.dumpf('{}.json'.format(network.id), indent=2)
def __init__(self, label_type, ensemble=50, scores='percentages',
             network=None, features=None,
             fastr_plugin='ProcessPoolExecution',
             name='Example'):
    '''
    Build a network that evaluates the performance of an estimator.

    Parameters
    ----------
    network: fastr network, default None
        If you input a network, the evaluate network is added
        to the existing network.
    '''
    if network is not None:
        # Extend the supplied network instead of creating a new one.
        self.network = network
        self.mode = 'WORC'
    else:
        self.mode = 'StandAlone'

    self.fastr_plugin = fastr_plugin
    self.name = 'WORC_Evaluate_' + name

    # BUGFIX: only create a fresh network in stand-alone mode.  The
    # original unconditionally overwrote self.network here, discarding a
    # network passed by the caller and contradicting the documented
    # behaviour above.
    if self.mode == 'StandAlone':
        self.network = fastr.Network(id_=self.name)

    self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'], self.name)

    # Stand-alone use requires explicit feature inputs.
    if features is None and self.mode == 'StandAlone':
        raise WORCexceptions.IOError(
            'Either features as input or a WORC network is required for the Evaluate network.'
        )

    self.features = features
    self.label_type = label_type
    self.ensemble = ensemble

    self.create_network()
def __init__(self, images=None, segmentations=None, network=None,
             fastr_plugin='ProcessPoolExecution',
             name='Example'):
    '''
    Build a network that evaluates the performance of an estimator.

    Parameters
    ----------
    network: fastr network, default None
        If you input a network, the evaluate network is added
        to the existing network.
    '''
    if network is not None:
        # Extend the supplied network instead of creating a new one.
        self.network = network
        self.mode = 'WORC'
    else:
        self.mode = 'StandAlone'

    self.fastr_plugin = fastr_plugin
    self.name = 'WORC_Slicer_' + name

    # BUGFIX: only create a fresh network in stand-alone mode.  The
    # original unconditionally overwrote self.network here, discarding a
    # network passed by the caller and contradicting the documented
    # behaviour above.
    if self.mode == 'StandAlone':
        self.network = fastr.Network(id_=self.name)

    self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'], self.name)

    # Stand-alone use requires explicit image inputs.
    # BUGFIX: the original built this message from two concatenated
    # literals with no separating space ("...WORCnetwork...").
    if images is None and self.mode == 'StandAlone':
        message = ('Either images and segmentations as input or a WORC '
                   'network is required for the Evaluate network.')
        raise WORCexceptions.IOError(message)

    self.image = images
    self.segmentations = segmentations

    self.create_network()
def main():
    """Run a cross-validated multi-atlas segmentation of the femur/hip.

    For every fold: generate a target ROI by similarity registration of
    the atlas ROIs, preprocess the images (N4 + range matching), register
    every atlas onto every target with elastix, propagate and fuse the
    atlas labels by majority vote, and evaluate the fused segmentation
    with a Dice overlap against the target labels.
    """
    #################################################
    #### PARAMETERS #################################
    #################################################
    # network name
    network_name = 'multiatlas_femur_segm_CV'
    output_segm_name = 'segm_woapp_hip_foldnr'
    output_eval_meas_name = 'dice_woapp_hip_foldnr'
    # output folder for segmentations / evaluation measures
    output_segm_folder = 'vfs://fastr_data/hipdata/output/'
    output_eval_meas_folder = 'vfs://fastr_data/hipdata/output/'
    # number of cross-validation folds
    num_folds = 5
    # number of label classes (background/foreground)
    nrclasses = 2
    # radius for dilation of the generated mask
    radius = [5.0]
    # registration parameter files
    registration_parameters = ('vfs://elastix_files/par_affine_multi.txt',
                               'vfs://elastix_files/par_bspline5mm_multi.txt')
    # NOTE(review): 'elastix_file' differs from the 'elastix_files' mount
    # used just above -- confirm which mount name actually exists.
    registration_parameters_generate_mask = (
        'vfs://elastix_file/par_similarity.txt', )

    # Subject identifiers; every volume name and file path below is
    # derived from these.
    subjects = ['112621', '112629', '112657', '113297', '115510', '118132',
                '118663', '118972', '119833', '119927', '128348', '129317',
                '129358', '131044', '131489', '131717', '132132']

    # MRI volume names / label volume names / region-of-interest names
    CV_img = ['imageR{}fw'.format(s) for s in subjects]
    CV_label = ['maskR{}'.format(s) for s in subjects]
    CV_ROI = ['ROI{}'.format(s) for s in subjects]

    cv = cross_validation.KFold(len(CV_img), n_folds=num_folds,
                                random_state=0)

    # sourcedata contains the paths of all data volumes.  Every entry of
    # the original hand-written dictionary followed the same naming
    # pattern, so the entries are generated here instead.
    # BUGFIX: the original atlas_img entry for subject R128348 listed the
    # 'f' volume twice instead of the f/w pair; the generated version
    # uses the intended w volume.
    img = 'vfs://fastr_data/hipdata/images/R{}{}.nii.gz'.format
    msk = 'vfs://fastr_data/hipdata/hip_masks/R{}.nii.gz'.format
    roi = 'vfs://fastr_data/hipdata/ROI/R{}w.nii.gz'.format
    sourcedata = {
        'scalespace_img': {'imageR{}fw'.format(s): (img(s, 'f'), )
                           for s in subjects},
        'atlas_img': {'imageR{}fw'.format(s): (img(s, 'f'), img(s, 'w'))
                      for s in subjects},
        'atlas_labels': {'maskR{}'.format(s): (msk(s), )
                         for s in subjects},
        'atlas_ROI': {'ROI{}'.format(s): (roi(s), )
                      for s in subjects},
    }

    #############################################################################
    ################## END PARAMETERS ###########################################
    #############################################################################
    # Start cross-validation.
    # BUGFIX: the original wrapped this loop in a redundant
    # 'while foldnr <= num_folds' that only terminated because the inner
    # for-loop incremented foldnr past num_folds; enumerate is equivalent
    # and runs each fold exactly once.
    for foldnr, (train_indices, test_indices) in enumerate(cv, 1):
        # Select the atlas (train) and target (test) data for this fold.
        sourcedata_fold = {
            'atlas_img': {},
            'atlas_labels': {},
            'atlas_ROI': {},
            'target_img': {},
            'target_labels': {},
        }
        for ii in train_indices:
            sourcedata_fold['atlas_img'][CV_img[ii]] = \
                sourcedata['atlas_img'][CV_img[ii]]
            sourcedata_fold['atlas_labels'][CV_label[ii]] = \
                sourcedata['atlas_labels'][CV_label[ii]]
            sourcedata_fold['atlas_ROI'][CV_ROI[ii]] = \
                sourcedata['atlas_ROI'][CV_ROI[ii]]
        for kk in test_indices:
            sourcedata_fold['target_img'][CV_img[kk]] = \
                sourcedata['atlas_img'][CV_img[kk]]
            sourcedata_fold['target_labels'][CV_label[kk]] = \
                sourcedata['atlas_labels'][CV_label[kk]]

        # Setup Network and sources
        network = fastr.Network(id_=network_name)

        # load MRI target volumes and labels
        source_targetImages = network.create_source(
            'NiftiImageFileCompressed', id_='target_img', nodegroup='target')
        source_targetlabel = network.create_source(
            'NiftiImageFileCompressed', id_='target_labels', nodegroup='target')
        # load MRI atlas volumes, labels and ROIs
        source_atlasImages = network.create_source(
            'NiftiImageFileCompressed', id_='atlas_img', nodegroup='atlas')
        source_atlasLabels = network.create_source(
            'NiftiImageFileCompressed', id_='atlas_labels', nodegroup='atlas')
        source_atlasROI = network.create_source(
            datatype=fastr.typelist['ITKImageFile'], id_='atlas_ROI',
            nodegroup='atlas')

        ###################################################################
        # Generate target ROI using multi-atlas similarity transform
        ###################################################################
        reg_genmask = network.create_node(fastr.toollist['Elastix', '4.8'],
                                          id_='reg_genmask', memory='10G')
        reg_genmask.inputs['fixed_image'] = source_targetImages.output
        reg_genmask.inputs['moving_image'] = source_atlasImages.output
        reg_genmask.inputs['moving_image'].input_group = 'atlas'
        reg_genmask.inputs['parameters'] = registration_parameters_generate_mask

        # transform atlas ROIs according to parameters estimated by elastix
        trans_label_genmask = network.create_node('Transformix',
                                                  id_='trans_label_genmask',
                                                  memory='6G')
        link_trans_label_genmask = trans_label_genmask.inputs[
            'image'] << source_atlasROI.output
        trans_label_genmask.inputs['transform'] = reg_genmask.outputs[
            'transform'][-1]

        # combine transformed ROIs: hard label by majority vote or soft
        # label given as probability
        combine_label_genmask = network.create_node(
            'PxCombineSegmentations', id_='combine_label_genmask')
        link_combine_genmask = network.create_link(
            trans_label_genmask.outputs['image'],
            combine_label_genmask.inputs['images'])
        link_combine_genmask.collapse = 'atlas'
        combine_label_genmask.inputs['method'] = ['VOTE']
        combine_label_genmask.inputs['number_of_classes'] = [nrclasses]

        # probability values greater than 0.5 are thresholded to 1, others 0
        threshold = network.create_node('PxThresholdImage', id_='threshold',
                                        memory='2G')
        threshold.inputs['image'] = combine_label_genmask.outputs[
            'soft_segment'][-1]
        threshold.inputs['upper_threshold'] = [0.5]

        # convert to char datatype (required for itktools 'Px' nodes)
        castconvert = network.create_node('PxCastConvert', id_='castconvert',
                                          memory='2G')
        castconvert.inputs['image'] = threshold.outputs['image']
        castconvert.inputs['component_type'] = ['char']

        # morphological operation: dilation of the generated mask
        morph = network.create_node('PxMorphology', id_='morph', memory='5G')
        morph.inputs['image'] = castconvert.outputs['image']
        morph.inputs['operation'] = ['dilation']
        morph.inputs['operation_type'] = ['binary']
        morph.inputs['radius'] = radius

        ###################################################################
        # Apply image processing operations to MRI volumes
        ###################################################################
        # Apply n4 non-uniformity correction to MRI atlas volumes
        n4_atlas_im = network.create_node('N4', id_='n4_atlas', memory='15G')
        linkn4atlas = n4_atlas_im.inputs['image'] << source_atlasImages.output
        linkn4atlas.expand = True
        n4_atlas_im.inputs['shrink_factor'] = 4,
        n4_atlas_im.inputs['converge'] = '[150,00001]',
        n4_atlas_im.inputs['bspline_fitting'] = '[50]',

        # Apply n4 non-uniformity correction to MRI target volumes
        n4_target_im = network.create_node('N4', id_='n4_target', memory='15G')
        linkn4target = n4_target_im.inputs[
            'image'] << source_targetImages.output
        linkn4target.expand = True
        n4_target_im.inputs['shrink_factor'] = 4,
        n4_target_im.inputs['converge'] = '[150,00001]',
        n4_target_im.inputs['bspline_fitting'] = '[50]',

        # Range match MRI atlas images
        rama_atlas_im = network.create_node('RangeMatch', id_='rama_atlas',
                                            memory='15G')
        rama_atlas_im.inputs['image'] = n4_atlas_im.outputs['image']
        link_rama_mask_atlas = rama_atlas_im.inputs[
            'mask'] << source_atlasROI.output

        # Range match MRI target images
        rama_target_im = network.create_node('RangeMatch', id_='rama_target',
                                             memory='15G')
        rama_target_im.inputs['image'] = n4_target_im.outputs['image']
        link_rama_mask_target = rama_target_im.inputs[
            'mask'] << morph.outputs['image']

        ################################
        # Multi-atlas segmentation part
        ################################
        # perform registration with elastix
        reg_t1 = network.create_node(fastr.toollist['Elastix', '4.8'],
                                     id_='reg_t1', memory='20G')
        link1 = reg_t1.inputs['fixed_image'] << rama_target_im.outputs['image']
        link1.collapse = "target_img__output"
        link2 = reg_t1.inputs['moving_image'] << rama_atlas_im.outputs['image']
        link2.collapse = "atlas_img__output"
        reg_t1.inputs['moving_image'].input_group = 'atlas'
        reg_t1.inputs['parameters'] = registration_parameters
        reg_t1.inputs['fixed_mask'] = (morph.outputs['image'],
                                       morph.outputs['image'])
        reg_t1.inputs['moving_mask'] = source_atlasROI.output
        reg_t1.inputs['moving_mask'].input_group = 'atlas'

        # transform masks according to registration results
        trans_label = network.create_node('Transformix', id_='trans_label')
        linktrans = trans_label.inputs['image'] << source_atlasLabels.output
        trans_label.inputs['transform'] = reg_t1.outputs['transform'][-1]

        # combine registered masks by majority vote
        combine_label = network.create_node('PxCombineSegmentations',
                                            id_='combine_label')
        link_combine = network.create_link(trans_label.outputs['image'],
                                           combine_label.inputs['images'])
        link_combine.collapse = 'atlas'
        combine_label.inputs['method'] = ['VOTE']
        combine_label.inputs['number_of_classes'] = [nrclasses]

        # Create sink for segmentation
        out_seg = network.create_sink('NiftiImageFileCompressed',
                                      id_='out_seg')
        out_seg.input = combine_label.outputs['hard_segment']

        # dice overlap between fused segmentation and the target labels
        dice_node = network.create_node(fastr.toollist['DiceMultilabelIms'],
                                        id_='dice_multi')
        dice_node.inputs['image1'] = combine_label.outputs['hard_segment']
        dice_node.inputs['image2'] = source_targetlabel.output
        dice_node.inputs['numlabels'] = 1,

        # Create sink for dice overlap score
        outnumber = network.create_sink(datatype=fastr.typelist['Float'],
                                        id_='sink_measure')
        link = network.create_link(dice_node.outputs['output'],
                                   outnumber.input)
        link.collapse = 'target'

        # location of sink files
        sinkdata = {
            'out_seg': output_segm_folder + output_segm_name + str(foldnr) +
            '_{sample_id}{ext}',
            'sink_measure': output_eval_meas_folder + output_eval_meas_name +
            str(foldnr) + '_{sample_id}_{cardinality}{ext}'
        }

        # BUGFIX: the original used the Python-2 print statement; the call
        # form below works on both Python 2 and 3.
        print(network.draw_network(img_format='svg', draw_dimension=True))
        fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')

        # execute multi-atlas segmentation for this fold
        network.execute(sourcedata_fold, sinkdata, cluster_queue="week")
def _fit(self, X, y, groups, parameter_iterable):
    """Actual fitting, performing the search over parameters.

    Unlike sklearn's joblib-based search, the cross-validated
    fit-and-score jobs are distributed through a fastr network: the
    estimator, the train/test splits and chunks of the parameter grid are
    serialised to a temporary folder, a 'fitandscore' node is executed for
    every (split, chunk) combination, and the resulting files are read
    back to aggregate scores and select the best parameter setting.
    """
    base_estimator = clone(self.estimator)
    cv = check_cv(self.cv, y, classifier=is_classifier(base_estimator))
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    if self.verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    cv_iter = list(cv.split(X, y, groups))

    # A unique scratch folder under the fastr tmp mount for this search.
    name = ''.join(random.choice(string.ascii_uppercase + string.digits)
                   for _ in range(10))
    tempfolder = os.path.join(fastr.config.mounts['tmp'], 'GS', name)
    if not os.path.exists(tempfolder):
        os.makedirs(tempfolder)

    # Number every parameter setting so results can be traced back.
    parameters_temp = dict()
    for num, parameters in enumerate(parameter_iterable):
        parameters["Number"] = str(num)
        parameters_temp[str(num)] = parameters

    # Split the parameter settings in chunks of n_jobspercore and write
    # each chunk to a json source file.
    # BUGFIX: materialise the keys as a list; dict.keys() returns a view
    # on Python 3, which chunks() cannot slice.
    keys = list(parameters_temp.keys())
    keys = chunks(keys, self.n_jobspercore)
    parameter_files = dict()
    for num, k in enumerate(keys):
        temp_dict = dict()
        for number in k:
            temp_dict[number] = parameters_temp[number]

        fname = ('settings_{}.json').format(str(num))
        sourcename = os.path.join(tempfolder, 'parameters', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))
        with open(sourcename, 'w') as fp:
            json.dump(temp_dict, fp, indent=4)

        parameter_files[str(num)] =\
            ('vfs://tmp/{}/{}/{}/{}').format('GS', name,
                                             'parameters', fname)

    # Create the test-train splits, one hdf5 source file per fold.
    traintest_files = dict()
    # TODO: ugly nummering solution
    num = 0
    for train, test in cv_iter:
        source_labels = ['train', 'test']
        source_data = pd.Series([train, test], index=source_labels,
                                name='Train-test data')

        fname = ('traintest_{}.hdf5').format(str(num))
        sourcename = os.path.join(tempfolder, 'traintest', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))
        traintest_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format(
            'GS', name, 'traintest', fname)

        sourcelabel = ("Source Data Iteration {}").format(str(num))
        source_data.to_hdf(sourcename, sourcelabel)
        num += 1

    # Serialise the estimator and the scoring settings once.
    estimator_labels = [
        'base_estimator', 'X', 'y', 'scorer', 'verbose', 'fit_params',
        'return_train_score', 'return_n_test_samples', 'return_times',
        'return_parameters', 'error_score'
    ]
    estimator_data = pd.Series([
        clone(base_estimator), X, y, self.scorer_, self.verbose,
        self.fit_params, self.return_train_score, True, True, True,
        self.error_score
    ],
                               index=estimator_labels,
                               name='estimator Data')
    fname = 'estimatordata.hdf5'
    estimatorname = os.path.join(tempfolder, fname)
    estimator_data.to_hdf(estimatorname, 'Estimator Data')
    estimatordata = ("vfs://tmp/{}/{}/{}").format('GS', name, fname)

    # Build the fastr network: one fitandscore job per
    # (train-test split, parameter chunk) combination.
    # CONSISTENCY: pass the network id by keyword like the rest of this
    # file does (the original passed it positionally).
    network = fastr.Network(id_='GridSearch_' + name)
    estimator_data = network.create_source('HDF5', id_='estimator_source')
    traintest_data = network.create_source('HDF5', id_='traintest')
    parameter_data = network.create_source('JsonFile', id_='parameters')
    sink_output = network.create_sink('HDF5', id_='output')

    fitandscore = network.create_node('fitandscore', memory='8G',
                                      id_='fitandscore')
    fitandscore.inputs['estimatordata'].input_group = 'estimator'
    fitandscore.inputs['traintest'].input_group = 'traintest'
    fitandscore.inputs['parameters'].input_group = 'parameters'
    fitandscore.inputs['estimatordata'] = estimator_data.output
    fitandscore.inputs['traintest'] = traintest_data.output
    fitandscore.inputs['parameters'] = parameter_data.output
    sink_output.input = fitandscore.outputs['fittedestimator']

    source_data = {
        'estimator_source': estimatordata,
        'traintest': traintest_files,
        'parameters': parameter_files
    }
    sink_data = {
        'output':
        ("vfs://tmp/{}/{}/output_{{sample_id}}_{{cardinality}}{{ext}}"
         ).format('GS', name)
    }

    network.execute(source_data, sink_data,
                    tmpdir=os.path.join(tempfolder, 'tmp'))

    # Read in the output data once finished
    # TODO: expanding fastr url is probably a nicer way
    sink_files = glob.glob(
        os.path.join(fastr.config.mounts['tmp'], 'GS', name) +
        '/output*.hdf5')
    save_data = list()
    feature_labels = list()
    scalers = list()
    GroupSel = list()
    VarSel = list()
    SelectModel = list()
    for output in sink_files:
        data = pd.read_hdf(output)
        save_data.extend(list(data['RET']))
        feature_labels.extend(list(data['feature_labels']))
        scalers.extend(list(data['scaler']))
        GroupSel.extend(list(data['GroupSelection']))
        VarSel.extend(list(data['VarSelection']))
        SelectModel.extend(list(data['SelectModel']))

    # Remove the temporary folder used
    shutil.rmtree(tempfolder)

    # if one chose to see train score, "out" will contain train score info
    if self.return_train_score:
        (train_scores, test_scores, test_sample_counts, fit_time,
         score_time, parameters_est, parameters_all) =\
            zip(*save_data)
    else:
        (test_scores, test_sample_counts, fit_time, score_time,
         parameters_est, parameters_all) =\
            zip(*save_data)

    # Results are ordered with all splits of a candidate adjacent; take
    # every n_splits-th entry to get one record per candidate.
    candidate_params_est = parameters_est[::n_splits]
    candidate_params_all = parameters_all[::n_splits]
    GroupSel = GroupSel[::n_splits]
    SelectModel = SelectModel[::n_splits]
    VarSel = VarSel[::n_splits]
    scalers = scalers[::n_splits]
    feature_labels = feature_labels[::n_splits]
    n_candidates = len(candidate_params_est)

    results = dict()

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                results["split%d_%s" % (split_i,
                                        key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis])**2,
                       axis=1,
                       weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(rankdata(
                -array_means, method='min'),
                                                       dtype=np.int32)

    # Computed the (weighted) mean and std for test scores alone
    # NOTE test_sample counts (weights) remain the same for all candidates
    # BUGFIX: np.int was removed from numpy 1.24; the builtin int is the
    # documented equivalent.
    test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=int)

    _store('test_score',
           test_scores,
           splits=True,
           rank=True,
           weights=test_sample_counts if self.iid else None)
    if self.return_train_score:
        _store('train_score', train_scores, splits=True)
    _store('fit_time', fit_time)
    _store('score_time', score_time)

    best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
    best_parameters_est = candidate_params_est[best_index]
    best_groupsel = GroupSel[best_index]
    best_modelsel = SelectModel[best_index]
    best_varsel = VarSel[best_index]
    best_scaler = scalers[best_index]
    best_featlab = feature_labels[best_index]

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(
        partial(MaskedArray,
                np.empty(n_candidates, ),
                mask=True,
                dtype=object))
    for cand_i, params in enumerate(candidate_params_all):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    # In order to reduce the memory used, we will only save at
    # max 100 parameter settings
    maxlen = min(100, len(candidate_params_all))
    for k in param_results.keys():
        param_results[k] = param_results[k][0:maxlen]

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params_est[0:maxlen]
    results['params_all'] = candidate_params_all[0:maxlen]

    self.best_groupsel = best_groupsel
    self.best_scaler = best_scaler
    self.best_varsel = best_varsel
    self.best_modelsel = best_modelsel
    self.cv_results_ = results
    self.best_index_ = best_index
    self.best_featlab = best_featlab
    self.n_splits_ = n_splits

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best_parameters_est)

        # Select only the feature values, not the labels
        X = [x[0] for x in X]
        if best_groupsel is not None:
            X = best_groupsel.transform(X)
        if best_modelsel is not None:
            X = best_modelsel.transform(X)
        if best_varsel is not None:
            X = best_varsel.transform(X)
        if best_scaler is not None:
            X = best_scaler.transform(X)

        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator

    return self
def main():
    """Build and run a fastr network that trains a single appearance
    (voxel) classifier from atlas MRI volumes and label images.

    Pipeline visible in this network: N4 bias-field correction ->
    range matching -> Gaussian scale-space features; labels are cast
    to char and dilated into a sampling mask; voxel samples are drawn
    and fed to a RandomForestTrain node whose output classifier is
    written to a sink.
    """
    network_name = 'trainSingleAppearanceClassifier'
    path_of_class_dir = 'vfs://fastr_data/seg/class/'
    # name of classifier to be output, fastr will infer the extension {ext}
    name_of_output_classifier = 'Femke_LEFT_trained_classifer{ext}'

    # Scale space scales to extract (in mm)
    scales = (1.0, 1.6, 4.0)

    # Radius for the background sampling mask dilation (mm?)
    radius = [5.0]

    # Number/fraction of sample to sample per images
    # One element per class (class0, class1, etc)
    # If value between [0.0 - 1.0] it is a fraction of the number of samples
    # available in that class
    # If the value is above 1, it is the number of samples to take; if the
    # available number of samples is lower, it will take all samples.
    nsamples = (1000, 1000)

    # Note: the parameters for the random forest classifier are in the
    # parameter file supplied as source data
    network = fastr.Network(id_=network_name)

    # create source nodes of data and labels
    source_t1 = network.create_source('NiftiImageFileCompressed',
                                      id_='images', sourcegroup='atlas')
    source_label = network.create_source('NiftiImageFileCompressed',
                                         id_='label_images',
                                         sourcegroup='atlas')
    source_param = network.create_source('KeyValueFile', id_='param_file')
    source_atlasROI = network.create_source('NiftiImageFileCompressed',
                                            id_='ROI', sourcegroup='atlas')

    # Apply n4 non-uniformity correction to MRI atlas volumes
    n4_atlas_im = network.create_node('N4', id_='n4_atlas', memory='15G')
    n4_atlas_im.inputs['image'] = source_t1.output
    # trailing commas make these single-element tuples (cardinality-1 inputs)
    n4_atlas_im.inputs['shrink_factor'] = 4,
    n4_atlas_im.inputs['converge'] = '[150,00001]',
    n4_atlas_im.inputs['bspline_fitting'] = '[50]',

    # Range match images
    rama_atlas_im = network.create_node('RangeMatch', id_='rama_atlas',
                                        memory='15G')
    rama_atlas_im.inputs['image'] = n4_atlas_im.outputs['image']
    rama_atlas_im.inputs['mask'] = source_atlasROI.output

    # convert to char datatype (this is required for itktools,
    # nodes starting with 'Px')
    pxcastconvert = network.create_node('PxCastConvert', id_='castconvert')
    pxcastconvert.inputs['image'] = source_label.output
    pxcastconvert.inputs['component_type'] = ['char']

    # Create filter image for source data
    scalespacefilter = network.create_node('GaussianScaleSpace',
                                           id_='scalespacefilter',
                                           memory='15G')
    scalespacefilter.inputs['image'] = rama_atlas_im.outputs['image']
    scalespacefilter.inputs['scales'] = scales

    # Prepare mask: dilate the (char-cast) label image
    morph = network.create_node('PxMorphology', id_='morph', memory='6G')
    morph.inputs['image'] = pxcastconvert.outputs['image']
    morph.inputs['operation'] = ['dilation']
    morph.inputs['operation_type'] = ['binary']
    morph.inputs['radius'] = radius

    # Sample the feature images
    sampler = network.create_node('SampleImage', id_='sampler', memory='15G')
    sampler.inputs['image'] = scalespacefilter.outputs['image']
    sampler.inputs['labels'] = pxcastconvert.outputs['image']
    sampler.inputs['mask'] = morph.outputs['image']
    sampler.inputs['nsamples'] = nsamples

    # Train the classifier, use 8 cores in parallel
    classifier = network.create_node('RandomForestTrain', id_='classifier',
                                     memory='15G', cores=8)
    link = network.create_link(sampler.outputs['sample_file'],
                               classifier.inputs['samples'])
    link.collapse = 0
    classifier.inputs['parameters'] = source_param.output
    classifier.inputs['number_of_cores'] = (8, )

    # Create sink
    out_classifier = network.create_sink('SKLearnClassifierFile',
                                         id_='out_classifier')
    out_classifier.input = classifier.outputs['classifier']

    # BUGFIX: this previously concatenated path_of_class_dir with itself;
    # the intended sink target is the output classifier name inside the
    # class directory (name_of_output_classifier was defined but unused).
    sinkdata = {'out_classifier': path_of_class_dir + name_of_output_classifier}

    print(network.draw_network(img_format='svg', draw_dimension=True))
    fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')
    # NOTE(review): `sourcedata` is not defined anywhere in this function --
    # presumably a module-level dict mapping source ids ('images',
    # 'label_images', 'param_file', 'ROI') to vfs URLs; verify before running.
    network.execute(sourcedata, sinkdata, cluster_queue="week")
def create_network(self, nettype):
    """Build an elastix registration + transformix resampling network
    and store it on ``self.network``.

    Parameters
    ----------
    nettype : str
        'pairwise' registers a moving image (with mask) onto a fixed
        image; any other value builds the "group" variant that uses the
        fixed image/mask as both fixed and moving inputs.

    The network registers with elastix, sinks the resulting transform,
    resamples the moving image with transformix, and additionally
    transforms a segmentation (after copying image metadata onto it).
    Results are available through the sinks 'sink_trans', 'sink_image'
    and 'sink_seg'.
    """
    if nettype == 'pairwise':
        # Create the network
        self.network = fastr.Network(id_="elastix_pair")

        # Create Sources
        self.FixedImageSource = self.network.create_source(
            'ITKImageFile', id_='FixedImage')
        self.FixedMaskSource = self.network.create_source('ITKImageFile',
                                                          id_='FixedMask')
        self.MovingImageSource = self.network.create_source(
            'ITKImageFile', id_='MovingImage')
        self.MovingMaskSource = self.network.create_source(
            'ITKImageFile', id_='MovingMask')
        self.ToTransformSource = self.network.create_source(
            'ITKImageFile', id_='ToTransform')
        self.ParameterMapSource = self.network.create_source(
            'ElastixParameterFile', id_='ParameterMaps', nodegroup='par')

        # Elastix requires the output folder as a sink
        # self.OutputFolderSource = self.network.create_sink('Directory', id_='Out')

        # Create Elastix node and links
        self.elastix_node = self.network.create_node(self.elastix_toolname,
                                                     id_='elastix')
        self.elastix_node.inputs[
            'fixed_image'] = self.FixedImageSource.output
        self.elastix_node.inputs[
            'fixed_mask'] = self.FixedMaskSource.output
        self.elastix_node.inputs[
            'moving_image'] = self.MovingImageSource.output
        self.elastix_node.inputs[
            'moving_mask'] = self.MovingMaskSource.output
        # self.OutputFolderSource.input = self.elastix_node.outputs['directory']
        # Collapse over the parameter-map nodegroup so all maps go into
        # one elastix job.
        self.link_param = self.network.create_link(
            self.ParameterMapSource.output,
            self.elastix_node.inputs['parameters'])
        self.link_param.collapse = 'par'

        # Create Sinks
        self.outtrans = self.network.create_sink('ElastixTransformFile',
                                                 id_='sink_trans')
        self.outimage = self.network.create_sink('ITKImageFile',
                                                 id_='sink_image')
        self.outseg = self.network.create_sink('ITKImageFile',
                                               id_='sink_seg')
        self.outtrans.inputs['input'] = self.elastix_node.outputs[
            'transform']

        # Transform output image: only the final transform ([-1]) is used
        self.transformix_node = self.network.create_node(
            self.transformix_toolname, id_='transformix')
        self.transformix_node.inputs[
            'image'] = self.MovingImageSource.output
        self.transformix_node.inputs[
            'transform'] = self.elastix_node.outputs['transform'][-1]
        self.outimage.inputs['input'] = self.transformix_node.outputs[
            'image']

        # First change the FinalBSplineInterpolationOrder to 0 for the
        # segmentation
        # NOTE(review): unlike the non-pairwise branch below, this branch
        # never sets the 'set' input on the edit node -- presumably
        # "FinalBSplineInterpolationOrder=0" is intended here too; verify.
        self.changeorder_node = self.network.create_node(
            'EditElastixTransformFile', id_='editelpara')
        self.link_trans = self.network.create_link(
            self.elastix_node.outputs['transform'][-1],
            self.changeorder_node.inputs['transform'])
        # self.link_trans.converge = 0
        # self.link_trans.collapse = 'FixedImage'
        # self.link_trans.expand = True

        # Copy metadata from image to segmentation as Elastix uses this
        self.copymetadata_node = self.network.create_node(
            'CopyMetadata', id_='copymetadata')
        self.copymetadata_node.inputs[
            'source'] = self.MovingImageSource.output
        self.copymetadata_node.inputs[
            'destination'] = self.ToTransformSource.output

        # Then transform the segmentation
        self.transformix_node_seg = self.network.create_node(
            self.transformix_toolname, id_='transformix_seg')
        self.transformix_node_seg.inputs[
            'image'] = self.copymetadata_node.outputs['output']
        self.transformix_node_seg.inputs[
            'transform'] = self.changeorder_node.outputs['transform'][-1]
        self.outseg.inputs['input'] = self.transformix_node_seg.outputs[
            'image']
    else:
        # Create the network
        self.network = fastr.Network(id_="elastix_group")

        # Create Sources
        self.FixedImageSource = self.network.create_source(
            'ITKImageFile', id_='FixedImage')
        self.FixedMaskSource = self.network.create_source('ITKImageFile',
                                                          id_='FixedMask')
        self.ToTransformSource = self.network.create_source(
            'ITKImageFile', id_='ToTransform')
        self.ParameterMapSource = self.network.create_source(
            'ElastixParameterFile', id_='ParameterMaps', nodegroup='par')

        # Elastix requires the output folder as a sink
        # self.OutputFolderSource = self.network.create_sink('Directory', id_='Out')

        # Create Elastix node and links
        # In this branch the fixed image/mask are used as both the fixed
        # and the moving inputs.
        self.elastix_node = self.network.create_node(self.elastix_toolname,
                                                     id_='elastix')
        self.elastix_node.inputs[
            'fixed_image'] = self.FixedImageSource.output
        self.elastix_node.inputs[
            'fixed_mask'] = self.FixedMaskSource.output
        self.elastix_node.inputs[
            'moving_image'] = self.FixedImageSource.output
        self.elastix_node.inputs[
            'moving_mask'] = self.FixedMaskSource.output
        # self.OutputFolderSource.input = self.elastix_node.outputs['directory']
        self.link_param = self.network.create_link(
            self.ParameterMapSource.output,
            self.elastix_node.inputs['parameters'])
        self.link_param.collapse = 'par'

        # Create Sinks
        self.outtrans = self.network.create_sink('ElastixTransformFile',
                                                 id_='sink_trans')
        self.outimage = self.network.create_sink('ITKImageFile',
                                                 id_='sink_image')
        self.outseg = self.network.create_sink('ITKImageFile',
                                               id_='sink_seg')
        self.outtrans.inputs['input'] = self.elastix_node.outputs[
            'transform']

        # Transform output image
        # NOTE(review): self.MovingImageSource is never created in this
        # branch (only in the 'pairwise' branch above), so the two uses
        # below will fail at build time -- probably FixedImageSource (or a
        # missing MovingImage source) was intended; verify.
        self.transformix_node = self.network.create_node(
            self.transformix_toolname, id_='transformix')
        self.transformix_node.inputs[
            'image'] = self.MovingImageSource.output
        self.transformix_node.inputs[
            'transform'] = self.elastix_node.outputs['transform'][-1]
        self.outimage.inputs['input'] = self.transformix_node.outputs[
            'image']

        # First change the FinalBSplineInterpolationOrder to 0 for the
        # segmentation (nearest-neighbour-like resampling for label data)
        self.changeorder_node = self.network.create_node(
            'EditElastixTransformFile', id_='editelpara')
        self.changeorder_node.inputs['set'] = [
            "FinalBSplineInterpolationOrder=0"
        ]
        # NOTE(review): here the [-1] is applied to the link *input*
        # (changeorder_node.inputs['transform'][-1]) whereas the pairwise
        # branch applies it to the elastix *output*; confirm this is
        # intentional and supported by the fastr link API.
        self.link_trans = self.network.create_link(
            self.elastix_node.outputs['transform'],
            self.changeorder_node.inputs['transform'][-1])
        # self.link_trans.converge = 0
        # self.link_trans.collapse = 'FixedImage'
        # self.link_trans.expand = True

        # Copy metadata from image to segmentation as Elastix uses this
        self.copymetadata_node = self.network.create_node(
            'CopyMetadata', id_='copymetadata')
        self.copymetadata_node.inputs[
            'source'] = self.MovingImageSource.output
        self.copymetadata_node.inputs[
            'destination'] = self.ToTransformSource.output

        # Then transform the segmentation
        self.transformix_node_seg = self.network.create_node(
            self.transformix_toolname, id_='transformix_seg')
        self.transformix_node_seg.inputs[
            'image'] = self.copymetadata_node.outputs['output']
        self.transformix_node_seg.inputs[
            'transform'] = self.changeorder_node.outputs['transform'][-1]
        self.outseg.inputs['input'] = self.transformix_node_seg.outputs[
            'image']
def main():
    """Train appearance (voxel-classification) models with K-fold
    cross-validation over a fixed set of knee-MRI atlases.

    For every fold, a fastr network is built that extracts Gaussian
    scale-space features from the training MRIs, thresholds and dilates
    the ground-truth labels into a sampling mask, samples voxels, and
    trains a random forest; the trained classifier of each fold is
    written to the sink folder with the fold number in its name.
    """
    #################################################
    #### PARAMETERS #################################
    #################################################

    # network name
    network_name = 'trainAppearanceModelCV'

    mri_folder = 'vfs://fastr_data/OAI_KneeMRI_testdata/Nifti_gz_FemoralCartilage_IMS_crop/'
    label_folder = 'vfs://fastr_data/OAI_KneeMRI_testdata/Nifti_gz_FemoralCartilage_GT/cropped_data/'

    # Note: the parameters for the random forest classifier are in the
    # parameter file supplied as source data in folder:
    param_folder = 'vfs://fastr_data/seg/param/'

    output_appearance_name = 'fastr_voxclass_single_femcart_foldnr'
    output_folder_appearance = 'vfs://fastr_data/seg/class/'

    # Scale space scales to extract (in mm)
    scales = (1.0, 1.6, 4.0)

    # Radius for the background sampling mask dilation (mm?)
    radius = [5.0]

    # Number/fraction of sample to sample per images
    # One element per class (class0, class1, etc)
    # If value between [0.0 - 1.0] it is a fraction of the number of samples
    # available in that class
    # If the value is above 1, it is the number of samples to take; if the
    # available number of samples is lower, it will take all samples.
    nsamples = (1000, 1000)

    # threshold for creation region of interest mask
    mask_threshold_const = 0.5

    # number of cores used in training
    num_cores = 8

    # number of cross-validation folds
    num_folds = 5

    # starting fold (only used to number the output classifiers)
    foldnr = 1

    # names of atlas MRIs used in the cross-validation
    CV_ims = [
        'image9003406_20060322', 'image9007827_20051219',
        'image9200458_20051202', 'image9352437_20050411',
        'image9403165_20060316', 'image9496443_20050811',
        'image9567704_20050505', 'image9047800_20060306',
        'image9056363_20051010', 'image9068453_20060131',
        'image9085290_20051103', 'image9102858_20060210',
        'image9279291_20051025', 'image9352883_20051123',
        'image9357137_20051212', 'image9357383_20050912',
        'image9369649_20060224', 'image9587749_20050707'
    ]

    # names of atlas label volumes, index-aligned with CV_ims
    CV_label = [
        'groundtruth9003406_20060322', 'groundtruth9007827_20051219',
        'groundtruth9200458_20051202', 'groundtruth9352437_20050411',
        'groundtruth9403165_20060316', 'groundtruth9496443_20050811',
        'groundtruth9567704_20050505', 'groundtruth9047800_20060306',
        'groundtruth9056363_20051010', 'groundtruth9068453_20060131',
        'groundtruth9085290_20051103', 'groundtruth9102858_20060210',
        'groundtruth9279291_20051025', 'groundtruth9352883_20051123',
        'groundtruth9357137_20051212', 'groundtruth9357383_20050912',
        'groundtruth9369649_20060224', 'groundtruth9587749_20050707'
    ]

    # Create K-Folds cross validation iterator.
    # Provides train/test indices to split data in train test sets. Split
    # dataset into k consecutive folds (without shuffling). Each fold is
    # then used as a validation set once while the k - 1 remaining folds
    # form the training set.
    cv = cross_validation.KFold(len(CV_ims), n_folds=num_folds,
                                random_state=0)

    # sourcedata dictionary contains the paths to mri volumes, label
    # volumes and parameters used in training the classifier.
    # NOTE that it is important to give the elements informative names for
    # debugging, e.g 'image9003406_20060322', as this will let us quickly
    # identify faulty data.
    sourcedata = {
        'images': {
            'image9003406_20060322': mri_folder + '9003406_20060322_SAG_3D_DESS_LEFT_016610899303_FemoralCartilage_ims_crop.nii.gz',
            'image9007827_20051219': mri_folder + '9007827_20051219_SAG_3D_DESS_LEFT_016610641606_FemoralCartilage_ims_crop.nii.gz',
            'image9047800_20060306': mri_folder + '9047800_20060306_SAG_3D_DESS_LEFT_016610874403_FemoralCartilage_ims_crop.nii.gz',
            'image9056363_20051010': mri_folder + '9056363_20051010_SAG_3D_DESS_LEFT_016610100103_FemoralCartilage_ims_crop.nii.gz',
            'image9068453_20060131': mri_folder + '9068453_20060131_SAG_3D_DESS_LEFT_016610822403_FemoralCartilage_ims_crop.nii.gz',
            'image9085290_20051103': mri_folder + '9085290_20051103_SAG_3D_DESS_LEFT_016610952703_FemoralCartilage_ims_crop.nii.gz',
            'image9094865_20060209': mri_folder + '9094865_20060209_SAG_3D_DESS_LEFT_016610837203_FemoralCartilage_ims_crop.nii.gz',
            'image9102858_20060210': mri_folder + '9102858_20060210_SAG_3D_DESS_LEFT_016610859602_FemoralCartilage_ims_crop.nii.gz',
            'image9200458_20051202': mri_folder + '9200458_20051202_SAG_3D_DESS_LEFT_016610610903_FemoralCartilage_ims_crop.nii.gz',
            'image9279291_20051025': mri_folder + '9279291_20051025_SAG_3D_DESS_LEFT_016610219303_FemoralCartilage_ims_crop.nii.gz',
            'image9352437_20050411': mri_folder + '9352437_20050411_SAG_3D_DESS_LEFT_016610106806_FemoralCartilage_ims_crop.nii.gz',
            'image9352883_20051123': mri_folder + '9352883_20051123_SAG_3D_DESS_LEFT_016610798103_FemoralCartilage_ims_crop.nii.gz',
            'image9357137_20051212': mri_folder + '9357137_20051212_SAG_3D_DESS_LEFT_016610629903_FemoralCartilage_ims_crop.nii.gz',
            'image9357383_20050912': mri_folder + '9357383_20050912_SAG_3D_DESS_LEFT_016610520402_FemoralCartilage_ims_crop.nii.gz',
            'image9369649_20060224': mri_folder + '9369649_20060224_SAG_3D_DESS_LEFT_016610861903_FemoralCartilage_ims_crop.nii.gz',
            'image9403165_20060316': mri_folder + '9403165_20060316_SAG_3D_DESS_LEFT_016610900302_FemoralCartilage_ims_crop.nii.gz',
            'image9496443_20050811': mri_folder + '9496443_20050811_SAG_3D_DESS_LEFT_016610469823_FemoralCartilage_ims_crop.nii.gz',
            'image9567704_20050505': mri_folder + '9567704_20050505_SAG_3D_DESS_LEFT_016610398706_FemoralCartilage_ims_crop.nii.gz',
            'image9587749_20050707': mri_folder + '9587749_20050707_SAG_3D_DESS_LEFT_016610415806_FemoralCartilage_ims_crop.nii.gz',
            'image9596610_20050909': mri_folder + '9596610_20050909_SAG_3D_DESS_LEFT_016610499502_FemoralCartilage_ims_crop.nii.gz',
        },
        'label_images': {
            'groundtruth9003406_20060322': label_folder + '20060322_SAG_3D_DESS_LEFT_016610899303_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9007827_20051219': label_folder + '20051219_SAG_3D_DESS_LEFT_016610641606_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9094865_20060209': label_folder + '20060209_SAG_3D_DESS_LEFT_016610837203_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9200458_20051202': label_folder + '20051202_SAG_3D_DESS_LEFT_016610610903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9352437_20050411': label_folder + '20050411_SAG_3D_DESS_LEFT_016610106806_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9403165_20060316': label_folder + '20060316_SAG_3D_DESS_LEFT_016610900302_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9496443_20050811': label_folder + '20050811_SAG_3D_DESS_LEFT_016610469823_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9567704_20050505': label_folder + '20050505_SAG_3D_DESS_LEFT_016610398706_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9047800_20060306': label_folder + '20060306_SAG_3D_DESS_LEFT_016610874403_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9056363_20051010': label_folder + '20051010_SAG_3D_DESS_LEFT_016610100103_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9068453_20060131': label_folder + '20060131_SAG_3D_DESS_LEFT_016610822403_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9085290_20051103': label_folder + '20051103_SAG_3D_DESS_LEFT_016610952703_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9102858_20060210': label_folder + '20060210_SAG_3D_DESS_LEFT_016610859602_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9279291_20051025': label_folder + '20051025_SAG_3D_DESS_LEFT_016610219303_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9352883_20051123': label_folder + '20051123_SAG_3D_DESS_LEFT_016610798103_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9357137_20051212': label_folder + '20051212_SAG_3D_DESS_LEFT_016610629903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9357383_20050912': label_folder + '20050912_SAG_3D_DESS_LEFT_016610520402_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9369649_20060224': label_folder + '20060224_SAG_3D_DESS_LEFT_016610861903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9587749_20050707': label_folder + '20050707_SAG_3D_DESS_LEFT_016610415806_FemoralCartilage_GT_crop.nii.gz'
        },
        'param_file': param_folder + 'param_single.ini'
    }

    #############################################################################
    ################## END PARAMETERS
    #############################################################################

    # cross-validation: build and run one training network per fold, using
    # only the training indices of that fold as source data
    for train_indices, test_indices in cv:
        sourcedata_fold = {}
        sourcedata_fold['images'] = {}
        sourcedata_fold['label_images'] = {}
        sourcedata_fold['param_file'] = sourcedata['param_file']
        for ii in train_indices:
            sourcedata_fold['images'][CV_ims[ii]] = sourcedata['images'][
                CV_ims[ii]]
            sourcedata_fold['label_images'][
                CV_label[ii]] = sourcedata['label_images'][CV_label[ii]]

        # instantiate network
        network = fastr.Network(id_=network_name)

        # load MRI volumes
        source_t1 = network.create_source('NiftiImageFileCompressed',
                                          id_='images', sourcegroup='atlas')
        # load label volumes
        source_label = network.create_source('NiftiImageFileCompressed',
                                             id_='label_images',
                                             sourcegroup='atlas')
        # load configuration file for appearance model
        source_param = network.create_source('ConfigFile', id_='param_file')

        # Create filter image for source data
        scalespacefilter = network.create_node('GaussianScaleSpace',
                                               id_='scalespacefilter',
                                               memory='15G')
        scalespacefilter.inputs['image'] = source_t1.output
        scalespacefilter.inputs['scales'] = scales

        # Prepare mask
        threshold = network.create_node('PxThresholdImage', id_='threshold',
                                        memory='15G')
        morph = network.create_node('PxMorphology', id_='morph',
                                    memory='15G')
        # threshold mask
        threshold.inputs['image'] = source_label.output
        threshold.inputs['upper_threshold'] = [mask_threshold_const]
        # dilate mask
        morph.inputs['image'] = threshold.outputs['image']
        morph.inputs['operation'] = ['dilation']
        morph.inputs['operation_type'] = ['binary']
        morph.inputs['radius'] = radius

        # Sample the feature images
        sampler = network.create_node('SampleImage', id_='sampler',
                                      memory='15G')
        sampler.inputs['image'] = scalespacefilter.outputs['image']
        sampler.inputs['labels'] = source_label.output
        sampler.inputs['mask'] = morph.outputs['image']
        sampler.inputs['nsamples'] = nsamples

        # Train the classifier, use [num_cores] cores in parallel.
        # CONSISTENCY FIX: `cores` was hard-coded to 8 although num_cores
        # exists for exactly this purpose (same value today, but now a
        # single knob controls both the node and the tool input).
        classifier = network.create_node('RandomForestTrain',
                                         id_='classifier', memory='15G',
                                         cores=num_cores)
        link = network.create_link(sampler.outputs['sample_file'],
                                   classifier.inputs['samples'])
        link.collapse = 0
        classifier.inputs['parameters'] = source_param.output
        classifier.inputs['number_of_cores'] = (num_cores, )

        # Create sink
        out_classifier = network.create_sink('SKLearnClassifierFile',
                                             id_='out_classifier')
        out_classifier.input = classifier.outputs['classifier']

        # location of sink files, numbered by fold
        sinkdata = {
            'out_classifier':
                output_folder_appearance + output_appearance_name +
                str(foldnr) + '_{sample_id}{ext}'
        }

        # print network
        print(network.draw_network(img_format='svg', draw_dimension=True))
        fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')

        # execute appearance model training
        network.execute(sourcedata_fold, sinkdata)

        # increment fold nr by one
        foldnr = foldnr + 1
def fit(X, y, groups, parameter_iterable):
    """Actual fitting, performing the search over parameters.

    Distributes _fit_and_score jobs over a fastr network instead of
    joblib: per-candidate parameter JSONs, per-split train/test HDF5
    files and one estimator-settings HDF5 are written to a random temp
    folder, a 'fitandscore' network is executed over them, and the
    HDF5 outputs are aggregated into an sklearn-style cv_results_ dict.

    Parameters
    ----------
    X, y, groups : data, targets and group labels (as for sklearn CV).
    parameter_iterable : iterable of dict
        Candidate parameter settings to evaluate.

    Returns
    -------
    dict with keys 'cv_results_', 'best_index_', 'n_splits_' and
    'best_estimator_' (None when refit is disabled).
    """
    # --- configuration ----------------------------------------------------
    estimator = SVC(class_weight='balanced', probability=True)
    cv = 2
    scoring = 'f1_weighted'
    verbose = True
    fit_params = None
    return_train_score = True
    error_score = 'raise'
    # BUGFIX: `iid`, `refit` and `base_estimator` were referenced later but
    # never defined (NameError at runtime); give them explicit defaults in
    # the same style as the other configuration values above.  The original
    # no-op `estimator = estimator` presumably meant to alias the base
    # estimator used for cloning.
    iid = True
    refit = True
    base_estimator = estimator

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer_ = check_scoring(estimator, scoring=scoring)

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    if verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    cv_iter = list(cv.split(X, y, groups))

    # Random folder name so concurrent searches do not collide.
    name = ''.join(random.choice(string.ascii_uppercase + string.digits)
                   for _ in range(10))
    tempfolder = os.path.join(fastr.config.mounts['tmp'], 'GS', name)
    if not os.path.exists(tempfolder):
        os.makedirs(tempfolder)

    # Create the parameter files (one JSON per candidate)
    parameter_files = dict()
    print(parameter_iterable)
    for num, parameters in enumerate(parameter_iterable):
        print(parameters)
        parameters["Number"] = str(num)

        # Convert parameter set to json
        fname = ('settings_{}.json').format(str(num))
        sourcename = os.path.join(tempfolder, 'parameters', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))

        with open(sourcename, 'w') as fp:
            json.dump(parameters, fp, indent=4)

        parameter_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format(
            'GS', name, 'parameters', fname)

    # Create test-train splits (one HDF5 per split)
    traintest_files = dict()
    # TODO: ugly nummering solution
    num = 0
    for train, test in cv_iter:
        source_labels = ['train', 'test']

        source_data = pd.Series([train, test],
                                index=source_labels,
                                name='Train-test data')

        fname = ('traintest_{}.hdf5').format(str(num))
        sourcename = os.path.join(tempfolder, 'traintest', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))
        traintest_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format(
            'GS', name, 'traintest', fname)

        sourcelabel = ("Source Data Iteration {}").format(str(num))
        source_data.to_hdf(sourcename, sourcelabel)

        num += 1

    # Create the file containing the estimator and settings
    estimator_labels = ['base_estimator', 'X', 'y', 'scorer',
                        'verbose', 'fit_params', 'return_train_score',
                        'return_n_test_samples',
                        'return_times', 'return_parameters',
                        'error_score']

    estimator_data = pd.Series([estimator, X, y, scorer_,
                                verbose, fit_params,
                                return_train_score,
                                True, True, True,
                                error_score],
                               index=estimator_labels,
                               name='estimator Data')
    fname = 'estimatordata.hdf5'
    estimatorname = os.path.join(tempfolder, fname)
    estimator_data.to_hdf(estimatorname, 'Estimator Data')

    estimatordata = ("vfs://tmp/{}/{}/{}").format('GS', name, fname)

    # Create the fastr network
    network = fastr.Network('GridSearch_' + name)
    estimator_data = network.create_source('HDF5', id_='estimator_source')
    traintest_data = network.create_source('HDF5', id_='traintest')
    parameter_data = network.create_source('JsonFile', id_='parameters')
    sink_output = network.create_sink('HDF5', id_='output')

    fitandscore = network.create_node('fitandscore', memory='2G',
                                      id_='fitandscore')
    # Separate input groups -> cross product of estimator x split x candidate
    fitandscore.inputs['estimatordata'].input_group = 'estimator'
    fitandscore.inputs['traintest'].input_group = 'traintest'
    fitandscore.inputs['parameters'].input_group = 'parameters'
    fitandscore.inputs['estimatordata'] = estimator_data.output
    fitandscore.inputs['traintest'] = traintest_data.output
    fitandscore.inputs['parameters'] = parameter_data.output
    sink_output.input = fitandscore.outputs['fittedestimator']

    source_data = {'estimator_source': estimatordata,
                   'traintest': traintest_files,
                   'parameters': parameter_files}
    sink_data = {'output': ("vfs://tmp/{}/{}/output_{{sample_id}}_{{cardinality}}{{ext}}").format('GS', name)}

    network.draw_network(network.id, draw_dimension=True)
    print(source_data)
    network.execute(source_data, sink_data,
                    tmpdir=os.path.join(tempfolder, 'fastr'))

    # Read in the output data once finished
    # TODO: expanding fastr url is probably a nicer way
    sink_files = glob.glob(os.path.join(fastr.config.mounts['tmp'], 'GS',
                                        name) + '/output*.hdf5')
    save_data = list()
    features_labels = list()  # collected but currently unused downstream
    for output in sink_files:
        data = pd.read_hdf(output)
        temp_save_data = data['RET']
        save_data.append(temp_save_data)
        features_labels.append(data['feature_labels'])

    # if one chose to see train score, "out" will contain train score info
    if return_train_score:
        (train_scores, test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*save_data)
    else:
        (test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*save_data)

    # Results come per (candidate, split); every n_splits-th entry starts a
    # new candidate.
    candidate_params = parameters[::n_splits]
    n_candidates = len(candidate_params)

    results = dict()

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(np.average((array - array_means[:, np.newaxis]) ** 2,
                                        axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    # Computed the (weighted) mean and std for test scores alone
    # NOTE test_sample counts (weights) remain the same for all candidates
    # (np.int is removed from modern numpy; builtin int is equivalent here)
    test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=int)

    _store('test_score', test_scores, splits=True, rank=True,
           weights=test_sample_counts if iid else None)
    if return_train_score:
        _store('train_score', train_scores, splits=True)
    _store('fit_time', fit_time)
    _store('score_time', score_time)

    best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
    best_parameters = candidate_params[best_index]

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    cv_results_ = results
    best_index_ = best_index
    n_splits_ = n_splits

    best_estimator_ = None
    if refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best_parameters)
        # BUGFIX: fit_params defaults to None; `**None` raises TypeError
        if y is not None:
            best_estimator.fit(X, y, **(fit_params or {}))
        else:
            best_estimator.fit(X, **(fit_params or {}))
        best_estimator_ = best_estimator

    # BUGFIX: the original `return self` raised NameError (this is a free
    # function, not a method); return the computed results instead.
    return {'cv_results_': cv_results_,
            'best_index_': best_index_,
            'n_splits_': n_splits_,
            'best_estimator_': best_estimator_}