def two_level(grouped_files_df, options: TwoLevelConf):
    """Run a two-level registration pipeline.

    Level 1 builds one model (via `mbm`) per value of the 'group' column;
    level 2 builds a model of the per-group average images, then composes
    first- and second-level transforms and resamples the first-level log
    determinants into the second-level (consensus) space.

    grouped_files_df - must contain 'group':<any comparable, sortable type>
                       and 'file':MincAtom columns
    options          - TwoLevelConf; currently the same option set is reused
                       for both levels (see FIXME below).

    Returns a Result whose output Namespace carries `first_level_results`,
    `resampled_determinants`, and `overall_determinants`.

    Raises ValueError on NaNs in the input frame or a bootstrap lsq6 target.
    """
    # TODO weird naming since the grouped_files_df isn't a GroupBy object? just files_df?
    s = Stages()

    if grouped_files_df.isnull().values.any():
        raise ValueError("NaN values in input dataframe; can't go")

    if options.mbm.lsq6.target_type == TargetType.bootstrap:
        # won't work since the second level part tries to get the resolution of *its* "first input file", which
        # hasn't been created.  We could instead pass in a resolution to the `mbm` function,
        # but instead disable for now:
        raise ValueError(
            "Bootstrap model building currently doesn't work with this pipeline; "
            "just specify an initial target instead")
    elif options.mbm.lsq6.target_type == TargetType.pride_of_models:
        pride_of_models_mapping = get_pride_of_models_mapping(
            pride_csv=options.mbm.lsq6.target_file,
            output_dir=options.application.output_directory,
            pipeline_name=options.application.pipeline_name)

    # FIXME this is the same as in the 'tamarack' except for names of arguments/enclosing variables
    def group_options(options, group):
        # Return a per-group deep copy of `options` with the lsq6 target fixed
        # up for this group (and, for non-pride targets, the registration
        # resolution filled in from the target file).
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            targets = get_closest_model_from_pride_of_models(
                pride_of_models_dict=pride_of_models_mapping,
                time_point=group)

            options.mbm.lsq6 = options.mbm.lsq6.replace(
                target_type=TargetType.initial_model,
                target_file=targets.registration_standard.path)
        else:
            # this will ensure that all groups have the same resolution -- is it necessary?
            targets = s.defer(
                registration_targets(
                    lsq6_conf=options.mbm.lsq6,
                    app_conf=options.application,
                    reg_conf=options.registration,
                    first_input_file=grouped_files_df.file.iloc[0]))

            resolution = (options.registration.resolution
                          or get_resolution_from_file(
                              targets.registration_standard.path))
            # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
            options.registration = options.registration.replace(
                resolution=resolution)

        # no need to check common space settings here since they're turned off at the parser level
        # (a bit strange)
        return options

    # One row per group, with 'files' holding that group's list of MincAtoms
    # and 'build_model' the deferred `mbm` result for the group.
    first_level_results = (
        grouped_files_df.groupby(
            'group', as_index=False
        )  # the usual annoying pattern to do a aggregate with access
        .aggregate({'file': lambda files: list(files)
                    })  # to the groupby object's keys ... TODO: fix
        .rename(columns={
            'file': "files"
        }).assign(build_model=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                mbm(imgs=row.files,
                    options=group_options(options, row.group),
                    prefix="%s" % row.group,
                    output_dir=os.path.join(
                        options.application.output_directory,
                        options.application.pipeline_name + "_first_level",
                        "%s_processed" % row.group))))))

    # TODO replace .assign(...apply(...)...) with just an apply, producing a series right away?

    # FIXME right now the same options set is being used for both levels -- use options.first/second_level
    second_level_options = copy.deepcopy(options)
    second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
        run_lsq6=False)
    second_level_options.mbm.segmentation.run_maget = False
    second_level_options.mbm.maget.maget.mask_only = False
    second_level_options.mbm.maget.maget.mask = False

    # FIXME this is probably a hack -- instead add a --second-level-init-model option to specify which timepoint should be used
    # as the initial model in the second level ??? (at this point it doesn't matter due to lack of lsq6 ...)
    if second_level_options.mbm.lsq6.target_type == TargetType.pride_of_models:
        second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
            target_type=TargetType.target,  # target doesn't really matter as no lsq6 here, just used for resolution...
            target_file=list(pride_of_models_mapping.values())[0].registration_standard.path)

    # NOTE: running lsq6_nuc_inorm here doesn't work in general (but possibly with rotational minctracc)
    # since the native-space initial model is used, but our images are
    # already in standard space (as we resampled there after the 1st-level lsq6).
    # On the other hand, we might want to run it here (although of course NOT nuc/inorm!) in the future,
    # for instance given a 'pride' of models (one for each group).
    second_level_results = s.defer(
        mbm(imgs=first_level_results.build_model.map(lambda m: m.avg_img),
            options=second_level_options,
            prefix=os.path.join(
                options.application.output_directory,
                options.application.pipeline_name + "_second_level")))

    # FIXME sadly, `mbm` doesn't return a pd.Series of xfms, so we don't have convenient indexing ...
    # Compose each first-level (subject -> group avg) transform with the
    # matching second-level (group avg -> consensus) transform.
    overall_xfms = [
        s.defer(concat_xfmhandlers([xfm_1, xfm_2]))
        for xfms_1, xfm_2 in zip(
            [r.xfms.lsq12_nlin_xfm for r in first_level_results.build_model],
            second_level_results.xfms.overall_xfm)
        for xfm_1 in xfms_1
    ]

    # Vectorized wrappers so the resampling below can be written per-column.
    resample = np.vectorize(mincresample_new, excluded={"extra_flags"})
    defer = np.vectorize(s.defer)

    # TODO using the avg_img here is a bit clunky -- maybe better to propagate group indices ...
    # only necessary since `mbm` doesn't return DataFrames but namespaces ...
    first_level_determinants = pd.concat(
        list(
            first_level_results.build_model.apply(
                lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
        ignore_index=True)

    # first_level_xfms is only necessary because you otherwise have no access to the input file which is necessary
    # for merging with the input csv. lsq12_nlin_xfm can be used to merge, and rigid_xfm contains the input file.
    # If for some reason we want to output xfms in the future, just don't drop everything.
    first_level_xfms = pd.concat(
        list(
            first_level_results.build_model.apply(
                lambda x: x.xfms.assign(first_level_avg=x.avg_img))),
        ignore_index=True)[["lsq12_nlin_xfm", "rigid_xfm"]]

    if options.mbm.segmentation.run_maget:
        # Attach MAGeT label files to the transforms via the native input path.
        maget_df = pd.DataFrame([
            {
                "label_file": x.labels.path,
                "native_file": x.orig_path
            }  #, "_merge" : basename(x.orig_path)}
            for x in pd.concat([
                namespace.maget_result
                for namespace in first_level_results.build_model
            ])
        ])
        first_level_xfms = pd.merge(
            left=first_level_xfms.assign(
                native_file=lambda df: df.rigid_xfm.apply(
                    lambda x: x.source.path)),
            right=maget_df,
            on="native_file")

    # NOTE(review): merges on determinants' 'inv_xfm' against 'lsq12_nlin_xfm';
    # assumes `mbm`'s determinants frame carries an 'inv_xfm' column -- confirm.
    first_level_determinants = (pd.merge(
        left=first_level_determinants,
        right=first_level_xfms,
        left_on="inv_xfm",
        right_on="lsq12_nlin_xfm").drop(["rigid_xfm", "lsq12_nlin_xfm"],
                                        axis=1))

    # Resample the first-level log determinants into the consensus space via
    # the group's second-level transform.
    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=pd.DataFrame({
            'group_xfm': second_level_results.xfms.overall_xfm
        }).assign(source=lambda df: df.group_xfm.apply(lambda r: r.source)),
        left_on="first_level_avg",
        right_on="source").assign(
            resampled_log_full_det=lambda df: defer(
                resample(img=df.log_full_det,
                         xfm=df.group_xfm.apply(lambda x: x.xfm),
                         like=second_level_results.avg_img)),
            resampled_log_nlin_det=lambda df: defer(
                resample(img=df.log_nlin_det,
                         xfm=df.group_xfm.apply(lambda x: x.xfm),
                         like=second_level_results.avg_img))))
    # TODO only resamples the log determinants, but still a bit ugly ... abstract somehow?
    # TODO shouldn't be called resampled_determinants since this is basically the whole (first_level) thing ...

    inverted_overall_xfms = [
        s.defer(invert_xfmhandler(xfm)) for xfm in overall_xfms
    ]

    overall_determinants = (s.defer(
        determinants_at_fwhms(
            xfms=inverted_overall_xfms,
            inv_xfms=overall_xfms,
            blur_fwhms=options.mbm.stats.stats_kernels)).assign(
                overall_log_full_det=lambda df: df.log_full_det,
                overall_log_nlin_det=lambda df: df.log_nlin_det).drop(
                    ['log_full_det', 'log_nlin_det'], axis=1))

    # TODO return some MAGeT stuff from two_level function ??
    # FIXME running MAGeT from within the `two_level` function has the same problem as running it from within `mbm`:
    # it will now run when this pipeline is called from within another one (e.g., n-level), which will be
    # redundant, create filename clashes, etc. -- this should be moved to `two_level_pipeline`.
    # TODO do we need a `pride of atlases` for MAGeT in this pipeline ??
    # TODO at the moment MAGeT is run within the MBM code, but it could be disabled there and run here
    #if options.mbm.segmentation.run_maget:
    #    maget_options = copy.deepcopy(options)
    #    maget_options.maget = options.mbm.maget
    #    fixup_maget_options(maget_options=maget_options.maget,
    #                        lsq12_options=maget_options.mbm.lsq12,
    #                        nlin_options=maget_options.mbm.nlin)
    #    maget_options.maget.maget.mask = maget_options.maget.maget.mask_only = False  # already done above
    #    del maget_options.mbm

    #    # again using a weird combination of vectorized and loop constructs ...
    #    s.defer(maget([xfm.resampled for _ix, m in first_level_results.iterrows()
    #                   for xfm in m.build_model.xfms.rigid_xfm],
    #                  options=maget_options,
    #                  prefix="%s_MAGeT" % options.application.pipeline_name,
    #                  output_dir=os.path.join(options.application.output_directory,
    #                                          options.application.pipeline_name + "_processed")))

    # TODO resampling to database model ...

    # TODO there should be one table containing all determinants (first level, overall, resampled first level) for each file
    # and another containing some groupwise information (averages and transforms to the common average)
    return Result(stages=s,
                  output=Namespace(
                      first_level_results=first_level_results,
                      resampled_determinants=resampled_determinants,
                      overall_determinants=overall_determinants))
# NOTE(review): this is a SECOND definition of `two_level` in the same module;
# at import time it silently shadows the earlier definition.  It looks like an
# older variant (no `reg_conf`/`s.defer` around `registration_targets`,
# `sort=False` in the groupby, and no MAGeT merge into the determinants) --
# confirm which copy is intended and delete the other.
def two_level(grouped_files_df, options : TwoLevelConf):
    """Run a two-level registration: per-group models, then a model of the group averages.

    grouped_files_df - must contain 'group':<any comparable, sortable type>
                       and 'file':MincAtom columns

    Returns a Result whose output Namespace carries `first_level_results`,
    `resampled_determinants`, and `overall_determinants`.
    """
    # TODO weird naming since the grouped_files_df isn't a GroupBy object? just files_df?
    s = Stages()

    if grouped_files_df.isnull().values.any():
        raise ValueError("NaN values in input dataframe; can't go")

    if options.mbm.lsq6.target_type == TargetType.bootstrap:
        # won't work since the second level part tries to get the resolution of *its* "first input file", which
        # hasn't been created.  We could instead pass in a resolution to the `mbm` function,
        # but instead disable for now:
        raise ValueError("Bootstrap model building currently doesn't work with this pipeline; "
                         "just specify an initial target instead")
    elif options.mbm.lsq6.target_type == TargetType.pride_of_models:
        pride_of_models_mapping = get_pride_of_models_mapping(pride_csv=options.mbm.lsq6.target_file,
                                                              output_dir=options.application.output_directory,
                                                              pipeline_name=options.application.pipeline_name)

    # FIXME this is the same as in the 'tamarack' except for names of arguments/enclosing variables
    def group_options(options, group):
        # Per-group deep copy of `options` with the lsq6 target fixed up.
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            targets = get_closest_model_from_pride_of_models(pride_of_models_dict=pride_of_models_mapping,
                                                             time_point=group)

            options.mbm.lsq6 = options.mbm.lsq6.replace(target_type=TargetType.initial_model,
                                                        target_file=targets.registration_standard.path)
        else:
            # this will ensure that all groups have the same resolution -- is it necessary?
            # NOTE(review): unlike the other copy of this function,
            # `registration_targets` here is neither deferred via `s.defer` nor
            # given a `reg_conf` argument -- confirm this is intended.
            targets = registration_targets(lsq6_conf=options.mbm.lsq6,
                                           app_conf=options.application,
                                           first_input_file=grouped_files_df.file.iloc[0])

            resolution = (options.registration.resolution
                          or get_resolution_from_file(targets.registration_standard.path))
            options.registration = options.registration.replace(resolution=resolution)

        # no need to check common space settings here since they're turned off at the parser level
        # (a bit strange)
        return options

    # One row per group: 'files' is the group's list of MincAtoms,
    # 'build_model' the deferred `mbm` result for the group.
    first_level_results = (
        grouped_files_df
        .groupby('group', as_index=False, sort=False)       # the usual annoying pattern to do a aggregate with access
        .aggregate({ 'file' : lambda files: list(files) })  # to the groupby object's keys ... TODO: fix
        .rename(columns={ 'file' : "files" })
        .assign(build_model=lambda df: df.apply(axis=1,
                                                func=lambda row: s.defer(mbm(imgs=row.files,
                                                                             options=group_options(options, row.group),
                                                                             prefix="%s" % row.group,
                                                                             output_dir=os.path.join(
                                                                                 options.application.output_directory,
                                                                                 options.application.pipeline_name + "_first_level",
                                                                                 "%s_processed" % row.group)))))
    )

    # TODO replace .assign(...apply(...)...) with just an apply, producing a series right away?

    # FIXME right now the same options set is being used for both levels -- use options.first/second_level
    second_level_options = copy.deepcopy(options)
    second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(run_lsq6=False)
    second_level_options.mbm.segmentation.run_maget = False
    second_level_options.mbm.maget.maget.mask_only = False
    second_level_options.mbm.maget.maget.mask = False

    # FIXME this is probably a hack -- instead add a --second-level-init-model option to specify which timepoint should be used
    # as the initial model in the second level ??? (at this point it doesn't matter due to lack of lsq6 ...)
    if second_level_options.mbm.lsq6.target_type == TargetType.pride_of_models:
        second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
            target_type=TargetType.target,  # target doesn't really matter as no lsq6 here, just used for resolution...
            target_file=list(pride_of_models_mapping.values())[0].registration_standard.path)

    # NOTE: running lsq6_nuc_inorm here doesn't work in general (but possibly with rotational minctracc)
    # since the native-space initial model is used, but our images are
    # already in standard space (as we resampled there after the 1st-level lsq6).
    # On the other hand, we might want to run it here (although of course NOT nuc/inorm!) in the future,
    # for instance given a 'pride' of models (one for each group).
    second_level_results = s.defer(mbm(imgs=first_level_results.build_model.map(lambda m: m.avg_img),
                                       options=second_level_options,
                                       prefix=os.path.join(options.application.output_directory,
                                                           options.application.pipeline_name + "_second_level")))

    # FIXME sadly, `mbm` doesn't return a pd.Series of xfms, so we don't have convenient indexing ...
    # Compose each subject->group-average transform with that group's
    # average->consensus transform.
    overall_xfms = [s.defer(concat_xfmhandlers([xfm_1, xfm_2]))
                    for xfms_1, xfm_2 in zip([r.xfms.lsq12_nlin_xfm for r in first_level_results.build_model],
                                             second_level_results.xfms.overall_xfm)
                    for xfm_1 in xfms_1]

    # Vectorized wrappers so the resampling below can be written per-column.
    resample  = np.vectorize(mincresample_new, excluded={"extra_flags"})
    defer     = np.vectorize(s.defer)

    # TODO using the avg_img here is a bit clunky -- maybe better to propagate group indices ...
    # only necessary since `mbm` doesn't return DataFrames but namespaces ...
    first_level_determinants = pd.concat(list(first_level_results.build_model.apply(
                                                lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    # Resample first-level log determinants into the consensus space via the
    # group's second-level transform.
    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=pd.DataFrame({'group_xfm' : second_level_results.xfms.overall_xfm})
              .assign(source=lambda df: df.group_xfm.apply(lambda r: r.source)),
        left_on="first_level_avg",
        right_on="source")
        .assign(resampled_log_full_det=lambda df: defer(resample(img=df.log_full_det,
                                                                 xfm=df.group_xfm.apply(lambda x: x.xfm),
                                                                 like=second_level_results.avg_img)),
                resampled_log_nlin_det=lambda df: defer(resample(img=df.log_nlin_det,
                                                                 xfm=df.group_xfm.apply(lambda x: x.xfm),
                                                                 like=second_level_results.avg_img))))
    # TODO only resamples the log determinants, but still a bit ugly ... abstract somehow?
    # TODO shouldn't be called resampled_determinants since this is basically the whole (first_level) thing ...

    inverted_overall_xfms = [s.defer(invert_xfmhandler(xfm)) for xfm in overall_xfms]

    overall_determinants = (s.defer(determinants_at_fwhms(
                                      xfms=inverted_overall_xfms,
                                      inv_xfms=overall_xfms,
                                      blur_fwhms=options.mbm.stats.stats_kernels))
                            .assign(overall_log_full_det=lambda df: df.log_full_det,
                                    overall_log_nlin_det=lambda df: df.log_nlin_det)
                            .drop(['log_full_det', 'log_nlin_det'], axis=1))

    # TODO return some MAGeT stuff from two_level function ??
    # FIXME running MAGeT from within the `two_level` function has the same problem as running it from within `mbm`:
    # it will now run when this pipeline is called from within another one (e.g., n-level), which will be
    # redundant, create filename clashes, etc. -- this should be moved to `two_level_pipeline`.
    # TODO do we need a `pride of atlases` for MAGeT in this pipeline ??
    # TODO at the moment MAGeT is run within the MBM code, but it could be disabled there and run here
    #if options.mbm.segmentation.run_maget:
    #    maget_options = copy.deepcopy(options)
    #    maget_options.maget = options.mbm.maget
    #    fixup_maget_options(maget_options=maget_options.maget,
    #                        lsq12_options=maget_options.mbm.lsq12,
    #                        nlin_options=maget_options.mbm.nlin)
    #    maget_options.maget.maget.mask = maget_options.maget.maget.mask_only = False  # already done above
    #    del maget_options.mbm

    #    # again using a weird combination of vectorized and loop constructs ...
    #    s.defer(maget([xfm.resampled for _ix, m in first_level_results.iterrows()
    #                   for xfm in m.build_model.xfms.rigid_xfm],
    #                  options=maget_options,
    #                  prefix="%s_MAGeT" % options.application.pipeline_name,
    #                  output_dir=os.path.join(options.application.output_directory,
    #                                          options.application.pipeline_name + "_processed")))

    # TODO resampling to database model ...

    # TODO there should be one table containing all determinants (first level, overall, resampled first level) for each file
    # and another containing some groupwise information (averages and transforms to the common average)
    return Result(stages=s, output=Namespace(first_level_results=first_level_results,
                                             resampled_determinants=resampled_determinants,
                                             overall_determinants=overall_determinants))
def tamarack(imgs : pd.DataFrame, options):
    """Longitudinal ('tamarack') pipeline: per-timepoint models chained to a common timepoint.

    Builds one `mbm` model per group, registers consecutive group averages
    together, composes those registrations into transforms from every group
    average to the `options.tamarack.common_time_pt` average, and resamples
    first-level log determinants into that common space.

    Returns a Result whose output Namespace carries `first_level_results`,
    `overall_determinants`, and `resampled_determinants`.
    """
    # columns of the input df: `img` : MincAtom, `timept` : number, ...
    # columns of the pride of models : 'timept' : number, 'model' : MincAtom
    # NOTE(review): the column names above disagree with the code below, which
    # uses 'file'/'group' (in the groupby) and `imgs.filename` -- confirm the
    # actual input schema.
    s = Stages()

    # TODO some assertions that the pride_of_models, if provided, is correct, and that this is intended target type

    def group_options(options, timepoint):
        # Per-timepoint deep copy of `options` with the lsq6 target fixed up.
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            options = copy.deepcopy(options)
            targets = get_closest_model_from_pride_of_models(pride_of_models_dict=get_pride_of_models_mapping(
                                                                 pride_csv=options.mbm.lsq6.target_file,
                                                                 output_dir=options.application.output_directory,
                                                                 pipeline_name=options.application.pipeline_name),
                                                             time_point=timepoint)

            options.mbm.lsq6 = options.mbm.lsq6.replace(target_type=TargetType.initial_model,
                                                        target_file=targets.registration_standard.path)

            #resolution = (options.registration.resolution
            #              or get_resolution_from_file(targets.registration_standard.path))
            #options.registration = options.registration.replace(resolution=resolution)

            # FIXME use of registration_standard here is quite wrong ...
            # part of the trouble is that mbm calls registration_targets itself,
            # so we can't send this RegistrationTargets to `mbm` directly ...
            # one option: add yet another optional arg to `mbm` ...
        else:
            targets = s.defer(registration_targets(lsq6_conf=options.mbm.lsq6,
                                                   app_conf=options.application,
                                                   reg_conf=options.registration,
                                                   first_input_file=imgs.filename.iloc[0]))

            resolution = (options.registration.resolution
                          or get_resolution_from_file(targets.registration_standard.path))
            # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
            options.registration = options.registration.replace(resolution=resolution)
        return options

    # build all first-level models:
    first_level_results = (
        imgs  # TODO 'group' => 'timept' ?
        .groupby('group', as_index=False)                   # the usual annoying pattern to do an aggregate with access
        .aggregate({ 'file' : lambda files: list(files) })  # to the groupby object's keys ... TODO: fix
        .rename(columns={ 'file' : "files" })
        .assign(options=lambda df: df.apply(axis=1, func=lambda row: group_options(options, row.group)))
        .assign(build_model=lambda df: df.apply(axis=1,
                                                func=lambda row: s.defer(
                                                    mbm(imgs=row.files,
                                                        options=row.options,
                                                        prefix="%s" % row.group,
                                                        output_dir=os.path.join(
                                                            options.application.output_directory,
                                                            options.application.pipeline_name + "_first_level",
                                                            "%s_processed" % row.group)))))
        .sort_values(by='group')
    )

    # All groups must have resolved to the same registration resolution.
    if all(first_level_results.options.map(lambda opts: opts.registration.resolution)
           == first_level_results.options.iloc[0].registration.resolution):
        options.registration = options.registration.replace(
            resolution=first_level_results.options.iloc[0].registration.resolution)
    else:
        raise ValueError("some first-level models are run at different resolutions, possibly not what you want ...")

    # construction of the overall inter-average transforms will be done iteratively (for efficiency/aesthetics),
    # which doesn't really fit the DataFrame mold ...

    full_hierarchy = get_nonlinear_configuration_from_options(nlin_protocol=options.mbm.nlin.nlin_protocol,
                                                              reg_method=options.mbm.nlin.reg_method,
                                                              file_resolution=options.registration.resolution)

    # FIXME no good can come of this
    nlin_protocol = full_hierarchy.confs[-1] if isinstance(full_hierarchy, MultilevelANTSConf) else full_hierarchy

    # first register consecutive averages together:
    average_registrations = (
        first_level_results[:-1]
        .assign(next_model=list(first_level_results[1:].build_model))
        # TODO: we should be able to do lsq6 registration here as well!
        .assign(xfm=lambda df: df.apply(axis=1,
                                        func=lambda row: s.defer(
                                            lsq12_nlin(source=row.build_model.avg_img,
                                                       target=row.next_model.avg_img,
                                                       lsq12_conf=get_linear_configuration_from_options(
                                                           options.mbm.lsq12,
                                                           transform_type=LinearTransType.lsq12,
                                                           file_resolution=options.registration.resolution),
                                                       nlin_conf=nlin_protocol)))))

    # now compose the above transforms to produce transforms from each average to the common average:
    common_time_pt = options.tamarack.common_time_pt
    common_model   = first_level_results[first_level_results.group == common_time_pt].iloc[0].build_model.avg_img
    #common = average_registrations[average_registrations.group == common_time_pt].iloc[0]
    before = average_registrations[average_registrations.group <  common_time_pt]  # asymmetry in before/after since
    after  = average_registrations[average_registrations.group >= common_time_pt]  # we used `next_`, not `previous_`

    # compose 1st and 2nd level transforms and resample into the common average space:
    def suffixes(xs):
        # All suffixes of xs, longest first, ending with [] -- e.g.
        # suffixes([a, b]) == [[a, b], [b], []].
        if len(xs) == 0:
            return [[]]
        else:
            ys = suffixes(xs[1:])
            return [[xs[0]] + ys[0]] + ys

    def prefixes(xs):
        # All prefixes of xs, shortest first, starting with [] -- e.g.
        # prefixes([a, b]) == [[], [a], [a, b]].
        if len(xs) == 0:
            return [[]]
        else:
            ys = prefixes(xs[1:])
            return ys + [ys[-1] + [xs[0]]]

    # Chain the consecutive inter-average transforms to the common timepoint;
    # averages after the common timepoint get the inverted composition.
    xfms_to_common = (
        first_level_results
        .assign(uncomposed_xfms=suffixes(list(before.xfm))[:-1] + [None] + prefixes(list(after.xfm))[1:])
        .assign(xfm_to_common=lambda df: df.apply(axis=1,
                                                  func=lambda row:
                                                    ((lambda x: s.defer(invert_xfmhandler(x)) if row.group >= common_time_pt else x)
                                                     (s.defer(concat_xfmhandlers(
                                                          row.uncomposed_xfms,
                                                          name=("%s_to_common" if row.group < common_time_pt
                                                                else "%s_from_common") % row.group))))
                                                    if row.uncomposed_xfms is not None else None))
        .drop('uncomposed_xfms', axis=1))  # TODO None => identity??

    # TODO indexing here is not good ...
    first_level_determinants = pd.concat(list(first_level_results.build_model.apply(
                                                lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    resampled_determinants = (
        pd.merge(left=first_level_determinants,
                 right=xfms_to_common.assign(source=lambda df: df.xfm_to_common.apply(
                                                        lambda x: x.source if x is not None else None)),
                 left_on="first_level_avg", right_on='source')
        .assign(resampled_log_full_det=lambda df: df.apply(axis=1,
                                                           func=lambda row: s.defer(
                                                               mincresample_new(img=row.log_full_det,
                                                                                xfm=row.xfm_to_common.xfm,
                                                                                like=common_model))
                                                                            if row.xfm_to_common is not None
                                                                            else row.img),
                resampled_log_nlin_det=lambda df: df.apply(axis=1,
                                                           func=lambda row: s.defer(
                                                               mincresample_new(img=row.log_nlin_det,
                                                                                xfm=row.xfm_to_common.xfm,
                                                                                like=common_model))
                                                                            if row.xfm_to_common is not None
                                                                            else row.img))
    )

    inverted_overall_xfms = pd.Series({ xfm : (s.defer(concat_xfmhandlers([xfm, row.xfm_to_common]))
                                               if row.xfm_to_common is not None else xfm)
                                        for _ix, row in xfms_to_common.iterrows()
                                        for xfm in row.build_model.xfms.lsq12_nlin_xfm })

    overall_xfms = inverted_overall_xfms.apply(lambda x: s.defer(invert_xfmhandler(x)))

    # NOTE(review): unlike in `two_level`, this call is NOT wrapped in
    # `s.defer` -- confirm whether its stages get registered with `s`.
    overall_determinants = determinants_at_fwhms(xfms=overall_xfms,
                                                 blur_fwhms=options.mbm.stats.stats_kernels,
                                                 inv_xfms=inverted_overall_xfms)

    # TODO turn off bootstrap as with two-level code?
    # TODO combine into one data frame
    return Result(stages=s, output=Namespace(first_level_results=first_level_results,
                                             overall_determinants=overall_determinants,
                                             resampled_determinants=resampled_determinants.drop(
                                                 ['options'], axis=1)))
# NOTE(review): this is a SECOND definition of `tamarack` in the same module
# (apparently the same code reformatted); at import time it silently shadows
# the earlier definition.  Confirm which copy is intended and delete the other.
def tamarack(imgs: pd.DataFrame, options):
    """Longitudinal ('tamarack') pipeline: per-timepoint models chained to a common timepoint.

    Builds one `mbm` model per group, registers consecutive group averages,
    composes those registrations into transforms from every group average to
    the `options.tamarack.common_time_pt` average, and resamples first-level
    log determinants into that common space.
    """
    # columns of the input df: `img` : MincAtom, `timept` : number, ...
    # columns of the pride of models : 'timept' : number, 'model' : MincAtom
    # NOTE(review): the column names above disagree with the code below
    # ('file'/'group' in the groupby, `imgs.filename`) -- confirm input schema.
    s = Stages()

    # TODO some assertions that the pride_of_models, if provided, is correct, and that this is intended target type

    def group_options(options, timepoint):
        # Per-timepoint deep copy of `options` with the lsq6 target fixed up.
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            options = copy.deepcopy(options)
            targets = get_closest_model_from_pride_of_models(
                pride_of_models_dict=get_pride_of_models_mapping(
                    pride_csv=options.mbm.lsq6.target_file,
                    output_dir=options.application.output_directory,
                    pipeline_name=options.application.pipeline_name),
                time_point=timepoint)

            options.mbm.lsq6 = options.mbm.lsq6.replace(
                target_type=TargetType.initial_model,
                target_file=targets.registration_standard.path)

            #resolution = (options.registration.resolution
            #              or get_resolution_from_file(targets.registration_standard.path))
            #options.registration = options.registration.replace(resolution=resolution)

            # FIXME use of registration_standard here is quite wrong ...
            # part of the trouble is that mbm calls registration_targets itself,
            # so we can't send this RegistrationTargets to `mbm` directly ...
            # one option: add yet another optional arg to `mbm` ...
        else:
            targets = s.defer(
                registration_targets(lsq6_conf=options.mbm.lsq6,
                                     app_conf=options.application,
                                     reg_conf=options.registration,
                                     first_input_file=imgs.filename.iloc[0]))

            resolution = (options.registration.resolution
                          or get_resolution_from_file(
                              targets.registration_standard.path))
            # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
            options.registration = options.registration.replace(
                resolution=resolution)
        return options

    # build all first-level models:
    first_level_results = (
        imgs  # TODO 'group' => 'timept' ?
        .groupby('group', as_index=False)  # the usual annoying pattern to do an aggregate with access
        .aggregate({'file': lambda files: list(files)})  # to the groupby object's keys ... TODO: fix
        .rename(columns={
            'file': "files"
        }).assign(options=lambda df: df.apply(
            axis=1, func=lambda row: group_options(options, row.group))
        ).assign(build_model=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                mbm(imgs=row.files,
                    options=row.options,
                    prefix="%s" % row.group,
                    output_dir=os.path.join(
                        options.application.output_directory,
                        options.application.pipeline_name + "_first_level",
                        "%s_processed" % row.group))))
        ).sort_values(by='group'))

    # All groups must have resolved to the same registration resolution.
    if all(
            first_level_results.options.map(
                lambda opts: opts.registration.resolution) ==
            first_level_results.options.iloc[0].registration.resolution):
        options.registration = options.registration.replace(
            resolution=first_level_results.options.iloc[0].registration.resolution)
    else:
        raise ValueError(
            "some first-level models are run at different resolutions, possibly not what you want ..."
        )

    # construction of the overall inter-average transforms will be done iteratively (for efficiency/aesthetics),
    # which doesn't really fit the DataFrame mold ...

    full_hierarchy = get_nonlinear_configuration_from_options(
        nlin_protocol=options.mbm.nlin.nlin_protocol,
        reg_method=options.mbm.nlin.reg_method,
        file_resolution=options.registration.resolution)

    # FIXME no good can come of this
    nlin_protocol = full_hierarchy.confs[-1] if isinstance(
        full_hierarchy, MultilevelANTSConf) else full_hierarchy

    # first register consecutive averages together:
    average_registrations = (
        first_level_results[:-1].assign(
            next_model=list(first_level_results[1:].build_model))
        # TODO: we should be able to do lsq6 registration here as well!
        .assign(xfm=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                lsq12_nlin(source=row.build_model.avg_img,
                           target=row.next_model.avg_img,
                           lsq12_conf=get_linear_configuration_from_options(
                               options.mbm.lsq12,
                               transform_type=LinearTransType.lsq12,
                               file_resolution=options.registration.resolution),
                           nlin_conf=nlin_protocol)))))

    # now compose the above transforms to produce transforms from each average to the common average:
    common_time_pt = options.tamarack.common_time_pt
    common_model = first_level_results[
        first_level_results.group == common_time_pt].iloc[0].build_model.avg_img
    #common = average_registrations[average_registrations.group == common_time_pt].iloc[0]
    before = average_registrations[
        average_registrations.group < common_time_pt]   # asymmetry in before/after since
    after = average_registrations[
        average_registrations.group >= common_time_pt]  # we used `next_`, not `previous_`

    # compose 1st and 2nd level transforms and resample into the common average space:
    def suffixes(xs):
        # All suffixes of xs, longest first, ending with [].
        if len(xs) == 0:
            return [[]]
        else:
            ys = suffixes(xs[1:])
            return [[xs[0]] + ys[0]] + ys

    def prefixes(xs):
        # All prefixes of xs, shortest first, starting with [].
        if len(xs) == 0:
            return [[]]
        else:
            ys = prefixes(xs[1:])
            return ys + [ys[-1] + [xs[0]]]

    # Chain consecutive inter-average transforms to the common timepoint;
    # averages after the common timepoint get the inverted composition.
    xfms_to_common = (first_level_results.assign(
        uncomposed_xfms=suffixes(list(before.xfm))[:-1] + [None] +
        prefixes(list(after.xfm))[1:]).assign(
            xfm_to_common=lambda df: df.apply(
                axis=1,
                func=lambda row: ((lambda x: s.defer(invert_xfmhandler(x))
                                   if row.group >= common_time_pt else x)(
                                       s.defer(
                                           concat_xfmhandlers(
                                               row.uncomposed_xfms,
                                               name=("%s_to_common"
                                                     if row.group < common_time_pt
                                                     else "%s_from_common") % row.group))))
                if row.uncomposed_xfms is not None else None)).drop(
                    'uncomposed_xfms', axis=1))
    # TODO None => identity??

    # TODO indexing here is not good ...
    first_level_determinants = pd.concat(list(
        first_level_results.build_model.apply(
            lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=xfms_to_common.assign(source=lambda df: df.xfm_to_common.apply(
            lambda x: x.source if x is not None else None)),
        left_on="first_level_avg",
        right_on='source').assign(
            resampled_log_full_det=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    mincresample_new(img=row.log_full_det,
                                     xfm=row.xfm_to_common.xfm,
                                     like=common_model))
                if row.xfm_to_common is not None else row.img),
            resampled_log_nlin_det=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    mincresample_new(img=row.log_nlin_det,
                                     xfm=row.xfm_to_common.xfm,
                                     like=common_model))
                if row.xfm_to_common is not None else row.img)))

    inverted_overall_xfms = pd.Series({
        xfm: (s.defer(concat_xfmhandlers([xfm, row.xfm_to_common]))
              if row.xfm_to_common is not None else xfm)
        for _ix, row in xfms_to_common.iterrows()
        for xfm in row.build_model.xfms.lsq12_nlin_xfm
    })

    overall_xfms = inverted_overall_xfms.apply(
        lambda x: s.defer(invert_xfmhandler(x)))

    # NOTE(review): unlike in `two_level`, this call is NOT wrapped in
    # `s.defer` -- confirm whether its stages get registered with `s`.
    overall_determinants = determinants_at_fwhms(
        xfms=overall_xfms,
        blur_fwhms=options.mbm.stats.stats_kernels,
        inv_xfms=inverted_overall_xfms)

    # TODO turn off bootstrap as with two-level code?
    # TODO combine into one data frame
    return Result(stages=s,
                  output=Namespace(
                      first_level_results=first_level_results,
                      overall_determinants=overall_determinants,
                      resampled_determinants=resampled_determinants.drop(
                          ['options'], axis=1)))
def tissue_vision_pipeline(options):
    """Tissue-vision pipeline: consensus model, atlas registration, and count resampling.

    Step 1 builds a consensus average of the anatomical images via `mbm`;
    step 2 registers that consensus to the ABI tissuevision atlas;
    step 3 concatenates the per-subject and consensus-to-atlas transforms and
    resamples the anatomical/count volumes into atlas space (and the atlas
    labels back into each subject's lsq6 space), writing the resulting file
    table to 'analysis.csv'.

    Returns a Result carrying the accumulated stages and an empty output.
    """
    output_dir = options.application.output_directory
    pipeline_name = options.application.pipeline_name

    # NOTE(review): `original_csv` is never used again in this function.
    csv = original_csv = get_imgs(options.application)
    # check_MINC_input_files([img.path for img in imgs])

    s = Stages()

    # QC montage of the raw inputs.
    s.defer(create_quality_control_images(imgs=csv['anatomical_MincAtom'].tolist(),
                                          montage_dir=output_dir,
                                          montage_output=os.path.join(output_dir,
                                                                      pipeline_name + "_resampled",
                                                                      "input_montage"),
                                          auto_range=True,
                                          message="input_mincs"))

    #############################
    # Step 1: Run MBM.py to create a consensus average
    #############################
    mbm_result = s.defer(mbm(imgs=csv['anatomical_MincAtom'].tolist(),
                             options=options,
                             prefix=options.application.pipeline_name,
                             output_dir=output_dir,
                             with_maget=False))

    #TODO remove
    # NOTE(review): `transforms` and `determinants` are computed but never used
    # below -- dead locals kept pending the TODO above.
    transforms = mbm_result.xfms.assign(native_file=lambda df: df.rigid_xfm.apply(lambda x: x.source),
                                        lsq6_file=lambda df: df.lsq12_nlin_xfm.apply(lambda x: x.source),
                                        lsq6_mask_file=lambda df:
                                            df.lsq12_nlin_xfm.apply(lambda x: x.source.mask if x.source.mask else ""),
                                        nlin_file=lambda df: df.lsq12_nlin_xfm.apply(lambda x: x.resampled),
                                        nlin_mask_file=lambda df:
                                            df.lsq12_nlin_xfm.apply(lambda x: x.resampled.mask if x.resampled.mask else ""))\
        .applymap(maybe_deref_path)
    determinants = mbm_result.determinants.drop(["full_det", "nlin_det"], axis=1)\
        .applymap(maybe_deref_path)

    # Attach the per-subject lsq6 images and the rigid/lsq12-nlin/overall xfms.
    csv = csv.assign(anatomical_lsq6_MincAtom=mbm_result.xfms.lsq12_nlin_xfm.apply(lambda xfm: xfm.source),
                     mbm_lsq6_XfmAtom=mbm_result.xfms.rigid_xfm.apply(lambda x: x.xfm),
                     mbm_lsq12_nlin_XfmAtom=mbm_result.xfms.lsq12_nlin_xfm.apply(lambda x: x.xfm),
                     mbm_full_XfmAtom=mbm_result.xfms.overall_xfm.apply(lambda x: x.xfm))

    # x.assign(count_lsq6_MincAtom=lambda df: [x + y for x, y in zip(df["x"], df["y"])])
    # Bring each count volume into its subject's lsq6 space.
    csv = csv.assign(count_lsq6_MincAtom = lambda df:
                     [s.defer(mincresample_new(img = img, xfm = xfm, like = like))
                      for img, xfm, like in zip(df["count_MincAtom"],
                                                df["mbm_lsq6_XfmAtom"],
                                                df["anatomical_lsq6_MincAtom"])])

    #############################
    # Step 2: Register consensus average to ABI tissuevision Atlas
    #############################
    lsq12_conf = get_linear_configuration_from_options(conf=options.mbm.lsq12,
                                                       transform_type=LinearTransType.lsq12,
                                                       file_resolution=options.registration.resolution)
    nlin_component = get_nonlinear_component(reg_method=options.mbm.nlin.reg_method)

    atlas_target = MincAtom(name=options.consensus_to_atlas.atlas_target,
                            orig_name=options.consensus_to_atlas.atlas_target,
                            mask=MincAtom(name=options.consensus_to_atlas.atlas_target_mask,
                                          orig_name=options.consensus_to_atlas.atlas_target_mask))
    atlas_target_label = MincAtom(name=options.consensus_to_atlas.atlas_target_label,
                                  orig_name=options.consensus_to_atlas.atlas_target_label,
                                  mask=MincAtom(name=options.consensus_to_atlas.atlas_target_mask,
                                                orig_name=options.consensus_to_atlas.atlas_target_mask))

    lsq12_nlin_result = s.defer(lsq12_nlin(source=mbm_result.avg_img,
                                           target=atlas_target,
                                           lsq12_conf=lsq12_conf,
                                           nlin_module=nlin_component,
                                           nlin_options=options.mbm.nlin.nlin_protocol,
                                           resolution=options.registration.resolution,
                                           resample_source=False
                                           ))

    #############################
    # Step 3: Resample count volumes to ABI tissuevision Atlas space and vice versa
    #############################
    # Per-subject lsq6 -> atlas transform: subject nlin xfm followed by the
    # consensus-to-atlas xfm.
    csv = csv.assign(lsq6_to_atlas_XfmAtom = lambda df: df['mbm_lsq12_nlin_XfmAtom'].apply(
        lambda xfm: s.defer(xfmconcat([xfm, lsq12_nlin_result.xfm]))))

    csv = csv.assign(
        anatomical_targetspace_MincAtom=lambda df:
            [s.defer(mincresample_new(img=img, xfm=xfm, like=atlas_target))
             for img, xfm in zip(df["anatomical_lsq6_MincAtom"], df["lsq6_to_atlas_XfmAtom"])],
        count_targetspace_MincAtom=lambda df:
            [s.defer(mincresample_new(img=img, xfm=xfm, like=atlas_target))
             for img, xfm in zip(df["count_lsq6_MincAtom"], df["lsq6_to_atlas_XfmAtom"])],
        # Atlas labels back into each subject's lsq6 space: inverted transform,
        # nearest-neighbour interpolation to preserve label values.
        atlas_lsq6space_MincAtom=lambda df:
            [s.defer(mincresample_new(img=atlas_target_label, xfm=xfm, like=like,
                                      invert=True,
                                      interpolation=Interpolation.nearest_neighbour,
                                      extra_flags=('-keep_real_range',)))
             for xfm, like in zip(df["lsq6_to_atlas_XfmAtom"], df["count_lsq6_MincAtom"])]
    )

    # NOTE(review): writes to the current working directory, not output_dir --
    # confirm this is intended.
    csv.applymap(maybe_deref_path).to_csv("analysis.csv",index=False)

    # QC montage of the atlas-space count volumes.
    s.defer(create_quality_control_images(imgs=csv.count_targetspace_MincAtom.tolist(),
                                          montage_dir=output_dir,
                                          montage_output=os.path.join(output_dir,
                                                                      pipeline_name + "_resampled",
                                                                      "count_montage"),
                                          auto_range=True,
                                          message="count_mincs"))

    return Result(stages=s, output=())