def two_level(grouped_files_df, options: TwoLevelConf):
    """
    Build one model per group with `mbm`, then a second-level model from the group averages.

    grouped_files_df - must contain 'group':<any comparable, sortable type> and 'file':MincAtom columns
    options          - pipeline configuration; `options.mbm`, `options.application` and
                       `options.registration` are read here.  The same settings are used for both
                       levels, except that lsq6 and MAGeT are switched off for the second level.

    Returns a `Result` whose `stages` is the `Stages` object accumulated in this function and whose
    `output` is a `Namespace` with the fields `first_level_results`, `resampled_determinants`
    and `overall_determinants`.

    Raises ValueError if the input dataframe contains any null value, or if the lsq6 target type
    is `TargetType.bootstrap`.
    """  # TODO weird naming since the grouped_files_df isn't a GroupBy object?  just files_df?
    s = Stages()

    # refuse to run on incomplete input rather than failing somewhere deep inside pandas/mbm
    if grouped_files_df.isnull().values.any():
        raise ValueError("NaN values in input dataframe; can't go")

    if options.mbm.lsq6.target_type == TargetType.bootstrap:
        # won't work since the second level part tries to get the resolution of *its* "first input file", which
        # hasn't been created.  We could instead pass in a resolution to the `mbm` function,
        # but instead disable for now:
        raise ValueError(
            "Bootstrap model building currently doesn't work with this pipeline; "
            "just specify an initial target instead")
    elif options.mbm.lsq6.target_type == TargetType.pride_of_models:
        # only bound in this branch; every later use is guarded by the same target_type test
        pride_of_models_mapping = get_pride_of_models_mapping(
            pride_csv=options.mbm.lsq6.target_file,
            output_dir=options.application.output_directory,
            pipeline_name=options.application.pipeline_name)

    # FIXME this is the same as in the 'tamarack' except for names of arguments/enclosing variables
    def group_options(options, group):
        """Return a deep copy of `options` specialised for `group`, with the registration resolution filled in."""
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:

            targets = get_closest_model_from_pride_of_models(
                pride_of_models_dict=pride_of_models_mapping, time_point=group)

            # from here on the group is configured as an ordinary initial-model run using the chosen model
            options.mbm.lsq6 = options.mbm.lsq6.replace(
                target_type=TargetType.initial_model,
                target_file=targets.registration_standard.path)
        else:
            # this will ensure that all groups have the same resolution -- is it necessary?
            # (every group uses the first file of the *whole* dataframe, not of the group)
            targets = s.defer(
                registration_targets(
                    lsq6_conf=options.mbm.lsq6,
                    app_conf=options.application,
                    reg_conf=options.registration,
                    first_input_file=grouped_files_df.file.iloc[0]))

        # an explicitly configured resolution wins; otherwise take it from the registration standard
        resolution = (options.registration.resolution
                      or get_resolution_from_file(
                          targets.registration_standard.path))
        # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
        options.registration = options.registration.replace(
            resolution=resolution)
        # no need to check common space settings here since they're turned off at the parser level
        # (a bit strange)
        return options

    # one row per group: 'group', 'files' (list of that group's files) and 'build_model' (the deferred
    # output of `mbm` for the group, written below <output_directory>/<pipeline_name>_first_level/<group>_processed)
    first_level_results = (
        grouped_files_df.groupby(
            'group', as_index=False
        )  # the usual annoying pattern to do a aggregate with access
        .aggregate({'file': lambda files: list(files)
                    })  # to the groupby object's keys ... TODO: fix
        .rename(columns={
            'file': "files"
        }).assign(build_model=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                mbm(imgs=row.files,
                    options=group_options(options, row.group),
                    prefix="%s" % row.group,
                    output_dir=os.path.join(
                        options.application.output_directory, options.
                        application.pipeline_name + "_first_level",
                        "%s_processed" % row.group))))))

    # TODO replace .assign(...apply(...)...) with just an apply, producing a series right away?

    # FIXME right now the same options set is being used for both levels -- use options.first/second_level
    # the second level works on a private copy with lsq6 and all MAGeT-related switches turned off
    second_level_options = copy.deepcopy(options)
    second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
        run_lsq6=False)
    second_level_options.mbm.segmentation.run_maget = False
    second_level_options.mbm.maget.maget.mask_only = False
    second_level_options.mbm.maget.maget.mask = False

    # FIXME this is probably a hack -- instead add a --second-level-init-model option to specify which timepoint should be used
    # as the initial model in the second level ???  (at this point it doesn't matter due to lack of lsq6 ...)
    if second_level_options.mbm.lsq6.target_type == TargetType.pride_of_models:
        # arbitrarily use the first model in the mapping as the second-level target
        second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
            target_type=TargetType.
            target,  # target doesn't really matter as no lsq6 here, just used for resolution...
            target_file=list(pride_of_models_mapping.values())
            [0].registration_standard.path)

    # NOTE: running lsq6_nuc_inorm here doesn't work in general (but possibly with rotational minctracc)
    # since the native-space initial model is used, but our images are
    # already in standard space (as we resampled there after the 1st-level lsq6).
    # On the other hand, we might want to run it here (although of course NOT nuc/inorm!) in the future,
    # for instance given a 'pride' of models (one for each group).

    # the inputs of the second level are the first-level average images, one per group
    second_level_results = s.defer(
        mbm(imgs=first_level_results.build_model.map(lambda m: m.avg_img),
            options=second_level_options,
            prefix=os.path.join(
                options.application.output_directory,
                options.application.pipeline_name + "_second_level")))

    # FIXME sadly, `mbm` doesn't return a pd.Series of xfms, so we don't have convenient indexing ...
    # for every group, pair each of its first-level transforms with that group's second-level transform
    # and concatenate them; the pairing relies on both sequences being in the same (group) order
    overall_xfms = [
        s.defer(concat_xfmhandlers([xfm_1, xfm_2])) for xfms_1, xfm_2 in
        zip([r.xfms.lsq12_nlin_xfm for r in first_level_results.build_model],
            second_level_results.xfms.overall_xfm) for xfm_1 in xfms_1
    ]
    # element-wise versions of `mincresample_new` and `s.defer`, used on whole columns below;
    # the `extra_flags` argument is excluded from vectorization
    resample = np.vectorize(mincresample_new, excluded={"extra_flags"})
    defer = np.vectorize(s.defer)

    # TODO using the avg_img here is a bit clunky -- maybe better to propagate group indices ...
    # only necessary since `mbm` doesn't return DataFrames but namespaces ...

    # stack the per-group determinant tables, tagging each row with its group's average image
    first_level_determinants = pd.concat(list(
        first_level_results.build_model.apply(
            lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    # first_level_xfms is only necessary because you otherwise have no access to the input file which is necessary
    # for merging with the input csv. lsq12_nlin_xfm can be used to merge, and rigid_xfm contains the input file.
    # If for some reason we want to output xfms in the future, just don't drop everything.
    first_level_xfms = pd.concat(
        list(
            first_level_results.build_model.apply(
                lambda x: x.xfms.assign(first_level_avg=x.avg_img))),
        ignore_index=True)[["lsq12_nlin_xfm", "rigid_xfm"]]
    if options.mbm.segmentation.run_maget:
        # attach each input's MAGeT label file, matched on the path of the native (original) file
        maget_df = pd.DataFrame([
            {
                "label_file": x.labels.path,
                "native_file": x.orig_path
            }  #, "_merge" : basename(x.orig_path)}
            for x in pd.concat([
                namespace.maget_result
                for namespace in first_level_results.build_model
            ])
        ])
        first_level_xfms = pd.merge(
            left=first_level_xfms.assign(native_file=lambda df: df.rigid_xfm.
                                         apply(lambda x: x.source.path)),
            right=maget_df,
            on="native_file")
    # join determinants to the transform table via the transform itself, then drop the join helpers
    first_level_determinants = (pd.merge(left=first_level_determinants,
                                         right=first_level_xfms,
                                         left_on="inv_xfm",
                                         right_on="lsq12_nlin_xfm").drop(
                                             ["rigid_xfm", "lsq12_nlin_xfm"],
                                             axis=1))

    # match every first-level determinant row to its group's second-level transform (via the group
    # average image) and resample both log-determinant images like the second-level average
    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=pd.DataFrame({
            'group_xfm': second_level_results.xfms.overall_xfm
        }).assign(source=lambda df: df.group_xfm.apply(lambda r: r.source)),
        left_on="first_level_avg",
        right_on="source").assign(
            resampled_log_full_det=lambda df: defer(
                resample(img=df.log_full_det,
                         xfm=df.group_xfm.apply(lambda x: x.xfm),
                         like=second_level_results.avg_img)),
            resampled_log_nlin_det=lambda df: defer(
                resample(img=df.log_nlin_det,
                         xfm=df.group_xfm.apply(lambda x: x.xfm),
                         like=second_level_results.avg_img))))
    # TODO only resamples the log determinants, but still a bit ugly ... abstract somehow?
    # TODO shouldn't be called resampled_determinants since this is basically the whole (first_level) thing ...

    inverted_overall_xfms = [
        s.defer(invert_xfmhandler(xfm)) for xfm in overall_xfms
    ]

    # determinants of the concatenated (first + second level) transforms; the two log-determinant
    # columns are renamed with an `overall_` prefix
    overall_determinants = (s.defer(
        determinants_at_fwhms(
            xfms=inverted_overall_xfms,
            inv_xfms=overall_xfms,
            blur_fwhms=options.mbm.stats.stats_kernels)).assign(
                overall_log_full_det=lambda df: df.log_full_det,
                overall_log_nlin_det=lambda df: df.log_nlin_det).drop(
                    ['log_full_det', 'log_nlin_det'], axis=1))

    # TODO return some MAGeT stuff from two_level function ??
    # FIXME running MAGeT from within the `two_level` function has the same problem as running it from within `mbm`:
    # it will now run when this pipeline is called from within another one (e.g., n-level), which will be
    # redundant, create filename clashes, etc. -- this should be moved to `two_level_pipeline`.
    # TODO do we need a `pride of atlases` for MAGeT in this pipeline ??
    # TODO at the moment MAGeT is run within the MBM code, but it could be disabled there and run here
    #if options.mbm.segmentation.run_maget:
    #    maget_options = copy.deepcopy(options)
    #    maget_options.maget = options.mbm.maget
    #    fixup_maget_options(maget_options=maget_options.maget,
    #                        lsq12_options=maget_options.mbm.lsq12,
    #                        nlin_options=maget_options.mbm.nlin)
    #    maget_options.maget.maget.mask = maget_options.maget.maget.mask_only = False   # already done above
    #    del maget_options.mbm

    # again using a weird combination of vectorized and loop constructs ...
    #    s.defer(maget([xfm.resampled for _ix, m in first_level_results.iterrows()
    #                   for xfm in m.build_model.xfms.rigid_xfm],
    #                  options=maget_options,
    #                  prefix="%s_MAGeT" % options.application.pipeline_name,
    #                  output_dir=os.path.join(options.application.output_directory,
    #                                          options.application.pipeline_name + "_processed")))

    # TODO resampling to database model ...

    # TODO there should be one table containing all determinants (first level, overall, resampled first level) for each file
    # and another containing some groupwise information (averages and transforms to the common average)
    return Result(stages=s,
                  output=Namespace(
                      first_level_results=first_level_results,
                      resampled_determinants=resampled_determinants,
                      overall_determinants=overall_determinants))
# Example #2
# 0
def two_level(grouped_files_df, options : TwoLevelConf):
    """
    Build one model per group with `mbm`, then a second-level model from the group averages.

    grouped_files_df - must contain 'group':<any comparable, sortable type> and 'file':MincAtom columns
    options          - pipeline configuration; `options.mbm`, `options.application` and
                       `options.registration` are read here.  The same settings are used for both
                       levels, except that lsq6 and MAGeT are switched off for the second level.

    Returns a `Result` whose `stages` is the `Stages` object accumulated in this function and whose
    `output` is a `Namespace` with the fields `first_level_results`, `resampled_determinants`
    and `overall_determinants`.

    Raises ValueError if the input dataframe contains any null value, or if the lsq6 target type
    is `TargetType.bootstrap`.
    """  # TODO weird naming since the grouped_files_df isn't a GroupBy object?  just files_df?
    s = Stages()

    # refuse to run on incomplete input rather than failing somewhere deep inside pandas/mbm
    if grouped_files_df.isnull().values.any():
        raise ValueError("NaN values in input dataframe; can't go")

    if options.mbm.lsq6.target_type == TargetType.bootstrap:
        # won't work since the second level part tries to get the resolution of *its* "first input file", which
        # hasn't been created.  We could instead pass in a resolution to the `mbm` function,
        # but instead disable for now:
        raise ValueError("Bootstrap model building currently doesn't work with this pipeline; "
                         "just specify an initial target instead")
    elif options.mbm.lsq6.target_type == TargetType.pride_of_models:
        # only bound in this branch; every later use is guarded by the same target_type test
        pride_of_models_mapping = get_pride_of_models_mapping(pride_csv=options.mbm.lsq6.target_file,
                                                              output_dir=options.application.output_directory,
                                                              pipeline_name=options.application.pipeline_name)

    # FIXME this is the same as in the 'tamarack' except for names of arguments/enclosing variables
    def group_options(options, group):
        """Return a deep copy of `options` specialised for `group`, with the registration resolution filled in."""
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:

            targets = get_closest_model_from_pride_of_models(pride_of_models_dict=pride_of_models_mapping,
                                                             time_point=group)

            # from here on the group is configured as an ordinary initial-model run using the chosen model
            options.mbm.lsq6 = options.mbm.lsq6.replace(target_type=TargetType.initial_model,
                                                        target_file=targets.registration_standard.path)
        else:
            # this will ensure that all groups have the same resolution -- is it necessary?
            # (every group uses the first file of the *whole* dataframe, not of the group)
            # NOTE(review): the result is used directly, without `s.defer` and without a `reg_conf`
            # argument -- confirm this matches the `registration_targets` signature in this version
            targets = registration_targets(lsq6_conf=options.mbm.lsq6,
                                           app_conf=options.application,
                                           first_input_file=grouped_files_df.file.iloc[0])

        # an explicitly configured resolution wins; otherwise take it from the registration standard
        resolution = (options.registration.resolution
                        or get_resolution_from_file(targets.registration_standard.path))
        options.registration = options.registration.replace(resolution=resolution)
        # no need to check common space settings here since they're turned off at the parser level
        # (a bit strange)
        return options

    # one row per group: 'group', 'files' (list of that group's files) and 'build_model' (the deferred
    # output of `mbm` for the group, written below <output_directory>/<pipeline_name>_first_level/<group>_processed);
    # sort=False keeps the groups in order of first appearance instead of sorting the keys
    first_level_results = (
        grouped_files_df
        .groupby('group', as_index=False, sort=False)       # the usual annoying pattern to do a aggregate with access
        .aggregate({ 'file' : lambda files: list(files) })  # to the groupby object's keys ... TODO: fix
        .rename(columns={ 'file' : "files" })
        .assign(build_model=lambda df:
                              df.apply(axis=1,
                                       func=lambda row:
                                              s.defer(mbm(imgs=row.files,
                                                          options=group_options(options, row.group),
                                                          prefix="%s" % row.group,
                                                          output_dir=os.path.join(
                                                              options.application.output_directory,
                                                              options.application.pipeline_name + "_first_level",
                                                              "%s_processed" % row.group)))))
        )
    # TODO replace .assign(...apply(...)...) with just an apply, producing a series right away?

    # FIXME right now the same options set is being used for both levels -- use options.first/second_level
    # the second level works on a private copy with lsq6 and all MAGeT-related switches turned off
    second_level_options = copy.deepcopy(options)
    second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(run_lsq6=False)
    second_level_options.mbm.segmentation.run_maget = False
    second_level_options.mbm.maget.maget.mask_only = False
    second_level_options.mbm.maget.maget.mask = False

    # FIXME this is probably a hack -- instead add a --second-level-init-model option to specify which timepoint should be used
    # as the initial model in the second level ???  (at this point it doesn't matter due to lack of lsq6 ...)
    if second_level_options.mbm.lsq6.target_type == TargetType.pride_of_models:
        # arbitrarily use the first model in the mapping as the second-level target
        second_level_options.mbm.lsq6 = second_level_options.mbm.lsq6.replace(
            target_type=TargetType.target,  # target doesn't really matter as no lsq6 here, just used for resolution...
            target_file=list(pride_of_models_mapping.values())[0].registration_standard.path)

    # NOTE: running lsq6_nuc_inorm here doesn't work in general (but possibly with rotational minctracc)
    # since the native-space initial model is used, but our images are
    # already in standard space (as we resampled there after the 1st-level lsq6).
    # On the other hand, we might want to run it here (although of course NOT nuc/inorm!) in the future,
    # for instance given a 'pride' of models (one for each group).

    # the inputs of the second level are the first-level average images, one per group
    second_level_results = s.defer(mbm(imgs=first_level_results.build_model.map(lambda m: m.avg_img),
                                       options=second_level_options,
                                       prefix=os.path.join(options.application.output_directory,
                                                           options.application.pipeline_name + "_second_level")))

    # FIXME sadly, `mbm` doesn't return a pd.Series of xfms, so we don't have convenient indexing ...
    # for every group, pair each of its first-level transforms with that group's second-level transform
    # and concatenate them; the pairing relies on both sequences being in the same (group) order
    overall_xfms = [s.defer(concat_xfmhandlers([xfm_1, xfm_2]))
                    for xfms_1, xfm_2 in zip([r.xfms.lsq12_nlin_xfm for r in first_level_results.build_model],
                                             second_level_results.xfms.overall_xfm)
                    for xfm_1 in xfms_1]
    # element-wise versions of `mincresample_new` and `s.defer`, used on whole columns below;
    # the `extra_flags` argument is excluded from vectorization
    resample  = np.vectorize(mincresample_new, excluded={"extra_flags"})
    defer     = np.vectorize(s.defer)

    # TODO using the avg_img here is a bit clunky -- maybe better to propagate group indices ...
    # only necessary since `mbm` doesn't return DataFrames but namespaces ...
    # stack the per-group determinant tables, tagging each row with its group's average image
    first_level_determinants = pd.concat(list(first_level_results.build_model.apply(
                                                lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    # match every first-level determinant row to its group's second-level transform (via the group
    # average image) and resample both log-determinant images like the second-level average
    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=pd.DataFrame({'group_xfm' : second_level_results.xfms.overall_xfm})
              .assign(source=lambda df: df.group_xfm.apply(lambda r: r.source)),
        left_on="first_level_avg",
        right_on="source")
        .assign(resampled_log_full_det=lambda df: defer(resample(img=df.log_full_det,
                                                                 xfm=df.group_xfm.apply(lambda x: x.xfm),
                                                                 like=second_level_results.avg_img)),
                resampled_log_nlin_det=lambda df: defer(resample(img=df.log_nlin_det,
                                                                 xfm=df.group_xfm.apply(lambda x: x.xfm),
                                                                 like=second_level_results.avg_img))))
    # TODO only resamples the log determinants, but still a bit ugly ... abstract somehow?
    # TODO shouldn't be called resampled_determinants since this is basically the whole (first_level) thing ...

    inverted_overall_xfms = [s.defer(invert_xfmhandler(xfm)) for xfm in overall_xfms]

    # determinants of the concatenated (first + second level) transforms; the two log-determinant
    # columns are renamed with an `overall_` prefix
    overall_determinants = (s.defer(determinants_at_fwhms(
                                     xfms=inverted_overall_xfms,
                                     inv_xfms=overall_xfms,
                                     blur_fwhms=options.mbm.stats.stats_kernels))
                            .assign(overall_log_full_det=lambda df: df.log_full_det,
                                    overall_log_nlin_det=lambda df: df.log_nlin_det)
                            .drop(['log_full_det', 'log_nlin_det'], axis=1))

    # TODO return some MAGeT stuff from two_level function ??
    # FIXME running MAGeT from within the `two_level` function has the same problem as running it from within `mbm`:
    # it will now run when this pipeline is called from within another one (e.g., n-level), which will be
    # redundant, create filename clashes, etc. -- this should be moved to `two_level_pipeline`.
    # TODO do we need a `pride of atlases` for MAGeT in this pipeline ??
    # TODO at the moment MAGeT is run within the MBM code, but it could be disabled there and run here
    #if options.mbm.segmentation.run_maget:
    #    maget_options = copy.deepcopy(options)
    #    maget_options.maget = options.mbm.maget
    #    fixup_maget_options(maget_options=maget_options.maget,
    #                        lsq12_options=maget_options.mbm.lsq12,
    #                        nlin_options=maget_options.mbm.nlin)
    #    maget_options.maget.maget.mask = maget_options.maget.maget.mask_only = False   # already done above
    #    del maget_options.mbm

    # again using a weird combination of vectorized and loop constructs ...
    #    s.defer(maget([xfm.resampled for _ix, m in first_level_results.iterrows()
    #                   for xfm in m.build_model.xfms.rigid_xfm],
    #                  options=maget_options,
    #                  prefix="%s_MAGeT" % options.application.pipeline_name,
    #                  output_dir=os.path.join(options.application.output_directory,
    #                                          options.application.pipeline_name + "_processed")))

    # TODO resampling to database model ...

    # TODO there should be one table containing all determinants (first level, overall, resampled first level) for each file
    # and another containing some groupwise information (averages and transforms to the common average)
    return Result(stages=s, output=Namespace(first_level_results=first_level_results,
                                             resampled_determinants=resampled_determinants,
                                             overall_determinants=overall_determinants))
def tamarack(imgs : pd.DataFrame, options):
    """
    Build one model per time point, chain the averages together, and map everything to a common time point.

    imgs    - dataframe of inputs.  The code below groups on the 'group' column and collects the 'file'
              column; when no pride of models is used, `imgs.filename.iloc[0]` is additionally passed to
              `registration_targets`.
              NOTE(review): an older comment described the columns as `img` and `timept`, which does
              not match the columns actually read here -- confirm against the caller.
    options - pipeline configuration; `options.mbm`, `options.application`, `options.registration` and
              `options.tamarack.common_time_pt` are read.  `options.registration` is replaced in place
              with the resolution shared by the first-level models.

    Returns a `Result` whose `stages` is the `Stages` object accumulated in this function and whose
    `output` is a `Namespace` with the fields `first_level_results`, `overall_determinants` and
    `resampled_determinants` (the latter without its 'options' column).

    Raises ValueError if the first-level models do not all end up with the same resolution.
    """
    # columns of the pride of models : 'timept' : number, 'model' : MincAtom
    s = Stages()

    # TODO some assertions that the pride_of_models, if provided, is correct, and that this is intended target type

    # Read the pride of models once, as `two_level` does, instead of once per group.
    # Only bound in this branch; the single use in `group_options` is guarded by the same test.
    if options.mbm.lsq6.target_type == TargetType.pride_of_models:
        pride_of_models_mapping = get_pride_of_models_mapping(
            pride_csv=options.mbm.lsq6.target_file,
            output_dir=options.application.output_directory,
            pipeline_name=options.application.pipeline_name)

    def group_options(options, timepoint):
        """Return a deep copy of `options` specialised for `timepoint`, with the registration resolution filled in."""
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            targets = get_closest_model_from_pride_of_models(pride_of_models_dict=pride_of_models_mapping,
                                                             time_point=timepoint)

            # FIXME use of registration_standard here is quite wrong ...
            # part of the trouble is that mbm calls registration_targets itself,
            # so we can't send this RegistrationTargets to `mbm` directly ...
            # one option: add yet another optional arg to `mbm` ...
            options.mbm.lsq6 = options.mbm.lsq6.replace(target_type=TargetType.initial_model,
                                                        target_file=targets.registration_standard.path)
        else:
            targets = s.defer(registration_targets(lsq6_conf=options.mbm.lsq6,
                                                   app_conf=options.application,
                                                   reg_conf=options.registration,
                                                   first_input_file=imgs.filename.iloc[0]))

        # an explicitly configured resolution wins; otherwise take it from the registration standard
        resolution = (options.registration.resolution or
                        get_resolution_from_file(targets.registration_standard.path))

        # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
        options.registration = options.registration.replace(resolution=resolution)

        return options

    # build all first-level models:
    # one row per group with 'group', 'files', the per-group 'options' and the deferred `mbm` output
    # in 'build_model'; rows are sorted by group so that neighbouring rows are consecutive time points
    first_level_results = (
        imgs  # TODO 'group' => 'timept' ?
        .groupby('group', as_index=False)       # the usual annoying pattern to do an aggregate with access
        .aggregate({ 'file' : lambda files: list(files) })  # to the groupby object's keys ... TODO: fix
        .rename(columns={ 'file' : "files" })
        .assign(options=lambda df: df.apply(axis=1, func=lambda row: group_options(options, row.group)))
        .assign(build_model=lambda df:
                              df.apply(axis=1,
                                       func=lambda row: s.defer(
                                           mbm(imgs=row.files,
                                               options=row.options,
                                               prefix="%s" % row.group,
                                               output_dir=os.path.join(
                                               options.application.output_directory,
                                               options.application.pipeline_name + "_first_level",
                                               "%s_processed" % row.group)))))
        .sort_values(by='group')

        )

    # the inter-average registrations below need one resolution, so insist that all groups agree
    if all(first_level_results.options.map(lambda opts: opts.registration.resolution)
             == first_level_results.options.iloc[0].registration.resolution):
        options.registration = options.registration.replace(
            resolution=first_level_results.options.iloc[0].registration.resolution)
    else:
        raise ValueError("some first-level models are run at different resolutions, possibly not what you want ...")

    # construction of the overall inter-average transforms will be done iteratively (for efficiency/aesthetics),
    # which doesn't really fit the DataFrame mold ...

    full_hierarchy = get_nonlinear_configuration_from_options(
      nlin_protocol=options.mbm.nlin.nlin_protocol,
      reg_method=options.mbm.nlin.reg_method,
      file_resolution=options.registration.resolution)

    # FIXME no good can come of this
    # (for a multilevel ANTS configuration only the last level is used between averages)
    nlin_protocol = full_hierarchy.confs[-1] if isinstance(full_hierarchy, MultilevelANTSConf) else full_hierarchy
    # first register consecutive averages together:
    # row i holds the transform from average i to average i+1, hence one row fewer than there are groups
    average_registrations = (
        first_level_results[:-1]
            .assign(next_model=list(first_level_results[1:].build_model))
            # TODO: we should be able to do lsq6 registration here as well!
            .assign(xfm=lambda df: df.apply(axis=1, func=lambda row: s.defer(
                                                      lsq12_nlin(source=row.build_model.avg_img,
                                                                 target=row.next_model.avg_img,
                                                                 lsq12_conf=get_linear_configuration_from_options(
                                                                     options.mbm.lsq12,
                                                                     transform_type=LinearTransType.lsq12,
                                                                     file_resolution=options.registration.resolution),
                                                                 nlin_conf=nlin_protocol)))))

    # now compose the above transforms to produce transforms from each average to the common average:
    # (`common_time_pt` must equal one of the group values, otherwise the `.iloc[0]` below fails)
    common_time_pt = options.tamarack.common_time_pt
    common_model   = first_level_results[first_level_results.group == common_time_pt].iloc[0].build_model.avg_img
    #common = average_registrations[average_registrations.group == common_time_pt].iloc[0]
    before = average_registrations[average_registrations.group <  common_time_pt]  # asymmetry in before/after since
    after  = average_registrations[average_registrations.group >= common_time_pt]  # we used `next_`, not `previous_`

    # compose 1st and 2nd level transforms and resample into the common average space:
    def suffixes(xs):
        """All suffixes of `xs`, longest first, ending with the empty list."""
        if len(xs) == 0:
            return [[]]
        else:
            ys = suffixes(xs[1:])
            return [[xs[0]] + ys[0]] + ys


    def prefixes(xs):
        """All lists made by prepending elements of `xs` one at a time, starting from the empty list."""
        if len(xs) == 0:
            return [[]]
        else:
            ys = prefixes(xs[1:])
            return ys + [ys[-1] + [xs[0]]]

    # per group: the chain of consecutive-average transforms linking it to the common time point
    # (None for the common time point itself), concatenated into a single transform; chains for
    # groups at or after the common time point are inverted after concatenation
    xfms_to_common = (
        first_level_results
        .assign(uncomposed_xfms=suffixes(list(before.xfm))[:-1] + [None] + prefixes(list(after.xfm))[1:])
        .assign(xfm_to_common=lambda df: df.apply(axis=1, func=lambda row:
                                ((lambda x: s.defer(invert_xfmhandler(x)) if row.group >= common_time_pt else x)
                                   (s.defer(concat_xfmhandlers(row.uncomposed_xfms,
                                                               name=("%s_to_common"
                                                                     if row.group < common_time_pt
                                                                     else "%s_from_common") % row.group))))
                                  if row.uncomposed_xfms is not None else None))
        .drop('uncomposed_xfms', axis=1))  # TODO None => identity??

    # TODO indexing here is not good ...
    # stack the per-group determinant tables, tagging each row with its group's average image
    first_level_determinants = pd.concat(list(first_level_results.build_model.apply(
                                                lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    # match every determinant row to its group's transform to the common average and resample the
    # log-determinant images like the common model.
    # NOTE(review): the `row.img` fallback for rows without a transform refers to a column that is
    # not created anywhere in this function -- confirm whether the common time point's rows survive
    # the merge at all and what the fallback should return.
    resampled_determinants = (
        pd.merge(left=first_level_determinants,
                 right=xfms_to_common.assign(source=lambda df: df.xfm_to_common.apply(
                                                              lambda x:
                                                                x.source if x is not None else None)),
                 left_on="first_level_avg", right_on='source')
        .assign(resampled_log_full_det=lambda df: df.apply(axis=1, func=lambda row:
                                         s.defer(mincresample_new(img=row.log_full_det,
                                                                  xfm=row.xfm_to_common.xfm,
                                                                  like=common_model))
                                                 if row.xfm_to_common is not None else row.img),
                resampled_log_nlin_det=lambda df: df.apply(axis=1, func=lambda row:
                                         s.defer(mincresample_new(img=row.log_nlin_det,
                                                                  xfm=row.xfm_to_common.xfm,
                                                                  like=common_model))
                                                 if row.xfm_to_common is not None else row.img))
    )

    # for every first-level transform: that transform followed by its group's transform to the common
    # average (or the transform alone for the common time point), keyed by the first-level transform
    inverted_overall_xfms = pd.Series({ xfm : (s.defer(concat_xfmhandlers([xfm, row.xfm_to_common]))
                                                 if row.xfm_to_common is not None else xfm)
                                        for _ix, row in xfms_to_common.iterrows()
                                        for xfm in row.build_model.xfms.lsq12_nlin_xfm })

    overall_xfms = inverted_overall_xfms.apply(lambda x: s.defer(invert_xfmhandler(x)))

    # the call must go through `s.defer` (as in `two_level`): otherwise its stages are never added to
    # `s` and the caller receives the wrapper rather than the determinants themselves
    overall_determinants = s.defer(determinants_at_fwhms(xfms=overall_xfms,
                                                         blur_fwhms=options.mbm.stats.stats_kernels,
                                                         inv_xfms=inverted_overall_xfms))


    # TODO turn off bootstrap as with two-level code?

    # TODO combine into one data frame
    return Result(stages=s, output=Namespace(first_level_results=first_level_results,
                                             overall_determinants=overall_determinants,
                                             resampled_determinants=resampled_determinants.drop(
                                                 ['options'],
                                                 axis=1)))
# Example #4
# 0
def tamarack(imgs: pd.DataFrame, options):
    """
    Build one model per group, register consecutive group averages to each other, and
    carry the per-group results into the space of the common time point's average.

    imgs - data frame of inputs.  The code below groups on the `group` column, collects
           each group's `file` column as the inputs to `mbm`, and reads `filename` of the
           first row when computing registration targets.
           NOTE(review): an older comment described the columns as `img` : MincAtom and
           `timept` : number; that does not match the columns actually read -- confirm.
    options - pipeline configuration; this function reads `application`, `registration`,
              `mbm` and `tamarack.common_time_pt`.  NOTE: `options.registration` is replaced
              on the passed-in object with the resolution shared by all first-level models.

    Returns a `Result` holding every stage deferred here and a `Namespace` output with
    `first_level_results`, `overall_determinants` and `resampled_determinants`.

    Raises ValueError if the first-level models are not all run at the same resolution.
    """
    # columns of the pride of models : 'timept' : number, 'model' : MincAtom
    s = Stages()

    # TODO some assertions that the pride_of_models, if provided, is correct, and that this is intended target type

    # Build the pride-of-models mapping once instead of once per group: it depends only on
    # the top-level options, which are identical for every group.
    if options.mbm.lsq6.target_type == TargetType.pride_of_models:
        pride_of_models_mapping = get_pride_of_models_mapping(
            pride_csv=options.mbm.lsq6.target_file,
            output_dir=options.application.output_directory,
            pipeline_name=options.application.pipeline_name)
    else:
        pride_of_models_mapping = None

    def group_options(options, timepoint):
        # Work on a private copy so the per-group changes below never leak into other
        # groups or into the caller's options.
        options = copy.deepcopy(options)

        if options.mbm.lsq6.target_type == TargetType.pride_of_models:
            targets = get_closest_model_from_pride_of_models(
                pride_of_models_dict=pride_of_models_mapping,
                time_point=timepoint)

            # From here on the group is configured as a plain initial-model run using the
            # model selected for this time point.
            options.mbm.lsq6 = options.mbm.lsq6.replace(
                target_type=TargetType.initial_model,
                target_file=targets.registration_standard.path)

        # FIXME use of registration_standard here is quite wrong ...
        # part of the trouble is that mbm calls registration_targets itself,
        # so we can't send this RegistrationTargets to `mbm` directly ...
        # one option: add yet another optional arg to `mbm` ...
        else:
            targets = s.defer(
                registration_targets(lsq6_conf=options.mbm.lsq6,
                                     app_conf=options.application,
                                     reg_conf=options.registration,
                                     first_input_file=imgs.filename.iloc[0]))

        # An explicitly configured resolution wins; otherwise take it from the target file.
        resolution = (options.registration.resolution
                      or get_resolution_from_file(
                          targets.registration_standard.path))

        # This must happen after calling registration_targets otherwise it will resample to options.registration.resolution
        options.registration = options.registration.replace(
            resolution=resolution)

        return options

    # build all first-level models:
    first_level_results = (
        imgs  # TODO 'group' => 'timept' ?
        # the usual annoying pattern to do an aggregate with access
        # to the groupby object's keys ... TODO: fix
        .groupby('group', as_index=False)
        .aggregate({'file': lambda files: list(files)})
        .rename(columns={'file': "files"})
        .assign(options=lambda df: df.apply(
            axis=1, func=lambda row: group_options(options, row.group)))
        .assign(build_model=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                mbm(imgs=row.files,
                    options=row.options,
                    prefix="%s" % row.group,
                    output_dir=os.path.join(
                        options.application.output_directory,
                        options.application.pipeline_name + "_first_level",
                        "%s_processed" % row.group)))))
        .sort_values(by='group'))

    # All groups must share one resolution; adopt it for the second-level registrations.
    if all(
            first_level_results.options.map(
                lambda opts: opts.registration.resolution) ==
            first_level_results.options.iloc[0].registration.resolution):
        options.registration = options.registration.replace(
            resolution=first_level_results.options.iloc[0].registration.
            resolution)
    else:
        raise ValueError(
            "some first-level models are run at different resolutions, possibly not what you want ..."
        )

    # construction of the overall inter-average transforms will be done iteratively (for efficiency/aesthetics),
    # which doesn't really fit the DataFrame mold ...

    full_hierarchy = get_nonlinear_configuration_from_options(
        nlin_protocol=options.mbm.nlin.nlin_protocol,
        reg_method=options.mbm.nlin.reg_method,
        file_resolution=options.registration.resolution)

    # FIXME no good can come of this
    nlin_protocol = full_hierarchy.confs[-1] if isinstance(
        full_hierarchy, MultilevelANTSConf) else full_hierarchy
    # first register consecutive averages together:
    # (row i holds the transform from group i's average to group i+1's average, so the
    # last group has no row of its own)
    average_registrations = (
        first_level_results[:-1].assign(
            next_model=list(first_level_results[1:].build_model))
        # TODO: we should be able to do lsq6 registration here as well!
        .assign(xfm=lambda df: df.apply(
            axis=1,
            func=lambda row: s.defer(
                lsq12_nlin(source=row.build_model.avg_img,
                           target=row.next_model.avg_img,
                           lsq12_conf=get_linear_configuration_from_options(
                               options.mbm.lsq12,
                               transform_type=LinearTransType.lsq12,
                               file_resolution=options.registration.resolution
                           ),
                           nlin_conf=nlin_protocol)))))

    # now compose the above transforms to produce transforms from each average to the common average:
    common_time_pt = options.tamarack.common_time_pt
    common_model = first_level_results[
        first_level_results.group ==
        common_time_pt].iloc[0].build_model.avg_img
    before = average_registrations[
        average_registrations.group <
        common_time_pt]  # asymmetry in before/after since
    after = average_registrations[
        average_registrations.group >=
        common_time_pt]  # we used `next_`, not `previous_`

    # compose 1st and 2nd level transforms and resample into the common average space:
    def suffixes(xs):
        # [a, b, c] -> [[a, b, c], [b, c], [c], []]
        return [xs[i:] for i in range(len(xs) + 1)]

    def prefixes(xs):
        # [a, b, c] -> [[], [a], [a, b], [a, b, c]]
        # The chain for a group after the common time point must start at the common
        # average and follow the consecutive transforms in order; the previous recursive
        # version produced [[], [c], [c, b], [c, b, a]] instead.
        return [xs[:i] for i in range(len(xs) + 1)]

    def compose_to_common(row):
        # The common group itself has nothing to compose.
        if row.uncomposed_xfms is None:
            return None
        composed = s.defer(
            concat_xfmhandlers(
                row.uncomposed_xfms,
                name=("%s_to_common" if row.group < common_time_pt
                      else "%s_from_common") % row.group))
        # Chains for later groups run common -> group, so flip them to point at common.
        if row.group >= common_time_pt:
            return s.defer(invert_xfmhandler(composed))
        return composed

    # One chain per group, in group order: groups before the common time point, then the
    # common group (None), then the groups after it.
    xfms_to_common = (
        first_level_results
        .assign(uncomposed_xfms=suffixes(list(before.xfm))[:-1] + [None] +
                prefixes(list(after.xfm))[1:])
        .assign(xfm_to_common=lambda df: df.apply(axis=1,
                                                  func=compose_to_common))
        .drop('uncomposed_xfms', axis=1))  # TODO None => identity??

    # TODO indexing here is not good ...
    first_level_determinants = pd.concat(list(
        first_level_results.build_model.apply(
            lambda x: x.determinants.assign(first_level_avg=x.avg_img))),
                                         ignore_index=True)

    # Attach to every first-level determinant the transform taking its group's average
    # to the common average, then resample the log determinants with it.
    # NOTE(review): the `else row.img` fallbacks look suspicious -- for a row without a
    # transform one would expect the determinant file itself; confirm that an `img`
    # column exists and that such rows can survive the merge at all.
    resampled_determinants = (pd.merge(
        left=first_level_determinants,
        right=xfms_to_common.assign(source=lambda df: df.xfm_to_common.apply(
            lambda x: x.source if x is not None else None)),
        left_on="first_level_avg",
        right_on='source').assign(
            resampled_log_full_det=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    mincresample_new(img=row.log_full_det,
                                     xfm=row.xfm_to_common.xfm,
                                     like=common_model))
                if row.xfm_to_common is not None else row.img),
            resampled_log_nlin_det=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    mincresample_new(img=row.log_nlin_det,
                                     xfm=row.xfm_to_common.xfm,
                                     like=common_model))
                if row.xfm_to_common is not None else row.img)))

    # For every first-level transform, append its group's transform to the common average
    # (when there is one); the result is keyed by the first-level transform.
    inverted_overall_xfms = pd.Series({
        xfm: (s.defer(concat_xfmhandlers([xfm, row.xfm_to_common]))
              if row.xfm_to_common is not None else xfm)
        for _ix, row in xfms_to_common.iterrows()
        for xfm in row.build_model.xfms.lsq12_nlin_xfm
    })

    overall_xfms = inverted_overall_xfms.apply(
        lambda x: s.defer(invert_xfmhandler(x)))

    overall_determinants = determinants_at_fwhms(
        xfms=overall_xfms,
        blur_fwhms=options.mbm.stats.stats_kernels,
        inv_xfms=inverted_overall_xfms)

    # TODO turn off bootstrap as with two-level code?

    # TODO combine into one data frame
    return Result(stages=s,
                  output=Namespace(
                      first_level_results=first_level_results,
                      overall_determinants=overall_determinants,
                      resampled_determinants=resampled_determinants.drop(
                          ['options'], axis=1)))
Пример #5
0
def tissue_vision_pipeline(options):
    """
    Build a consensus average from the anatomical images, register it to the configured
    atlas, and resample the count volumes and atlas labels between the two spaces.

    options - pipeline configuration; this function reads `application`, `registration`,
              `mbm` and `consensus_to_atlas`.

    Besides deferring stages, this writes the final table (with paths dereferenced via
    `maybe_deref_path`) to "analysis.csv", a path relative to the current working directory.

    Returns a `Result` with the deferred stages and an empty tuple as output.
    """
    output_dir = options.application.output_directory
    pipeline_name = options.application.pipeline_name

    imgs_df = get_imgs(options.application)
    # check_MINC_input_files([img.path for img in imgs])

    s = Stages()

    s.defer(create_quality_control_images(imgs=imgs_df['anatomical_MincAtom'].tolist(), montage_dir=output_dir,
                                          montage_output=os.path.join(output_dir, pipeline_name + "_resampled",
                                                                      "input_montage"),
                                          auto_range=True,
                                          message="input_mincs"))

    #############################
    # Step 1: Run MBM.py to create a consensus average
    #############################
    mbm_result = s.defer(mbm(imgs=imgs_df['anatomical_MincAtom'].tolist(), options=options,
                             prefix=options.application.pipeline_name,
                             output_dir=output_dir,
                             with_maget=False))

    # Record, per input, the lsq6-space anatomical image and the transforms produced by MBM.
    imgs_df = imgs_df.assign(
        anatomical_lsq6_MincAtom=mbm_result.xfms.lsq12_nlin_xfm.apply(lambda xfm: xfm.source),
        mbm_lsq6_XfmAtom=mbm_result.xfms.rigid_xfm.apply(lambda x: x.xfm),
        mbm_lsq12_nlin_XfmAtom=mbm_result.xfms.lsq12_nlin_xfm.apply(lambda x: x.xfm),
        mbm_full_XfmAtom=mbm_result.xfms.overall_xfm.apply(lambda x: x.xfm))

    # Bring each count volume into lsq6 space using the rigid transform of its anatomical image.
    imgs_df = imgs_df.assign(
        count_lsq6_MincAtom=lambda df:
        [s.defer(mincresample_new(img=img,
                                  xfm=xfm,
                                  like=like))
         for img, xfm, like in zip(df["count_MincAtom"],
                                   df["mbm_lsq6_XfmAtom"],
                                   df["anatomical_lsq6_MincAtom"])])

    #############################
    # Step 2: Register consensus average to ABI tissuevision Atlas
    #############################
    lsq12_conf = get_linear_configuration_from_options(conf=options.mbm.lsq12,
                                                       transform_type=LinearTransType.lsq12,
                                                       file_resolution=options.registration.resolution)
    nlin_component = get_nonlinear_component(reg_method=options.mbm.nlin.reg_method)

    # The atlas image and its label volume share the same mask file.
    atlas_target = MincAtom(name=options.consensus_to_atlas.atlas_target,
                            orig_name=options.consensus_to_atlas.atlas_target,
                            mask=MincAtom(name=options.consensus_to_atlas.atlas_target_mask,
                                          orig_name=options.consensus_to_atlas.atlas_target_mask))
    atlas_target_label = MincAtom(name=options.consensus_to_atlas.atlas_target_label,
                                  orig_name=options.consensus_to_atlas.atlas_target_label,
                                  mask=MincAtom(name=options.consensus_to_atlas.atlas_target_mask,
                                                orig_name=options.consensus_to_atlas.atlas_target_mask))

    lsq12_nlin_result = s.defer(lsq12_nlin(source=mbm_result.avg_img,
                                           target=atlas_target,
                                           lsq12_conf=lsq12_conf,
                                           nlin_module=nlin_component,
                                           nlin_options=options.mbm.nlin.nlin_protocol,
                                           resolution=options.registration.resolution,
                                           resample_source=False
                                           ))

    #############################
    # Step 3: Resample count volumes to ABI tissuevision Atlas space and vice versa
    #############################

    # lsq6 space -> consensus average -> atlas, as one concatenated transform per input.
    imgs_df = imgs_df.assign(
        lsq6_to_atlas_XfmAtom=lambda df: df['mbm_lsq12_nlin_XfmAtom'].apply(
            lambda xfm: s.defer(xfmconcat([xfm, lsq12_nlin_result.xfm]))))

    imgs_df = imgs_df.assign(
        anatomical_targetspace_MincAtom=lambda df:
        [s.defer(mincresample_new(img=img, xfm=xfm, like=atlas_target))
         for img, xfm in zip(df["anatomical_lsq6_MincAtom"], df["lsq6_to_atlas_XfmAtom"])],
        count_targetspace_MincAtom=lambda df:
        [s.defer(mincresample_new(img=img, xfm=xfm, like=atlas_target))
         for img, xfm in zip(df["count_lsq6_MincAtom"], df["lsq6_to_atlas_XfmAtom"])],
        # Labels go the other way (atlas -> lsq6 space), hence the inverted transform and
        # nearest-neighbour interpolation.
        atlas_lsq6space_MincAtom=lambda df:
        [s.defer(mincresample_new(img=atlas_target_label, xfm=xfm, like=like, invert=True,
                                  interpolation=Interpolation.nearest_neighbour,
                                  extra_flags=('-keep_real_range',)))
         for xfm, like in zip(df["lsq6_to_atlas_XfmAtom"], df["count_lsq6_MincAtom"])]
    )

    imgs_df.applymap(maybe_deref_path).to_csv("analysis.csv", index=False)

    s.defer(create_quality_control_images(imgs=imgs_df.count_targetspace_MincAtom.tolist(), montage_dir=output_dir,
                                          montage_output=os.path.join(output_dir, pipeline_name + "_resampled",
                                                                      "count_montage"),
                                          auto_range=True,
                                          message="count_mincs"))
    return Result(stages=s, output=())