Example #1
0
def test_pandas_fill_crumbs(tmp_tree):
    """Crumbs rebuilt via `pandas_fill_crumbs` must equal `Crumb.unfold()`."""
    crumb = tmp_tree[0]

    # Build a DataFrame out of the crumb's value records for 'image'.
    records = crumb.values_map('image')
    frame = pd.DataFrame.from_dict(valuesmap_to_dict(records))

    # Filling crumbs from the DataFrame rows must reproduce the unfolded crumbs.
    rebuilt = list(pandas_fill_crumbs(frame, crumb))
    assert rebuilt == crumb.unfold()
Example #2
0
def test_values_map_from_df(tmp_tree):
    """`from_records` and `from_dict` must build identical DataFrames from a values map.

    Bug fix: the original asserts used ``all(df1 == df2)``.  Iterating a
    DataFrame yields its *column labels*, which are non-empty strings and
    therefore always truthy — the asserts could never fail.  The comparison is
    now reduced element-wise with ``.all().all()`` so value mismatches are
    actually detected.
    """
    tmp_crumb = tmp_tree[0]

    recs = tmp_crumb.values_map('image')

    adict = valuesmap_to_dict(recs)

    df1 = pd.DataFrame.from_records([dict(rec) for rec in recs])
    df2 = pd.DataFrame.from_dict(adict)
    # Compare cell values, not (always-truthy) column labels.
    assert (df1 == df2).all().all()

    assert (pd.DataFrame.from_dict   ([dict(rec) for rec in recs]) ==
            pd.DataFrame.from_records([dict(rec) for rec in recs])).all().all()
Example #3
0
def test_valuesmap_to_dict(tmp_tree):
    """`valuesmap_to_dict` keeps one entry per record and matches the fixture."""
    crumb, expected_values = tmp_tree[0], tmp_tree[1]

    records = crumb.values_map("image")
    total = len(records)

    mapping = valuesmap_to_dict(records)
    # Every crumb argument keeps exactly one value per record.
    for values in mapping.values():
        assert len(values) == total

    # De-duplicating each column reproduces the fixture's values dict.
    assert expected_values == {name: rm_dups(vals) for name, vals in mapping.items()}

    key_subset = ["subject_id", "session_id"]
    appended = append_dict_values([dict(rec) for rec in records], keys=key_subset)

    for key in key_subset:
        assert appended[key] == mapping[key]

    # Calling values_map() with no argument behaves like values_map("image") here.
    assert crumb.values_map("image") == crumb.values_map()
Example #4
0
def crumb_wf(work_dir, data_crumb, output_dir, file_templates,
             wf_name="main_workflow"):
    """ Creates a workflow with the `subject_session_file` input nodes and an empty `datasink`.
    The 'datasink' must be connected afterwards in order to work.

    Parameters
    ----------
    work_dir: str
        Path to the workflow temporary folder

    data_crumb: hansel.Crumb
        The crumb until the subject files.
        Example: Crumb('/home/hansel/data/{subject_id}/{session_id}/{modality}/{image_file}')

    output_dir: str
        Path to where the datasink will leave the results.

    file_templates: Dict[str -> list of 2-tuple]
        Maps of crumb argument values to specify each file in the `data_crumb`.
        Example: {'anat': [('modality', 'anat'), ('image_file', 'anat_hc.nii.gz')],
                  'pet':  [('modality', 'pet'),  ('image_file', 'pet_fdg.nii.gz')],
                 }

    wf_name: str
        Name of the main workflow

    Returns
    -------
    wf: Workflow
    """
    # create the root workflow
    wf = pe.Workflow(name=wf_name, base_dir=work_dir)

    # datasink: parameterization is disabled, so the output sub-folder comes
    # only from the explicit 'container' connection made below
    datasink = pe.Node(DataSink(parameterization=False,
                                base_directory=output_dir,),
                       name="datasink")

    # node that picks the input files by filling the crumb arguments
    # with the given file templates
    select_files = pe.Node(DataCrumb(crumb=data_crumb,
                                     templates=file_templates,
                                     raise_on_empty=False),
                           name='selectfiles')

    # basic file name substitutions for the datasink: strip the crumb argument
    # names from output paths and collapse the double underscores left behind.
    # NOTE(review): reads the private `_infields` attribute of the DataCrumb
    # interface — presumably the crumb arguments still left open by the
    # templates; confirm against the DataCrumb implementation.
    undef_args = select_files.interface._infields
    substitutions = [(name, "") for name in undef_args]
    substitutions.append(("__", "_"))

    datasink.inputs.substitutions = extend_trait_list(datasink.inputs.substitutions,
                                                      substitutions)

    # Infosource - the information source that iterates over crumb values map from the filesystem
    infosource = pe.Node(interface=IdentityInterface(fields=undef_args), name="infosrc")
    infosource.iterables = list(valuesmap_to_dict(joint_value_map(data_crumb, undef_args)).items())
    # iterate the argument value lists in lockstep instead of over their cross-product
    infosource.synchronize = True

    # join the crumb argument values into a single output container name
    joinpath = pe.Node(joinstrings(len(undef_args)), name='joinpath')

    # Connect the infosrc node to the datasink
    input_joins = [(name, 'arg{}'.format(arg_no+1))
                   for arg_no, name in enumerate(undef_args)]

    wf.connect([
                (infosource,   select_files, [(field, field) for field in undef_args]),
                (select_files, joinpath,     input_joins),
                (joinpath,     datasink,     [("out", "container")]),
               ],
              )

    return wf
Example #5
0
def crumb_wf(work_dir,
             data_crumb,
             output_dir,
             file_templates,
             wf_name="main_workflow"):
    """Build a workflow wiring crumb-selected input files to an empty datasink.

    The returned workflow's 'datasink' node still has to be connected to the
    processing nodes afterwards in order to work.

    Parameters
    ----------
    work_dir: str
        Path to the workflow temporary folder.

    data_crumb: hansel.Crumb
        The crumb up to the subject files, e.g.
        Crumb('/home/hansel/data/{subject_id}/{session_id}/{modality}/{image_file}')

    output_dir: str
        Directory where the datasink will leave the results.

    file_templates: Dict[str -> list of 2-tuple]
        Crumb argument values identifying each file in `data_crumb`, e.g.
        {'anat': [('modality', 'anat'), ('image_file', 'anat_hc.nii.gz')],
         'pet':  [('modality', 'pet'),  ('image_file', 'pet_fdg.nii.gz')]}

    wf_name: str
        Name of the main workflow.

    Returns
    -------
    wf: Workflow
    """
    workflow = pe.Workflow(name=wf_name, base_dir=work_dir)

    # sink node; the 'container' connection below decides the output sub-folder
    sink = pe.Node(
        DataSink(parameterization=False, base_directory=output_dir),
        name="datasink",
    )

    # node filling the crumb arguments with the given file templates
    selector = pe.Node(
        DataCrumb(crumb=data_crumb,
                  templates=file_templates,
                  raise_on_empty=False),
        name='selectfiles',
    )

    # crumb arguments that remain open after applying the templates
    open_args = selector.interface._infields

    # strip the argument names from sink paths, then collapse leftover '__'
    subs = [(arg, "") for arg in open_args] + [("__", "_")]
    sink.inputs.substitutions = extend_trait_list(sink.inputs.substitutions,
                                                  subs)

    # information source iterating over the value maps found on the filesystem
    info_src = pe.Node(interface=IdentityInterface(fields=open_args),
                       name="infosrc")
    info_src.iterables = list(
        valuesmap_to_dict(joint_value_map(data_crumb, open_args)).items())
    info_src.synchronize = True

    # join the open argument values into one output container name
    join_node = pe.Node(joinstrings(len(open_args)), name='joinpath')
    join_inputs = [(arg, 'arg{}'.format(idx + 1))
                   for idx, arg in enumerate(open_args)]

    workflow.connect([
        (info_src, selector, [(arg, arg) for arg in open_args]),
        (selector, join_node, join_inputs),
        (join_node, sink, [("out", "container")]),
    ])

    return workflow