Example No. 1
def test_vox2mm():
    """Test vox2mm."""
    test = np.array([[20, 20, 20], [0, 0, 0]])
    true = np.array([[-50.0, -86.0, -32.0], [-90.0, -126.0, -72.0]])
    img = utils.get_template(space="mni152_2mm", mask=None)
    aff = img.affine
    assert np.array_equal(utils.vox2mm(test, aff), true)
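A minimal numpy sketch of where the expected values come from (illustrative only; `vox2mm_sketch` and `affine_2mm` are made-up names, not NiMARE's API): vox2mm applies the image affine to voxel indices, and with the standard MNI152 2mm affine, voxel [20, 20, 20] lands at [-50, -86, -32] and [0, 0, 0] at [-90, -126, -72].

import numpy as np

# Standard MNI152 2mm affine (assumed; matches the template requested in the test).
affine_2mm = np.array([
    [2.0, 0.0, 0.0, -90.0],
    [0.0, 2.0, 0.0, -126.0],
    [0.0, 0.0, 2.0, -72.0],
    [0.0, 0.0, 0.0, 1.0],
])

def vox2mm_sketch(ijk, affine):
    """Apply the affine to voxel indices (N x 3) to get mm coordinates."""
    ijk = np.atleast_2d(ijk)
    homogeneous = np.column_stack([ijk, np.ones(len(ijk))])
    return (affine @ homogeneous.T).T[:, :3]

print(vox2mm_sketch([[20, 20, 20], [0, 0, 0]], affine_2mm))
# [[ -50.  -86.  -32.]
#  [ -90. -126.  -72.]]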
Example No. 2
def test_mm2vox():
    """Test mm2vox."""
    test = np.array([[20, 20, 20], [0, 0, 0]])
    true = np.array([[55.0, 73.0, 46.0], [45.0, 63.0, 36.0]])
    img = utils.get_template(space="mni152_2mm", mask=None)
    aff = img.affine
    assert np.array_equal(utils.mm2vox(test, aff), true)
Example No. 3
def test_mm2vox():
    """
    Test mm2vox
    """
    test = np.array([[20, 20, 20], [0, 0, 0]])
    true = np.array([[55., 73., 46.], [45., 63., 36.]])
    img = utils.get_template(space='mni152_2mm', mask=None)
    aff = img.affine
    assert np.array_equal(utils.mm2vox(test, aff), true)
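The inverse direction can be sketched the same way (again illustrative, not NiMARE's implementation): mm2vox applies the inverse affine and rounds to the nearest voxel, which is how the expected values above are obtained.

import numpy as np

affine_2mm = np.array([
    [2.0, 0.0, 0.0, -90.0],
    [0.0, 2.0, 0.0, -126.0],
    [0.0, 0.0, 2.0, -72.0],
    [0.0, 0.0, 0.0, 1.0],
])

def mm2vox_sketch(xyz, affine):
    """Apply the inverse affine to mm coordinates (N x 3) and round to voxels."""
    xyz = np.atleast_2d(xyz)
    homogeneous = np.column_stack([xyz, np.ones(len(xyz))])
    return np.round((np.linalg.inv(affine) @ homogeneous.T).T[:, :3])

print(mm2vox_sketch([[20, 20, 20], [0, 0, 0]], affine_2mm))
# [[55. 73. 46.]
#  [45. 63. 36.]]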
Example No. 4
def testdata2():
    mask_img = get_template(space='mni152_2mm', mask='brain')
    df = pd.DataFrame(columns=['id', 'x', 'y', 'z', 'n', 'space'],
                      data=[[1, -28, -20, -16, 20, 'mni'],
                            [2, -28, -20, -16, 5, 'mni']])
    xyz = df[['x', 'y', 'z']].values
    ijk = pd.DataFrame(mm2vox(xyz, mask_img.affine), columns=['i', 'j', 'k'])
    df = pd.concat([df, ijk], axis=1)

    dset = DummyDataset(df, mask_img)
    return dset
Example No. 5
def test_kernel_peaks(testdata_cbma, tmp_path_factory, kern, res, param,
                      return_type, kwargs):
    """Peak/COMs of kernel maps should match the foci fed in (assuming focus isn't masked out).

    Notes
    -----
    Remember that dataframe --> dataset won't work.
    Only testing dataset --> dataset with ALEKernel because it takes a while.
    Test on multiple template resolutions.
    """
    tmpdir = tmp_path_factory.mktemp("test_kernel_peaks")
    testdata_cbma.update_path(tmpdir)

    id_ = "pain_03.nidm-1"

    template = get_template(space=f"mni152_{res}mm", mask="brain")
    masker = get_masker(template)

    xyz = testdata_cbma.coordinates.loc[testdata_cbma.coordinates["id"] == id_,
                                        ["x", "y", "z"]]
    ijk = mm2vox(xyz, masker.mask_img.affine)
    ijk = np.squeeze(ijk.astype(int))

    if param == "dataframe":
        input_ = testdata_cbma.coordinates.copy()
    elif param == "dataset":
        input_ = testdata_cbma.copy()

    kern_instance = kern(**kwargs)
    output = kern_instance.transform(input_, masker, return_type=return_type)

    if return_type == "image":
        kern_data = output[0].get_fdata()
    elif return_type == "array":
        kern_data = np.squeeze(
            masker.inverse_transform(output[:1, :]).get_fdata())
    else:
        f = output.images.loc[output.images["id"] == id_,
                              kern_instance.image_type].values[0]
        kern_data = nib.load(f).get_fdata()

    if isinstance(kern_instance, kernel.ALEKernel):
        loc_idx = np.array(np.where(kern_data == np.max(kern_data))).T
    elif isinstance(kern_instance, (kernel.MKDAKernel, kernel.KDAKernel)):
        loc_idx = np.array(center_of_mass(kern_data)).astype(int).T
    else:
        raise Exception(f"A {type(kern_instance)}? Why?")

    loc_ijk = np.squeeze(loc_idx)

    assert np.array_equal(ijk, loc_ijk)
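A small self-contained sketch of the two localization strategies used in the assertion above (the array `fake_map` is synthetic, not a real kernel map): smooth ALE-style maps peak exactly at the focus, so the argmax recovers it, while binary MKDA/KDA spheres are located via their center of mass.

import numpy as np
from scipy.ndimage import center_of_mass

fake_map = np.zeros((10, 10, 10))
fake_map[4, 5, 6] = 1.0  # pretend focus

# ALE-style: index of the maximum value
argmax_ijk = np.squeeze(np.array(np.where(fake_map == fake_map.max())).T)
# MKDA/KDA-style: center of mass of the (here trivial) sphere
com_ijk = np.array(center_of_mass(fake_map)).astype(int)

print(argmax_ijk)  # [4 5 6]
print(com_ijk)     # [4 5 6]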
Example No. 6
def cbma_testdata3():
    """
    Reduced dataset for SCALE test.
    """
    mask_img = get_template(space='mni152_2mm', mask='brain')
    mask_img = nib.Nifti1Image(np.ones((10, 10, 10), int), mask_img.affine)
    df = pd.DataFrame(columns=['id', 'x', 'y', 'z', 'n', 'space'],
                      data=[[1, -28, -20, -16, 100, 'mni'],
                            [2, -28, -20, -16, 100, 'mni'],
                            [3, -28, -20, -16, 100, 'mni']])
    xyz = df[['x', 'y', 'z']].values
    ijk = pd.DataFrame(mm2vox(xyz, mask_img.affine), columns=['i', 'j', 'k'])
    df = pd.concat([df, ijk], axis=1)

    dset = DummyDataset(df, mask_img)
    pytest.cbma_testdata3 = dset
Example No. 7
def test_get_template():
    """Test nimare.utils.get_template."""
    img = utils.get_template(space="mni152_1mm", mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_1mm", mask="brain")
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_1mm", mask="gm")
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_2mm", mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_2mm", mask="brain")
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_2mm", mask="gm")
    assert isinstance(img, nib.Nifti1Image)
Example No. 8
def cbma_testdata1():
    mask_img = get_template(space='mni152_2mm', mask='brain')
    df = pd.DataFrame(columns=['id', 'x', 'y', 'z', 'n', 'space'],
                      data=[[1, -28, -20, -16, 100, 'mni'],
                            [2, -28, -20, -16, 100, 'mni'],
                            [3, -28, -20, -16, 100, 'mni'],
                            [4, -28, -20, -16, 100, 'mni'],
                            [5, -28, -20, -16, 100, 'mni'],
                            [6, -28, -20, -16, 100, 'mni'],
                            [7, -28, -20, -16, 100, 'mni'],
                            [8, -28, -20, -16, 100, 'mni'],
                            [9, -28, -20, -16, 100, 'mni'],
                            [10, -28, -20, -16, 100, 'mni'],
                            [11, -28, -20, -16, 100, 'mni']])
    xyz = df[['x', 'y', 'z']].values
    ijk = pd.DataFrame(mm2vox(xyz, mask_img.affine), columns=['i', 'j', 'k'])
    df = pd.concat([df, ijk], axis=1)

    dset = DummyDataset(df, mask_img)
    pytest.cbma_testdata1 = dset
Example No. 9
def test_get_template():
    """
    Test nimare.utils.get_template.
    """
    img = utils.get_template(space='mni152_1mm', mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space='mni152_1mm', mask='brain')
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space='mni152_1mm', mask='gm')
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space='mni152_2mm', mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space='mni152_2mm', mask='brain')
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space='mni152_2mm', mask='gm')
    assert isinstance(img, nib.Nifti1Image)
Example No. 10
    def __init__(self, source, target="mni152_2mm", mask=None):
        if isinstance(source, str):
            with open(source, "r") as f_obj:
                data = json.load(f_obj)
        elif isinstance(source, dict):
            data = source
        else:
            raise Exception("`source` needs to be a file path or a dictionary")

        # Datasets are organized by study, then experiment
        # To generate unique IDs, we combine study ID with experiment ID
        # build list of ids
        id_columns = ["id", "study_id", "contrast_id"]
        all_ids = []
        for pid in data.keys():
            for expid in data[pid]["contrasts"].keys():
                id_ = f"{pid}-{expid}"
                all_ids.append([id_, pid, expid])
        id_df = pd.DataFrame(columns=id_columns, data=all_ids)
        id_df = id_df.set_index("id", drop=False)
        self._ids = id_df.index.values

        # Set up Masker
        if mask is None:
            mask = get_template(target, mask="brain")
        self.masker = mask
        self.space = target

        self.annotations = _dict_to_df(id_df, data, key="labels")
        self.coordinates = _dict_to_coordinates(data,
                                                masker=self.masker,
                                                space=self.space)
        self.images = _dict_to_df(id_df, data, key="images")
        self.metadata = _dict_to_df(id_df, data, key="metadata")
        self.texts = _dict_to_df(id_df, data, key="text")
        self.basepath = None
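The ID-building loop near the top of this constructor can be illustrated with a tiny hypothetical `data` dictionary (the study and contrast names below are made up): study-level keys and contrast-level keys are concatenated into "<study>-<contrast>" IDs, one row per experiment.

import pandas as pd

data = {
    "pain_01.nidm": {"contrasts": {"1": {}}},
    "pain_02.nidm": {"contrasts": {"1": {}, "2": {}}},
}

all_ids = []
for pid, study in data.items():
    for expid in study["contrasts"]:
        all_ids.append([f"{pid}-{expid}", pid, expid])

id_df = pd.DataFrame(columns=["id", "study_id", "contrast_id"], data=all_ids)
print(id_df["id"].tolist())
# ['pain_01.nidm-1', 'pain_02.nidm-1', 'pain_02.nidm-2']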
Example No. 11
    def __init__(
        self,
        count_df,
        coordinates_df,
        mask="mni152_2mm",
        n_topics=100,
        n_regions=2,
        symmetric=True,
        alpha=0.1,
        beta=0.01,
        gamma=0.01,
        delta=1.0,
        dobs=25,
        roi_size=50.0,
        seed_init=1,
    ):
        LGR.info("Constructing/Initializing GCLDA Model")
        count_df = count_df.copy()
        coordinates_df = coordinates_df.copy()

        # Check IDs from DataFrames
        count_df.index = count_df.index.astype(str)
        count_df["id"] = count_df.index
        count_ids = count_df.index.tolist()
        if "id" not in coordinates_df.columns:
            coordinates_df["id"] = coordinates_df.index
        coordinates_df["id"] = coordinates_df["id"].astype(str)
        coord_ids = sorted(list(set(coordinates_df["id"].tolist())))
        ids = sorted(list(set(count_ids).intersection(coord_ids)))
        if len(count_ids) != len(coord_ids) != len(ids):
            union_ids = sorted(list(set(count_ids + coord_ids)))
            LGR.warning(
                f"IDs mismatch detected: retaining {len(ids)} of {len(union_ids)} unique IDs"
            )
        self.ids = ids

        # Reduce inputs based on shared IDs
        count_df = count_df.loc[count_df["id"].isin(ids)]
        coordinates_df = coordinates_df.loc[coordinates_df["id"].isin(ids)]

        # --- Checking to make sure parameters are valid
        if (symmetric is True) and (n_regions % 2 != 0):
            # the symmetric model requires an even number of subregions
            raise ValueError("Cannot run a symmetric model unless n_regions is even.")

        # Initialize sampling parameters
        # The global sampling iteration of the model
        self.iter = 0
        # Current random seed (is incremented after initialization and each sampling update)
        self.seed = 0

        # Set up model hyperparameters
        # Pseudo-count hyperparams need to be floats so that when sampling
        # distributions are computed the count matrices/vectors are converted
        # to floats
        self.params = {
            "n_topics": n_topics,  # Number of topics (T)
            "n_regions": n_regions,  # Number of subregions (R)
            "alpha": alpha,  # Prior count on topics for each doc
            "beta": beta,  # Prior count on word-types for each topic
            "gamma": gamma,  # Prior count added to y-counts when sampling z assignments
            "delta": delta,  # Prior count on subregions for each topic
            # Default ROI (default covariance spatial region we regularize towards) (not in paper)
            "roi_size": roi_size,
            # Sample constant (# observations weighting sigma in direction of default covariance)
            # (not in paper)
            "dobs": dobs,
            # Use constrained symmetry on subregions? (only for n_regions = 2)
            "symmetric": symmetric,
            "seed_init": seed_init,  # Random seed for initializing model
        }

        # Add dictionaries for other model info
        self.data = {}
        self.topics = {}

        # Prepare data
        if isinstance(mask, str) and not op.isfile(mask):
            self.mask = get_template(mask, mask="brain")
        else:
            self.mask = load_niimg(mask)

        # Extract document and word indices from count_df
        docidx_mapper = {id_: i for (i, id_) in enumerate(ids)}

        # Create docidx column
        count_df["docidx"] = count_df["id"].map(docidx_mapper)
        count_df = count_df.drop(columns=["id"])

        # Remove words not found anywhere in the corpus
        n_terms = len(count_df.columns) - 1  # number of columns minus one for docidx
        count_df = count_df.loc[:, (count_df != 0).any(axis=0)]
        n_terms_in_corpus = len(count_df.columns) - 1
        if n_terms_in_corpus != n_terms:
            LGR.warning(
                "Some terms in count_df do not appear in corpus. "
                f"Retaining {n_terms_in_corpus/n_terms} terms."
            )

        # Get updated vocabulary
        # List of word-strings (wtoken_word_idx values are indices into this list)
        vocabulary = count_df.columns.tolist()
        vocabulary.remove("docidx")
        self.vocabulary = vocabulary
        widx_mapper = {word: i for (i, word) in enumerate(self.vocabulary)}

        # Melt dataframe and create widx column
        widx_df = pd.melt(count_df, id_vars=["docidx"], var_name="word", value_name="count")
        widx_df["widx"] = widx_df["word"].map(widx_mapper)

        # Replicate rows based on count
        widx_df = widx_df.loc[np.repeat(widx_df.index.values, widx_df["count"])]
        widx_df = widx_df[["docidx", "widx"]].astype(int)
        widx_df.sort_values(by=["docidx", "widx"], inplace=True)

        # List of document-indices for word-tokens
        self.data["wtoken_doc_idx"] = widx_df["docidx"].tolist()
        # List of word-indices for word-tokens
        self.data["wtoken_word_idx"] = widx_df["widx"].tolist()

        # Import all peak-indices into lists
        coordinates_df["docidx"] = coordinates_df["id"].astype(str).map(docidx_mapper)
        coordinates_df = coordinates_df[["docidx", "x", "y", "z"]]
        coordinates_df["docidx"] = coordinates_df["docidx"].astype(int)

        # List of document-indices for peak-tokens x
        self.data["ptoken_doc_idx"] = coordinates_df["docidx"].tolist()
        self.data["ptoken_coords"] = coordinates_df[["x", "y", "z"]].values

        # Seed random number generator
        np.random.seed(self.params["seed_init"])

        # Preallocate vectors of assignment indices
        # word->topic assignments
        self.topics["wtoken_topic_idx"] = np.zeros(len(self.data["wtoken_word_idx"]), dtype=int)

        # Randomly initialize peak->topic assignments (y) ~ unif(1...n_topics)
        self.topics["peak_topic_idx"] = np.random.randint(
            self.params["n_topics"],
            size=(len(self.data["ptoken_doc_idx"])),
        )

        # peak->region assignments
        self.topics["peak_region_idx"] = np.zeros(len(self.data["ptoken_doc_idx"]), dtype=int)

        # Preallocate count matrices
        # Peaks: D x T: Number of peak-tokens assigned to each topic per document
        self.topics["n_peak_tokens_doc_by_topic"] = np.zeros(
            (len(self.ids), self.params["n_topics"]),
            dtype=int,
        )

        # Peaks: R x T: Number of peak-tokens assigned to each subregion per topic
        self.topics["n_peak_tokens_region_by_topic"] = np.zeros(
            (self.params["n_regions"], self.params["n_topics"]),
            dtype=int,
        )

        # Words: W x T: Number of word-tokens assigned to each topic per word-type
        self.topics["n_word_tokens_word_by_topic"] = np.zeros(
            (len(self.vocabulary), self.params["n_topics"]),
            dtype=int,
        )

        # Words: D x T: Number of word-tokens assigned to each topic per document
        self.topics["n_word_tokens_doc_by_topic"] = np.zeros(
            (len(self.ids), self.params["n_topics"]),
            dtype=int,
        )

        # Words: 1 x T: Total number of word-tokens assigned to each topic (across all docs)
        self.topics["total_n_word_tokens_by_topic"] = np.zeros(
            (1, self.params["n_topics"]),
            dtype=int,
        )

        # Preallocate Gaussians for all subregions
        # Regions_Mu & Regions_Sigma: Gaussian mean and covariance for all
        # subregions of all topics
        # Formed using lists (over topics) of lists (over subregions) of numpy
        # arrays
        #   regions_mu = (n_topics, n_regions, 1, n_peak_dims)
        #   regions_sigma = (n_topics, n_regions, n_peak_dims, n_peak_dims)
        # (\mu^{(t)}_r)
        self.topics["regions_mu"] = np.zeros(
            (
                self.params["n_topics"],
                self.params["n_regions"],
                1,
                self.data["ptoken_coords"].shape[1],  # generally 3
            ),
        )
        # (\sigma^{(t)}_r)
        self.topics["regions_sigma"] = np.zeros(
            (
                self.params["n_topics"],
                self.params["n_regions"],
                self.data["ptoken_coords"].shape[1],  # generally 3
                self.data["ptoken_coords"].shape[1],  # generally 3
            )
        )

        # Initialize lists for tracking log-likelihood of data over sampling iterations
        self.loglikelihood = {
            "iter": [],  # Tracks iteration associated with the log-likelihood values
            "x": [],  # Tracks log-likelihood of peak tokens
            "w": [],  # Tracks log-likelihood of word tokens
            "total": [],  # Tracks log-likelihood of peak + word tokens
        }

        # Initialize peak->subregion assignments (r)
        if self.params["symmetric"]:
            # If the model is symmetric, assign each peak to a random region *pair*,
            # then deterministically pick the member of that pair from the sign of the
            # x-coordinate: index 2*pair if x <= 0, index 2*pair + 1 if x > 0.
            n_pairs = int(self.params["n_regions"] / 2)
            initial_assignments = np.random.randint(
                n_pairs,
                size=(len(self.data["ptoken_doc_idx"])),
            )
            signs = (self.data["ptoken_coords"][:, 0] > 0).astype(int)
            self.topics["peak_region_idx"][:] = (initial_assignments * 2) + signs
        else:
            # if asymmetric model, randomly sample r ~ unif(1...n_regions)
            self.topics["peak_region_idx"][:] = np.random.randint(
                self.params["n_regions"],
                size=(len(self.data["ptoken_doc_idx"])),
            )

        # Update model vectors and count matrices to reflect y and r assignments
        for i_ptoken, peak_doc in enumerate(self.data["ptoken_doc_idx"]):
            # peak-token -> topic assignment (y_i)
            peak_topic = self.topics["peak_topic_idx"][i_ptoken]
            # peak-token -> subregion assignment (c_i)
            peak_region = self.topics["peak_region_idx"][i_ptoken]
            # Increment document-by-topic counts
            self.topics["n_peak_tokens_doc_by_topic"][peak_doc, peak_topic] += 1
            # Increment region-by-topic
            self.topics["n_peak_tokens_region_by_topic"][peak_region, peak_topic] += 1

        # Randomly Initialize Word->Topic Assignments (z) for each word
        # token w_i: sample z_i proportional to p(topic|doc_i)
        for i_wtoken, word in enumerate(self.data["wtoken_word_idx"]):
            # w_i doc-index
            doc = self.data["wtoken_doc_idx"][i_wtoken]

            # Estimate p(t|d) for current doc
            p_topic_g_doc = (
                self.topics["n_peak_tokens_doc_by_topic"][doc, :] + self.params["gamma"]
            )

            # Sample a topic from p(t|d) for the z-assignment
            # Compute a cdf of the sampling distribution for z
            probs = np.cumsum(p_topic_g_doc)

            # How many elements of cdf are less than sample
            random_threshold = np.random.rand() * probs[-1]
            # z = # elements of cdf less than rand-sample
            topic = np.sum(probs < random_threshold)

            # Update model assignment vectors and count-matrices to reflect z
            # Word-token -> topic assignment (z_i)
            self.topics["wtoken_topic_idx"][i_wtoken] = topic
            self.topics["n_word_tokens_word_by_topic"][word, topic] += 1
            self.topics["total_n_word_tokens_by_topic"][0, topic] += 1
            self.topics["n_word_tokens_doc_by_topic"][doc, topic] += 1
Example No. 12
def conperm_workflow(contrast_images,
                     mask_image=None,
                     output_dir=None,
                     prefix="",
                     n_iters=10000):
    """Run a contrast permutation workflow."""
    if mask_image is None:
        target = "mni152_2mm"
        mask_image = get_template(target, mask="brain")

    n_studies = len(contrast_images)
    LGR.info("Loading contrast maps...")
    z_data = apply_mask(contrast_images, mask_image)

    boilerplate = """
A contrast permutation analysis was performed on a sample of {n_studies}
images. A brain mask derived from the MNI 152 template (Fonov et al., 2009;
Fonov et al., 2011) was applied at 2x2x2mm resolution. The sign flipping
method used was implemented as described in Maumet & Nichols (2016), with
{n_iters} iterations used to estimate the null distribution.

References
----------
- Fonov, V., Evans, A. C., Botteron, K., Almli, C. R., McKinstry, R. C.,
Collins, D. L., & Brain Development Cooperative Group. (2011).
Unbiased average age-appropriate atlases for pediatric studies.
Neuroimage, 54(1), 313-327.
- Fonov, V. S., Evans, A. C., McKinstry, R. C., Almli, C. R., & Collins, D. L.
(2009). Unbiased nonlinear average age-appropriate brain templates from birth
to adulthood. NeuroImage, (47), S102.
- Maumet, C., & Nichols, T. E. (2016). Minimal Data Needed for Valid & Accurate
Image-Based fMRI Meta-Analysis. https://doi.org/10.1101/048249
    """

    LGR.info("Performing meta-analysis.")
    log_p_map, t_map, _ = permuted_ols(
        np.ones((z_data.shape[0], 1)),
        z_data,
        confounding_vars=None,
        model_intercept=False,  # modeled by tested_vars
        n_perm=n_iters,
        two_sided_test=True,
        random_state=42,
        n_jobs=1,
        verbose=0,
    )
    res = {"logp": log_p_map, "t": t_map}
    # The t_test function will stand in for the Estimator in the results object
    res = MetaResult(permuted_ols, mask_image, maps=res)

    boilerplate = boilerplate.format(n_studies=n_studies, n_iters=n_iters)

    if output_dir is None:
        output_dir = os.getcwd()
    else:
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    LGR.info("Saving output maps...")
    res.save_maps(output_dir=output_dir, prefix=prefix)
    LGR.info("Workflow completed.")
    LGR.info(boilerplate)
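The sign-flipping null described in the boilerplate can be sketched in plain numpy (illustrative only; the workflow itself delegates to nilearn's permuted_ols, and the data below are random): under the null each study's contrast map is equally likely to have either sign, so randomly flipping signs across studies and recording the maximum absolute group mean builds a max-statistic, FWE-corrected null distribution.

import numpy as np

rng = np.random.RandomState(42)
n_studies, n_voxels, n_iters = 20, 100, 1000
data = rng.normal(loc=0.2, scale=1.0, size=(n_studies, n_voxels))

observed = data.mean(axis=0)
null_max = np.empty(n_iters)
for i in range(n_iters):
    signs = rng.choice([-1.0, 1.0], size=(n_studies, 1))  # flip each study's sign
    null_max[i] = np.abs((data * signs).mean(axis=0)).max()

# Voxel-wise FWE-corrected p-values from the max-statistic null distribution
p_fwe = (null_max[None, :] >= np.abs(observed)[:, None]).mean(axis=1)
print(p_fwe.min())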
Example No. 13
def test_get_template():
    """Test nimare.utils.get_template."""
    # 1mm template
    img = utils.get_template(space="mni152_1mm", mask=None)
    assert isinstance(img, nib.Nifti1Image)
    assert not nib.is_proxy(img.dataobj)
    img = utils.get_template(space="mni152_1mm", mask="brain")
    assert isinstance(img, nib.Nifti1Image)

    # 2mm template (default)
    img = utils.get_template(space="mni152_2mm", mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="mni152_2mm", mask="brain")
    assert isinstance(img, nib.Nifti1Image)
    assert not nib.is_proxy(img.dataobj)

    # ALE template
    img = utils.get_template(space="ale_2mm", mask=None)
    assert isinstance(img, nib.Nifti1Image)
    img = utils.get_template(space="ale_2mm", mask="brain")
    assert isinstance(img, nib.Nifti1Image)
    assert not nib.is_proxy(img.dataobj)

    # Expect exceptions when incompatible spaces or masks are requested.
    with pytest.raises(ValueError):
        utils.get_template(space="something", mask=None)

    with pytest.raises(ValueError):
        utils.get_template(space="mni152_1mm", mask="gm")

    with pytest.raises(ValueError):
        utils.get_template(space="mni152_2mm", mask="gm")

    with pytest.raises(ValueError):
        utils.get_template(space="ale_2mm", mask="gm")
Example No. 14
     "contrasts": {
         "animal": "as-Animal"
     },
 }),
 ({
     "collection_ids": {
         "informative_name": 8836
     },
     "contrasts": {
         "animal": "as-Animal"
     },
     "map_type_conversion": {
         "T map": "t"
     },
     "target": "mni152_2mm",
     "mask": get_template("mni152_2mm", mask="brain"),
 }),
 ({
     "collection_ids": (6348, 6419),
     "contrasts": {
         "action": "action"
     },
     "map_type_conversion": {
         "univariate-beta map": "beta"
     },
 }),
 ({
     "collection_ids": (778, ),  # collection not found
     "contrasts": {
         "action": "action"
     },
Example No. 15
def _create_foci(foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng,
                 space):
    """Generate study specific foci.

    .. versionadded:: 0.0.4

    Parameters
    ----------
    foci : :obj:`int` or :obj:`list`
        The number of foci to be generated per study or the
        x,y,z coordinates of the ground truth foci.
    foci_percentage : :obj:`float`
        Percentage of studies where the foci appear.
    fwhm : :obj:`float`
        Full width at half maximum (fwhm) to define the probability
        spread of the foci.
    n_studies : :obj:`int`
        Number of n_studies to generate.
    n_noise_foci : :obj:`int`
        Number of foci considered to be noise in each study.
    rng : :class:`numpy.random.RandomState`
        Random state to reproducibly initialize random numbers.
    space : :obj:`str`
        The template space the coordinates are reported in.

    Returns
    -------
    ground_truth_foci : :obj:`list`
        List of 3-item tuples containing x, y, z coordinates
        of the ground truth foci or an empty list if
        there are no ground_truth_foci.
    foci_dict : :obj:`dict`
        Dictionary with keys representing the study, and
        whose values represent the study specific foci.
    """
    # convert foci_percentage to float between 0 and 1
    if isinstance(foci_percentage, str) and foci_percentage[-1] == "%":
        foci_percentage = float(foci_percentage[:-1]) / 100

    if space == "MNI":
        template_img = get_template(space="mni152_2mm", mask="brain")

    # use a template to find all "valid" coordinates
    template_data = template_img.get_fdata()
    possible_ijks = np.argwhere(template_data)

    # number of "convergent" foci each study should report
    if isinstance(foci, int):
        foci_idxs = np.unique(
            rng.choice(range(possible_ijks.shape[0]), foci, replace=True))
        # if there are no foci_idxs, give a dummy coordinate (0, 0, 0)
        ground_truth_foci_ijks = possible_ijks[
            foci_idxs] if foci_idxs.size else np.array([[]])
    elif isinstance(foci, list):
        ground_truth_foci_ijks = np.array(
            [mm2vox(coord, template_img.affine) for coord in foci])

    # create a probability map for each peak
    kernel = get_ale_kernel(template_img, fwhm)[1]
    foci_prob_maps = {
        tuple(peak): compute_ale_ma(template_data.shape, np.atleast_2d(peak),
                                    kernel)
        for peak in ground_truth_foci_ijks if peak.size
    }

    # get study specific instances of each foci
    signal_studies = int(round(foci_percentage * n_studies))
    signal_ijks = {
        peak: np.argwhere(prob_map)[rng.choice(
            np.argwhere(prob_map).shape[0],
            size=signal_studies,
            replace=True,
            p=prob_map[np.nonzero(prob_map)] /
            sum(prob_map[np.nonzero(prob_map)]),
        )]
        for peak, prob_map in foci_prob_maps.items()
    }

    # reshape foci coordinates to be study specific
    paired_signal_ijks = (np.transpose(np.array(list(signal_ijks.values())),
                                       axes=(1, 0, 2)) if signal_ijks else
                          (None, ))

    foci_dict = {}
    for study_signal_ijks, study in zip_longest(paired_signal_ijks,
                                                range(n_studies)):
        if study_signal_ijks is None:
            study_signal_ijks = np.array([[]])
            n_noise_foci = max(1, n_noise_foci)

        if n_noise_foci > 0:
            noise_ijks = possible_ijks[rng.choice(possible_ijks.shape[0],
                                                  n_noise_foci,
                                                  replace=True)]

            # add the noise foci ijks to the existing signal ijks
            foci_ijks = (np.unique(np.vstack([study_signal_ijks, noise_ijks]),
                                   axis=0)
                         if np.any(study_signal_ijks) else noise_ijks)
        else:
            foci_ijks = study_signal_ijks

        # transform ijk voxel coordinates to xyz mm coordinates
        foci_xyzs = [vox2mm(ijk, template_img.affine) for ijk in foci_ijks]
        foci_dict[study] = foci_xyzs

    ground_truth_foci_xyz = [
        tuple(vox2mm(ijk, template_img.affine))
        for ijk in ground_truth_foci_ijks if np.any(ijk)
    ]
    return ground_truth_foci_xyz, foci_dict
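Two small details of this generator can be sketched in isolation (the values below are arbitrary): percentage strings such as "60%" are converted to fractions, and candidate foci are drawn reproducibly from the in-mask voxel list using the caller-supplied RandomState.

import numpy as np

foci_percentage = "60%"
if isinstance(foci_percentage, str) and foci_percentage.endswith("%"):
    foci_percentage = float(foci_percentage[:-1]) / 100
print(foci_percentage)  # 0.6

rng = np.random.RandomState(1939)
possible_ijks = np.argwhere(np.ones((4, 4, 4)))  # stand-in for in-mask voxels
foci_idxs = np.unique(rng.choice(range(possible_ijks.shape[0]), 3, replace=True))
print(possible_ijks[foci_idxs])  # reproducible ground-truth voxel indices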