示例#1
0
def test_cyclone_train_on_train():
    """Run ``pairs.cyclone`` on its training data and compare to the reference.

    The data comes from ``datasets.leng15(mode='sorted')`` and the marker
    pairs from ``datasets.default_cc_marker()``. The ``max_class`` column of
    the result is evaluated against the module-level ``ref_prediction`` and
    every value of the resulting quality table must be within 0.1 of 1.0.
    """
    # Emit the banner exactly as before: one print call per line.
    banner = (
        "",
        "",
        "## Testing correctness of cyclone()",
        "",
        "# Testing algorithm on training data",
        "",
    )
    for line in banner:
        print(line)

    settings.enable_fastmath = True
    settings.verbosity = 4

    training_data = datasets.leng15(mode='sorted')

    scores = pairs.cyclone(
        training_data,
        datasets.default_cc_marker(),
        iterations=1000,
        min_iter=10,
        min_pairs=1,
    )
    print(scores)

    test_quality = utils.evaluate_prediction(
        prediction=scores['max_class'],
        reference=ref_prediction,
    )
    print(test_quality)

    # NOTE(review): the (4, 4) target presumably mirrors the layout of the
    # quality table returned by evaluate_prediction - confirm.
    observed = np.array(test_quality.values, dtype=float)
    expected = np.ones(shape=(4, 4))
    assert np.allclose(observed, expected, atol=0.1)
示例#2
0
def cyclone(
        adata,
        marker_pairs,
        gene_names,
        sample_names,
        iterations=1000,
        min_iter=100,
        min_pairs=50):
    """Assigns scores and predicted class to observations [Scialdone15]_ [Fechtner18]_.

    Calculates scores for each observation and each phase and assigns prediction
    based on marker pairs identified by sandbag.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    marker_pairs : `dict`
        Dictionary of marker pairs. See :func:`~scanpy.api.sandbag` output.
    gene_names: `list`
        List of genes.
    sample_names: `list`
        List of samples.
    iterations : `int`, optional (default: 1000)
        An integer scalar specifying the number of
        iterations for random sampling to obtain a cycle score.
    min_iter : `int`, optional (default: 100)
        An integer scalar specifying the minimum number of iterations
        for score estimation.
    min_pairs : `int`, optional (default: 50)
        An integer scalar specifying the minimum number of pairs
        for score estimation.

    Returns
    -------
    A :class:`~pandas.DataFrame` with samples as index and categories as columns with scores for each category for each
    sample and an additional column with the name of the max scoring category for each sample.

    If marker pairs contain only the cell cycle categories G1, S and G2M an additional column
    ``pypairs_cc_prediction`` will be added. Where category S is assigned to samples where G1 and G2M score are
    below 0.5.

    Raises
    ------
    ImportError
        If `pypairs` cannot be imported, or if the installed version is
        older than v3.0.9.
    """
    import re

    # Only the import itself lives in the ``try``: the version error raised
    # further down must not be swallowed and replaced by the
    # "not installed" message.
    try:
        from pypairs import __version__ as pypairsversion
    except ImportError as err:
        raise ImportError('You need to install the package `pypairs`.') from err

    def _release(version_string):
        # Leading dotted-number part as an int tuple, with or without a "v"
        # prefix ("v3.0.9" -> (3, 0, 9)). Anything after it (local version
        # tags, git hashes) is ignored. Unparsable input yields (), which
        # sorts below every real release.
        found = re.match(r'v?(\d+(?:\.\d+)*)', str(version_string).strip())
        if found is None:
            return ()
        return tuple(int(part) for part in found.group(1).split('.'))

    # Compared without distutils (removed in Python 3.12).
    if _release(pypairsversion) < _release('v3.0.9'):
        raise ImportError('Please only use `pypairs` >= v3.0.9 ')

    from pypairs.pairs import cyclone
    from . import settings
    from pypairs import settings as pp_settings

    # Forward this package's settings to pypairs before running.
    pp_settings.verbosity = settings.verbosity
    pp_settings.n_jobs = settings.n_jobs
    pp_settings.writedir = settings.writedir
    pp_settings.cachedir = settings.cachedir
    pp_settings.logfile = settings.logfile

    return cyclone(
        data=adata,
        marker_pairs=marker_pairs,
        gene_names=gene_names,
        sample_names=sample_names,
        iterations=iterations,
        min_iter=min_iter,
        min_pairs=min_pairs,
    )
示例#3
0
def cyclone(
    adata: AnnData,
    marker_pairs: Optional[Mapping[str, Collection[Tuple[str, str]]]] = None,
    *,
    iterations: int = 1000,
    min_iter: int = 100,
    min_pairs: int = 50,
) -> pd.DataFrame:
    """\
    Score observations per category and predict a class [Scialdone15]_ [Fechtner18]_.

    For every observation a score is computed for each phase, and a prediction
    is assigned based on the marker pairs identified by
    :func:`~scanpy.external.tl.sandbag`.

    This follows the approach of [Scialdone15]_ as implemented by
    [Fechtner18]_; the computation itself is delegated to
    `pypairs.pairs.cyclone`.

    Parameters
    ----------
    adata
        The annotated data matrix.
    marker_pairs
        Mapping of categories to lists of marker pairs.
        See :func:`~scanpy.external.tl.sandbag` output.
    iterations
        Number of iterations for random sampling to obtain a cycle score.
    min_iter
        Minimum number of iterations for score estimation.
    min_pairs
        Minimum number of pairs for score estimation.

    Returns
    -------
    A :class:`~pandas.DataFrame` with samples as index and categories as
    columns, holding the score of each category for each sample, plus an
    additional column naming the highest-scoring category of each sample.

    If `marker_pairs` contains only the cell cycle categories G1, S and G2M,
    an additional column `pypairs_cc_prediction` is added, in which category
    S is assigned to samples whose G1 and G2M scores are both < 0.5.
    """
    # Defined elsewhere in this module; presumably fails early when pypairs
    # is missing or unsuitable - confirm against its definition.
    _check_import()
    from pypairs.pairs import cyclone as _pypairs_cyclone
    from pypairs import settings as pp_settings

    # Forward the relevant settings to pypairs, in the same order as before.
    for option in ("verbosity", "n_jobs", "writedir", "cachedir", "logfile"):
        setattr(pp_settings, option, getattr(settings, option))

    call_kwargs = dict(
        data=adata,
        marker_pairs=marker_pairs,
        iterations=iterations,
        min_iter=min_iter,
        min_pairs=min_pairs,
    )
    return _pypairs_cyclone(**call_kwargs)