def test_cyclone_train_on_train():
    """Run ``pairs.cyclone`` on the leng15 training set and check its quality.

    The prediction (``max_class`` column of the cyclone result) is evaluated
    against ``ref_prediction`` and every entry of the resulting 4x4 quality
    table must be within 0.1 of 1.0.

    NOTE(review): ``ref_prediction`` is not defined in this function; it is
    presumably a module-level fixture -- confirm against the rest of the file.
    """
    banner = (
        "",
        "",
        "## Testing correctness of cyclone()",
        "",
        "# Testing algorithm on training data",
        "",
    )
    for banner_line in banner:
        print(banner_line)

    # Global configuration for this run (set before any data is loaded).
    settings.enable_fastmath = True
    settings.verbosity = 4

    train_set = datasets.leng15(mode='sorted')
    marker = datasets.default_cc_marker()

    result = pairs.cyclone(
        train_set,
        marker,
        iterations=1000,
        min_iter=10,
        min_pairs=1,
    )
    print(result)

    quality = utils.evaluate_prediction(
        prediction=result['max_class'],
        reference=ref_prediction,
    )
    print(quality)

    observed = np.array(quality.values, dtype=float)
    expected = np.ones(shape=(4, 4))
    assert np.allclose(observed, expected, atol=0.1)
def cyclone(
        adata,
        marker_pairs,
        gene_names,
        sample_names,
        iterations=1000,
        min_iter=100,
        min_pairs=50):
    """Assigns scores and predicted class to observations [Scialdone15]_ [Fechtner18]_.

    Calculates scores for each observation and each phase and assigns prediction
    based on marker pairs identified by sandbag.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    marker_pairs : `dict`
        Dictionary of marker pairs. See :func:`~scanpy.api.sandbag` output.
    gene_names: `list`
        List of genes.
    sample_names: `list`
        List of samples.
    iterations : `int`, optional (default: 1000)
        An integer scalar specifying the number of
        iterations for random sampling to obtain a cycle score.
    min_iter : `int`, optional (default: 100)
        An integer scalar specifying the minimum number of iterations
        for score estimation
    min_pairs : `int`, optional (default: 50)
        An integer scalar specifying the minimum number of pairs
        for score estimation

    Returns
    -------
    A :class:`~pandas.DataFrame` with samples as index and categories as columns
    with scores for each category for each sample and an additional column with
    the name of the max scoring category for each sample.

    If marker pairs contain only the cell cycle categories G1, S and G2M an
    additional column ``pypairs_cc_prediction`` will be added. Where category S
    is assigned to samples where G1 and G2M score are below 0.5.

    Raises
    ------
    ImportError
        If `pypairs` cannot be imported, or if the installed version is older
        than v3.0.9.
    """
    import re

    # Guard ONLY the import itself. The version check below also raises
    # ImportError; keeping it outside this `try` stops it from being caught
    # and re-reported as "package not installed".
    try:
        from pypairs import __version__ as pypairsversion
    except ImportError as err:
        raise ImportError('You need to install the package `pypairs`.') from err

    def _numeric_parts(version_string):
        # "v3.0.9" -> (3, 0, 9). Only the integer components are compared, so
        # an optional leading "v" does not matter and no dependency on
        # `distutils` (deprecated by PEP 632) is needed.
        return tuple(int(part) for part in re.findall(r'\d+', str(version_string)))

    if _numeric_parts(pypairsversion) < _numeric_parts("v3.0.9"):
        raise ImportError('Please only use `pypairs` >= v3.0.9 ')

    from pypairs.pairs import cyclone
    from . import settings
    from pypairs import settings as pp_settings

    # Mirror this package's settings onto pypairs before running.
    pp_settings.verbosity = settings.verbosity
    pp_settings.n_jobs = settings.n_jobs
    pp_settings.writedir = settings.writedir
    pp_settings.cachedir = settings.cachedir
    pp_settings.logfile = settings.logfile

    return cyclone(
        data=adata,
        marker_pairs=marker_pairs,
        gene_names=gene_names,
        sample_names=sample_names,
        iterations=iterations,
        min_iter=min_iter,
        min_pairs=min_pairs,
    )
def cyclone(
    adata: AnnData,
    marker_pairs: Optional[Mapping[str, Collection[Tuple[str, str]]]] = None,
    *,
    iterations: int = 1000,
    min_iter: int = 100,
    min_pairs: int = 50,
) -> pd.DataFrame:
    """\
    Scores observations per category and predicts a class [Scialdone15]_ [Fechtner18]_.

    For every observation a score is computed for each phase, and a prediction
    is assigned from the marker pairs identified by
    :func:`~scanpy.external.tl.sandbag`.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    Parameters
    ----------
    adata
        The annotated data matrix.
    marker_pairs
        Mapping of categories to lists of marker pairs.
        See :func:`~scanpy.external.tl.sandbag` output.
    iterations
        An integer scalar specifying the number of
        iterations for random sampling to obtain a cycle score.
    min_iter
        An integer scalar specifying the minimum number of iterations
        for score estimation.
    min_pairs
        An integer scalar specifying the minimum number of pairs
        for score estimation.

    Returns
    -------
    A :class:`~pandas.DataFrame` with samples as index and categories as columns
    with scores for each category for each sample and an additional column with
    the name of the max scoring category for each sample.

    If `marker_pairs` contains only the cell cycle categories G1, S and G2M an
    additional column `pypairs_cc_prediction` will be added.
    Where category S is assigned to samples where G1 and G2M score are < 0.5.
    """
    _check_import()
    from pypairs.pairs import cyclone as _pypairs_cyclone
    from pypairs import settings as pp_settings

    # Copy the relevant options from this package's settings onto pypairs,
    # in the same order as they are listed here.
    mirrored_options = ('verbosity', 'n_jobs', 'writedir', 'cachedir', 'logfile')
    for option in mirrored_options:
        setattr(pp_settings, option, getattr(settings, option))

    call_kwargs = {
        'data': adata,
        'marker_pairs': marker_pairs,
        'iterations': iterations,
        'min_iter': min_iter,
        'min_pairs': min_pairs,
    }
    return _pypairs_cyclone(**call_kwargs)