def test_validate_pairs_from_file(tmpdir):
    """Parse pairs from a TSV file and reject a file with a malformed row."""
    workdir = str(tmpdir)
    payloads = [factories.ExperimentFactory() for _ in range(3)]
    instance_a, instance_b, instance_c = [
        api.create_instance("experiments", **payload) for payload in payloads
    ]
    id_a, id_b, id_c = (
        instance["system_id"] for instance in (instance_a, instance_b, instance_c)
    )

    # well-formed file: a commented header followed by two two-column rows
    valid_path = join(workdir, "realpairdf.tsv")
    with open(valid_path, "w") as handle:
        handle.write("# I am the header\n")
        for left_id, right_id in ((id_a, id_b), (id_c, id_b)):
            handle.write("\t".join([left_id, right_id]) + "\n")

    # malformed file: a single column and no trailing newline
    invalid_path = join(workdir, "badpairdf.tsv")
    with open(invalid_path, "w") as handle:
        handle.write("\t".join([id_a]))

    # valid pair file
    parsed = validators.validate_pairs_from_file(None, None, valid_path)
    assert parsed[0] == ([instance_a], [instance_b])
    assert parsed[1] == ([instance_c], [instance_b])

    # invalid pair file
    with pytest.raises(click.UsageError):
        validators.validate_pairs_from_file(None, None, invalid_path)
Пример #2
0
    def import_bedfiles(
        cls, technique, targets_path, baits_path, assembly, species, description=None
    ):
        """
        Copy targets and baits BED files to the technique's storage dir and register them.

        Arguments:
            technique (str): technique slug.
            targets_path (str): path to targets bedfile.
            baits_path (str): path to baits bedfile.
            assembly (str): name of reference genome for bedfile.
            species (str): name of genome species.
            description (str): a description of the BED files.

        Returns:
            dict: updated technique instance as retrieved from API.

        Raises:
            click.UsageError: if a targets BED file is already registered for
                `assembly` in the technique's `reference_data`.
        """
        utils.check_admin()
        technique = api.get_instance("techniques", technique)
        targets_key = f"{assembly}_targets_bedfile"
        baits_key = f"{assembly}_baits_bedfile"

        if targets_key in technique["reference_data"]:
            # Only the targets key is checked above, so the baits entry may be
            # missing; use .get() so the intended UsageError is raised rather
            # than a KeyError while building the message.
            raise click.UsageError(
                f"Technique '{technique['slug']}' "
                f"has registered BED files for '{assembly}':\n"
                f'\n\t{technique["reference_data"][targets_key]}'
                f'\n\t{technique["reference_data"].get(baits_key)}'
            )

        # the BED files are stored under the technique's storage_url
        if not technique["storage_url"]:
            technique = update_storage_url("techniques", technique["pk"])

        api.create_instance("assemblies", name=assembly, species=species)
        beds_dir = join(technique["storage_url"], "bed_files", assembly)
        base_name = slugify(f'{technique["slug"]}.{assembly}')
        targets_dst = join(beds_dir, f"{base_name}.targets.bed")
        baits_dst = join(beds_dir, f"{base_name}.baits.bed")
        os.makedirs(beds_dir, exist_ok=True)

        for src, dst in [(targets_path, targets_dst), (baits_path, baits_dst)]:
            cls.echo_src_dst("Copying", src, dst)
            shutil.copy(src, dst)
            click.secho(f"\nProcessing {basename(dst)}...", fg="blue")
            cls.process_bedfile(dst)

        click.secho(f'\nSuccess! patching {technique["slug"]}...', fg="green")

        # The registered url points at `<dst>.gz`; presumably process_bedfile
        # writes that compressed file next to dst - verify against its
        # implementation.
        for key, dst in [(targets_key, targets_dst), (baits_key, baits_dst)]:
            technique["reference_data"][key] = {
                "url": dst + ".gz",
                "description": description,
            }

        return api.patch_instance(
            endpoint="techniques",
            instance_id=technique["pk"],
            storage_usage=utils.get_tree_size(technique["storage_url"]),
            reference_data=technique["reference_data"],
        )
Пример #3
0
def test_assert_run():
    """Run MockApplication on a single HUMAN experiment through utils.assert_run."""
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiment_data = factories.ExperimentFactory(projects=[project])
    experiment_data["sample"]["individual"]["species"] = "HUMAN"

    application = MockApplication()
    experiment = api.create_instance("experiments", **experiment_data)
    ran = utils.assert_run(
        application=application, tuples=[([experiment], [])], commit=True
    )
    assert ran
Пример #4
0
def test_get_experiments_from_default_cli_options(tmpdir):
    """Invoke the application CLI combining all default experiment options."""
    app = ExperimentsFromDefaulCLIApplication()  # the instance is not used below
    experiments = []
    for _ in range(4):
        experiment_data = factories.ExperimentFactory()
        experiments.append(api.create_instance("experiments", **experiment_data))

    analysis_data = {
        **factories.AnalysisFactory(),
        "targets": experiments,
        "references": experiments,
    }
    analysis = api.create_instance("analyses", **analysis_data)
    first, second, third, fourth = experiments

    pairs_file = tmpdir.join("pairs.txt")
    pairs_file.write("\t".join([second.system_id, first.system_id]) + "\n")

    # get coverage for invalid experiments
    api.patch_instance(
        "experiments", first.system_id, notes="raise validation error"
    )

    cli_args = ["--pair", first.system_id, second.system_id]
    cli_args += ["--pairs", third.system_id, fourth.system_id]
    cli_args += ["--targets-filters", "pk", fourth.pk]
    cli_args += ["--references-filters", "pk", third.pk]
    cli_args += ["--analyses-filters", "pk", analysis.pk]
    cli_args += ["--pairs-from-file", str(pairs_file)]

    command = ExperimentsFromDefaulCLIApplication.as_cli_command()
    runner = CliRunner()
    result = runner.invoke(command, cli_args, catch_exceptions=False)
    output = result.output
    assert first.system_id in output
    assert "INVALID" in output

    # just get coverage for get_job_name
    job_name = ExperimentsFromDefaulCLIApplication.get_job_name(analysis)
    assert job_name
Пример #5
0
def test_unique_analysis_per_individual_app(tmpdir):
    """Run the unique-per-individual applications on experiments sharing one sample.

    Each run is expected to yield exactly one analysis whose number of targets
    matches the number of experiments passed in.
    """
    # all four experiments are built from the same sample (and its individual)
    individual = factories.IndividualFactory(species="HUMAN")
    sample = factories.SampleFactory(individual=individual)
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiments = [
        factories.ExperimentFactory(identifier=str(i),
                                    sample=sample,
                                    projects=[project]) for i in range(4)
    ]

    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]
    # a single tuple: every experiment is a target and there are no references
    tuples = [(experiments, [])]
    application = UniquePerIndividualApplication()
    ran_analyses, _, __ = application.run(tuples, commit=True)

    # one analysis with four targets; its result key is the individual's system_id
    assert len(ran_analyses) == 1
    assert "analysis_result_key" in ran_analyses[0][0]["results"]
    assert len(ran_analyses[0][0].targets) == 4
    assert (ran_analyses[0][0]["results"].analysis_result_key ==
            experiments[0].sample.individual.system_id)

    # same tuples, now with the UniquePerIndividualProtectResultsFalse application
    application = UniquePerIndividualProtectResultsFalse()
    ran_analyses, _, __ = application.run(tuples, commit=True)
    assert len(ran_analyses) == 1
    assert "analysis_result_key" in ran_analyses[0][0]["results"]
    assert len(ran_analyses[0][0].targets) == 4

    # test application_protect_results false: rerun with only two of the experiments
    tuples = [(experiments[:2], [])]
    application = UniquePerIndividualProtectResultsFalse()
    ran_analyses, _, __ = application.run(tuples, commit=True)

    assert len(ran_analyses) == 1
    assert "analysis_result_key" in ran_analyses[0][0]["results"]
    assert len(ran_analyses[0][0].targets) == 2

    # test application_protect_results false when going back from two to four
    # experiments (adding more samples) - do not remove this test
    tuples = [(experiments, [])]
    application = UniquePerIndividualProtectResultsFalse()
    ran_analyses, _, __ = application.run(tuples, commit=True)

    assert len(ran_analyses) == 1
    assert "analysis_result_key" in ran_analyses[0][0]["results"]
    assert len(ran_analyses[0][0].targets) == 4

    # the CLI command should report the experiment selected with the -fi filter
    runner = CliRunner()
    command = UniquePerIndividualApplication.as_cli_command()
    result = runner.invoke(command,
                           ["-fi", "system_id", experiments[0].system_id],
                           catch_exceptions=False)
    assert experiments[0].system_id in result.output
Пример #6
0
def test_get_instances():
    """Exercise the retrieval helpers for techniques, experiments, projects and trees."""
    technique_data = factories.TechniqueFactory()
    technique = api.create_instance("techniques", **technique_data)
    by_name = api.get_instances("techniques", [technique.name])
    assert by_name[0].pk == technique.pk

    experiment_data = factories.ExperimentFactory()
    experiment = api.create_instance("experiments", **experiment_data)
    individual = experiment.sample.individual
    project = experiment.projects[0]

    found_experiments = api.get_experiments([experiment.pk])
    assert found_experiments[0].pk == experiment.pk

    found_projects = api.get_projects([project.pk])
    assert found_projects[0].pk == project.pk

    tree = api.get_tree(individual.pk)
    assert tree.pk == individual.pk

    trees = api.get_trees([individual.pk])
    assert trees[0].pk == individual.pk
Пример #7
0
def test_failed_signal():
    """Follow a failed signal from creation, through a traceback update, to deletion."""
    analysis = api.create_instance("analyses", **factories.AnalysisFactory())
    target_pk = analysis.pk

    def stored_signals():
        # signals currently registered against this analysis
        return api.get_instances(
            endpoint="signals", target_endpoint="analyses", target_id=target_pk
        )

    # check signals work and nothing is created
    api._run_signals("analyses", analysis, [besuhof_signal])
    assert len(stored_signals()) == 0

    # check signals failed
    analysis = api.patch_instance("analyses", analysis.pk, notes="please fail")
    api._run_signals("analyses", analysis, [besuhof_signal])
    signals = stored_signals()
    assert len(signals) == 1
    assert _FAILED_SIGNAL_MESSAGE in signals[0].data["failure_traceback"]

    # assert that error traceback is updated
    runner = CliRunner()
    rerun_args = [
        "-fi", "target_endpoint", "analyses",
        "-fi", "target_id", f"{analysis.pk}",
    ]
    api.patch_instance("analyses", analysis.pk, notes="fail with different msg")
    runner.invoke(commands.rerun_signals, rerun_args, catch_exceptions=False)
    signals = stored_signals()
    assert len(signals) == 1
    assert "but with a different msg..." in signals[0].data["failure_traceback"]

    # assert that signal is deleted after no failure is detected
    api.patch_instance("analyses", analysis.pk, notes="")
    runner.invoke(commands.rerun_signals, rerun_args, catch_exceptions=False)
    assert len(stored_signals()) == 0
def test_get_bed():
    """Check get_bed output with no, one and several registered targets BED files."""
    runner = CliRunner()
    technique_data = factories.TechniqueFactory()
    technique = api.create_instance("techniques", **technique_data)
    args = [str(technique.pk)]

    def get_bed_output():
        # run the command for the technique and return what it printed
        return runner.invoke(commands.get_bed, args, catch_exceptions=False).output

    def register(reference_data):
        api.patch_instance("techniques", technique.pk, reference_data=reference_data)

    assert "No BED files" in get_bed_output()

    register({"test_targets_bedfile": {"url": "/hello/world"}})
    assert "/hello/world" in get_bed_output()

    register(
        {
            "test_targets_bedfile": {"url": "/hello/world"},
            "another_targets_bedfile": {"url": "/hello/world"},
        }
    )
    assert "Multiple BEDs" in get_bed_output()
Пример #9
0
def create_experiment(
    bam,
    bedfile="/a/fake/bedfile",
    assembly="GRCh37",
    raw_data=None,
    method="TD",
    species="HUMAN",
    category=None,
    technique_name=None,
    sample=None,
):
    """Create an experiment, with a bam and BED files registered, for testing."""
    # technique gets a random name unless one is given
    name = technique_name or str(uuid.uuid4())
    technique = factories.TechniqueFactory(method=method, name=name)

    # the same BED entry is registered as both targets and baits
    bed_file_dict = dict(url=bedfile, description="")
    for bed_type in ("targets", "baits"):
        technique["reference_data"][f"{assembly}_{bed_type}_bedfile"] = bed_file_dict

    experiment = factories.ExperimentFactory(technique=technique)

    # force sample if provided, otherwise adjust the generated one
    if sample:  # pragma: no cover
        experiment["sample"] = sample
    else:
        generated = experiment["sample"]
        category = category or generated["category"]
        generated["individual"]["species"] = species
        generated["category"] = category

    experiment["raw_data"] = raw_data or []
    experiment["bam_files"][assembly] = dict(url=bam, analysis=1)
    return api.create_instance("experiments", **experiment)
Пример #10
0
def test_local_reference_genome_importer(tmpdir):
    """Run the reference genome importer CLI on a dummy FASTA and print its output."""
    runner = CliRunner()
    command = data.LocalReferenceGenomeImporter.as_cli_command()

    fasta = tmpdir.join("test.fasta")
    fasta.write("foo")

    # register an assembly with a random, unique name
    assembly_name = str(uuid.uuid4())
    assembly_data = factories.AssemblyFactory(name=assembly_name)
    api.create_instance("assemblies", **assembly_data)

    cli_args = ["--genome-path", fasta.strpath, "--dont-index"]
    cli_args += ["--assembly", assembly_name]
    result = runner.invoke(command, cli_args, catch_exceptions=False)
    print(result.output)
Пример #11
0
def test_run_web_signals():
    """Register resume and force signals for an analysis and run them from the CLI."""
    application = MockApplication().application
    analysis_data = factories.AnalysisFactory(
        targets=[factories.ExperimentFactory()], application=application
    )
    analysis = api.create_instance("analyses", **analysis_data)

    runner = CliRunner()
    args = ["-fi", "target_id", analysis.pk, "-fi", "target_endpoint", "analyses"]
    signal_imports = (
        "isabl_cli.signals.resume_analysis_signal",
        "isabl_cli.signals.force_analysis_signal",
    )

    # register each signal in turn and run the pending web signals
    for import_string in signal_imports:
        api.create_instance(
            "signals",
            import_string=import_string,
            target_endpoint="analyses",
            target_id=analysis.pk,
        )
        output = runner.invoke(
            commands.run_web_signals, args, catch_exceptions=False
        ).output
        assert str(analysis.pk) in output
        assert "SUCCEEDED" in output

    # increase coverage on get_result
    mock_application = MockApplication()
    target = api.get_experiments([analysis.targets[0].pk])[0]
    assert mock_application.get_result(
        experiment=target,
        application_key=application.pk,
        result_key="analysis_result_key",
        application_name=str(MockApplication),
    )
Пример #12
0
def test_system_id():
    """Check system_id lookups and patching nested sample fields via an experiment."""
    data_a = factories.ExperimentFactory()
    data_b = factories.ExperimentFactory(sample=data_a["sample"])
    instance_a, instance_b = [
        api.create_instance("experiments", **payload) for payload in (data_a, data_b)
    ]
    system_ids = [instance["system_id"] for instance in (instance_a, instance_b)]

    # both experiments were created with the same sample
    assert instance_a["sample"]["pk"] == instance_b["sample"]["pk"]
    fetched = api.get_instance("experiments", system_ids[0])
    assert fetched["pk"] == instance_a["pk"]
    assert len(api.get_instances("experiments", system_ids)) == 2

    # patch the nested sample through the experiments endpoint
    sample = instance_a["sample"]
    sample["data"]["key"] = "value"
    sample["notes"] = "a note"
    patched = api.patch_instance("experiments", instance_a["pk"], sample=sample)
    patched_sample = patched["sample"]
    assert patched_sample["data"]["key"] == "value"
    assert patched_sample["notes"] == "a note"
def test_validate_pairs(tmpdir):
    """Validate pairs of existing system ids and reject identifiers that do not exist."""
    payloads = [factories.ExperimentFactory() for _ in range(3)]
    instance_a, instance_b, instance_c = [
        api.create_instance("experiments", **payload) for payload in payloads
    ]

    # pairs that exist
    existing = [
        (left["system_id"], right["system_id"])
        for left, right in ((instance_a, instance_b), (instance_c, instance_b))
    ]
    parsed = validators.validate_pairs(existing)
    assert parsed[0] == ([instance_a], [instance_b])
    assert parsed[1] == ([instance_c], [instance_b])

    # pairs that dont exist
    missing = [("not_real", 0), (2, "fake")]
    with pytest.raises(exceptions.ValidationError):
        validators.validate_pairs(missing)
Пример #14
0
def test_get_bams():
    """Check get_bams output with no, one and several registered bam files."""
    runner = CliRunner()
    experiment_data = factories.ExperimentFactory()
    experiment = api.create_instance("experiments", **experiment_data)
    args = [str(experiment.pk)]

    def get_bams_output(*extra):
        # run the command for the experiment, optionally with extra CLI options
        result = runner.invoke(
            commands.get_bams, args + list(extra), catch_exceptions=False
        )
        return result.output

    def register(bam_files):
        api.patch_instance("experiments", experiment.pk, bam_files=bam_files)

    assert "No bams for" in get_bams_output()

    verbose_output = get_bams_output("--verbose")
    assert experiment.system_id in verbose_output
    assert "None" in verbose_output

    register({"grch": {"url": "/hello/world", "analysis": 1}})
    assert "/hello/world" in get_bams_output()

    register(
        {
            "a1": {"url": "/hello/world", "analysis": 1},
            "a2": {"url": "/hello/mars", "analysis": 2},
        }
    )
    assert "Multiple bams" in get_bams_output()
    assert "/hello/mars" in get_bams_output("--assembly", "a2")
Пример #15
0
def create_test_result(application=None,
                       results=None,
                       targets=None,
                       references=None,
                       analyses=None):
    """Create and return an analysis with status SUCCEEDED.

    Falsy arguments fall back to empty containers, and to a freshly built
    application from the factory.
    """
    analysis_data = factories.AnalysisFactory(
        targets=targets if targets else [],
        references=references if references else [],
        analyses=analyses if analyses else [],
        application=application if application else factories.ApplicationFactory(),
        status="SUCCEEDED",
        results=results if results else {},
    )
    return api.create_instance("analyses", **analysis_data)
Пример #16
0
def test_import_bedfiles(tmpdir):
    """Import BED files for a technique and check a second import is rejected."""
    technique = api.create_instance("techniques", **factories.TechniqueFactory())
    species = "HUMAN"
    assembly = "AnAssembly"

    # both BED files are written unsorted (chromosome 2 before chromosome 1)
    unsorted_bed = "2\t1\t2\n1\t1\t2\n"
    targets = tmpdir.join("targets.bed")
    baits = tmpdir.join("baits.bed")
    targets.write(unsorted_bed)
    baits.write(unsorted_bed)

    runner = CliRunner()
    technique = data.LocalBedImporter.import_bedfiles(
        species=species,
        technique=technique["pk"],
        targets_path=targets.strpath,
        baits_path=baits.strpath,
        assembly="AnAssembly",
        description="these are test BED files",
    )

    for bed_type in ("targets", "baits"):
        bedfile = technique["reference_data"][f"AnAssembly_{bed_type}_bedfile"]["url"]
        uncompressed = bedfile.replace(".gz", "")
        assert os.path.isfile(bedfile)
        assert os.path.isfile(bedfile + ".tbi")
        assert os.path.isfile(uncompressed)

        get_bed_args = [str(technique.pk), "--bed-type", bed_type]
        result = runner.invoke(commands.get_bed, get_bed_args, catch_exceptions=False)
        assert bedfile in result.output

        with open(uncompressed, "r") as handle:  # test bed is sorted
            assert next(handle).startswith("1")

    # importing again for the same assembly should report the existing files
    command = data.LocalBedImporter.as_cli_command()
    import_args = ["--targets-path", targets.strpath, "--baits-path", baits.strpath]
    import_args += ["--technique", technique["pk"], "--assembly", assembly]
    import_args += ["--species", species, "--description", "Test"]
    result = runner.invoke(command, import_args, catch_exceptions=False)
    assert "has registered BED files for" in result.output
Пример #17
0
def test_patch_analyses_status():
    """Patch the status of several analyses at once, then delete what was created."""
    application = factories.ApplicationFactory()
    payloads = [factories.AnalysisFactory(application=application) for _ in range(2)]
    created = [api.create_instance("analyses", **payload) for payload in payloads]
    assert all(analysis["status"] == "CREATED" for analysis in created)

    pks = [analysis["pk"] for analysis in created]
    api.patch_analyses_status(created, "STAGED")
    retrieved = api.get_instances("analyses", pks)
    assert all(analysis["status"] == "STAGED" for analysis in retrieved)

    # clean up: analyses first, then the application they share
    for analysis in created:
        api.delete_instance("analyses", analysis["pk"])

    application_pk = created[0]["application"]["pk"]
    api.delete_instance("applications", application_pk)
Пример #18
0
def test_get_analyses_filters_option():
    """Check the analyses filters option resolves analyses and shows up in --help."""
    application = MockApplication().application
    analysis_data = factories.AnalysisFactory(application=application)
    analysis = api.create_instance("analyses", **analysis_data)

    @click.command()
    @options.get_analyses_filters_option([MockApplication])
    def command(analyses_filters):
        # print the primary key of every analysis received
        for found in analyses_filters:
            print(found.pk)

    runner = CliRunner()
    filtered = runner.invoke(command, ["-fi", "pk", analysis.pk])
    assert str(analysis.pk) in filtered.output

    help_result = runner.invoke(command, ["--help"])
    assert str(MockApplication) in help_result.output
Пример #19
0
def test_api_methods():
    """Exercise create, patch, get, count and delete on the diseases endpoint."""
    endpoint = "diseases"
    payloads = [factories.DiseaseFactory() for _ in range(3)]
    created = [api.create_instance(endpoint, **payload) for payload in payloads]
    pk = created[0]["pk"]
    pks = [instance["pk"] for instance in created[:2]]

    patched = api.patch_instance(endpoint, pk, data={"one": 1})
    assert patched["data"]["one"] == 1

    # retrieval by identifier and by filter
    assert api.get_instance(endpoint, pk)["pk"] == pk
    assert api.get_instances(endpoint, pk=pk)[0]["pk"] == pk
    assert api.get_instances_count(endpoint, pk=pk) == 1

    everything = api.get_instances(endpoint)
    assert len(everything) == api.get_instances_count(endpoint)

    # identifiers combined with filters
    assert len(api.get_instances(endpoint, pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks)) == 2
    assert len(api.get_instances(endpoint, pks, pk__in=pks[0])) == 1

    for instance in created:
        deleted = api.delete_instance(endpoint, instance["pk"])
        assert deleted is None

    headers = api.get_token_headers()
    assert headers["Authorization"]
Пример #20
0
def test_get_data(tmpdir):
    """Check get_data output before and after raw data is registered."""
    runner = CliRunner()
    experiment_data = factories.ExperimentFactory()
    experiment = api.create_instance("experiments", **experiment_data)
    experiment = data.update_storage_url("experiments", experiment.pk)
    args = [str(experiment.pk)]

    def output_of(command, *extra):
        # run `command` for the experiment, optionally with extra CLI options
        result = runner.invoke(command, args + list(extra), catch_exceptions=False)
        return result.output

    assert "No data for" in output_of(commands.get_data)

    # NOTE(review): this check invokes get_bams rather than get_data - confirm
    # whether that is intended.
    verbose_output = output_of(commands.get_bams, "--verbose")
    assert experiment.system_id in verbose_output
    assert "None" in verbose_output

    raw_data = [
        {"file_url": "/hello/world", "file_type": "TXT"},
        {"file_url": "/hello/mars", "file_type": "PNG"},
    ]
    api.patch_instance("experiments", experiment.pk, raw_data=raw_data)

    all_output = output_of(commands.get_data)
    assert "/hello/world" in all_output
    assert "/hello/mars" in all_output

    # restricting to TXT should drop the PNG file
    txt_output = output_of(commands.get_data, "--dtypes", "TXT")
    assert "/hello/mars" not in txt_output
Пример #21
0
def create_pair(
    tumor_bam,
    normal_bam,
    bedfile="/a/fake/bedfile",
    assembly="GRCh37",
    method="TD",
    species="HUMAN",
):
    """Create a TUMOR and a NORMAL experiment sharing one technique; return both."""
    bed_file_dict = dict(url=bedfile, description="")
    technique = factories.TechniqueFactory(method=method)

    # the same BED entry is registered as both targets and baits
    for bed_type in ("targets", "baits"):
        technique["reference_data"][f"{assembly}_{bed_type}_bedfile"] = bed_file_dict

    def _create(bam, category):
        # build and register one experiment of the given sample category
        experiment = factories.ExperimentFactory(technique=technique)
        sample = experiment["sample"]
        sample["individual"]["species"] = species
        sample["category"] = category
        experiment["bam_files"][assembly] = dict(url=bam, analysis=1)
        return api.create_instance("experiments", **experiment)

    tumor = _create(tumor_bam, "TUMOR")
    normal = _create(normal_bam, "NORMAL")
    return tumor, normal
Пример #22
0
def test_commands(tmpdir):
    """Exercise several CLI commands against analyses stored in the API.

    Covers process_finished, patch_status, get_metadata, get_paths, get_count,
    get_outdirs, get_results and patch_results.
    """
    # a FINISHED project level analysis whose storage directory is tmpdir
    analysis = api.create_instance(
        "analyses",
        project_level_analysis=factories.ProjectFactory(),
        storage_url=tmpdir.strpath,
        status="FINISHED",
        **factories.AnalysisFactory(ran_by=None),
    )

    # put a non-empty file in the storage directory
    path = tmpdir.join("test.path")
    path.write("not empty")

    # process_finished: the analysis ends up SUCCEEDED with a storage usage set
    runner = CliRunner()
    args = ["-fi", "pk", analysis["pk"]]
    runner.invoke(commands.process_finished, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])

    assert analysis["status"] == "SUCCEEDED"
    assert analysis["storage_usage"]

    # patch_status: set the status back to STAGED
    args = ["--key", analysis["pk"], "--status", "STAGED"]
    runner.invoke(commands.patch_status, args, catch_exceptions=False)
    analysis = api.get_instance("analyses", analysis["pk"])
    assert analysis["status"] == "STAGED"

    # get_metadata: request valid fields, an unknown field and an unknown
    # nested attribute
    args = [
        "analyses",
        "-fi",
        "pk",
        analysis["pk"],
        "-f",
        "pk",
        "-f",
        "application.name",
        "-f",
        "application",
        "-f",
        "carlos",
        "-f",
        "invalid.nested_attr",
    ]

    result = runner.invoke(commands.get_metadata, args, catch_exceptions=False)
    assert analysis["application"]["name"] in result.output
    assert "application.name" in result.output
    assert "INVALID KEY (carlos)" in result.output
    assert "INVALID KEY (nested_attr)" in result.output
    # the --json variant is only invoked; its output is not asserted
    result = runner.invoke(commands.get_metadata,
                           args + ["--json"],
                           catch_exceptions=False)

    # get_paths: with and without a file name pattern
    args = ["analyses", "-fi", "pk", analysis["pk"], "--pattern", "*.path"]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    assert "test.path" in result.output

    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_paths, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output

    # get_count: a single analysis matches the pk filter
    args = ["analyses", "-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_count, args, catch_exceptions=False)
    assert "1" in result.output

    # get_outdirs: with and without a file name pattern
    args = ["-fi", "pk", analysis["pk"]]
    result = runner.invoke(commands.get_outdirs, args, catch_exceptions=False)
    assert tmpdir.strpath in result.output
    result = runner.invoke(commands.get_outdirs,
                           args + ["--pattern", "*.path"],
                           catch_exceptions=False)
    assert "test.path" in result.output

    # use two experiments to increase coverage with project_results=
    # (both experiments share the same project and the same sample)
    project = api.create_instance("projects", **factories.ProjectFactory())
    experiment = factories.ExperimentFactory(projects=[project])
    experiment["sample"]["individual"]["species"] = "HUMAN"
    experiment_b = factories.ExperimentFactory(projects=[project])
    experiment_b["sample"] = experiment["sample"]
    analysis = utils.assert_run(
        application=MockApplication(),
        tuples=[
            ([api.create_instance("experiments", **experiment)], []),
            ([api.create_instance("experiments", **experiment_b)], []),
        ],
        commit=True,
        project_results=["project_result_key"],
    )[0]

    # get_results: list an application's results, then fetch one result by key
    args = ["--app-results", analysis.application.pk]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "command_script" in result.output
    args = ["-fi", "pk", analysis.pk, "-r", "command_script"]
    result = runner.invoke(commands.get_results, args, catch_exceptions=False)
    assert "head_job.sh" in result.output

    # patch_results: invoked with --force for the single analysis
    args = ["-fi", "pk", analysis.pk, "--force"]
    result = runner.invoke(commands.patch_results,
                           args,
                           catch_exceptions=False)
    assert "Retrieving 1 from analyses API endpoint" in result.output
Пример #23
0
def test_hello_world_app(tmpdir, datadir, commit):
    """Run HelloWorldApp on two experiments and, when committing, check the merges.

    `tmpdir`, `datadir` and `commit` are supplied by the test setup; presumably
    `commit` is parametrized or a fixture - verify against conftest.
    """
    # path to hello_world test data
    hello_world_datadir = join(datadir, "hello_world")
    raw_data = [
        dict(
            file_url=join(hello_world_datadir, "test.txt"),
            file_data=dict(extra="annotations"),
            file_type="FASTQ_R1",
        ),
    ]

    # create dummy experiment
    meta_data = factories.ExperimentFactory(raw_data=raw_data)
    meta_data["sample"]["individual"]["species"] = "HUMAN"
    meta_data["storage_url"] = hello_world_datadir
    # the identifier is passed explicitly below, so drop the generated one
    meta_data.pop("identifier", None)
    experiment_a = api.create_instance("experiments",
                                       identifier="a",
                                       **meta_data)
    # reuse the projects of the first experiment for the second one
    meta_data["projects"] = experiment_a.projects
    experiment_b = api.create_instance("experiments",
                                       identifier="b",
                                       **meta_data)

    # overwrite default configuration for the default client
    # NOTE(review): tmpdir.docker is not a standard py.path method; presumably
    # it is added by a pytest plugin - confirm.
    app = HelloWorldApp()
    app.application.settings.default_client = {
        "default_message": "Hello from Elephant Island.",
        "echo_path": tmpdir.docker("ubuntu", "echo")
    }

    # run application and make sure results are reported
    analyses = utils.assert_run(
        application=app,
        tuples=[([experiment_a], []), ([experiment_b], [])],
        commit=commit,
        results=["output", "count", "input"],
    )

    if commit:
        # assert individual level merge worked
        # (the merged count must equal the sum of the per-analysis counts)
        total_count = sum(i.results.count for i in analyses)
        individual_level_analyses = api.get_analyses(
            application__pk=app.individual_level_auto_merge_application.pk,
            individual_level_analysis=experiment_a.sample.individual.pk)

        assert len(individual_level_analyses) == 1
        assert individual_level_analyses[0].results.count == total_count

        # assert project level merged worked
        project_level_analyses = api.get_analyses(
            application__pk=app.project_level_auto_merge_application.pk,
            project_level_analysis=experiment_a.projects[0].pk)

        assert len(project_level_analyses) == 1
        assert project_level_analyses[0].results.count == total_count

        # assert we can rerun from command line given protect_results = False
        runner = CliRunner()
        params = [
            "--commit", "--message", "Hello, Im Shackleton", "-fi",
            "sample.individual", experiment_a.sample.individual.pk
        ]

        result = runner.invoke(app.as_cli_command(), params)
        assert "SUCCEEDED" in result.output
        assert "Submitting individual merge" in result.output
Пример #24
0
def test_engine(tmpdir):
    """
    Run `MockApplication` end to end and check the engine's main features.

    Four experiments sharing one sample and one project are registered and
    run with `commit=True`. The test then checks job names, bam registration,
    bam and bedfile validation errors, `get_results`, the command line
    options, the project and individual level merge analyses, and the
    `--force` / `--restart` flags.

    Arguments:
        tmpdir: pytest `tmpdir` fixture, used as the parent of a fake bam path.
    """
    # NOTE(review): this override of the module level defaults is never
    # restored, so it stays in place for tests that run afterwards - confirm
    # that this is intended.
    _DEFAULTS["DEFAULT_LINUX_GROUP"] = "not_a_group"

    individual = factories.IndividualFactory(species="HUMAN")
    sample = factories.SampleFactory(individual=individual)
    project = api.create_instance("projects", **factories.ProjectFactory())

    experiments = [
        factories.ExperimentFactory(identifier=str(i),
                                    sample=sample,
                                    projects=[project]) for i in range(4)
    ]

    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]

    # one tuple per experiment: a single target and no references
    tuples = [([i], []) for i in experiments]

    command = MockApplication.as_cli_command()
    application = MockApplication()
    ran_analyses, _, __ = application.run(tuples, commit=True)
    target = api.Experiment(ran_analyses[1][0].targets[0].pk)

    assert "analysis_result_key" in ran_analyses[1][0]["results"].keys()
    assert "analysis_result_key" in ran_analyses[2][0]["results"].keys()
    assert f'analysis: {ran_analyses[1][0]["pk"]}' in application.get_job_name(
        ran_analyses[1][0])

    # register a bam for the first experiment only
    bam = join(tmpdir, "fake.bam")
    application.update_experiment_bam_file(experiments[0], bam,
                                           ran_analyses[0][0].pk)
    assert bam in application.get_bams([experiments[0]])

    # get coverage for when there is no need to update the bam again
    assert application.update_experiment_bam_file(experiments[0], bam,
                                                  ran_analyses[0][0].pk)

    # the remaining experiments have no bam, the second one is reported
    with pytest.raises(exceptions.ValidationError) as error:
        application.validate_bams(experiments)

    assert (f"{experiments[1].system_id} has no registered bam for "
            f"{application.assembly.name}" in str(error.value))

    with pytest.raises(exceptions.ValidationError) as error:
        application.validate_bedfiles(experiments)

    assert f"{experiments[0].system_id} has no registered bedfile" in str(
        error.value)

    # test that get results work as expected
    assert application.get_results(
        result_key="analysis_result_key",
        experiment=target,
        application_key=application.primary_key,
    ) == [(1, ran_analyses[1][0].pk)]

    # check assertion error is raised when an invalid result is searched for
    with pytest.raises(AssertionError) as error:
        application.get_results(
            result_key="invalid_result_key",
            experiment=target,
            application_key=application.primary_key,
        )

    assert "Result 'invalid_result_key' not found for analysis" in str(
        error.value)

    # test options
    runner = CliRunner()
    result = runner.invoke(command, ["--help"], catch_exceptions=False)
    assert "This is a test application" in result.output
    assert "--commit" in result.output
    assert "--force" in result.output
    assert "--quiet" in result.output
    assert "--restart" in result.output
    assert "--url" in result.output

    runner = CliRunner()
    result = runner.invoke(command, ["--url"], catch_exceptions=False)
    assert "http://www.fake-test-app.org" in result.output

    # test get experiments from default cli options
    runner = CliRunner()
    result = runner.invoke(command,
                           ["-fi", "system_id", experiments[0].system_id],
                           catch_exceptions=False)
    assert experiments[0].system_id in result.output

    # check project level results
    pks = ",".join(str(i["pk"]) for i in experiments)
    args = ["-fi", "pk__in", pks]
    result = runner.invoke(command, args, catch_exceptions=False)
    analysis = application.get_project_level_auto_merge_analysis(project)
    merged = join(analysis["storage_url"], "test.merge")

    assert analysis[
        "status"] == "SUCCEEDED", f"Project Analysis failed {analysis}"
    assert "FAILED" in result.output
    assert "SUCCEEDED" in result.output
    assert "SKIPPED 3" in result.output
    assert "INVALID 1" in result.output
    assert isfile(merged)
    assert "project_result_key" in analysis["results"]

    with open(merged) as f:
        assert f.read().strip() == "2"

    # check individual level results
    analysis = application.get_individual_level_auto_merge_analysis(individual)
    merged = join(analysis["storage_url"], "test.merge")
    assert analysis[
        "status"] == "SUCCEEDED", f"Individual Analysis failed {analysis}"
    assert isfile(merged)
    assert "individual_result_key" in analysis["results"]

    with open(merged) as f:
        assert f.read().strip() == "2"

    # check passing command line args
    args = ["-fi", "pk__in", pks, "--commit", "--force"]
    result = runner.invoke(command, args)
    assert "--commit not required when using --force" in result.output

    args = ["-fi", "pk__in", pks, "--restart", "--force"]
    result = runner.invoke(command, args)
    assert "cant use --force and --restart together" in result.output

    args = ["-fi", "pk__in", pks, "--force"]
    result = runner.invoke(command, args)
    assert "trashing:" in result.output

    args = ["-fi", "pk__in", pks, "--restart", "--quiet"]
    result = runner.invoke(command, args)
    assert "FAILED" not in result.output

    with open(join(ran_analyses[0][0].storage_url, "head_job.log")) as f:
        assert "successfully restarted" in f.read()

    # Check that the options disappear when the class flags are off. The flags
    # live on the class, so snapshot what the class itself defines and put it
    # back afterwards; otherwise every later user of MockApplication would
    # keep seeing a CLI without --force and --restart.
    flag_names = ("cli_allow_force", "cli_allow_restart")
    own_flags = {
        name: vars(MockApplication)[name]
        for name in flag_names
        if name in vars(MockApplication)
    }

    try:
        MockApplication.cli_allow_force = False
        MockApplication.cli_allow_restart = False
        result = runner.invoke(MockApplication.as_cli_command(), ["--help"],
                               catch_exceptions=False)
        assert "--force" not in result.output
        assert "--restart" not in result.output
    finally:
        for name in flag_names:
            if name in own_flags:
                setattr(MockApplication, name, own_flags[name])
            elif name in vars(MockApplication):
                # the flag was inherited before, drop the override
                delattr(MockApplication, name)
Пример #25
0
def test_import_reference_data(tmpdir):
    """
    Import reference data for assemblies and techniques, by API and by CLI.

    For each model a fresh instance is registered, the same source file is
    imported once as a symlink and once without symlinking, a repeated CLI
    import of an existing data id is checked for its message, and the
    `get_reference` command is checked for both the url and the description.

    Arguments:
        tmpdir: pytest `tmpdir` fixture where the source file is written.
    """
    cases = [
        ("assemblies", str(uuid.uuid4()), factories.AssemblyFactory),
        ("techniques", str(uuid.uuid4()), factories.TechniqueFactory),
    ]

    for model_name, name, make_payload in cases:
        source_file = tmpdir.join("test.fasta")
        source_file.write("foo")
        api.create_instance(model_name, **make_payload(name=name))

        # first import: ask for a symlink
        linked = data.LocalReferenceDataImporter.import_data(
            identifier=name,
            data_src=source_file.strpath,
            description="test description",
            data_id="reference_link",
            symlink=True,
            model=model_name,
        )
        link_record = linked["reference_data"]["reference_link"]

        assert os.path.islink(link_record["url"])
        assert link_record["description"] == "test description"

        # second import: same source under another data id, no symlink
        moved = data.LocalReferenceDataImporter.import_data(
            identifier=name,
            data_src=source_file.strpath,
            description="test description",
            data_id="reference_move",
            model=model_name,
            symlink=False,
        )
        moved_url = moved["reference_data"]["reference_move"]["url"]

        assert os.path.isfile(moved_url)
        assert not os.path.islink(moved_url)

        # importing an already registered data id from the command line
        source_file.write("foo")
        import_command = data.LocalReferenceDataImporter.as_cli_command()
        cli_runner = CliRunner()
        outcome = cli_runner.invoke(
            import_command,
            [
                "--identifier",
                name,
                "--data-src",
                source_file.strpath,
                "--data-id",
                "reference_move",
                "--description",
                "Test",
                "--model",
                model_name,
            ],
            catch_exceptions=False,
        )
        assert "has already reference data registered" in outcome.output

        # the url of a given data id can be retrieved from the command line
        outcome = cli_runner.invoke(
            commands.get_reference,
            [str(moved.pk), "--data-id", "reference_link", "--model", model_name],
            catch_exceptions=False,
        )
        assert moved["reference_data"]["reference_link"]["url"] in outcome.output

        # and so can the registered resources
        outcome = cli_runner.invoke(
            commands.get_reference,
            [str(moved.pk), "--resources", "--model", model_name],
            catch_exceptions=False,
        )
        assert "test description" in outcome.output
Пример #26
0
def test_local_data_import(tmpdir):
    """
    Exercise `data.LocalDataImporter` through its Python API and its CLI.

    Four experiments are registered under one project and files named after
    their system ids are written to `tmpdir`. The test covers: no matching
    files, an ambiguous fastq name, paired fastq import, restricting the
    import with `dtypes`, duplicated identifiers, the import summary, the
    `--files-data` option and an invalid `-id` value.

    Arguments:
        tmpdir: pytest `tmpdir` fixture used as the only import directory.
    """
    dirs = [tmpdir.strpath]
    projects = [api.create_instance("projects", **factories.ProjectFactory())]
    experiments = [
        factories.ExperimentFactory(projects=projects) for _ in range(4)
    ]
    experiments = [
        api.create_instance("experiments", **i) for i in experiments
    ]
    keys = [i["pk"] for i in experiments]

    # the directory is still empty, the message is expected 4 times
    importer = data.LocalDataImporter()
    _, summary = importer.import_data(directories=dirs, pk__in=keys)
    obtained = summary.count("no files matched")
    assert obtained == 4

    # test can't determine type of fastq
    # (the file is created outside of pytest.raises so that only the import
    # itself can satisfy the expected error)
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}.fastq')
    path_1.write("foo")

    with pytest.raises(click.UsageError) as error:
        importer.import_data(directories=dirs, pk__in=keys)

    path_1.remove()
    assert "cant determine fastq type from" in str(error.value)

    # test imports fastq
    path_1 = tmpdir.join(f'{experiments[0]["system_id"]}_R1_foo.fastq')
    path_2 = tmpdir.join(f'{experiments[0]["system_id"]}_R2_foo.fastq')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "samples matched: 1" in summary
    assert api.Experiment(experiments[0].pk).get_fastq()

    # test can exclude formats
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}.bam')
    path_1.write("foo")
    path_2.write("foo")
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      dtypes=["BAM"])
    assert "FASTQ_R1" not in str(summary)
    assert "BAM" in str(summary)

    # test can import multiple formats
    _, summary = importer.import_data(directories=dirs,
                                      pk__in=keys,
                                      commit=True)
    assert "FASTQ_R1" in str(summary)
    assert "BAM" in str(summary)

    # test raise error if duplicated ids
    # (both experiments are patched first; a failure while patching must not
    # be mistaken for the error expected from the import)
    api.patch_instance("experiments",
                       experiments[2]["pk"],
                       identifier="dup_id")
    api.patch_instance("experiments",
                       experiments[3]["pk"],
                       identifier="dup_id")

    with pytest.raises(click.UsageError) as error:
        importer.import_data(key=lambda x: x["identifier"],
                             directories=dirs,
                             pk__in=keys)

    assert "same identifier for" in str(error.value)

    # test summary
    path_1 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram1_.cram')
    path_2 = tmpdir.join(f'_{experiments[2]["system_id"]}_cram2_.cram')
    path_3 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam1_.bam')
    path_4 = tmpdir.join(f'_{experiments[3]["system_id"]}_bam2_.bam')
    path_1.write("foo")
    path_2.write("foo")
    path_3.write("foo")
    path_4.write("foo")
    imported, summary = importer.import_data(directories=dirs,
                                             commit=True,
                                             symlink=True,
                                             pk__in=keys)

    project = api.get_instance("projects", projects[0]["pk"])
    assert project["storage_url"]
    assert imported[0]["storage_usage"] > 0
    assert imported[0]["raw_data"]
    assert imported[1]["raw_data"]
    assert "experiments" in imported[1]["storage_url"]
    assert len(os.listdir(os.path.join(imported[1]["storage_url"],
                                       "data"))) == 2
    assert "samples matched: 2" in summary
    assert "samples skipped: 2" in summary

    # test import data from command line and files_data functionality
    path_1 = tmpdir.join(f'{experiments[1]["system_id"]}_1.fastq')
    path_2 = tmpdir.join(f'{experiments[1]["system_id"]}_2.fastq')
    path_1.write("foo")
    path_2.write("foo")

    # clear the raw data registered above so the experiment is matched again
    api.patch_instance("experiments", experiments[1]["pk"], raw_data=None)
    file_data = tmpdir.join("file_data.yaml")

    # per file metadata, keyed by file name
    with open(file_data.strpath, "w") as f:
        yaml.dump(
            {
                os.path.basename(path_1.strpath): {
                    "PU": "TEST_PU"
                },
                os.path.basename(path_2.strpath): {
                    "PU": "TEST_PU"
                },
            },
            f,
            default_flow_style=False,
        )

    command = data.LocalDataImporter.as_cli_command()
    runner = CliRunner()

    # NOTE(review): `keys` is a list handed over as a single command line
    # argument - confirm the filter option is meant to receive it this way.
    args = [
        "-di",
        tmpdir.strpath,
        "-id",
        "system_id",
        "-fi",
        "pk__in",
        keys,
        "--files-data",
        file_data.strpath,
        "--commit",
    ]

    result = runner.invoke(command, args, catch_exceptions=False)
    assert "samples matched: 1" in result.output
    experiments[1] = api.get_instance("experiments", experiments[1]["pk"])
    assert experiments[1]["raw_data"][0]["file_data"]["PU"] == "TEST_PU"
    assert experiments[1]["raw_data"][1]["file_data"]["PU"] == "TEST_PU"

    # test import using invalid identifier
    args = ["-di", tmpdir.strpath, "-id", "sample", "-fi", "pk__in", keys]
    result = runner.invoke(command, args)
    assert "invalid type for identifier" in result.output