Example #1
def test_run_cookiecutter_isatab_ms_meta_biocrates(tmp_path):
    # Set up parameters
    output_path = tmp_path / "output_dir"
    parser, subparsers = setup_argparse()
    args = parser.parse_args(["isa-tpl", "ms_meta_biocrates", str(output_path)])

    # Create templates
    run_isatpl = run_cookiecutter(TEMPLATES["ms_meta_biocrates"], no_input=True)
    run_isatpl(args, parser, subparsers.choices[args.cmd])

    # Check output files
    assert output_path.exists()
    assert (output_path / "i_Investigation.txt").exists()
    assert (output_path / "a_investigation_title_Biocrates_MxP_Quant_500_Kit_FIA.txt").exists()
    assert (output_path / "a_investigation_title_Biocrates_MxP_Quant_500_Kit_LC.txt").exists()
    assert (output_path / "s_investigation_title.txt").exists()

    # TODO: run altamisa validation here, i.e. it should not throw exceptions or critical warnings.
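    # A minimal validation sketch, commented out and assuming altamisa's
    # InvestigationReader/InvestigationValidator API:
    #
    #   with open(output_path / "i_Investigation.txt", "rt") as inputf:
    #       investigation = InvestigationReader.from_stream(inputf).read()
    #   InvestigationValidator(investigation).validate()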

    # Test against reference files
    path_test = os.path.join(os.path.dirname(__file__), "data", "isa_tpl", "ms_meta_biocrates_01")
    files = glob.glob(os.path.join(path_test, "*"))
    match, mismatch, errors = filecmp.cmpfiles(
        path_test, output_path, (os.path.basename(f) for f in files), shallow=False
    )
    print([match, mismatch, errors])
    assert len(mismatch) == 0
    assert len(errors) == 0
Example #2
def test_run_snappy_itransfer_raw_data_smoke_test(mocker):
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    tsv_path = os.path.join(os.path.dirname(__file__), "data", "germline.out")
    argv = [
        "snappy",
        "itransfer-raw-data",
        "--num-parallel-transfers",
        "1",
        "--base-path",
        fake_base_path,
        tsv_path,
        dest_path,
    ]

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break both biomedsheets and multiprocessing.
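    # For reference, the avoided alternative would look roughly like this
    # (pyfakefs' fake_filesystem_unittest.Patcher; sketch only):
    #
    #   with fake_filesystem_unittest.Patcher() as patcher:
    #       patcher.fs.create_file(...)
    #
    # Patcher replaces file system access in every loaded module at once,
    # which is what breaks biomedsheets and multiprocessing here.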
    fs = fake_filesystem.FakeFilesystem()

    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/work/input_links/%s-N1-DNA1-WES1/%s-N1-DNA1-WES1.fastq.gz%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_raw_data.os", fake_os)

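    # mock.mock_open() is used here (and in the tests below) merely as a
    # conveniently pre-configured MagicMock factory for recording calls; no
    # file-handle semantics of the mock are exercised.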
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)

    assert not res
    # We do not care about call order; we simply test the call count and then assert that all files are there,
    # which is equivalent to comparing the sets of files.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path,
                               "ngs_mapping/work/input_links")).split("/", 1)
        remote_path = os.path.join(dest_path, index, "raw_data",
                                   args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
Example #3
def test_run_snappy_itransfer_raw_data_help(capsys):
    parser, subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["snappy", "itransfer-raw-data", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
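Example #4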
def test_run_seasnap_itransfer_results_help(capsys):
    parser, subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["sea-snap", "itransfer-results", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #5
def test_run_snappy_itransfer_variant_calling_help(capsys):
    parser, _subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["snappy", "itransfer-variant-calling", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #6
def test_run_archive_readme_help(capsys):
    parser, _subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["archive", "readme", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #7
def test_run_seasnap_pull_isa_help(capsys):
    parser, _subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["sea-snap", "pull-isa", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #8
def test_run_sodar_ingest_fastq_help(capsys):
    parser, _subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["sodar", "ingest-fastq", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #9
def test_run_seasnap_write_sample_info_help(capsys):
    parser, subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["sea-snap", "write-sample-info", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
Example #10
def test_run_seasnap_working_dir_help(capsys):
    parser, subparsers = setup_argparse()
    with pytest.raises(SystemExit) as e:
        parser.parse_args(["sea-snap", "working-dir", "--help"])

    assert e.value.code == 0

    res = capsys.readouterr()
    assert res.out
    assert not res.err
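Example #11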
def test_run_snappy_itransfer_ngs_mapping_nothing(capsys):
    parser, subparsers = setup_argparse()

    with pytest.raises(SystemExit) as e:
        parser.parse_args(["snappy", "itransfer-ngs-mapping"])

    assert e.value.code == 2

    res = capsys.readouterr()
    assert not res.out
    assert res.err
Example #12
def test_run_archive_copy_nothing(capsys):
    parser, _subparsers = setup_argparse()

    with pytest.raises(SystemExit) as e:
        parser.parse_args(["archive", "copy"])

    assert e.value.code == 2

    res = capsys.readouterr()
    assert not res.out
    assert res.err
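Example #13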
def test_run_snappy_itransfer_ngs_mapping_nostep(capsys):
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "snappy", "itransfer-step", "--sodar-api-token", "XXXX", sodar_uuid,
        "--tool", "bwa"
    ]

    parser, subparsers = setup_argparse()

    res = main(argv)
    assert res == 1
Example #14
def test_run_cookiecutter_isatab_germline(tmp_path):
    output_path = tmp_path / "output_dir"
    parser, subparsers = setup_argparse()
    args = parser.parse_args(["isa-tpl", "germline", str(output_path)])

    run_isatab_germline = run_cookiecutter(TEMPLATES["germline"], no_input=True)
    run_isatab_germline(args, parser, subparsers.choices[args.cmd])

    assert output_path.exists()
    assert (output_path / "i_Investigation.txt").exists()
    assert (output_path / "a_output_dir_exome_sequencing_nucleotide_sequencing.txt").exists()
    assert (output_path / "s_output_dir.txt").exists()
Example #15
def test_run_archive_summary_smoke_test():
    filename = "summary.tbl"
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
        target_file = os.path.join(repo_dir, filename)
        mocked_file = os.path.join(tmp_dir, filename)

        argv = [
            "archive",
            "summary",
            "--class",
            os.path.join(repo_dir, "classes.yaml"),
            os.path.join(repo_dir, "project"),
            mocked_file,
        ]
        setup_argparse()

        # --- run tests
        res = main(argv)
        assert not res

        mocked = [
            line.rstrip().split("\t") for line in open(mocked_file, "rt")
        ][1:]
        target = [
            line.rstrip().split("\t") for line in open(target_file, "rt")
        ][1:]
        assert len(mocked) == len(target)
        j = target[0].index("ResolvedName")
        failed = []
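        # For every row of the reference table, require that some row of the
        # generated table carries the same value in the column located j
        # positions from the end of the row.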
        for value in target:
            found = False
            for v in mocked:
                if v[-j] == value[-j]:
                    found = True
                    break
            if not found:
                failed.append(value)
        assert len(failed) == 0
Example #16
def test_run_archive_readme_smoke_test():
    with tempfile.TemporaryDirectory() as tmp_dir:
        project_name = "project"
        project_dir = os.path.join(os.path.dirname(__file__), "data",
                                   "archive", project_name)

        readme_path = os.path.join(tmp_dir, project_name, "README.md")

        argv = [
            "--sodar-server-url",
            "https://sodar.bihealth.,org",
            "archive",
            "readme",
            "--var-PI-name",
            "Maxene Musterfrau",
            "--var-archiver-name",
            "Eric Blanc",
            "--var-client-name",
            "Max Mustermann",
            "--var-SODAR-UUID",
            "00000000-0000-0000-0000-000000000000",
            "--var-Gitlab-URL",
            "https://cubi-gitlab.bihealth.org",
            "--var-start-date",
            "1970-01-01",
            "--no-input",
            project_dir,
            readme_path,
        ]
        setup_argparse()

        # --- run tests
        res = main(argv)
        assert not res

        assert cubi_tk.archive.readme.is_readme_valid(readme_path)
Example #17
def test_run_seasnap_pull_isa_smoke_test(requests_mock, capsys, fs):
    # --- setup arguments
    project_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = ["sea-snap", "pull-isa", "--sodar-api-token", "XXX", project_uuid]

    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)

    # --- add test content
    path_json = os.path.join(os.path.dirname(__file__), "data",
                             "isa_test.json")
    fs.add_real_file(path_json)
    with open(path_json, "rt") as inputf:
        json_text = inputf.read()

    # --- mock modules
    url = URL_TPL % {
        "sodar_url": args.sodar_url,
        "project_uuid": project_uuid,
        "api_key": "XXX"
    }
    requests_mock.get(url, text=json_text)

    # --- run tests
    res = main(argv)
    assert not res

    test_dir = os.path.join(os.path.dirname(__file__), "data",
                            "ISA_files_test")
    fs.add_real_directory(test_dir)
    files = glob.glob(os.path.join(test_dir, "*"))

    match, mismatch, errors = filecmp.cmpfiles("ISA_files",
                                               test_dir, (os.path.basename(f)
                                                          for f in files),
                                               shallow=False)
    print([match, mismatch, errors])
    assert len(mismatch) == 0
    assert len(errors) == 0

    res = capsys.readouterr()
    assert not res.err
Example #18
def test_run_seasnap_working_dir_smoke_test(capsys, fs):
    # --- setup arguments
    seasnap_dir = "fake_seasnap"
    seasnap_files = [
        "mapping_config.yaml",
        "DE_config.yaml",
        "cluster_config.json",
        "mapping_pipeline.snake",
        "sea-snap.py",
    ]

    argv = ["sea-snap", "working-dir", seasnap_dir]

    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)

    # --- add test files
    fs.create_dir(seasnap_dir)
    for f in seasnap_files:
        fs.create_file(os.path.join(seasnap_dir, f))

    # --- run tests
    res = main(argv)
    assert not res

    # test dir created
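    # args.dirname is a strftime pattern; expanding it with the current time
    # yields the directory name the command should have created.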
    wd = time.strftime(args.dirname)
    assert Path(wd).is_dir()

    # test files copied
    seasnap_files = seasnap_files[:3]
    for f in seasnap_files:
        p = os.path.join(wd, f)
        assert Path(p).is_file()

    # test symlink created
    p = os.path.join(wd, "sea-snap")
    assert Path(p).is_symlink()

    res = capsys.readouterr()
    assert not res.err
Example #19
def test_run_sodar_ingest_fastq_smoke_test(mocker, requests_mock):
    # --- setup arguments
    irods_path = "/irods/dest"
    landing_zone_uuid = "landing_zone_uuid"
    dest_path = "target/folder/generic_file.fq.gz"
    fake_base_path = "/base/path"
    argv = [
        "--verbose",
        "sodar",
        "ingest-fastq",
        "--num-parallel-transfers",
        "0",
        "--sodar-api-token",
        "XXXX",
        "--yes",
        "--remote-dir-pattern",
        dest_path,
        fake_base_path,
        landing_zone_uuid,
    ]

    parser, _subparsers = setup_argparse()
    args = parser.parse_args(argv)

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break biomedsheets.
    fs = fake_filesystem.FakeFilesystem()
    fake_os = fake_filesystem.FakeOsModule(fs)
    fake_pl = fake_pathlib.FakePathlibModule(fs)

    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append("%s/%s/%s-N1-RNA1-RNA_seq1.fastq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append("%s/%s/%s-N1-DNA1-WES1.fq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    # Remove sample1's MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    # --- mock modules
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.sea_snap.itransfer_results.pathlib", fake_pl)
    mocker.patch("cubi_tk.sea_snap.itransfer_results.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)

    mock_check_output = mock.MagicMock(return_value=0)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    mock_check_call = mock.MagicMock(return_value=0)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    mocker.patch("cubi_tk.sodar.ingest_fastq.pathlib", fake_pl)
    mocker.patch("cubi_tk.sodar.ingest_fastq.os", fake_os)

    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)

    # Necessary because otherwise independent tests fail.
    mock_value = mock.MagicMock()
    mocker.patch("cubi_tk.sodar.ingest_fastq.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)

    # requests mock
    return_value = dict(
        assay="",
        config_data="",
        configuration="",
        date_modified="",
        description="",
        irods_path=irods_path,
        project="",
        sodar_uuid="",
        status="",
        status_info="",
        title="",
        user=dict(sodar_uuid="", username="", name="", email=""),
    )
    url = os.path.join(args.sodar_url, "landingzones", "api", "retrieve",
                       args.destination)
    requests_mock.register_uri("GET", url, text=json.dumps(return_value))

    # --- run tests
    res = main(argv)

    assert not res

    # TODO: make mock check_output actually create the file?
    # assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "sample1-N1-DNA1-WES1.fq.gz"
    ], )

    assert mock_check_output.call_count == len(fake_file_paths) * 3
    remote_path = os.path.join(irods_path, dest_path)
    for path in fake_file_paths:
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = [
            "irsync", "-a", "-K", path, ("i:%s" + ext) % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]

        assert ((expected_mkdir_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_ils_argv, ), {
            "stderr": -2
        }) in mock_check_output.call_args_list
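Example #20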
def test_run_snappy_itransfer_ngs_mapping_smoke_test(mocker):
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    tsv_path = os.path.join(os.path.dirname(__file__), "data", "germline.out")
    argv = [
        "--verbose",
        "snappy",
        "itransfer-ngs-mapping",
        "--base-path",
        fake_base_path,
        tsv_path,
        dest_path,
    ]

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()

    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/out/%s-N1-DNA1-WES1.bam%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/log/bwa.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    # Remove index's log MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_ngs_mapping.os", fake_os)

    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)

    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)

    assert not res

    # We do not care about call order; we simply test the call count and then assert that all files are there,
    # which is equivalent to comparing the sets of files.

    assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.index-N1-DNA1-WES1.log"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )

    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path,
                               "ngs_mapping/output")).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(dest_path, index, "ngs_mapping",
                                   args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
Example #21
def test_run_archive_copy_smoke_test(mocker):
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")

        argv = [
            "archive",
            "copy",
            "--keep-workdir-hashdeep",
            os.path.join(repo_dir, "temp_dest_verif"),
            os.path.join(tmp_dir, "final_dest"),
        ]
        setup_argparse()

        # --- run tests
        res = main(argv)
        assert res == 0

        # --- remove timestamps from all hashdeep report & audit filenames
        now = datetime.date.today().strftime("%Y-%m-%d")
        prefix = os.path.join(tmp_dir, "final_dest")
        for fn in ["hashdeep_audit", "workdir_report", "workdir_audit"]:
            from_fn = "{}_{}.txt".format(now, fn)
            to_fn = "{}.txt".format(fn)
            os.rename(os.path.join(prefix, from_fn),
                      os.path.join(prefix, to_fn))

        # --- check that the workdir hashdeep report matches the reference
        (repo_titles, repo_body) = sort_hashdeep_title_and_body(
            os.path.join(repo_dir, "final_dest_verif", "workdir_report.txt"))
        (tmp_titles, tmp_body) = sort_hashdeep_title_and_body(
            os.path.join(tmp_dir, "final_dest", "workdir_report.txt"))
        assert repo_body == tmp_body

        # --- check audits
        for fn in ["hashdeep_audit", "workdir_audit"]:
            with open(os.path.join(repo_dir, "final_dest_verif", fn + ".txt"),
                      "r") as f:
                repo = sorted(f.readlines())
            with open(os.path.join(tmp_dir, "final_dest", fn + ".txt"),
                      "r") as f:
                tmp = sorted(f.readlines())
            assert repo == tmp

        # --- test all copied files, except the hashdeep report & audit, which can differ in line order
        prefix = os.path.join(repo_dir, "final_dest_verif")
        ref_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        ref_fns = filter(lambda x: not IGNORE_FILES_PATTERN.match(x), ref_fns)
        prefix = os.path.join(tmp_dir, "final_dest")
        test_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        test_fns = filter(lambda x: not IGNORE_FILES_PATTERN.match(x),
                          test_fns)

        matches, mismatches, errors = filecmp.cmpfiles(
            os.path.join(repo_dir, "final_dest_verif"),
            os.path.join(tmp_dir, "final_dest"),
            common=ref_fns,
            shallow=False,
        )
        assert len(matches) > 0
        assert sorted(errors) == [
            "extra_data/to_ignored_dir", "extra_data/to_ignored_file"
        ]
        assert sorted(mismatches) == ["pipeline/output/sample2"]

        assert os.path.exists(
            os.path.join(tmp_dir, "final_dest", "archive_copy_complete"))
Example #22
def test_run_snappy_itransfer_raw_data_smoke_test(mocker, minimal_config, germline_trio_sheet_tsv):
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "snappy",
        "itransfer-raw-data",
        "--num-parallel-transfers",
        "1",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        sodar_uuid,
    ]

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()

    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/work/input_links/%s-N1-DNA1-WES1/%s-N1-DNA1-WES1.fastq.gz%s"
                % (fake_base_path, member, member, ext)
            )
            fs.create_file(fake_file_paths[-1])

    # Create sample sheet in fake file system
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path, contents=germline_trio_sheet_tsv, create_missing_dirs=True)
    # Create config in fake file system
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path, contents=minimal_config, create_missing_dirs=True)

    # Set up mocks
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )
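    # my_exists and my_get_sodar_info are helpers defined elsewhere in this
    # test module; the latter presumably short-circuits the SODAR lookup and
    # returns the iRODS destination used in the assertions below.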

    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_raw_data.os", fake_os)

    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)

    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output", mock_check_output)

    # Actually exercise code and perform test.
    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)

    assert not res
    # We do not care about call order; we simply test the call count and then assert that all files are there,
    # which is equivalent to comparing the sets of files.
    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path, "ngs_mapping/work/input_links")
        ).split("/", 1)
        remote_path = os.path.join(dest_path, index, "raw_data", args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = ["irsync", "-a", "-K", path, "i:%s" % remote_path]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
Example #23
def test_run_snappy_itransfer_variant_calling_smoke_test(
        mocker, minimal_config, germline_trio_sheet_tsv):
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "--verbose",
        "snappy",
        "itransfer-variant-calling",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        # tsv_path,
        sodar_uuid,
    ]

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()

    fake_file_paths = []
    for member in ("index", ):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/out/bwa.gatk_hc.%s-N1-DNA1-WES1.vcf.gz%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/out/bwa.gatk_hc.%s-N1-DNA1-WES1.vcf.gz.tbi%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/variant_calling/output/bwa.gatk_hc.%s-N1-DNA1-WES1/log/bwa.gatk_hc.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
    # Create sample sheet in fake file system
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path,
                   contents=germline_trio_sheet_tsv,
                   create_missing_dirs=True)
    # Create config in fake file system
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path,
                   contents=minimal_config,
                   create_missing_dirs=True)

    # Print the paths of all created files
    print("\n".join(fake_file_paths + [sample_sheet_path, config_path]))

    # Remove the index's VCF MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    # Set up mocks
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )

    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_variant_calling.os", fake_os)

    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)

    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    # Actually exercise code and perform test.
    parser, _subparsers = setup_argparse()
    args = parser.parse_args(argv)
    res = main(argv)

    assert not res

    # We do not care about call order; we simply test the call count and then assert that all files are there,
    # which is equivalent to comparing the sets of files.

    assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.gatk_hc.index-N1-DNA1-WES1.vcf.gz"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )

    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path,
                               "variant_calling/output")).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(dest_path, index, "variant_calling",
                                   args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
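Example #24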
def test_run_seasnap_itransfer_results_smoke_test(mocker, fs):
    # --- setup arguments
    dest_path = "/irods/dest"
    fake_base_path = "/base/path"
    blueprint_path = os.path.join(os.path.dirname(__file__), "data",
                                  "test_blueprint.txt")

    argv = [
        "--verbose", "sea-snap", "itransfer-results", blueprint_path, dest_path
    ]

    parser, subparsers = setup_argparse()

    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/mapping/star/%s/out/star.%s-N1-RNA1-RNA-Seq1.bam%s" %
                (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/mapping/star/%s/report/star.%s-N1-RNA1-RNA-Seq1.log%s" %
                (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    fs.add_real_file(blueprint_path)
    Path(blueprint_path).touch()

    # Remove sample1's log MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    # --- mock modules
    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.sea_snap.itransfer_results.check_output",
                 mock_check_output)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    # Necessary because otherwise independent tests fail.
    mock_value = mock.mock_open()
    mocker.patch("cubi_tk.sea_snap.itransfer_results.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)

    # --- run tests
    res = main(argv)

    print(mock_check_output.call_args_list)

    assert not res

    assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "star.sample1-N1-RNA1-RNA-Seq1.log"
    ], )

    assert mock_check_output.call_count == len(fake_file_paths) * 2
    remote_path = os.path.join(dest_path, "fakedest")
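    # Unlike the snappy itransfer commands, which pass argv lists, the
    # sea-snap transfer builds whole shell command strings and runs them
    # with shell=True (note the {"shell": True} kwargs below).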
    for path in fake_file_paths:
        expected_mkdir_argv = f"imkdir -p $(dirname {remote_path} )"
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = f"irsync -a -K {path} {('i:%s' + ext) % remote_path}"

        assert ((expected_mkdir_argv, ), {
            "shell": True
        }) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), {
            "shell": True
        }) in mock_check_output.call_args_list
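Example #25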
def test_run_snappy_itransfer_ngs_mapping_smoke_test(mocker,
                                                     germline_trio_sheet_tsv,
                                                     minimal_config):
    fake_base_path = "/base/path"
    dest_path = "/irods/dest"
    sodar_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    argv = [
        "--verbose",
        "snappy",
        "itransfer-ngs-mapping",
        "--base-path",
        fake_base_path,
        "--sodar-api-token",
        "XXXX",
        sodar_uuid,
    ]

    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)

    # Set up a fake file system but only patch selected modules.  We cannot use the Patcher approach here, as it
    # would break both biomedsheets and multiprocessing.
    fs = fake_filesystem.FakeFilesystem()

    fake_file_paths = []
    for member in ("index", "father", "mother"):
        for ext in ("", ".md5"):
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/out/%s-N1-DNA1-WES1.bam%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append(
                "%s/ngs_mapping/output/bwa.%s-N1-DNA1-WES1/log/bwa.%s-N1-DNA1-WES1.log%s"
                % (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    # Create sample sheet in fake file system
    sample_sheet_path = fake_base_path + "/.snappy_pipeline/sheet.tsv"
    fs.create_file(sample_sheet_path,
                   contents=germline_trio_sheet_tsv,
                   create_missing_dirs=True)
    # Create config in fake file system
    config_path = fake_base_path + "/.snappy_pipeline/config.yaml"
    fs.create_file(config_path,
                   contents=minimal_config,
                   create_missing_dirs=True)

    # Print the paths of all created files
    print("\n".join(fake_file_paths + [sample_sheet_path, config_path]))

    # Remove index's log MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    # Set up mocks
    mocker.patch("pathlib.Path.exists", my_exists)
    mocker.patch(
        "cubi_tk.snappy.itransfer_common.SnappyItransferCommandBase.get_sodar_info",
        my_get_sodar_info,
    )

    fake_os = fake_filesystem.FakeOsModule(fs)
    mocker.patch("glob.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_common.os", fake_os)
    mocker.patch("cubi_tk.snappy.itransfer_ngs_mapping.os", fake_os)

    mock_check_output = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    fake_open = fake_filesystem.FakeFileOpen(fs)
    mocker.patch("cubi_tk.snappy.itransfer_common.open", fake_open)
    mocker.patch("cubi_tk.snappy.common.open", fake_open)

    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    # # requests mock
    # return_value = dict(assay="", config_data="", configuration="", date_modified="", description="", irods_path=sodar_path, project="", sodar_uuid="", status="", status_info="", title="", user="")
    # url_tpl = "%(sodar_url)s/landingzones/api/retrieve/%(landing_zone_uuid)s"
    # url = url_tpl % {"sodar_url": args.sodar_url, "landing_zone_uuid": args.landing_zone_uuid}
    # requests_mock.get(url, text=json.dumps(return_value))
    # #requests_mock.get("resource://biomedsheets//data/std_fields.json", text="dummy")
    # #requests_mock.get("resource://biomedsheets/data/std_fields.json#/extraInfoDefs/template/ncbiTaxon", text="dummy")

    # Actually exercise code and perform test.
    res = main(argv)

    assert not res

    # We do not care about call order; we simply test the call count and then assert that all files are there,
    # which is equivalent to comparing the sets of files.

    assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    mock_check_call.assert_called_once_with(
        ["md5sum", "bwa.index-N1-DNA1-WES1.log"],
        cwd=os.path.dirname(fake_file_paths[3]),
        stdout=ANY,
    )

    assert mock_check_output.call_count == len(fake_file_paths) * 3
    for path in fake_file_paths:
        mapper_index, rel_path = os.path.relpath(
            path, os.path.join(fake_base_path,
                               "ngs_mapping/output")).split("/", 1)
        _mapper, index = mapper_index.rsplit(".", 1)
        remote_path = os.path.join(dest_path, index, "ngs_mapping",
                                   args.remote_dir_date, rel_path)
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        expected_irsync_argv = [
            "irsync", "-a", "-K", path,
            "i:%s" % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]
        mock_check_output.assert_any_call(expected_mkdir_argv)
        mock_check_output.assert_any_call(expected_irsync_argv)
        mock_check_output.assert_any_call(expected_ils_argv, stderr=-2)
Example #26
def test_run_sodar_ingest_fastq_smoke_test(mocker, fs):
    # --- setup arguments
    irods_path = "/irods/dest"
    dest_path = "target/folder/generic_file.fq.gz"
    fake_base_path = "/base/path"
    argv = [
        "--verbose",
        "sodar",
        "ingest-fastq",
        "--yes",
        "--remote-dir-pattern",
        dest_path,
        fake_base_path,
        irods_path,
    ]

    parser, subparsers = setup_argparse()

    # --- add test files
    fake_file_paths = []
    for member in ("sample1", "sample2", "sample3"):
        for ext in ("", ".md5"):
            fake_file_paths.append("%s/%s/%s-N1-RNA1-RNA_seq1.fastq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])
            fake_file_paths.append("%s/%s/%s-N1-DNA1-WES1.fq.gz%s" %
                                   (fake_base_path, member, member, ext))
            fs.create_file(fake_file_paths[-1])

    # Remove sample1's MD5 file again so it is recreated.
    fs.remove(fake_file_paths[3])

    # --- mock modules
    mock_check_output = mock.mock_open()
    # mocker.patch("cubi_tk.sodar.ingest_fastq.check_output", mock_check_output)
    mocker.patch("cubi_tk.snappy.itransfer_common.check_output",
                 mock_check_output)

    mock_check_call = mock.mock_open()
    mocker.patch("cubi_tk.snappy.itransfer_common.check_call", mock_check_call)

    # Necessary because otherwise independent tests fail.
    mock_value = mock.mock_open()
    mocker.patch("cubi_tk.sodar.ingest_fastq.Value", mock_value)
    mocker.patch("cubi_tk.snappy.itransfer_common.Value", mock_value)

    # --- run tests
    res = main(argv)

    assert not res

    assert fs.exists(fake_file_paths[3])

    assert mock_check_call.call_count == 1
    assert mock_check_call.call_args[0] == ([
        "md5sum", "sample1-N1-DNA1-WES1.fq.gz"
    ], )

    assert mock_check_output.call_count == len(fake_file_paths) * 3 * 5
    remote_path = os.path.join(irods_path, dest_path)
    for path in fake_file_paths:
        expected_mkdir_argv = ["imkdir", "-p", os.path.dirname(remote_path)]
        ext = ".md5" if path.split(".")[-1] == "md5" else ""
        expected_irsync_argv = [
            "irsync", "-a", "-K", path, ("i:%s" + ext) % remote_path
        ]
        expected_ils_argv = ["ils", os.path.dirname(remote_path)]

        assert ((expected_mkdir_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_irsync_argv, ), ) in mock_check_output.call_args_list
        assert ((expected_ils_argv, ), {
            "stderr": -2
        }) in mock_check_output.call_args_list
Example #27
def test_run_seasnap_write_sample_info_smoke_test(capsys, requests_mock, fs):
    # --- setup arguments
    project_uuid = "466ab946-ce6a-4c78-9981-19b79e7bbe86"
    in_path_pattern = os.path.join(os.path.dirname(__file__), "data",
                                   "fastq_test", "{sample}_{mate,R1|R2}")

    argv = [
        "sea-snap",
        "write-sample-info",
        "--sodar-auth-token",
        "XXX",
        "--project_uuid",
        project_uuid,
        in_path_pattern,
        "-",
    ]

    parser, subparsers = setup_argparse()
    args = parser.parse_args(argv)

    # --- add test content and files
    path_json = os.path.join(os.path.dirname(__file__), "data",
                             "isa_test.json")
    fs.add_real_file(path_json)
    with open(path_json, "rt") as inputf:
        json_text = inputf.read()

    path_fastq_test = os.path.join(os.path.dirname(__file__), "data",
                                   "fastq_test")
    fs.add_real_directory(path_fastq_test)

    target_file = os.path.join(os.path.dirname(__file__), "data",
                               "sample_info_test.yaml")
    fs.add_real_file(target_file)

    # --- mock modules
    url = URL_TPL % {
        "sodar_url": args.sodar_url,
        "project_uuid": project_uuid,
        "api_key": "XXX"
    }
    requests_mock.get(url, text=json_text)

    # --- run as end-to-end test
    res = main(argv)
    assert not res

    # test content of generated file
    with open(target_file, "r") as f:
        expected_result = f.read()

    res = capsys.readouterr()
    assert not res.err

    assert expected_result == res.out

    # test whether ISA files were pulled correctly
    test_dir = os.path.join(os.path.dirname(__file__), "data",
                            "ISA_files_test")
    fs.add_real_directory(test_dir)
    files = glob.glob(os.path.join(test_dir, "*"))

    match, mismatch, errors = filecmp.cmpfiles("ISA_files",
                                               test_dir, (os.path.basename(f)
                                                          for f in files),
                                               shallow=False)
    print([match, mismatch, errors])
    assert len(mismatch) == 0
    assert len(errors) == 0
Example #28
def test_run_archive_prepare_smoke_test():
    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = os.path.join(os.path.dirname(__file__), "data", "archive")
        project_name = "project"

        argv = [
            "archive",
            "prepare",
            "--rules",
            os.path.join(repo_dir, "rules.yaml"),
            "--no-readme",
            os.path.join(repo_dir, project_name),
            os.path.join(tmp_dir, "temp_dest"),
        ]
        setup_argparse()

        # --- run tests
        res = main(argv)
        assert not res

        # --- normalize the hashdeep report filename timestamp to a fixed date
        os.rename(
            os.path.join(
                tmp_dir, "temp_dest",
                datetime.date.today().strftime(
                    "%Y-%m-%d_hashdeep_report.txt")),
            os.path.join(tmp_dir, "temp_dest",
                         "1970-01-01_hashdeep_report.txt"),
        )

        # --- compare hashdeep report with reference
        (repo_titles, repo_body) = sort_hashdeep_title_and_body(
            os.path.join(repo_dir, "temp_dest_verif",
                         "1970-01-01_hashdeep_report.txt"))
        (tmp_titles, tmp_body) = sort_hashdeep_title_and_body(
            os.path.join(tmp_dir, "temp_dest",
                         "1970-01-01_hashdeep_report.txt"))
        # No test on gzipped files, as the timestamp stored in the gzip format could differ
        assert repo_body == tmp_body

        prefix = os.path.join(repo_dir, "temp_dest_verif")
        ref_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        prefix = os.path.join(tmp_dir, "temp_dest")
        test_fns = [
            os.path.relpath(x, start=prefix) for x in filter(
                lambda x: os.path.isfile(x) or os.path.islink(x),
                glob.glob(prefix + "/**/*", recursive=True),
            )
        ]
        assert sorted(ref_fns) == sorted(test_fns)

        matches, mismatches, errors = filecmp.cmpfiles(
            os.path.join(repo_dir, "temp_dest_verif"),
            os.path.join(tmp_dir, "temp_dest"),
            common=ref_fns,
            shallow=False,
        )
        assert len(matches) > 0
        assert sorted(errors) == [
            "extra_data/to_ignored_dir", "extra_data/to_ignored_file"
        ]
        assert sorted(mismatches) == [
            "1970-01-01_hashdeep_report.txt", "pipeline/output/sample2"
        ]