Exemplo n.º 1
0
def test_pipe_gbr_usc10(tmp_path):
    """Run the ``pipe`` subcommand on the USC10 test data and check its outputs.

    The test verifies three things in the working directory:
    the ``gbr-usc`` typing result has no differences from the expected
    simulated profile, ``report.html`` exists, and ``analysis/summary.tsv``
    equals the expected summary table.
    """
    # A copy of the USC10 reference, indexed with bwa, is passed via --hg38.
    hg38 = str(tmp_path / "hg38-placeholder.fasta")
    copyfile(data_file("refr/usc10-refr.fna"), hg38)
    run(["bwa", "index", hg38])

    positional = [
        data_file("refr/usc10-refr.fna"),
        data_file("def/usc10-offsets.tsv"),
        data_file(""),
        "gbr-usc",
    ]
    options = ["--workdir", str(tmp_path), "--threads", "1", "--hg38", hg38]
    parser = microhapulator.cli.get_parser()
    args = parser.parse_args(["pipe", *positional, *options])
    microhapulator.cli.pipe.main(args)

    analysis_dir = tmp_path / "analysis"

    # The typing result must not differ from the expected profile.
    expected_profile = SimulatedProfile(fromfile=data_file("prof/gbr-usc10-sim.json"))
    observed_profile = TypingResult(
        fromfile=analysis_dir / "gbr-usc" / "gbr-usc-type.json"
    )
    assert not list(mhapi.diff(observed_profile, expected_profile))

    assert (tmp_path / "report.html").is_file()

    # The summary table must equal the stored expectation.
    expected_summary = pd.read_csv(data_file("gbr-usc-summary.tsv"), sep="\t")
    observed_summary = pd.read_csv(analysis_dir / "summary.tsv", sep="\t")
    assert observed_summary.equals(expected_summary)
Exemplo n.º 2
0
def test_diff_basic():
    """Check that diffing two similar simulated profiles reports two markers."""
    profile_a = SimulatedProfile(fromfile=data_file("prof/diff-comp-1.json"))
    profile_b = SimulatedProfile(fromfile=data_file("prof/diff-comp-2.json"))
    # Each entry: (marker, haplotypes only in the first, haplotypes only in the second)
    expected = [
        ("MHDBL000140", {"C,C,A,A"}, {"C,C,T,A"}),
        ("MHDBL000163", {"A,A,G,A,T"}, {"C,G,A,A,T"}),
    ]
    assert list(mhapi.diff(profile_a, profile_b)) == expected
Exemplo n.º 3
0
def test_diff_nonmatching_alleles():
    """Check the diff between the "strict" and "relaxed" red profiles.

    The expected result lists two markers whose first haplotype set is empty
    and whose second set is not.
    """
    strict = SimulatedProfile(fromfile=data_file("prof/red-strict-profile.json"))
    relaxed = SimulatedProfile(fromfile=data_file("prof/red-relaxed-profile.json"))
    expected = [
        ("mh07CP-004", set(), {"T,T,T,A,T", "A,A,T,A,T"}),
        ("mh09KK-157", set(), {"G,C,C,A,T"}),
    ]
    diff = list(mhapi.diff(strict, relaxed))
    print(diff)
    assert diff == expected
Exemplo n.º 4
0
def main(args):
    """Write the differences between two profiles to ``args.out``.

    Profiles are loaded from ``args.profile1`` and ``args.profile2``. For each
    differing marker the marker name is written on its own line, followed by
    the sorted haplotypes of the first diff set prefixed with ``>>>`` and then
    those of the second diff set prefixed with ``<<<``.
    """
    first = Profile(fromfile=args.profile1)
    second = Profile(fromfile=args.profile2)
    differ = mhapi.diff(first, second)
    with open(args.out, "w") as fh:
        for marker, diff1, diff2 in differ:
            print(marker, file=fh)
            # An empty collection simply contributes no lines.
            for prefix, haplotypes in ((">>>", diff1), ("<<<", diff2)):
                for haplotype in sorted(haplotypes):
                    print(prefix, haplotype, file=fh)
Exemplo n.º 5
0
def test_diff_large():
    """Check a diff spanning 40 markers, with spot checks on three entries."""
    expected_loci = [
        "MHDBL000002", "MHDBL000003", "MHDBL000007", "MHDBL000013",
        "MHDBL000017", "MHDBL000018", "MHDBL000030", "MHDBL000036",
        "MHDBL000038", "MHDBL000047", "MHDBL000058", "MHDBL000061",
        "MHDBL000076", "MHDBL000079", "MHDBL000082", "MHDBL000085",
        "MHDBL000088", "MHDBL000101", "MHDBL000106", "MHDBL000108",
        "MHDBL000111", "MHDBL000112", "MHDBL000122", "MHDBL000124",
        "MHDBL000128", "MHDBL000129", "MHDBL000135", "MHDBL000136",
        "MHDBL000138", "MHDBL000140", "MHDBL000144", "MHDBL000152",
        "MHDBL000154", "MHDBL000163", "MHDBL000181", "MHDBL000183",
        "MHDBL000194", "MHDBL000210", "MHDBL000211", "MHDBL000212",
    ]
    # Index into the diff -> full expected entry at that position.
    spot_checks = {
        9: ("MHDBL000047", set(), {"T,T"}),
        17: ("MHDBL000101", {"C,C,C,T"}, {"T,C,C,C"}),
        21: ("MHDBL000112", {"G,G,A,C"}, set()),
    }

    profile_a = SimulatedProfile(fromfile=data_file("prof/diff-comp-1.json"))
    profile_b = SimulatedProfile(fromfile=data_file("prof/diff-comp-3.json"))
    diff = list(mhapi.diff(profile_a, profile_b))
    loci = [entry[0] for entry in diff]
    print(diff[9], diff[17], diff[21])

    assert loci == expected_loci
    for index, expected_entry in spot_checks.items():
        assert diff[index] == expected_entry
Exemplo n.º 6
0
def test_diff2():
    """Check that the euramer "sim" and "inf" profiles differ at one marker."""
    simulated = SimulatedProfile(fromfile=data_file("prof/euramer-sim-gt.json"))
    inferred = SimulatedProfile(fromfile=data_file("prof/euramer-inf-gt.json"))
    expected = [("MHDBL000018", set(), {"T,G,C,T,A"})]
    assert list(mhapi.diff(simulated, inferred)) == expected