def test_phenos_agree_on_features():
    """Check ``phenos_agree_on_features`` over an "equal" and a "range" feature.

    A reference phenotype is compared against three others on the feature
    set {"d0", "d1"}: an identical one (expected to agree), one whose "d1"
    value is 0.5 instead of 1.5, and one whose "d0" value is False instead
    of True (both expected not to agree).
    """
    comparer = phenotype_compare.PhenotypeCompare({
        "d0": {"must_be_same": True, "method": "equal", "params": {}},
        "d1": {
            "must_be_same": False,
            "method": "range",
            "params": {"low": 1, "high": 2},
        },
    })
    wanted_features = {"d0", "d1"}
    reference = {"d0": True, "d1": 1.5}

    # (phenotype to compare with the reference, expected agreement)
    cases = [
        ({"d0": True, "d1": 1.5}, True),
        ({"d0": True, "d1": 0.5}, False),
        ({"d0": False, "d1": 1.5}, False),
    ]
    for other, should_agree in cases:
        agreed = comparer.phenos_agree_on_features(
            reference, other, wanted_features)
        assert bool(agreed) == should_agree
Exemplo n.º 2
0
def run(options):
    """Build the analysis inputs from ``options`` and run the triples analysis.

    Reads one case sample name per line from ``options.case_names_file``
    (trailing whitespace stripped), loads genotypes, phenotypes and the JSON
    phenotype constraints from the paths given in ``options``, then calls
    ``StrainTriples.run_analysis`` with ``options.out`` and
    ``options.mask_bed_file``.

    Args:
        options: object exposing the attributes read below (presumably the
            parsed command-line arguments -- confirm against the caller).
    """
    with open(options.case_names_file) as names_handle:
        case_sample_names = list(map(str.rstrip, names_handle))

    genotype_data = genotypes.Genotypes(
        file_of_vcf_filenames=options.vcfs_tsv,
        distance_matrix_file=options.distance_matrix,
        variant_counts_file=options.var_counts_file,
    )
    phenotype_data = phenotypes.Phenotypes(options.phenos_tsv)

    with open(options.pheno_constraints_json) as constraints_handle:
        constraints = json.load(constraints_handle)
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    analysis = strain_triples.StrainTriples(
        genotype_data,
        phenotype_data,
        comparer,
        top_n_genos=options.top_n_genos,
        max_pheno_diffs=options.max_pheno_diffs,
        processes=options.processes,
    )
    analysis.run_analysis(
        case_sample_names,
        options.out,
        mask_file=options.mask_bed_file,
    )
def test_satisfy_required_differences_two_the_same():
    """Check ``satisfy_required_differences`` with two must-be-same features.

    "d0" and "d1" use the "equal" method with ``must_be_same`` True; "d2"
    uses the "range" method (low=1, high=2) with ``must_be_same`` False.
    Of the phenotypes compared with the reference, only the one whose "d2"
    value is 2.5 (instead of 1.5) is expected to satisfy the required
    differences. Each pair is checked in both argument orders.
    """
    constraints = {
        name: {"must_be_same": True, "method": "equal", "params": {}}
        for name in ("d0", "d1")
    }
    constraints["d2"] = {
        "must_be_same": False,
        "method": "range",
        "params": {"low": 1, "high": 2},
    }
    comparer = phenotype_compare.PhenotypeCompare(constraints)
    reference = {"d0": True, "d1": True, "d2": 1.5}

    # (phenotype to compare with the reference, expected result)
    cases = [
        ({"d0": True, "d1": True, "d2": 1.1}, False),
        ({"d0": True, "d1": False, "d2": 1.5}, False),
        ({"d0": True, "d1": True, "d2": 1.5}, False),
        ({"d0": True, "d1": True, "d2": 2.5}, True),
    ]
    for other, expected in cases:
        forward = comparer.satisfy_required_differences(reference, other)
        assert bool(forward) == expected
        backward = comparer.satisfy_required_differences(other, reference)
        assert bool(backward) == expected
def test_satisfy_required_differences_one_the_same():
    """Check ``satisfy_required_differences`` when only "d1" is in the data.

    The constraints name "d0" (must be same) and "d1" (must not be same),
    both with the "equal" method, but every phenotype here carries only a
    "d1" value. True vs False is expected to satisfy the required
    differences; True vs True, True vs None and None vs None are not.
    """
    comparer = phenotype_compare.PhenotypeCompare({
        "d0": {"must_be_same": True, "method": "equal", "params": {}},
        "d1": {"must_be_same": False, "method": "equal", "params": {}},
    })
    true_a = {"d1": True}
    true_b = {"d1": True}
    false_a = {"d1": False}
    none_a = {"d1": None}
    none_b = {"d1": None}

    satisfied = [(true_a, false_a), (false_a, true_a)]
    not_satisfied = [
        (true_a, true_a),
        (true_a, true_b),
        (true_b, true_a),
        (true_a, none_a),
        (none_a, true_a),
        (none_a, none_b),
        (none_b, none_a),
    ]

    for left, right in satisfied:
        assert comparer.satisfy_required_differences(left, right)
    for left, right in not_satisfied:
        assert not comparer.satisfy_required_differences(left, right)
Exemplo n.º 5
0
def run(options):
    """Write the samples matching the wanted phenotypes to ``options.outfile``.

    Converts ``options.wanted_pheno`` with
    ``utils.command_line_wanted_phenos_to_dict``, loads the phenotypes and
    the JSON constraints, asks ``Phenotypes.find_matching_cases`` for the
    matching samples and writes them one per line.

    Args:
        options: object exposing ``wanted_pheno``, ``phenos_tsv``,
            ``pheno_constraints_json`` and ``outfile``.

    Raises:
        RuntimeError: if no matching samples are found. The output file is
            not opened in that case.
    """
    wanted = utils.command_line_wanted_phenos_to_dict(options.wanted_pheno)
    phenotype_data = phenotypes.Phenotypes(options.phenos_tsv)

    with open(options.pheno_constraints_json) as constraints_handle:
        constraints = json.load(constraints_handle)
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    matching = phenotype_data.find_matching_cases(wanted, comparer)
    if len(matching) == 0:
        raise RuntimeError("No matching samples found")

    with open(options.outfile, "w") as out_handle:
        for sample_name in matching:
            print(sample_name, file=out_handle)
def test_ranked_neighbours_for_one_sample(genos, phenos, constraints):
    """Check ``ranked_neighbours_for_one_sample`` for sample "s1".

    Three calls are made: with default options, with ``max_pheno_dist=2``
    (the "s5" entry, whose pheno_dist is 3, is expected to be absent), and
    with ``top_n_genos=2`` (only a re-ranked "s5" entry is expected).

    ``genos``, ``phenos`` and ``constraints`` are presumably pytest fixtures
    defined elsewhere -- they are not visible in this block.
    """
    find_neighbours = sample_neighbours_finding.ranked_neighbours_for_one_sample
    rank_data = sample_neighbours_finding.RankData
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    s5_entry = rank_data(sample="s5", rank_sum=2, geno_rank=0, pheno_rank=2,
                         geno_dist=1, pheno_dist=3)
    s4_entry = rank_data(sample="s4", rank_sum=2, geno_rank=1, pheno_rank=1,
                         geno_dist=2, pheno_dist=2)
    s3_entry = rank_data(sample="s3", rank_sum=2, geno_rank=2, pheno_rank=0,
                         geno_dist=3, pheno_dist=1)

    # Default options: all three neighbours, in this order.
    expected_default = [s5_entry, s4_entry, s3_entry]
    assert find_neighbours(genos, phenos, comparer, "s1") == expected_default

    # Capping the phenotype distance at 2: same list without its first entry.
    limited = find_neighbours(genos, phenos, comparer, "s1", max_pheno_dist=2)
    assert limited == expected_default[1:]

    # top_n_genos=2: a single "s5" entry whose ranks are all zero.
    top_two = find_neighbours(genos, phenos, comparer, "s1", top_n_genos=2)
    assert top_two == [
        rank_data(sample="s5", rank_sum=0, geno_rank=0, pheno_rank=0,
                  geno_dist=1, pheno_dist=3),
    ]
def test_differences():
    """Check the counts returned by ``PhenotypeCompare.differences``.

    "d0", "d1" and "d2" use the "equal" method with ``must_be_same`` True;
    "d3" uses the "range" method with ``must_be_same`` False and has the
    same value (2.5) in every phenotype. The expected count equals the
    number of "d0".."d2" values that differ between the two phenotypes.
    Each pair is checked in the argument orders listed below.
    """
    constraints = {
        name: {"must_be_same": True, "method": "equal", "params": {}}
        for name in ("d0", "d1", "d2")
    }
    constraints["d3"] = {
        "must_be_same": False,
        "method": "range",
        "params": {"low": 1, "high": 2},
    }
    comparer = phenotype_compare.PhenotypeCompare(constraints)

    def make_pheno(d0, d1, d2):
        # "d3" is held constant across all phenotypes in this test.
        return {"d0": d0, "d1": d1, "d2": d2, "d3": 2.5}

    all_true = make_pheno(True, True, True)
    all_true_copy = make_pheno(True, True, True)
    one_false = make_pheno(True, True, False)
    two_false = make_pheno(True, False, False)
    three_false = make_pheno(False, False, False)

    # (left, right, expected number of differences)
    cases = [
        (all_true, all_true, 0),
        (all_true, all_true_copy, 0),
        (all_true_copy, all_true, 0),
        (all_true, one_false, 1),
        (one_false, all_true, 1),
        (all_true, two_false, 2),
        (two_false, all_true, 2),
        (all_true, three_false, 3),
        (three_false, all_true, 3),
    ]
    for left, right, expected_count in cases:
        assert comparer.differences(left, right) == expected_count
def test_run_analysis(genos, phenos, constraints, caplog):
    """Smoke-test ``StrainTriples.run_analysis`` end to end.

    Runs the analysis for case samples "s1" and "s2" with a mask BED file,
    writing to a scratch directory, and checks that every filename in the
    values of the returned object exists. The scratch directory is removed
    afterwards, whether or not the test passes.

    ``genos``, ``phenos`` and ``constraints`` are presumably pytest fixtures
    defined elsewhere; ``caplog`` is pytest's log-capture fixture.
    """
    # Local import: the rest of the file may not import shutil.
    import shutil

    caplog.set_level(logging.INFO)
    pheno_compare = phenotype_compare.PhenotypeCompare(constraints)
    triples = strain_triples.StrainTriples(genos,
                                           phenos,
                                           pheno_compare,
                                           top_n_genos=10)
    case_sample_names = ["s1", "s2"]
    tmp_dir = "tmp.strain_triples.run_analysis"

    # Start from a clean slate in case an earlier run left something behind.
    # Done in-process rather than by shelling out to `rm`, so no shell is
    # involved and the test does not depend on a POSIX `rm` being available.
    if os.path.isdir(tmp_dir) and not os.path.islink(tmp_dir):
        shutil.rmtree(tmp_dir)
    elif os.path.lexists(tmp_dir):
        os.remove(tmp_dir)
    os.mkdir(tmp_dir)

    try:
        outprefix = os.path.join(tmp_dir, "out")
        mask_file = os.path.join(data_dir, "run_analysis.mask.bed")
        got = triples.run_analysis(case_sample_names,
                                   outprefix,
                                   mask_file=mask_file)
        # The contents of these files are checked elsewhere. This test is
        # just to check the run of the whole pipeline doesn't crash.
        for filename in got.values():
            assert os.path.exists(filename)
    finally:
        # Always clean up, so a failing run does not leak the directory.
        shutil.rmtree(tmp_dir)
def test_find_strain_triples(genos, phenos, constraints, caplog):
    """Check the triples ``find_strain_triples`` returns for "s1" and "s2".

    Expects one ``StrainTriple`` per case sample: "s1" paired with the
    ``RankData`` for "s3" and "s4", and "s2" paired with the ``RankData``
    for "s6" and "s7".

    ``genos``, ``phenos`` and ``constraints`` are presumably pytest fixtures
    defined elsewhere; ``caplog`` is pytest's log-capture fixture.
    """
    caplog.set_level(logging.INFO)
    comparer = phenotype_compare.PhenotypeCompare(constraints)
    finder = strain_triples.StrainTriples(
        genos, phenos, comparer, top_n_genos=10)
    found = finder.find_strain_triples(["s1", "s2"])

    def neighbour(sample, geno_rank, geno_dist):
        # Every expected entry has pheno_rank 0 and pheno_dist 0, so its
        # rank_sum has the same value as its geno_rank.
        return sample_neighbours_finding.RankData(
            sample=sample,
            rank_sum=geno_rank,
            geno_rank=geno_rank,
            pheno_rank=0,
            geno_dist=geno_dist,
            pheno_dist=0,
        )

    wanted = [
        strain_triple.StrainTriple(
            "s1", neighbour("s3", 0, 1), neighbour("s4", 0, 1)),
        strain_triple.StrainTriple(
            "s2", neighbour("s6", 0, 1), neighbour("s7", 1, 2)),
    ]
    assert found == wanted
def test_geno_and_pheno_distances_for_one_sample(genos, phenos, constraints):
    """Check ``_geno_and_pheno_distances_for_one_sample`` for sample "s1".

    With default options, distances to "s3", "s4" and "s5" are expected.
    With ``top_n_genos=3`` only "s4" and "s5" are expected.

    ``genos``, ``phenos`` and ``constraints`` are presumably pytest fixtures
    defined elsewhere -- they are not visible in this block.
    """
    comparer = phenotype_compare.PhenotypeCompare(constraints)
    find_distances = (
        sample_neighbours_finding._geno_and_pheno_distances_for_one_sample
    )

    geno_dists, pheno_dists = find_distances(genos, phenos, comparer, "s1")
    assert geno_dists == {"s3": 3, "s4": 2, "s5": 1}
    assert pheno_dists == {"s3": 1, "s4": 2, "s5": 3}

    geno_dists, pheno_dists = find_distances(
        genos, phenos, comparer, "s1", top_n_genos=3)
    assert geno_dists == {"s4": 2, "s5": 1}
    assert pheno_dists == {"s4": 2, "s5": 3}
def test_init_in_particular_sanity_check_constraints():
    """Check which constraint dicts ``PhenotypeCompare`` accepts at init.

    Each case builds a single-feature constraints dict for "d1" and either
    expects construction to succeed or expects it to raise ``RuntimeError``.
    Cases run in the order listed.
    """

    def constraints_for(method, params):
        # Fresh dict per case, always with must_be_same set to True.
        return {
            "d1": {
                "must_be_same": True,
                "method": method,
                "params": params,
            }
        }

    # (method, params, whether construction is expected to raise)
    cases = [
        ("equal", {}, False),
        # Uses an unknown method
        ("WRONG", {}, True),
        # equal method should not have any params
        ("equal", {"low": 1}, True),
        # Range method should have low and high params
        ("range", {}, True),
        ("range", {"low": 1}, True),
        ("range", {"low": 1, "high": 2}, False),
        # abs_distance should have max_dist param
        ("abs_distance", {}, True),
        ("abs_distance", {"max_dist": 1}, False),
        # percent_distance should have max_percent param
        ("percent_distance", {}, True),
        ("percent_distance", {"max_percent": 42}, False),
    ]

    for method, params, should_raise in cases:
        constraints = constraints_for(method, params)
        if should_raise:
            with pytest.raises(RuntimeError):
                phenotype_compare.PhenotypeCompare(constraints)
        else:
            phenotype_compare.PhenotypeCompare(constraints)