Exemplo n.º 1
0
def main():
    """Run four demonstration alignments on two short DNA sequences.

    Every experiment prints a header line followed by the table produced by
    ``malign.tabulate_alms`` for the alignments of that run.
    """
    # The sequences are handed to the aligner as lists of single characters.
    first = list("GATTACA")
    second = list("ATAC")

    # Experiment 1: "dumb" method, no scorer given.
    result = malign.multi_align([first, second], method="dumb")
    print("Experiment #1")
    print(malign.tabulate_alms(result))

    # Experiment 2: "nw" method, no scorer given.
    result = malign.multi_align([first, second], method="nw")
    print("Experiment #2")
    print(malign.tabulate_alms(result))

    # Experiment 3: "yenksp" method with k=4, no scorer given.
    result = malign.multi_align([first, second], k=4, method="yenksp")
    print("Experiment #3")
    print(malign.tabulate_alms(result))

    # Experiment 4: "yenksp" method with k=4 and the bundled DNA matrix.
    result = malign.multi_align(
        [first, second],
        k=4,
        method="yenksp",
        scorer=malign.utils.DNA_MATRIX,
    )
    print("Experiment #4")
    print(malign.tabulate_alms(result))
Exemplo n.º 2
0
def with_full_matrix():
    """Build a three-domain scoring matrix, print samples and align with it.

    The Italian/Russian and Italian/Greek matrices are loaded from ``docs/``,
    merged into three-element keys (``None`` marks the missing domain), and
    extended with a handful of hand-set scores. A few entries are printed,
    then two ``anw`` alignments are run and tabulated.

    Returns:
        The combined ``malign.ScoringMatrix``.
    """
    ita_rus = malign.ScoringMatrix()
    ita_rus.load("docs/ita_rus.matrix")
    ita_grk = malign.ScoringMatrix()
    ita_grk.load("docs/ita_grk.matrix")

    # Merge both pairwise tables into one dictionary keyed by
    # (Italian, Russian, Greek) triples.
    # TODO: move to function
    merged = {}
    for pair, value in ita_rus.scores.items():
        merged[pair[0], pair[1], None] = value
    for pair, value in ita_grk.scores.items():
        merged[pair[0], None, pair[1]] = value

    full_matrix = malign.ScoringMatrix(merged)

    # Hand-set scores, applied in this exact order.
    # NOTE(review): ("-", "в", "ς") appears twice (-7.5, later -10.0); both
    # assignments are kept as written -- confirm whether the first is needed.
    manual_scores = (
        (("o", "в", "ο"), -4.0),
        (("o", "в", "ς"), -10.0),
        (("-", "в", "ς"), -7.5),
        (("-", "в", "ο"), -6.5),
        (("o", "-", "ο"), 5.0),
        (("o", "-", "ς"), -3.0),
        (("i", "-", "Ι"), -4.0),
        (("c", "к", "κ"), 10.0),
        (("-", "в", "ς"), -10.0),
        (("m", "в", "β"), 10.0),
    )
    for triple, value in manual_scores:
        full_matrix[triple] = value

    print("--------------------- FULL MATRIX #1")
    sample_keys = (
        ("-", "к", "ο"),
        ("i", "а", "Ι"),
        ("m", "в", "β"),
        ("m", "-", "β"),
        ("-", "в", "ς"),
        ("-", "-", "ς"),
        ("o", "-", "ς"),
        ("-", "-", "ο"),
        ("-", "в", "ο"),
    )
    for key in sample_keys:
        print(key, full_matrix[key])

    # Two alignment runs that share every setting except the input words.
    runs = (
        ("--------------------- FULL MATRIX #2", ["atomo", "атом", "ατομο"]),
        ("--------------------- FULL MATRIX #3", ["Giacomo", "Яков", "Ιακωβος"]),
    )
    for header, words in runs:
        print(header)
        result = malign.multi_align(words, k=4, method="anw", matrix=full_matrix)
        print(tabulate_alms(result))

    return full_matrix
Exemplo n.º 3
0
def with_ita_rus():
    """Print two ``anw`` alignments scored with the Italian/Russian matrix.

    The matrix is loaded from ``docs/ita_rus.matrix``; each run prints a
    header and the tabulated alignments.
    """
    matrix = malign.ScoringMatrix()
    matrix.load("docs/ita_rus.matrix")

    # (header, word pair, k) for each demonstration run.
    runs = (
        ("--------------------- ITA_RUS #1", ["atomo", "атом"], 2),
        ("--------------------- ITA_RUS #2", ["Giacomo", "Яков"], 4),
    )
    for header, words, k_value in runs:
        print(header)
        result = malign.multi_align(words, k=k_value, method="anw", matrix=matrix)
        print(tabulate_alms(result))
Exemplo n.º 4
0
def with_ita_grk():
    """Print two ``anw`` alignments scored with the Italian/Greek matrix.

    The matrix is loaded from ``docs/ita_grk.matrix``; each run prints a
    header and the tabulated alignments.
    """
    matrix = malign.ScoringMatrix()
    matrix.load("docs/ita_grk.matrix")

    # (header, word pair, k) for each demonstration run.
    runs = (
        ("--------------------- ITA_GRK #1", ["atomo", "ατομο"], 2),
        ("--------------------- ITA_GRK #2", ["Giacomo", "Ιακωβος"], 4),
    )
    for header, words, k_value in runs:
        print(header)
        result = malign.multi_align(words, k=k_value, method="anw", matrix=matrix)
        print(tabulate_alms(result))
Exemplo n.º 5
0
    def test_dumb_alignment(self):
        """
        Check the gap placement produced by the `dumb` alignment method.
        """

        # Pairwise case: inspect the row of the shorter sequence.
        pairwise = malign.multi_align(["tra", "fatata"], method="dumb")
        first_row = tuple(pairwise[0].seqs[0])
        assert first_row == tuple("-tra--")

        # Multiwise case: inspect the row of the fourth sequence.
        words = ["tra", "fra", "batata", "virp", "x"]
        multiwise = malign.multi_align(words, method="dumb")
        fourth_row = tuple(multiwise[0].seqs[3])
        assert fourth_row == tuple("-virp-")
Exemplo n.º 6
0
def debug_trigger():
    """Reproduce a `yenksp` alignment and scoring run for debugging.

    Loads the scoring matrix stored in ``tests/tiago.json``, aligns three
    sequences with ``method="yenksp"`` and ``k=2``, prints the sequences of
    the first alignment and passes them to ``malign.utils.score_alignment``.
    The score itself is discarded; the call is made only to exercise that
    code path.
    """
    scorer = malign.ScoringMatrix()
    # Use a forward-slash path: the previous "tests\\tiago.json" literal only
    # resolved on Windows, whereas "/" is accepted on every platform.
    scorer.load("tests/tiago.json")
    seqs = ["Giacomo", "Яков", "Ιακωβος"]
    yenksp_alms = malign.multi_align(seqs, method="yenksp", k=2, matrix=scorer)
    print(yenksp_alms[0].seqs)
    malign.utils.score_alignment(yenksp_alms[0].seqs, scorer)
Exemplo n.º 7
0
    def test_multialignment_linguistic(self):
        """
        Check `anw` and `yenksp` alignments of three sequences scored with a
        matrix combined from the Italian/Russian and Italian/Greek matrices.
        """

        # The matrices live in the "docs" directory two levels above this file.
        docs_dir = Path(__file__).parent.parent / "docs"
        path_rus = docs_dir / "ita_rus.matrix"
        path_grk = docs_dir / "ita_grk.matrix"
        ita_rus = malign.ScoringMatrix()
        ita_rus.load(path_rus.as_posix())
        ita_grk = malign.ScoringMatrix()
        ita_grk.load(path_grk.as_posix())

        # Re-key both pairwise tables as triples, with `None` standing in for
        # the domain a table does not cover.
        # TODO: have/function methods that does this
        from_rus = {
            (pair[0], pair[1], None): value
            for pair, value in ita_rus.scores.items()
        }
        from_grk = {
            (pair[0], None, pair[1]): value
            for pair, value in ita_grk.scores.items()
        }
        combined = dict(from_rus)
        combined.update(from_grk)

        full_matrix = malign.ScoringMatrix(
            combined, impute_method="bayesian_ridge"
        )
        full_matrix["o", "в", "ο"] = -4
        full_matrix["i", "-", "Ι"] = -4
        full_matrix["c", "к", "κ"] = 10

        # The matrix is also written to disk in the current directory.
        full_matrix.save("tiago.json")

        words = ["Giacomo", "Яков", "Ιακωβος"]
        nw_alms = malign.multi_align(
            words, method="anw", k=4, matrix=full_matrix
        )
        yenksp_alms = malign.multi_align(
            words, method="yenksp", k=2, matrix=full_matrix
        )

        # TODO: bayesian ridge should not give same score here, check
        # TODO: failing on GitHub
        expected_score = 2.12027

        best_nw = nw_alms[0]
        assert tuple(best_nw.seqs[1]) == ("Я", "к", "-", "о", "-", "-", "в")
        assert isclose(best_nw.score, expected_score, rel_tol=1e-05)

        best_yenksp = yenksp_alms[0]
        assert tuple(best_yenksp.seqs[1]) == (
            "Я", "-", "-", "к", "о", "в", "-",
        )
        assert isclose(best_yenksp.score, expected_score, rel_tol=1e-05)
Exemplo n.º 8
0
def pairwise_dumb():
    """Print the `dumb` alignment of the pair ``"tra"`` / ``"fatata"``."""
    pair = ["tra", "fatata"]

    print("--------------------- PAIRWISE DUMB")
    result = malign.multi_align(pair, method="dumb")
    print(tabulate_alms(result))
Exemplo n.º 9
0
def dna2():
    """Print the `anw` alignment of two DNA strings using the DNA matrix."""
    pair = ["GATTACA", "ATTT"]

    print("--------------------- DNA2")
    dna_matrix = malign.utils.DNA_MATRIX
    result = malign.multi_align(pair, k=2, method="anw", matrix=dna_matrix)
    print(tabulate_alms(result))
Exemplo n.º 10
0
    def test_alignment_identity(self):
        """
        Check `anw` and `yenksp` alignments of four names scored with an
        identity matrix built from those same names.
        """

        names = ["VOLDEMORT", "WALDEMAR", "VLADIMIR", "VOLODYMIR"]
        identity = malign.utils.identity_matrix(names)

        # Both methods are expected to give the same first row and score.
        expected_row = tuple("VOL-DEMORT")
        expected_score = 7.3

        nw_alms = malign.multi_align(
            names, method="anw", k=4, matrix=identity
        )
        assert tuple(nw_alms[0].seqs[0]) == expected_row
        assert isclose(nw_alms[0].score, expected_score, rel_tol=1e-05)

        yenksp_alms = malign.multi_align(
            names, method="yenksp", k=4, matrix=identity
        )
        assert tuple(yenksp_alms[0].seqs[0]) == expected_row
        assert isclose(yenksp_alms[0].score, expected_score, rel_tol=1e-05)
Exemplo n.º 11
0
def test_nw_pw_align():
    """
    Check the pairwise `anw` alignment of "tra" and "fata".
    """

    results = malign.multi_align(["tra", "fata"], k=2, method="anw")
    assert len(results) == 1

    best = results[0]
    assert tuple(best.seqs[0]) == tuple("--tra")
    assert tuple(best.seqs[1]) == tuple("fat-a")
    assert best.score == pytest.approx(-1.2)
Exemplo n.º 12
0
def test_tabulation():
    """
    Test alignment tabulation output
    """

    # TODO: assertMultiLineEqual() is failing, only keeping here for coverage

    alms = malign.multi_align(["tra", "fatata"], method="anw", k=3)
    output = malign.tabulate_alms(alms)

    ref = """
Exemplo n.º 13
0
def yenksp(full_matrix, voldemort_matrix):
    """Print three `yenksp` alignment demonstrations.

    Args:
        full_matrix: Scoring matrix used for the two three-word runs.
        voldemort_matrix: Scoring matrix used for the four-name run.
    """
    # (header, sequences, k, matrix) for each run, in printing order.
    runs = (
        (
            "--------------------- YENKSP #1",
            ["atomo", "атом", "ατομο"],
            2,
            full_matrix,
        ),
        (
            "--------------------- YENKSP #2",
            ["Giacomo", "Яков", "Ιακωβος"],
            2,
            full_matrix,
        ),
        (
            "--------------------- YENKSP #3",
            ["VOLDEMORT", "WALDEMAR", "VLADIMIR", "VOLODYMIR"],
            4,
            voldemort_matrix,
        ),
    )
    for header, words, k_value, matrix in runs:
        print(header)
        result = malign.multi_align(
            words, k=k_value, method="yenksp", matrix=matrix
        )
        print(tabulate_alms(result))
Exemplo n.º 14
0
    def test_nw_alignment(self):
        """
        Check `anw` alignments of "GATTACA" against two shorter DNA strings.
        """

        # (second sequence, expected second row, expected score)
        cases = (
            ("A", tuple("------A"), -3.857142),
            ("ATTT", tuple("-ATT-T-"), 1.0),
        )
        for shorter, expected_row, expected_score in cases:
            results = malign.multi_align(
                ["GATTACA", shorter],
                k=2,
                method="anw",
                matrix=malign.utils.DNA_MATRIX,
            )
            best = results[0]
            assert tuple(best.seqs[1]) == expected_row
            assert isclose(best.score, expected_score, rel_tol=1e-05)
Exemplo n.º 15
0
def modified_dna():
    """Print three `anw` alignments scored with an altered DNA matrix.

    A copy of ``malign.utils.DNA_MATRIX`` gets its ("C", "T") entry set to
    -99.0 before the alignments are computed.
    """
    skewed = malign.utils.DNA_MATRIX.copy()
    skewed["C", "T"] = -99.0

    # The first alignment is computed before its header is printed.
    result = malign.multi_align(
        ["GATTACA", "ATTT"], k=4, method="anw", matrix=skewed
    )
    print("--------------------- MODIFIED DNA #1")
    print(tabulate_alms(result))

    # The remaining two runs use the same pair in both orders.
    print("--------------------- MODIFIED DNA #2")
    result = malign.multi_align(
        ["GATTACA", "GATTATA"], k=2, method="anw", matrix=skewed
    )
    print(tabulate_alms(result))

    print("--------------------- MODIFIED DNA #3")
    result = malign.multi_align(
        ["GATTATA", "GATTACA"], k=2, method="anw", matrix=skewed
    )
    print(tabulate_alms(result))
Exemplo n.º 16
0
    def test_nw_alignment_asymmetric(self):
        """
        Check the `anw` alignment obtained with an asymmetric scoring matrix.
        """

        # Pairwise, asymmetric case: only the ("C", "T") entry is overridden.
        skewed = malign.utils.DNA_MATRIX.copy()
        skewed["C", "T"] = -99

        results = malign.multi_align(
            ["GATTACA", "ATTT"], k=4, method="anw", matrix=skewed
        )
        best = results[0]
        assert tuple(best.seqs[1]) == tuple("-ATTT--")
        assert isclose(best.score, 0.4285714, rel_tol=1e-05)
Exemplo n.º 17
0
    def test_nw_alignment_linguistic(self):
        """
        Check the `anw` alignment of an Italian/Russian word pair.
        """

        # The matrix lives in the "docs" directory two levels above this file.
        matrix_path = Path(__file__).parent.parent / "docs" / "ita_rus.matrix"
        ita_rus = malign.ScoringMatrix()
        ita_rus.load(matrix_path.as_posix())

        results = malign.multi_align(
            ["Giacomo", "Яков"], k=4, method="anw", matrix=ita_rus
        )
        best = results[0]
        assert tuple(best.seqs[1]) == ("-", "Я", "-", "к", "о", "в", "-")
        assert isclose(best.score, 2.8571428, rel_tol=1e-05)
Exemplo n.º 18
0
def voldemort():
    """Build an identity matrix for four names, print samples and align.

    Returns:
        The matrix produced by ``malign.utils.identity_matrix``.
    """
    names = ["VOLDEMORT", "WALDEMAR", "VLADIMIR", "VOLODYMIR"]
    voldemort_matrix = malign.utils.identity_matrix(names)

    print("--------------------- VOLDEMORT #1")
    sample_keys = (
        ("T", "-", "-", "-"),
        ("T", "R", "R", "R"),
        ("-", "R", "R", "R"),
        ("R", "R", "R", "R"),
        ("O", "A", "I", "I"),
    )
    for key in sample_keys:
        print(key, voldemort_matrix[key])

    print("--------------------- VOLDEMORT #2")
    result = malign.multi_align(
        names, k=4, method="anw", matrix=voldemort_matrix
    )
    print(tabulate_alms(result))

    return voldemort_matrix
Exemplo n.º 19
0
def test_yenksp_pw_align():
    """
    Check pairwise `yenksp` alignment, via `multi_align` and via the
    `malign.yenksp` functions directly.
    """

    # Basic alignment without a scoring matrix.
    results = malign.multi_align(["tra", "fata"], k=4, method="yenksp")
    assert len(results) == 4
    best = results[0]
    assert tuple(best.seqs[0]) == tuple("tr-a")
    assert tuple(best.seqs[1]) == tuple("fata")
    assert best.score == pytest.approx(-0.95)

    # Longer DNA pair, driving the graph construction and search directly.
    seq_x = "TGGACCCGGGAAGGTGACCCAC"
    seq_y = "TTACCACCGGCGCGAACCCCCCCCC"
    dna_graph = malign.yenksp.compute_graph(
        seq_x, seq_y, malign.utils.DNA_MATRIX
    )

    # The search runs from node (0, 0) to the node indexed by both lengths.
    target = (len(seq_x), len(seq_y))
    found = malign.yenksp.align(
        dna_graph, (0, 0), target, seq_x, seq_y, malign.utils.DNA_MATRIX
    )
    top = found[0]
    assert "".join(top.seqs[0]) == "TGGAC-CCGG-G-AAGGTGACCCAC"
    assert "".join(top.seqs[1]) == "TTACCACCGGCGCGAACCCCCCCCC"
    assert top.score == pytest.approx(2.32)
Exemplo n.º 20
0
def multiwise_dumb():
    """Print the `dumb` alignment of five short strings."""
    words = ["tra", "fra", "batata", "virp", "x"]

    print("--------------------- MULTIWISE DUMB")
    result = malign.multi_align(words, method="dumb")
    print(tabulate_alms(result))