Пример #1
0
def test_Seq__split_id_line(line, exp_id, exp_desc):
    """ Check that a fasta header line is split into the expected id and
    description by `Seq._split_id_line`.
    """
    parsed_id, parsed_desc = Seq._split_id_line(line)

    # Compare both halves of the header in one go.
    assert (parsed_id, parsed_desc) == (exp_id, exp_desc)
Пример #2
0
def test_Seq_parse(fasta, expected):
    """ Check that `Seq.parse` lazily yields the expected records.

    The result must be a generator, and once it is exhausted the records
    must match `expected` one-to-one on `id`, `desc` and `seq`.
    """
    parsed = Seq.parse(fasta)

    # parse should hand back a generator over the sequences, not a list.
    assert isinstance(parsed, GeneratorType)

    # Materialise the generator here only so that the lengths can be
    # compared.
    records = list(parsed)
    assert len(records) == len(expected)

    for got, want in zip(records, expected):
        for field in ("id", "desc", "seq"):
            assert getattr(got, field) == getattr(want, field)
Пример #3
0
def main():
    """ Run pcalign from the command line.

    This is the entrypoint specified in setup.py as `pcalign`.

    The steps are: parse the command line arguments, set the logger level
    from the requested verbosity, read a sequence from each of the two input
    files, align them, and print every aligned sequence to the output file.
    If reading either input raises a ValueError the problem is logged and
    the program exits with status 1.
    """

    args = cli(prog=sys.argv[0], args=sys.argv[1:])

    # Higher verbosity values select chattier log levels; WARNING is the
    # fallback.
    log_level = logging.WARNING
    if args.verbose > 1:
        log_level = logging.DEBUG
    elif args.verbose > 0:
        log_level = logging.INFO

    logger.setLevel(log_level)
    logger.info("Running pcalign")
    logger.info("Using parameters:")
    logger.info("- seq1 = %s", args.seq1.name)
    logger.info("- seq2 = %s", args.seq2.name)
    logger.info("- output = %s", args.output.name)
    logger.info("- match score = %d", args.match_score)
    logger.info("- mismatch score = %d", args.mismatch_score)
    logger.info("- indel_score = %d", args.indel_score)

    logger.info("Parsing sequences")
    sequences = []
    for handle in (args.seq1, args.seq2):
        # Reading one file at a time means that, when a read fails, the
        # handle in hand is the offending file; no bookkeeping flag needed.
        try:
            sequences.append(Seq.read(handle))
        except ValueError as err:
            # A ValueError from Seq.read is treated as bad user input
            # (presumably a file without a usable fasta record), so we log
            # the problem and stop instead of showing a traceback.
            fname = handle.name

            logger.error("Parsing fasta files failed.")
            logger.error("Offending file is '%s'", fname)
            logger.error(err)
            sys.exit(1)

    seq1, seq2 = sequences

    logger.info("Aligning sequences")
    aligned = align(
        seq1,
        seq2,
        match_reward=args.match_score,
        mismatch_penalty=args.mismatch_score,
        indel_penalty=args.indel_score,
    )

    logger.info("Writing alignments")
    for record in aligned:
        print(record, file=args.output)
Пример #4
0
def test_Seq_read(fasta, expected):
    """ Check that `Seq.read` returns a record whose `id`, `desc` and `seq`
    match the expected record.
    """
    record = Seq.read(fasta)

    for field in ("id", "desc", "seq"):
        assert getattr(record, field) == getattr(expected, field)
Пример #5
0
def test_Seq_getitem():
    """ Slicing and indexing a Seq should give back objects whose `seq`
    holds the selected characters.
    """
    record = Seq(id="test", desc=None, seq="aaabbbbbaaa")

    # Open-ended slice.
    tail = record[3:]
    assert tail.seq == "bbbbbaaa"

    # Single position.
    single = record[1]
    assert single.seq == "a"
Пример #6
0
def test_Seq_len():
    """ `len()` of a Seq should be the number of characters in its `seq`. """
    record = Seq(id="test", desc=None, seq="four")
    length = len(record)
    assert length == 4
Пример #7
0
TATCGATTAAGGTGCGTTTCAAGCGTTGACATTAAAGCCGAAACGCAAGGGCAATGCAAG
TTCTGGTGTAATCATGGAAGTTAATGTTGCTCGCGTGTGTCAATGCTGGGTACAGGAGAA
TAGTGTGTATGCGTGTCAGATCCCCCAGCCGCAAAGTCCCCTTCAGTCGTGCCAAGGCGG
GAAATTCCAACTCTCGTGTCCCCATTCCCGCGCCTTGCTAAAGACATTACTAGATACGCT
TGCTTACGGAGCTACGAAACATGTGTGGCAACTCTCCAGTGCGCAGCGCCCCATAGGTTA
GGCACGGAGACAGTTCGCGTACCAGGTTCTAAATTGAGTAGGTTCGCCATGAGCAGTTAC
CACATACTACCTTGTCTGACACAGGTGACATACCGGCGGGCTGAGTATTGTGATCATGGT
GCGTATATATTGTTTCCCGTCCGTCCCCCCGGTGCACGAACTATCATCTAGCCGGCTATT
TCGTTCAGTTAGCGTAGCTCGTTGCAGAGAAGTGAATTACGTTAAGGGGATGAGCGCCCA
GTCCTCGCCCTCGCCGCTGCCATGGATATAGCAACGTT""",
""">seq3
ATCCAGCT"""
]

expected = [
    Seq(id="seq1", desc="testing", seq="TTTCCGGGGCACATAATCTTCAGCCGGGCGC"),
    Seq(id="seq2",
        desc="testing2",
        seq=("ACTAAGTAGTCTTTTTGAGGTCGTTAACTCTTATAAAGCGGCGCAGCATACCTCCCGAGA"
             "CTATAGTTTTTCTCAATGCTGAACGCCTCATGGCTTGCCGGGCTCAATGCTGTAATCTGT"
             "CTCGGTTCCTGTATACTAGCCGGTACTCCCCAGTTAATTCGACTCGTTGTTTCTCTGTAT"
             "GTCTCCGATACATCCTAATATAATGTCCCCATGCTTACGCCTATAAAATCGCAATACTGT"
             "CTAAGGGAGGTCACTTAATTGTGAAGAGAGCCTAGACAGCGTTCGATTTAGAGCGTCCGT"
             "ACCAGGATCTTCTATCGGGCTCTGTGATGATTATAGCTATCGCTGACCGCCGGCTCGTCC"
             "TAGCGTTTAATACGGCGTACCGACCACTAGGGGGGAGGAAGTAGTTACCATTATCATCCA"
             "TATCGATTAAGGTGCGTTTCAAGCGTTGACATTAAAGCCGAAACGCAAGGGCAATGCAAG"
             "TTCTGGTGTAATCATGGAAGTTAATGTTGCTCGCGTGTGTCAATGCTGGGTACAGGAGAA"
             "TAGTGTGTATGCGTGTCAGATCCCCCAGCCGCAAAGTCCCCTTCAGTCGTGCCAAGGCGG"
             "GAAATTCCAACTCTCGTGTCCCCATTCCCGCGCCTTGCTAAAGACATTACTAGATACGCT"
             "TGCTTACGGAGCTACGAAACATGTGTGGCAACTCTCCAGTGCGCAGCGCCCCATAGGTTA"
             "GGCACGGAGACAGTTCGCGTACCAGGTTCTAAATTGAGTAGGTTCGCCATGAGCAGTTAC"
Пример #8
0
            (6, 6): [(5, 5)],
            (7, 7): [(6, 6)]
        }, [("G-ATTACA", "GCATG-CU"), ("G-ATTACA", "GCA-TGCU"),
            ("G-ATTACA", "GCAT-GCU")]),
    ])
def test_backtrack(seq1, seq2, movements, expected):
    """ Check that `backtrack` returns as many alignments as expected and
    that each returned alignment is one of the expected ones.

    The order of the alignments is not checked.
    """
    found = backtrack(movements, seq1, seq2)

    assert len(found) == len(expected)
    assert all(alignment in expected for alignment in found)


@pytest.mark.parametrize("seq1,seq2,expected", [(
    Seq(id="one", desc=None, seq="GATTACA"),
    Seq(id="two", desc="desc", seq="GCATGCU"),
    [
        Seq(id="a_0|one", desc=None, seq="G-ATTACA"),
        Seq(id="a_0|two", desc="desc", seq="GCATG-CU"),
        Seq(id="a_1|one", desc=None, seq="G-ATTACA"),
        Seq(id="a_1|two", desc="desc", seq="GCA-TGCU"),
        Seq(id="a_2|one", desc=None, seq="G-ATTACA"),
        Seq(id="a_2|two", desc="desc", seq="GCAT-GCU")
    ],
)])
def test_align(seq1, seq2, expected):
    actual = align(seq1,
                   seq2,
                   match_reward=1,
                   mismatch_penalty=-1,