def test_Seq__split_id_line(line, exp_id, exp_desc):
    """Check that a fasta header line is split into id and description.

    Args:
        line: The header line handed to ``Seq._split_id_line``.
        exp_id: The id the split is expected to produce.
        exp_desc: The description the split is expected to produce.
    """
    parsed = Seq._split_id_line(line)

    # The helper must hand back exactly two items: (id, description).
    parsed_id, parsed_desc = parsed

    assert parsed_id == exp_id
    assert parsed_desc == exp_desc
def test_Seq_parse(fasta, expected):
    """Check that ``Seq.parse`` lazily yields the expected records.

    Args:
        fasta: The input handed to ``Seq.parse``.
        expected: The sequence records that parsing should produce, in order.
    """
    records = Seq.parse(fasta)

    # parse must be lazy: it should hand back a generator over the sequences.
    assert isinstance(records, GeneratorType)

    # Materialise the generator once so that the lengths can be compared.
    records = list(records)
    assert len(records) == len(expected)

    # Compare record by record, checking id, then desc, then seq.
    for got, want in zip(records, expected):
        for attribute in ("id", "desc", "seq"):
            assert getattr(got, attribute) == getattr(want, attribute)
def main():
    """Run the command line version of pcalign.

    This is the entrypoint specified in setup.py as `pcalign`.

    The function parses the command line, configures the logger verbosity,
    reads one sequence from each of the two input files, aligns them and
    prints every aligned sequence to the output handle.

    Exits with status 1 (via ``sys.exit``) when reading either input file
    raises ``ValueError``.
    """
    args = cli(prog=sys.argv[0], args=sys.argv[1:])

    # Default to warnings only; each level of verbosity opens up more output.
    level = logging.WARNING
    if args.verbose > 1:
        level = logging.DEBUG
    elif args.verbose > 0:
        level = logging.INFO
    logger.setLevel(level)

    logger.info("Running pcalign")
    logger.info("Using parameters:")
    logger.info("- seq1 = %s", args.seq1.name)
    logger.info("- seq2 = %s", args.seq2.name)
    logger.info("- output = %s", args.output.name)
    logger.info("- match score = %d", args.match_score)
    logger.info("- mismatch score = %d", args.mismatch_score)
    logger.info("- indel_score = %d", args.indel_score)

    logger.info("Parsing sequences")
    loaded = []
    for handle in (args.seq1, args.seq2):
        # Reading the files one at a time means the handle in scope is always
        # the one that failed, so no separate "which file" flag is needed.
        try:
            loaded.append(Seq.read(handle))
        except ValueError as err:
            # A ValueError from Seq.read is treated as bad user input
            # (presumably a file without a usable fasta record -- confirm
            # against Seq.read). Report the problem and stop the program
            # rather than showing a traceback.
            offending = handle.name
            logger.error("Parsing fasta files failed.")
            logger.error("Offending file is '%s'", offending)
            logger.error(err)
            sys.exit(1)
    seq1, seq2 = loaded

    logger.info("Aligning sequences")
    aligned = align(
        seq1,
        seq2,
        match_reward=args.match_score,
        mismatch_penalty=args.mismatch_score,
        indel_penalty=args.indel_score,
    )

    logger.info("Writing alignments")
    for record in aligned:
        print(record, file=args.output)
def test_Seq_read(fasta, expected):
    """Check that ``Seq.read`` returns the expected single record.

    Args:
        fasta: The input handed to ``Seq.read``.
        expected: The record whose id, desc and seq the result must match.
    """
    record = Seq.read(fasta)

    # Compare the fields in a fixed order: id, then desc, then seq.
    for attribute in ("id", "desc", "seq"):
        assert getattr(record, attribute) == getattr(expected, attribute)
def test_Seq_getitem():
    """Indexing a Seq is a simple wrapper, so it gets a simple test.

    Both a slice and a single integer index must give back an object whose
    ``seq`` attribute holds the selected characters.
    """
    record = Seq(id="test", desc=None, seq="aaabbbbbaaa")

    # Slice from position 3 to the end.
    tail = record[3:]
    assert tail.seq == "bbbbbaaa"

    # A single position.
    single = record[1]
    assert single.seq == "a"
def test_Seq_len():
    """``len()`` on a Seq is a simple wrapper, so it gets a simple test.

    A record built from the four-character string "four" must report a
    length of 4.
    """
    record = Seq(id="test", desc=None, seq="four")
    length = len(record)
    assert length == 4
TATCGATTAAGGTGCGTTTCAAGCGTTGACATTAAAGCCGAAACGCAAGGGCAATGCAAG TTCTGGTGTAATCATGGAAGTTAATGTTGCTCGCGTGTGTCAATGCTGGGTACAGGAGAA TAGTGTGTATGCGTGTCAGATCCCCCAGCCGCAAAGTCCCCTTCAGTCGTGCCAAGGCGG GAAATTCCAACTCTCGTGTCCCCATTCCCGCGCCTTGCTAAAGACATTACTAGATACGCT TGCTTACGGAGCTACGAAACATGTGTGGCAACTCTCCAGTGCGCAGCGCCCCATAGGTTA GGCACGGAGACAGTTCGCGTACCAGGTTCTAAATTGAGTAGGTTCGCCATGAGCAGTTAC CACATACTACCTTGTCTGACACAGGTGACATACCGGCGGGCTGAGTATTGTGATCATGGT GCGTATATATTGTTTCCCGTCCGTCCCCCCGGTGCACGAACTATCATCTAGCCGGCTATT TCGTTCAGTTAGCGTAGCTCGTTGCAGAGAAGTGAATTACGTTAAGGGGATGAGCGCCCA GTCCTCGCCCTCGCCGCTGCCATGGATATAGCAACGTT""", """>seq3 ATCCAGCT""" ] expected = [ Seq(id="seq1", desc="testing", seq="TTTCCGGGGCACATAATCTTCAGCCGGGCGC"), Seq(id="seq2", desc="testing2", seq=("ACTAAGTAGTCTTTTTGAGGTCGTTAACTCTTATAAAGCGGCGCAGCATACCTCCCGAGA" "CTATAGTTTTTCTCAATGCTGAACGCCTCATGGCTTGCCGGGCTCAATGCTGTAATCTGT" "CTCGGTTCCTGTATACTAGCCGGTACTCCCCAGTTAATTCGACTCGTTGTTTCTCTGTAT" "GTCTCCGATACATCCTAATATAATGTCCCCATGCTTACGCCTATAAAATCGCAATACTGT" "CTAAGGGAGGTCACTTAATTGTGAAGAGAGCCTAGACAGCGTTCGATTTAGAGCGTCCGT" "ACCAGGATCTTCTATCGGGCTCTGTGATGATTATAGCTATCGCTGACCGCCGGCTCGTCC" "TAGCGTTTAATACGGCGTACCGACCACTAGGGGGGAGGAAGTAGTTACCATTATCATCCA" "TATCGATTAAGGTGCGTTTCAAGCGTTGACATTAAAGCCGAAACGCAAGGGCAATGCAAG" "TTCTGGTGTAATCATGGAAGTTAATGTTGCTCGCGTGTGTCAATGCTGGGTACAGGAGAA" "TAGTGTGTATGCGTGTCAGATCCCCCAGCCGCAAAGTCCCCTTCAGTCGTGCCAAGGCGG" "GAAATTCCAACTCTCGTGTCCCCATTCCCGCGCCTTGCTAAAGACATTACTAGATACGCT" "TGCTTACGGAGCTACGAAACATGTGTGGCAACTCTCCAGTGCGCAGCGCCCCATAGGTTA" "GGCACGGAGACAGTTCGCGTACCAGGTTCTAAATTGAGTAGGTTCGCCATGAGCAGTTAC"
(6, 6): [(5, 5)], (7, 7): [(6, 6)] }, [("G-ATTACA", "GCATG-CU"), ("G-ATTACA", "GCA-TGCU"), ("G-ATTACA", "GCAT-GCU")]), ]) def test_backtrack(seq1, seq2, movements, expected): alignments = backtrack(movements, seq1, seq2) assert len(alignments) == len(expected) for alignment in alignments: assert alignment in expected return @pytest.mark.parametrize("seq1,seq2,expected", [( Seq(id="one", desc=None, seq="GATTACA"), Seq(id="two", desc="desc", seq="GCATGCU"), [ Seq(id="a_0|one", desc=None, seq="G-ATTACA"), Seq(id="a_0|two", desc="desc", seq="GCATG-CU"), Seq(id="a_1|one", desc=None, seq="G-ATTACA"), Seq(id="a_1|two", desc="desc", seq="GCA-TGCU"), Seq(id="a_2|one", desc=None, seq="G-ATTACA"), Seq(id="a_2|two", desc="desc", seq="GCAT-GCU") ], )]) def test_align(seq1, seq2, expected): actual = align(seq1, seq2, match_reward=1, mismatch_penalty=-1,