示例#1
0
def test_parse_fasta__multiple_records():
    """Parse three records and compare against the expected tuples.

    Two of the records spread their sequence over two lines, and the
    second record carries text after the first space of its header.
    """
    records = parse_fasta([
        ">first\n",
        "TGTTCTCCACCGTGCACAAC\n",
        "CCTTCATCCA\n",
        ">Second XT:1:0\n",
        "GAGAGCTCAGCTAAC\n",
        ">Third\n",
        "CGCTGACCAAAAACGGACAG\n",
        "GGCATTCGGC\n",
    ])
    # Each expected entry is ((name, meta), sequence); sequence lines are
    # expected to be joined without their trailing newlines.
    first = (("first", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")
    second = (("Second", "XT:1:0"), "GAGAGCTCAGCTAAC")
    third = (("Third", None), "CGCTGACCAAAAACGGACAGGGCATTCGGC")
    assert_list_equals(records, [first, second, third])
示例#2
0
def test_parse_fasta__multiple_records():
    """Check that three consecutive FASTA records are parsed in order."""
    # Input grouped per record: header line followed by its sequence lines.
    first_record = [">first\n", "TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n"]
    second_record = [">Second XT:1:0\n", "GAGAGCTCAGCTAAC\n"]
    third_record = [">Third\n", "CGCTGACCAAAAACGGACAG\n", "GGCATTCGGC\n"]
    fasta_lines = first_record + second_record + third_record

    # Expected output uses ((name, meta), sequence) tuples; meta is None
    # when the header has nothing after the name.
    wanted = [
        (("first", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA"),
        (("Second", "XT:1:0"), "GAGAGCTCAGCTAAC"),
        (("Third", None), "CGCTGACCAAAAACGGACAGGGCATTCGGC"),
    ]
    assert_list_equals(parse_fasta(fasta_lines), wanted)
示例#3
0
文件: msa.py 项目: schae234/pypeline
def parse_msa(lines, read_meta=False):
    """Read a multiple sequence alignment from FASTA formatted lines.

    Records are expected to look like this, with the meta information
    being whatever follows the first space of the header:
      >NAME META-INFORMATION
      SEQUENCE

    Returns a dictionary mapping names to sequences. If read_meta is
    True, a tuple (sequences, metas) is returned instead, where metas
    maps each name to its meta information.

    Raises MSAError if the same name occurs more than once. The
    collected sequences are passed to validate_msa before returning.
    """
    sequences = {}
    meta_by_name = {}
    for header, sequence in parse_fasta(lines):
        name, meta = header
        if name in sequences:
            # A dictionary cannot hold two sequences under the same name.
            raise MSAError("Duplicate names found, cannot be represented as MSA: " + name)
        sequences[name] = sequence
        meta_by_name[name] = meta

    validate_msa(sequences)
    return (sequences, meta_by_name) if read_meta else sequences
示例#4
0
def parse_msa(lines, read_meta=False):
    """Parse FASTA formatted lines into a name -> sequence dictionary.

    Each record is expected in the form
      >NAME META-INFORMATION
      SEQUENCE
    where the meta information is the text after the first space in
    the header line.

    By default only the dictionary of sequences is returned; when
    read_meta is True the result is the pair (sequences, metas), with
    metas keyed by the same names.

    A repeated name raises MSAError, and the finished dictionary is
    handed to validate_msa before it is returned.
    """
    alignment = {}
    meta_info = {}
    for record in parse_fasta(lines):
        (name, meta), sequence = record
        if name in alignment:
            # Names double as dictionary keys, so they must be unique.
            message = "Duplicate names found, cannot be represented as MSA: "
            raise MSAError(message + name)
        alignment[name] = sequence
        meta_info[name] = meta

    validate_msa(alignment)
    if not read_meta:
        return alignment
    return alignment, meta_info
示例#5
0
def test_parse_fasta__single_record():
    """Parse one record whose sequence spans two lines."""
    records = parse_fasta([
        ">single\n",
        "TGTTCTCCACCGTGCACAAC\n",
        "CCTTCATCCA\n",
    ])
    # The two sequence lines are expected to be joined into one string.
    wanted = (("single", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")
    assert_list_equals(records, [wanted])
示例#6
0
def test_parse_fasta__no_records():
    """Parsing an empty list of lines is expected to give no records."""
    no_lines = []
    records = parse_fasta(no_lines)
    assert_list_equals(records, [])
示例#7
0
def test_parse_fasta__empty_name__alone():
    """Consume parse_fasta for a single record whose header is just '>'."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta([">\n", "ACGT\n"])
    list(records)
示例#8
0
def test_parse_fasta__missing_name__alone():
    """Consume parse_fasta for a sequence line with no header before it."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta(["ACGT\n"])
    list(records)
示例#9
0
def test_parse_fasta__empty_record__middle():
    """Consume parse_fasta where the middle header has no sequence lines."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta([
        ">fasta0\n",
        "ACGT\n",
        ">fasta1\n",  # immediately followed by the next header
        ">fasta2\n",
        "AGTC\n",
    ])
    list(records)
示例#10
0
def test_parse_empty_record_last():
    """Consume parse_fasta where the final header has no sequence lines."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta([">fasta1\n", "ACGT\n", ">fasta2\n"])
    list(records)
示例#11
0
def test_parse_fasta__empty_record__middle():
    """Exhaust parse_fasta on input whose second record has no sequence."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    complete_before = [">fasta0\n", "ACGT\n"]
    header_only = [">fasta1\n"]
    complete_after = [">fasta2\n", "AGTC\n"]
    list(parse_fasta(complete_before + header_only + complete_after))
示例#12
0
def test_parse_fasta__empty_record_name_only__first():
    """Consume parse_fasta where the first header has no sequence lines."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    lines = [">fasta1\n", ">fasta2\n", "AGTC\n"]
    records = parse_fasta(lines)
    list(records)
示例#13
0
def test_parse_fasta__empty_record_name_only__nothing_else():
    """Consume parse_fasta for input consisting of one header line only."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    lines = [">fasta1\n"]
    records = parse_fasta(lines)
    list(records)
示例#14
0
def test_parse_fasta__single_record():
    """Check the parsed result for a single two-line FASTA record."""
    header = ">single\n"
    sequence_lines = ["TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n"]
    # Expected entry is ((name, meta), sequence) with meta set to None.
    wanted = [(("single", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")]
    assert_list_equals(parse_fasta([header] + sequence_lines), wanted)
示例#15
0
def test_parse_fasta__no_records():
    """Check that no input lines give an empty list of records."""
    expected = []
    assert_list_equals(parse_fasta([]), expected)
示例#16
0
def test_parse_fasta__empty_record_name_only__nothing_else():
    """Exhaust parse_fasta on a lone header line with no sequence."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    only_header = ">fasta1\n"
    list(parse_fasta([only_header]))
示例#17
0
def test_parse_fasta__empty_record_name_only__first():
    """Exhaust parse_fasta on input whose first record has no sequence."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    header_only = [">fasta1\n"]
    complete = [">fasta2\n", "AGTC\n"]
    list(parse_fasta(header_only + complete))
示例#18
0
def test_parse_fasta__missing_name__alone():
    """Exhaust parse_fasta on a lone sequence line without any header."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    headerless_sequence = "ACGT\n"
    list(parse_fasta([headerless_sequence]))
示例#19
0
def test_parse_empty_record_last():
    """Exhaust parse_fasta on input that ends with a header line."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    complete = [">fasta1\n", "ACGT\n"]
    header_only = [">fasta2\n"]
    list(parse_fasta(complete + header_only))
示例#20
0
def test_parse_fasta__missing_name__with_others():
    """Consume parse_fasta where a headerless sequence precedes a record."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta([
        "ACGT\n",  # sequence line with no header before it
        ">Foo\n",
        "ACGGTA\n",
    ])
    list(records)
示例#21
0
def test_parse_fasta__missing_name__with_others():
    """Exhaust parse_fasta on a headerless sequence plus a full record."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    headerless = ["ACGT\n"]
    complete = [">Foo\n", "ACGGTA\n"]
    list(parse_fasta(headerless + complete))
示例#22
0
def test_parse_fasta__empty_name__alone():
    """Exhaust parse_fasta on one record whose header is a bare '>'."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    bare_header = ">\n"
    sequence = "ACGT\n"
    list(parse_fasta([bare_header, sequence]))
示例#23
0
def test_parse_fasta__empty_name__with_others():
    """Consume parse_fasta where a bare '>' header precedes a full record."""
    # NOTE(review): nothing is asserted here; presumably an expected-error
    # decorator belongs on this test -- confirm against the original file.
    records = parse_fasta([
        ">\n",  # header with nothing after the marker
        "ACGT\n",
        ">Foo\n",
        "ACGGTA\n",
    ])
    list(records)
示例#24
0
def test_parse_fasta__empty_name__with_others():
    """Exhaust parse_fasta on a bare-header record plus a full record."""
    # NOTE(review): no assertion or expected exception is visible in this
    # test; verify what outcome it is meant to check.
    unnamed = [">\n", "ACGT\n"]
    named = [">Foo\n", "ACGGTA\n"]
    list(parse_fasta(unnamed + named))