Пример #1
0
def test_parse_fasta__multiple_records():
    # Three records: two with a sequence split over two lines, and one whose
    # header carries extra text after the name ("XT:1:0").
    fasta_lines = [
        ">first\n",
        "TGTTCTCCACCGTGCACAAC\n",
        "CCTTCATCCA\n",
        ">Second XT:1:0\n",
        "GAGAGCTCAGCTAAC\n",
        ">Third\n",
        "CGCTGACCAAAAACGGACAG\n",
        "GGCATTCGGC\n",
    ]
    # Each expected item is ((name, meta), sequence); split sequences are
    # expected back as a single joined string, and meta is None when the
    # header holds only a name.
    wanted = [
        (("first", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA"),
        (("Second", "XT:1:0"), "GAGAGCTCAGCTAAC"),
        (("Third", None), "CGCTGACCAAAAACGGACAGGGCATTCGGC"),
    ]
    assert_list_equals(parse_fasta(fasta_lines), wanted)
Пример #2
0
def test_parse_fasta__multiple_records():
    # Input is laid out one record per row: header line, then sequence lines.
    raw = [
        ">first\n", "TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n",
        ">Second XT:1:0\n", "GAGAGCTCAGCTAAC\n",
        ">Third\n", "CGCTGACCAAAAACGGACAG\n", "GGCATTCGGC\n",
    ]
    # Expected ((name, meta), sequence) tuples, in input order.
    first = (("first", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")
    second = (("Second", "XT:1:0"), "GAGAGCTCAGCTAAC")
    third = (("Third", None), "CGCTGACCAAAAACGGACAGGGCATTCGGC")
    assert_list_equals(parse_fasta(raw), [first, second, third])
Пример #3
0
def parse_msa(lines, read_meta=False):
    """Parse an MSA from a file/list of lines in FASTA format.

    Records are expected to have the form
      >NAME META-INFORMATION
      SEQUENCE
    where META-INFORMATION is the text following the first space in the
    header of a sequence.

    Returns a dictionary mapping names to sequences. If read_meta is true,
    a tuple (sequences, metas) is returned instead, where metas maps each
    name to the meta information of that record.

    Raises MSAError if the same name occurs more than once. The collected
    sequences are passed to validate_msa before anything is returned.
    """
    sequences = {}
    meta_by_name = {}
    for header, sequence in parse_fasta(lines):
        name, meta = header
        if name in sequences:
            # A dictionary cannot hold two sequences under the same name.
            raise MSAError("Duplicate names found, cannot be represented as MSA: " + name)
        sequences[name] = sequence
        meta_by_name[name] = meta

    validate_msa(sequences)
    return (sequences, meta_by_name) if read_meta else sequences
Пример #4
0
def parse_msa(lines, read_meta=False):
    """Build a name -> sequence dictionary from a FASTA-formatted MSA.

    The input is a file/list of lines with records of the form
      >NAME META-INFORMATION
      SEQUENCE
    META-INFORMATION being whatever follows the first space in the header.

    With read_meta left false only the name -> sequence dictionary is
    returned; with read_meta true, a second dictionary mapping each name to
    its meta information is returned alongside it.

    An MSAError is raised on duplicate names, and validate_msa is called on
    the sequences before returning.
    """
    names_to_seqs, names_to_meta = {}, {}
    for (record_name, record_meta), record_seq in parse_fasta(lines):
        if record_name in names_to_seqs:
            # Names must be unique to serve as dictionary keys.
            message = "Duplicate names found, cannot be represented as MSA: "
            raise MSAError(message + record_name)
        names_to_meta[record_name] = record_meta
        names_to_seqs[record_name] = record_seq

    validate_msa(names_to_seqs)
    if not read_meta:
        return names_to_seqs
    return names_to_seqs, names_to_meta
Пример #5
0
def test_parse_fasta__single_record():
    # One record whose sequence spans two lines; the expected result is a
    # single ((name, meta), sequence) item with the two lines joined.
    fasta_lines = [
        ">single\n",
        "TGTTCTCCACCGTGCACAAC\n",
        "CCTTCATCCA\n",
    ]
    wanted = [(("single", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")]
    assert_list_equals(parse_fasta(fasta_lines), wanted)
Пример #6
0
def test_parse_fasta__no_records():
    # No input lines are expected to give no records.
    no_lines = []
    assert_list_equals(parse_fasta(no_lines), [])
Пример #7
0
def test_parse_fasta__empty_name__alone():
    # A lone record whose header line is just ">" with no name.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    records = parse_fasta([">\n", "ACGT\n"])
    list(records)  # force the parser to consume all input
Пример #8
0
def test_parse_fasta__missing_name__alone():
    # Sequence data with no ">" header line before it.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    headerless = ["ACGT\n"]
    records = parse_fasta(headerless)
    list(records)  # force the parser to consume all input
Пример #9
0
def test_parse_fasta__empty_record__middle():
    # ">fasta1" is followed directly by the next header, i.e. it has no
    # sequence lines, while the records around it do.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    fasta_lines = [
        ">fasta0\n", "ACGT\n",
        ">fasta1\n",
        ">fasta2\n", "AGTC\n",
    ]
    list(parse_fasta(fasta_lines))
Пример #10
0
def test_parse_empty_record_last():
    # The final header (">fasta2") has no sequence lines after it.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    fasta_lines = [
        ">fasta1\n", "ACGT\n",
        ">fasta2\n",
    ]
    list(parse_fasta(fasta_lines))
Пример #11
0
def test_parse_fasta__empty_record__middle():
    # Middle record (">fasta1") consists of a header only.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    with_sequence_before = [">fasta0\n", "ACGT\n"]
    header_only = [">fasta1\n"]
    with_sequence_after = [">fasta2\n", "AGTC\n"]
    raw = with_sequence_before + header_only + with_sequence_after
    list(parse_fasta(raw))
Пример #12
0
def test_parse_fasta__empty_record_name_only__first():
    # The first header (">fasta1") is immediately followed by another header.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    fasta_lines = [">fasta1\n", ">fasta2\n", "AGTC\n"]
    records = parse_fasta(fasta_lines)
    list(records)  # force the parser to consume all input
Пример #13
0
def test_parse_fasta__empty_record_name_only__nothing_else():
    # The whole input is a single header line with no sequence.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    only_header = [">fasta1\n"]
    list(parse_fasta(only_header))
Пример #14
0
def test_parse_fasta__single_record():
    # Single record, sequence given on two lines.
    raw = [">single\n", "TGTTCTCCACCGTGCACAAC\n", "CCTTCATCCA\n"]
    # Expected: one ((name, meta), sequence) item holding the joined sequence.
    record = (("single", None), "TGTTCTCCACCGTGCACAACCCTTCATCCA")
    assert_list_equals(parse_fasta(raw), [record])
Пример #15
0
def test_parse_fasta__no_records():
    # Parsing an empty list of lines is expected to give an empty result.
    parsed = parse_fasta([])
    assert_list_equals(parsed, [])
Пример #16
0
def test_parse_fasta__empty_record_name_only__nothing_else():
    # Input holds one header line and nothing else.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    records = parse_fasta([">fasta1\n"])
    list(records)  # force the parser to consume all input
Пример #17
0
def test_parse_fasta__empty_record_name_only__first():
    # Header-only record comes first, followed by a record with a sequence.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    header_only = [">fasta1\n"]
    complete = [">fasta2\n", "AGTC\n"]
    list(parse_fasta(header_only + complete))
Пример #18
0
def test_parse_fasta__missing_name__alone():
    # Only a sequence line is supplied; there is no header line at all.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    list(parse_fasta(["ACGT\n"]))
Пример #19
0
def test_parse_empty_record_last():
    # A complete record followed by a trailing header without a sequence.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    complete = [">fasta1\n", "ACGT\n"]
    header_only = [">fasta2\n"]
    list(parse_fasta(complete + header_only))
Пример #20
0
def test_parse_fasta__missing_name__with_others():
    # Input starts with sequence data that has no header; a regular record
    # (">Foo") follows.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    fasta_lines = [
        "ACGT\n",
        ">Foo\n", "ACGGTA\n",
    ]
    list(parse_fasta(fasta_lines))
Пример #21
0
def test_parse_fasta__missing_name__with_others():
    # Headerless sequence line first, then a normal record.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    headerless = ["ACGT\n"]
    named = [">Foo\n", "ACGGTA\n"]
    records = parse_fasta(headerless + named)
    list(records)  # force the parser to consume all input
Пример #22
0
def test_parse_fasta__empty_name__alone():
    # The header is a bare ">" (no name), followed by one sequence line.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    nameless = [
        ">\n",
        "ACGT\n",
    ]
    list(parse_fasta(nameless))
Пример #23
0
def test_parse_fasta__empty_name__with_others():
    # First record has a bare ">" header; the second (">Foo") is named.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    fasta_lines = [
        ">\n", "ACGT\n",
        ">Foo\n", "ACGGTA\n",
    ]
    list(parse_fasta(fasta_lines))
Пример #24
0
def test_parse_fasta__empty_name__with_others():
    # A nameless record followed by a named one.
    # NOTE(review): nothing is asserted here; presumably an
    # expected-exception decorator belongs on this test -- confirm.
    nameless = [">\n", "ACGT\n"]
    named = [">Foo\n", "ACGGTA\n"]
    records = parse_fasta(nameless + named)
    list(records)  # force the parser to consume all input