示例#1
0
def test_contig_coverage_report_offset_reads(projects, sequence_report):
    """ Coverage rows begin after an offset into the contig.

    The first 50 seed positions carry empty counts and the call passes
    consensus_offset=50, so the first row after the header is expected
    at query position 51.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    contig_seq = reference[50:150]
    # List multiplication repeats one shared tuple in each section.
    empty_nucs = [('C', SeedNucleotide())] * 50
    counted_nucs = (
        [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,51,51,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,52,52,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,53,53,0,1,M
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        consensus_offset=50,
        seed_nucs=empty_nucs + counted_nucs)

    written = output.getvalue()
    # Only the start of the report is checked.
    assert written[:len(expected_head)] == expected_head
示例#2
0
def test_write_sequence_coverage_counts_with_some_deletions(
        projects, sequence_report):
    """ A single position where part of the reads were deletions.

    The sixth position is counted as four G's plus two deletions; its
    row is expected to show dels=2 and coverage=6.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    contig_seq = reference[100:150]
    nuc_counts = [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq)
    nuc_counts[5] = ('G', SeedNucleotide(Counter({'G': 4, '-': 2})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,5,105,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,6,106,2,6,M
1-my-contig,HIV1-B-FR-K03455-seed,7,107,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,8,108,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,9,109,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,10,110,0,1,M
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        seed_nucs=nuc_counts)

    written = output.getvalue()
    # Only the start of the report is checked.
    assert written[:len(expected_head)] == expected_head
示例#3
0
def test_write_sequence_coverage_counts_with_coverage(projects,
                                                      sequence_report):
    """ Per-position counts show up in the coverage column.

    The contig joins reference slices [100:150] and [1000:1050]. The
    third and ninety-ninth positions carry counts of 4 and 5, and both
    the head and the tail of the report are checked.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    contig_seq = reference[100:150] + reference[1000:1050]
    nuc_counts = [('C', SeedNucleotide(Counter({'C': 1})))] * 100
    nuc_counts[2] = ('G', SeedNucleotide(Counter({'G': 4})))
    nuc_counts[98] = ('T', SeedNucleotide(Counter({'T': 5})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,4,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
"""
    expected_tail = """\
1-my-contig,HIV1-B-FR-K03455-seed,98,1048,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,99,1049,0,5,M
1-my-contig,HIV1-B-FR-K03455-seed,100,1050,0,1,M
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        seed_nucs=nuc_counts)

    written = output.getvalue()
    # Compare both ends of the report; the middle rows are not checked.
    assert written[:len(expected_head)] == expected_head
    assert written[-len(expected_tail):] == expected_tail
示例#4
0
def choose_consensus(nuc_row: dict) -> str:
    """ Pick the consensus symbol for one row of nucleotide counts.

    :param nuc_row: mapping that holds a 'coverage' entry, an 'ins' entry,
        and one count per symbol in SeedNucleotide.COUNTED_NUCS, with the
        '-' symbol stored under the 'del' key. Values must be convertible
        with int().
    :return: 'x' when coverage is below 100; otherwise the result of
        SeedNucleotide.get_consensus(MAX_CUTOFF), with 'i' appended when
        the 'ins' count is more than half of the coverage.
    """
    depth = int(nuc_row['coverage'])
    if depth < 100:
        # Low coverage: report a placeholder without reading other columns.
        return 'x'

    tally = SeedNucleotide()
    for symbol in tally.COUNTED_NUCS:
        # Deletions are counted as '-' but stored in the row as 'del'.
        column = symbol if symbol != '-' else 'del'
        tally.count_nucleotides(symbol, int(nuc_row[column]))

    result = tally.get_consensus(MAX_CUTOFF)
    insertion_is_majority = int(nuc_row['ins']) > depth / 2
    return result + 'i' if insertion_is_majority else result
示例#5
0
def test_contig_coverage_report_past_reference_start(projects,
                                                     sequence_report):
    """ The contig has five extra bases before the reference begins.

    Those five rows are expected with reference positions -4 through 0
    and a link of U; the rows after them are expected at positions 1, 2
    with a link of M.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    contig_seq = 'CGTAC' + reference[:100]
    nuc_counts = [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq)
    # link is (M)apped, (U)nmapped, or (I)nserted
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,-4,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,2,-3,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,3,-2,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,4,-1,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,5,0,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,6,1,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,7,2,0,1,M
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        seed_nucs=nuc_counts)

    written = output.getvalue()
    # Only the start of the report is checked.
    assert written[:len(expected_head)] == expected_head
示例#6
0
def test_contig_coverage_report_past_reference_end(projects, sequence_report):
    """ The contig has five extra bases after the reference ends.

    The last two reference positions (9718, 9719) are expected with a
    link of M, and the five trailing rows (9720-9724) with a link of U.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    # The expected positions below depend on this reference length.
    assert len(reference) == 9719
    contig_seq = reference[-100:] + 'CGTAC'
    nuc_counts = [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq)
    expected_tail = """\
1-my-contig,HIV1-B-FR-K03455-seed,99,9718,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,100,9719,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,101,9720,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,102,9721,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,103,9722,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,104,9723,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,105,9724,0,1,U
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        seed_nucs=nuc_counts)

    written = output.getvalue()
    # Only the end of the report is checked.
    assert written[-len(expected_tail):] == expected_tail
示例#7
0
def test_write_sequence_coverage_counts_with_insert(projects, sequence_report):
    """ Five extra bases sit in the middle of the contig.

    'ACTGA' is spliced in after the first ten reference bases. Rows 11-15
    are expected with an empty reference position and a link of I, and
    the reference numbering is expected to resume at 111 on row 16.
    """
    coordinate_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(coordinate_name)
    contig_seq = reference[100:110] + 'ACTGA' + reference[110:160]
    nuc_counts = [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq)
    # Give one position inside the inserted bases a distinct count.
    nuc_counts[12] = ('T', SeedNucleotide(Counter({'T': 4})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,5,105,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,6,106,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,7,107,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,8,108,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,9,109,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,10,110,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,11,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,12,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,13,,0,4,I
1-my-contig,HIV1-B-FR-K03455-seed,14,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,15,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,16,111,0,1,M
"""

    sequence_report.projects = projects
    output = StringIO()
    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts(
        '1-my-contig',
        coordinate_name,
        contig_seq,
        seed_nucs=nuc_counts)

    written = output.getvalue()
    # Only the start of the report is checked.
    assert written[:len(expected_head)] == expected_head