示例#1
0
def test_selection_with_preferred_sources():
    """Check how ``preferred_source_ids`` changes the selected read indices.

    A single read with source id 3 is merged with three reads that have
    source id 1.  With ``max_cov=2`` and bridging switched on, the expected
    selection is {1, 2, 3} when no source is preferred and {0, 1, 3} when
    source id 3 is preferred.
    """
    combined = string_to_readset("""
	  1        1
	""", source_id=3)
    extra_reads = string_to_readset("""
	  1111
	     111
	        1111
	""",
                                    source_id=1)

    # Merge the source-1 reads into the read set that holds the source-3 read.
    for extra in extra_reads:
        combined.add(extra)

    # No preference given.
    chosen = readselection(
        combined, max_cov=2, preferred_source_ids=None, bridging=True
    )
    assert chosen == {1, 2, 3}, str(chosen)

    # Source id 3 preferred.
    chosen = readselection(
        combined, max_cov=2, preferred_source_ids={3}, bridging=True
    )
    assert chosen == {0, 1, 3}, str(chosen)
示例#2
0
def test_selection():
    """Run ``readselection`` on one fixed read set with growing ``max_cov``.

    The expected index sets are {1, 5} for coverage 1, {1, 3, 5} for
    coverage 2 and {1, 3, 5, 7} for coverage 3, the last one both with and
    without bridging.
    """
    readset = string_to_readset("""
	  1  1
	  00
	  0   1
	  10  1
	  1   1
	    11
	  0   1
	  1    1
	""")

    def pick(max_cov, bridging):
        # Every call in this test shares the read set and uses no preferred
        # sources; only the coverage limit and the bridging flag vary.
        return readselection(readset,
                             max_cov=max_cov,
                             preferred_source_ids=None,
                             bridging=bridging)

    chosen = pick(1, False)
    assert chosen == {1, 5}

    chosen = pick(2, False)
    assert chosen == {1, 3, 5}, str(chosen)

    chosen = pick(3, False)
    assert chosen == {1, 3, 5, 7}, str(chosen)

    chosen = pick(3, True)
    # Note kept from the original author: bridging is not expected to change
    # the outcome here.  Each position has to be covered at least once before
    # bridging starts, and by then the reads {1, 3, 5, 7} already reach
    # coverage 3, so the expectation is the same as without bridging.
    assert chosen == {1, 3, 5, 7}, str(chosen)
示例#3
0
def test_components_of_readselection():
    """Compare the selection with and without bridging at ``max_cov=2``.

    Without bridging the expected indices are {0, 1, 2, 3}; with bridging
    they are {0, 1, 4}.
    """
    readset = string_to_readset("""
	  111
	     000
	  00
	      00
	   1   1
	""")

    plain = readselection(readset, max_cov=2, bridging=False)
    assert plain == {0, 1, 2, 3}, str(plain)
    # A disabled check used to sit here:
    #   assert len(set(new_components.values())) == 2

    bridged = readselection(readset, max_cov=2, bridging=True)
    assert bridged == {0, 1, 4}, str(bridged)
示例#4
0
def test_bridging():
    """Check that enabling bridging changes the selection at ``max_cov=2``.

    Without bridging the expected indices are {0, 1, 2, 3, 4, 5}; with
    bridging the expected indices are {0, 3, 5, 6}.
    """
    readset = string_to_readset("""
	  11
	  00
	    11
	    00
	      11
	      00
	  1    1
	""")

    plain = readselection(readset, max_cov=2, bridging=False)
    assert plain == {0, 1, 2, 3, 4, 5}

    bridged = readselection(readset, max_cov=2, bridging=True)
    # Open question from the original author: it is unclear why read 0 is
    # selected here rather than read 1.
    assert bridged == {0, 3, 5, 6}
示例#5
0
def test_selection2():
    """Expect indices {0, 1, 2, 3} for ``max_cov=4`` without bridging."""
    readset = string_to_readset("""
	  1111
	     111
	     1  111
	     1     11
	    1      11
	""")
    chosen = readselection(readset, max_cov=4, bridging=False)
    assert chosen == {0, 1, 2, 3}, str(chosen)
示例#6
0
def select_reads(readset, max_coverage, preferred_source_ids):
    """Return the subset of ``readset`` chosen by ``readselection``.

    ``readselection`` is called with the read set, ``max_coverage`` and
    ``preferred_source_ids``; the indices it returns are passed to
    ``readset.subset``.  An info message is logged before the selection, and
    another one afterwards with the number of selected reads and the number
    of positions they cover.

    Returns:
        The result of ``readset.subset`` for the selected indices.
    """
    logger.info(
        "Reducing coverage to at most %dX by selecting most informative reads ...",
        max_coverage,
    )

    kept_indices = readselection(readset, max_coverage, preferred_source_ids)
    kept = readset.subset(kept_indices)

    read_count = len(kept)
    variant_count = len(kept.get_positions())
    logger.info("Selected %d reads covering %d variants", read_count, variant_count)

    return kept
示例#7
0
def eprint(*args, **kwargs):
    """Print to standard error, taking the same arguments as ``print``.

    Args:
        *args: Objects to print, forwarded unchanged to ``print``.
        **kwargs: Keyword arguments forwarded to ``print`` (``sep``, ``end``,
            ``flush``, ...).  ``file`` defaults to ``sys.stderr`` but may be
            overridden by the caller.
    """
    # setdefault instead of a hard-coded ``file=sys.stderr``: an explicit
    # ``file=`` from the caller would otherwise collide with it and raise
    # "TypeError: got multiple values for keyword argument 'file'".
    kwargs.setdefault("file", sys.stderr)
    print(*args, **kwargs)


# Debugging aid (disabled): announce the input read set.
#print('INPUT READ SET')

# Positional command-line arguments: the GFA file name, then max_gap and
# max_coverage, both parsed as integers.  Nothing is validated here, so a
# missing argument raises IndexError and a non-integer value raises ValueError.
gfa_filename = sys.argv[1]
max_gap = int(sys.argv[2])
max_coverage = int(sys.argv[3])

# Build the read set from the GFA file, then keep only the reads whose
# length is at least 2.
readset = gfa_to_readset(gfa_filename, max_gap)
readset = readset.subset(
    [i for i, read in enumerate(readset) if len(read) >= 2])

# readselection gets max_coverage as its second argument and returns the
# indices that are then used to restrict the read set.
selected_indices = readselection(readset, max_coverage)
selected_reads = readset.subset(selected_indices)

# Total of len(read) over all selected reads.
readset_length = 0
for read in selected_reads:
    readset_length += len(read)

# Debugging aid (disabled): dump the selected reads.
#print(selected_reads)


def bipartition(reads):
    positions = reads.get_positions()
    # create genotypes over your variants: all heterozygous (=1)
    genotypes = canonic_index_list_to_biallelic_gt_list([1] * len(positions))
    # genotype likelihoods are None
    genotype_likelihoods = [None] * len(positions)