Exemplo n.º 1
0
def diploid_sample_from_labels(ls=None,
                               ref=None,
                               hp1=None,
                               hp2=None):
    """Build a mock `medaka.common.Sample` for a diploid scenario.

    Each position receives a one-hot row in `label_probs`: the column
    that `ls._encoding` assigns to the sorted pair of haplotype symbols
    at that position is set to 1 and every other column stays 0.

    :param ls: label scheme object; `ls._decoding` fixes the number of
        columns and `ls._encoding` maps sorted symbol tuples to columns.
    :param ref: reference string.
    :param hp1: first haplotype string, same length as `ref`.
    :param hp2: second haplotype string, same length as `ref`.

    :returns: tuple `(sample, ref)` with `ref` passed back unchanged.
    """
    assert len(ref) == len(hp1) == len(hp2)

    positions = mock_positions_array(ref)

    # A real network would emit float probabilities; for mocking
    # purposes hard 0./1. values are sufficient.
    n_rows = len(positions)
    n_classes = len(ls._decoding)
    one_hot = np.zeros((n_rows, n_classes))

    for idx, _ in enumerate(ref):
        # The encoding is keyed on the sorted pair, so the order of the
        # two haplotypes does not matter.
        genotype = tuple(sorted((hp1[idx], hp2[idx])))
        column = ls._encoding[genotype]
        one_hot[idx, column] = 1

    sample = common.Sample(
        ref_name='contig1', features=None, labels=None, ref_seq=None,
        positions=positions, label_probs=one_hot)
    return sample, ref
Exemplo n.º 2
0
 def test_decode_consensus(self):
     """Check that network outputs are decoded to the expected sequence."""
     num_classes = 13  # 3 elements per base * 4 bases + *
     # (row, class index, probability) of the single non-zero entry in
     # each row; the trailing comment gives the label it decodes to.
     hot_entries = (
         (0, 10, 0.9),   # (T, 1)
         (1, 5, 0.8),    # (C, 2)
         (2, 0, 0.81),   # (*, 1)
         (3, 3, 0.95),   # (A, 3)
         (4, 8, 0.9),    # (G, 2)
         (5, 5, 0.9),    # (C, 2)
     )
     label_probs = np.zeros([6, num_classes])
     for row, column, prob in hot_entries:
         label_probs[row, column] = prob

     mock = common.Sample(None, None, None, None, None, label_probs)
     self.assertEqual('TCCAAAGGCC', self.ls.decode_consensus(mock))
Exemplo n.º 3
0
def haploid_sample_from_labels(ls=None,
                               ref=None,
                               pri=None,
                               sec=None,
                               pri_prob=0.6,
                               sec_prob=0.3):
    """Build a mock `medaka.common.Sample` for a haploid scenario.

    For every position the primary symbol gets `pri_prob` and, when a
    secondary sequence is given, the secondary symbol gets `sec_prob`.
    Whatever is left to make the row sum to 1 is assigned to one more
    label: the reference symbol if its column is still zero, otherwise
    the first column that is still zero.

    :param ls: label scheme object; `ls._decoding` fixes the number of
        columns and `ls._encoding` maps 1-tuples of symbols to columns.
    :param ref: reference string.
    :param pri: primary predicted sequence, same length as `ref`.
    :param sec: optional secondary predicted sequence, same length as
        `ref`; must differ from `pri` at every position.
    :param pri_prob: probability given to the primary symbol. When
        `sec` is None, `sec_prob` is added to it.
    :param sec_prob: probability given to the secondary symbol.

    :returns: tuple `(sample, ref)` with `ref` passed back unchanged.
    """
    assert len(ref) == len(pri)
    has_secondary = sec is not None
    if has_secondary:
        assert len(ref) == len(sec)

    positions = mock_positions_array(ref)

    label_probs = np.zeros((len(positions), len(ls._decoding)))

    if not has_secondary:
        # Without a secondary call its share goes to the primary.
        pri_prob = pri_prob + sec_prob

    for idx, pri_symbol in enumerate(pri):
        # `row` is a view: writes land directly in `label_probs`.
        row = label_probs[idx]
        row[ls._encoding[(pri_symbol, )]] = pri_prob
        if has_secondary:
            sec_symbol = sec[idx]
            assert sec_symbol != pri_symbol
            row[ls._encoding[(sec_symbol, )]] = sec_prob

        # Hand the remaining probability mass to one further label,
        # preferring the reference symbol when it is neither the
        # primary nor the secondary call.
        unused = np.flatnonzero(row == 0)
        ref_col = ls._encoding[(ref[idx], )]
        spare_col = ref_col if ref_col in unused else unused[0]
        row[spare_col] = 1 - np.sum(row)

    sample = common.Sample(
        ref_name='contig1', features=None, labels=None, ref_seq=None,
        positions=positions, label_probs=label_probs)
    return sample, ref
Exemplo n.º 4
0
def diploid_zygosity_sample_from_labels(ls=None,
                                        ref=None,
                                        pri=None,
                                        sec=None,
                                        pri_prob=None,
                                        sec_prob=None,
                                        het=None):
    """Build a mock `medaka.common.Sample` with a zygosity column.

    For every position the column of the primary symbol is set to
    `pri_prob`, then the column of the secondary symbol to `sec_prob`,
    and finally the last column to `int(het[i])`. Later writes win if
    two of these hit the same column.

    :param ls: label scheme object; `ls._decoding` fixes the number of
        columns and `ls._unitary_encoding` maps 1-tuples of symbols to
        columns.
    :param ref: reference string.
    :param pri: primary sequence, same length as `ref`.
    :param sec: secondary sequence, same length as `ref`.
    :param pri_prob: value written for the primary symbol.
    :param sec_prob: value written for the secondary symbol.
    :param het: per-position flags, indexable up to `len(ref)`;
        presumably marks heterozygous positions (confirm with callers).

    :returns: tuple `(sample, ref)` with `ref` passed back unchanged.
    """
    assert len(ref) == len(pri) == len(sec)

    positions = mock_positions_array(ref)

    label_probs = np.zeros((len(positions), len(ls._decoding)))

    for idx, _ in enumerate(ref):
        # `row` is a view: writes land directly in `label_probs`.
        row = label_probs[idx]

        pri_col = ls._unitary_encoding[(pri[idx], )]
        row[pri_col] = pri_prob
        sec_col = ls._unitary_encoding[(sec[idx], )]
        row[sec_col] = sec_prob

        # The final column carries the het flag as 0/1.
        row[-1] = int(het[idx])

    sample = common.Sample(
        ref_name='contig1', features=None, labels=None, ref_seq=None,
        positions=positions, label_probs=label_probs)

    return sample, ref

    def test_snp_metainfo(self):
        """The label scheme's `snp_metainfo` must have 7 entries."""
        n_entries = len(self.ls.snp_metainfo)
        self.assertEqual(n_entries, 7)

    def test_variant_metainfo(self):
        """The label scheme's `variant_metainfo` must have 9 entries."""
        n_entries = len(self.ls.variant_metainfo)
        self.assertEqual(n_entries, 9)