Exemplo n.º 1
0
def test_transfac_pssms():
    transfac_pssms = biopsy.get_transfac_pssm_accessions( biopsy.get_default_transfac_pssm_filter() )
    for p in transfac_pssms:
        print p, biopsy.get_transfac_pssm_name( p )
    print 'Have', len( transfac_pssms ), 'transfac pssms'
    for acc in [ 'R19099', 'M00418' ]:
        print acc, biopsy.get_transfac_pssm_name( acc )
        biopsy.get_pssm( acc )
        print 'Under pssm'
        for under_pssm in biopsy.get_pssm( acc ).get_dist( True, False ):
            print under_pssm
        print 'Under background'
        for under_background in biopsy.get_pssm( acc ).get_dist( False, False ):
            print under_background
Exemplo n.º 2
0
def test_transfac_pssms():
    transfac_pssms = biopsy.get_transfac_pssm_accessions(
        biopsy.get_default_transfac_pssm_filter())
    for p in transfac_pssms:
        print p, biopsy.get_transfac_pssm_name(p)
    print 'Have', len(transfac_pssms), 'transfac pssms'
    for acc in ['R19099', 'M00418']:
        print acc, biopsy.get_transfac_pssm_name(acc)
        biopsy.get_pssm(acc)
        print 'Under pssm'
        for under_pssm in biopsy.get_pssm(acc).get_dist(True, False):
            print under_pssm
        print 'Under background'
        for under_background in biopsy.get_pssm(acc).get_dist(False, False):
            print under_background
Exemplo n.º 3
0
def write_minimal_meme_matrix(out, acc):
    """
    Write the PSSM with accession ``acc`` to ``out`` in minimal MEME format.

    The output consists of a ``MOTIF <name> <acc>`` line, a
    ``letter-probability matrix`` header (``w`` is the number of entries in
    the PSSM's ``dists``, ``nsites`` is its ``sites`` attribute and ``E`` is
    always written as 0) and one row per entry of ``dists`` holding
    ``get_freq(0)`` .. ``get_freq(3)`` with six decimals. Because ``print``
    appends a newline to text that already ends in one, the motif is
    followed by an empty line.

    The minimal MEME format for a motif looks something like::

        MOTIF crp
        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
        0.000000  0.176471  0.000000  0.823529
        0.000000  0.058824  0.647059  0.294118
        0.000000  0.058824  0.000000  0.941176
        0.176471  0.000000  0.764706  0.058824
        0.823529  0.058824  0.000000  0.117647
        0.294118  0.176471  0.176471  0.352941
        0.294118  0.352941  0.235294  0.117647
        0.117647  0.235294  0.352941  0.294118
        0.529412  0.000000  0.176471  0.294118
        0.058824  0.235294  0.588235  0.117647
        0.176471  0.235294  0.294118  0.294118
        0.000000  0.058824  0.117647  0.823529
        0.058824  0.882353  0.000000  0.058824
        0.764706  0.000000  0.176471  0.058824
        0.058824  0.882353  0.000000  0.058824
        0.823529  0.058824  0.058824  0.058824
        0.176471  0.411765  0.058824  0.352941
        0.411765  0.000000  0.000000  0.588235
        0.352941  0.058824  0.000000  0.588235
    """
    pssm_info = biopsy.get_pssm(acc)
    template = (
        "MOTIF %s %s\n"
        "letter-probability matrix: alength= 4 w= %d nsites= %d E= %e\n"
        "%s\n")

    # Gather the header fields in the order the template consumes them.
    motif_name = biopsy.get_pssm_name(acc)
    width = len(pssm_info.dists)
    site_count = pssm_info.sites

    # One text row per distribution, four frequencies per row.
    rows = []
    for dist in pssm_info.dists:
        cells = ["%.6f" % dist.get_freq(base) for base in xrange(4)]
        rows.append('  '.join(cells))

    print >> out, template % (
        motif_name, acc, width, site_count, 0., "\n".join(rows))
Exemplo n.º 4
0
def look_for_matrices(names):
    for name in names:
        print name
        for matrix, factor in find_matrices(name):
            print matrix.acc, matrix.name, factor.acc, factor.name
            logo(dist_for_pssm(biopsy.get_pssm(str(matrix.acc))),
                 '%s-%s' % (name, matrix.acc), 'logos')
Exemplo n.º 5
0
def write_minimal_meme_matrix(out, acc):
    """
    Emit the PSSM identified by ``acc`` on ``out`` as a minimal MEME motif.

    A ``MOTIF`` line carrying the PSSM name and ``acc`` is followed by the
    ``letter-probability matrix`` header, where ``w`` is ``len(dists)``,
    ``nsites`` is the PSSM's ``sites`` value and ``E`` is fixed at 0. Then
    comes one row per entry of ``dists``, each listing ``get_freq(0)``
    through ``get_freq(3)`` to six decimal places. ``print`` adds its own
    newline after the text, so an empty line trails the matrix.

    The minimal MEME format for a motif looks something like::

        MOTIF crp
        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
        0.000000  0.176471  0.000000  0.823529
        0.000000  0.058824  0.647059  0.294118
        0.000000  0.058824  0.000000  0.941176
        0.176471  0.000000  0.764706  0.058824
        0.823529  0.058824  0.000000  0.117647
        0.294118  0.176471  0.176471  0.352941
        0.294118  0.352941  0.235294  0.117647
        0.117647  0.235294  0.352941  0.294118
        0.529412  0.000000  0.176471  0.294118
        0.058824  0.235294  0.588235  0.117647
        0.176471  0.235294  0.294118  0.294118
        0.000000  0.058824  0.117647  0.823529
        0.058824  0.882353  0.000000  0.058824
        0.764706  0.000000  0.176471  0.058824
        0.058824  0.882353  0.000000  0.058824
        0.823529  0.058824  0.058824  0.058824
        0.176471  0.411765  0.058824  0.352941
        0.411765  0.000000  0.000000  0.588235
        0.352941  0.058824  0.000000  0.588235
    """

    def format_row(dist):
        # Four frequencies, six decimals each, separated by two spaces.
        return "  ".join("%.6f" % dist.get_freq(column) for column in xrange(4))

    pssm_info = biopsy.get_pssm(acc)
    header = "MOTIF %s %s\n" "letter-probability matrix: alength= 4 w= %d nsites= %d E= %e\n"
    layout = header + "%s\n"

    # Collect the values in the order the layout string consumes them.
    name = biopsy.get_pssm_name(acc)
    width = len(pssm_info.dists)
    sites = pssm_info.sites
    matrix_text = "\n".join(format_row(dist) for dist in pssm_info.dists)

    print >> out, layout % (name, acc, width, sites, 0.0, matrix_text)
Exemplo n.º 6
0
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True;
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [ 0.0, 0.25, 0.5, 1.0, 2.0 ]:
        # force cache load
        biopsy.get_pssm( sascha_acc )
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm( sascha_acc )
        score = biopsy.score_pssm( p.pssm, sascha_seq )
        (
                bind,
                back,
                cum_bind,
                cum_back,
                odds_ratio,
                cum_odds_ratio,
                p_bind,
                cum_p_bind,
                p_value_p_bind
        ) = biopsy.get_pssm_likelihoods_for_score( p, score )
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods( p, sascha_acc + ': ' + str( pc ), score )
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True;
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        print hits
        print
Exemplo n.º 7
0
def test_likelihoods_indices():
    p = biopsy.get_pssm( 'M00975' )
    dist = p.get_dist( True, False )
    for s in range( len(dist) ):
        score = float(s)/float(len(dist) - 1)
        idx = biopsy.get_likelihood_index( len(dist), score )
        print idx, score
    for score in [ 0.98, 0.99, 1.0 ]:
        print score, biopsy.get_likelihood_index( len(dist), score )
Exemplo n.º 8
0
def test_likelihoods_indices():
    p = biopsy.get_pssm('M00975')
    dist = p.get_dist(True, False)
    for s in range(len(dist)):
        score = float(s) / float(len(dist) - 1)
        idx = biopsy.get_likelihood_index(len(dist), score)
        print idx, score
    for score in [0.98, 0.99, 1.0]:
        print score, biopsy.get_likelihood_index(len(dist), score)
Exemplo n.º 9
0
def test_pssm_pseudo_counts():
    sascha_pssms = biopsy.SequenceVec()
    sascha_acc = 'M00975'
    # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc'
    sascha_seq = 'ttgttgcga'
    sascha_seq = 'ttgttgcaa'
    # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' )
    # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' )
    print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence'
    biopsy.PssmParameters.singleton().use_p_value = True
    # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1;
    for pc in [0.0, 0.25, 0.5, 1.0, 2.0]:
        # force cache load
        biopsy.get_pssm(sascha_acc)
        biopsy.clear_pssm_cache()
        biopsy.PssmParameters.singleton().pseudo_counts = pc
        p = biopsy.get_pssm(sascha_acc)
        score = biopsy.score_pssm(p.pssm, sascha_seq)
        (bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind,
         cum_p_bind,
         p_value_p_bind) = biopsy.get_pssm_likelihoods_for_score(p, score)
        print pc,
        print \
                '%f,%f,%f,%f,%f,%f,%f' \
                % \
                ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind )
        biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score)
        # print 'Trying with standard distributions'
        # biopsy.PssmParameters.singleton().use_cumulative_dists = False;
        # hits = biopsy.HitVec()
        # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits )
        # print hits
        print 'Trying with cumulative distributions'
        biopsy.PssmParameters.singleton().use_cumulative_dists = True
        hits = biopsy.HitVec()
        biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits)
        print hits
        print
Exemplo n.º 10
0
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01')
    pssm_info = biopsy.get_pssm(pssm_acc)
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range(len(seq) - len(pssm_info.pssm) + 1):
        s = biopsy.score_pssm(pssm_info.pssm, seq[i:])
        p_binding = biopsy.get_p_binding(
            biopsy.get_odds_ratio(s, pssm_info.get_dist(True, False),
                                  pssm_info.get_dist(False, False)))
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result)
    print 'Got', len(result), 'hits from', len(seq), 'bases'
    print p_binding
Exemplo n.º 11
0
def test_pssm_score():
    # 'V$AP1_Q2'
    pssm_acc = biopsy.get_transfac_pssm_accession( 'V$DEAF1_01' );
    pssm_info = biopsy.get_pssm( pssm_acc )
    # print pssm_info.pssm
    seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga'
    for i in range( len( seq ) - len( pssm_info.pssm )  + 1 ):
        s = biopsy.score_pssm( pssm_info.pssm, seq[i:] )
        p_binding = biopsy.get_p_binding(
                biopsy.get_odds_ratio(
                        s,
                        pssm_info.get_dist( True, False ),
                        pssm_info.get_dist( False, False ) ) )
        if p_binding > 0.05:
            print i, s, p_binding
    result = biopsy.HitVec()
    p_binding = biopsy.score_pssm_on_sequence( pssm_acc, seq, 0.05, result )
    print 'Got', len( result ), 'hits from', len( seq ), 'bases'
    print p_binding
Exemplo n.º 12
0
def look_for_matrices(names):
    for name in names:
        print name
        for matrix, factor in find_matrices(name):
            print matrix.acc, matrix.name, factor.acc, factor.name
            logo(dist_for_pssm(biopsy.get_pssm(str(matrix.acc))), "%s-%s" % (name, matrix.acc), "logos")
Exemplo n.º 13
0
def logo_for_pssm_name(pssm_name):
    """
    Fetch the PSSM identified by ``pssm_name`` from biopsy and pass it,
    together with ``pssm_name``, to ``logo_for_pssm``.
    """
    # biopsy is imported here, inside the function, not at module level.
    import biopsy
    pssm = biopsy.get_pssm(pssm_name)
    logo_for_pssm(pssm, pssm_name)