def test_pssm_pseudo_counts(): sascha_pssms = biopsy.SequenceVec() sascha_acc = 'M00975' # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc' sascha_seq = 'ttgttgcga' sascha_seq = 'ttgttgcaa' # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' ) # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' ) print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence' biopsy.PssmParameters.singleton().use_p_value = True; # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1; for pc in [ 0.0, 0.25, 0.5, 1.0, 2.0 ]: # force cache load biopsy.get_pssm( sascha_acc ) biopsy.clear_pssm_cache() biopsy.PssmParameters.singleton().pseudo_counts = pc p = biopsy.get_pssm( sascha_acc ) score = biopsy.score_pssm( p.pssm, sascha_seq ) ( bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind, cum_p_bind, p_value_p_bind ) = biopsy.get_pssm_likelihoods_for_score( p, score ) print pc, print \ '%f,%f,%f,%f,%f,%f,%f' \ % \ ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind ) biopsy.plot_likelihoods( p, sascha_acc + ': ' + str( pc ), score ) # print 'Trying with standard distributions' # biopsy.PssmParameters.singleton().use_cumulative_dists = False; # hits = biopsy.HitVec() # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits ) # print hits print 'Trying with cumulative distributions' biopsy.PssmParameters.singleton().use_cumulative_dists = True; hits = biopsy.HitVec() biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits ) print hits print
def test_pssm_score(): # 'V$AP1_Q2' pssm_acc = biopsy.get_transfac_pssm_accession('V$DEAF1_01') pssm_info = biopsy.get_pssm(pssm_acc) # print pssm_info.pssm seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga' for i in range(len(seq) - len(pssm_info.pssm) + 1): s = biopsy.score_pssm(pssm_info.pssm, seq[i:]) p_binding = biopsy.get_p_binding( biopsy.get_odds_ratio(s, pssm_info.get_dist(True, False), pssm_info.get_dist(False, False))) if p_binding > 0.05: print i, s, p_binding result = biopsy.HitVec() p_binding = biopsy.score_pssm_on_sequence(pssm_acc, seq, 0.05, result) print 'Got', len(result), 'hits from', len(seq), 'bases' print p_binding
def test_pssm_score(): # 'V$AP1_Q2' pssm_acc = biopsy.get_transfac_pssm_accession( 'V$DEAF1_01' ); pssm_info = biopsy.get_pssm( pssm_acc ) # print pssm_info.pssm seq = 'tacatcatctgtctgcagtagtctaaccgaccccccccagttttagaagcagactgcatgcggacgggaccgcggatcgcgcggtgcgcctcagtgtacttccgaacgaatgagtcattaatagagcgctatatcgtaactgtctttgacgaagtataccgaaaccgtgcagccagacgtgatccgggcgttgtaaaggcgatcagcgccctaggagtaccatttttgccgtaggcttgcgtctcaaagaccagctggggcgtggtatcactcgtcagtacgatttctgccagatagatagcatagactgaaccttaggcccaatagggacacaattacccgagtgactgactggtctaaggggagtccccccttaaaacgttttacgtaatagcgggctccagaagcaaagcatcggtttgagccccagtactaaacgtttgagtgtttgctctcgtctgataggtaaaccgacaagagaaccaagctcaaggcgcggtaggtgcgccttgcgaactgttgatgccgtgagcgccaccatcccgtgcatcataggcagggagagaagaccacatggccttgcgaccgtatgagctgtttcagattaaatgccaacgggcatggtcggtgtccagcattttttgcagtcagctggtggtacacagtggggacaagaacgcctctggtagatgtcttctgaaggagtaactcatttcgttgaatcgaccttcccttgcgcttgaacgcggacctctagtctctctcgcagactggggtcgaaaatcaaggtagatatggaatgttccgcatgagggtagcgaccggatcgggcgtcaagtatatcctccctgctacgtccccctactagcctcagtccgcctcgaacctaggaagattggccacatcagcttggtggatgcctggtccatacttcagacccgagaatgttagacaggaccccatttggctcctttacgtacgatctatgtagacgcagtga' for i in range( len( seq ) - len( pssm_info.pssm ) + 1 ): s = biopsy.score_pssm( pssm_info.pssm, seq[i:] ) p_binding = biopsy.get_p_binding( biopsy.get_odds_ratio( s, pssm_info.get_dist( True, False ), pssm_info.get_dist( False, False ) ) ) if p_binding > 0.05: print i, s, p_binding result = biopsy.HitVec() p_binding = biopsy.score_pssm_on_sequence( pssm_acc, seq, 0.05, result ) print 'Got', len( result ), 'hits from', len( seq ), 'bases' print p_binding
def test_pssm_pseudo_counts(): sascha_pssms = biopsy.SequenceVec() sascha_acc = 'M00975' # sascha_seq = 'gtaaaccaggctgcctGAgaacttgttgcgaatcc' sascha_seq = 'ttgttgcga' sascha_seq = 'ttgttgcaa' # plot_likelihoods( biopsy.get_pssm( 'M00975' ), 'M00975' ) # plot_likelihoods( biopsy.get_pssm( 'R02146' ), 'R02146' ) print 'Binding,Background,odds,p(binding),cumulative p(binding),Sequence' biopsy.PssmParameters.singleton().use_p_value = True # biopsy.PssmParameters.singleton().binding_background_odds_prior = 1; for pc in [0.0, 0.25, 0.5, 1.0, 2.0]: # force cache load biopsy.get_pssm(sascha_acc) biopsy.clear_pssm_cache() biopsy.PssmParameters.singleton().pseudo_counts = pc p = biopsy.get_pssm(sascha_acc) score = biopsy.score_pssm(p.pssm, sascha_seq) (bind, back, cum_bind, cum_back, odds_ratio, cum_odds_ratio, p_bind, cum_p_bind, p_value_p_bind) = biopsy.get_pssm_likelihoods_for_score(p, score) print pc, print \ '%f,%f,%f,%f,%f,%f,%f' \ % \ ( bind, back, cum_bind, cum_back, p_bind, cum_p_bind, p_value_p_bind ) biopsy.plot_likelihoods(p, sascha_acc + ': ' + str(pc), score) # print 'Trying with standard distributions' # biopsy.PssmParameters.singleton().use_cumulative_dists = False; # hits = biopsy.HitVec() # biopsy.score_pssm_on_sequence( sascha_acc, sascha_seq, 0.001, hits ) # print hits print 'Trying with cumulative distributions' biopsy.PssmParameters.singleton().use_cumulative_dists = True hits = biopsy.HitVec() biopsy.score_pssm_on_sequence(sascha_acc, sascha_seq, 0.001, hits) print hits print