Пример #1
0
 def pssms_for_K_mer(self, K_mer):
     """
     Build the four PSSMs derived from a K-mer: with and without a gap, each
     paired with its complementary-strand counterpart.

     The ungapped and gapped nucleotide distributions are both obtained from
     self.nucleo_dist_from_K_mer first; each is then converted to log scores
     and to the matching complementary scores.

     @return: pssm, comp_pssm, gap_pssm, gap_comp_pssm
     """
     distributions = (
         self.nucleo_dist_from_K_mer(K_mer, include_gap=False),
         self.nucleo_dist_from_K_mer(K_mer, include_gap=True),
     )
     pssms = []
     for distribution in distributions:
         log_scores = hmm.calculate_log_scores(distribution)
         pssms.append(log_scores)
         pssms.append(hmm.calculate_complementary_scores(log_scores))
     # Order is: ungapped, ungapped complement, gapped, gapped complement.
     return tuple(pssms)
Пример #2
0
def evaluate_gap_position(
  L_mer,
  gap_index,
  sequences,
  bg_L_mer_scores,
  pssm_scores,
  comp_pssm_scores,
  options
):
    """
    Evaluate an L-mer with a gap in a particular position.

    A gapped PSSM (and its complement) is built from the L-mer using
    options.pseudo_count_for_L_mer_scoring, and both are scored against the
    sequences. Those scores are combined with the caller-supplied ungapped
    scores by taking the element-wise maximum over the four PSSMs.

    @arg L_mer: the L-mer being evaluated; its length normalises the score.
    @arg gap_index: position of the gap, passed to nucleo_dist_from_mer.
    @arg sequences: sequences handed to hmm.max_scores_in_sequences.
    @arg bg_L_mer_scores: background scores handed to
        hmm.max_scores_in_sequences.
    @arg pssm_scores: precomputed max scores for the ungapped PSSM.
    @arg comp_pssm_scores: precomputed max scores for the ungapped
        complementary PSSM.
    @arg options: must provide pseudo_count_for_L_mer_scoring.
    @return: sum of the non-negative best scores divided by len(L_mer) and by
        the number of best scores.
    """
    def max_scores_for(scoring_matrix):
        # Same sequences and background are used for every PSSM.
        return hmm.max_scores_in_sequences(
            scoring_matrix, sequences, bg_L_mer_scores
        )

    gapped_pssm = hmm.calculate_log_scores(
        nucleo_dist_from_mer(
            L_mer,
            options.pseudo_count_for_L_mer_scoring,
            gap_index=gap_index
        )
    )
    gapped_comp_pssm = hmm.calculate_complementary_scores(gapped_pssm)
    gapped_scores = max_scores_for(gapped_pssm)
    gapped_comp_scores = max_scores_for(gapped_comp_pssm)

    # One row per PSSM; reduce over rows to keep the best score per column.
    stacked = numpy.array(
        [pssm_scores, comp_pssm_scores, gapped_scores, gapped_comp_scores]
    )
    best = stacked.max(axis=0)
    # Negative best scores mean no site was found: they contribute nothing.
    best[best < 0.] = 0.
    total = best.sum()
    score = total / len(L_mer) / len(best)

    logging.debug(
        'Evaluated: %s; gap: %d; score: %f',
        numpy_to_seq(L_mer), gap_index, score
    )
    return score
Пример #3
0
def evaluate_L_mer(L_mer, sequences, bg_L_mer_scores, gap_positions, options):
    """
    Evaluate an L-mer over every candidate gap position and pick the best.

    The ungapped PSSM and its complement are scored against the sequences
    once; those scores are then shared by every evaluate_gap_position call.

    @return: the largest (score, gap_index) tuple under normal tuple ordering.
    @raise ValueError: from max() when gap_positions yields nothing.
    """
    ungapped_pssm = hmm.calculate_log_scores(
        nucleo_dist_from_mer(
            L_mer,
            options.pseudo_count_for_L_mer_scoring,
            gap_index=None
        )
    )
    ungapped_comp_pssm = hmm.calculate_complementary_scores(ungapped_pssm)
    ungapped_scores = hmm.max_scores_in_sequences(
        ungapped_pssm, sequences, bg_L_mer_scores
    )
    ungapped_comp_scores = hmm.max_scores_in_sequences(
        ungapped_comp_pssm, sequences, bg_L_mer_scores
    )

    def scored(position):
        # Pair the score with its gap position so max() reports both.
        position_score = evaluate_gap_position(
            L_mer,
            position,
            sequences,
            bg_L_mer_scores,
            ungapped_scores,
            ungapped_comp_scores,
            options
        )
        return (position_score, position)

    return max(scored(position) for position in gap_positions)
Пример #4
0
    # Fragment of a larger routine (its header is not visible here) that
    # exercises the hmm scoring functions on a small hand-made example.
    #
    # Twelve-row, four-column distribution: rows alternate between putting
    # 0.85 on column 0 and 0.85 on column 1, with 0.05 on the other columns.
    nucleo_dists = N.array([
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
    ])
    logging.info('Calculating log scores')
    pssm_scores = hmm.calculate_log_scores(nucleo_dists)
    logging.info('Calculating complementary scores')
    comp_scores = hmm.calculate_complementary_scores(pssm_scores)
    logging.info('Preprocessing sequence')
    # Test sequence: the values 0..3 repeated four times, followed by a 4.
    # NOTE(review): 4 presumably encodes an unknown base - confirm.
    seq = hmm.preprocess_sequence(
        N.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4]))

    # Log the result of scoring the sequence with each score matrix.
    logging.info('Scoring sequence')
    logging.info(hmm.score_sequence(pssm_scores, seq))
    logging.info('Scoring sequence with complementary scores')
    logging.info(hmm.score_sequence(comp_scores, seq))

    # Log the maximum score in the sequence for each score matrix.
    logging.info(hmm.max_score_in_sequence(pssm_scores, seq))
    logging.info(hmm.max_score_in_sequence(comp_scores, seq))

    # 100 random sequences of 10000 integers each.
    # NOTE(review): R is presumably numpy.random, whose random_integers
    # includes the upper bound (so 4 can occur) and is deprecated - confirm.
    # xrange means this code targets Python 2.
    long_seqs = [R.random_integers(0, 4, size=10000) for i in xrange(100)]
Пример #5
0
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
        [.85, .05, .05, .05],
        [.05, .85, .05, .05],
      ]
    )
    logging.info('Calculating log scores')
    pssm_scores = hmm.calculate_log_scores(nucleo_dists)
    logging.info('Calculating complementary scores')
    comp_scores = hmm.calculate_complementary_scores(pssm_scores)
    logging.info('Preprocessing sequence')
    seq = hmm.preprocess_sequence(N.array([0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,4]))

    logging.info('Scoring sequence')
    logging.info(hmm.score_sequence(pssm_scores, seq))
    logging.info('Scoring sequence with complementary scores')
    logging.info(hmm.score_sequence(comp_scores, seq))

    logging.info(hmm.max_score_in_sequence(pssm_scores, seq))
    logging.info(hmm.max_score_in_sequence(comp_scores, seq))

    long_seqs = [R.random_integers(0,4,size=10000) for i in xrange(100)]
    long_seqs_preprocessed = hmm.preprocess_sequences(long_seqs)