Exemplo n.º 1
0
def score_sequence_set(sequence_set, pssms, p_binding_site):
    """Build one model per entry of ``pssms`` and evaluate them on ``sequence_set``.

    Every element of ``pssms`` is passed to ``build_hmm_from_semi_parsed``
    together with ``p_binding_site`` (as a keyword argument). The resulting
    list of models and ``sequence_set`` are then handed to
    ``calculate_model_counts_on_sequences``, whose return value is returned
    unchanged.
    """
    built_models = []
    for semi_parsed in pssms:
        model = build_hmm_from_semi_parsed(
            semi_parsed, p_binding_site=p_binding_site)
        built_models.append(model)
    return calculate_model_counts_on_sequences(built_models, sequence_set)
Exemplo n.º 2
0
def get_roc_for_sequences(p_binding_site, positive_sequences, negative_sequences, pssms):
    """Return a ``RocCalculator`` updated with ROC data for the given PSSMs.

    One model is built per element of ``pssms`` by calling
    ``build_hmm_from_semi_parsed`` with ``p_binding_site``. The models and
    both sequence collections are passed to ``generate_roc_data``, and its
    result is fed into a freshly created ``RocCalculator`` via ``update_roc``.
    """
    built_models = []
    for semi_parsed in pssms:
        built_models.append(
            build_hmm_from_semi_parsed(semi_parsed, p_binding_site=p_binding_site)
        )

    # The calculator is created before the ROC data is generated, so the
    # helpers run in the same order as in the one-line form of this call.
    calculator = RocCalculator()
    roc_data = generate_roc_data(built_models, positive_sequences, negative_sequences)
    update_roc(calculator, roc_data)
    return calculator
Exemplo n.º 3
0
def score_sequence_set(sequence_set, pssms, p_binding_site):
    """Evaluate the models built from ``pssms`` on ``sequence_set``.

    Each entry of ``pssms`` becomes one model through
    ``build_hmm_from_semi_parsed`` (called with ``p_binding_site`` as a
    keyword argument). The list of models is passed, together with
    ``sequence_set``, to ``calculate_model_counts_on_sequences`` and that
    call's result is returned as-is.
    """

    def _to_model(semi_parsed):
        # Same builder call for every entry; only the parsed input varies.
        return build_hmm_from_semi_parsed(
            semi_parsed, p_binding_site=p_binding_site)

    return calculate_model_counts_on_sequences(
        [_to_model(entry) for entry in pssms],
        sequence_set,
    )
Exemplo n.º 4
0
# For every (fragment, pssm) pair produced by pssms(): load the fragment's
# sequences, parse the matching '<fragment>-<pssm>.pssm' model file, build one
# model per parsed entry and then iterate over every model/sequence pair.
for fragment, pssm in pssms():
    sequence_file = os.path.join(sequence_dir,
                                 sequence_filename_fmt % fragment)
    model_file = os.path.join(model_dir, '%s-%s.pssm' % (fragment, pssm))

    logging.info('Loading sequences: %s', sequence_file)
    sequences = list(sequences_from_fasta(sequence_file))
    # NOTE(review): on Python 2 map() returns a list, so numpy_seqs can be
    # iterated once per model below. On Python 3 it would be a one-shot
    # iterator and be empty for every model after the first.
    numpy_seqs = map(seq_to_numpy, sequences)
    logging.info('Loaded %d sequences', len(sequences))

    logging.info('Parsing PSSMs: %s', model_file)
    # NOTE(review): this rebinds `pssms`, the same name the enclosing `for`
    # called to obtain its iterable. The running loop is unaffected, but any
    # later pssms() call in this scope would hit the list instead.
    # The file object returned by open() is never explicitly closed.
    pssms = list(parse_models(open(model_file)))

    logging.info('Building models')
    models = [
        build_hmm_from_semi_parsed(parsed, p_binding_site=p_binding_site)
        for parsed in pssms
    ]

    def nucleotide_dist():
        """Return a new length-4 numpy array with every entry equal to 0.25."""
        return numpy.zeros(4) + .25

    # presumably DictOf creates missing entries by calling the given factory;
    # confirm against DictOf's definition
    base_dists = DictOf(nucleotide_dist)

    # NOTE(review): neither base_dists nor min_site_length is used within the
    # visible part of this excerpt.
    min_site_length = 20
    logging.info('Analysing sequences')
    # Each entry of models is unpacked as an (hmm, traits) pair.
    for hmm, traits in models:
        sites = []
        for sequence in numpy_seqs:

            # analyse the sequence for its most likely state sequence
            # NOTE(review): the excerpt is cut off here; the body of this
            # inner loop is not shown.
    # NOTE(review): this indented section reads `fragment` and `pssm` but the
    # statement that introduces the indentation level is not part of the
    # excerpt. It loads the sequences and the '<fragment>-<pssm>.pssm' model
    # file, builds one model per parsed entry and runs hmm.viterbi() on every
    # sequence for every model.
    sequence_file = os.path.join(sequence_dir, sequence_filename_fmt % fragment)
    model_file = os.path.join(model_dir, '%s-%s.pssm' % (fragment, pssm))

    logging.info('Loading sequences: %s', sequence_file)
    sequences = list(sequences_from_fasta(sequence_file))
    # NOTE(review): on Python 2 map() returns a list, so numpy_seqs can be
    # iterated once per model below. On Python 3 it would be a one-shot
    # iterator and be empty for every model after the first.
    numpy_seqs = map(seq_to_numpy, sequences)
    logging.info('Loaded %d sequences', len(sequences))


    logging.info('Parsing PSSMs: %s', model_file)
    # NOTE(review): the file object returned by open() is never explicitly
    # closed.
    pssms = list(parse_models(open(model_file)))


    logging.info('Building models')
    models = [
      build_hmm_from_semi_parsed(parsed, p_binding_site=p_binding_site)
      for parsed in pssms
    ]

    def nucleotide_dist():
        """Return a new length-4 numpy array with every entry equal to 0.25."""
        return numpy.zeros(4) + .25
    # presumably DictOf creates missing entries by calling the given factory;
    # confirm against DictOf's definition
    base_dists = DictOf(nucleotide_dist)

    # NOTE(review): neither base_dists nor min_site_length is used within the
    # visible part of this excerpt.
    min_site_length = 20
    logging.info('Analysing sequences')
    # Each entry of models is unpacked as an (hmm, traits) pair.
    for hmm, traits in models:
        sites = []
        for sequence in numpy_seqs:

            # analyse the sequence for its most likely state sequence
            # viterbi() returns a pair; `LL` is presumably the log-likelihood
            # of the decoded path (confirm against the hmm API).
            LL, states = hmm.viterbi(sequence)
Exemplo n.º 6
0


# Script section (Python 2 syntax: `print >>`, dict.iteritems()).
#
# Steps:
#   1. Load the FASTA sequences named by options.sequences_file into a dict
#      and convert every sequence with seq_to_numpy.
#   2. Parse the PSSMs in options.models_file and build one model per entry.
#   3. For every (model, sequence) pair, find the best site with
#      analyse_sequence_for_best_site, log it and append a line to 'sites.txt'.
#
# Both files are closed in `finally` clauses so the handles are released even
# when parsing or analysis raises. try/finally is used rather than `with` so
# the script keeps running on any Python 2 interpreter.
logging.info('Loading sequences: %s', options.sequences_file)
sequences = dict(sequences_from_fasta(options.sequences_file))
numpy_seqs = dict((desc, seq_to_numpy(seq)) for desc, seq in sequences.iteritems())
logging.info('Loaded %d sequences', len(sequences))


logging.info('Parsing PSSMs: %s', options.models_file)
models_file = open(options.models_file)
try:
    # list() consumes the parser completely before the file is closed.
    pssms = list(parse_models(models_file))
finally:
    models_file.close()


logging.info('Building models')
models = [
    build_hmm_from_semi_parsed(parsed, p_binding_site=options.p_binding_site)
    for parsed in pssms
]

logging.info('Analysing sequences')
# Collects the p(binding site) value of every (model, sequence) pair, in
# iteration order.
p_binding_sites = []
sites_file = open('sites.txt', 'w')
try:
    # Each entry of models is unpacked as an (hmm, traits) pair.
    for hmm, traits in models:
        for desc, sequence in numpy_seqs.iteritems():
            p_binding_site, site_seq = analyse_sequence_for_best_site(sequence, hmm, traits)
            p_binding_sites.append(p_binding_site)
            site = numpy_to_seq(site_seq)
            logging.info('%s: p(binding site)=%12g, sequence=%s', desc, p_binding_site, site)
            # One comma-separated record per line.
            print >> sites_file, '%s, %12g, %s' % (desc, p_binding_site, site)
finally:
    sites_file.close()