示例#1
0
 def logos(self):
     """Create one logo image per standard PWM in ``self.pwms``.

     ``self.pwms`` is iterated as pairs; only the second element of each
     pair is used. Its ``freqs`` are exponentiated before drawing and every
     image is requested with size ``(160 * self.K, 480)``.

     Returns a list of images in the same order as ``self.pwms``.
     """
     import hmm.pssm.logo as logo_module
     images = []
     for _, pwm in self.pwms:
         probabilities = N.exp(pwm.freqs)
         image = logo_module.pssm_as_image(probabilities,
                                           size=(160 * self.K, 480))
         images.append(image)
     return images
示例#2
0
    def logo(self):
        """Create a logo of the gapped PWM.

        Every position gets transparency 1 except ``self.gap_char``, which
        gets ``self.gap_freq``. ``self.gapped_pwm`` is exponentiated before
        drawing and the image is requested with size ``(160 * self.K, 480)``.
        """
        import hmm.pssm.logo as logo_module

        alphas = N.ones(self.K)
        alphas[self.gap_char] = self.gap_freq
        probabilities = N.exp(self.gapped_pwm)
        image_size = (160 * self.K, 480)
        return logo_module.pssm_as_image(
            probabilities, size=image_size, transparencies=alphas)
示例#3
0
 def write_image(self):
     """Draw the PSSM logo and save it as both a PNG and an EPS file.

     The logo is built from ``self.emissions`` with ``self.gap_probs`` passed
     as ``transparencies``; it is written to ``self.png_file`` and then to
     ``self.eps_file``.
     """
     rendered = logo.pssm_as_image(self.emissions,
                                   transparencies=self.gap_probs)
     # One image object, two output formats.
     rendered.save(self.png_file, "PNG")
     rendered.save(self.eps_file, "EPS")
示例#4
0
def make_logo_for_glam2_output(filename):
    """Write a logo for a GLAM2 output file to a file with .png extension.

    The file at `filename` is parsed with ``GLAM2Output.parse``; the
    frequencies and gaps it yields are drawn as a logo, which is saved under
    the input path with its extension replaced by ``.png``.
    """
    # Close the input deterministically rather than leaking the handle.
    # freqs_and_gaps() stays inside the block in case the parser reads lazily.
    with open(filename) as glam2_file:
        output = GLAM2Output.parse(glam2_file)
        freqs, gaps = output.freqs_and_gaps()
    logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    logo_filename = '%s.png' % os.path.splitext(filename)[0]
    logo.save(logo_filename)
示例#5
0
    def logo(self):
        """Create a logo of the gapped PWM.

        Transparencies start at 1 for every position; each
        ``(gap_char, gap_freq)`` pair in ``self.gaps`` then overrides the
        entry at ``gap_char``. The logo is drawn from the exponentiated
        ``freqs`` of the PWM in the last entry of ``self.pwms``, with size
        ``(160 * self.K, 480)``.
        """
        import hmm.pssm.logo as logo_module

        alphas = N.ones(self.K)
        for position, frequency in self.gaps:
            alphas[position] = frequency
        last_pwm = self.pwms[-1][1]
        return logo_module.pssm_as_image(
            N.exp(last_pwm.freqs),
            size=(160 * self.K, 480),
            transparencies=alphas)
示例#6
0
 def logo(self):
     """Create a logo of the gapped PWM.

     All transparencies are 1 except the entry at ``self.gap_char``, which is
     set to ``self.gap_freq``. ``self.gapped_pwm`` is exponentiated before
     drawing; the requested image size is ``(160 * self.K, 480)``.
     """
     import hmm.pssm.logo as logo_module
     alphas = N.ones(self.K)
     alphas[self.gap_char] = self.gap_freq
     probabilities = N.exp(self.gapped_pwm)
     image_size = (160 * self.K, 480)
     return logo_module.pssm_as_image(
         probabilities, size=image_size, transparencies=alphas)
示例#7
0
 def logo(self):
     """Create a logo of the gapped PWM.

     Transparencies default to 1; every ``(gap_char, gap_freq)`` pair in
     ``self.gaps`` overrides the entry at ``gap_char``. The exponentiated
     ``freqs`` of the PWM in the last entry of ``self.pwms`` are drawn with
     size ``(160 * self.K, 480)``.
     """
     import hmm.pssm.logo as logo_module
     alphas = N.ones(self.K)
     for position, frequency in self.gaps:
         alphas[position] = frequency
     last_pwm = self.pwms[-1][1]
     return logo_module.pssm_as_image(
         N.exp(last_pwm.freqs),
         size=(160 * self.K, 480),
         transparencies=alphas)
示例#8
0
 def test_hmm(tag, pwm):
     """Save a logo for a PWM, build its HMM and graph the model's states.

     `pwm` is unpacked as a ``(freqs, gaps)`` pair. The logo is written to
     ``'<tag>-logo.png'`` and the state graph is written under the name
     ``'<tag>-states'``. Returns the model from ``build_hmm_model``.
     """
     frequencies, gap_probs = pwm
     rendered = L.pssm_as_image(frequencies, size=None,
                                transparencies=gap_probs)
     image_path = '%s-logo.png' % tag
     rendered.save(image_path)
     logging.info('%s: Created logo: %s', tag, image_path)

     hmm_model = build_hmm_model(frequencies, gap_probs, 0.1)
     logging.debug('%s: Created model', tag)

     graph_name = '%s-states' % tag
     hmm.graph_as_svg(hmm_model, graph_name, neato_properties={'-Elen': 3.0})
     logging.debug('%s: Graphed model', tag)
     return hmm_model
 def test_hmm(tag, pwm):
     """Write a logo for `pwm`, build an HMM from it and graph the model.

     `pwm` must unpack into ``(freqs, gaps)``. Side effects: a logo saved as
     ``'<tag>-logo.png'`` and a state graph emitted under ``'<tag>-states'``.
     The built model is returned.
     """
     frequencies, gap_probs = pwm
     rendered = L.pssm_as_image(frequencies, size=None,
                                transparencies=gap_probs)
     image_path = '%s-logo.png' % tag
     rendered.save(image_path)
     logging.info('%s: Created logo: %s', tag, image_path)

     hmm_model = build_hmm_model(frequencies, gap_probs, 0.1)
     logging.debug('%s: Created model', tag)

     layout_options = {'-Elen': 3.0}
     hmm.graph_as_svg(hmm_model, '%s-states' % tag,
                      neato_properties=layout_options)
     logging.debug('%s: Graphed model', tag)
     return hmm_model
示例#10
0
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using the Viterbi algorithm to classify sequences.

    A logo is saved to ``'<tag>-logo.png'``. Then, for every value in the
    module-level ``p_binding_params``, a model is built and graphed, and
    ``run_on_seqs`` is applied to the positive and the negative sequences.
    A sequence set's "number of sequences with a site" is used as its hit
    count when forming the confusion-matrix entries.

    Returns a list with one ``roc.RocCalculator`` per binding probability.
    """
    logging.info('Running PWM: %s', tag)
    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    results = []
    for p_binding in p_binding_params:
        # A fresh model per binding probability; the graph file is rewritten
        # on every iteration under the same name.
        hmm_model = build_hmm_model(freqs, gaps, p_binding)
        hmm.graph_as_svg(hmm_model, '%s-states' % tag,
                         neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)

        pos_sites, pos_non_sites, pos_hits = run_on_seqs(hmm_model,
                                                         positive_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, pos_sites, pos_non_sites, pos_hits,
            len(positive_seqs))

        neg_sites, neg_non_sites, neg_hits = run_on_seqs(hmm_model,
                                                         negative_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, neg_sites, neg_non_sites, neg_hits,
            len(negative_seqs))

        # Confusion matrix at sequence level: a sequence counts as a hit
        # when at least one site was found in it.
        true_pos = pos_hits
        false_pos = neg_hits
        false_neg = len(positive_seqs) - pos_hits
        true_neg = len(negative_seqs) - neg_hits
        point = roc.RocCalculator(tp=true_pos, fp=false_pos,
                                  tn=true_neg, fn=false_neg)
        logging.info('%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
                     tag, p_binding, point.specificity(), point.sensitivity())
        results.append(point)
    return results
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using forward-backward.

    Saves a logo to ``'<tag>-logo.png'``, builds one model with
    ``build_hmm_model(freqs, gaps, .001)``, graphs it under
    ``'<tag>-states'`` and scores the ``.values()`` of both sequence
    collections with ``test_hmm_forward_backward``.

    Returns the result of ``roc.picked_rocs_from_thresholds`` applied to the
    positive and negative scores.
    """
    logging.info('Running PWM: %s', tag)
    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    hmm_model = build_hmm_model(freqs, gaps, 0.001)
    graph_name = '%s-states' % tag
    hmm.graph_as_svg(hmm_model, graph_name, neato_properties={'-Elen': 1.4})
    logging.debug('%s: Graphed model', tag)

    # Positives are scored before negatives, as in the original ordering.
    scores_for_positives = test_hmm_forward_backward(hmm_model,
                                                     positive_seqs.values())
    scores_for_negatives = test_hmm_forward_backward(hmm_model,
                                                     negative_seqs.values())
    return roc.picked_rocs_from_thresholds(scores_for_positives,
                                           scores_for_negatives)
示例#12
0
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using forward-backward.

    Side effects: a logo written to ``'<tag>-logo.png'`` and a state graph
    emitted under ``'<tag>-states'``. A single model is built with
    ``build_hmm_model(freqs, gaps, .001)`` and used to score the
    ``.values()`` of the positive and negative sequence collections.

    Returns whatever ``roc.picked_rocs_from_thresholds`` produces for the
    two score collections.
    """
    logging.info('Running PWM: %s', tag)
    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    hmm_model = build_hmm_model(freqs, gaps, 0.001)
    layout_options = {'-Elen': 1.4}
    hmm.graph_as_svg(hmm_model, '%s-states' % tag,
                     neato_properties=layout_options)
    logging.debug('%s: Graphed model', tag)

    scores_for_positives = test_hmm_forward_backward(hmm_model,
                                                     positive_seqs.values())
    scores_for_negatives = test_hmm_forward_backward(hmm_model,
                                                     negative_seqs.values())
    return roc.picked_rocs_from_thresholds(scores_for_positives,
                                           scores_for_negatives)
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using the Viterbi algorithm to classify sequences.

    Writes a logo to ``'<tag>-logo.png'``, then loops over the module-level
    ``p_binding_params``: for each value a model is built, graphed and run
    over both sequence sets with ``run_on_seqs``. The count of sequences in
    which a site was found supplies the confusion-matrix entries.

    Returns a list of ``roc.RocCalculator`` objects, one per binding
    probability, in iteration order.
    """
    logging.info('Running PWM: %s', tag)
    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    results = []
    for p_binding in p_binding_params:
        # The graph output name does not depend on p_binding, so each
        # iteration rewrites the same graph.
        hmm_model = build_hmm_model(freqs, gaps, p_binding)
        hmm.graph_as_svg(hmm_model, '%s-states' % tag,
                         neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)

        pos_sites, pos_non_sites, pos_hits = run_on_seqs(hmm_model,
                                                         positive_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, pos_sites, pos_non_sites, pos_hits,
            len(positive_seqs))

        neg_sites, neg_non_sites, neg_hits = run_on_seqs(hmm_model,
                                                         negative_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, neg_sites, neg_non_sites, neg_hits,
            len(negative_seqs))

        # Sequence-level confusion matrix.
        true_pos = pos_hits
        false_pos = neg_hits
        false_neg = len(positive_seqs) - pos_hits
        true_neg = len(negative_seqs) - neg_hits
        point = roc.RocCalculator(tp=true_pos, fp=false_pos,
                                  tn=true_neg, fn=false_neg)
        logging.info('%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
                     tag, p_binding, point.specificity(), point.sensitivity())
        results.append(point)
    return results
示例#14
0
 def examine_model(self, model, builder, sequences, image_file=None, pssm_def_file=None):
     """
     Log some info about the model and optionally export its PSSM.

     model: passed to ``builder.get_emissions_and_gap_probabilities``.
     builder: object providing
         ``get_emissions_and_gap_probabilities(model, offset=1)``.
     sequences: not used by this method.
     image_file: optional base name; when given, the logo is saved to
         ``'<image_file>.png'`` and ``'<image_file>.eps'``.
     pssm_def_file: optional path; when given, the PSSM definition is
         written there with ``output_pssm_definition``.
     """
     emissions, gap_probs = builder.get_emissions_and_gap_probabilities(model, offset=1)
     logging.info('Entropy/base        : %f', hmm.pssm.entropy(emissions, gap_probs) / gap_probs.sum())
     logging.info('Information content : %f', hmm.pssm.information_content(emissions))

     # Identity test is the correct check against None, and the definition
     # file is closed as soon as it has been written.
     if pssm_def_file is not None:
         with open(pssm_def_file, 'w') as definition_out:
             output_pssm_definition(definition_out, emissions, gap_probs)

     if image_file is not None:
         import hmm.pssm.logo as logo
         image = logo.pssm_as_image(emissions, transparencies=gap_probs)
         png_file = '%s.png' % image_file
         logging.info('Saving PSSM to %s', png_file)
         image.save(png_file, "PNG")
         eps_file = '%s.eps' % image_file
         logging.info('Saving PSSM to %s', eps_file)
         image.save(eps_file, "EPS")
示例#15
0
 def write_logo(self, model, f, rev_comp=False):
     """Draw the model's PSSM as a logo, save it to `f` as PNG and return it.

     For each ``k`` in ``0 .. self.K - 1`` the emission at index ``2*k`` is
     always drawn with transparency 1.0. For every ``k`` except the last, the
     emission at index ``2*k + 1`` is also drawn, with transparency equal to
     the gap probability, but only when that probability exceeds
     ``self.gap_threshold``.

     When `rev_comp` is true, the column order is reversed and the entries
     within each column are reversed too (presumably a reverse complement;
     this relies on the alphabet ordering, which is not visible here).
     """
     import hmm.pssm.logo as logo
     model = hmm.as_model(model)
     emissions = self.pssm_dist(model)

     columns = []
     alphas = []
     for k in xrange(self.K):
         columns.append(emissions[2 * k])
         alphas.append(1.0)
         if not (k < self.K - 1):
             # No gap column follows the final position.
             continue
         p_gap = self.p_gap_for_model(model, k)
         if not (p_gap > self.gap_threshold):
             continue
         columns.append(emissions[2 * k + 1])
         alphas.append(p_gap)

     if rev_comp:
         columns = [column[::-1] for column in reversed(columns)]
         alphas = alphas[::-1]

     image = logo.pssm_as_image(columns, transparencies=alphas)
     image.save(f, "PNG")
     return image
                         help="File in which the gapped PSSMs are stored.")
option_parser.add_option(
    "-l",
    "--logo-files-basename",
    dest="logo_files_basename",
    help="basename of files to write logos to. Extension will be -0.png")
option_parser.add_option("-t",
                         "--image-type",
                         dest="image_type",
                         default='png',
                         help="type of images to write")
options, args = option_parser.parse_args()
# Echo every option that stores a value, together with its help text, so the
# log records the configuration of this run.
for option in option_parser.option_list:
    if option.dest:
        logging.info('%s: %s (%s)', option.dest,
                     str(getattr(options, option.dest)), option.help)

# Load PSSMs
logging.info('Loading PSSMs: %s', options.models_file)
# list() consumes the parser completely, so the file can be closed as soon
# as the block exits instead of being left open for the rest of the run.
with open(options.models_file) as models_input:
    pssms = list(parse_models(models_input))

# Write one logo per PSSM, named '<basename>-<index>.<image type>'.
for i, p in enumerate(pssms):
    filename = '%s-%d.%s' % (options.logo_files_basename, i,
                             options.image_type)
    logging.info('Creating image for PSSM: %s', filename)

    emissions, gap_probs = emissions_and_gaps_from_semi_parsed(p)
    logo_image = logo.pssm_as_image(emissions, transparencies=gap_probs)

    logo_image.save(filename)
示例#17
0
        logging.info("Baum-Welch took %f seconds", time.time() - start)
        logging.info("Achieved LL: %f in %d iterations", LL, num_iterations)

        return model


# Script entry point: train a SingleGapAlgorithm model on one data set and
# save the resulting PSSM logo as a PNG.
if "__main__" == __name__:
    logging.basicConfig(level=logging.INFO)

    # Data source 1: a fixed synthetic FASTA file, each sequence converted
    # with hmm.pssm.seq_to_numpy.
    def synthetic():
        identifier = "synthetic-sequences-K10-g0.50-N200-L200-seed4-1"
        sequences = [hmm.pssm.seq_to_numpy(s) for s in convert_seqs("synthetic-2/%s.fa" % identifier)]
        return identifier, sequences

    # Data source 2: the sequences returned by seqs_for_fragment for the
    # given identifier.
    def fragment(identifier="T00594"):
        sequences = seqs_for_fragment(identifier)
        return identifier, sequences

    # NOTE(review): the synthetic data set is loaded and then immediately
    # replaced by the fragment data set, so only the latter is used below.
    # This looks like a manual toggle between the two sources -- confirm.
    identifier, sequences = synthetic()
    identifier, sequences = fragment()

    algorithm = SingleGapAlgorithm()

    # The algorithm object is callable; calling it on the sequences yields
    # the model.
    model = algorithm(sequences)

    emissions, gap_probs = algorithm.builder.get_emissions_and_gap_probabilities(model, offset=1)
    import hmm.pssm.logo as logo

    # Draw the logo with the gap probabilities as transparencies and save it
    # under the data set's identifier.
    image = logo.pssm_as_image(emissions, transparencies=gap_probs)
    image.save("single-gap-results/%s.png" % identifier, "PNG")
示例#18
0
 def write_logo(self, model, f):
     """Draw the model's PSSM as a logo, save it to `f` as PNG and return it.

     The distribution to draw comes from ``self.pssm_dist(model)``.
     """
     import hmm.pssm.logo as logo_module
     rendered = logo_module.pssm_as_image(self.pssm_dist(model))
     rendered.save(f, "PNG")
     return rendered
示例#19
0
文件: traits.py 项目: JohnReid/HMM
 def write_logo(self, model, f):
     """Save a PNG logo of the model's PSSM to `f` and return the image.

     ``self.pssm_dist(model)`` supplies the distribution that is drawn.
     """
     import hmm.pssm.logo as logo_module
     distribution = self.pssm_dist(model)
     rendered = logo_module.pssm_as_image(distribution)
     rendered.save(f, "PNG")
     return rendered
示例#20
0
        ]
    )
    return freqs, gaps


def all_sp1_pssms():
    """Return a dict mapping a method name to that method's Sp1 PSSM.

    Each value is whatever the corresponding ``*_sp1*()`` factory returns.
    The factories are called in the order listed below.
    """
    pssms = {}
    pssms['TRANSFAC'] = transfac_sp1()
    pssms['MEME'] = meme_sp1()
    pssms['Gapped'] = gapped_sp1()
    pssms['Gapped-new'] = gapped_sp1_new()
    pssms['Ungapped-new'] = ungapped_sp1_new()
    pssms['GLAM2-i4'] = glam2_sp1_i4()
    pssms['GLAM2-i7'] = glam2_sp1_i7()
    return pssms

# Script entry point: write one '<name>.png' logo for each Sp1 PSSM.
if '__main__' == __name__:
    import hmm.pssm.logo as L
    # Each factory returns a (freqs, gaps) pair, unpacked in the loop header.
    for name, (freqs, gaps) in [
        ('TRANSFAC-sp1', transfac_sp1()),
        ('MEME-sp1', meme_sp1()),
        ('Gapped-sp1', gapped_sp1()),
        ('Gapped-sp1-new', gapped_sp1_new()),
        ('Ungapped-sp1-new', ungapped_sp1_new()),
        ('GLAM2-sp1-i4', glam2_sp1_i4()),
        ('GLAM2-sp1-i7', glam2_sp1_i7()),
    ]:
        # Divide every row by its own sum (assumes freqs is a 2-D array --
        # TODO confirm); the double transpose makes the row sums broadcast.
        freqs = (freqs.T / freqs.sum(axis=1)).T
        logo_image = L.pssm_as_image(freqs, transparencies=gaps)
        logo_image.save('%s.png' % name)
示例#21
0
scores = dict()
methods = args
sp1_pssms = all_sp1_pssms()
# For every requested method, reuse cached scores when a pickle from an
# earlier run can be read; otherwise compute the scores and cache them.
for tag in methods:
    score_pickle_file = '%s-scores.pickle' % tag
    try:
        # Read in binary mode to match the 'wb' used when the cache is
        # written, and close the handle promptly.
        # NOTE: unpickling executes arbitrary code; only load cache files
        # produced by this script.
        with open(score_pickle_file, 'rb') as pickle_in:
            positive_scores, negative_scores = cPickle.load(pickle_in)
        logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file)
    except Exception:
        # Any ordinary failure (missing or corrupt cache) falls back to
        # recomputation. KeyboardInterrupt and SystemExit are deliberately
        # no longer swallowed.
        logging.info(
            '%s: Could not ROCs from unpickle %s, calculating from scratch.',
            tag, score_pickle_file)
        freqs, gaps = sp1_pssms[tag]
        # Divide every row by its own sum (assumes freqs is a 2-D array).
        freqs = (freqs.T / freqs.sum(axis=1)).T
        logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
        logo_filename = '%s-logo.png' % tag
        logo.save(logo_filename)
        logging.info('%s: Created logo: %s', tag, logo_filename)
        model = build_hmm_model(freqs, gaps, .001)
        hmm.graph_as_svg(model,
                         '%s-states' % tag,
                         neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)
        positive_scores = test_hmm_forward_backward(
            model, sequences['positive'].values())
        # One score collection per background sequence set.
        negative_scores = dict(
            (bg, test_hmm_forward_backward(model, sequences[bg].values()))
            for bg in backgrounds)
        with open(score_pickle_file, 'wb') as pickle_out:
            cPickle.dump((positive_scores, negative_scores), pickle_out)
示例#22
0
 def logo(self):
     """Create a logo of the PWM.

     ``self.freqs`` is exponentiated before drawing and the image is
     requested with size ``(160 * self.K, 480)``. No transparencies are
     passed to ``pssm_as_image``; the array of ones that used to be built
     here was never used and has been removed.
     """
     import hmm.pssm.logo as L
     return L.pssm_as_image(N.exp(self.freqs), size=(160 * self.K, 480))
示例#23
0
    def logos(self):
        """Create one logo image per standard PWM in ``self.pwms``.

        ``self.pwms`` is iterated as pairs and only the second element of
        each pair is used: its ``freqs`` are exponentiated and drawn with
        size ``(160 * self.K, 480)``.

        Returns the images as a list, in the order of ``self.pwms``.
        """
        import hmm.pssm.logo as logo_module

        images = []
        for _, pwm in self.pwms:
            probabilities = N.exp(pwm.freqs)
            image = logo_module.pssm_as_image(probabilities,
                                              size=(160 * self.K, 480))
            images.append(image)
        return images
示例#24
0
    def logo(self):
        """Create a logo of the PWM.

        ``self.freqs`` is exponentiated before drawing and the image is
        requested with size ``(160 * self.K, 480)``. No transparencies are
        passed to ``pssm_as_image``; the unused array of ones that was
        previously allocated here has been dropped.
        """
        import hmm.pssm.logo as L

        return L.pssm_as_image(N.exp(self.freqs), size=(160 * self.K, 480))
示例#25
0
sequences = Sequences()
# Every sequence set other than 'positive' is treated as a background.
backgrounds = set(sequence_filenames.keys())
backgrounds.remove('positive')
scores = dict()
methods = args
sp1_pssms = all_sp1_pssms()
# For every requested method, reuse cached scores when a pickle from an
# earlier run can be read; otherwise compute the scores and cache them.
for tag in methods:
    score_pickle_file = '%s-scores.pickle' % tag
    try:
        # Read in binary mode to match the 'wb' used when the cache is
        # written, and close the handle promptly.
        # NOTE: unpickling executes arbitrary code; only load cache files
        # produced by this script.
        with open(score_pickle_file, 'rb') as pickle_in:
            positive_scores, negative_scores = cPickle.load(pickle_in)
        logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file)
    except Exception:
        # Any ordinary failure (missing or corrupt cache) falls back to
        # recomputation. KeyboardInterrupt and SystemExit are deliberately
        # no longer swallowed.
        logging.info('%s: Could not ROCs from unpickle %s, calculating from scratch.', tag, score_pickle_file)
        freqs, gaps = sp1_pssms[tag]
        # Divide every row by its own sum (assumes freqs is a 2-D array).
        freqs = (freqs.T / freqs.sum(axis=1)).T
        logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
        logo_filename = '%s-logo.png' % tag
        logo.save(logo_filename)
        logging.info('%s: Created logo: %s', tag, logo_filename)
        model = build_hmm_model(freqs, gaps, .001)
        hmm.graph_as_svg(model, '%s-states' % tag, neato_properties={'-Elen':1.4})
        logging.debug('%s: Graphed model', tag)
        positive_scores = test_hmm_forward_backward(model, sequences['positive'].values())
        # One score collection per background sequence set.
        negative_scores = dict(
            (bg, test_hmm_forward_backward(model, sequences[bg].values()))
            for bg in backgrounds
        )
        with open(score_pickle_file, 'wb') as pickle_out:
            cPickle.dump((positive_scores, negative_scores), pickle_out)
    # Positive scores are keyed by (tag,); each background's scores by
    # (tag, background name).
    scores[(tag,)] = positive_scores
    for bg, score in negative_scores.iteritems():
        scores[(tag, bg)] = score