def logos(self):
    """
    Create one logo image per standard PWM.

    Each entry of ``self.pwms`` is unpacked as a pair; only the second
    element is used. Its ``freqs`` are exponentiated and rendered at a size
    of ``(160 * self.K, 480)``.

    :returns: a list of images, in the order of ``self.pwms``.
    """
    import hmm.pssm.logo as L
    images = []
    for _, pwm in self.pwms:
        rendered = L.pssm_as_image(N.exp(pwm.freqs), size=(160 * self.K, 480))
        images.append(rendered)
    return images
def logo(self):
    """
    Create a logo of the gapped PWM.

    All ``self.K`` columns start with a transparency of one; the entry at
    index ``self.gap_char`` is then set to ``self.gap_freq``.

    :returns: the image produced by ``pssm_as_image``.
    """
    import hmm.pssm.logo as L
    column_alphas = N.ones(self.K)
    column_alphas[self.gap_char] = self.gap_freq
    gapped_freqs = N.exp(self.gapped_pwm)
    image_size = (160 * self.K, 480)
    return L.pssm_as_image(
        gapped_freqs,
        size=image_size,
        transparencies=column_alphas,
    )
def write_image(self):
    """
    Render ``self.emissions`` as a logo and save it twice.

    ``self.gap_probs`` is passed as the per-column transparencies. The image
    is written to ``self.png_file`` as PNG and then to ``self.eps_file`` as
    EPS.
    """
    rendered = logo.pssm_as_image(
        self.emissions,
        transparencies=self.gap_probs,
    )
    rendered.save(self.png_file, "PNG")
    rendered.save(self.eps_file, "EPS")
def make_logo_for_glam2_output(filename):
    """
    Write a logo for a GLAM2 output file to a file with a .png extension.

    The input is parsed with ``GLAM2Output.parse``; the frequencies and gaps
    it reports are rendered as a logo. The logo is saved under the input
    path with its extension replaced by ``.png``.

    :param filename: path of the GLAM2 output file to read.
    """
    # Parse and extract inside the ``with`` block so the input file is
    # closed deterministically instead of being left to the garbage collector.
    with open(filename) as glam2_file:
        output = GLAM2Output.parse(glam2_file)
        freqs, gaps = output.freqs_and_gaps()
    logo = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    logo_filename = '%s.png' % os.path.splitext(filename)[0]
    logo.save(logo_filename)
def logo(self):
    """
    Create a logo of the gapped PWM.

    Transparencies start at one for each of the ``self.K`` columns; every
    ``(gap_char, gap_freq)`` pair in ``self.gaps`` overrides one entry. The
    frequencies drawn are those of the last entry of ``self.pwms``.

    :returns: the image produced by ``pssm_as_image``.
    """
    import hmm.pssm.logo as L
    column_alphas = N.ones(self.K)
    for position, frequency in self.gaps:
        column_alphas[position] = frequency
    last_pwm = self.pwms[-1][1]
    return L.pssm_as_image(
        N.exp(last_pwm.freqs),
        size=(160 * self.K, 480),
        transparencies=column_alphas,
    )
def test_hmm(tag, pwm):
    """
    Save a logo for the PWM, build an HMM from it and graph the model.

    :param tag: label used in log messages and as the prefix of the
        ``<tag>-logo.png`` and ``<tag>-states`` outputs.
    :param pwm: a ``(freqs, gaps)`` pair.
    :returns: the model built by ``build_hmm_model``.
    """
    freqs, gaps = pwm

    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    built = build_hmm_model(freqs, gaps, .1)
    logging.debug('%s: Created model', tag)

    hmm.graph_as_svg(built, '%s-states' % tag, neato_properties={'-Elen': 3.})
    logging.debug('%s: Graphed model', tag)
    return built
def test_hmm(tag, pwm):
    """
    Create the logo, the HMM and the state graph for one PWM.

    :param tag: label used in log messages and to name the
        ``<tag>-logo.png`` and ``<tag>-states`` outputs.
    :param pwm: a two-element sequence unpacked as ``(freqs, gaps)``.
    :returns: the model returned by ``build_hmm_model``.
    """
    frequencies, gap_values = pwm

    # Logo of the PWM.
    rendered = L.pssm_as_image(frequencies, size=None, transparencies=gap_values)
    rendered_name = '%s-logo.png' % tag
    rendered.save(rendered_name)
    logging.info('%s: Created logo: %s', tag, rendered_name)

    # HMM built from the same frequencies and gaps.
    hmm_model = build_hmm_model(frequencies, gap_values, .1)
    logging.debug('%s: Created model', tag)

    # Graph of the model's states.
    graph_name = '%s-states' % tag
    hmm.graph_as_svg(hmm_model, graph_name, neato_properties={'-Elen': 3.})
    logging.debug('%s: Graphed model', tag)

    return hmm_model
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using the Viterbi algorithm to classify sequences.

    A logo is saved to ``<tag>-logo.png``. Then, for every value in
    ``p_binding_params``, a model is built and graphed, run over the positive
    and the negative sequences, and summarised as one ROC point.

    :param tag: label used in log messages and output file names.
    :param freqs: frequencies passed to the logo and to ``build_hmm_model``.
    :param gaps: gaps passed to the logo and to ``build_hmm_model``.
    :param positive_seqs: sequences counted towards true positives / false negatives.
    :param negative_seqs: sequences counted towards false positives / true negatives.
    :returns: a list with one ``roc.RocCalculator`` per binding parameter.
    """
    logging.info('Running PWM: %s', tag)

    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    results = []
    for p_binding in p_binding_params:
        # build model
        model = build_hmm_model(freqs, gaps, p_binding)
        hmm.graph_as_svg(model, '%s-states' % tag, neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)

        pos_sites, pos_non_sites, pos_hits = run_on_seqs(model, positive_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, pos_sites, pos_non_sites, pos_hits, len(positive_seqs)
        )

        neg_sites, neg_non_sites, neg_hits = run_on_seqs(model, negative_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, neg_sites, neg_non_sites, neg_hits, len(negative_seqs)
        )

        # A sequence counts as a predicted positive when a site was found in it.
        point = roc.RocCalculator(
            tp=pos_hits,
            fp=neg_hits,
            tn=len(negative_seqs) - neg_hits,
            fn=len(positive_seqs) - pos_hits,
        )
        logging.info(
            '%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
            tag, p_binding, point.specificity(), point.sensitivity()
        )
        results.append(point)
    return results
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Run the PWM using forward-backward.

    Saves a logo to ``<tag>-logo.png``, builds and graphs a model, scores
    the values of both sequence collections and converts the scores to ROCs.

    :param tag: label used in log messages and output file names.
    :param freqs: frequencies passed to the logo and to ``build_hmm_model``.
    :param gaps: gaps passed to the logo and to ``build_hmm_model``.
    :param positive_seqs: mapping whose ``values()`` are the positive sequences.
    :param negative_seqs: mapping whose ``values()`` are the negative sequences.
    :returns: the result of ``roc.picked_rocs_from_thresholds``.
    """
    logging.info('Running PWM: %s', tag)

    image = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    image_path = '%s-logo.png' % tag
    image.save(image_path)
    logging.info('%s: Created logo: %s', tag, image_path)

    # build model
    fb_model = build_hmm_model(freqs, gaps, .001)
    hmm.graph_as_svg(fb_model, '%s-states' % tag, neato_properties={'-Elen': 1.4})
    logging.debug('%s: Graphed model', tag)

    scores_for_positives = test_hmm_forward_backward(fb_model, positive_seqs.values())
    scores_for_negatives = test_hmm_forward_backward(fb_model, negative_seqs.values())
    return roc.picked_rocs_from_thresholds(scores_for_positives, scores_for_negatives)
def run_pwm_forward_backward(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Score sequences with the PWM's HMM using forward-backward.

    :param tag: label used in log messages and to name the
        ``<tag>-logo.png`` and ``<tag>-states`` outputs.
    :param freqs: frequencies passed to the logo and to ``build_hmm_model``.
    :param gaps: gaps passed to the logo and to ``build_hmm_model``.
    :param positive_seqs: mapping; its ``values()`` are scored as positives.
    :param negative_seqs: mapping; its ``values()`` are scored as negatives.
    :returns: whatever ``roc.picked_rocs_from_thresholds`` returns for the
        positive and negative scores.
    """
    logging.info('Running PWM: %s', tag)

    # Logo of the PWM.
    rendered = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    rendered_name = '%s-logo.png' % tag
    rendered.save(rendered_name)
    logging.info('%s: Created logo: %s', tag, rendered_name)

    # Model and its state graph.
    hmm_model = build_hmm_model(freqs, gaps, .001)
    graph_name = '%s-states' % tag
    hmm.graph_as_svg(hmm_model, graph_name, neato_properties={'-Elen': 1.4})
    logging.debug('%s: Graphed model', tag)

    # Scores for both sequence sets, positives first.
    pos = test_hmm_forward_backward(hmm_model, positive_seqs.values())
    neg = test_hmm_forward_backward(hmm_model, negative_seqs.values())
    return roc.picked_rocs_from_thresholds(pos, neg)
def run_pwm_viterbi(tag, freqs, gaps, positive_seqs, negative_seqs):
    """
    Classify sequences with the PWM's HMM using the Viterbi algorithm.

    One ROC point is produced for each value of ``p_binding_params``: a model
    is built for that value, graphed, and run over both sequence sets.

    :param tag: label used in log messages and to name the
        ``<tag>-logo.png`` and ``<tag>-states`` outputs.
    :param freqs: frequencies passed to the logo and to ``build_hmm_model``.
    :param gaps: gaps passed to the logo and to ``build_hmm_model``.
    :param positive_seqs: sequences expected to contain a site.
    :param negative_seqs: sequences expected not to contain a site.
    :returns: list of ``roc.RocCalculator`` objects, one per binding parameter.
    """
    logging.info('Running PWM: %s', tag)

    rendered = L.pssm_as_image(freqs, size=None, transparencies=gaps)
    rendered_name = '%s-logo.png' % tag
    rendered.save(rendered_name)
    logging.info('%s: Created logo: %s', tag, rendered_name)

    points = []
    for p_binding in p_binding_params:
        # build model
        hmm_model = build_hmm_model(freqs, gaps, p_binding)
        hmm.graph_as_svg(hmm_model, '%s-states' % tag, neato_properties={'-Elen': 1.4})
        logging.debug('%s: Graphed model', tag)

        # Positive set.
        p_found, p_missed, p_seqs_hit = run_on_seqs(hmm_model, positive_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Positive sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, p_found, p_missed, p_seqs_hit, len(positive_seqs))

        # Negative set.
        n_found, n_missed, n_seqs_hit = run_on_seqs(hmm_model, negative_seqs)
        logging.debug(
            '%s: p(binding)=%.1e: Negative sequences: Over all sequences: found %4d positive sites and %4d negative sites in %4d/%4d sequences',
            tag, p_binding, n_found, n_missed, n_seqs_hit, len(negative_seqs))

        # Confusion-matrix counts are per sequence, not per site.
        false_negatives = len(positive_seqs) - p_seqs_hit
        true_negatives = len(negative_seqs) - n_seqs_hit
        point = roc.RocCalculator(
            tp=p_seqs_hit, fp=n_seqs_hit, tn=true_negatives, fn=false_negatives)
        logging.info(
            '%s: p(binding)=%.1e; Specificity=%.3f; Sensitivity=%.3f',
            tag, p_binding, point.specificity(), point.sensitivity())
        points.append(point)

    return points
def examine_model(self, model, builder, sequences, image_file=None, pssm_def_file=None):
    """
    Log some info about the model and optionally write it to disk.

    :param model: model handed to ``builder.get_emissions_and_gap_probabilities``.
    :param builder: object providing ``get_emissions_and_gap_probabilities``.
    :param sequences: not used by this method.
    :param image_file: if not None, base name for the logo; the logo is saved
        as ``<image_file>.png`` and ``<image_file>.eps``.
    :param pssm_def_file: if not None, path the PSSM definition is written to.
    """
    emissions, gap_probs = builder.get_emissions_and_gap_probabilities(model, offset=1)
    logging.info('Entropy/base : %f', hmm.pssm.entropy(emissions, gap_probs) / gap_probs.sum())
    logging.info('Information content : %f', hmm.pssm.information_content(emissions))

    if pssm_def_file is not None:
        # Close the file explicitly so the definition is flushed to disk
        # before this method returns.
        with open(pssm_def_file, 'w') as pssm_def:
            output_pssm_definition(pssm_def, emissions, gap_probs)

    if image_file is not None:
        import hmm.pssm.logo as logo
        image = logo.pssm_as_image(emissions, transparencies=gap_probs)

        png_file = '%s.png' % image_file
        logging.info('Saving PSSM to %s', png_file)
        image.save(png_file, "PNG")

        eps_file = '%s.eps' % image_file
        logging.info('Saving PSSM to %s', eps_file)
        image.save(eps_file, "EPS")
def write_logo(self, model, f, rev_comp=False):
    """
    Render the model's PSSM as a logo and save it as a PNG.

    Each of the ``self.K`` positions contributes the emission at the even
    index ``2*k`` with a transparency of 1.0. Between consecutive positions,
    the emission at the odd index ``2*k+1`` is included only when
    ``self.p_gap_for_model(model, k)`` exceeds ``self.gap_threshold``, and it
    is drawn with that probability as its transparency.

    :param model: converted with ``hmm.as_model`` before use.
    :param f: destination passed to the image's ``save``.
    :param rev_comp: if true, the column order is reversed and every
        emission is itself reversed before drawing.
    :returns: the image that was saved.
    """
    import hmm.pssm.logo as logo
    model = hmm.as_model(model)
    emissions = self.pssm_dist(model)

    columns = []
    alphas = []
    for k in xrange(self.K):
        columns.append(emissions[2 * k])
        alphas.append(1.0)
        # The last position has no gap after it.
        if k >= self.K - 1:
            continue
        p_gap = self.p_gap_for_model(model, k)
        if p_gap > self.gap_threshold:
            columns.append(emissions[2 * k + 1])
            alphas.append(p_gap)

    if rev_comp:
        columns = [column[::-1] for column in reversed(columns)]
        alphas.reverse()

    image = logo.pssm_as_image(columns, transparencies=alphas)
    image.save(f, "PNG")
    return image
help="File in which the gapped PSSMs are stored.") option_parser.add_option( "-l", "--logo-files-basename", dest="logo_files_basename", help="basename of files to write logos to. Extension will be -0.png") option_parser.add_option("-t", "--image-type", dest="image_type", default='png', help="type of images to write") options, args = option_parser.parse_args() for option in option_parser.option_list: if option.dest: logging.info('%s: %s (%s)', option.dest, str(getattr(options, option.dest)), option.help) # Load PSSMs logging.info('Loading PSSMs: %s', options.models_file) pssms = list(parse_models(open(options.models_file))) for i, p in enumerate(pssms): filename = '%s-%d.%s' % (options.logo_files_basename, i, options.image_type) logging.info('Creating image for PSSM: %s', filename) emissions, gap_probs = emissions_and_gaps_from_semi_parsed(p) logo_image = logo.pssm_as_image(emissions, transparencies=gap_probs) logo_image.save(filename)
logging.info("Baum-Welch took %f seconds", time.time() - start) logging.info("Achieved LL: %f in %d iterations", LL, num_iterations) return model if "__main__" == __name__: logging.basicConfig(level=logging.INFO) def synthetic(): identifier = "synthetic-sequences-K10-g0.50-N200-L200-seed4-1" sequences = [hmm.pssm.seq_to_numpy(s) for s in convert_seqs("synthetic-2/%s.fa" % identifier)] return identifier, sequences def fragment(identifier="T00594"): sequences = seqs_for_fragment(identifier) return identifier, sequences identifier, sequences = synthetic() identifier, sequences = fragment() algorithm = SingleGapAlgorithm() model = algorithm(sequences) emissions, gap_probs = algorithm.builder.get_emissions_and_gap_probabilities(model, offset=1) import hmm.pssm.logo as logo image = logo.pssm_as_image(emissions, transparencies=gap_probs) image.save("single-gap-results/%s.png" % identifier, "PNG")
def write_logo(self, model, f):
    """
    Render the model's PSSM distribution as a logo and save it as a PNG.

    :param model: passed to ``self.pssm_dist``.
    :param f: destination passed to the image's ``save``.
    :returns: the image that was saved.
    """
    import hmm.pssm.logo as logo
    rendered = logo.pssm_as_image(self.pssm_dist(model))
    rendered.save(f, "PNG")
    return rendered
] ) return freqs, gaps def all_sp1_pssms(): return { 'TRANSFAC' : transfac_sp1(), 'MEME' : meme_sp1(), 'Gapped' : gapped_sp1(), 'Gapped-new' : gapped_sp1_new(), 'Ungapped-new' : ungapped_sp1_new(), 'GLAM2-i4' : glam2_sp1_i4(), 'GLAM2-i7' : glam2_sp1_i7(), } if '__main__' == __name__: import hmm.pssm.logo as L for name, (freqs, gaps) in [ ('TRANSFAC-sp1', transfac_sp1()), ('MEME-sp1', meme_sp1()), ('Gapped-sp1', gapped_sp1()), ('Gapped-sp1-new', gapped_sp1_new()), ('Ungapped-sp1-new', ungapped_sp1_new()), ('GLAM2-sp1-i4', glam2_sp1_i4()), ('GLAM2-sp1-i7', glam2_sp1_i7()), ]: freqs = (freqs.T / freqs.sum(axis=1)).T logo_image = L.pssm_as_image(freqs, transparencies=gaps) logo_image.save('%s.png' % name)
scores = dict() methods = args sp1_pssms = all_sp1_pssms() for tag in methods: score_pickle_file = '%s-scores.pickle' % tag try: positive_scores, negative_scores = cPickle.load( open(score_pickle_file)) logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file) except: logging.info( '%s: Could not ROCs from unpickle %s, calculating from scratch.', tag, score_pickle_file) freqs, gaps = sp1_pssms[tag] freqs = (freqs.T / freqs.sum(axis=1)).T logo = L.pssm_as_image(freqs, size=None, transparencies=gaps) logo_filename = '%s-logo.png' % tag logo.save(logo_filename) logging.info('%s: Created logo: %s', tag, logo_filename) model = build_hmm_model(freqs, gaps, .001) hmm.graph_as_svg(model, '%s-states' % tag, neato_properties={'-Elen': 1.4}) logging.debug('%s: Graphed model', tag) positive_scores = test_hmm_forward_backward( model, sequences['positive'].values()) negative_scores = dict( (bg, test_hmm_forward_backward(model, sequences[bg].values())) for bg in backgrounds) cPickle.dump((positive_scores, negative_scores), open(score_pickle_file, 'wb'))
def logo(self):
    """
    Create a logo of the PWM.

    ``self.freqs`` is exponentiated and rendered at a size of
    ``(160 * self.K, 480)``. No per-column transparencies are passed to the
    renderer.

    :returns: the image produced by ``pssm_as_image``.
    """
    import hmm.pssm.logo as L
    # The original also built an all-ones transparency array that was never
    # passed on; that dead local has been dropped.
    return L.pssm_as_image(N.exp(self.freqs), size=(160 * self.K, 480))
def logos(self):
    """
    Create a logo for each standard PWM in ``self.pwms``.

    Every entry of ``self.pwms`` is unpacked as a pair and only its second
    element is drawn, from the exponential of its ``freqs``.

    :returns: a list of images, in the order of ``self.pwms``.
    """
    import hmm.pssm.logo as L

    def render(pwm):
        # One image per PWM, sized by the number of columns.
        return L.pssm_as_image(N.exp(pwm.freqs), size=(160 * self.K, 480))

    return [render(pwm) for _, pwm in self.pwms]
sequences = Sequences() backgrounds = set(sequence_filenames.keys()) backgrounds.remove('positive') scores = dict() methods = args sp1_pssms = all_sp1_pssms() for tag in methods: score_pickle_file = '%s-scores.pickle' % tag try: positive_scores, negative_scores = cPickle.load(open(score_pickle_file)) logging.info('%s: Unpickled ROCs from %s.', tag, score_pickle_file) except: logging.info('%s: Could not ROCs from unpickle %s, calculating from scratch.', tag, score_pickle_file) freqs, gaps = sp1_pssms[tag] freqs = (freqs.T / freqs.sum(axis=1)).T logo = L.pssm_as_image(freqs, size=None, transparencies=gaps) logo_filename = '%s-logo.png' % tag logo.save(logo_filename) logging.info('%s: Created logo: %s', tag, logo_filename) model = build_hmm_model(freqs, gaps, .001) hmm.graph_as_svg(model, '%s-states' % tag, neato_properties={'-Elen':1.4}) logging.debug('%s: Graphed model', tag) positive_scores = test_hmm_forward_backward(model, sequences['positive'].values()) negative_scores = dict( (bg, test_hmm_forward_backward(model, sequences[bg].values())) for bg in backgrounds ) cPickle.dump((positive_scores, negative_scores), open(score_pickle_file, 'wb')) scores[(tag,)] = positive_scores for bg, score in negative_scores.iteritems(): scores[(tag, bg)] = score