def main():
    """Dispatch scoring of every loaded binding model over the peak sequences.

    Reads the peaks from the file named by the first command-line argument,
    fetches the sequence of each peak from ``hg19.genome.fa``, and passes one
    ``(output file, model, sequences, peaks)`` argument tuple per model to
    ``run_in_parallel`` together with ``score_model_worker``. The shared
    output file is ``output.txt`` in the current working directory.

    Raises:
        IndexError: if no peaks file was given on the command line.
    """
    genome = pysam.FastaFile('hg19.genome.fa')
    try:
        # Alternative model source kept for reference:
        #   load_selex_models_from_db()
        models = load_binding_models_from_db()
        peaks = load_peaks(sys.argv[1])
        # Each peak unpacks as (contig, start, stop).
        # NOTE(review): fetching up to stop + 1 suggests the peak stop is
        # inclusive -- confirm against load_peaks.
        seqs_iter = (
            genome.fetch(contig, start, stop + 1)
            for contig, start, stop in peaks
        )
        seqs = FixedLengthDNASequences(seqs_iter)
        with ThreadSafeFile("output.txt", "w") as ofp:
            all_args = [(ofp, model, seqs, peaks) for model in models]
            # NOTE(review): 24 is presumably the number of parallel workers
            # -- confirm against run_in_parallel.
            run_in_parallel(24, score_model_worker, all_args)
    finally:
        # Release the FASTA handle even when loading or scoring fails.
        genome.close()
def main():
    """Dispatch scoring of every loaded binding model over the peak sequences.

    Reads the peaks from the file named by the first command-line argument,
    fetches the sequence of each peak from ``hg19.genome.fa``, and passes one
    ``(output file, model, sequences, peaks)`` argument tuple per model to
    ``run_in_parallel`` together with ``score_model_worker``. The shared
    output file is ``output.txt`` in the current working directory.

    Raises:
        IndexError: if no peaks file was given on the command line.
    """
    genome = pysam.FastaFile('hg19.genome.fa')
    try:
        # Alternative model source kept for reference:
        #   load_selex_models_from_db()
        models = load_binding_models_from_db()
        peaks = load_peaks(sys.argv[1])
        # Each peak unpacks as (contig, start, stop).
        # NOTE(review): fetching up to stop + 1 suggests the peak stop is
        # inclusive -- confirm against load_peaks.
        seqs_iter = (
            genome.fetch(contig, start, stop + 1)
            for contig, start, stop in peaks
        )
        seqs = FixedLengthDNASequences(seqs_iter)
        with ThreadSafeFile("output.txt", "w") as ofp:
            all_args = [(ofp, model, seqs, peaks) for model in models]
            # NOTE(review): 24 is presumably the number of parallel workers
            # -- confirm against run_in_parallel.
            run_in_parallel(24, score_model_worker, all_args)
    finally:
        # Release the FASTA handle even when loading or scoring fails.
        genome.close()
def load_model(factor_name):
    """Return a single binding model for the factor named ``factor_name``.

    Per the original author's note, this function is not otherwise useful;
    it is kept to show where the models came from.

    The model is looked up with ``load_binding_models_from_db``. If that
    raises ``NoBindingModelsFoundError``, the fallback is
    ``load_TAF1_binding_model()`` for ``'TAF1'`` and
    ``load_all_pwms_from_db`` for every other factor. The first model of the
    resulting collection is returned.

    Raises:
        AssertionError: if the primary lookup succeeds but does not return
            exactly one model.
    """
    try:
        models = load_binding_models_from_db(tf_names=[factor_name])
    except NoBindingModelsFoundError:
        # No good motif could be found, so settle for any motif.
        if factor_name == 'TAF1':
            # TAF1 is special-cased because it does not exist in CISBP.
            models = [load_TAF1_binding_model()]
        else:
            # NOTE(review): tf_names is a bare string here but a one-element
            # list in the primary lookup -- confirm load_all_pwms_from_db
            # accepts a string.
            models = load_all_pwms_from_db(tf_names=factor_name)
    else:
        if len(models) != 1:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type and message
            # are unchanged for existing callers.
            raise AssertionError(
                "Multiple binding models found for '{}'".format(factor_name))
    return models[0]
def load_model(factor_name):
    """Return a single binding model for the factor named ``factor_name``.

    Per the original author's note, this function is not otherwise useful;
    it is kept to show where the models came from.

    The model is looked up with ``load_binding_models_from_db``. If that
    raises ``NoBindingModelsFoundError``, the fallback is
    ``load_TAF1_binding_model()`` for ``'TAF1'`` and
    ``load_all_pwms_from_db`` for every other factor. The first model of the
    resulting collection is returned.

    Raises:
        AssertionError: if the primary lookup succeeds but does not return
            exactly one model.
    """
    try:
        models = load_binding_models_from_db(tf_names=[factor_name])
    except NoBindingModelsFoundError:
        # No good motif could be found, so settle for any motif.
        if factor_name == 'TAF1':
            # TAF1 is special-cased because it does not exist in CISBP.
            models = [load_TAF1_binding_model()]
        else:
            # NOTE(review): tf_names is a bare string here but a one-element
            # list in the primary lookup -- confirm load_all_pwms_from_db
            # accepts a string.
            models = load_all_pwms_from_db(tf_names=factor_name)
    else:
        if len(models) != 1:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type and message
            # are unchanged for existing callers.
            raise AssertionError(
                "Multiple binding models found for '{}'".format(factor_name))
    return models[0]
def score_multiple_fixed_len_seqs(seq_len=10000, n_seqs=100):
    """Smoke-test scoring a batch of fixed-length sequences with one model.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    scores ``n_seqs`` identical sequences of ``seq_len`` ``'A'`` characters
    wrapped in ``FixedLengthDNASequences``. Nothing is asserted: reaching the
    final ``PASS`` line only shows that no call raised.

    Args:
        seq_len: Length of each generated sequence.
        n_seqs: Number of generated sequences.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seqs = FixedLengthDNASequences(['A' * seq_len] * n_seqs)
    scores = model.score_seqs_binding_sites(seqs)
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {} {}'.format(model.motif_len, len(seqs), len(scores)))
def score_multiple_seqs(seq_len=100000, n_seqs=10):
    """Smoke-test scoring a batch of sequences through ``DNASequences``.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    calls ``DNASequences.score_binding_sites`` on ``n_seqs`` identical
    sequences of ``seq_len`` ``'A'`` characters. Nothing is asserted:
    reaching the final ``PASS`` line only shows that no call raised.

    Args:
        seq_len: Length of each generated sequence.
        n_seqs: Number of generated sequences.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seqs = DNASequences(['A' * seq_len] * n_seqs)
    # NOTE(review): 'FWD' presumably selects forward-strand scoring --
    # confirm against DNASequences.score_binding_sites.
    scores = seqs.score_binding_sites(model, 'FWD')
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {}'.format(model.motif_len, len(scores)))
def score_model(seq_len=100000):
    """Smoke-test scoring a single sequence with one model.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    scores one ``DNASequence`` of ``seq_len`` ``'A'`` characters. Nothing is
    asserted: reaching the final ``PASS`` line only shows that no call
    raised.

    Args:
        seq_len: Length of the generated sequence.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seq = DNASequence('A' * seq_len)
    score = model.score_binding_sites(seq)
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {}'.format(model.motif_len, score.shape))
def score_multiple_fixed_len_seqs(seq_len=10000, n_seqs=100):
    """Smoke-test scoring a batch of fixed-length sequences with one model.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    scores ``n_seqs`` identical sequences of ``seq_len`` ``'A'`` characters
    wrapped in ``FixedLengthDNASequences``. Nothing is asserted: reaching the
    final ``PASS`` line only shows that no call raised.

    Args:
        seq_len: Length of each generated sequence.
        n_seqs: Number of generated sequences.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seqs = FixedLengthDNASequences(['A' * seq_len] * n_seqs)
    scores = model.score_seqs_binding_sites(seqs)
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {} {}'.format(model.motif_len, len(seqs), len(scores)))
def score_multiple_seqs(seq_len=100000, n_seqs=10):
    """Smoke-test scoring a batch of sequences through ``DNASequences``.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    calls ``DNASequences.score_binding_sites`` on ``n_seqs`` identical
    sequences of ``seq_len`` ``'A'`` characters. Nothing is asserted:
    reaching the final ``PASS`` line only shows that no call raised.

    Args:
        seq_len: Length of each generated sequence.
        n_seqs: Number of generated sequences.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seqs = DNASequences(['A' * seq_len] * n_seqs)
    # NOTE(review): 'FWD' presumably selects forward-strand scoring --
    # confirm against DNASequences.score_binding_sites.
    scores = seqs.score_binding_sites(model, 'FWD')
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {}'.format(model.motif_len, len(scores)))
def score_model(seq_len=100000):
    """Smoke-test scoring a single sequence with one model.

    Loads the models for ``TEST_MODEL_TF_NAME``, takes the first one, and
    scores one ``DNASequence`` of ``seq_len`` ``'A'`` characters. Nothing is
    asserted: reaching the final ``PASS`` line only shows that no call
    raised.

    Args:
        seq_len: Length of the generated sequence.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seq = DNASequence('A' * seq_len)
    score = model.score_binding_sites(seq)
    # A single pre-formatted argument keeps the output identical on Python 2
    # while also being valid syntax on Python 3.
    print('PASS {} {}'.format(model.motif_len, score.shape))