def main():
    """Queue one scoring job per DB binding model over the peak sequences.

    The peaks file path is read from ``sys.argv[1]``. One sequence per peak
    is fetched from ``hg19.genome.fa``, and ``score_model_worker`` is run
    once per model; every job receives the same ``output.txt`` handle, the
    sequence collection and the peaks.
    """
    fasta = pysam.FastaFile('hg19.genome.fa')
    binding_models = load_binding_models_from_db()
    regions = load_peaks(sys.argv[1])

    # Sequences are fetched lazily, one per peak. The end coordinate handed
    # to fetch is stop + 1 (presumably peak stops are inclusive while fetch's
    # end is exclusive -- TODO confirm against load_peaks).
    region_seqs = FixedLengthDNASequences(
        fasta.fetch(chrom, begin, end + 1) for chrom, begin, end in regions
    )

    with ThreadSafeFile("output.txt", "w") as out_fp:
        jobs = []
        for binding_model in binding_models:
            jobs.append((out_fp, binding_model, region_seqs, regions))
        # NOTE(review): 24 is presumably the worker count -- confirm against
        # run_in_parallel's signature.
        run_in_parallel(24, score_model_worker, jobs)
Пример #2
0
def main():
    """Score the sequence under every peak with every DB binding model.

    Takes the peaks file path from ``sys.argv[1]``, reads sequences from
    ``hg19.genome.fa``, and hands ``run_in_parallel`` one argument tuple per
    model: the shared ``output.txt`` handle, the model, the sequences and
    the peaks.
    """
    reference = pysam.FastaFile('hg19.genome.fa')
    db_models = load_binding_models_from_db()
    peak_list = load_peaks(sys.argv[1])

    # Generator, so nothing is fetched until the sequences object consumes
    # it. fetch gets stop + 1 as its end coordinate (presumably to include
    # the stop base -- TODO confirm the peak coordinate convention).
    fetched = (
        reference.fetch(name, lo, hi + 1)
        for name, lo, hi in peak_list
    )
    sequences = FixedLengthDNASequences(fetched)

    with ThreadSafeFile("output.txt", "w") as shared_out:
        worker_args = [
            (shared_out, one_model, sequences, peak_list)
            for one_model in db_models
        ]
        # NOTE(review): the leading 24 looks like the degree of parallelism;
        # verify against run_in_parallel.
        run_in_parallel(24, score_model_worker, worker_args)
def load_model(factor_name):
    """Return a single binding model for ``factor_name`` from the DB.

    Not used for anything important; kept to record where the models
    originally came from.

    Args:
        factor_name: Transcription factor name to look up.

    Returns:
        The first model from the primary lookup, or from the fallback when
        the primary lookup raises ``NoBindingModelsFoundError``.

    Raises:
        AssertionError: If the primary lookup succeeds but does not return
            exactly one model.
    """
    try:
        candidates = load_binding_models_from_db(tf_names=[factor_name])
        assert len(candidates) == 1, \
            "Multiple binding models found for '{}'".format(factor_name)
    except NoBindingModelsFoundError:
        # No good motif was found, so settle for any motif. TAF1 is handled
        # separately because it does not exist in CISBP.
        # NOTE(review): tf_names is a list in the lookup above but a bare
        # string here -- confirm load_all_pwms_from_db accepts that.
        candidates = (
            [load_TAF1_binding_model()]
            if factor_name == 'TAF1'
            else load_all_pwms_from_db(tf_names=factor_name)
        )
    return candidates[0]
Пример #4
0
def load_model(factor_name):
    """Look up one binding model for the given factor in the DB.

    Retained only as a record of where the models came from.

    Args:
        factor_name: Name of the transcription factor.

    Returns:
        The first entry of whichever lookup produced the models: the
        primary binding-model query, the TAF1 special case, or the
        all-PWMs query.

    Raises:
        AssertionError: If the primary query returns a number of models
            other than one.
    """
    wanted = [factor_name]
    try:
        hits = load_binding_models_from_db(tf_names=wanted)
        assert len(hits) == 1, (
            "Multiple binding models found for '{}'".format(factor_name))
    except NoBindingModelsFoundError:
        # Fall back to any available motif when no good one was found.
        if factor_name != 'TAF1':
            # NOTE(review): a bare string is passed as tf_names here, unlike
            # the list used above -- verify this is intended.
            hits = load_all_pwms_from_db(tf_names=factor_name)
        else:
            # TAF1 does not exist in CISBP, so it has a dedicated loader.
            hits = [load_TAF1_binding_model()]
    return hits[0]
Пример #5
0
def score_multiple_fixed_len_seqs(seq_len=10000, n_seqs=100):
    """Smoke-test scoring a batch of equal-length sequences.

    Builds ``n_seqs`` poly-A sequences of ``seq_len`` bases, scores them
    with the first model loaded for ``TEST_MODEL_TF_NAME``, and prints a
    ``PASS`` line with the motif length, the number of sequences and the
    number of scores.

    Args:
        seq_len: Length of each synthetic sequence.
        n_seqs: Number of synthetic sequences to score.
    """
    model = load_binding_models_from_db(TEST_MODEL_TF_NAME)[0]
    seqs = FixedLengthDNASequences(['A' * seq_len] * n_seqs)
    scores = model.score_seqs_binding_sites(seqs)
    # One pre-formatted argument: the Python 2 print statement and the
    # Python 3 print function both emit the same space-separated line.
    print('PASS %s %s %s' % (model.motif_len, len(seqs), len(scores)))
Пример #6
0
def score_multiple_seqs(seq_len=100000, n_seqs=10):
    """Smoke-test scoring a batch of sequences via ``DNASequences``.

    Builds ``n_seqs`` poly-A sequences of ``seq_len`` bases, scores them
    against the first model loaded for ``TEST_MODEL_TF_NAME`` with the
    ``'FWD'`` argument, and prints a ``PASS`` line with the motif length and
    the number of scores.

    Args:
        seq_len: Length of each synthetic sequence.
        n_seqs: Number of synthetic sequences to score.
    """
    model = load_binding_models_from_db(TEST_MODEL_TF_NAME)[0]
    seqs = DNASequences(['A' * seq_len] * n_seqs)
    scores = seqs.score_binding_sites(model, 'FWD')
    # One pre-formatted argument: the Python 2 print statement and the
    # Python 3 print function both emit the same space-separated line.
    print('PASS %s %s' % (model.motif_len, len(scores)))
Пример #7
0
def score_model(seq_len=100000):
    """Smoke-test scoring a single sequence with one model.

    Builds one poly-A sequence of ``seq_len`` bases, scores it with the
    first model loaded for ``TEST_MODEL_TF_NAME``, and prints a ``PASS``
    line with the motif length and the ``shape`` of the returned score.

    Args:
        seq_len: Length of the synthetic sequence.
    """
    model = load_binding_models_from_db(TEST_MODEL_TF_NAME)[0]
    seq = DNASequence('A' * seq_len)
    score = model.score_binding_sites(seq)
    # One pre-formatted argument so the line is the same under the Python 2
    # print statement and the Python 3 print function. The values are wrapped
    # in a tuple so that score.shape is formatted as one item.
    print('PASS %s %s' % (model.motif_len, score.shape))
Пример #8
0
def score_multiple_fixed_len_seqs(seq_len=10000, n_seqs=100):
    """Score many identical-length synthetic sequences as a sanity check.

    ``n_seqs`` sequences of ``seq_len`` repeated ``'A'`` bases are wrapped in
    ``FixedLengthDNASequences`` and scored with the first model returned for
    ``TEST_MODEL_TF_NAME``. A ``PASS`` line is printed with the motif
    length, sequence count and score count.

    Args:
        seq_len: Number of bases in each sequence.
        n_seqs: How many sequences to build.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    poly_a = 'A' * seq_len
    seqs = FixedLengthDNASequences([poly_a] * n_seqs)
    scores = model.score_seqs_binding_sites(seqs)
    # Format first, then print one string: valid and identical in output on
    # both Python 2 and Python 3.
    summary = (model.motif_len, len(seqs), len(scores))
    print('PASS %s %s %s' % summary)
Пример #9
0
def score_multiple_seqs(seq_len=100000, n_seqs=10):
    """Score several synthetic sequences as a sanity check.

    ``n_seqs`` sequences of ``seq_len`` repeated ``'A'`` bases are wrapped in
    ``DNASequences`` and scored, passing ``'FWD'``, against the first model
    returned for ``TEST_MODEL_TF_NAME``. A ``PASS`` line is printed with the
    motif length and score count.

    Args:
        seq_len: Number of bases in each sequence.
        n_seqs: How many sequences to build.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    poly_a = 'A' * seq_len
    seqs = DNASequences([poly_a] * n_seqs)
    scores = seqs.score_binding_sites(model, 'FWD')
    # Format first, then print one string: valid and identical in output on
    # both Python 2 and Python 3.
    summary = (model.motif_len, len(scores))
    print('PASS %s %s' % summary)
Пример #10
0
def score_model(seq_len=100000):
    """Score one synthetic sequence as a sanity check.

    A single sequence of ``seq_len`` repeated ``'A'`` bases is wrapped in
    ``DNASequence`` and scored with the first model returned for
    ``TEST_MODEL_TF_NAME``. A ``PASS`` line is printed with the motif length
    and the ``shape`` attribute of the score.

    Args:
        seq_len: Number of bases in the sequence.
    """
    models = load_binding_models_from_db(TEST_MODEL_TF_NAME)
    model = models[0]
    seq = DNASequence('A' * seq_len)
    score = model.score_binding_sites(seq)
    # Format first, then print one string: valid and identical in output on
    # both Python 2 and Python 3. The explicit tuple keeps score.shape from
    # being unpacked as separate format arguments.
    summary = (model.motif_len, score.shape)
    print('PASS %s %s' % summary)