def pssm_search(request):
    """Scans the genome with the given motif.

    A non-POST request gets 'pssm_search.html' with a blank
    BindingSiteSearchForm; a POST whose form does not validate gets the same
    template with the submitted form.  A valid POST builds a motif from the
    submitted sites, searches the selected genome's sequence with the motif's
    PSSM and renders 'pssm_search_results.html' with the matches, the weblogo
    value and the threshold that was used.
    """
    if request.method != 'POST':
        return render(request, 'pssm_search.html',
                      {'form': BindingSiteSearchForm()})

    form = BindingSiteSearchForm(request.POST)
    if not form.is_valid():
        return render(request, 'pssm_search.html', {'form': form})

    genome = form.cleaned_data['genome']
    sites = form.cleaned_data['sites']
    motif = bioutils.build_motif(sites)

    # The score cut-off comes from the PSSM score distribution.
    score_dist = motif.pssm.distribution(precision=10**4)
    threshold = score_dist.threshold_patser()

    def describe(pos, strand, score):
        """Builds the template record for a single hit."""
        end = pos + motif.length
        site_seq = genome.seq[pos:end]
        if strand == -1:
            # Hits on the -1 strand are reported as the reverse complement
            # of the genome slice.
            site_seq = bioutils.reverse_complement(site_seq)
        return {
            'start': pos,
            'end': end,
            'strand': strand,
            'seq': site_seq,
            'score': score,
        }

    hits = bioutils.pssm_search(motif.pssm, genome.seq, threshold=threshold)
    matches = [describe(pos, strand, score) for pos, strand, score in hits]

    context = {
        'matches': matches,
        'weblogo': bioutils.weblogo_uri(sites),
        'threshold': threshold,
    }
    return render(request, 'pssm_search_results.html', context)
def generate_motif_file():
    """Generates motif file containing all motifs for each TF-instance.

    The output is written to 'collectf_meme.cm' inside the first directory
    listed in settings.STATICFILES_DIRS.  For every TF instance, motif reports
    are built from its curation site instances; only the first report is
    used, and TF instances with no report or with fewer than 10 aligned sites
    are skipped.  Each exported motif is a '>' header line followed by four
    tab-separated count rows, in the order A, C, G, T.
    """
    out_path = os.path.join(settings.STATICFILES_DIRS[0], 'collectf_meme.cm')
    with open(out_path, 'w') as out:
        for TF_instance in models.TFInstance.objects.all():
            # Progress output; the parenthesised form prints the same text as
            # the Python 2 print statement.
            print(TF_instance)

            site_instances = models.Curation_SiteInstance.objects.filter(
                curation__TF_instances=TF_instance)
            reports = motif_report.build_motif_reports(site_instances)
            if not reports:
                continue

            first_report = reports[0]
            sites = first_report.aligned_sites
            if len(sites) < 10:
                continue

            motif = build_motif(sites)
            header = '>%s %s_%s\n' % (
                dbxref.to_uniprot_dbxref(TF_instance.TF_instance_id),
                first_report.TF_name,
                first_report.short_species_name)
            out.write(header)
            for base in 'ACGT':
                out.write('%s\n' % '\t'.join(map(str, motif.counts[base])))
def export_PSFM(meta_sites, **kwargs):
    """Exports Position-Specific-Frequency-Matrix

    The required keyword argument 'format' selects the layout: 'JASPAR',
    'TRANSFAC' or 'raw_fasta'.  Any other value produces an empty string.
    The motif is built from the lasagna alignment of the delegate site
    instances of meta_sites; the TF names and organism names used in the
    headers come from export_base(meta_sites).

    Returns the exported text, lines joined with newlines.
    """
    fmt = kwargs['format']
    rows = export_base(meta_sites)
    delegates = [meta_site.delegate_site_instance for meta_site in meta_sites]
    motif = bioutils.build_motif(lasagna.lasagna(delegates))
    consensus = motif.degenerate_consensus

    # Distinct values only; whitespace inside organism names becomes '_'.
    TF_name = ','.join(set(row['TF_name'] for row in rows))
    sp = ','.join(set('_'.join(row['organism'].split()) for row in rows))

    def column(pos):
        """Counts at one motif position, in A, C, G, T order."""
        return tuple(motif.counts[base][pos] for base in 'ACGT')

    out = []
    if fmt == 'JASPAR':
        out.append('> CollecTF_%s_%s' % (TF_name, sp))
        for base in 'ACGT':
            out.append(
                '%s [ %s ]' % (base, ' '.join(map(str, motif.counts[base]))))
    elif fmt == 'TRANSFAC':
        out.append('ID %s' % TF_name)
        out.append('BF %s' % sp)
        out.append('PO\tA\tC\tG\tT')
        for pos in range(motif.length):
            # 1-based, zero-padded position, the four counts, then the
            # consensus letter for that position.
            fields = (pos + 1,) + column(pos) + (consensus[pos],)
            out.append('%02d\t%d\t%d\t%d\t%d\t%s' % fields)
        out.append('XX')
    elif fmt == 'raw_fasta':
        out.append('>CollecTF_%s_%s' % (TF_name, sp))
        for pos in range(motif.length):
            out.append('%d\t%d\t%d\t%d' % column(pos))
    return '\n'.join(out)
def pssm_search(request):
    """Scans the genome with the given motif.

    Renders 'pssm_search.html' with the form unless the request is a POST
    carrying a valid BindingSiteSearchForm.  In that case the submitted sites
    are turned into a motif, the chosen genome's sequence is searched with the
    motif's PSSM, and 'pssm_search_results.html' is rendered with the list of
    matches, the weblogo value and the score threshold.
    """
    is_post = request.method == 'POST'
    if is_post:
        form = BindingSiteSearchForm(request.POST)
    else:
        form = BindingSiteSearchForm()

    # Validation is only attempted for POST requests.
    if not (is_post and form.is_valid()):
        return render(request, 'pssm_search.html', {'form': form})

    cleaned = form.cleaned_data
    genome = cleaned['genome']
    motif = bioutils.build_motif(cleaned['sites'])

    # Find a threshold
    threshold = motif.pssm.distribution(precision=10**4).threshold_patser()

    matches = []
    for pos, strand, score in bioutils.pssm_search(
            motif.pssm, genome.seq, threshold=threshold):
        end = pos + motif.length
        forward = genome.seq[pos:end]
        # Strand -1 hits are shown as the reverse complement of the slice.
        seq = bioutils.reverse_complement(forward) if strand == -1 else forward
        matches.append(
            dict(start=pos, end=end, strand=strand, seq=seq, score=score))

    return render(request, 'pssm_search_results.html', {
        'matches': matches,
        'weblogo': bioutils.weblogo_uri(cleaned['sites']),
        'threshold': threshold,
    })
def motif_pwm_columns(sites):
    """Returns the columns of the PWM of the motif.

    Builds a motif from sites, sets its pseudocounts to 1.0 and returns a
    list with one dict per motif position.  Each dict maps the letters 'A',
    'C', 'G' and 'T' to the PWM value at that position.
    """
    motif = bioutils.build_motif(sites)
    motif.pseudocounts = 1.0
    # Read the PWM once: nothing below modifies the motif, so a single lookup
    # is enough.  NOTE(review): presumably motif.pwm is recomputed from the
    # counts on every access (as in Biopython) -- confirm; if so this avoids
    # repeating that work for every cell.
    pwm = motif.pwm
    # range (not xrange) works on both Python 2 and 3 and matches export_PSFM.
    return [{let: pwm[let][i] for let in 'ACGT'} for i in range(len(motif))]