Exemplo n.º 1
0
def split_to_exons():
    """Cut the best-hit loci into exons and extract matching probe regions.

    Works entirely on module-level names (``separat_exons``,
    ``best_separate_exons``, ``best_hits``, ``probes``, ``blast_task``,
    ``result_file``, ``result_file2``) and returns ``None``.

    Steps:

    1. Load the FASTA file ``separat_exons`` and, for every entry of
       ``best_hits``, write each exon whose key contains the hit's locus
       to ``best_separate_exons`` under the header ``>{probe}_{exon id}``.
    2. Build a nucleotide BLAST database from ``probes`` and run blastn
       with the freshly written exons as the query.
    3. Keep only hits whose query prefix (text before the first ``_``)
       equals the subject name, pick one hit per query, and overwrite the
       BLAST table with that selection.
    4. Write the subject (probe) slice of every selected hit to
       ``result_file`` and the query (exon) slice to ``result_file2``,
       reverse-complementing a slice when its coordinates are descending.
    """
    print('Splitting best hits to exons...')

    def exon_number(query_id):
        # Numeric exon index as encoded inside the query identifier.
        return int(query_id.split('-')[3].split('_')[1])

    def selection_rank(line):
        # Single composite key replacing a chain of stable sorts; most
        # significant component first.  Columns follow the outfmt string
        # used below: 2=pident, 3=qcovhsp, 4=evalue, 5=bitscore.
        cols = line.split()
        query_id = cols[0]
        return (
            query_id.split('-')[2],
            exon_number(query_id),
            -float(cols[3]),
            -float(cols[2]),
            float(cols[4]),
            -float(cols[5]),
        )

    def output_rank(line):
        # Final ordering: by a piece of the subject name, then exon number.
        cols = line.split()
        return (cols[1].split('-')[1], exon_number(cols[0]))

    def oriented_slice(record, first, second):
        # 1-based inclusive coordinates; descending coordinates mean the
        # match lies on the opposite strand.
        if first > second:
            return str(record[second - 1:first].seq.reverse_complement())
        return str(record[first - 1:second].seq)

    # --- 1. Write the exons belonging to every best hit -----------------
    with open(separat_exons) as exon_handle:
        exon_records = SeqIO.to_dict(
            SeqIO.parse(exon_handle, 'fasta', generic_dna))
    with open(best_separate_exons, 'w') as exon_out:
        for best_hit_line in best_hits:
            hit_fields = best_hit_line.split()
            locus_tag = hit_fields[1].split('-')[0]
            probe_name = hit_fields[0]
            for exon_key, exon_record in exon_records.items():
                if locus_tag not in exon_key:
                    continue
                exon_id = str(exon_record.id)
                exon_seq = str(exon_record.seq)
                exon_out.write(f'>{probe_name}_{exon_id}\n{exon_seq}\n')

    # --- 2. BLAST the exons against the probes --------------------------
    blast_table = f'{best_separate_exons}_against_{probes}.txt'
    build_db = NcbimakeblastdbCommandline(dbtype='nucl',
                                          input_file=probes,
                                          out=probes,
                                          parse_seqids=True)
    build_db()
    search = NcbiblastnCommandline(
        task=blast_task,
        query=best_separate_exons,
        db=probes,
        out=blast_table,
        num_threads=4,
        outfmt=
        '6 qaccver saccver pident qcovhsp evalue bitscore sstart send qstart qend'
    )
    search()

    # --- 3. Filter, rank and de-duplicate the hits ----------------------
    with open(blast_table) as blast_in:
        raw_hits = blast_in.readlines()
    own_probe_hits = [
        line for line in raw_hits
        if line.split()[0].split('_')[0] == line.split()[1]
    ]
    ranked_hits = sorted(own_probe_hits, key=selection_rank)

    seen_queries = set()
    selected_hits = []
    for line in ranked_hits:
        query_id = line.split()[0]
        if query_id in seen_queries:
            continue
        seen_queries.add(query_id)
        selected_hits.append(line)
    selected_hits.sort(key=output_rank)

    with open(blast_table, 'w') as blast_out:
        blast_out.writelines(selected_hits)

    # --- 4. Extract the matched regions ---------------------------------
    with open(probes) as probe_handle:
        probe_records = SeqIO.to_dict(
            SeqIO.parse(probe_handle, 'fasta', generic_dna))
    with open(best_separate_exons) as exon_handle:
        query_records = SeqIO.to_dict(
            SeqIO.parse(exon_handle, 'fasta', generic_dna))

    with open(result_file, 'w') as probe_out, \
            open(result_file2, 'w') as query_out:
        for line in selected_hits:
            cols = line.split()
            subject_name = cols[1]
            query_name = cols[0]
            # Kept as text so the header shows the number exactly as it
            # appears in the query identifier.
            exon_label = query_name.split('-')[3].split('_')[1]
            header = f'>{subject_name}_exon_{exon_label}'

            s_first, s_second = int(cols[6]), int(cols[7])
            subject_seq = oriented_slice(probe_records[subject_name],
                                         s_first, s_second)
            probe_out.write(f'{header}\n{subject_seq}\n')

            q_first, q_second = int(cols[8]), int(cols[9])
            query_seq = oriented_slice(query_records[query_name],
                                       q_first, q_second)
            query_out.write(f'{header}\n{query_seq}\n')
    print('Done')
Exemplo n.º 2
0
def test_primers(args):
    """BLAST primers against a reference and optionally add thermodynamics.

    Args:
        args: Object exposing ``Reference``, ``Primers``, ``Output``,
            ``processes``, ``tm_offset`` and ``tm_size`` (each indexed with
            ``[0]``) plus the flag ``skip_tm``.  Presumably an
            ``argparse.Namespace`` built with ``nargs=1`` options --
            confirm against the argument parser.

    Side effects:
        * Builds a nucleotide BLAST database ``<outdir>/<ref basename>.db``.
        * Writes ``<outdir>/<primers basename>.blast.tsv`` (header line
          followed by the raw tabular ``blastn-short`` output).
        * Unless ``args.skip_tm`` is truthy, writes
          ``<outdir>/<primers basename>.blast.TM.tsv`` with the lines
          returned by ``run_thermodynamics``.

    Returns:
        None.
    """
    ref = args.Reference[0]
    primers = args.Primers[0]
    out = args.Output[0]
    nproc = args.processes[0]
    tm_offset = args.tm_offset[0]
    tm_size = args.tm_size[0]
    # NOTE: args.min_align is not consumed by this function.
    skip_tm = args.skip_tm

    # File handler: supplies the ``outdir``, ``ref`` and ``primers``
    # attributes used for every path below.
    iTFH = TFH(ref, primers, out)

    # Build the BLAST database from the reference.
    db = os.path.join(iTFH.outdir, os.path.basename(iTFH.ref) + ".db")
    cline = NcbimakeblastdbCommandline(dbtype="nucl",
                                       input_file=iTFH.ref,
                                       out=db)
    print("Building BLAST Database...")
    print(cline)
    run(str(cline))

    # Run short-BLAST; raw output goes to a temporary file so a header
    # line can be placed in front of it afterwards.
    primers_base = os.path.basename(iTFH.primers)
    result = os.path.join(iTFH.outdir, primers_base + ".blast.tsv")
    result_tmp = os.path.join(iTFH.outdir, primers_base + ".tmp")
    cline = NcbiblastnCommandline(
        query=iTFH.primers,
        db=db,
        task="blastn-short",
        num_threads=nproc,
        outfmt=
        "6 qseqid sseqid sstart send mismatch qlen length pident qseq sseq",
        out=result_tmp)
    print("Running short-BLAST...")
    print(cline)
    run(str(cline))

    # Context managers guarantee both handles are closed before the
    # temporary file is removed, even if copying fails part-way.
    with open(result, "w") as blast_out:
        blast_out.write(
            "#PrimerName\tTargetName\tTargetStart\tTargetEnd\t#Mismatches\tPrimerLength\tAlignedLength\t%Identity\tPrimerSeq\tContigSeq\n"
        )
        with open(result_tmp, "r") as blast_raw:
            blast_out.writelines(blast_raw)
    os.remove(result_tmp)

    if skip_tm:
        return

    # Thermodynamics of the BLAST results.
    print("Running thermodynamic check on blast results...")
    tm_result_file = os.path.join(iTFH.outdir, primers_base + ".blast.TM.tsv")
    chunks = to_chunks(result, nproc)
    tm_result = run_thermodynamics(chunks, nproc, iTFH.ref, tm_offset, tm_size)

    # The header repeats the BLAST columns and appends five
    # thermodynamic columns; each item of ``tm_result`` is written as one
    # line beneath it.
    with open(tm_result_file, "w") as tm_out:
        tm_out.write(
            "#PrimerName\tTargetName\tTargetStart\tTargetEnd\t#Mismatches\tPrimerLength\tAlignedLength\t%Identity\tPrimerSeq\tContigSeq\tPrimerTM\tHeteroDimerTM\tHeteroDimerDG\t3EndStabilityTM\t3EndStabilityDG\n"
        )
        tm_out.writelines(line + "\n" for line in tm_result)
Exemplo n.º 3
0
def blastdb(in_file, db_file):
    """Run ``makeblastdb`` to build a nucleotide database.

    Args:
        in_file: Passed to the command as ``input_file``.
        db_file: Passed to the command as ``out``.

    Returns:
        None; the output of the command invocation is discarded.
    """
    options = {
        'cmd': 'makeblastdb',
        'dbtype': 'nucl',
        'input_file': in_file,
        'out': db_file,
    }
    # Instantiate the command-line wrapper and execute it immediately.
    NcbimakeblastdbCommandline(**options)()