def blast(query_file, db_file, output_file='BLASTResult.xml'):
    """Run a ``blastn`` search and return the path of its report.

    Args:
        query_file: Value passed as the ``query`` option.
        db_file: Value passed as the ``db`` option.
        output_file: Value passed as the ``out`` option; the report is
            requested with ``outfmt=5``.

    Returns:
        ``output_file``, unchanged.
    """
    options = dict(
        num_threads=cpu_count(),
        query=query_file,
        db=db_file,
        task='blastn',
        outfmt=5,
        out=output_file,
    )
    blastn = nb(**options)
    # The two values returned by the call are not used; the result of
    # interest is the file written to ``out``.
    _stdout, _stderr = blastn()
    return output_file
def blast(query_file, database):
    """Search ``database`` with ``query_file`` using ``blastn-short``.

    The report is written to the fixed path ``out/BlastResult.xml`` with
    ``outfmt=5`` and an e-value cutoff of ``1e-5``. Nothing is returned.

    Args:
        query_file: Value passed as the ``query`` option.
        database: Value passed as the ``db`` option.
    """
    settings = {
        'num_threads': cpu_count(),
        'query': query_file,
        'db': database,
        'task': 'blastn-short',
        'evalue': 1e-5,
        'outfmt': 5,
        'out': 'out/BlastResult.xml',
    }
    search = nb(**settings)
    # Return values of the call are intentionally ignored.
    _stdout, _stderr = search()
def validate(candidate_file, input_file, n_seqs, min_len, min_covrage,
             max_mismatch):
    """Score primer candidates by BLASTing them against the ungapped input.

    Steps:
      1. Copy ``input_file`` to ``validate.fasta``, removing every ``-``
         from lines that do not start with ``>``.
      2. Convert ``candidate_file`` from FASTQ to
         ``primer_candidate.fasta``.
      3. Build a nucleotide BLAST database from ``validate.fasta`` and
         search it with the candidates; the report goes to
         ``BlastResult.xml``.
      4. For every query, count the hits whose first HSP has a raw bit
         score of at least ``min_len - max_mismatch`` and keep the queries
         whose hit fraction reaches ``min_covrage``.

    Args:
        candidate_file: FASTQ file with the primer candidates.
        input_file: Sequence file whose gap characters are stripped before
            it is used as the BLAST database.
        n_seqs: Used as ``max_target_seqs`` and as the denominator of the
            hit fraction and of the position average.
        min_len: Together with ``max_mismatch`` defines the minimum raw
            bit score of a good hit.
        min_covrage: Minimum hit fraction (``good_hits / n_seqs``) a query
            needs to be returned.
        max_mismatch: Subtracted from ``min_len`` to get the score cutoff.

    Returns:
        A list of ``[query_id, hit_fraction, sum_bitscore_raw, position]``
        rows sorted by ``hit_fraction`` in descending order, where
        ``position`` is the sum of the hit-range midpoints of the good hits
        divided by ``n_seqs``.
    """
    # Remove gaps from the old alignment file.
    no_gap = 'validate.fasta'
    with open(no_gap, 'w') as new, open(input_file, 'r') as old:
        for line in old:
            if line.startswith('>'):
                new.write(line)
            else:
                new.write(line.replace('-', ''))

    # Build the BLAST database.
    candidate_fasta = 'primer_candidate.fasta'
    SeqIO.convert(candidate_file, 'fastq', candidate_fasta, 'fasta')
    run('makeblastdb -in {} -dbtype nucl'.format(no_gap), shell=True)

    # Run BLAST; only the report file is used afterwards.
    blast_result_file = 'BlastResult.xml'
    cmd = nb(
        num_threads=cpu_count(),
        query=candidate_fasta,
        db=no_gap,
        task='blastn',
        evalue=1e-5,
        max_hsps=1,
        max_target_seqs=n_seqs,
        outfmt=5,
        out=blast_result_file,
    )
    cmd()

    # Parse the report into one row per query.
    min_bitscore_raw = min_len - max_mismatch
    blast_result = []
    for query in SearchIO.parse(blast_result_file, 'blast-xml'):
        if len(query) == 0:
            # Use the same four-field layout as rows with hits so that
            # consumers can index every row uniformly.
            blast_result.append([query.id, 0, 0, 0])
            continue
        sum_bitscore_raw = 0
        good_hits = 0
        start = 0
        for hit in query:
            # max_hsps=1, so the first HSP is the only one for this hit.
            hsp = hit[0]
            if hsp.bitscore_raw >= min_bitscore_raw:
                sum_bitscore_raw += hsp.bitscore_raw
                good_hits += 1
                start += sum(hsp.hit_range) / 2
        blast_result.append(
            [query.id, good_hits / n_seqs, sum_bitscore_raw, start / n_seqs])

    # Keep the queries that hit enough sequences, best coverage first.
    validate_result = [
        record for record in blast_result if record[1] >= min_covrage
    ]
    validate_result.sort(key=lambda x: x[1], reverse=True)
    return validate_result
def blast(query_file, database):
    """Run a ``blastn-short`` search of ``query_file`` against ``database``.

    The search uses an e-value cutoff of ``1e-5`` and writes its report
    (``outfmt=5``) to ``out/BlastResult.xml``. The function returns
    ``None``.

    Args:
        query_file: Value passed as the ``query`` option.
        database: Value passed as the ``db`` option.
    """
    report_path = 'out/BlastResult.xml'
    threads = cpu_count()
    # Build the command and execute it in one step; the pair it returns
    # is not needed.
    _out, _err = nb(
        num_threads=threads,
        query=query_file,
        db=database,
        task='blastn-short',
        evalue=1e-5,
        outfmt=5,
        out=report_path,
    )()
def blast(option):
    """Run a ``megablast`` search configured from the command line.

    The database is always taken from ``sys.argv[2]`` and the report is
    written to ``BlastResult.xml``. Nothing is returned.

    Args:
        option: When equal to the string ``'1'`` the query file is
            ``./output/all.fasta``; otherwise ``sys.argv[1]`` is used.
    """
    use_default_query = option == '1'
    query_path = './output/all.fasta' if use_default_query else sys.argv[1]
    megablast = nb(
        query=query_path,
        db=sys.argv[2],
        task='megablast',
        evalue=0.001,
        outfmt=5,  # xml format
        out='BlastResult.xml',
    )
    # The returned pair is unused; results are read from the output file.
    _stdout, _stderr = megablast()
def blast(option):
    """Search the database named in ``sys.argv[2]`` with ``megablast``.

    Args:
        option: ``'1'`` selects the fixed query ``./output/all.fasta``;
            any other value selects ``sys.argv[1]``.

    The report is written to ``BlastResult.xml`` with ``outfmt=5`` and an
    e-value cutoff of ``0.001``. The function returns ``None``.
    """
    # sys.argv[1] is only read when it is actually needed.
    if option != '1':
        source = sys.argv[1]
    else:
        source = './output/all.fasta'

    settings = {
        'query': source,
        'db': sys.argv[2],
        'task': 'megablast',
        'evalue': 0.001,
        'outfmt': 5,  # xml format
        'out': 'BlastResult.xml',
    }
    runner = nb(**settings)
    _out, _err = runner()
    return None
def blast(ref_file, query_file):
    """Build a nucleotide BLAST database from ``ref_file`` and search it.

    The database, a ``log.txt`` file and the report are all placed under
    ``args.out``. ``max_target_seqs`` is deliberately not passed because it
    was reported to have a bug.

    Args:
        ref_file: File handed to ``makeblastdb -in``; its name joined onto
            ``args.out`` becomes the database name.
        query_file: Value passed as the ``query`` option of the search.

    Returns:
        Path of the report, ``BlastResult.xml`` inside ``args.out``.
    """
    out_dir = args.out
    database = os.path.join(out_dir, ref_file)
    make_db = 'makeblastdb -in {0} -out {1} -dbtype nucl'.format(
        ref_file, database)

    # Hide makeblastdb's output by sending its stdout to a log file.
    log_path = os.path.join(out_dir, 'log.txt')
    with open(log_path, 'w') as log:
        run(make_db, stdout=log, shell=True)

    report = os.path.join(out_dir, 'BlastResult.xml')
    options = dict(
        num_threads=cpu_count(),
        query=query_file,
        db=database,
        task='blastn',
        evalue=args.evalue,
        max_hsps=1,
        outfmt=5,
        out=report,
    )
    blastn = nb(**options)
    # The returned pair is unused; callers read the report file instead.
    _stdout, _stderr = blastn()
    return report