def blast(query_file, db_file, output_file='BLASTResult.xml'):
    """Run a ``blastn`` search and return the path of the report file.

    *query_file* is searched against the database *db_file* with
    ``num_threads`` set to ``cpu_count()``. The report is written to
    *output_file* using ``outfmt=5``.
    """
    options = dict(
        num_threads=cpu_count(),
        query=query_file,
        db=db_file,
        task='blastn',
        outfmt=5,
        out=output_file,
    )
    run_blastn = nb(**options)
    # The two values returned by the call are not needed; only the report
    # file written by the search is used afterwards.
    _first, _second = run_blastn()
    return output_file
示例#2
0
def blast(query_file, database, output_file='out/BlastResult.xml'):
    """Run a ``blastn-short`` search of *query_file* against *database*.

    Args:
        query_file: Query file handed to the search as ``query``.
        database: BLAST database handed to the search as ``db``.
        output_file: Destination of the report (``outfmt=5``). Defaults to
            ``'out/BlastResult.xml'``, the location that used to be
            hard-coded, so existing two-argument calls behave as before.

    Returns:
        ``output_file``, so callers do not have to know the default path.
    """
    cmd = nb(num_threads=cpu_count(),
             query=query_file,
             db=database,
             task='blastn-short',
             evalue=1e-5,
             outfmt=5,
             out=output_file)
    # The values returned by the call are not used; the report file is the
    # only result that matters here.
    cmd()
    return output_file
示例#3
0
def validate(candidate_file, input_file, n_seqs, min_len, min_covrage,
             max_mismatch):
    """Keep the primer candidates that hit enough of the aligned sequences.

    The sequence lines of *input_file* are copied to ``validate.fasta`` with
    every ``-`` removed, and that file is turned into a nucleotide BLAST
    database. *candidate_file* is converted from FASTQ to
    ``primer_candidate.fasta`` and searched against the database with
    ``blastn``; the report is written to ``BlastResult.xml``. All three files
    are created in the current working directory.

    Args:
        candidate_file: FASTQ file holding the primer candidates.
        input_file: Alignment file whose header lines start with ``>``.
        n_seqs: Passed to BLAST as ``max_target_seqs`` and used as the
            denominator of the coverage and position columns. Presumably the
            number of sequences in *input_file* -- confirm against the caller.
        min_len: Together with *max_mismatch* defines the score threshold
            ``min_len - max_mismatch``.
        min_covrage: Minimum value of ``good hits / n_seqs`` a candidate needs
            to be returned.
        max_mismatch: See *min_len*.

    Returns:
        A list of ``[query id, good hits / n_seqs, summed bitscore_raw,
        summed hit midpoints / n_seqs]`` rows, sorted by the second column in
        descending order. A query without any hit is scored
        ``[query id, 0, 0, 0]`` so every row has the same four columns.
    """
    # remove gap in old alignment file
    no_gap = 'validate.fasta'
    with open(no_gap, 'w') as new, open(input_file, 'r') as old:
        for line in old:
            if line.startswith('>'):
                new.write(line)
            else:
                new.write(line.replace('-', ''))

    # build blast db
    candidate_fasta = 'primer_candidate.fasta'
    SeqIO.convert(candidate_file, 'fastq', candidate_fasta, 'fasta')
    run('makeblastdb -in {} -dbtype nucl'.format(no_gap), shell=True)

    # blast
    blast_result_file = 'BlastResult.xml'
    cmd = nb(num_threads=cpu_count(),
             query=candidate_fasta,
             db=no_gap,
             task='blastn',
             evalue=1e-5,
             max_hsps=1,
             max_target_seqs=n_seqs,
             outfmt=5,
             out=blast_result_file)
    # The values returned by the call are not used; the report is read back
    # from blast_result_file below.
    cmd()

    # parse
    min_bitscore_raw = min_len - max_mismatch
    blast_result = []
    for query in SearchIO.parse(blast_result_file, 'blast-xml'):
        if len(query) == 0:
            # Same four columns as a scored query, so callers can index any
            # column without checking the row length first.
            blast_result.append([query.id, 0, 0, 0])
            continue
        sum_bitscore_raw = 0
        good_hits = 0
        midpoint_sum = 0
        for hit in query:
            # Only the first HSP of each hit is inspected (max_hsps=1 is
            # requested from BLAST above).
            first_hsp = hit[0]
            if first_hsp.bitscore_raw >= min_bitscore_raw:
                sum_bitscore_raw += first_hsp.bitscore_raw
                good_hits += 1
                # Midpoint of the matched range on the database sequence.
                midpoint_sum += sum(first_hsp.hit_range) / 2
        blast_result.append([query.id, good_hits / n_seqs, sum_bitscore_raw,
                             midpoint_sum / n_seqs])

    # validate: keep rows reaching the coverage threshold, best first
    validate_result = [record for record in blast_result
                       if record[1] >= min_covrage]
    validate_result.sort(key=lambda x: x[1], reverse=True)
    return validate_result
示例#4
0
文件: divide.py 项目: jangwen/python
def blast(query_file, database, output_file='out/BlastResult.xml'):
    """Search *query_file* against *database* with the ``blastn-short`` task.

    Args:
        query_file: Query file handed to the search as ``query``.
        database: BLAST database handed to the search as ``db``.
        output_file: Destination of the report (``outfmt=5``). The default,
            ``'out/BlastResult.xml'``, is the path that used to be fixed
            inside the function, so existing calls are unaffected.

    Returns:
        ``output_file``, the path the report was written to.
    """
    cmd = nb(
        num_threads=cpu_count(),
        query=query_file,
        db=database,
        task='blastn-short',
        evalue=1e-5,
        outfmt=5,
        out=output_file,
    )
    # Nothing returned by the call is needed; the report file is the result.
    cmd()
    return output_file
示例#5
0
def blast(option):
    """Run a ``megablast`` search and write the report to BlastResult.xml.

    When *option* is ``'1'`` the query is ``./output/all.fasta``; otherwise
    it is read from ``sys.argv[1]``. The database always comes from
    ``sys.argv[2]``. Nothing is returned.
    """
    query_file = './output/all.fasta' if option == '1' else sys.argv[1]
    # num_threads is intentionally not passed here.
    settings = {
        'query': query_file,
        'db': sys.argv[2],
        'task': 'megablast',
        'evalue': 0.001,
        # xml format
        'outfmt': 5,
        'out': 'BlastResult.xml',
    }
    search = nb(**settings)
    # The two returned values are unpacked but not used.
    _first, _second = search()
示例#6
0
def blast(option):
    """Search with ``megablast`` and store the report in BlastResult.xml.

    The query file is ``./output/all.fasta`` if *option* equals ``'1'`` and
    ``sys.argv[1]`` in every other case; ``sys.argv[2]`` names the database.
    The function has no return value.
    """
    if option != '1':
        query_file = sys.argv[1]
    else:
        query_file = './output/all.fasta'
    database = sys.argv[2]
    # No num_threads argument is given; outfmt=5 selects the xml format.
    blastn = nb(query=query_file,
                db=database,
                task='megablast',
                evalue=0.001,
                outfmt=5,
                out='BlastResult.xml')
    # Both returned values are unpacked and then ignored.
    _first, _second = blastn()
    return None
示例#7
0
def blast(ref_file, query_file):
    """Build a nucleotide BLAST database from *ref_file* and search it.

    ``makeblastdb`` is run on *ref_file* and writes the database to
    ``os.path.join(args.out, ref_file)``; its standard output is redirected
    to ``log.txt`` inside ``args.out``. *query_file* is then searched with
    ``blastn`` using ``args.evalue`` as the e-value and ``max_hsps=1``.

    ``max_target_seqs`` is deliberately left unset because it was reported
    to be buggy.

    Args:
        ref_file: Sequence file the database is built from.
        query_file: Query file for the search.

    Returns:
        Path of the report, ``BlastResult.xml`` inside ``args.out``.
    """
    db_file = os.path.join(args.out, ref_file)
    # hide output
    with open(os.path.join(args.out, 'log.txt'), 'w') as log:
        # An argument list without a shell passes the file names to
        # makeblastdb verbatim, so paths containing spaces or shell
        # metacharacters are neither split nor interpreted.
        run(['makeblastdb', '-in', ref_file, '-out', db_file,
             '-dbtype', 'nucl'],
            stdout=log)
    result = os.path.join(args.out, 'BlastResult.xml')
    cmd = nb(
        num_threads=cpu_count(),
        query=query_file,
        db=db_file,
        task='blastn',
        evalue=args.evalue,
        max_hsps=1,
        outfmt=5,
        out=result)
    # The values returned by the call are not used; callers read the report
    # from the returned path.
    cmd()
    return result