示例#1
0
def make_config(output_dirpath, tmp_dirpath, threads, clade_dirpath,
                augustus_dirpath):
    """Generate a config file in *output_dirpath* from the default template.

    The template ``default_config_fname`` is read from the ``busco``
    directory located next to this module. Every non-blank line is split on
    whitespace; if its last token is one of the placeholder keywords below,
    that token is replaced with the corresponding value. Lines are written
    back with single spaces between tokens, and blank lines are dropped.

    Returns the path of the written config file.
    """
    busco_home = join(dirname(realpath(__file__)), 'busco')
    if qconfig.prokaryote:
        domain_name = 'prokaryota'
    else:
        domain_name = 'eukaryota'

    # Resolve tool locations in the same order the placeholders are listed.
    tblastn_dirpath = dirname(get_blast_fpath('tblastn'))
    makeblastdb_dirpath = dirname(get_blast_fpath('makeblastdb'))
    augustus_bin_dirpath = join(augustus_dirpath, 'bin')
    augustus_scripts_dirpath = join(augustus_dirpath, 'scripts')

    substitutions = {
        'out_path': output_dirpath,
        'lineage_path': clade_dirpath,
        'domain': domain_name,
        'tmp_dir': tmp_dirpath,
        'threads': str(threads),
        'tblastn_path': tblastn_dirpath,
        'makeblastdb_path': makeblastdb_dirpath,
        # augustus and etraining both live in the augustus "bin" directory
        'augustus_path': augustus_bin_dirpath,
        'etraining_path': augustus_bin_dirpath,
        'augustus_scripts_path': augustus_scripts_dirpath,
        'hmmsearch_path': busco_home,
    }

    template_fpath = join(busco_home, default_config_fname)
    result_fpath = join(output_dirpath, config_fname)
    with open(template_fpath) as template, open(result_fpath, 'w') as result:
        for raw_line in template:
            tokens = raw_line.split()
            if tokens:
                # Only the trailing token of a line can be a placeholder.
                tokens[-1] = substitutions.get(tokens[-1], tokens[-1])
                result.write(' '.join(tokens) + '\n')
    return result_fpath
示例#2
0
def parallel_blast(contigs_fpath, label, corrected_dirpath, err_fpath,
                   blast_res_fpath, blast_check_fpath, blast_threads):
    """Run blastn for one assembly against the module-level ``db_fpath``.

    If *contigs_fpath* has a known compression extension, it is first
    unpacked into *corrected_dirpath* (as ``<basename>.unpacked``) and that
    copy is used as the BLAST query. blastn stdout (``-outfmt 7``) is written
    to the per-label results file, and its stderr is appended to *err_fpath*.
    Afterwards a check file is written that records the assembly path and the
    value returned by ``md5(contigs_fpath)``.
    """
    logger.info('  ' + 'processing ' + label)
    blast_query_fpath = contigs_fpath
    compress_ext = ('.gz', '.gzip', '.bz2', '.bzip2', '.zip')
    if contigs_fpath.endswith(compress_ext):
        logger.info('  ' + 'unpacking ' + label)
        unpacked_fpath = os.path.join(
            corrected_dirpath,
            os.path.basename(contigs_fpath) + '.unpacked')
        with _get_fasta_file_handler(contigs_fpath) as f_in:
            with open(unpacked_fpath, 'w') as f_out:
                for line in f_in:
                    f_out.write(line)
        blast_query_fpath = unpacked_fpath
    res_fpath = get_blast_output_fpath(blast_res_fpath, label)
    check_fpath = get_blast_output_fpath(blast_check_fpath, label)

    # Pass the arguments as a list instead of formatting a command string and
    # re-splitting it: paths containing whitespace or quotes stay intact.
    blast_args = [get_blast_fpath('blastn'),
                  '-query', blast_query_fpath,
                  '-db', db_fpath,
                  '-outfmt', '7',
                  '-num_threads', str(blast_threads)]
    # Context managers guarantee the output handles are flushed and closed
    # once blastn has finished, even if the call raises.
    with open(res_fpath, 'w') as res_file:
        with open(err_fpath, 'a') as err_file:
            qutils.call_subprocess(blast_args,
                                   stdout=res_file,
                                   stderr=err_file,
                                   logger=logger)
    logger.info('  ' + 'BLAST results for %s are saved to %s...' %
                (label, res_fpath))
    with open(check_fpath, 'w') as check_file:
        check_file.write('Assembly: %s md5 checksum: %s\n' %
                         (contigs_fpath, md5(contigs_fpath)))
示例#3
0
def parallel_blast(contigs_fpath, label, corrected_dirpath, err_fpath, blast_res_fpath, blast_check_fpath, blast_threads):
    """Run blastn for one assembly against the module-level ``db_fpath``.

    If *contigs_fpath* has a known compression extension, it is first
    unpacked into *corrected_dirpath* (as ``<basename>.unpacked``) and that
    copy is used as the BLAST query. blastn stdout (``-outfmt 7``) is written
    to the per-label results file, and its stderr is appended to *err_fpath*.
    Afterwards a check file is written that records the assembly path and its
    size in bytes.
    """
    logger.info('  ' + 'processing ' + label)
    blast_query_fpath = contigs_fpath
    compress_ext = ('.gz', '.gzip', '.bz2', '.bzip2', '.zip')
    if contigs_fpath.endswith(compress_ext):
        logger.info('  ' + 'unpacking ' + label)
        unpacked_fpath = os.path.join(corrected_dirpath, os.path.basename(contigs_fpath) + '.unpacked')
        with _get_fasta_file_handler(contigs_fpath) as f_in:
            with open(unpacked_fpath, 'w') as f_out:
                for line in f_in:
                    f_out.write(line)
        blast_query_fpath = unpacked_fpath
    res_fpath = get_blast_output_fpath(blast_res_fpath, label)
    check_fpath = get_blast_output_fpath(blast_check_fpath, label)

    # Pass the arguments as a list instead of formatting a command string and
    # re-splitting it: paths containing whitespace or quotes stay intact.
    blast_args = [get_blast_fpath('blastn'), '-query', blast_query_fpath, '-db', db_fpath,
                  '-outfmt', '7', '-num_threads', str(blast_threads)]
    # Context managers guarantee the output handles are flushed and closed
    # once blastn has finished, even if the call raises.
    with open(res_fpath, 'w') as res_file:
        with open(err_fpath, 'a') as err_file:
            qutils.call_subprocess(blast_args, stdout=res_file, stderr=err_file, logger=logger)
    logger.info('  ' + 'BLAST results for %s are saved to %s...' % (label, res_fpath))
    with open(check_fpath, 'w') as check_file:
        check_file.write('Assembly: %s size: %d\n' % (contigs_fpath, os.path.getsize(contigs_fpath)))
示例#4
0
def download_blastdb(logger=logger, only_clean=False):
    """Make sure the SILVA BLAST database is available locally.

    Sets the module-level ``blastdb_dirpath`` and, unless only cleaning,
    ``db_fpath``. With *only_clean* set, the download directory is removed
    (if it exists) and nothing is downloaded or built.

    When the ``.nsq`` file is missing or smaller than ``db_nsq_fsize`` bytes:
      1. the gzipped SILVA file is downloaded unless it is already present;
      2. it is gunzipped and every space is replaced with an underscore;
      3. makeblastdb is run on the result.

    Returns True after cleaning, when the database is already in place, or
    when it was built successfully. Returns False when no download directory
    is available, the download fails, or makeblastdb does not produce a
    sufficiently large ``.nsq`` file.
    """
    global blastdb_dirpath
    global db_fpath

    blastdb_dirpath = get_dir_for_download('silva',
                                           'Silva',
                                           [silva_downloaded_fname + '.nsq'],
                                           logger,
                                           only_clean=only_clean)
    if not blastdb_dirpath:
        return False

    if only_clean:
        if os.path.isdir(blastdb_dirpath):
            logger.info('Removing ' + blastdb_dirpath)
            shutil.rmtree(blastdb_dirpath)
        return True

    db_fpath = join(blastdb_dirpath, silva_downloaded_fname)
    nsq_fpath = db_fpath + '.nsq'
    if os.path.isfile(nsq_fpath) and os.path.getsize(nsq_fpath) >= db_nsq_fsize:
        return True
    log_fpath = os.path.join(blastdb_dirpath, 'blastdb.log')
    db_gz_fpath = os.path.join(blastdb_dirpath, silva_fname + '.gz')
    silva_fpath = os.path.join(blastdb_dirpath, silva_fname)

    logger.info()

    if os.path.isfile(db_gz_fpath):
        logger.info(
            'SILVA 16S ribosomal RNA gene database has already been downloaded.'
        )
    else:
        logger.info('Downloading SILVA 16S ribosomal RNA gene database...')
        if not os.path.isdir(blastdb_dirpath):
            os.makedirs(blastdb_dirpath)
        silva_download = urllib.FancyURLopener()
        silva_remote_fpath = silva_db_url + silva_fname + '.gz'
        try:
            # Download under a temporary name so that an interrupted transfer
            # is never mistaken for a complete archive on the next run.
            silva_download.retrieve(silva_remote_fpath,
                                    db_gz_fpath + '.download', show_progress)
        except Exception:
            logger.error(
                'Failed downloading SILVA 16S rRNA gene database (%s)! The search for reference genomes cannot be performed. '
                'Try to download it manually in %s and restart your command.' %
                (silva_remote_fpath, blastdb_dirpath))
            return False
        shutil.move(db_gz_fpath + '.download', db_gz_fpath)

    logger.info('Processing downloaded file. Logging to %s...' % log_fpath)
    if not os.path.isfile(silva_fpath):
        logger.info('Unpacking and replacing " " with "_"...')

        unpacked_fpath = silva_fpath + ".unpacked"
        # Arguments are passed as a list so paths with spaces stay intact;
        # the handles are closed as soon as gunzip has finished.
        with open(unpacked_fpath, 'w') as unpacked_file:
            with open(log_fpath, 'a') as log_file:
                qutils.call_subprocess(['gunzip', '-c', db_gz_fpath],
                                       stdout=unpacked_file,
                                       stderr=log_file,
                                       logger=logger)

        # Write to a temporary name first and move it into place at the end.
        substituted_fpath = silva_fpath + ".substituted"
        with open(unpacked_fpath) as in_file:
            with open(substituted_fpath, 'w') as out_file:
                for line in in_file:
                    out_file.write(line.replace(' ', '_'))
        os.remove(unpacked_fpath)
        shutil.move(substituted_fpath, silva_fpath)

    logger.info('Making BLAST database...')
    makeblastdb_args = [get_blast_fpath('makeblastdb'),
                        '-in', silva_fpath,
                        '-dbtype', 'nucl',
                        '-out', db_fpath]
    # One shared append handle receives both stdout and stderr.
    with open(log_fpath, 'a') as log_file:
        qutils.call_subprocess(makeblastdb_args,
                               stdout=log_file,
                               stderr=log_file,
                               logger=logger)
    if not os.path.exists(nsq_fpath) or os.path.getsize(nsq_fpath) < db_nsq_fsize:
        logger.error('Failed to make BLAST database ("' + blastdb_dirpath +
                     '"). See details in log. Try to make it manually: %s' %
                     ' '.join(makeblastdb_args))
        return False
    elif not qconfig.debug:
        # The intermediate files are kept only in debug mode.
        os.remove(db_gz_fpath)
        os.remove(silva_fpath)
    return True
示例#5
0
def download_blastdb(logger=logger, only_clean=False):
    """Make sure the SILVA BLAST database is available locally.

    Sets the module-level ``blastdb_dirpath`` and, unless only cleaning,
    ``db_fpath``. With *only_clean* set, the download directory is removed
    (if it exists) and nothing is downloaded or built.

    When the ``.nsq`` file is missing or smaller than ``db_nsq_fsize`` bytes:
      1. the gzipped SILVA file is downloaded unless it is already present;
      2. it is gunzipped and every space is replaced with an underscore;
      3. makeblastdb is run on the result.

    Returns True after cleaning, when the database is already in place, or
    when it was built successfully. Returns False when no download directory
    is available, the download fails, or makeblastdb does not produce a
    sufficiently large ``.nsq`` file.
    """
    global blastdb_dirpath
    global db_fpath

    blastdb_dirpath = get_dir_for_download('silva', 'Silva', [silva_downloaded_fname + '.nsq'], logger, only_clean=only_clean)
    if not blastdb_dirpath:
        return False

    if only_clean:
        if os.path.isdir(blastdb_dirpath):
            logger.info('Removing ' + blastdb_dirpath)
            shutil.rmtree(blastdb_dirpath)
        return True

    db_fpath = join(blastdb_dirpath, silva_downloaded_fname)
    nsq_fpath = db_fpath + '.nsq'
    if os.path.isfile(nsq_fpath) and os.path.getsize(nsq_fpath) >= db_nsq_fsize:
        return True
    log_fpath = os.path.join(blastdb_dirpath, 'blastdb.log')
    db_gz_fpath = os.path.join(blastdb_dirpath, silva_fname + '.gz')
    silva_fpath = os.path.join(blastdb_dirpath, silva_fname)

    logger.info()

    if os.path.isfile(db_gz_fpath):
        logger.info('SILVA 16S ribosomal RNA gene database has already been downloaded.')
    else:
        logger.info('Downloading SILVA 16S ribosomal RNA gene database...')
        if not os.path.isdir(blastdb_dirpath):
            os.makedirs(blastdb_dirpath)
        silva_download = urllib.FancyURLopener()
        silva_remote_fpath = silva_db_url + silva_fname + '.gz'
        try:
            # Download under a temporary name so that an interrupted transfer
            # is never mistaken for a complete archive on the next run.
            silva_download.retrieve(silva_remote_fpath, db_gz_fpath + '.download', show_progress)
        except Exception:
            logger.error(
                'Failed downloading SILVA 16S rRNA gene database (%s)! The search for reference genomes cannot be performed. '
                'Try to download it manually in %s and restart your command.' % (silva_remote_fpath, blastdb_dirpath))
            return False
        shutil.move(db_gz_fpath + '.download', db_gz_fpath)

    logger.info('Processing downloaded file. Logging to %s...' % log_fpath)
    if not os.path.isfile(silva_fpath):
        logger.info('Unpacking and replacing " " with "_"...')

        unpacked_fpath = silva_fpath + ".unpacked"
        # Arguments are passed as a list so paths with spaces stay intact;
        # the handles are closed as soon as gunzip has finished.
        with open(unpacked_fpath, 'w') as unpacked_file:
            with open(log_fpath, 'a') as log_file:
                qutils.call_subprocess(['gunzip', '-c', db_gz_fpath], stdout=unpacked_file, stderr=log_file, logger=logger)

        # Write to a temporary name first and move it into place at the end.
        substituted_fpath = silva_fpath + ".substituted"
        with open(unpacked_fpath) as in_file:
            with open(substituted_fpath, 'w') as out_file:
                for line in in_file:
                    out_file.write(line.replace(' ', '_'))
        os.remove(unpacked_fpath)
        shutil.move(substituted_fpath, silva_fpath)

    logger.info('Making BLAST database...')
    makeblastdb_args = [get_blast_fpath('makeblastdb'), '-in', silva_fpath, '-dbtype', 'nucl', '-out', db_fpath]
    # One shared append handle receives both stdout and stderr.
    with open(log_fpath, 'a') as log_file:
        qutils.call_subprocess(makeblastdb_args, stdout=log_file, stderr=log_file, logger=logger)
    if not os.path.exists(nsq_fpath) or os.path.getsize(nsq_fpath) < db_nsq_fsize:
        logger.error('Failed to make BLAST database ("' + blastdb_dirpath +
                     '"). See details in log. Try to make it manually: %s' % ' '.join(makeblastdb_args))
        return False
    elif not qconfig.debug:
        # The intermediate files are kept only in debug mode.
        os.remove(db_gz_fpath)
        os.remove(silva_fpath)
    return True