Пример #1
0
def compile_glimmer(logger, only_clean=False):
    """Return the location of the ``glimmerhmm`` program, building it if absent.

    The program is looked up under ``<qconfig.LIBS_LOCATION>/glimmer``.  When
    the lookup yields nothing, ``compile_tool`` is run on the ``src``
    subdirectory; the lookup is then repeated and its result returned.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded unchanged to ``compile_tool``
    :return: the result of the final ``get_path_to_program`` lookup
    """
    glimmer_root = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')

    found_before_build = get_path_to_program('glimmerhmm', glimmer_root)
    if not found_before_build:
        sources_dir = os.path.join(glimmer_root, 'src')
        compile_tool('GlimmerHMM', sources_dir, ['../glimmerhmm'],
                     logger=logger, only_clean=only_clean)

    # Always look the program up again so the caller sees the post-build state.
    return get_path_to_program('glimmerhmm', glimmer_root)
Пример #2
0
def compile_aligner(logger, only_clean=False):
    """Build (or clean) Minimap2 in ``contig_aligner_dirpath``.

    :param logger: logger used for the build and for the failure message
    :param only_clean: when true, only run ``compile_tool`` in clean mode
    :return: True after a clean request or a successful build; False when a
        previous failure is flagged by ``make.failed`` or the build fails now
    """
    tool_name = 'Minimap2'
    needed_binaries = ['minimap2']
    failed_flag_fpath = join(contig_aligner_dirpath, 'make.failed')

    if only_clean:
        compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                     logger=logger, only_clean=only_clean)
        return True

    failed_earlier = check_prev_compilation_failed(
        tool_name, failed_flag_fpath, just_notice=True, logger=logger)
    if not failed_earlier:
        built = compile_tool(tool_name, contig_aligner_dirpath, needed_binaries,
                             just_notice=False, logger=logger,
                             only_clean=only_clean)
        if built:
            return True

    # Both failure paths (earlier failure flag, failed build) report the same way.
    logger.error(
        "Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited."
    )
    return False
Пример #3
0
def compile_glimmer(logger, only_clean=False):
    """Make sure ``glimmerhmm`` is available and report where it is.

    Looks for the program under ``<qconfig.LIBS_LOCATION>/glimmer``; if the
    lookup comes back empty, ``compile_tool`` is invoked on the ``src``
    subdirectory.  The lookup result obtained afterwards is returned.

    :param logger: logger passed on to ``compile_tool``
    :param only_clean: passed on to ``compile_tool`` as is
    :return: the value of ``get_path_to_program`` after the optional build
    """
    install_dir = os.path.join(qconfig.LIBS_LOCATION, 'glimmer')

    if get_path_to_program('glimmerhmm', install_dir):
        needs_build = False
    else:
        needs_build = True

    if needs_build:
        compile_tool('GlimmerHMM', os.path.join(install_dir, 'src'),
                     ['../glimmerhmm'], logger=logger, only_clean=only_clean)

    return get_path_to_program('glimmerhmm', install_dir)
Пример #4
0
def compile_gnuplot(logger, only_clean=False):
    """Build (or clean) the gnuplot copy under ``qconfig.LIBS_LOCATION``.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``; also selects the return
    :return: True when ``only_clean`` is set; otherwise the executable path if
        that file exists after the build attempt, else None
    """
    gnuplot_root = join(qconfig.LIBS_LOCATION, 'gnuplot')
    gnuplot_binary = gnuplot_exec_fpath()
    configure_options = ['--with-qt=no', '--disable-wxwidgets',
                         '--prefix=' + gnuplot_root]

    # The result of compile_tool is ignored; success is judged by the file check below.
    compile_tool('gnuplot', gnuplot_root, [gnuplot_binary], just_notice=True,
                 logger=logger, only_clean=only_clean,
                 configure_args=configure_options)

    if only_clean:
        return True
    return gnuplot_binary if isfile(gnuplot_binary) else None
Пример #5
0
def compile_aligner(logger, only_clean=False):
    """Pick a contig aligner and record it in the module globals.

    Nothing is done when ``contig_aligner_dirpath`` is already set.  On
    ``macosx`` the E-MEM-osx directory is selected without calling
    ``compile_tool``.  Otherwise E-MEM and then MUMmer are tried in turn, and
    the first one that ``compile_tool`` reports as successful is recorded.

    :param logger: logger forwarded to ``compile_tool`` and used for the error
    :param only_clean: forwarded to ``compile_tool``
    :return: True when an aligner is selected, False when every build fails
    """
    global contig_aligner
    global contig_aligner_dirpath

    if contig_aligner_dirpath is not None:
        return True

    libs = qconfig.LIBS_LOCATION
    if qconfig.platform_name == 'macosx':
        contig_aligner = 'E-MEM'
        contig_aligner_dirpath = join(libs, 'E-MEM-osx')
        return True

    mummer_tools = ['nucmer', 'delta-filter', 'show-coords', 'show-snps', 'mummer', 'mgaps']
    candidates = [
        ('E-MEM', join(libs, 'E-MEM-linux'), mummer_tools + ['e-mem']),
        ('MUMmer', join(libs, 'MUMmer3.23-linux'), mummer_tools),
    ]
    last_position = len(candidates) - 1

    for position, (aligner_name, aligner_dir, needed) in enumerate(candidates):
        # just_notice is requested for every candidate except the last one.
        built = compile_tool(aligner_name, aligner_dir, needed,
                             just_notice=(position < last_position),
                             logger=logger, only_clean=only_clean)
        if built:
            contig_aligner = aligner_name
            contig_aligner_dirpath = aligner_dir
            return True

    logger.error("Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited.")
    return False
Пример #6
0
def compile_minimap(logger, only_clean=False):
    """Report whether Minimap2 is usable, building it when needed.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``; when set, an existing
        ``minimap_fpath()`` result does not short-circuit the call
    :return: True if ``minimap_fpath()`` is truthy (and not cleaning) or
        ``compile_tool`` succeeds, otherwise False
    """
    already_available = minimap_fpath() and not only_clean
    if already_available:
        return True

    built = compile_tool('Minimap2', contig_aligner_dirpath, ['minimap2'],
                         just_notice=False, logger=logger,
                         only_clean=only_clean)
    return True if built else False
Пример #7
0
def compile_aligner(logger, only_clean=False):
    """Build (or clean) MUMmer in ``contig_aligner_dirpath``.

    :param logger: logger used for the build and for the failure message
    :param only_clean: when true, only run ``compile_tool`` in clean mode
    :return: True after a clean request or a successful build; False when a
        previous failure is flagged by ``make.failed`` or the build fails now
    """
    mummer_binaries = ['nucmer', 'delta-filter', 'show-coords', 'show-snps', 'mummer', 'mummerplot', 'mgaps']
    failed_flag_fpath = join(contig_aligner_dirpath, 'make.failed')

    if only_clean:
        compile_tool('MUMmer', contig_aligner_dirpath, mummer_binaries,
                     logger=logger, only_clean=only_clean)
        return True

    failed_earlier = check_prev_compilation_failed(
        'MUMmer', failed_flag_fpath, just_notice=True, logger=logger)
    if not failed_earlier:
        fix_configure_timestamps(contig_aligner_dirpath)

        # LDFLAGS=-static is passed on every platform except macosx.
        configure_options = ['--prefix=' + contig_aligner_dirpath]
        if qconfig.platform_name != 'macosx':
            configure_options.append('LDFLAGS=-static')

        built = compile_tool('MUMmer', contig_aligner_dirpath, mummer_binaries,
                             just_notice=False, logger=logger,
                             only_clean=only_clean,
                             configure_args=configure_options)
        if built:
            return True

    logger.error("Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited.")
    return False
Пример #8
0
def compile_aligner(logger, only_clean=False, compile_all_aligners=False):
    """Select and build a contig aligner, recording it in module globals.

    The chosen aligner name and directory are stored in ``contig_aligner`` and
    ``contig_aligner_dirpath``.

    :param logger: logger forwarded to the helpers and used for the final error
    :param only_clean: forwarded unchanged to ``compile_tool``
    :param compile_all_aligners: when true, the early-exit shortcuts are
        skipped and every candidate aligner is passed to ``compile_tool``
    :return: True when an aligner is selected, False when no candidate built
    """
    global contig_aligner
    global contig_aligner_dirpath

    if not compile_all_aligners:
        # Shortcut 1: an aligner directory is already recorded and no earlier
        # failure is flagged for it via its 'make.failed' file.
        if contig_aligner_dirpath is not None and not \
                check_prev_compilation_failed(contig_aligner, join(contig_aligner_dirpath, 'make.failed'), just_notice=True, logger=logger):
            return True

        # Shortcut 2 (macosx only, nucmer not forced, nothing recorded yet):
        # select E-MEM-osx without calling compile_tool.
        if not qconfig.force_nucmer and not contig_aligner_dirpath and qconfig.platform_name == 'macosx':
            if get_installed_emem() or \
                    not check_prev_compilation_failed('E-MEM', e_mem_failed_compilation_flag, just_notice=True, logger=logger):
                contig_aligner = 'E-MEM'
                contig_aligner_dirpath = join(qconfig.LIBS_LOCATION, 'E-MEM-osx')
                return True

    default_requirements = ['nucmer', 'delta-filter', 'show-coords', 'show-snps', 'mummer', 'mgaps']

    # Candidate list, in order of preference: (name, directory, required binaries).
    if qconfig.platform_name == 'macosx':
        aligners_to_try = [
            ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-osx'), default_requirements)]
    else:
        if not qconfig.force_nucmer:
            # The 'e-mem' binary is only required when get_installed_emem()
            # reports nothing.
            if get_installed_emem():
                emem_requirements = default_requirements
            else:
                emem_requirements = default_requirements + ['e-mem']
            aligners_to_try = [
                ('E-MEM', join(qconfig.LIBS_LOCATION, 'E-MEM-linux'), emem_requirements),
                ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-linux'), default_requirements)]
        else:
            aligners_to_try = [
                ('MUMmer', join(qconfig.LIBS_LOCATION, 'MUMmer3.23-linux'), default_requirements)]

    for i, (name, dirpath, requirements) in enumerate(aligners_to_try):
        # just_notice is set for every candidate except the last one;
        # make_cmd='no-emem' is passed for E-MEM when get_installed_emem() is truthy.
        success_compilation = compile_tool(name, dirpath, requirements, just_notice=(i < len(aligners_to_try) - 1),
                                           logger=logger, only_clean=only_clean, make_cmd='no-emem' if 'E-MEM' in name and get_installed_emem() else None)
        if not success_compilation:
            continue
        contig_aligner = name
        contig_aligner_dirpath = dirpath  # successfully compiled
        if not compile_all_aligners:
            return True

    # In compile-all mode the loop never returns early; the globals then hold
    # the last candidate that built, or whatever they held before the call.
    if compile_all_aligners and contig_aligner and contig_aligner_dirpath:
        return True
    logger.error("Compilation of contig aligner software was unsuccessful! QUAST functionality will be limited.")
    return False
Пример #9
0
def compile_bedtools(logger, only_clean=False):
    """Build (or clean) BEDtools in ``bedtools_dirpath``.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``
    :return: whatever ``compile_tool`` returns
    """
    expected_binaries = [join('bin', 'bedtools')]
    build_result = compile_tool('BEDtools', bedtools_dirpath, expected_binaries,
                                logger=logger, only_clean=only_clean)
    return build_result
Пример #10
0
def compile_bwa(logger, only_clean=False):
    """Build (or clean) BWA in ``bwa_dirpath``.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``
    :return: whatever ``compile_tool`` returns
    """
    expected_binaries = ['bwa']
    build_result = compile_tool('BWA', bwa_dirpath, expected_binaries,
                                logger=logger, only_clean=only_clean)
    return build_result
Пример #11
0
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Steps, in order: download and build Augustus, download the BLAST binaries,
    create ``output_dir`` and its ``tmp`` subdirectory, download the lineage
    database, write the BUSCO config, export ``BUSCO_CONFIG_FILE`` and
    ``AUGUSTUS_CONFIG_PATH``, run ``busco_main_handler`` through
    ``run_parallel``, then read each produced summary file.

    For every summary with a non-zero total, the complete and fragmented
    percentages are stored in the assembly report as ``'%.2f'`` strings and the
    summary file is copied into ``output_dir``.  Intermediate files are removed
    with ``cleanup`` unless ``qconfig.debug`` is set.

    :param contigs_fpaths: paths of the assemblies to analyse
    :param output_dir: directory for BUSCO logs and results (created if absent)
    :param logger: logger used for all progress, warning and error messages
    :return: None; failures are reported through ``logger``
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True

    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus',
                          augustus_dirpath, [join('bin', 'augustus')],
                          logger=logger):
        compilation_success = False

    # BLAST binaries are only fetched when Augustus is ready.
    if compilation_success and not download_blast_binaries(
            logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Split the thread budget between the parallel jobs.  n_jobs is clamped to
    # at least 1 so an empty assembly list (or max_threads == 0) cannot cause a
    # division by zero.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger,
                                is_prokaryote=qconfig.prokaryote,
                                is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads,
                               clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    augustus_config_path = copy_augustus_configs(augustus_dirpath, tmp_dir)
    # os.environ accepts only str values; a falsy non-string "not found" result
    # (e.g. None) would raise TypeError before the error below could be logged.
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_path or ''
    if not augustus_config_path:
        logger.error(
            'Augustus configs not found, failed to run BUSCO without them.')

    busco_args = [[
        contigs_fpath,
        qutils.label_from_fpath_for_fname(contigs_fpath)
    ] for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args,
                                  qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error(
            'Failed running BUSCO for all the assemblies. See log files in ' +
            output_dir + ' for information '
            '(rerun with --debug to keep all intermediate files).')
        return

    # saving results
    zero_output_for_all = True
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token of the
                    # matching summary line.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(
                    reporting.Fields.BUSCO_COMPLETE,
                    ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' %
                                  (float(part_buscos) * 100.0 / total_buscos)))
            if complete_buscos + part_buscos > 0:
                zero_output_for_all = False
            shutil.copy(summary_fpath, output_dir)
        else:
            logger.error(
                'Failed running BUSCO for ' + contigs_fpath +
                '. See the log for detailed information'
                ' (rerun with --debug to keep all intermediate files).')
    if zero_output_for_all:
        logger.warning(
            'BUSCO did not fail explicitly but found nothing for all assemblies! '
            'Possible reasons and workarounds:\n'
            '  1. Provided assemblies are so small that they do not contain even a single partial BUSCO gene. Not likely but may happen -- nothing to worry then.\n'
            '  2. Incorrect lineage database was used. To run with fungi DB use --fungus, to run with eukaryota DB use --eukaryote, otherwise BUSCO uses bacteria DB.\n'
            '  3. Problem with BUSCO dependencies, most likely Augustus. Check that the binaries in '
            + augustus_dirpath + '/bin/ are working properly.\n'
            '     If something is wrong with Augustus, you may try to install it yourself (https://github.com/Gaius-Augustus/Augustus) and add "augustus" binary to PATH.\n'
            '  4. Some other problem with BUSCO. Check the logs (you may need to rerun QUAST with --debug to see all intermediate files).\n'
            '     If you cannot solve the problem yourself, post an issue at https://github.com/ablab/quast/issues or write to [email protected]'
        )
    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
Пример #12
0
def compile_minimap(logger, only_clean=False):
    """Report whether Minimap2 is usable, building it when needed.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``; when set, an existing
        ``minimap_fpath()`` result does not short-circuit the call
    :return: True if ``minimap_fpath()`` is truthy (and not cleaning) or
        ``compile_tool`` succeeds, otherwise False
    """
    if minimap_fpath() and not only_clean:
        return True

    build_succeeded = compile_tool('Minimap2', contig_aligner_dirpath,
                                   ['minimap2'], just_notice=False,
                                   logger=logger, only_clean=only_clean)
    if build_succeeded:
        return True
    return False
Пример #13
0
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Steps, in order: download and build Augustus, download the BLAST binaries,
    register the Augustus directory with ``set_augustus_dir``, create
    ``output_dir`` and its ``tmp`` subdirectory, download the lineage database,
    run ``busco.main`` through ``run_parallel`` with one argument list per
    assembly, then read each produced summary file.

    For every summary with a non-zero total, the complete and fragmented
    percentages are stored in the assembly report as ``'%.2f'`` strings.

    :param contigs_fpaths: paths of the assemblies to analyse
    :param output_dir: directory for BUSCO output (created if absent)
    :param logger: logger used for all progress and error messages
    :return: None; failures are reported through ``logger``
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True

    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus',
                          augustus_dirpath, [join('bin', 'augustus')],
                          logger=logger):
        compilation_success = False

    # BLAST binaries are only fetched when Augustus is ready.
    if compilation_success and not download_blast_binaries(
            logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Split the thread budget between the parallel jobs.  n_jobs is clamped to
    # at least 1 so an empty assembly list (or max_threads == 0) cannot cause a
    # division by zero.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')
    # One (argument list, output directory) pair per assembly.
    busco_args = [([
        '-i', contigs_fpath, '-o',
        qutils.label_from_fpath_for_fname(contigs_fpath), '-l', clade_dirpath,
        '-m', 'genome', '-f', '-z', '-c',
        str(busco_threads), '-t', tmp_dir,
        '--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' +
        join(augustus_dirpath, 'config') + '\''
    ], output_dir) for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' +
                     log_fpath + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token of the
                    # matching summary line.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(
                    reporting.Fields.BUSCO_COMPLETE,
                    ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' %
                                  (float(part_buscos) * 100.0 / total_buscos)))
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath +
                         '. See ' + log_fpath + ' for information.')
    logger.info('Done.')
Пример #14
0
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Steps, in order: download and build Augustus, download the BLAST binaries,
    create ``output_dir`` and its ``tmp`` subdirectory, download the lineage
    database, write the BUSCO config, export ``BUSCO_CONFIG_FILE`` and
    ``AUGUSTUS_CONFIG_PATH``, run ``busco_main_handler`` through
    ``run_parallel``, then read each produced summary file.

    For every summary with a non-zero total, the complete and fragmented
    percentages are stored in the assembly report as ``'%.2f'`` strings and the
    summary file is copied into ``output_dir``.  Intermediate files are removed
    with ``cleanup`` unless ``qconfig.debug`` is set.

    :param contigs_fpaths: paths of the assemblies to analyse
    :param output_dir: directory for BUSCO logs and results (created if absent)
    :param logger: logger used for all progress and error messages
    :return: None; failures are reported through ``logger``
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True

    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus',
                          augustus_dirpath, [join('bin', 'augustus')],
                          logger=logger):
        compilation_success = False

    # BLAST binaries are only fetched when Augustus is ready.
    if compilation_success and not download_blast_binaries(
            logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Split the thread budget between the parallel jobs.  n_jobs is clamped to
    # at least 1 so an empty assembly list (or max_threads == 0) cannot cause a
    # division by zero.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger,
                                is_prokaryote=qconfig.prokaryote,
                                is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    config_fpath = make_config(output_dir, tmp_dir, busco_threads,
                               clade_dirpath, augustus_dirpath)
    logger.info('Logs and results will be saved under ' + output_dir + '...')

    os.environ['BUSCO_CONFIG_FILE'] = config_fpath
    # NOTE(review): the helper is spelled "contigs" although its result is used
    # as the Augustus config path -- confirm the name against its definition.
    augustus_config_path = copy_augustus_contigs(augustus_dirpath, tmp_dir)
    # os.environ accepts only str values; a falsy non-string "not found" result
    # (e.g. None) would raise TypeError before the error below could be logged.
    os.environ['AUGUSTUS_CONFIG_PATH'] = augustus_config_path or ''
    if not augustus_config_path:
        logger.error(
            'Augustus configs not found, failed to run BUSCO without them.')

    busco_args = [[
        contigs_fpath,
        qutils.label_from_fpath_for_fname(contigs_fpath)
    ] for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco_main_handler, busco_args,
                                  qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error(
            'Failed running BUSCO for all the assemblies. See log files in ' +
            output_dir + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token of the
                    # matching summary line.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(
                    reporting.Fields.BUSCO_COMPLETE,
                    ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART,
                                 ('%.2f' %
                                  (float(part_buscos) * 100.0 / total_buscos)))
            shutil.copy(summary_fpath, output_dir)
        else:
            logger.error('Failed running BUSCO for ' + contigs_fpath +
                         '. See the log for detailed information.')
    if not qconfig.debug:
        cleanup(output_dir)
    logger.info('Done.')
Пример #15
0
def compile_minimap(logger, only_clean=False):
    """Build (or clean) Minimap2 in ``minimap_dirpath``.

    :param logger: logger forwarded to ``compile_tool``
    :param only_clean: forwarded to ``compile_tool``
    :return: whatever ``compile_tool`` returns
    """
    expected_binaries = ['minimap2']
    build_result = compile_tool('Minimap2', minimap_dirpath, expected_binaries,
                                logger=logger, only_clean=only_clean)
    return build_result
Пример #16
0
def do(contigs_fpaths, output_dir, logger):
    """Run BUSCO for every assembly and add completeness figures to the reports.

    Steps, in order: download and build Augustus, download the BLAST binaries,
    register the Augustus directory with ``set_augustus_dir``, create
    ``output_dir`` and its ``tmp`` subdirectory, download the lineage database,
    run ``busco.main`` through ``run_parallel`` with one argument list per
    assembly, then read each produced summary file.

    For every summary with a non-zero total, the complete and fragmented
    percentages are stored in the assembly report as ``'%.2f'`` strings.

    :param contigs_fpaths: paths of the assemblies to analyse
    :param output_dir: directory for BUSCO output (created if absent)
    :param logger: logger used for all progress and error messages
    :return: None; failures are reported through ``logger``
    """
    logger.print_timestamp()
    logger.info('Running BUSCO...')

    compilation_success = True

    augustus_dirpath = download_augustus(logger)
    if not augustus_dirpath:
        compilation_success = False
    elif not compile_tool('Augustus', augustus_dirpath, [join('bin', 'augustus')], logger=logger):
        compilation_success = False

    # BLAST binaries are only fetched when Augustus is ready.
    if compilation_success and not download_blast_binaries(logger=logger, filenames=blast_filenames):
        compilation_success = False

    if not compilation_success:
        logger.info('Failed finding conservative genes.')
        return

    set_augustus_dir(augustus_dirpath)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    tmp_dir = join(output_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    # Split the thread budget between the parallel jobs.  n_jobs is clamped to
    # at least 1 so an empty assembly list (or max_threads == 0) cannot cause a
    # division by zero.
    n_jobs = max(1, min(len(contigs_fpaths), qconfig.max_threads))
    busco_threads = max(1, qconfig.max_threads // n_jobs)

    clade_dirpath = download_db(logger, is_prokaryote=qconfig.prokaryote, is_fungus=qconfig.is_fungus)
    if not clade_dirpath:
        logger.info('Failed finding conservative genes.')
        return

    log_fpath = join(output_dir, 'busco.log')
    logger.info('Logging to ' + log_fpath + '...')
    # One (argument list, output directory) pair per assembly.
    busco_args = [(['-i', contigs_fpath, '-o', qutils.label_from_fpath_for_fname(contigs_fpath), '-l', clade_dirpath,
                    '-m', 'genome', '-f', '-z', '-c', str(busco_threads), '-t', tmp_dir,
                    '--augustus_parameters=\'--AUGUSTUS_CONFIG_PATH=' + join(augustus_dirpath, 'config') + '\''], output_dir)
                  for contigs_fpath in contigs_fpaths]
    summary_fpaths = run_parallel(busco.main, busco_args, qconfig.max_threads)
    if not any(summary_fpaths):
        logger.error('Failed running BUSCO for all the assemblies. See ' + log_fpath + ' for information.')
        return

    # saving results
    for i, contigs_fpath in enumerate(contigs_fpaths):
        report = reporting.get(contigs_fpath)
        summary_fpath = summary_fpaths[i]

        if summary_fpath and os.path.isfile(summary_fpath):
            total_buscos, part_buscos, complete_buscos = 0, 0, 0
            with open(summary_fpath) as f:
                for line in f:
                    # The count is the first whitespace-separated token of the
                    # matching summary line.
                    if 'Complete BUSCOs' in line:
                        complete_buscos = int(line.split()[0])
                    elif 'Fragmented' in line:
                        part_buscos = int(line.split()[0])
                    elif 'Total' in line:
                        total_buscos = int(line.split()[0])
            if total_buscos != 0:
                report.add_field(reporting.Fields.BUSCO_COMPLETE, ('%.2f' % (float(complete_buscos) * 100.0 / total_buscos)))
                report.add_field(reporting.Fields.BUSCO_PART, ('%.2f' % (float(part_buscos) * 100.0 / total_buscos)))
        else:
            logger.error(
                'Failed running BUSCO for ' + contigs_fpath + '. See ' + log_fpath + ' for information.')
    logger.info('Done.')