Exemplo n.º 1
0
def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Correct the input assemblies with the min-contig filter switched off.

    The contigs are passed through ``qutils.correct_contigs`` while
    ``qconfig.min_contig`` is temporarily set to 0.  One ``Assembly`` is then
    built for every path in the second list returned by that call, labelled
    with ``qutils.label_from_fpath``.

    Args:
        contigs_fpaths: paths of the assemblies to correct.
        output_dirpath: output directory; corrected files go to its
            ``qconfig.corrected_dirname`` subdirectory.
        labels: labels forwarded to ``qutils.correct_contigs``.

    Returns:
        A ``(assemblies, corrected_labels)`` tuple, where ``corrected_labels``
        holds the ``label`` of every returned assembly, in the same order.
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # We need correction but do not need min-contig filtration.  The threshold
    # is a process-wide setting, so it is restored in ``finally``: otherwise a
    # failure inside correct_contigs would leave it stuck at 0 for everything
    # that runs afterwards.
    saved_min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        _, old_contigs_fpaths = qutils.correct_contigs(
            contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        qconfig.min_contig = saved_min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        corr_fpaths = [asm.fpath for asm in assemblies]
        # A separate copy of the labels is handed over, as in the original
        # code, so the returned list cannot be modified by the callee.
        plotter_data.save_colors_and_ls(corr_fpaths, labels=list(corrected_labels))
    return assemblies, corrected_labels
Exemplo n.º 2
0
def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Correct the input assemblies with the min-contig filter switched off.

    The contigs are passed through ``qutils.correct_contigs`` while
    ``qconfig.min_contig`` is temporarily set to 0.  One ``Assembly`` is then
    built for every path in the second list returned by that call, labelled
    with ``qutils.label_from_fpath``.

    Args:
        contigs_fpaths: paths of the assemblies to correct.
        output_dirpath: output directory; corrected files go to its
            ``qconfig.corrected_dirname`` subdirectory.
        labels: labels forwarded to ``qutils.correct_contigs``.

    Returns:
        A ``(assemblies, corrected_labels)`` tuple, where ``corrected_labels``
        holds the ``label`` of every returned assembly, in the same order.
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # We need correction but do not need min-contig filtration.  The threshold
    # is a process-wide setting, so it is restored in ``finally``: otherwise a
    # failure inside correct_contigs would leave it stuck at 0 for everything
    # that runs afterwards.
    saved_min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        _, old_contigs_fpaths = qutils.correct_contigs(
            contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        qconfig.min_contig = saved_min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath)) for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        corr_fpaths = [asm.fpath for asm in assemblies]
        # A separate copy of the labels is handed over, as in the original
        # code, so the returned list cannot be modified by the callee.
        plotter_data.save_colors_and_ls(corr_fpaths, labels=list(corrected_labels))
    return assemblies, corrected_labels
Exemplo n.º 3
0
def main(args):
    """Run the QUAST evaluation pipeline for the given command-line arguments.

    The steps, in order: parse options, prepare the corrected-input directory,
    correct the reference and the contigs, optionally analyse reads, compute
    basic statistics, align contigs to the reference (when one is given),
    run the optional gene/rRNA/BUSCO steps, save the total report, draw the
    large visual summaries, and finally log where every result was written.

    Args:
        args: list of command-line arguments, without the program name.

    Returns:
        4 if no assembly is left after contig correction; otherwise the value
        of ``logger.finish_up(check_test=qconfig.test)``.

    Raises:
        SystemExit: with status 1 when ``args`` is empty (usage is printed to
            stderr first).
    """
    def _reload_module(module):
        # Re-import `module` with whichever reload() this interpreter offers.
        # importlib.reload is tried first because the `imp` module no longer
        # exists on Python 3.12+, and the builtin reload() exists on Python 2
        # only.  Errors raised by the reload itself are deliberately not
        # swallowed (the previous bare `except:` hid them and then failed
        # with a NameError on Python 3).
        try:
            from importlib import reload as do_reload
        except ImportError:
            try:
                from imp import reload as do_reload
            except ImportError:
                do_reload = reload  # noqa: F821 -- Python 2 builtin
        return do_reload(module)

    check_dirpath(
        qconfig.QUAST_HOME,
        'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n.' +
        'Please, put QUAST in a different directory, then try again.\n',
        exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    # Configuration lives in module globals; reload them before parsing options.
    # NOTE(review): presumably this resets state left by a previous main() call
    # in the same process -- confirm.
    _reload_module(qconfig)
    _reload_module(qutils)

    # Try both common spellings of the US UTF-8 locale; warn if neither works.
    for locale_name in ('en_US.utf8', 'en_US.UTF-8'):
        try:
            locale.setlocale(locale.LC_ALL, locale_name)
            break
        except Exception:
            continue
    else:
        logger.warning('Python locale settings can\'t be changed')

    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Keep the already collected reports across the reload of the module.
    reports = reporting.reports
    _reload_module(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # In combined-reference mode the directory one level up is used as is.
        corrected_dirpath = os.path.join(output_dirpath, '..',
                                         qconfig.corrected_dirname)
    else:
        # Otherwise start from an empty directory.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    qconfig.set_max_threads(logger)
    check_reads_fpaths(logger)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        original_ref_fpath = ref_fpath
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
        if qconfig.optimal_assembly:
            if not qconfig.pacbio_reads and not qconfig.nanopore_reads and not qconfig.mate_pairs:
                logger.warning(
                    'Optimal assembly cannot be created. It requires mate-pairs or long reads (Pacbio SMRT or Oxford Nanopore).'
                )
            else:
                optimal_assembly_fpath = optimal_assembly.do(
                    ref_fpath, original_ref_fpath,
                    os.path.join(output_dirpath,
                                 qconfig.optimal_assembly_basename))
                if optimal_assembly_fpath is not None:
                    # The optimal assembly is evaluated as the first assembly.
                    contigs_fpaths.insert(0, optimal_assembly_fpath)
                    labels.insert(0, 'Optimal')
                    labels = qutils.process_labels(contigs_fpaths, labels)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(
        contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME,
                         qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    # The original condition tested qconfig.reference_sam twice.  The second
    # operand was presumably meant to be the reference BAM file, so that is
    # what is checked now; getattr keeps this safe if the attribute is absent.
    # NOTE(review): confirm the attribute name against qconfig.
    has_read_data = (qconfig.reads_fpaths or qconfig.reference_sam or
                     getattr(qconfig, 'reference_bam', None) or
                     qconfig.sam_fpaths or qconfig.bam_fpaths)
    if has_read_data:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(
            ref_fpath,
            contigs_fpaths,
            os.path.join(output_dirpath, qconfig.reads_stats_dirname),
            external_logger=logger)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.",
            fake_if_nested_run=True)
        return 4

    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i],
                                                     qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths

    # Where all pdfs will be saved
    all_pdf_fpath = None
    if qconfig.draw_plots and plotter.can_draw_plots:
        all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        # JSON output is switched off when json_saver reports a simplejson error.
        if json_saver.simplejson_error:
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    icarus_gc_fpath, circos_gc_fpath = basic_stats.do(
        ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
        output_dirpath)

    if qconfig.large_genome and ref_fpath:
        unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath,
                        contigs_fpaths, logger)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    circos_png_fpath = None
    # Bound up front so the name always exists; it is only read when
    # circos_png_fpath is set.
    circos_legend_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref
        aligner_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, is_cyclic,
            os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Only assemblies whose alignment finished with status OK go further.
        for contigs_fpath in contigs_fpaths:
            if aligner_statuses[contigs_fpath] == contigs_analyzer.AlignerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(
                    aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if aligner didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(
            output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(ref_fpath, aligned_contigs_fpaths, output_dirpath,
                         aligned_lengths_lists,
                         os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            qconfig.features, qconfig.operons,
            detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(
                contigs_fpaths, qconfig.genes_lengths,
                os.path.join(output_dirpath, 'predicted_genes'))
        # In test mode GeneMark runs even when Glimmer was requested, and its
        # result replaces Glimmer's.
        if not qconfig.glimmer or qconfig.test:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(
                contigs_fpaths, qconfig.genes_lengths,
                os.path.join(output_dirpath, 'predicted_genes'),
                qconfig.prokaryote, qconfig.metagenemark)
    else:
        logger.main_info("")
        logger.notice(
            "Genes are not predicted by default. Use --gene-finding option to enable it."
        )

    if qconfig.rna_gene_finding:
        run_barrnap.do(contigs_fpaths,
                       os.path.join(output_dirpath, 'predicted_genes'), logger)

    if qconfig.run_busco and not qconfig.is_combined_ref:
        if qconfig.platform_name == 'macosx':
            logger.main_info("")
            logger.warning("BUSCO can be run on Linux only")
        elif sys.version_info[:2] < (2, 6):
            # Numeric comparison, so the guard matches the message below
            # (the old check compared the version string to '2.5' only).
            logger.main_info("")
            logger.warning(
                "BUSCO does not support Python versions older than 2.6.")
        else:
            from quast_libs import run_busco
            run_busco.do(contigs_fpaths,
                         os.path.join(output_dirpath, qconfig.busco_dirname),
                         logger)
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(
        output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info(
            'This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(
                    detailed_contigs_reports_dirpath,
                    qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(
                    detailed_contigs_reports_dirpath,
                    qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            draw_circos_plot = qconfig.draw_plots and ref_fpath and len(
                aligned_contigs_fpaths) and not qconfig.space_efficient
            # Number of enabled drawing steps, used in the "i of N" messages.
            number_of_steps = sum([
                int(bool(value)) for value in
                [draw_alignment_plots, draw_circos_plot, all_pdf_fpath]
            ])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info('  1 of %d: Creating Icarus viewers...' %
                                 number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths,
                    report_for_icarus_fpath_pattern,
                    output_dirpath,
                    ref_fpath,
                    stdout_pattern=stdout_pattern,
                    features=features_containers,
                    cov_fpath=cov_fpath,
                    physical_cov_fpath=physical_cov_fpath,
                    gc_fpath=icarus_gc_fpath,
                    json_output_dir=qconfig.json_output_dirpath,
                    genes_by_labels=genes_by_labels)

            if draw_circos_plot:
                logger.main_info(
                    '  %d of %d: Creating Circos plots...' %
                    (2 if draw_alignment_plots else 1, number_of_steps))
                from quast_libs import circos
                circos_png_fpath, circos_legend_fpath = circos.do(
                    ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern,
                    circos_gc_fpath, features_containers, cov_fpath,
                    os.path.join(output_dirpath, 'circos'), logger)

            if all_pdf_fpath:
                # full report in PDF format: all tables and plots
                logger.main_info(
                    '  %d of %d: Creating PDF with all tables and plots...' %
                    (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_fpath)
            logger.main_info('Done')
        except KeyboardInterrupt:
            # Ctrl-C skips the drawing step only; remove a PDF that may have
            # been left behind by the interrupted step.
            logger.main_info('..step skipped!')
            if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info('  Text versions of total report are saved to ' +
                     reports_fpaths)
    logger.main_info(
        '  Text versions of transposed total report are saved to ' +
        transposed_reports_fpaths)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths,
                               plotter_data.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig,
                                     ref_fpath)

    if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
        logger.main_info('  PDF version (tables and plots) is saved to ' +
                         all_pdf_fpath)

    if circos_png_fpath:
        logger.main_info(
            '  Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s'
            % (circos_png_fpath, circos_legend_fpath,
               circos_png_fpath.replace('.png', '.conf')))

    if icarus_html_fpath:
        logger.main_info('  Icarus (contig browser) is saved to %s' %
                         icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info('  Contig alignment plot is saved to %s' %
                         contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
Exemplo n.º 4
0
def main(args):
    """Run the QUAST evaluation pipeline for the given command-line arguments.

    The steps, in order: parse options, prepare the corrected-input directory,
    correct the reference and the contigs, optionally analyse reads, compute
    basic statistics, align contigs to the reference (when one is given),
    run the optional gene/rRNA/BUSCO steps, save the total report, draw the
    large visual summaries, and finally log where every result was written.

    Args:
        args: list of command-line arguments, without the program name.

    Returns:
        4 if no assembly is left after contig correction; otherwise the value
        of ``logger.finish_up(check_test=qconfig.test)``.

    Raises:
        SystemExit: with status 1 when ``args`` is empty (usage is printed to
            stderr first).
    """
    def _reload_module(module):
        # Re-import `module` with whichever reload() this interpreter offers.
        # importlib.reload is tried first because the `imp` module no longer
        # exists on Python 3.12+, and the builtin reload() exists on Python 2
        # only.  Errors raised by the reload itself are deliberately not
        # swallowed (the previous bare `except:` hid them and then failed
        # with a NameError on Python 3).
        try:
            from importlib import reload as do_reload
        except ImportError:
            try:
                from imp import reload as do_reload
            except ImportError:
                do_reload = reload  # noqa: F821 -- Python 2 builtin
        return do_reload(module)

    check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n.' +
                  'Please, put QUAST in a different directory, then try again.\n', exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    # Configuration lives in module globals; reload them before parsing options.
    # NOTE(review): presumably this resets state left by a previous main() call
    # in the same process -- confirm.
    _reload_module(qconfig)
    _reload_module(qutils)

    # Try both common spellings of the US UTF-8 locale; warn if neither works.
    for locale_name in ('en_US.utf8', 'en_US.UTF-8'):
        try:
            locale.setlocale(locale.LC_ALL, locale_name)
            break
        except Exception:
            continue
    else:
        logger.warning('Python locale settings can\'t be changed')

    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Keep the already collected reports across the reload of the module.
    reports = reporting.reports
    _reload_module(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # In combined-reference mode the directory one level up is used as is.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        # Otherwise start from an empty directory.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    qconfig.set_max_threads(logger)
    check_reads_fpaths(logger)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        original_ref_fpath = ref_fpath
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
        if qconfig.ideal_assembly:
            ideal_assembly_fpath = ideal_assembly.do(ref_fpath, original_ref_fpath,
                                                     os.path.join(output_dirpath, qconfig.ideal_assembly_basename))
            if ideal_assembly_fpath is not None:
                # The ideal assembly is evaluated as the first assembly.
                contigs_fpaths.insert(0, ideal_assembly_fpath)
                labels.insert(0, 'IDEAL ASSEMBLY')
                labels = qutils.process_labels(contigs_fpaths, labels)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    # The original condition tested qconfig.reference_sam twice.  The second
    # operand was presumably meant to be the reference BAM file, so that is
    # what is checked now; getattr keeps this safe if the attribute is absent.
    # NOTE(review): confirm the attribute name against qconfig.
    has_read_data = (qconfig.reads_fpaths or qconfig.reference_sam or
                     getattr(qconfig, 'reference_bam', None) or
                     qconfig.sam_fpaths or qconfig.bam_fpaths)
    if has_read_data:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths,
                                                                     os.path.join(output_dirpath, qconfig.reads_stats_dirname),
                                                                     external_logger=logger)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
              "Please, provide different files or decrease --min-contig threshold.",
              fake_if_nested_run=True)
        return 4

    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths

    # Where all pdfs will be saved
    all_pdf_fpath = None
    if qconfig.draw_plots and plotter.can_draw_plots:
        all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        # JSON output is switched off when json_saver reports a simplejson error.
        if json_saver.simplejson_error:
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    icarus_gc_fpath, circos_gc_fpath = basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'), output_dirpath)

    if qconfig.large_genome and ref_fpath:
        unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath, contigs_fpaths, logger)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    circos_png_fpath = None
    # Bound up front so the name always exists; it is only read when
    # circos_png_fpath is set.
    circos_legend_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, is_cyclic, os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Only assemblies whose alignment finished with status OK go further.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            qconfig.genes, qconfig.operons, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'))
        # In test mode GeneMark runs even when Glimmer was requested, and its
        # result replaces Glimmer's.
        if not qconfig.glimmer or qconfig.test:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'),
                        qconfig.prokaryote, qconfig.metagenemark)
    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")

    if qconfig.rna_gene_finding:
        run_barrnap.do(contigs_fpaths, os.path.join(output_dirpath, 'predicted_genes'), logger)

    if qconfig.run_busco and not qconfig.is_combined_ref:
        if qconfig.platform_name == 'macosx':
            logger.main_info("")
            logger.warning("BUSCO can be run on Linux only")
        elif sys.version_info[:2] < (2, 6):
            # Numeric comparison, so the guard matches the message below
            # (the old check compared the version string to '2.5' only).
            logger.main_info("")
            logger.warning("BUSCO does not support Python versions older than 2.6.")
        else:
            from quast_libs import run_busco
            run_busco.do(contigs_fpaths, os.path.join(output_dirpath, qconfig.busco_dirname), logger)
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            draw_circos_plot = qconfig.draw_plots and ref_fpath and len(aligned_contigs_fpaths) and not qconfig.space_efficient
            # Number of enabled drawing steps, used in the "i of N" messages.
            number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, draw_circos_plot, all_pdf_fpath]])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info('  1 of %d: Creating Icarus viewers...' % number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath,
                    stdout_pattern=stdout_pattern, features=features_containers,
                    cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath, gc_fpath=icarus_gc_fpath,
                    json_output_dir=qconfig.json_output_dirpath, genes_by_labels=genes_by_labels)

            if draw_circos_plot:
                logger.main_info('  %d of %d: Creating Circos plots...' % (2 if draw_alignment_plots else 1, number_of_steps))
                from quast_libs import circos
                circos_png_fpath, circos_legend_fpath = circos.do(ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern, circos_gc_fpath,
                                                                  features_containers, cov_fpath, os.path.join(output_dirpath, 'circos'), logger)

            if all_pdf_fpath:
                # full report in PDF format: all tables and plots
                logger.main_info('  %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_fpath)
            logger.main_info('Done')
        except KeyboardInterrupt:
            # Ctrl-C skips the drawing step only; remove a PDF that may have
            # been left behind by the interrupted step.
            logger.main_info('..step skipped!')
            if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info('  Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info('  Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter_data.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
        logger.main_info('  PDF version (tables and plots) is saved to ' + all_pdf_fpath)

    if circos_png_fpath:
        logger.main_info('  Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s' %
                         (circos_png_fpath, circos_legend_fpath, circos_png_fpath.replace('.png', '.conf')))

    if icarus_html_fpath:
        logger.main_info('  Icarus (contig browser) is saved to %s' % icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info('  Contig alignment plot is saved to %s' % contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
Exemplo n.º 5
0
def main(args):
    """Run the whole QUAST pipeline for one set of command-line arguments.

    The steps, in order: validate the installation path, parse options,
    correct the reference and the contigs, optionally process reads and run
    GAGE, compute basic/aligned/genome statistics, optionally predict genes,
    save the total report, build the Icarus viewers and the combined PDF,
    and finally log where every result was written.

    Args:
        args: list of command-line arguments without the program name
            (the path of this script is prepended before parsing).

    Returns:
        4 when no assembly file yields usable contigs; otherwise the value
        returned by ``logger.finish_up(check_test=qconfig.test)``.

    Note:
        Calls ``sys.exit(0)`` after printing the usage when ``args`` is
        empty. ``logger.error`` is invoked with ``exit_with_code=3`` when
        QUAST_HOME contains a space (presumably terminating the process --
        confirm against the logger implementation).
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error('QUAST does not support spaces in paths. \n'
                     'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
                     'Please, put QUAST in a different directory, then try again.\n',
                     to_stderr=True,
                     exit_with_code=3)

    if not args:
        qconfig.usage()
        sys.exit(0)

    # Reload the configuration module before the options are parsed into it.
    reload(qconfig)

    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Reload the reporting module but keep the reports collected so far.
    reports = reporting.reports
    reload(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # Combined-reference runs use the corrected directory one level up
        # and do not recreate it here.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        # Start from an empty directory for the corrected input files.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    reads_fpaths = []
    cov_fpath = []
    physical_cov_fpath = []
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    # Reads (or a ready SAM/BAM file) are only processed when a reference is given.
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpath:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths, reads_fpaths, None,
                                                                     os.path.join(output_dirpath, qconfig.variation_dirname),
                                                                     external_logger=logger, sam_fpath=qconfig.sam, bam_fpath=qconfig.bam, bed_fpath=qconfig.bed)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.",
                     fake_if_nested_run=True)
        return 4

    # Reuse the colors and line styles recorded in qconfig, one pair per label.
    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths
    if qconfig.with_gage:
        ########################################################################
        ### GAGE
        ########################################################################
        if not ref_fpath:
            logger.warning("GAGE can't be run without a reference and will be skipped.")
        else:
            from quast_libs import gage
            gage.do(ref_fpath, contigs_fpaths, output_dirpath)

    # Where all pdfs will be saved
    all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)
    all_pdf_file = None

    if qconfig.draw_plots and plotter.can_draw_plots:
        # Best effort: if the PDF backend cannot be set up, continue without
        # the combined PDF. Only ordinary errors are swallowed, so Ctrl-C and
        # SystemExit still propagate.
        try:
            from matplotlib.backends.backend_pdf import PdfPages
            all_pdf_file = PdfPages(all_pdf_fpath)
        except Exception:
            all_pdf_file = None

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        if json_saver.simplejson_error:
            # Disable JSON output for the rest of the run. The original code
            # assigned an unused local here, so the setting never took effect.
            qconfig.json_output_dirpath = None


    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
                   qconfig.json_output_dirpath, output_dirpath)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, qconfig.prokaryote, os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Keep only the assemblies whose alignment finished with status OK.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.json_output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.json_output_dirpath,
            qconfig.genes, qconfig.operons, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'))
        else:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'),
                        qconfig.prokaryote, qconfig.meta)

    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            # One step for the alignment viewers, one for the combined PDF.
            number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, all_pdf_file]])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info('  1 of %d: Creating Icarus viewers...' % number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath,
                    stdout_pattern=stdout_pattern, features=features_containers, cov_fpath=cov_fpath,
                    physical_cov_fpath=physical_cov_fpath, json_output_dir=qconfig.json_output_dirpath,
                    genes_by_labels=genes_by_labels)

            if all_pdf_file:
                # full report in PDF format: all tables and plots
                logger.main_info('  %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_file)
            logger.main_info('Done')
        except KeyboardInterrupt:
            logger.main_info('..step skipped!')
            # The PDF may never have been created (e.g. plots are disabled),
            # so only remove it when it is actually on disk.
            if os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info('  Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info('  Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if qconfig.json_output_dirpath:
        json_saver.save_total_report(qconfig.json_output_dirpath, qconfig.min_contig, ref_fpath)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if os.path.isfile(all_pdf_fpath):
        logger.main_info('  PDF version (tables and plots) is saved to ' + all_pdf_fpath)

    if icarus_html_fpath:
        logger.main_info('  Icarus (contig browser) is saved to %s' % icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info('  Contig alignment plot is saved to %s' % contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
Exemplo n.º 6
0
Arquivo: quast.py Projeto: ablab/quast
def _reload_module(module):
    """Reload ``module`` in place and return it, on old and new interpreters."""
    try:
        from importlib import reload as reload_module  # Python 3.4+
    except ImportError:
        from imp import reload as reload_module  # interpreters without importlib.reload
    return reload_module(module)


def main(args):
    """Run the whole QUAST pipeline for one set of command-line arguments.

    The steps, in order: validate the installation path, parse options,
    correct the reference and the contigs, optionally process reads, compute
    basic/aligned/genome statistics, optionally run GAGE and gene
    prediction, save the total report, build the Icarus viewers and the
    combined PDF, and finally log where every result was written.

    Args:
        args: list of command-line arguments without the program name
            (the path of this script is prepended before parsing).

    Returns:
        4 when no assembly file yields usable contigs; otherwise the value
        returned by ``logger.finish_up(check_test=qconfig.test)``.

    Note:
        Calls ``sys.exit(0)`` after printing the usage when ``args`` is
        empty. ``check_dirpath`` is given ``exit_code=3`` (presumably it
        terminates the process on an unsupported path -- confirm against
        its implementation).
    """
    check_dirpath(
        qconfig.QUAST_HOME,
        "You are trying to run it from "
        + str(qconfig.QUAST_HOME)
        + "\n."
        + "Please, put QUAST in a different directory, then try again.\n",
        exit_code=3,
    )

    if not args:
        qconfig.usage()
        sys.exit(0)

    # Reload the configuration module before the options are parsed into it.
    _reload_module(qconfig)

    # Try both common spellings of the locale name; warn if neither works.
    try:
        locale.setlocale(locale.LC_ALL, "en_US.utf8")
    except Exception:
        try:
            locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
        except Exception:
            logger.warning("Python locale settings can't be changed")
    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting

    # Reload the reporting module but keep the reports collected so far.
    reports = reporting.reports
    _reload_module(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # Combined-reference runs use the corrected directory one level up
        # and do not recreate it here.
        corrected_dirpath = os.path.join(output_dirpath, "..", qconfig.corrected_dirname)
    else:
        # Start from an empty directory for the corrected input files.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    qconfig.set_max_threads(logger)
    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info("Reference:")
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
    else:
        ref_fpath = ""

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info("Contigs:")

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    reads_fpaths = []
    # Start from the configured coverage files; reads_analyzer may replace them.
    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    # Reads (or a ready SAM/BAM file) are only processed when a reference is given.
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpath:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(
            ref_fpath,
            contigs_fpaths,
            reads_fpaths,
            None,
            os.path.join(output_dirpath, qconfig.variation_dirname),
            external_logger=logger,
            sam_fpath=qconfig.sam,
            bam_fpath=qconfig.bam,
            bed_fpath=qconfig.bed,
        )
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.",
            fake_if_nested_run=True,
        )
        return 4

    # Reuse the colors and line styles recorded in qconfig, one pair per label.
    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths

    # Where all pdfs will be saved
    all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)
    all_pdf_file = None

    if qconfig.draw_plots and plotter.can_draw_plots:
        # Best effort: if the PDF backend cannot be set up, continue without
        # the combined PDF. Only ordinary errors are swallowed, so Ctrl-C and
        # SystemExit still propagate.
        try:
            from matplotlib.backends.backend_pdf import PdfPages

            all_pdf_file = PdfPages(all_pdf_fpath)
        except Exception:
            all_pdf_file = None

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver

        if json_saver.simplejson_error:
            # Disable JSON output for the rest of the run. The original code
            # assigned an unused local here, so the setting never took effect.
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats

    basic_stats.do(
        ref_fpath,
        contigs_fpaths,
        os.path.join(output_dirpath, "basic_stats"),
        qconfig.json_output_dirpath,
        output_dirpath,
    )

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer

        # A prokaryotic reference is treated as cyclic unless the run was
        # asked to check for a fragmented reference.
        is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath,
            contigs_fpaths,
            is_cyclic,
            os.path.join(output_dirpath, "contigs_reports"),
            old_contigs_fpaths,
            qconfig.bed,
        )
        # Keep only the assemblies whose alignment finished with status OK.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, "contigs_reports")

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats

        aligned_stats.do(
            ref_fpath,
            aligned_contigs_fpaths,
            output_dirpath,
            qconfig.json_output_dirpath,
            aligned_lengths_lists,
            os.path.join(output_dirpath, "aligned_stats"),
        )

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer

        features_containers = genome_analyzer.do(
            ref_fpath,
            aligned_contigs_fpaths,
            output_dirpath,
            qconfig.json_output_dirpath,
            qconfig.genes,
            qconfig.operons,
            detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, "genome_stats"),
        )

    if qconfig.with_gage:
        ########################################################################
        ### GAGE
        ########################################################################
        if not ref_fpath:
            logger.warning("GAGE can't be run without a reference and will be skipped.")
        else:
            from quast_libs import gage

            gage.do(ref_fpath, contigs_fpaths, output_dirpath)

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer

            genes_by_labels = glimmer.do(
                contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, "predicted_genes")
            )
        else:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark

            genes_by_labels = genemark.do(
                contigs_fpaths,
                qconfig.genes_lengths,
                os.path.join(output_dirpath, "predicted_genes"),
                qconfig.prokaryote,
                qconfig.meta,
            )

    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info("Creating large visual summaries...")
        logger.main_info("This may take a while: press Ctrl-C to skip this step..")
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(
                    detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern
                )
                stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            # One step for the alignment viewers, one for the combined PDF.
            number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, all_pdf_file]])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info("  1 of %d: Creating Icarus viewers..." % number_of_steps)
                from quast_libs import icarus

                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths,
                    report_for_icarus_fpath_pattern,
                    output_dirpath,
                    ref_fpath,
                    stdout_pattern=stdout_pattern,
                    features=features_containers,
                    cov_fpath=cov_fpath,
                    physical_cov_fpath=physical_cov_fpath,
                    json_output_dir=qconfig.json_output_dirpath,
                    genes_by_labels=genes_by_labels,
                )

            if all_pdf_file:
                # full report in PDF format: all tables and plots
                logger.main_info(
                    "  %d of %d: Creating PDF with all tables and plots..." % (number_of_steps, number_of_steps)
                )
                plotter.fill_all_pdf_file(all_pdf_file)
            logger.main_info("Done")
        except KeyboardInterrupt:
            logger.main_info("..step skipped!")
            # The PDF may never have been created (e.g. plots are disabled),
            # so only remove it when it is actually on disk.
            if os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info("RESULTS:")
    logger.main_info("  Text versions of total report are saved to " + reports_fpaths)
    logger.main_info("  Text versions of transposed total report are saved to " + transposed_reports_fpaths)

    if qconfig.json_output_dirpath:
        json_saver.save_total_report(qconfig.json_output_dirpath, qconfig.min_contig, ref_fpath)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver

        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if os.path.isfile(all_pdf_fpath):
        logger.main_info("  PDF version (tables and plots) is saved to " + all_pdf_fpath)

    if icarus_html_fpath:
        logger.main_info("  Icarus (contig browser) is saved to %s" % icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info("  Contig alignment plot is saved to %s" % contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)