示例#1
0
def correct_meta_references(ref_fpaths, corrected_dirpath, downloaded_refs=False):
    """Write per-reference corrected FASTA files and one combined reference.

    Every sequence of every file in ``ref_fpaths`` is renamed (reference label
    plus a unique sequence name), appended to that reference's corrected file
    inside ``corrected_dirpath`` and, once the whole reference has been
    accepted, appended to the combined reference file.

    A reference is rejected when it yields no sequences or when
    ``correct_seq`` returns a falsy value for one of them (only checked while
    ``qconfig.no_check`` is off). Rejected references are skipped with a
    warning if ``downloaded_refs`` is true; otherwise ``logger.error`` is
    called with ``exit_with_code=1``.

    Args:
        ref_fpaths: list of reference FASTA paths. Skipped references are
            removed from this list in place.
        corrected_dirpath: directory that receives the corrected files.
        downloaded_refs: whether a bad reference is skipped instead of being
            reported as an error.

    Returns:
        A tuple ``(corrected_ref_fpaths, combined_ref_fpath,
        chromosomes_by_refs, ref_fpaths)`` where ``chromosomes_by_refs`` maps
        each accepted reference label to a list of
        ``(corrected sequence name, sequence length)`` pairs.
    """
    corrected_ref_fpaths = []

    combined_ref_fpath = os.path.join(corrected_dirpath, qconfig.combined_ref_name)

    chromosomes_by_refs = {}

    def _proceed_seq(seq_name, seq, ref_name, ref_fasta_ext, total_references, ref_fpath):
        """Append one sequence to its reference's corrected file.

        ``total_references`` is the 1-based position of the sequence inside
        the current reference file: the first sequence allocates and
        registers the corrected path, later ones reuse the last registered
        path. Returns ``(corrected name, corrected path)`` or ``(None, None)``
        if the sequence did not pass ``correct_seq``.
        """
        seq_fname = ref_name
        seq_fname += ref_fasta_ext

        if total_references > 1:
            corr_seq_fpath = corrected_ref_fpaths[-1]
        else:
            corr_seq_fpath = qutils.unique_corrected_fpath(os.path.join(corrected_dirpath, seq_fname))
            corrected_ref_fpaths.append(corr_seq_fpath)
        corr_seq_name = qutils.name_from_fpath(corr_seq_fpath) + '_' + seq_name
        if not qconfig.no_check:
            # NOTE(review): the corrected sequence is only used as a validity
            # check; the original `seq` is what gets written below. Confirm
            # this is intended before switching to the corrected text.
            corr_seq = correct_seq(seq, ref_fpath)
            if not corr_seq:
                return None, None

        fastaparser.write_fasta(corr_seq_fpath, [(corr_seq_name, seq)], 'a')

        contigs_analyzer.ref_labels_by_chromosomes[corr_seq_name] = qutils.name_from_fpath(corr_seq_fpath)
        chromosomes_by_refs[ref_name].append((corr_seq_name, len(seq)))

        return corr_seq_name, corr_seq_fpath

    excluded_ref_fpaths = []
    ref_names = qutils.process_labels(ref_fpaths)
    for ref_fpath, ref_name in zip(ref_fpaths, ref_names):
        total_references = 0
        ref_fname = os.path.basename(ref_fpath)
        _, ref_fasta_ext = qutils.splitext_for_fasta_file(ref_fname)

        chromosomes_by_refs[ref_name] = []
        used_seq_names = defaultdict(int)
        # Remember how many corrected paths existed before this reference so
        # that a rejected reference can undo exactly its own registration.
        registered_before = len(corrected_ref_fpaths)

        corr_seq_fpath = None
        for seq_name, seq in fastaparser.read_fasta(ref_fpath):
            total_references += 1
            seq_name = correct_name(seq_name, qutils.MAX_CONTIG_NAME - len(ref_name) - 1)
            uniq_seq_name = get_uniq_name(seq_name, used_seq_names)
            used_seq_names[seq_name] += 1
            corr_seq_name, corr_seq_fpath = _proceed_seq(uniq_seq_name, seq, ref_name, ref_fasta_ext, total_references, ref_fpath)
            if not corr_seq_name:
                # One bad sequence rejects the whole reference.
                break
        if corr_seq_fpath:
            logger.main_info('  ' + ref_fpath + ' ==> ' + qutils.name_from_fpath(corr_seq_fpath) + '')
            fastaparser.write_fasta(combined_ref_fpath, fastaparser.read_fasta(corr_seq_fpath), 'a')
        elif downloaded_refs:
            logger.warning('Skipping ' + ref_fpath + ' because it'
                           ' is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!')
            # cleaning
            for corr_seq_name, _ in chromosomes_by_refs[ref_name]:
                del contigs_analyzer.ref_labels_by_chromosomes[corr_seq_name]
            del chromosomes_by_refs[ref_name]
            # An empty file never registered a path, so an unconditional pop()
            # would raise on an empty list or drop the previous, valid
            # reference. Truncating handles both the 0- and 1-entry cases.
            del corrected_ref_fpaths[registered_before:]
            excluded_ref_fpaths.append(ref_fpath)
        else:
            logger.error('Reference file ' + ref_fpath +
                         ' is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!',
                         exit_with_code=1)
    for excluded in excluded_ref_fpaths:
        ref_fpaths.remove(excluded)

    if len(chromosomes_by_refs) > 0:
        logger.main_info('  All references were combined in ' + qconfig.combined_ref_name)
    else:
        logger.warning('All references were skipped!')

    return corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_fpaths
示例#2
0
def _reload_module(module):
    """Reload ``module`` in place with whichever reload function is available.

    ``importlib.reload`` is tried first because the ``imp`` module no longer
    exists on recent Python 3 releases and the ``reload`` builtin exists only
    on Python 2; the older spellings are kept purely as fallbacks.
    """
    try:
        from importlib import reload as reload_impl
    except ImportError:
        try:
            from imp import reload as reload_impl
        except ImportError:
            reload_impl = reload  # noqa: F821 -- Python 2 builtin
    return reload_impl(module)


def main(args):
    """Run the QUAST pipeline for one list of command-line arguments.

    The steps, in order: option parsing, reference and contig correction,
    optional read analysis, basic statistics, alignment-based statistics
    (only when a reference is given), optional gene / rRNA / BUSCO steps,
    report saving, and the optional large drawing tasks (Icarus, Circos, PDF).

    Args:
        args: command-line arguments, without the program path (the path of
            this file is prepended before they are handed to
            ``parse_options``).

    Returns:
        ``4`` if no assembly file is left after contig correction, otherwise
        the value returned by ``logger.finish_up``.

    Exits via ``sys.exit(1)`` after printing usage when ``args`` is empty.
    """
    check_dirpath(
        qconfig.QUAST_HOME,
        'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
        'Please, put QUAST in a different directory, then try again.\n',
        exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    # Reload the configuration modules before option parsing repopulates them.
    _reload_module(qconfig)
    _reload_module(qutils)

    try:
        locale.setlocale(locale.LC_ALL, 'en_US.utf8')
    except Exception:
        try:
            locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
        except Exception:
            logger.warning('Python locale settings can\'t be changed')
    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Keep the already collected reports across the reload of the module.
    reports = reporting.reports
    _reload_module(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        corrected_dirpath = os.path.join(output_dirpath, '..',
                                         qconfig.corrected_dirname)
    else:
        # Start from an empty directory for the corrected input files.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    qconfig.set_max_threads(logger)
    check_reads_fpaths(logger)
    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        original_ref_fpath = ref_fpath
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
        if qconfig.optimal_assembly:
            if not qconfig.pacbio_reads and not qconfig.nanopore_reads and not qconfig.mate_pairs:
                logger.warning(
                    'Optimal assembly cannot be created. It requires mate-pairs or long reads (Pacbio SMRT or Oxford Nanopore).'
                )
            else:
                optimal_assembly_fpath = optimal_assembly.do(
                    ref_fpath, original_ref_fpath,
                    os.path.join(output_dirpath,
                                 qconfig.optimal_assembly_basename))
                if optimal_assembly_fpath is not None:
                    # The optimal assembly is evaluated as the first assembly.
                    contigs_fpaths.insert(0, optimal_assembly_fpath)
                    labels.insert(0, 'Optimal')
                    labels = qutils.process_labels(contigs_fpaths, labels)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(
        contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME,
                         qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    # Any kind of read input (raw reads or ready SAM/BAM alignments, for the
    # reference or for the assemblies) triggers the read analysis step.
    if (qconfig.reads_fpaths or qconfig.reference_sam or qconfig.reference_bam
            or qconfig.sam_fpaths or qconfig.bam_fpaths):
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(
            ref_fpath,
            contigs_fpaths,
            os.path.join(output_dirpath, qconfig.reads_stats_dirname),
            external_logger=logger)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.",
            fake_if_nested_run=True)
        return 4

    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i],
                                                     qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths

    # Where all pdfs will be saved
    all_pdf_fpath = None
    if qconfig.draw_plots and plotter.can_draw_plots:
        all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        if json_saver.simplejson_error:
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    icarus_gc_fpath, circos_gc_fpath = basic_stats.do(
        ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
        output_dirpath)

    if qconfig.large_genome and ref_fpath:
        unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath,
                        contigs_fpaths, logger)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    circos_png_fpath = None
    # Set together with circos_png_fpath; initialised so the final summary
    # never depends on a name bound only inside the drawing branch.
    circos_legend_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref
        aligner_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, is_cyclic,
            os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Only assemblies the aligner handled successfully take part in the
        # alignment-based statistics below.
        for contigs_fpath in contigs_fpaths:
            if aligner_statuses[
                    contigs_fpath] == contigs_analyzer.AlignerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(
                    aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if aligner didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(
            output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(ref_fpath, aligned_contigs_fpaths, output_dirpath,
                         aligned_lengths_lists,
                         os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            qconfig.features, qconfig.operons,
            detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(
                contigs_fpaths, qconfig.genes_lengths,
                os.path.join(output_dirpath, 'predicted_genes'))
        if not qconfig.glimmer or qconfig.test:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(
                contigs_fpaths, qconfig.genes_lengths,
                os.path.join(output_dirpath, 'predicted_genes'),
                qconfig.prokaryote, qconfig.metagenemark)
    else:
        logger.main_info("")
        logger.notice(
            "Genes are not predicted by default. Use --gene-finding option to enable it."
        )

    if qconfig.rna_gene_finding:
        run_barrnap.do(contigs_fpaths,
                       os.path.join(output_dirpath, 'predicted_genes'), logger)

    if qconfig.run_busco and not qconfig.is_combined_ref:
        if qconfig.platform_name == 'macosx':
            logger.main_info("")
            logger.warning("BUSCO can be run on Linux only")
        elif sys.version_info[:2] == (2, 5):
            logger.main_info("")
            logger.warning(
                "BUSCO does not support Python versions older than 2.6.")
        else:
            from quast_libs import run_busco
            run_busco.do(contigs_fpaths,
                         os.path.join(output_dirpath, qconfig.busco_dirname),
                         logger)
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(
        output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info(
            'This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(
                    detailed_contigs_reports_dirpath,
                    qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(
                    detailed_contigs_reports_dirpath,
                    qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            draw_circos_plot = qconfig.draw_plots and ref_fpath and len(
                aligned_contigs_fpaths) and not qconfig.space_efficient
            # Count the enabled steps so progress lines read "k of n".
            number_of_steps = sum(
                1 for enabled in
                (draw_alignment_plots, draw_circos_plot, all_pdf_fpath)
                if enabled)
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info('  1 of %d: Creating Icarus viewers...' %
                                 number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths,
                    report_for_icarus_fpath_pattern,
                    output_dirpath,
                    ref_fpath,
                    stdout_pattern=stdout_pattern,
                    features=features_containers,
                    cov_fpath=cov_fpath,
                    physical_cov_fpath=physical_cov_fpath,
                    gc_fpath=icarus_gc_fpath,
                    json_output_dir=qconfig.json_output_dirpath,
                    genes_by_labels=genes_by_labels)

            if draw_circos_plot:
                logger.main_info(
                    '  %d of %d: Creating Circos plots...' %
                    (2 if draw_alignment_plots else 1, number_of_steps))
                from quast_libs import circos
                circos_png_fpath, circos_legend_fpath = circos.do(
                    ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern,
                    circos_gc_fpath, features_containers, cov_fpath,
                    os.path.join(output_dirpath, 'circos'), logger)

            if all_pdf_fpath:
                # full report in PDF format: all tables and plots
                # (always the last enabled step, hence "n of n")
                logger.main_info(
                    '  %d of %d: Creating PDF with all tables and plots...' %
                    (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_fpath)
            logger.main_info('Done')
        except KeyboardInterrupt:
            logger.main_info('..step skipped!')
            # Do not leave a PDF behind after an interrupted drawing step.
            if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info('  Text versions of total report are saved to ' +
                     reports_fpaths)
    logger.main_info(
        '  Text versions of transposed total report are saved to ' +
        transposed_reports_fpaths)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths,
                               plotter_data.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig,
                                     ref_fpath)

    if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
        logger.main_info('  PDF version (tables and plots) is saved to ' +
                         all_pdf_fpath)

    if circos_png_fpath:
        logger.main_info(
            '  Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s'
            % (circos_png_fpath, circos_legend_fpath,
               circos_png_fpath.replace('.png', '.conf')))

    if icarus_html_fpath:
        logger.main_info('  Icarus (contig browser) is saved to %s' %
                         icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info('  Contig alignment plot is saved to %s' %
                         contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
示例#3
0
def parse_options(logger, quast_args):
    mode = get_mode(quast_args[0])
    is_metaquast = True if mode == 'meta' else False
    qconfig.large_genome = True if mode == 'large' else False

    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, mode=mode, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(mode)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--no-portable-html'], dict(
             dest='portable_html',
             action='store_false')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        (['-r', '-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        (['-G', '--genes'], dict(
             dest='genes',
             type='string',
             action='callback',
             callback_args=(logger, True),
             callback=parse_features)
         ),
        (['-g', '--features'], dict(
             dest='features',
             type='string',
             action='callback',
             callback_args=(logger,),
             callback=parse_features)
         ),
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--pe1'], dict(
             dest='forward_reads',
             type='file',
             action='extend')
         ),
        (['--pe2'], dict(
             dest='reverse_reads',
             type='file',
             action='extend')
         ),
        (['--mp1'], dict(
             dest='mp_forward_reads',
             type='file',
             action='extend')
         ),
        (['--mp2'], dict(
             dest='mp_reverse_reads',
             type='file',
             action='extend')
         ),
        (['--12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--pe12'], dict(
             dest='interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--mp12'], dict(
             dest='mp_interlaced_reads',
             type='file',
             action='extend')
         ),
        (['--single'], dict(
             dest='unpaired_reads',
             type='file',
             action='extend')
         ),
        (['--pacbio'], dict(
             dest='pacbio_reads',
             type='file',
             action='extend')
         ),
        (['--nanopore'], dict(
             dest='nanopore_reads',
             type='file',
             action='extend')
         ),
        (['--ref-sam'], dict(
            dest='reference_sam',
            type='file')
         ),
        (['--ref-bam'], dict(
            dest='reference_bam',
            type='file')
         ),
        (['--sam'], dict(
            dest='sam_fpaths',
            type='string',
            action='callback',
            callback_args=('.sam', logger),
            callback=parse_files_list)
         ),
        (['--bam'], dict(
            dest='bam_fpaths',
            type='string',
            action='callback',
            callback_args=('.bam', logger),
            callback=parse_files_list)
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['--cov'], dict(
             dest='cov_fpath',
             type='file')
         ),
        (['--phys-cov'], dict(
             dest='phys_cov_fpath',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        (['--mgm'], dict(
             dest='metagenemark',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
             default=False)
         ),
        (['-s', '--split-scaffolds'], dict(
             dest='split_scaffolds',
             action='store_true')
         ),
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['--fungus'], dict(
             dest='is_fungus',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['is_fungus'],
                              'store_false_values': ['prokaryote']})
         ),
        (['--large'], dict(
             dest='large_genome',
             action='store_true')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--rna-finding'], dict(
             dest='rna_gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        (['--fragmented-max-indent'], dict(
             dest='fragmented_max_indent',
             type='int',
             default=qconfig.MAX_INDEL_LENGTH,
             action='callback',
             callback=set_fragmented_max_indent,
             callback_args=(logger,))
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--unaligned-part-size'], dict(
             dest='unaligned_part_size',
             type=int)
         ),
        (['--skip-unaligned-mis-contigs'], dict(
            dest='unaligned_mis_threshold',
            action="store_const",
            const=0.0)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['--scaffold-gap-max-size'], dict(
             dest='scaffolds_gap_threshold',
             type=int)
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='store_true',
             default=False)
         ),
        (['-b', '--conserved-genes-finding'], dict(
             dest='run_busco',
             action='store_true',
             default=False)
         ),
        (['-k', '--k-mer-stats'], dict(
             dest='use_kmc',
             action='store_true',
             default=False)
         ),
        (['--k-mer-size'], dict(
             dest='unique_kmer_len',
             type='int')
         ),
        (['--upper-bound-assembly'], dict(
             dest='optimal_assembly',
             action='store_true')
         ),
        (['--upper-bound-min-con'], dict(
             dest='upperbound_min_connections',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 1})
         ),
        (['--est-insert-size'], dict(
             dest='optimal_assembly_insert_size',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': qconfig.optimal_assembly_min_IS,
                              'max_value': qconfig.optimal_assembly_max_IS})
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--use-input-ref-order'], dict(
             dest='use_input_ref_order',
             action='store_true')
         ),
        (['--circos'], dict(
             dest='draw_circos',
             action='store_true')
         ),
        (['--no-read-stats'], dict(
             dest='no_read_stats',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip', 'no_read_stats'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html', 'analyze_gaps']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--space-efficient'], dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['space_efficient'],
                              'store_false_values': ['show_snps', 'create_icarus_html']},)
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             ),
            (['--blast-db'], dict(
                 dest='custom_blast_db_fpath')
             )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg)

    if qconfig.glimmer and qconfig.gene_finding:
        logger.error("You cannot use --glimmer and " + ("--mgm" if qconfig.metagenemark else "--gene-finding") + \
                     " simultaneously!", exit_with_code=3)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(qconfig.output_dirpath, 'You are trying to run QUAST from ' + str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.features = test_features
                qconfig.operons = test_operons
                qconfig.glimmer = True
                if not qconfig.large_genome:  # special case -- large mode imposes eukaryote gene finding (GeneMark-ES) and our test data is too small for it.
                    qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                '  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    if qconfig.extensive_misassembly_threshold is None:
        qconfig.extensive_misassembly_threshold = \
            qconfig.LARGE_EXTENSIVE_MIS_THRESHOLD if qconfig.large_genome else qconfig.DEFAULT_EXT_MIS_SIZE
    if qconfig.min_contig is None:
        qconfig.min_contig = qconfig.LARGE_MIN_CONTIG if qconfig.large_genome else qconfig.DEFAULT_MIN_CONTIG
    if qconfig.min_alignment is None:
        qconfig.min_alignment = qconfig.LARGE_MIN_ALIGNMENT if qconfig.large_genome else qconfig.DEFAULT_MIN_ALIGNMENT

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(os.getcwd(), 'An output path was not specified manually. You are trying to run QUAST from ' + str(os.getcwd()) + '.\n' +
                      'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_quast_dir = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_quast_dir:
        logger.notice("Output directory already exists and looks like a QUAST output dir. "
                      "Existing results can be reused (e.g. previously generated alignments)!")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(x) for x in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(x) for x in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
示例#4
0
文件: quast.py 项目: student-t/quast
def main(args):
    """Run the whole QUAST pipeline for the given command-line arguments.

    The steps, in order: sanity-check the installation directory, reload the
    configuration modules, parse options, prepare the corrected reference and
    contigs, optionally analyze reads, compute basic/aligned/genome statistics,
    optionally predict genes and run BUSCO, save the total report, draw the
    large visual summaries and finally print where the results were saved.

    :param args: command-line arguments without the program name.
    :return: 4 when no assembly file is left after contig correction,
             otherwise the value returned by ``logger.finish_up``.
    Calls ``sys.exit(1)`` after printing usage when ``args`` is empty.
    """
    check_dirpath(qconfig.QUAST_HOME, 'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '.\n' +
                  'Please, put QUAST in a different directory, then try again.\n', exit_code=3)

    if not args:
        qconfig.usage(stream=sys.stderr)
        sys.exit(1)

    # Pick whichever module-reload function this interpreter provides:
    # importlib.reload (Python 3.4+), imp.reload (older Python 3) or the Python 2 builtin.
    # Only ImportError is caught, so a failure inside a reload is no longer masked.
    try:
        from importlib import reload as reload_module
    except ImportError:
        try:
            from imp import reload as reload_module
        except ImportError:
            reload_module = reload  # Python 2 builtin
    reload_module(qconfig)
    reload_module(qutils)

    try:
        locale.setlocale(locale.LC_ALL, 'en_US.utf8')
    except Exception:
        try:
            locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
        except Exception:
            logger.warning('Python locale settings can\'t be changed')
    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Keep the already collected reports across the module reload.
    reports = reporting.reports
    reload_module(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # The corrected directory of the parent run is used as is.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        # Start from a clean corrected-input directory.
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    qconfig.set_max_threads(logger)
    check_reads_fpaths(logger)
    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        original_ref_fpath = ref_fpath
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
        if qconfig.ideal_assembly:
            ideal_assembly_fpath = ideal_assembly.do(ref_fpath, original_ref_fpath,
                                                     os.path.join(output_dirpath, qconfig.ideal_assembly_basename))
            if ideal_assembly_fpath is not None:
                # The ideal assembly is reported as the first assembly.
                contigs_fpaths.insert(0, ideal_assembly_fpath)
                labels.insert(0, 'IDEAL ASSEMBLY')
                labels = qutils.process_labels(contigs_fpaths, labels)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    cov_fpath = qconfig.cov_fpath
    physical_cov_fpath = qconfig.phys_cov_fpath
    # The condition used to test qconfig.reference_sam twice.
    # NOTE(review): assumes the reference BAM path is stored as qconfig.reference_bam -- confirm.
    # getattr keeps the previous behaviour when qconfig has no such attribute.
    reference_bam = getattr(qconfig, 'reference_bam', None)
    if qconfig.reads_fpaths or qconfig.reference_sam or reference_bam or qconfig.sam_fpaths or qconfig.bam_fpaths:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths,
                                                                     os.path.join(output_dirpath, qconfig.reads_stats_dirname),
                                                                     external_logger=logger)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
              "Please, provide different files or decrease --min-contig threshold.",
              fake_if_nested_run=True)
        return 4

    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter_data.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths

    # Where all pdfs will be saved
    all_pdf_fpath = None
    if qconfig.draw_plots and plotter.can_draw_plots:
        all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        if json_saver.simplejson_error:
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    icarus_gc_fpath, circos_gc_fpath = basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'), output_dirpath)

    if qconfig.large_genome and ref_fpath:
        unique_kmers.do(os.path.join(output_dirpath, 'basic_stats'), ref_fpath, contigs_fpaths, logger)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    circos_png_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        is_cyclic = qconfig.prokaryote and not qconfig.check_for_fragmented_ref
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, is_cyclic, os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Only assemblies whose alignment finished with status OK go to the aligned statistics.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath,
            qconfig.genes, qconfig.operons, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'))
        # In test mode GeneMark is run in addition to Glimmer.
        if not qconfig.glimmer or qconfig.test:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'),
                        qconfig.prokaryote, qconfig.metagenemark)
    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")

    if qconfig.rna_gene_finding:
        run_barrnap.do(contigs_fpaths, os.path.join(output_dirpath, 'predicted_genes'), logger)

    if qconfig.run_busco and not qconfig.is_combined_ref:
        if qconfig.platform_name == 'macosx':
            logger.main_info("")
            logger.warning("BUSCO can be run on Linux only")
        elif sys.version[0:3] == '2.5':
            logger.main_info("")
            logger.warning("BUSCO does not support Python versions older than 2.6.")
        else:
            from quast_libs import run_busco
            run_busco.do(contigs_fpaths, os.path.join(output_dirpath, qconfig.busco_dirname), logger)
    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            draw_circos_plot = qconfig.draw_plots and ref_fpath and len(aligned_contigs_fpaths) and not qconfig.space_efficient
            # Count the enabled steps so that progress is printed as "k of n".
            number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, draw_circos_plot, all_pdf_fpath]])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info('  1 of %d: Creating Icarus viewers...' % number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath,
                    stdout_pattern=stdout_pattern, features=features_containers,
                    cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath, gc_fpath=icarus_gc_fpath,
                    json_output_dir=qconfig.json_output_dirpath, genes_by_labels=genes_by_labels)

            if draw_circos_plot:
                logger.main_info('  %d of %d: Creating Circos plots...' % (2 if draw_alignment_plots else 1, number_of_steps))
                from quast_libs import circos
                circos_png_fpath, circos_legend_fpath = circos.do(ref_fpath, contigs_fpaths, report_for_icarus_fpath_pattern, circos_gc_fpath,
                                                                  features_containers, cov_fpath, os.path.join(output_dirpath, 'circos'), logger)

            if all_pdf_fpath:
                # full report in PDF format: all tables and plots
                logger.main_info('  %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_fpath)
            logger.main_info('Done')
        except KeyboardInterrupt:
            # Ctrl-C skips the drawing step only; a partially written PDF is removed.
            logger.main_info('..step skipped!')
            if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info('  Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info('  Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter_data.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if all_pdf_fpath and os.path.isfile(all_pdf_fpath):
        logger.main_info('  PDF version (tables and plots) is saved to ' + all_pdf_fpath)

    # circos_png_fpath is only set together with circos_legend_fpath (same assignment above).
    if circos_png_fpath:
        logger.main_info('  Circos plot is saved to %s (the annotation is in %s). Circos configuration file is saved to %s' %
                         (circos_png_fpath, circos_legend_fpath, circos_png_fpath.replace('.png', '.conf')))

    if icarus_html_fpath:
        logger.main_info('  Icarus (contig browser) is saved to %s' % icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info('  Contig alignment plot is saved to %s' % contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
示例#5
0
文件: metautils.py 项目: nwespe/quast
def correct_meta_references(ref_fpaths,
                            corrected_dirpath,
                            downloaded_refs=False):
    """Write per-reference corrected FASTA files and one combined reference.

    Every sequence of every reference file is renamed (reference label plus a
    unique sequence name), appended to that reference's file in
    ``corrected_dirpath`` and, once the whole reference was accepted, appended
    to the combined reference file.

    A reference that yields no sequence, or a sequence rejected by
    ``correct_seq``, is either skipped with a warning (``downloaded_refs`` is
    true) or reported through ``logger.error`` with ``exit_with_code=1``.

    :param ref_fpaths: paths of the reference files; skipped references are
        removed from this list in place.
    :param corrected_dirpath: directory receiving the corrected files.
    :param downloaded_refs: whether bad references are skipped instead of
        being treated as an error.
    :return: tuple ``(corrected_ref_fpaths, combined_ref_fpath,
        chromosomes_by_refs, ref_fpaths)`` where ``chromosomes_by_refs`` maps a
        reference label to a list of ``(sequence name, sequence length)``.
    """
    corrected_ref_fpaths = []

    combined_ref_fpath = os.path.join(corrected_dirpath,
                                      qconfig.combined_ref_name)

    chromosomes_by_refs = {}

    def _proceed_seq(seq_name, seq, ref_name, ref_fasta_ext, total_references,
                     ref_fpath):
        """Append one sequence to its reference's corrected file.

        Returns ``(corrected sequence name, corrected file path)`` or
        ``(None, None)`` when ``correct_seq`` rejects the sequence.
        """
        seq_fname = ref_name
        seq_fname += ref_fasta_ext

        if total_references > 1:
            # Not the first sequence of this reference: reuse the file opened for it.
            corr_seq_fpath = corrected_ref_fpaths[-1]
        else:
            corr_seq_fpath = qutils.unique_corrected_fpath(
                os.path.join(corrected_dirpath, seq_fname))
            corrected_ref_fpaths.append(corr_seq_fpath)
        corr_seq_name = qutils.name_from_fpath(corr_seq_fpath) + '_' + seq_name
        if not qconfig.no_check:
            corr_seq = correct_seq(seq, ref_fpath)
            if not corr_seq:
                return None, None

        # NOTE(review): the original `seq` is written, not `corr_seq`; the check above only
        # acts as a validity test here -- confirm this is intended.
        fastaparser.write_fasta(corr_seq_fpath, [(corr_seq_name, seq)], 'a')

        contigs_analyzer.ref_labels_by_chromosomes[
            corr_seq_name] = qutils.name_from_fpath(corr_seq_fpath)
        chromosomes_by_refs[ref_name].append((corr_seq_name, len(seq)))

        return corr_seq_name, corr_seq_fpath

    excluded_ref_fpaths = []
    ref_names = qutils.process_labels(ref_fpaths)
    for ref_fpath, ref_name in zip(ref_fpaths, ref_names):
        total_references = 0  # number of sequences read from this reference so far
        ref_fname = os.path.basename(ref_fpath)
        _, ref_fasta_ext = qutils.splitext_for_fasta_file(ref_fname)

        chromosomes_by_refs[ref_name] = []
        used_seq_names = defaultdict(int)

        corr_seq_fpath = None
        for seq_name, seq in fastaparser.read_fasta(ref_fpath):
            total_references += 1
            # Leave room for the "<reference label>_" prefix added in _proceed_seq.
            seq_name = correct_name(seq_name,
                                    qutils.MAX_CONTIG_NAME - len(ref_name) - 1)
            uniq_seq_name = get_uniq_name(seq_name, used_seq_names)
            used_seq_names[seq_name] += 1
            corr_seq_name, corr_seq_fpath = _proceed_seq(
                uniq_seq_name, seq, ref_name, ref_fasta_ext, total_references,
                ref_fpath)
            if not corr_seq_name:
                # A rejected sequence invalidates the whole reference.
                break
        if corr_seq_fpath:
            logger.main_info('  ' + ref_fpath + ' ==> ' +
                             qutils.name_from_fpath(corr_seq_fpath))
            fastaparser.write_fasta(combined_ref_fpath,
                                    fastaparser.read_fasta(corr_seq_fpath),
                                    'a')
        elif downloaded_refs:
            logger.warning(
                'Skipping ' + ref_fpath + ' because it'
                ' is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!'
            )
            # cleaning
            for corr_seq_name, _ in chromosomes_by_refs[ref_name]:
                del contigs_analyzer.ref_labels_by_chromosomes[corr_seq_name]
            del chromosomes_by_refs[ref_name]
            # A corrected path is registered only when the first sequence is processed.
            # For a reference that yielded no sequences nothing was appended, so popping
            # would drop the previous reference's path (or fail on an empty list).
            if total_references > 0:
                corrected_ref_fpaths.pop()
            excluded_ref_fpaths.append(ref_fpath)
        else:
            logger.error(
                'Reference file ' + ref_fpath +
                ' is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!',
                exit_with_code=1)
    for excluded in excluded_ref_fpaths:
        ref_fpaths.remove(excluded)

    if chromosomes_by_refs:
        logger.main_info('  All references were combined in ' +
                         qconfig.combined_ref_name)
    else:
        logger.warning('All references were skipped!')

    return corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_fpaths
示例#6
0
def parse_options(logger, quast_args, is_metaquast=False):
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args,
                      meta=is_metaquast,
                      short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(dest='debug', action='store_true')),
        (['--no-portable-html'],
         dict(dest='portable_html', action='store_false')),
        (['--test'], dict(dest='test', action='store_true')),
        (['--test-sv'], dict(dest='test_sv', action='store_true')),
        (['--test-no-ref'], dict(dest='test_no_ref', action='store_true')),
        (['-o', '--output-dir'],
         dict(dest='output_dirpath',
              type='string',
              action='callback',
              callback=check_output_dir,
              callback_args=(logger, ))),
        (['-t', '--threads'],
         dict(dest='max_threads',
              type='int',
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'default_value': 1,
                  'min_value': 1
              })),
        (['-R', '--reference'],
         dict(dest='reference',
              type='string' if is_metaquast else 'file',
              action='callback' if is_metaquast else 'store',
              callback_args=(logger, ) if is_metaquast else None,
              callback=parse_meta_references if is_metaquast else None)),
        (['-G', '--genes'], dict(dest='genes', type='file', action='extend')),
        (['-O', '--operons'], dict(dest='operons',
                                   type='file',
                                   action='extend')),
        (['-1', '--reads1'], dict(dest='forward_reads', type='file')),
        (['-2', '--reads2'], dict(dest='reverse_reads', type='file')),
        (['--sam'], dict(dest='sam', type='file')),
        (['--bam'], dict(dest='bam', type='file')),
        (['--sv-bedpe'], dict(dest='bed', type='file')),
        (['--cov'], dict(dest='cov_fpath', type='file')),
        (['--phys-cov'], dict(dest='phys_cov_fpath', type='file')),
        (['-l', '--labels'], dict(dest='labels', type='string')),
        (['-L'], dict(dest='all_labels_from_dirs', action='store_true')),
        (['--mgm'],
         dict(dest='metagenemark',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['gene_finding', 'metagenemark']
              },
              default=False)),
        (['-s', '--scaffolds'], dict(dest='scaffolds', action='store_true')),
        (['-e', '--eukaryote'], dict(dest='prokaryote', action='store_false')),
        (['-f',
          '--gene-finding'], dict(dest='gene_finding', action='store_true')),
        (['--fragmented'],
         dict(dest='check_for_fragmented_ref', action='store_true')),
        (['--fragmented-max-indent'],
         dict(dest='fragmented_max_indent',
              type='int',
              default=qconfig.MAX_INDEL_LENGTH,
              action='callback',
              callback=set_fragmented_max_indent,
              callback_args=(logger, ))),
        (['-a', '--ambiguity-usage'],
         dict(dest='ambiguity_usage',
              type='string',
              default=qconfig.ambiguity_usage,
              action='callback',
              callback=check_str_arg_value,
              callback_args=(logger, ),
              callback_kwargs={'available_values': ['none', 'one', 'all']})),
        (['--ambiguity-score'],
         dict(dest='ambiguity_score',
              type='float',
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'min_value': 0.8,
                  'max_value': 1.0
              })),
        (['-u', '--use-all-alignments'],
         dict(dest='use_all_alignments', action='store_true')),
        (['--strict-NA'], dict(dest='strict_NA', action='store_true')),
        (['--unaligned-part-size'], dict(dest='unaligned_part_size',
                                         type=int)),
        (['-x', '--extensive-mis-size'],
         dict(dest='extensive_misassembly_threshold',
              type='int',
              default=qconfig.extensive_misassembly_threshold,
              action='callback',
              callback=set_extensive_mis_size,
              callback_args=(logger, ))),
        (['--scaffold-gap-max-size'],
         dict(dest='scaffolds_gap_threshold', type=int)),
        (['-m', '--min-contig'], dict(dest='min_contig', type='int')),
        (['-c', '--min-cluster'], dict(dest='min_cluster', type='int')),
        (['-i', '--min-alignment'], dict(dest='min_alignment', type='int')),
        (['--min-identity'],
         dict(dest='min_IDY',
              type='float',
              default=qconfig.min_IDY,
              action='callback',
              callback=check_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'min_value': 80.0,
                  'max_value': 100.0
              })),
        (['--est-ref-size'], dict(dest='estimated_reference_size',
                                  type='int')),
        (['--contig-thresholds'], dict(dest='contig_thresholds')),
        (['--gene-thresholds'], dict(dest='genes_lengths')),
        (['--gage'], dict(dest='with_gage', action='store_true')),
        (['--glimmer'],
         dict(dest='glimmer',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['gene_finding', 'glimmer']
              },
              default=False)),
        (['--plots-format'],
         dict(dest='plot_extension',
              type='string',
              action='callback',
              callback=check_str_arg_value,
              callback_args=(logger, ),
              callback_kwargs={
                  'available_values': qconfig.supported_plot_extensions
              })),
        (['--use-input-ref-order'],
         dict(dest='use_input_ref_order', action='store_true')),
        (['--svg'], dict(dest='draw_svg', action='store_true')),
        (['--fast'],
         dict(dest='fast',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                  'store_false_values': [
                      'show_snps', 'draw_plots', 'html_report',
                      'create_icarus_html'
                  ]
              },
              default=False)),
        (['--no-gzip'], dict(dest='no_gzip', action='store_true')),
        (['--no-check'], dict(dest='no_check', action='store_true')),
        (['--no-check-meta'],
         dict(dest='no_check_meta',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_true_values': ['no_check', 'no_check_meta']
              })), (['--no-snps'], dict(dest='show_snps',
                                        action='store_false')),
        (['--no-plots'], dict(dest='draw_plots', action='store_false')),
        (['--no-html'],
         dict(dest='html_report',
              action='callback',
              callback=set_multiple_variables,
              callback_kwargs={
                  'store_false_values': ['html_report', 'create_icarus_html']
              })),
        (['--no-icarus'], dict(dest='create_icarus_html',
                               action='store_false')),
        (['--no-gc'], dict(dest='no_gc', action='store_true')),
        (['--no-sv'], dict(dest='no_sv', action='store_true')),
        (['--memory-efficient'],
         dict(dest='memory_efficient', action='store_true')),
        (['--space-efficient'],
         dict(
             dest='space_efficient',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={
                 'store_true_values': ['space_efficient'],
                 'store_false_values': ['create_icarus_html']
             },
         )),
        (['--force-nucmer'], dict(dest='force_nucmer', action='store_true')),
        (['--silent'], dict(dest='silent', action='store_true')),
        (['--combined-ref'], dict(dest='is_combined_ref',
                                  action='store_true')),
        (['--colors'], dict(dest='used_colors', action='extend')),
        (['--ls'], dict(dest='used_ls', action='extend')),
        (['-j', '--save-json'], dict(dest='save_json', action='store_true')),
        (['-J', '--save-json-to'], dict(dest='json_output_dirpath')),
        (['--err-fpath'], dict(dest='error_log_fpath')),
        (['--read-support'],
         dict(dest='calculate_read_support', action='store_true'))
    ]
    if is_metaquast:
        options += [(['--unique-mapping'],
                     dict(dest='unique_mapping', action='store_true')),
                    (['--max-ref-number'],
                     dict(dest='max_references',
                          type='int',
                          action='callback',
                          callback=check_arg_value,
                          callback_args=(logger, ),
                          callback_kwargs={
                              'default_value': qconfig.max_references,
                              'min_value': 0
                          })),
                    (['--references-list'], dict(dest='references_txt')),
                    (['--blast-db'], dict(dest='custom_blast_db_fpath'))]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        check_dirpath(
            qconfig.output_dirpath, 'You are trying to run QUAST from ' +
            str(os.path.dirname(qconfig.output_dirpath)) + '.\n' +
            'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

        if any(not isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n\n'
                'If you still want to run tests, please download and unpack test data to CWD:\n'
                '  wget quast.sf.net/test_data.tar.gz && tar xzf test_data.tar.gz\n'
            )
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n",
                     to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        check_dirpath(
            os.getcwd(),
            'An output path was not specified manually. You are trying to run QUAST from '
            + str(os.getcwd()) + '.\n' +
            'Please, specify a different directory using -o option.')
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice(
            "Output directory already exists. Existing Nucmer alignments can be used"
        )
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels,
                                           qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [
            int(x) for x in qconfig.contig_thresholds.split(",")
        ]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [
            int(x) for x in qconfig.genes_lengths.split(",")
        ]

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice(
                "--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified"
            )

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths
示例#7
0
def parse_options(logger, quast_args, is_metaquast=False):
    """Parse the QUAST/MetaQUAST command line and finalize the run settings.

    The function
      * prints usage or the version and exits with status 0 when a help or
        version flag is present anywhere in ``quast_args``;
      * registers every supported option on an ``OptionParser`` built with
        ``QuastOption`` and parses ``quast_args[1:]``;
      * switches to the bundled test data for ``--test``, ``--test-sv`` and
        ``--test-no-ref`` (exits with status 2 if the test files are missing);
      * validates the contigs files, prepares the output directory, starts
        the logger and normalizes labels and threshold lists in ``qconfig``.

    NOTE(review): parsed values are read back from ``qconfig`` right after
    ``parse_args``, so ``QuastOption`` presumably stores them there -- confirm
    against its definition.

    :param logger: logger used for messages and handed to option callbacks
    :param quast_args: full argument list; the first element is skipped
    :param is_metaquast: True when called on behalf of MetaQUAST
    :return: tuple ``(quast_py_args, contigs_fpaths)``; ``quast_py_args`` is
             ``quast_args[1:]``, passed through ``clean_metaquast_args`` when
             ``is_metaquast`` is set
    """
    help_flags = ('-h', '--help', '--help-hidden')
    if any(flag in quast_args for flag in help_flags):
        show_hidden = '--help-hidden' in quast_args
        qconfig.usage(show_hidden, meta=is_metaquast, short=False)
        sys.exit(0)

    if any(flag in quast_args for flag in ('-v', '--version')):
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    # Small factories for the option shapes that repeat many times below.
    def switch_on(dest):
        return dict(dest=dest, action='store_true')

    def switch_off(dest):
        return dict(dest=dest, action='store_false')

    def file_list(dest):
        return dict(dest=dest, type='file', action='extend')

    def single_file(dest):
        return dict(dest=dest, type='file')

    def int_value(dest):
        return dict(dest=dest, type='int')

    log_args = (logger,)

    # MetaQUAST parses -R through a callback; plain QUAST stores a single file.
    if is_metaquast:
        reference_attrs = dict(dest='reference', type='string', action='callback',
                               callback_args=log_args, callback=parse_meta_references)
    else:
        reference_attrs = dict(dest='reference', type='file', action='store',
                               callback_args=None, callback=None)

    options = [
        (['--debug'], switch_on('debug')),
        (['--no-portable-html'], switch_off('portable_html')),
        (['--test'], switch_on('test')),
        (['--test-sv'], switch_on('test_sv')),
        (['--test-no-ref'], switch_on('test_no_ref')),
        (['-o', '--output-dir'],
         dict(dest='output_dirpath', type='string', action='callback',
              callback=check_output_dir, callback_args=log_args)),
        (['-t', '--threads'],
         dict(dest='max_threads', type='int', action='callback',
              callback=check_arg_value, callback_args=log_args,
              callback_kwargs={'default_value': 1, 'min_value': 1})),
        (['-R', '--reference'], reference_attrs),
        (['-G', '--genes'], file_list('genes')),
        (['-O', '--operons'], file_list('operons')),
        (['-1', '--reads1'], file_list('forward_reads')),
        (['-2', '--reads2'], file_list('reverse_reads')),
        (['--pe1'], file_list('forward_reads')),
        (['--pe2'], file_list('reverse_reads')),
        (['--mp1'], file_list('mp_forward_reads')),
        (['--mp2'], file_list('mp_reverse_reads')),
        (['--12'], file_list('interlaced_reads')),
        (['--pe12'], file_list('interlaced_reads')),
        (['--mp12'], file_list('mp_interlaced_reads')),
        (['--single'], file_list('unpaired_reads')),
        (['--ref-sam'], single_file('reference_sam')),
        (['--ref-bam'], single_file('reference_bam')),
        (['--sam'],
         dict(dest='sam_fpaths', type='string', action='callback',
              callback_args=('.sam', logger), callback=parse_files_list)),
        (['--bam'],
         dict(dest='bam_fpaths', type='string', action='callback',
              callback_args=('.bam', logger), callback=parse_files_list)),
        (['--sv-bedpe'], single_file('bed')),
        (['--cov'], single_file('cov_fpath')),
        (['--phys-cov'], single_file('phys_cov_fpath')),
        (['-l', '--labels'], dict(dest='labels', type='string')),
        (['-L'], switch_on('all_labels_from_dirs')),
        (['--mgm'],
         dict(dest='metagenemark', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['gene_finding', 'metagenemark']},
              default=False)),
        (['-s', '--scaffolds'], switch_on('scaffolds')),
        (['-e', '--eukaryote'], switch_off('prokaryote')),
        (['--fungus'],
         dict(dest='is_fungus', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['is_fungus'],
                               'store_false_values': ['prokaryote']})),
        (['--large'], switch_on('large_genome')),
        (['-f', '--gene-finding'], switch_on('gene_finding')),
        (['--rna-finding'], switch_on('rna_gene_finding')),
        (['--fragmented'], switch_on('check_for_fragmented_ref')),
        (['--fragmented-max-indent'],
         dict(dest='fragmented_max_indent', type='int',
              default=qconfig.MAX_INDEL_LENGTH, action='callback',
              callback=set_fragmented_max_indent, callback_args=log_args)),
        (['-a', '--ambiguity-usage'],
         dict(dest='ambiguity_usage', type='string',
              default=qconfig.ambiguity_usage, action='callback',
              callback=check_str_arg_value, callback_args=log_args,
              callback_kwargs={'available_values': ['none', 'one', 'all']})),
        (['--ambiguity-score'],
         dict(dest='ambiguity_score', type='float', action='callback',
              callback=check_arg_value, callback_args=log_args,
              callback_kwargs={'min_value': 0.8, 'max_value': 1.0})),
        (['-u', '--use-all-alignments'], switch_on('use_all_alignments')),
        (['--strict-NA'], switch_on('strict_NA')),
        (['--unaligned-part-size'], dict(dest='unaligned_part_size', type=int)),
        (['-x', '--extensive-mis-size'],
         dict(dest='extensive_misassembly_threshold', type='int',
              default=qconfig.extensive_misassembly_threshold, action='callback',
              callback=set_extensive_mis_size, callback_args=log_args)),
        (['--scaffold-gap-max-size'], dict(dest='scaffolds_gap_threshold', type=int)),
        (['-m', '--min-contig'], int_value('min_contig')),
        (['-c', '--min-cluster'], int_value('min_cluster')),
        (['-i', '--min-alignment'], int_value('min_alignment')),
        (['--min-identity'],
         dict(dest='min_IDY', type='float', default=qconfig.min_IDY,
              action='callback', callback=check_arg_value, callback_args=log_args,
              callback_kwargs={'min_value': 80.0, 'max_value': 100.0})),
        (['--est-ref-size'], int_value('estimated_reference_size')),
        (['--contig-thresholds'], dict(dest='contig_thresholds')),
        (['--gene-thresholds'], dict(dest='genes_lengths')),
        (['--glimmer'],
         dict(dest='glimmer', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
              default=False)),
        (['-b', '--find-conserved-genes'],
         dict(dest='run_busco', action='store_true', default=False)),
        (['--ideal_assembly'], switch_on('ideal_assembly')),
        (['--est-insert-size'],
         dict(dest='ideal_assembly_insert_size', type='int', action='callback',
              callback=check_arg_value, callback_args=log_args,
              callback_kwargs={'min_value': qconfig.ideal_assembly_min_IS,
                               'max_value': qconfig.ideal_assembly_max_IS})),
        (['--plots-format'],
         dict(dest='plot_extension', type='string', action='callback',
              callback=check_str_arg_value, callback_args=log_args,
              callback_kwargs={'available_values': qconfig.supported_plot_extensions})),
        (['--use-input-ref-order'], switch_on('use_input_ref_order')),
        (['--svg'], switch_on('draw_svg')),
        (['--fast'],
         dict(dest='fast', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                               'store_false_values': ['show_snps', 'draw_plots',
                                                      'html_report', 'create_icarus_html']},
              default=False)),
        (['--no-gzip'], switch_on('no_gzip')),
        (['--no-check'], switch_on('no_check')),
        (['--no-check-meta'],
         dict(dest='no_check_meta', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})),
        (['--no-snps'], switch_off('show_snps')),
        (['--no-plots'], switch_off('draw_plots')),
        (['--no-html'],
         dict(dest='html_report', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})),
        (['--no-icarus'], switch_off('create_icarus_html')),
        (['--no-gc'], switch_on('no_gc')),
        (['--no-sv'], switch_on('no_sv')),
        (['--memory-efficient'], switch_on('memory_efficient')),
        (['--space-efficient'],
         dict(dest='space_efficient', action='callback',
              callback=set_multiple_variables,
              callback_kwargs={'store_true_values': ['space_efficient'],
                               'store_false_values': ['create_icarus_html']})),
        (['--force-nucmer'], switch_on('force_nucmer')),
        (['--silent'], switch_on('silent')),
        (['--combined-ref'], switch_on('is_combined_ref')),
        (['--colors'], dict(dest='used_colors', action='extend')),
        (['--ls'], dict(dest='used_ls', action='extend')),
        (['-j', '--save-json'], switch_on('save_json')),
        (['-J', '--save-json-to'], dict(dest='json_output_dirpath')),
        (['--err-fpath'], dict(dest='error_log_fpath')),
        (['--read-support'], switch_on('calculate_read_support')),
    ]
    if is_metaquast:
        # Options that only MetaQUAST understands.
        options.extend([
            (['--unique-mapping'], switch_on('unique_mapping')),
            (['--max-ref-number'],
             dict(dest='max_references', type='int', action='callback',
                  callback=check_arg_value, callback_args=log_args,
                  callback_kwargs={'default_value': qconfig.max_references,
                                   'min_value': 0})),
            (['--references-list'], dict(dest='references_txt')),
            (['--blast-db'], dict(dest='custom_blast_db_fpath')),
        ])

    parser = OptionParser(option_class=QuastOption)
    for flags, attrs in options:
        parser.add_option(*flags, **attrs)
    _, contigs_fpaths = parser.parse_args(quast_args[1:])

    # Each tool has one test mode that the other does not support.
    if qconfig.test_sv and is_metaquast:
        wrong_test_option(logger, "Option --test-sv can be used for QUAST only\n", is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        wrong_test_option(logger, "Option --test-no-ref can be used for MetaQUAST only\n", is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        test_parent_dirpath = str(os.path.dirname(qconfig.output_dirpath))
        check_dirpath(qconfig.output_dirpath,
                      'You are trying to run QUAST from ' + test_parent_dirpath + '.\n' +
                      'Please, rerun QUAST from a different directory.')
        if qconfig.test or qconfig.test_sv:
            if is_metaquast:
                qconfig.reference = meta_test_references
            else:
                qconfig.reference = test_reference
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.glimmer = True
                qconfig.gene_finding = True
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        if is_metaquast:
            contigs_fpaths.extend(meta_test_contigs_fpaths)
        else:
            contigs_fpaths.extend(test_contigs_fpaths)
        qconfig.test = True

        if not all(isfile(fpath) for fpath in contigs_fpaths):
            logger.info(
                '\nYou are probably running QUAST installed via pip, which does not include test data.\n'
                'This is fine, just start using QUAST on your own data!\n'
                'If you still want to run tests, please download test_data directory from \n'
                'https://github.com/ablab/quast/ to CWD, or install QUAST from source:\n'
                'git clone https://github.com/ablab/quast && cd quast && ./setup.py install\n')
            sys.exit(2)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n", to_stderr=True)
        qconfig.usage(meta=is_metaquast, stream=sys.stderr)
        sys.exit(2)

    if qconfig.large_genome:
        set_large_genome_parameters()

    for contigs_fpath in contigs_fpaths:
        assert_file_exists(contigs_fpath, 'contigs')

    # Giving an explicit JSON directory implies that JSON must be saved.
    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    if not qconfig.output_dirpath:
        cwd = os.getcwd()
        check_dirpath(cwd,
                      'An output path was not specified manually. You are trying to run QUAST from ' +
                      str(cwd) + '.\n' +
                      'Please, specify a different directory using -o option.')
    save_json = None if is_metaquast else qconfig.save_json
    output_setup = set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath,
                                     not qconfig.output_dirpath, save_json)
    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = output_setup

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.set_up_console_handler(debug=qconfig.debug)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels,
                                           qconfig.all_labels_from_dirs)

    def to_int_list(raw_value):
        # The literal string "None" stands for "no thresholds at all".
        if raw_value == "None":
            return []
        return [int(item) for item in raw_value.split(",")]

    qconfig.contig_thresholds = to_int_list(qconfig.contig_thresholds)
    qconfig.genes_lengths = to_int_list(qconfig.genes_lengths)

    qconfig.set_max_threads(logger)

    # A user-supplied ambiguity score forces ambiguity usage to 'all'.
    if parser.values.ambiguity_score and qconfig.ambiguity_usage != 'all':
        qconfig.ambiguity_usage = 'all'
        logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    if qconfig.sam_fpaths or qconfig.bam_fpaths:
        check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

    return quast_py_args, contigs_fpaths
示例#8
0
def parse_options(logger, quast_args, is_metaquast=False):
    if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
        qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
        sys.exit(0)

    if '-v' in quast_args or '--version' in quast_args:
        qconfig.print_version(meta=is_metaquast)
        sys.exit(0)

    quast_py_args = quast_args[1:]

    options = [
        (['--debug'], dict(
             dest='debug',
             action='store_true')
         ),
        (['--test'], dict(
             dest='test',
             action='store_true')
         ),
        (['--test-sv'], dict(
             dest='test_sv',
             action='store_true')
         ),
        (['--test-no-ref'], dict(
             dest='test_no_ref',
             action='store_true')
         ),
        (['-o', '--output-dir'], dict(
             dest='output_dirpath',
             type='string',
             action='callback',
             callback=check_output_dir,
             callback_args=(logger,))
         ),
        (['-t', '--threads'], dict(
             dest='max_threads',
             type='int',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'default_value': 1, 'min_value': 1})
         ),
        (['-R', '--reference'], dict(
             dest='reference',
             type='string' if is_metaquast else 'file',
             action='callback' if is_metaquast else 'store',
             callback_args=(logger,) if is_metaquast else None,
             callback=parse_meta_references if is_metaquast else None)
         ),
        (['-G', '--genes'], dict(
             dest='genes',
             type='file',
             action='extend')
         ),
        (['-O', '--operons'], dict(
             dest='operons',
             type='file',
             action='extend')
         ),
        (['-1', '--reads1'], dict(
             dest='forward_reads',
             type='file')
         ),
        (['-2', '--reads2'], dict(
             dest='reverse_reads',
             type='file')
         ),
        (['--sam'], dict(
             dest='sam',
             type='file')
         ),
        (['--bam'], dict(
             dest='bam',
             type='file')
         ),
        (['--sv-bedpe'], dict(
             dest='bed',
             type='file')
         ),
        (['-l', '--labels'], dict(
             dest='labels',
             type='string')
         ),
        (['-L'], dict(
             dest='all_labels_from_dirs',
             action='store_true')
         ),
        (['--meta'], dict(
             dest='meta',
             action='store_true')
         ),
        (['-s', '--scaffolds'], dict(
             dest='scaffolds',
             action='store_true')
         ),
        (['-e', '--eukaryote'], dict(
             dest='prokaryote',
             action='store_false')
         ),
        (['-f', '--gene-finding'], dict(
             dest='gene_finding',
             action='store_true')
         ),
        (['--fragmented'], dict(
             dest='check_for_fragmented_ref',
             action='store_true')
         ),
        (['-a', '--ambiguity-usage'], dict(
             dest='ambiguity_usage',
             type='string',
             default=qconfig.ambiguity_usage,
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': ['none', 'one', 'all']})
         ),
        (['--ambiguity-score'], dict(
             dest='ambiguity_score',
             type='float',
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 0.8, 'max_value': 1.0})
         ),
        (['-u', '--use-all-alignments'], dict(
             dest='use_all_alignments',
             action='store_true')
         ),
        (['--strict-NA'], dict(
             dest='strict_NA',
             action='store_true')
         ),
        (['--significant-part-size'], dict(
             dest='significant_part_size',
             type=int)
         ),
        (['-x', '--extensive-mis-size'], dict(
             dest='extensive_misassembly_threshold',
             type='int',
             default=qconfig.extensive_misassembly_threshold,
             action='callback',
             callback=set_extensive_mis_size,
             callback_args=(logger,))
         ),
        (['-m', '--min-contig'], dict(
             dest='min_contig',
             type='int')
         ),
        (['-c', '--min-cluster'], dict(
             dest='min_cluster',
             type='int')
         ),
        (['-i', '--min-alignment'], dict(
             dest='min_alignment',
             type='int')
         ),
        (['--min-identity'], dict(
             dest='min_IDY',
             type='float',
             default=qconfig.min_IDY,
             action='callback',
             callback=check_arg_value,
             callback_args=(logger,),
             callback_kwargs={'min_value': 80.0, 'max_value': 100.0})
         ),
        (['--est-ref-size'], dict(
             dest='estimated_reference_size',
             type='int')
         ),
        (['--contig-thresholds'], dict(
             dest='contig_thresholds')
         ),
        (['--gene-thresholds'], dict(
             dest='genes_lengths')
         ),
        (['--gage'], dict(
             dest='with_gage',
             action='store_true')
         ),
        (['--glimmer'], dict(
             dest='glimmer',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['gene_finding', 'glimmer']},
             default=False)
         ),
        (['--plots-format'], dict(
             dest='plot_extension',
             type='string',
             action='callback',
             callback=check_str_arg_value,
             callback_args=(logger,),
             callback_kwargs={'available_values': qconfig.supported_plot_extensions})
         ),
        (['--svg'], dict(
             dest='draw_svg',
             action='store_true')
         ),
        (['--fast'], dict(
             dest='fast',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_gc', 'no_sv', 'no_gzip'],
                              'store_false_values': ['show_snps', 'draw_plots', 'html_report', 'create_icarus_html']},
             default=False)
         ),
        (['--no-gzip'], dict(
             dest='no_gzip',
             action='store_true')
         ),
        (['--no-check'], dict(
             dest='no_check',
             action='store_true')
         ),
        (['--no-check-meta'], dict(
             dest='no_check_meta',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_true_values': ['no_check', 'no_check_meta']})
         ),
        (['--no-snps'], dict(
             dest='show_snps',
             action='store_false')
         ),
        (['--no-plots'], dict(
             dest='draw_plots',
             action='store_false')
         ),
        (['--no-html'], dict(
             dest='html_report',
             action='callback',
             callback=set_multiple_variables,
             callback_kwargs={'store_false_values': ['html_report', 'create_icarus_html']})
         ),
        (['--no-icarus'], dict(
             dest='create_icarus_html',
             action='store_false')
         ),
        (['--no-gc'], dict(
             dest='no_gc',
             action='store_true')
         ),
        (['--no-sv'], dict(
             dest='no_sv',
             action='store_true')
         ),
        (['--memory-efficient'], dict(
             dest='memory_efficient',
             action='store_true')
         ),
        (['--silent'], dict(
             dest='silent',
             action='store_true')
         ),
        (['--combined-ref'], dict(
             dest='is_combined_ref',
             action='store_true')
         ),
        (['--colors'], dict(
             dest='used_colors',
             action='extend')
         ),
        (['--ls'], dict(
             dest='used_ls',
             action='extend')
         ),
        (['-j', '--save-json'], dict(
             dest='save_json',
             action='store_true')
         ),
        (['-J', '--save-json-to'], dict(
             dest='json_output_dirpath')
         ),
        (['--err-fpath'], dict(
             dest='error_log_fpath')
         ),
        (['--read-support'], dict(
             dest='calculate_read_support',
             action='store_true')
         )
    ]
    if is_metaquast:
        options += [
            (['--unique-mapping'], dict(
                 dest='unique_mapping',
                 action='store_true')
             ),
            (['--max-ref-number'], dict(
                 dest='max_references',
                 type='int',
                 action='callback',
                 callback=check_arg_value,
                 callback_args=(logger,),
                 callback_kwargs={'default_value': qconfig.max_references, 'min_value': 0})
             ),
            (['--references-list'], dict(
                 dest='references_txt')
             )
        ]

    parser = OptionParser(option_class=QuastOption)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    (opts, contigs_fpaths) = parser.parse_args(quast_args[1:])

    if qconfig.test_sv and is_metaquast:
        msg = "Option --test-sv can be used for QUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)
    if qconfig.test_no_ref and not is_metaquast:
        msg = "Option --test-no-ref can be used for MetaQUAST only\n"
        wrong_test_option(logger, msg, is_metaquast)

    if qconfig.test or qconfig.test_no_ref or qconfig.test_sv:
        qconfig.output_dirpath = abspath(qconfig.test_output_dirname)
        if qconfig.test or qconfig.test_sv:
            qconfig.reference = meta_test_references if is_metaquast else test_reference
            if not is_metaquast:
                qconfig.genes = test_genes
                qconfig.operons = test_operons
                qconfig.with_gage = True
                qconfig.glimmer = True
                qconfig.gene_finding = True
                qconfig.prokaryote = False
        if qconfig.test_sv:
            qconfig.forward_reads = test_forward_reads
            qconfig.reverse_reads = test_reverse_reads
        contigs_fpaths += meta_test_contigs_fpaths if is_metaquast else test_contigs_fpaths
        qconfig.test = True

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=is_metaquast)
        sys.exit(2)

    logger.set_up_console_handler(debug=qconfig.debug)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    if qconfig.json_output_dirpath:
        qconfig.save_json = True

    qconfig.output_dirpath, qconfig.json_output_dirpath, existing_alignments = \
        set_up_output_dir(qconfig.output_dirpath, qconfig.json_output_dirpath, not qconfig.output_dirpath,
                          qconfig.save_json if not is_metaquast else None)

    logger.set_up_file_handler(qconfig.output_dirpath, qconfig.error_log_fpath)
    logger.print_command_line(quast_args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments and not is_metaquast:
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used")
        qutils.remove_reports(qconfig.output_dirpath)

    if qconfig.labels:
        qconfig.labels = qutils.parse_labels(qconfig.labels, contigs_fpaths)
    qconfig.labels = qutils.process_labels(contigs_fpaths, qconfig.labels, qconfig.all_labels_from_dirs)

    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = map(int, qconfig.contig_thresholds.split(","))
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = map(int, qconfig.genes_lengths.split(","))

    qconfig.set_max_threads(logger)

    if parser.values.ambiguity_score:
        if qconfig.ambiguity_usage != 'all':
            qconfig.ambiguity_usage = 'all'
            logger.notice("--ambiguity-usage was set to 'all' because not default --ambiguity-score was specified")

    if is_metaquast:
        quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

    return quast_py_args, contigs_fpaths