def main(args):
    """Run the MetaQUAST pipeline for the given command-line arguments.

    The visible flow is:
      1. parse the options and set up the output / "corrected" directories;
      2. correct the user-provided references (if any) and the contigs;
      3. when no references were given, try to obtain them through
         ``search_references_meta.do``; if there are still none, run a single
         regular QUAST and exit;
      4. run quast.py against the combined reference (optionally re-running
         it after low-genome-fraction downloaded references are filtered out);
      5. partition contigs per reference and run quast.py for every reference
         that got contigs, then for the contigs not aligned anywhere;
      6. build the summary (and HTML / Icarus reports when enabled).

    Args:
        args: command-line arguments without the program name; the path of
            this script is prepended before they go to ``parse_options``.

    Returns:
        4 if none of the assemblies contains correct contigs; None if
        ``genome_info.txt`` is missing after the combined-reference run;
        otherwise the value returned by ``logger.finish_up``.

    Note:
        Calls ``sys.exit(0)`` when ``args`` is empty and after the
        no-reference QUAST run. ``logger.error(..., exit_with_code=N)``
        presumably terminates the process as well -- confirm in the logger.
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error(
            'QUAST does not support spaces in paths. \n'
            'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
            'Please, put QUAST in a different directory, then try again.\n',
            to_stderr=True,
            exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    metaquast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(
        logger, metaquast_path + args, is_metaquast=True)
    output_dirpath, ref_fpaths, labels = \
        qconfig.output_dirpath, qconfig.reference, qconfig.labels
    html_report = qconfig.html_report
    test_mode = qconfig.test

    # Directories
    output_dirpath, _, _ = qutils.set_up_output_dir(
        output_dirpath, None, not output_dirpath, save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    qconfig.set_max_threads(logger)
    qutils.logger = logger

    ########################################################################

    from libs import reporting
    reload(reporting)
    from libs import plotter

    # Always start from an empty directory for corrected files.
    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
            correct_meta_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    assemblies, labels = correct_assemblies(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice(
                "Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if qconfig.references_txt:
                logger.main_info(
                    "List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info(
                    "No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                    "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            ref_fpaths = search_references_meta.do(
                assemblies, labels, downloaded_dirpath, qconfig.references_txt)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                # Only references found by the search (not the ones from a
                # user-supplied list) are later filtered by genome fraction.
                if not qconfig.references_txt:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                    correct_meta_references(ref_fpaths, corrected_dirpath)
            elif test_mode and not ref_fpaths:
                logger.error(
                    'Failed to download or setup SILVA 16S rRNA database for working without '
                    'references on metagenome datasets!',
                    to_stderr=True,
                    exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice(
            'No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        _start_quast_main(quast_py_args, assemblies=assemblies, output_dirpath=output_dirpath)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module and is not guaranteed to exist (python -S, frozen apps).
        sys.exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)

    reads_fpaths = []
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpaths:
        bed_fpath, cov_fpath, _ = reads_analyzer.do(
            combined_ref_fpath, contigs_fpaths, reads_fpaths, corrected_ref_fpaths,
            os.path.join(combined_output_dirpath, qconfig.variation_dirname),
            external_logger=logger,
            sam_fpath=qconfig.sam, bam_fpath=qconfig.bam, bed_fpath=qconfig.bed)
        qconfig.bed = bed_fpath

    # Forward the alignment-related files to every subsequent quast.py run.
    if qconfig.bed:
        quast_py_args += ['--sv-bed']
        quast_py_args += [qconfig.bed]
    if qconfig.sam:
        quast_py_args += ['--sam']
        quast_py_args += [qconfig.sam]
    if qconfig.bam:
        quast_py_args += ['--bam']
        quast_py_args += [qconfig.bam]

    # Drop the scaffolds flag from the forwarded arguments. The membership
    # guard avoids a ValueError when parse_options has already removed it.
    for arg in args:
        if arg in ('-s', "--scaffolds") and arg in quast_py_args:
            quast_py_args.remove(arg)

    quast_py_args += ['--combined-ref']
    if qconfig.draw_plots or qconfig.html_report:
        if plotter.dict_color_and_ls:
            # Pass per-assembly colors and line styles on to the quast.py runs.
            colors_and_ls = [plotter.dict_color_and_ls[asm.label] for asm in assemblies]
            quast_py_args += ['--colors']
            quast_py_args += [','.join([style[0] for style in colors_and_ls])]
            quast_py_args += ['--ls']
            quast_py_args += [','.join([style[1] for style in colors_and_ls])]

    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors)
    if qconfig.html_report:
        from libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None

    # Arguments shared by the (possibly repeated) combined-reference run.
    combined_run_args = ([] if qconfig.unique_mapping else ["--ambiguity-usage", 'one'])
    return_code, total_num_notifications, assemblies, labels = \
        _start_quast_main(quast_py_args + combined_run_args,
                          assemblies=assemblies,
                          reference_fpath=combined_ref_fpath,
                          output_dirpath=combined_output_dirpath,
                          num_notifications_tuple=total_num_notifications,
                          is_first_run=True)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(
        output_dirpath, qconfig.combined_output_name, 'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        # Without genome_info.txt nothing further can be done: report and stop.
        logger.main_info('')
        logger.main_info(
            'Failed aligning the contigs for all the references. ' +
            ('Try to restart MetaQUAST with another references.'
             if not downloaded_refs else
             'Try to use option --max-ref-number to change maximum number of references '
             '(per each assembly) to download.'))
        logger.main_info('')
        cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
        return

    if downloaded_refs:
        logger.main_info()
        logger.main_info(
            'Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = get_downloaded_refs_with_alignments(
            genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                correct_meta_references(corr_ref_fpaths, corrected_dirpath)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = \
                _start_quast_main(quast_py_args + combined_run_args,
                                  assemblies=assemblies,
                                  reference_fpath=combined_ref_fpath,
                                  output_dirpath=combined_output_dirpath,
                                  num_notifications_tuple=total_num_notifications,
                                  is_first_run=True)
            if json_texts is not None:
                # Replace the JSON of the first combined run with the rerun's.
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info(
                'All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info(
                'All downloaded references have low genome fraction. Nothing was excluded for now.')

    quast_py_args += ['--no-check-meta']
    # Keep only the thresholds above --min-contig and store them as the
    # comma-separated string that is forwarded to quast.py.
    qconfig.contig_thresholds = ','.join(
        [str(threshold) for threshold in qconfig.contig_thresholds
         if threshold > qconfig.min_contig])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = remove_from_quast_py_args(
        quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds']
    quast_py_args += [qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info('Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports', 'alignments_%s.tsv'),
        labels)

    # From here on ref_names holds only the references that received contigs.
    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info(
                'No contigs were aligned to the reference ' + ref_name + ', skipping..')
        else:
            ref_names.append(ref_name)
            run_name = 'for the contigs aligned to ' + ref_name
            logger.main_info('Starting quast.py ' + run_name)

            return_code, total_num_notifications = _start_quast_main(
                quast_py_args,
                assemblies=ref_assemblies,
                reference_fpath=ref_fpath,
                output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
                num_notifications_tuple=total_num_notifications)
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name + ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(
            quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name),
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error('Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(output_dirpath)
        else:
            html_summary_report_fpath = None
        from libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [
            reporting.Fields.MIS_RELOCATION,
            reporting.Fields.MIS_TRANSLOCATION,
            reporting.Fields.MIS_INVERTION,
            reporting.Fields.MIS_ISTRANSLOCATIONS,
        ]
        create_meta_summary.do(
            html_summary_report_fpath, summary_output_dirpath,
            combined_output_dirpath, output_dirpath_per_ref,
            metrics_for_plots, misassembl_metrics,
            ref_names if no_unaligned_contigs else ref_names + [qconfig.not_aligned_name])
        if html_report and json_texts:
            html_saver.save_colors(
                output_dirpath, contigs_fpaths, plotter.dict_color_and_ls, meta=True)
            if qconfig.create_icarus_html:
                icarus_html_fpath = html_saver.create_meta_icarus(output_dirpath, ref_names)
                logger.main_info(' Icarus (contig browser) is saved to %s' % icarus_html_fpath)
            html_saver.create_meta_report(output_dirpath, json_texts)

    cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    return logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
def main(args):
    """Run the MetaQUAST pipeline, parsing the command line with getopt.

    The visible flow is:
      1. parse ``args`` with ``getopt.gnu_getopt`` and handle the options
         MetaQUAST consumes itself (everything else stays in the argument
         list forwarded to quast.py);
      2. set up the output directory and logging, correct references/contigs;
      3. when no references were given, try to obtain them through
         ``search_references_meta.do``; if there are still none, run a single
         regular QUAST and exit;
      4. run quast.py against the combined reference (optionally re-running
         it after unaligned downloaded references are removed);
      5. partition contigs per reference and run quast.py for every reference
         that got contigs, then for the contigs not aligned anywhere;
      6. build the summary (and the HTML report when enabled).

    Args:
        args: command-line arguments without the program name.

    Returns:
        4 if none of the assemblies contains correct contigs, otherwise None.

    Note:
        Calls ``sys.exit`` for an empty command line, ``--help``,
        ``--version``, getopt errors, missing contigs and after the
        no-reference QUAST run. ``logger.error(..., exit_with_code=N)``
        presumably terminates the process as well -- confirm in the logger.
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error(
            'QUAST does not support spaces in paths. \n'
            'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
            'Please, put QUAST in a different directory, then try again.\n',
            to_stderr=True,
            exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    # genes/operons are only validated and collected here; this function
    # never reads them afterwards (the options stay in quast_py_args).
    genes = []
    operons = []
    html_report = qconfig.html_report
    make_latest_symlink = True
    ref_txt_fpath = None

    try:
        options, contigs_fpaths = getopt.gnu_getopt(
            args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        print >> sys.stderr, exc_value
        print >> sys.stderr
        qconfig.usage(meta=True)
        sys.exit(2)

    quast_py_args = args[:]
    test_mode = False

    # Iterate over a snapshot: the loop body removes from and appends to
    # `options`, and mutating the list being iterated would silently skip
    # the option that follows a removed one (e.g. "-d --test").
    for opt, arg in options[:]:
        if opt in ('-d', '--debug'):
            options.remove((opt, arg))
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt == '--test' or opt == '--test-no-ref':
            options.remove((opt, arg))
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            options += [('-o', 'quast_test_output')]
            if opt == '--test':
                options += [('-R', ','.join([
                    os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_1.fasta'),
                    os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_2.fasta'),
                    os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_3.fasta')]))]
            contigs_fpaths += [
                os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_1.fasta'),
                os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_2.fasta')]
            test_mode = True

        elif opt.startswith('--help') or opt == '-h':
            qconfig.usage(opt == "--help-hidden", meta=True, short=False)
            sys.exit(0)

        elif opt.startswith('--version') or opt == '-v':
            qconfig.print_version(meta=True)
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=True)
        sys.exit(2)

    ref_fpaths = []
    combined_ref_fpath = ''
    reads_fpath_f = ''
    reads_fpath_r = ''

    output_dirpath = None

    labels = None
    all_labels_from_dirs = False

    # Options that MetaQUAST recognizes but does not act on in this function;
    # they remain in quast_py_args and reach quast.py unchanged.
    passthrough_opts = (
        '-j', '--save-json',
        '-J', '--save-json-to',
        "--contig-thresholds",
        '-c', "--mincluster", "--min-cluster",
        "--est-ref-size",
        "--gene-thresholds",
        '-s', "--scaffolds",
        "--gage",
        "--debug",
        '-e', "--eukaryote",
        '-f', "--gene-finding",
        '-i', "--min-alignment",
        '-a', "--ambiguity-usage",
        '-u', "--use-all-alignments",
        "--strict-NA",
        '-x', "--extensive-mis-size",
        "--meta",
        '--glimmer',
        '--no-snps',
        '--no-check',
        '--no-gc',
        '--no-plots',
        '--plots-format',
        '--memory-efficient',
    )

    for opt, arg in options:
        if opt in ('-o', "--output-dir"):
            # Removing output dir arg in order to further
            # construct other quast calls from this options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

            output_dirpath = os.path.abspath(arg)
            make_latest_symlink = False

        elif opt in ('-G', "--genes"):
            assert_file_exists(arg, 'genes')
            # append, not "+=": extending a list with a string would add the
            # path one character at a time.
            genes.append(arg)

        elif opt in ('-O', "--operons"):
            assert_file_exists(arg, 'operons')
            operons.append(arg)

        elif opt in ('-R', "--reference"):
            # Removing reference args in order to further
            # construct quast calls from this args with other reference options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            if os.path.isdir(arg):
                # A directory: take every FASTA file found under it.
                ref_fpaths = [
                    os.path.join(path, file)
                    for (path, dirs, files) in os.walk(arg)
                    for file in files
                    if qutils.check_is_fasta_file(file)]
                ref_fpaths.sort()
            else:
                ref_fpaths = arg.split(',')
                for ref_fpath in ref_fpaths:
                    assert_file_exists(ref_fpath, 'reference')

        elif opt == '--max-ref-number':
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            qconfig.max_references = int(arg)
            if qconfig.max_references < 0:
                qconfig.max_references = 0

        elif opt in ('-m', "--min-contig"):
            qconfig.min_contig = int(arg)

        elif opt in ('-t', "--threads"):
            qconfig.max_threads = int(arg)
            if qconfig.max_threads < 1:
                qconfig.max_threads = 1

        elif opt in ('-l', '--labels'):
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            labels = quast.parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            all_labels_from_dirs = True

        elif opt == '--references-list':
            ref_txt_fpath = arg

        elif opt == '--no-html':
            html_report = False

        elif opt == '--fast':
            # --no-check, --no-gc, --no-snps will automatically set in QUAST runs
            html_report = False

        elif opt == '--silent':
            qconfig.silent = True

        elif opt in ('-1', '--reads1'):
            reads_fpath_f = arg
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

        elif opt in ('-2', '--reads2'):
            reads_fpath_r = arg
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

        elif opt == '--contig-alignment-html':
            qconfig.create_contig_alignment_html = True

        elif opt in passthrough_opts:
            pass

        else:
            logger.error('Unknown option: %s. Use -h for help.' % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    # Contigs are passed to quast.py separately, so drop them from the args.
    for contigs_fpath in contigs_fpaths:
        if contigs_fpath in quast_py_args:
            quast_py_args.remove(contigs_fpath)

    # Directories
    output_dirpath, _, _ = quast._set_up_output_dir(
        output_dirpath, None, make_latest_symlink, save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    logger.set_up_file_handler(output_dirpath)
    # Rebuild the command line from the (possibly modified) options for logging.
    args = [os.path.realpath(__file__)]
    for k, v in options:
        args.extend([k, v])
    args.extend(contigs_fpaths)
    logger.print_command_line(args, wrap_after=None)
    logger.start()

    qconfig.set_max_threads(logger)

    ########################################################################

    from libs import reporting
    reload(reporting)

    # Always start from an empty directory for corrected files.
    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
            _correct_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    assemblies, correct_assemblies = _correct_contigs(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error(
            "None of the assembly files contains correct contigs. "
            "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice(
                "Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if ref_txt_fpath:
                logger.main_info(
                    "List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info(
                    "No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                    "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            ref_fpaths = search_references_meta.do(
                assemblies, labels, downloaded_dirpath, ref_txt_fpath)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                # Only references found by the search (not the ones from a
                # user-supplied list) are later filtered by alignment.
                if not ref_txt_fpath:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                    _correct_references(ref_fpaths, corrected_dirpath)
            elif test_mode and ref_fpaths is None:
                logger.error(
                    'Failed to download or setup SILVA 16S rRNA database for working without '
                    'references on metagenome datasets!',
                    to_stderr=True,
                    exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice(
            'No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        _start_quast_main(
            None,
            quast_py_args,
            assemblies=assemblies,
            output_dirpath=output_dirpath,
            exit_on_exception=True)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # `site` module and is not guaranteed to exist (python -S, frozen apps).
        sys.exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)

    reads_fpaths = []
    if reads_fpath_f:
        reads_fpaths.append(reads_fpath_f)
    if reads_fpath_r:
        reads_fpaths.append(reads_fpath_r)
    if reads_fpaths:
        bed_fpath = reads_analyzer.do(
            combined_ref_fpath, contigs_fpaths, reads_fpaths, corrected_ref_fpaths,
            os.path.join(combined_output_dirpath, qconfig.variation_dirname),
            external_logger=logger)
        if bed_fpath:
            quast_py_args += ['--bed-file']
            quast_py_args += [bed_fpath]

    quast_py_args += ['--combined-ref']

    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors)
    if qconfig.html_report:
        from libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None
    return_code, total_num_notifications, assemblies, labels = _start_quast_main(
        run_name,
        quast_py_args + ["--ambiguity-usage"] + ['all'],
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications,
        is_first_run=True)

    # Drop the scaffolds flag for the runs after the first combined one. The
    # membership guard avoids a ValueError when the raw command line spelled
    # the option differently from the normalized form stored in `args`.
    for arg in args:
        if arg in ('-s', "--scaffolds") and arg in quast_py_args:
            quast_py_args.remove(arg)

    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(
        output_dirpath, qconfig.combined_output_name, 'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        # Without genome_info.txt nothing further can be done: report and stop.
        logger.main_info('')
        logger.main_info(
            'Failed aligning the contigs for all the references. ' +
            ('Try to restart MetaQUAST with another references.'
             if not downloaded_refs else
             'Try to use option --max-ref-number to change maximum number of references '
             '(per each assembly) to download.'))
        logger.main_info('')
        quast._cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
        return

    if downloaded_refs:
        logger.main_info()
        logger.main_info(
            'Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = remove_unaligned_downloaded_refs(
            genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names = \
                _correct_references(corr_ref_fpaths, corrected_dirpath)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = _start_quast_main(
                run_name,
                quast_py_args + ["--ambiguity-usage"] + ['all'],
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications,
                is_first_run=True)
            if json_texts is not None:
                # Replace the JSON of the first combined run with the rerun's.
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info(
                'All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info(
                'All downloaded references have low genome fraction. Nothing was excluded for now.')

    quast_py_args += ['--no-check-meta']
    # Keep only the thresholds above --min-contig and store them as the
    # comma-separated string that is forwarded to quast.py.
    qconfig.contig_thresholds = ','.join(
        [str(threshold) for threshold in qconfig.contig_thresholds
         if threshold > qconfig.min_contig])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = __remove_from_quast_py_args(
        quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds']
    quast_py_args += [qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info('Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = _partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports', 'alignments_%s.tsv'),
        labels)

    # From here on ref_names holds only the references that received contigs.
    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info(
                'No contigs were aligned to the reference ' + ref_name + ', skipping..')
        else:
            ref_names.append(ref_name)
            run_name = 'for the contigs aligned to ' + ref_name
            logger.main_info('Starting quast.py ' + run_name)

            return_code, total_num_notifications = _start_quast_main(
                run_name,
                quast_py_args,
                assemblies=ref_assemblies,
                reference_fpath=ref_fpath,
                output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
                exit_on_exception=False,
                num_notifications_tuple=total_num_notifications)
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name + ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(
            run_name,
            quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name),
            exit_on_exception=False,
            num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error('Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(output_dirpath)
        else:
            html_summary_report_fpath = None
        from libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [
            reporting.Fields.MIS_RELOCATION,
            reporting.Fields.MIS_TRANSLOCATION,
            reporting.Fields.MIS_INVERTION,
            reporting.Fields.MIS_ISTRANSLOCATIONS,
        ]
        create_meta_summary.do(
            html_summary_report_fpath, summary_output_dirpath,
            combined_output_dirpath, output_dirpath_per_ref,
            metrics_for_plots, misassembl_metrics,
            ref_names if no_unaligned_contigs else ref_names + [qconfig.not_aligned_name])
        if html_report and json_texts:
            from libs import plotter
            html_saver.save_colors(
                output_dirpath, contigs_fpaths, plotter.dict_color_and_ls, meta=True)
            html_saver.create_meta_report(output_dirpath, json_texts)

    quast._cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
def main(args):
    """Run a single QUAST evaluation for the given command-line arguments.

    The function parses ``args`` with ``getopt.gnu_getopt``, stores the
    settings in ``qconfig``, prepares the output directory, corrects the
    reference and the contigs, runs the individual analysis modules and
    finally saves the reports.

    Args:
        args: list of command-line arguments (without the program name).

    Returns:
        0 on success, 4 when none of the assembly files contains correct
        contigs.

    The process is terminated through ``sys.exit`` when no arguments are
    given (0), for help/version requests (0) and for malformed options or a
    missing contigs file (2).  ``logger.error`` is called with
    ``exit_with_code`` in several places; presumably that terminates the
    process as well -- verify against the logger implementation.
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error('QUAST does not support spaces in paths. \n'
                     'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
                     'Please, put QUAST in a different directory, then try again.\n',
                     to_stderr=True,
                     exit_with_code=3)

    if not args:
        qconfig.usage()
        sys.exit(0)

    # Start from pristine settings: qconfig is a module that keeps state
    # between successive calls of main().
    reload(qconfig)

    try:
        options, contigs_fpaths = getopt.gnu_getopt(args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        print >> sys.stderr, exc_value
        print >> sys.stderr
        qconfig.usage()
        sys.exit(2)

    # Iterate over a copy because the test options rewrite `options` in place.
    for opt, arg in options[:]:
        if opt == '--test' or opt == '--test-sv':
            options.remove((opt, arg))
            options += [('-o', 'quast_test_output'),
                        ('-R', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reference.fasta.gz')),  # for compiling MUMmer
                        ('-O', os.path.join(qconfig.QUAST_HOME, 'test_data', 'operons.gff')),
                        ('-G', os.path.join(qconfig.QUAST_HOME, 'test_data', 'genes.gff')),
                        ('--gage', ''),  # for compiling GAGE Java classes
                        ('--gene-finding', ''), ('--eukaryote', ''), ('--glimmer', '')]  # for compiling GlimmerHMM
            if opt == '--test-sv':
                options += [('-1', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reads1.fastq.gz')),
                            ('-2', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reads2.fastq.gz'))]
            contigs_fpaths += [os.path.join(qconfig.QUAST_HOME, 'test_data', 'contigs_1.fasta'),
                               os.path.join(qconfig.QUAST_HOME, 'test_data', 'contigs_2.fasta')]
            qconfig.test = True

        if opt.startswith('--help') or opt == '-h':
            qconfig.usage(opt == "--help-hidden", short=False)
            sys.exit(0)

        elif opt.startswith('--version') or opt == '-v':
            qconfig.print_version()
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage()
        sys.exit(2)

    json_output_dirpath = None
    output_dirpath = None

    labels = None
    all_labels_from_dirs = False
    qconfig.is_combined_ref = False

    ref_fpath = ''
    genes_fpaths = []
    operons_fpaths = []
    bed_fpath = None
    reads_fpath_f = ''
    reads_fpath_r = ''

    # Yes, this is a code duplicating. But OptionParser is deprecated since version 2.7.
    for opt, arg in options:
        if opt in ('-d', '--debug'):
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt in ('-o', "--output-dir"):
            output_dirpath = os.path.abspath(arg)
            qconfig.make_latest_symlink = False
            if ' ' in output_dirpath:
                logger.error('QUAST does not support spaces in paths. \n'
                             'You have specified ' + str(output_dirpath) + ' as an output path.\n'
                             'Please, use a different directory.\n',
                             to_stderr=True,
                             exit_with_code=3)

        elif opt in ('-G', "--genes"):
            genes_fpaths.append(assert_file_exists(arg, 'genes'))

        elif opt in ('-O', "--operons"):
            operons_fpaths.append(assert_file_exists(arg, 'operons'))

        elif opt in ('-R', "--reference"):
            ref_fpath = assert_file_exists(arg, 'reference')

        elif opt == "--contig-thresholds":
            qconfig.contig_thresholds = arg

        elif opt in ('-m', "--min-contig"):
            qconfig.min_contig = int(arg)

        elif opt in ('-t', "--threads"):
            qconfig.max_threads = int(arg)
            if qconfig.max_threads < 1:
                qconfig.max_threads = 1

        elif opt in ('-c', "--min-cluster"):
            qconfig.min_cluster = int(arg)

        elif opt in ('-i', "--min-alignment"):
            qconfig.min_alignment = int(arg)

        elif opt == "--est-ref-size":
            qconfig.estimated_reference_size = int(arg)

        elif opt == "--gene-thresholds":
            qconfig.genes_lengths = arg

        elif opt in ('-j', '--save-json'):
            qconfig.save_json = True

        elif opt in ('-J', '--save-json-to'):
            qconfig.save_json = True
            qconfig.make_latest_symlink = False
            json_output_dirpath = arg

        elif opt == '--err-fpath':  # for web-quast
            qconfig.save_error = True
            qconfig.error_log_fname = arg

        elif opt in ('-s', "--scaffolds"):
            qconfig.scaffolds = True

        elif opt == "--gage":
            qconfig.with_gage = True

        elif opt in ('-e', "--eukaryote"):
            qconfig.prokaryote = False

        elif opt in ('-f', "--gene-finding"):
            qconfig.gene_finding = True

        elif opt in ('-a', "--ambiguity-usage"):
            # Values other than the three supported ones are ignored.
            if arg in ["none", "one", "all"]:
                qconfig.ambiguity_usage = arg

        elif opt in ('-u', "--use-all-alignments"):
            qconfig.use_all_alignments = True

        elif opt == "--strict-NA":
            qconfig.strict_NA = True

        elif opt in ('-x', "--extensive-mis-size"):
            if int(arg) <= qconfig.MAX_INDEL_LENGTH:
                logger.error("--extensive-mis-size should be greater than maximum indel length (%d)!"
                             % qconfig.MAX_INDEL_LENGTH, 1, to_stderr=True)
            qconfig.extensive_misassembly_threshold = int(arg)

        elif opt == '--no-snps':
            qconfig.show_snps = False

        elif opt == '--no-plots':
            qconfig.draw_plots = False

        elif opt == '--no-html':
            qconfig.html_report = False

        elif opt == '--no-check':
            qconfig.no_check = True

        elif opt == '--no-gc':
            qconfig.no_gc = True

        elif opt == '--fast':  # --no-gc, --no-plots, --no-snps
            #qconfig.no_check = True  # too risky to include
            qconfig.no_gc = True
            qconfig.show_snps = False
            qconfig.draw_plots = False
            qconfig.html_report = False

        elif opt == '--plots-format':
            if arg.lower() in qconfig.supported_plot_extensions:
                qconfig.plot_extension = arg.lower()
            else:
                logger.error('Format "%s" is not supported. Please, use one of the supported formats: %s.' %
                             (arg, ', '.join(qconfig.supported_plot_extensions)),
                             to_stderr=True, exit_with_code=2)

        elif opt == '--meta':
            qconfig.meta = True

        elif opt == '--no-check-meta':
            qconfig.no_check = True
            qconfig.no_check_meta = True

        elif opt == '--references-list':
            pass

        elif opt in ('-l', '--labels'):
            labels = parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            all_labels_from_dirs = True

        elif opt == '--glimmer':
            qconfig.glimmer = True

        elif opt == '--combined-ref':
            qconfig.is_combined_ref = True

        elif opt == '--memory-efficient':
            qconfig.memory_efficient = True

        elif opt == '--silent':
            qconfig.silent = True

        elif opt in ('-1', '--reads1'):
            reads_fpath_f = arg

        elif opt in ('-2', '--reads2'):
            reads_fpath_r = arg

        elif opt == '--bed-file':
            bed_fpath = arg

        elif opt == '--contig-alignment-html':
            qconfig.create_contig_alignment_html = True

        else:
            logger.error('Unknown option: %s. Use -h for help.' % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for contigs_fpath in contigs_fpaths:
        assert_file_exists(contigs_fpath, 'contigs')

    labels = process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    output_dirpath, json_output_dirpath, existing_alignments = \
        _set_up_output_dir(output_dirpath, json_output_dirpath,
                           qconfig.make_latest_symlink, qconfig.save_json)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    logger.set_up_file_handler(output_dirpath)
    # Rebuild the effective command line (after the test options were expanded) for the log.
    args = [os.path.realpath(__file__)]
    for k, v in options:
        args.extend([k, v])
    args.extend(contigs_fpaths)
    logger.print_command_line(args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments:
        logger.main_info()
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used.")
        qutils.remove_reports(output_dirpath)

    # The thresholds arrive as comma-separated strings; "None" means an empty list.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = map(int, qconfig.contig_thresholds.split(","))
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = map(int, qconfig.genes_lengths.split(","))

    qconfig.set_max_threads(logger)

    logger.main_info()
    logger.print_params()

    ########################################################################
    from libs import reporting
    reload(reporting)

    if qconfig.is_combined_ref:
        # Use the corrected-input directory located one level above this run's output.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        ref_fpath = _correct_reference(ref_fpath, corrected_dirpath)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    contigs_fpaths, old_contigs_fpaths = _correct_contigs(contigs_fpaths, corrected_dirpath, reporting, labels)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    reads_fpaths = []
    if reads_fpath_f:
        reads_fpaths.append(reads_fpath_f)
    if reads_fpath_r:
        reads_fpaths.append(reads_fpath_r)
    if reads_fpaths:
        # The result replaces any path given with --bed-file.
        bed_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths, reads_fpaths, None,
                                      os.path.join(output_dirpath, qconfig.variation_dirname),
                                      external_logger=logger)

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.",
                     fake_if_nested_run=True)
        return 4

    qconfig.assemblies_fpaths = contigs_fpaths
    if qconfig.with_gage:
        ########################################################################
        ### GAGE
        ########################################################################
        if not ref_fpath:
            logger.warning("GAGE can't be run without a reference and will be skipped.")
        else:
            from libs import gage
            gage.do(ref_fpath, contigs_fpaths, output_dirpath)

    # Where all pdfs will be saved
    all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)
    all_pdf_file = None

    if qconfig.draw_plots or qconfig.html_report:
        from libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.
        try:
            from matplotlib.backends.backend_pdf import PdfPages
            all_pdf_file = PdfPages(all_pdf_fpath)
        except Exception:
            # The PDF report is optional: go on without it when matplotlib or its
            # PDF backend cannot be used.  Ctrl-C and SystemExit are not swallowed.
            all_pdf_file = None

    if json_output_dirpath:
        from libs.html_saver import json_saver
        if json_saver.simplejson_error:
            json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from libs import basic_stats
    basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
                   json_output_dirpath, output_dirpath)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from libs import contigs_analyzer
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, qconfig.prokaryote,
            os.path.join(output_dirpath, 'contigs_reports'), old_contigs_fpaths, bed_fpath)
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    if len(aligned_contigs_fpaths) and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, json_output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from libs import genome_analyzer
        genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, json_output_dirpath,
            genes_fpaths, operons_fpaths, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    if qconfig.gene_finding or qconfig.glimmer:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from libs import glimmer
            glimmer.do(contigs_fpaths, qconfig.genes_lengths,
                       os.path.join(output_dirpath, 'predicted_genes'))
        else:
            ########################################################################
            ### GeneMark
            ########################################################################
            from libs import genemark
            genemark.do(contigs_fpaths, qconfig.genes_lengths,
                        os.path.join(output_dirpath, 'predicted_genes'),
                        qconfig.prokaryote, qconfig.meta)
    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")

    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots:
        logger.print_timestamp()
        logger.main_info('Drawing large plots...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath and qconfig.show_snps:
                contig_report_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath,
                                                           'contigs_report_%s.stdout')
            else:
                contig_report_fpath_pattern = None
            # One step per drawing task that is actually going to run.
            number_of_steps = sum([int(bool(value)) for value in [contig_report_fpath_pattern, all_pdf_file]])
            if contig_report_fpath_pattern:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info(' 1 of %d: Creating contig alignment plot...' % number_of_steps)
                from libs import contig_alignment_plotter
                contig_alignment_plot_fpath = contig_alignment_plotter.do(
                    contigs_fpaths, contig_report_fpath_pattern,
                    output_dirpath, ref_fpath, similar=True)

            if all_pdf_file:
                # full report in PDF format: all tables and plots
                logger.main_info(' %d of %d: Creating PDF with all tables and plots...'
                                 % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_file)
            logger.main_info('Done')
        except KeyboardInterrupt:
            logger.main_info('..step skipped!')
            # The PDF may not exist (e.g. PdfPages could not be set up), so
            # delete it only when it is really there instead of failing with OSError.
            if os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info(' Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info(' Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if json_output_dirpath:
        json_saver.save_total_report(json_output_dirpath, qconfig.min_contig, ref_fpath)

    if qconfig.html_report:
        from libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if os.path.isfile(all_pdf_fpath):
        logger.main_info(' PDF version (tables and plots) saved to ' + all_pdf_fpath)

    if contig_alignment_plot_fpath:
        logger.main_info(' Contig alignment plot: %s' % contig_alignment_plot_fpath)

    _cleanup(corrected_dirpath)
    logger.finish_up(check_test=qconfig.test)
    return 0
def main(args):
    """Run MetaQUAST: evaluate assemblies against several references.

    The function parses ``args``, corrects references and contigs (or tries
    to obtain references through ``search_references_meta`` when none are
    given), runs quast.py for the combined reference, then once per
    reference and once for the contigs that were not aligned anywhere, and
    finally builds the summary reports.

    Args:
        args: list of command-line arguments (without the program name).

    Returns:
        4 when none of the assembly files contains correct contigs,
        otherwise ``None``.

    The process is terminated through ``sys.exit`` when no arguments are
    given (0), for help/version requests (0), for malformed options or a
    missing contigs file (2), and with 0 after the reference-less run.
    ``logger.error`` is called with ``exit_with_code`` in several places;
    presumably that terminates the process as well -- verify against the
    logger implementation.
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error('QUAST does not support spaces in paths. \n'
                     'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
                     'Please, put QUAST in a different directory, then try again.\n',
                     to_stderr=True,
                     exit_with_code=3)

    if not args:
        qconfig.usage(meta=True)
        sys.exit(0)

    genes = []
    operons = []
    html_report = qconfig.html_report
    make_latest_symlink = True
    ref_txt_fpath = None

    try:
        options, contigs_fpaths = getopt.gnu_getopt(args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        print >> sys.stderr, exc_value
        print >> sys.stderr
        qconfig.usage(meta=True)
        sys.exit(2)

    # Arguments that are forwarded to every nested quast.py run.
    quast_py_args = args[:]
    test_mode = False

    # Iterate over a copy: the body removes entries from `options`, and doing
    # that on the list being iterated would silently skip the following option.
    for opt, arg in options[:]:
        if opt in ('-d', '--debug'):
            options.remove((opt, arg))
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt == '--test' or opt == '--test-no-ref':
            options.remove((opt, arg))
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            options += [('-o', 'quast_test_output')]
            if opt == '--test':
                options += [('-R', ','.join([os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_1.fasta'),
                                             os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_2.fasta'),
                                             os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_ref_3.fasta')]))]
            contigs_fpaths += [os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_1.fasta'),
                               os.path.join(qconfig.QUAST_HOME, 'test_data', 'meta_contigs_2.fasta')]
            test_mode = True

        elif opt.startswith('--help') or opt == '-h':
            qconfig.usage(opt == "--help-hidden", meta=True, short=False)
            sys.exit(0)

        elif opt.startswith('--version') or opt == '-v':
            qconfig.print_version(meta=True)
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage(meta=True)
        sys.exit(2)

    ref_fpaths = []
    combined_ref_fpath = ''
    reads_fpath_f = ''
    reads_fpath_r = ''

    output_dirpath = None

    labels = None
    all_labels_from_dirs = False

    # Options that need no handling here: they stay in quast_py_args and are
    # interpreted by the nested quast.py runs.
    quast_only_opts = (
        '-j', '--save-json',
        '-J', '--save-json-to',
        "--contig-thresholds",
        '-c', "--mincluster", "--min-cluster",
        "--est-ref-size",
        "--gene-thresholds",
        '-s', "--scaffolds",
        "--gage",
        "--debug",
        '-e', "--eukaryote",
        '-f', "--gene-finding",
        '-i', "--min-alignment",
        '-a', "--ambiguity-usage",
        '-u', "--use-all-alignments",
        "--strict-NA",
        '-x', "--extensive-mis-size",
        "--meta",
        '--glimmer',
        '--no-snps',
        '--no-check',
        '--no-gc',
        '--no-plots',
        '--plots-format',
        '--memory-efficient',
    )

    for opt, arg in options:
        if opt in ('-o', "--output-dir"):
            # Removing output dir arg in order to further
            # construct other quast calls from this options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

            output_dirpath = os.path.abspath(arg)
            make_latest_symlink = False

        elif opt in ('-G', "--genes"):
            assert_file_exists(arg, 'genes')
            # append(), not +=: the latter would add the path character by character.
            # The list is not read again in this function.
            genes.append(arg)

        elif opt in ('-O', "--operons"):
            assert_file_exists(arg, 'operons')
            operons.append(arg)

        elif opt in ('-R', "--reference"):
            # Removing reference args in order to further
            # construct quast calls from this args with other reference options
            if opt in quast_py_args and arg in quast_py_args:
                quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            if os.path.isdir(arg):
                # A directory: take every FASTA file found below it.
                ref_fpaths = [os.path.join(dirpath, fname)
                              for (dirpath, dirnames, fnames) in os.walk(arg)
                              for fname in fnames
                              if qutils.check_is_fasta_file(fname)]
                ref_fpaths.sort()
            else:
                # A comma-separated list of reference files.
                ref_fpaths = arg.split(',')
                for ref_fpath in ref_fpaths:
                    assert_file_exists(ref_fpath, 'reference')

        elif opt == '--max-ref-number':
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            qconfig.max_references = int(arg)
            if qconfig.max_references < 0:
                qconfig.max_references = 0

        elif opt in ('-m', "--min-contig"):
            qconfig.min_contig = int(arg)

        elif opt in ('-t', "--threads"):
            qconfig.max_threads = int(arg)
            if qconfig.max_threads < 1:
                qconfig.max_threads = 1

        elif opt in ('-l', '--labels'):
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)
            labels = quast.parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt)
            all_labels_from_dirs = True

        elif opt in quast_only_opts:
            pass

        elif opt == '--references-list':
            ref_txt_fpath = arg

        elif opt == '--no-html':
            html_report = False

        elif opt == '--fast':  # --no-check, --no-gc, --no-snps will automatically set in QUAST runs
            html_report = False

        elif opt == '--silent':
            qconfig.silent = True

        elif opt in ('-1', '--reads1'):
            reads_fpath_f = arg
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

        elif opt in ('-2', '--reads2'):
            reads_fpath_r = arg
            quast_py_args = __remove_from_quast_py_args(quast_py_args, opt, arg)

        elif opt == '--contig-alignment-html':
            qconfig.create_contig_alignment_html = True

        else:
            logger.error('Unknown option: %s. Use -h for help.' % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for c_fpath in contigs_fpaths:
        assert_file_exists(c_fpath, 'contigs')

    labels = quast.process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    # The nested runs receive their assemblies explicitly, not via the command line.
    for contigs_fpath in contigs_fpaths:
        if contigs_fpath in quast_py_args:
            quast_py_args.remove(contigs_fpath)

    # Directories
    output_dirpath, _, _ = quast._set_up_output_dir(
        output_dirpath, None, make_latest_symlink,
        save_json=False)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    logger.set_up_file_handler(output_dirpath)
    # Rebuild the effective command line (after the test options were expanded) for the log.
    args = [os.path.realpath(__file__)]
    for k, v in options:
        args.extend([k, v])
    args.extend(contigs_fpaths)
    logger.print_command_line(args, wrap_after=None)
    logger.start()

    qconfig.set_max_threads(logger)

    ########################################################################

    from libs import reporting
    reload(reporting)

    if os.path.isdir(corrected_dirpath):
        shutil.rmtree(corrected_dirpath)
    os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCES
    if ref_fpaths:
        logger.main_info()
        logger.main_info('Reference(s):')

        corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
            _correct_references(ref_fpaths, corrected_dirpath)

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')
    assemblies, correct_assemblies = _correct_contigs(contigs_fpaths, output_dirpath, labels)
    if not assemblies:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.")
        return 4

    # Running QUAST(s)
    quast_py_args += ['--meta']
    downloaded_refs = False

    # SEARCHING REFERENCES
    if not ref_fpaths:
        logger.main_info()
        if qconfig.max_references == 0:
            logger.notice("Maximum number of references (--max-ref-number) is set to 0, search in SILVA 16S rRNA database is disabled")
        else:
            if ref_txt_fpath:
                logger.main_info("List of references was provided, starting to download reference genomes from NCBI...")
            else:
                logger.main_info("No references are provided, starting to search for reference genomes in SILVA 16S rRNA database "
                                 "and to download them from NCBI...")
            downloaded_dirpath = os.path.join(output_dirpath, qconfig.downloaded_dirname)
            if not os.path.isdir(downloaded_dirpath):
                os.mkdir(downloaded_dirpath)
            ref_fpaths = search_references_meta.do(assemblies, labels, downloaded_dirpath, ref_txt_fpath)
            if ref_fpaths:
                search_references_meta.is_quast_first_run = True
                # Only references found by the database search (not the ones listed
                # by the user) are filtered by genome fraction further below.
                if not ref_txt_fpath:
                    downloaded_refs = True
                logger.main_info()
                logger.main_info('Downloaded reference(s):')
                corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                    _correct_references(ref_fpaths, corrected_dirpath)
            elif test_mode and ref_fpaths is None:
                logger.error('Failed to download or setup SILVA 16S rRNA database for working without '
                             'references on metagenome datasets!',
                             to_stderr=True, exit_with_code=4)

    if not ref_fpaths:
        # No references, running regular quast with MetaGenemark gene finder
        logger.main_info()
        logger.notice('No references are provided, starting regular QUAST with MetaGeneMark gene finder')
        _start_quast_main(
            None,
            quast_py_args,
            assemblies=assemblies,
            output_dirpath=output_dirpath,
            exit_on_exception=True)
        # sys.exit() rather than the interactive helper exit(), which is only
        # available when the site module has been loaded.
        sys.exit(0)

    # Running combined reference
    combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)

    reads_fpaths = []
    if reads_fpath_f:
        reads_fpaths.append(reads_fpath_f)
    if reads_fpath_r:
        reads_fpaths.append(reads_fpath_r)
    if reads_fpaths:
        bed_fpath = reads_analyzer.do(combined_ref_fpath, contigs_fpaths, reads_fpaths, corrected_ref_fpaths,
                                      os.path.join(combined_output_dirpath, qconfig.variation_dirname),
                                      external_logger=logger)
        if bed_fpath:
            quast_py_args += ['--bed-file']
            quast_py_args += [bed_fpath]

    quast_py_args += ['--combined-ref']

    run_name = 'for the combined reference'
    logger.main_info()
    logger.main_info('Starting quast.py ' + run_name + '...')
    total_num_notices = 0
    total_num_warnings = 0
    total_num_nf_errors = 0
    total_num_notifications = (total_num_notices, total_num_warnings, total_num_nf_errors)
    if qconfig.html_report:
        from libs.html_saver import json_saver
        json_texts = []
    else:
        json_texts = None
    return_code, total_num_notifications, assemblies, labels = _start_quast_main(
        run_name, quast_py_args + ["--ambiguity-usage"] + ['all'],
        assemblies=assemblies,
        reference_fpath=combined_ref_fpath,
        output_dirpath=combined_output_dirpath,
        num_notifications_tuple=total_num_notifications, is_first_run=True)
    # The scaffolds option is dropped from the arguments of all the following runs.
    for arg in args:
        if arg in ('-s', "--scaffolds"):
            quast_py_args.remove(arg)
    if json_texts is not None:
        json_texts.append(json_saver.json_text)
    search_references_meta.is_quast_first_run = False

    genome_info_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name, 'genome_stats')
    genome_info_fpath = os.path.join(genome_info_dirpath, 'genome_info.txt')
    if not os.path.exists(genome_info_fpath):
        # The combined run left no genome statistics: nothing to partition, stop here.
        logger.main_info('')
        logger.main_info('Failed aligning the contigs for all the references. ' +
                         ('Try to restart MetaQUAST with another references.' if not downloaded_refs
                          else 'Try to use option --max-ref-number to change maximum number of references '
                               '(per each assembly) to download.'))
        logger.main_info('')
        quast._cleanup(corrected_dirpath)
        logger.main_info('MetaQUAST finished.')
        logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
        return

    if downloaded_refs:
        logger.main_info()
        logger.main_info('Excluding downloaded references with low genome fraction from further analysis..')
        corr_ref_fpaths = remove_unaligned_downloaded_refs(genome_info_fpath, ref_fpaths, chromosomes_by_refs)
        if corr_ref_fpaths and corr_ref_fpaths != ref_fpaths:
            logger.main_info()
            logger.main_info('Filtered reference(s):')
            os.remove(combined_ref_fpath)
            contigs_analyzer.ref_labels_by_chromosomes = {}
            corrected_ref_fpaths, combined_ref_fpath, chromosomes_by_refs, ref_names =\
                _correct_references(corr_ref_fpaths, corrected_dirpath)
            run_name = 'for the corrected combined reference'
            logger.main_info()
            logger.main_info('Starting quast.py ' + run_name + '...')
            return_code, total_num_notifications, assemblies, labels = _start_quast_main(
                run_name, quast_py_args + ["--ambiguity-usage"] + ['all'],
                assemblies=assemblies,
                reference_fpath=combined_ref_fpath,
                output_dirpath=combined_output_dirpath,
                num_notifications_tuple=total_num_notifications, is_first_run=True)
            if json_texts is not None:
                # Replace the report of the first combined run with the re-run's one.
                json_texts = json_texts[:-1]
                json_texts.append(json_saver.json_text)
        elif corr_ref_fpaths == ref_fpaths:
            logger.main_info('All downloaded references have genome fraction more than 10%. Nothing was excluded.')
        else:
            logger.main_info('All downloaded references have low genome fraction. Nothing was excluded for now.')

    quast_py_args += ['--no-check-meta']
    # Thresholds are handed to the nested runs as a comma-separated string.
    qconfig.contig_thresholds = ','.join([str(threshold) for threshold in qconfig.contig_thresholds
                                          if threshold > qconfig.min_contig])
    if not qconfig.contig_thresholds:
        qconfig.contig_thresholds = 'None'
    quast_py_args = __remove_from_quast_py_args(quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
    quast_py_args += ['--contig-thresholds']
    quast_py_args += [qconfig.contig_thresholds]
    quast_py_args.remove('--combined-ref')

    logger.main_info()
    logger.main_info('Partitioning contigs into bins aligned to each reference..')

    assemblies_by_reference, not_aligned_assemblies = _partition_contigs(
        assemblies, corrected_ref_fpaths, corrected_dirpath,
        os.path.join(combined_output_dirpath, 'contigs_reports', 'alignments_%s.tsv'), labels)

    # From here on ref_names holds only the references that got a run of their own.
    ref_names = []
    output_dirpath_per_ref = os.path.join(output_dirpath, qconfig.per_ref_dirname)
    for ref_fpath, ref_assemblies in assemblies_by_reference:
        ref_name = qutils.name_from_fpath(ref_fpath)
        logger.main_info('')
        if not ref_assemblies:
            logger.main_info('No contigs were aligned to the reference ' + ref_name + ', skipping..')
        else:
            ref_names.append(ref_name)
            run_name = 'for the contigs aligned to ' + ref_name
            logger.main_info('Starting quast.py ' + run_name)

            return_code, total_num_notifications = _start_quast_main(
                run_name, quast_py_args,
                assemblies=ref_assemblies,
                reference_fpath=ref_fpath,
                output_dirpath=os.path.join(output_dirpath_per_ref, ref_name),
                exit_on_exception=False, num_notifications_tuple=total_num_notifications)
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    # Finally running for the contigs that has not been aligned to any reference
    no_unaligned_contigs = True
    for assembly in not_aligned_assemblies:
        if os.path.isfile(assembly.fpath) and os.stat(assembly.fpath).st_size != 0:
            no_unaligned_contigs = False
            break

    run_name = 'for the contigs not aligned anywhere'
    logger.main_info()
    if no_unaligned_contigs:
        logger.main_info('Skipping quast.py ' + run_name + ' (everything is aligned!)')
    else:
        logger.main_info('Starting quast.py ' + run_name + '...')

        return_code, total_num_notifications = _start_quast_main(
            run_name, quast_py_args,
            assemblies=not_aligned_assemblies,
            output_dirpath=os.path.join(output_dirpath, qconfig.not_aligned_name),
            exit_on_exception=False, num_notifications_tuple=total_num_notifications)

        if return_code not in [0, 4]:
            logger.error('Error running quast.py for the contigs not aligned anywhere')
        elif return_code == 4:  # no unaligned contigs, i.e. everything aligned
            no_unaligned_contigs = True
        if not no_unaligned_contigs:
            if json_texts is not None:
                json_texts.append(json_saver.json_text)

    if ref_names:
        logger.print_timestamp()
        logger.main_info("Summarizing results...")

        summary_output_dirpath = os.path.join(output_dirpath, qconfig.meta_summary_dir)
        if not os.path.isdir(summary_output_dirpath):
            os.makedirs(summary_output_dirpath)
        if html_report and json_texts:
            from libs.html_saver import html_saver
            html_summary_report_fpath = html_saver.init_meta_report(output_dirpath)
        else:
            html_summary_report_fpath = None
        from libs import create_meta_summary
        metrics_for_plots = reporting.Fields.main_metrics
        misassembl_metrics = [reporting.Fields.MIS_RELOCATION, reporting.Fields.MIS_TRANSLOCATION,
                              reporting.Fields.MIS_INVERTION, reporting.Fields.MIS_ISTRANSLOCATIONS]
        create_meta_summary.do(html_summary_report_fpath, summary_output_dirpath, combined_output_dirpath,
                               output_dirpath_per_ref, metrics_for_plots, misassembl_metrics,
                               ref_names if no_unaligned_contigs else ref_names + [qconfig.not_aligned_name])
        if html_report and json_texts:
            from libs import plotter
            html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls, meta=True)
            html_saver.create_meta_report(output_dirpath, json_texts)

    quast._cleanup(corrected_dirpath)
    logger.main_info('')
    logger.main_info('MetaQUAST finished.')
    logger.finish_up(numbers=tuple(total_num_notifications), check_test=test_mode)
def main(args):
    """Run a single QUAST evaluation described by command-line arguments.

    The function parses ``args`` with ``getopt.gnu_getopt``, stores the
    resulting settings in ``qconfig``, sets up the output directory, corrects
    the reference and the contigs, runs the enabled analysis modules and
    finally saves the text/JSON/HTML/PDF reports.

    Parameters:
        args: list of command-line arguments (options followed by paths to
            files with contigs), without the program name.

    Returns:
        0 after a completed run, or 4 when no assembly file contains correct
        contigs after correction.

    The process is terminated through ``sys.exit`` when usage/help/version
    is printed or when the arguments cannot be parsed; ``logger.error`` is
    called with an ``exit_with_code`` value for several invalid inputs.
    """
    if ' ' in qconfig.QUAST_HOME:
        logger.error('QUAST does not support spaces in paths. \n'
                     'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
                     'Please, put QUAST in a different directory, then try again.\n',
                     to_stderr=True,
                     exit_with_code=3)

    if not args:
        qconfig.usage()
        sys.exit(0)

    # Re-import qconfig so that this run does not see values assigned to the
    # module by an earlier call of main() in the same process.
    reload(qconfig)

    try:
        options, contigs_fpaths = getopt.gnu_getopt(args, qconfig.short_options, qconfig.long_options)
    except getopt.GetoptError:
        _, exc_value, _ = sys.exc_info()
        # Same output as the former "print >> sys.stderr" statements:
        # the message followed by an empty line.
        sys.stderr.write(str(exc_value) + '\n')
        sys.stderr.write('\n')
        qconfig.usage()
        sys.exit(2)

    # Iterate over a copy: the test options below modify `options` in place.
    for opt, arg in options[:]:
        if opt == '--test' or opt == '--test-sv':
            options.remove((opt, arg))
            options += [
                ('-o', 'quast_test_output'),
                ('-R', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reference.fasta.gz')),  # for compiling MUMmer
                ('-O', os.path.join(qconfig.QUAST_HOME, 'test_data', 'operons.gff')),
                ('-G', os.path.join(qconfig.QUAST_HOME, 'test_data', 'genes.gff')),
                ('--gage', ''),  # for compiling GAGE Java classes
                ('--gene-finding', ''),
                ('--eukaryote', ''),
                ('--glimmer', ''),  # for compiling GlimmerHMM
            ]
            if opt == '--test-sv':
                options += [
                    ('-1', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reads1.fastq.gz')),
                    ('-2', os.path.join(qconfig.QUAST_HOME, 'test_data', 'reads2.fastq.gz')),
                ]
            contigs_fpaths += [
                os.path.join(qconfig.QUAST_HOME, 'test_data', 'contigs_1.fasta'),
                os.path.join(qconfig.QUAST_HOME, 'test_data', 'contigs_2.fasta'),
            ]
            qconfig.test = True

        if opt.startswith('--help') or opt == '-h':
            qconfig.usage(opt == "--help-hidden", short=False)
            sys.exit(0)

        elif opt.startswith('--version') or opt == '-v':
            qconfig.print_version()
            sys.exit(0)

    if not contigs_fpaths:
        logger.error("You should specify at least one file with contigs!\n")
        qconfig.usage()
        sys.exit(2)

    json_output_dirpath = None
    output_dirpath = None

    labels = None
    all_labels_from_dirs = False
    qconfig.is_combined_ref = False

    ref_fpath = ''
    genes_fpaths = []
    operons_fpaths = []
    bed_fpath = None
    reads_fpath_f = ''
    reads_fpath_r = ''

    # Yes, this is a code duplicating. But OptionParser is deprecated since version 2.7.
    for opt, arg in options:
        if opt in ('-d', '--debug'):
            qconfig.debug = True
            logger.set_up_console_handler(debug=True)

        elif opt in ('-o', "--output-dir"):
            output_dirpath = os.path.abspath(arg)
            qconfig.make_latest_symlink = False
            if ' ' in output_dirpath:
                logger.error('QUAST does not support spaces in paths. \n'
                             'You have specified ' + str(output_dirpath) + ' as an output path.\n'
                             'Please, use a different directory.\n',
                             to_stderr=True,
                             exit_with_code=3)

        elif opt in ('-G', "--genes"):
            genes_fpaths.append(assert_file_exists(arg, 'genes'))

        elif opt in ('-O', "--operons"):
            operons_fpaths.append(assert_file_exists(arg, 'operons'))

        elif opt in ('-R', "--reference"):
            ref_fpath = assert_file_exists(arg, 'reference')

        elif opt == "--contig-thresholds":
            qconfig.contig_thresholds = arg

        elif opt in ('-m', "--min-contig"):
            qconfig.min_contig = int(arg)

        elif opt in ('-t', "--threads"):
            # Values below 1 are clamped to a single thread.
            qconfig.max_threads = int(arg)
            if qconfig.max_threads < 1:
                qconfig.max_threads = 1

        elif opt in ('-c', "--min-cluster"):
            qconfig.min_cluster = int(arg)

        elif opt in ('-i', "--min-alignment"):
            qconfig.min_alignment = int(arg)

        elif opt == "--est-ref-size":
            qconfig.estimated_reference_size = int(arg)

        elif opt == "--gene-thresholds":
            qconfig.genes_lengths = arg

        elif opt in ('-j', '--save-json'):
            qconfig.save_json = True

        elif opt in ('-J', '--save-json-to'):
            qconfig.save_json = True
            qconfig.make_latest_symlink = False
            json_output_dirpath = arg

        elif opt == '--err-fpath':  # for web-quast
            qconfig.save_error = True
            qconfig.error_log_fname = arg

        elif opt in ('-s', "--scaffolds"):
            qconfig.scaffolds = True

        elif opt == "--gage":
            qconfig.with_gage = True

        elif opt in ('-e', "--eukaryote"):
            qconfig.prokaryote = False

        elif opt in ('-f', "--gene-finding"):
            qconfig.gene_finding = True

        elif opt in ('-a', "--ambiguity-usage"):
            if arg in ["none", "one", "all"]:
                qconfig.ambiguity_usage = arg
            else:
                # Previously an unsupported value was dropped silently; the
                # configured value is still kept, but the user is now told.
                logger.warning('Unsupported value "%s" for --ambiguity-usage is ignored. '
                               'Supported values: none, one, all.' % arg)

        elif opt in ('-u', "--use-all-alignments"):
            qconfig.use_all_alignments = True

        elif opt == "--strict-NA":
            qconfig.strict_NA = True

        elif opt in ('-x', "--extensive-mis-size"):
            extensive_mis_size = int(arg)
            if extensive_mis_size <= qconfig.MAX_INDEL_LENGTH:
                # NOTE(review): the positional 1 is presumably the exit code
                # of logger.error -- confirm against its signature.
                logger.error("--extensive-mis-size should be greater than maximum indel length (%d)!"
                             % qconfig.MAX_INDEL_LENGTH, 1, to_stderr=True)
            qconfig.extensive_misassembly_threshold = extensive_mis_size

        elif opt == '--no-snps':
            qconfig.show_snps = False

        elif opt == '--no-plots':
            qconfig.draw_plots = False

        elif opt == '--no-html':
            qconfig.html_report = False

        elif opt == '--no-check':
            qconfig.no_check = True

        elif opt == '--no-gc':
            qconfig.no_gc = True

        elif opt == '--fast':  # --no-gc, --no-plots, --no-snps
            #qconfig.no_check = True  # too risky to include
            qconfig.no_gc = True
            qconfig.show_snps = False
            qconfig.draw_plots = False
            qconfig.html_report = False

        elif opt == '--plots-format':
            if arg.lower() in qconfig.supported_plot_extensions:
                qconfig.plot_extension = arg.lower()
            else:
                logger.error('Format "%s" is not supported. Please, use one of the supported formats: %s.' %
                             (arg, ', '.join(qconfig.supported_plot_extensions)),
                             to_stderr=True, exit_with_code=2)

        elif opt == '--meta':
            qconfig.meta = True

        elif opt == '--no-check-meta':
            qconfig.no_check = True
            qconfig.no_check_meta = True

        elif opt == '--references-list':
            # Accepted but not used by this function.
            pass

        elif opt in ('-l', '--labels'):
            labels = parse_labels(arg, contigs_fpaths)

        elif opt == '-L':
            all_labels_from_dirs = True

        elif opt == '--glimmer':
            qconfig.glimmer = True

        elif opt == '--combined-ref':
            qconfig.is_combined_ref = True

        elif opt == '--memory-efficient':
            qconfig.memory_efficient = True

        elif opt == '--silent':
            qconfig.silent = True

        elif opt in ('-1', '--reads1'):
            reads_fpath_f = arg

        elif opt in ('-2', '--reads2'):
            reads_fpath_r = arg

        elif opt == '--bed-file':
            bed_fpath = arg

        elif opt == '--contig-alignment-html':
            qconfig.create_contig_alignment_html = True

        else:
            logger.error('Unknown option: %s. Use -h for help.' % (opt + ' ' + arg),
                         to_stderr=True, exit_with_code=2)

    for contigs_fpath in contigs_fpaths:
        assert_file_exists(contigs_fpath, 'contigs')

    labels = process_labels(contigs_fpaths, labels, all_labels_from_dirs)

    output_dirpath, json_output_dirpath, existing_alignments = \
        _set_up_output_dir(output_dirpath, json_output_dirpath,
                           qconfig.make_latest_symlink, qconfig.save_json)

    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    logger.set_up_file_handler(output_dirpath)
    # Rebuild the command line from the (possibly test-expanded) options so
    # that the log shows what is actually being run.
    args = [os.path.realpath(__file__)]
    for k, v in options:
        args.extend([k, v])
    args.extend(contigs_fpaths)
    logger.print_command_line(args, wrap_after=None, is_main=True)
    logger.start()

    if existing_alignments:
        logger.main_info()
        logger.notice("Output directory already exists. Existing Nucmer alignments can be used.")
        qutils.remove_reports(output_dirpath)

    # Thresholds arrive as comma-separated strings ("None" means no
    # thresholds); convert them to lists of ints.
    if qconfig.contig_thresholds == "None":
        qconfig.contig_thresholds = []
    else:
        qconfig.contig_thresholds = [int(value) for value in qconfig.contig_thresholds.split(",")]
    if qconfig.genes_lengths == "None":
        qconfig.genes_lengths = []
    else:
        qconfig.genes_lengths = [int(value) for value in qconfig.genes_lengths.split(",")]

    qconfig.set_max_threads(logger)

    logger.main_info()
    logger.print_params()

    ########################################################################
    from libs import reporting
    reload(reporting)

    if qconfig.is_combined_ref:
        # The corrected files live one level above the output directory in
        # this mode, and the directory is not recreated here.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        ref_fpath = _correct_reference(ref_fpath, corrected_dirpath)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = _correct_contigs(
        contigs_fpaths, corrected_dirpath, reporting, labels)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    reads_fpaths = []
    if reads_fpath_f:
        reads_fpaths.append(reads_fpath_f)
    if reads_fpath_r:
        reads_fpaths.append(reads_fpath_r)
    if reads_fpaths:
        # The result replaces any BED file given with --bed-file.
        bed_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths, reads_fpaths, None,
                                      os.path.join(output_dirpath, qconfig.variation_dirname),
                                      external_logger=logger)

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.",
                     fake_if_nested_run=True)
        return 4

    qconfig.assemblies_fpaths = contigs_fpaths

    if qconfig.with_gage:
        ########################################################################
        ### GAGE
        ########################################################################
        if not ref_fpath:
            logger.warning("GAGE can't be run without a reference and will be skipped.")
        else:
            from libs import gage
            gage.do(ref_fpath, contigs_fpaths, output_dirpath)

    # Where all pdfs will be saved
    all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)
    all_pdf_file = None

    if qconfig.draw_plots or qconfig.html_report:
        from libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.
        try:
            from matplotlib.backends.backend_pdf import PdfPages
            all_pdf_file = PdfPages(all_pdf_fpath)
        except Exception:
            # Best effort: without a usable matplotlib PDF backend the PDF
            # report is skipped. KeyboardInterrupt and SystemExit are no
            # longer swallowed here.
            all_pdf_file = None

    if json_output_dirpath:
        from libs.html_saver import json_saver
        if json_saver.simplejson_error:
            # Disable JSON output for the rest of the run.
            json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from libs import basic_stats
    basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
                   json_output_dirpath, output_dirpath)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from libs import contigs_analyzer
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, qconfig.prokaryote,
            os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, bed_fpath)
        # Keep only the assemblies whose alignment finished with status OK.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    if aligned_contigs_fpaths and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, json_output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from libs import genome_analyzer
        genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, json_output_dirpath,
            genes_fpaths, operons_fpaths, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    if qconfig.gene_finding or qconfig.glimmer:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from libs import glimmer
            glimmer.do(contigs_fpaths, qconfig.genes_lengths,
                       os.path.join(output_dirpath, 'predicted_genes'))
        else:
            ########################################################################
            ### GeneMark
            ########################################################################
            from libs import genemark
            genemark.do(contigs_fpaths, qconfig.genes_lengths,
                        os.path.join(output_dirpath, 'predicted_genes'),
                        qconfig.prokaryote, qconfig.meta)
    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")

    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots:
        logger.print_timestamp()
        logger.main_info('Drawing large plots...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath and qconfig.show_snps:
                contig_report_fpath_pattern = os.path.join(
                    detailed_contigs_reports_dirpath, 'contigs_report_%s.stdout')
            else:
                contig_report_fpath_pattern = None

            # Count the steps that will actually run, for the "N of M" messages.
            number_of_steps = sum([int(bool(value))
                                   for value in [contig_report_fpath_pattern, all_pdf_file]])
            if contig_report_fpath_pattern:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info(' 1 of %d: Creating contig alignment plot...' % number_of_steps)
                from libs import contig_alignment_plotter
                contig_alignment_plot_fpath = contig_alignment_plotter.do(
                    contigs_fpaths, contig_report_fpath_pattern,
                    output_dirpath, ref_fpath, similar=True)

            if all_pdf_file:
                # full report in PDF format: all tables and plots
                logger.main_info(' %d of %d: Creating PDF with all tables and plots...'
                                 % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_file)
            logger.main_info('Done')
        except KeyboardInterrupt:
            logger.main_info('..step skipped!')
            # The PDF may be missing (e.g. PdfPages could not be created
            # above); removing it unconditionally would raise OSError and
            # turn "skip this step" into a crash.
            if os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info(' Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info(' Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if json_output_dirpath:
        json_saver.save_total_report(json_output_dirpath, qconfig.min_contig, ref_fpath)

    if qconfig.html_report:
        from libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if os.path.isfile(all_pdf_fpath):
        logger.main_info(' PDF version (tables and plots) saved to ' + all_pdf_fpath)

    if contig_alignment_plot_fpath:
        logger.main_info(' Contig alignment plot: %s' % contig_alignment_plot_fpath)

    _cleanup(corrected_dirpath)
    logger.finish_up(check_test=qconfig.test)
    return 0