/
quast.py
282 lines (238 loc) · 12.4 KB
/
quast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/usr/bin/env python
############################################################################
# Copyright (c) 2015-2016 Saint Petersburg State University
# Copyright (c) 2011-2015 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################
# Standard-library imports.
from os.path import basename
import sys
import os
import shutil
import getopt
import re
# qconfig holds the run-wide settings; parse_options is called from main()
# with the logger and the argument list.
from quast_libs import qconfig
from quast_libs.options_parser import parse_options
# The interpreter version check runs here, before the remaining quast_libs
# modules are imported.
# NOTE(review): presumably so an unsupported Python fails early with a clear
# message instead of an import-time error -- confirm in qconfig.
qconfig.check_python_version()
from quast_libs import qutils, reads_analyzer
from quast_libs.qutils import cleanup
from quast_libs.log import get_logger
# Module-level logger shared by main() and the __main__ guard; the console
# handler is attached at import time.
logger = get_logger(qconfig.LOGGER_DEFAULT_NAME)
logger.set_up_console_handler()
# Register the 'site_packages' directory under qconfig.LIBS_LOCATION as a site
# directory so modules placed there become importable.
from site import addsitedir
addsitedir(os.path.join(qconfig.LIBS_LOCATION, 'site_packages'))
# Module-level flag. main() below reads qconfig.is_combined_ref, not this name.
# NOTE(review): check whether anything still uses this module attribute.
is_combined_ref = False
def main(args):
    """Run the whole QUAST pipeline for one set of command-line arguments.

    The steps, in order: parse options, correct the reference and the contigs,
    optionally analyse reads, optionally run GAGE, compute basic statistics,
    align contigs to the reference (when one is given), compute aligned and
    genome statistics, optionally predict genes, save the total report, build
    the large visual summaries (Icarus / PDF) and finally print where the
    results were written.

    :param args: command-line arguments WITHOUT the program name
                 (the ``__main__`` guard passes ``sys.argv[1:]``).
    :return: ``4`` when no assembly file yields usable contigs, otherwise
             whatever ``logger.finish_up(check_test=qconfig.test)`` returns.

    Calls ``sys.exit(0)`` after printing usage when ``args`` is empty.
    """
    if ' ' in qconfig.QUAST_HOME:
        # exit_with_code=3 is handed to the logger.
        # NOTE(review): presumably the logger terminates the process itself -- confirm.
        logger.error('QUAST does not support spaces in paths. \n'
                     'You are trying to run it from ' + str(qconfig.QUAST_HOME) + '\n'
                     'Please, put QUAST in a different directory, then try again.\n',
                     to_stderr=True,
                     exit_with_code=3)

    if not args:
        qconfig.usage()
        sys.exit(0)

    # NOTE(review): `reload` is the Python 2 builtin. Re-importing qconfig here
    # presumably resets settings left over from a previous main() call in the
    # same process -- confirm.
    reload(qconfig)
    quast_path = [os.path.realpath(__file__)]
    quast_py_args, contigs_fpaths = parse_options(logger, quast_path + args)
    output_dirpath, ref_fpath, labels = qconfig.output_dirpath, qconfig.reference, qconfig.labels
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)
    logger.main_info()
    logger.print_params()

    ########################################################################
    from quast_libs import reporting
    # Keep the already collected reports across the module reload.
    reports = reporting.reports
    reload(reporting)
    reporting.reports = reports
    reporting.assembly_fpaths = []
    from quast_libs import plotter  # Do not remove this line! It would lead to a warning in matplotlib.

    if qconfig.is_combined_ref:
        # In combined-reference mode the corrected-input directory one level
        # above the output directory is used, and it is not recreated here.
        corrected_dirpath = os.path.join(output_dirpath, '..', qconfig.corrected_dirname)
    else:
        if os.path.isdir(corrected_dirpath):
            shutil.rmtree(corrected_dirpath)
        os.mkdir(corrected_dirpath)

    # PROCESSING REFERENCE
    if ref_fpath:
        logger.main_info()
        logger.main_info('Reference:')
        ref_fpath = qutils.correct_reference(ref_fpath, corrected_dirpath)
    else:
        ref_fpath = ''

    # PROCESSING CONTIGS
    logger.main_info()
    logger.main_info('Contigs:')

    contigs_fpaths, old_contigs_fpaths = qutils.correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting)
    for contigs_fpath in contigs_fpaths:
        report = reporting.get(contigs_fpath)
        report.add_field(reporting.Fields.NAME, qutils.label_from_fpath(contigs_fpath))

    qconfig.assemblies_num = len(contigs_fpaths)

    # Reads analysis needs a reference and at least one of: read files, SAM, BAM.
    reads_fpaths = []
    cov_fpath = []
    physical_cov_fpath = []
    if qconfig.forward_reads:
        reads_fpaths.append(qconfig.forward_reads)
    if qconfig.reverse_reads:
        reads_fpaths.append(qconfig.reverse_reads)
    if (reads_fpaths or qconfig.sam or qconfig.bam) and ref_fpath:
        bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths, reads_fpaths, None,
                                                                     os.path.join(output_dirpath, qconfig.variation_dirname),
                                                                     external_logger=logger, sam_fpath=qconfig.sam, bam_fpath=qconfig.bam, bed_fpath=qconfig.bed)
        qconfig.bed = bed_fpath

    if not contigs_fpaths:
        logger.error("None of the assembly files contains correct contigs. "
                     "Please, provide different files or decrease --min-contig threshold.",
                     fake_if_nested_run=True)
        return 4

    if qconfig.used_colors and qconfig.used_ls:
        for i, label in enumerate(labels):
            plotter.dict_color_and_ls[label] = (qconfig.used_colors[i], qconfig.used_ls[i])

    qconfig.assemblies_fpaths = contigs_fpaths
    if qconfig.with_gage:
        ########################################################################
        ### GAGE
        ########################################################################
        if not ref_fpath:
            logger.warning("GAGE can't be run without a reference and will be skipped.")
        else:
            from quast_libs import gage
            gage.do(ref_fpath, contigs_fpaths, output_dirpath)

    # Where all pdfs will be saved
    all_pdf_fpath = os.path.join(output_dirpath, qconfig.plots_fname)
    all_pdf_file = None

    if qconfig.draw_plots and plotter.can_draw_plots:
        # Best effort: if matplotlib's PDF backend cannot be imported or the
        # file cannot be opened, continue without the combined PDF.
        # `except Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        try:
            from matplotlib.backends.backend_pdf import PdfPages
            all_pdf_file = PdfPages(all_pdf_fpath)
        except Exception:
            all_pdf_file = None

    if qconfig.json_output_dirpath:
        from quast_libs.html_saver import json_saver
        if json_saver.simplejson_error:
            # Disable JSON output for the rest of the run. This must update
            # qconfig, because every later step reads
            # qconfig.json_output_dirpath; a local assignment has no effect.
            qconfig.json_output_dirpath = None

    ########################################################################
    ### Stats and plots
    ########################################################################
    from quast_libs import basic_stats
    basic_stats.do(ref_fpath, contigs_fpaths, os.path.join(output_dirpath, 'basic_stats'),
                   qconfig.json_output_dirpath, output_dirpath)

    aligned_contigs_fpaths = []
    aligned_lengths_lists = []
    contig_alignment_plot_fpath = None
    icarus_html_fpath = None
    if ref_fpath:
        ########################################################################
        ### former PLANTAKOLYA, PLANTAGORA
        ########################################################################
        from quast_libs import contigs_analyzer
        nucmer_statuses, aligned_lengths_per_fpath = contigs_analyzer.do(
            ref_fpath, contigs_fpaths, qconfig.prokaryote, os.path.join(output_dirpath, 'contigs_reports'),
            old_contigs_fpaths, qconfig.bed)
        # Only assemblies whose alignment finished with status OK take part
        # in the aligned / genome statistics below.
        for contigs_fpath in contigs_fpaths:
            if nucmer_statuses[contigs_fpath] == contigs_analyzer.NucmerStatus.OK:
                aligned_contigs_fpaths.append(contigs_fpath)
                aligned_lengths_lists.append(aligned_lengths_per_fpath[contigs_fpath])

    # Before continue evaluating, check if nucmer didn't skip all of the contigs files.
    detailed_contigs_reports_dirpath = None
    features_containers = None
    if aligned_contigs_fpaths and ref_fpath:
        detailed_contigs_reports_dirpath = os.path.join(output_dirpath, 'contigs_reports')

        ########################################################################
        ### NAx and NGAx ("aligned Nx and NGx")
        ########################################################################
        from quast_libs import aligned_stats
        aligned_stats.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.json_output_dirpath,
            aligned_lengths_lists, os.path.join(output_dirpath, 'aligned_stats'))

        ########################################################################
        ### GENOME_ANALYZER
        ########################################################################
        from quast_libs import genome_analyzer
        features_containers = genome_analyzer.do(
            ref_fpath, aligned_contigs_fpaths, output_dirpath, qconfig.json_output_dirpath,
            qconfig.genes, qconfig.operons, detailed_contigs_reports_dirpath,
            os.path.join(output_dirpath, 'genome_stats'))

    genes_by_labels = None
    if qconfig.gene_finding:
        if qconfig.glimmer:
            ########################################################################
            ### Glimmer
            ########################################################################
            from quast_libs import glimmer
            genes_by_labels = glimmer.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'))
        else:
            ########################################################################
            ### GeneMark
            ########################################################################
            from quast_libs import genemark
            genes_by_labels = genemark.do(contigs_fpaths, qconfig.genes_lengths, os.path.join(output_dirpath, 'predicted_genes'),
                                          qconfig.prokaryote, qconfig.meta)
    else:
        logger.main_info("")
        logger.notice("Genes are not predicted by default. Use --gene-finding option to enable it.")

    ########################################################################
    reports_fpaths, transposed_reports_fpaths = reporting.save_total(output_dirpath)

    ########################################################################
    ### LARGE DRAWING TASKS
    ########################################################################
    if qconfig.draw_plots or qconfig.create_icarus_html:
        logger.print_timestamp()
        logger.main_info('Creating large visual summaries...')
        logger.main_info('This may take a while: press Ctrl-C to skip this step..')
        try:
            if detailed_contigs_reports_dirpath:
                report_for_icarus_fpath_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.icarus_report_fname_pattern)
                stdout_pattern = os.path.join(detailed_contigs_reports_dirpath, qconfig.contig_report_fname_pattern)
            else:
                report_for_icarus_fpath_pattern = None
                stdout_pattern = None
            draw_alignment_plots = qconfig.draw_svg or qconfig.create_icarus_html
            # One step per enabled task (Icarus viewers, combined PDF).
            number_of_steps = sum([int(bool(value)) for value in [draw_alignment_plots, all_pdf_file]])
            if draw_alignment_plots:
                ########################################################################
                ### VISUALIZE CONTIG ALIGNMENT
                ########################################################################
                logger.main_info(' 1 of %d: Creating Icarus viewers...' % number_of_steps)
                from quast_libs import icarus
                icarus_html_fpath, contig_alignment_plot_fpath = icarus.do(
                    contigs_fpaths, report_for_icarus_fpath_pattern, output_dirpath, ref_fpath,
                    stdout_pattern=stdout_pattern, features=features_containers, cov_fpath=cov_fpath,
                    physical_cov_fpath=physical_cov_fpath, json_output_dir=qconfig.json_output_dirpath,
                    genes_by_labels=genes_by_labels)

            if all_pdf_file:
                # full report in PDF format: all tables and plots
                logger.main_info(' %d of %d: Creating PDF with all tables and plots...' % (number_of_steps, number_of_steps))
                plotter.fill_all_pdf_file(all_pdf_file)
            logger.main_info('Done')
        except KeyboardInterrupt:
            logger.main_info('..step skipped!')
            # The PDF may never have been created (plots disabled, or the
            # PdfPages set-up failed), so only remove it when it exists.
            if os.path.isfile(all_pdf_fpath):
                os.remove(all_pdf_fpath)

    ########################################################################
    ### TOTAL REPORT
    ########################################################################
    logger.print_timestamp()
    logger.main_info('RESULTS:')
    logger.main_info(' Text versions of total report are saved to ' + reports_fpaths)
    logger.main_info(' Text versions of transposed total report are saved to ' + transposed_reports_fpaths)

    if qconfig.json_output_dirpath:
        # json_saver was imported above under the same condition.
        json_saver.save_total_report(qconfig.json_output_dirpath, qconfig.min_contig, ref_fpath)

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_colors(output_dirpath, contigs_fpaths, plotter.dict_color_and_ls)
        html_saver.save_total_report(output_dirpath, qconfig.min_contig, ref_fpath)

    if os.path.isfile(all_pdf_fpath):
        logger.main_info(' PDF version (tables and plots) is saved to ' + all_pdf_fpath)

    if icarus_html_fpath:
        logger.main_info(' Icarus (contig browser) is saved to %s' % icarus_html_fpath)

    if qconfig.draw_svg and contig_alignment_plot_fpath:
        logger.main_info(' Contig alignment plot is saved to %s' % contig_alignment_plot_fpath)

    cleanup(corrected_dirpath)
    return logger.finish_up(check_test=qconfig.test)
if __name__ == '__main__':
    # Script entry point: run the pipeline and use its return value as the
    # process exit status.
    try:
        return_code = main(sys.argv[1:])
        # sys.exit() rather than the site-provided exit() helper: the helper
        # is meant for interactive sessions and is not defined when the site
        # module is not loaded (e.g. `python -S`). SystemExit is not an
        # Exception subclass, so the handler below does not intercept it.
        sys.exit(return_code)
    except Exception:
        # Last-resort handler: log the exception, then report the failure
        # through logger.error with exit_with_code=1.
        _, exc_value, _ = sys.exc_info()
        logger.exception(exc_value)
        logger.error('exception caught!', exit_with_code=1, to_stderr=True)