def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Collect the SAT attributes from the three inputs and write the report JSON.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory the report is written to
    """
    named_inputs = [
        ('aligned_reads_file', aligned_reads_file),
        ('mapping_stats_report', mapping_stats_report),
        ('variants_report', variants_report),
    ]
    _validate_inputs(named_inputs)

    mapping_data = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    hole_data = _get_read_hole_data(reads, inst)
    variants_data = _get_variants_data(variants_report)

    alignment_set = AlignmentSet(aligned_reads_file)
    rpt = Report(meta_rpt.id, dataset_uuids=(alignment_set.uuid, ))

    # (attribute id, dict that holds its value), in emission order.
    attribute_sources = (
        (Constants.A_INSTRUMENT, hole_data),
        (Constants.A_COVERAGE, variants_data),
        (Constants.A_CONCORDANCE, variants_data),
        (Constants.A_READLENGTH, mapping_data),
        (Constants.A_READS, hole_data),
    )
    for attr_id, values in attribute_sources:
        rpt.add_attribute(Attribute(attr_id, values[attr_id]))

    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
def run_reference_dataset_report(reference_ds, output_json):
    """
    Write a diagnostic JSON report for a reference dataset.

    The report carries the dataset attributes plus the local host name and
    the directory of ``output_json``, and whatever plot groups
    ``try_fasta_to_plot_group`` returns for the first external file.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: (str) path of the JSON report to write
    :return: 0 after the report has been written
    """
    task_dir = os.path.dirname(output_json)
    host_name = socket.getfqdn()

    attributes = _dataset_to_attribute_reports(reference_ds)
    extra_attributes = (
        Attribute("host", host_name, name="Host"),
        Attribute("task_dir", task_dir, name="Task Directory"),
    )
    for extra in extra_attributes:
        attributes.append(extra)

    first_fasta = reference_ds.toExternalFiles()[0]
    plot_groups = try_fasta_to_plot_group(first_fasta, task_dir)

    diagnostic_report = Report("dev_diagnostic_report",
                               attributes=attributes,
                               plotgroups=plot_groups,
                               dataset_uuids=[reference_ds.uuid])
    diagnostic_report.write_json(output_json)
    return 0
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Build the control report and write it as JSON.

    When no control reads are found, the error report from
    ``_get_error_report`` is written instead.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot-group helpers
    :param dpi: accepted but not used by this function
    :param dumpdata: accepted but not used by this function
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)

    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)

    if _get_num_control_reads(control_data) == 0:
        # Not sure this ever happens, but logic exists in
        # makeControlReport.py
        rpt = _get_error_report()
    else:
        attributes = _get_attributes(name, control_data, sample_data)
        plot_groups = [
            _get_plot_group_score(control_data, sample_data, output_dir),
            _get_plot_group_length(control_data, sample_data, output_dir),
        ]
        rpt = Report(meta_rpt.id, attributes=attributes,
                     plotgroups=plot_groups)
        rpt = meta_rpt.apply_view(rpt)

    rpt.write_json(os.path.join(output_dir, report))
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Collect the SAT attributes from the three inputs and write the report JSON.

    Each attribute is labelled from ``Constants.ATTR_LABELS``.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory the report is written to
    """
    named_inputs = [
        ('aligned_reads_file', aligned_reads_file),
        ('mapping_stats_report', mapping_stats_report),
        ('variants_report', variants_report),
    ]
    _validate_inputs(named_inputs)

    mapping_data = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    hole_data = _get_read_hole_data(reads, inst)
    variants_data = _get_variants_data(variants_report)

    rpt = Report('sat')

    # (attribute id, dict that holds its value), in emission order.
    attribute_sources = (
        ('instrument', hole_data),
        ('coverage', variants_data),
        ('accuracy', variants_data),
        ('mapped_readlength_mean', mapping_data),
        ('reads_in_cell', hole_data),
    )
    for attr_id, values in attribute_sources:
        rpt.add_attribute(Attribute(attr_id, values[attr_id],
                                    Constants.ATTR_LABELS[attr_id]))

    rpt.write_json(os.path.join(output_dir, report))
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Produce the control report JSON from the control reads and the filtered
    subread summary.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot-group helpers
    :param dpi: accepted but not used by this function
    :param dumpdata: accepted but not used by this function
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)

    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    num_control_reads = _get_num_control_reads(control_data)

    report_path = os.path.join(output_dir, report)

    if num_control_reads == 0:
        # Not sure this ever happens, but logic exists in
        # makeControlReport.py
        _get_error_report().write_json(report_path)
        return

    atts = _get_attributes(name, control_data, sample_data)
    score_group = _get_plot_group_score(control_data, sample_data, output_dir)
    length_group = _get_plot_group_length(control_data, sample_data,
                                          output_dir)

    control_report = Report(meta_rpt.id, attributes=atts,
                            plotgroups=[score_group, length_group])
    control_report = meta_rpt.apply_view(control_report)
    control_report.write_json(report_path)
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir, is_minor_variants_rpt=False):
    """
    Tabulate the top variants found in a GFF file and write the report JSON.

    :param gff: (str) path to variants.gff (or rare_variants.gff). Note, could
        also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int) passed through to VariantFinder
    :param batch_sort_size: (int) passed through to VariantFinder
    :param report: (str) report name
    :param output_dir: (str) output dir
    :param is_minor_variants_rpt: (bool) True to create a minor top variant
        report. False to create a variant report.
    :return: 0 after the report has been written
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)

    if is_minor_variants_rpt:
        builder_cls = MinorVariantTableBuilder
    else:
        builder_cls = VariantTableBuilder
    table_builder = builder_cls()

    finder = VariantFinder(gff, reference, how_many, batch_sort_size)
    for variant in finder.find_top():
        table_builder.add_variant(variant)

    reference_uuid = ReferenceSet(reference).uuid
    top_report = Report(Constants.R_ID,
                        tables=[table_builder.table],
                        dataset_uuids=(reference_uuid,))
    top_report = spec.apply_view(top_report)
    top_report.write_json(os.path.join(output_dir, report))
    return 0
def make_variants_report(aln_summ_gff, variants_gff, reference,
                         max_contigs_to_plot, report, output_dir, dpi=72,
                         dumpdata=True):
    """
    Build the variants report (table, attributes and one plot group) and
    write it as JSON.

    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot-group helper
    :param dpi: accepted but not used by this function
    :param dumpdata: accepted but not used by this function
    :return: the Report object that was written
    """
    named_inputs = [
        ('aln_summ_gff', aln_summ_gff),
        ('variants_gff', variants_gff),
        ('reference', reference),
    ]
    _validate_inputs(named_inputs)

    # Reference entry and its top contigs.
    ref_entry = openReference(reference)
    top_contigs = get_top_contigs_from_ref_entry(ref_entry,
                                                 max_contigs_to_plot)

    # Pull the GFF data; the variants GFF is merged into ref_data in place.
    ref_data, contig_variants = _extract_alignment_summ_data(aln_summ_gff,
                                                             top_contigs)
    _append_variants_gff_data(ref_data, variants_gff)

    # Report components.
    table, atts = _get_consensus_table_and_attributes(ref_data, ref_entry)
    plotgroup = _create_variants_plot_grp(top_contigs, contig_variants,
                                          output_dir)

    reference_uuid = ReferenceSet(reference).uuid
    rpt = Report(Constants.R_ID,
                 plotgroups=[plotgroup],
                 attributes=atts,
                 tables=[table],
                 dataset_uuids=(reference_uuid,))
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Collect the SAT attributes from the three inputs and write the report JSON.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory the report is written to
    """
    named_inputs = [
        ('aligned_reads_file', aligned_reads_file),
        ('mapping_stats_report', mapping_stats_report),
        ('variants_report', variants_report),
    ]
    _validate_inputs(named_inputs)

    mapping_data = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    hole_data = _get_read_hole_data(reads, inst)
    variants_data = _get_variants_data(variants_report)

    alignment_set = AlignmentSet(aligned_reads_file)
    rpt = Report(Constants.R_ID, dataset_uuids=(alignment_set.uuid,))

    # (attribute id, dict that holds its value), in emission order.
    attribute_sources = (
        (Constants.A_INSTRUMENT, hole_data),
        (Constants.A_COVERAGE, variants_data),
        (Constants.A_CONCORDANCE, variants_data),
        (Constants.A_READLENGTH, mapping_data),
        (Constants.A_READS, hole_data),
    )
    for attr_id, values in attribute_sources:
        rpt.add_attribute(Attribute(attr_id, values[attr_id]))

    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir):
    """
    Tabulate the top variants found in a GFF file and write the report JSON.

    :param gff: (str) path to variants.gff (or rare_variants.gff). Note, could
        also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int) passed through to VariantFinder
    :param batch_sort_size: (int) passed through to VariantFinder
    :param report: (str) report name
    :param output_dir: (str) output dir
    :return: 0 after the report has been written
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)

    table_builder = VariantTableBuilder()
    finder = VariantFinder(gff, reference, how_many, batch_sort_size)
    for variant in finder.find_top():
        table_builder.add_variant(variant)

    reference_uuid = ReferenceSet(reference).uuid
    top_report = Report(Constants.R_ID,
                        tables=[table_builder.table],
                        dataset_uuids=(reference_uuid, ))
    top_report = spec.apply_view(top_report)
    top_report.write_json(os.path.join(output_dir, report))
    return 0
def run_reference_dataset_report(reference_ds, output_json):
    """
    Write a diagnostic JSON report for a reference dataset.

    The report carries the dataset attributes plus the local host name and
    the directory of ``output_json``, and whatever plot groups
    ``try_fasta_to_plot_group`` returns for the first external file.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: (str) path of the JSON report to write
    :return: 0 after the report has been written
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()

    attributes = _dataset_to_attribute_reports(reference_ds)
    attributes.append(Attribute("host", host, name="Host"))
    attributes.append(Attribute("task_dir", output_dir,
                                name="Task Directory"))

    fasta_file = reference_ds.toExternalFiles()[0]
    # Pass the directory, not the JSON file path, as the plot output
    # location.
    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)

    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])
    report.write_json(output_json)
    return 0
def _example_main(input_file, output_file, **kwargs):
    """
    Render the requested accuracy plots to PNG and write a report that
    references them.

    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    The HTML, PNG and thumbnail files use relative names, so they are created
    in the current working directory. The module-level ``phantomjs_driver``
    is always quit before returning, including when an error is raised.

    :param input_file: sequence; item 0 is handed to ``_get_dset_paths`` and
        item 1 to ``_get_plots_to_generate``
    :param output_file: (str) path of the JSON report to write
    :param kwargs: extra options; only logged
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))
    try:
        # Open dset CSV. Store absolute path of each alignment set.
        dset_paths = _get_dset_paths(input_file[0])

        # Open plots CSV. Store names of plots to produce.
        plots_to_generate = _get_plots_to_generate(input_file[1])

        dsets_kpis = {}
        for f in dset_paths:
            dset = openDataSet(dset_paths[f]['aset'])
            subsampled_dset = _subsample_alignments(dset)
            dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

        # figure tuple has form (plot_group_id, plot_id, figure)
        figures = []
        if 'accuracy_vs_readlength' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy_vs_readlength',
                 accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)))
        if 'accuracy' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy',
                 accuracy_plots._plot_accuracy_distribution(dsets_kpis)))
        if 'accuracy_boxplot' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy_boxplot',
                 accuracy_plots._plot_accuracy_boxplots(dsets_kpis)))

        all_plots = {}  # dictionary of plots. keys are groups
        for plot_group, plot_id, fig in figures:
            html_path = '{i}.html'.format(i=plot_id)
            plot_path = '{i}.png'.format(i=plot_id)
            thumb_path = '{i}_thumb.png'.format(i=plot_id)

            plot(fig, filename=html_path, show_link=False, auto_open=False)
            phantomjs_driver.set_window_size(1920, 1080)
            phantomjs_driver.get(html_path)
            phantomjs_driver.save_screenshot(plot_path)
            phantomjs_driver.get(html_path)
            phantomjs_driver.save_screenshot(thumb_path)
            # The HTML page is only an intermediate for the screenshots.
            os.remove(html_path)

            all_plots.setdefault(plot_group, []).append(
                Plot(plot_id, plot_path, thumbnail=thumb_path))

        plot_groups = [PlotGroup(group_id, plots=plots)
                       for group_id, plots in all_plots.items()]

        report = Report('mh_toy', tables=(), plotgroups=plot_groups,
                        attributes=())
        report.write_json(output_file)
    finally:
        # Quit on every exit path so a failure does not leave the driver
        # running.
        phantomjs_driver.quit()
    return 0
def write_random_report(path, nrecords):
    """
    Write a mock report holding ``nrecords`` integer attributes.

    Attribute ``i`` has id ``mock_attr_<i>``, value ``i`` and name
    ``Attr <i>``.

    :param path: (str) path of the JSON report to write
    :param nrecords: (int) number of attributes to generate
    :return: the Report object that was written
    """
    # range (not xrange) so this runs on both Python 2 and Python 3.
    attributes = [
        Attribute("mock_attr_{i}".format(i=i), i, name="Attr {i}".format(i=i))
        for i in range(nrecords)
    ]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r
def write_task_report(run_time, nproc, exit_code):
    """
    Write ``task-report.json`` (relative to the current working directory)
    describing one workflow task run.

    :param run_time: value stored as the ``run_time`` attribute
    :param nproc: value stored as the ``nproc`` attribute
    :param exit_code: value stored as the ``exit_code`` attribute
    """
    uname = os.uname()
    # Index 1 of os.uname() is the node name, index 0 the system name.
    attribute_values = (
        ("host", uname[1]),
        ("system", uname[0]),
        ("nproc", nproc),
        ("run_time", run_time),
        ("exit_code", exit_code),
    )
    attributes = [Attribute(attr_id, value=attr_value)
                  for attr_id, attr_value in attribute_values]

    task_report = Report("workflow_task",
                         title="Workflow Task Report",
                         attributes=attributes,
                         tags=("internal", ))
    task_report.write_json("task-report.json")
def run_dev_ccs_report(rtc):
    """
    Write a report with the record count and total length of a
    ConsensusReadSet.

    :param rtc: resolved tool contract; the first input file is the dataset
        and the first output file is the JSON report path
    :return: 0 after the report has been written
    """
    from pbcore.io import ConsensusReadSet

    dataset_path = rtc.task.input_files[0]
    with ConsensusReadSet(dataset_path) as ds:
        ds.updateCounts()
        counts = (
            ("number_of_records", ds.numRecords),
            ("total_length", ds.totalLength),
        )
        attr = [Attribute(attr_id, value=count) for attr_id, count in counts]
        ccs_report = Report("ccs_report",
                            title="ConsensusReadSet XML Report",
                            attributes=attr)
        ccs_report.write_json(rtc.task.output_files[0])
    return 0
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    Builds the two QMod plots from the HDF5 file and writes a report holding
    them in a single plot group.

    :param modifications_h5: (str) path to the modifications HDF5 file
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot helpers
    :param dpi: (int) passed through to the plot helpers
    :return: 0 after the report has been written
    """
    # Open explicitly read-only and close the handle as soon as the plots
    # exist; the default h5py mode was historically 'a', which can create or
    # modify the file.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)

    pg = PlotGroup(Constants.PG_KIN,
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(meta_rpt.id, plotgroups=[pg])
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    Builds the two QMod plots from the HDF5 file and writes a report holding
    them in a single plot group.

    :param modifications_h5: (str) path to the modifications HDF5 file
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot helpers
    :param dpi: (int) passed through to the plot helpers
    :return: 0 after the report has been written
    """
    # Open explicitly read-only and close the handle as soon as the plots
    # exist; the default h5py mode was historically 'a', which can create or
    # modify the file.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)

    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def make_report(self, gff, reference, max_contigs_to_plot, report, output_dir):
    """
    Build the coverage report (two attributes plus any non-empty plot groups)
    and write it as JSON.

    :param gff: (str) path to alignment_summary.gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot-group helpers
    :return: the Report object that was written
    """
    _validate_inputs(gff, reference)

    top_contigs = get_top_contigs(reference, max_contigs_to_plot)
    cov_map = _get_contigs_to_plot(gff, top_contigs)

    # stats may be None
    stats = _get_reference_coverage_stats(cov_map.values())

    attributes = [_get_att_mean_coverage(stats),
                  _get_att_percent_missing(stats)]

    coverage_grp = self._create_coverage_plot_grp(top_contigs, cov_map,
                                                  output_dir)
    if stats is None:
        histogram_grp = None
    else:
        histogram_grp = self._create_coverage_histo_plot_grp(stats,
                                                             output_dir)

    # Only plot groups that actually contain plots go into the report.
    plotgroups = []
    if coverage_grp.plots:
        plotgroups.append(coverage_grp)
    if histogram_grp is not None and histogram_grp.plots:
        plotgroups.append(histogram_grp)

    reference_uuid = ReferenceSet(reference).uuid
    rpt = Report(self.spec.id,
                 plotgroups=plotgroups,
                 attributes=attributes,
                 dataset_uuids=(reference_uuid,))
    rpt = self.spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
def make_coverage_report(gff, reference, max_contigs_to_plot, report,
                         output_dir):
    """
    Build the coverage report (two attributes plus any non-empty plot groups)
    and write it as JSON.

    :param gff: (str) path to alignment_summary.gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot-group helpers
    :return: the Report object that was written
    """
    _validate_inputs(gff, reference)

    top_contigs = get_top_contigs(reference, max_contigs_to_plot)
    cov_map = _get_contigs_to_plot(gff, top_contigs)

    # stats may be None
    stats = _get_reference_coverage_stats(cov_map.values())

    attributes = [_get_att_mean_coverage(stats),
                  _get_att_percent_missing(stats)]

    coverage_grp = _create_coverage_plot_grp(top_contigs, cov_map, output_dir)
    if stats is None:
        histogram_grp = None
    else:
        histogram_grp = _create_coverage_histo_plot_grp(stats, output_dir)

    # Only plot groups that actually contain plots go into the report.
    plotgroups = []
    if coverage_grp.plots:
        plotgroups.append(coverage_grp)
    if histogram_grp is not None and histogram_grp.plots:
        plotgroups.append(histogram_grp)

    reference_uuid = ReferenceSet(reference).uuid
    rpt = Report('coverage',
                 title="Coverage",
                 plotgroups=plotgroups,
                 attributes=attributes,
                 dataset_uuids=(reference_uuid,))
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Create a polished assembly report: contig-length attributes, one
    coverage-vs-quality plot group, and a companion CSV.

    :param report: (str) report name
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param output_dir: (str) directory for the report, plot and CSV
    :return: 0 after the report and CSV have been written
    """
    log.info("Starting version {f} v{x}".format(
        x=__version__, f=os.path.basename(__file__)))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    # Result is not used here; the call is made for its effect on contigs.
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")
    quality_plot = _coverage_vs_quality_plot(contigs, output_dir)
    plot_group = PlotGroup('coverage_based',
                           title='Contig Coverage vs Confidence',
                           thumbnail=quality_plot.thumbnail,
                           plots=[quality_plot])

    rep = Report('polished_assembly')
    rep.add_attribute(
        Attribute(Constants.A_N_CONTIGS, len(contigs),
                  Constants.ATTR_LABELS[Constants.A_N_CONTIGS]))

    # The length helpers receive the contig lengths in ascending order.
    read_lengths = sorted(c.length for c in contigs.values())
    length_attribute_builders = (_get_att_max_contig_length,
                                 _get_att_n_50_contig_length,
                                 _get_att_sum_contig_lengths)
    for build_attribute in length_attribute_builders:
        rep.add_attribute(build_attribute(read_lengths))

    rep.add_plotgroup(plot_group)
    rep.write_json(os.path.join(output_dir, report))

    _write_coverage_vs_quality_csv(contigs, output_dir)
    return 0
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Create a polished assembly report: contig-length attributes, one
    coverage-vs-quality plot group, and a companion CSV.

    :param report: (str) report name
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param output_dir: (str) directory for the report, plot and CSV
    :return: 0 after the report and CSV have been written
    """
    log.info("Starting version {f} v{x}".format(
        x=__version__, f=os.path.basename(__file__)))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    # Result is not used here; the call is made for its effect on contigs.
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")
    quality_plot = _coverage_vs_quality_plot(contigs, output_dir)
    plot_group = PlotGroup(Constants.PG_COVERAGE,
                           thumbnail=quality_plot.thumbnail,
                           plots=[quality_plot])

    rep = Report(Constants.R_ID)
    rep.add_attribute(Attribute(Constants.A_N_CONTIGS, len(contigs)))

    # The length helpers receive the contig lengths in ascending order.
    read_lengths = sorted(c.length for c in contigs.values())
    length_attribute_builders = (_get_att_max_contig_length,
                                 _get_att_n_50_contig_length,
                                 _get_att_sum_contig_lengths,
                                 _get_att_esize_contig_length)
    for build_attribute in length_attribute_builders:
        rep.add_attribute(build_attribute(read_lengths))

    rep.add_plotgroup(plot_group)
    rep = spec.apply_view(rep)
    rep.write_json(os.path.join(output_dir, report))

    _write_coverage_vs_quality_csv(contigs, output_dir)
    return 0
def run_reference_dataset_report(reference_ds, output_json):
    """
    Write the reference dataset report as JSON.

    The report holds the dataset attributes and whatever plot groups
    ``try_fasta_to_plot_group`` returns for the first external file.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: (str) path of the JSON report to write; its directory
        is passed to the plot helper
    :return: 0 after the report has been written
    """
    attributes = _dataset_to_attribute_reports(reference_ds)

    first_fasta = reference_ds.toExternalFiles()[0]
    plot_dir = os.path.dirname(output_json)
    plot_groups = try_fasta_to_plot_group(first_fasta, plot_dir)

    reference_report = Report("ds_reference_report",
                              attributes=attributes,
                              plotgroups=plot_groups,
                              dataset_uuids=[reference_ds.uuid])
    reference_report.write_json(output_json)
    return 0
def make_modifications_report(modifications_h5, report, output_dir,
                              dpi=DEFAULT_DPI):
    """
    Entry point to report generation.

    Builds the two QMod plots from the HDF5 file and writes a report holding
    them in a single plot group.

    :param modifications_h5: (str) path to the modifications HDF5 file
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory for the report; also passed to the
        plot helpers
    :param dpi: (int) passed through to the plot helpers
    :return: 0 after the report has been written
    :raises ImportError: if h5py is not installed
    """
    # h5py is imported lazily so the module can be loaded without it.
    try:
        import h5py
    except ImportError:
        raise ImportError("This module requires that h5py be installed")

    # Open explicitly read-only and close the handle as soon as the plots
    # exist; the default h5py mode was historically 'a', which can create or
    # modify the file.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)

    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def run_verify_dataset_filters(rtc):
    """
    Check a SubreadSet's record count and total length against the task
    options, then write both values into a report.

    :param rtc: resolved tool contract; the first input file is the dataset,
        the first output file is the JSON report path, and the expected
        values come from the task options
    :return: 0 after the report has been written
    :raises ValueError: if the record count or the total length differs from
        the expected value
    """
    from pbcore.io import SubreadSet

    options = rtc.task.options
    expected_num_records = options['pbsmrtpipe.task_options.num_records']
    expected_length = options['pbsmrtpipe.task_options.total_length']

    with SubreadSet(rtc.task.input_files[0]) as ds:
        # FIXME ideally we should not need to do this, but Scala code does not
        # have the ability to update counts directly
        ds.updateCounts()

        if ds.numRecords != expected_num_records:
            message = "Expected {e} records, got {n}".format(
                e=expected_num_records, n=ds.numRecords)
            raise ValueError(message)
        if ds.totalLength != expected_length:
            message = "Expected length {e}, got {n}".format(
                e=expected_length, n=ds.totalLength)
            raise ValueError(message)

        counts = (
            ("number_of_records", ds.numRecords),
            ("total_length", ds.totalLength),
        )
        attr = [Attribute(attr_id, value=count) for attr_id, count in counts]
        subreads_report = Report("subreads_report",
                                 title="SubreadSet XML Report",
                                 attributes=attr)
        subreads_report.write_json(rtc.task.output_files[0])
    return 0
def run_hello_world(conditions, output_report):
    """
    Write a minimal report containing the single attribute hello=world.

    :param conditions: accepted but not used by this function
    :param output_report: (str) path of the JSON report to write
    :return: 0 after the report has been written
    """
    attributes = [Attribute("hello", value='world')]
    hello_report = Report('pbinternal_hello_world', attributes=attributes)
    hello_report.write_json(output_report)
    return 0
def write_random_report(path, nrecords):
    """
    Write a mock report holding ``nrecords`` integer attributes.

    Attribute ``i`` has id ``mock_attr_<i>``, value ``i`` and name
    ``Attr <i>``.

    :param path: (str) path of the JSON report to write
    :param nrecords: (int) number of attributes to generate
    :return: the Report object that was written
    """
    # range (not xrange) so this runs on both Python 2 and Python 3.
    attributes = [Attribute("mock_attr_{i}".format(i=i), i,
                            name="Attr {i}".format(i=i))
                  for i in range(nrecords)]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r
def _example_main(input_file, output_file, **kwargs):
    """
    Render the three accuracy plots to PNG and write a report that
    references them.

    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    The HTML, PNG and thumbnail files use relative names, so they are created
    in the current working directory.

    :param input_file: sequence; item 0 is handed to ``_get_dset_paths``
    :param output_file: (str) path of the JSON report to write
    :param kwargs: extra options; only logged
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open dset CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file[0])

    # Selecting plots from a second CSV is disabled; all three plots below
    # are always generated.
    # plots_to_generate = _get_plots_to_generate(input_file[1])

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(dset_paths[f]['aset'])
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    # figure tuple has form (plot_group_id, plot_id, Plot Name, figure)
    figures = [
        ('accuracy', 'accuracy_vs_readlength', "Accuracy vs ReadLength",
         accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)),
        ('accuracy', 'accuracy', "Accuracy",
         accuracy_plots._plot_accuracy_distribution(dsets_kpis)),
        ('accuracy', 'accuracy_boxplot', "Accuracy BoxPlot",
         accuracy_plots._plot_accuracy_boxplots(dsets_kpis)),
    ]

    all_plots = {}  # dictionary of plots. keys are groups

    with PhantomDriver() as driver:
        for plot_group, plot_id, display_name, fig in figures:
            html_path = '{i}.html'.format(i=plot_id)
            plot_path = '{i}.png'.format(i=plot_id)
            thumb_path = '{i}_thumb.png'.format(i=plot_id)

            plot(fig, filename=html_path, show_link=False, auto_open=False)
            driver.set_window_size(1920, 1080)
            driver.get(html_path)
            driver.save_screenshot(plot_path)
            driver.get(html_path)
            driver.save_screenshot(thumb_path)
            log.info("Saved screen to {}".format(plot_path))
            # The HTML page is only an intermediate for the screenshots.
            os.remove(html_path)

            # the Plot API is a bit awkward. It should have "title"
            # the caption will be used as the display name
            p = Plot(plot_id, plot_path, thumbnail=thumb_path,
                     caption=display_name)
            all_plots.setdefault(plot_group, []).append(p)

    # all_plots is keyed by group, so count the plots inside each group
    # rather than the number of groups.
    num_plots = sum(len(plots) for plots in all_plots.values())
    log.info("completed generating {} plots".format(num_plots))

    plot_groups = [PlotGroup(group_id, title="Accuracy Plots", plots=plots)
                   for group_id, plots in all_plots.items()]

    report = Report('mh_toy', tables=(), plotgroups=plot_groups,
                    attributes=())
    report.write_json(output_file)
    return 0
def _example_main(input_file, output_file, **kwargs):
    """
    Render the requested accuracy plots to PNG and write a report that
    references them.

    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    The HTML, PNG and thumbnail files use relative names, so they are created
    in the current working directory. The module-level ``phantomjs_driver``
    is always quit before returning, including when an error is raised.

    :param input_file: sequence; item 0 is handed to ``_get_dset_paths`` and
        item 1 to ``_get_plots_to_generate``
    :param output_file: (str) path of the JSON report to write
    :param kwargs: extra options; only logged
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))
    try:
        # Open dset CSV. Store absolute path of each alignment set.
        dset_paths = _get_dset_paths(input_file[0])

        # Open plots CSV. Store names of plots to produce.
        plots_to_generate = _get_plots_to_generate(input_file[1])

        dsets_kpis = {}
        for f in dset_paths:
            dset = openDataSet(dset_paths[f]['aset'])
            subsampled_dset = _subsample_alignments(dset)
            dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

        # figure tuple has form (plot_group_id, plot_id, figure)
        figures = []
        if 'accuracy_vs_readlength' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy_vs_readlength',
                 accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)))
        if 'accuracy' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy',
                 accuracy_plots._plot_accuracy_distribution(dsets_kpis)))
        if 'accuracy_boxplot' in plots_to_generate:
            figures.append(
                ('accuracy', 'accuracy_boxplot',
                 accuracy_plots._plot_accuracy_boxplots(dsets_kpis)))

        all_plots = {}  # dictionary of plots. keys are groups
        for plot_group, plot_id, fig in figures:
            html_path = '{i}.html'.format(i=plot_id)
            plot_path = '{i}.png'.format(i=plot_id)
            thumb_path = '{i}_thumb.png'.format(i=plot_id)

            plot(fig, filename=html_path, show_link=False, auto_open=False)
            phantomjs_driver.set_window_size(1920, 1080)
            phantomjs_driver.get(html_path)
            phantomjs_driver.save_screenshot(plot_path)
            phantomjs_driver.get(html_path)
            phantomjs_driver.save_screenshot(thumb_path)
            # The HTML page is only an intermediate for the screenshots.
            os.remove(html_path)

            all_plots.setdefault(plot_group, []).append(
                Plot(plot_id, plot_path, thumbnail=thumb_path))

        plot_groups = [PlotGroup(group_id, plots=plots)
                       for group_id, plots in all_plots.items()]

        report = Report('mh_toy', tables=(), plotgroups=plot_groups,
                        attributes=())
        report.write_json(output_file)
    finally:
        # Quit on every exit path so a failure does not leave the driver
        # running.
        phantomjs_driver.quit()
    return 0
def _example_main(input_file, output_file, **kwargs):
    """
    Plot accuracy against read length for each alignment set (one subplot
    row per dataset), capture it as PNG, and write a report referencing it.

    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    The HTML, PNG and thumbnail files use relative names, so they are created
    in the current working directory.

    :param input_file: handed to ``_get_dset_paths`` (the input CSV)
    :param output_file: (str) path of the JSON report to write
    :param kwargs: extra options; only logged
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open input CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file)

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(f)
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    # One scatter trace per dataset; track the longest read so every subplot
    # can share the same x range.
    traces = []
    titles = []
    max_rl = 0
    for key, kpis in dsets_kpis.items():
        rl = kpis['readlength']
        acc = kpis['accuracy']
        longest = max(rl)
        if longest > max_rl:
            max_rl = longest
        traces.append(Scatter(x=rl, y=acc, mode='markers'))
        titles.append(str(key))

    rows = len(traces)
    fig = plotly.tools.make_subplots(rows=rows, cols=1,
                                     subplot_titles=tuple(titles))
    layout = fig['layout']
    layout['font']['size'] = 8
    layout.update(showlegend=False)

    for row, trace in enumerate(traces, 1):  # subplot rows are one-based
        fig.append_trace(trace, row, 1)
        layout['xaxis' + str(row)]['tickfont'].update(size=20)
        layout['yaxis' + str(row)]['tickfont'].update(size=20)
        layout['xaxis' + str(row)].update(range=[0, max_rl])

    # Floor division keeps the axis index an integer on Python 3 as well;
    # true division would build a key such as 'yaxis2.5'.
    middle_yaxis = 'yaxis' + str(rows // 2 + 1)
    bottom_xaxis = 'xaxis' + str(rows)
    layout[middle_yaxis].update(title='accuracy')
    layout[middle_yaxis]['titlefont'].update(size=20)
    layout[bottom_xaxis].update(title='readlength (bases)')
    layout[bottom_xaxis]['titlefont'].update(size=20)

    html_path = 'accuracy_vs_readlength.html'
    plot_path = 'accuracy_vs_readlength.png'
    thumb_path = 'accuracy_vs_readlength_thumb.png'

    plot(fig, filename=html_path, show_link=False, auto_open=False)
    phantomjs_driver.set_window_size(1920, 1080)
    phantomjs_driver.get(html_path)
    phantomjs_driver.save_screenshot(plot_path)
    # The thumbnail is captured at the same window size:
    # phantomjs_driver.set_window_size(400, 300) ruins the label size
    # relations etc.
    phantomjs_driver.get(html_path)
    phantomjs_driver.save_screenshot(thumb_path)
    os.remove(html_path)

    plot_id = 'accuracy_vs_readlength'
    acc_rl_plot = Plot(plot_id, plot_path, thumbnail=thumb_path)
    plot_group = PlotGroup('accuracy', plots=[acc_rl_plot])

    report = Report('accuracy', tables=(), plotgroups=[plot_group],
                    attributes=())
    report.write_json(output_file)
    return 0
def _example_main(input_file, output_file, **kwargs):
    """
    Plot accuracy against read length for each alignment set (one subplot
    row per dataset), capture it as PNG, and write a report referencing it.

    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    The HTML, PNG and thumbnail files use relative names, so they are created
    in the current working directory.

    :param input_file: handed to ``_get_dset_paths`` (the input CSV)
    :param output_file: (str) path of the JSON report to write
    :param kwargs: extra options; only logged
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open input CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file)

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(f)
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    # One scatter trace per dataset; track the longest read so every subplot
    # can share the same x range.
    traces = []
    titles = []
    max_rl = 0
    for key, kpis in dsets_kpis.items():
        rl = kpis['readlength']
        acc = kpis['accuracy']
        longest = max(rl)
        if longest > max_rl:
            max_rl = longest
        traces.append(Scatter(x=rl, y=acc, mode='markers'))
        titles.append(str(key))

    rows = len(traces)
    fig = plotly.tools.make_subplots(rows=rows, cols=1,
                                     subplot_titles=tuple(titles))
    layout = fig['layout']
    layout['font']['size'] = 8
    layout.update(showlegend=False)

    for row, trace in enumerate(traces, 1):  # subplot rows are one-based
        fig.append_trace(trace, row, 1)
        layout['xaxis' + str(row)]['tickfont'].update(size=20)
        layout['yaxis' + str(row)]['tickfont'].update(size=20)
        layout['xaxis' + str(row)].update(range=[0, max_rl])

    # Floor division keeps the axis index an integer on Python 3 as well;
    # true division would build a key such as 'yaxis2.5'.
    middle_yaxis = 'yaxis' + str(rows // 2 + 1)
    bottom_xaxis = 'xaxis' + str(rows)
    layout[middle_yaxis].update(title='accuracy')
    layout[middle_yaxis]['titlefont'].update(size=20)
    layout[bottom_xaxis].update(title='readlength (bases)')
    layout[bottom_xaxis]['titlefont'].update(size=20)

    html_path = 'accuracy_vs_readlength.html'
    plot_path = 'accuracy_vs_readlength.png'
    thumb_path = 'accuracy_vs_readlength_thumb.png'

    plot(fig, filename=html_path, show_link=False, auto_open=False)
    phantomjs_driver.set_window_size(1920, 1080)
    phantomjs_driver.get(html_path)
    phantomjs_driver.save_screenshot(plot_path)
    # The thumbnail is captured at the same window size:
    # phantomjs_driver.set_window_size(400, 300) ruins the label size
    # relations etc.
    phantomjs_driver.get(html_path)
    phantomjs_driver.save_screenshot(thumb_path)
    os.remove(html_path)

    plot_id = 'accuracy_vs_readlength'
    acc_rl_plot = Plot(plot_id, plot_path, thumbnail=thumb_path)
    plot_group = PlotGroup('accuracy', plots=[acc_rl_plot])

    report = Report('accuracy', tables=(), plotgroups=[plot_group],
                    attributes=())
    report.write_json(output_file)
    return 0