def _wrapper(path):
    # Note: max_size_mb is a free variable bound by the enclosing factory
    # function (see the sketch below).
    p = validate_file(path)
    sx = _get_size_mb(path)
    if sx > max_size_mb:
        raise argparse.ArgumentTypeError(
            "Fasta file is too large ({s:.2f} MB > {m:.2f} MB). Create a "
            "ReferenceSet using {e}, then import it using "
            "`pbservice import-dataset /path/to/referenceset.xml`".format(
                e=Constants.FASTA_TO_REFERENCE, s=sx,
                m=Constants.MAX_FASTA_FILE_MB))
    return p

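# `_wrapper` closes over `max_size_mb`, so it is presumably returned by a
# factory that binds the size limit and hands the closure to argparse as a
# `type=` callable. A minimal sketch of such a factory follows; the name
# `_validate_max_fasta_file_size` and its default are assumptions, not
# confirmed by this module.
def _validate_max_fasta_file_size(max_size_mb=Constants.MAX_FASTA_FILE_MB):
    def _check(path):
        # Hypothetical sketch: mirrors the body of _wrapper above.
        p = validate_file(path)
        if _get_size_mb(p) > max_size_mb:
            raise argparse.ArgumentTypeError(
                "Fasta file exceeds {m:.2f} MB".format(m=max_size_mb))
        return p
    return _check
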
def _validate_analysis_job_json(path):
    px = validate_file(path)
    with open(px, 'r') as f:
        d = json.load(f)
    try:
        load_analysis_job_json(d)
        return px
    except (KeyError, TypeError, ValueError) as e:
        raise argparse.ArgumentTypeError(
            "Invalid analysis.json format for '{p}': {e}".format(
                p=px, e=repr(e)))

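# These validators raise argparse.ArgumentTypeError, so they are intended to
# plug into argparse as `type=` callables. A minimal usage sketch follows;
# the parser and argument names are hypothetical, not part of this module.
def _example_build_parser():
    p = argparse.ArgumentParser(description="Example wiring (hypothetical)")
    p.add_argument("analysis_json", type=_validate_analysis_job_json,
                   help="Path to an analysis.json file")
    return p
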
def _validate_report(path):
    p = validate_file(path)
    # Parse to validate; the loaded report itself is discarded.
    _ = load_report_from_json(path)
    return p

def _f(path):
    path = validate_file(path)
    return path

def to_report(filtered_csv, output_dir, dpi=DEFAULT_DPI, thumb_dpi=DEFAULT_THUMB_DPI):
    """Run the filtered subread report."""
    validate_file(filtered_csv)
    validate_dir(output_dir)

    aggregators = {'nbases': SumAggregator('length'),
                   'nreads': CountAggregator('length'),
                   'mean_subreadlength': MeanSubreadLengthAggregator('length'),
                   'max_readlength': MaxAggregator('length'),
                   'n50': N50Aggregator('length'),
                   'readlength_histogram': HistogramAggregator('length', 0, 100, nbins=10000),
                   'subread': SubreadLengthHistogram(dx=100)}

    passed_filter = lambda record: record.passed_filter is True
    passed_filter_func = functools.partial(_apply, [passed_filter],
                                           aggregators.values())

    all_subread_aggregators = {'raw_nreads': SumAggregator('length'),
                               'max_raw_readlength': MaxAggregator('length'),
                               'raw_readlength_histogram': HistogramAggregator('length', 0, 100, nbins=10000)}

    all_filter_func = functools.partial(_apply, [null_filter],
                                        all_subread_aggregators.values())

    funcs = [passed_filter_func, all_filter_func]

    with open(filtered_csv, 'r') as f:
        # read in and skip the header; records start on the next line
        header = f.readline()
        # validate_header(header)
        applyer(to_record, f, funcs)

    for aggregator in itertools.chain(aggregators.values(),
                                      all_subread_aggregators.values()):
        log.info(aggregator)

    # Check if any reads were found at all
    if all_subread_aggregators['raw_nreads'].attribute == 0:
        raise NoSubreadsFound(
            "No subreads found in {f}".format(f=filtered_csv))

    # Now check that at least one subread passed the filter
    if aggregators['nreads'].attribute == 0:
        msg = "No subreads passed the filter in {f}.".format(f=filtered_csv)
        raise NoSubreadsPassedFilter(msg)

    # this is where you change the plotting options
    plot_view = PlotViewProperties(
        Constants.P_POST_FILTER,
        Constants.PG_SUBREAD_LENGTH,
        custom_subread_length_histogram,
        Constants.I_FILTER_SUBREADS_HIST,
        xlabel=get_plot_xlabel(spec, Constants.PG_SUBREAD_LENGTH,
                               Constants.P_POST_FILTER),
        ylabel="Subreads",
        rlabel="bp > Subread Length",
        thumb="filtered_subread_report_thmb.png",
        use_group_thumb=True,
        color=get_green(3),
        edgecolor=get_green(2))

    view_config_d = {'post_filter': plot_view}
    id_aggregators = {'post_filter': aggregators['subread']}
    plot_groups = to_plot_groups(view_config_d, output_dir, id_aggregators)

    to_a = lambda n: aggregators[n].attribute
    attributes = _to_attributes(to_a('nreads'),
                                to_a('nbases'),
                                to_a('mean_subreadlength'),
                                to_a('n50'))

    report = Report(Constants.R_ID,
                    plotgroups=plot_groups,
                    attributes=attributes)
    log.debug(str(report))
    return spec.apply_view(report)

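# `_apply` and `applyer` are used above via functools.partial but are not
# defined in this section. A minimal sketch of the assumed streaming pattern
# follows; the semantics (feed each record to every aggregator whose filters
# all pass, pulling records lazily from the open file) and the aggregator
# `apply` method are inferred from the call sites, not confirmed.
def _apply_sketch(filters, aggregators, record):
    """Hypothetical stand-in for _apply."""
    if all(f(record) for f in filters):
        for agg in aggregators:
            agg.apply(record)


def _applyer_sketch(to_record, iterable, funcs):
    """Hypothetical stand-in for applyer: convert each line to a record and
    push it through every partially-applied filter/aggregator func."""
    for line in iterable:
        record = to_record(line)
        for func in funcs:
            func(record)
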
def validate_file_and_load_manifest(path):
    # Loading the manifest validates it as a side effect; the parsed
    # RunnableTask itself is not needed here.
    _ = RunnableTask.from_manifest_json(validate_file(path))
    # if we got here everything is valid
    return path
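

# Unlike the validators above, this one does not translate parse failures
# into argparse.ArgumentTypeError, so errors from from_manifest_json
# propagate as-is. A hypothetical adapter for argparse use might look like
# this (the name and wiring are assumptions, not part of this module):
def _manifest_arg_type(path):
    try:
        return validate_file_and_load_manifest(path)
    except Exception as e:
        raise argparse.ArgumentTypeError(
            "Invalid manifest '{p}': {e}".format(p=path, e=repr(e)))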