def _create_mapping_stats_gmap_report(self):
    """Create a Mapping Stats gmap report, write it as JSON, and return
    the deserialized report."""
    from pbreports.report.mapping_stats import to_report
    from test_pbreports_report_mapping_stats import _GMAP_DATA_DIR as datadir

    log.info('Creating Mapping Stats gmap report using datadir {d}'.format(
        d=datadir))

    def in_datadir(name):
        # Resolve a test-data file name relative to the gmap data dir.
        return os.path.join(datadir, name)

    filtered_regions_fofn = in_datadir('filtered_regions.fofn')
    filtered_summary = in_datadir('filtered_summary.csv')
    input_fofn = in_datadir('input.fofn')
    aligned_reads_cmph5 = in_datadir('aligned_reads.cmp.h5')

    mode = "external"
    report_json = 'mapping_report.json'
    output_json = os.path.join(self._output_dir, 'mapping_report.json')

    bas_files = fofn_to_files(input_fofn)
    region_files = fofn_to_files(filtered_regions_fofn)

    report = to_report(bas_files, region_files, aligned_reads_cmph5,
                       report_json, self._output_dir,
                       filter_summary_csv=filtered_summary, mode=mode)
    pformat(report.to_dict())
    report.write_json(output_json)
    return self._deserialize_report(output_json)
def main(argv=None):
    """Main point of entry.

    :param argv: full argument vector with the program name at ``argv[0]``;
        defaults to ``sys.argv`` resolved at call time.
    :return: integer exit code (the result of ``run`` on success, -1 on error)
    """
    # Fix: ``def main(argv=sys.argv)`` binds the default once at import time,
    # so later changes to sys.argv were silently ignored. Use a None sentinel
    # and resolve lazily instead.
    if argv is None:
        argv = sys.argv
    p = get_parser()
    # the first arg will be the exe name
    args = p.parse_args(argv[1:])

    fofn = args.fofn
    cmp_h5 = args.cmp_h5
    movie_files = fofn_to_files(fofn)
    output_csv = args.output_csv
    external_mode = args.external

    if args.debug:
        setup_log(log, level=logging.DEBUG)
    else:
        # Keep the logger quiet unless debugging was requested.
        log.addHandler(logging.NullHandler())

    log.debug(args)
    started_at = time.time()
    try:
        rcode = run(cmp_h5, movie_files, output_csv,
                    external_mode=external_mode)
    except Exception as e:
        # Top-level boundary: log with traceback, surface the message on
        # stderr, and convert the failure to a nonzero exit code.
        rcode = -1
        log.error(e, exc_info=True)
        sys.stderr.write(str(e) + "\n")

    run_time = time.time() - started_at
    _d = dict(f=os.path.basename(__file__), x=rcode, s=run_time,
              v=__version__)
    log.info(
        "completed {f} v{v} with exit code {x} in {s:.2f} sec.".format(**_d))
    return rcode
def fofn_to_chunks(fofn):
    """Return one PipelineChunk per file listed in the FOFN.

    Each chunk gets an id of the form ``chunk-<index>`` and carries the
    file path under ``Constants.CHUNK_KEY_FOFN``.
    """
    return [
        PipelineChunk("chunk-{i}".format(i=index),
                      **{Constants.CHUNK_KEY_FOFN: file_name})
        for index, file_name in enumerate(fofn_to_files(fofn))
    ]
def _args_chunk_fofn(args):
    """argparse runner: split the input FOFN into grouped chunk files and
    write the chunk report JSON. Returns 0."""
    src_files = fofn_to_files(args.input_fofn)
    log.info("read in fofn with {n} files.".format(n=len(src_files)))
    chunks = CU.write_grouped_fofn_chunks(src_files,
                                          args.max_total_chunks,
                                          args.output_dir,
                                          args.chunk_report_json)
    msg = "Converted {x} Fofn into {n} chunks. Write chunks to {f}".format(
        n=len(chunks), f=args.chunk_report_json, x=len(src_files))
    log.debug(msg)
    return 0
def _create_reads_of_insert_report(self):
    """Create a Reads of Insert report, write it as JSON, and return the
    deserialized report."""
    from pbreports.report.reads_of_insert import to_report
    from test_pbreports_report_reads_of_insert import _DATA_DIR as datadir

    log.info('Creating reads_of_insert report using datadir {d}'.format(
        d=datadir))

    fofn = os.path.join(datadir, 'reads_of_insert.fofn')
    output_json = os.path.join(self._output_dir, 'reads_of_insert.json')

    report = to_report(fofn_to_files(fofn), self._output_dir)
    pformat(report.to_dict())
    report.write_json(output_json)
    return self._deserialize_report(output_json)
def _create_overview_report(self):
    """Create an Overview report, write it as JSON, and return the
    deserialized report."""
    from pbreports.report.overview import run
    from test_pbreports_report_overview import _DATA_DIR as datadir

    log.info(
        'Creating Overview report using datadir {d}'.format(d=datadir))

    input_fofn = os.path.join(datadir, 'input.fofn')
    output_json = os.path.join(self._output_dir, 'overview.json')

    report = run(fofn_to_files(input_fofn))
    pformat(report.to_dict())
    report.write_json(output_json)
    return self._deserialize_report(output_json)
def gather_fofn(input_files, output_file, skip_empty=True):
    """
    Concatenate the entries of several FOFNs into a single output FOFN.

    This should be better spec'ed and impose a tighter constraint on the FOFN

    :param input_files: List of file paths (each a FOFN to read)
    :param output_file: File Path to write the merged FOFN to
    :param skip_empty: Ignore empty entries (drop blank/whitespace-only
        records instead of copying them into the output)
    :return: Output file
    :rtype: str
    """
    all_files = []
    for input_file in input_files:
        file_names = fofn_to_files(input_file)
        if skip_empty:
            # Fix: skip_empty was previously accepted and documented but
            # never used. Drop blank entries so empty lines in a source
            # FOFN don't become empty records in the gathered file.
            file_names = [f for f in file_names if f.strip()]
        all_files.extend(file_names)

    with open(output_file, 'w') as f:
        f.write("\n".join(all_files))

    return output_file
def nchunk_fofn(input_file, max_chunks):
    """Number of chunks to use for a FOFN: its file count, capped at
    max_chunks."""
    n_files = len(fofn_to_files(input_file))
    return n_files if n_files < max_chunks else max_chunks
def _fofn_to_metadata(path):
    """Build DatasetMetadata for a FOFN, using its file count for both
    constructor arguments."""
    n_files = len(fofn_to_files(path))
    return DatasetMetadata(n_files, n_files)
def _args_chunk_fofn(args):
    """argparse runner: chunk ``args.input_fofn`` into grouped FOFN files
    and write the chunk report JSON. Returns 0.

    NOTE(review): an identical ``_args_chunk_fofn`` appears earlier in this
    file; the two copies should be consolidated.
    """
    input_files = fofn_to_files(args.input_fofn)
    log.info("read in fofn with {n} files.".format(n=len(input_files)))
    chunks = CU.write_grouped_fofn_chunks(
        input_files, args.max_total_chunks, args.output_dir,
        args.chunk_report_json)
    log.debug(
        "Converted {x} Fofn into {n} chunks. Write chunks to {f}".format(
            n=len(chunks), f=args.chunk_report_json, x=len(input_files)))
    return 0