def test_summary(self):
    """Forward-read error rates are averaged into the summary dict."""
    output = StringIO()
    error_records = [
        dict(tile=2, cycle=1, error_rate=0.25),
        dict(tile=2, cycle=2, error_rate=0.75),
    ]
    summary = {}

    write_phix_csv(output, error_records, summary=summary)

    self.assertEqual(dict(error_rate_fwd=0.5), summary)
def test_summary(self):
    """The summary dict receives the mean forward error rate."""
    stream = StringIO()
    summary = {}

    write_phix_csv(stream,
                   [{'tile': 2, 'cycle': 1, 'error_rate': 0.25},
                    {'tile': 2, 'cycle': 2, 'error_rate': 0.75}],
                   summary=summary)

    self.assertEqual({'error_rate_fwd': 0.5}, summary)
def test_summary_reverse(self):
    """With read lengths given, reverse-read cycles get their own average."""
    output = StringIO()
    error_records = [
        dict(tile=2, cycle=1, error_rate=0.75),
        dict(tile=2, cycle=4, error_rate=0.375),
        dict(tile=2, cycle=5, error_rate=0.125),
    ]
    lengths = [3, 0, 0, 3]
    summary = {}

    write_phix_csv(output, error_records, lengths, summary=summary)

    self.assertEqual(dict(error_rate_fwd=0.75, error_rate_rev=0.25), summary)
def test_write_sorted(self):
    """Records arriving out of cycle order are written sorted."""
    output = StringIO()
    unsorted_records = [
        dict(tile=2, cycle=2, error_rate=0.4),
        dict(tile=2, cycle=1, error_rate=0.5),
    ]

    write_phix_csv(output, unsorted_records)

    self.assertEqual("tile,cycle,errorrate\n"
                     "2,1,0.5\n"
                     "2,2,0.4\n",
                     output.getvalue())
def test_summary_reverse(self):
    """Error rates past the forward read are summarized separately."""
    stream = StringIO()
    summary = {}
    records = [{'tile': 2, 'cycle': 1, 'error_rate': 0.75},
               {'tile': 2, 'cycle': 4, 'error_rate': 0.375},
               {'tile': 2, 'cycle': 5, 'error_rate': 0.125}]

    write_phix_csv(stream, records, [3, 0, 0, 3], summary=summary)

    self.assertEqual({'error_rate_fwd': 0.75, 'error_rate_rev': 0.25},
                     summary)
def test_write_missing(self):
    """Gaps in the cycle sequence produce rows with no error rate."""
    output = StringIO()
    sparse_records = [
        dict(tile=2, cycle=1, error_rate=0.1),
        dict(tile=2, cycle=4, error_rate=0.4),
    ]

    write_phix_csv(output, sparse_records)

    expected_lines = ["tile,cycle,errorrate",
                      "2,1,0.1",
                      "2,2",
                      "2,3",
                      "2,4,0.4",
                      ""]
    self.assertEqual("\n".join(expected_lines), output.getvalue())
def test_write_sorted(self):
    """Rows come out in cycle order even when records arrive unsorted."""
    stream = StringIO()
    records = [{'tile': 2, 'cycle': 2, 'error_rate': 0.4},
               {'tile': 2, 'cycle': 1, 'error_rate': 0.5}]

    write_phix_csv(stream, records)

    expected = ("tile,cycle,errorrate\n"
                "2,1,0.5\n"
                "2,2,0.4\n")
    self.assertEqual(expected, stream.getvalue())
def test_write_missing(self):
    """Skipped cycles are filled in as rows without a rate column."""
    stream = StringIO()
    records = [{'tile': 2, 'cycle': 1, 'error_rate': 0.1},
               {'tile': 2, 'cycle': 4, 'error_rate': 0.4}]

    write_phix_csv(stream, records)

    expected = ("tile,cycle,errorrate\n"
                "2,1,0.1\n"
                "2,2\n"
                "2,3\n"
                "2,4,0.4\n")
    self.assertEqual(expected, stream.getvalue())
def summarize_run(args, json):
    """ Summarize the run data from the InterOp folder.

    Reads the PhiX error metrics, quality metrics, and tile metrics, and
    writes quality, bad-cycles, and bad-tiles CSV files.
    :param args: must provide a data_path attribute for the working folder
    :param json: run description object; NOTE(review): this name shadows the
        stdlib ``json`` module inside the function - rename when callers
        can be updated (parameter name is part of the interface).
    :return: a dictionary with summary values.
    """
    # Cycle counts in sequencing order: forward read, both indexes, reverse.
    read_lengths = [json.read_length1,
                    json.index_length1,
                    json.index_length2,
                    json.read_length2]
    summary = {}
    interop_path = os.path.join(args.data_path,
                                'input',
                                'runs',
                                json.run_id,
                                'InterOp')
    phix_path = os.path.join(interop_path, 'ErrorMetricsOut.bin')
    quality_path = os.path.join(args.data_path, 'scratch', 'quality.csv')
    bad_cycles_path = os.path.join(args.data_path, 'scratch', 'bad_cycles.csv')
    summary_path = build_app_result_path(args.data_path,
                                         json,
                                         json.samples[0],
                                         suffix='_QC')
    makedirs(summary_path)
    bad_tiles_path = os.path.join(summary_path, 'bad_tiles.csv')
    # Convert the binary PhiX error metrics into quality.csv; write_phix_csv
    # also fills in the summary dict as a side effect.
    with open(phix_path, 'rb') as phix, open(quality_path, 'w') as quality:
        records = error_metrics_parser.read_errors(phix)
        error_metrics_parser.write_phix_csv(quality,
                                            records,
                                            read_lengths,
                                            summary)
    # Fix: mode 'rU' was deprecated since Python 3.4 and removed in 3.11;
    # universal newlines are the default in text mode, so 'r' is equivalent.
    with open(quality_path, 'r') as quality, \
            open(bad_cycles_path, 'w') as bad_cycles, \
            open(bad_tiles_path, 'w') as bad_tiles:
        report_bad_cycles(quality, bad_cycles, bad_tiles)
    quality_metrics_path = os.path.join(interop_path, 'QMetricsOut.bin')
    quality_metrics_parser.summarize_quality(quality_metrics_path,
                                             summary,
                                             read_lengths)
    tile_metrics_path = os.path.join(interop_path, 'TileMetricsOut.bin')
    summarize_tiles(tile_metrics_path, summary)
    return summary
def test_write_reverse(self):
    """Cycles beyond the forward read are renumbered as negative positions."""
    output = StringIO()
    records = [dict(tile=2, cycle=cycle, error_rate=rate)
               for cycle, rate in [(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)]]

    write_phix_csv(output, records, [2, 2])

    expected = "\n".join(["tile,cycle,errorrate",
                          "2,1,0.1",
                          "2,2,0.2",
                          "2,-1,0.3",
                          "2,-2,0.4",
                          ""])
    self.assertEqual(expected, output.getvalue())
def upload_filter_quality(self, folder_watcher):
    """Build the PhiX error-rate CSV for a run and upload it as a dataset."""
    sizes = parse_read_sizes(folder_watcher.run_folder / "RunInfo.xml")
    lengths = [sizes.read1, sizes.index1, sizes.index2, sizes.read2]
    metrics_path = folder_watcher.run_folder / "InterOp/ErrorMetricsOut.bin"
    quality_text = StringIO()
    with metrics_path.open('rb') as metrics_file:
        error_records = error_metrics_parser.read_errors(metrics_file)
        error_metrics_parser.write_phix_csv(quality_text,
                                            error_records,
                                            lengths)
    # Wrap the CSV text in a byte stream positioned at the start;
    # presumably find_or_upload_dataset reads from the current position.
    upload_stream = BytesIO(quality_text.getvalue().encode('utf8'))
    folder_watcher.quality_dataset = self.find_or_upload_dataset(
        upload_stream,
        folder_watcher.run_name + '_quality.csv',
        'Error rates for {} run.'.format(folder_watcher.run_name))
def test_write_missing_end(self):
    """Missing cycles at the end of each read are padded with blank rates."""
    stream = StringIO()
    records = [{'tile': 2, 'cycle': 1, 'error_rate': 0.1},
               {'tile': 2, 'cycle': 2, 'error_rate': 0.2},
               {'tile': 2, 'cycle': 4, 'error_rate': 0.4},
               {'tile': 2, 'cycle': 5, 'error_rate': 0.5}]

    write_phix_csv(stream, records, [3, 0, 0, 3])

    expected = ("tile,cycle,errorrate\n"
                "2,1,0.1\n"
                "2,2,0.2\n"
                "2,3,\n"
                "2,-1,0.4\n"
                "2,-2,0.5\n"
                "2,-3,\n")
    self.assertEqual(expected, stream.getvalue())
def upload_filter_quality(self, folder_watcher):
    """Generate the run's PhiX quality CSV and upload it as a dataset."""
    run_folder = folder_watcher.run_folder
    read_sizes = parse_read_sizes(run_folder / "RunInfo.xml")
    read_lengths = [read_sizes.read1,
                    read_sizes.index1,
                    read_sizes.index2,
                    read_sizes.read2]
    quality_csv = StringIO()
    with (run_folder / "InterOp/ErrorMetricsOut.bin").open('rb') as error_file:
        error_metrics_parser.write_phix_csv(
            quality_csv,
            error_metrics_parser.read_errors(error_file),
            read_lengths)
    # Encode once into a fresh byte stream (position 0) for the uploader.
    quality_bytes = BytesIO(quality_csv.getvalue().encode('utf8'))
    folder_watcher.quality_dataset = self.find_or_upload_dataset(
        quality_bytes,
        folder_watcher.run_name + '_quality.csv',
        'Error rates for {} run.'.format(folder_watcher.run_name))
def test_write_reverse(self):
    """After the forward read length, cycles are written as reverse positions."""
    out = StringIO()
    forward_and_reverse = [
        dict(tile=2, cycle=1, error_rate=0.1),
        dict(tile=2, cycle=2, error_rate=0.2),
        dict(tile=2, cycle=3, error_rate=0.3),
        dict(tile=2, cycle=4, error_rate=0.4),
    ]

    write_phix_csv(out, forward_and_reverse, [2, 2])

    self.assertEqual("tile,cycle,errorrate\n"
                     "2,1,0.1\n"
                     "2,2,0.2\n"
                     "2,-1,0.3\n"
                     "2,-2,0.4\n",
                     out.getvalue())
def test_write_missing_end(self):
    """Trailing missing cycles in each read appear as rows with no rate."""
    out = StringIO()
    records = [
        dict(tile=2, cycle=1, error_rate=0.1),
        dict(tile=2, cycle=2, error_rate=0.2),
        dict(tile=2, cycle=4, error_rate=0.4),
        dict(tile=2, cycle=5, error_rate=0.5),
    ]

    write_phix_csv(out, records, [3, 0, 0, 3])

    self.assertEqual("\n".join(["tile,cycle,errorrate",
                                "2,1,0.1",
                                "2,2,0.2",
                                "2,3",
                                "2,-1,0.4",
                                "2,-2,0.5",
                                "2,-3",
                                ""]),
                     out.getvalue())
def summarize_run(args, json):
    """ Summarize the run data from the InterOp folder.

    Reads the PhiX error metrics, quality metrics, and tile metrics, and
    writes quality, bad-cycles, and bad-tiles CSV files.
    :param args: must provide a data_path attribute for the working folder
    :param json: run description object; NOTE(review): this name shadows the
        stdlib ``json`` module inside the function - rename when callers
        can be updated (parameter name is part of the interface).
    :return: a dictionary with summary values.
    """
    # Cycle counts in sequencing order: forward read, both indexes, reverse.
    read_lengths = [
        json.read_length1,
        json.index_length1,
        json.index_length2,
        json.read_length2
    ]
    summary = {}
    interop_path = os.path.join(args.data_path,
                                'input',
                                'runs',
                                json.run_id,
                                'InterOp')
    phix_path = os.path.join(interop_path, 'ErrorMetricsOut.bin')
    quality_path = os.path.join(args.data_path, 'scratch', 'quality.csv')
    bad_cycles_path = os.path.join(args.data_path, 'scratch', 'bad_cycles.csv')
    summary_path = build_app_result_path(args.data_path,
                                         json,
                                         json.samples[0],
                                         suffix='_QC')
    makedirs(summary_path)
    bad_tiles_path = os.path.join(summary_path, 'bad_tiles.csv')
    # Convert the binary PhiX error metrics into quality.csv; write_phix_csv
    # also fills in the summary dict as a side effect.
    with open(phix_path, 'rb') as phix, open(quality_path, 'w') as quality:
        records = error_metrics_parser.read_errors(phix)
        error_metrics_parser.write_phix_csv(quality,
                                            records,
                                            read_lengths,
                                            summary)
    # Fix: mode 'rU' was deprecated since Python 3.4 and removed in 3.11;
    # universal newlines are the default in text mode, so 'r' is equivalent.
    with open(quality_path, 'r') as quality, \
            open(bad_cycles_path, 'w') as bad_cycles, \
            open(bad_tiles_path, 'w') as bad_tiles:
        report_bad_cycles(quality, bad_cycles, bad_tiles)
    quality_metrics_path = os.path.join(interop_path, 'QMetricsOut.bin')
    quality_metrics_parser.summarize_quality(quality_metrics_path,
                                             summary,
                                             read_lengths)
    tile_metrics_path = os.path.join(interop_path, 'TileMetricsOut.bin')
    summarize_tiles(tile_metrics_path, summary)
    return summary
def summarize_run(args, run_json):
    """ Summarize the run data from the InterOp folder.

    Writes some summary files.
    :return: a dictionary with summary values.
    """
    # Cycle counts in sequencing order: forward read, both indexes, reverse.
    read_lengths = [
        run_json.read_length1,
        run_json.index_length1,
        run_json.index_length2,
        run_json.read_length2
    ]
    summary = {}
    # Only attempt the InterOp metrics when the run reported RunInfo;
    # otherwise an empty summary is returned.
    has_error_metrics = run_json.has_runinfo
    if has_error_metrics:
        interop_path = os.path.join(args.data_path,
                                    'input',
                                    'runs',
                                    run_json.run_id,
                                    'InterOp')
        phix_path = os.path.join(interop_path, 'ErrorMetricsOut.bin')
        quality_path = os.path.join(args.data_path, 'scratch', 'quality.csv')
        bad_cycles_path = os.path.join(args.data_path,
                                       'scratch',
                                       'bad_cycles.csv')
        bad_tiles_path = os.path.join(args.qc_path, 'bad_tiles.csv')
        # Convert the binary PhiX error metrics into quality.csv;
        # write_phix_csv also fills in the summary dict as a side effect.
        with open(phix_path, 'rb') as phix, open(quality_path, 'w') as quality:
            records = error_metrics_parser.read_errors(phix)
            error_metrics_parser.write_phix_csv(quality,
                                                records,
                                                read_lengths,
                                                summary)
        # Reread quality.csv to flag cycles and tiles with bad error rates.
        with open(quality_path, 'r') as quality, \
                open(bad_cycles_path, 'w') as bad_cycles, \
                open(bad_tiles_path, 'w') as bad_tiles:
            report_bad_cycles(quality, bad_cycles, bad_tiles)
        quality_metrics_path = os.path.join(interop_path, 'QMetricsOut.bin')
        quality_metrics_parser.summarize_quality(quality_metrics_path,
                                                 summary,
                                                 read_lengths)
        tile_metrics_path = os.path.join(interop_path, 'TileMetricsOut.bin')
        summarize_tiles(tile_metrics_path, summary)
    return summary
def summarize_run(run_info):
    """ Summarize the run data from the InterOp folder.

    Writes some summary files.
    :param RunInfo run_info: details of the run
    :return: a dictionary with summary values.
    """
    summary = {}
    # Runs without read sizes are skipped entirely; an empty summary returns.
    if run_info.read_sizes is not None:
        # Cycle counts in sequencing order: forward read, indexes, reverse.
        read_lengths = [run_info.read_sizes.read1,
                        run_info.read_sizes.index1,
                        run_info.read_sizes.index2,
                        run_info.read_sizes.read2]
        phix_path = os.path.join(run_info.interop_path, 'ErrorMetricsOut.bin')
        # Convert the binary PhiX error metrics into the quality CSV;
        # write_phix_csv also fills in the summary dict as a side effect.
        with open(phix_path, 'rb') as phix, \
                open(run_info.quality_csv, 'w') as quality:
            records = error_metrics_parser.read_errors(phix)
            error_metrics_parser.write_phix_csv(quality,
                                                records,
                                                read_lengths,
                                                summary)
        # Reread the quality CSV to flag cycles and tiles with bad rates.
        with open(run_info.quality_csv) as quality, \
                open(run_info.bad_cycles_csv, 'w') as bad_cycles, \
                open(run_info.bad_tiles_csv, 'w') as bad_tiles:
            report_bad_cycles(quality, bad_cycles, bad_tiles)
        quality_metrics_path = os.path.join(run_info.interop_path,
                                            'QMetricsOut.bin')
        quality_metrics_parser.summarize_quality(quality_metrics_path,
                                                 summary,
                                                 read_lengths)
        tile_metrics_path = os.path.join(run_info.interop_path,
                                         'TileMetricsOut.bin')
        summarize_tiles(tile_metrics_path, summary)
    return summary
def summarize_run(run_info):
    """ Summarize the run data from the InterOp folder.

    Writes some summary files.
    :param RunInfo run_info: details of the run
    :return: a dictionary with summary values.
    """
    summary = {}
    # Nothing to summarize when the run has no read sizes.
    if run_info.read_sizes is not None:
        # Cycle counts in sequencing order: forward read, indexes, reverse.
        read_lengths = [run_info.read_sizes.read1,
                        run_info.read_sizes.index1,
                        run_info.read_sizes.index2,
                        run_info.read_sizes.read2]
        phix_path = os.path.join(run_info.interop_path, 'ErrorMetricsOut.bin')
        # Convert the binary PhiX error metrics into the quality CSV;
        # write_phix_csv also fills in the summary dict as a side effect.
        with open(phix_path, 'rb') as phix, \
                open(run_info.quality_csv, 'w') as quality:
            records = error_metrics_parser.read_errors(phix)
            error_metrics_parser.write_phix_csv(quality,
                                                records,
                                                read_lengths,
                                                summary)
        # Reread the quality CSV to flag cycles and tiles with bad rates.
        with open(run_info.quality_csv) as quality, \
                open(run_info.bad_cycles_csv, 'w') as bad_cycles, \
                open(run_info.bad_tiles_csv, 'w') as bad_tiles:
            report_bad_cycles(quality, bad_cycles, bad_tiles)
        quality_metrics_path = os.path.join(run_info.interop_path,
                                            'QMetricsOut.bin')
        quality_metrics_parser.summarize_quality(quality_metrics_path,
                                                 summary,
                                                 read_lengths)
        tile_metrics_path = os.path.join(run_info.interop_path,
                                         'TileMetricsOut.bin')
        summarize_tiles(tile_metrics_path, summary)
    return summary