traceback.print_exc() if reference_selected: try: # Use ionstats alignment results to generate plots ionstats_plots.alignment_rate_plot2('ionstats_alignment.json','alignment_rate_plot.png', graph_max_x) ionstats_plots.base_error_plot('ionstats_alignment.json','base_error_plot.png', graph_max_x) ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q10.png', 'AQ10', 'red') ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q17.png', 'AQ17', 'yellow') ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q20.png', 'AQ20', 'green') ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q47.png', 'AQ47', 'purple') except: traceback.print_exc() try: wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS']) except: printtime ("ERROR: Wells beadogram generation failed") traceback.print_exc() set_result_status('TF Processing') try: # TODO basecaller_results/datasets_tf.json might contain read_count : 0 if os.path.exists(os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')): # input tf_basecaller_bam_filename = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam') tf_reference_filename = os.path.join("/results/referenceLibrary/TestFragment", env['tfKey'], "DefaultTFs.fasta") # These files will be created
"ionstats_alignment.json", "Filtered_Alignments_Q20.png", "AQ20", "green", ) ionstats_plots.old_aq_length_histogram( "ionstats_alignment.json", "Filtered_Alignments_Q47.png", "AQ47", "purple", ) except Exception: traceback.print_exc() try: wells_beadogram.generate_wells_beadogram(env["BASECALLER_RESULTS"], env["SIGPROC_RESULTS"]) except Exception: printtime("ERROR: Wells beadogram generation failed") traceback.print_exc() set_result_status("TF Processing") try: # TODO basecaller_results/datasets_tf.json might contain read_count : 0 if os.path.exists( os.path.join(env["BASECALLER_RESULTS"], "rawtf.basecaller.bam")): # input tf_basecaller_bam_filename = os.path.join( env["BASECALLER_RESULTS"], "rawtf.basecaller.bam")
def basecalling(
      SIGPROC_RESULTS,
      basecallerArgs,
      libKey,
      tfKey,
      runID,
      floworder,
      reverse_primer_dict,
      BASECALLER_RESULTS,
      barcodeId,
      barcodeSamples,
      barcodesplit_filter,
      DIR_BC_FILES,
      barcodeList_path,
      barcodeMask_path,
      libraryName,
      sample,
      site_name,
      notes,
      start_time,
      chipType,
      expName,
      resultsName,
      pgmName):
    """Run the BaseCaller executable and post-process its output.

    Steps:
      1. Generate datasets_pipeline.json describing the active result files.
      2. Invoke the BaseCaller via subprocess; append its output to
         basecaller.log and raise if it exits nonzero.
      3. Move basecaller BAMs whose read groups were all filtered into
         a bc_filtered/ subdirectory, then generate the wells beadogram.

    Raises if the BaseCaller invocation itself fails; every other step is
    best-effort and only logs its errors.
    """
    if not os.path.exists(BASECALLER_RESULTS):
        os.mkdir(BASECALLER_RESULTS)

    # Step 1: Generate datasets_pipeline.json
    #
    # datasets_pipeline.json contains the list of all active result files.
    # Tasks like post_basecalling, alignment, plugins, must process each
    # specified file and merge results. Temporarily generated in
    # BASECALLER_RESULTS directory from barcodeList.txt.
    # Eventually will replace barcodeList.txt altogether.
    datasets_pipeline_path = os.path.join(BASECALLER_RESULTS, "datasets_pipeline.json")
    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")

    try:
        generate_datasets_json(
            barcodeId,
            barcodeSamples,
            barcodeList_path,
            datasets_pipeline_path,
            runID,
            libraryName,
            sample,
            site_name,
            notes,
            chipType,
            expName,
            resultsName,
            pgmName
        )
    except Exception:  # narrowed from bare except: keep SystemExit/KeyboardInterrupt fatal
        printtime('ERROR: Generation of barcode_files.json unsuccessful')
        traceback.print_exc()

    # Step 2: Invoke BaseCaller
    #
    # Derive the block offsets from the working directory name
    # (e.g. block_X1024_Y2048); default to (0, 0) for non-block runs.
    try:
        [(x, y)] = re.findall('block_X(.*)_Y(.*)', os.getcwd())
        block_col_offset = int(x) if x.isdigit() else 0
        block_row_offset = int(y) if y.isdigit() else 0
    except Exception:
        block_col_offset = 0
        block_row_offset = 0

    try:
        # 3' adapter details
        adapter = reverse_primer_dict['sequence']
        # TODO: provide barcode_filter via datasets.json
        cmd = basecaller_cmd(basecallerArgs,
                             SIGPROC_RESULTS,
                             libKey,
                             tfKey,
                             runID,
                             BASECALLER_RESULTS,
                             block_col_offset,
                             block_row_offset,
                             datasets_pipeline_path,
                             adapter,
                             barcodesplit_filter)

        printtime("DEBUG: Calling '%s':" % cmd)
        proc = subprocess.Popen(shlex.split(cmd.encode('utf8')), shell=False,
                                stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        stdout_value, stderr_value = proc.communicate()
        ret = proc.returncode
        sys.stdout.write("%s" % stdout_value)
        sys.stderr.write("%s" % stderr_value)

        # Ion Reporter: preserve the BaseCaller output in basecaller.log
        try:
            basecaller_log_path = os.path.join(BASECALLER_RESULTS, 'basecaller.log')
            with open(basecaller_log_path, 'a') as f:
                if stdout_value:
                    f.write(stdout_value)
                if stderr_value:
                    f.write(stderr_value)
        except IOError:
            traceback.print_exc()

        if ret != 0:
            printtime('ERROR: BaseCaller failed with exit code: %d' % ret)
            # Raise an explicit exception: a bare `raise` here has no active
            # exception to re-raise and would surface as an unrelated error.
            raise Exception('BaseCaller failed with exit code: %d' % ret)

        # ignore rest of operations
        if '--calibration-training' in basecallerArgs:
            printtime('training mode: ignore filtering')
            return
    except Exception:
        printtime('ERROR: BaseCaller failed')
        traceback.print_exc()
        raise

    # Step 3: Apply barcode filtering: just move the filtered files to a
    # different directory.
    # This approach to barcode filtering needs rethinking. On proton,
    # filtering should happen after block merge.
    try:
        DIR_BC_FILTERED = os.path.join(BASECALLER_RESULTS, 'bc_filtered')
        if not os.path.exists(DIR_BC_FILTERED):
            os.mkdir(DIR_BC_FILTERED)

        # use a context manager so the file is closed even if json.load raises
        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)

        for dataset in datasets_basecaller["datasets"]:
            # keep the dataset if at least one of its read groups survived filtering
            keep_dataset = any(
                not datasets_basecaller["read_groups"][rg_name].get('filtered', False)
                for rg_name in dataset["read_groups"])
            if keep_dataset:
                continue
            filtered_file = os.path.join(BASECALLER_RESULTS, dataset["basecaller_bam"])
            printtime("filter_barcodes: removing %s" % filtered_file)
            try:
                move(filtered_file, DIR_BC_FILTERED)
            except Exception:
                traceback.print_exc()
    except Exception:
        printtime("Barcode filtering failed")
        traceback.print_exc()

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime("Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished basecaller processing")
ionstats_plots.alignment_rate_plot2( 'ionstats_alignment.json', 'alignment_rate_plot.png', graph_max_x) ionstats_plots.base_error_plot('ionstats_alignment.json', 'base_error_plot.png', graph_max_x) ionstats_plots.old_aq_length_histogram( 'ionstats_alignment.json', 'Filtered_Alignments_Q10.png', 'AQ10', 'red') ionstats_plots.old_aq_length_histogram( 'ionstats_alignment.json', 'Filtered_Alignments_Q17.png', 'AQ17', 'yellow') ionstats_plots.old_aq_length_histogram( 'ionstats_alignment.json', 'Filtered_Alignments_Q20.png', 'AQ20', 'green') ionstats_plots.old_aq_length_histogram( 'ionstats_alignment.json', 'Filtered_Alignments_Q47.png', 'AQ47', 'purple') except: traceback.print_exc() try: wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS']) except: printtime("ERROR: Wells beadogram generation failed") traceback.print_exc() set_result_status('TF Processing') try: # TODO basecaller_results/datasets_tf.json might contain read_count : 0 if os.path.exists(os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')): # input tf_basecaller_bam_filename = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam') tf_reference_filename = os.path.join( "/results/referenceLibrary/TestFragment", env['tfKey'], "DefaultTFs.fasta")
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller results into composite outputs.

    For a multi-block run, combines each block's datasets_basecaller.json
    and ionstats_basecaller.json, writes a composite return code, merges
    TF metrics and BaseCaller.json files, and renders the composite plots.
    Each stage is best-effort: failures are logged and the next stage runs.
    """
    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    block_datasets_json = []
    for dir in dirs:
        current_datasets_path = os.path.join(dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            with open(current_datasets_path, 'r') as f:
                block_datasets_json.append(json.load(f))
        except Exception:  # narrowed from bare except
            printtime("ERROR: skipped %s" % current_datasets_path)

    if (not block_datasets_json
            or 'datasets' not in block_datasets_json[0]
            or 'read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    # Start from the first block's structure, then accumulate counts across blocks.
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])

    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += \
                current_datasets_json['datasets'][dataset_idx].get("read_count", 0)

    # A read group counts as filtered only if every block filtered it;
    # the 'nomatch' group is never considered filtered.
    for read_group in combined_datasets_json['read_groups']:
        rg = combined_datasets_json['read_groups'][read_group]
        rg['Q20_bases'] = 0
        rg['total_bases'] = 0
        rg['read_count'] = 0
        rg['filtered'] = 'nomatch' not in read_group
        for current_datasets_json in block_datasets_json:
            block_rg = current_datasets_json['read_groups'].get(read_group, {})
            rg['Q20_bases'] += block_rg.get("Q20_bases", 0)
            rg['total_bases'] += block_rg.get("total_bases", 0)
            rg['read_count'] += block_rg.get("read_count", 0)
            rg['filtered'] &= block_rg.get("filtered", True)

    try:
        with open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w") as f:
            json.dump(combined_datasets_json, f, indent=4)
    except Exception:
        printtime("ERROR: Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################
    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            composite_filename = os.path.join(
                BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
            barcode_filename_list = [
                os.path.join(dir, BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
                for dir in dirs]
            barcode_filename_list = [
                filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list,
                              os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, ''))
    except Exception:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################
    try:
        if len(dirs) == 96:
            # Count down from 96: each block reporting 'Basecaller=0'
            # (success) reduces the composite return code by one.
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):
                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                    if 'Basecaller=0' in text:
                        composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0o002)  # make the output group-writable
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ##################################################
    # generate TF Metrics                            #
    # look for both keys and append same file        #
    ##################################################
    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")
    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)
        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        printtime("Merging BaseCaller.json files failed")

    ###############################################
    # Generate composite plots
    ###############################################
    printtime("Build composite basecaller graphs")

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")