def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    # inst diagnostic files are always in toplevel raw data dir:
    if 'thumbnail' in rawdatadir:
        rawdatadir = rawdatadir.replace('thumbnail', '')
    files = [
        'explog_final.txt',
        'explog.txt',
        'InitLog.txt',
        'InitLog1.txt',
        'InitLog2.txt',
        'RawInit.txt',
        'RawInit.jpg',
        'InitValsW3.txt',
        'InitValsW2.txt',
        'Controller',
        'debug',
        'chipCalImage.bmp.bz2',
    ]
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip('pgm_logs.zip', os.path.join(rawdatadir, afile), arcname=afile)
    return
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the instrument raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    from ion.utils.makeCSA import rawdata_patterns as files
    # inst diagnostic files are always in toplevel raw data dir:
    if 'thumbnail' in rawdatadir:
        rawdatadir = rawdatadir.replace('thumbnail', '')
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip('pgm_logs.zip', os.path.join(rawdatadir, afile), arcname=afile)
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the instrument raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    from ion.utils.makeCSA import rawdata_patterns as files
    # inst diagnostic files are always in toplevel raw data dir:
    if "thumbnail" in rawdatadir:
        rawdatadir = rawdatadir.replace("thumbnail", "")
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip("pgm_logs.zip", os.path.join(rawdatadir, afile), arcname=afile)
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    files = [
        "explog_final.txt",
        "explog.txt",
        "InitLog.txt",
        "RawInit.txt",
        "RawInit.jpg",
        "InitValsW3.txt",
        "InitValsW2.txt",
        "Controller",
        "debug",
    ]
    for afile in files:
        make_zip("pgm_logs.zip", os.path.join(rawdatadir, afile), arcname=afile)
    return
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    # inst diagnostic files are always in toplevel raw data dir:
    if "thumbnail" in rawdatadir:
        rawdatadir = rawdatadir.replace("thumbnail", "")
    files = [
        "explog_final.txt",
        "explog.txt",
        "InitLog.txt",
        "InitLog1.txt",
        "InitLog2.txt",
        "RawInit.txt",
        "RawInit.jpg",
        "InitValsW3.txt",
        "InitValsW2.txt",
        "Controller",
        "debug",
    ]
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip("pgm_logs.zip", os.path.join(rawdatadir, afile), arcname=afile)
    return
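# The variants above all call a shared make_zip(zipname, path, arcname=...) helper
# that is defined elsewhere in the codebase. A minimal sketch of the assumed
# semantics -- append one entry per call, creating the archive on first use:
import os
import zipfile

def make_zip(zip_file, to_zip, arcname=None):
    # Append to_zip to zip_file; mode 'a' creates the archive if it is missing.
    # Assumption: single files only. The real helper presumably also handles
    # directory arguments such as 'Controller' and 'debug' by walking them.
    with zipfile.ZipFile(zip_file, 'a', zipfile.ZIP_DEFLATED, allowZip64=True) as zf:
        zf.write(to_zip, arcname=arcname or os.path.basename(to_zip))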
ionstats_file = 'ionstats_alignment.json'
try:
    stats = json.load(open(ionstats_file))
    l = stats['full']['max_read_length']
    # round the max read length up to the next multiple of 100 for the plot x-axis
    graph_max_x = int(round(l + 49, -2))

    # Make alignment_rate_plot.png and base_error_plot.png
    ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', int(graph_max_x))
    ionstats_plots.base_error_plot(ionstats_file, 'base_error_plot.png', int(graph_max_x))
except:
    traceback.print_exc()

if args.zip and len(args.files) > 1:
    # zip barcoded files
    zipname = args.zip
    printtime("Zip merged barcode files to %s" % zipname)
    for filename in args.files:
        if os.path.exists(filename):
            try:
                make_zip(zipname, filename, arcname=filename)
            except:
                print("ERROR: zip target: %s" % filename)
                traceback.print_exc()

printtime("DEBUG: CA job done.")
        'alignment_rate_plot.png', int(graph_max_x))
    print("Ionstats plot created successfully")
except:
    print("ERROR: Failed to generate alignment rate plot")

try:
    # Make base_error_plot.png
    base_error_plot.generate_base_error_plot(
        'alignStats_err.json', 'base_error_plot.png', int(graph_max_x))
except:
    print("ERROR: Failed to generate base error plot")
    traceback.print_exc()

if args.zip and len(args.files) > 1:
    # zip barcoded files
    zipname = args.zip
    print("Zip merged barcode files to %s" % zipname)
    for filename in args.files:
        if os.path.exists(filename):
            try:
                make_zip(zipname, filename, arcname=filename)
            except:
                print("ERROR: zip target: %s" % filename)
                traceback.print_exc()
def runForBarcodes():
    global pluginParams, pluginResult, pluginReport
    # read barcode ids
    barcodes = []
    try:
        bcfileName = pluginParams['analysis_dir']+'/barcodeList.txt'
        with open(bcfileName) as bcfile:
            for line in bcfile:
                if line.startswith('barcode '):
                    barcodes.append(line.split(',')[1])
    except:
        printerr("Reading barcode list file '%s'" % bcfileName)
        raise

    numGoodBams = 0
    numUnalBams = 0
    minFileSize = pluginParams['cmdOptions'].minbamsize
    (bcBamPath,bcBamRoot) = os.path.split(pluginParams['bamroot'])
    validBarcodes = []
    for barcode in barcodes:
        # use unmapped BAM if there else mapped BAM (unmapped may not be present on Proton)
        bcbam = os.path.join( bcBamPath, "%s_%s"%(barcode,bcBamRoot) )
        if not os.path.exists(bcbam):
            bcbam = os.path.join( pluginParams['analysis_dir'], "%s_rawlib.bam"%barcode )
            numUnalBams += 1
        if not os.path.exists(bcbam):
            bcbam = ": BAM file not found"
            numUnalBams -= 1
        elif os.stat(bcbam).st_size < minFileSize:
            bcbam = ": BAM file too small"
        else:
            numGoodBams += 1
            validBarcodes.append(barcode)

    printlog("Processing %d barcodes...\n" % numGoodBams)
    if numUnalBams > 0:
        printlog("Warning: %d barcodes will be processed using mapped BAM files. (Unmapped BAMs were not available.)\n" % numUnalBams)
    pluginReport['num_barcodes_processed'] = numGoodBams
    pluginReport['num_barcodes_failed'] = 0

    # iterate over all barcodes and process the valid ones
    skip_analysis = pluginParams['cmdOptions'].skip_analysis
    stop_on_error = pluginParams['cmdOptions'].stop_on_error
    create_scraper = pluginParams['cmdOptions'].scraper
    sample_names = pluginParams['sample_names']
    postout = False  # just for logfile prettiness

    sampleNamesFile = ("%s/sampleNames.txt" % pluginParams['results_dir'])
    sampleNamesFW = open(sampleNamesFile, 'w')
    for barcode in barcodes:
        sample = sample_names[barcode] if barcode in sample_names else ''
        sampleNamesFW.write("%s:%s\n" % (barcode, sample))
    sampleNamesFW.close()

    binDir = os.path.join(pluginParams['plugin_dir'], 'bin')
    outDir = pluginParams['results_dir']
    analysisDir = pluginParams['analysis_dir']
    global hotspotsFile
    hotspotsFile = pluginParams['hotspots_file']
    global targetsFile
    targetsFile = pluginParams['regions_file']
    printlog("hotspots file is %s " % hotspotsFile)
    pluginOutDir = os.path.join(analysisDir, 'plugin_out')

    global variantCallerName
    if not filter(re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)):
        printerr("Variant Caller plugin has to be run before launching the PGX Analysis plugin. Please run Torrent Variant Caller plugin")
        return
    variantCallerName = max(filter(re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)))
    global coverageAnalysisName
    if not filter(re.compile(r'coverageAnalysis_out*').search, os.listdir(pluginOutDir)):
        printerr("Coverage Analysis plugin has to be run before launching the PGX Analysis plugin. Please run the Coverage Analysis plugin")
        return
    coverageAnalysisName = max(filter(re.compile(r'coverageAnalysis_out*').search, os.listdir(pluginOutDir)))
    printlog(variantCallerName)
    printlog(coverageAnalysisName)
    variantCallerDir = os.path.join(pluginOutDir, variantCallerName)
    printlog("variantcaller dir is %s" % variantCallerDir)
    coverageAnalysisDir = os.path.join(pluginOutDir, coverageAnalysisName)

    hotspotsFileVC = ""
    resultsJsonFile = os.path.join(variantCallerDir, "results.json")
    if not os.path.isfile(resultsJsonFile):
        printerr("VariantCaller results are not ready. Please wait for the variant Caller plugin to finish and then launch the PGx plugin")
        return
    covAnalysisResultsJsonFile = os.path.join(coverageAnalysisDir, "results.json")
    if not os.path.isfile(covAnalysisResultsJsonFile):
        printerr("Coverage Analysis results are not ready. Please wait for the Coverage Analysis plugin to finish and then launch the PGx plugin")
        return

    targetsFileVC = ""
    with open(resultsJsonFile) as fin:
        for line in fin:
            if "hotspots_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                hotspotsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in hotspotsFileVC:
                    hotspotsFileVC = hotspotsFileVC[:-2]
                else:
                    hotspotsFileVC = hotspotsFileVC[:-1]
                hotspotsFileVC = os.path.join(variantCallerDir, hotspotsFileVC)
            if "targets_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                targetsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in targetsFileVC:
                    targetsFileVC = targetsFileVC[:-2]
                else:
                    targetsFileVC = targetsFileVC[:-1]
                targetsFileVC = os.path.join(variantCallerDir, targetsFileVC)

    if not hotspotsFileVC:
        printerr("Cannot obtain the hotspots file used by the VariantCaller. Trying to obtain the hotspots file from plan")
    else:
        hotspotsFile = hotspotsFileVC
    if not hotspotsFile:
        printerr("The plan is not set up with a hotspots file.")
        return
    if not targetsFileVC:
        printerr("Cannot obtain the Target Regions file used by the VariantCaller. Trying to obtain the regions file from plan")
    else:
        targetsFile = targetsFileVC

    cmd = ("java -jar %s/PGX_Analysis.jar %s %s %s %s %s %s %s %s" % (binDir, hotspotsFile, outDir, bcfileName, analysisDir, variantCallerDir, coverageAnalysisDir, binDir, sampleNamesFile))
    printlog(cmd)
    RunCommand(cmd)

    # parse out data in results text file to dict AND convert spaces to underscores in keys to avoid Django issues
    statsfile = 'summary.txt'
    analysisData = parseToDict( os.path.join(outDir,statsfile), "\t" )
    global numPass, numUniformity, numAvgCov, totalSamples
    totalSamples = numGoodBams
    numPass = numGoodBams
    numAvgCov = 0
    numUniformity = 0
    for keys, values in analysisData.items():
        printlog(keys)
        printlog(values)

    for file in os.listdir("%s/cnvCalls" % outDir):
        if file.endswith(".log"):
            cnvCallsDir = os.path.join(outDir, "cnvCalls")
            filein = os.path.join(cnvCallsDir, file)
            printlog("filein is %s " % filein)
            with open(filein) as fin:
                sep = "="
                for line in fin:
                    if "valid Samples =" in line:
                        kvp = line.split(sep)
                        totalSamples = kvp[1].strip()
                    elif "CNV Calling =" in line:
                        kvp = line.split(sep)
                        numPass = kvp[1].strip()
                    elif "Average coverage" in line:
                        kvp = line.split(sep)
                        numAvgCov = kvp[1].strip()
                    elif "Uniformate Rate" in line:
                        kvp = line.split(sep)
                        numUniformity = kvp[1].strip()

    zipfilename = "%s/cnvExports.zip" % outDir
    cnvExportsDir = "%s/cnvExports" % outDir
    for file in os.listdir(cnvExportsDir):
        if file.endswith("_cn.txt"):
            filein = os.path.join(cnvExportsDir, file)
            compress.make_zip(zipfilename, filein, arcname=os.path.basename(filein), use_sys_zip=False)

    vcfZipFilename = "%s/%s.vcf.zip" % (outDir, pluginParams['prefix'])
    mergedVcfsDir = "%s/merged_VCFs" % outDir
    for file in os.listdir(mergedVcfsDir):
        if file.endswith(".gz") or file.endswith(".tbi"):
            filein = os.path.join(mergedVcfsDir, file)
            compress.make_zip(vcfZipFilename, filein, arcname=os.path.basename(filein), use_sys_zip=False)

    global barcodeData
    for barcode in validBarcodes:
        barcode_entry = {}
        sample = sample_names[barcode] if barcode in sample_names else ''
        barcode_entry['name'] = barcode
        if barcode in analysisData:
            barcodeLine = analysisData[barcode]
            kvp = barcodeLine.split("\t")
            #key = kvp[0].strip()
            if sample == '':
                barcode_entry['sample'] = 'none'
            else:
                barcode_entry['sample'] = sample
            if len(kvp) < 8 and kvp[2].strip() == 'null':
                barcode_entry['hotspots_variants_total'] = "none"
                barcode_entry['novel_variants_total'] = "none"
                barcode_entry['exon9_cnv'] = kvp[3].strip()
                barcode_entry['gene_cnv'] = kvp[4].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[5].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[6].strip()
            else:
                barcode_entry['hotspots_variants_total'] = "%d/%s" % (int(kvp[2].strip()) - int(kvp[4].strip()) - int(kvp[5].strip()), kvp[2].strip())
                barcode_entry['novel_variants_total'] = int(kvp[3].strip()) - int(kvp[2].strip())
                barcode_entry['exon9_cnv'] = kvp[6].strip()
                barcode_entry['gene_cnv'] = kvp[7].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[8].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[9].strip()
            barcodeData.append(barcode_entry)

    updateBarcodeSummaryReport()
    if create_scraper:
        createScraperLinksFolder( pluginParams['results_dir'], pluginParams['prefix'] )
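# Aside: runForBarcodes() above recovers "hotspots_bed"/"targets_bed" from
# results.json by scanning it line by line and slicing trailing quote and comma
# characters off by hand. Since the file is JSON, a sturdier lookup is possible.
# A sketch, assuming the keys sit at the top level of results.json (as the
# string matching above implies); bed_files_from_results_json is a hypothetical
# helper, not part of the plugin:
import json
import os

def bed_files_from_results_json(results_json_file, variant_caller_dir):
    with open(results_json_file) as fin:
        results = json.load(fin)

    def localize(key):
        value = results.get(key, "")
        # resolve to a file inside the variantCaller output dir, as above
        return os.path.join(variant_caller_dir, os.path.basename(value)) if value else ""

    return localize("hotspots_bed"), localize("targets_bed")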
    printtime(traceback.format_exc())

prefix_list = [dataset['file_prefix'] for dataset in datasets_basecaller.get("datasets",[])]

if len(prefix_list) > 1:
    zip_task_list = [
        ('bam',            env['ALIGNMENT_RESULTS']),
        ('bam.bai',        env['ALIGNMENT_RESULTS']),
        ('basecaller.bam', env['BASECALLER_RESULTS']),]

    for extension, base_dir in zip_task_list:
        zipname = "%s/%s_%s.barcode.%s.zip" % (download_links, env['expName'], env['resultsName'], extension)
        for prefix in prefix_list:
            try:
                filename = "%s/%s_%s_%s.%s" % (download_links, prefix.rstrip('_rawlib'), env['expName'], env['resultsName'], extension)
                src = os.path.join(base_dir, prefix+'.'+extension)
                if os.path.exists(src):
                    os.symlink(os.path.relpath(src, os.path.dirname(filename)), filename)
                    make_zip(zipname, filename, arcname=filename, compressed=False)
            except:
                printtime("ERROR: target: %s" % filename)
                traceback.print_exc()
else:
    printtime("MergeTLScript: No barcode run")

printtime("MergeTLScript exit")
sys.exit(0)
    ]

    for extension, base_dir in zip_task_list:
        zipname = "%s/%s_%s.barcode.%s.zip" % (
            download_links, env['expName'], env['resultsName'], extension)
        for prefix in prefix_list:
            try:
                filename = "%s/%s_%s_%s.%s" % (
                    download_links, prefix.rstrip('_rawlib'), env['expName'],
                    env['resultsName'], extension)
                src = os.path.join(base_dir, prefix + '.' + extension)
                if os.path.exists(src):
                    os.symlink(
                        os.path.relpath(src, os.path.dirname(filename)), filename)
                    make_zip(zipname, filename, arcname=filename, compressed=False)
            except:
                printtime("ERROR: target: %s" % filename)
                traceback.print_exc()
else:
    printtime("MergeTLScript: No barcode run")

printtime("MergeTLScript exit")
sys.exit(0)
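# Caveat shared by both variants above: prefix.rstrip('_rawlib') strips any
# trailing run of the characters {_, r, a, w, l, i, b}, not the literal suffix,
# e.g. 'mylib_rawlib'.rstrip('_rawlib') yields 'my', not 'mylib'. A suffix-safe
# sketch (strip_suffix is a hypothetical helper; Python 3.9+ offers
# str.removesuffix for the same job):
def strip_suffix(text, suffix):
    # Drop suffix only if text really ends with it.
    return text[:-len(suffix)] if suffix and text.endswith(suffix) else text

# usage: strip_suffix(prefix, '_rawlib') in place of prefix.rstrip('_rawlib')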
except:
    pass
try:
    r = subprocess.call(["ln", "-s", "rawtf.bam", tfbam])
except:
    pass

##################################################
# Create zip of files
##################################################

#sampled sff
#make_zip(libsff.replace(".sff",".sampled.sff")+'.zip', libsff.replace(".sff",".sampled.sff"))

#library sff
make_zip(libsff + '.zip', libsff)

#tf sff
make_zip(tfsff + '.zip', tfsff)

#fastq zip
make_zip(fastqpath + '.zip', fastqpath)

#sampled fastq
#make_zip(fastqpath.replace(".fastq",".sampled.fastq")+'.zip', fastqpath.replace(".fastq",".sampled.fastq"))

########################################################
# barcode processing                                   #
# Zip up and move sff, fastq, bam, bai files           #
# Move zip files to results directory                  #
########################################################
def plugin_main():
    global PLUGIN_DEV_SKIP_VARIANT_CALLING
    global DIRNAME
    global TSP_URLPATH_PLUGIN_DIR
    global TSP_FILEPATH_PLUGIN_DIR
    global startplugin_json
    global output_files

    parser = OptionParser()
    parser.add_option('-d', '--install-dir', help='Directory containing plugin files', dest='install_dir')
    parser.add_option('-o', '--output-dir', help='Directory for results files', dest='output_dir')
    parser.add_option('-u', '--output-url', help='URL matching the output directory', dest='output_url')
    parser.add_option('-r', '--report-dir', help='Directory containing analysis report files', dest='report_dir')
    parser.add_option('-f', '--genome-fasta', help='Reference genome fasta file', dest='genome_fasta')
    parser.add_option('-s', '--skip-tvc', help='(debug) Skip variant calling and reuse existing results', dest='skip_tvc', action="store_true", default=False)
    (options, args) = parser.parse_args()

    DIRNAME = options.install_dir                  #os.environ['DIRNAME'] # home directory for the plugin files
    TSP_FILEPATH_PLUGIN_DIR = options.output_dir   #os.environ['TSP_FILEPATH_PLUGIN_DIR'] # target plugin results directory
    ANALYSIS_DIR = options.report_dir              #os.environ['ANALYSIS_DIR'] # main report directory
    TSP_URLPATH_PLUGIN_DIR = options.output_url
    PLUGIN_DEV_SKIP_VARIANT_CALLING = options.skip_tvc

    settings.configure(DEBUG=True, TEMPLATE_DEBUG=True, TEMPLATE_DIRS=((DIRNAME+'/templates'),))

    subprocess.call('rm -f %s/results.json' % TSP_FILEPATH_PLUGIN_DIR, shell=True)

    printtime('')
    printtime('Variant Caller Plugin started')
    printtime('')

    try:
        json_file = open(os.path.join(TSP_FILEPATH_PLUGIN_DIR,'startplugin.json'), 'r')
        startplugin_json = json.load(json_file, parse_float=str)
        json_file.close()
    except:
        printtime('ERROR: Failed to load and parse startplugin.json')
        return 1

    # Uncomment to emulate autorun:
    #startplugin_json['pluginconfig'] = {}

    vc_options = options_for_manual_start(startplugin_json)
    #if not vc_options:
    #    vc_options = options_for_plan_autostart(startplugin_json)

    vc_options['run_name'] = startplugin_json['expmeta'].get('run_name','Current run')
    vc_options['genome_name'] = startplugin_json['runinfo'].get('library','')
    vc_options['plugin_name'] = startplugin_json['runinfo'].get('plugin_name','')
    vc_options['genome_fasta'] = options.genome_fasta  #os.environ['TSP_FILEPATH_GENOME_FASTA']

    if 'error' in vc_options:
        printtime(vc_options['error'])
        generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), vc_options['error'], vc_options)
        return 1

    f = open(os.path.join(TSP_FILEPATH_PLUGIN_DIR,BASENAME_PARAMETERS_JSON),'w')
    json.dump(vc_options['parameters'], f, indent=4)
    f.close()
    add_output_file('parameters_json', BASENAME_PARAMETERS_JSON)

    TSP_FILEPATH_BARCODE_TXT = ANALYSIS_DIR + '/barcodeList.txt'
    vc_options['has_barcodes'] = False
    if os.path.exists(TSP_FILEPATH_BARCODE_TXT):
        vc_options['has_barcodes'] = True

    # Call tvc -v to get the version string
    tvc_args = vc_options['parameters'].get('meta',{}).get('tvcargs','tvc')
    if tvc_args == 'tvc' and os.path.exists(DIRNAME + '/tvc'):  # try local binary first, then go to global one
        tvc_args = DIRNAME + '/tvc'
    vc_options['tvc_version'] = execute_output(tvc_args + ' -v').splitlines()[0]
    if vc_options['tvc_version'].endswith('- Torrent Variant Caller'):
        vc_options['tvc_version'] = vc_options['tvc_version'][:-24].strip()

    # Parameters from plugin customization
    printtime('Variant Caller plugin run options:')
    printtime('  Plugin name             : ' + vc_options['plugin_name'])
    printtime('  Plugin start mode       : ' + vc_options['start_mode'])
    printtime('  Variant Caller version  : ' + vc_options['tvc_version'])
    printtime('  Run is barcoded         : ' + str(vc_options['has_barcodes']))
    printtime('  Genome                  : ' + vc_options['genome_name'])
    printtime('  Library Type            : ' + vc_options['library_type'])
    printtime('  Target Regions          : ' + (vc_options['targets_name'] if vc_options['has_targets'] else 'Not using'))
    printtime('  Hotspots                : ' + (vc_options['hotspots_name'] if vc_options['has_hotspots'] else 'Not using'))
    if 'original_parameters' in vc_options:
        printtime('  Requested Parameters    : ' + vc_options["original_config_line1"])
        printtime('                            ' + vc_options["original_config_line2"])
        printtime('  Auto-Updated Parameters : ' + vc_options["config_line1"])
        printtime('                            ' + vc_options["config_line2"])
    else:
        printtime('  Used Parameters         : ' + vc_options['config_line1'])
        printtime('                            ' + vc_options["config_line2"])
    printtime('  Trim Reads              : ' + str(vc_options['trim_reads']))
    printtime('')
    printtime('Used files:')
    printtime('  Reference Genome        : ' + vc_options['genome_fasta'])
    printtime('  Parameters file         : ' + os.path.join(TSP_FILEPATH_PLUGIN_DIR,BASENAME_PARAMETERS_JSON))
    if 'parameters_source' in vc_options:
        printtime('  Parameters source file  : ' + vc_options['parameters_source'])
    if vc_options['has_targets']:
        printtime('  Target unmerged BED     : ' + vc_options['targets_bed_unmerged'])
        printtime('  Target merged BED       : ' + vc_options['targets_bed_merged'])
    if vc_options['has_hotspots']:
        printtime('  Hotspots unmerged BED   : ' + vc_options['hotspots_bed_unmerged'])
        printtime('  Hotspots merged BED     : ' + vc_options['hotspots_bed_merged'])
    printtime('')

    PLUGIN_HS_ALIGN_DIR = TSP_FILEPATH_PLUGIN_DIR + '/hs_align'

    # Remove previous results to avoid displaying old before ready
    subprocess.call('rm -f %s/%s' % (TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), shell=True)
    subprocess.call('rm -f %s' % (TSP_FILEPATH_PLUGIN_DIR + '/results.json'), shell=True)
    subprocess.call('rm -f %s/*.bed' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -rf %s/*.bam*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -rf %s' % (PLUGIN_HS_ALIGN_DIR), shell=True)
    subprocess.call('rm -f %s/hotspot*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -f %s/variant*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -f %s/allele*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -f %s/*.xls' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -f %s/*.log' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -f %s/*.done' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('rm -rf %s/lifegrid' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    if not PLUGIN_DEV_SKIP_VARIANT_CALLING:
        subprocess.call('rm -f %s/SNP*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
        subprocess.call('rm -f %s/indel*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
        subprocess.call('rm -f %s/TSVC*' % (TSP_FILEPATH_PLUGIN_DIR), shell=True)
    printtime('Results folder initialized')

    # Get local copy of BED files (may be deleted from system later)
    if vc_options['has_targets']:
        if not os.path.exists(vc_options['targets_bed_unmerged']):
            printtime('ERROR: Cannot locate target regions file: ' + vc_options['targets_bed_unmerged'])
            return 1
        if not os.path.exists(vc_options['targets_bed_merged']):
            printtime('ERROR: Cannot locate merged target regions file: ' + vc_options['targets_bed_merged'])
            return 1
        subprocess.call('cp -f %s %s/%s' % (vc_options['targets_bed_unmerged'],TSP_FILEPATH_PLUGIN_DIR,os.path.basename(vc_options['targets_bed_unmerged'])), shell=True)
        add_output_file('target_regions_bed', os.path.basename(vc_options['targets_bed_unmerged']))

    if vc_options['has_hotspots']:
        if not os.path.exists(vc_options['hotspots_bed_unmerged']):
            printtime('ERROR: Cannot locate hotspots file: ' + vc_options['hotspots_bed_unmerged'])
            return 1
        if not os.path.exists(vc_options['hotspots_bed_merged']):
            printtime('ERROR: Cannot locate merged hotspots file: ' + vc_options['hotspots_bed_merged'])
            return 1
        vc_options['hotspots_bed_unmerged_local'] = os.path.join(TSP_FILEPATH_PLUGIN_DIR,os.path.basename(vc_options['hotspots_bed_unmerged']))
        vc_options['hotspots_bed_unmerged_leftalign'] = vc_options['hotspots_bed_unmerged_local'][:-4] + '.left.bed'
        subprocess.call('cp -f %s %s' % (vc_options['hotspots_bed_unmerged'],vc_options['hotspots_bed_unmerged_local']), shell=True)

        prepare_hotspots_command  = 'tvcutils prepare_hotspots'
        prepare_hotspots_command += ' --input-bed %s' % vc_options['hotspots_bed_unmerged']
        prepare_hotspots_command += ' --reference %s' % vc_options['genome_fasta']
        prepare_hotspots_command += ' --left-alignment on'
        prepare_hotspots_command += ' --allow-block-substitutions on'
        prepare_hotspots_command += ' --output-bed %s' % vc_options['hotspots_bed_unmerged_leftalign']
        prepare_hotspots_command += ' --output-vcf %s/hotspot.vcf' % TSP_FILEPATH_PLUGIN_DIR
        run_command(prepare_hotspots_command, 'Generate filtered, left-aligned, and merged hotspot VCF file')

        hotspot_file_empty = True
        try:
            f = open('%s/hotspot.vcf' % TSP_FILEPATH_PLUGIN_DIR, 'r')
            for line in f:
                if not line or line.startswith('#'):
                    continue
                hotspot_file_empty = False
        except:
            pass
        if hotspot_file_empty:
            printtime('Filtered hotspot file has no hotspot entries. Disabling hotspots')
            vc_options['has_hotspots'] = False
        else:
            #run_command('bgzip -c %s/hotspot.vcf > %s/hotspot.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,TSP_FILEPATH_PLUGIN_DIR), 'Generate compressed hotspot vcf')
            #run_command('tabix -p vcf %s/hotspot.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR), 'Generate index for compressed hotspot vcf')
            vc_options['hotspots_vcf'] = TSP_FILEPATH_PLUGIN_DIR + '/hotspot.vcf'
            add_output_file('hotspots_bed', os.path.basename(vc_options['hotspots_bed_unmerged']))

    # Make links to js/css used for barcodes table and empty results page
    subprocess.call('ln -sf "%s/js" "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('ln -sf "%s/css" "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR), shell=True)
    subprocess.call('ln -sf %s/scripts/*.php3 "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR), shell=True)

    results_json = {
        'Aligned Reads'  : vc_options['run_name'],
        'Library Type'   : vc_options['library_type'],
        'Configuration'  : vc_options['parameters']['meta']['configuration'],
        'Target Regions' : (vc_options['targets_name'] if vc_options['has_targets'] else 'Not using'),
        'Target Loci'    : (vc_options['hotspots_name'] if vc_options['has_hotspots'] else 'Not using'),
        'Trim Reads'     : vc_options['trim_reads'],
        'barcoded'       : 'false',
        'files'          : []
    }
    if vc_options['has_targets']:
        results_json['targets_bed'] = vc_options['targets_bed_unmerged']
    if vc_options['has_hotspots']:
        results_json['hotspots_bed'] = vc_options['hotspots_bed_unmerged']

    if vc_options['has_barcodes']:
        # Run for barcodes or single page
        barcode_samples_string = startplugin_json.get('plan',{}).get('barcodedSamples',"")
        barcode_sample_info = {}
        if barcode_samples_string:
            barcode_samples_json = json.loads(barcode_samples_string)
            for k,v in barcode_samples_json.iteritems():
                barcode_sample_info.update(v.get('barcodeSampleInfo',{}))

        # Load barcode list
        barcode_data = []
        bc_list_file = open(TSP_FILEPATH_BARCODE_TXT,'r')
        for line in bc_list_file:
            if not line.startswith('barcode '):
                continue
            barcode_entry = {}
            barcode_entry['name'] = line.split(',')[1]
            barcode_entry['bam'] = os.path.join(ANALYSIS_DIR, barcode_entry['name'] + '_rawlib.bam')
            barcode_entry['status'] = 'queued'
            if not PLUGIN_DEV_SKIP_VARIANT_CALLING:
                subprocess.call('rm -rf %s/%s' % (TSP_FILEPATH_PLUGIN_DIR,barcode_entry['name']), shell=True)
            if not os.path.exists(barcode_entry['bam']):
                continue
            if barcode_entry['name'] in barcode_sample_info:
                barcode_nuc_type = barcode_sample_info[barcode_entry['name']].get('nucleotideType','')
                if not barcode_nuc_type:
                    barcode_nuc_type = 'DNA'
                barcode_reference = barcode_sample_info[barcode_entry['name']].get('reference',vc_options['genome_name'])
                if barcode_nuc_type != 'DNA':
                    printtime('Skipping barcode ' + barcode_entry['name'] + ' : Unsupported nuc type ' + barcode_nuc_type)
                    continue
                if barcode_reference != vc_options['genome_name']:
                    printtime('Skipping barcode ' + barcode_entry['name'] + ' : Barcode reference ' + barcode_reference + ' different from run reference ' + vc_options['genome_name'])
                    continue
            # Size enough to process? TODO - just get from datasets_basecaller.json
            if os.stat(barcode_entry['bam']).st_size < BCFILE_MIN_SIZE:
                barcode_entry['status'] = 'insufficient_reads'
            elif is_bam_invalid(barcode_entry['bam']):
                barcode_entry['status'] = 'invalid_bam'
            barcode_data.append(barcode_entry)
        bc_list_file.close()
        # End load barcode list

        printtime('')
        printtime('Processing %d barcodes...' % len(barcode_data))

        # Start json file
        results_json['barcoded'] = 'true'
        results_json['barcodes'] = {}

        all_barcodes_successful = True

        for barcode_idx in range(len(barcode_data)):
            if barcode_data[barcode_idx]['status'] != 'queued':
                printtime('Skipping barcode ' + barcode_data[barcode_idx]['name'])
                continue
            barcode_data[barcode_idx]['status'] = 'in_progress'
            generate_barcode_links_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), barcode_data, vc_options)

            BARCODE_DIR = TSP_FILEPATH_PLUGIN_DIR + '/' + barcode_data[barcode_idx]['name']
            if not os.path.exists(BARCODE_DIR):
                os.makedirs(BARCODE_DIR)

            # perform coverage analysis and write content
            printtime('')
            printtime('Processing barcode ' + barcode_data[barcode_idx]['name'])
            try:
                summary = call_variants(BARCODE_DIR, barcode_data[barcode_idx]['bam'], vc_options, barcode_data[barcode_idx]['name'])
                results_json['barcodes'][barcode_data[barcode_idx]['name']] = {}
                results_json['barcodes'][barcode_data[barcode_idx]['name']]['variants'] = summary.get('variants_total',{})
                results_json['barcodes'][barcode_data[barcode_idx]['name']]['hotspots'] = summary.get('hotspots_total',{})
                barcode_data[barcode_idx]['summary'] = summary
                barcode_data[barcode_idx]['status'] = 'completed'
            except:
                traceback.print_exc()
                all_barcodes_successful = False
                barcode_data[barcode_idx]['status'] = 'error'

        # Replaced with python zip library because of failures due to too-long argument lists.
        #
        # #Zip all vcf.gz and vcf.gz.tbi files
        # zip_vcf_command = 'echo "'
        # zip_vcf_command += ' '.join(('%s/%s/TSVC_variants_%s.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name']))
        #                             for barcode in barcode_data if barcode['status'] == 'completed')
        # zip_vcf_command += ' '
        # zip_vcf_command += ' '.join(('%s/%s/TSVC_variants_%s.vcf.gz.tbi' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name']))
        #                             for barcode in barcode_data if barcode['status'] == 'completed')
        # zip_vcf_command += '" | xargs zip --junk-paths %s/%s.vcf.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        # run_command(zip_vcf_command, 'Store per-barcode vcf files in a single zip file')
        #
        # #Zip all variants_*.xls files.
        # zip_xls_command = 'echo "'
        # zip_xls_command += ' '.join(('%s/%s/alleles_%s.xls' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name']))
        #                             for barcode in barcode_data if barcode['status'] == 'completed')
        # zip_xls_command += '" | xargs zip --junk-paths %s/%s.xls.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        # run_command(zip_xls_command, 'Store per-barcode xls files in a single zip file')

        printtime(' ')
        printtime('Task    : ' + 'Store per-barcode vcf files in a single zip file')
        zipfilename = '%s/%s.vcf.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        for myfile in [('%s/%s/TSVC_variants_%s.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip=False)
        for myfile in [('%s/%s/TSVC_variants_%s.vcf.gz.tbi' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip=False)
        printtime(' ')

        printtime(' ')
        printtime('Task    : ' + 'Store per-barcode xls files in a single zip file')
        zipfilename = '%s/%s.xls.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        for myfile in [('%s/%s/alleles_%s.xls' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip=False)
        printtime(' ')

        generate_barcode_links_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), barcode_data, vc_options)
        generate_barcode_links_block(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_BLOCK), barcode_data, vc_options)

        if not all_barcodes_successful:
            return 1

    else:
        # Non-barcoded run
        generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'Variant calling still in progress', vc_options, autorefresh=True)
        fullpath_input_bam = os.path.join(ANALYSIS_DIR, 'rawlib.bam')
        if is_bam_invalid(fullpath_input_bam):
            generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'BAM file format validation failed. Regenerate BAM with latest TS.', vc_options)
            return 1
        try:
            summary = call_variants(TSP_FILEPATH_PLUGIN_DIR, fullpath_input_bam, vc_options)
            results_json['variants'] = summary.get('variants_total',{})
            results_json['hotspots'] = summary.get('hotspots_total',{})
        except:
            traceback.print_exc()
            generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'An error occurred - check Log File for details', vc_options)
            return 1

    results_json['files'] = output_files
    out = open(TSP_FILEPATH_PLUGIN_DIR + '/results.json','w')
    json.dump(results_json, out, indent=4)
    out.close()

    printtime('')
    printtime('Variant Caller Plugin complete')
    printtime('')
    return 0
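# A minimal entry-point wrapper for plugin_main(), sketched on the assumption
# that the surrounding script (which already uses sys.exit elsewhere) is run
# directly as the plugin executable:
if __name__ == "__main__":
    sys.exit(plugin_main())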
if not os.path.exists(dst):
    try:
        os.symlink(src, dst)
    except:
        printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))
        printtime(traceback.format_exc())

##################################################
# Create zip of files
##################################################

#sampled sff
#make_zip(libsff.replace(".sff",".sampled.sff")+'.zip', libsff.replace(".sff",".sampled.sff"))

#library sff
make_zip(libsff + '.zip', libsff, arcname=libsff)

#tf sff
make_zip(tfsff + '.zip', tfsff, arcname=tfsff)

#fastq zip
make_zip(fastqpath + '.zip', fastqpath, arcname=fastqpath)

#sampled fastq
#make_zip(fastqpath.replace(".fastq",".sampled.fastq")+'.zip', fastqpath.replace(".fastq",".sampled.fastq"))

########################################################
# barcode processing                                   #
# Zip up and move sff, fastq, bam, bai files           #
# Move zip files to results directory                  #
########################################################
        start_time, env['ALIGNMENT_RESULTS'], bidirectional, sam_parsed)

    try:
        quality = os.path.join('.', "quality.summary")
        shutil.copy(quality, "../"+status+".quality.summary")
    except:
        printtime("ERROR: %s doesn't exist" % quality)
        pass
    shutil.copy("alignTable.txt", "../"+status+".alignTable.txt")
    shutil.copy("alignment.summary", "../"+status+".alignment.summary")
    make_zip(sff_path+'.zip', sff_path, arcname=sff_path)
    make_zip(fastq_path+'.zip', fastq_path, arcname=fastq_path)
    os.chdir(top_dir)

# plugin framework expects the sff file in the env['BASECALLER_RESULTS'] subdirectory
union_sff = os.path.join(env['BASECALLER_RESULTS'], basename+".sff")
union_fastq = os.path.join(env['BASECALLER_RESULTS'], basename+".fastq")

# merge sff files
try:
    com = "SFFMerge"
    com += " -r"
    com += " -o %s" % union_sff
    for status in ["Paired_Fwd","Paired_Rev","Singleton_Fwd","Singleton_Rev","corrected"]:
        sff = basename+"_"+status+".sff"
        site_name, flows, notes, barcodeId, aligner_opts_extra,
        start_time, env['ALIGNMENT_RESULTS'], bidirectional, sam_parsed)

    shutil.copy("alignment.summary", "../"+status+".alignment.summary")
    shutil.copy("quality.summary", "../"+status+".quality.summary")
    shutil.copy("alignTable.txt", "../"+status+".alignTable.txt")
    make_zip(sff_path+'.zip', sff_path)
    make_zip(fastq_path+'.zip', fastq_path)
    os.chdir(top_dir)

union_sff = basename+".sff"
union_fastq = basename+".fastq"

# merge sff files
try:
    com = "SFFMerge"
    com += " -r"
    com += " -o %s" % union_sff
    for status in ["Paired_Fwd","Paired_Rev","Singleton_Fwd","Singleton_Rev","corrected"]:
        sff = basename+"_"+status+".sff"
        if os.path.exists(sff):
def runForBarcodes():
    global pluginParams, pluginResult, pluginReport
    # read barcode ids
    barcodes = []
    try:
        bcfileName = pluginParams['analysis_dir'] + '/barcodeList.txt'
        with open(bcfileName) as bcfile:
            for line in bcfile:
                if line.startswith('barcode '):
                    barcodes.append(line.split(',')[1])
    except:
        printerr("Reading barcode list file '%s'" % bcfileName)
        raise

    numGoodBams = 0
    numUnalBams = 0
    minFileSize = pluginParams['cmdOptions'].minbamsize
    (bcBamPath, bcBamRoot) = os.path.split(pluginParams['bamroot'])
    validBarcodes = []
    for barcode in barcodes:
        # use unmapped BAM if there else mapped BAM (unmapped may not be present on Proton)
        bcbam = os.path.join(bcBamPath, "%s_%s" % (barcode, bcBamRoot))
        if not os.path.exists(bcbam):
            bcbam = os.path.join(pluginParams['analysis_dir'],
                                 "%s_rawlib.bam" % barcode)
            numUnalBams += 1
        if not os.path.exists(bcbam):
            bcbam = ": BAM file not found"
            numUnalBams -= 1
        elif os.stat(bcbam).st_size < minFileSize:
            bcbam = ": BAM file too small"
        else:
            numGoodBams += 1
            validBarcodes.append(barcode)

    printlog("Processing %d barcodes...\n" % numGoodBams)
    if numUnalBams > 0:
        printlog(
            "Warning: %d barcodes will be processed using mapped BAM files. (Unmapped BAMs were not available.)\n"
            % numUnalBams)
    pluginReport['num_barcodes_processed'] = numGoodBams
    pluginReport['num_barcodes_failed'] = 0

    # iterate over all barcodes and process the valid ones
    skip_analysis = pluginParams['cmdOptions'].skip_analysis
    stop_on_error = pluginParams['cmdOptions'].stop_on_error
    create_scraper = pluginParams['cmdOptions'].scraper
    sample_names = pluginParams['sample_names']
    postout = False  # just for logfile prettiness

    sampleNamesFile = ("%s/sampleNames.txt" % pluginParams['results_dir'])
    sampleNamesFW = open(sampleNamesFile, 'w')
    for barcode in barcodes:
        sample = sample_names[barcode] if barcode in sample_names else ''
        sampleNamesFW.write("%s:%s\n" % (barcode, sample))
    sampleNamesFW.close()

    binDir = os.path.join(pluginParams['plugin_dir'], 'bin')
    outDir = pluginParams['results_dir']
    analysisDir = pluginParams['analysis_dir']
    global hotspotsFile
    hotspotsFile = pluginParams['hotspots_file']
    global targetsFile
    targetsFile = pluginParams['regions_file']
    printlog("hotspots file is %s " % hotspotsFile)
    pluginOutDir = os.path.join(analysisDir, 'plugin_out')

    global variantCallerName
    if not filter(
            re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)):
        printerr(
            "Variant Caller plugin has to be run before launching the PGX Analysis plugin. Please run Torrent Variant Caller plugin"
        )
        return
    variantCallerName = max(
        filter(
            re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)))
    global coverageAnalysisName
    if not filter(
            re.compile(r'coverageAnalysis_out*').search,
            os.listdir(pluginOutDir)):
        printerr(
            "Coverage Analysis plugin has to be run before launching the PGX Analysis plugin. Please run the Coverage Analysis plugin"
        )
        return
    coverageAnalysisName = max(
        filter(
            re.compile(r'coverageAnalysis_out*').search,
            os.listdir(pluginOutDir)))
    printlog(variantCallerName)
    printlog(coverageAnalysisName)
    variantCallerDir = os.path.join(pluginOutDir, variantCallerName)
    printlog("variantcaller dir is %s" % variantCallerDir)
    coverageAnalysisDir = os.path.join(pluginOutDir, coverageAnalysisName)

    hotspotsFileVC = ""
    resultsJsonFile = os.path.join(variantCallerDir, "results.json")
    if not os.path.isfile(resultsJsonFile):
        printerr(
            "VariantCaller results are not ready. Please wait for the variant Caller plugin to finish and then launch the PGx plugin"
        )
        return
    covAnalysisResultsJsonFile = os.path.join(coverageAnalysisDir,
                                              "results.json")
    if not os.path.isfile(covAnalysisResultsJsonFile):
        printerr(
            "Coverage Analysis results are not ready. Please wait for the Coverage Analysis plugin to finish and then launch the PGx plugin"
        )
        return

    targetsFileVC = ""
    with open(resultsJsonFile) as fin:
        for line in fin:
            if "hotspots_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                hotspotsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in hotspotsFileVC:
                    hotspotsFileVC = hotspotsFileVC[:-2]
                else:
                    hotspotsFileVC = hotspotsFileVC[:-1]
                hotspotsFileVC = os.path.join(variantCallerDir, hotspotsFileVC)
            if "targets_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                targetsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in targetsFileVC:
                    targetsFileVC = targetsFileVC[:-2]
                else:
                    targetsFileVC = targetsFileVC[:-1]
                targetsFileVC = os.path.join(variantCallerDir, targetsFileVC)

    if not hotspotsFileVC:
        printerr(
            "Cannot obtain the hotspots file used by the VariantCaller. Trying to obtain the hotspots file from plan"
        )
    else:
        hotspotsFile = hotspotsFileVC
    if not hotspotsFile:
        printerr("The plan is not set up with a hotspots file.")
        return
    if not targetsFileVC:
        printerr(
            "Cannot obtain the Target Regions file used by the VariantCaller. Trying to obtain the regions file from plan"
        )
    else:
        targetsFile = targetsFileVC

    cmd = ("java -jar %s/PGX_Analysis.jar %s %s %s %s %s %s %s %s" %
           (binDir, hotspotsFile, outDir, bcfileName, analysisDir,
            variantCallerDir, coverageAnalysisDir, binDir, sampleNamesFile))
    printlog(cmd)
    RunCommand(cmd)

    # parse out data in results text file to dict AND convert spaces to underscores in keys to avoid Django issues
    statsfile = 'summary.txt'
    analysisData = parseToDict(os.path.join(outDir, statsfile), "\t")
    global numPass, numUniformity, numAvgCov, totalSamples
    totalSamples = numGoodBams
    numPass = numGoodBams
    numAvgCov = 0
    numUniformity = 0
    for keys, values in analysisData.items():
        printlog(keys)
        printlog(values)

    for file in os.listdir("%s/cnvCalls" % outDir):
        if file.endswith(".log"):
            cnvCallsDir = os.path.join(outDir, "cnvCalls")
            filein = os.path.join(cnvCallsDir, file)
            printlog("filein is %s " % filein)
            with open(filein) as fin:
                sep = "="
                for line in fin:
                    if ("valid Samples =" in line):
                        kvp = line.split(sep)
                        totalSamples = kvp[1].strip()
                    elif ("CNV Calling =" in line):
                        kvp = line.split(sep)
                        numPass = kvp[1].strip()
                    elif ("Average coverage" in line):
                        kvp = line.split(sep)
                        numAvgCov = kvp[1].strip()
                    elif ("Uniformate Rate" in line):
                        kvp = line.split(sep)
                        numUniformity = kvp[1].strip()

    zipfilename = "%s/cnvExports.zip" % outDir
    cnvExportsDir = "%s/cnvExports" % outDir
    for file in os.listdir(cnvExportsDir):
        if file.endswith("_cn.txt"):
            filein = os.path.join(cnvExportsDir, file)
            compress.make_zip(zipfilename,
                              filein,
                              arcname=os.path.basename(filein),
                              use_sys_zip=False)

    vcfZipFilename = "%s/%s.vcf.zip" % (outDir, pluginParams['prefix'])
    mergedVcfsDir = "%s/merged_VCFs" % outDir
    for file in os.listdir(mergedVcfsDir):
        if file.endswith(".gz") or file.endswith(".tbi"):
            filein = os.path.join(mergedVcfsDir, file)
            compress.make_zip(vcfZipFilename,
                              filein,
                              arcname=os.path.basename(filein),
                              use_sys_zip=False)

    global barcodeData
    for barcode in validBarcodes:
        barcode_entry = {}
        sample = sample_names[barcode] if barcode in sample_names else ''
        barcode_entry['name'] = barcode
        if barcode in analysisData:
            barcodeLine = analysisData[barcode]
            kvp = barcodeLine.split("\t")
            #key = kvp[0].strip()
            if sample == '':
                barcode_entry['sample'] = 'none'
            else:
                barcode_entry['sample'] = sample
            if len(kvp) < 8 and kvp[2].strip() == 'null':
                barcode_entry['hotspots_variants_total'] = "none"
                barcode_entry['novel_variants_total'] = "none"
                barcode_entry['exon9_cnv'] = kvp[3].strip()
                barcode_entry['gene_cnv'] = kvp[4].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[5].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[6].strip()
            else:
                barcode_entry['hotspots_variants_total'] = "%d/%s" % (
                    int(kvp[2].strip()) - int(kvp[4].strip()) -
                    int(kvp[5].strip()), kvp[2].strip())
                barcode_entry['novel_variants_total'] = int(
                    kvp[3].strip()) - int(kvp[2].strip())
                barcode_entry['exon9_cnv'] = kvp[6].strip()
                barcode_entry['gene_cnv'] = kvp[7].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[8].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[9].strip()
            barcodeData.append(barcode_entry)

    updateBarcodeSummaryReport()
    if create_scraper:
        createScraperLinksFolder(pluginParams['results_dir'],
                                 pluginParams['prefix'])