Example #1
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    # inst diagnostic files are always in toplevel raw data dir:
    if 'thumbnail' in rawdatadir:
        rawdatadir = rawdatadir.replace('thumbnail', '')
    files = [
        'explog_final.txt',
        'explog.txt',
        'InitLog.txt',
        'InitLog1.txt',
        'InitLog2.txt',
        'RawInit.txt',
        'RawInit.jpg',
        'InitValsW3.txt',
        'InitValsW2.txt',
        'Controller',
        'debug',
        'chipCalImage.bmp.bz2',
    ]
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip('pgm_logs.zip', os.path.join(rawdatadir, afile), arcname=afile)

    return
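Note: every example on this page appends one file at a time to an archive via make_zip(zip_file, to_zip, arcname=...), sometimes as compress.make_zip with extra compressed= and use_sys_zip= flags. The Torrent Suite helper itself is not shown here; below is a minimal stand-in inferred purely from these call sites, so the signature and defaults are assumptions.

import zipfile

def make_zip(zip_file, to_zip, arcname=None, compressed=True, use_sys_zip=False):
    # Hypothetical stand-in inferred from the call sites on this page;
    # the real ion utils helper may differ. Appends a single file to
    # zip_file, creating the archive on first use.
    method = zipfile.ZIP_DEFLATED if compressed else zipfile.ZIP_STORED
    # use_sys_zip is accepted only for call-site compatibility here; the
    # real helper presumably shells out to the system zip when it is True.
    with zipfile.ZipFile(zip_file, 'a', method, allowZip64=True) as zf:
        zf.write(to_zip, arcname or to_zip)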
Example #2
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the instrument raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    from ion.utils.makeCSA import rawdata_patterns as files

    # inst diagnostic files are always in toplevel raw data dir:
    if 'thumbnail' in rawdatadir:
        rawdatadir = rawdatadir.replace('thumbnail', '')

    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip('pgm_logs.zip', os.path.join(rawdatadir, afile), arcname=afile)
Example #3
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the instrument raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    from ion.utils.makeCSA import rawdata_patterns as files

    # inst diagnostic files are always in toplevel raw data dir:
    if "thumbnail" in rawdatadir:
        rawdatadir = rawdatadir.replace("thumbnail", "")

    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip("pgm_logs.zip",
                     os.path.join(rawdatadir, afile),
                     arcname=afile)
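Note: Examples #2 and #3 replace Example #1's inline list with rawdata_patterns imported from ion.utils.makeCSA. That module is not shown here; judging from Example #1, the list presumably names the same instrument log files. A hypothetical approximation, not the actual module contents:

rawdata_patterns = [
    'explog_final.txt', 'explog.txt',
    'InitLog.txt', 'InitLog1.txt', 'InitLog2.txt',
    'RawInit.txt', 'RawInit.jpg',
    'InitValsW3.txt', 'InitValsW2.txt',
    'Controller', 'debug',
    'chipCalImage.bmp.bz2',
]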
Example #4
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    files = [
        "explog_final.txt",
        "explog.txt",
        "InitLog.txt",
        "RawInit.txt",
        "RawInit.jpg",
        "InitValsW3.txt",
        "InitValsW2.txt",
        "Controller",
        "debug",
    ]
    for afile in files:
        make_zip("pgm_logs.zip", os.path.join(rawdatadir, afile), arcname=afile)

    return
Example #5
def get_pgm_log_files(rawdatadir):
    # Create a tarball of the pgm raw data log files for inclusion into CSA.
    # tarball it now before the raw data gets deleted.
    # inst diagnostic files are always in toplevel raw data dir:
    if "thumbnail" in rawdatadir:
        rawdatadir = rawdatadir.replace("thumbnail", "")
    files = [
        "explog_final.txt",
        "explog.txt",
        "InitLog.txt",
        "InitLog1.txt",
        "InitLog2.txt",
        "RawInit.txt",
        "RawInit.jpg",
        "InitValsW3.txt",
        "InitValsW2.txt",
        "Controller",
        "debug",
    ]
    for afile in files:
        if os.path.exists(os.path.join(rawdatadir, afile)):
            make_zip("pgm_logs.zip", os.path.join(rawdatadir, afile), arcname=afile)

    return
Example #6
        ionstats_file = 'ionstats_alignment.json'

        try:
            stats = json.load(open(ionstats_file))
            l = stats['full']['max_read_length']
            graph_max_x = int(round(l + 49, -2))

            # Make alignment_rate_plot.png and base_error_plot.png
            ionstats_plots.alignment_rate_plot2(ionstats_file,
                                                'alignment_rate_plot.png',
                                                int(graph_max_x))
            ionstats_plots.base_error_plot(ionstats_file,
                                           'base_error_plot.png',
                                           int(graph_max_x))
        except:
            traceback.print_exc()

    if args.zip and len(args.files) > 1:
        # zip barcoded files
        zipname = args.zip
        printtime("Zip merged barcode files to %s" % zipname)
        for filename in args.files:
            if os.path.exists(filename):
                try:
                    make_zip(zipname, filename, arcname=filename)
                except:
                    print("ERROR: zip target: %s" % filename)
                    traceback.print_exc()

    printtime("DEBUG: CA job done.")
Example #7
                'alignment_rate_plot.png', int(graph_max_x))
            print("Ionstats plot created successfully")            
        except:            
            print("ERROR: Failed to generate alignment rate plot")
      
        try:
            # Make base_error_plot.png
            base_error_plot.generate_base_error_plot(
                'alignStats_err.json',
                'base_error_plot.png',int(graph_max_x))            
        except:
            print("ERROR: Failed to generate base error plot")
            traceback.print_exc()        
        
         
    if args.zip and len(args.files) > 1:
        # zip barcoded files
        zipname = args.zip
        print("Zip merged barcode files to %s" % zipname)
        for filename in args.files:
            if os.path.exists(filename):
                try:
                    make_zip(zipname, filename, arcname=filename)
                except:
                    print("ERROR: zip target: %s" % filename)
                    traceback.print_exc()
Example #8
def runForBarcodes():
  global pluginParams, pluginResult, pluginReport
  # read barcode ids
  barcodes = []
  try:
    bcfileName = pluginParams['analysis_dir']+'/barcodeList.txt'
    with open(bcfileName) as bcfile:
      for line in bcfile:
        if line.startswith('barcode '):
          barcodes.append(line.split(',')[1])
  except:
    printerr("Reading barcode list file '%s'" % bcfileName)
    raise
  numGoodBams = 0
  numUnalBams = 0
  minFileSize = pluginParams['cmdOptions'].minbamsize
  (bcBamPath,bcBamRoot) = os.path.split(pluginParams['bamroot'])
  validBarcodes = []
  for barcode in barcodes:
    # use unmapped BAM if there else mapped BAM (unmapped may not be present on Proton)
    bcbam = os.path.join( bcBamPath, "%s_%s"%(barcode,bcBamRoot) )
    if not os.path.exists(bcbam):
      bcbam = os.path.join( pluginParams['analysis_dir'], "%s_rawlib.bam"%barcode )
      numUnalBams += 1
    if not os.path.exists(bcbam):
      bcbam = ": BAM file not found"
      numUnalBams -= 1
    elif os.stat(bcbam).st_size < minFileSize:
      bcbam = ": BAM file too small"
    else:
      numGoodBams += 1
      validBarcodes.append(barcode)

  printlog("Processing %d barcodes...\n" % numGoodBams)
  if numUnalBams > 0:
    printlog("Warning: %d barcodes will be processed using mapped BAM files. (Unmapped BAMs were not available.)\n" % numUnalBams)
  pluginReport['num_barcodes_processed'] = numGoodBams
  pluginReport['num_barcodes_failed'] = 0

  # iterate over all barcodes and process the valid ones
  skip_analysis = pluginParams['cmdOptions'].skip_analysis
  stop_on_error = pluginParams['cmdOptions'].stop_on_error
  create_scraper = pluginParams['cmdOptions'].scraper
  sample_names = pluginParams['sample_names']
  postout = False  # just for logfile prettiness
  sampleNamesFile = ("%s/sampleNames.txt" % pluginParams['results_dir'])
  sampleNamesFW = open(sampleNamesFile, 'w')
  for barcode in barcodes:
    sample = sample_names[barcode] if barcode in sample_names else ''
    sampleNamesFW.write("%s:%s\n" %(barcode, sample))
  sampleNamesFW.close()
  binDir = os.path.join(pluginParams['plugin_dir'], 'bin')
  outDir = pluginParams['results_dir']
  analysisDir = pluginParams['analysis_dir']
  global hotspotsFile
  hotspotsFile = pluginParams['hotspots_file']
  global targetsFile
  targetsFile = pluginParams['regions_file']
  printlog("hotspots file is %s " %hotspotsFile)
  pluginOutDir = os.path.join(analysisDir, 'plugin_out')
  global variantCallerName 
  if not filter(re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)):
    printerr("Variant Caller plugin has to be run before launching the PGX Analysis plugin. Please run Torrent Variant Caller plugin")
    return
  variantCallerName = max(filter(re.compile(r'variantCaller_out*').search, os.listdir(pluginOutDir)))
  global coverageAnalysisName
  if not filter(re.compile(r'coverageAnalysis_out*').search, os.listdir(pluginOutDir)):
    printerr("Coverage Analysis plugin has to be run before launching the PGX Analysis plugin. Please run the Coverage Analysis plugin")
    return
  coverageAnalysisName = max(filter(re.compile(r'coverageAnalysis_out*').search, os.listdir(pluginOutDir)))
  printlog(variantCallerName)
  printlog(coverageAnalysisName)
  variantCallerDir = os.path.join(pluginOutDir, variantCallerName)
  printlog("variantcaller dir is %s" % variantCallerDir)
  coverageAnalysisDir = os.path.join(pluginOutDir, coverageAnalysisName)
  
  hotspotsFileVC = ""
  resultsJsonFile = os.path.join(variantCallerDir, "results.json")
  if not os.path.isfile(resultsJsonFile):
    printerr("VariantCaller results are not ready. Please wait for the variant Caller plugin to finish and then launch the PGx plugin")
    return

  covAnalysisResultsJsonFile = os.path.join(coverageAnalysisDir, "results.json")
  if not os.path.isfile(covAnalysisResultsJsonFile):
    printerr("Coverage Analysis results are not ready. Please wait for the Coverage Analysis plugin to finish and then launch the PGx plugin")
    return
  targetsFileVC = ""
  with open(resultsJsonFile) as fin:
    for line in fin:
      if "hotspots_bed" in line and ":" in line and "type" not in line:
        kvp = line.split(":")
        hotspotsFileVC = os.path.basename(kvp[1].strip())
        if "," in hotspotsFileVC:
          hotspotsFileVC = hotspotsFileVC[:-2]
        else:
          hotspotsFileVC = hotspotsFileVC[:-1]
        hotspotsFileVC = os.path.join(variantCallerDir, hotspotsFileVC)
      if "targets_bed" in line and ":" in line and "type" not in line:
        kvp = line.split(":")
        targetsFileVC = os.path.basename(kvp[1].strip())
        if "," in targetsFileVC:
          targetsFileVC = targetsFileVC[:-2]
        else:
          targetsFileVC = targetsFileVC[:-1]
        targetsFileVC = os.path.join(variantCallerDir, targetsFileVC)
  if not hotspotsFileVC:
    printerr("Cannot obtain the hotspots file used by the VariantCaller. Trying to obtain the hotspots file from plan")
  else:
    hotspotsFile = hotspotsFileVC
  if not hotspotsFile:
    printerr("The plan is not set up with a hotspots file.")
    return

  if not targetsFileVC:
    printerr("Cannot obtain the Target Regions file used by the VariantCaller. Trying to obtain the regions file from plan")
  else:
    targetsFile = targetsFileVC
 
  cmd = ("java -jar %s/PGX_Analysis.jar %s %s %s %s %s %s %s %s" % (binDir, hotspotsFile, outDir, bcfileName, analysisDir, variantCallerDir, coverageAnalysisDir, binDir, sampleNamesFile));
 
  printlog(cmd) 
  RunCommand(cmd);

  # parse out data in results text file to dict AND coverts spaces to underscores in keys to avoid Django issues
  statsfile = 'summary.txt'
  analysisData = parseToDict(os.path.join(outDir, statsfile), "\t")
  global numPass, numUniformity, numAvgCov, totalSamples
  totalSamples = numGoodBams
  numPass = numGoodBams
  numAvgCov = 0
  numUniformity = 0
  for keys,values in analysisData.items():
    printlog(keys)
    printlog(values)
  for file in os.listdir("%s/cnvCalls" %outDir):
	if file.endswith(".log"):
		cnvCallsDir = os.path.join(outDir,"cnvCalls")
		filein = os.path.join(cnvCallsDir, file)
    		printlog("filein is %s " % filein)
		with open(filein) as fin:
			sep = "="
      			for line in fin:
				if("valid Samples =" in line):
					kvp = line.split(sep);
					totalSamples = kvp[1].strip()
				elif("CNV Calling =" in line):
					kvp = line.split(sep);
					numPass = kvp[1].strip()
				elif("Average coverage" in line):
					kvp = line.split(sep);
					numAvgCov = kvp[1].strip()
				elif("Uniformate Rate" in line):
					kvp = line.split(sep);
					numUniformity = kvp[1].strip()
  zipfilename = "%s/cnvExports.zip" % outDir
  cnvExportsDir = "%s/cnvExports" % outDir 
  for file in os.listdir(cnvExportsDir):
	if file.endswith("_cn.txt"):
		filein = os.path.join(cnvExportsDir, file) 
		compress.make_zip(zipfilename, filein, arcname=os.path.basename(filein), use_sys_zip = False)
 
  vcfZipFilename = "%s/%s.vcf.zip" % (outDir, pluginParams['prefix'])
  mergedVcfsDir = "%s/merged_VCFs" % outDir
  for file in os.listdir(mergedVcfsDir):
	if file.endswith(".gz") or file.endswith(".tbi"):
		filein = os.path.join(mergedVcfsDir, file) 
		compress.make_zip(vcfZipFilename, filein, arcname=os.path.basename(filein), use_sys_zip = False)
   
  
  global barcodeData
 
  for barcode in validBarcodes:
    barcode_entry = {}
    sample = sample_names[barcode] if barcode in sample_names else ''
    barcode_entry['name'] = barcode
    if barcode in analysisData:
      barcodeLine = analysisData[barcode]
      kvp = barcodeLine.split("\t")
      # key = kvp[0].strip()

      if sample == '':
        barcode_entry['sample'] = 'none'
      else:
        barcode_entry['sample'] = sample
      if len(kvp) < 8 and kvp[2].strip() == 'null':
        barcode_entry['hotspots_variants_total'] = "none"
        barcode_entry['novel_variants_total'] = "none"
        barcode_entry['exon9_cnv'] = kvp[3].strip()
        barcode_entry['gene_cnv'] = kvp[4].strip()
        barcode_entry['exon9_cnv_confidence'] = kvp[5].strip()
        barcode_entry['gene_cnv_confidence'] = kvp[6].strip()
      else:
        barcode_entry['hotspots_variants_total'] = "%d/%s" % (int(kvp[2].strip()) - int(kvp[4].strip()) - int(kvp[5].strip()), kvp[2].strip())
        barcode_entry['novel_variants_total'] = int(kvp[3].strip()) - int(kvp[2].strip())
        barcode_entry['exon9_cnv'] = kvp[6].strip()
        barcode_entry['gene_cnv'] = kvp[7].strip()
        barcode_entry['exon9_cnv_confidence'] = kvp[8].strip()
        barcode_entry['gene_cnv_confidence'] = kvp[9].strip()

      barcodeData.append(barcode_entry)

  updateBarcodeSummaryReport()

  if create_scraper:
    createScraperLinksFolder( pluginParams['results_dir'], pluginParams['prefix'] )
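Note: Example #8 recovers the VariantCaller's BED paths by scanning results.json line by line and slicing off trailing quotes and commas. Since the file is JSON, parsing it directly is less fragile. A sketch using the same variables as the example, under the assumption (merely implied by the substring matching above) that 'hotspots_bed' and 'targets_bed' appear as top-level string values:

import json
import os

with open(resultsJsonFile) as fin:
    results = json.load(fin)
# Resolve each BED path relative to the variantCaller output directory,
# mirroring what the line-by-line parser above reconstructs by hand.
hotspotsFileVC = results.get('hotspots_bed', '')
if hotspotsFileVC:
    hotspotsFileVC = os.path.join(variantCallerDir, os.path.basename(hotspotsFileVC))
targetsFileVC = results.get('targets_bed', '')
if targetsFileVC:
    targetsFileVC = os.path.join(variantCallerDir, os.path.basename(targetsFileVC))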
Example #9
                    printtime(traceback.format_exc())



        prefix_list = [dataset['file_prefix'] for dataset in datasets_basecaller.get("datasets",[])]
        
        if len(prefix_list) > 1:
            zip_task_list = [
                ('bam',             env['ALIGNMENT_RESULTS']),
                ('bam.bai',         env['ALIGNMENT_RESULTS']),
                ('basecaller.bam',  env['BASECALLER_RESULTS']),]

            for extension,base_dir in zip_task_list:
                zipname = "%s/%s_%s.barcode.%s.zip" % (download_links, env['expName'], env['resultsName'], extension)
                for prefix in prefix_list:
                    try:
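                        # Caution: rstrip('_rawlib') strips any trailing run of the
                        # characters in '_rawlib', not the literal '_rawlib' suffix.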
                        filename = "%s/%s_%s_%s.%s" % (download_links, prefix.rstrip('_rawlib'), env['expName'], env['resultsName'], extension)
                        src = os.path.join(base_dir, prefix+'.'+extension)
                        if os.path.exists(src):
                            os.symlink(os.path.relpath(src,os.path.dirname(filename)),filename)
                            make_zip(zipname, filename, arcname=filename, compressed=False)
                    except:
                        printtime("ERROR: target: %s" % filename)
                        traceback.print_exc()

        else:
            printtime("MergeTLScript: No barcode run")

    printtime("MergeTLScript exit")
    sys.exit(0)
Example #10
            ]

            for extension, base_dir in zip_task_list:
                zipname = "%s/%s_%s.barcode.%s.zip" % (
                    download_links, env['expName'], env['resultsName'],
                    extension)
                for prefix in prefix_list:
                    try:
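                        # Caution (as in Example #9): rstrip('_rawlib') matches a
                        # character set, not the '_rawlib' suffix.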
                        filename = "%s/%s_%s_%s.%s" % (
                            download_links, prefix.rstrip('_rawlib'),
                            env['expName'], env['resultsName'], extension)
                        src = os.path.join(base_dir, prefix + '.' + extension)
                        if os.path.exists(src):
                            os.symlink(
                                os.path.relpath(src,
                                                os.path.dirname(filename)),
                                filename)
                            make_zip(zipname,
                                     filename,
                                     arcname=filename,
                                     compressed=False)
                    except:
                        printtime("ERROR: target: %s" % filename)
                        traceback.print_exc()

        else:
            printtime("MergeTLScript: No barcode run")

    printtime("MergeTLScript exit")
    sys.exit(0)
Example #11
        except:
            pass
        try:
            r = subprocess.call(["ln", "-s", "rawtf.bam", tfbam])
        except:
            pass

        ##################################################
        # Create zip of files
        ##################################################

        #sampled sff
        #make_zip(libsff.replace(".sff",".sampled.sff")+'.zip', libsff.replace(".sff",".sampled.sff"))

        #library sff
        make_zip(libsff + '.zip', libsff)

        #tf sff
        make_zip(tfsff + '.zip', tfsff)

        #fastq zip
        make_zip(fastqpath + '.zip', fastqpath)

        #sampled fastq
        #make_zip(fastqpath.replace(".fastq",".sampled.fastq")+'.zip', fastqpath.replace(".fastq",".sampled.fastq"))

        ########################################################
        # barcode processing                                   #
        # Zip up and move sff, fastq, bam, bai files           #
        # Move zip files to results directory                  #
        ########################################################
Example #12
def plugin_main():
    
    global PLUGIN_DEV_SKIP_VARIANT_CALLING    
    global DIRNAME
    global TSP_URLPATH_PLUGIN_DIR
    global TSP_FILEPATH_PLUGIN_DIR
    global startplugin_json
    global output_files

    parser = OptionParser()
    parser.add_option('-d', '--install-dir', help='Directory containing plugin files', dest='install_dir')
    parser.add_option('-o', '--output-dir', help='Directory for results files', dest='output_dir')
    parser.add_option('-u', '--output-url', help='URL matching the output directory', dest='output_url')
    parser.add_option('-r', '--report-dir', help='Directory containing analysis report files', dest='report_dir')
    parser.add_option('-f', '--genome-fasta', help='Reference genome fasta file', dest='genome_fasta')
    parser.add_option('-s', '--skip-tvc', help='(debug) Skip variant calling and reuse existing results', dest='skip_tvc', action="store_true", default=False)
    (options, args) = parser.parse_args()


    DIRNAME                     = options.install_dir    #os.environ['DIRNAME']         # home directory for the plugin files
    TSP_FILEPATH_PLUGIN_DIR     = options.output_dir     #os.environ['TSP_FILEPATH_PLUGIN_DIR'] # target plugin results directory
    ANALYSIS_DIR                = options.report_dir     #os.environ['ANALYSIS_DIR'] # main report directory
    TSP_URLPATH_PLUGIN_DIR      = options.output_url
    PLUGIN_DEV_SKIP_VARIANT_CALLING = options.skip_tvc

    settings.configure(DEBUG=True, TEMPLATE_DEBUG=True, TEMPLATE_DIRS=((DIRNAME+'/templates'),))

    subprocess.call('rm -f %s/results.json' % TSP_FILEPATH_PLUGIN_DIR,shell=True)            

    printtime('')
    printtime('Variant Caller Plugin started')
    printtime('')

    try:
        json_file = open(os.path.join(TSP_FILEPATH_PLUGIN_DIR,'startplugin.json'), 'r')
        startplugin_json = json.load(json_file,parse_float=str)
        json_file.close()
    except:
        printtime('ERROR: Failed to load and parse startplugin.json')
        return 1


    # Uncomment to emulate autorun:
    #startplugin_json['pluginconfig'] = {}
        
    vc_options = options_for_manual_start(startplugin_json)
    #if not vc_options:
    #    vc_options = options_for_plan_autostart(startplugin_json)


    vc_options['run_name']      = startplugin_json['expmeta'].get('run_name','Current run')
    vc_options['genome_name']   = startplugin_json['runinfo'].get('library','')
    vc_options['plugin_name']   = startplugin_json['runinfo'].get('plugin_name','')
    vc_options['genome_fasta']  = options.genome_fasta   #os.environ['TSP_FILEPATH_GENOME_FASTA']
        
    if 'error' in vc_options:
        printtime(vc_options['error'])
        generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS),vc_options['error'], vc_options)
        return 1



    f = open(os.path.join(TSP_FILEPATH_PLUGIN_DIR,BASENAME_PARAMETERS_JSON),'w')
    json.dump(vc_options['parameters'],f,indent=4)
    f.close()

    add_output_file('parameters_json', BASENAME_PARAMETERS_JSON)

    TSP_FILEPATH_BARCODE_TXT    = ANALYSIS_DIR + '/barcodeList.txt'
    vc_options['has_barcodes'] = False
    if os.path.exists(TSP_FILEPATH_BARCODE_TXT):
        vc_options['has_barcodes'] = True

    # Call tvc -v to get the version string
    tvc_args = vc_options['parameters'].get('meta',{}).get('tvcargs','tvc')
    if tvc_args == 'tvc' and os.path.exists(DIRNAME + '/tvc'):   # try local binary first, then go to global one
        tvc_args = DIRNAME + '/tvc'
    vc_options['tvc_version'] = execute_output(tvc_args + ' -v').splitlines()[0]
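    # Note: the suffix '- Torrent Variant Caller' is exactly 24 characters,
    # which is what the [:-24] slice below removes.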
    if vc_options['tvc_version'].endswith('- Torrent Variant Caller'):
        vc_options['tvc_version'] = vc_options['tvc_version'][:-24].strip()
    
    
    # Parameters from plugin customization
    printtime('Variant Caller plugin run options:')
    printtime('  Plugin name                : ' + vc_options['plugin_name'])
    printtime('  Plugin start mode          : ' + vc_options['start_mode'])
    printtime('  Variant Caller version     : ' + vc_options['tvc_version'])
    printtime('  Run is barcoded            : ' + str(vc_options['has_barcodes']))
    printtime('  Genome                     : ' + vc_options['genome_name'])
    printtime('  Library Type               : ' + vc_options['library_type'])
    printtime('  Target Regions             : ' + (vc_options['targets_name'] if vc_options['has_targets'] else 'Not using'))
    printtime('  Hotspots                   : ' + (vc_options['hotspots_name'] if vc_options['has_hotspots'] else 'Not using'))
    if 'original_parameters' in vc_options:
        printtime('  Requested Parameters       : ' + vc_options["original_config_line1"])
        printtime('                               ' + vc_options["original_config_line2"])
        printtime('  Auto-Updated Parameters    : ' + vc_options["config_line1"])
        printtime('                               ' + vc_options["config_line2"])
    else:
        printtime('  Used Parameters            : ' + vc_options['config_line1'])
        printtime('                               ' + vc_options["config_line2"])
   
    printtime('  Trim Reads                 : ' + str(vc_options['trim_reads']))
    
    printtime('')
    printtime('Used files:')
    printtime('  Reference Genome           : ' + vc_options['genome_fasta'])
    printtime('  Parameters file            : ' + os.path.join(TSP_FILEPATH_PLUGIN_DIR,BASENAME_PARAMETERS_JSON))
    if 'parameters_source' in vc_options:
        printtime('  Parameters source file     : ' + vc_options['parameters_source'])
    
    if vc_options['has_targets']:
        printtime('  Target unmerged BED        : ' + vc_options['targets_bed_unmerged'])
        printtime('  Target merged BED          : ' + vc_options['targets_bed_merged'])
    if vc_options['has_hotspots']:
        printtime('  Hotspots unmerged BED      : ' + vc_options['hotspots_bed_unmerged'])
        printtime('  Hotspots merged BED        : ' + vc_options['hotspots_bed_merged'])
    printtime('')




    PLUGIN_HS_ALIGN_DIR = TSP_FILEPATH_PLUGIN_DIR + '/hs_align'
    
    # Remove previous results to avoid displaying old before ready
    subprocess.call('rm -f %s/%s' % (TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS),shell=True)
    subprocess.call('rm -f %s' % (TSP_FILEPATH_PLUGIN_DIR + '/results.json'),shell=True)
    subprocess.call('rm -f %s/*.bed' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -rf %s/*.bam*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -rf %s' % (PLUGIN_HS_ALIGN_DIR),shell=True)
    subprocess.call('rm -f %s/hotspot*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -f %s/variant*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -f %s/allele*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -f %s/*.xls' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -f %s/*.log' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -f %s/*.done' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('rm -rf %s/lifegrid' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
    
    if not PLUGIN_DEV_SKIP_VARIANT_CALLING:
        subprocess.call('rm -f %s/SNP*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
        subprocess.call('rm -f %s/indel*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)
        subprocess.call('rm -f %s/TSVC*' % (TSP_FILEPATH_PLUGIN_DIR),shell=True)


    printtime('Results folder initialized')
    
    # Get local copy of BED files (may be deleted from system later)
    if vc_options['has_targets']:
        if not os.path.exists( vc_options['targets_bed_unmerged']):
            printtime('ERROR: Cannot locate target regions file: ' +  vc_options['targets_bed_unmerged'])
            return 1
        if not os.path.exists(vc_options['targets_bed_merged']):
            printtime('ERROR: Cannot locate merged target regions file: ' + vc_options['targets_bed_merged'])
            return 1
        subprocess.call('cp -f %s %s/%s' % (vc_options['targets_bed_unmerged'],TSP_FILEPATH_PLUGIN_DIR,os.path.basename(vc_options['targets_bed_unmerged'])),shell=True)
        
        add_output_file('target_regions_bed', os.path.basename(vc_options['targets_bed_unmerged']))

    
    if vc_options['has_hotspots']:
        if not os.path.exists(vc_options['hotspots_bed_unmerged']):
            printtime('ERROR: Cannot locate hotspots file: ' +  vc_options['hotspots_bed_unmerged'])
            return 1
        if not os.path.exists(vc_options['hotspots_bed_merged']):
            printtime('ERROR: Cannot locate merged hotspots file: ' + vc_options['hotspots_bed_merged'])
            return 1
        
        vc_options['hotspots_bed_unmerged_local'] = os.path.join(TSP_FILEPATH_PLUGIN_DIR,os.path.basename(vc_options['hotspots_bed_unmerged']))
        vc_options['hotspots_bed_unmerged_leftalign'] = vc_options['hotspots_bed_unmerged_local'][:-4] + '.left.bed'
        
        subprocess.call('cp -f %s %s' % (vc_options['hotspots_bed_unmerged'],vc_options['hotspots_bed_unmerged_local']),shell=True)

        prepare_hotspots_command  = 'tvcutils prepare_hotspots'
        prepare_hotspots_command += '  --input-bed %s' % vc_options['hotspots_bed_unmerged']
        prepare_hotspots_command += '  --reference %s' % vc_options['genome_fasta']
        prepare_hotspots_command += '  --left-alignment on' 
        prepare_hotspots_command += '  --allow-block-substitutions on' 
        prepare_hotspots_command += '  --output-bed %s' % vc_options['hotspots_bed_unmerged_leftalign']
        prepare_hotspots_command += '  --output-vcf %s/hotspot.vcf' % TSP_FILEPATH_PLUGIN_DIR
        run_command(prepare_hotspots_command, 'Generate filtered, left-aligned, and merged hotspot VCF file')
        
        hotspot_file_empty = True
        try:
            f = open('%s/hotspot.vcf' % TSP_FILEPATH_PLUGIN_DIR, 'r')
            for line in f:
                if not line or line.startswith('#'):
                    continue
                hotspot_file_empty = False
        except:
            pass
        
        if hotspot_file_empty:
            printtime('Filtered hotspot file has no hotspot entries. Disabling hotspots')
            vc_options['has_hotspots'] = False
        else:
            #run_command('bgzip -c %s/hotspot.vcf > %s/hotspot.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,TSP_FILEPATH_PLUGIN_DIR), 'Generate compressed hotspot vcf')
            #run_command('tabix -p vcf %s/hotspot.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR), 'Generate index for compressed hotspot vcf')
            vc_options['hotspots_vcf'] = TSP_FILEPATH_PLUGIN_DIR + '/hotspot.vcf'
            add_output_file('hotspots_bed', os.path.basename(vc_options['hotspots_bed_unmerged']))
            

    
    # Make links to js/css used for barcodes table and empty results page
    subprocess.call('ln -sf "%s/js" "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('ln -sf "%s/css" "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR),shell=True)
    subprocess.call('ln -sf %s/scripts/*.php3 "%s"' % (DIRNAME,TSP_FILEPATH_PLUGIN_DIR),shell=True)


    results_json = {
        'Aligned Reads'     : vc_options['run_name'],
        'Library Type'      : vc_options['library_type'],
        'Configuration'     : vc_options['parameters']['meta']['configuration'],
        'Target Regions'    : (vc_options['targets_name'] if vc_options['has_targets'] else 'Not using'),
        'Target Loci'       : (vc_options['hotspots_name'] if vc_options['has_hotspots'] else 'Not using'),
        'Trim Reads'        : vc_options['trim_reads'],
        'barcoded'          : 'false',
        'files'             : []
    }
    if vc_options['has_targets']:
        results_json['targets_bed'] = vc_options['targets_bed_unmerged']
    if vc_options['has_hotspots']:
        results_json['hotspots_bed'] = vc_options['hotspots_bed_unmerged']


    if vc_options['has_barcodes']:      # Run for barcodes or single page

        barcode_samples_string = startplugin_json.get('plan',{}).get('barcodedSamples',"")
        barcode_sample_info = {}
        if barcode_samples_string:
            barcode_samples_json = json.loads(barcode_samples_string)
            for k,v in barcode_samples_json.iteritems():
                barcode_sample_info.update(v.get('barcodeSampleInfo',{}))

        # Load barcode list    
        barcode_data = []    
        bc_list_file = open(TSP_FILEPATH_BARCODE_TXT,'r')
        for line in bc_list_file:
            if not line.startswith('barcode '):
                continue
    
            barcode_entry = {}
            barcode_entry['name'] = line.split(',')[1]
            barcode_entry['bam'] = os.path.join(ANALYSIS_DIR, barcode_entry['name'] + '_rawlib.bam')
            barcode_entry['status'] = 'queued'

            if not PLUGIN_DEV_SKIP_VARIANT_CALLING:
                subprocess.call('rm -rf %s/%s' % (TSP_FILEPATH_PLUGIN_DIR,barcode_entry['name']),shell=True)
            
            if not os.path.exists(barcode_entry['bam']):
                continue

            if barcode_entry['name'] in barcode_sample_info:
                barcode_nuc_type = barcode_sample_info[barcode_entry['name']].get('nucleotideType','')
                if not barcode_nuc_type:
                    barcode_nuc_type = 'DNA'
                barcode_reference = barcode_sample_info[barcode_entry['name']].get('reference',vc_options['genome_name'])
                if barcode_nuc_type != 'DNA':
                    printtime('Skipping barcode ' + barcode_entry['name'] + ' : Unsupported nuc type ' + barcode_nuc_type)
                    continue
                if barcode_reference != vc_options['genome_name']:
                    printtime('Skipping barcode ' + barcode_entry['name'] + ' : Barcode reference ' + barcode_reference +
                              ' different from run reference ' + vc_options['genome_name'])
                    continue
                

            
            # Size enough to process? TODO - just get from datasets_basecaller.json
            if os.stat(barcode_entry['bam']).st_size < BCFILE_MIN_SIZE:
                barcode_entry['status'] = 'insufficient_reads'
                
            elif is_bam_invalid(barcode_entry['bam']):
                barcode_entry['status'] = 'invalid_bam'
                    
            barcode_data.append(barcode_entry)
        
        bc_list_file.close()
        # End load barcode list
        
        printtime('')
        printtime('Processing %d barcodes...' % len(barcode_data))
    
        # Start json file
        results_json['barcoded'] = 'true'
        results_json['barcodes'] = {}
        
        all_barcodes_successful = True
        
        for barcode_idx in range(len(barcode_data)):
            if barcode_data[barcode_idx]['status'] != 'queued':
                printtime('Skipping barcode ' + barcode_data[barcode_idx]['name'])
                continue
    
            barcode_data[barcode_idx]['status'] = 'in_progress'
            generate_barcode_links_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), barcode_data, vc_options)
    
            BARCODE_DIR = TSP_FILEPATH_PLUGIN_DIR + '/' + barcode_data[barcode_idx]['name']
            if not os.path.exists(BARCODE_DIR):
                os.makedirs(BARCODE_DIR)
            
            # perform coverage anaysis and write content
            printtime('')
            printtime('Processing barcode ' + barcode_data[barcode_idx]['name'])
    
            try:
                summary = call_variants(BARCODE_DIR,barcode_data[barcode_idx]['bam'],vc_options,barcode_data[barcode_idx]['name'])
                
                results_json['barcodes'][barcode_data[barcode_idx]['name']] = {}
                results_json['barcodes'][barcode_data[barcode_idx]['name']]['variants'] = summary.get('variants_total',{})
                results_json['barcodes'][barcode_data[barcode_idx]['name']]['hotspots'] = summary.get('hotspots_total',{})
                
                barcode_data[barcode_idx]['summary'] = summary
                barcode_data[barcode_idx]['status'] = 'completed'
            
            except:
                traceback.print_exc()
                all_barcodes_successful = False
                barcode_data[barcode_idx]['status'] = 'error'

# Replaced with python zip library because of failures due to too-long argument lists.
#
#        #Zip all vcf.gz and vcf.gz.tbi files
#        zip_vcf_command =  'echo "'
#        zip_vcf_command +=  '  '.join(('%s/%s/TSVC_variants_%s.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) 
#                                      for barcode in barcode_data if barcode['status'] == 'completed')
#        zip_vcf_command +=  '  '
#        zip_vcf_command +=  '  '.join(('%s/%s/TSVC_variants_%s.vcf.gz.tbi' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) 
#                                      for barcode in barcode_data if barcode['status'] == 'completed')
#        zip_vcf_command += '" | xargs  zip  --junk-paths  %s/%s.vcf.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
#        run_command(zip_vcf_command, 'Store per-barcode vcf files in a single zip file')
        
#        
#        #Zip all variants_*.xls files.
#        zip_xls_command =  'echo "'
#        zip_xls_command +=  '  '.join(('%s/%s/alleles_%s.xls' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) 
#                                      for barcode in barcode_data if barcode['status'] == 'completed')
#        zip_xls_command +=  '" | xargs  zip  --junk-paths  %s/%s.xls.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
#        run_command(zip_xls_command, 'Store per-barcode xls files in a single zip file')
        printtime(' ')
        printtime('Task    : ' + 'Store per-barcode vcf files in a single zip file')
        zipfilename = '%s/%s.vcf.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        for myfile in [('%s/%s/TSVC_variants_%s.vcf.gz' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip = False)
        for myfile in [('%s/%s/TSVC_variants_%s.vcf.gz.tbi' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip = False)
        printtime(' ')
        printtime(' ')
        printtime('Task    : ' + 'Store per-barcode xls files in a single zip file')
        zipfilename = '%s/%s.xls.zip' % (TSP_FILEPATH_PLUGIN_DIR,vc_options['run_name'])
        for myfile in [('%s/%s/alleles_%s.xls' % (TSP_FILEPATH_PLUGIN_DIR,barcode['name'],barcode['name'])) for barcode in barcode_data if barcode['status'] == 'completed']:
            compress.make_zip(zipfilename, myfile, arcname=os.path.basename(myfile), use_sys_zip = False)
        printtime(' ')
            
        
        generate_barcode_links_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), barcode_data, vc_options)
        generate_barcode_links_block(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_BLOCK), barcode_data, vc_options)

        if not all_barcodes_successful:
            return 1
            

    
    else:   # Non-barcoded run

        generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'Variant calling still in progress', vc_options, autorefresh=True)

        fullpath_input_bam = os.path.join(ANALYSIS_DIR, 'rawlib.bam')

        if is_bam_invalid(fullpath_input_bam):
            generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'BAM file format validation failed. Regenerate BAM with latest TS.', vc_options)
            return 1

        try:
            summary = call_variants(TSP_FILEPATH_PLUGIN_DIR,fullpath_input_bam,vc_options)

            results_json['variants'] = summary.get('variants_total',{})
            results_json['hotspots'] = summary.get('hotspots_total',{})
            
        except:
            traceback.print_exc()
            generate_incomplete_report_page(os.path.join(TSP_FILEPATH_PLUGIN_DIR,HTML_RESULTS), 'An error occurred - check Log File for details', vc_options)
            return 1
        
    results_json['files'] = output_files
    out = open(TSP_FILEPATH_PLUGIN_DIR + '/results.json','w')
    json.dump(results_json,out,indent=4)
    out.close()

    printtime('')
    printtime('Variant Caller Plugin complete')
    printtime('')
    
    return 0
Example #13
            if not os.path.exists(dst):
                try:
                    os.symlink(src,dst)
                except:
                    printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))
                    printtime(traceback.format_exc())

        ##################################################
        # Create zip of files
        ##################################################

        #sampled sff
        #make_zip(libsff.replace(".sff",".sampled.sff")+'.zip', libsff.replace(".sff",".sampled.sff"))

        #library sff
        make_zip(libsff + '.zip', libsff, arcname=libsff )

        #tf sff
        make_zip(tfsff + '.zip', tfsff, arcname=tfsff)

        #fastq zip
        make_zip(fastqpath + '.zip', fastqpath, arcname=fastqpath)

        #sampled fastq
        #make_zip(fastqpath.replace(".fastq",".sampled.fastq")+'.zip', fastqpath.replace(".fastq",".sampled.fastq"))

        ########################################################
        # barcode processing                                   #
        # Zip up and move sff, fastq, bam, bai files           #
        # Move zip files to results directory                  #
        ########################################################
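Note: Example #13 is a later revision of Example #11; the visible change to the make_zip calls is that the archive member name (arcname) is now passed explicitly instead of being left to the helper's default. Assuming make_zip forwards to zipfile's write(), the default stored name is derived from the input path, so the two styles differ like this (hypothetical paths):

import zipfile

with zipfile.ZipFile('demo.zip', 'w') as zf:
    zf.write('/tmp/out/lib.sff')                     # stored as 'tmp/out/lib.sff'
    zf.write('/tmp/out/lib.sff', arcname='lib.sff')  # stored as 'lib.sff'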
Example #14
            start_time,
            env['ALIGNMENT_RESULTS'],
            bidirectional,
            sam_parsed
            )

        try:
            quality = os.path.join('.',"quality.summary")
            shutil.copy(quality, "../"+status+".quality.summary")
        except:
            printtime("ERROR: %s doesn't exist" % quality)
            pass
        shutil.copy("alignTable.txt", "../"+status+".alignTable.txt")
        shutil.copy("alignment.summary", "../"+status+".alignment.summary")

        make_zip(sff_path+'.zip',sff_path,arcname=sff_path)
        make_zip(fastq_path+'.zip',fastq_path,arcname=fastq_path)

    os.chdir(top_dir)

    # plugin framework expects the sff file in the env['BASECALLER_RESULTS'] subdirectory
    union_sff = os.path.join(env['BASECALLER_RESULTS'],basename+".sff")
    union_fastq = os.path.join(env['BASECALLER_RESULTS'],basename+".fastq")

    # merge sff files
    try:
        com = "SFFMerge"
        com += " -r"
        com += " -o %s" % union_sff
        for status in ["Paired_Fwd","Paired_Rev","Singleton_Fwd","Singleton_Rev","corrected"]:
            sff = basename+"_"+status+".sff"
Example #15
            site_name,
            flows,
            notes,
            barcodeId,
            aligner_opts_extra,
            start_time,
            env['ALIGNMENT_RESULTS'],
            bidirectional,
            sam_parsed
            )

        shutil.copy("alignment.summary", "../"+status+".alignment.summary")
        shutil.copy("quality.summary", "../"+status+".quality.summary")
        shutil.copy("alignTable.txt", "../"+status+".alignTable.txt")
        
        make_zip(sff_path+'.zip',sff_path)
        make_zip(fastq_path+'.zip',fastq_path)

    os.chdir(top_dir)


    union_sff = basename+".sff"
    union_fastq = basename+".fastq"
    # merge sff files
    try:
        com = "SFFMerge"
        com += " -r"
        com += " -o %s" % union_sff
        for status in ["Paired_Fwd","Paired_Rev","Singleton_Fwd","Singleton_Rev","corrected"]:
            sff = basename+"_"+status+".sff"
            if os.path.exists(sff):
Example #16
def runForBarcodes():
    global pluginParams, pluginResult, pluginReport
    # read barcode ids
    barcodes = []
    try:
        bcfileName = pluginParams['analysis_dir'] + '/barcodeList.txt'
        with open(bcfileName) as bcfile:
            for line in bcfile:
                if line.startswith('barcode '):
                    barcodes.append(line.split(',')[1])
    except:
        printerr("Reading barcode list file '%s'" % bcfileName)
        raise
    numGoodBams = 0
    numUnalBams = 0
    minFileSize = pluginParams['cmdOptions'].minbamsize
    (bcBamPath, bcBamRoot) = os.path.split(pluginParams['bamroot'])
    validBarcodes = []
    for barcode in barcodes:
        # use unmapped BAM if there else mapped BAM (unmapped may not be present on Proton)
        bcbam = os.path.join(bcBamPath, "%s_%s" % (barcode, bcBamRoot))
        if not os.path.exists(bcbam):
            bcbam = os.path.join(pluginParams['analysis_dir'],
                                 "%s_rawlib.bam" % barcode)
            numUnalBams += 1
        if not os.path.exists(bcbam):
            bcbam = ": BAM file not found"
            numUnalBams -= 1
        elif os.stat(bcbam).st_size < minFileSize:
            bcbam = ": BAM file too small"
        else:
            numGoodBams += 1
            validBarcodes.append(barcode)

    printlog("Processing %d barcodes...\n" % numGoodBams)
    if numUnalBams > 0:
        printlog(
            "Warning: %d barcodes will be processed using mapped BAM files. (Unmapped BAMs were not available.)\n"
            % numUnalBams)
    pluginReport['num_barcodes_processed'] = numGoodBams
    pluginReport['num_barcodes_failed'] = 0

    # iterate over all barcodes and process the valid ones
    skip_analysis = pluginParams['cmdOptions'].skip_analysis
    stop_on_error = pluginParams['cmdOptions'].stop_on_error
    create_scraper = pluginParams['cmdOptions'].scraper
    sample_names = pluginParams['sample_names']
    postout = False
    # just for logfile prettiness
    sampleNamesFile = ("%s/sampleNames.txt" % pluginParams['results_dir'])
    sampleNamesFW = open(sampleNamesFile, 'w')
    for barcode in barcodes:
        sample = sample_names[barcode] if barcode in sample_names else ''
        sampleNamesFW.write("%s:%s\n" % (barcode, sample))
    sampleNamesFW.close()
    binDir = os.path.join(pluginParams['plugin_dir'], 'bin')
    outDir = pluginParams['results_dir']
    analysisDir = pluginParams['analysis_dir']
    global hotspotsFile
    hotspotsFile = pluginParams['hotspots_file']
    global targetsFile
    targetsFile = pluginParams['regions_file']
    printlog("hotspots file is %s " % hotspotsFile)
    pluginOutDir = os.path.join(analysisDir, 'plugin_out')
    global variantCallerName
    if not filter(
            re.compile(r'variantCaller_out*').search,
            os.listdir(pluginOutDir)):
        printerr(
            "Variant Caller plugin has to be run before launching the PGX Analysis plugin. Please run Torrent Variant Caller plugin"
        )
        return
    variantCallerName = max(
        filter(
            re.compile(r'variantCaller_out*').search,
            os.listdir(pluginOutDir)))
    global coverageAnalysisName
    if not filter(
            re.compile(r'coverageAnalysis_out*').search,
            os.listdir(pluginOutDir)):
        printerr(
            "Coverage Analysis plugin has to be run before launching the PGX Analysis plugin. Please run the Coverage Analysis plugin"
        )
        return
    coverageAnalysisName = max(
        filter(
            re.compile(r'coverageAnalysis_out*').search,
            os.listdir(pluginOutDir)))
    printlog(variantCallerName)
    printlog(coverageAnalysisName)
    variantCallerDir = os.path.join(pluginOutDir, variantCallerName)
    printlog("variantcaller dir is %s" % variantCallerDir)
    coverageAnalysisDir = os.path.join(pluginOutDir, coverageAnalysisName)

    hotspotsFileVC = ""
    resultsJsonFile = os.path.join(variantCallerDir, "results.json")
    if not os.path.isfile(resultsJsonFile):
        printerr(
            "VariantCaller results are not ready. Please wait for the variant Caller plugin to finish and then launch the PGx plugin"
        )
        return

    covAnalysisResultsJsonFile = os.path.join(coverageAnalysisDir,
                                              "results.json")
    if not os.path.isfile(covAnalysisResultsJsonFile):
        printerr(
            "Coverage Analysis results are not ready. Please wait for the Coverage Analysis plugin to finish and then launch the PGx plugin"
        )
        return
    targetsFileVC = ""
    with open(resultsJsonFile) as fin:
        for line in fin:
            if "hotspots_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                hotspotsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in hotspotsFileVC:
                    hotspotsFileVC = hotspotsFileVC[:-2]
                else:
                    hotspotsFileVC = hotspotsFileVC[:-1]
                hotspotsFileVC = os.path.join(variantCallerDir, hotspotsFileVC)
            if "targets_bed" in line and ":" in line and "type" not in line:
                kvp = line.split(":")
                targetsFileVC = (os.path.basename(kvp[1].strip()))
                if "," in targetsFileVC:
                    targetsFileVC = targetsFileVC[:-2]
                else:
                    targetsFileVC = targetsFileVC[:-1]

                targetsFileVC = os.path.join(variantCallerDir, targetsFileVC)
    if not hotspotsFileVC:
        printerr(
            "Cannot obtain the hotspots file used by the VariantCaller. Trying to obtain the hotspots file from plan"
        )
    else:
        hotspotsFile = hotspotsFileVC
    if not hotspotsFile:
        printerr("The plan is not set up with a hotspots file.")
        return

    if not targetsFileVC:
        printerr(
            "Cannot obtain the Target Regions file used by the VariantCaller. Trying to obtain the regions file from plan"
        )
    else:
        targetsFile = targetsFileVC

    cmd = ("java -jar %s/PGX_Analysis.jar %s %s %s %s %s %s %s %s" %
           (binDir, hotspotsFile, outDir, bcfileName, analysisDir,
            variantCallerDir, coverageAnalysisDir, binDir, sampleNamesFile))

    printlog(cmd)
    RunCommand(cmd)

    # parse out data in results text file to dict AND coverts spaces to underscores in keys to avoid Django issues
    statsfile = 'summary.txt'
    analysisData = parseToDict(os.path.join(outDir, statsfile), "\t")
    global numPass, numUniformity, numAvgCov, totalSamples
    totalSamples = numGoodBams
    numPass = numGoodBams
    numAvgCov = 0
    numUniformity = 0
    for keys, values in analysisData.items():
        printlog(keys)
        printlog(values)
    for file in os.listdir("%s/cnvCalls" % outDir):
        if file.endswith(".log"):
            cnvCallsDir = os.path.join(outDir, "cnvCalls")
            filein = os.path.join(cnvCallsDir, file)
            printlog("filein is %s " % filein)
            with open(filein) as fin:
                sep = "="
                for line in fin:
                    if ("valid Samples =" in line):
                        kvp = line.split(sep)
                        totalSamples = kvp[1].strip()
                    elif ("CNV Calling =" in line):
                        kvp = line.split(sep)
                        numPass = kvp[1].strip()
                    elif ("Average coverage" in line):
                        kvp = line.split(sep)
                        numAvgCov = kvp[1].strip()
                    elif ("Uniformate Rate" in line):
                        kvp = line.split(sep)
                        numUniformity = kvp[1].strip()
    zipfilename = "%s/cnvExports.zip" % outDir
    cnvExportsDir = "%s/cnvExports" % outDir
    for file in os.listdir(cnvExportsDir):
        if file.endswith("_cn.txt"):
            filein = os.path.join(cnvExportsDir, file)
            compress.make_zip(zipfilename,
                              filein,
                              arcname=os.path.basename(filein),
                              use_sys_zip=False)

    vcfZipFilename = "%s/%s.vcf.zip" % (outDir, pluginParams['prefix'])
    mergedVcfsDir = "%s/merged_VCFs" % outDir
    for file in os.listdir(mergedVcfsDir):
        if file.endswith(".gz") or file.endswith(".tbi"):
            filein = os.path.join(mergedVcfsDir, file)
            compress.make_zip(vcfZipFilename,
                              filein,
                              arcname=os.path.basename(filein),
                              use_sys_zip=False)

    global barcodeData

    for barcode in validBarcodes:
        barcode_entry = {}
        sample = sample_names[barcode] if barcode in sample_names else ''
        barcode_entry['name'] = barcode
        if barcode in analysisData:
            barcodeLine = analysisData[barcode]
            kvp = barcodeLine.split("\t")
            #key = kvp[0].strip()

            if sample == '':
                barcode_entry['sample'] = 'none'
            else:
                barcode_entry['sample'] = sample
            if len(kvp) < 8 and kvp[2].strip() == 'null':
                barcode_entry['hotspots_variants_total'] = "none"
                barcode_entry['novel_variants_total'] = "none"
                barcode_entry['exon9_cnv'] = kvp[3].strip()
                barcode_entry['gene_cnv'] = kvp[4].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[5].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[6].strip()

            else:
                barcode_entry['hotspots_variants_total'] = "%d/%s" % (
                    int(kvp[2].strip()) - int(kvp[4].strip()) -
                    int(kvp[5].strip()), kvp[2].strip())
                barcode_entry['novel_variants_total'] = int(
                    kvp[3].strip()) - int(kvp[2].strip())
                barcode_entry['exon9_cnv'] = kvp[6].strip()
                barcode_entry['gene_cnv'] = kvp[7].strip()
                barcode_entry['exon9_cnv_confidence'] = kvp[8].strip()
                barcode_entry['gene_cnv_confidence'] = kvp[9].strip()

            barcodeData.append(barcode_entry)

    updateBarcodeSummaryReport()

    if create_scraper:
        createScraperLinksFolder(pluginParams['results_dir'],
                                 pluginParams['prefix'])