示例#1
0
    def QC_tumor_normal(self):
        # Separate the runs into tumor and normal lists
        normal_runs, tumor_runs = self.getTumor_Normal()

        if self.sample_json['analysis']['settings'][
                'type'] == 'all_tumor_normal':
            # Use the sample_status here to not re-run the QC and to not overwrite run status. The 'sample_status' should be reset to 'pushed' when new runs are pushed..
            #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
            # if the user specified the '--pass_fail' option, then run this part still
            if self.sample_json[
                    'sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
                # QC the normal or tumor runs with each other
                self.QC_runs(normal_runs, 'normal_')
                self.QC_runs(tumor_runs, 'tumor_')
                # now QC the tumor and normal runs together.
                self.QC_normal_tumor_runs(normal_runs, tumor_runs)

            # make the merger
            merger = Merger(self.sample_json, self.options.recalc_3x3_tables)
            # Check to see if the normal runs are ready to be merged.
            self.sample_json, merge_normal = merger.check_merge(
                normal_runs, 'Normal/', 'normal_')
            if merge_normal == True:
                # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
                self.sample_json = merger.merge_runs('normal', 'Normal_',
                                                     'normal_')

            # Check to see if the tumor runs are ready to be merged.
            self.sample_json, merge_tumor = merger.check_merge(
                tumor_runs, 'Tumor/', 'tumor_')
            if merge_tumor == True:
                self.sample_json = merger.merge_runs('tumor', 'Tumor_',
                                                     'tumor_')

            # If any runs were merged, QC them. If there are only 1 normal and tumor run, they won't be QCd again.
            #if normal_merge_dir != '' or tumor_merge_dir != '' or (len(normal_passing_bams) == 1 and len(tumor_passing_bams) == 1):
            # now QC the tumor and normal merged bams together if both normal and tumor runs are ready.
            # To only QC all for the actual merged runs (PNET), change the 'final' part to 'merged'.
            # The 'final_normal_json' and 'final_tumor_json' flags are set by merger.py in the function check_merge, line 157
            #if (merge_normal or merge_tumor) and ('merged_normal_json' in self.sample_json and 'merged_tumor_json' in self.sample_json):
            if 'final_normal_json' in self.sample_json and 'final_tumor_json' in self.sample_json:
                self.sample_json, qc_json = self.qc_run.QC_2Runs(
                    self.sample_json, self.sample_json['final_normal_json'],
                    self.sample_json['final_tumor_json'], 'normal_', 'tumor_',
                    '_merged')
                self.sample_json, merged_perc_avail_bases = self.qc_run.update_3x3_runs_status(
                    self.sample_json, self.sample_json['final_normal_json'],
                    self.sample_json['final_tumor_json'], qc_json)
                # update the merged run status
                merger.update_merged_run_status(
                    self.sample_json['final_normal_json'],
                    merged_perc_avail_bases)
                merger.update_merged_run_status(
                    self.sample_json['final_tumor_json'],
                    merged_perc_avail_bases)

                # cleanup the individual run bam files
                if merged_perc_avail_bases > .9:
                    final_qc_dir = "%s/all%svs%s" % (
                        self.sample_json['qc_folder'],
                        json.load(open(self.sample_json['final_normal_json']))
                        ['run_name'],
                        json.load(open(
                            self.sample_json['final_tumor_json']))['run_name'])
                    # annotate the final somatic variants
                    command = "bash %s/Somatic_Variants/somatic_variants.sh %s %s %s" % (
                        self.sample_json['analysis']['software_directory'],
                        final_qc_dir, self.sample_json['sample_name'],
                        self.sample_json['analysis']['software_directory'])
                    if runCommandLine(command) != 0:
                        sys.stderr.write("ERROR: somatic annotation failed!\n")

                    # Cleanup the PTRIM.bam and chr bam files after all of the QC is done.
                    # are there any other files to clean up?
                    self.cleanup_sample.cleanup_runs(
                        self.sample_json['runs'],
                        self.sample_json['analysis']['settings']['cleanup'],
                        self.no_errors)
                    #self.cleanup_sample.delete_runs(runs, self.sample_json['analysis']['settings']['cleanup'], self.no_errors)

                    # Cleanup after the merging QC is done.
                    self.cleanup_sample.cleanup_runs([
                        self.sample_json['final_normal_json'],
                        self.sample_json['final_tumor_json']
                    ], self.sample_json['analysis']['settings']['cleanup'],
                                                     self.no_errors)

                    # Set the sample_status
                    self.sample_json['sample_status'] = 'merged_pass'
                else:
                    self.sample_json[
                        'sample_status'] = 'awaiting_more_sequencing'
示例#2
0
    def QC_germline(self):
        # Use the sample_status here to not re-run the QC and to not overwrite run status. The 'sample_status' should be reset to 'pushed' when new runs are pushed..
        #if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
        # if the user specified the '--pass_fail' option, then run this part still
        if self.sample_json[
                'sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
            # QC the normal runs with each other
            self.QC_runs(self.sample_json['runs'])

        # what if there is only one run that passes all of the metrics? It should be marked as the 'final_json' and have the 'pass_fail_merged' flag marked as pass.
        # make the merger
        merger = Merger(self.sample_json, self.options.recalc_3x3_tables)
        # Check to see if the normal runs are ready to be merged.
        self.sample_json, merge = merger.check_merge(self.sample_json['runs'])
        if merge != True:
            if 'final_json' in self.sample_json:
                # update the final run status
                merger.update_merged_run_status(self.sample_json['final_json'])
        elif merge == True:
            # merge the normal and/or tumor runs. Will only merge the passing runs with each other.
            self.sample_json = merger.merge_runs('germline')

            # update the merged run status
            merger.update_merged_run_status(self.sample_json['merged_json'])

            if json.load(open(self.sample_json['merged_json'])
                         )['pass_fail_merged_status'] == 'pass':
                # Set the sample_status
                self.sample_json['sample_status'] = 'merged_pass'
                # cleanup the individual run bam files
                self.cleanup_sample.cleanup_runs(
                    self.sample_json['runs'],
                    self.sample_json['analysis']['settings']['cleanup'],
                    self.no_errors)
                # Cleanup the merged dir
                self.cleanup_sample.cleanup_runs(
                    [self.sample_json['merged_json']],
                    self.sample_json['analysis']['settings']['cleanup'],
                    self.no_errors)
            else:
                self.sample_json['sample_status'] = 'awaiting_more_sequencing'

        # copy the final run's VCF file to the final_dir if it passes the "merged" coverage flag
        if 'final_json' in self.sample_json:
            final_json = json.load(open(self.sample_json['final_json']))
            if final_json['pass_fail_merged_status'] == 'pass':
                final_vcf = glob.glob("%s/*.vcf" % final_json['run_folder'])[0]
                final_project_dir = "/home/ionadmin/jeff/%s_Final_VCFs" % (
                    self.sample_json['project'])
                print "copying %s to %s" % (final_vcf, final_project_dir)
                # check to make sure the final dir exists.
                if not os.path.isdir(final_project_dir):
                    os.mkdir(final_project_dir)
                shutil.copy(
                    final_vcf, "%s/%s.vcf" %
                    (final_project_dir, self.sample_json['sample_name']))
                # now push the sample to s3 storage
                if self.sample_json['project'] == 'Einstein':
                    print "pushing %s to amazon s3 storage" % self.sample_json[
                        'sample_name']
                    self.push_sample_to_s3(final_json)
示例#3
0
文件: QC_sample.py 项目: dyermd/legos
	def QC_merge_runs(self):
		# if this is a germline sample, QC all of the normal runs with each other.
		if self.sample_json['sample_type'] == 'germline':
			# Use the sample_status here to not re-run the QC and to not overwrite run status. The 'sample_status' should be reset to 'pushed' when new runs are pushed..
			#if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
			# if the user specified the '--pass_fail' option, then run this part still
			if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
				# QC the normal runs with each other
				self.QC_runs(self.sample_json['runs'])
				# write the sample json file
				write_json(self.sample_json['json_file'], self.sample_json)
	
			# what if there is only one run that passes all of the metrics? It should be marked as the 'final_json' and have the 'pass_fail_merged' flag marked as pass.
			# make the merger
			merger = Merger(self.sample_json['json_file'])
			# Check to see if the normal runs are ready to be merged.
			merge = merger.check_merge(self.sample_json['runs'])
			if merge == True:
				# merge the normal and/or tumor runs. Will only merge the passing runs with each other.
				merger.merge_runs('germline')

				# load the sample json file because merger edited it.
				self.sample_json = json.load(open(self.sample_json['json_file']))

				# update the merged run status
				merger.update_merged_run_status(self.sample_json['merged_json'])
	
				if json.load(open(self.sample_json['merged_json']))['pass_fail_merged_status'] == 'pass':
					# Set the sample_status
					self.sample_json['sample_status'] = 'merged'
					# cleanup the individual run bam files
					self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
					# Cleanup the merged dir 
					self.cleanup_sample.cleanup_runs([self.sample_json['merged_json']], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
				else:
					self.sample_json['sample_status'] = 'awaiting_more_sequencing'
	

		# if this is a tumor_normal sample, find the normal and tumor runs, and then QC them with each other.
		elif self.sample_json['sample_type'] == 'tumor_normal':
			# Separate the runs into tumor and normal lists
			normal_runs, tumor_runs = self.getTumor_Normal()
	
			if self.sample_json['analysis']['settings']['type'] == 'all_tumor_normal':
				# Use the sample_status here to not re-run the QC and to not overwrite run status. The 'sample_status' should be reset to 'pushed' when new runs are pushed..
				#if self.sample_json['sample_status'] != 'pending_merge' and self.sample_json['sample_status'] != 'pending_3x3_review' and self.sample_json['sample_status'] != 'merged':
				# if the user specified the '--pass_fail' option, then run this part still
				if self.sample_json['sample_status'] == 'pushed' or self.options.pass_fail or self.options.qc_all:
					# QC the normal or tumor runs with each other
					self.QC_runs(normal_runs, 'normal_')
					self.QC_runs(tumor_runs, 'tumor_')
					# now QC the tumor and normal runs together.
					self.QC_normal_tumor_runs(normal_runs, tumor_runs)
					# make the excel spreadsheet containing the data and copy it back to the proton
					#self._make_xlsx()
					# write the sample json file
					write_json(self.sample_json['json_file'], self.sample_json)
	
				# make the merger
				merger = Merger(self.sample_json['json_file'])
				# Check to see if the normal runs are ready to be merged.
				merge_normal = merger.check_merge(normal_runs, 'Normal/', 'normal_')
				if merge_normal == True:
					# merge the normal and/or tumor runs. Will only merge the passing runs with each other.
					merger.merge_runs('normal', 'Normal_', 'normal_')
	
				# Check to see if the tumor runs are ready to be merged.
				merge_tumor = merger.check_merge(tumor_runs, 'Tumor/', 'tumor_')
				if merge_tumor == True:
					merger.merge_runs('tumor', 'Tumor_', 'tumor_')

				# load the sample json file because merger edited it.
				self.sample_json = json.load(open(self.sample_json['json_file']))
	
				# If any runs were merged, QC them. If there are only 1 normal and tumor run, they won't be QCd again. 
				#if normal_merge_dir != '' or tumor_merge_dir != '' or (len(normal_passing_bams) == 1 and len(tumor_passing_bams) == 1):	
				# only QC all for the actual merged runs for now (PNET).
				# now QC the tumor and normal merged bams together if both normal and tumor runs are ready.
				if merge_normal or merge_tumor and ('merged_normal_json' in self.sample_json and 'merged_tumor_json' in self.sample_json):
					self.sample_json, qc_json = self.qc_run.QC_2Runs(self.sample_json, self.sample_json['merged_normal_json'], self.sample_json['merged_tumor_json'], 'normal_', 'tumor_', '_merged')
					self.sample_json, merged_perc_avail_bases = self.qc_run.update_3x3_runs_status(self.sample_json, self.sample_json['merged_normal_json'], self.sample_json['merged_tumor_json'], qc_json)
					# update the merged run status 
					merger.update_merged_run_status(self.sample_json['merged_normal_json'], merged_perc_avail_bases)
					merger.update_merged_run_status(self.sample_json['merged_tumor_json'], merged_perc_avail_bases)

					# cleanup the individual run bam files
					if merged_perc_avail_bases > .9:
						# Cleanup the PTRIM.bam and chr bam files after all of the QC is done.
						# are there any other files to clean up?
						self.cleanup_sample.cleanup_runs(self.sample_json['runs'], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)
						#self.cleanup_sample.delete_runs(runs, self.sample_json['analysis']['settings']['cleanup'], self.no_errors)

						# Cleanup after the merging QC is done.
						self.cleanup_sample.cleanup_runs([self.sample_json['final_normal_json'], self.sample_json['final_tumor_json']], self.sample_json['analysis']['settings']['cleanup'], self.no_errors)

						# Set the sample_status
						self.sample_json['sample_status'] = 'merged_pass'
					else:
						self.sample_json['sample_status'] = 'awaiting_more_sequencing'

		# print the final status
		if self.no_errors == False or self.qc_run.no_errors == False:
			sys.stderr.write("%s finished with errors. See %s/sge.log for more details"%(self.sample_json['sample_name'], self.sample_json['output_folder']))
			self.sample_json['sample_status'] == 'failed'
			write_json(self.sample_json['json_file'], self.sample_json)
			sys.exit(1)
		else:
			print "%s finished with no errors!"%(self.sample_json['sample_name'])

		# write the sample json file
		write_json(self.sample_json['json_file'], self.sample_json)

		# make the excel spreadsheet containing the data and copy it back to the proton
		self._make_xlsx()