def analyse_wgs_prepare(self, input, output):
    '''
    creates working directory and scripts to run for wgs pipeline

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the stage command succeeds

    When util.find_normal returns None for the sample, only a placeholder
    output file is written and no command is run.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    if normal_id is None:  # nothing to do
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    tmp_id = 'wgs-{}'.format(tumour_id)
    tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
    safe_make_dir(tmp_dir)
    safe_make_dir(os.path.dirname(output))

    # NOTE(review): both cp sources end in "src/util/", i.e. they name a
    # directory and are identical; file names look to be missing - confirm.
    command = 'cp {root}/src/util/ {tmp_dir}/ && cp {root}/src/util/ {tmp_dir}/ && touch {output}'.format(
        root=config.ROOT, output=output, tmp_dir=tmp_dir)
    run_stage(self.state, 'analyse_wgs_prepare', command)
def contest(self, input, output):
    '''
    run contest

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    A script is rendered from a template by replacing the TUMOUR, NORMAL,
    UUID and ROOT placeholders, then run through run_stage. When
    util.find_normal returns None the sample is itself a normal and is
    used as its own normal.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # tumour_id is actually normal; from here on both cases are identical
    if normal_id is None:
        normal_id = tumour_id

    validation_path = "{root}/out/{sample}.validation".format(
        root=config.ROOT, sample=normal_id)
    with open(validation_path, 'r') as validation_fh:
        validation_data = validation_fh.readlines()
    # uuid is the ninth tab-separated column of the second line
    normal_uuid = validation_data[1].split('\t')[8]

    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('UUID', normal_uuid),
        ('ROOT', config.ROOT),
    )

    script_path = '{tmp_dir}/{tumour_id}'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id)
    # NOTE(review): this path ends in "/", i.e. it names a directory; the
    # template file name looks to be missing - confirm.
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {tmp_dir}/{tumour_id} 2>{prefix}.contest.log.err 1>{prefix}.contest.log.out && touch "{output}"'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id, output=output, prefix=prefix)
    run_stage(self.state, 'contest', command)
def _analyse_wgs_with_command(self, input, output, subcommand, cpu=4):
    '''
    take mapped bams and generate variant calls by running the sanger pipeline cgpwgs

    input: sequence whose first element is the mapped bam path
    output: path of the file touched once the container command succeeds
    subcommand: replaces COMMAND in the template and names the script,
        the log files and the stage
    cpu: replaces CPULIMIT in the template

    When util.find_normal returns None for the sample, only a placeholder
    output file is written and no command is run.
    '''
    input = input[0]
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    if normal_id is None:  # nothing to do
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    tmp_id = 'wgs-{}-{}'.format(config.WGS_VERSION, tumour_id)
    tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
    safe_make_dir('{}/home'.format(tmp_dir))

    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TMP_ID', tmp_id),
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('COMMAND', subcommand),
        ('WGS_VERSION', config.WGS_VERSION),
        ('CPULIMIT', str(cpu)),
    )

    # make subcommand analysis script
    script_path = '{tmp_dir}/analyse-{subcommand}.sh'.format(
        tmp_dir=tmp_dir, subcommand=subcommand)
    template_path = '{root}/src/util/analyse-{wgs_version}.sh.template'.format(
        wgs_version=config.WGS_VERSION, root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'singularity exec -i --bind {in_dir}:/mnt/in,{out}:/mnt/out,{reference}:/mnt/reference,{tmp}:/mnt/tmp --workdir {tmp_dir} --home {tmp_dir}/home:/home/z --contain {root}/img/cgpwgs-{wgs_version}.img bash /mnt/tmp/{tmp_id}/analyse-{subcommand}.sh 1>{prefix}.wgs.{subcommand}.{wgs_version}.log.out 2>{prefix}.wgs.{subcommand}.{wgs_version}.log.err && touch {output}'.format(
        root=config.ROOT,
        in_dir=config.IN,
        out=config.OUT,
        reference=config.REFERENCE,
        tmp=config.TMP,
        tmp_dir=tmp_dir,
        tmp_id=tmp_id,
        prefix=prefix,
        output=output,
        subcommand=subcommand,
        wgs_version=config.WGS_VERSION)
    run_stage(self.state, 'analyse_wgs_{}'.format(subcommand), command)
def callable_bases(self, input, output):
    '''
    run callable bases

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    A script is rendered from a template by replacing the TUMOUR, NORMAL,
    ROOT, TMP_DIR, MIN_TUM and MIN_NORM placeholders, then run through
    run_stage. When util.find_normal returns None for the sample, only a
    placeholder output file is written and no command is run.
    '''
    MINIMUM_COVERAGE_TUMOR = '17'
    MINIMUM_COVERAGE_NORMAL = '10'

    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # nothing to do for normal sample
    if normal_id is None:
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    # it's a tumour
    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('ROOT', config.ROOT),
        ('TMP_DIR', config.TMP),
        ('MIN_TUM', MINIMUM_COVERAGE_TUMOR),
        ('MIN_NORM', MINIMUM_COVERAGE_NORMAL),
    )

    script_path = '{tmp_dir}/{tumour_id}'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id)
    # NOTE(review): this path ends in "/", i.e. it names a directory; the
    # template file name looks to be missing - confirm.
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {tmp_dir}/{tumour_id} 2>{prefix}.callable_bases.log.err 1>{prefix}.callable_bases.log.out && touch "{output}"'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id, output=output, prefix=prefix)
    run_stage(self.state, 'callable_bases', command)
def hmmcopy(self, input, output):
    '''
    run hmmcopy

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    A script is rendered from a template into "<prefix>.hmmcopy" and run
    through run_stage. Normal samples (util.find_normal returns None) are
    processed too: the sample id then fills the NORMAL placeholder and
    TUMOUR is left untouched.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    target_dir = '{}.hmmcopy'.format(prefix)
    safe_make_dir(target_dir)

    # only the sample placeholders differ between the two cases
    if normal_id is None:
        # tumour_id is actually a normal
        substitutions = [('NORMAL', tumour_id)]
    else:
        # it's a tumour
        substitutions = [('TUMOUR', tumour_id), ('NORMAL', normal_id)]
    # applied in order, as literal text (no regex escapes in the values)
    substitutions.extend([('ROOT', config.ROOT), ('TARGET_DIR', target_dir)])

    # NOTE(review): both paths end in "/", i.e. they name directories; the
    # script and template file names look to be missing - confirm.
    script_path = '{target_dir}/'.format(target_dir=target_dir)
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {target_dir}/ 2>{prefix}.hmmcopy.log.err 1>{prefix}.hmmcopy.log.out && touch "{output}"'.format(
        target_dir=target_dir, output=output, prefix=prefix)
    run_stage(self.state, 'hmmcopy', command)
def delly(self, input, output, cpu=6):
    '''
    run the delly singularity container

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the container command succeeds
    cpu: replaces CORES in the template

    A script is rendered into a fresh, uniquely named temporary directory,
    run inside the container, and on success the directory's workdir is
    moved to "<prefix>.delly.results" and the directory removed. When
    util.find_normal returns None for the sample, only a placeholder
    output file is written and no command is run.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # nothing to do for normal sample
    if normal_id is None:
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    # it's a tumour
    tmp_id = 'delly-{}-{}'.format(tumour_id, str(uuid.uuid4()))
    tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
    safe_make_dir(tmp_dir)

    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('CORES', str(cpu)),
    )

    # NOTE(review): both paths end in "/", i.e. they name directories; the
    # script and template file names look to be missing (the command below
    # likewise runs "bash /mnt/tmp/") - confirm.
    script_path = '{tmp_dir}/'.format(tmp_dir=tmp_dir)
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'singularity exec -i --bind {in_dir}:/mnt/in,{out}:/mnt/out,{reference}:/mnt/reference,{tmp_dir}:/mnt/tmp --workdir {tmp_dir} --contain {root}/img/delly-2.0.0.img bash /mnt/tmp/ 1>{prefix}.delly.log.out 2>{prefix}.delly.log.err && mv {tmp_dir}/workdir {prefix}.delly.results && touch "{output}" && rm -r "{tmp_dir}"'.format(
        root=config.ROOT,
        in_dir=config.IN,
        out=config.OUT,
        reference=config.REFERENCE_DELLY,
        tmp_dir=tmp_dir,
        prefix=prefix,
        output=output)
    run_stage(self.state, 'delly', command)
def somatic_sniper(self, input, output):
    '''
    run somatic sniper

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    A script is rendered from a template by replacing the TUMOUR_ID,
    NORMAL_ID and ROOT_PATH placeholders, then run through run_stage.
    When util.find_normal returns None for the sample, only a placeholder
    output file is written and no command is run.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{root}/cfg/sample-metadata.csv".format(root=config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # nothing to do for normal sample
    if normal_id is None:
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    # it's a tumour
    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR_ID', tumour_id),
        ('NORMAL_ID', normal_id),
        ('ROOT_PATH', config.ROOT),
    )

    script_path = '{tmp_dir}/{tumour_id}'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id)
    # NOTE(review): this path ends in "/", i.e. it names a directory; the
    # template file name looks to be missing - confirm.
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {tmp_dir}/{tumour_id} 2>{prefix}.somatic_sniper.log.err 1>{prefix}.somatic_sniper.log.out && touch "{output}"'.format(
        tmp_dir=config.TMP, tumour_id=tumour_id, output=output, prefix=prefix)
    run_stage(self.state, 'somatic_sniper', command)
def gridss(self, input, output):
    '''
    run gridss

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    A script is rendered into a fresh, uniquely named temporary directory
    by replacing the TUMOUR, NORMAL, ROOT and ACCOUNT placeholders; on
    success the directory is removed. When util.find_normal returns None
    for the sample, only a placeholder output file is written and no
    command is run.
    '''
    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # nothing to do for normal sample
    if normal_id is None:
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    # it's a tumour
    tmp_id = 'gridss-{}-{}'.format(tumour_id, str(uuid.uuid4()))
    tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
    safe_make_dir(tmp_dir)

    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('ROOT', config.ROOT),
        ('ACCOUNT', config.ACCOUNT),
    )

    # NOTE(review): both paths end in "/", i.e. they name directories; the
    # script and template file names look to be missing (the command below
    # likewise runs "bash {tmp_dir}/") - confirm.
    script_path = '{tmp_dir}/'.format(tmp_dir=tmp_dir)
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {tmp_dir}/ 2>{prefix}.gridss.log.err 1>{prefix}.gridss.log.out && touch "{output}" && rm -r {tmp_dir}'.format(
        tmp_dir=tmp_dir, output=output, prefix=prefix)
    run_stage(self.state, 'gridss', command)
def muse(self, input, output):
    '''
    run muse

    input: path of the sample's mapped bam (expected to end in .mapped.bam)
    output: path of the file touched once the generated script succeeds

    The bam header is read with "samtools view -H" and every @SQ sequence
    is cut into chunks of at most `interval` bases, one "$MUSE call" line
    per chunk. Those lines replace CALL_VARIANTS in the template; the
    rendered script is run from a fresh temporary directory that is
    removed on success. When util.find_normal returns None for the sample,
    only a placeholder output file is written and no command is run.
    '''
    interval = 50000000  # chunk size to break chromosomes into for muse

    # dots are escaped so that only a literal ".mapped.bam" suffix is removed
    prefix = re.sub(r'\.mapped\.bam$', '', input)  # full path without mapped.bam
    tumour_id = prefix.split('/')[-1]  # e.g. CMHS1

    # close the metadata file as soon as the lookup has finished
    # (assumes find_normal reads everything it needs before returning)
    with open("{}/cfg/sample-metadata.csv".format(config.ROOT), 'r') as metadata_fh:
        normal_id = util.find_normal(tumour_id, metadata_fh)

    # nothing to do for normal sample
    if normal_id is None:
        safe_make_dir(os.path.dirname(output))
        with open(output, 'w') as output_fh:
            output_fh.write(
                'Normal sample does not require analysis. See the relevant tumour file.\n'
            )
        return

    # it's a tumour
    tmp_id = 'muse-{}-{}'.format(tumour_id, str(uuid.uuid4()))
    tmp_dir = '{tmp}/{tmp_id}'.format(tmp=config.TMP, tmp_id=tmp_id)
    safe_make_dir(tmp_dir)

    # build combine variants commands
    muse_commands = []
    # universal_newlines gives text output on python 2 and 3 alike, and
    # communicate() reads the header and reaps the child process
    proc = subprocess.Popen(
        ['samtools', 'view', '-H', input],
        stdout=subprocess.PIPE,
        universal_newlines=True)
    header, _ = proc.communicate()
    for line in header.splitlines():
        if not line.startswith('@SQ\t'):
            continue
        # look tags up by name: their order is not fixed and sequence
        # names may themselves contain ':'
        tags = {}
        for field in line.strip().split('\t')[1:]:
            key, _sep, value = field.partition(':')
            tags[key] = value
        chromosome = tags['SN']
        size = int(tags['LN'])
        # now write regions as zero based
        current = 0
        while current < size:
            final = min(size, current + interval)
            muse_commands.append(
                '$MUSE call -O {tmp_dir}/tmp{chromosome}_{current}_{final} -f $REFERENCE -r "{chromosome}:{current}-{final}" $TMR_ABS $NRML_ABS'
                .format(tmp_dir=tmp_dir,
                        chromosome=chromosome,
                        current=current,
                        final=final))
            current = final

    # applied in order, as literal text (no regex escapes in the values)
    substitutions = (
        ('TUMOUR', tumour_id),
        ('NORMAL', normal_id),
        ('TMP_DIR', tmp_dir),
        ('ROOT', config.ROOT),
        ('CALL_VARIANTS', '\n'.join(muse_commands)),
    )

    # NOTE(review): both paths end in "/", i.e. they name directories; the
    # script and template file names look to be missing (the command below
    # likewise runs "bash {tmp_dir}/") - confirm.
    script_path = '{tmp_dir}/'.format(tmp_dir=tmp_dir)
    template_path = '{root}/src/util/'.format(root=config.ROOT)
    with open(script_path, 'w') as analyse_fh, open(template_path, 'r') as template_fh:
        for line in template_fh:
            for placeholder, value in substitutions:
                line = line.replace(placeholder, value)
            analyse_fh.write(line)

    command = 'bash {tmp_dir}/ 2>{prefix}.muse.log.err 1>{prefix}.muse.log.out && touch "{output}" && rm -r {tmp_dir}'.format(
        tmp_dir=tmp_dir, output=output, prefix=prefix)
    run_stage(self.state, 'muse', command)