TARGET = '/nas/nbl3/' PWD = os.getcwd().split('code')[0] WORK = PWD + 'work/' FAST_DATA = PWD + 'data/' SLOW_DATA = '/nas/nbl3/' OTHER_CODE = '/nas/nbl3/projects/nb_convergence/code/' def getTumors(normalTumorFile): trans = {} with open(normalTumorFile) as f: for line in f: normalSample, tumorLs = line.strip('\n').split('\t') trans[tumorLs.split(',')[0] ] = True return trans NORMAL_SAMPLES = samples.loadSamples(TARGET + 'target_meta/working/samples') TUMOR_SAMPLES = getTumors('/nas/nbl3/target_meta/working/normal2tumor') SAMPLES = samples.loadSamplesNBL(SLOW_DATA + 'target_meta/working/samples') rule fixMvarBed: input: SLOW_DATA + 'masterVarBetaUnzip_bed/{sample2}.bed' output: FAST_DATA + 'masterVarBetaUnzip_bed_fix/{sample2}.bed' shell: "cat {input} | sed 's/0\t0/0\t1/g' > {output}" rule convertVcf: input: FAST_DATA + 'hg19_noncodingCap_vqlow/{sample}.vcf' output: FAST_DATA + 'hg19_noncodingCap_vqlow_tab/{sample}.tab' shell: '{PY27} convertVcf.py {input} {output}' rule mkBed: input: FAST_DATA + 'hg19_noncodingCap_vqlow_tab/{sample}.tab'
"""Find coding WGS positions.""" import os, sys sys.path.append('/home/evansj/me/projects/me/tool_dirs/') from tools import * sys.path.append('/home/evansj/me/projects/diskin/target_calls/code') import samples PWD = '/'.join(os.getcwd().split('/')[0:-1]) + '/' WORK = PWD + 'work/' FAST_DATA = PWD + 'data/' SLOW_DATA = '/nas/nbl3/' CALLS_CODE = '/home/evansj/me/projects/diskin/target_calls/code/' OTHER_CODE = '/nas/nbl3/projects/nb_convergence/code/' OTHER_CODE_2 = '/home/evansj/me/projects/diskin/noncoding_nbl_regions/code/' SAMPLES = samples.loadSamples(SLOW_DATA + 'target_meta/working/samples') rule annCG: """Find CGI control counts.""" input: FAST_DATA + 'hg19_{varType}_vqlow_tab/{sample}.tab', SLOW_DATA + 'human_variation_vcf/Complete_Public_Genomes_54genomes_B37_mkvcf.vcf.gz' output: FAST_DATA + 'hg19_{varType}_vqlow_cgControl_tab/{sample}.tab' shell: 'python {OTHER_CODE}annCG.py {input} {output}' rule annKaviar: input: FAST_DATA + 'hg19_{varType}_vqlow_cgControl_tab/{sample}.tab', '/home/evansj/me/projects/diskin/noncoding_nbl_regions/data/kaviar/full/Kaviar-150810-Public/vcfs/Kaviar-150810-Public-hg19.vcf.gz' output: FAST_DATA + 'hg19_{varType}_vqlow_cgControl_kaviar_tab/{sample}.tab' shell: '{PY27} {OTHER_CODE_2}annKaviar.py {input} {output}' rule test: input: FAST_DATA + 'hg19_clinvar_vqlow_cgControl_kaviar_tab/TARGET-10-PAIXPH-10A-01D.tab'