Пример #1
0
# --- Shared storage ---------------------------------------------------------
# TARGET and SLOW_DATA currently name the same root directory.
TARGET = '/nas/nbl3/'
SLOW_DATA = '/nas/nbl3/'
# Code directory of a separate project.
OTHER_CODE = '/nas/nbl3/projects/nb_convergence/code/'

# --- Project-local directories ----------------------------------------------
# Project root: everything in the current working directory that precedes the
# first occurrence of the text 'code' (the whole path if 'code' is absent).
PWD = os.getcwd().partition('code')[0]
FAST_DATA = PWD + 'data/'
WORK = PWD + 'work/'

def getTumors(normalTumorFile):
    """Collect the first tumor listed on each line of a normal-to-tumor file.

    Each non-empty line must hold exactly two tab-separated fields: a normal
    sample name and a comma-separated list of tumor names. Only the first
    entry of the tumor list is recorded.

    Args:
        normalTumorFile: path of the tab-separated file to read.

    Returns:
        dict mapping each recorded tumor name to True.

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            tab-separated fields.
    """
    trans = {}
    with open(normalTumorFile) as f:
        for line in f:
            line = line.rstrip('\n')
            # Empty lines (e.g. a trailing blank line) carry no record; without
            # this guard the two-way unpack below raises ValueError on them.
            if not line:
                continue
            _normalSample, tumorLs = line.split('\t')
            trans[tumorLs.split(',')[0]] = True
    return trans

# Sample metadata, loaded once when the workflow file is parsed.
# NORMAL_SAMPLES and SAMPLES read the same file (TARGET and SLOW_DATA are both
# '/nas/nbl3/') but through two different loaders of the `samples` module.
NORMAL_SAMPLES = samples.loadSamples(TARGET + 'target_meta/working/samples')
# Built from TARGET, like the neighbouring lines, instead of repeating the
# '/nas/nbl3/' root literal; the resulting path is unchanged.
TUMOR_SAMPLES = getTumors(TARGET + 'target_meta/working/normal2tumor')
SAMPLES = samples.loadSamplesNBL(SLOW_DATA + 'target_meta/working/samples')

# Copy one sample's BED file from SLOW_DATA to FAST_DATA, rewriting every
# "0<TAB>0" as "0<TAB>1" on the way.
rule fixMvarBed:
    input:
        SLOW_DATA + 'masterVarBetaUnzip_bed/{sample2}.bed'
    output:
        FAST_DATA + 'masterVarBetaUnzip_bed_fix/{sample2}.bed'
    # The \t escapes live in an ordinary (non-raw) string literal, so sed is
    # handed literal tab characters.
    # NOTE(review): the pattern is unanchored, so it would also match inside
    # longer numbers (e.g. "10<TAB>05") - confirm that is intended.
    shell:
        "cat {input} | sed 's/0\t0/0\t1/g' > {output}"

# Turn one sample's VCF into a .tab file by running convertVcf.py with the
# interpreter named by PY27.
# NOTE(review): PY27 is not defined in the visible part of this file, and
# convertVcf.py is given as a relative path - confirm where both come from.
rule convertVcf:
    input:
        FAST_DATA + 'hg19_noncodingCap_vqlow/{sample}.vcf'
    output:
        FAST_DATA + 'hg19_noncodingCap_vqlow_tab/{sample}.tab'
    shell:
        '{PY27} convertVcf.py {input} {output}'

rule mkBed:
    input:  FAST_DATA + 'hg19_noncodingCap_vqlow_tab/{sample}.tab'
Пример #2
0
"""Find coding WGS positions."""
import os, sys
sys.path.append('/home/evansj/me/projects/me/tool_dirs/')
from tools import *
sys.path.append('/home/evansj/me/projects/diskin/target_calls/code')
import samples

# --- Project-local directories ----------------------------------------------
# Parent of the current working directory, always ending in '/'.
PWD = os.getcwd().rpartition('/')[0] + '/'
FAST_DATA = PWD + 'data/'
WORK = PWD + 'work/'

# --- Shared storage ---------------------------------------------------------
SLOW_DATA = '/nas/nbl3/'

# --- Script directories of other projects -----------------------------------
CALLS_CODE = '/home/evansj/me/projects/diskin/target_calls/code/'
OTHER_CODE = '/nas/nbl3/projects/nb_convergence/code/'
OTHER_CODE_2 = '/home/evansj/me/projects/diskin/noncoding_nbl_regions/code/'
# Sample metadata read through the project's `samples` module when the
# workflow file is parsed; the structure of the returned value is defined by
# samples.loadSamples.
# NOTE(review): no rule visible in this part of the file refers to SAMPLES.
SAMPLES = samples.loadSamples(SLOW_DATA + 'target_meta/working/samples')

rule annCG:
    """Run annCG.py to find CGI control counts for one sample's variant table."""
    # Two inputs: the sample's table and the public 54-genome VCF. {input}
    # expands to both paths, in this order, on the command line.
    input:
        FAST_DATA + 'hg19_{varType}_vqlow_tab/{sample}.tab',
        SLOW_DATA + 'human_variation_vcf/Complete_Public_Genomes_54genomes_B37_mkvcf.vcf.gz'
    output:
        FAST_DATA + 'hg19_{varType}_vqlow_cgControl_tab/{sample}.tab'
    # NOTE(review): this rule calls plain `python`, whereas annKaviar uses
    # {PY27} - confirm the difference is deliberate.
    shell:
        'python {OTHER_CODE}annCG.py {input} {output}'

# Run annKaviar.py on the output of annCG together with the Kaviar hg19 VCF,
# writing the result to the *_cgControl_kaviar_tab directory.
# NOTE(review): PY27 is not defined in the visible part of this file.
rule annKaviar:
    # {input} expands to both paths below, in this order.
    input:
        FAST_DATA + 'hg19_{varType}_vqlow_cgControl_tab/{sample}.tab',
        '/home/evansj/me/projects/diskin/noncoding_nbl_regions/data/kaviar/full/Kaviar-150810-Public/vcfs/Kaviar-150810-Public-hg19.vcf.gz'
    output:
        FAST_DATA + 'hg19_{varType}_vqlow_cgControl_kaviar_tab/{sample}.tab'
    shell:
        '{PY27} {OTHER_CODE_2}annKaviar.py {input} {output}'

# Target rule for a single sample: its input matches the output pattern of
# annKaviar with varType=clinvar and sample=TARGET-10-PAIXPH-10A-01D, so
# requesting this rule asks for that one annotated table.
# NOTE(review): only the input directive is visible here; the rule may
# continue beyond this part of the file.
rule test:
    input: FAST_DATA + 'hg19_clinvar_vqlow_cgControl_kaviar_tab/TARGET-10-PAIXPH-10A-01D.tab'