示例#1
0
def make_pooldirs(data, parentdir):
    """Make a directory (and a 'shfiles' subdirectory) for every pool.

    Positional arguments:
    data - table object; its 'pool_name' column lists the pools
    parentdir - directory under which each pool directory is placed

    Returns a list with the value makedir returned for each pool directory.
    """
    print(Bcolors.BOLD + "\nmaking pool dirs" + Bcolors.ENDC)
    created = []
    for pool_name in uni(data['pool_name'].tolist()):
        pool_path = op.join(parentdir, pool_name)
        # an existing directory means earlier output may be at risk: ask first
        if op.exists(pool_path):
            print("The pooldir already exists, this could overwrite previous data: %s" % pool_path)
            askforinput()
        made = makedir(pool_path)
        created.append(made)
        makedir(op.join(pool_path, 'shfiles'))
    return created
def make_pooldirs(data, parentdir):
    """Make a fresh subdirectory of parentdir for every pool.

    A pool directory that already exists is deleted (after the user is
    prompted via askforinput) and then created again.

    Positional arguments:
    data - datatable.txt with info for pipeline
    parentdir - directory with datatable.txt and (symlinks to) fastq data

    Returns a list with the value makedir returned for each pool directory.
    """
    print(Bcolors.BOLD + "\nmaking pool dirs" + Bcolors.ENDC)
    created = []
    for pool_name in uni(data['pool_name'].tolist()):
        target = op.join(parentdir, pool_name)
        if op.exists(target):
            warning = "\tWARN: The pooldir already exists, this WILL overwrite and/or delete previous data: %s" % target
            print(Bcolors.WARNING + warning + Bcolors.ENDC)
            askforinput(tab='\t', newline='')
            # unlink the .gz entries one by one before removing the whole tree
            for entry in fs(target):
                if not entry.endswith('.gz'):
                    continue
                os.unlink(entry)
            shutil.rmtree(target)
        made = makedir(target)
        created.append(made)
    return created
def get_prereqs(bedfile, pooldir, parentdir, pool, program):
    """Get object names.

    Positional arguments:
    bedfile - bedfile path; the text between its last "_" and ".bed" is the number
    pooldir - pool directory; the <program> output dir is made inside it
    parentdir - directory holding poolref.pkl
    pool - key used to look up the reference in poolref.pkl
    program - program name, used for the output dir and the vcf file name

    Returns the tuple (num, ref, vcf).
    """
    last_chunk = bedfile.split("_")[-1]
    num = last_chunk.split(".bed")[0]
    poolref = pklload(op.join(parentdir, 'poolref.pkl'))
    ref = poolref[pool]
    program_dir = makedir(op.join(pooldir, program))
    vcf_name = f'{pool}_{program}_bedfile_{num}.vcf'
    return (num, ref, op.join(program_dir, vcf_name))
def get_prereqs(num):
    """Build the path of the num'th bedfile from the ref.fa path name.

    The reference path is read from the name `ref` in the enclosing scope.
    The bedfile lives in a 'bedfiles_<basename>' directory beside the
    reference, and num is zero-padded to four characters.

    Positional arguments:
    num - int; the num'th bedfile
    """
    ref_dir, ref_file = op.split(ref)
    stem = ref_file.split(".fa")[0]
    bed_root = makedir(op.join(ref_dir, 'bedfiles_%s' % stem))
    padded = str(num).zfill(4)
    return op.join(bed_root, "%s_bedfile_%s.bed" % (stem, padded))
示例#5
0
def get_varscan_cmd(bamfiles, bedfile, bednum, vcf, ref, pooldir, program):
    """Create command to call varscan.

    Reads `parentdir` and `pool` from the enclosing scope to look up the
    pool's ploidy in ploidy.pkl.

    Returns the tuple (cmds, finalvcf): the commands from get_small_bam_cmds
    followed by the mpileup/VarScan command, and the path of vcf's basename
    inside the <pooldir>/<program> directory.
    """
    bam_list, smallcmds = get_small_bam_cmds(bamfiles, bednum, bedfile)
    smallbams = ' '.join(bam_list)
    ploidy_table = pklload(op.join(parentdir, 'ploidy.pkl'))
    ploidy = ploidy_table[pool]
    # if single-sample then set minfreq to 0, else use min possible allele freq
    if len(ploidy.keys()) > 1:
        minfreq = 1 / sum(ploidy.values())
    else:
        minfreq = 0
    cmd = f'''samtools mpileup -B -f {ref} {smallbams} | java -Xmx15g -jar \
$VARSCAN_DIR/VarScan.v2.4.3.jar mpileup2cns --min-coverage 8 --p-value 0.05 \
--min-var-freq {minfreq} --strand-filter 1 --min-freq-for-hom 0.80 \
--min-avg-qual 20 --output-vcf 1 > {vcf}
module unload samtools
'''
    # final vcf
    program_dir = makedir(op.join(pooldir, program))
    vcf_name = op.basename(vcf)
    return (smallcmds + cmd, op.join(program_dir, vcf_name))
def create_reservation(pooldir, exitneeded=False):
    """Create a file so that other realign jobs can't start crisp and varscan too.

    The reservation file is written under <pooldir>/shfiles/crispANDvarscan
    and holds the SLURM job id of the job that claimed it. The pool name is
    read from the name `pool` in the enclosing scope.

    Positional arguments:
    pooldir - pool directory in which the reservation is made

    Keyword arguments:
    exitneeded - when True the process exits after the reservation step even
                 if this job created the file

    Returns the shfiles/crispANDvarscan directory (as returned by makedir)
    when this job holds the reservation; otherwise the process exits.

    Raises KeyError if SLURM_JOB_ID is not set in the environment.
    """
    import sys  # local import; exit() from `site` is not guaranteed to exist

    print('creating reservation')
    shdir = makedir(op.join(pooldir, 'shfiles/crispANDvarscan'))
    file = op.join(shdir, '%s_crispANDvarscan_reservation.sh' % pool)
    jobid = os.environ['SLURM_JOB_ID']
    try:
        # mode 'x' fails if the file already exists, so checking for the file
        # and creating it happen in one step instead of exists()-then-write
        with open(file, 'x') as o:
            o.write("%s" % jobid)
    except FileExistsError:
        exitneeded = True
    time.sleep(random.random()*15)
    with open(file, 'r') as o:
        tokens = o.read().split()
    # an empty file cannot be this job's reservation: treat it as someone else's
    fjobid = tokens[0] if tokens else None
    if fjobid != jobid or exitneeded is True:
        # just in case two jobs try at nearly the same time
        print('another job has already created crispANDvarscan_reservation.sh for %s' % pool)
        sys.exit()
    return shdir
def make_pooldirs(data, parentdir):
    """Make a subdirectory of parentdir for every pool.

    When a pool directory is already present the user is warned and prompted
    (via askforinput) before it is reused.

    Positional arguments:
    data - datatable.txt with info for pipeline
    parentdir - directory with datatable.txt and (symlinks to) fastq data

    Returns a list with the value makedir returned for each pool directory.
    """
    print(Bcolors.BOLD + "\nmaking pool dirs" + Bcolors.ENDC)
    created = []
    for pool_name in uni(data['pool_name'].tolist()):
        pool_path = op.join(parentdir, pool_name)
        if op.exists(pool_path):
            message = "The pooldir already exists, this could overwrite previous data: %s" % pool_path
            print(message)
            print("Do you want to proceed?")
            askforinput()
        made = makedir(pool_path)
        created.append(made)
    return created
示例#8
0
# get more dup stats
module load samtools/1.9
samtools flagstat {dupfile} > {dupflag}
module unload samtools

# call next step
source $HOME/.bashrc
export PYTHONPATH="${{PYTHONPATH}}:$HOME/gatk_pipeline"
export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)"

python $HOME/gatk_pipeline/04_scatter-gvcf.py {dupfile} {pooldir} {samp}

"""

# create shdir and file
# (makedir is called on both the shfile directory and dupdir)
shdir = op.join(pooldir, 'shfiles/03_mark_build_shfiles')
for d in [shdir, dupdir]:
    makedir(d)
# the file name is filled in from the local names `pool` and `samp` via locals()
file = op.join(shdir, '%(pool)s-%(samp)s-mark.sh' % locals())
with open(file, 'w') as o:
    o.write("%s" % text)

# sbatch file
# the working directory is switched to shdir before the job is submitted
os.chdir(shdir)
print('shdir = ', shdir)
# NOTE(review): shutil.which returns None when sbatch is not on PATH, which
# would make subprocess.call fail - confirm sbatch is always available here
subprocess.call([shutil.which('sbatch'), file])

# balance queue
# NOTE(review): presumably rebalances queued 'mark' and 'bwa' jobs - confirm
# against balance_queue.main
balance_queue.main('balance_queue.py', 'mark')
balance_queue.main('balance_queue.py', 'bwa')
示例#9
0
module load gatk/3.8
module load java
export _JAVA_OPTIONS="-Xms256m -Xmx7g"
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T IndelRealigner -R %(ref)s -I %(dupfile)s -targetIntervals %(listfile)s -o %(realbam)s
module unload gatk

# sbatch CRISP job if all pooled bamfiles have been created
source $HOME/.bashrc
export PYTHONPATH="${PYTHONPATH}:$HOME/pipeline"
export SQUEUE_FORMAT="%%.8i %%.8u %%.12a %%.68j %%.3t %%16S %%.10L %%.5D %%.4C %%.6b %%.7m %%N (%%r)"
python $HOME/pipeline/start_crispANDvarscan.py %(parentdir)s %(pool)s
python $HOME/pipeline/balance_queue.py bedfile

''' % locals()

# create shdir and shfile
shdir = op.join(pooldir, 'shfiles/05_indelRealign_shfiles')
makedir(shdir)
# the file name is filled in from the local names `pool` and `samp` via locals()
file = op.join(shdir, '%(pool)s-%(samp)s-indelRealign.sh' % locals())
with open(file, 'w') as o:
    o.write("%s" % text)

# submit the shfile with sbatch from inside shdir
os.chdir(shdir)
print('shdir = ', shdir)
# NOTE(review): shutil.which returns None when sbatch is not on PATH, which
# would make subprocess.call fail - confirm sbatch is always available here
subprocess.call([shutil.which('sbatch'), file])

# NOTE(review): presumably rebalances queued 'indelRealign' and 'realign'
# jobs - confirm against balance_queue.main
balance_queue.main('balance_queue.py', 'indelRealign')
balance_queue.main('balance_queue.py', 'realign')
示例#10
0
def make_beddir():
    """Create dir for bedfiles.

    The directory is named 'bedfiles_<bname>' and sits in the same directory
    as the reference, whose path is read from the name `ref` in the enclosing
    scope; bname is the reference file name up to the first ".fa".

    Returns the tuple (bname, beddir).
    """
    ref_dir, ref_file = op.split(ref)
    bname = ref_file.split(".fa")[0]
    return bname, makedir(op.join(ref_dir, 'bedfiles_%s' % bname))