import os
import re
import sys


def FindTranslocations(programDirectory, local_dir, sample_name, bam_file, account, modules):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    # build the sbatch file and submit it
    sbatch_dir, out_dir, err_dir = common.createFolder(local_dir)
    output_header = os.path.join(local_dir, sample_name)
    bai_file = re.sub('m$', 'i', bam_file)  # replace the trailing "m" of .bam with "i" to get the .bai path
    if not os.path.isfile(bai_file):
        bai_file = bam_file + ".bai"

    # read the tool paths from path.txt
    path_dict = {}
    with open(os.path.join(programDirectory, "path.txt")) as path_file:
        for line in path_file:
            if not line.startswith("#") and not line == "\n":
                content = line.strip().split("=")
                print(content)
                path_dict[content[0]] = content[1]
    FT_path = "{0}/programFiles/FindTranslocations/bin/FindTranslocations".format(programDirectory)
    if path_dict.get("FindTranslocations"):
        FT_path = path_dict["FindTranslocations"]

    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample_name)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/FT_{}.out\n".format(out_dir, sample_name))
        sbatch.write("#SBATCH -e {}/FT_{}.err\n".format(err_dir, sample_name))
        sbatch.write("#SBATCH -J FT_{}.job\n".format(sample_name))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 2-00:00:00\n")
        sbatch.write("#SBATCH -n 1 \n")
        sbatch.write("\n")
        sbatch.write("\n")
        sbatch.write("FINDTRANS={0}".format(FT_path))
        sbatch.write("\n")
        sbatch.write("\n")
        # If we are on a non-SNIC system, chances are there is no SNIC_TMP var
        # set. TMPDIR is more universal; fall back to /tmp.
        sbatch.write("if [[ -z $SNIC_TMP ]] ; then if [[ -z $TMPDIR ]] ; then SNIC_TMP=/tmp ; else SNIC_TMP=$TMPDIR ; fi ; fi\n")
        # now transfer the bam file, indexing it first if no index exists
        sbatch.write("mkdir -p $SNIC_TMP/{}\n".format(sample_name))
        sbatch.write("rsync -rptoDLv {} $SNIC_TMP/{}\n".format(bam_file, sample_name))
        sbatch.write("if [[ ! -e {} ]] ; then samtools index {} ; fi\n".format(bai_file, bam_file))
        sbatch.write("rsync -rptoDLv {} $SNIC_TMP/{}\n".format(bai_file, sample_name))
        sbatch.write('$FINDTRANS --sv --bam $SNIC_TMP/{}/{} --bai $SNIC_TMP/{}/{} --auto --minimum-supporting-pairs 4 --output {}\n'.format(sample_name, os.path.split(bam_file)[1], sample_name, os.path.split(bai_file)[1], output_header))
        sbatch.write("rm {0}.tab\n".format(output_header))
        sbatch.write("rm $SNIC_TMP/{0}/*".format(sample_name))
        sbatch.write("\n")
        sbatch.write("\n")
    return [int(common.generateSlurmJob(sbatch_dir, sample_name)), "{0}_inter_chr_events.vcf;{0}_intra_chr_events.vcf".format(sample_name)]

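# The same path.txt parsing appears again in CNVnator() and submit2Annotation()
# below. As a minimal sketch of how that repeated block could be factored out
# (read_path_config is a hypothetical helper, not part of the original module),
# assuming path.txt is made of plain "key=value" lines with "#" comments and
# blank lines ignored, e.g.:
#   FindTranslocations=/sw/FindTranslocations/bin/FindTranslocations
#   CNVnator=/sw/CNVnator/cnvnator
def read_path_config(programDirectory):
    path_dict = {}
    with open(os.path.join(programDirectory, "path.txt")) as path_file:
        for line in path_file:
            line = line.strip()
            if line and not line.startswith("#"):
                key, _, value = line.partition("=")
                path_dict[key] = value
    return path_dict
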
def build_DB(analysisTool, analysedProject, analysed, programDirectory, account, frequency_db):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    # get the project path
    for sample in analysedProject:
        path = analysed[analysisTool]["analysed"][sample]["outpath"]
        pathToTool = os.path.join(path, analysisTool)

    # find all the features in the feature folder
    path2FeatureFolder = os.path.join(programDirectory, "feature")
    featureList = ""
    for file in os.listdir(path2FeatureFolder):
        if file.endswith(".tab") or file.endswith(".bed") or file.endswith(".txt"):
            featureList += " " + os.path.join(path2FeatureFolder, file)

    inpath = os.path.join(path, analysisTool)
    outpath = os.path.join(path, analysisTool, "filtered")
    sbatch_dir, out_dir, err_dir = common.createFolder(outpath)
    path2Query = os.path.join(programDirectory, "programFiles", "FindTranslocations", "scipts", "query_db.py")
    path2Features = os.path.join(programDirectory, "programFiles", "FindTranslocations", "scipts", "screen_results.py")

    add2Ongoing = {}
    for sample in analysedProject:
        with open(os.path.join(sbatch_dir, "{0}.slurm".format(sample)), 'w') as sbatch:
            sbatch.write("#! /bin/bash -l\n")
            sbatch.write("#SBATCH -A {}\n".format(account))
            sbatch.write("#SBATCH -o {0}/filter_{1}.out\n".format(out_dir, sample))
            sbatch.write("#SBATCH -e {0}/filter_{1}.err\n".format(err_dir, sample))
            sbatch.write("#SBATCH -J filter_{0}_{1}.job\n".format(sample, analysisTool))
            sbatch.write("#SBATCH -p core\n")
            sbatch.write("#SBATCH -t 10:00:00\n")
            sbatch.write("#SBATCH -n 1 \n")
            sbatch.write("\n")
            sbatch.write("\n")
            # iterate through every analysed sample, query every vcf of the sample against every db
            filePath = os.path.join(outpath, "{0}.Query.vcf".format(sample))
            input_vcf = os.path.join(inpath, sample + ".vcf")
            sbatch.write("python {0} --variations {1} --db {2} > {3}\n".format(path2Query, input_vcf, frequency_db, filePath))
            FileName = "{0}.Query.vcf".format(sample)
            # add features
            sbatch.write("\n")
            if featureList != "":
                feature_vcf = os.path.join(outpath, "{0}.Feature.vcf".format(sample))
                sbatch.write("python {0} --variations {1} --bed-files {2} > {3}\n".format(path2Features, filePath, featureList, feature_vcf))
                sbatch.write("\n")
                FileName = "{0}.Feature.vcf".format(sample)
            analysed[analysisTool]["analysed"][sample]["outputFile"] = FileName
            sbatch.write("\n")
            sbatch.write("\n")
        pid = int(common.generateSlurmJob(sbatch_dir, sample))
        add2Ongoing.update({sample: {"pid": pid, "outpath": analysed[analysisTool]["analysed"][sample]["outpath"], "project": analysed[analysisTool]["analysed"][sample]["project"], "outputFile": FileName}})
    return add2Ongoing

def fermiKit(programDirectory, local_dir, sample_name, bam_file, account, modules):
    # build the sbatch file and submit it
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    sbatch_dir, out_dir, err_dir = common.createFolder(local_dir)
    output_header = os.path.join(local_dir, sample_name)
    reference = references(programDirectory, "bwa-indexed-ref")
    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample_name)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/fermiKit_{}.out\n".format(out_dir, sample_name))
        sbatch.write("#SBATCH -e {}/fermiKit_{}.err\n".format(err_dir, sample_name))
        sbatch.write("#SBATCH -J FermiKit_{}.job\n".format(sample_name))
        sbatch.write("#SBATCH -p node\n")
        sbatch.write("#SBATCH -t 4-00:00:00\n")
        sbatch.write("\n")
        sbatch.write("\n")
        if modules == "True":
            sbatch.write("module load bioinfo-tools\n")
            sbatch.write("module load bwa\n")
            sbatch.write("module load samtools\n")
            sbatch.write("module load fermikit\n")
            sbatch.write("\n")
            sbatch.write("\n")
        # If we are on a non-SNIC system, chances are there is no SNIC_TMP var
        # set. TMPDIR is more universal; fall back to /tmp.
        sbatch.write("if [[ -z $SNIC_TMP ]] ; then if [[ -z $TMPDIR ]] ; then SNIC_TMP=/tmp ; else SNIC_TMP=$TMPDIR ; fi ; fi\n")
        # now transfer the bam file
        sbatch.write("mkdir -p $SNIC_TMP/{}\n".format(sample_name))
        sbatch.write("rsync -rptoDLv {} $SNIC_TMP/{}\n".format(bam_file, sample_name))
        sbatch.write("samtools bam2fq $SNIC_TMP/{0}/{1} > $SNIC_TMP/{0}/output.fastq\n".format(sample_name, os.path.split(bam_file)[1]))
        sbatch.write("fermi2.pl unitig -s3g -t16 -p $SNIC_TMP/{0}/{0} $SNIC_TMP/{0}/output.fastq > $SNIC_TMP/{0}/{0}.mak\n".format(sample_name))
        sbatch.write("make -f $SNIC_TMP/{0}/{0}.mak\n".format(sample_name))
        sbatch.write("echo run_calling\n")
        sbatch.write("run-calling -t16 {0} $SNIC_TMP/{1}/{1}.mag.gz | sh\n".format(reference, sample_name))
        sbatch.write("cd $SNIC_TMP/{}\n".format(sample_name))
        sbatch.write("cp *vcf* {}\n".format(local_dir))
        sbatch.write("cd {}\n".format(local_dir))
        sbatch.write("gunzip *vcf.gz\n")
        sbatch.write("echo finished!\n")
        sbatch.write("\n")
        sbatch.write("\n")
    return [int(common.generateSlurmJob(sbatch_dir, sample_name)), "{}.sv.vcf".format(sample_name)]

def submit4combination(tools, sample, combinedProcessFile, programDirectory, account, bamFilePath):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    outpath = os.path.join(combinedProcessFile[tools]["outpath"], "FindSV")
    sbatch_dir, out_dir, err_dir = common.createFolder(outpath)
    mergepath = os.path.join(programDirectory, "programFiles", "mergeVCF.py")
    contigSort = os.path.join(programDirectory, "programFiles", "contigSort.py")
    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/combine_{}.out\n".format(out_dir, sample))
        sbatch.write("#SBATCH -e {}/combine_{}.err\n".format(err_dir, sample))
        sbatch.write("#SBATCH -J combine_{}_{}.job\n".format(sample, "FindSV"))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 5:00:00\n")
        sbatch.write("#SBATCH -n 1 \n")
        sbatch.write("\n")
        sbatch.write("\n")
        sbatch.write("module load bioinfo-tools\n")
        sbatch.write("module load vcftools\n")
        sbatch.write("module load samtools\n")
        vcfPath = []
        # collect the output vcf files of every caller
        fileString = ''
        for tool in combinedProcessFile:
            files = combinedProcessFile[tool]["outputFile"]
            files = files.strip().split(";")
            for vcf in files:
                vcf = os.path.join(combinedProcessFile[tools]["outpath"], tool, vcf)
                fileString += vcf + " "
        # merge the collected files, then sort the merged vcf according to the contig order of the bam file
        output = os.path.join(outpath, sample + ".vcf")
        sortedOutput = os.path.join(outpath, sample + ".merged.vcf")
        sbatch.write("python {} --vcf {} > {}\n".format(mergepath, fileString, sortedOutput))
        sbatch.write("python {} --vcf {} --bam {} > {}\n".format(contigSort, sortedOutput, bamFilePath, output))
        sbatch.write("rm {}\n".format(sortedOutput))
    pid = int(common.generateSlurmJob(sbatch_dir, sample))
    add2Ongoing = {sample: {"pid": pid, "outpath": combinedProcessFile[tools]["outpath"], "project": combinedProcessFile[tools]["project"], "outputFile": sample + ".vcf"}}
    return add2Ongoing

def submit2Cleaning(tools, sample, analysed, programDirectory, account):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    samplePath = os.path.join(analysed[tools]["analysed"][sample]["outpath"], tools)
    outpath = os.path.join(samplePath, "cleaning")
    sbatch_dir, out_dir, err_dir = common.createFolder(outpath)
    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/cleaning_{}.out\n".format(out_dir, sample))
        sbatch.write("#SBATCH -e {}/cleaning_{}.err\n".format(err_dir, sample))
        sbatch.write("#SBATCH -J cleaning_{}_{}.job\n".format(sample, tools))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 1:00:00\n")
        sbatch.write("#SBATCH -n 1 \n")
        sbatch.write("\n")
        sbatch.write("\n")
        sbatch.write("module load bioinfo-tools\n")
        sbatch.write("module load vcftools\n")
        files = analysed[tools]["analysed"][sample]["outputFile"]
        files = files.strip().split(";")
        path2Input = os.path.join(samplePath, "annotation")
        FileName = []
        for file in files:
            infile = os.path.join(path2Input, file)
            outsufix = ".cleaned.vcf"
            prefix = file.rsplit(".", 1)[0]
            outfile = prefix + outsufix
            FileName.append(outfile)
            sbatch.write("vcftools --recode --recode-INFO-all --remove-filtered-all --vcf {0} --stdout > {1} \n".format(infile, os.path.join(outpath, outfile)))
        sbatch.write("\n")
        sbatch.write("\n")
    analysed[tools]["analysed"][sample]["outputFile"] = ";".join(FileName)
    analysed[tools]["analysed"][sample]["pid"] = int(common.generateSlurmJob(sbatch_dir, sample))
    return {sample: analysed[tools]["analysed"][sample]}

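# For reference, the nested "analysed" bookkeeping dictionary passed to
# build_DB(), submit2Cleaning() and submit2Annotation() is assumed to look
# roughly like the sketch below. It is inferred from the keys accessed in this
# module and is not a definitive schema; the sample, project and path values
# are placeholders.
#
# analysed = {
#     "FindTranslocations": {
#         "analysed": {
#             "sampleA": {
#                 "pid": 123456,                   # SLURM job id of the last submitted step
#                 "outpath": "/proj/out/sampleA",  # per-sample output directory
#                 "project": "projectX",
#                 "outputFile": "sampleA_inter_chr_events.vcf;sampleA_intra_chr_events.vcf",
#             },
#         },
#     },
# }
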
def __init__(self, z, titles, iniValues):
    # saveImages, fullscreenMainfigure and plt (matplotlib.pyplot) are
    # module-level names defined elsewhere in the plotting module this
    # __init__ belongs to.
    if saveImages:
        from common import createFolder
        self.dirname = createFolder("outImages")
    nplots = len(titles)
    self.z = z
    fig, ax = plt.subplots(nplots, 1, sharex=True)
    if nplots == 1:
        ax = [ax]
    self.figures = [fig]
    self.lines = {}
    self.axes = {}
    for i in range(0, nplots):
        title = titles[i]
        self.addAxis(ax[i], title, iniValues[i])
    self.plotTitle = ax[0].set_title("Time 0")
    if fullscreenMainfigure:
        # I think this only works with the TkAgg backend
        wm = plt.get_current_fig_manager()
        wm.full_screen_toggle()
    plt.draw()
    plt.show(block=False)

def Delly(programDirectory, local_dir, sample_name, bam_file, account, modules):
    # build the sbatch file and submit it
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    sbatch_dir, out_dir, err_dir = common.createFolder(local_dir)
    output_header = os.path.join(local_dir, sample_name)
    reference = references(programDirectory, "bwa-indexed-ref")
    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample_name)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/DellyTra_{}.out\n".format(out_dir, sample_name))
        sbatch.write("#SBATCH -e {}/DellyTra_{}.err\n".format(err_dir, sample_name))
        sbatch.write("#SBATCH -J DellyTra_{}.job\n".format(sample_name))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 3-00:00:00\n")
        sbatch.write("\n")
        sbatch.write("\n")
        if modules == "True":
            sbatch.write("module load bioinfo-tools\n")
            sbatch.write("module load delly\n")
            sbatch.write("\n")
            sbatch.write("\n")
        # run delly once per variant class
        sbatch.write("delly -t TRA -o {}.tra.vcf -g {} {}\n".format(output_header, reference, bam_file))
        sbatch.write("delly -t DEL -o {}.del.vcf -g {} {}\n".format(output_header, reference, bam_file))
        sbatch.write("delly -t DUP -o {}.dup.vcf -g {} {}\n".format(output_header, reference, bam_file))
        sbatch.write("delly -t INV -o {}.inv.vcf -g {} {}\n".format(output_header, reference, bam_file))
        sbatch.write("\n")
    return [int(common.generateSlurmJob(sbatch_dir, sample_name)), "{0}.tra.vcf;{0}.del.vcf;{0}.dup.vcf;{0}.inv.vcf".format(sample_name)]

def CNVnator(programDirectory, local_dir, sample_name, bam_file, account, modules):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    # samplePath=os.path.join(analysed[tools]["analysed"][sample]["outpath"],tools)
    # read the tool paths from path.txt
    path_dict = {}
    with open(os.path.join(programDirectory, "path.txt")) as path_file:
        for line in path_file:
            if not line.startswith("#") and not line == "\n":
                content = line.strip().split("=")
                path_dict[content[0]] = content[1]
    cnvnator_path = "cnvnator"
    cnvnator2vcf_path = "cnvnator2VCF.pl"
    if path_dict.get("CNVnator"):
        cnvnator_path = path_dict["CNVnator"]
    if path_dict.get("cnvnator2VCF"):
        cnvnator2vcf_path = path_dict["cnvnator2VCF"]

    # path to the folder where the reference chromosomes are stored
    sbatch_dir, out_dir, err_dir = common.createFolder(local_dir)
    chrFolder = references(programDirectory, "chromosomes")
    output_header = os.path.join(local_dir, sample_name)
    print("{0}.slurm".format(sample_name))
    print(os.path.join(sbatch_dir, "{0}.slurm".format(sample_name)))
    with open(os.path.join(sbatch_dir, "{0}.slurm".format(sample_name)), 'w') as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {0}/CNVnator_{1}.out\n".format(out_dir, sample_name))
        sbatch.write("#SBATCH -e {0}/CNVnator_{1}.err\n".format(err_dir, sample_name))
        sbatch.write("#SBATCH -J CNVnator_{0}.job\n".format(sample_name))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 2-00:00:00\n")
        sbatch.write("#SBATCH -n 1 \n")
        sbatch.write("\n")
        sbatch.write("\n")
        # If we are on a machine with the SNIC modules installed
        if modules == "True":
            sbatch.write("module load bioinfo-tools\n")
            sbatch.write("module load bwa\n")
            sbatch.write("module load samtools\n")
            sbatch.write("module load CNVnator\n")
            sbatch.write("\n")
            sbatch.write("\n")
        if path_dict.get("ROOTSYS"):
            sbatch.write("export ROOTSYS={}\n".format(path_dict["ROOTSYS"]))
            sbatch.write("export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ROOTSYS/lib\n")
        # If we are on a non-SNIC system, chances are there is no SNIC_TMP var
        # set. TMPDIR is more universal; fall back to /tmp.
        sbatch.write("if [[ -z $SNIC_TMP ]] ; then if [[ -z $TMPDIR ]] ; then SNIC_TMP=/tmp ; else SNIC_TMP=$TMPDIR ; fi ; fi\n")
        # now transfer the bam file
        sbatch.write("mkdir -p $SNIC_TMP/{0}\n".format(sample_name))
        sbatch.write("rsync -rptoDLv {0} $SNIC_TMP/{1}\n".format(bam_file, sample_name))
        # sbatch.write("rsync -rptoDLv {0} $SNIC_TMP/{1}\n".format(bai_file, sample_name))
        sbatch.write("{0} -root {1}.root -tree $SNIC_TMP/{2}/{3} \n".format(cnvnator_path, output_header, sample_name, os.path.split(bam_file)[1]))
        sbatch.write("{0} -root {1}.root -his 1000 -d {2}\n".format(cnvnator_path, output_header, chrFolder))
        sbatch.write("{0} -root {1}.root -stat 1000 >> {2}.cnvnator.log \n".format(cnvnator_path, output_header, output_header))
        sbatch.write("{0} -root {1}.root -partition 1000 \n".format(cnvnator_path, output_header))
        sbatch.write("{0} -root {1}.root -call 1000 > {2}.cnvnator.out \n".format(cnvnator_path, output_header, output_header))
        sbatch.write("{0} {1}.cnvnator.out > {2}.vcf \n".format(cnvnator2vcf_path, output_header, output_header))
        sbatch.write("rm {0}.root\n".format(output_header))
        sbatch.write("rm $SNIC_TMP/{0}/*".format(sample_name))
        sbatch.write("\n")
        sbatch.write("\n")
    return [int(common.generateSlurmJob(sbatch_dir, sample_name)), "{}.vcf".format(sample_name)]

def submit2Annotation(tools, sample, analysed, programDirectory, account, genmod_file):
    sys.path.append(os.path.join(programDirectory, "modules"))
    import common

    samplePath = os.path.join(analysed[tools]["analysed"][sample]["outpath"], tools)
    path_dict = {}
    with open(os.path.join(programDirectory, "path.txt")) as path_file:
        for line in path_file:
            if not line.startswith("#") and not line == "\n":
                content = line.strip().split("=")
                path_dict[content[0]] = content[1]
    # resolve the path to the VEP script (variant_effect_predictor.pl)
    if not path_dict.get("vep"):
        path2snpEFF = os.path.join(
            programDirectory,
            "programFiles",
            "ensembl-tools-release-81",
            "scripts",
            "variant_effect_predictor",
            "variant_effect_predictor.pl",
        )
    else:
        path2snpEFF = path_dict["vep"]
    reference = "GRCh37.75"
    cache_dir = ""
    if path_dict.get("vep_dir"):
        cache_dir = "--dir " + path_dict["vep_dir"] + " "
    outpath = os.path.join(samplePath, "annotation")
    sbatch_dir, out_dir, err_dir = common.createFolder(outpath)

    # check for a genmod scoring config in the genmod folder
    path2GenmodFolder = genmod_file
    genmod = ""
    for file in os.listdir(path2GenmodFolder):
        if file.endswith(".ini") or file.endswith(".txt"):
            genmod = os.path.join(path2GenmodFolder, file)

    with open(os.path.join(sbatch_dir, "{}.slurm".format(sample)), "w") as sbatch:
        sbatch.write("#! /bin/bash -l\n")
        sbatch.write("#SBATCH -A {}\n".format(account))
        sbatch.write("#SBATCH -o {}/annotation_{}.out\n".format(out_dir, sample))
        sbatch.write("#SBATCH -e {}/annotation_{}.err\n".format(err_dir, sample))
        sbatch.write("#SBATCH -J annotation_{}_{}.job\n".format(sample, tools))
        sbatch.write("#SBATCH -p core\n")
        sbatch.write("#SBATCH -t 10:00:00\n")
        sbatch.write("#SBATCH -n 1 \n")
        sbatch.write("\n")
        sbatch.write("\n")
        files = analysed[tools]["analysed"][sample]["outputFile"]
        files = files.strip().split(";")
        path2Input = os.path.join(samplePath, "filtered")
        FileName = []
        for file in files:
            infile = file
            outsufix = ".annotated.vcf"
            prefix = file.rsplit(".", 1)[0]
            outfile = prefix + outsufix
            FileName.append(outfile)
            sbatch.write(
                "perl {0} --cache --force_overwrite --poly b -i {1} -o {2} --buffer_size 5 --port 3337 --vcf --whole_genome --per_gene --format vcf {3} -q\n".format(
                    path2snpEFF, os.path.join(path2Input, infile), os.path.join(outpath, outfile), cache_dir
                )
            )
            # score the annotated vcf with genmod, if a config file was found
            if genmod != "":
                sbatch.write("genmod score -c {0} {1} > {1}.tmp\n".format(genmod, os.path.join(outpath, outfile)))
                sbatch.write("mv {0}.tmp {0}\n".format(os.path.join(outpath, outfile)))
        sbatch.write("\n")
        sbatch.write("\n")
    analysed[tools]["analysed"][sample]["outputFile"] = ";".join(FileName)
    analysed[tools]["analysed"][sample]["pid"] = int(common.generateSlurmJob(sbatch_dir, sample))
    return {sample: analysed[tools]["analysed"][sample]}

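# Minimal usage sketch (the paths, account name and sample name below are
# placeholders, not values from the original pipeline configuration): submit a
# FindTranslocations job for one sample and capture the SLURM job id together
# with the expected output VCF names returned by the function.
if __name__ == "__main__":
    program_dir = "/path/to/FindSV"                       # hypothetical install directory
    sample_dir = "/proj/data/sampleA/FindTranslocations"  # hypothetical per-sample output directory
    job_id, output_files = FindTranslocations(
        program_dir,
        sample_dir,
        "sampleA",
        "/proj/data/sampleA/sampleA.bam",
        "my_slurm_account",
        "True",
    )
    print(job_id, output_files.split(";"))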