def main(argv=None):
    """Command-line entry point: delegate to the CGAT pipeline runner.

    Normalizes ``argv`` to ``sys.argv`` when not supplied, matching the
    other ``main`` entry points in this file, so the function behaves the
    same whether called as ``main()`` or with an explicit argument list.
    """
    if argv is None:
        argv = sys.argv
    P.main(argv)
# NOTE(review): whitespace-mangled paste — several units are fused onto this
# one line: the tail of a plotting task (builds and runs an Rscript
# plotFootprint command; its `def` line is not visible here), a
# @follows-decorated full() target, a main(argv) entry point, and an
# __main__ guard. Because the opening of the enclosing function is missing,
# the original bytes are kept unchanged pending recovery of the true line
# breaks — TODO: restore formatting from the upstream source.
bandwidth = PARAMS["plot_bandwidth"] script = PARAMS["pipeline_dir"] + "R/plotFootprint.R" statement = '''Rscript {script} --infiles {infiles} --outfile {outfile} --labels {labels} --title {region} -b {bandwidth} --xlims {distance} --unsmoothed {smoothed}''' P.run(statement) # --------------------------------------------------- # Generic pipeline tasks @follows(plotFootprint, plotFootprints) def full(): pass def main(argv=None): if argv is None: argv = sys.argv P.main(argv) if __name__ == "__main__": sys.exit(P.main(sys.argv))
def count_genes(infile, outfile):
    """Count transcripts in a per-chromosome GTF and write the count.

    Each line of the GTF is one transcript, so ``wc -l`` gives the
    transcript count. Output is in ``wc -l`` format:
    ``<count> <filename>``.
    """
    statement = '''wc -l %(infile)s > %(outfile)s'''
    # On CBRG infrastructure a job queue may need to be specified here.
    P.run(statement)


@merge(count_genes, 'all.average')
def average(infiles, outfile):
    """Collate per-chromosome counts and append their median.

    Each input file holds two whitespace-separated fields produced by
    ``wc -l``: the transcript count and the source file name.
    Writes one ``<chrom>\\t<count>`` line per input, then a final
    ``Median\\t<median>`` line.
    """
    total_counts = {}
    for infile in infiles:
        with open(infile) as inf:
            # "<count> <chrom-file>"  ->  total_counts[chrom-file] = int(count)
            count, chrom = inf.read().strip().split(' ')
            total_counts[chrom] = int(count)
    # Median is computed over the integer counts only.
    median = statistics.median(total_counts.values())
    # File handle renamed from 'count' to avoid shadowing the count variable
    # read inside the loop above.
    with open(outfile, 'w') as outf:
        for key, value in total_counts.items():
            outf.write(f'{key}\t{value}\n')
        outf.write(f'Median\t{median}\n')


if __name__ == "__main__":
    # Enables command-line use, e.g.: $ python workflow.py make full
    sys.exit(P.main(sys.argv))

# To supply the input file via YAML instead, create a .yml file in the same
# directory, e.g.:
#   gtf = /ifs/obds-training/lingf/obds/devel/OBDS_Training_Sep_2019/genes.gtf.gz
# then call P.get_parameters('workflow.yml') after the imports (yielding a
# dict keyed by 'gtf' with the file path as value) and change the decorators,
# e.g. @split(P.PARAMS['gtf'], 'chr*.gtf').
def main(argv=None):
    """Entry point that forwards the argument list to the pipeline runner.

    When ``argv`` is omitted, the process arguments (``sys.argv``) are used.
    """
    P.main(sys.argv if argv is None else argv)
def main(argv=None):
    """Hand the argument list to the CGAT pipeline runner.

    Falls back to ``sys.argv`` when no explicit ``argv`` is given.
    """
    args = sys.argv if argv is None else argv
    P.main(args)


if __name__ == "__main__":
    # Run as a script: exit with whatever status the pipeline runner reports.
    sys.exit(P.main(sys.argv))
def main(argv=None):
    """Command-line entry point: delegate to the CGAT pipeline runner.

    Accepts an optional ``argv`` (defaulting to ``sys.argv``) for
    consistency with the other ``main`` entry points in this file;
    existing ``main()`` callers are unaffected.
    """
    if argv is None:
        argv = sys.argv
    P.main(argv)
# NOTE(review): whitespace-mangled paste — this line begins mid-if/else
# inside a function whose `def` line is not visible (it stages a remote BAM
# via Sra.process_remote_BAM, runs an rMATS statement inside a temp
# directory, then copies the produced .rmats files out). The trailing
# `P.main(sys.argv)` is unguarded and sits on the same line — presumably it
# belongs under an `if __name__ == "__main__":` guard; verify against the
# upstream source. Bytes kept unchanged pending recovery of the true line
# breaks.
token = token[0] else: token = None s, infile = Sra.process_remote_BAM(infile, token, tmpfilename, filter_bed=PARAMS["contigs_bed"]) s = re.sub(";\n", " &&\n", s) infile = ",".join(infile) statement = " && ".join([ "mkdir -p %(tmpfilename)s", s, statement, "rm -r %(tmpfilename)s" ]) P.run(statement, job_condaenv=PARAMS["rmats_env"], job_memory=PARAMS["rmats_prep_memory"]) rmats_counter = "" for f_path in glob("rmats.dir/%(track)s.dir/*.rmats" % locals()): shutil.copy(f_path, P.snip(outfile, ".rmats") + rmats_counter + ".rmats") if rmats_counter == "": rmats_counter = 1 else: rmats_counter += 1 P.main(sys.argv)
# NOTE(review): whitespace-mangled paste — this line begins mid-loop inside
# a merge task whose `def` line is not visible (it writes a gene-by-sample
# presence/absence ("1"/"0") table to `outf`), followed by a
# @follows-decorated full() target and an __main__ guard. NOTE(review): the
# guard calls `P.main()` with no arguments, unlike the other entry points in
# this file which pass `sys.argv` — confirm against the P.main signature.
# Bytes kept unchanged pending recovery of the true line breaks.
genomegenes.add(gene) if g in genomegenes: result[g].append("1") else: result[g].append("0") outf.write("\t".join( ["gene"] + [P.snip(os.path.basename(x), ".tsv") for x in infiles]) + "\n") for gene, data in result.items(): outf.write("\t".join([gene] + data) + "\n") outf.close() ############################################## ############################################## ############################################## @follows(mergeAnnotations) def full(): pass ############################################## ############################################## ############################################## if __name__ == "__main__": P.main()
def main():
    """Run the pipeline on the process arguments and exit with its status."""
    status = P.main(sys.argv)
    sys.exit(status)