# Reference options

# Resolve the species to process: "all" selects every entry of specs_ordered;
# otherwise the -s value is split on commas (a space after a comma is allowed)
# and each requested species must be present in spec_ids.
if args.spec == "all":
    spec = specs_ordered
else:
    spec = args.spec.replace(", ", ",").split(",")
    for s in spec:
        if s not in spec_ids:
            # sys.exit() accepts at most one argument, so the error code and
            # the message are combined into a single string.
            sys.exit(" * ERROR SF2: Cannot find specified species: " + s)
# Parse the input species.

# Record the run-time settings at the top of the generated job script.
with open(output_file, "w") as jobfile:
    mcore.runTime("#!/bin/bash\n# Rodent BAM commands", jobfile)
    mcore.PWS("# STEP INFO", jobfile)
    mcore.PWS(
        mcore.spacedOut("# Current step:", pad) + "BAM merging", jobfile)
    mcore.PWS(mcore.spacedOut("# Input directory:", pad) + indir, jobfile)
    mcore.PWS(
        mcore.spacedOut("# Species job directory:", pad) + jobs_dir, jobfile)
    if not os.path.isdir(jobs_dir):
        mcore.PWS("# Creating jobs directory.", jobfile)
        # Create the directory without going through a shell so that paths
        # containing spaces or shell metacharacters are handled correctly and
        # a failure raises instead of being silently ignored.
        os.makedirs(jobs_dir)
    mcore.PWS(mcore.spacedOut("# Species:", pad) + args.spec, jobfile)
    if not args.name:
        mcore.PWS(
            "# -n not specified --> Generating random string for job name",
            jobfile)
    mcore.PWS(mcore.spacedOut("# Job name:", pad) + name, jobfile)
    mcore.PWS(mcore.spacedOut("# Logfile directory:", pad) + logdir, jobfile)
    if not os.path.isdir(logdir):
        mcore.PWS("# Creating logfile directory.", jobfile)
spec = specs_ordered else: spec = args.spec.replace(", ", ",").split(",") for s in spec: if s not in spec_ids: sys.exit(" * ERROR SF2: Cannot find specified species: " + s) # Parse the input species. ########################## # Reporting run-time info for records. with open(output_file, "w") as jobfile: mcore.runTime("#!/bin/bash\n# Rodent post-dedup reformat commands", jobfile) mcore.PWS("# STEP INFO", jobfile) mcore.PWS(mcore.spacedOut("# Current step:", pad) + step, jobfile) mcore.PWS(mcore.spacedOut("# Previous step:", pad) + prev_step, jobfile) mcore.PWS("# ----------", jobfile) mcore.PWS("# I/O INFO", jobfile) mcore.PWS( mcore.spacedOut("# Input directory:", pad) + prev_step_dir, jobfile) mcore.PWS( mcore.spacedOut("# Output directory:", pad) + dedup_dir, jobfile) mcore.PWS( mcore.spacedOut("# Intermediate reformat directory:", pad) + reformat_dir, jobfile) mcore.PWS( mcore.spacedOut("# reformat.sh path:", pad) + args.path, jobfile) mcore.PWS(mcore.spacedOut("# Species:", pad) + args.spec, jobfile) mcore.PWS(mcore.spacedOut("# Seq runs:", pad) + args.runtype, jobfile) if not args.name:
base_logdir = os.path.abspath("logs/") logdir = os.path.join(base_logdir, step + "-logs") # Step I/O info. runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids) print(runtype, runstrs) # Parse the input run types. spec = mfiles.parseSpecs(args.spec, specs_ordered) # Parse the input species. with open(output_file, "w") as jobfile: mcore.runTime("#!/bin/bash\n# Rodent BWA commands", jobfile) mcore.PWS("# STEP INFO", jobfile) mcore.PWS(mcore.spacedOut("# Current step:", pad) + step, jobfile) mcore.PWS(mcore.spacedOut("# Previous step:", pad) + prev_step, jobfile) mcore.PWS( mcore.spacedOut("# Input directory:", pad) + prev_step_dir, jobfile) mcore.PWS( mcore.spacedOut("# Assembly directory:", pad) + ref_dir, jobfile) mcore.PWS(mcore.spacedOut("# Output directory:", pad) + step_dir, jobfile) mcore.PWS(mcore.spacedOut("# BWA path:", pad) + args.path, jobfile) mcore.PWS(mcore.spacedOut("# Species:", pad) + args.spec, jobfile) if not args.name: mcore.PWS( "# -n not specified --> Generating random string for job name", jobfile) mcore.PWS(mcore.spacedOut("# Job name:", pad) + name, jobfile) mcore.PWS(mcore.spacedOut("# Logfile directory:", pad) + logdir, jobfile) if not os.path.isdir(logdir):
logdir = os.path.join(base_logdir, step + "-logs")
# Step I/O info.

runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids)
print(runtype, runstrs)
# Parse the input run types.

spec = mfiles.parseSpecs(args.spec, specs_ordered)
# Parse the input species.

# Record the run-time settings at the top of the generated job script.
with open(output_file, "w") as jobfile:
    mcore.runTime("#!/bin/bash\n# Rodent BWA commands", jobfile)
    mcore.PWS("# STEP INFO", jobfile)

    # Disabled header entries kept from the original script:
    #   "# Reads directory:"  -> read_dir
    #   "# Output directory:" -> bam_dir
    #   "# Pseudo-it path:"   -> args.path
    # Species and run types are passed through str() before concatenation.
    for hdr_label, hdr_value in (
        ("# Assembly input directory:", assembly_indir),
        ("# Assembly output directory:", assembly_outdir),
        ("# Species:", str(args.spec)),
        ("# Run types:", str(args.runtype)),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, jobfile)

    if not args.name:
        mcore.PWS(
            "# -n not specified --> Generating random string for job name",
            jobfile)

    for hdr_label, hdr_value in (
        ("# Job name:", name),
        ("# Logfile directory:", logdir),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, jobfile)
# Job files

runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids)
# Parse the input run types.

# Keep only species whose label contains neither "(no WGA)" nor "pos_ctrl"
# and whose spec_ids entry contains at least one of the requested run types.
specs = [
    s for s in mfiles.parseSpecs(args.spec, specs_ordered)
    if "(no WGA)" not in s
    and "pos_ctrl" not in s
    and any(r in spec_ids[s] for r in runtype)
]
# Parse the input species.

##########################
# Reporting run-time info for records.

with open(output_file, "w") as outfile, mp.Pool(processes=args.procs) as pool:
    mcore.runTime("# Rodent read counting", outfile)
    # NOTE(review): "# Job file:" reports output_file, the same value as
    # "# Output file:" -- confirm this is intended.
    for hdr_label, hdr_value in (
        ("# Input directory:", args.indir),
        ("# Output file:", output_file),
        ("# Species:", args.spec),
        ("# Seq runs:", args.runtype),
        ("# Job file:", output_file),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, outfile)
    mcore.PWS("# ----------", outfile)
    mcore.PWS("# BEGIN OUTPUT", outfile)
    ##########################

    headers = ["Total bases", "Total reads"]
    # NOTE(review): unlike every other PWS call in this block, the column
    # header line is not given `outfile` -- confirm whether it should be.
    mcore.PWS(",".join(["Species"] + headers))

    outlines = {}
    chunk_num = 1
    spec_num = 1
    cur_specs = []
for line in open(target_file): line = line.strip().split("\t") total_target_len += (float(line[2]) - float(line[1])) runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids) # Parse the input run types. specs = mfiles.parseSpecs(args.spec, specs_ordered) specs = [s for s in specs if "(no WGA)" not in s and "pos_ctrl" not in s] specs = [s for s in specs if any(r in spec_ids[s] for r in runtype)] # Parse the input species. with open(outfilename, "w") as outfile, mp.Pool(processes=args.procs) as pool: mcore.runTime("# Rodent assembly and mapping stats", outfile) mcore.PWS( mcore.spacedOut("# Total species:", pad) + str(len(specs)), outfile) mcore.PWS( mcore.spacedOut("# Total target length:", pad) + str(total_target_len), outfile) mcore.PWS(mcore.spacedOut("# Mapping directory:", pad) + map_dir, outfile) mcore.PWS( mcore.spacedOut("# Assembly directory:", pad) + assembly_dir, outfile) mcore.PWS(mcore.spacedOut("# Output file:", pad) + outfilename, outfile) mcore.PWS("# ----------", outfile) cols = [ 'num-scaffs', 'avg-scaff-len', 'asm-len', 'asm-n50', 'asm-l50', 'asm-reads-mapped', 'asm-perc-reads-mapped', 'asm-paired-mapped', 'asm-perc-paired-mapped', 'asm-pair-mapped-diff-chr', 'asm-single-mapped', 'asm-perc-single-mapped', 'asm-duplicate-reads', 'asm-avg-depth', 'asm-avg-start-depth', 'asm-avg-mid-depth',
logdir = os.path.join(base_logdir, step + "-logs") # Step I/O info. runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids) #print(runtype, runstrs); # Parse the input run types. specs = mfiles.parseSpecs(args.spec, specs_ordered) specs = [s for s in specs if "(no WGA)" not in s and "pos_ctrl" not in s] specs = [s for s in specs if any(r in spec_ids[s] for r in runtype)] # Parse the input species. with open(output_file, "w") as jobfile: mcore.runTime("#!/bin/bash\n# Rodent pileup commands", jobfile) mcore.PWS("# STEP INFO", jobfile) mcore.PWS(mcore.spacedOut("# BAM directory:", pad) + bam_dir, jobfile) mcore.PWS( mcore.spacedOut("# Assembly directory:", pad) + assembly_dir, jobfile) mcore.PWS( mcore.spacedOut("# Output directory:", pad) + pileup_dir, jobfile) mcore.PWS(mcore.spacedOut("# Pseudo-it path:", pad) + args.path, jobfile) mcore.PWS(mcore.spacedOut("# Species:", pad) + str(args.spec), jobfile) mcore.PWS( mcore.spacedOut("# Run types:", pad) + str(args.runtype), jobfile) if not args.name: mcore.PWS( "# -n not specified --> Generating random string for job name", jobfile) mcore.PWS(mcore.spacedOut("# Job name:", pad) + name, jobfile) mcore.PWS(mcore.spacedOut("# Logfile directory:", pad) + logdir, jobfile) if not os.path.isdir(logdir):
if args.spec != "all": args.spec = args.spec.replace(", ", ",").split(",") if any(s not in assembled_specs for s in args.spec): sys.exit(" * ERROR 3: Invalid species entered with -s.") if args.runtype != "all": args.runtype = [ int(r) for r in args.runtype.replace(", ", ",").split(",") ] if any(r not in [1, 2, 3, 4, 5] for r in args.runtype): sys.exit(" * ERROR 4: Invalid run type entered with -r.") with open(output_file, "w") as jobfile: mcore.runTime("#!/bin/bash\n# Rodent BWA commands", jobfile) mcore.PWS("# STEP INFO", jobfile) mcore.PWS(mcore.spacedOut("# BAM directory:", pad) + bam_dir, jobfile) mcore.PWS(mcore.spacedOut("# Output directory:", pad) + bam_dir, jobfile) mcore.PWS(mcore.spacedOut("# Samtools path:", pad) + args.path, jobfile) mcore.PWS(mcore.spacedOut("# Species:", pad) + str(args.spec), jobfile) if args.ref: mcore.PWS( mcore.spacedOut("# Reference genome:", pad) + args.ref, jobfile) else: mcore.PWS( mcore.spacedOut("# Reference genome:", pad) + "Exome assemblies", jobfile) mcore.PWS( mcore.spacedOut("# Run types:", pad) + str(args.runtype), jobfile) if not args.name: mcore.PWS( "# -n not specified --> Generating random string for job name",
logdir = os.path.join(base_logdir, step + "-logs")
# Step I/O info.

runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids)
# Parse the input run types.

spec = mfiles.parseSpecs(args.spec, specs_ordered)
# Parse the input species.

##########################
# Reporting run-time info for records.

with open(output_file, "w") as jobfile:
    mcore.runTime("#!/bin/bash\n# Rodent Spades commands", jobfile)

    # Step section: the current and previous pipeline step names.
    mcore.PWS("# STEP INFO", jobfile)
    for hdr_label, hdr_value in (
        ("# Current step:", step),
        ("# Previous step:", prev_step),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, jobfile)
    mcore.PWS("# ----------", jobfile)

    # I/O section: directories, tool path and the raw -s / -r selections.
    mcore.PWS("# I/O INFO", jobfile)
    for hdr_label, hdr_value in (
        ("# Input directory:", prev_step_dir),
        ("# Output directory:", step_dir),
        ("# Spades path:", args.path),
        ("# Species:", args.spec),
        ("# Seq runs:", args.runtype),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, jobfile)

    if not args.name:
        mcore.PWS(
            "# -n not specified --> Generating random string for job name",
            jobfile)

    for hdr_label, hdr_value in (
        ("# Job name:", name),
        ("# Logfile directory:", logdir),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, jobfile)
# Reference options

runtype, runstrs = mfiles.parseRuntypes(args.runtype, seq_run_ids)
#print(runtype, runstrs);
# Parse the input run types.

# Keep only species whose label contains neither "(no WGA)" nor "pos_ctrl"
# and whose spec_ids entry contains at least one of the requested run types.
specs = mfiles.parseSpecs(args.spec, specs_ordered)
specs = [s for s in specs if "(no WGA)" not in s and "pos_ctrl" not in s]
specs = [s for s in specs if any(r in spec_ids[s] for r in runtype)]
# Parse the input species.

# Record the run-time settings at the top of the generated job script.
with open(output_file, "w") as jobfile:
    mcore.runTime("#!/bin/bash\n# Rodent variant commands", jobfile)
    mcore.PWS("# STEP INFO", jobfile)
    mcore.PWS(
        mcore.spacedOut("# Current step:", pad) + "Variant calling", jobfile)
    mcore.PWS(
        mcore.spacedOut("# Assembly directory:", pad) + referee_dir, jobfile)
    mcore.PWS(mcore.spacedOut("# Map directory:", pad) + remap_dir, jobfile)
    mcore.PWS(mcore.spacedOut("# Output directory:", pad) + step_dir, jobfile)
    mcore.PWS(
        mcore.spacedOut("# Species job directory:", pad) + jobs_dir, jobfile)
    if not os.path.isdir(jobs_dir):
        mcore.PWS("# Creating jobs directory.", jobfile)
        # Create the directory without going through a shell so that paths
        # containing spaces or shell metacharacters are handled correctly and
        # a failure raises instead of being silently ignored.
        os.makedirs(jobs_dir)
    mcore.PWS(mcore.spacedOut("# Species:", pad) + args.spec, jobfile)
    if not args.name:
        mcore.PWS(
            "# -n not specified --> Generating random string for job name",
            jobfile)
    mcore.PWS(mcore.spacedOut("# Job name:", pad) + name, jobfile)
cwd = os.getcwd()
# Job vars

# The job script and the submit script share one file name and live in the
# "jobs" and "submit" sub-directories of the current working directory.
job_script = name + ".sh"
output_file = os.path.join(cwd, "jobs", job_script)
submit_file = os.path.join(cwd, "submit", job_script)
logdir = os.path.join(args.output, "logs")
# Job files

##########################
# Reporting run-time info for records.

with open(output_file, "w") as outfile:
    mcore.runTime("#!/bin/bash\n# Exonerate command generator", outfile)
    mcore.PWS("# IO OPTIONS", outfile)

    input_line = mcore.spacedOut("# Input directory:", pad) + args.input
    mcore.PWS(input_line, outfile)

    if args.outname:
        outname_note = "Using end of output directory path as job name."
        mcore.PWS(mcore.spacedOut("# --outname:", pad) + outname_note, outfile)

    if not args.name:
        mcore.PWS(
            "# -n not specified --> Generating random string for job name",
            outfile)

    for hdr_label, hdr_value in (
        ("# Job name:", name),
        ("# Output directory:", args.output),
    ):
        mcore.PWS(mcore.spacedOut(hdr_label, pad) + hdr_value, outfile)

    if args.overwrite:
        overwrite_note = "Overwriting previous files in output directory."
        mcore.PWS(
            mcore.spacedOut("# --overwrite set:", pad) + overwrite_note,
            outfile)