def uploads():
    """Flask view handling asset uploads (POST) and deletions (other methods).

    POST: stores the uploaded file under ASSETS_FOLDER/uploads/<type>/ with a
    random 32-character prefix and returns its public URL as JSON; aborts 403
    when no file was supplied.
    Otherwise (DELETE): resolves the JSON 'url' to a file on disk via
    asset_for() and removes it; aborts 404 when the URL maps to no asset.
    """
    if request.method == 'POST':
        file_ = request.files['file']
        if file_:
            # secure_filename sanitises the client-supplied name; the random
            # prefix keeps stored names unique.
            filename = secure_filename(file_.filename)
            filename = random_str(32) + '.' + filename
            path_ = path.joinpath(app.config["ASSETS_FOLDER"], 'uploads',
                                  request.args.get('type', ''))
            if not path_.exists():
                sh.mkdir('-p', path_)
            full_path = path_.joinpath(filename)
            file_.save(full_path)
            return jsonify({
                'url': url_for('.assets',
                               filename=path.joinpath('uploads',
                                                      request.args.get('type', ''),
                                                      filename)),
            })
        abort(403)
    else:  # DELETE
        fpath = asset_for(request.json['url'])
        if fpath:
            # BUGFIX: remove the resolved filesystem path, not the raw URL —
            # the URL string (e.g. "/assets/...") is not a path on disk, so
            # sh.rm on it would fail or hit the wrong file.
            sh.rm(fpath)
            return 'ok'
        abort(404)
def generateBattenbergPureR(sample, normal, tumour, run_dir_sample, log_dir, pipe_script):
    '''
    Setup a pure R BB pipeline for the sample with its associated normal,
    tumour, run_dir and log_dir.

    Writes <run_dir_sample>/submit.sh containing a bsub command that runs the
    Battenberg R pipeline, marks the script executable and returns its path.
    Relies on the module-level NUM_THREADS, QUEUE and MEMORY settings.
    '''
    runscript = path.joinpath(run_dir_sample, "submit.sh")
    # "with" guarantees the file is flushed and closed even if a write fails
    # (the original leaked the handle on error).
    with open(runscript, 'w') as outf:
        outf.write("TUMOURNAME=" + sample + "\n")
        outf.write("NORMALCEL=" + normal + "\n")
        outf.write("TUMOURCEL=" + tumour + "\n")
        outf.write("RUN_DIR=" + run_dir_sample + "\n")
        # Single bsub line: resources, log file, job name, then the quoted
        # R CMD BATCH invocation with positional --args for the pipeline.
        outf.write("bsub -n "+str(NUM_THREADS) + \
            " -q "+QUEUE + \
            " -R\"select[mem>"+str(MEMORY)+"] rusage[mem="+str(MEMORY)+"] span[hosts=1]\" -M"+str(MEMORY)+" " + \
            " -o "+path.joinpath(log_dir, sample+".%J.out") + " -J"+sample + \
            " \"R CMD BATCH '--no-restore-data --no-save --args "+sample+" "+normal+" "+tumour+" "+run_dir_sample+" "+str(NUM_THREADS)+"' " + \
            pipe_script+" "+path.joinpath(log_dir, "battenberg_snp6."+sample+".Rout")+"\"")
    # Make the runscript executable so it can be launched directly.
    st = os.stat(runscript)
    os.chmod(runscript, st.st_mode | stat.S_IEXEC)
    return runscript
def setUp(self):
    """Create the import-test fixtures: a valid course tarball, an invalid
    one (missing course.xml), and a scratch directory for unsafe-tar tests.
    """
    super(ImportTestCase, self).setUp()
    self.url = course_url('course_import_export_handler', self.course)
    # All fixtures live under one temp dir that is cleaned up for us
    # (presumably by mkdtemp_clean's registration — confirm in test utils).
    self.content_dir = path(mkdtemp_clean())
    # Create tar test files -----------------------------------------------
    # OK course:
    good_dir = tempfile.mkdtemp(dir=self.content_dir)
    # test course being deeper down than top of tar file
    embedded_dir = os.path.join(good_dir, "grandparent", "parent")
    os.makedirs(os.path.join(embedded_dir, "course"))
    with open(os.path.join(embedded_dir, "course.xml"), "w+") as f:
        f.write('<course url_name="2013_Spring" org="EDx" course="0.00x"/>')
    with open(os.path.join(embedded_dir, "course", "2013_Spring.xml"), "w+") as f:
        f.write('<course></course>')
    self.good_tar = os.path.join(self.content_dir, "good.tar.gz")
    with tarfile.open(self.good_tar, "w:gz") as gtar:
        gtar.add(good_dir)
    # Bad course (no 'course.xml' file):
    bad_dir = tempfile.mkdtemp(dir=self.content_dir)
    path.joinpath(bad_dir, "bad.xml").touch()
    self.bad_tar = os.path.join(self.content_dir, "bad.tar.gz")
    with tarfile.open(self.bad_tar, "w:gz") as btar:
        btar.add(bad_dir)
    # Scratch area used by the unsafe-tarball tests.
    self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
def main(argv):
    """Read the sample sheet and set up a Battenberg pure-R pipeline per sample.

    Creates a run directory plus submit.sh per sample, then writes an
    executable master <run_dir>/submit_all.sh listing every per-sample script.
    """
    parser = argparse.ArgumentParser(
        prog='Generate a pure R Battenberg pipeline',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-s", required=True, type=str,
                        help="Sample sheet that contains a line per sample")
    parser.add_argument("-r", required=True, type=str,
                        help="Directory where the pipelines will run")
    parser.add_argument("--snp6", action="store_true",
                        help="Set up a SNP6 pipeline")
    # BUGFIX: set_defaults(snp6=True) made --snp6 a no-op and left no way to
    # request the WGS pipeline; --wgs flips the same flag off while keeping
    # the existing default (SNP6) and the existing --snp6 flag working.
    parser.add_argument("--wgs", dest="snp6", action="store_false",
                        help="Set up a WGS pipeline instead of SNP6")
    parser.set_defaults(snp6=True)
    args = parser.parse_args()

    run_dir = args.r
    log_dir = path.joinpath(run_dir, "logs")
    if not log_dir.exists():
        log_dir.mkdir()

    runcommands = []
    with open(args.s, 'r') as sheet:
        for line in sheet:
            # Skip headers
            if line.startswith("#"):
                continue
            # Unpack the sample information: samplename, tumour, normal
            words = line.strip().split("\t")
            sample = words[0]
            tumour = words[1]
            normal = words[2]
            # Create the run dir for this sample
            run_dir_sample = path.joinpath(run_dir, sample)
            if not run_dir_sample.exists():
                run_dir_sample.mkdir()
            # Select the pipeline script to be set up
            if args.snp6:
                pipe_script = BB_PURE_R_SNP6
            else:
                pipe_script = BB_PURE_R_WGS
            # Create the pipeline and store the path to the submit script
            runscript = generateBattenbergPureR(sample, normal, tumour,
                                                run_dir_sample, log_dir,
                                                pipe_script)
            runcommands.append(runscript)

    # Create a master file for easy submission
    runscript = path.joinpath(run_dir, "submit_all.sh")
    with open(runscript, 'w') as outf:
        for cmd in runcommands:
            outf.write(cmd + "\n")
    # Make executable
    st = os.stat(runscript)
    os.chmod(runscript, st.st_mode | stat.S_IEXEC)
def test_joinpath_fails_on_empty(self):
    """Calling path.joinpath with no arguments must raise TypeError."""
    raised = False
    try:
        path.joinpath()
    except TypeError:
        raised = True
    if not raised:
        raise Exception("did not raise")
def asset_for(url):
    """Map an application URL back to the asset file it serves.

    Returns the filesystem path (a path object) when *url* matches the
    'assets' endpoint and the file exists; returns None (implicitly)
    otherwise.
    """
    from flask import current_app
    match = current_app.url_map.bind('').match(url)
    if match and match[0] == 'assets':
        path_ = path.joinpath(current_app.config['ASSETS_FOLDER'],
                              match[1]['filename'])
        if path_.exists():
            # FIX: return the already-computed path instead of rebuilding the
            # identical joinpath expression a second time.
            return path_
def setUp(self):
    """Build the fixtures for the BldFileParser tests: the top-level bld.inf
    path, the parser under test, the expected mmp files per component and
    the normalised group-directory paths."""
    self.bld_path = path.joinpath(TSRC_DIR, "tsrc", "group", "bld.inf").normpath()
    parent_dir = self.bld_path.dirname()
    self.tcp = ats3.parsers.BldFileParser()
    self.test_mmp_files = [
        ['tc1.mmp'],
        ['dependent_1.mmp', 'onemore.mmp'],
        ['dependent_2.mmp'],
        ['sub_test.mmp'],
        ['if_test.mmp'],
        ['tc2.mmp'],
        ["tc3.mmp"],
    ]
    # Relative group dirs as they appear in the #include chain; normpath
    # collapses the embedded "..".
    relative_groups = [
        "../tc1/group",
        "../tc1/group/../dependent_1/group",
        "../tc1/group/../dependent_2/group",
        "../tc1/group/../subtest/group",
        "../tc1/group/../subtest/group/../if_test/group",
        "../tc2/group",
        "../tc3/group",
    ]
    self.path_list = [path.joinpath(parent_dir, rel).normpath()
                      for rel in relative_groups]
def do_it(input_dir, image_txt, output_npy, R=0, G=0, B=0, H=0, S=0, V=0): # initial color histogram extractor extractor = ColorHistogram(R=R, G=G, B=B, H=H, S=S, V=V) # load image list with open(image_txt, 'r') as f: images = [image.strip() for image in f.readlines()] nImages = len(images) nDimensions = extractor.get_dimension() data = np.empty((nImages, nDimensions), dtype=float) for i, image in enumerate(images): if i % 1000 == 0: print 'Processing image {i} / {n}'.format(i=i + 1, n=len(images)) # 100024928 --> /path/to/input/dir/100024928.jpg pathToImage = path.joinpath(input_dir, image + '.jpg') data[i, :] = extractor(pathToImage) with open(output_npy, 'wb') as f: np.save(f, data)
def generateQcrunScript(dp_master_file, dp_in_dir, qc_dir):
    """ Creates the qc runscript for easy qc generation. """
    # Each QC plot family is bundled into one PDF via ImageMagick convert.
    convert_cmds = [
        "convert *alleleFreq*png alleleFrequency.pdf",
        "convert *copyNumberAdjustment*png copyNumberAdjustment.pdf",
        "convert *depth*png depth.pdf",
        "convert *kappa*png kappa.pdf",
        "convert *mutation.copy.num*png mutation.copy.number.pdf",
        "convert *totalCopy*png totalCopyNumber.pdf",
        "convert *_fractionOfCells*png fractionOfCells.pdf",
        "convert *subclonalFractionPerChromosome*png subclonalFractionPerChromosome.pdf",
        "convert *large.subclonal.fraction.by.chrom*png large.subclonal.fraction.by.chrom.pdf",
        "convert *depth.vs.frac.mutCount.png depth.vs.frac.mutCount.pdf",
        "convert *_cellularityCorrectedAF.png cellularityCorrectedAF.pdf",
    ]
    scriptfile = path.joinpath(qc_dir, "runQc.sh")
    outf = open(scriptfile, "w")
    # First line runs the QC script itself; the rest assemble the PDFs.
    outf.write(merge_items([SCRIPT, dp_master_file, dp_in_dir, qc_dir]) + "\n")
    for cmd in convert_cmds:
        outf.write(cmd + "\n")
    outf.close()
    # Make executable
    st = os.stat(scriptfile)
    os.chmod(scriptfile, st.st_mode | stat.S_IEXEC)
def generateDPDataFile(proj_name, infile, dp_in_dir, run_dir): ss = read_sample_infile(infile) # Collect the purity estimate for each tumour from BB output tumour2purity = getTumourPurity(ss) # Create an inventory of all available dp input files dp_in_files = np.array(path(dp_in_dir).listdir("*.txt")) outfile = open(path.joinpath(run_dir, proj_name + ".txt"), 'w') outfile.write("sample\tsubsample\tdatafile\tcellularity\n") for sample in ss.getSamplenames(): for tumour in ss.getTumours(sample): dp_in_file = dp_in_files[np.array( [tumour in item for item in dp_in_files])] if not len(dp_in_file) == 1: print(dp_in_file) print("Found different than expected dp input matches for " + tumour) continue if tumour in tumour2purity.keys(): outfile.write(sample + "\t" + tumour + "\t" + path(dp_in_file[0]).basename() + "\t" + tumour2purity[tumour] + "\n") else: print("Did not find purity estimate for " + tumour) outfile.close()
def generateBattenbergPipeline_SNP6(bb_config, pipe_exe, pipe_rerun_exe, pipe_rerun_manual_exe):
    """Write the SNP6 Battenberg params file for this sample and return the
    three run commands (run, rerun, manual rerun), each of the form
    "<exe> <config_file>"."""
    config_file = path.joinpath(bb_config.get_run_dir(),
                                "params" + bb_config.get_samplename() + ".txt")
    bb_config.generateParamsFile_SNP6(config_file)
    run_cmd, rerun_cmd, manual_cmd = [
        exe + ' ' + config_file
        for exe in (pipe_exe, pipe_rerun_exe, pipe_rerun_manual_exe)
    ]
    return (run_cmd, rerun_cmd, manual_cmd)
def find_insert(self, dic_infos=None, level=0):
    '''
    Produces the code to be inserted in the straptoc document.

    Recursively walks the current working directory, printing a straptoc
    header plus one entry per file matching this instance's kind filter,
    then descending into each subdirectory.

    Args:
        dic_infos: unused; kept for backward compatibility. (FIX: was a
            mutable ``{}`` default — a Python anti-pattern — now None.)
        level: current recursion depth; controls indentation.
    '''
    pf = os.getcwd()  # path folder
    dn = os.path.basename(pf)  # directory name (currently unused)
    # Root address for straptoc: the last (level+1) components of the cwd;
    # the trailing [:] converts the path object to a plain string slice.
    root = path.joinpath(*path(pf).splitall()[-(level + 1):])[:]
    for i, f in enumerate(glob.glob('*')):
        if path(f).isfile():
            # Emit the section header once, before the first file entry.
            if i == 0:
                if level > 0:
                    print(self.pref(level) + self.strap_kind[self.kind])
                print(" " * level * 4 + '+++ ' + root)
            for k in self.kind_filter[self.kind]:
                if k in f:
                    print(" " * level * 4 + self.make_code(f, self.kind))
        elif path(f).isdir():
            # print(self.pref(level) + f + ' ::')
            os.chdir(f)  # go inside
            # Recurse one level deeper (level is passed, not mutated, so the
            # remaining iterations of this loop see the original depth).
            self.find_insert(level=level + 1)
            os.chdir(path('../'))  # go back
def generateBsubCmd(jobname, logdir, cmd, queue="normal", mem=1, depends=None, isArray=False, threads=None):
    '''
    Transforms the cmd into a bsub command with the supplied parameters.

    Args:
        jobname: LSF job name, also used in the log file names.
        logdir: directory that receives the .out/.err log files.
        cmd: the shell command to wrap (single-quoted in the result).
        queue: LSF queue name.
        mem: memory in GB; converted to MB for -M and the -R resource string.
        depends: optional list of job names this job waits on via done().
        isArray: when True the log names embed the array index (%I).
        threads: optional -n thread count.

    Returns:
        The complete bsub command line as a string.
    '''
    bcmd = merge_items(["bsub", "-q", queue, "-J \"" + jobname + "\""])

    # CONSISTENCY FIX: both -o and -e now join the complete filename in one
    # joinpath call — previously -o appended its suffix after joining, which
    # produced the same path but read inconsistently.
    if isArray:
        bcmd = merge_items([
            bcmd, "-o", path.joinpath(logdir, jobname + ".%J.%I.out"),
            "-e", path.joinpath(logdir, jobname + ".%J.%I.err")
        ])
    else:
        bcmd = merge_items([
            bcmd, "-o", path.joinpath(logdir, jobname + ".%J.out"),
            "-e", path.joinpath(logdir, jobname + ".%J.err")
        ])

    # mem arrives in GB; LSF expects MB.
    mem = str(mem) + "000"
    bcmd = merge_items([
        bcmd, "-M", mem, "-R",
        "'span[hosts=1] select[mem>" + mem + "] rusage[mem=" + mem + "]'"
    ])

    if depends is not None:
        depends_str = "&&".join("done(" + d + ")" for d in depends)
        bcmd = merge_items([bcmd, "-w\"" + depends_str + "\""])
    if threads is not None:
        bcmd = merge_items([bcmd, "-n", str(threads)])

    bcmd = merge_items([bcmd, "'" + cmd + "'"])
    return (bcmd)
def generateBattenbergPipeline_CGPIT(bb_conf):
    """Write <run_dir>/RunCommands_<sample>.sh containing one bsub line per
    CGP-IT Battenberg stage, chained via done() dependencies, make it
    executable and return its path."""
    samplename = bb_conf.get_samplename()
    log_dir = bb_conf.get_log_dir()
    threads = bb_conf.get_threads()

    # (job prefix, command builder, queue, mem GB, dependency prefixes, threads)
    # — one row per pipeline stage, in submission order.
    stages = [
        ("loci",           createAlleleCountCmd,    "normal",  4, [],                        threads),
        ("baflog",         createBafLogCmd,         "normal", 28, ["loci"],                  threads),
        ("imputebaf",      createImputeFromBafCmd,  "normal",  7, ["loci"],                  threads),
        ("impute",         createImputeCmd,         "long",   25, ["imputebaf"],             threads),
        ("combineimpute",  createCombineImputeCmd,  "long",   25, ["impute"],                threads),
        ("haplotypebafs",  createHaplotypeBafsCmd,  "normal", 12, ["combineimpute"],         threads),
        ("cleanuppostbaf", createCleanupPostBafCmd, "normal", 12, ["haplotypebafs"],         threads),
        ("plothaplotypes", createPlotHaplotypesCmd, "normal",  1, ["cleanuppostbaf"],        threads),
        ("combinebafs",    createCombineBafsCmd,    "normal",  4, ["plothaplotypes"],        None),
        ("segmentphased",  createSegmentPhasedCmd,  "normal",  4, ["combinebafs"],           None),
        ("fitcn",          createFitcnCmd,          "normal", 16, ["segmentphased", "baflog"], None),
        ("subclones",      createSubclonesCmd,      "normal", 16, ["fitcn"],                 None),
        ("finalise",       createFinaliseCmd,       "normal",  2, ["subclones"],             None),
    ]

    runscript = path.joinpath(bb_conf.get_run_dir(),
                              "RunCommands_" + samplename + ".sh")
    outf = open(runscript, 'w')
    for prefix, builder, queue, mem, dep_prefixes, nthreads in stages:
        cmd = builder(bb_conf)
        deps = [d + "_" + samplename for d in dep_prefixes] if dep_prefixes else None
        outf.write(generateBsubCmd(prefix + "_" + samplename, log_dir, cmd,
                                   queue=queue, mem=mem, depends=deps,
                                   isArray=False, threads=nthreads) + "\n")
    outf.close()

    # Make executable
    st = os.stat(runscript)
    os.chmod(runscript, st.st_mode | stat.S_IEXEC)
    return(runscript)
def start_server(job_cls, script_dir, exp, tag, force=False, save=False, user=None, address=None, authkey=None):
    """ Start a distributed job server."""
    create_interrupt()
    # Locate the pickled config for this experiment/tag and derive the tasks.
    cfg_name = "cfg_{}.pkl".format(tag)
    script_pth = path.joinpath(script_dir, exp, cfg_name)
    params = job_cls.get_params(script_pth)
    params["save"] = save
    tasks = job_cls.create_tasks(params)
    # Bring the server up with the prepared parameters and task set.
    server = ServerManager(user=user, address=address, authkey=authkey)
    server.start(params, tasks, force=force)
def make_new_album(from_dir):
    """Interactively build a photo album from the images in *from_dir*.

    Prompts for an album name, creates photos/<name>, previews each source
    file with eog and, for each accepted picture, copies it plus a generated
    200x150 thumbnail into the album.  Returns the list of Photo objects.

    FIX: the original contained a bare ``try:`` with no handler (invalid
    syntax) and chdir'd into the album without ever restoring the working
    directory; try/finally now restores the caller's cwd in all cases.
    """
    #currently, no error handling
    frd = path(from_dir)
    d = raw_input("What is the album's name? ")
    original_cwd = os.getcwd()
    try:
        os.mkdir(path.joinpath('photos', d))
        os.chdir(path.joinpath('photos', d))
        cwd = path.getcwd()
        included = []
        for f in frd.files():
            # Preview the candidate picture for the user.
            commands.getoutput('eog %s' % f)
            ans = raw_input('include this file in the album? (y/[n]) ')
            if ans.lower() == 'y' or ans.lower() == 'yes':
                cmt = raw_input('picture caption: (blank for none)')
                path.copy(f, cwd)
                # Thumbnail name: insert ".thumb" before the 3-char extension.
                n = f.name.replace(f.name[-4:], '.thumb' + f.name[-4:])
                thumb = path.joinpath(cwd, n)
                path.copy(f, thumb)
                cur_pic = path.joinpath(cwd, f.name)
                included.append(Photo(cur_pic, thumb, cmt))
                im = resize_image(Image.open(thumb), (200, 150))
                im.save(file(thumb, 'w'))
        return included
    finally:
        # Always restore the caller's working directory.
        os.chdir(original_cwd)
def navigate(self, web, match, consumed_path):
    """Serve a file resolved below this route's target directory.

    Expands regex backreferences in self.target, resolves the remaining
    request path, rejects directories with 403 and filtered files with 404,
    and otherwise hands the file to the file server under a possibly
    spoofed (renamed) filename.
    """
    # Escape backslashes that are NOT backreferences (\1 / \g<name>) so they
    # survive the regex expansion as literals.
    escaped_target = re.sub(r'\\(?!\d|g<)', r'\\\\', self.target)
    dest = self.r_expand(match['path'], escaped_target)
    groups = self.r_groups(match)
    req_path = groups.pop('tnotfwsbr')
    filepath = path.joinpath(self.context.dir, dest, req_path)
    filedir, filename = filepath.splitpath()
    if filepath.isdir():
        web.response.error(403)
        # BUGFIX: missing return — without it execution fell through and
        # attempted to serve the directory after emitting the 403 (the 404
        # branch below already returns, so error() itself does not abort).
        return
    if not self.filter(filedir, filename):
        web.response.error(404)
        return
    spoof_name = self.rename(filedir, filename)
    self.file_server.serve(web, filepath, rename=spoof_name)
def get_cfg(vpn_name, default_cfg_pth="."):
    """ Returns a valid VPN config file."""
    candidate = path.joinpath(default_cfg_pth, vpn_name)
    if not candidate.isfile():
        if candidate.isdir():
            # A directory was named: fall back to its first *.conf file.
            conf_files = candidate.files("*.conf")
            if conf_files:
                candidate = conf_files[0]
        else:
            # A bare name was given: try the conventional .conf suffix.
            candidate += ".conf"
        if not candidate.isfile():
            msg = "Cannot find VPN: %s" % vpn_name
            log.error(msg)
            return None
    return candidate
def cd(*pth):
    ''' chdirs to path, always restoring the cwd

    >>> with cd('mydir'):
    >>>     do_stuff()
    '''
    saved_cwd = os.getcwd()
    try:
        destination = path.joinpath(*pth)
        #inform('cd %s' % os.path.abspath(destination))
        os.chdir(destination)
        yield
    finally:
        # Restore the original working directory no matter what happened.
        #inform('cd %s' % os.path.abspath(saved_cwd))
        os.chdir(saved_cwd)
def get_params(cls, script_pth):
    """Load the parameters.

    Args:
        script_pth (str): Path to the pickled job config.

    Returns:
        (dict): Parameters, augmented with the Job class, the script path
        and the derived tasks-pickle path.
    """
    # BUGFIX: pickle streams must be read in binary mode ("rb") — text mode
    # corrupts the data on Windows and fails outright on Python 3.
    with path(script_pth).open("rb") as fid:
        params = pickle.load(fid)
    params["Job"] = cls
    params["script_pth"] = str(script_pth)
    tasks_fn = "tasks_{}.pkl".format(params["tag"])
    params["tasks_pth"] = str(path.joinpath(params["out_dir"], tasks_fn))
    return params
def start_debug(job_cls, script_dir, exp, tag, force=False, save=False, graphics=False):
    """ Distributed job debugger."""
    create_interrupt()
    # Load parameters and build the full task set for this experiment/tag.
    script_pth = path.joinpath(script_dir, exp, "cfg_{}.pkl".format(tag))
    params = job_cls.get_params(script_pth)
    tasks = job_cls.create_tasks(params)
    # Run every task in-process, in deterministic (sorted) order.
    job = job_cls(params, save=save, graphics=graphics)
    job.setup()
    for taskname in sorted(tasks):
        task = tasks[taskname]
        progress, tmp_fid = job.run(task)
        if tmp_fid:
            # Persist the temporary result file to the task's output path.
            path(tmp_fid.name).copy2(task["out_pth"])
            tmp_fid.close()
        progress.report()
    job.teardown()
def diff_dir(dir_cmp, left_path=True):
    """
    A generator that, given a ``filecmp.dircmp`` object, yields the paths
    to all files that are different. Works recursively.

    :param dir_cmp: A ``filecmp.dircmp`` object representing the comparison.
    :param left_path: If ``True``, paths will be relative to dircmp.left.
        Else paths will be relative to dircmp.right.
    """
    base = dir_cmp.left if left_path else dir_cmp.right
    for name in dir_cmp.diff_files:
        yield path.joinpath(base, name)
    # Recurse into every compared subdirectory, re-yielding each result so
    # the values propagate up the generator stack.
    for sub_cmp in dir_cmp.subdirs.values():
        for differing in diff_dir(sub_cmp, left_path):
            yield differing
def writeSimpleShellScript(rundir, scriptname, cmds):
    '''
    Creates a simple script with the commands specified in cmds contained
    within. This script works with jobarrays.
    Note: It returns the status of the last run command.
    '''
    scriptfile = path.joinpath(rundir, scriptname)
    with open(scriptfile, 'w') as script:
        script.write('#$LSB_JOBINDEX\n')
        script.writelines(cmd + "\n" for cmd in cmds)
        script.write('exit $?\n')
    # Flag the script executable so it can be submitted directly.
    st = os.stat(scriptfile)
    os.chmod(scriptfile, st.st_mode | stat.S_IEXEC)
    return (scriptfile)
def process_file(file):
    """Strip the JS wrapper from a generated .js file (or patch a .map's
    source reference), then move the result into BUILDDIR as .json."""
    file = path(file)
    lines = file.lines()
    ext = file.ext
    if ext == '.js':
        # Remove the surrounding "(...)" wrapper and the two trailing lines.
        lines[0] = lines[0].replace('(', '')
        lines[-3] = lines[-3].replace(');', '')
        lines = lines[:-2]
    elif ext == '.map':
        # Point the source map at the renamed .json artefact.
        lines[2] = lines[2].replace('.js"', '.json"')
    else:
        raise TypeError('UNEXPECTED FILE EXTENSION: ' + ext)
    file.write_lines(lines)

    # Rename the processed file into the build directory with a .json name.
    target = path.joinpath(BUILDDIR, file.basename().replace('.js', '.json'))
    file.rename(target)
def generateAlleleCountPipelines(run_dir, samplesheet, loci):
    """Create one allele-count pipeline per BAM for every tumour/normal pair
    in the samplesheet, collect the bsub submission lines and write them to
    an executable <run_dir>/RunAlleleCountCommands.sh master script.
    """
    runscripts = []

    def _submission_for(run_dir_sample, sample_id, bam):
        # One allele-count pipeline per BAM; returns its bsub command line.
        # (Extracted: the tumour and normal branches were duplicated inline.)
        runscript = generateAlleleCountPipeline(
            run_dir_sample, sample_id, bam, loci,
            path.joinpath(
                run_dir_sample,
                sample_id + "_alleleFrequencies_chr${LSB_JOBINDEX}.txt"))
        return ("bsub -q long -J alleleCount_" + sample_id + "[1-23] -o " +
                path.joinpath(run_dir_sample, "logs",
                              "alleleCount_" + sample_id + ".%J.out") +
                " " + runscript)

    for sample in samplesheet.getSamplenames():
        print(sample)
        tn_pairs = samplesheet.getTumour2NormalPairingBam(sample)
        for tb, nb in tn_pairs:
            run_dir_sample = path.joinpath(run_dir, sample)
            runscripts.append(_submission_for(
                run_dir_sample, samplesheet.getIdByTumourBam(tb), tb))
            runscripts.append(_submission_for(
                run_dir_sample, samplesheet.getIdByNormalBam(nb), nb))

    # Create a master script
    scriptname = path.joinpath(run_dir, "RunAlleleCountCommands.sh")
    with open(scriptname, 'w') as runscript:
        for item in runscripts:
            runscript.write(item + "\n")
    # Make executable
    st = os.stat(scriptname)
    os.chmod(scriptname, st.st_mode | stat.S_IEXEC)
from panda3d.core import loadPrcFile
from path import path

# The root local path.
# NOTE(review): .split() here is plain str.split (whitespace), so this
# breaks for install paths containing spaces — confirm whether
# path.py's splitall()/splitpath() was intended instead.
ROOT_PATH = path(__path__[0]).abspath().split()[0]

# Load scenesim-specific Panda3d settings.
config_pth = path.joinpath(ROOT_PATH, "cfg/Config.prc")
if config_pth.isfile():
    loadPrcFile(config_pth)
def setup_module():
    """Setup the test environment. The testing of the test parser script
    requires a specific directory structure to be available with bld.inf
    files (with the content written into those)."""
    global TSRC_DIR
    # Everything lives under one fresh temp dir shared by the test module.
    TSRC_DIR = path(tempfile.mkdtemp()).normpath()
    test_component = TSRC_DIR
    # Create the full component/group/data directory skeleton first.
    for path_parts in (("tsrc", "group"),
                       ("tsrc", "tc1", "group"),
                       ("tsrc", "tc1", "data"),
                       ("tsrc", "tc1", "dependent_1", "group"),
                       ("tsrc", "tc1", "dependent_2", "group"),
                       ("tsrc", "tc1", "subtest", "group"),
                       ("tsrc", "tc1", "subtest", "data"),
                       ("tsrc", "tc1", "subtest", "if_test", "group"),
                       ("tsrc", "tc2", "group"),
                       ("tsrc", "tc2", "data"),
                       ("tsrc", "tc3", "group"),
                       ("tsrc", "tc3", "data"),
                       ("tmp", "macros"),
                       ):
        filepath = path.joinpath(test_component, *path_parts).normpath()
        if not filepath.exists():
            os.makedirs(filepath)
    # Top-level bld.inf chaining the three test components.
    tsrc = open(path.joinpath(TSRC_DIR, "tsrc", "group", "bld.inf"), 'w')
    tsrc.write(
        r"""
#include "../tc1/group/bld.inf"
#include "../tc2/group/bld.inf"
#include "../tc3/group/bld.inf"

PRJ_TESTMMPFILES
""")
    tsrc.close()
    # tc1: has two dependents and a subtest, plus a non-test PRJ_MMPFILES entry.
    tc1 = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "group", "bld.inf"), 'w')
    tc1.write(
        r"""
#include "../dependent_1/group/bld.inf"
#include "../dependent_2/group/bld.inf"
#include "../subtest/group/bld.inf"

PRJ_TESTMMPFILES
tc1.mmp

PRJ_MMPFILES
not_included.mmp
""")
    tc1.close()
    tc1_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "group", "tc1.mmp"), 'w')
    tc1_mmp.write(
        r"""
TARGET tc1.dll
TARGETTYPE dll
LIBRARY stiftestinterface.lib
LIBRARY user.lib
""")
    tc1_mmp.close()
    # tc1 subtest: includes if_test only when RD_TEST1 is undefined.
    tc1_sub = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "subtest",
                                 "group", "bld.inf"), "w")
    tc1_sub.write(
        r"""
PRJ_TESTMMPFILES
sub_test.mmp

#ifndef RD_TEST1
#include "../if_test/group/bld.inf"
#endif
""")
    tc1_sub.close()
    tc1_sub_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "subtest",
                                     "group", "sub_test.mmp"), 'w')
    tc1_sub_mmp.write(
        r"""
TARGET sub_test.dll
TARGETTYPE dll
LIBRARY stiftestinterface.lib
""")
    tc1_sub_mmp.close()
    tc1_if = open(path.joinpath(TSRC_DIR, "tsrc", "tc1",
                                "subtest", "if_test", "group", "bld.inf"), "w")
    tc1_if.write(
        r"""
PRJ_TESTMMPFILES
if_test.mmp
""")
    tc1_if.close()
    tc1_if_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "subtest",
                                    "if_test", "group", "if_test.mmp"), 'w')
    tc1_if_mmp.write(
        r"""
TARGET tc1_if.dll
TARGETTYPE dll
LIBRARY stifunit.lib
""")
    tc1_if_mmp.close()
    tc1_dep1 = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "dependent_1",
                                  "group", "bld.inf"), "w")
    tc1_dep1.write(
        r"""
PRJ_TESTMMPFILES
dependent_1.mmp
onemore.mmp
""")
    tc1_dep1.close()
    tc1_dep1_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "dependent_1",
                                      "group", "dependent_1.mmp"), 'w')
    tc1_dep1_mmp.write(
        r"""
TARGET dependent_1.dll
TARGETTYPE PLUGIN
""")
    tc1_dep1_mmp.close()
    tc1_dep2 = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "dependent_2",
                                  "group", "bld.inf"), "w")
    tc1_dep2.write(
        r"""
PRJ_TESTMMPFILES
dependent_2.mmp
""")
    tc1_dep2.close()
    tc1_dep2_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "dependent_2",
                                      "group", "dependent_2.mmp"), 'w')
    tc1_dep2_mmp.write(
        r"""
TARGET dependent_2.dll
TARGETTYPE PLUGIN
""")
    tc1_dep2_mmp.close()
    # tc1 .pkg: exercises comments, FF flags, a commented-out line and
    # forward-slash paths; backslashes are normalised to os.sep on write.
    tc1_pkg = open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "group", "tc1.pkg"), 'w')
    tc1_pkg.write(
        r"""
;Language - standard language definitions
&EN

; standard SIS file header
#{"BTEngTestApp"},(0x04DA27D5),1,0,0

;Supports Series 60 v 3.0
(0x101F7961), 0, 0, 0, {"Series60ProductID"}

;Localized Vendor Name
%{"BTEngTestApp"}

;Unique Vendor name
:"Nokia"

; Files to copy
"..\data\file1.dll"-"c:\sys\bin\file1.dll"
"..\data\file1.txt"-"e:\sys\bin\file1.txt" , FF ; FF stands for Normal file
"..\data\file2.mp3"-"e:\sys\bin\file2.mp3"
"..\data\TestFramework.ini"-"c:\sys\bin\TestFramework.ini"
;"..\xyz\TestFramework.ini"-"!:\sys\bin\TestFramework.ini" (commented line)
"../data/temp.ini"-"!:/sys/bin/temp.ini" , FF ; "something here"
"..\data\tc1.cfg"-"e:\sys\bin\tc1.cfg"
"..\data\tc1.sisx"-"e:\sys\bin\tc1.sisx"
"..\data\DUMP.xyz"-"e:\sys\bin\DUMP.xyz"
""".replace('\\', os.sep))
    tc1_pkg.close()
    # Empty payload files referenced by tc1.pkg.
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "file1.dll"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "file1.txt"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "file2.mp3"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "TestFramework.ini"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "temp.ini"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc1", "data", "DUMP.xyz"), 'w').close()
    tc2 = open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "group", "bld.inf"), "w")
    tc2.write(
        r"""
PRJ_TESTMMPFILES
tc2.mmp
""")
    tc2.close()
    tc2_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "group", "tc2.mmp"), 'w')
    tc2_mmp.write(
        r"""
TARGET tc2.dll
TARGETTYPE dll
LIBRARY EUnit.lib
""")
    tc2_mmp.close()
    tc2_pkg = open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "group", "tc2.pkg"), 'w')
    tc2_pkg.write(
        r"""
;Language - standard language definitions
&EN

; standard SIS file header
#{"BTEngTestApp"},(0x04DA27D5),1,0,0

;Supports Series 60 v 3.0
(0x101F7961), 0, 0, 0, {"Series60ProductID"}

;Localized Vendor Name
%{"BTEngTestApp"}

;Unique Vendor name
:"Nokia"

; Files to copy
"..\data\file1.dll"-"c:\sys\bin\file1.dll"
"..\data\file1.txt"-"e:\sys\bin\file1.txt"
"..\data\file2.mp3"-"e:\sys\bin\file2.mp3"
"..\data\TestFramework.ini"-"!:\sys\bin\TestFramework.ini" , FF ; FF stands for Normal file
"..\data\tc2.cfg"-"!:\sys\bin\tc2.cfg"
""".replace('\\', os.sep))
    tc2_pkg.close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "data", "file1.dll"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "data", "file1.txt"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "data", "file2.mp3"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "data", "TestFramework.ini"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "data", "tc2.cfg"), 'w').close()
    tc3 = open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "group", "bld.inf"), "w")
    tc3.write(
        r"""
PRJ_TESTMMPFILES
tc3.mmp
""")
    tc3.close()
    tc3_mmp = open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "group", "tc3.mmp"), 'w')
    tc3_mmp.write(
        r"""
TARGET tc3.dll
TARGETTYPE dll
LIBRARY EUnit.lib
""")
    tc3_mmp.close()
    # NOTE(review): this writes tc3's package data into tc2/group/tc2.pkg,
    # overwriting the tc2 fixture created above — looks like a copy-paste
    # slip (tc3/group/tc3.pkg was presumably intended). Confirm whether any
    # test depends on the overwrite before changing it.
    tc3_pkg = open(path.joinpath(TSRC_DIR, "tsrc", "tc2", "group", "tc2.pkg"), 'w')
    tc3_pkg.write(
        r"""
;Language - standard language definitions
&EN

; standard SIS file header
#{"BTEngTestApp"},(0x04DA27D5),1,0,0

;Supports Series 60 v 3.0
(0x101F7961), 0, 0, 0, {"Series60ProductID"}

;Localized Vendor Name
%{"BTEngTestApp"}

;Unique Vendor name
:"Nokia"

; Files to copy
"..\data\file1.dll"-"c:\sys\bin\file1.dll"
"..\data\file1.txt"-"e:\sys\bin\file1.txt"
"..\data\file2.mp3"-"e:\sys\bin\file2.mp3" , FF ; FF stands for Normal file
"..\data\TestFramework.ini"-"!:\sys\bin\TestFramework.ini"
"..\data\temp.ini"-"!:\sys\bin\temp.ini"
"..\data\tc2.cfg"-"!:\sys\bin\tc2.cfg"
""".replace('\\', os.sep))
    tc3_pkg.close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "file1.dll"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "file1.txt"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "file2.mp3"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "TestFramework.ini"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "temp.ini"), 'w').close()
    open(path.joinpath(TSRC_DIR, "tsrc", "tc3", "data", "tc2.cfg"), 'w').close()
    # Macro header used by the #ifndef RD_TEST1 branch in tc1/subtest.
    macros = open(path.joinpath(TSRC_DIR, "tmp", "macros", "bldcodeline.hrh"), 'w')
    macros.write(
        r"""
#ifndef __BLDCODELINE_HRH
#define __BLDCODELINE_HRH

/** #RD_TEST */
#define RD_TEST1

/** #RD_TEST2 */
#define RD_TEST2

/** #RD_TEST3 */
#define RD_TEST3

#endif  // __BLDCODELINE_HRH
""")
    macros.close()
def test_joinpath_on_class(self):
    """joinpath invoked on the class builds a path from string parts."""
    joined = path.joinpath('foo', 'bar')
    expected = p(nt='foo\\bar', posix='foo/bar')
    assert joined == expected
# Extend the import path with the bundled lib/gui/resource directories.
# NOTE(review): `path`, `basedir` and `pythonpath` are defined earlier in
# this file (outside this chunk) — presumably os.path, the install root and
# sys.path respectively; confirm.
libdir = path.abspath(path.join(basedir, 'lib'))
guidir = path.abspath(path.join(basedir, 'package'))
resdir = path.abspath(path.join(basedir, 'resources'))
pythonpath.insert(0, libdir)
pythonpath.insert(1, guidir)
pythonpath.insert(2, resdir)

# Initializing logger
import logging
from path import path as Path
from package.log import set_log_file
from os import environ

log = logging.getLogger('Runner')
# Log file lives next to this script; create it on first run.
homedir = Path(__file__).dirname()
LOG_FILE = Path.joinpath(homedir, 'packagehelper.log')
if not LOG_FILE.exists():
    print "Creating log file"
    LOG_FILE.touch()
# Expose the paths to subprocesses/other modules via the environment.
environ['PKG_LOGFILE'] = str(LOG_FILE)
environ['PKG_HELPER_PATH'] = homedir
environ['PKG_BASEDIR'] = basedir
set_log_file(LOG_FILE, '*')
log.info("Logger started")

def run_no_gui():
    # Text UI entry point: constructing the class runs the processor.
    from package.ui.text import PackageProcessorUI
    PackageProcessorUI()

def run_gui():
    # GUI entry point.
    # NOTE(review): only the import is visible here — the invocation may have
    # been truncated in this chunk; confirm against the full file.
    from package.ui.gui import PackageProcessorGUI
def test_testmmp_files_with_full_path(self):
    """Test if mmp file is returned with its full path"""
    expected = [path.joinpath(TSRC_DIR, "tsrc", "tc1", "group", "tc1.mmp").normpath()]
    self.mmp_file_path = expected
    bld_inf = os.path.normpath(os.path.join(self.path_list[0], "bld.inf"))
    assert self.tcp.get_test_mmp_files(bld_inf) == self.mmp_file_path
def main(argv):
    """Parse arguments and set up a Battenberg (WGS or SNP6) pipeline per sample,
    writing one runscript per tumour and a master RunCommands.sh under -r."""
    parser = argparse.ArgumentParser(prog='GenerateBattenbergPipeline', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-i", type=str, required=True, help="Input file containing at least four columns: samplename, tumour_file, normal_file, gender. If this is a samplesheet, also specify --ss.")
    parser.add_argument("-r", type=str, required=True, help="Full path to a directory where the pipelines will be run")
    parser.add_argument("--ss", action="store_true", help="Input file is a samplesheet")
    # SNP6 and WGS options
    parser.add_argument("--type", choices=["wgs", "snp6"], type=str, help='Type of pipeline to be set up')
    parser.add_argument("-v", "--version", type=str, help='Version of this pipeline to set up')
    # General options
    parser.add_argument("--imputeinfofile", help='Path to impute info file')
    parser.add_argument("--impute_exe", help='Path to impute exe')
    parser.add_argument("--g1000_prefix_alleles", help='Prefix to 1000 Genomes alleles reference files')
    parser.add_argument("--g1000_prefix_loci", help='Prefix to 1000 Genomes loci reference files')
    parser.add_argument("--phasing_gamma", help="Phasing gamma parameter")
    parser.add_argument("--segmentation_gamma", help="Phasing segmentation parameter")
    parser.add_argument("--clonality_dist_metric", help="Type of distance metric used when fitting the clonal copy number profile")
    parser.add_argument("--ascat_dist_metric", help="Type of distance metric used when fitting the ASCAT profile, before BB specific fitting")
    parser.add_argument("--min_ploidy", help="Minimum ploidy to be considered")
    parser.add_argument("--max_ploidy", help="Maximum ploidy to be considered")
    parser.add_argument("--min_rho", help="Minimum rho (cellularity) to be considered")
    parser.add_argument("--min_goodness_of_fit", help="Minimum goodness of fit to be allowed")
    parser.add_argument("--balanced_threshold", help="TODO")  # TODO: explain this option
    parser.add_argument("--problemlocifile", help="File containing problem loci")
    # SNP6 options
    parser.add_argument("--min_count", type=int, help="Minimum read count")  # TODO: shouldn't this be added to the WGS version?? => Added to WGS, remove from SNP6 now?
    parser.add_argument("--fill_in_snps", type=int, choices=[0,1], help="Fill in SNPs, should be either 0 or 1")  # TODO: still used?
    parser.add_argument("--heterozygous_filter", type=str, help="Heterozygous filter to be used")  # TODO: still used?
    parser.add_argument("--use_tumour_snps", type=int, choices=[0,1], help="Use tumour SNPs, either 0 or 1")  # TODO: still used?
    parser.add_argument("--use_het_snps_only", type=int, choices=[0,1], help="Use heterozygous SNPs only, either 0 or 1")  # TODO: still used?
    parser.add_argument("--hom_caveman_snps", type=str, help="File containing Caveman called homozygous SNPs")  # TODO: still used?
    parser.add_argument("--use_loci_file", type=str, help="File containing loci locations")  # TODO: still used?
    parser.add_argument("--snppos", type=str, help="File SNP6 SNP locations")
    parser.add_argument("--gc_snp6", type=str, help="File SNP6 SNP GC content information")
    parser.add_argument("--anno", type=str, help="File with SNP annotation information")
    parser.add_argument("--snp6_ref_info_file", type=str, help="Full path to file with SNP6 reference info")  # TODO: better explanation
    parser.add_argument("--birdseed_report_file", type=str, help="Name of birdseed output file")
    parser.add_argument("--apt_probeset_geno_exe", type=str, help="Full path to apt_probeset_genotype exe from AFFY tools")
    parser.add_argument("--apt_probeset_summ_exe", type=str, help="Full path to apt_probeset_summarise exe from AFFY tools")
    parser.add_argument("--norm_geno_clust_exe", type=str, help="Full path to normalise_genotype_clusters exe from PennCNV")
    # cgpBB options
    parser.add_argument("-t", type=int, help="Number of threads to use")
    parser.add_argument("--genome_index", type=str, help="Full path to a reference genome index FAI file")
    parser.add_argument("--protocol", type=str, choices=["WGS"], help="Sequencing protocol used")
    parser.add_argument("--ignore_file", type=str, help="File with chromosomes to ignore")

    # NOTE(review): g1000_prefix_alleles defaults to G1000_PREFIX (which points at
    # the "Alleles" files) and g1000_prefix_loci to G1000_ALLELES_PREFIX (the "loci"
    # files) — the constant NAMES look swapped but the file mapping appears correct;
    # confirm before touching either side.
    parser.set_defaults(ss=False, t=1, version="2.0.0", type="wgs", \
        imputeinfofile=IMPUTEINFOFILE, impute_exe=IMPUTE_EXE, g1000_prefix_alleles=G1000_PREFIX, phasing_gamma=PHASING_GAMMA, \
        segmentation_gamma=SEGMENTATION_GAMMA, clonality_dist_metric=CLONALITY_DIST_METRIC, ascat_dist_metric=ASCAT_DIST_METRIC, \
        min_ploidy=MIN_PLOIDY, max_ploidy=MAX_PLOIDY, min_rho=MIN_RHO, min_goodness_of_fit=MIN_GOODNESS_OF_FIT, \
        balanced_threshold=BALANCED_THRESHOLD, problemlocifile=PROBLEMLOCIFILE, min_count=MIN_COUNT, fill_in_snps=FILL_IN_SNPS, \
        heterozygous_filter=HETEROZYGOUS_FILTER, use_tumour_snps=USE_TUMOUR_SNPS, use_het_snps_only=USE_HETEROZYGOUS_SNPS_ONLY, \
        hom_caveman_snps=HOMOZYGOUS_CAVEMAN_CALLS_FILE, use_loci_file=USE_LOCI_FILE, snppos=SNPPOS, gc_snp6=GC_SNP6, anno=ANNO_FILE, \
        snp6_ref_info_file=SNP6_REF_INFO_FILE, birdseed_report_file=BIRDSEED_REPORT_FILE, apt_probeset_geno_exe=APT_PROBESET_GENOTYPE_EXE, \
        apt_probeset_summ_exe=APT_PROBESET_SUMMARIZE_EXE, norm_geno_clust_exe=NORM_GENO_CLUST_EXE, genome_index=GENOME_INDEX, \
        protocol=PROTOCOL, ignore_file=IGNORE_FILE, g1000_prefix_loci=G1000_ALLELES_PREFIX)
    args = parser.parse_args()

    '''
    #######################################################################
    Setting Gamma platform parameter according to BB type
    #######################################################################
    '''
    if args.type=="wgs" or args.type=="WGS":
        platform_gamma = PLATFORM_GAMMA_WGS
    elif args.type=="snp6" or args.type=="SNP6":
        platform_gamma = PLATFORM_GAMMA_SNP6
    else:
        print("Unknown BB type supplied, do not know how to set the platform gamma parameter")
        sys.exit(1)

    '''
    #######################################################################
    Checking for BB type and version compatibility
    #######################################################################
    '''
    # Map (type, version) to the pipeline directory and its three runscripts.
    if (args.type=="wgs" or args.type=="WGS"):
        if (args.version=="1.0"):
            pipe_dir = PIPE_DIR_WGS_1_0
            pipe_exe = PIPE_DIR_WGS_1_0_EXE
            pipe_rerun_exe = PIPE_DIR_WGS_1_0_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_WGS_1_0_RERUN_MANUAL_EXE
        elif (args.version=="1.1"):
            # NOTE(review): falls through with pipe_* unset — later use would raise
            print("not implemented")
        elif (args.version=="2.0.0"):
            pipe_dir = PIPE_DIR_WGS_2_0_0
            pipe_exe = PIPE_DIR_WGS_2_0_0_EXE
            pipe_rerun_exe = PIPE_DIR_WGS_2_0_0_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_WGS_2_0_0_RERUN_MANUAL_EXE
        elif (args.version=="dev"):
            pipe_dir = PIPE_DIR_WGS_DEV
            pipe_exe = PIPE_DIR_WGS_DEV_EXE
            pipe_rerun_exe = PIPE_DIR_WGS_DEV_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_WGS_DEV_RERUN_MANUAL_EXE
        elif (args.version=="cgp"):
            # Set bogus values, not needed for this version
            pipe_dir = ""
            pipe_exe = ""
            pipe_rerun_exe = ""
            pipe_rerun_manual_exe = ""
        else:
            print("Unsupported BB WGS version supplied")
            sys.exit(1)
    elif (args.type=="snp6" or args.type=="SNP6"):
        if (args.version=="1.0"):
            pipe_dir = PIPE_DIR_SNP6_1_0
            pipe_exe = PIPE_DIR_SNP6_1_0_EXE
            pipe_rerun_exe = PIPE_DIR_SNP6_1_0_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_SNP6_1_0_RERUN_MANUAL_EXE
        elif (args.version=="2.0.0"):
            pipe_dir = PIPE_DIR_SNP6_2_0_0
            pipe_exe = PIPE_DIR_SNP6_2_0_0_EXE
            pipe_rerun_exe = PIPE_DIR_SNP6_2_0_0_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_SNP6_2_0_0_RERUN_MANUAL_EXE
        elif (args.version=="dev"):
            pipe_dir = PIPE_DIR_SNP6_DEV
            pipe_exe = PIPE_DIR_SNP6_DEV_EXE
            pipe_rerun_exe = PIPE_DIR_SNP6_DEV_RERUN_EXE
            pipe_rerun_manual_exe = PIPE_DIR_SNP6_DEV_RERUN_MANUAL_EXE
        else:
            print("Unsupported BB SNP6 version supplied")
            sys.exit(1)

    runscripts = []

    '''
    #######################################################################
    Read in input data
    #######################################################################
    '''
    if (args.ss):
        # Read in samplesheet
        ss = read_sample_infile(args.i)
    else:
        ss = read_basic_sample_infile(args.i, args.r)

    # For every entry: one pipeline per tumour/normal BAM pair.
    for sample in ss.getSamplenames():
        print(sample)
        tn_pair = ss.getTumour2NormalPairingBam(sample)
        for tb,nb in tn_pair:
            tumourid = ss.getIdByTumourBam(tb)
            normalid = ss.getNormals(sample)[0]  # TODO: what happens when multiple normals?
            run_dir = path.joinpath(args.r, tumourid)
            log_dir = path.joinpath(args.r, tumourid, "logs")
            if not run_dir.exists():
                run_dir.makedirs()
            if not log_dir.exists():
                log_dir.makedirs()
            '''
            #######################################################################
            Create the BB config master object
            #######################################################################
            '''
            bb_conf = bb_pipeline_config(pipe_type=args.type, pipe_version=args.version, pipe_dir=pipe_dir, \
                tumour_file=tb, normal_file=nb, run_dir=run_dir, log_dir=log_dir, samplename=sample, gender=ss.getSex(sample), \
                tumour_id=tumourid, normal_id=normalid, impute_info=args.imputeinfofile, impute_exe=args.impute_exe, \
                g1000_loci_dir=args.g1000_prefix_loci, platform_gamma=platform_gamma, phasing_gamma=args.phasing_gamma, \
                segmentation_gamma=args.segmentation_gamma, clonality_dist_metric=args.clonality_dist_metric, min_count=args.min_count, \
                ascat_dist_metric=args.ascat_dist_metric, min_ploidy=args.min_ploidy, max_ploidy=args.max_ploidy, \
                min_rho=args.min_rho, min_goodness_of_fit=args.min_goodness_of_fit, balanced_threshold=args.balanced_threshold, \
                prob_loci_file=args.problemlocifile, heterozygous_filter=args.heterozygous_filter, use_tumour_snps=args.use_tumour_snps, \
                use_het_snps_only=args.use_het_snps_only, hom_caveman_file=args.hom_caveman_snps, use_loci_file=args.use_loci_file, \
                snppos_file=args.snppos, gc_snp6_file=args.gc_snp6, snp6_anno_file=args.anno, snp6_ref_info_file=args.snp6_ref_info_file, \
                birdseed_report_file=args.birdseed_report_file, apt_probeset_geno_exe=args.apt_probeset_geno_exe, \
                apt_probeset_summ_exe=args.apt_probeset_summ_exe, norm_geno_clust_exe=args.norm_geno_clust_exe, \
                fill_in_snps=args.fill_in_snps, threads=args.t, genome_index=args.genome_index, protocol=args.protocol, \
                ignore_file=args.ignore_file, g1000_alleles_dir=args.g1000_prefix_alleles)
            '''
            #######################################################################
            Set up the pipelines
            #######################################################################
            '''
            if (args.type == "wgs"):
                if (args.version=="cgp"):
                    runscript = generateBattenbergPipeline_CGPIT(bb_conf)
                else:
                    runscript, _, _ = generateBattenbergPipeline_WGS(bb_conf, pipe_exe, pipe_rerun_exe, pipe_rerun_manual_exe)
            elif (args.type == "snp6"):
                runscript, _, _ = generateBattenbergPipeline_SNP6(bb_conf, pipe_exe, pipe_rerun_exe, pipe_rerun_manual_exe)
            runscripts.append(runscript)

    # Create a master script
    scriptname = path.joinpath(args.r, "RunCommands.sh")
    runscript = open(scriptname, 'w')
    for item in runscripts:
        runscript.write(item+"\n")
    runscript.close()

    # Make executable
    st = os.stat(scriptname)
    os.chmod(scriptname, st.st_mode | stat.S_IEXEC)
''), filename)), }) abort(403) else: # DELETE fpath = asset_for(request.json['url']) if fpath: sh.rm(request.json['url']) return 'ok' abort(404) Babel(app) path_ = path.joinpath(app.config['UPLOAD_FOLDER']) if not path_.exists(): sh.mkdir('-p', path_) from json import JSONEncoder import datetime class DynamicJSONEncoder(JSONEncoder): """ JSON encoder for custom classes: Uses __json__() method if available to prepare the object. Especially useful for SQLAlchemy models """ def default(self, o):
def generateDPrunScript(run_dir, dp_in_dir, dp_master_file, projectname):
    '''
    This function will create a series of shell scripts that make life easier when running dirichlet clustering pipelines.
    within the directory that is supplied by the run_dir parameter this will be created:
     - submit.block.sh : for the block parallel tree based method, when run this creates two LSF jobs for the trees and cons phase respectively
     - submit.nd.sh : for running the nD clustering, when run this creates a single LSF job
     - resubmit.block.sh : for resubmitting the cons step of the tree based method, when run this creates a single LSF job for the cons phase
    These would ideally never be called from the command line and only through the other scripts:
     - R wrapper script RunBlockTreeDP_trees.sh
     - R wrapper script RunBlockTreeDP_cons.sh
    '''
    '''
    Write block parallel run scripts
    '''
    # Write R wrappers first
    #lines = _get_r_wrapper_lines("trees")
    #_write_script(path.joinpath(run_dir, "RunBlockTreeDP_trees.sh"), lines)
    #lines = _get_r_wrapper_lines("cons")
    #_write_script(path.joinpath(run_dir, "RunBlockTreeDP_cons.sh"), lines)

    # Write wrapper around R wrappers
    '''
    Example
    SAMPLE=$1
    QUEUE="basement"
    JOBNAME="tree_pros"
    ACCEPTEDHOSTS="-m vr-2-3-10 vr-2-3-02 vr-2-3-05 vr-2-3-08 vr-2-3-15 vr-2-3-13"
    LIBPATH="~/repo/dirichlet/dp_combined/"
    PARAMS="${LIBPATH} ${SAMPLE} 200 30 /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/Data/ /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/prostate_mets.txt tree_dp true 5 0.05"
    NBLOCKS=10
    MEMTREE=75000
    MEMCONS=75000
    bsub -M ${MEMTREE} -R "select[mem>${MEMTREE}] rusage[mem=${MEMTREE}] span[hosts=1]" -n 5 -J "${JOBNAME}_t[1-${NBLOCKS}]" -q "${QUEUE}" -o $PWD/logs/${JOBNAME}_t.%J.%I.out -e $PWD/logs/${JOBNAME}_t.%J.%I.err "${LIBPATH}/RunBlockTreeDP_trees.sh ${PARAMS}"
    bsub -w"ended(${JOBNAME}_t[1-${NBLOCKS}])" -M ${MEMCONS} -R "select[mem>${MEMCONS}] rusage[mem=${MEMCONS}]" -J "${JOBNAME}_c" -q "${QUEUE}" -o $PWD/logs/${JOBNAME}_c.%J.out -e $PWD/logs/${JOBNAME}_c.%J.err "${LIBPATH}/RunBlockTreeDP_cons.sh ${PARAMS} ${NBLOCKS}"
    '''
    # submit.block.sh: array job for the trees phase, then a cons job gated on it.
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"basement\"",
        merge_items(["JOBNAME=\"tree_", projectname, "\""], sep=""),
        merge_items(["LIBPATH=\"", LIBPATH, "\""], sep=""),
        merge_items([
            "PARAMS=\"${LIBPATH} ${SAMPLE} 200 30", dp_in_dir, dp_master_file,
            "tree_dp true 5 0.05\""
        ]),
        "NBLOCKS=\"10\"",
        "MEMTREE=\"15000\"",
        "MEMCONS=\"15000\"",
        merge_items([
            "bsub -M ${MEMTREE} -R \"select[mem>${MEMTREE}] rusage[mem=${MEMTREE}] span[hosts=1]\" -n 5 -J \"${JOBNAME}_t[1-${NBLOCKS}]\" -q \"${QUEUE}\" -o $PWD/logs/${JOBNAME}_t.%J.%I.out -e $PWD/logs/${JOBNAME}_t.%J.%I.err \"${LIBPATH}/RunBlockTreeDP_trees.sh ${PARAMS}\""
        ]),
        merge_items([
            "bsub -w\"ended(${JOBNAME}_t[1-${NBLOCKS}])\" -M ${MEMCONS} -R \"select[mem>${MEMCONS}] rusage[mem=${MEMCONS}]\" -J \"${JOBNAME}_c\" -q \"${QUEUE}\" -o $PWD/logs/${JOBNAME}_c.%J.out -e $PWD/logs/${JOBNAME}_c.%J.err \"${LIBPATH}/RunBlockTreeDP_cons.sh ${PARAMS} ${NBLOCKS}\""
        ])
    ]
    _write_script(path.joinpath(run_dir, "submit.block.sh"), lines)

    '''
    Write resubmit block parallel run script
    Example: resubmit.prostate_mets.block.sh
    SAMPLE=$1
    QUEUE="long"
    JOBNAME="tree_pros"
    ACCEPTEDHOSTS="-m vr-2-3-10 vr-2-3-02 vr-2-3-05 vr-2-3-08 vr-2-3-15 vr-2-3-13"
    LIBPATH="~/repo/dirichlet/dp_combined/"
    PARAMS="${LIBPATH} ${SAMPLE} 200 30 /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/Data/ /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/prostate_mets.txt tree_dp true 5 0.05"
    NBLOCKS=10
    MEMCONS=75000
    bsub -M ${MEMCONS} -R "select[mem>${MEMCONS}] rusage[mem=${MEMCONS}]" -J "${JOBNAME}_c" -q "${QUEUE}" -o $PWD/logs/${JOBNAME}_c.%J.out -e $PWD/logs/${JOBNAME}_c.%J.err "${LIBPATH}/RunBlockTreeDP_cons.sh ${PARAMS} ${NBLOCKS}"
    '''
    # resubmit.block.sh: cons phase only.
    # NOTE(review): unlike the example above, this bsub still carries
    # -w"ended(${JOBNAME}_t[...])" — on a pure resubmit no trees array is
    # running, so LSF may reject or pend the job; confirm intended behaviour.
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"basement\"",
        merge_items(["JOBNAME=\"tree_", projectname, "\""], sep=""),
        merge_items(["LIBPATH=\"", LIBPATH, "\""], sep=""),
        merge_items([
            "PARAMS=\"${LIBPATH} ${SAMPLE} 200 30", dp_in_dir, dp_master_file,
            "tree_dp true 5 0.05\""
        ]),
        "NBLOCKS=\"10\"",
        "MEMCONS=\"15000\"",
        merge_items([
            "bsub -w\"ended(${JOBNAME}_t[1-${NBLOCKS}])\" -M ${MEMCONS} -R \"select[mem>${MEMCONS}] rusage[mem=${MEMCONS}]\" -J \"${JOBNAME}_c\" -q \"${QUEUE}\" -o $PWD/logs/${JOBNAME}_c.%J.out -e $PWD/logs/${JOBNAME}_c.%J.err \"${LIBPATH}/RunBlockTreeDP_cons.sh ${PARAMS} ${NBLOCKS}\""
        ])
    ]
    _write_script(path.joinpath(run_dir, "resubmit.block.sh"), lines)

    '''
    Write the nD run script
    Example: submit.prostate_mets.nd.sh
    QUEUE="normal"
    JOBNAME="nd_pros"
    # -m "vr-2-3-10 vr-2-3-02 vr-2-3-05 vr-2-3-08 vr-2-3-15 vr-2-3-13"
    CMD="Rscript ~/repo/dirichlet/dp_combined/RunDP_pipeline.R"
    PARAMS="1 1250 250 /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/Data/ /lustre/scratch110/sanger/sd11/dirichlet/prostate_mets/prostate_mets.txt nd_dp false 1 NA 1 1"
    MEMORY="17000"
    bsub -J ${JOBNAME} -q ${QUEUE} -M ${MEMORY} -R 'span[hosts=1] select[mem>'${MEMORY}'] rusage[mem='${MEMORY}']' -o $PWD/logs/${JOBNAME}.%J.out -e $PWD/logs/${JOBNAME}.%J.err "${CMD} ${PARAMS}"
    '''
    # submit.nd.sh: single nD clustering job.
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"normal\"",
        merge_items(["JOBNAME=\"nd_", projectname, "\""], sep=""),
        merge_items(["CMD=\"", SCRIPT, "\""], sep=""),
        merge_items([
            "PARAMS=\"${SAMPLE} 1250 250", dp_in_dir, dp_master_file,
            "nd_dp false 1 NA 1 1\""
        ]),
        "MEMORY=\"15000\"",
        "bsub -J ${JOBNAME} -q ${QUEUE} -M ${MEMORY} -R 'span[hosts=1] select[mem>'${MEMORY}'] rusage[mem='${MEMORY}']' -o $PWD/logs/${JOBNAME}.%J.out -e $PWD/logs/${JOBNAME}.%J.err \"${CMD} ${PARAMS}\""
    ]
    _write_script(path.joinpath(run_dir, "submit.nd.sh"), lines)
def dp_preprocessing_icgc_pipeline(samplename, vcf_file, baf_file, hap_info_prefix,
                                   hap_info_suffix, subclone_file, rho_psi_file,
                                   fai_file, ignore_file, gender, bb_dir, log_dir,
                                   run_dir):
    '''
    Simple pipeline for ICGC that runs from allele counts in a VCF file. It does not do any mutation phasing.

    Writes RunCommands_<sample>.sh in run_dir containing chained bsub commands
    (loci -> dumpCounts -> dpIn -> dpIn2Vcf), marks it executable and returns its path.
    NOTE(review): baf_file, hap_info_prefix and hap_info_suffix are accepted but
    never used here (no phasing in this flavour) — kept for signature parity with
    dp_preprocessing_pipeline.
    '''
    # Setup a pipeline script for this sample
    runscript = path.joinpath(run_dir, "RunCommands_" + samplename + ".sh")
    outf = open(runscript, 'w')

    # Set output names of the various steps
    # If the pipeline is to be run in splits per chromosome output files should be named different
    afloci_file_postfix = "_loci.txt"

    '''
    ###########################################################
    Get loci
    ###########################################################
    '''
    # Generate the loci file from vcf
    cmd = createGenerateAFLociCmd(samplename, afloci_file_postfix, vcf_file, run_dir)
    outf.write(
        generateBsubCmd("loci_" + samplename, log_dir, cmd, queue="normal", mem=1,
                        depends=None, isArray=False) + "\n")

    '''
    ###########################################################
    Dump Counts
    ###########################################################
    '''
    # Dump allele counts from the Sanger pipeline
    cmd = createDumpCountsSangerCmd(samplename, vcf_file, run_dir)
    outf.write(
        generateBsubCmd("dumpCounts_" + samplename, log_dir, cmd, queue="normal", mem=1,
                        depends=None, isArray=False) + "\n")

    '''
    ###########################################################
    Generate DP input
    ###########################################################
    '''
    # The two "NA"s stand in for the mut-mut / mut-CN phasing files that the
    # full (non-ICGC) pipeline would supply.
    cmd = createDpInputCmd(samplename, samplename + afloci_file_postfix,
                           samplename + "_alleleFrequency.txt", subclone_file,
                           rho_psi_file, "NA", "NA", gender, bb_dir, run_dir)
    outf.write(
        generateBsubCmd(
            "dpIn_" + samplename, log_dir, cmd, queue="normal", mem=2,
            depends=["loci_" + samplename, "dumpCounts_" + samplename],
            isArray=False) + "\n")

    '''
    ###########################################################
    DP input to VCF
    ###########################################################
    '''
    cmd = createDpIn2VcfCmd(
        vcf_file,
        path.joinpath(run_dir, samplename + "_allDirichletProcessInfo.txt"),
        path.joinpath(run_dir, samplename + ".dpIn.vcf"), fai_file, ignore_file)
    outf.write(
        generateBsubCmd("dpIn2Vcf_" + samplename, log_dir, cmd, queue="normal", mem=2,
                        depends=["dpIn_" + samplename], isArray=False) + "\n")

    outf.close()

    # Make executable
    st = os.stat(runscript)
    os.chmod(runscript, st.st_mode | stat.S_IEXEC)

    return (runscript)
def main(argv):
    """Driver for the Dirichlet preprocessing pipelines: reads a samplesheet,
    writes one runscript per sample (ICGC or full flavour) plus a master script."""
    parser = argparse.ArgumentParser(
        prog='Dirichlet_preprocessing pipeline',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-s", required=True, type=str, help="Sample sheet that contains a line per sample")
    parser.add_argument("-r", required=True, type=str, help="Directory where the pipeline will be run")
    # Optional variables
    parser.add_argument("--split_chroms", action="store_true", help="Split data per chromosome for quicker processing")
    parser.add_argument("-f", type=str, help="Full path to a Fasta index file")
    parser.add_argument("-i", type=str, help="Full path to file with chromosome names to ignore")
    parser.add_argument("--ip", type=str, help="Full path to file with chromsome names to ignore ONLY when phasing")
    parser.add_argument("--icgc", action="store_true", help="Generate ICGC pipeline")
    # Parameters
    parser.add_argument("--min_baq", type=int, help="Minimum BAQ for a base to be included")
    parser.add_argument("--min_maq", type=int, help="Minimum MAQ for a base to be included")
    parser.add_argument("--max_distance", type=int, help="Maximum distance for a pair of mutations to be considered for phasing. Use when either mut_mut or mut_cn phasing")
    parser.set_defaults(min_baq=10, min_maq=10, max_distance=700, debug=False,
                        f=CHROMS_FAI, i=IGNORE_FILE, ip=IGNORE_FILE_PHASE,
                        split_chroms=False, icgc=False)
    args = parser.parse_args()

    # Determine number of chromosomes: first column of the FAI, minus the
    # entries listed in the ignore files (separate count for phasing).
    chroms = [line.strip().split("\t")[0] for line in open(args.f, 'r')]
    chroms_ignore = [line.strip() for line in open(args.i, 'r')]
    chroms_ignore_phase = [line.strip() for line in open(args.ip, 'r')]
    #print(chroms)
    no_chroms = 0
    no_chroms_phasing = 0
    for chrom in chroms:
        if not chrom in chroms_ignore:
            no_chroms = no_chroms + 1
        if not chrom in chroms_ignore_phase:
            no_chroms_phasing = no_chroms_phasing + 1

    # read in a samplesheet
    samples = _readSampleSheet(args.s)

    runscripts_sample = []
    for i in range(0, len(samples)):
        # Samplesheet columns by position — see _readSampleSheet for the format.
        samplename = samples[i][0]
        print(samplename)
        vcf_file = samples[i][1]
        bam_file = samples[i][2]
        bai_file = samples[i][3]
        bb_dir = samples[i][4]
        gender = samples[i][5]
        baf_file = samples[i][6]
        subclone_file = samples[i][7]
        rho_psi_file = samples[i][8]
        hap_info_prefix = samples[i][9]
        hap_info_suffix = samples[i][10]

        run_dir = path.joinpath(args.r, samplename)
        log_dir = path.joinpath(run_dir, "logs")
        # NOTE(review): log_dir is only created when run_dir is absent — a
        # pre-existing run_dir without logs/ would make the bsub -o/-e targets
        # missing; confirm whether that case can occur.
        if not run_dir.exists():
            run_dir.mkdir()
            log_dir.mkdir()

        if (args.icgc):
            # Lightweight ICGC flavour: no mutation phasing.
            runscript = dp_preprocessing_icgc_pipeline(
                samplename=samplename,
                vcf_file=vcf_file,
                fai_file=args.f,
                ignore_file=args.i,
                baf_file=baf_file,
                hap_info_prefix=hap_info_prefix,
                hap_info_suffix=hap_info_suffix,
                subclone_file=subclone_file,
                rho_psi_file=rho_psi_file,
                gender=gender,
                bb_dir=bb_dir,
                log_dir=log_dir,
                run_dir=run_dir)
        else:
            runscript = dp_preprocessing_pipeline(
                samplename=samplename,
                vcf_file=vcf_file,
                fai_file=args.f,
                ignore_file=args.i,
                no_chroms=no_chroms,
                no_chroms_phasing=no_chroms_phasing,
                bam_file=bam_file,
                bai_file=bai_file,
                baf_file=baf_file,
                hap_info_prefix=hap_info_prefix,
                hap_info_suffix=hap_info_suffix,
                subclone_file=subclone_file,
                rho_psi_file=rho_psi_file,
                max_distance=args.max_distance,
                gender=gender,
                bb_dir=bb_dir,
                log_dir=log_dir,
                run_dir=run_dir,
                split_chroms=args.split_chroms)
        runscripts_sample.append(runscript)

    # Create a master script that contains pointers to all sample specific runscripts
    # NOTE(review): written one level ABOVE -r ("..") — confirm this is intended.
    scriptname = path.joinpath(args.r, "..", "RunCommands.sh")
    runscript = open(scriptname, 'w')
    for item in runscripts_sample:
        print(item)
        runscript.write(item + "\n")
    runscript.close()

    # Make executable
    st = os.stat(scriptname)
    os.chmod(scriptname, st.st_mode | stat.S_IEXEC)
def test_joinpath_on_class(self):
    """Calling joinpath on the class builds a path out of a sequence of strings."""
    parts = ("foo", "bar")
    result = path.joinpath(*parts)
    assert result == p(nt="foo\\bar", posix="foo/bar")
import json import os import unittest from mock import Mock from path import path from xblock.field_data import DictFieldData from xmodule.x_module import ModuleSystem, XModuleDescriptor, XModuleMixin from xmodule.modulestore.inheritance import InheritanceMixin from xmodule.mako_module import MakoDescriptorSystem # Location of common test DATA directory # '../../../../edx-platform/common/test/data/' MODULE_DIR = path(__file__).dirname() DATA_DIR = path.joinpath(*MODULE_DIR.splitall()[:-4]) / 'test/data/' open_ended_grading_interface = { 'url': 'blah/', 'username': '******', 'password': '******', 'staff_grading': 'staff_grading', 'peer_grading': 'peer_grading', 'grading_controller': 'grading_controller' } def get_test_system(course_id=''): """ Construct a test ModuleSystem instance.
import panda3d.core as p3d
import scenesim
from path import path
import logging

# load panda configuration
# ROOT_PATH: absolute path two levels above this package's directory.
ROOT_PATH = path(__path__[0]).joinpath("../../").abspath()
p3d.loadPrcFile(path.joinpath(ROOT_PATH, "Config.prc"))


def get_path(name):
    """Resolve the Config.prc variable `name` (default "") against ROOT_PATH."""
    return ROOT_PATH.joinpath(p3d.ConfigVariableString(name, "").get_value())


# Project directory layout, all driven by Config.prc entries.
CPO_PATH = get_path("cpo-path")
RENDER_PATH = get_path("render-path")
SIM_PATH = get_path("sim-path")
SIM_SCRIPT_PATH = get_path("sim-script-path")
RENDER_SCRIPT_PATH = get_path("render-script-path")
EXP_PATH = get_path("experiment-path")
DATA_PATH = get_path("data-path")
EGG_PATH = get_path("egg-path")
TEXTURE_PATH = get_path("texture-path")
BIN_PATH = get_path("bin-path")
FIG_PATH = get_path("figures-path")
RESULTS_PATH = get_path("results-path")

# Register model/texture directories with Panda3D's loader search path.
p3d.getModelPath().appendDirectory(EGG_PATH)
p3d.getModelPath().appendDirectory(TEXTURE_PATH)

# Logging verbosity from Config.prc, upper-cased for the logging module.
LOGLEVEL = p3d.ConfigVariableString("loglevel", "warn").get_value().upper()
# Defaults for external tools and reference files (farm-specific absolute paths).
IMPUTE_EXE='impute2' #/lustre/scratch110/sanger/sd11/epitax/battenberg/PD7404a/impute_v2.2.2_x86_64_static/impute2'
IMPUTEINFOFILE='/lustre/scratch110/sanger/sd11/Documents/GenomeFiles/battenberg_impute_v3/impute_info.txt'
# NOTE(review): the two G1000 constant NAMES look swapped relative to the files
# they point at (G1000_PREFIX -> "Alleles" files, G1000_ALLELES_PREFIX -> "loci"
# files). main() maps G1000_PREFIX to g1000_prefix_alleles, so the file mapping
# appears correct — confirm before renaming either constant.
G1000_PREFIX="/lustre/scratch110/sanger/sd11/Documents/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr"
G1000_ALLELES_PREFIX="/lustre/scratch110/sanger/sd11/Documents/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr"
PROBLEMLOCIFILE='/lustre/scratch110/sanger/sd11/Documents/GenomeFiles/battenberg_probloci/probloci.txt'

'''
#################################################################################################################
Versions
#################################################################################################################
'''
# Installed pipeline versions: each has a directory plus three runscripts
# (initial run, rerun-fit-copynumber, and manual rerun).
PIPE_BASE_DIR="/nfs/users/nfs_s/sd11/software/pipelines/"

PIPE_DIR_WGS_1_0=PIPE_BASE_DIR+'battenberg_v1.0'
PIPE_DIR_WGS_1_0_EXE=path.joinpath(PIPE_DIR_WGS_1_0, "RunCommands.sh")
PIPE_DIR_WGS_1_0_RERUN_EXE=path.joinpath(PIPE_DIR_WGS_1_0, "RunCommandsRerunFitCopynumber.sh")
PIPE_DIR_WGS_1_0_RERUN_MANUAL_EXE=path.joinpath(PIPE_DIR_WGS_1_0, "RunCommandsRerunFitCopynumberManual.sh")

PIPE_DIR_WGS_2_0_0=PIPE_BASE_DIR+"battenberg_v2.0.0/"
PIPE_DIR_WGS_2_0_0_EXE=path.joinpath(PIPE_DIR_WGS_2_0_0, "RunCommands.sh")
PIPE_DIR_WGS_2_0_0_RERUN_EXE=path.joinpath(PIPE_DIR_WGS_2_0_0, "RunCommandsRerunFitCopynumber.sh")
PIPE_DIR_WGS_2_0_0_RERUN_MANUAL_EXE=path.joinpath(PIPE_DIR_WGS_2_0_0, "RunCommandsRerunFitCopynumberManual.sh")

# "dev" points at a working checkout rather than an installed release.
PIPE_DIR_WGS_DEV="/nfs/users/nfs_s/sd11/repo/battenberg"
PIPE_DIR_WGS_DEV_EXE=path.joinpath(PIPE_DIR_WGS_DEV, "RunCommands.sh")
PIPE_DIR_WGS_DEV_RERUN_EXE=path.joinpath(PIPE_DIR_WGS_DEV, "RunCommandsRerunFitCopynumber.sh")
PIPE_DIR_WGS_DEV_RERUN_MANUAL_EXE=path.joinpath(PIPE_DIR_WGS_DEV, "RunCommandsRerunFitCopynumberManual.sh")

PIPE_DIR_SNP6_1_0=PIPE_BASE_DIR+'battenberg_snp6_v1.0'
PIPE_DIR_SNP6_1_0_EXE=path.joinpath(PIPE_DIR_SNP6_1_0, "RunCommands2014farm3_SNP6.sh")
import os import unittest from mock import Mock from path import path from xblock.field_data import DictFieldData from xmodule.x_module import ModuleSystem, XModuleDescriptor, XModuleMixin from xmodule.modulestore.inheritance import InheritanceMixin from xmodule.mako_module import MakoDescriptorSystem # Location of common test DATA directory # '../../../../edx-platform/common/test/data/' MODULE_DIR = path(__file__).dirname() DATA_DIR = path.joinpath(*MODULE_DIR.splitall()[:-4]) / 'test/data/' open_ended_grading_interface = { 'url': 'blah/', 'username': '******', 'password': '******', 'staff_grading' : 'staff_grading', 'peer_grading' : 'peer_grading', 'grading_controller' : 'grading_controller' } def get_test_system(course_id=''): """ Construct a test ModuleSystem instance.
def dp_preprocessing_pipeline(samplename, vcf_file, bam_file, bai_file, baf_file,
                              hap_info_prefix, hap_info_suffix, subclone_file,
                              rho_psi_file, fai_file, ignore_file, no_chroms,
                              no_chroms_phasing, max_distance, gender, bb_dir,
                              log_dir, run_dir, split_chroms):
    '''
    Creates a list of commands that together form the preprocessing pipeline.
    It consists of 3 separate threads (a,b,c) that come together in the last step.
        1) Get list of loci of interest from vcf file
        2) Split the loci per chromosome
        3a1) Obtain allele counts for each of the split loci files in parallel
        3a2) Concatenate the allele counts
        3b1) Perform mutation to mutation phasing for those pairs of mutations less
             then max_distance apart, per chromosome in parallel
        3b2) Concatenate the mutation to mutation phasing files
        3c1) Perform mutation to copynumber phasing for pairs less then max_distance
             apart, per chromosome in parallel
        3c2) Concatenate the mutation to copynumber phasing files
        4) Create the Dirichlet input file using all the above information

    Writes RunCommands_<samplename>.sh (chained bsub commands) into run_dir,
    marks it executable and returns its path.
    '''
    # Setup a pipeline script for this sample
    runscript = path.joinpath(run_dir, "RunCommands_" + samplename + ".sh")
    outf = open(runscript, 'w')

    # Set output names of the various steps.
    # If the pipeline is run in splits per chromosome, per-step output files get
    # a "_chr" prefix so the array jobs write to distinct files.
    afloci_file_postfix = "_loci.txt"
    mut_cn_file_prefix = samplename + "_phased_mutcn_chr"
    if split_chroms:
        loci_file_prefix = samplename + "_loci_chr"
        af_file_prefix = samplename + "_alleleFrequency_chr"
        mut_mut_file_prefix = samplename + "_phasedmuts_chr"
    else:
        loci_file_prefix = samplename + "_loci"
        af_file_prefix = samplename + "_alleleFrequency"
        mut_mut_file_prefix = samplename + "_phasedmuts"

    # Generate the loci file from vcf
    cmd = createGenerateAFLociCmd(samplename, afloci_file_postfix, vcf_file, run_dir)
    outf.write(
        generateBsubCmd("loci_" + samplename, log_dir, cmd, queue="normal", mem=1,
                        depends=None, isArray=False) + "\n")

    # Split the loci file per chromosome
    cmd = createSplitLociCmd(samplename, samplename + afloci_file_postfix,
                             samplename + "_loci_chr", ".txt", fai_file,
                             ignore_file, run_dir)
    outf.write(
        generateBsubCmd("splitLoci_" + samplename, log_dir, cmd, queue="normal", mem=1,
                        depends=["loci_" + samplename], isArray=False) + "\n")

    # Get the allele frequencies: run via a small shell wrapper so it can be an
    # LSF array job when split per chromosome.
    cmd = createGetAlleleFrequencyCmd(samplename, loci_file_prefix, bam_file,
                                      af_file_prefix, run_dir, split_chroms)
    writeSimpleShellScript(run_dir, "RunGetAlleleFrequency_" + samplename + ".sh", [cmd])
    cmd = path.joinpath(run_dir, "RunGetAlleleFrequency_" + samplename + ".sh")
    if split_chroms:
        outf.write(
            generateBsubCmd("allCount_" + samplename + _arrayJobNameExt(no_chroms),
                            log_dir, cmd, queue="normal", mem=1,
                            depends=["splitLoci_" + samplename], isArray=True) + "\n")
    else:
        outf.write(
            generateBsubCmd("allCount_" + samplename, log_dir, cmd, queue="normal", mem=1,
                            depends=["loci_" + samplename], isArray=False) + "\n")

    if split_chroms:
        # Merge the counts together into a single file
        infile_list = [samplename + "_alleleFrequency_chr" + str(chrom) + ".txt"
                       for chrom in range(1, no_chroms + 1)]
        cmd = createConcatSplitFilesCmd(samplename, infile_list,
                                        samplename + "_alleleFrequency.txt", True, run_dir)
        outf.write(
            generateBsubCmd("concCounts_" + samplename, log_dir, cmd, queue="normal", mem=1,
                            depends=["allCount_" + samplename], isArray=False) + "\n")

    '''
    ###########################################################
    Mut Mut Phasing
    ###########################################################
    '''
    cmd = createMutMutPhasingCmd(samplename, loci_file_prefix, mut_mut_file_prefix,
                                 bam_file, bai_file, max_distance, bb_dir, run_dir,
                                 split_chroms)
    writeSimpleShellScript(run_dir, "RunMutMutPhasing_" + samplename + ".sh", [cmd])
    cmd = path.joinpath(run_dir, "RunMutMutPhasing_" + samplename + ".sh")
    if split_chroms:
        outf.write(
            generateBsubCmd("mmp_" + samplename + _arrayJobNameExt(no_chroms),
                            log_dir, cmd, queue="normal", mem=2,
                            depends=["splitLoci_" + samplename], isArray=True) + "\n")
    else:
        outf.write(
            generateBsubCmd("mmp_" + samplename, log_dir, cmd, queue="normal", mem=2,
                            depends=["loci_" + samplename], isArray=False) + "\n")

    if split_chroms:
        infile_list = [samplename + "_phasedmuts_chr" + str(chrom) + ".txt"
                       for chrom in range(1, no_chroms + 1)]
        cmd = createConcatSplitFilesCmd(samplename, infile_list,
                                        samplename + "_phasedmuts.txt", True, run_dir)
        outf.write(
            generateBsubCmd("concMMP_" + samplename, log_dir, cmd, queue="normal", mem=1,
                            depends=["mmp_" + samplename], isArray=False) + "\n")

    '''
    ###########################################################
    Mut CN Phasing
    ###########################################################
    '''
    # Mut-CN phasing always runs split per chromosome: the impute output does not
    # contain chromosome information, so the R phasing step cannot split the data
    # itself. Only autosomes are phased (Y can never be phased; X is non-trivial),
    # hence the array size is no_chroms_phasing rather than no_chroms.
    cmd = createMutCnPhasingCmd(samplename, samplename + "_loci_chr", baf_file,
                                hap_info_prefix, hap_info_suffix, mut_cn_file_prefix,
                                bam_file, bai_file, max_distance, bb_dir, run_dir,
                                split_chroms)
    writeSimpleShellScript(run_dir, "RunMutCnPhasing_" + samplename + ".sh", [cmd])
    cmd = path.joinpath(run_dir, "RunMutCnPhasing_" + samplename + ".sh")
    outf.write(
        generateBsubCmd("mcp_" + samplename + _arrayJobNameExt(no_chroms_phasing),
                        log_dir, cmd, queue="normal", mem=2,
                        depends=["splitLoci_" + samplename], isArray=True) + "\n")

    # BUG FIX: this list previously ranged over the undefined name
    # `no_aut_chroms` (NameError at runtime). It now uses no_chroms_phasing,
    # matching the size of the mcp_ array job above.
    infile_list = [mut_cn_file_prefix + str(chrom) + ".txt"
                   for chrom in range(1, no_chroms_phasing + 1)]
    cmd = createConcatSplitFilesCmd(samplename, infile_list,
                                    samplename + "_phasedmutCN.txt", True, run_dir)
    outf.write(
        generateBsubCmd("concMCP_" + samplename, log_dir, cmd, queue="normal", mem=1,
                        depends=["mcp_" + samplename], isArray=False) + "\n")

    '''
    ###########################################################
    Generate DP input
    ###########################################################
    '''
    # Final step: combine counts and both phasing outputs into the Dirichlet input.
    cmd = createDpInputCmd(samplename, samplename + afloci_file_postfix,
                           samplename + "_alleleFrequency.txt", subclone_file,
                           rho_psi_file, samplename + "_phasedmuts.txt",
                           samplename + "_phasedmutCN.txt", gender, bb_dir, run_dir)
    if split_chroms:
        outf.write(
            generateBsubCmd("dpIn_" + samplename, log_dir, cmd, queue="normal", mem=2,
                            depends=["concCounts_" + samplename,
                                     "concMMP_" + samplename,
                                     "concMCP_" + samplename],
                            isArray=False) + "\n")
    else:
        outf.write(
            generateBsubCmd("dpIn_" + samplename, log_dir, cmd, queue="normal", mem=2,
                            depends=["allCount_" + samplename,
                                     "mmp_" + samplename,
                                     "concMCP_" + samplename],
                            isArray=False) + "\n")

    '''
    ###########################################################
    DP input to VCF
    ###########################################################
    '''
    cmd = createDpIn2VcfCmd(
        vcf_file,
        path.joinpath(run_dir, samplename + "_allDirichletProcessInfo.txt"),
        path.joinpath(run_dir, samplename + ".dpIn.vcf"), fai_file, ignore_file)
    outf.write(
        generateBsubCmd("dpIn2Vcf_" + samplename, log_dir, cmd, queue="normal", mem=2,
                        depends=["dpIn_" + samplename], isArray=False) + "\n")

    outf.close()

    # Make executable
    st = os.stat(runscript)
    os.chmod(runscript, st.st_mode | stat.S_IEXEC)

    return (runscript)
def read_basic_sample_infile(infile, bb_run_dir):
    '''
    Read a basic sample sheet: a tab separated table with four columns

        samplename<TAB>tumour_file<TAB>normal_file<TAB>gender

    Header lines must be commented out with a '#'; blank lines are skipped.

    If a sample name occurs on more than one row, the extra tumour/normal
    ids and files are appended to that sample's existing lists (the gender
    of the last row wins). A per-sample Battenberg run directory is derived
    as bb_run_dir/samplename.

    Returns a SampleSheet object.
    '''

    def _id_from_file(filename):
        # Sample id = file basename with a known .bam / .CEL extension
        # stripped; any other extension is kept unchanged. Decided per file,
        # so mixed .bam/.CEL tumour-normal pairs both get clean ids.
        base = path(filename).basename()
        if filename.endswith(".bam"):
            return re.sub(r"\.bam", "", base)
        if filename.endswith(".CEL"):
            return re.sub(r"\.CEL", "", base)
        return base

    tumour_ids = dict()
    tumour_bam = dict()
    normal_ids = dict()
    normal_bam = dict()
    bb_dir = dict()
    sex = dict()
    variants = dict()
    tumour_normal_pairs_id = dict()
    tumour_normal_pairs_bam = dict()
    tumour_bam2tumour_id = dict()
    normal_bam2normal_id = dict()

    # Context manager guarantees the handle is closed even on a parse error.
    with open(infile, 'r') as f:
        for line in f:
            l = line.strip()
            # Skip commented-out headers and (robustness) blank lines.
            if not l or l.startswith('#'):
                continue
            # Only four of the regular columns available in the basic format.
            c1, c3, c5, c7 = l.split("\t")
            c2 = _id_from_file(c3)               # tumour id
            c4 = _id_from_file(c5)               # normal id
            c6 = path.joinpath(bb_run_dir, c1)   # per-sample run directory
            c8 = "placeholder"                   # no variants column here

            if c1 in normal_ids:
                # Sample seen before: append this row's tumour/normal info.
                # (The previous code concatenated a bare string/tuple onto a
                # list, which raises TypeError; the intent was to append one
                # item per row.)
                tumour_ids[c1].append(c2)
                tumour_bam[c1].append(c3)
                normal_ids[c1].append(c4)
                normal_bam[c1].append(c5)
                bb_dir[c1].append((c2, c6))
                sex[c1] = c7  # the last row's gender wins
                variants[c1].append(c8)
                tumour_normal_pairs_id[c1].append((c2, c4))
                tumour_normal_pairs_bam[c1].append((c3, c5))
            else:
                # New sample: start one-element lists for every per-row field.
                tumour_ids[c1] = [c2]
                tumour_bam[c1] = [c3]
                normal_ids[c1] = [c4]
                normal_bam[c1] = [c5]
                bb_dir[c1] = [(c2, c6)]
                sex[c1] = c7
                variants[c1] = [c8]
                tumour_normal_pairs_id[c1] = [(c2, c4)]
                tumour_normal_pairs_bam[c1] = [(c3, c5)]

            # Keep the file -> id lookups in sync for every row, not only the
            # first occurrence of a sample (previously repeated samples' extra
            # bams were never mapped).
            tumour_bam2tumour_id[c3] = c2
            normal_bam2normal_id[c5] = c4

    return SampleSheet(normal_ids, tumour_ids, sex, normal_bam, tumour_bam,
                       bb_dir, variants, tumour_normal_pairs_id,
                       tumour_normal_pairs_bam, tumour_bam2tumour_id,
                       normal_bam2normal_id)