def load_jobs(self):
    """Queue the jobs needed to evaluate every candidate model.

    For each entry in ``self.models`` a phyml job named "phyml-bionj" is
    built from a copy of ``self.args`` with "--model" set, flagged as
    "phyml" and appended to ``self.jobs``.  When ``self.lk_mode`` equals
    "raxml", a dependent "raxml-tree-optimize" job is queued as well; its
    "-t" argument points at the "_phyml_tree.txt" file inside the phyml
    job's directory under ``GLOBALS["tasks_dir"]``.
    """
    for model in self.models:
        phyml_args = self.args.copy()
        phyml_args["--model"] = model
        phyml_job = Job(
            self.conf["app"]["phyml"],
            phyml_args,
            parent_ids=[self.nodeid],
            jobname="phyml-bionj",
        )
        phyml_job.flag = "phyml"
        self.jobs.append(phyml_job)

        if self.lk_mode != "raxml":
            continue

        # The raxml job takes the tree written by the phyml job above as
        # its "-t" input, so it is registered as depending on that job.
        optimizer_args = {
            "-f": "e",
            "-s": self.alg_basename,
            "-m": "PROTGAMMA%s" % model,
            "-n": self.alg_basename + "." + model,
            "-t": os.path.join(
                GLOBALS["tasks_dir"],
                phyml_job.jobid,
                self.alg_basename + "_phyml_tree.txt",
            ),
        }
        optimizer_job = Job(
            self.conf["app"]["raxml"],
            optimizer_args,
            parent_ids=[phyml_job.jobid],
            jobname="raxml-tree-optimize",
        )
        optimizer_job.dependencies.add(phyml_job)
        optimizer_job.flag = "raxml"
        optimizer_job.model = model
        self.jobs.append(optimizer_job)

    log.log(26, "Models to test %s", self.models)
def load_jobs(self):
    """Queue the single job that processes the FASTA alignment.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    The job reads the alignment from ``GLOBALS["input_dir"]`` ("-in") and
    writes its result to "clean.alg.fasta" ("-out").
    """
    app_key = self.conf[self.confname]["_app"]

    job_args = self.args.copy()
    job_args["-in"] = pjoin(GLOBALS["input_dir"], self.alg_fasta_file)
    job_args["-out"] = "clean.alg.fasta"

    cleaner_job = Job(
        self.conf["app"][app_key],
        job_args,
        parent_ids=[self.nodeid],
    )
    # The alignment must be registered as an input of the job.
    cleaner_job.add_input_file(self.alg_fasta_file)
    self.jobs.append(cleaner_job)
def load_jobs(self):
    """Queue the one alignment job this task needs.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    The sequences are read from ``GLOBALS["input_dir"]`` ("-in") and the
    alignment is written to "alg.fasta" ("-out").
    """
    app_key = self.conf[self.confname]["_app"]
    sequences_path = pjoin(GLOBALS["input_dir"], self.multiseq_file)

    aligner_args = OrderedDict(self.args)
    aligner_args["-in"] = sequences_path
    aligner_args["-out"] = "alg.fasta"

    # A single job is enough to run this task.
    aligner_job = Job(
        self.conf["app"][app_key],
        aligner_args,
        parent_ids=[self.nodeid],
    )
    aligner_job.add_input_file(self.multiseq_file)
    self.jobs.append(aligner_job)
def load_jobs(self):
    """Queue the one alignment job this task needs.

    The input path (under ``GLOBALS["input_dir"]``) and the output name
    "alg.fasta" are passed together as one space-separated value stored
    under the empty-string key of the argument mapping.
    """
    app_key = self.conf[self.confname]["_app"]
    sequences_path = pjoin(GLOBALS["input_dir"], self.multiseq_file)

    aligner_args = OrderedDict(self.args)
    # Input and output travel as a single option-less value.
    aligner_args[''] = "%s %s" % (sequences_path, "alg.fasta")

    # A single job is enough to run this task.
    aligner_job = Job(
        self.conf["app"][app_key],
        aligner_args,
        parent_ids=[self.nodeid],
    )
    aligner_job.add_input_file(self.multiseq_file)
    self.jobs.append(aligner_job)
def load_jobs(self):
    """Queue the one alignment job this task needs.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    The sequences are read from ``GLOBALS["input_dir"]`` ("-i") and the
    alignment is written to "alg.fasta" ("-o").  The job's core count is
    taken from ``self.conf["threading"]`` and defaults to 1 when the
    application has no entry there.
    """
    app_key = self.conf[self.confname]["_app"]

    # A single job is enough to run this task.
    aligner_args = OrderedDict(self.args)
    aligner_args["-i"] = pjoin(GLOBALS["input_dir"], self.multiseq_file)
    aligner_args["-o"] = "alg.fasta"

    aligner_job = Job(
        self.conf["app"][app_key],
        aligner_args,
        parent_ids=[self.nodeid],
    )
    aligner_job.cores = self.conf["threading"].get(app_key, 1)
    aligner_job.add_input_file(self.multiseq_file)
    self.jobs.append(aligner_job)
def load_jobs(self):
    """Queue one phyml job per model, plus an optional raxml follow-up.

    Every model in ``self.models`` gets a phyml job (category "bionj")
    whose alignment is registered as an input inside the job's own
    directory.  When ``self.lk_mode`` is "raxml", a second job (category
    "raxml") is queued that depends on the phyml job and reads both the
    alignment and the "_phyml_tree.txt" file from the phyml job directory.
    """
    settings = self.conf
    for model in self.models:
        phyml_args = self.args.copy()
        phyml_args["--model"] = model

        phyml_job = Job(
            settings["app"]["phyml"],
            phyml_args,
            parent_ids=[self.nodeid],
        )
        phyml_job.jobname += "-bionj-" + model
        phyml_job.jobcat = "bionj"
        phyml_job.add_input_file(self.alg_phylip_file, phyml_job.jobdir)
        self.jobs.append(phyml_job)

        if self.lk_mode != "raxml":
            continue

        # Both raxml inputs live in the phyml job directory.
        lk_args = {
            "-f": "e",
            "-s": pjoin(phyml_job.jobdir, self.alg_phylip_file),
            "-m": "PROTGAMMA%s" % model,
            "-n": self.alg_phylip_file + "." + model,
            "-t": pjoin(
                phyml_job.jobdir,
                self.alg_phylip_file + "_phyml_tree.txt",
            ),
        }
        lk_job = Job(
            settings["app"]["raxml"],
            lk_args,
            parent_ids=[phyml_job.jobid],
        )
        lk_job.jobname += "-lk-optimize"
        lk_job.dependencies.add(phyml_job)
        lk_job.model = model
        lk_job.jobcat = "raxml"
        self.jobs.append(lk_job)
def load_jobs(self):
    """Queue the tcoffee job that combines all listed alignments.

    Every entry of ``self.all_alg_files`` is joined onto
    ``GLOBALS["input_dir"]`` and the resulting paths are passed,
    space-separated, as "-aln".  The output goes to "mcoffee.fasta" and
    each alignment is registered as an input file of the job.
    """
    combine_args = self.args.copy()
    combine_args["-outfile"] = "mcoffee.fasta"
    combine_args["-aln"] = ' '.join(
        [pjoin(GLOBALS["input_dir"], alg_id) for alg_id in self.all_alg_files]
    )

    combine_job = Job(
        self.conf["app"]["tcoffee"],
        combine_args,
        parent_ids=self.parent_ids,
    )
    for alg_id in self.all_alg_files:
        combine_job.add_input_file(alg_id)
    self.jobs.append(combine_job)
def load_jobs(self):
    """Queue the single alignment job for this task.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    The input sequences (under ``GLOBALS["input_dir"]``) are stored under
    the empty-string key, added last to the ordered argument mapping.

    The job's core count is read from ``self.conf["threading"]`` and falls
    back to 1 when the application has no entry there, instead of raising
    KeyError.
    """
    appname = self.conf[self.confname]["_app"]
    args = OrderedDict(self.args)
    # Mafft redirects the resulting alignment to standard output. The
    # order of arguments is important: the input file must be the last
    # one.
    args[""] = pjoin(GLOBALS["input_dir"], self.multiseq_file)

    job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
    job.add_input_file(self.multiseq_file)
    # Default to a single core when no threading value is configured.
    job.cores = self.conf["threading"].get(appname, 1)
    self.jobs.append(job)
def seq_reverser_job(multiseq_file, outfile, parent_ids, trimal_bin):
    """Build a job that reverses all sequences in an MSF or MSA.

    :param multiseq_file: value passed to the tool as "-in".
    :param outfile: value passed to the tool as "-out".
    :param parent_ids: forwarded to ``Job`` as ``parent_ids``.
    :param trimal_bin: executable handed to ``Job``.
    :returns: the new job, named "TrimalAlgReverser".
    """
    return Job(
        trimal_bin,
        {
            "-in": multiseq_file,
            "-out": outfile,
            "-reverse": "",
            "-fasta": "",
        },
        "TrimalAlgReverser",
        parent_ids=parent_ids,
    )
def load_jobs(self):
    """Queue the three chained jobs of the uhire alignment workflow.

    1. "usearch-uhire" splits the original sequences into clusters.
    2. "uhire_muscle_algs" runs a shell loop that aligns every cluster
       (and the master file) with muscle into ``clumpalgs/``.
    3. "usearch-umerge" merges the cluster alignments into "alg.fasta".

    Each job depends on the previous one; all three are appended to
    ``self.jobs`` in that order.
    """
    # Step 1: split the original set of sequences into clusters.
    split_args = {
        "--clumpfasta": "./",
        "--maxclump": "%s" % self.conf["uhire"]["_maxclump"],
        "--usersort": "",
        "--uhire": self.multiseq_file,
    }
    split_job = Job(
        self.conf["app"]["usearch"],
        split_args,
        "usearch-uhire",
        parent_ids=[self.nodeid],
    )

    # Step 2: align every cluster independently with muscle. The whole
    # shell command is used as the job binary, so it is very important
    # that the command has no trailing lines at its end.
    split_dir = os.path.join("../", split_job.jobid)
    shell_cmd = """ (mkdir clumpalgs/; for fname in %s/clump.* %s/master; do %s -in $fname -out clumpalgs/`basename $fname` -maxiters %s; done;) """ % (
        split_dir,
        split_dir,
        self.conf["app"]["muscle"],
        self.conf["uhire"]["_muscle_maxiters"],
    )
    align_job = Job(
        shell_cmd,
        {},
        "uhire_muscle_algs",
        parent_ids=[split_job.jobid],
    )
    align_job.dependencies.add(split_job)

    # Step 3: merge the cluster alignments into a single one.
    merge_args = {
        "--maxlen": self.conf["uhire"]["_max_seq_length"],
        "--mergeclumps": "../%s/clumpalgs/" % align_job.jobid,
        "--output": "alg.fasta",
    }
    merge_job = Job(
        self.conf["app"]["usearch"],
        merge_args,
        "usearch-umerge",
        parent_ids=[align_job.jobid],
    )
    merge_job.dependencies.add(align_job)

    # Add all jobs to the task queue.
    self.jobs.extend([split_job, align_job, merge_job])
def load_jobs(self):
    """Queue the single tree-building job for this task.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    Model, data type and input alignment are set from ``self.model``,
    ``self.seqtype`` and ``self.alg_phylip_file``.  When
    ``self.constrain_tree`` is set it is passed as both
    "--constraint_tree" and "-u" and registered as an input file;
    otherwise any "--constraint_tree" entry is removed from the arguments.

    Removing the entry no longer raises KeyError when the option was not
    present in ``self.args`` to begin with.
    """
    appname = self.conf[self.confname]["_app"]
    args = OrderedDict(self.args)
    args["--model"] = self.model
    args["--datatype"] = self.seqtype
    args["--input"] = self.alg_phylip_file
    if self.constrain_tree:
        args["--constraint_tree"] = self.constrain_tree
        args["-u"] = self.constrain_tree
    else:
        # Tolerate argument sets that never defined the option.
        args.pop("--constraint_tree", None)

    job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
    # Input files are registered inside the job's own directory.
    job.add_input_file(self.alg_phylip_file, job.jobdir)
    if self.constrain_tree:
        job.add_input_file(self.constrain_tree, job.jobdir)
    job.jobname += "-" + self.model
    self.jobs.append(job)
def load_jobs(self):
    """Queue the per-model phyml jobs and optional raxml follow-ups.

    For each model in ``self.models`` a phyml job (category "bionj") is
    created with the alignment registered as an input in the job's own
    directory.  If ``self.lk_mode`` is "raxml", a dependent job (category
    "raxml") is added that reads the alignment and the "_phyml_tree.txt"
    file from the phyml job directory.
    """
    app_conf = self.conf
    for candidate in self.models:
        bionj_args = self.args.copy()
        bionj_args["--model"] = candidate

        tree_job = Job(
            app_conf["app"]["phyml"],
            bionj_args,
            parent_ids=[self.nodeid],
        )
        tree_job.jobname += "-bionj-" + candidate
        tree_job.jobcat = "bionj"
        tree_job.add_input_file(self.alg_phylip_file, tree_job.jobdir)
        self.jobs.append(tree_job)

        if self.lk_mode == "raxml":
            # Both raxml inputs are read from the phyml job directory.
            alignment_path = pjoin(tree_job.jobdir, self.alg_phylip_file)
            phyml_tree_path = pjoin(
                tree_job.jobdir,
                self.alg_phylip_file + "_phyml_tree.txt",
            )
            optimize_args = {
                "-f": "e",
                "-s": alignment_path,
                "-m": "PROTGAMMA%s" % candidate,
                "-n": self.alg_phylip_file + "." + candidate,
                "-t": phyml_tree_path,
            }
            optimize_job = Job(
                app_conf["app"]["raxml"],
                optimize_args,
                parent_ids=[tree_job.jobid],
            )
            optimize_job.jobname += "-lk-optimize"
            optimize_job.dependencies.add(tree_job)
            optimize_job.model = candidate
            optimize_job.jobcat = "raxml"
            self.jobs.append(optimize_job)
def load_jobs(self):
    """Create the phyml (and optional raxml) jobs for every model.

    Each model in ``self.models`` yields a "phyml-bionj" job flagged
    "phyml".  With ``self.lk_mode == "raxml"`` a "raxml-tree-optimize" job
    flagged "raxml" is added; it depends on the phyml job and takes the
    "_phyml_tree.txt" file from that job's directory under
    ``GLOBALS["tasks_dir"]`` as "-t".  The tested models are logged once
    all jobs are queued.
    """
    for candidate in self.models:
        bionj_args = self.args.copy()
        bionj_args["--model"] = candidate
        bionj_job = Job(
            self.conf["app"]["phyml"],
            bionj_args,
            parent_ids=[self.nodeid],
            jobname="phyml-bionj",
        )
        bionj_job.flag = "phyml"
        self.jobs.append(bionj_job)

        if self.lk_mode == "raxml":
            # Tree file expected in the phyml job's directory.
            bionj_tree = os.path.join(
                GLOBALS["tasks_dir"],
                bionj_job.jobid,
                self.alg_basename + "_phyml_tree.txt",
            )
            lk_args = {
                "-f": "e",
                "-s": self.alg_basename,
                "-m": "PROTGAMMA%s" % candidate,
                "-n": self.alg_basename + "." + candidate,
                "-t": bionj_tree,
            }
            lk_job = Job(
                self.conf["app"]["raxml"],
                lk_args,
                parent_ids=[bionj_job.jobid],
                jobname="raxml-tree-optimize",
            )
            lk_job.dependencies.add(bionj_job)
            lk_job.flag = "raxml"
            lk_job.model = candidate
            self.jobs.append(lk_job)

    log.log(26, "Models to test %s", self.models)
def load_jobs(self):
    """Queue the single tree-building job for this task.

    The application key comes from ``self.conf[self.confname]["_app"]``.
    "--model", "--datatype" and "--input" are filled from ``self.model``,
    ``self.seqtype`` and ``self.alg_phylip_file``.  A configured
    ``self.constrain_tree`` is passed as "--constraint_tree" and "-u" and
    registered as an input file; without one, the "--constraint_tree"
    entry is dropped from the arguments.

    Dropping the entry is tolerant of argument sets that never contained
    it, where a bare ``del`` would raise KeyError.
    """
    appname = self.conf[self.confname]["_app"]
    args = OrderedDict(self.args)
    args["--model"] = self.model
    args["--datatype"] = self.seqtype
    args["--input"] = self.alg_phylip_file
    if self.constrain_tree:
        args["--constraint_tree"] = self.constrain_tree
        args["-u"] = self.constrain_tree
    else:
        # Safe removal: the option may be missing from self.args.
        args.pop("--constraint_tree", None)

    job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
    # Input files are registered inside the job's own directory.
    job.add_input_file(self.alg_phylip_file, job.jobdir)
    if self.constrain_tree:
        job.add_input_file(self.constrain_tree, job.jobdir)
    job.jobname += "-" + self.model
    self.jobs.append(job)
def load_jobs(self):
    """Queue the single tree-building job for this task.

    Works on a copy of ``self.args``: "-wag" is always dropped, an
    optional constraint tree is passed as "-constraints", and the
    alignment path (under ``GLOBALS["input_dir"]``) is added as a key
    with an empty value.

    The job's core count is read from ``self.conf["threading"]`` and falls
    back to 1 when the application has no entry there, instead of raising
    KeyError.
    """
    args = self.args.copy()
    # Remove "-wag" if present; a missing key is not an error.
    args.pop("-wag", None)

    if self.constrain_tree:
        args["-constraints"] = pjoin(GLOBALS["input_dir"], self.constrain_tree)

    # The alignment is passed as a bare value, hence the empty option text.
    args[pjoin(GLOBALS["input_dir"], self.alg_phylip_file)] = ""

    appname = self.conf[self.confname]["_app"]
    job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
    # Default to a single core when no threading value is configured.
    job.cores = self.conf["threading"].get(appname, 1)
    if self.constrain_tree:
        job.add_input_file(self.constrain_tree)
    job.add_input_file(self.alg_phylip_file)
    self.jobs.append(job)
def load_jobs(self):
    """Build the job/task queue for the meta-aligner workflow.

    Items are appended to ``self.jobs`` in this order:

    1. A reverser job that writes a "_reversed" copy of the input
       sequences.
    2. For every aligner listed in
       ``self.conf[self.confname]["_aligners"]``: an aligner task on the
       original sequences, an aligner task on the reversed sequences
       (depends on step 1), and a reverser job that writes a "_restored"
       version of the reversed alignment.
    3. An ``MCoffee`` task that receives all alignment names and depends
       on everything queued before it.
    4. If ``_alg_trimming`` is enabled, a trimming job named
       "McoffeeTrimming" that depends on the MCoffee task.
    """
    readal_bin = self.conf["app"]["readal"]
    trimal_bin = self.conf["app"]["trimal"]
    input_dir = GLOBALS["input_dir"]
    multiseq_file = pjoin(input_dir, self.multiseq_file)
    multiseq_file_r = pjoin(input_dir, self.multiseq_file+"_reversed")

    # Note: the readal binary is passed in the helper's ``trimal_bin`` slot.
    first = seq_reverser_job(multiseq_file, multiseq_file_r,
                             [self.nodeid], readal_bin)
    first.add_input_file(self.multiseq_file)
    self.jobs.append(first)

    all_alg_names = []
    mcoffee_parents = []
    for aligner_name in self.conf[self.confname]["_aligners"]:
        # Drop the first character of the configured name (presumably a
        # marker prefix pointing at a config section -- TODO confirm).
        aligner_name = aligner_name[1:]
        _classname = APP2CLASS[self.conf[aligner_name]["_app"]]
        # NOTE(review): level=-1 is only accepted by Python 2's
        # __import__; Python 3 requires level >= 0.
        _module = __import__(CLASS2MODULE[_classname], globals(), locals(), [], -1)
        _aligner = getattr(_module, _classname)

        # Normal alg
        task1 = _aligner(self.nodeid, self.multiseq_file, self.seqtype,
                         self.conf, aligner_name)
        task1.size = self.size
        self.jobs.append(task1)
        all_alg_names.append(task1.alg_fasta_file)

        # Alg of the reverse
        task2 = _aligner(self.nodeid, self.multiseq_file+"_reversed",
                         self.seqtype, self.conf, aligner_name)
        task2.size = self.size
        task2.dependencies.add(first)
        self.jobs.append(task2)

        # Restore reverse alg
        reverse_out = pjoin(input_dir, task2.alg_fasta_file)
        task3 = seq_reverser_job(reverse_out,
                                 reverse_out+"_restored",
                                 [task2.taskid], readal_bin)
        task3.dependencies.add(task2)
        task3.add_input_file(task2.alg_fasta_file)
        # Unlike task1's entry, this name is a full path under input_dir.
        all_alg_names.append(reverse_out+"_restored")
        self.jobs.append(task3)
        # Only the two aligner tasks are recorded as MCoffee parents.
        mcoffee_parents.extend([task1.taskid, task2.taskid])

    # Combine signal from all algs using Mcoffee
    mcoffee_task = MCoffee(self.nodeid, self.seqtype, all_alg_names,
                           self.conf, self.confname,
                           parent_ids=mcoffee_parents)
    # reversed algs are not actually saved into db, but it should
    # be present since the reverser job is always executed
    # The dependency set is a snapshot of everything queued so far.
    mcoffee_task.dependencies.update(list(self.jobs))
    self.jobs.append(mcoffee_task)

    if self.conf[self.confname]["_alg_trimming"]:
        # NOTE(review): raises ZeroDivisionError when no aligner is
        # configured (all_alg_names is empty).
        trimming_cutoff = 1.0 / len(all_alg_names)
        targs = {}
        targs["-forceselect"] = pjoin(input_dir, mcoffee_task.alg_fasta_file)
        targs["-compareset"] = pjoin(input_dir, mcoffee_task.alg_list_file)
        targs["-out"] = "mcoffee.trimmed.fasta"
        targs["-fasta"] = ""
        targs["-ct"] = trimming_cutoff
        trim_job = Job(trimal_bin, targs, parent_ids=[mcoffee_task.taskid])
        trim_job.jobname = "McoffeeTrimming"
        trim_job.dependencies.add(mcoffee_task)
        trim_job.alg_fasta_file = targs["-out"]
        # Register every individual alignment plus the MCoffee outputs.
        for key in all_alg_names:
            trim_job.add_input_file(key)
        trim_job.add_input_file(mcoffee_task.alg_fasta_file)
        trim_job.add_input_file(mcoffee_task.alg_list_file)
        self.jobs.append(trim_job)
def load_jobs(self):
    """Queue the RAxML tree search and the requested branch-support jobs.

    A tree-search job is always created, and ``self.out_tree_file`` is set
    to the "RAxML_bestTree.<alignment>" path inside its job directory.
    What follows depends on ``self.bootstrap``:

    * "alrt": one extra RAxML job ("-f J") run on that tree, stored in
      ``self.alrt_job``.
    * "alrt_phyml": one phyml job run on that tree, stored in
      ``self.alrt_job``.
    * anything else: the value is converted with ``int()`` and used as
      "-N"; a bootstrap job and a "-f b" job that reads the bootstrap
      output through "-z" are stored in ``self.boot_job`` and
      ``self.bootd_job``.
    """
    alg_file = self.alg_phylip_file
    constraint = self.constrain_tree
    partitions = self.partitions_file
    input_dir = GLOBALS["input_dir"]

    # --- Tree search ---------------------------------------------------
    search_args = OrderedDict(self.args)
    search_args["-s"] = pjoin(input_dir, alg_file)
    search_args["-m"] = self.model_string
    search_args["-n"] = alg_file
    if constraint:
        log.log(24, "Using constrain tree %s" % constraint)
        search_args["-g"] = pjoin(input_dir, constraint)
    if partitions:
        log.log(24, "Using alg partitions %s" % partitions)
        search_args["-q"] = pjoin(input_dir, partitions)

    search_job = Job(self.raxml_bin, search_args, parent_ids=[self.nodeid])
    search_job.jobname += "-" + self.model_string
    search_job.cores = self.threads
    # Input files required by the tree search.
    search_job.add_input_file(alg_file)
    if constraint:
        search_job.add_input_file(constraint)
    if partitions:
        search_job.add_input_file(partitions)
    self.jobs.append(search_job)

    best_tree = os.path.join(search_job.jobdir, "RAxML_bestTree." + alg_file)
    self.out_tree_file = best_tree

    # --- Branch supports -----------------------------------------------
    support_mode = self.bootstrap
    if support_mode == "alrt":
        support_args = search_job.args.copy()
        if constraint:
            del support_args["-g"]
        if partitions:
            support_args["-q"] = search_args["-q"]
        support_args["-f"] = "J"
        support_args["-t"] = best_tree

        support_job = Job(
            self.raxml_bin,
            support_args,
            parent_ids=[search_job.jobid],
        )
        support_job.jobname += "-alrt"
        support_job.dependencies.add(search_job)
        support_job.cores = self.threads
        # Input files required by the support job.
        support_job.add_input_file(alg_file)
        if partitions:
            support_job.add_input_file(partitions)
        self.jobs.append(support_job)
        self.alrt_job = support_job

    elif support_mode == "alrt_phyml":
        phyml_args = {
            "-o": "n",
            "-i": alg_file,
            "--bootstrap": "-2",
            "-d": self.seqtype,
            "-u": best_tree,
            "--model": self.model,
            "--quiet": "",
            "--no_memory_check": "",
        }
        # The constraint tree is not forwarded to phyml in this mode.
        phyml_job = Job(
            self.conf["app"]["phyml"],
            phyml_args,
            parent_ids=[search_job.jobid],
        )
        # The alignment is registered twice: once targeting the job
        # directory and once with the default destination.
        phyml_job.add_input_file(alg_file, phyml_job.jobdir)
        phyml_job.jobname += "-alrt"
        phyml_job.dependencies.add(search_job)
        phyml_job.add_input_file(alg_file)
        self.jobs.append(phyml_job)
        self.alrt_job = phyml_job

    else:
        # Bootstrap replicates.
        replicate_args = search_job.args.copy()
        replicate_args["-n"] = "bootstraps." + replicate_args["-n"]
        replicate_args["-N"] = int(support_mode)
        replicate_args["-b"] = 31416
        replicate_job = Job(
            self.raxml_bin,
            replicate_args,
            parent_ids=[search_job.jobid],
        )
        replicate_job.jobname += "-%d-bootstraps" % (replicate_args["-N"])
        replicate_job.dependencies.add(search_job)
        replicate_job.cores = self.threads
        # Input files required by the bootstrap job.
        replicate_job.add_input_file(alg_file)
        if constraint:
            replicate_job.add_input_file(constraint)
        if partitions:
            replicate_job.add_input_file(partitions)
        self.jobs.append(replicate_job)

        # Draw the bootstrap values on top of the best tree.
        draw_args = search_job.args.copy()
        if constraint:
            del draw_args["-g"]
        if partitions:
            del draw_args["-q"]
        draw_args["-n"] = "bootstrapped." + search_job.args["-n"]
        draw_args["-f"] = "b"
        draw_args["-t"] = best_tree
        draw_args["-z"] = pjoin(
            replicate_job.jobdir,
            "RAxML_bootstrap." + replicate_job.args["-n"],
        )
        draw_job = Job(
            self.raxml_bin,
            draw_args,
            parent_ids=[search_job.jobid],
        )
        draw_job.jobname += "-bootstrapped"
        draw_job.dependencies.add(replicate_job)
        draw_job.cores = self.threads
        self.jobs.append(draw_job)

        self.boot_job = replicate_job
        self.bootd_job = draw_job
def load_jobs(self):
    """Queue the RAxML tree job and the configured support jobs.

    The tree job is always queued first, and ``self.out_tree_file`` is set
    to "RAxML_bestTree.<alignment>" inside that job's directory.
    ``self.bootstrap`` then selects the follow-up work:

    * "alrt": a RAxML job with "-f J" on that tree (``self.alrt_job``).
    * "alrt_phyml": a phyml job on that tree (``self.alrt_job``).
    * otherwise: ``int(self.bootstrap)`` is used as "-N" for a bootstrap
      job (``self.boot_job``), followed by a "-f b" job that reads the
      bootstrap output through "-z" (``self.bootd_job``).
    """
    alignment = self.alg_phylip_file
    ctree = self.constrain_tree
    parts = self.partitions_file

    ml_args = OrderedDict(self.args)
    ml_args["-s"] = pjoin(GLOBALS["input_dir"], alignment)
    ml_args["-m"] = self.model_string
    ml_args["-n"] = alignment
    if ctree:
        log.log(24, "Using constrain tree %s" % ctree)
        ml_args["-g"] = pjoin(GLOBALS["input_dir"], ctree)
    if parts:
        log.log(24, "Using alg partitions %s" % parts)
        ml_args["-q"] = pjoin(GLOBALS["input_dir"], parts)

    ml_job = Job(self.raxml_bin, ml_args, parent_ids=[self.nodeid])
    ml_job.jobname += "-" + self.model_string
    ml_job.cores = self.threads
    # Register the input files the tree job needs.
    ml_job.add_input_file(alignment)
    for optional_input in (ctree, parts):
        if optional_input:
            ml_job.add_input_file(optional_input)
    self.jobs.append(ml_job)

    self.out_tree_file = os.path.join(
        ml_job.jobdir, "RAxML_bestTree." + alignment)

    if self.bootstrap == "alrt":
        alrt_opts = ml_job.args.copy()
        if ctree:
            del alrt_opts["-g"]
        if parts:
            alrt_opts["-q"] = ml_args["-q"]
        alrt_opts["-f"] = "J"
        alrt_opts["-t"] = self.out_tree_file

        alrt = Job(self.raxml_bin, alrt_opts, parent_ids=[ml_job.jobid])
        alrt.jobname += "-alrt"
        alrt.dependencies.add(ml_job)
        alrt.cores = self.threads
        # Register the input files the support job needs.
        alrt.add_input_file(alignment)
        if parts:
            alrt.add_input_file(parts)
        self.jobs.append(alrt)
        self.alrt_job = alrt

    elif self.bootstrap == "alrt_phyml":
        alrt_opts = {
            "-o": "n",
            "-i": alignment,
            "--bootstrap": "-2",
            "-d": self.seqtype,
            "-u": self.out_tree_file,
            "--model": self.model,
            "--quiet": "",
            "--no_memory_check": "",
        }
        # No constraint tree is passed to phyml in this mode.
        alrt = Job(self.conf["app"]["phyml"], alrt_opts,
                   parent_ids=[ml_job.jobid])
        # First registration targets the job directory; the second one
        # below uses the default destination.
        alrt.add_input_file(alignment, alrt.jobdir)
        alrt.jobname += "-alrt"
        alrt.dependencies.add(ml_job)
        alrt.add_input_file(alignment)
        self.jobs.append(alrt)
        self.alrt_job = alrt

    else:
        # Bootstrap calculation.
        boot_opts = ml_job.args.copy()
        boot_opts["-n"] = "bootstraps." + boot_opts["-n"]
        boot_opts["-N"] = int(self.bootstrap)
        boot_opts["-b"] = 31416
        boot = Job(self.raxml_bin, boot_opts, parent_ids=[ml_job.jobid])
        boot.jobname += "-%d-bootstraps" % (boot_opts["-N"])
        boot.dependencies.add(ml_job)
        boot.cores = self.threads
        # Register the input files the bootstrap job needs.
        boot.add_input_file(alignment)
        for optional_input in (ctree, parts):
            if optional_input:
                boot.add_input_file(optional_input)
        self.jobs.append(boot)

        # Bootstrap values drawn on top of the best tree.
        drawn_opts = ml_job.args.copy()
        if ctree:
            del drawn_opts["-g"]
        if parts:
            del drawn_opts["-q"]
        drawn_opts["-n"] = "bootstrapped." + ml_job.args["-n"]
        drawn_opts["-f"] = "b"
        drawn_opts["-t"] = self.out_tree_file
        drawn_opts["-z"] = pjoin(
            boot.jobdir, "RAxML_bootstrap." + boot.args["-n"])
        drawn = Job(self.raxml_bin, drawn_opts, parent_ids=[ml_job.jobid])
        drawn.jobname += "-bootstrapped"
        drawn.dependencies.add(boot)
        drawn.cores = self.threads
        self.jobs.append(drawn)

        self.boot_job = boot
        self.bootd_job = drawn
def load_jobs(self):
    """Queue the single jmodeltest job for this task.

    The job receives ``self.args`` directly (no copy is made) and lists
    ``self.nodeid`` as its only parent.
    """
    jmodeltest_bin = self.conf["app"]["jmodeltest"]
    model_job = Job(jmodeltest_bin, self.args, parent_ids=[self.nodeid])
    self.jobs.append(model_job)