def control_c(_signal, _frame):
    """Interactive SIGINT (Ctrl+C) handler.

    Commits the database, shows a small menu and acts on the user's choice:

    * ``q`` -- raise ``KeyboardInterrupt``.
    * ``v`` -- ask for a new verbosity digit and set the matching log level.
    * ``d`` -- arm a one-second SIGALRM whose handler is ``debug``.
    * ``c`` -- do nothing and resume execution.

    The parameters are the signal number and the interrupted stack frame
    passed by the ``signal`` module; neither is used here.

    Raises:
        KeyboardInterrupt: when the user chooses ``q``.
    """
    # Ignore further Ctrl+C presses while the menu is being displayed.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    db.commit()

    # Logging level -> verbosity digit offered to the user.
    ver = {28: "0", 26: "1", 24: "2", 22: "3", 20: "4", 10: "5"}
    ver_level = log.level

    # Single-argument parenthesised prints produce the same output under the
    # Python 2 print statement used by this file.
    print('\n\nYou pressed Ctrl+C!')
    print('q) quit')
    print('v) change verbosity level: %s' % (ver.get(ver_level, ver_level),))
    print('d) enter debug mode')
    print('c) continue execution')

    key = ask(" Choose:", ["q", "v", "d", "c"])
    if key == "q":
        raise KeyboardInterrupt
    elif key == "d":
        # NOTE(review): this path returns before the SIGINT handler is
        # reinstalled below, so Ctrl+C stays ignored afterwards -- confirm
        # that this is intended.
        signal.signal(signal.SIGALRM, debug)
        signal.alarm(1)
        return
    elif key == "v":
        vl = ask("new level", sorted(ver.values()))
        # Levels sorted in descending order line up with digits "0".."5".
        new_level = sorted(ver.keys(), reverse=True)[int(vl)]
        log.setLevel(new_level)
    # A second, unreachable ``elif key == "d"`` branch (pdb.set_trace) used to
    # live here; it could never run because "d" is handled above.

    # Re-arm this handler for the next Ctrl+C.
    signal.signal(signal.SIGINT, control_c)
def schedule(workflow_task_processor, pending_tasks, schedule_time, execution, debug, norender): # Adjust debug mode if debug == "all": log.setLevel(10) pending_tasks = set(pending_tasks) ## =================================== ## INITIALIZE BASIC VARS execution, run_detached = execution thread2tasks = defaultdict(list) for task in pending_tasks: thread2tasks[task.configid].append(task) expected_threads = set(thread2tasks.keys()) past_threads = {} thread_errors = defaultdict(list) ## END OF VARS AND SHORTCUTS ## =================================== cores_total = GLOBALS["_max_cores"] if cores_total > 0: job_queue = Queue() back_launcher = Process(target=background_job_launcher, args=(job_queue, run_detached, GLOBALS["launch_time"], cores_total)) back_launcher.start() else: job_queue = None back_launcher = None GLOBALS["_background_scheduler"] = back_launcher GLOBALS["_job_queue"] = job_queue # Captures Ctrl-C for debuging DEBUG #signal.signal(signal.SIGINT, control_c) last_report_time = None BUG = set() try: # Enters into task scheduling while pending_tasks: wtime = schedule_time # ask SGE for running jobs if execution == "sge": sgeid2jobs = db.get_sge_tasks() qstat_jobs = sge.qstat() else: qstat_jobs = None # Show summary of pending tasks per thread thread2tasks = defaultdict(list) for task in pending_tasks: thread2tasks[task.configid].append(task) set_logindent(0) log.log(28, "@@13: Updating tasks status:@@1: (%s)" % (ctime())) info_lines = [] for tid, tlist in thread2tasks.iteritems(): threadname = GLOBALS[tid]["_name"] sizelist = ["%s" %getattr(_ts, "size", "?") for _ts in tlist] info = "Thread @@13:%s@@1:: pending tasks: @@8:%s@@1: of sizes: %s" %( threadname, len(tlist), ', '.join(sizelist)) info_lines.append(info) for line in info_lines: log.log(28, line) if GLOBALS["email"] and last_report_time is None: last_report_time = time() send_mail(GLOBALS["email"], "Your NPR process has started", '\n'.join(info_lines)) ## ================================ ## CHECK AND 
UPDATE CURRENT TASKS checked_tasks = set() check_start_time = time() to_add_tasks = set() GLOBALS["cached_status"] = {} for task in sorted(pending_tasks, sort_tasks): # Avoids endless periods without new job submissions elapsed_time = time() - check_start_time #if not back_launcher and pending_tasks and \ # elapsed_time > schedule_time * 2: # log.log(26, "@@8:Interrupting task checks to schedule new jobs@@1:") # db.commit() # wtime = launch_jobs(sorted(pending_tasks, sort_tasks), # execution, run_detached) # check_start_time = time() # Enter debuging mode if necessary if debug and log.level > 10 and task.taskid.startswith(debug): log.setLevel(10) log.debug("ENTERING IN DEBUGGING MODE") thread2tasks[task.configid].append(task) # Update tasks and job statuses if task.taskid not in checked_tasks: try: show_task_info(task) task.status = task.get_status(qstat_jobs) db.dataconn.commit() if back_launcher and task.status not in set("DE"): for j, cmd in task.iter_waiting_jobs(): j.status = "Q" GLOBALS["cached_status"][j.jobid] = "Q" if j.jobid not in BUG: if not os.path.exists(j.jobdir): os.makedirs(j.jobdir) for ifile, outpath in j.input_files.iteritems(): try: _tid, _did = ifile.split(".") _did = int(_did) except (IndexError, ValueError): dataid = ifile else: dataid = db.get_dataid(_tid, _did) if not outpath: outfile = pjoin(GLOBALS["input_dir"], ifile) else: outfile = pjoin(outpath, ifile) if not os.path.exists(outfile): open(outfile, "w").write(db.get_data(dataid)) log.log(24, " @@8:Queueing @@1: %s from %s" %(j, task)) if execution: job_queue.put([j.jobid, j.cores, cmd, j.status_file]) BUG.add(j.jobid) update_task_states_recursively(task) db.commit() checked_tasks.add(task.taskid) except TaskError, e: log.error("Errors found in %s" %task) import traceback traceback.print_exc() if GLOBALS["email"]: threadname = GLOBALS[task.configid]["_name"] send_mail(GLOBALS["email"], "Errors found in %s!" 
%threadname, '\n'.join(map(str, [task, e.value, e.msg]))) pending_tasks.discard(task) thread_errors[task.configid].append([task, e.value, e.msg]) continue else: # Set temporary Queued state to avoids launching # jobs from clones task.status = "Q" if log.level < 24: show_task_info(task) if task.status == "D": #db.commit() show_task_info(task) logindent(3) # Log commands of every task if 'cmd_log_file' not in GLOBALS[task.configid]: GLOBALS[task.configid]['cmd_log_file'] = pjoin(GLOBALS[task.configid]["_outpath"], "cmd.log") O = open(GLOBALS[task.configid]['cmd_log_file'], "w") O.close() cmd_lines = get_cmd_log(task) CMD_LOG = open(GLOBALS[task.configid]['cmd_log_file'], "a") print >>CMD_LOG, task for c in cmd_lines: print >>CMD_LOG, ' '+'\t'.join(map(str, c)) CMD_LOG.close() # try: #wkname = GLOBALS[task.configid]['_name'] create_tasks = workflow_task_processor(task, task.target_wkname) except TaskError, e: log.error("Errors found in %s" %task) pending_tasks.discard(task) thread_errors[task.configid].append([task, e.value, e.msg]) continue else: logindent(-3) to_add_tasks.update(create_tasks) pending_tasks.discard(task) elif task.status == "E": log.error("task contains errors: %s " %task) log.error("Errors found in %s") pending_tasks.discard(task) thread_errors[task.configid].append([task, None, "Found (E) task status"])
def process_task(task, wkname, npr_conf, nodeid2info):
    """Create the follow-up tasks for a processed workflow task.

    The branch taken depends on ``task.ttype``:

    * ``"cog_selector"`` -- registers a concatenated-alignment task together
      with its constraint tree / constraint alignment data.
    * ``"concat_alg"``   -- registers a tree-building task for the alignment.
    * ``"tree"``         -- registers a tree-splitter (merger) task.
    * ``"treemerger"``   -- stores the task tree and, unless the iteration
      limit has been reached, registers one cog-selector task per node
      yielded by ``get_next_npr_node``.

    Args:
        task: the task to process. Reads ``threadid``, ``nodeid``,
            ``seqtype``, ``ttype``, ``size`` and ``configid``, plus
            branch-specific attributes (``cogs``, ``taskid``, ``tree_file``,
            ``task_tree``, ``main_tree``, ``out_seqs``).
        wkname: name of the workflow configuration block inside the thread
            configuration (``GLOBALS[task.configid]``).
        npr_conf: object exposing ``cog_selector``, ``alg_concatenator``,
            ``tree_builder`` and ``tree_splitter`` as ``(conf_name, class)``
            pairs, and ``max_iters``.
        nodeid2info: mapping ``nodeid -> info dict``; the ``"concat_alg"``
            branch stores ``size``, ``target_seqs`` and ``out_seqs`` in it.

    Returns:
        list: the newly created task objects (possibly empty).
    """
    cogconf, cogclass = npr_conf.cog_selector
    concatconf, concatclass = npr_conf.alg_concatenator
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    threadid, nodeid = task.threadid, task.nodeid
    seqtype, ttype = task.seqtype, task.ttype
    cladeid, targets, outgroups = db.get_node_info(threadid, nodeid)

    if not treebuilderclass or task.size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences.
        # DummyTree will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    # A constraint is only used when there is more than one outgroup.
    if outgroups and len(outgroups) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    # The value is not used below; the lookup is kept because it may have a
    # side effect (e.g. if nodeid2info is a defaultdict -- TODO confirm).
    node_info = nodeid2info[nodeid]
    conf = GLOBALS[task.configid]
    new_tasks = []

    if ttype == "cog_selector":
        # Generates a md5 id based on the genetree configuration workflow used
        # for the concat alg task. If something changes, concat alg will
        # change and the associated tree will be rebuilt.
        config_blocks = set([wkname])
        for _key, value in conf[wkname].iteritems():
            if isinstance(value, (list, tuple, set)):
                for elem in value:
                    if isinstance(elem, str) and elem.startswith("@"):
                        config_blocks.add(elem[1:])
            elif isinstance(value, str) and value.startswith("@"):
                config_blocks.add(value[1:])
        config_checksum = md5(''.join(
            ["[%s]\n%s" % (x, dict_string(conf[x]))
             for x in sorted(config_blocks)]))

        # NOTE: the check that the current COG selection covers all target
        # and outgroup species has been moved to the cog_selector task.

        # Register concat alignment task. NodeId associated to concat_alg
        # tasks and all its children jobs should take into account cog
        # information and not only species and outgroups included.
        concat_job = concatclass(task.cogs, seqtype, conf, concatconf,
                                 config_checksum)
        db.add_node(threadid, concat_job.nodeid, cladeid, targets, outgroups)

        # Register tree constraints.
        constrain_tree = "(%s, (%s));" % (','.join(sorted(outgroups)),
                                          ','.join(sorted(targets)))
        _outs = "\n".join([">%s\n0" % name for name in sorted(outgroups)])
        _tars = "\n".join([">%s\n1" % name for name in sorted(targets)])
        constrain_alg = '\n'.join([_outs, _tars])
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_tree,
                         constrain_tree)
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_alg,
                         constrain_alg)
        # Since the creation of some Task objects may require this info,
        # commit right now.
        db.dataconn.commit()

        concat_job.size = task.size
        new_tasks.append(concat_job)

    elif ttype == "concat_alg":
        # Register tree for concat alignment, using constraint tree if
        # necessary.
        alg_id = db.get_dataid(task.taskid, DATATYPES.concat_alg_phylip)
        try:
            parts_id = db.get_dataid(task.taskid, DATATYPES.model_partitions)
        except ValueError:
            parts_id = None

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = targets
        nodeid2info[nodeid]["out_seqs"] = outgroups

        tree_task = treebuilderclass(nodeid, alg_id, constrain_id, None,
                                     seqtype, conf, treebuilderconf,
                                     parts_id=parts_id)
        tree_task.size = task.size
        new_tasks.append(tree_task)

    elif ttype == "tree":
        merger_task = splitterclass(nodeid, seqtype, task.tree_file, conf,
                                    splitterconf)
        merger_task.size = task.size
        new_tasks.append(merger_task)

    elif ttype == "treemerger":
        # Lets merge with main tree
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid, runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        # treebuilderclass holds DummyTree itself when the fallback above was
        # taken, so an isinstance() test alone can never detect it; the
        # identity check makes the guard effective.
        uses_dummy_tree = (treebuilderclass is DummyTree
                           or isinstance(treebuilderclass, DummyTree))
        if not uses_dummy_tree and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(targets), len(outgroups))

                target_cladeids = None
                if tobool(conf[splitterconf].get("_find_ncbi_targets", False)):
                    tcopy = mtree.copy()
                    ncbi.connect_database()
                    tax2name, _tax2track = ncbi.annotate_tree_with_taxa(
                        tcopy, None)
                    n2content = tcopy.get_cached_content()
                    (broken_branches, broken_clades, _broken_clade_sizes,
                     tax2name) = ncbi.get_broken_branches(tcopy, n2content)
                    log.log(28, 'restricting NPR to broken clades: ' +
                            colorify(', '.join(
                                ["%s" % tax2name[x] for x in broken_clades]),
                                "wr"))
                    target_cladeids = set()
                    for branch in broken_branches:
                        # Diagnostic output written straight to stdout.
                        print(branch.get_ascii(
                            attributes=['spname', 'taxid'], compact=True))
                        print(["%s" % tax2name[x]
                               for x in broken_branches[branch]])
                        target_cladeids.add(branch.cladeid)

                # The None argument is to avoid alg checks.
                for _node, seqs, outs, next_wkname in get_next_npr_node(
                        task.configid, ttree, task.out_seqs, mtree, None,
                        npr_conf, target_cladeids):
                    log.log(24, "Adding new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = cogclass(seqs, outs, source_seqtype,
                                             conf, cogconf)
                    new_task_node.target_wkname = next_wkname
                    new_tasks.append(new_task_node)
                    db.add_node(threadid,
                                new_task_node.nodeid, new_task_node.cladeid,
                                new_task_node.targets,
                                new_task_node.outgroups)
    return new_tasks
#else: # treemerge_task = TreeSplitter(nodeid, seqtype, task.tree_file, main_tree, conf) treemerge_task.size = task.size new_tasks.append(treemerge_task) elif ttype == "treemerger": if not task.task_tree: task.finish() log.log(24, "Saving task tree...") annotate_node(task.task_tree, task) db.update_node(nid=task.nodeid, runid=task.threadid, newick=db.encode(task.task_tree)) db.commit() if not isinstance(treebuilderclass, DummyTree) and npr_conf.max_iters > 1: current_iter = get_iternumber(threadid) if npr_conf.max_iters and current_iter >= npr_conf.max_iters: log.warning("Maximum number of iterations reached!") else: # Add new nodes source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt" ttree, mtree = task.task_tree, task.main_tree log.log(26, "Processing tree: %s seqs, %s outgroups", len(target_seqs), len(out_seqs)) alg_path = node_info.get("clean_alg_path", node_info["alg_path"]) for node, seqs, outs, wkname in get_next_npr_node(threadid, ttree, task.out_seqs, mtree, alg_path, npr_conf):
def process_task(task, wkname, npr_conf, nodeid2info):
    """Create the follow-up tasks for a processed workflow task.

    The branch taken depends on ``task.ttype``:

    * ``"cog_selector"`` -- registers a concatenated-alignment task together
      with its constraint tree / constraint alignment data.
    * ``"concat_alg"``   -- registers a tree-building task for the alignment.
    * ``"tree"``         -- registers a tree-splitter (merger) task.
    * ``"treemerger"``   -- stores the task tree and, unless the iteration
      limit has been reached, registers one cog-selector task per node
      yielded by ``get_next_npr_node``.

    Args:
        task: the task to process. Reads ``threadid``, ``nodeid``,
            ``seqtype``, ``ttype``, ``size`` and ``configid``, plus
            branch-specific attributes (``cogs``, ``taskid``, ``tree_file``,
            ``task_tree``, ``main_tree``, ``out_seqs``).
        wkname: name of the workflow configuration block inside the thread
            configuration (``GLOBALS[task.configid]``).
        npr_conf: object exposing ``cog_selector``, ``alg_concatenator``,
            ``tree_builder`` and ``tree_splitter`` as ``(conf_name, class)``
            pairs, and ``max_iters``.
        nodeid2info: mapping ``nodeid -> info dict``; the ``"concat_alg"``
            branch stores ``size``, ``target_seqs`` and ``out_seqs`` in it.

    Returns:
        list: the newly created task objects (possibly empty).
    """
    cogconf, cogclass = npr_conf.cog_selector
    concatconf, concatclass = npr_conf.alg_concatenator
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    threadid, nodeid = task.threadid, task.nodeid
    seqtype, ttype = task.seqtype, task.ttype
    cladeid, targets, outgroups = db.get_node_info(threadid, nodeid)

    if not treebuilderclass or task.size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences.
        # DummyTree will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    # A constraint is only used when there is more than one outgroup.
    if outgroups and len(outgroups) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    # The value is not used below; the lookup is kept because it may have a
    # side effect (e.g. if nodeid2info is a defaultdict -- TODO confirm).
    node_info = nodeid2info[nodeid]
    conf = GLOBALS[task.configid]
    new_tasks = []

    if ttype == "cog_selector":
        # Generates a md5 id based on the genetree configuration workflow used
        # for the concat alg task. If something changes, concat alg will
        # change and the associated tree will be rebuilt.
        config_blocks = set([wkname])
        for _key, value in conf[wkname].iteritems():
            if isinstance(value, (list, tuple, set)):
                for elem in value:
                    if isinstance(elem, str) and elem.startswith("@"):
                        config_blocks.add(elem[1:])
            elif isinstance(value, str) and value.startswith("@"):
                config_blocks.add(value[1:])
        config_checksum = md5("".join(
            ["[%s]\n%s" % (x, dict_string(conf[x]))
             for x in sorted(config_blocks)]))

        # NOTE: the check that the current COG selection covers all target
        # and outgroup species has been moved to the cog_selector task.

        # Register concat alignment task. NodeId associated to concat_alg
        # tasks and all its children jobs should take into account cog
        # information and not only species and outgroups included.
        concat_job = concatclass(task.cogs, seqtype, conf, concatconf,
                                 config_checksum)
        db.add_node(threadid, concat_job.nodeid, cladeid, targets, outgroups)

        # Register tree constraints.
        constrain_tree = "(%s, (%s));" % (",".join(sorted(outgroups)),
                                          ",".join(sorted(targets)))
        _outs = "\n".join([">%s\n0" % name for name in sorted(outgroups)])
        _tars = "\n".join([">%s\n1" % name for name in sorted(targets)])
        constrain_alg = "\n".join([_outs, _tars])
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_tree,
                         constrain_tree)
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_alg,
                         constrain_alg)
        # Since the creation of some Task objects may require this info,
        # commit right now.
        db.dataconn.commit()

        concat_job.size = task.size
        new_tasks.append(concat_job)

    elif ttype == "concat_alg":
        # Register tree for concat alignment, using constraint tree if
        # necessary.
        alg_id = db.get_dataid(task.taskid, DATATYPES.concat_alg_phylip)
        try:
            parts_id = db.get_dataid(task.taskid, DATATYPES.model_partitions)
        except ValueError:
            parts_id = None

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = targets
        nodeid2info[nodeid]["out_seqs"] = outgroups

        tree_task = treebuilderclass(
            nodeid, alg_id, constrain_id, None, seqtype, conf,
            treebuilderconf, parts_id=parts_id
        )
        tree_task.size = task.size
        new_tasks.append(tree_task)

    elif ttype == "tree":
        merger_task = splitterclass(nodeid, seqtype, task.tree_file, conf,
                                    splitterconf)
        merger_task.size = task.size
        new_tasks.append(merger_task)

    elif ttype == "treemerger":
        # Lets merge with main tree
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid, runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        # treebuilderclass holds DummyTree itself when the fallback above was
        # taken, so an isinstance() test alone can never detect it; the
        # identity check makes the guard effective.
        uses_dummy_tree = (treebuilderclass is DummyTree
                           or isinstance(treebuilderclass, DummyTree))
        if not uses_dummy_tree and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(targets), len(outgroups))

                target_cladeids = None
                if tobool(conf[splitterconf].get("_find_ncbi_targets", False)):
                    tcopy = mtree.copy()
                    ncbi.connect_database()
                    tax2name, _tax2track = ncbi.annotate_tree_with_taxa(
                        tcopy, None)
                    n2content = tcopy.get_cached_content()
                    (broken_branches, broken_clades, _broken_clade_sizes,
                     tax2name) = ncbi.get_broken_branches(tcopy, n2content)
                    log.log(
                        28,
                        "restricting NPR to broken clades: "
                        + colorify(", ".join(
                            ["%s" % tax2name[x] for x in broken_clades]),
                            "wr"),
                    )
                    target_cladeids = set()
                    for branch in broken_branches:
                        # Diagnostic output written straight to stdout.
                        print(branch.get_ascii(
                            attributes=["spname", "taxid"], compact=True))
                        print(["%s" % tax2name[x]
                               for x in broken_branches[branch]])
                        target_cladeids.add(branch.cladeid)

                # The None argument is to avoid alg checks.
                for _node, seqs, outs, next_wkname in get_next_npr_node(
                        task.configid, ttree, task.out_seqs, mtree, None,
                        npr_conf, target_cladeids):
                    log.log(24, "Adding new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = cogclass(seqs, outs, source_seqtype,
                                             conf, cogconf)
                    new_task_node.target_wkname = next_wkname
                    new_tasks.append(new_task_node)
                    db.add_node(
                        threadid,
                        new_task_node.nodeid,
                        new_task_node.cladeid,
                        new_task_node.targets,
                        new_task_node.outgroups,
                    )
    return new_tasks
def schedule(workflow_task_processor, pending_tasks, schedule_time, execution, debug, norender): # Adjust debug mode if debug == "all": log.setLevel(10) pending_tasks = set(pending_tasks) ## =================================== ## INITIALIZE BASIC VARS execution, run_detached = execution thread2tasks = defaultdict(list) for task in pending_tasks: thread2tasks[task.configid].append(task) expected_threads = set(thread2tasks.keys()) past_threads = {} thread_errors = defaultdict(list) ## END OF VARS AND SHORTCUTS ## =================================== cores_total = GLOBALS["_max_cores"] if cores_total > 0: job_queue = Queue() back_launcher = Process(target=background_job_launcher, args=(job_queue, run_detached, GLOBALS["launch_time"], cores_total)) back_launcher.start() else: job_queue = None back_launcher = None GLOBALS["_background_scheduler"] = back_launcher GLOBALS["_job_queue"] = job_queue # Captures Ctrl-C for debuging DEBUG #signal.signal(signal.SIGINT, control_c) last_report_time = None BUG = set() try: # Enters into task scheduling while pending_tasks: wtime = schedule_time # ask SGE for running jobs if execution == "sge": sgeid2jobs = db.get_sge_tasks() qstat_jobs = sge.qstat() else: qstat_jobs = None # Show summary of pending tasks per thread thread2tasks = defaultdict(list) for task in pending_tasks: thread2tasks[task.configid].append(task) set_logindent(0) log.log(28, "@@13: Updating tasks status:@@1: (%s)" % (ctime())) info_lines = [] for tid, tlist in thread2tasks.iteritems(): threadname = GLOBALS[tid]["_name"] sizelist = ["%s" % getattr(_ts, "size", "?") for _ts in tlist] info = "Thread @@13:%s@@1:: pending tasks: @@8:%s@@1: of sizes: %s" % ( threadname, len(tlist), ', '.join(sizelist)) info_lines.append(info) for line in info_lines: log.log(28, line) if GLOBALS["email"] and last_report_time is None: last_report_time = time() send_mail(GLOBALS["email"], "Your NPR process has started", '\n'.join(info_lines)) ## ================================ ## CHECK 
AND UPDATE CURRENT TASKS checked_tasks = set() check_start_time = time() to_add_tasks = set() GLOBALS["cached_status"] = {} for task in sorted(pending_tasks, sort_tasks): # Avoids endless periods without new job submissions elapsed_time = time() - check_start_time #if not back_launcher and pending_tasks and \ # elapsed_time > schedule_time * 2: # log.log(26, "@@8:Interrupting task checks to schedule new jobs@@1:") # db.commit() # wtime = launch_jobs(sorted(pending_tasks, sort_tasks), # execution, run_detached) # check_start_time = time() # Enter debuging mode if necessary if debug and log.level > 10 and task.taskid.startswith(debug): log.setLevel(10) log.debug("ENTERING IN DEBUGGING MODE") thread2tasks[task.configid].append(task) # Update tasks and job statuses if task.taskid not in checked_tasks: try: show_task_info(task) task.status = task.get_status(qstat_jobs) db.dataconn.commit() if back_launcher and task.status not in set("DE"): for j, cmd in task.iter_waiting_jobs(): j.status = "Q" GLOBALS["cached_status"][j.jobid] = "Q" if j.jobid not in BUG: if not os.path.exists(j.jobdir): os.makedirs(j.jobdir) for ifile, outpath in j.input_files.iteritems( ): try: _tid, _did = ifile.split(".") _did = int(_did) except (IndexError, ValueError): dataid = ifile else: dataid = db.get_dataid(_tid, _did) if not outpath: outfile = pjoin( GLOBALS["input_dir"], ifile) else: outfile = pjoin(outpath, ifile) if not os.path.exists(outfile): open(outfile, "w").write( db.get_data(dataid)) log.log( 24, " @@8:Queueing @@1: %s from %s" % (j, task)) if execution: job_queue.put([ j.jobid, j.cores, cmd, j.status_file ]) BUG.add(j.jobid) update_task_states_recursively(task) db.commit() checked_tasks.add(task.taskid) except TaskError, e: log.error("Errors found in %s" % task) import traceback traceback.print_exc() if GLOBALS["email"]: threadname = GLOBALS[task.configid]["_name"] send_mail( GLOBALS["email"], "Errors found in %s!" 
% threadname, '\n'.join(map(str, [task, e.value, e.msg]))) pending_tasks.discard(task) thread_errors[task.configid].append( [task, e.value, e.msg]) continue else: # Set temporary Queued state to avoids launching # jobs from clones task.status = "Q" if log.level < 24: show_task_info(task) if task.status == "D": #db.commit() show_task_info(task) logindent(3) # Log commands of every task if 'cmd_log_file' not in GLOBALS[task.configid]: GLOBALS[task.configid]['cmd_log_file'] = pjoin( GLOBALS[task.configid]["_outpath"], "cmd.log") O = open(GLOBALS[task.configid]['cmd_log_file'], "w") O.close() cmd_lines = get_cmd_log(task) CMD_LOG = open(GLOBALS[task.configid]['cmd_log_file'], "a") print >> CMD_LOG, task for c in cmd_lines: print >> CMD_LOG, ' ' + '\t'.join(map(str, c)) CMD_LOG.close() # try: #wkname = GLOBALS[task.configid]['_name'] create_tasks = workflow_task_processor( task, task.target_wkname) except TaskError, e: log.error("Errors found in %s" % task) pending_tasks.discard(task) thread_errors[task.configid].append( [task, e.value, e.msg]) continue else: logindent(-3) to_add_tasks.update(create_tasks) pending_tasks.discard(task) elif task.status == "E": log.error("task contains errors: %s " % task) log.error("Errors found in %s") pending_tasks.discard(task) thread_errors[task.configid].append( [task, None, "Found (E) task status"])
#else: # treemerge_task = TreeSplitter(nodeid, seqtype, task.tree_file, main_tree, conf) treemerge_task.size = task.size new_tasks.append(treemerge_task) elif ttype == "treemerger": if not task.task_tree: task.finish() log.log(24, "Saving task tree...") annotate_node(task.task_tree, task) db.update_node(nid=task.nodeid, runid=task.threadid, newick=db.encode(task.task_tree)) db.commit() if not isinstance(treebuilderclass, DummyTree) and npr_conf.max_iters > 1: current_iter = get_iternumber(threadid) if npr_conf.max_iters and current_iter >= npr_conf.max_iters: log.warning("Maximum number of iterations reached!") else: # Add new nodes source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt" ttree, mtree = task.task_tree, task.main_tree log.log(26, "Processing tree: %s seqs, %s outgroups", len(target_seqs), len(out_seqs)) alg_path = node_info.get("clean_alg_path", node_info["alg_path"]) for node, seqs, outs, wkname in get_next_npr_node(