def finish_pasta_execution(pasta_team,
                           user_config,
                           temporaries_dir,
                           pasta_products,
                           multilocus_dataset):
    """Run the PASTA workflow on ``multilocus_dataset`` and write the results.

    Steps, in order:

    1. Save the configuration to ``last_used.cfg`` in ``temporaries_dir`` and
       start the worker threads.
    2. Read the starting tree named by ``options.treefile``, if any.
    3. Relabel the taxa, convert RNA data to DNA for internal use and export
       the safe-name / original-name table.
    4. Unless a starting tree is used directly, build an initial alignment
       (when requested or when the input is not aligned) and run an initial
       tree search.
    5. Run the ``PastaJob`` (skipped with ``--two-phase``), optionally
       followed by a RAxML search.
    6. Restore taxon names (and RNA), then write the alignment(s), tree and
       score to the streams held by ``pasta_products``.

    The workers are stopped and the previous signal handlers are restored
    whether or not the run succeeds.

    :param pasta_team: supplies ``aligner``, ``hmmeralign``,
        ``tree_estimator``, ``raxml_tree_estimator`` and ``temp_fs``.
    :param user_config: configuration object; ``commandline``, ``sate`` and
        ``raxml`` sections are read, and ``commandline.datatype`` /
        ``commandline.aligned`` may be rewritten.
    :param temporaries_dir: directory under which temporary sub-directories
        are created.
    :param pasta_products: supplies output paths and the output streams.
    :param multilocus_dataset: the input sequences; modified in place.
    :raises Exception: if ``options.treefile`` is set but does not exist.
    """
    global _RunningJobs

    options = user_config.commandline

    user_config.save_to_filepath(os.path.join(temporaries_dir, 'last_used.cfg'))
    if options.timesfile:
        # Touch the timing file in append mode before registering it.
        f = open_with_intermediates(options.timesfile, 'a')
        f.close()
        set_timing_log_filepath(options.timesfile)

    ############################################################################
    # Launch threads to do work
    #####
    pasta_config = user_config.get("sate")
    start_worker(pasta_config.num_cpus)

    ############################################################################
    # We must read the incoming tree in before the taxa in the dataset are
    #   relabeled below
    ######
    alignment_as_tmp_filename_to_report = None
    tree_as_tmp_filename_to_report = None
    starting_tree = None

    tree_file = options.treefile
    if tree_file:
        if not os.path.exists(tree_file):
            raise Exception('The tree file "%s" does not exist' % tree_file)
        # The with-block closes the file even when parsing fails.
        with open(tree_file, 'rU') as tree_f:
            MESSENGER.send_info('Reading starting trees from "%s"...' % tree_file)
            try:
                tree_list = read_and_encode_splits(multilocus_dataset.dataset,
                                                   tree_f,
                                                   starting_tree=True)
            except KeyError:
                MESSENGER.send_error("Error in reading the treefile, probably due to a name in the tree that does not match the names in the input sequence files.\n")
                raise
            except Exception:
                MESSENGER.send_error("Error in reading the treefile.\n")
                raise
        if len(tree_list) > 1:
            MESSENGER.send_warning('%d starting trees found in "%s". The first tree will be used.' % (len(tree_list), tree_file))
        starting_tree = tree_list[0]
        score = None
        tree_as_tmp_filename_to_report = tree_file

    ############################################################################
    # This will relabel the taxa if they have problematic names
    #####
    multilocus_dataset.relabel_for_pasta()

    ############################################################################
    # This ensures all nucleotide data is DNA internally
    #####
    restore_to_rna = False
    if user_config.commandline.datatype.upper() == 'RNA':
        multilocus_dataset.convert_rna_to_dna()
        user_config.commandline.datatype = 'DNA'
        restore_to_rna = True

    ############################################################################
    # Export the safe-name -> original-name table (best effort)
    #####
    # Resolved outside the try so the error message below can always name it.
    name_filename = pasta_products.get_abs_path_for_tag('name_translation.txt')
    try:
        with open(name_filename, 'w') as name_output:
            safe2real = multilocus_dataset.safe_to_real_names
            for safe in sorted(safe2real):
                orig = safe2real[safe][0]
                name_output.write("%s\n%s\n\n" % (safe, orig))
        MESSENGER.send_info("Name translation information saved to %s as safe name, original name, blank line format." % name_filename)
    except Exception:
        MESSENGER.send_info("Error exporting saving name translation to %s" % name_filename)

    if options.aligned:
        options.aligned = all([i.is_aligned() for i in multilocus_dataset])

    ############################################################################
    # Be prepared to kill any long running jobs
    #####
    prev_signals = []
    for sig in [signal.SIGTERM, signal.SIGABRT, signal.SIGINT]:
        prev_handler = signal.signal(sig, killed_handler)
        prev_signals.append((sig, prev_handler))

    try:
        pasta_config_dict = pasta_config.dict()

        if (not options.two_phase) and tree_file:
            # getting the newick string here will allow us to get a string
            # that is in terms of the correct taxon labels
            starting_tree_str = starting_tree.compose_newick()
        else:
            if not options.two_phase:
                MESSENGER.send_info("Creating a starting tree for the PASTA algorithm...")
            if (options.two_phase) or (not options.aligned):
                MESSENGER.send_info("Performing initial alignment of the entire data matrix...")
                init_aln_dir = os.path.join(temporaries_dir, 'init_aln')
                init_aln_dir = pasta_team.temp_fs.create_subdir(init_aln_dir)
                delete_aln_temps = not (options.keeptemp and options.keepalignmenttemps)
                aln_job_list = []
                query_fns = []
                for unaligned_seqs in multilocus_dataset:
                    # Align a random backbone of at most 100 sequences; the
                    # remaining sequences are written out in chunks and
                    # added to the backbone alignment afterwards.
                    backbone = sample(list(unaligned_seqs.keys()),
                                      min(100, len(unaligned_seqs)))
                    backbone_seqs = unaligned_seqs.sub_alignment(backbone)

                    query_seq = list(set(unaligned_seqs.keys()) - set(backbone))
                    qn = len(query_seq)
                    chunks = min(int(4 * pasta_config.num_cpus),
                                 int(ceil(qn / 50.0)))
                    _LOG.debug("Will align the remaining %d sequences in %d chunks" % (qn, chunks))
                    for ch in range(0, chunks):
                        query_fn = os.path.join(init_aln_dir, "query-%d.fasta" % ch)
                        # Strided slice: chunk `ch` takes every chunks-th
                        # query sequence starting at index `ch`.
                        qa = unaligned_seqs.sub_alignment(query_seq[ch:qn:chunks])
                        _LOG.debug("Chunk with %d sequences built" % len(qa))
                        qa.write_filepath(query_fn)
                        query_fns.append(query_fn)

                    job = pasta_team.aligner.create_job(backbone_seqs,
                                                        tmp_dir_par=init_aln_dir,
                                                        context_str="initalign",
                                                        delete_temps=delete_aln_temps,
                                                        num_cpus=pasta_config.num_cpus)
                    aln_job_list.append(job)
                _RunningJobs = aln_job_list
                for job in aln_job_list:
                    jobq.put(job)

                # NOTE(review): only the result of the last backbone job is
                # used, and it replaces locus 0 below -- looks like this path
                # assumes a single locus; confirm.
                new_alignment = compact(job.get_results())

                add_job_list = []
                for query_fn in query_fns:
                    job = pasta_team.hmmeralign.create_job(new_alignment,
                                                           query_fn,
                                                           tmp_dir_par=init_aln_dir,
                                                           context_str="initalign",
                                                           delete_temps=delete_aln_temps)
                    add_job_list.append(job)
                # Register the queued jobs in _RunningJobs (as done for the
                # backbone jobs) and clear it only once their results are
                # merged.
                _RunningJobs = add_job_list
                for job in add_job_list:
                    jobq.put(job)
                for job in add_job_list:
                    new_alignment.merge_in(compact(job.get_results()))
                _RunningJobs = None
                multilocus_dataset[0] = new_alignment

                if delete_aln_temps:
                    pasta_team.temp_fs.remove_dir(init_aln_dir)
            else:
                MESSENGER.send_info("Input sequences assumed to be aligned (based on sequence lengths).")

            MESSENGER.send_info("Performing initial tree search to get starting tree...")
            init_tree_dir = os.path.join(temporaries_dir, 'init_tree')
            init_tree_dir = pasta_team.temp_fs.create_subdir(init_tree_dir)
            delete_tree_temps = not options.keeptemp
            job = pasta_team.tree_estimator.create_job(multilocus_dataset,
                                                       tmp_dir_par=init_tree_dir,
                                                       num_cpus=pasta_config.num_cpus,
                                                       context_str="inittree",
                                                       delete_temps=delete_tree_temps,
                                                       pasta_products=pasta_products,
                                                       step_num='initialsearch',
                                                       mask_gappy_sites=pasta_config_dict['mask_gappy_sites'])
            _RunningJobs = job
            jobq.put(job)
            score, starting_tree_str = job.get_results()
            _RunningJobs = None
            alignment_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output("initialsearch", TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            tree_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output("initialsearch", TEMP_TREE_TAG, allow_existing=True)
            if delete_tree_temps:
                pasta_team.temp_fs.remove_dir(init_tree_dir)
        _LOG.debug('We have the tree and whole_alignment, partitions...')

        if options.keeptemp:
            pasta_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                pasta_config_dict['keep_realignment_temporaries'] = True

        job = PastaJob(multilocus_dataset=multilocus_dataset,
                       pasta_team=pasta_team,
                       name=options.job,
                       status_messages=MESSENGER.send_info,
                       score=score,
                       **pasta_config_dict)
        if starting_tree is not None:
            job.tree = generate_tree_with_splits_from_tree(starting_tree,
                                                           force_fully_resolved=True)
        else:
            job.tree_str = starting_tree_str
        job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
        job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
        if score is not None:
            job.store_optimum_results(new_multilocus_dataset=multilocus_dataset,
                                      new_tree_str=starting_tree_str,
                                      new_score=score,
                                      curr_timestamp=time.time())

        if options.two_phase:
            MESSENGER.send_info("Exiting with the initial tree because the PASTA algorithm is avoided when the --two-phase option is used.")
        else:
            _RunningJobs = job
            MESSENGER.send_info("Starting PASTA algorithm on initial tree...")
            job.run(tmp_dir_par=temporaries_dir, pasta_products=pasta_products)
            _RunningJobs = None
            if job.return_final_tree_and_alignment:
                alignment_as_tmp_filename_to_report = job.curr_iter_align_tmp_filename
            else:
                alignment_as_tmp_filename_to_report = job.best_alignment_tmp_filename

            if user_config.commandline.raxml_search_after:
                raxml_model = user_config.raxml.model.strip()
                if not raxml_model:
                    # No RAxML model configured: derive one from the datatype
                    # and the tree estimator's model.
                    dt = user_config.commandline.datatype
                    mf = pasta_team.tree_estimator.model
                    ms = fasttree_to_raxml_model_str(dt, mf)
                    pasta_team.raxml_tree_estimator.model = ms
                rte = pasta_team.raxml_tree_estimator
                MESSENGER.send_info("Performing post-processing tree search in RAxML...")
                post_tree_dir = os.path.join(temporaries_dir, 'post_tree')
                post_tree_dir = pasta_team.temp_fs.create_subdir(post_tree_dir)
                delete_tree_temps = not options.keeptemp
                starting_tree = None
                if user_config.sate.start_tree_search_from_current:
                    starting_tree = job.tree
                post_job = rte.create_job(job.multilocus_dataset,
                                          starting_tree=starting_tree,
                                          num_cpus=pasta_config.num_cpus,
                                          context_str="postraxtree",
                                          tmp_dir_par=post_tree_dir,
                                          delete_temps=delete_tree_temps,
                                          pasta_products=pasta_products,
                                          step_num="postraxtree",
                                          mask_gappy_sites=pasta_config_dict['mask_gappy_sites'])
                _RunningJobs = post_job
                jobq.put(post_job)
                post_score, post_tree = post_job.get_results()
                _RunningJobs = None
                tree_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output("postraxtree", TEMP_TREE_TAG, allow_existing=True)
                if delete_tree_temps:
                    pasta_team.temp_fs.remove_dir(post_tree_dir)
                job.tree_str = post_tree
                job.score = post_score
                if post_score > job.best_score:
                    job.best_tree_str = post_tree
                    job.best_score = post_score
            else:
                if job.return_final_tree_and_alignment:
                    tree_as_tmp_filename_to_report = job.curr_iter_tree_tmp_filename
                else:
                    tree_as_tmp_filename_to_report = job.best_tree_tmp_filename

        #######################################################################
        # Restore original taxon names and RNA characters
        #####
        job.multilocus_dataset.restore_taxon_names()
        if restore_to_rna:
            job.multilocus_dataset.convert_dna_to_rna()
            user_config.commandline.datatype = 'RNA'

        assert len(pasta_products.alignment_streams) == len(job.multilocus_dataset)
        for i, alignment in enumerate(job.multilocus_dataset):
            alignment_stream = pasta_products.alignment_streams[i]
            MESSENGER.send_info("Writing resulting alignment to %s" % alignment_stream.name)
            alignment.write(alignment_stream, file_format="FASTA")
            alignment_stream.close()

        MESSENGER.send_info("Writing resulting tree to %s" % pasta_products.tree_stream.name)
        tree_str = job.tree.compose_newick()
        pasta_products.tree_stream.write("%s;\n" % tree_str)

        MESSENGER.send_info("Writing resulting likelihood score to %s" % pasta_products.score_stream.name)
        pasta_products.score_stream.write("%s\n" % job.score)

        if alignment_as_tmp_filename_to_report is not None:
            MESSENGER.send_info('The resulting alignment (with the names in a "safe" form) was first written as the file "%s"' % alignment_as_tmp_filename_to_report)
        if tree_as_tmp_filename_to_report is not None:
            MESSENGER.send_info('The resulting tree (with the names in a "safe" form) was first written as the file "%s"' % tree_as_tmp_filename_to_report)

    finally:
        stop_worker()
        for sig, prev_handler in prev_signals:
            # signal.signal() returns None when the previous handler was not
            # installed from Python; fall back to the default action then.
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)
def finish_pasta_execution(pasta_team,
                           user_config,
                           temporaries_dir,
                           pasta_products,
                           multilocus_dataset):
    """Run the PASTA workflow on ``multilocus_dataset`` and write the results.

    Steps, in order:

    1. Save the configuration to ``last_used.cfg`` in ``temporaries_dir`` and
       start the worker threads.
    2. Read the starting tree named by ``options.treefile``, if any.
    3. Relabel the taxa, convert RNA data to DNA for internal use and export
       the safe-name / original-name table.
    4. Unless a starting tree is used directly, build an initial alignment
       (when requested or when the input is not aligned) and run an initial
       tree search.
    5. Run the ``PastaJob`` (skipped with ``--two-phase``), optionally
       followed by a RAxML search.
    6. Restore taxon names (and RNA), then write the alignment(s), tree and
       score to the streams held by ``pasta_products`` and close them.

    The workers are stopped and the previous signal handlers are restored
    whether or not the run succeeds.

    :param pasta_team: supplies ``aligner``, ``hmmeralign``,
        ``tree_estimator``, ``raxml_tree_estimator`` and ``temp_fs``.
    :param user_config: configuration object; ``commandline``, ``sate`` and
        ``raxml`` sections are read, and ``commandline.datatype`` /
        ``commandline.aligned`` may be rewritten.
    :param temporaries_dir: directory under which temporary sub-directories
        are created.
    :param pasta_products: supplies output paths and the output streams.
    :param multilocus_dataset: the input sequences; modified in place.
    :raises Exception: if ``options.treefile`` is set but does not exist.
    """
    global _RunningJobs

    options = user_config.commandline

    user_config.save_to_filepath(os.path.join(temporaries_dir, 'last_used.cfg'))
    if options.timesfile:
        # Touch the timing file in append mode before registering it.
        f = open_with_intermediates(options.timesfile, 'a')
        f.close()
        set_timing_log_filepath(options.timesfile)

    ############################################################################
    # Launch threads to do work
    #####
    pasta_config = user_config.get("sate")
    start_worker(pasta_config.num_cpus)

    ############################################################################
    # We must read the incoming tree in before the taxa in the dataset are
    #   relabeled below
    ######
    alignment_as_tmp_filename_to_report = None
    tree_as_tmp_filename_to_report = None
    starting_tree = None

    tree_file = options.treefile
    if tree_file:
        if not os.path.exists(tree_file):
            raise Exception('The tree file "%s" does not exist' % tree_file)
        # Plain 'r': Python 3 text mode already translates newlines, and the
        # 'U' flag is rejected by Python 3.11+. The with-block closes the
        # file even when parsing fails.
        with open(tree_file, 'r') as tree_f:
            MESSENGER.send_info('Reading starting trees from "%s"...' % tree_file)
            try:
                tree_list = read_and_encode_splits(multilocus_dataset.dataset,
                                                   tree_f,
                                                   starting_tree=True)
            except KeyError:
                MESSENGER.send_error(
                    "Error in reading the treefile, probably due to a name in the tree that does not match the names in the input sequence files.\n"
                )
                raise
            except Exception:
                MESSENGER.send_error("Error in reading the treefile.\n")
                raise
        if len(tree_list) > 1:
            MESSENGER.send_warning(
                '%d starting trees found in "%s". The first tree will be used.'
                % (len(tree_list), tree_file))
        starting_tree = tree_list[0]
        score = None
        tree_as_tmp_filename_to_report = tree_file

    ############################################################################
    # This will relabel the taxa if they have problematic names
    #####
    multilocus_dataset.relabel_for_pasta()

    ############################################################################
    # This ensures all nucleotide data is DNA internally
    #####
    restore_to_rna = False
    if user_config.commandline.datatype.upper() == 'RNA':
        multilocus_dataset.convert_rna_to_dna()
        user_config.commandline.datatype = 'DNA'
        restore_to_rna = True

    ############################################################################
    # Export the safe-name -> original-name table (best effort)
    #####
    # Resolved outside the try so the error message below can always name it.
    name_filename = pasta_products.get_abs_path_for_tag('name_translation.txt')
    try:
        with open(name_filename, 'w') as name_output:
            safe2real = multilocus_dataset.safe_to_real_names
            for safe in sorted(safe2real):
                orig = safe2real[safe][0]
                name_output.write("%s\n%s\n\n" % (safe, orig))
        MESSENGER.send_info(
            "Name translation information saved to %s as safe name, original name, blank line format."
            % name_filename)
    except Exception:
        MESSENGER.send_info(
            "Error exporting saving name translation to %s" % name_filename)

    if options.aligned:
        options.aligned = all([i.is_aligned() for i in multilocus_dataset])

    ############################################################################
    # Be prepared to kill any long running jobs
    #####
    prev_signals = []
    for sig in [signal.SIGTERM, signal.SIGABRT, signal.SIGINT]:
        prev_handler = signal.signal(sig, killed_handler)
        prev_signals.append((sig, prev_handler))

    try:
        pasta_config_dict = pasta_config.dict()

        if (not options.two_phase) and tree_file:
            # getting the newick string here will allow us to get a string
            # that is in terms of the correct taxon labels
            starting_tree_str = str(starting_tree)
        else:
            if not options.two_phase:
                MESSENGER.send_info(
                    "Creating a starting tree for the PASTA algorithm...")
            if (options.two_phase) or (not options.aligned):
                MESSENGER.send_info(
                    "Performing initial alignment of the entire data matrix..."
                )
                init_aln_dir = os.path.join(temporaries_dir, 'init_aln')
                init_aln_dir = pasta_team.temp_fs.create_subdir(init_aln_dir)
                delete_aln_temps = not (options.keeptemp
                                        and options.keepalignmenttemps)
                aln_job_list = []
                query_fns = []
                for unaligned_seqs in multilocus_dataset:
                    # Align a random backbone of at most 100 sequences; the
                    # remaining sequences are written out in chunks and
                    # added to the backbone alignment afterwards.
                    backbone = sample(list(unaligned_seqs.keys()),
                                      min(100, len(unaligned_seqs)))
                    backbone_seqs = unaligned_seqs.sub_alignment(backbone)

                    query_seq = list(
                        set(unaligned_seqs.keys()) - set(backbone))
                    qn = len(query_seq)
                    chunks = min(int(4 * pasta_config.num_cpus),
                                 int(ceil(qn / 50.0)))
                    _LOG.debug(
                        "Will align the remaining %d sequences in %d chunks"
                        % (qn, chunks))
                    for ch in range(0, chunks):
                        query_fn = os.path.join(init_aln_dir,
                                                "query-%d.fasta" % ch)
                        # Strided slice: chunk `ch` takes every chunks-th
                        # query sequence starting at index `ch`.
                        qa = unaligned_seqs.sub_alignment(
                            query_seq[ch:qn:chunks])
                        _LOG.debug("Chunk with %d sequences built" % len(qa))
                        qa.write_filepath(query_fn)
                        query_fns.append(query_fn)

                    job = pasta_team.aligner.create_job(
                        backbone_seqs,
                        tmp_dir_par=init_aln_dir,
                        context_str="initalign",
                        delete_temps=delete_aln_temps,
                        num_cpus=pasta_config.num_cpus)
                    aln_job_list.append(job)
                _RunningJobs = aln_job_list
                for job in aln_job_list:
                    jobq.put(job)

                # NOTE(review): only the result of the last backbone job is
                # used, and it replaces locus 0 below -- looks like this path
                # assumes a single locus; confirm.
                new_alignment = compact(job.get_results())

                add_job_list = []
                for query_fn in query_fns:
                    job = pasta_team.hmmeralign.create_job(
                        new_alignment,
                        query_fn,
                        tmp_dir_par=init_aln_dir,
                        context_str="initalign",
                        delete_temps=delete_aln_temps)
                    add_job_list.append(job)
                # Register the queued jobs in _RunningJobs (as done for the
                # backbone jobs) and clear it only once their results are
                # merged.
                _RunningJobs = add_job_list
                for job in add_job_list:
                    jobq.put(job)
                for job in add_job_list:
                    new_alignment.merge_in(compact(job.get_results()))
                _RunningJobs = None
                multilocus_dataset[0] = new_alignment

                if delete_aln_temps:
                    pasta_team.temp_fs.remove_dir(init_aln_dir)
            else:
                MESSENGER.send_info(
                    "Input sequences assumed to be aligned (based on sequence lengths)."
                )

            MESSENGER.send_info(
                "Performing initial tree search to get starting tree...")
            init_tree_dir = os.path.join(temporaries_dir, 'init_tree')
            init_tree_dir = pasta_team.temp_fs.create_subdir(init_tree_dir)
            delete_tree_temps = not options.keeptemp
            job = pasta_team.tree_estimator.create_job(
                multilocus_dataset,
                tmp_dir_par=init_tree_dir,
                num_cpus=pasta_config.num_cpus,
                context_str="inittree",
                delete_temps=delete_tree_temps,
                pasta_products=pasta_products,
                step_num='initialsearch',
                mask_gappy_sites=pasta_config_dict['mask_gappy_sites'])
            _RunningJobs = job
            jobq.put(job)
            score, starting_tree_str = job.get_results()
            _RunningJobs = None
            alignment_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output(
                "initialsearch", TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            tree_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output(
                "initialsearch", TEMP_TREE_TAG, allow_existing=True)
            if delete_tree_temps:
                pasta_team.temp_fs.remove_dir(init_tree_dir)
        _LOG.debug('We have the tree and whole_alignment, partitions...')

        if options.keeptemp:
            pasta_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                pasta_config_dict['keep_realignment_temporaries'] = True

        job = PastaJob(multilocus_dataset=multilocus_dataset,
                       pasta_team=pasta_team,
                       name=options.job,
                       status_messages=MESSENGER.send_info,
                       score=score,
                       **pasta_config_dict)
        if starting_tree is not None:
            job.tree = generate_tree_with_splits_from_tree(
                starting_tree, force_fully_resolved=True)
        else:
            job.tree_str = starting_tree_str
        job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
        job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
        if score is not None:
            job.store_optimum_results(
                new_multilocus_dataset=multilocus_dataset,
                new_tree_str=starting_tree_str,
                new_score=score,
                curr_timestamp=time.time())

        if options.two_phase:
            MESSENGER.send_info(
                "Exiting with the initial tree because the PASTA algorithm is avoided when the --two-phase option is used."
            )
        else:
            _RunningJobs = job
            MESSENGER.send_info("Starting PASTA algorithm on initial tree...")
            job.run(tmp_dir_par=temporaries_dir, pasta_products=pasta_products)
            _RunningJobs = None
            if job.return_final_tree_and_alignment:
                alignment_as_tmp_filename_to_report = job.curr_iter_align_tmp_filename
            else:
                alignment_as_tmp_filename_to_report = job.best_alignment_tmp_filename

            if user_config.commandline.raxml_search_after:
                raxml_model = user_config.raxml.model.strip()
                if not raxml_model:
                    # No RAxML model configured: derive one from the datatype
                    # and the tree estimator's model.
                    dt = user_config.commandline.datatype
                    mf = pasta_team.tree_estimator.model
                    ms = fasttree_to_raxml_model_str(dt, mf)
                    pasta_team.raxml_tree_estimator.model = ms
                rte = pasta_team.raxml_tree_estimator
                MESSENGER.send_info(
                    "Performing post-processing tree search in RAxML...")
                post_tree_dir = os.path.join(temporaries_dir, 'post_tree')
                post_tree_dir = pasta_team.temp_fs.create_subdir(post_tree_dir)
                delete_tree_temps = not options.keeptemp
                starting_tree = None
                if user_config.sate.start_tree_search_from_current:
                    starting_tree = job.tree
                post_job = rte.create_job(
                    job.multilocus_dataset,
                    starting_tree=starting_tree,
                    num_cpus=pasta_config.num_cpus,
                    context_str="postraxtree",
                    tmp_dir_par=post_tree_dir,
                    delete_temps=delete_tree_temps,
                    pasta_products=pasta_products,
                    step_num="postraxtree",
                    mask_gappy_sites=pasta_config_dict['mask_gappy_sites'])
                _RunningJobs = post_job
                jobq.put(post_job)
                post_score, post_tree = post_job.get_results()
                _RunningJobs = None
                tree_as_tmp_filename_to_report = pasta_products.get_abs_path_for_iter_output(
                    "postraxtree", TEMP_TREE_TAG, allow_existing=True)
                if delete_tree_temps:
                    pasta_team.temp_fs.remove_dir(post_tree_dir)
                job.tree_str = post_tree
                job.score = post_score
                if post_score > job.best_score:
                    job.best_tree_str = post_tree
                    job.best_score = post_score
            else:
                if job.return_final_tree_and_alignment:
                    tree_as_tmp_filename_to_report = job.curr_iter_tree_tmp_filename
                else:
                    tree_as_tmp_filename_to_report = job.best_tree_tmp_filename

        #######################################################################
        # Restore original taxon names and RNA characters
        #####
        job.multilocus_dataset.restore_taxon_names()
        if restore_to_rna:
            job.multilocus_dataset.convert_dna_to_rna()
            user_config.commandline.datatype = 'RNA'

        assert len(pasta_products.alignment_streams) == len(
            job.multilocus_dataset)
        for i, alignment in enumerate(job.multilocus_dataset):
            alignment_stream = pasta_products.alignment_streams[i]
            MESSENGER.send_info("Writing resulting alignment to %s" %
                                alignment_stream.name)
            alignment.write(alignment_stream, file_format="FASTA")
            alignment_stream.close()

        MESSENGER.send_info("Writing resulting tree to %s" %
                            pasta_products.tree_stream.name)
        tree_str = job.tree.compose_newick()
        pasta_products.tree_stream.write("%s;\n" % tree_str)
        pasta_products.tree_stream.close()

        MESSENGER.send_info("Writing resulting likelihood score to %s" %
                            pasta_products.score_stream.name)
        pasta_products.score_stream.write("%s\n" % job.score)
        pasta_products.score_stream.close()

        if alignment_as_tmp_filename_to_report is not None:
            MESSENGER.send_info(
                'The resulting alignment (with the names in a "safe" form) was first written as the file "%s"'
                % alignment_as_tmp_filename_to_report)
        if tree_as_tmp_filename_to_report is not None:
            MESSENGER.send_info(
                'The resulting tree (with the names in a "safe" form) was first written as the file "%s"'
                % tree_as_tmp_filename_to_report)

    finally:
        stop_worker()
        for sig, prev_handler in prev_signals:
            # signal.signal() returns None when the previous handler was not
            # installed from Python; fall back to the default action then.
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)
def finish_pasta_execution(pasta_team,
                           user_config,
                           temporaries_dir,
                           pasta_products,
                           multilocus_dataset):
    """Run (or resume) a PASTA job and hand the result to post-processing.

    The preparatory and post-processing work is delegated to the
    ``pasta_prelim_step1..3`` and ``pasta_postproc_step1..2`` helpers, which
    are defined outside this function. This function itself:

    * builds the job -- unpickled from ``options.resume_state_path`` when
      that is set, otherwise a new ``PastaInterruptibleJob`` (when
      ``options.interruptible`` is true) or ``PastaJob``;
    * runs it unless ``options.two_phase`` is set;
    * when ``job.run`` reports the job as resumable, pickles the job and
      terminates the process with ``sys.exit(0)``;
    * always stops the workers and restores the signal handlers recorded in
      ``prev_signals``.

    :param pasta_team: passed through to the helpers and to the job.
    :param user_config: passed through to the helpers.
    :param temporaries_dir: parent directory for the job's temporaries.
    :param pasta_products: output locations, passed to ``job.run`` and the
        helpers.
    :param multilocus_dataset: the dataset the job operates on.
    :raises SystemExit: after the job state has been pickled for a later
        resume.
    """
    global _RunningJobs
    # Imported locally, as the original body did.
    import pickle

    (alignment_as_tmp_filename_to_report, options, pasta_config, score,
     starting_tree, tree_as_tmp_filename_to_report,
     tree_file) = pasta_prelim_step1(multilocus_dataset, temporaries_dir,
                                     user_config)

    prev_signals, restore_to_rna = pasta_prelim_step2(
        multilocus_dataset, options, pasta_products, user_config)

    try:
        (alignment_as_tmp_filename_to_report, pasta_config_dict, score,
         starting_tree_str,
         tree_as_tmp_filename_to_report) = pasta_prelim_step3(
            multilocus_dataset, options, pasta_config, pasta_products,
            pasta_team, score, alignment_as_tmp_filename_to_report,
            starting_tree, temporaries_dir, tree_as_tmp_filename_to_report,
            tree_file)

        if options.keeptemp:
            pasta_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                pasta_config_dict['keep_realignment_temporaries'] = True

        # A resume path always means "load the pickled job"; the same flag
        # drives the log message further down so the two cannot disagree.
        resuming = options.resume_state_path is not None

        if resuming:
            # SECURITY: pickle.load can execute arbitrary code. Only resume
            # from state files that this program wrote itself.
            with open(options.resume_state_path, 'rb') as pickledpastajob:
                job = pickle.load(pickledpastajob)
                job.restore_non_pickled(status_messages=MESSENGER.send_info)
        else:
            # Both job classes are constructed and initialised identically.
            if options.interruptible:
                job_class = PastaInterruptibleJob
            else:
                job_class = PastaJob
            job = job_class(multilocus_dataset=multilocus_dataset,
                            pasta_team=pasta_team,
                            name=options.job,
                            status_messages=MESSENGER.send_info,
                            score=score,
                            **pasta_config_dict)
            if starting_tree is not None:
                job.tree = generate_tree_with_splits_from_tree(
                    starting_tree, force_fully_resolved=True)
            else:
                job.tree_str = starting_tree_str
            job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
            job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
            if score is not None:
                job.store_optimum_results(
                    new_multilocus_dataset=multilocus_dataset,
                    new_tree_str=starting_tree_str,
                    new_score=score,
                    curr_timestamp=time.time())

        if options.two_phase:
            MESSENGER.send_info("Exiting with the initial tree because the PASTA algorithm is avoided when the --two-phase option is used.")
        else:
            _RunningJobs = job
            if resuming:
                MESSENGER.send_info("Resuming PASTA from state file %s" % options.resume_state_path)
            else:
                MESSENGER.send_info("Starting PASTA algorithm on initial tree...")

            # The heavy-lifting line:
            aln_list, resumable = job.run(tmp_dir_par=temporaries_dir,
                                          pasta_products=pasta_products)
            if resumable:
                picklepath = job.pasta_products.get_abs_path_for_iter_output(
                    job.current_iteration, 'picklefile', allow_existing=False)
                MESSENGER.send_info("Pickling PastaJob to file:\n%s\nPlease run the alignment jobs in the following comma-delimited file and resume:%s\n" % (picklepath, aln_list))
                job.clean_for_pickling()
                with open(picklepath, 'wb') as pf:
                    pickle.dump(job, pf)
                # The finally-clause below still runs before the process
                # exits.
                sys.exit(0)
            _RunningJobs = None

            # NOTE(review): the original indentation was lost; this call is
            # placed in the non-two-phase branch -- confirm against the
            # helper's definition.
            alignment_as_tmp_filename_to_report, tree_as_tmp_filename_to_report = pasta_postproc_step1(
                alignment_as_tmp_filename_to_report, job, options,
                pasta_config, pasta_config_dict, pasta_products, pasta_team,
                temporaries_dir, tree_as_tmp_filename_to_report, user_config)

        pasta_postproc_step2(alignment_as_tmp_filename_to_report, job,
                             pasta_products, restore_to_rna,
                             tree_as_tmp_filename_to_report, user_config)

    finally:
        stop_worker()
        for sig, prev_handler in prev_signals:
            # A recorded handler of None is replaced by the default action.
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)