def _write_comments(self, stream, comments):
    """
    Writes ``comments`` to ``stream`` as square-bracketed comment blocks.

    ``comments`` may be a single string or an iterable of items.

    If ``self.allow_multiline_comments`` is true, everything is emitted as
    one bracketed block: a string is written verbatim on one line, while an
    iterable is joined with newlines (each item passed through ``str``) and
    written between a "[" line and a "]" line.

    Otherwise one "[...]" line is written per comment. A string is first
    split on runs of carriage-return / line-feed characters, with empty
    pieces discarded; items of an iterable are written as given.
    """
    if self.allow_multiline_comments:
        if textprocessing.is_str_type(comments):
            block = "[{}]\n".format(comments)
        else:
            block = "[\n{}\n]\n".format("\n".join(str(c) for c in comments))
        stream.write(block)
        return
    if textprocessing.is_str_type(comments):
        # Splitting on runs of "\r" and "\n" treats every newline
        # convention ("\n", "\r\n", "\n\r", "\r") alike, so no separate
        # normalization pass is needed beforehand.
        comments = [piece for piece in re.split(r'[\r\n]+', comments) if piece]
    for entry in comments:
        stream.write("[{}]\n".format(entry))
def lookup_taxon_symbol(self, symbol, create_taxon_if_not_found=True):
    """
    Resolves ``symbol`` to a taxon.

    ``symbol`` is coerced to a string if it is not one already, and then
    looked up, in order, in ``self.token_taxon_map``,
    ``self.label_taxon_map`` and -- only if
    ``self.enable_lookup_by_taxon_number`` is set --
    ``self.number_taxon_map``. The first hit is returned.

    If nothing matches, the result of ``self.new_taxon(symbol)`` is returned
    when ``create_taxon_if_not_found`` is true; otherwise |None| is
    returned.
    """
    key = symbol if textprocessing.is_str_type(symbol) else str(symbol)
    # The maps are fetched lazily, one at a time, so a later map is only
    # touched once the earlier ones have missed.
    for map_name in ("token_taxon_map", "label_taxon_map"):
        try:
            return getattr(self, map_name)[key]
        except KeyError:
            continue
    if self.enable_lookup_by_taxon_number:
        try:
            return self.number_taxon_map[key]
        except KeyError:
            pass
    if not create_taxon_if_not_found:
        return None
    return self.new_taxon(key)
def stage_load_trees(self,
        tree_filepaths,
        is_rooted=None,
        use_tree_weights=None,
        burnin=None,
        mode=7):
    """
    Composes commands to load a set of trees into PAUP*, with the specified
    number of burnin dropped.

    One ``gett`` command per entry of ``tree_filepaths`` is appended to
    ``self.commands``, which is then returned.

    Parameters
    ----------
    tree_filepaths : iterable of strings
        Paths of the tree files to load. A bare string is rejected.
    is_rooted : bool or None
        |None| adds no rooting option; a true value adds ``rooted=yes``
        and a false value adds ``unrooted=yes``.
    use_tree_weights : bool or None
        |None| adds no weights option; otherwise ``storetreewts=yes`` or
        ``storetreewts=no`` is added.
    burnin : int or None
        Number of leading trees to skip; |None| is treated as 0. Reading
        starts from tree ``burnin + 1``.
    mode : int
        Value passed as the ``mode`` option. The default of 7 keeps trees
        in memory; specify 3 to clear.
    """
    if textprocessing.is_str_type(tree_filepaths):
        raise Exception("expecting list of filepaths, not string")

    if is_rooted is None:
        rooting = ""
    else:
        rooting = "rooted=yes" if is_rooted else "unrooted=yes"

    if use_tree_weights is None:
        treewts = ""
    else:
        treewts = "storetreewts=yes" if use_tree_weights else "storetreewts=no"

    first_tree = (0 if burnin is None else burnin) + 1

    # Two-stage formatting: the options shared by every file are filled in
    # here, leaving a ``{tree_filepath}`` placeholder for the loop below.
    command_template = (
        "gett file= '{{tree_filepath}}' storebrlens=yes warntree=no "
        "{rooting} {treewts} from={burnin} mode={mode};"
    ).format(rooting=rooting, treewts=treewts, burnin=first_tree, mode=mode)

    for path in tree_filepaths:
        # using relpath because of a bug in PAUP* 4.0b10 with long paths
        # passed to gettrees
        short_path = os.path.relpath(path)
        self.commands.append(command_template.format(tree_filepath=short_path))
    return self.commands
def _compose_comment_string(self, item):
    """
    Returns the comments of ``item`` as a single string in which each
    comment is wrapped in square brackets, with no separator between them.

    An empty string is returned if ``self.suppress_item_comments`` is set
    or if ``item.comments`` is empty.

    Note that if ``item.comments`` is a bare string, it is replaced on the
    item itself by a one-element list holding that string.
    """
    if self.suppress_item_comments or not item.comments:
        return ""
    if textprocessing.is_str_type(item.comments):
        # normalize in place so that iteration yields whole comments
        # rather than individual characters
        item.comments = [item.comments]
    return "".join("[{}]".format(comment) for comment in item.comments)
def fetch(self, db, ids, rettype):
    """
    Raw fetch: builds an ``efetch.fcgi`` query URL on ``Entrez.BASE_URL``
    and returns the result of ``urlio.read_url`` on it.

    ``ids`` is either a ready-made id string, which is used as given, or an
    iterable of ids, which is de-duplicated and joined with commas. The
    return mode requested is always ``text``.
    """
    if textprocessing.is_str_type(ids):
        id_list = ids
    else:
        unique_ids = set(ids)
        id_list = ",".join(str(i) for i in unique_ids)
    endpoint = Entrez.BASE_URL + "/efetch.fcgi?"
    query = urlio.urlencode({
        'db': db,
        'id': id_list,
        'rettype': rettype,
        'retmode': 'text',
    })
    return urlio.read_url(endpoint + query)
def efetch(db, ids, rettype, retmode="xml", email=None):
    """
    Raw fetch: builds an ``efetch.fcgi`` query URL on
    ``ENTREZ_EUTILS_BASE_URL``, opens it with ``urlopen`` and returns the
    response object.

    ``ids`` is either a ready-made id string, which is used as given, or an
    iterable of ids, which is de-duplicated and joined with commas. An
    ``email`` parameter is added to the query only when ``email`` is not
    |None|.
    """
    if textprocessing.is_str_type(ids):
        id_list = ids
    else:
        unique_ids = set(ids)
        id_list = ",".join(str(i) for i in unique_ids)
    params = {
        'db': db,
        'id': id_list,
        'rettype': rettype,
        'retmode': retmode,
    }
    if email is not None:
        params["email"] = email
    endpoint = ENTREZ_EUTILS_BASE_URL + "/efetch.fcgi?"
    return urlopen(endpoint + urlencode(params))
def _run_vcs(self, cmd):
    """
    Runs the version control program ``self.vcs_app_path`` with ``cmd``,
    using ``self.repo_path`` as the working directory.

    Parameters
    ----------
    cmd : string or iterable of strings
        If a string, it is appended to the program path and the resulting
        command line is run through the shell. If an iterable of
        arguments, the program is executed directly with those arguments
        (no shell). The iterable passed in is not modified.

    Returns
    -------
    retcode : exit value of the process, or -999 if it could not be run.
    stdout : standard output as returned by ``processio.communicate``
        (empty string if the process could not be run).
    stderr : standard error as returned by ``processio.communicate``
        (message of the ``OSError`` if the process could not be run).
    """
    use_shell = textprocessing.is_str_type(cmd)
    if use_shell:
        invocation = self.vcs_app_path + " " + cmd
    else:
        # Build a fresh argument list rather than inserting into the
        # caller's list. The list form must not go through the shell:
        # on POSIX, ``shell=True`` with a sequence executes only the first
        # item as the command, so the arguments would never reach the
        # program.
        invocation = [self.vcs_app_path] + list(cmd)
    try:
        p = subprocess.Popen(invocation,
                shell=use_shell,
                cwd=os.path.abspath(self.repo_path),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
        stdout, stderr = processio.communicate(p)
        retcode = p.returncode
    except OSError as e:
        return -999, "", str(e)
    return retcode, stdout, stderr
def iterate_over_file(self, current_file):
    """
    Generator yielding every item that ``self._yield_items_from_stream``
    produces for ``current_file``.

    Parameters
    ----------
    current_file : string or file-like object
        If a string, it is taken to be a path and opened for reading.
        Otherwise it is used directly as the stream to read from.

    While iteration is in progress, ``self._current_file`` refers to the
    stream being read and ``self._current_file_name`` to its path (for a
    stream passed in by the caller: its ``name`` attribute, or |None| if it
    has none). If the stream supports the context-manager protocol, it is
    read inside a ``with`` block and is therefore closed once iteration
    ends. ``self._current_file`` is reset to |None| when the generator
    finishes, raises, or is closed.
    """
    if textprocessing.is_str_type(current_file):
        self._current_file = open(current_file, "r")
        self._current_file_name = current_file
    else:
        self._current_file = current_file
        # Take the name from the stream that was actually passed in.
        self._current_file_name = getattr(current_file, "name", None)
    try:
        if hasattr(self._current_file, "__exit__"):
            with self._current_file:
                for item in self._yield_items_from_stream(stream=self._current_file):
                    yield item
        else:
            # StringIO does not support ``with``
            for item in self._yield_items_from_stream(stream=self._current_file):
                yield item
    finally:
        # Do not leave a stale (possibly closed) stream behind, even if
        # parsing failed or the consumer stopped iterating early.
        self._current_file = None
def new_char_matrix(self, char_matrix_type, *args, **kwargs):
    """
    Creation and accession of new |CharacterMatrix| (of class
    ``char_matrix_type``) into ``chars`` of self.

    ``char_matrix_type`` is either a string, in which case it is passed as
    ``data_type`` to ``charmatrixmodel.new_char_matrix``, or a callable
    that is invoked directly to build the matrix. All other positional and
    keyword arguments are forwarded to whichever of the two is used.

    If this object has an attached taxon namespace, it is supplied as the
    ``taxon_namespace`` keyword argument; explicitly passing a different
    ``taxon_namespace`` raises ``TypeError``.

    Returns the result of ``self.add_char_matrix`` on the new matrix.
    """
    attached_namespace = self.attached_taxon_namespace
    if attached_namespace is not None:
        requested_namespace = kwargs.get("taxon_namespace", attached_namespace)
        if requested_namespace is not attached_namespace:
            raise TypeError("DataSet object is attached to TaxonNamespace %s, but 'taxon_namespace' argument specifies different TaxonNamespace %s" % (
                repr(attached_namespace), repr(requested_namespace)))
        kwargs["taxon_namespace"] = attached_namespace
    if not textprocessing.is_str_type(char_matrix_type):
        new_matrix = char_matrix_type(*args, **kwargs)
    else:
        new_matrix = charmatrixmodel.new_char_matrix(
                data_type=char_matrix_type,
                *args,
                **kwargs)
    return self.add_char_matrix(new_matrix)
def stage_load_trees(self,
        tree_filepaths,
        is_rooted=None,
        use_tree_weights=None,
        burnin=None,
        mode=7):
    """
    Composes commands to load a set of trees into PAUP*, with the specified
    number of burnin dropped.

    A ``gett`` command is appended to ``self.commands`` for each path in
    ``tree_filepaths`` (which must not be a bare string), and
    ``self.commands`` is returned.

    - ``is_rooted``: |None| for no rooting option, otherwise selects
      ``rooted=yes`` (true) or ``unrooted=yes`` (false).
    - ``use_tree_weights``: |None| for no weights option, otherwise
      selects ``storetreewts=yes`` (true) or ``storetreewts=no`` (false).
    - ``burnin``: number of leading trees to skip (|None| means 0); trees
      are read from position ``burnin + 1``.
    - ``mode``: passed through as the ``mode`` option; the default of 7
      keeps trees in memory, specify 3 to clear.
    """
    if textprocessing.is_str_type(tree_filepaths):
        raise Exception("expecting list of filepaths, not string")

    rooting = ""
    if is_rooted is not None:
        rooting = "rooted=yes" if is_rooted else "unrooted=yes"

    treewts = ""
    if use_tree_weights is not None:
        treewts = "storetreewts=yes" if use_tree_weights else "storetreewts=no"

    if burnin is None:
        burnin = 0

    # The per-file placeholder is escaped with doubled braces so that it
    # survives this first round of formatting.
    shared_options = dict(
            rooting=rooting,
            treewts=treewts,
            burnin=burnin + 1,
            mode=mode)
    gettree_template = "gett file= '{{tree_filepath}}' storebrlens=yes warntree=no {rooting} {treewts} from={burnin} mode={mode};".format(**shared_options)

    for tree_filepath in tree_filepaths:
        # using relpath because of a bug in PAUP* 4.0b10 with long paths
        # passed to gettrees
        relative_path = os.path.relpath(tree_filepath)
        command = gettree_template.format(tree_filepath=relative_path)
        self.commands.append(command)
    return self.commands
def add_translate_token(self, token, taxon):
    """
    Registers ``taxon`` in ``self.token_taxon_map`` under ``token``,
    converting ``token`` to a string first if it is not one already.
    """
    key = token if textprocessing.is_str_type(token) else str(token)
    self.token_taxon_map[key] = taxon
def estimate_tree(char_matrix,
        tree_est_criterion="likelihood",
        num_states=6,
        unequal_base_freqs=True,
        gamma_rates=True,
        prop_invar=True,
        extra_pre_est_commands=None,
        extra_post_est_commands=None,
        paup_path='paup'):
    """
    Given a dataset, ``char_matrix``, estimates a tree using the given
    criterion.

    The matrix is written in NEXUS format to a temporary file, a PAUP*
    script is composed and piped to ``paup_path -n``, and the tree that
    PAUP* saves to a second temporary file is read back and returned.

    Parameters
    ----------
    char_matrix : character matrix
        Data to analyze. Must provide ``write_to_stream`` and
        ``taxon_namespace``.
    tree_est_criterion : string
        "nj" or "upgma" run the command of that name; any other value is
        set as the optimality criterion and followed by ``hsearch``.
        Values starting with "like" additionally emit an ``lset`` command
        configured by the model arguments below.
    num_states : int
        Value of the ``nst`` option of ``lset``.
    unequal_base_freqs : bool
        If true, ``basefreq=estimate``; otherwise ``basefreq=equal``.
    gamma_rates : bool
        If true, ``rates=gamma``; otherwise ``rates=equal``.
    prop_invar : bool
        If true, ``pinvar=estimate``; otherwise ``pinvar=0``.
    extra_pre_est_commands : string or iterable of strings
        Extra commands inserted just before the estimation command.
    extra_post_est_commands : string or iterable of strings
        Extra commands inserted just after the estimation command.
    paup_path : string
        Path to the PAUP* executable.

    Returns
    -------
    The tree read from the PAUP* output file, using the taxon namespace of
    ``char_matrix``.
    """

    def _compose_extra_commands(extra_commands):
        # Accepts nothing, a single command or several; returns them as
        # one ";"-separated string (empty if there are none).
        if not extra_commands:
            return ""
        if textprocessing.is_str_type(extra_commands):
            extra_commands = [extra_commands]
        return ";\n".join(extra_commands)

    is_distance_method = tree_est_criterion in ("nj", "upgma")

    paup_args = {
        'nst': num_states,
        'basefreq': 'estimate' if unequal_base_freqs else 'equal',
        'rates': 'gamma' if gamma_rates else 'equal',
        'pinvar': 'estimate' if prop_invar else '0',
        # Substituted together with everything else, so that the
        # criterion is never itself interpreted as a format string.
        'criterion': tree_est_criterion,
        'pre_est_commands': _compose_extra_commands(extra_pre_est_commands),
        'post_est_commands': _compose_extra_commands(extra_post_est_commands),
    }

    # One PAUP* command per line; each is terminated by a semicolon.
    template_lines = [
        "set warnreset=no;",
        "exe %(datafile)s;",
    ]
    if tree_est_criterion.startswith("like"):
        template_lines.append(
            "lset tratio=estimate rmatrix=estimate nst=%(nst)s"
            " basefreq=%(basefreq)s rates=%(rates)s shape=estimate"
            " pinvar=%(pinvar)s userbrlens=yes;")
    if not is_distance_method:
        template_lines.append("set crit=%(criterion)s;")
    template_lines.append("%(pre_est_commands)s;")
    if is_distance_method:
        template_lines.append("%(criterion)s;")
    else:
        template_lines.append("hsearch;")
    template_lines.append("%(post_est_commands)s;")
    template_lines.append(
        "savetrees file=%(est_tree_file)s format=nexus root=yes brlens=yes"
        " taxablk=yes maxdecimals=20;")
    paup_template = "\n".join(template_lines) + "\n"

    # Both temporary files are deleted when closed; the ``with`` blocks
    # guarantee that this happens even if writing the data, running PAUP*
    # or parsing its output fails.
    with tempfile.NamedTemporaryFile("w", delete=True) as cf:
        char_matrix.write_to_stream(cf, schema='nexus')
        cf.flush()
        paup_args['datafile'] = cf.name
        with tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
            output_tree_filepath = output_tree_file_handle.name
            paup_args['est_tree_file'] = output_tree_filepath
            # The command line is handed to the shell as a single string.
            paup_run = subprocess.Popen('%s -n' % paup_path,
                    shell=True,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE)
            processio.communicate(paup_run, paup_template % paup_args)
            return dendropy.Tree.get_from_path(
                    output_tree_filepath,
                    "nexus",
                    taxon_namespace=char_matrix.taxon_namespace)
def call(
        paup_commands,
        suppress_standard_preamble=False,
        ignore_error_returncode=False,
        ignore_nonempty_stderr=False,
        strip_extraneous_prompts_from_stdout=True,
        strip_extraneous_prompts_from_stderr=True,
        cwd=None,
        env=None,
        paup_path=PAUP_PATH
        ):
    """
    Executes a sequence of commands in PAUP* and returns the results.

    The commands are joined into a single script, each terminated by a
    semicolon and followed by a final ``quit``, and piped to the standard
    input of ``paup_path -n -u``.

    Parameters
    ----------
    paup_commands : iterable of strings
        A list or some other iterable of strings representing PAUP
        commands. A single string is treated as one command.
    suppress_standard_preamble : bool
        If |True|, then the command sequence will not be prefaced by the
        standard preamble.
    ignore_error_returncode : bool
        If |True|, then a non-0 return code from the PAUP process will not
        result in an exception being raised.
    ignore_nonempty_stderr : bool
        If |True|, then the PAUP process writing to standard error will not
        result in an exception being raised.
    strip_extraneous_prompts_from_stdout : bool
        If |True|, then all occurrences of 'paup>' will be removed from the
        standard output contents.
    strip_extraneous_prompts_from_stderr : bool
        If |True|, then all occurrences of 'paup>' will be removed from the
        standard error contents.
    cwd : string
        Set the working directory of the PAUP* process to this directory.
    env : dictionary
        Environmental variables to set for the PAUP* process.
    paup_path : string
        Path to the PAUP* executable.

    Returns
    -------
    returncode : exit value of PAUP process.
    stdout : string
        Contents of the PAUP process standard output.
    stderr : string
        Contents of the PAUP process standard error.

    Raises
    ------
    error.ExternalServiceError
        If the return code is non-0, or if the standard error is non-empty
        once 'paup>' prompts are discounted, unless the corresponding
        ``ignore_*`` argument is set. The exception carries the unstripped
        output.
    """
    prompt = "paup>"

    if textprocessing.is_str_type(paup_commands):
        commands = [paup_commands]
    else:
        commands = list(paup_commands)
    if not suppress_standard_preamble:
        commands = [STANDARD_PREAMBLE] + commands
    commands.append("quit")
    paup_block = ";\n".join(commands) + ";\n"

    invocation_command = [paup_path, "-n", "-u"]
    p = subprocess.Popen(
            invocation_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            env=env,
            )
    raw_stdout, raw_stderr = processio.communicate(p, paup_block)

    # PAUP* can emit a large number of spurious prompts.
    if strip_extraneous_prompts_from_stdout:
        stdout = raw_stdout.replace(prompt, "")
    else:
        stdout = raw_stdout
    # Prompts never count as error output, whether or not they are also
    # stripped from the value handed back to the caller.
    chk_stderr = raw_stderr.replace(prompt, "")
    if strip_extraneous_prompts_from_stderr:
        stderr = chk_stderr
    else:
        stderr = raw_stderr

    bad_returncode = p.returncode != 0 and not ignore_error_returncode
    bad_stderr = chk_stderr != "" and not ignore_nonempty_stderr
    if bad_returncode or bad_stderr:
        raise error.ExternalServiceError(
                service_name="PAUP*",
                invocation_command=invocation_command,
                service_input=paup_block,
                returncode=p.returncode,
                stdout=raw_stdout,
                stderr=raw_stderr)
    return p.returncode, stdout, stderr
def call(r_commands,
        ignore_error_returncode=False,
        cwd=None,
        env=None,
        rscript_path=RSCRIPT_EXECUTABLE,
        ):
    r"""
    Executes a sequence of commands in R and returns the results. All the
    noise is sunk into the stderr return variable, and just the output
    comes out cleanly in the stdout return variable.

    Parameters
    ----------
    r_commands : iterable of strings
        A list or some other iterable of strings of R commands. These are
        joined with newlines into a single script; a string is taken to be
        a complete script already. A trailing newline is appended.
    ignore_error_returncode : bool
        If |True|, then a non-0 return code from the R process will not
        result in an exception being raised.
    cwd : string
        Set the working directory of the R process to this directory.
    env : dictionary
        Environmental variables to set for the R process.
    rscript_path : string
        Path to the Rscript executable.

    Returns
    -------
    returncode : exit value of the R process
    stdout : string
        Contents of the R process standard output.
    stderr : string
        Contents of the R process standard error.

    Raises
    ------
    error.ExternalServiceError
        If the R process exits with a non-0 return code and
        ``ignore_error_returncode`` is not set.

    Examples
    --------

    Build up a script (``s``) to calculate a range of values, print them
    to the standard output, and then post-process this to extract the
    values::

        import itertools
        from dendropy.interop import rstats

        bb = [0.01, 0.05, 0.10, 0.50, 1.0]
        cc = [0.01, 0.05, 0.10, 0.50, 1.0]
        ee = [0.0, 0.1, 0.2]

        # store commands of script as a list
        # to be passed to the ``call()``
        s = []

        # set options, load required libraries, etc.
        s.append("options(digits=22)")
        s.append("library(PBD)")

        # build up list of commands in script
        for b, c, e in itertools.product(bb, cc, ee):
            s.append("print(pbd_durspec_mean(pars=c({},{},{})))".format(b, c, e))

        # execute script
        returncode, stdout, stderr = rstats.call(s)

        # peek at the results
        print(stdout)

        # [1] 69.31472
        # [1] 9.853723
        # [1] 4.981369
        # [1] 0.9950331
        # ...

        # post-process the stdout to extract values
        results = [float(x.split(" ")[1]) for x in stdout.split("\n") if x]

    Notes
    -----

    Note that newlines ('\n') and other special characters will be
    converted by Python before being passed to the R interpreter, so they
    need to be escaped or entered as raw string expressions. That is,
    instead of, e.g.::

        returncode, stdout, stderr = rstats.call([
            "cat('hello, world\n')",
        ])

    use this::

        returncode, stdout, stderr = rstats.call([
            "cat('hello, world\\n')",
        ])

    or::

        returncode, stdout, stderr = rstats.call([
            r"cat('hello, world\n')",
        ])

    """
    if not textprocessing.is_str_type(r_commands):
        r_commands = "\n".join(r_commands)
    r_commands += "\n"
    # Honor the caller-supplied executable rather than always running the
    # module-level default.
    invocation_command = [rscript_path, rsubprocess_pipe_path]
    p = subprocess.Popen(
            invocation_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            env=env,
            )
    stdout, stderr = processio.communicate(p, r_commands)
    if p.returncode != 0 and not ignore_error_returncode:
        raise error.ExternalServiceError(
                service_name="Rscript",
                invocation_command=invocation_command,
                service_input=r_commands,
                returncode=p.returncode,
                stdout=stdout,
                stderr=stderr)
    return p.returncode, stdout, stderr