示例#1
0
 def _write_comments(self, stream, comments):
     """
     Writes ``comments`` to ``stream`` as square-bracketed comment blocks.

     ``comments`` may be a single string or an iterable of items.

     If ``self.allow_multiline_comments`` is true, a string is written as
     one ``[...]`` block, while an iterable has its items converted with
     ``str``, joined with newlines, and written as a single block whose
     brackets sit on their own lines.

     Otherwise a string is split on line breaks (empty pieces are
     discarded), and each piece -- or each item of the iterable -- is
     written as its own one-line ``[...]`` block.
     """
     if self.allow_multiline_comments:
         if not textprocessing.is_str_type(comments):
             joined = "\n".join(str(entry) for entry in comments)
             stream.write("[\n{}\n]\n".format(joined))
         else:
             stream.write("[{}]\n".format(comments))
         return
     entries = comments
     if textprocessing.is_str_type(entries):
         # fold every line-ending convention to "\n" before splitting
         unified = entries.replace("\r\n", "\n").replace("\n\r", "\n").replace("\r", "\n")
         entries = [piece for piece in re.split(r'[\r\n]+', unified) if piece]
     for entry in entries:
         stream.write("[{}]\n".format(entry))
示例#2
0
 def _write_comments(self, stream, comments):
     """
     Writes ``comments`` to ``stream`` as square-bracketed comment blocks.

     ``comments`` may be a single string or an iterable of items.

     If ``self.allow_multiline_comments`` is true, a string is written as
     one ``[...]`` block, while an iterable has its items converted with
     ``str``, joined with newlines, and written as a single block whose
     brackets sit on their own lines.

     Otherwise a string is split on line breaks (empty pieces are
     discarded), and each piece -- or each item of the iterable -- is
     written as its own one-line ``[...]`` block.
     """
     if self.allow_multiline_comments:
         if not textprocessing.is_str_type(comments):
             joined = "\n".join(str(entry) for entry in comments)
             stream.write("[\n{}\n]\n".format(joined))
         else:
             stream.write("[{}]\n".format(comments))
         return
     entries = comments
     if textprocessing.is_str_type(entries):
         # fold every line-ending convention to "\n" before splitting
         unified = entries.replace("\r\n", "\n").replace("\n\r", "\n").replace("\r", "\n")
         entries = [piece for piece in re.split(r'[\r\n]+', unified) if piece]
     for entry in entries:
         stream.write("[{}]\n".format(entry))
示例#3
0
 def lookup_taxon_symbol(self, symbol, create_taxon_if_not_found=True):
     """
     Resolves ``symbol`` to a taxon using the lookup maps of ``self``.

     A non-string ``symbol`` is first converted with ``str``. The token
     map is consulted first, then the label map, and -- only when
     ``self.enable_lookup_by_taxon_number`` is true -- the number map.
     The first map that has the symbol supplies the return value.

     If no map has the symbol, the result of ``self.new_taxon(symbol)``
     is returned when ``create_taxon_if_not_found`` is true, and ``None``
     otherwise.
     """
     key = symbol if textprocessing.is_str_type(symbol) else str(symbol)
     # maps are fetched lazily, one at a time, in priority order
     for map_name in ("token_taxon_map", "label_taxon_map"):
         try:
             return getattr(self, map_name)[key]
         except KeyError:
             continue
     if self.enable_lookup_by_taxon_number:
         try:
             return self.number_taxon_map[key]
         except KeyError:
             pass
     if not create_taxon_if_not_found:
         return None
     return self.new_taxon(key)
示例#4
0
 def lookup_taxon_symbol(self, symbol, create_taxon_if_not_found=True):
     """
     Resolves ``symbol`` to a taxon using the lookup maps of ``self``.

     A non-string ``symbol`` is first converted with ``str``. The token
     map is consulted first, then the label map, and -- only when
     ``self.enable_lookup_by_taxon_number`` is true -- the number map.
     The first map that has the symbol supplies the return value.

     If no map has the symbol, the result of ``self.new_taxon(symbol)``
     is returned when ``create_taxon_if_not_found`` is true, and ``None``
     otherwise.
     """
     key = symbol if textprocessing.is_str_type(symbol) else str(symbol)
     # maps are fetched lazily, one at a time, in priority order
     for map_name in ("token_taxon_map", "label_taxon_map"):
         try:
             return getattr(self, map_name)[key]
         except KeyError:
             continue
     if self.enable_lookup_by_taxon_number:
         try:
             return self.number_taxon_map[key]
         except KeyError:
             pass
     if not create_taxon_if_not_found:
         return None
     return self.new_taxon(key)
示例#5
0
文件: paup.py 项目: wook2014/DendroPy
 def stage_load_trees(self,
         tree_filepaths,
         is_rooted=None,
         use_tree_weights=None,
         burnin=None,
         mode=7):  # keep trees in memory, specify 3 to clear
     """
     Appends one PAUP* ``gett`` command per tree file to ``self.commands``.

     Parameters
     ----------
     tree_filepaths : iterable of strings
         Paths of the tree files to load. Passing a single string raises
         an ``Exception``.
     is_rooted : bool or None
         Adds ``rooted=yes`` if true, ``unrooted=yes`` if false, and no
         rooting option if ``None``.
     use_tree_weights : bool or None
         Adds ``storetreewts=yes`` if true, ``storetreewts=no`` if false,
         and no tree-weight option if ``None``.
     burnin : int or None
         Number of leading trees to drop; ``None`` is treated as 0. The
         command is composed with ``from=`` set to ``burnin + 1``.
     mode : int
         Value given to the ``mode=`` option of the command.

     Returns
     -------
     ``self.commands``, with the new commands appended.
     """
     if textprocessing.is_str_type(tree_filepaths):
         raise Exception("expecting list of filepaths, not string")
     rooting = "" if is_rooted is None else ("rooted=yes" if is_rooted else "unrooted=yes")
     treewts = "" if use_tree_weights is None else (
         "storetreewts=yes" if use_tree_weights else "storetreewts=no")
     first_tree = (0 if burnin is None else burnin) + 1
     # the doubled braces survive this first pass, leaving a
     # ``{tree_filepath}`` slot to be filled in per file below
     gettree_template = "gett file= '{{tree_filepath}}' storebrlens=yes warntree=no {rooting} {treewts} from={burnin} mode={mode};".format(
         rooting=rooting, treewts=treewts, burnin=first_tree, mode=mode)
     for tree_filepath in tree_filepaths:
         # using relpath because of a bug in PAUP* 4.0b10 with long paths passed to gettrees
         relative_path = os.path.relpath(tree_filepath)
         self.commands.append(gettree_template.format(tree_filepath=relative_path))
     return self.commands
示例#6
0
 def _compose_comment_string(self, item):
     """
     Returns the comments of ``item`` as a run of ``[...]`` blocks.

     Each comment is wrapped in square brackets and the blocks are
     concatenated without a separator. An empty string is returned if
     ``self.suppress_item_comments`` is true or ``item.comments`` is
     empty.

     Note that if ``item.comments`` is a single string, it is replaced on
     ``item`` by a one-element list holding that string.
     """
     if self.suppress_item_comments or not item.comments:
         return ""
     if textprocessing.is_str_type(item.comments):
         # normalizes the attribute on the item itself, not a local copy
         item.comments = [item.comments]
     return "".join("[{}]".format(text) for text in item.comments)
示例#7
0
 def _compose_comment_string(self, item):
     """
     Returns the comments of ``item`` as a run of ``[...]`` blocks.

     Each comment is wrapped in square brackets and the blocks are
     concatenated without a separator. An empty string is returned if
     ``self.suppress_item_comments`` is true or ``item.comments`` is
     empty.

     Note that if ``item.comments`` is a single string, it is replaced on
     ``item`` by a one-element list holding that string.
     """
     if self.suppress_item_comments or not item.comments:
         return ""
     if textprocessing.is_str_type(item.comments):
         # normalizes the attribute on the item itself, not a local copy
         item.comments = [item.comments]
     return "".join("[{}]".format(text) for text in item.comments)
示例#8
0
文件: ncbi.py 项目: Zsailer/DendroPy
 def fetch(self, db, ids, rettype):
     """
     Raw fetch: composes an ``efetch.fcgi`` query URL and returns whatever
     ``urlio.read_url`` produces for it.

     ``ids`` is either a ready-made ID string, used verbatim, or an
     iterable of IDs, which is de-duplicated through a ``set`` (so order
     is not preserved) and joined with commas. The query always asks for
     ``retmode=text``.
     """
     if not textprocessing.is_str_type(ids):
         ids = ",".join(str(identifier) for identifier in set(ids))
     params = {'db': db, 'id': ids, 'rettype': rettype, 'retmode': 'text'}
     query_string = urlio.urlencode(params)
     return urlio.read_url(Entrez.BASE_URL + "/efetch.fcgi?" + query_string)
示例#9
0
def efetch(db, ids, rettype, retmode="xml", email=None):
    """
    Raw fetch: composes an ``efetch.fcgi`` query URL and returns the
    response object obtained by opening it with ``urlopen``.

    ``ids`` is either a ready-made ID string, used verbatim, or an
    iterable of IDs, which is de-duplicated through a ``set`` (so order is
    not preserved) and joined with commas. ``email`` is added to the query
    parameters only when it is not ``None``.
    """
    if not textprocessing.is_str_type(ids):
        ids = ",".join(str(identifier) for identifier in set(ids))
    params = {
        'db': db,
        'id': ids,
        'rettype': rettype,
        'retmode': retmode,
    }
    if email is not None:
        params["email"] = email
    return urlopen(ENTREZ_EUTILS_BASE_URL + "/efetch.fcgi?" + urlencode(params))
示例#10
0
文件: ncbi.py 项目: wook2014/DendroPy
 def fetch(self, db, ids, rettype):
     """
     Raw fetch: composes an ``efetch.fcgi`` query URL and returns whatever
     ``urlio.read_url`` produces for it.

     ``ids`` is either a ready-made ID string, used verbatim, or an
     iterable of IDs, which is de-duplicated through a ``set`` (so order
     is not preserved) and joined with commas. The query always asks for
     ``retmode=text``.
     """
     if not textprocessing.is_str_type(ids):
         ids = ",".join(str(identifier) for identifier in set(ids))
     params = {'db': db, 'id': ids, 'rettype': rettype, 'retmode': 'text'}
     query_string = urlio.urlencode(params)
     return urlio.read_url(Entrez.BASE_URL + "/efetch.fcgi?" + query_string)
示例#11
0
 def _run_vcs(self, cmd):
     """
     Runs the version control executable at ``self.vcs_app_path`` with
     ``cmd``, using ``self.repo_path`` as the working directory.

     Parameters
     ----------
     cmd : string or sequence of strings
         Arguments for the executable. A string is appended to the
         executable path and run through the shell. A sequence is run
         directly as an argument vector, without a shell; the sequence
         passed in is not modified.

     Returns
     -------
     retcode : exit value of the process, or -999 if it could not be
         started.
     stdout : contents of the process standard output ("" on failure to
         start).
     stderr : contents of the process standard error (the ``OSError``
         message on failure to start).
     """
     if textprocessing.is_str_type(cmd):
         invocation = self.vcs_app_path + " " + cmd
         use_shell = True
     else:
         # Build a new list instead of inserting into the caller's, and
         # skip the shell: with ``shell=True`` on POSIX only the first
         # item of a sequence is run as the command, the remaining items
         # being handed to the shell itself as its own arguments.
         invocation = [self.vcs_app_path] + list(cmd)
         use_shell = False
     try:
         p = subprocess.Popen(invocation,
             shell=use_shell,
             cwd=os.path.abspath(self.repo_path),
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(p)
         retcode = p.returncode
     except OSError as e:
         return -999, "", str(e)
     return retcode, stdout, stderr
示例#12
0
 def _run_vcs(self, cmd):
     """
     Runs the version control executable at ``self.vcs_app_path`` with
     ``cmd``, using ``self.repo_path`` as the working directory.

     Parameters
     ----------
     cmd : string or sequence of strings
         Arguments for the executable. A string is appended to the
         executable path and run through the shell. A sequence is run
         directly as an argument vector, without a shell; the sequence
         passed in is not modified.

     Returns
     -------
     retcode : exit value of the process, or -999 if it could not be
         started.
     stdout : contents of the process standard output ("" on failure to
         start).
     stderr : contents of the process standard error (the ``OSError``
         message on failure to start).
     """
     if textprocessing.is_str_type(cmd):
         invocation = self.vcs_app_path + " " + cmd
         use_shell = True
     else:
         # Build a new list instead of inserting into the caller's, and
         # skip the shell: with ``shell=True`` on POSIX only the first
         # item of a sequence is run as the command, the remaining items
         # being handed to the shell itself as its own arguments.
         invocation = [self.vcs_app_path] + list(cmd)
         use_shell = False
     try:
         p = subprocess.Popen(invocation,
             shell=use_shell,
             cwd=os.path.abspath(self.repo_path),
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         stdout, stderr = processio.communicate(p)
         retcode = p.returncode
     except OSError as e:
         return -999, "", str(e)
     return retcode, stdout, stderr
示例#13
0
def efetch(db, ids, rettype, retmode="xml", email=None):
    """
    Raw fetch: composes an ``efetch.fcgi`` query URL and returns the
    response object obtained by opening it with ``urlopen``.

    ``ids`` is either a ready-made ID string, used verbatim, or an
    iterable of IDs, which is de-duplicated through a ``set`` (so order is
    not preserved) and joined with commas. ``email`` is added to the query
    parameters only when it is not ``None``.
    """
    if not textprocessing.is_str_type(ids):
        ids = ",".join(str(identifier) for identifier in set(ids))
    params = {'db': db, 'id': ids, 'rettype': rettype, 'retmode': retmode}
    if email is not None:
        params["email"] = email
    return urlopen(ENTREZ_EUTILS_BASE_URL + "/efetch.fcgi?" + urlencode(params))
示例#14
0
 def iterate_over_file(self, current_file):
     """
     Yields, one at a time, the items that
     ``self._yield_items_from_stream`` produces for ``current_file``.

     Parameters
     ----------
     current_file : string or file-like object
         Either a path, which is opened here for reading in text mode, or
         an already open stream.

     While iteration is in progress the stream is held in
     ``self._current_file`` and its name in ``self._current_file_name``:
     the path as given, or the ``name`` attribute of the stream, or
     ``None`` if the stream has no such attribute. A stream that supports
     the context manager protocol is closed when iteration over it ends.
     ``self._current_file`` is reset to ``None`` afterwards, including
     when iteration fails or is abandoned part-way.
     """
     if textprocessing.is_str_type(current_file):
         self._current_file = open(current_file, "r")
         self._current_file_name = current_file
     else:
         self._current_file = current_file
         # Take the name from the stream that was passed in; streams such
         # as StringIO have no ``name`` attribute.
         self._current_file_name = getattr(current_file, "name", None)
     try:
         if hasattr(self._current_file, "__exit__"):
             with self._current_file:
                 for item in self._yield_items_from_stream(stream=self._current_file):
                     yield item
         else:
             # StringIO does not support ``with``
             for item in self._yield_items_from_stream(stream=self._current_file):
                 yield item
     finally:
         self._current_file = None
示例#15
0
 def new_char_matrix(self, char_matrix_type, *args, **kwargs):
     """
     Creates a new |CharacterMatrix| and adds it to ``self`` through
     ``add_char_matrix``, returning the result of that call.

     ``char_matrix_type`` is either a string, which is passed as
     ``data_type`` to ``charmatrixmodel.new_char_matrix``, or a callable
     such as a class, which is called directly. In both cases ``*args``
     and ``**kwargs`` are forwarded.

     If ``self.attached_taxon_namespace`` is not ``None``, it is passed
     on as the ``taxon_namespace`` keyword argument; supplying a
     different ``taxon_namespace`` raises ``TypeError``.
     """
     attached_namespace = self.attached_taxon_namespace
     if attached_namespace is not None:
         # an absent keyword is treated as agreeing with the attached namespace
         requested_namespace = kwargs.get("taxon_namespace", attached_namespace)
         if requested_namespace is not attached_namespace:
             raise TypeError("DataSet object is attached to TaxonNamespace %s, but 'taxon_namespace' argument specifies different TaxonNamespace %s" % (
                 repr(attached_namespace), repr(requested_namespace)))
         kwargs["taxon_namespace"] = attached_namespace
     if not textprocessing.is_str_type(char_matrix_type):
         created = char_matrix_type(*args, **kwargs)
     else:
         created = charmatrixmodel.new_char_matrix(
                 *args,
                 data_type=char_matrix_type,
                 **kwargs)
     return self.add_char_matrix(created)
示例#16
0
文件: paup.py 项目: Zsailer/DendroPy
 def stage_load_trees(self,
         tree_filepaths,
         is_rooted=None,
         use_tree_weights=None,
         burnin=None,
         mode=7): # keep trees in memory, specify 3 to clear
     """
     Appends one PAUP* ``gett`` command per tree file to ``self.commands``.

     Parameters
     ----------
     tree_filepaths : iterable of strings
         Paths of the tree files to load. Passing a single string raises
         an ``Exception``.
     is_rooted : bool or None
         Adds ``rooted=yes`` if true, ``unrooted=yes`` if false, and no
         rooting option if ``None``.
     use_tree_weights : bool or None
         Adds ``storetreewts=yes`` if true, ``storetreewts=no`` if false,
         and no tree-weight option if ``None``.
     burnin : int or None
         Number of leading trees to drop; ``None`` is treated as 0. The
         command is composed with ``from=`` set to ``burnin + 1``.
     mode : int
         Value given to the ``mode=`` option of the command.

     Returns
     -------
     ``self.commands``, with the new commands appended.
     """
     if textprocessing.is_str_type(tree_filepaths):
         raise Exception("expecting list of filepaths, not string")
     rooting = "" if is_rooted is None else ("rooted=yes" if is_rooted else "unrooted=yes")
     treewts = "" if use_tree_weights is None else (
         "storetreewts=yes" if use_tree_weights else "storetreewts=no")
     first_tree = (0 if burnin is None else burnin) + 1
     # the doubled braces survive this first pass, leaving a
     # ``{tree_filepath}`` slot to be filled in per file below
     gettree_template = "gett file= '{{tree_filepath}}' storebrlens=yes warntree=no {rooting} {treewts} from={burnin} mode={mode};".format(
         rooting=rooting, treewts=treewts, burnin=first_tree, mode=mode)
     for tree_filepath in tree_filepaths:
         # using relpath because of a bug in PAUP* 4.0b10 with long paths passed to gettrees
         relative_path = os.path.relpath(tree_filepath)
         self.commands.append(gettree_template.format(tree_filepath=relative_path))
     return self.commands
示例#17
0
 def add_translate_token(self, token, taxon):
     """
     Registers ``taxon`` in ``self.token_taxon_map`` under ``token``.

     A ``token`` that is not a string is converted with ``str`` before
     being used as the key.
     """
     key = token if textprocessing.is_str_type(token) else str(token)
     self.token_taxon_map[key] = taxon
示例#18
0
def estimate_tree(char_matrix,
                    tree_est_criterion="likelihood",
                    num_states=6,
                    unequal_base_freqs=True,
                    gamma_rates=True,
                    prop_invar=True,
                    extra_pre_est_commands=None,
                    extra_post_est_commands=None,
                    paup_path='paup'):
    """
    Given a dataset, ``char_matrix``, estimates a tree using the given criterion.

    The matrix is written to a temporary NEXUS file, a PAUP* script is
    piped to a ``paup -n`` process, and the tree that PAUP* saves to a
    second temporary file is read back. Both temporary files are removed
    when the function exits, whether it succeeds or raises.

    Parameters
    ----------
    char_matrix : character matrix
        Data to analyze; must provide ``write_to_stream`` and
        ``taxon_namespace``.
    tree_est_criterion : string
        ``"nj"`` or ``"upgma"`` run the command of that name; anything
        else is set as ``crit=`` and followed by ``hsearch``. Values
        starting with ``"like"`` additionally emit an ``lset`` command
        built from the model arguments below.
    num_states : int
        Value for ``nst=``.
    unequal_base_freqs : bool
        ``basefreq=estimate`` if true, ``basefreq=equal`` otherwise.
    gamma_rates : bool
        ``rates=gamma`` if true, ``rates=equal`` otherwise.
    prop_invar : bool
        ``pinvar=estimate`` if true, ``pinvar=0`` otherwise.
    extra_pre_est_commands : string or iterable of strings
        Extra commands inserted before the estimation command.
    extra_post_est_commands : string or iterable of strings
        Extra commands inserted after the estimation command.
    paup_path : string
        Path to the PAUP* executable.

    Returns
    -------
    The tree read from the PAUP* output file, using the taxon namespace
    of ``char_matrix``.
    """
    paup_args = {
        'nst': num_states,
        'basefreq': 'estimate' if unequal_base_freqs else 'equal',
        'rates': 'gamma' if gamma_rates else 'equal',
        'pinvar': 'estimate' if prop_invar else '0',
    }
    if extra_pre_est_commands:
        if textprocessing.is_str_type(extra_pre_est_commands):
            extra_pre_est_commands = [extra_pre_est_commands]
        paup_args["pre_est_commands"] = ";\n".join(extra_pre_est_commands)
    else:
        paup_args["pre_est_commands"] = ""
    if extra_post_est_commands:
        if textprocessing.is_str_type(extra_post_est_commands):
            extra_post_est_commands = [extra_post_est_commands]
        paup_args["post_est_commands"] = ";\n".join(extra_post_est_commands)
    else:
        paup_args["post_est_commands"] = ""

    # The script depends only on the criterion, so it is composed before
    # any temporary file exists; the file names are filled in below.
    paup_template = """\
    set warnreset=no;
    exe %(datafile)s;
    """
    if tree_est_criterion.startswith("like"):
        paup_template += """\
    lset tratio=estimate rmatrix=estimate nst=%(nst)s basefreq=%(basefreq)s rates=%(rates)s shape=estimate pinvar=%(pinvar)s userbrlens=yes;
    """
    if tree_est_criterion not in ["nj", "upgma"] :
        paup_template += """\
        set crit=%s;
        """ % tree_est_criterion
    paup_template += """\
    %(pre_est_commands)s;
    """

    if tree_est_criterion in ["nj", "upgma"] :
        paup_template += tree_est_criterion + ";"
    else:
        paup_template += "hsearch;"

    paup_template += """\
    %(post_est_commands)s;
    savetrees file=%(est_tree_file)s format=nexus root=yes brlens=yes taxablk=yes maxdecimals=20;
    """

    # ``with`` guarantees both temporary files are closed (and so deleted)
    # even if writing the data, running PAUP* or parsing the tree raises.
    with tempfile.NamedTemporaryFile("w", delete=True) as cf, \
            tempfile.NamedTemporaryFile("w+", delete=True) as output_tree_file_handle:
        char_matrix.write_to_stream(cf, schema='nexus')
        # PAUP* reads the file by name, so the data must be on disk
        cf.flush()
        paup_args['datafile'] = cf.name
        output_tree_filepath = output_tree_file_handle.name
        paup_args['est_tree_file'] = output_tree_filepath
        paup_run = subprocess.Popen(['%s -n' % paup_path],
                                    shell=True,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        processio.communicate(paup_run, paup_template % paup_args)
        return dendropy.Tree.get_from_path(output_tree_filepath, "nexus", taxon_namespace=char_matrix.taxon_namespace)
示例#19
0
    def call(
            paup_commands,
            suppress_standard_preamble=False,
            ignore_error_returncode=False,
            ignore_nonempty_stderr=False,
            strip_extraneous_prompts_from_stdout=True,
            strip_extraneous_prompts_from_stderr=True,
            cwd=None,
            env=None,
            paup_path=PAUP_PATH
            ):
        """
        Runs a sequence of commands through PAUP* and returns the outcome.

        The commands are joined into a single block, each terminated by a
        semicolon and a newline, with a final ``quit`` appended, and the
        block is passed to a PAUP* process started with ``-n -u``.

        Parameters
        ----------
        paup_commands : string or iterable of strings
            A single PAUP command, or a list or some other iterable of
            strings representing PAUP commands.
        suppress_standard_preamble : bool
            If |True|, the standard preamble is not placed in front of
            the command sequence.
        ignore_error_returncode : bool
            If |True|, a non-0 return code from the PAUP process does not
            cause an exception to be raised.
        ignore_nonempty_stderr : bool
            If |True|, output on the standard error of the PAUP process
            (other than 'paup>' prompts) does not cause an exception to
            be raised.
        strip_extraneous_prompts_from_stdout : bool
            If |True|, every occurrence of 'paup>' is removed from the
            returned standard output contents.
        strip_extraneous_prompts_from_stderr : bool
            If |True|, every occurrence of 'paup>' is removed from the
            returned standard error contents.
        cwd : string
            Working directory for the PAUP* process.
        env : dictionary
            Environmental variables for the PAUP* process.
        paup_path : string
            Path to the PAUP* executable.

        Returns
        -------
        returncode : exit value of PAUP process.
        stdout : string
            Contents of the PAUP process standard output.
        stderr : string
            Contents of the PAUP process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the process fails as described for
            ``ignore_error_returncode`` and ``ignore_nonempty_stderr``.
            The exception carries the unstripped output.
        """
        commands = []
        if not suppress_standard_preamble:
            commands.append(STANDARD_PREAMBLE)
        if textprocessing.is_str_type(paup_commands):
            commands.append(paup_commands)
        else:
            commands.extend(paup_commands)
        commands.append("quit")
        paup_block = ";\n".join(commands) + ";\n"
        invocation_command = [paup_path, "-n", "-u"]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        raw_stdout, raw_stderr = processio.communicate(p, paup_block)
        # weird dev/paup error ... lots of prompts spring up
        if strip_extraneous_prompts_from_stdout:
            stdout = raw_stdout.replace("paup>", "")
        else:
            stdout = raw_stdout
        # the error check always looks at the prompt-free text, whether or
        # not the caller asked for the returned text to be stripped
        chk_stderr = raw_stderr.replace("paup>", "")
        stderr = chk_stderr if strip_extraneous_prompts_from_stderr else raw_stderr
        bad_returncode = p.returncode != 0 and not ignore_error_returncode
        bad_stderr = chk_stderr != "" and not ignore_nonempty_stderr
        if bad_returncode or bad_stderr:
            raise error.ExternalServiceError(
                    service_name="PAUP*",
                    invocation_command=invocation_command,
                    service_input=paup_block,
                    returncode = p.returncode,
                    stdout=raw_stdout,
                    stderr=raw_stderr)
        return p.returncode, stdout, stderr
示例#20
0
    def call(r_commands,
            ignore_error_returncode=False,
            cwd=None,
            env=None,
            rscript_path=RSCRIPT_EXECUTABLE,
            ):
        r"""
        Executes a sequence of commands in R and returns the results. All the
        noise is sunk into the stderr return variable, and just the output
        comes out cleanly in the stdout return variable.

        Parameters
        ----------
        r_commands : string or iterable of strings
            A single string of R code, or a list or some other iterable of
            strings of R commands, which are joined with newlines.
        ignore_error_returncode : bool
            If |True|, then a non-0 return code from the R process will not
            result in an exception being raised.
        cwd : string
            Set the working directory of the R process to this directory.
        env : dictionary
            Environmental variables to set for the R process.
        rscript_path : string
            Path to the Rscript executable.

        Returns
        -------
        returncode : exit value of the R process
        stdout : string
            Contents of the R process standard output.
        stderr : string
            Contents of the R process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the R process exits with a non-0 return code and
            ``ignore_error_returncode`` is not |True|.

        Examples
        --------

        Build up a script (``s``) to calculate a range of values, print them
        to the standard output, and then post-process this to extract the
        values::

            import itertools
            from dendropy.interop import rstats

            bb = [0.01, 0.05, 0.10, 0.50, 1.0]
            cc = [0.01, 0.05, 0.10, 0.50, 1.0]
            ee = [0.0, 0.1, 0.2]

            # store commands of script as a list
            # to be passed to the ``call()``
            s = []

            # set options, load required libraries, etc.
            s.append("options(digits=22)")
            s.append("library(PBD)")

            # build up list of commands in script
            for b, c, e in itertools.product(bb, cc, ee):
                s.append("print(pbd_durspec_mean(pars=c({},{},{})))".format(b, c, e))

            # execute script
            returncode, stdout, stderr  = rstats.call(s)

            # peek at the results
            print(stdout)

            # [1] 69.31472
            # [1] 9.853723
            # [1] 4.981369
            # [1] 0.9950331
            # ...

            # post-process the stdout to extract values
            results = [float(x.split(" ")[1]) for x in stdout.split("\n") if x]

        Notes
        -----

        Note that newlines ('\n') and other special characters will be
        converted before being passed to the R interpreter, so need to
        be escaped or entered as raw string expressions.

        That is, instead of, e.g.::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\n')",
            ])

        use this::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\\n')",
            ])

        or::

            returncode, stdout, stderr = RService.call([
                r"cat('hello, world\n')",
            ])

        """
        if not textprocessing.is_str_type(r_commands):
            r_commands = "\n".join(r_commands)
        r_commands += "\n"
        # Use the interpreter the caller asked for; the parameter defaults
        # to RSCRIPT_EXECUTABLE, so existing callers are unaffected.
        invocation_command = [rscript_path, rsubprocess_pipe_path]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        stdout, stderr = processio.communicate(p, r_commands)
        if (p.returncode != 0 and not ignore_error_returncode):
            raise error.ExternalServiceError(
                    service_name="Rscript",
                    invocation_command=invocation_command,
                    service_input=r_commands,
                    returncode = p.returncode,
                    stdout=stdout,
                    stderr=stderr)
        return p.returncode, stdout, stderr
示例#21
0
    def call(r_commands,
            ignore_error_returncode=False,
            cwd=None,
            env=None,
            rscript_path=RSCRIPT_EXECUTABLE,
            ):
        r"""
        Executes a sequence of commands in R and returns the results. All the
        noise is sunk into the stderr return variable, and just the output
        comes out cleanly in the stdout return variable.

        Parameters
        ----------
        r_commands : string or iterable of strings
            A single string of R code, or a list or some other iterable of
            strings of R commands, which are joined with newlines.
        ignore_error_returncode : bool
            If |True|, then a non-0 return code from the R process will not
            result in an exception being raised.
        cwd : string
            Set the working directory of the R process to this directory.
        env : dictionary
            Environmental variables to set for the R process.
        rscript_path : string
            Path to the Rscript executable.

        Returns
        -------
        returncode : exit value of the R process
        stdout : string
            Contents of the R process standard output.
        stderr : string
            Contents of the R process standard error.

        Raises
        ------
        error.ExternalServiceError
            If the R process exits with a non-0 return code and
            ``ignore_error_returncode`` is not |True|.

        Examples
        --------

        Build up a script (``s``) to calculate a range of values, print them
        to the standard output, and then post-process this to extract the
        values::

            import itertools
            from dendropy.interop import rstats

            bb = [0.01, 0.05, 0.10, 0.50, 1.0]
            cc = [0.01, 0.05, 0.10, 0.50, 1.0]
            ee = [0.0, 0.1, 0.2]

            # store commands of script as a list
            # to be passed to the ``call()``
            s = []

            # set options, load required libraries, etc.
            s.append("options(digits=22)")
            s.append("library(PBD)")

            # build up list of commands in script
            for b, c, e in itertools.product(bb, cc, ee):
                s.append("print(pbd_durspec_mean(pars=c({},{},{})))".format(b, c, e))

            # execute script
            returncode, stdout, stderr  = rstats.call(s)

            # peek at the results
            print(stdout)

            # [1] 69.31472
            # [1] 9.853723
            # [1] 4.981369
            # [1] 0.9950331
            # ...

            # post-process the stdout to extract values
            results = [float(x.split(" ")[1]) for x in stdout.split("\n") if x]

        Notes
        -----

        Note that newlines ('\n') and other special characters will be
        converted before being passed to the R interpreter, so need to
        be escaped or entered as raw string expressions.

        That is, instead of, e.g.::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\n')",
            ])

        use this::

            returncode, stdout, stderr = RService.call([
                "cat('hello, world\\n')",
            ])

        or::

            returncode, stdout, stderr = RService.call([
                r"cat('hello, world\n')",
            ])

        """
        if not textprocessing.is_str_type(r_commands):
            r_commands = "\n".join(r_commands)
        r_commands += "\n"
        # Use the interpreter the caller asked for; the parameter defaults
        # to RSCRIPT_EXECUTABLE, so existing callers are unaffected.
        invocation_command = [rscript_path, rsubprocess_pipe_path]
        p = subprocess.Popen(
                invocation_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=cwd,
                env=env,
                )
        stdout, stderr = processio.communicate(p, r_commands)
        if (p.returncode != 0 and not ignore_error_returncode):
            raise error.ExternalServiceError(
                    service_name="Rscript",
                    invocation_command=invocation_command,
                    service_input=r_commands,
                    returncode = p.returncode,
                    stdout=stdout,
                    stderr=stderr)
        return p.returncode, stdout, stderr
示例#22
0
 def add_translate_token(self, token, taxon):
     """
     Registers ``taxon`` in ``self.token_taxon_map`` under ``token``.

     A ``token`` that is not a string is converted with ``str`` before
     being used as the key.
     """
     key = token if textprocessing.is_str_type(token) else str(token)
     self.token_taxon_map[key] = taxon