Пример #1
0
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        Builds the :class:`REST` helper for the ``rcsb.org`` PDB endpoint and
        stores it in :attr:`services`.

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        rest_options = {
            "name": "PDB",
            "url": "http://www.rcsb.org/pdb/rest",
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)
Пример #2
0
    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        Stores a :class:`REST` helper named ``cog`` in :attr:`services`.

        :param bool verbose: forwarded to :class:`REST` (default is off)
        :param bool cache: forwarded to :class:`REST` (default is off)
        """
        rest = REST(name="cog", url=COG._url, verbose=verbose, cache=cache)
        self.services = rest
Пример #3
0
 def __init__(self, verbose=False):
     """Create the REST helper for the EBI MUSCLE endpoint.

     Also initialises the parameter attributes and the HTTP headers
     stored on the instance.

     :param bool verbose: forwarded to :class:`REST` (default is off)
     """
     endpoint = "http://www.ebi.ac.uk/Tools/services/rest/muscle"
     self.services = REST(name='MUSCLE', url=endpoint, verbose=verbose)

     self._parameters = None
     self._parametersDetails = {}

     # The user agent string is provided by the REST helper itself.
     user_agent = self.services.getUserAgent()
     self._headers = {"User-Agent": user_agent, "accept": "application/json"}
Пример #4
0
    def __init__(self, verbose=True):
        """.. rubric:: Constructor

        Creates the :class:`REST` helper for the EBI ``emboss_seqret``
        endpoint and resets the parameter attribute.

        :param bool verbose: forwarded to :class:`REST` (default is on)

        """
        endpoint = "https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret"
        rest = REST(name="seqret", url=endpoint, verbose=verbose)
        self.services = rest
        self._parameters = None
Пример #5
0
    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to :class:`REST` (default is off)
        """
        rest_options = dict(name="ENA", url=ENA.url, verbose=verbose,
                            cache=cache)
        self.services = REST(**rest_options)
        # Override the TIMEOUT attribute of the freshly created helper.
        self.services.TIMEOUT = 100
Пример #6
0
    def __init__(self, verbose=False):
        """.. rubric:: NCBIblast constructor

        Creates the :class:`REST` helper for the EBI ``ncbiblast`` endpoint
        and initialises the instance attributes.

        :param bool verbose: prints informative messages

        """
        endpoint = "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast"
        rest = REST(name="NCBIblast", url=endpoint, verbose=verbose)
        self.services = rest

        self._parameters = None
        self._parametersDetails = {}
        self.checkInterval = 2
Пример #7
0
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        The :class:`REST` helper is created with ``url_defined_later=True``
        and its ``url`` attribute is assigned immediately afterwards.

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        rest = REST(name="PDB", verbose=verbose, cache=cache,
                    url_defined_later=True)
        self.services = rest
        self.services.url = PDB._url
Пример #8
0
    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        Creates the :class:`REST` helper and then stores the result of
        :meth:`getInputs` in ``_valid_inputs``.

        :param bool verbose: forwarded to :class:`REST` (default is on)
        :param bool cache: forwarded to :class:`REST` (default is off)

        """
        rest_options = {
            "name": "BioDBNet",
            "url": BioDBNet._url,
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)
        # getInputs() is called only once the helper is in place.
        self._valid_inputs = self.getInputs()
Пример #9
0
    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        self.easyXMLConversion = False
        self._default_extension = "json"

        rest = REST(
            name='PathwayCommons',
            url=PathwayCommons._url,
            verbose=verbose,
            cache=cache,
        )
        self.services = rest
Пример #10
0
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        Stores a :class:`REST` helper bound to the PDBe ``pdb/entry`` API
        in :attr:`services`.

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        endpoint = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/"
        rest = REST(name="PDBe", url=endpoint, verbose=verbose, cache=cache)
        self.services = rest
Пример #11
0
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        Stores a :class:`REST` helper bound to the ``mygene.info`` v3 API in
        :attr:`services`.

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        url = "https://mygene.info/v3"
        # The helper used to be registered as "PDBe" (copied from the PDBe
        # wrapper) although the URL targets mygene.info.
        # NOTE(review): confirm nothing relies on the old service name.
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)
Пример #12
0
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        endpoint = "http://www.ebi.ac.uk/arrayexpress"
        rest = REST(name="ArrayExpress", url=endpoint, cache=cache,
                    verbose=verbose)
        self.services = rest

        self.version = "v2"
Пример #13
0
    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to :class:`REST` (default is off)
        """
        rest_options = {
            "name": "Panther",
            "url": Panther._url,
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)

        self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"
Пример #14
0
    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to :class:`REST` (default is off)
        """
        rest = REST(name="BioCarta", url=BioCarta._url, cache=cache,
                    verbose=verbose)
        self.services = rest
        self.fname = "biocarta_pathways.txt"

        # No organism is selected and no pathway list is stored yet.
        self._organism = None
        self._organism_prefix = None
        self._pathways = None
Пример #15
0
    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param bool verbose: True by default
        :param bool cache: forwarded unchanged to :class:`REST`
            (False by default)

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()
        """
        rest = REST(name="Rhea", url=Rhea._url, verbose=verbose, cache=cache)
        self.services = rest
Пример #16
0
    def __init__(self, version="1.0", verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param str version: the current version of the interface (1.0)
        :param bool verbose: True by default
        :param bool cache: forwarded unchanged to :class:`REST`
            (False by default)

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()
        """
        rest_options = {
            "name": "Rhea",
            "url": Rhea._url,
            "verbose": verbose,
            "cache": cache,
        }
        self.services = REST(**rest_options)

        self.version = version
        self.format_entry = ["cmlreact", "biopax2", "rxn"]
Пример #17
0
class BioCarta():
    """Interface to `BioCarta <http://www.biocarta.com>`_ pages

    This is not a REST interface actually but rather a parser to some of the
    HTML pages related to pathways.

    One can retrieve the pathways names and their list of proteins.

        >>> from bioservices import *
        >>> b = BioCarta()
        >>> b.organism = "Homo sapiens"
        >>> pathways = b.all_pathways
        >>> proteins = b.get_pathway_protein_names(pathways[0])


    .. warning:: biocarta pathways layout can be accessed from PID

    """
    _url = "http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways"

    # organism name -> prefix of the pathway page names for that organism
    _organism_prefixes = {'Homo sapiens': 'h', 'Mus musculus': 'm'}
    #: organism names accepted by the :attr:`organism` setter
    organisms = set(_organism_prefixes.keys())

    # parsed pathway listing; stored on the class so it is fetched only once
    _all_pathways = None
    _pathway_categories = None
    _all_pathways_url = "http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded unchanged to :class:`REST` (default is off)
        """
        self.services = REST(name="BioCarta",
                             url=BioCarta._url,
                             cache=cache,
                             verbose=verbose)
        self.fname = "biocarta_pathways.txt"

        self._organism = None
        self._organism_prefix = None
        self._pathways = None

    # set the default organism used by pathways retrieval
    def _get_organism(self):
        """Return the currently selected organism (None until one is set)."""
        return self._organism

    def _set_organism(self, organism):
        """Select the organism used by :attr:`all_pathways`.

        The name is normalised (first letter upper case, the rest lower
        case) before being checked against :attr:`organisms`. Changing the
        organism discards the pathway list stored on the instance.

        :raises ValueError: if the normalised name is not in
            :attr:`organisms`
        """
        organism = organism[:1].upper() + organism[1:].lower()
        if organism == self._organism:
            return
        if organism not in BioCarta.organisms:
            raise ValueError(
                "Invalid organism. Check the list in :attr:`organisms` attribute"
            )

        self._organism = organism
        self._organism_prefix = BioCarta._organism_prefixes[organism]
        self._pathways = None

    organism = property(_get_organism,
                        _set_organism,
                        doc="returns the current default organism")

    def _get_pathway_categories(self):
        """Return the pathway categories, fetching them on first access."""
        if self._pathway_categories is None:
            # NOTE(review): `http_get_ou_post` looks like a misspelt method
            # name; confirm which method the REST helper really exposes.
            self._pathway_categories = self.services.http_get_ou_post()
        return self._pathway_categories

    pathway_categories = property(_get_pathway_categories)

    def _get_all_pathways(self):
        """returns pathways from biocarta

        human and mouse organisms are available but only those corresponding
        to the organism defined in :attr:`organism` are returned.

        :return: sorted list of pathway page names
        :raises ValueError: if :attr:`organism` has not been set
        """
        if self.organism is None:
            raise ValueError("Please set the organism attribute to one of %s" %
                             self._organism_prefixes.keys())

        if BioCarta._all_pathways is None:
            BioCarta._all_pathways = readXML(self._all_pathways_url)

        if self._pathways is None:

            url_pattern = re.compile("http://cgap.nci.nih.gov/Pathways/BioCarta/%s_(.+)[Pp]athway" \
                % (self._organism_prefix))

            def is_pathway_url(tag):
                # pathway links are plain <a> tags without a class attribute
                return tag.name == "a" and not tag.has_attr("class")

            anchors = BioCarta._all_pathways.findAll(is_pathway_url,
                                                     href=url_pattern)

            # Now let us select only the name (last component of the href).
            self._pathways = sorted(
                anchor.attrs['href'].rsplit("/", 1)[1] for anchor in anchors
            )
        return self._pathways

    all_pathways = property(_get_all_pathways)

    def get_pathway_protein_names(self, pathway):
        """returns list of genes for the corresponding pathway

        This function scans an HTML page. We have not found another way to
        get the gene list in a more robust way. This function was tested on
        one pathway. Please use with caution.

        :param str pathway: pathway page name, appended to the BioCarta
            pathway URL
        :return: dictionary keyed by gene name; each value is a dictionary
            built from the two-column rows of the gene information table

        """
        # This class wraps a REST helper, so the logger lives on it.
        log = self.services.logging
        log.info("Fetching the pathway")
        # first identify gene from GeneInfo tag
        # this is not XML but HTML
        url = "http://cgap.nci.nih.gov/Pathways/BioCarta/%s" % pathway
        html_doc = urlopen(url).read()
        soup = BeautifulSoup(html_doc, 'html.parser')

        # <area> tags without an href are ignored instead of raising TypeError
        links = set(
            area.attrs['href']
            for area in soup.find_all('area')
            if 'GeneInfo' in (area.get('href') or '')
        )

        log.info("Scanning information about %s genes" % len(links))
        # open each page and get info
        genes = {}
        for link in links:
            html_doc = urlopen(link).read()
            soup = BeautifulSoup(html_doc, 'html.parser')

            table_gene_info = soup.findAll("table")[1]

            gene_name = link.rsplit("=", 1)[1]
            log.info(" - " + gene_name)

            genes[gene_name] = {}
            # Last parsed table is kept on the instance, as before.
            # NOTE(review): looks like a debugging aid; confirm it is needed.
            self.tt = table_gene_info
            for row in table_gene_info.find_all('tr'):
                cells = row.find_all('td')
                # only rows holding both a key cell and a value cell are used
                if len(cells) < 2:
                    continue
                key = cells[0].text.strip()
                value = cells[1].text.strip()
                if "[Text]" in key:
                    continue
                genes[gene_name][key] = value

        return genes
Пример #18
0
    def __init__(self, verbose=False, cache=False):
        """Create the :class:`REST` helper used for every request.

        :param bool verbose: forwarded to :class:`REST` (default is off)
        :param bool cache: forwarded to :class:`REST` (default is off)
        """
        # http://bigg.ucsd.edu/data_access
        rest_options = {
            "name": "BiGG",
            "url": BiGG._url,
            "cache": cache,
            "requests_per_sec": 10,
            "verbose": verbose,
        }
        self.services = REST(**rest_options)
Пример #19
0
class BiGG():
    """
    Interface to the `BiGG Models <http://bigg.ucsd.edu/>`_ API Service.

    ::

        >>> from bioservices import BiGG
        >>> bigg = BiGG()
        >>> bigg.search("e coli", "models")
        [{'bigg_id': 'e_coli_core',
          'gene_count': 137,
          'reaction_count': 95,
          'organism': 'Escherichia coli str. K-12 substr. MG1655',
          'metabolite_count': 72},
          ...
        ]
    """

    _base_url = "http://bigg.ucsd.edu"
    _api_version = "v2"
    _url = "%s/api/%s" % (_base_url, _api_version)

    # number of bytes requested per chunk while streaming a download
    _download_chunk_size = 64 * 1024

    def __init__(self, verbose=False, cache=False):
        """Create the :class:`REST` helper used for every request.

        :param bool verbose: forwarded to :class:`REST` (default is off)
        :param bool cache: forwarded to :class:`REST` (default is off)
        """
        # http://bigg.ucsd.edu/data_access
        self.services = REST(name="BiGG",
            url=BiGG._url, cache=cache, requests_per_sec=10,
            verbose=verbose)

    def __len__(self):
        """Return the number of models (triggers a request via :attr:`models`)."""
        return len(self.models)

    @property
    def version(self):
        """Response of the ``database_version`` endpoint."""
        return self.services.http_get("database_version")

    def _http_get_results(self, *args, **kwargs):
        """Forward to ``services.http_get`` and return the ``"results"`` entry."""
        response = self.services.http_get(*args, **kwargs)
        return response["results"]

    @property
    def models(self):
        """The ``results`` entry of the ``models`` endpoint."""
        return self._http_get_results("models")

    def _get_model_resource(self, type_, model_id, ids=None):
        """Fetch resources of kind *type_* belonging to model *model_id*.

        :param str type_: must be one of ``_ACCEPTABLE_MODEL_RESOURCE_TYPES``
        :param str model_id: identifier inserted in the query path
        :param ids: optional identifier(s); when omitted, the whole listing
            is returned
        :raises TypeError: if *type_* is not an acceptable resource type
        """
        if type_ not in _ACCEPTABLE_MODEL_RESOURCE_TYPES:
            raise TypeError("Unknown model resource type %s. Acceptable types are %s"
                % (type_, _ACCEPTABLE_MODEL_RESOURCE_TYPES))

        query = "models/%s/%s" % (model_id, type_)

        if ids is None:
            return self._http_get_results(query)

        # sequencify/squash are helpers defined outside this class.
        ids = sequencify(ids)
        queries = ["%s/%s" % (query, id_) for id_ in ids]

        response = self.services.http_get(queries)
        return squash(response)

    def metabolites(self, model_id=None, ids=None):
        """Return metabolites of *model_id*, or the universal list if None."""
        if model_id is None:
            return self._http_get_results("universal/metabolites")

        return self._get_model_resource("metabolites", model_id=model_id, ids=ids)

    def reactions(self, model_id=None, ids=None):
        """Return reactions of *model_id*, or the universal list if None."""
        if model_id is None:
            return self._http_get_results("universal/reactions")

        return self._get_model_resource("reactions", model_id=model_id, ids=ids)

    def genes(self, model_id, ids=None):
        """Return the genes of *model_id* (optionally restricted to *ids*)."""
        return self._get_model_resource("genes", model_id=model_id, ids=ids)

    def search(self, query, type_):
        """Query the ``search`` endpoint.

        :param str query: text to search for
        :param str type_: must be one of ``_ACCEPTABLE_SEARCH_TYPES``
        :raises TypeError: if *type_* is not an acceptable search type
        """
        if type_ not in _ACCEPTABLE_SEARCH_TYPES:
            raise TypeError("Unknown type %s. Acceptable types are %s"
                % (type_, _ACCEPTABLE_SEARCH_TYPES))

        params = {"query": query, "search_type": type_}
        return self._http_get_results("search", params=params)

    def download(self, model_id, format_="json", gzip=True, target=None):
        """Download a model file from the ``static/models`` area to disk.

        :param str model_id: model identifier used to build the file name
        :param str format_: must be one of
            ``_ACCEPTABLE_MODEL_DOWNLOAD_FORMATS``
        :param bool gzip: if True, ``.gz`` is appended to the file name
        :param str target: output file name; defaults to the remote file name
        :raises TypeError: if *format_* is not an acceptable format
        :raises: whatever ``raise_for_status`` raises when the request is
            not successful (nothing is written in that case)
        """
        if format_ not in _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS:
            raise TypeError("Unknown format %s. Accepted types are %s."
                % (format_, _ACCEPTABLE_MODEL_DOWNLOAD_FORMATS))

        path = "%s.%s" % (model_id, format_)

        if gzip:
            path += ".gz"

        if not target:
            target = path

        url = self.services._build_url("%s/static/models/%s" %
            (BiGG._base_url, path))

        response = self.services.session.get(url, stream=True)
        try:
            # Raises exactly when ``response.ok`` would be False.
            response.raise_for_status()
            with open(target, "wb") as f:
                # An explicit chunk size avoids the one-byte default of
                # iter_content(), i.e. one write() call per byte.
                for content in response.iter_content(
                        chunk_size=self._download_chunk_size):
                    f.write(content)
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
Пример #20
0
class PathwayCommons():
    """Interface to the `PathwayCommons <http://www.pathwaycommons.org/about>`_ service


    >>> from bioservices import *
    >>> pc2 = PathwayCommons(verbose=False)
    >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")



    .. todo:: traverse() method not implemented.
    """

    #: valid formats
    _valid_format = ["GSEA", "SBGN", "BIOPAX", "SIF", "TXT", "JSONLD"]
    #: valid graph traversal directions
    _valid_directions = ["BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED"]
    #: valid SIF relationship (edge) types
    _valid_patterns = [
            "CONTROLS_STATE_CHANGE_OF", "CONTROLS_PHOSPHORYLATION_OF",
            "CONTROLS_TRANSPORT_OF", "CONTROLS_EXPRESSION_OF",
            "IN_COMPLEX_WITH", "INTERACTS_WITH", "CATALYSIS_PRECEDES", "NEIGHBOR_OF",
            "CONSUMPTION_CONTROLLED_BY", "CONTROLS_TRANSPORT_OF_CHEMICAL",
            "CONTROLS_PRODUCTION_OF",
            "CHEMICAL_AFFECTS", "REACTS_WITH", "USED_TO_PRODUCE"]
    # base URL handed to the REST helper by the constructor
    _url = "https://www.pathwaycommons.org"
    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages
        :param bool cache: forwarded unchanged to :class:`REST`
            (default is off)

        """
        self.easyXMLConversion = False
        self._default_extension = "json"

        rest = REST(
            name='PathwayCommons',
            url=PathwayCommons._url,
            verbose=verbose,
            cache=cache,
        )
        self.services = rest

    # just a get/set to the default extension
    def _set_default_ext(self, ext):
        self.services.devtools.check_param_in_list(ext, ["json", "xml"])
        self._default_extension = ext
    def _get_default_ext(self):
        return self._default_extension
    default_extension = property(_get_default_ext, _set_default_ext,
             doc="set extension of the requests (default is json). Can be 'json' or 'xml'")

    def search(self, q, page=0, datasource=None, organism=None, type=None):
        """Text search in PathwayCommons using Lucene query syntax

        Some of the parameters are BioPAX properties, others are composite
        relationships.

        All index fields are (case-sensitive): comment, ecnumber,
        keyword, name, pathway, term, xrefdb, xrefid, dataSource, and organism.

        The pathway field maps to all participants of pathways that contain
        the keyword(s) in any of its text fields.

        Finally, keyword is a transitive aggregate field that includes all
        searchable keywords of that element and its child elements.

        All searches can also be filtered by data source and organism.

        It is also possible to restrict the domain class using the
        'type' parameter.

        This query can be used standalone or to retrieve starting points
        for graph searches.


        :param str q: requires a keyword , name, external identifier, or a
            Lucene query string.
        :param int page: (N>=0, default is 0), search result page number.
        :param str datasource: filter by data source (use names or URIs of
            pathway data sources or of any existing Provenance object). If
            multiple data source values are specified, a union of hits from
            specified sources is returned. datasource=[reactome,pid] returns
            hits associated with Reactome or PID.
        :param str organism: The organism can be specified either by
            official name, e.g. "h**o sapiens" or by NCBI taxonomy id,
            e.g. "9606". Similar to data sources, if multiple organisms
            are declared a union of all hits from specified organisms
            is returned. For example organism=[9606, 10016] returns results
            for both human and mice.
        :param str type: BioPAX class filter. (e.g., 'pathway', 'proteinreference')


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(vverbose=False)
            >>> pc2.search("Q06609")
            >>> pc2.search("brca2", type="proteinreference",
                    organism="h**o sapiens",  datasource="pid")
            >>> pc2.search("name:'col5a1'", type="proteinreference", organism=9606)
            >>> pc2.search("a*", page=3)

        Find the FGFR2 keyword::

            pc2.search("FGFR2")

        Find pathways by FGFR2 keyword in any index field.::

            pc2.search("FGFR2", type="pathway")

        Finds control interactions that contain the word binding but not
        transcription in their indexed fields::

            pc2.search("binding NOT transcription", type="control")

        Find all interactions that directly or indirectly participate
        in a pathway that has a keyword match for "immune" (Note the star after
        immune):

            pc.search("pathway:immune*", type="conversion")


        Find all Reactome pathways::

            pc.search("*", type="pathway", datasource="reactome")

        """
        if self.default_extension == "xml":
            url = "pc2/search.xml?q=%s"  % q
        elif self.default_extension == "json":
            url = "pc2/search.json?q=%s"  % q

        params = {}
        if page>=0:
            params['page'] = page
        else:
            self.services.logging.warning("page should be >=0")

        if datasource:
            params['datasource'] = datasource

        if type:
            params['type'] = type

        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt=self.default_extension,
                params=params)

        #if self.default_extension == "json":
        #    res = json.loads(res)
        if self.default_extension == "xml":
            res = self.easyXML(res)

        return res

    def get(self, uri, frmt="BIOPAX"):
        """Retrieves full pathway information for a set of elements

        elements can be for example pathway, interaction or physical
        entity given the RDF IDs. Get commands only
        retrieve the BioPAX elements that are directly mapped to the ID.
        Use the :meth:`traverse` query to traverse BioPAX graph and
        obtain child/owner elements.

        :param str uri: valid/existing BioPAX element's URI (RDF ID; for
            utility classes that were "normalized", such as entity refereneces
            and controlled vocabularies, it is usually a Identifiers.org URL.
            Multiple IDs can be provided using list
            uri=[http://identifiers.org/uniprot/Q06609,
            http://identifiers.org/uniprot/Q549Z0']
            See also about MIRIAM and Identifiers.org.
        :param str format: output format (values)

        :return: a complete BioPAX representation for the record
            pointed to by the given URI is returned. Other output
            formats are produced by converting the BioPAX record on
            demand and can be specified by the optional format
            parameter. Please be advised that with some output formats
            it might return "no result found" error if the conversion is
            not applicable for the BioPAX result. For example,
            BINARY_SIF output usually works if there are some
            interactions, complexes, or pathways in the retrieved set
            and not only physical entities.


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.get("col5a1")
            >>> res = pc2.get("http://identifiers.org/uniprot/Q06609")


        """



        self.services.devtools.check_param_in_list(frmt, self._valid_format)

        # validates the URIs
        if isinstance(uri, str):
            url = "pc2/get?uri=" +uri
        elif instance(uri, list):
            url = "pc2/get?uri=" +uri[0]
            if len(uri)>1:
                for u in uri[1:]:
                    url += "&uri=" + u

        # ?uri=http://identifiers.org/uniprot/Q06609
        # http://www.pathwaycommons.org/pc2/get?uri=COL5A1

        if frmt != "BIOPAX":
            url += "&format=%s" % frmt

        if frmt.lower() in ["biopax", "sbgn"]: 
            frmt = "xml"
        else:
            frmt = "txt"
        res = self.services.http_get(url, frmt=frmt)

        return res

    def top_pathways(self, query="*", datasource=None, organism=None):
        """This command returns all *top* pathways

        Pathways can be top or pathways that are neither
        'controlled' nor 'pathwayComponent' of another process.

        :param query: a keyword, name, external identifier or lucene query
            string like in 'search'. Default is "*"
        :param str datasource: filter by data source (same as search)
        :param str organism: organism filter. 9606 for human.

        :return: dictionary with information about top pathways. Check the
            "searchHit" key for information about "dataSource" for instance


        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.top_pathways()


https://www.pathwaycommons.org/pc2/top_pathways?q=TP53

        """
        if self.default_extension == "json":
            url = "pc2/top_pathways.json"
        else:
            url = "pc2/top_pathways"

        params = {}
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism
        params['q'] = query


        res = self.services.http_get(url, frmt=self.default_extension,
                params=params)

        if self.default_extension == "xml":
            res = self.easyXML(res)
        return res

    def graph(self, kind, source, target=None, direction=None, limit=1,
            frmt=None, datasource=None, organism=None):
        """Finds connections and neighborhoods of elements

        Connections can be for example the shortest path between two proteins
        or the neighborhood for a particular protein state or all states.

        Graph searches take detailed BioPAX semantics such as generics or
        nested complexes into account and traverse the graph accordingly.
        The starting points can be either physical entites or entity references.

        In the case of the latter the graph search starts from ALL
        the physical entities that belong to that particular entity references,
        i.e.  all of its states. Note that we integrate BioPAX data from
        multiple databases  based on our proteins and small molecules data
        warehouse and consistently normalize UnificationXref, EntityReference,
        Provenance, BioSource, and ControlledVocabulary objects when we are
        absolutely sure that two objects of the same type are equivalent. We,
        however, do not merge physical entities and reactions from different
        sources as matching and aligning pathways at that level is still an
        open research problem. As a result, graph searches can return
        several similar but disconnected sub-networks that correspond to
        the pathway data from different providers (though some physical
        entities often refer to the same small molecule or protein reference
        or controlled vocabulary).


        :param str kind: graph query
        :param str source:  source object's URI/ID. Multiple source URIs/IDs
            must be encoded as list of valid URI
            **source=['http://identifiers.org/uniprot/Q06609',
            'http://identifiers.org/uniprot/Q549Z0']**.
        :param str target: required for PATHSFROMTO graph query.  target
            URI/ID. Multiple target URIs must be encoded as list (see source
            parameter).
        :param str direction: graph search  direction in [BOTHSTREAM,
            DOWNSTREAM, UPSTREAM] see :attr:`_valid_directions` attribute.
        :param int limit: graph query search distance limit (default = 1).
        :param str format: output format. see :attr:`_valid-format`
        :param str datasource: datasource filter (same as for 'search').
        :param str organism: organism filter (same as for 'search').


        :return:  By default, graph queries return a complete BioPAX
            representation of the subnetwork matched by the algorithm.
            Other output formats are available as specified by the optional
            format parameter. Please be advised that some output format
            choices might cause "no result found" error if the conversion
            is not applicable for the BioPAX result (e.g., BINARY_SIF output
            fails if there are no interactions, complexes, nor pathways
            in the retrieved set).

        .. doctest::

            >>> from bioservices import PathwayCommons
            >>> pc2 = PathwayCommons(verbose=False)
            >>> res = pc2.graph(source="http://identifiers.org/uniprot/P20908",
                    kind="neighborhood", format="EXTENDED_BINARY_SIF")



        """
        url = "pc2/graph"
        params = {}
        params['source'] = source
        params['kind'] = kind
        params['limit'] = limit

        params = {}
        if target:
            params['target'] = target
        if frmt:
            params['format'] = frmt
        if datasource:
            params['datasource'] = datasource
        if organism:
            params['organism'] = organism

        res = self.services.http_get(url, frmt="txt", params=params)
        return res

    def traverse(self, uri, path):
        """Provides XPath-like access to the PC.


        The format of the path query is in the form::

            [InitialClass]/[property1]:[classRestriction(optional)]/[property2]... A "*"

        sign after the property instructs path accessor to transitively traverse
        that property. For example, the following path accessor will traverse
        through all physical entity components within a complex::

            "Complex/component*/entityReference/xref:UnificationXref"

        The following will list display names of all participants of
        interactions, which are components (pathwayComponent) of a pathway
        (note: pathwayOrder property, where same or other interactions can be
        reached, is not considered here)::

            "Pathway/pathwayComponent:Interaction/participant*/displayName"

        The optional parameter classRestriction allows to restrict/filter the
        returned property values to a certain subclass of the range of that
        property. In the first example above, this is used to get only the
        Unification Xrefs. Path accessors can use all the official BioPAX
        properties as well as additional derived classes and parameters in
        paxtools such as inverse parameters and interfaces that represent
        anonymous union classes in OWL. (See Paxtools documentation for more
        details).

        :param str uri: a biopax element URI - specified similar to the 'GET'
            command. multiple IDs are allowed as a list of strings.
        :param str path: a BioPAX propery path in the form of
                property1[:type1]/property2[:type2]; see above, inverse
                properties, Paxtools,
                org.biopax.paxtools.controller.PathAccessor.

        .. seealso:: `properties
            <http://www.pathwaycommons.org/pc2/#biopax_properties>`_

        :return:  XML result that follows the Search Response XML Schema
            (TraverseResponse type; pagination is disabled: returns all values at
            once)

        ::


            from bioservices import PathwayCommons
            pc2 = PathwayCommons(verbose=False)
            res = pc2.traverse(uri=['http://identifiers.org/uniprot/P38398','http://identifiers.org/uniprot/Q06609'], path="ProteinReference/organism")
            res = pc2.traverse(uri="http://identifiers.org/uniprot/Q06609",
                path="ProteinReference/entityReferenceOf:Protein/name")
            res = pc2.traverse("http://identifiers.org/uniprot/P38398",
                path="ProteinReference/entityReferenceOf:Protein")
            res = pc2.traverse(uri=["http://identifiers.org/uniprot/P38398",
                "http://identifiers.org/taxonomy/9606"], path="Named/name")


        """
        url =  "pc2/traverse?"

        if isinstance(uri, str):
            url += "?uri=" + uri
        elif isinstance(uri, list):
            url += "?uri=" + uri[0]
            for u in uri[1:]:
                url += "&uri=" + u

        url += "&path=" + path

        res = self.services.http_get(url, frmt="json")
        return res

    def get_sifgraph_neighborhood(self, source, limit=1, direction="BOTHSTREAM", pattern=None):
        """Find the neighborhood sub-network of one or more genes.

        The search runs on the Pathway Commons Simple Interaction Format
        (extended SIF) graph (see http://www.pathwaycommons.org/pc2/formats#sif).

        :param source: gene identifiers (HGNC symbols); either a list of
            strings or a single string when there is only one identifier.
        :param int limit: graph traversal depth, at least 1. A value larger
            than 1 can result in very large data or an error.
        :param str direction: graph traversal direction, checked against
            ``self._valid_directions`` (the original documentation lists
            "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED"). Use
            UNDIRECTED if you want to see interacts-with relationships too.
        :param str pattern: optional filter on the binary relationship (SIF
            edge) type; checked against ``self._valid_patterns`` when given.

        :return: the ``content`` of the HTTP response, i.e. the graph in SIF
            format. The output must be stripped and holds one line per
            relation; within a line, items are separated by a tabulation.
            You can save the text with a .sif extension and it should be
            ready to use e.g. in the cytoscape viewer.

        ::

            res = pc.get_sifgraph_neighborhood('BRD4')

        """
        checker = self.services.devtools
        checker.check_param_in_list(direction, self._valid_directions)
        if pattern:
            checker.check_param_in_list(pattern, self._valid_patterns)
        assert limit >= 1

        # A lone identifier is promoted to a one-element list.
        identifiers = [source] if isinstance(source, str) else source
        assert isinstance(identifiers, list)

        query = {
            "source": ",".join(identifiers),
            "limit": limit,
            "direction": direction,
        }
        if pattern:
            query['pattern'] = pattern

        text_headers = self.services.get_headers(content="text")
        response = self.services.http_get("sifgraph/v1/neighborhood",
                                          params=query,
                                          headers=text_headers)
        return response.content


    def get_sifgraph_common_stream(self, source, limit=1, direction="DOWNSTREAM", pattern=None):
        """Find the common stream of the given genes.

        Extracts a sub-network from the loaded Pathway Commons SIF model.

        :param source: gene identifiers (HGNC symbols); either a list of
            strings or a single string when there is only one identifier.
        :param int limit: graph traversal depth, at least 1. A value larger
            than 1 can result in very large data or an error.
        :param str direction: graph traversal direction, checked against
            ``self._valid_directions`` (the original documentation lists
            "BOTHSTREAM", "UPSTREAM", "DOWNSTREAM", "UNDIRECTED"). Use
            UNDIRECTED if you want to see interacts-with relationships too.
        :param str pattern: optional filter on the binary relationship (SIF
            edge) type; checked against ``self._valid_patterns`` when given.

        :return: the ``content`` of the HTTP response, i.e. the graph in SIF
            format, or None when the value returned by the service layer
            has no ``content`` attribute (no match). The output must be
            stripped and holds one line per relation; within a line, items
            are separated by a tabulation. You can save the text with a
            .sif extension and it should be ready to use e.g. in the
            cytoscape viewer.

        ::

            res = pc.get_sifgraph_common_stream(['BRD4', 'MYC'])
        """
        self.services.devtools.check_param_in_list(direction, self._valid_directions)
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit >= 1

        if isinstance(source, str):
            source = [source]
        assert isinstance(source, list)
        source = ",".join(source)

        params = {"source": source,
                  "limit": limit,
                  "direction": direction}

        if pattern:
            params['pattern'] = pattern

        res = self.services.http_get("sifgraph/v1/commonstream", params=params,
            headers=self.services.get_headers(content="text"))
        try:
            return res.content
        except AttributeError:
            # if no match, returns code 406 and ""; such a value has no
            # ``content`` attribute. Only that case is handled here so that
            # unrelated errors (and KeyboardInterrupt) are not swallowed.
            return None


    def get_sifgraph_pathsbetween(self, source, limit=1, directed=False, pattern=None):
        """Find the paths between the given genes.

        Extracts a sub-network from the Pathway Commons SIF graph.

        :param source: gene identifiers (HGNC symbols); either a list of
            strings or a single string when there is only one identifier.
        :param int limit: graph traversal depth, at least 1. A value larger
            than 1 can result in very large data or an error.
        :param bool directed: directionality: 'true' is for
            DOWNSTREAM/UPSTREAM, 'false' - UNDIRECTED
        :param str pattern: optional filter on the binary relationship (SIF
            edge) type; checked against ``self._valid_patterns`` when given.

        :return: the ``content`` of the HTTP response, i.e. the graph in SIF
            format. The output must be stripped and holds one line per
            relation; within a line, items are separated by a tabulation.
            You can save the text with a .sif extension and it should be
            ready to use e.g. in the cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit >= 1

        # A lone identifier is promoted to a one-element list.
        identifiers = [source] if isinstance(source, str) else source
        assert isinstance(identifiers, list)

        query = {
            "source": ",".join(identifiers),
            "limit": limit,
            "directed": directed,
        }
        if pattern:
            query['pattern'] = pattern

        text_headers = self.services.get_headers(content="text")
        response = self.services.http_get("sifgraph/v1/pathsbetween",
                                          params=query,
                                          headers=text_headers)
        return response.content


    def get_sifgraph_pathsfromto(self, source, target, limit=1, pattern=None):
        """Find the paths from a source set of genes to a target set.

        Extracts a sub-network from the Pathway Commons SIF graph.

        :param source: gene identifiers (HGNC symbols); either a list of
            strings or a single string when there is only one identifier.
        :param target: a target set of gene identifiers; a list of strings
            or a single string, handled like *source*.
        :param int limit: graph traversal depth, at least 1. A value larger
            than 1 can result in very large data or an error.
        :param str pattern: optional filter on the binary relationship (SIF
            edge) type; checked against ``self._valid_patterns`` when given.

        :return: the ``content`` of the HTTP response, i.e. the graph in SIF
            format. The output must be stripped and holds one line per
            relation; within a line, items are separated by a tabulation.
            You can save the text with a .sif extension and it should be
            ready to use e.g. in the cytoscape viewer.
        """
        if pattern:
            self.services.devtools.check_param_in_list(pattern, self._valid_patterns)
        assert limit >= 1

        # Each side accepts a lone identifier, promoted to a one-element list.
        source_ids = [source] if isinstance(source, str) else source
        assert isinstance(source_ids, list)
        joined_sources = ",".join(source_ids)

        target_ids = [target] if isinstance(target, str) else target
        assert isinstance(target_ids, list)
        joined_targets = ",".join(target_ids)

        query = {
            "source": joined_sources,
            "target": joined_targets,
            "limit": limit,
        }
        if pattern:
            query['pattern'] = pattern

        text_headers = self.services.get_headers(content="text")
        response = self.services.http_get("sifgraph/v1/pathsfromto",
                                          params=query,
                                          headers=text_headers)
        return response.content
Пример #21
0
class PDBe():
    """Interface to part of the `PDBe <http://www.ebi.ac.uk/pdbe>`_ service

    .. doctest::

        >>> from bioservices import PDBe
        >>> s = PDBe()
        >>> res = s.get_summary("1cbs")

    Unless stated otherwise, the ``query`` argument of the methods below is
    a 4-character PDB id, a string of such ids separated by commas, or a
    list of such ids. One id is fetched with a GET request; several ids are
    sent together in a single POST request.
    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded to the underlying REST service object

        """
        url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/"
        self.services = REST(name="PDBe",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def _check_id(self, pdbid):
        """Normalise *pdbid* to a comma-separated string of PDB ids.

        :param pdbid: a 4-character id, a comma-separated string of ids, or
            a list of ids.
        :return: the ids as one comma-separated string.
        :raises AssertionError: if an id is not 4 characters long.
        :raises TypeError: if *pdbid* is neither a string nor a list.
        """
        if isinstance(pdbid, list):
            pdbid = ",".join(pdbid)

        if isinstance(pdbid, str):
            for item in pdbid.split(","):
                assert len(item) == 4, "a 4-character PDB id code is required"
        else:
            raise TypeError(
                "pdb id must be either a 4-character pdb id, a list of valid PDB ids, or a string made of pdb ids, separated by commas"
            )

        return pdbid

    def _check_single_id(self, pdbid):
        """Validate an id for the endpoints that accept exactly one entry.

        :param str pdbid: a 4-character PDB id code.
        :return: *pdbid* unchanged.
        :raises AssertionError: if *pdbid* is not 4 items long.
        :raises TypeError: if *pdbid* is not a string or contains a comma.
        """
        assert len(pdbid) == 4, "a 4-character PDB id code is required"
        if not isinstance(pdbid, str) or "," in pdbid:
            raise TypeError("pdb id must be a single 4-character pdb id")
        return pdbid

    @staticmethod
    def _chain_endpoint(endpoint, pdbid, chain_id):
        """Build the relative URL of a per-chain resource."""
        return "{}/{}/chain/{}".format(endpoint, pdbid, chain_id)

    def _return(self, res):
        """Map the 404 code returned by the service layer to an empty dict."""
        if res == 404:
            return {}
        return res

    def _request(self, endpoint, query, post_endpoint=None):
        """Fetch *endpoint* for one or several PDB ids.

        :param str endpoint: resource name relative to the service URL.
        :param query: id(s), validated with :meth:`_check_id`.
        :param str post_endpoint: URL used for the multi-id POST request;
            defaults to *endpoint*. Only needed where the historical POST
            URL carries a trailing slash.
        :return: the service answer, or ``{}`` on a 404 code.
        """
        query = self._check_id(query)
        if "," not in query:
            # single entry: the id is part of the URL
            res = self.services.http_get("{}/{}".format(endpoint, query))
        else:
            # several entries: the ids travel in the request body
            if post_endpoint is None:
                post_endpoint = endpoint
            res = self.services.http_post(post_endpoint,
                                          data=query,
                                          frmt="json")
        return self._return(res)

    def get_summary(self, query):
        """Returns summary of a PDB entry

        This can be title of the entry, list of depositors, date of deposition,
        date of release, date of latest revision, experimental method, list
        of related entries in case split entries, etc.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_summary('1cbs')
            p.get_summary('1cbs,2kv8')
            p.get_summary(['1cbs', '2kv8'])

        """
        return self._request("summary", query)

    def get_molecules(self, query):
        """Return details of molecules (or entities in mmcif-speak) modelled in the entry

        This can be entity id, description, type, polymer-type (if applicable), number
        of copies in the entry, sample preparation method, source organism(s)
        (if applicable), etc.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_molecules('1cbs')

        """
        return self._request("molecules", query)

    def get_related_publications(self, query):
        """Return publications obtained from both EuroPMC and UniProt.

        These are articles which cite the primary citation of the entry, or
        open-access articles which mention the entry id without explicitly citing the
        primary citation of an entry.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_related_publications('1cbs')

        """
        return self._request("related_publications", query,
                             post_endpoint="related_publications/")

    def get_experiment(self, query):
        """Provides details of experiment(s) carried out in determining the structure of the entry.

        Each experiment is described in a separate dictionary.
        For X-ray diffraction, the description consists of resolution, spacegroup, cell
        dimensions, R and Rfree, refinement program, etc.
        For NMR, details of spectrometer, sample, spectra, refinement, etc. are
        included.
        For EM, details of specimen, imaging, acquisition, reconstruction, fitting etc.
        are included.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_experiment('1cbs')

        """
        # The multi-id request used to go to the literal "experiment/{}".
        return self._request("experiment", query)

    def get_nmr_resources(self, query):
        """This call provides URLs of available additional resources for NMR
        entries. E.g., mapping between structure (PDB) and chemical shift (BMRB)
        entries.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_nmr_resources('1cbs')

        """
        return self._request("nmr_resources", query,
                             post_endpoint="nmr_resources/")

    def get_ligand_monomers(self, query):
        """Provides a list of modelled instances of ligands,

        ligands i.e. 'bound' molecules that are not waters.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_ligand_monomers('1cbs')

        """
        return self._request("ligand_monomers", query)

    def get_modified_residues(self, query):
        """Provides a list of modelled instances of modified amino acids or
        nucleotides in protein, DNA or RNA chains.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_modified_residues('4v5j')

        """
        return self._request("modified_AA_or_NA", query)

    def get_mutated_residues(self, query):
        """Provides a list of modelled instances of mutated amino acids or
        nucleotides in protein, DNA or RNA chains.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_mutated_residues('1bgj')

        """
        # Several ids are now POSTed like in the sibling methods (this used
        # to be a GET request carrying a body).
        return self._request("mutated_AA_or_NA", query)

    def get_release_status(self, query):
        """Provides status of a PDB entry (released, obsoleted, on-hold etc)
        along with some other information such as authors, title, experimental method,
        etc.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_release_status('1cbs')

        """
        # Several ids are now POSTed to "status" (this used to be a GET
        # request to the literal "status/{}").
        return self._request("status", query)

    def get_observed_ranges(self, query):
        """Provides observed ranges, i.e., segments of structural coverage of
        polymeric molecules that are modelled fully or partly

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_observed_ranges('1cbs')

        """
        return self._request("polymer_coverage", query)

    def get_observed_ranges_in_pdb_chain(self, query, chain_id):
        """Provides observed ranges, i.e., segments of structural coverage of
        polymeric molecules in a particular chain

        :param query: a single 4-character PDB id code
        :param chain_id: a PDB chain ID

        ::

            p.get_observed_ranges_in_pdb_chain('1cbs', "A")

        """
        query = self._check_single_id(query)
        res = self.services.http_get(
            self._chain_endpoint("polymer_coverage", query, chain_id))
        return self._return(res)

    def get_secondary_structure(self, query):
        """Provides residue ranges of regular secondary structure

        (alpha helices and beta strands) found in protein chains of the entry.
        For strands, sheet id can be used to identify a beta sheet.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_secondary_structure('1cbs')

        """
        return self._request("secondary_structure", query,
                             post_endpoint="secondary_structure/")

    def get_residue_listing(self, query):
        """Provides lists all residues (modelled or otherwise) in the entry.

        Except waters, along with details of the fraction of expected atoms modelled for
        the residue and any alternate conformers.

        :param query: a single 4-character PDB id code

        ::

            p.get_residue_listing('1cbs')

        """
        # Validation happens up front so the request below always runs;
        # previously an unexpected input left ``res`` unbound.
        query = self._check_single_id(query)
        res = self.services.http_get("residue_listing/{}".format(query))
        return self._return(res)

    def get_residue_listing_in_pdb_chain(self, query, chain_id):
        """Provides all residues (modelled or otherwise) in a chain of the entry

        Except waters, along with details of the fraction of expected atoms
        modelled for the residue and any alternate conformers.

        :param query: a single 4-character PDB id code
        :param chain_id: a PDB chain ID

        ::

            p.get_residue_listing_in_pdb_chain('1cbs', 'A')

        """
        query = self._check_single_id(query)
        # The chain id is part of the URL (it used to be silently dropped).
        res = self.services.http_get(
            self._chain_endpoint("residue_listing", query, chain_id))
        return self._return(res)

    def get_binding_sites(self, query):
        """Provides details on binding sites in the entry

        STRUCT_SITE records in PDB files (or mmcif equivalent thereof), such as ligand,
        residues in the site, description of the site, etc.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_binding_sites('1cbs')

        """
        return self._request("binding_sites", query)

    def get_files(self, query):
        """Provides URLs and brief descriptions (labels) for PDB entry

        Also, for mmcif files, biological assembly files, FASTA file for sequences,
        SIFTS cross reference XML files, validation XML files, X-ray structure
        factor file, NMR experimental constraints files, etc.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_files('1cbs')

        """
        return self._request("files", query)

    def get_observed_residues_ratio(self, query):
        """Provides the ratio of observed residues for each chain in each molecule

        The list of chains within an entity is sorted by observed_ratio (descending order),
        partial_ratio (ascending order), and number_residues (descending order).

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_observed_residues_ratio('1cbs')

        """
        return self._request("observed_residues_ratio", query)

    def get_assembly(self, query):
        """Provides information for each assembly of a given PDB ID.

        This information is broken down at the entity level for each assembly. The
        information given includes the molecule name, type and class, the chains where
        the molecule occur, and the number of copies of each entity in the assembly.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_assembly('1cbs')

        """
        return self._request("assembly", query)

    def get_electron_density_statistics(self, query):
        """This call details the statistics for electron density.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_electron_density_statistics('1cbs')

        """
        return self._request("electron_density_statistics", query)

    def get_functional_annotation(self, query):
        """Provides functional annotation of all ligands, i.e. 'bound'
        molecules (requests the ``cofactor`` resource).

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_functional_annotation('1cbs')

        """
        return self._request("cofactor", query)

    def get_drugbank_annotation(self, query):
        """This call provides DrugBank annotation of all ligands, i.e. 'bound'
        molecules (requests the ``drugbank`` resource).

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_drugbank_annotation('5hht')

        """
        return self._request("drugbank", query)

    def get_related_dataset(self, query):
        """Provides DOI's for related raw experimental datasets

        Includes diffraction image data, small-angle scattering data and
        electron micrographs.

        :param query: a 4-character PDB id code (or several, see class doc)

        ::

            p.get_related_dataset('5o8b')

        """
        return self._request("related_experiment_data", query)
Пример #22
0
 def __init__(self, verbose=True, cache=False):
     """Create the REST service wrapper for Reactome.

     :param verbose: stored as ``self.debugLevel``; the REST wrapper itself
         is always created with ``verbose="ERROR"``.
     :param cache: accepted, but the REST wrapper is always created with
         ``cache=False``.
     """
     # NOTE(review): ``cache`` is not forwarded to REST; confirm whether
     # that is intentional before relying on it.
     rest_options = {
         "name": "Reactome",
         "url": Reactome._url,
         "verbose": "ERROR",
         "cache": False,
     }
     self.services = REST(**rest_options)
     self.debugLevel = verbose
Пример #23
0
class Rhea():
    """Interface to the `Rhea <http://www.ebi.ac.uk/rhea/rest/1.0/>`_ service

    You can search by compound name, ChEBI ID, reaction ID, cross reference
    (e.g., EC number) or citation (author name, title, abstract text, publication ID).
    You can use double quotes - to match an exact phrase - and the following
    wildcards:

        * ? (question mark = one character),
        * `*` (asterisk = several characters).

    Searching for caffe* will find reactions with participants such as caffeine,
    trans-caffeic acid or caffeoyl-CoA::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("caffe*")

    Searching for a?e?o* will find reactions with participants such as acetoin,
    acetone or adenosine.::

        from bioservices import Rhea
        r = Rhea()
        response = r.search("a?e?o*")

    The :meth:`search` and :meth:`query` methods accept a comma-separated
    list of columns. By default all columns are used but you can restrict to
    only a few. Here is the description of the columns:

        rhea-id :   reaction identifier (with prefix RHEA)
        equation :  textual description of the reaction equation
        chebi :     comma-separated list of ChEBI names used as reaction participants
        chebi-id :  comma-separated list of ChEBI identifiers used as reaction participants
        ec :        comma-separated list of EC numbers (with prefix EC)
        uniprot :   number of proteins (UniProtKB entries) annotated with the Rhea reaction
        pubmed :    comma-separated list of PubMed identifiers (without prefix)

    and 5 cross-references:

        reaction-xref(EcoCyc)
        reaction-xref(MetaCyc)
        reaction-xref(KEGG)
        reaction-xref(Reactome)
        reaction-xref(M-CSA)
    """
    _url = "https://www.rhea-db.org"

    # Columns requested when the caller does not restrict them. The M-CSA
    # entry uses the same ``reaction-xref(...)`` spelling as the others.
    _valid_columns = [
        'rhea-id', 'equation', 'chebi', 'chebi-id', 'ec', 'uniprot', 'pubmed',
        'reaction-xref(EcoCyc)', 'reaction-xref(MetaCyc)',
        'reaction-xref(KEGG)', 'reaction-xref(Reactome)',
        'reaction-xref(M-CSA)'
    ]

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Rhea constructor

        :param bool verbose: True by default
        :param bool cache: forwarded to the underlying REST service object

        ::

            >>> from bioservices import Rhea
            >>> r = Rhea()
        """
        self.services = REST(name="Rhea",
                             url=Rhea._url,
                             verbose=verbose,
                             cache=cache)

    def _build_params(self, columns, frmt, limit):
        """Build the request parameters shared by :meth:`search` and :meth:`query`.

        :param columns: comma-separated column names; None selects every
            name in ``_valid_columns``; any other false value (e.g. "")
            sends no ``columns`` parameter at all.
        :param str frmt: value of the ``format`` parameter.
        :param limit: value of the ``limit`` parameter; omitted when false.
        :return: dict of request parameters.
        """
        params = {}
        if limit:
            params['limit'] = limit
        if columns:
            params['columns'] = columns
        params['format'] = frmt
        if columns is None:
            params['columns'] = ",".join(self._valid_columns)
        return params

    @staticmethod
    def _to_dataframe(response):
        """Parse a tab-separated *response* into a pandas DataFrame.

        This is best effort: if pandas cannot be imported or the response
        cannot be parsed (e.g. it is an error code rather than text), the
        response is returned unchanged.
        """
        try:
            import pandas as pd
            import io
            return pd.read_csv(io.StringIO(response), sep='\t')
        except Exception:
            return response

    def search(self, query, columns=None, limit=None, frmt='tsv'):
        """Search for Rhea (mimics https://www.rhea-db.org/)

        :param str query: the search term
        :param str columns: comma-separated column names (see the class
            documentation); all columns by default
        :param int limit: maximum number of results to retrieve
        :param str frmt: the result format (tsv by default)

        :Returns: A pandas DataFrame, or the raw response if it could not
            be converted.

        ::

            >>> r = Rhea()
            >>> df = r.search("caffeine")
            >>> df = r.search("caffeine", columns='rhea-id,equation')


        """
        params = self._build_params(columns, frmt, limit)
        response = self.services.http_get("rhea/?query={}".format(query),
                                          frmt="txt",
                                          params=params)
        return self._to_dataframe(response)

    def query(self, query, columns=None, frmt="tsv", limit=None):
        """Retrieve a concrete reaction for the given id in a given format

        :param str query: the entry to retrieve
        :param str columns: comma-separated column names (see the class
            documentation); all columns by default
        :param str frmt: the result format (tsv); only tsv accepted for now (Nov
            2020).
        :param int limit: maximum number of results to retrieve
        :Returns: dataframe, or the raw response if it could not be converted


        Retrieve Rhea reaction identifiers and equation text::

            r.query("", columns="rhea-id,equation", limit=10)

        Retrieve Rhea reactions with enzymes curated in UniProtKB (only first 10
        entries)::

            r.query("uniprot:*", columns="rhea-id,equation", limit=10)

        To retrieve a specific entry::

            df = r.query("rhea:10661")


        .. versionchanged:: 1.8.0 (entry() method renamed in query() and no
            more format required. Must be given in the entry name e.g.
            query("10281.rxn") instead of entry(10281, format="rxn")
            the option *frmt* is now related to the result format

        """
        # "query" goes first, followed by the shared parameters.
        params = {"query": query}
        params.update(self._build_params(columns, frmt, limit))

        response = self.services.http_get("rhea?",
                                          frmt="txt",
                                          params=params)
        return self._to_dataframe(response)

    def get_metabolites(self, rxn_id):
        """Given a Rhea (http://www.rhea-db.org/) reaction id,
        returns the titles of its participant metabolites.

        :param rxn_id: Rhea reaction id
        :return: dict with two keys, ``"reactants"`` and ``"products"``,
            each holding the list of ``title`` attributes of the matching
            elements of the response.
        """
        # NOTE(review): this class defines no ``entry`` method (the
        # versionchanged note on ``query`` says it was renamed), so this
        # call looks stale -- confirm the intended replacement.
        response = self.entry(rxn_id, frmt="cmlreact")

        reactants = [xx.attrs['title'] for xx in response.findAll("reactant")]
        products = [xx.attrs['title'] for xx in response.findAll("product")]
        return {"reactants": reactants, "products": products}
Пример #24
0
class COG():
    """Interface to the COG service

    All requests are GET requests sent below :attr:`_url`; every public
    method returns whatever ``REST.http_get`` returns for ``frmt="json"``.

    ::

        from bioservices import COG
        c = COG()
        cogs = c.get_cogs()   # one page of COGs (see get_cogs)

    The search keywords (cog, assembly, organism, taxid, category, cat_taxid
    and protein) can be combined to filter the COG lists; see
    :meth:`get_cogs_by_id_and_category` and
    :meth:`get_cogs_by_id_and_organism` for two such combinations.

    """

    _url = "https://www.ncbi.nlm.nih.gov/research/cog/api"

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: forwarded as-is to the underlying REST client
        :param cache: forwarded as-is to the underlying REST client
        """
        self.services = REST(name="cog",
                             url=COG._url,
                             verbose=verbose,
                             cache=cache)

    def _get(self, endpoint, params=None):
        """Send a JSON GET request to *endpoint* with optional *params*.

        The ``params`` keyword is only passed to ``http_get`` when there is
        something to send, so parameter-less calls are left untouched.
        """
        if params is None:
            return self.services.http_get(endpoint, frmt="json")
        return self.services.http_get(endpoint, frmt="json", params=params)

    def get_cogs(self, page=1):
        """Get COGs. Unfortunately, the API sends 10 COGS at a time given a
        specific page.

        The dictionary returned contains the results, count, previous and next
        page.

        :param page: page number to retrieve (default 1)
        """
        return self._get("cog", {"page": page})

    def get_cogs_by_gene(self, gene):
        """Filter COGs by gene tag: MK0280"""
        return self._get("cog", {"gene": gene})

    def get_cogs_by_id(self, cog_id):
        """Filter COGs by COG ID tag: COG0003"""
        return self._get("cog", {"cog": cog_id})

    def get_cogs_by_assembly_id(self, assembly_id):
        """Filter COGs by assembly ID: GCA_000007185.1"""
        return self._get("cog", {"assembly": assembly_id})

    def get_cogs_by_organism(self, name):
        """Filter COGs by organism name: Nitrosopumilus_maritimus_SCM1"""
        return self._get("cog", {"organism": name})

    def get_cogs_by_orgnanism(self, name):
        """Misspelled historical name of :meth:`get_cogs_by_organism`.

        Kept so that existing callers keep working.
        """
        return self.get_cogs_by_organism(name)

    def get_cogs_by_taxon_id(self, taxon_id):
        """Filter COGs by taxid: 1229908"""
        return self._get("cog", {"taxid": taxon_id})

    def get_cogs_by_category(self, category):
        """Filter COGs by Taxonomic Category: ACTINOBACTERIA"""
        return self._get("cog", {"category": category})

    def get_cogs_by_category_id(self, category):
        """Filter COGs by Taxonomic Category taxid: 651137"""
        return self._get("cog", {"cat_taxid": category})

    def get_cogs_by_protein_name(self, protein):
        """Filter COGs by Protein name: AJP49128.1"""
        return self._get("cog", {"protein": protein})

    def get_cogs_by_category_(self, protein):
        """Misnamed historical name of :meth:`get_cogs_by_protein_name`.

        Despite its name this method filters by protein name (AJP49128.1),
        not by category. Kept so that existing callers keep working.
        """
        return self.get_cogs_by_protein_name(protein)

    def get_cogs_by_id_and_category(self, cog_id, category):
        """Filter COGs by COG id and Taxonomy Categories: COG0004 and CYANOBACTERIA"""
        return self._get("cog", {"cog": cog_id, "category": category})

    def get_cogs_by_id_and_organism(self, cog_id, organism):
        """Filter COGs by COG id and organism: COG0004 and Escherichia_coli_K-12_sub_MG1655"""
        # The key used to be "organism," (with a stray comma), which does not
        # match the "organism" keyword used by get_cogs_by_organism.
        return self._get("cog", {"cog": cog_id, "organism": organism})

    def get_all_cogs_definition(self):
        """Get all COG Definitions:"""
        return self._get("cogdef")

    def get_cog_definition_by_cog_id(self, cog_id):
        """Get specific COG Definitions by COG: COG0003"""
        return self._get("cogdef", {"cog": cog_id})

    def get_cog_definition_by_name(self, cog):
        """Get specific COG Definitions by name: Thiamin-binding stress-response protein YqgV, UPF0045 family"""
        return self._get("cogdef", {"name": cog})

    def get_taxonomic_categories(self):
        """Get all Taxonomic Categories:"""
        return self._get("taxonomy")

    def get_taxonomic_category_by_name(self, name):
        """Get specific Taxonomic Category by name: ALPHAPROTEOBACTERIA"""
        return self._get("taxonomy", {"name": name})
Пример #25
0
class MyGeneInfo():
    """Interface to `mygene.info <http://mygene.info>`_ service

    .. doctest::

        >>> from bioservices import MyGeneInfo
        >>> s = MyGeneInfo()

    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        :param bool verbose: prints informative messages (default is off)
        :param bool cache: forwarded as-is to the underlying REST client

        """
        url = "https://mygene.info/v3"
        # The service used to be registered under the name "PDBe", a
        # copy/paste left-over from another wrapper.
        self.services = REST(name="MyGeneInfo",
                             url=url,
                             verbose=verbose,
                             cache=cache)

    def _post_headers(self):
        """Return the headers shared by the POST requests of this class."""
        return {
            "User-Agent": self.services.getUserAgent(),
            "accept": "application/json",
            "Content-Type": "application/x-www-form-urlencoded"
        }

    def get_genes(self,
                  ids,
                  fields="symbol,name,taxid,entrezgene,ensemblgene",
                  species=None,
                  dotfield=True,
                  email=None):
        """Get matching gene objects for a list of gene ids


        :param ids: list of geneinfo IDs
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species:  can be used to limit the gene hits from given
            species. You can use "common names" for nine common species (human, mouse, rat,
            fruitfly, nematode, zebrafish, thale-cress, frog and pig). All other species,
            you can provide their taxonomy ids. Multiple species can be passed using comma
            as a separator. Only sent to the service when set (default None).
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the response of the POST request on the "gene" end-point, as
            returned by the REST client for ``frmt="json"``

        ::

            mgi = MyGeneInfo()
            mgi.get_genes(("301345,22637"))
            # first one is rat, second is mouse. This will return a 'notfound'
            # entry and the second entry as expected.
            mgi.get_genes("301345,22637", species="mouse")

        """
        params = {"ids": ids, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        if species:
            params["species"] = species

        # The parameters are sent in the request body (data=), not in the URL.
        res = self.services.http_post("gene",
                                      data=params,
                                      frmt="json",
                                      headers=self._post_headers())
        return res

    def get_one_gene(self,
                     geneid,
                     fields="symbol,name,taxid,entrezgene,ensemblgene",
                     dotfield=True,
                     email=None):
        """Get matching gene objects for one gene id

        :param geneid: a valid gene ID
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the response of the GET request on "gene/<geneid>", as
            returned by the REST client for ``frmt="json"``

        ::

            mgi = MyGeneInfo()
            mgi.get_one_gene("301345")
        """
        # The gene id is part of the path and is also sent as "ids".
        params = {"ids": geneid, "fields": fields}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        res = self.services.http_get(f"gene/{geneid}",
                                     params=params,
                                     frmt="json")
        return res

    def get_one_query(self,
                      query,
                      email=None,
                      dotfield=True,
                      fields="symbol,name,taxid,entrezgene,ensemblgene",
                      species="human,mouse,rat",
                      size=10,
                      _from=0,
                      sort=None,
                      facets=None,
                      entrezonly=False,
                      ensemblonly=False):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Pass None or "" to send no species filter.
        :param int size: the maximum number of matching gene hits to return
            (with a cap of 1000 at the moment). Default: 10.
        :param int _from: the number of matching gene hits to skip, starting
            from 0. Combining with "size" parameter, this can be useful for paging. Default:
            0.
        :param sort: the comma-separated fields to sort on. Prefix with "-" for
            descending order, otherwise in ascending order. Default: sort by matching scores
            in descending order.
        :param str facets: a single field or comma-separated fields to return
            facets, for example, "facets=taxid", "facets=taxid,type_of_gene".
        :param bool entrezonly: when passed as True, the query returns only the hits
            with valid Entrez gene ids. Default: False.
        :param bool ensemblonly: when passed as True, the query returns only the hits
            with valid Ensembl gene ids. Default: False.
        :param dotfield: control the format of the returned fields when passed
            "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
            the returned data object contains a single "refseq.rna" field, otherwise
            (False), a single "refseq" field with a sub-field of "rna". Default:
            True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :return: the response of the GET request on the "query" end-point, as
            returned by the REST client for ``frmt="json"``

        """
        params = {"fields": fields, "size": size, "from": _from}
        if email:  # pragma: no cover
            params["email"] = email

        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        # species used to be accepted but silently dropped.
        if species:
            params["species"] = species

        if sort:
            params["sort"] = sort
        if facets:  # pragma: no cover
            # used to send the value of *sort* here
            params["facets"] = facets
        assert entrezonly in [True, False]
        params["entrezonly"] = entrezonly
        assert ensemblonly in [True, False]
        # used to send the value of *entrezonly* here
        params["ensemblonly"] = ensemblonly

        res = self.services.http_get(f"query?q={query}",
                                     params=params,
                                     frmt="json")
        return res

    def get_queries(
        self,
        query,
        email=None,
        dotfield=True,
        scopes="all",
        species="human,mouse,rat",
        fields="symbol,name,taxid,entrezgene,ensemblgene",
    ):
        """Make gene query and return matching gene list. Support JSONP and CORS as well.

        :param str query: Query string. Examples "CDK2", "NM_052827", "204639_at",
            "chr1:151,073,054-151,383,976", "hg19.chr1:151073054-151383976". The detailed
            query syntax can be found from our docs.
        :param str fields: a comma-separated fields to limit the fields returned
            from the matching gene hits. The supported field names can be found from any
            gene object (e.g. http://mygene.info/v3/gene/1017). Note that it supports dot
            notation as well, e.g., you can pass "refseq.rna". If "fields=all", all
            available fields will be returned. Default:
            "symbol,name,taxid,entrezgene,ensemblgene".
        :param str species: can be used to limit the gene hits from given species. You can use
            "common names" for nine common species (human, mouse, rat, fruitfly, nematode,
            zebrafish, thale-cress, frog and pig). All other species, you can provide their
            taxonomy ids. Multiple species can be passed using comma as a separator.
            Default: human,mouse,rat. Pass None or "" to send no species filter.
        :param dotfield: control the format of the returned fields when passed
             "fields" parameter contains dot notation, e.g. "fields=refseq.rna". If True
             the returned data object contains a single "refseq.rna" field, otherwise
             (False), a single "refseq" field with a sub-field of "rna". Default:
             True.
        :param str email: If you are regular users of this services, the
            mygeneinfo maintainers/authors encourage you to provide an email,
            so that we can better track the usage or follow up with you.
        :param str scopes: not documented. Set to 'all'
        :return: the response of the POST request on the "query" end-point, as
            returned by the REST client for ``frmt="json"``

        """
        params = {"q": query, "fields": fields, "scopes": scopes}
        if email:  # pragma: no cover
            params["email"] = email
        assert dotfield in [True, False]
        params["dotfield"] = dotfield

        # species used to be accepted but silently dropped.
        if species:
            params["species"] = species

        res = self.services.http_post("query",
                                      params=params,
                                      frmt="json",
                                      headers=self._post_headers())
        return res

    def get_metadata(self):
        """Return the response of the "metadata" end-point."""
        res = self.services.http_get("metadata", frmt="json")
        return res

    def get_taxonomy(self):
        """Return the 'taxonomy' entry of the "metadata" end-point response."""
        res = self.services.http_get("metadata", frmt="json")
        return res['taxonomy']
Пример #26
0
class Panther():
    """Interface to `Panther <http://www.pantherdb.org/services/oai/pantherdb>`_ pages


    ::

        >>> from bioservices import Panther
        >>> p = Panther()
        >>> p.get_supported_genomes()
        >>> p.get_ortholog("zap70", 9606)


        >>> from bioservices import Panther
        >>> p = Panther()
        >>> taxon = [x['taxon_id'] for x in p.get_supported_genomes() if "coli" in x['name'].lower()]
        >>> # you may also use our method called get_taxon_id
        >>> taxon = p.get_taxon_id(pattern="coli")
        >>> res = p.get_mapping("abrB,ackA,acuI", taxon)

    The get_mapping returns for each gene ID the GO terms corresponding to each
    ID. Those go terms may belong to different categories (see
    :meth:`get_annotation_datasets`):

    - MF for molecular function
    - BP for biological process
    - PC for Protein class
    - CC Cellular location
    - Pathway

    Note that results from the website application http://pantherdb.org/
    do not agree with the output of the get_mapping service... Try out the dgt
    gene from ecoli for example

    """
    _url = "http://www.pantherdb.org/services/oai/pantherdb"

    def __init__(self, verbose=True, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded as-is to the underlying REST client
        """
        self.services = REST(name="Panther",
                             url=Panther._url,
                             verbose=verbose,
                             cache=cache)

        self._allPathwaysURL = "http://www.pantherdb.org/pathway/pathwayList.jsp"

    def get_pathways(self):
        """Returns all pathways from pantherdb"""
        return self.services.http_get("supportedpantherpathways")

    def get_supported_genomes(self, type=None):
        """Returns list of supported organisms.

        :param type: can be chrLoc to restrict the search. Only sent to the
            service when not None.
        :return: list built from the ``search/output/genomes/genome`` entry
            of the response

        """
        # NOTE: the parameter name shadows the builtin "type"; it is kept
        # because it is part of the public signature.
        params = {} if type is None else {'type': type}
        res = self.services.http_get("supportedgenomes", params=params)
        return list(res["search"]["output"]["genomes"]['genome'])

    def get_taxon_id(self, pattern=None):
        """return all taxons supported by the service

        If pattern is provided, we filter the names to keep those that contain
        the pattern (case insensitive). If only one is found, we return its
        taxon id itself, otherwise the list of candidate taxon ids.

        :param pattern: optional substring searched in the genome names
        :return: a single taxon id, or a list of taxon ids

        """
        res = self.get_supported_genomes()
        if not pattern:
            return [x["taxon_id"] for x in res]

        needle = pattern.lower()
        taxon = [x['taxon_id'] for x in res if needle in x['name'].lower()]
        if len(taxon) == 1:
            return taxon[0]
        return taxon

    def get_mapping(self, gene_list, taxon):
        """Map identifiers

        Each identifier to be delimited by comma i.e. ',. Maximum of 1000 Identifiers
        can be any of the following: Ensemble gene identifier, Ensemble protein
        identifier, Ensemble transcript identifier, Entrez gene id, gene symbol, NCBI
        GI, HGNC Id, International protein index id, NCBI UniGene id, UniProt accession
        and UniProt id

        :param gene_list: see above
        :param taxon: one taxon ID. See supported
            :meth:`~bioservices.panther.Panther.get_supported_genomes`
        :return: dictionary with the keys "mapped" and "unmapped", each of
            them being a list. When the response holds no mapped genes,
            "mapped" is ``[{}]``.

        If an identifier is not found, information can be found in the
        unmapped key while found identifiers are in the mapped key.

        .. warning:: found and not found identifiers are dispatched into
            unmapped and mapped genes. If there are not found identifiers,
            the input gene list and the mapped genes list do not have the same
            length. The input names are not stored in the output.
            Developers should be aware of that feature.

        """
        params = {"geneInputList": gene_list, "organism": taxon}
        res = self.services.http_post("geneinfo", params=params, frmt='json')
        search = res['search']

        if "mapped_genes" in search:
            mapped_genes = search['mapped_genes']['gene']
            # if only one identifier, returns a dictionary.
            # if several identifiers, returns a list of dictionary.
            # We will be consistent and return a list
            if "accession" in mapped_genes:
                mapped_genes = [mapped_genes]
        else:
            mapped_genes = [{}]

        if "unmapped_list" in search:
            unmapped_genes = search['unmapped_list']["unmapped"]
            if not isinstance(unmapped_genes, list):
                unmapped_genes = [unmapped_genes]
        else:
            unmapped_genes = []

        # Only warn when something is actually missing (the warning used to
        # be emitted on every call).
        if unmapped_genes:
            logger.warning("Some identifiers were not found")
        return {"unmapped": unmapped_genes, "mapped": mapped_genes}

    def get_enrichment(self,
                       gene_list,
                       organism,
                       annotation,
                       enrichment_test="Fisher",
                       correction="FDR",
                       ref_gene_list=None):
        """Returns over represented genes

        Compares a test gene list to a reference gene list,
        and determines whether a particular class (e.g. molecular function,
        biological process, cellular component, PANTHER protein class, the
        PANTHER pathway or Reactome pathway) of genes is overrepresented
        or underrepresented.

        :param gene_list: comma-delimited identifiers to test. Only sent to
            the service when not empty.
        :param organism: a valid taxon ID
        :param enrichment_test: either **Fisher** or **Binomial** test
        :param correction: correction for multiple testing. Either **FDR**,
            **Bonferroni**, or **None**.
        :param annotation: one of the supported PANTHER annotation data types.
            See :meth:`~bioservices.panther.Panther.get_annotation_datasets` to retrieve a list of
            supported annotation data types
        :param ref_gene_list: if not specified, the system will use all the genes
            for the specified organism. Otherwise, a list delimited by
            comma. Maximum of 100000 Identifiers can be any of the
            following: Ensemble gene identifier, Ensemble protein
            identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id,
            NCBI UniGene id, UniProt accession and UniProt id.

        :return: the 'results' entry of the response when there is one,
            otherwise the raw response. According to the service, the
            results hold the keys 'reference' (the organism), 'input_list'
            (the input gene list with unmapped genes) and 'result' (the list
            of candidates).
        :raises AssertionError: if *enrichment_test*, *correction* or
            *annotation* is not one of the accepted values

        ::

            >>> from bioservices import Panther
            >>> p = Panther()
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606, "GO:0008150")
            >>> # For molecular function, use:
            >>> res = p.get_enrichment('zap70,mek1,erk', 9606,
            ...         "ANNOT_TYPE_ID_PANTHER_GO_SLIM_MF")

        """
        assert enrichment_test.lower() in ['fisher', 'binomial']
        if correction is None:
            correction = 'none'

        assert correction.lower() in ['fdr', 'bonferroni', 'none']

        # This is a bug in panther DB where they used bonferonni . should be
        # bonferroni...
        if correction.lower() == "bonferroni":
            correction = "bonferonni"
        # Note that this check costs one extra request to the service.
        assert annotation in [x['id'] for x in self.get_annotation_datasets()]

        params = {'enrichmentTestType': enrichment_test.upper()}
        params['organism'] = organism
        if gene_list:
            params['geneInputList'] = gene_list
        if ref_gene_list:
            params['refInputList'] = ref_gene_list
        params['annotDataSet'] = annotation
        params['correction'] = correction.upper()

        # A failing request now propagates its own exception; the former
        # outer "except: return res" could only raise UnboundLocalError since
        # res was not assigned in that case.
        res = self.services.http_post("enrich/overrep",
                                      params=params,
                                      frmt="json")
        try:
            return res['results']
        except (KeyError, TypeError, IndexError):
            # no 'results' entry, or the response is not a mapping (e.g. an
            # error code): hand back what we received
            return res

    def get_annotation_datasets(self):
        """Retrieve the list of supported annotation data sets"""
        res = self.services.http_get("supportedannotdatasets")
        res = res["search"]["annotation_data_sets"]["annotation_data_type"]
        return res

    def get_ortholog(self,
                     gene_list,
                     organism,
                     target_organism=None,
                     ortholog_type="all"):
        """search for matching orthologs in target organisms.

        Searches for matching orthologs in the gene family that contains
        the search gene associated with the search terms. Returns
        ortholog genes in target organisms given a search organism,
        the search terms and a list of target organisms.

        :param gene_list: comma-delimited identifiers to search for
        :param organism: a valid taxon ID
        :param target_organism: zero or more taxon IDs separated by ','. See
            :meth:`~bioservices.panther.Panther.get_supported_genomes`
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism. Either 'LDO' or 'all'.
        :return: a dictionary with "mapped" and "unmapped" keys. For each
            unmapped gene, a dictionary with id and organism is returned.
            For the mapped gene, a list of ortholog is returned.
        :raises AssertionError: if *ortholog_type* is not 'LDO' or 'all'

        """
        assert ortholog_type in ['LDO', 'all']
        params = {
            "geneInputList": gene_list,
            "organism": organism,
            "orthologType": ortholog_type
        }
        if target_organism is not None:
            params["targetOrganism"] = target_organism
        res = self.services.http_get("ortholog/matchortho",
                                     frmt='json',
                                     params=params)
        res = res['search']['mapping']
        mapped = res['mapped']

        try:
            unmapped = res['unmapped_ids']['unmapped']
        except (KeyError, TypeError):
            # no unmapped section in the response
            unmapped = []
        else:
            # make sure we always have a list
            if isinstance(unmapped, dict):
                unmapped = [unmapped]

        return {"unmapped": unmapped, "mapped": mapped}

    def get_homolog_position(self,
                             gene,
                             organism,
                             position,
                             ortholog_type="all"):
        """Query the "ortholog/homologpos" end-point for one gene

        :param gene: Can be any of the following: Ensemble gene identifier,
            Ensemble protein identifier, Ensemble transcript identifier, Entrez gene id,
            gene symbol, NCBI GI, HGNC Id, International protein index id, NCBI UniGene id,
            UniProt accession and UniProt id
        :param organism: a valid taxon ID
        :param position: a number greater than or equal to 1, sent as "pos"
        :param ortholog_type: optional parameter to specify ortholog type of
            target organism. Either 'LDO' or 'all'.
        :return: the 'mapped' entry of the mapping if present, otherwise the
            'unmapped_ids' entry (with a warning), otherwise None
        :raises AssertionError: if *ortholog_type* or *position* is invalid
        """
        if "," in gene:
            logger.warning(
                "did not expect a comma. Please provide only one gene name")
        assert ortholog_type in ['LDO', 'all']
        assert position >= 1
        params = {
            "gene": gene,
            "organism": organism,
            "pos": position,
            "orthologType": ortholog_type
        }
        res = self.services.http_get("ortholog/homologpos",
                                     params=params,
                                     frmt="json")
        mapping = res['search']['mapping']
        if "mapped" in mapping:
            return mapping['mapped']
        if "unmapped_ids" in mapping:
            logger.warning("did not find any match for {}".format(gene))
            return mapping["unmapped_ids"]
        return None

    def get_supported_families(self, N=1000, progress=True):
        """Returns the list of supported PANTHER family IDs

        This services returns only 1000 items per request. This is defined by
        the index. For instance index set to 1 returns the first 1000 families.
        Index set to 1001 returns the next 1000 families and so on.
        As of 20 Feb 2020, there was about 15,000 families.

        This function simplifies your life by calling the service as many times
        as required. Therefore it returns all families in one go.

        :param N: expected number of families per request
        :param progress: show a progress bar while fetching the pages
        :raises ValueError: if the first request does not return exactly *N*
            families
        """
        import math
        from easydev import Progress

        params = {'startIndex': 1}
        res = self.services.http_get("supportedpantherfamilies", params=params)
        results = res['search']['panther_family_subfam_list']['family']
        if len(results) != N:
            msg = "looks like the services changed. Call this function with N={}"
            msg = msg.format(len(results))
            raise ValueError(msg)

        number_of_families = res['search']['number_of_families']
        # Total number of pages, the first one being already fetched. Using
        # ceil avoids asking for a page past the end when the number of
        # families is an exact multiple of N.
        n_pages = int(math.ceil(number_of_families / N))

        pb = None
        if progress:
            pb = Progress(max(1, n_pages - 1))
            pb.animate(1)

        for i in range(1, n_pages):
            params = {'startIndex': i * N + 1}
            res = self.services.http_get("supportedpantherfamilies",
                                         params=params)
            data = res['search']['panther_family_subfam_list']['family']
            # NOTE(review): other end-points collapse a single item into a
            # dictionary; presumably the same can happen here -- confirm.
            if isinstance(data, dict):
                data = [data]
            results.extend(data)
            if pb is not None:
                pb.animate(i)
        return results

    def get_family_ortholog(self, family, taxon_list=None):
        """Search for matching orthologs in target organisms

        Also return the corresponding position in the target
        organism sequence. The system searches for matching
        orthologs in the gene family that contains the search
        gene associated with the search term.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        :return: the ``search/ortholog_list/ortholog`` entry of the response
        """

        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familyortholog",
                                     params=params,
                                     frmt="json")
        return res['search']['ortholog_list']['ortholog']

    def get_family_msa(self, family, taxon_list=None):
        """Returns MSA information for the specified family.

        :param family: family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        :return: the ``search/MSA_list/sequence_info`` entry of the response

        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("familymsa", params=params, frmt="json")
        return res['search']['MSA_list']['sequence_info']

    def get_tree_info(self, family, taxon_list=None):
        """Returns tree topology information and node attributes for the specified family.

        :param family: Family ID
        :param taxon_list: Zero or more taxon IDs separated by ','.
        :return: the ``search`` entry of the response
        """
        params = {"family": family}
        if taxon_list:
            params['taxonFltr'] = taxon_list
        res = self.services.http_get("treeinfo", params=params, frmt="json")
        return res['search']
Пример #27
0
class Reactome():
    """Interface to the Reactome Content Service (REST)

    Every method sends a GET request, relative to :attr:`_url`, through
    the ``REST`` helper stored in the :attr:`services` attribute and
    returns the reply as decoded by that helper.

    ::

        r = Reactome()
        r.get_species_main()

    .. todo:: interactors, orthology, participants, person,
        query, references, schema

    """

    _url = "https://reactome.org/ContentService"

    def __init__(self, verbose=True, cache=False):
        """.. rubric:: Constructor

        :param verbose: stored in the :attr:`debugLevel` attribute. The
            REST helper itself is always created with the "ERROR"
            verbosity.
        :param bool cache: forwarded to the REST helper
        """
        self.services = REST(name="Reactome",
                             url=Reactome._url,
                             verbose="ERROR",
                             cache=cache)
        self.debugLevel = verbose

    @property
    def version(self):
        """Reply of the ``data/database/version`` request (text)"""
        return self.services.http_get("data/database/version", frmt="txt")

    @property
    def name(self):
        """Reply of the ``data/database/name`` request (text)"""
        return self.services.http_get("data/database/name", frmt="txt")

    def get_discover(self, identifier):
        """The schema.org for an Event in Reactome knowledgebase

        For each event (reaction or pathway) this method generates a
        json file representing the dataset object as defined by
        schema.org (http). This is mainly used by search engines in
        order to index the data

        :param identifier: identifier of the event

        ::

            r.get_discover("R-HSA-446203")

        """
        res = self.services.http_get("data/discover/{}".format(identifier),
                                     frmt="json")
        return res

    def get_diseases(self):
        """list of diseases objects"""
        return self.services.http_get("data/diseases", frmt="json")

    def get_diseases_doid(self):
        """retrieves the list of disease DOIDs annotated in Reactome

        The text reply is parsed line by line; each non-blank line must
        hold exactly two whitespace-separated fields (key, value).

        return: dictionary with DOID contained in the values()
        """
        res = self.services.http_get("data/diseases/doid", frmt="txt")
        fields = (line.split() for line in res.splitlines())
        # blank lines (e.g. the one following a trailing newline) carry no
        # key/value pair and would otherwise make dict() fail
        return dict(pair for pair in fields if pair)

    def get_interactors_psicquic_molecule_details(self):
        """Retrieve clustered interaction, sorted by score, of a given accession by resource."""
        raise NotImplementedError

    def get_interactors_psicquic_molecule_summary(self):
        """Retrieve a summary of a given accession by resource"""
        raise NotImplementedError

    def get_interactors_psicquic_resources(self):
        """Retrieve a list of all Psicquic Registries services"""
        raise NotImplementedError

    def get_interactors_static_molecule_details(self):
        """Retrieve a detailed interaction information of a given accession"""
        raise NotImplementedError

    def get_interactors_static_molecule_pathways(self):
        """Retrieve a list of lower level pathways where the interacting molecules can be found"""
        raise NotImplementedError

    def get_interactors_static_molecule_summary(self):
        """Retrieve a summary of a given accession"""
        raise NotImplementedError

    def get_exporter_fireworks(self):
        """Not implemented yet"""
        raise NotImplementedError

    def get_exporter_reaction(self):
        """Not implemented yet"""
        raise NotImplementedError

    def get_exporter_diagram(self,
                             identifier,
                             ext="png",
                             quality=5,
                             diagramProfile="Modern",
                             analysisProfile="Standard",
                             filename=None):
        """Export a given pathway diagram to raster file

        This method accepts identifiers for Event class instances.
        When a diagrammed pathway is provided, the diagram is exported
        to the specified format. When a subpathway is provided, the
        diagram for the parent is exported and the events that are part
        of the subpathways are selected. When a reaction is provided,
        the diagram containing the reaction is exported and the reaction
        is selected.

        :param identifier: Event identifier (it can be a pathway with
            diagram, a subpathway or a reaction)
        :param ext: File extension (defines the image format) in png,
            jpeg, jpg, svg, gif
        :param quality: Result image quality, an integer accepted when
            it lies in range(11) (Default 5)
        :param flg: not implemented
        :param sel: not implemented
        :param diagramProfile: Diagram Color Profile ("Modern" or
            "Standard")
        :param token: not implemented
        :param analysisProfile: Analysis Color Profile ("Standard",
            "Strosobar" or "Copper Plus")
        :param expColumn: not implemented
        :param filename: if given, save the results in the provided filename

        return: raw data if filename parameter is not set. Otherwise, the data
            is saved in the filename and the function returns None

        :raises AssertionError: if one of the arguments is not among the
            accepted values

        """
        assert ext in ['png', 'jpg', 'jpeg', 'svg', "gif"]
        assert quality in range(11)
        assert diagramProfile in ["Modern", "Standard"]
        assert analysisProfile in ["Standard", "Strosobar", "Copper Plus"]

        params = {
            "diagramProfile": diagramProfile,
            "analysisProfile": analysisProfile,
            "quality": quality
        }

        res = self.services.http_get(
            "exporter/diagram/{}.{}".format(identifier, ext),
            params=params,
            frmt=ext)
        if not filename:
            return res

        # Pick the file mode from what was actually received so that both
        # binary images and textual payloads (svg) can be saved.
        mode = "wb" if isinstance(res, (bytes, bytearray)) else "w"
        with open(filename, mode) as fout:
            fout.write(res)
        return None

    def get_complex_subunits(self,
                             identifier,
                             excludeStructuresSpecifies=False):
        """A list with the entities contained in a given complex

        Retrieves the list of subunits that constitute any given complex.
        In case the complex comprises other complexes, this method
        recursively traverses the content returning each contained
        PhysicalEntity. Contained complexes and entity sets can be
        excluded setting the 'excludeStructures' optional parameter to 'true'

        :param identifier: The complex for which subunits are requested
        :param excludeStructuresSpecifies: Specifies whether contained
            complexes and entity sets are excluded in the response. The
            value is sent as the ``excludeStructures`` query parameter
            (the Python name is kept for backward compatibility).

        ::

            r.get_complex_subunits("R-HSA-5674003")
        """
        params = {"excludeStructures": excludeStructuresSpecifies}
        res = self.services.http_get(
            "data/complex/{}/subunits".format(identifier),
            params=params,
            frmt="json")
        return res

    def get_complexes(self, resources, identifier):
        """A list of complexes containing the pair (identifier, resource)

        Retrieves the list of complexes that contain a given (identifier,
        resource). The method deconstructs the complexes into all its
        participants to do so.

        :param resources: The resource of the identifier for which
            complexes are requested (e.g. UniProt)
        :param identifier: The identifier for which complexes are requested

        ::

            r.get_complexes(resources, identifier)
            r.get_complexes("UniProt", "P43403")

        """
        res = self.services.http_get(
            "data/complexes/{}/{}".format(resources, identifier),
            frmt="json")
        return res

    def get_entity_componentOf(self, identifier):
        """A list of larger structures containing the entity

        Retrieves the list of structures (Complexes and Sets) that
        include the given entity as their component. It should be
        mentioned that the list includes only simplified entries
        (type, names, ids) and not full information about each item.

        :param identifier: identifier of the entity

        ::

            r.get_entity_componentOf("R-HSA-199420")

        """
        res = self.services.http_get(
            "data/entity/{}/componentOf".format(identifier), frmt="json")
        return res

    def get_entity_otherForms(self, identifier):
        """All other forms of PhysicalEntity

        Retrieves a list containing all other forms of the given
        PhysicalEntity. These other forms are PhysicalEntities that
        share the same ReferenceEntity identifier, e.g. PTEN
        H93R[R-HSA-2318524] and PTEN C124R[R-HSA-2317439] are two
        forms of PTEN.

        :param identifier: identifier of the entity

        ::

            r.get_entity_otherForms("R-HSA-199420")

        """
        res = self.services.http_get(
            "data/entity/{}/otherForms".format(identifier), frmt="json")
        return res

    def get_event_ancestors(self, identifier):
        """The ancestors of a given event

        The Reactome definition of events includes pathways and reactions.
        Although events are organised in a hierarchical structure, a single
        event can be in more than one location, i.e. a reaction can take
        part in different pathways while, in the same way, a sub-pathway
        can take part in many pathways. Therefore, this method retrieves
        a list of all possible paths from the requested event to the top
        level pathway(s).

        :param identifier: The event for which the ancestors are requested

        ::

            r.get_event_ancestors("R-HSA-5673001")

        """
        res = self.services.http_get(
            "data/event/{}/ancestors".format(identifier), frmt="json")
        return res

    def get_eventsHierarchy(self, species):
        """The full event hierarchy for a given species

        Events (pathways and reactions) in Reactome are organised in a
        hierarchical structure for every species. By following all
        'hasEvent' relationships, this method retrieves the full event
        hierarchy for any given species. The result is a list of tree
        structures, one for each TopLevelPathway. Every event in these trees is
        represented by a PathwayBrowserNode. The latter contains the stable identifier,
        the name, the species, the url, the type, and the diagram of the particular
        event.

        :param species: Allowed species filter: SpeciesName (eg: Homo sapiens)
            SpeciesTaxId (eg: 9606)

        ::

            r.get_eventsHierarchy(9606)
        """
        res = self.services.http_get("data/eventsHierarchy/{}".format(species),
                                     frmt="json")
        return res

    def get_exporter_sbml(self, identifier):
        """Export given Pathway to SBML


        :param identifier: DbId or StId of the requested database object

        ::

            r.get_exporter_sbml("R-HSA-68616")

        """
        res = self.services.http_get("exporter/sbml/{}.xml".format(identifier),
                                     frmt="xml")
        return res

    def get_pathway_containedEvents(self, identifier):
        """All the events contained in the given event

        Events are the building blocks used in Reactome to represent
        all biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a
        pathway can contain smaller pathways and reactions. This method
        recursively retrieves all the events contained in any given event.

        :param identifier: The event for which the contained events are
            requested

        ::

            res = r.get_pathway_containedEvents("R-HSA-5673001")

        """
        res = self.services.http_get(
            "data/pathway/{}/containedEvents".format(identifier), frmt="json")
        return res

    def get_pathway_containedEvents_by_attribute(self, identifier, attribute):
        """A single property for each event contained in the given event

        Events are the building blocks used in Reactome to represent all
        biological processes, and they include pathways and reactions.
        Typically, an event can contain other events. For example, a
        pathway can contain smaller pathways (subpathways) and reactions.
        This method recursively retrieves a single attribute for each of
        the events contained in the given event.


        :param identifier: The event for which the contained events are requested
        :param attribute: Attribute to be filtered
        :return: when the reply is a string, its first and last characters
            are dropped and the remainder is split on commas into a list
            of stripped strings; any other reply is returned untouched.

        ::

             r.get_pathway_containedEvents_by_attribute("R-HSA-5673001", "stId")

        """
        res = self.services.http_get(
            "data/pathway/{}/containedEvents/{}".format(identifier, attribute),
            frmt="txt")
        if isinstance(res, str):
            # the text reply is a bracketed, comma-separated list
            res = [x.strip() for x in res[1:-1].split(",")]
        return res

    def get_pathways_low_diagram_entity(self, identifier):
        """A list of lower level pathways with diagram containing
        a given entity or event

        This method traverses the event hierarchy and retrieves the
        list of all lower level pathways that have a diagram and
        contain the given PhysicalEntity or Event.

        :param identifier: The entity that has to be present in the pathways

        .. note:: no species filter is sent by this method.

        ::

            r.get_pathways_low_diagram_entity("R-HSA-199420")

        """
        res = self.services.http_get(
            "data/pathways/low/diagram/entity/{}".format(identifier),
            frmt="json")
        return res

    def get_pathways_low_diagram_entity_allForms(self, identifier):
        """Same request as :meth:`get_pathways_low_diagram_entity` with the
        ``/allForms`` suffix

        :param identifier: The entity that has to be present in the pathways

        ::

            r.get_pathways_low_diagram_entity_allForms("R-HSA-199420")
        """
        res = self.services.http_get(
            "data/pathways/low/diagram/entity/{}/allForms".format(identifier),
            frmt="json")
        return res

    def get_pathways_low_diagram_identifier_allForms(self, identifier):
        """Reply of the ``data/pathways/low/diagram/identifier/<identifier>/allForms``
        request

        :param identifier: identifier inserted in the request path

        ::

            r.get_pathways_low_diagram_identifier_allForms("PTEN")

        """
        res = self.services.http_get(
            "data/pathways/low/diagram/identifier/{}/allForms".format(
                identifier),
            frmt="json")
        return res

    def get_pathways_low_entity(self, identifier):
        """A list of lower level pathways containing a given entity or event

        This method traverses the event hierarchy and retrieves the
        list of all lower level pathways that contain the given
        PhysicalEntity or Event.

        :param identifier: The entity that has to be present in the pathways

        ::

            r.get_pathways_low_entity("R-HSA-199420")
        """
        res = self.services.http_get(
            "data/pathways/low/entity/{}".format(identifier), frmt="json")
        return res

    def get_pathways_low_entity_allForms(self, identifier):
        """A list of lower level pathways containing any form of a given entity

        This method traverses the event hierarchy and retrieves the list of all
        lower level pathways that contain the given PhysicalEntity in any of
        its variant forms. These variant forms include for example different
        post-translationally modified versions of a single protein, or the
        same chemical in different compartments.

        :param identifier: The entity that has to be present in the pathways

        ::

            r.get_pathways_low_entity_allForms("R-HSA-199420")
        """
        res = self.services.http_get(
            "data/pathways/low/entity/{}/allForms".format(identifier),
            frmt="json")
        return res

    def get_pathways_top(self, species):
        """Reply of the ``data/pathways/top/<species>`` request

        :param species: species inserted in the request path
        """
        res = self.services.http_get("data/pathways/top/{}".format(species),
                                     frmt="json")
        return res

    def get_references(self, identifier):
        """All referenceEntities for a given identifier

        Retrieves a list containing all the reference entities for a given
        identifier.

        :param identifier: identifier inserted in the request path

        ::

            r.get_references(15377)

        """
        res = self.services.http_get(
            "references/mapping/{}".format(identifier), frmt="json")
        return res

    def get_mapping_identifier_pathways(self, resource, identifier):
        """Reply of the ``data/mapping/<resource>/<identifier>/pathways``
        request

        :param resource: resource of the identifier (inserted in the path)
        :param identifier: identifier inserted in the request path
        """
        res = self.services.http_get(
            "data/mapping/{}/{}/pathways".format(resource, identifier),
            frmt="json")
        return res

    def get_mapping_identifier_reactions(self, resource, identifier):
        """Reply of the ``data/mapping/<resource>/<identifier>/reactions``
        request

        :param resource: resource of the identifier (inserted in the path)
        :param identifier: identifier inserted in the request path
        """
        res = self.services.http_get(
            "data/mapping/{}/{}/reactions".format(resource, identifier),
            frmt="json")
        return res

    def search_facet(self):
        """A list of facets corresponding to the whole Reactome search data

        This method retrieves faceting information on the whole Reactome search data.


        """
        res = self.services.http_get("search/facet", frmt="json")
        return res

    def search_facet_query(self, query):
        """A list of facets corresponding to a specific query

        This method retrieves faceting information on a specific query

        :param query: search term, inserted as is in the request URL

        """
        res = self.services.http_get(
            "search/facet_query?query={}".format(query), frmt="json")
        return res

    def search_query(self, query):
        """Queries Solr against the Reactome knowledgebase

        This method performs a Solr query on the Reactome knowledgebase.
        Results can be provided in a paginated format.

        :param query: search term, inserted as is in the request URL

        """
        res = self.services.http_get("search/query?query={}".format(query),
                                     frmt="json")
        return res

    def search_spellcheck(self, query):
        """Spell-check suggestions for a given query

        This method retrieves a list of spell-check suggestions
        for a given search term.

        :param query: search term, inserted as is in the request URL

        """
        res = self.services.http_get(
            "search/spellcheck?query={}".format(query), frmt="json")
        return res

    def search_suggest(self, query):
        """Autosuggestions for a given query


        This method retrieves a list of suggestions for a given search term.

        :param query: search term, inserted as is in the request URL

        ::

            >>> r.search_suggest("apopt")
            ['apoptosis', 'apoptosome', 'apoptosome-mediated', 'apoptotic']

        """
        res = self.services.http_get(
            "search/suggest?query={}".format(query), frmt="json")
        return res

    def get_species_all(self):
        """the list of all species in Reactome"""
        res = self.services.http_get("data/species/all", frmt="json")
        return res

    def get_species_main(self):
        """the list of main species in Reactome

        ::

            r.get_species_main()


        """
        res = self.services.http_get("data/species/main", frmt="json")
        return res
Пример #28
0
class Seqret():
    """Interface to the `Seqret <http://www.ebi.ac.uk/readseq>`_ service

    ::

        >>> from bioservices import *
        >>> s = Seqret()

    The ReadSeq service was replaced by the Seqret services (2015).

    The identifier of the last job submitted with :meth:`run` is kept in
    the ``_jobid`` attribute and is used by :meth:`get_status` when no
    identifier is given.

    .. versionchanged:: 0.15

    """
    def __init__(self, verbose=True):
        """.. rubric:: Constructor

        :param bool verbose: forwarded to the REST helper

        """
        url = "https://www.ebi.ac.uk/Tools/services/rest/emboss_seqret"
        self.services = REST(name="seqret", url=url, verbose=verbose)
        # cache filled on first access of the ``parameters`` property
        self._parameters = None
        # identifier of the last job submitted with run()
        self._jobid = None

    def get_parameters(self):
        """Get a list of the parameter names.

        :returns: the ``parameters`` item of the JSON reply to the
            ``parameters`` request.

        """
        parameters = self.services.http_get("parameters", frmt="json")

        return parameters['parameters']

    def _get_parameters(self):
        # fetch once, then serve the cached value (a falsy cached value
        # triggers a new request)
        if not self._parameters:
            self._parameters = self.get_parameters()
        return self._parameters

    parameters = property(_get_parameters, doc="Get list of parameter names")

    def get_parameter_details(self, parameterId):
        """Get details of a specific parameter.

        :param str parameterId: identifier/name of the parameter to fetch details of.
        :return: a data structure describing the parameter and its values.
        :raises ValueError: if *parameterId* is not in :attr:`parameters`

        ::

            s = Seqret()
            print(s.get_parameter_details("stype"))

        """
        if parameterId not in self.parameters:
            raise ValueError(
                "Invalid parameterId provided(%s). See parameters attribute" %
                parameterId)

        request = "parameterdetails/" + parameterId
        res = self.services.http_get(request, frmt="json")
        return res

    def run(self, email, title, **kargs):
        """Submit a job to the service.

        :param str email: user e-mail address.
        :param str title: job title.
        :param kargs: parameters for the tool; each name must be in
            :attr:`parameters` and ``sequence`` is mandatory. The values
            of stype, inputformat, outputformat, feature, firstonly,
            reverse, outputcase and seqrange are checked against the
            values returned by :meth:`get_parameter_details`. Other
            names are validated but not sent.
        :return: string containing the job identifier (jobId), also
            stored in the ``_jobid`` attribute.

        Deprecated (old readseq service)::

            Format Name     Value
            Auto-detected   0
            EMBL            4
            GenBank         2
            Fasta(Pearson)  8
            Clustal/ALN     22
            ACEDB           25
            BLAST           20
            DNAStrider      6
            FlatFeat/FFF    23
            GCG             5
            GFF             24
            IG/Stanford     1
            MSF             15
            NBRF            3
            PAUP/NEXUS      17
            Phylip(Phylip4)     12
            Phylip3.2       11
            PIR/CODATA      14
            Plain/Raw       13
            SCF             21
            XML             19

        As output, you also have

        Pretty 18

        ::

            s = Seqret()
            jobid = s.run("*****@*****.**", "test", sequence=fasta, inputformat=8,
                outputformat=2)
            genbank = s.get_result(s._jobid)


        """
        for k in kargs:
            self.services.devtools.check_param_in_list(k, self.parameters)

        assert "sequence" in kargs, "the 'sequence' argument is mandatory"
        params = {"email": email, "title": title}

        checked_names = [
            'stype', 'inputformat', 'outputformat', "feature", "firstonly",
            "reverse", 'outputcase', 'seqrange'
        ]
        for k in checked_names:
            if k not in kargs:
                continue
            value = kargs[k]
            details = self.get_parameter_details(k)
            valid_values = [x['value'] for x in details['values']['values']]
            # valid values are compared as strings
            self.services.devtools.check_param_in_list(
                str(value), valid_values)
            params[k] = value

        params['sequence'] = kargs['sequence']

        jobid = self.services.http_post("run", frmt="txt", data=params)
        self._jobid = jobid
        return jobid

    def get_status(self, jobid=None):
        """Get the status of a submitted job.

        :param str jobid: job identifier. If None, the identifier of the
            last job submitted with :meth:`run` is used.
        :return: string containing the status.
        :raises ValueError: if no *jobid* is given and no job has been
            submitted yet.

        The values for the status are:

        - RUNNING: the job is currently being processed.
        - FINISHED: job has finished, and the results can then be retrieved.
        - ERROR: an error occurred attempting to get the job status.
        - FAILURE: the job failed.
        - NOT_FOUND: the job cannot be found.

        """
        if jobid is None:
            # fall back on the last submitted job instead of requesting
            # the meaningless "status/None"
            jobid = getattr(self, "_jobid", None)
            if jobid is None:
                raise ValueError(
                    "No jobid provided and no job submitted with run() yet")
        res = self.services.http_get("status/{}".format(jobid), frmt="txt")
        return res

    def get_result_types(self, jobid):
        """Get the available result types for a finished job.

        :param str jobid: job identifier.
        :return: list with the ``identifier`` of each entry found in the
            ``types`` item of the reply.
        """
        res = self.services.http_get("resulttypes/{}".format(jobid),
                                     frmt="json")
        return [x['identifier'] for x in res["types"]]

    def get_result(self, jobid, result_type="out"):
        """Get the result of a job of the specified type.

        :param str jobid: job identifier.
        :param str result_type: result type to retrieve (see
            :meth:`get_result_types`); it is not validated here.
        :return: the result as text, or None (after logging a warning)
            when the job status is not 'FINISHED'.
        """
        if self.get_status(jobid) != 'FINISHED':
            self.services.logging.warning(
                "Your job is not finished yet. Try again later.")
            return None

        res = self.services.http_get("result/{}/{}".format(jobid, result_type),
                                     frmt="txt")

        return res
Пример #29
0
class ENA():
    """Interface to `ENA <http://www.ebi.ac.uk/ena/index.php>`_

    Here is a quick example to create the service:

    .. doctest::

        >>> from bioservices import ENA
        >>> e = ENA(verbose=False)


    Retrieve read domain metadata in XML format::

        print(e.get_data('ERA000092', 'xml'))

    Retrieve assembled and annotated sequences in fasta format::

        print(e.get_data('A00145', 'fasta'))

    Retrieve assembled and annotated sequences as an HTML page::

        e.get_data('A00145', 'html')

    Retrieve assembled and annotated sequence records using sequence
    versions::

        e.get_data('AM407889.1', 'fasta')
        e.get_data('AM407889.2', 'fasta')

    .. note:: :meth:`get_data` still accepts the *fasta_range*,
        *expanded*, *header* and *download* arguments of the former API
        (e.g. ``e.get_data('A00145', 'fasta', fasta_range=[3,63])``,
        ``e.get_data('AL513382', frmt='text', expanded=True)`` or
        ``e.get_data('BN000065', 'text', header=True)``) but the current
        implementation does not forward them to the service.

    """
    url = "http://www.ebi.ac.uk/ena/browser/api"

    # decoding format handed to the REST helper for each supported ``frmt``
    _RESPONSE_FORMATS = {
        "text": "txt",
        "fasta": "txt",
        "fastq": "txt",
        "embl": "txt",
        "html": "default",
        "xml": "xml",
    }

    def __init__(self, verbose=False, cache=False):
        """**Constructor**

        :param verbose: set to False to prevent informative messages
        :param cache: forwarded to the REST helper
        """
        self.services = REST(name="ENA",
                             url=ENA.url,
                             verbose=verbose,
                             cache=cache)
        self.services.TIMEOUT = 100

    def get_data(self,
                 identifier,
                 frmt,
                 fasta_range=None,
                 expanded=None,
                 header=None,
                 download=None):
        """Retrieve an entry in the requested format

        The request is sent to ``<url>/<frmt>/<identifier>``.

        :param identifier: accession of the entry
        :param frmt: one of text, fasta, fastq, embl, html, xml (what is
            available does depend on the entry)
        :param fasta_range: accepted for backward compatibility; ignored
        :param expanded: accepted for backward compatibility; ignored
        :param header: accepted for backward compatibility; ignored
        :param download: accepted for backward compatibility; ignored
        :return: the reply of the REST helper
        :raises ValueError: if *frmt* is not one of the supported formats
            (no request is sent in that case)

        Example:

            get_data("AL513382", "embl")

        ENA API changed in 2020 but we tried to keep the same services in this
        method.
        """
        try:
            response_frmt = self._RESPONSE_FORMATS[frmt]
        except KeyError:
            raise ValueError(
                "frmt must be one of {}; got {!r}".format(
                    sorted(self._RESPONSE_FORMATS), frmt)) from None

        url = f"{self.url}/{frmt}/{identifier}"
        return self.services.http_get(url, frmt=response_frmt)

    def data_warehouse(self):
        """Placeholder: does nothing and returns None"""
        #http://www.ebi.ac.uk/ena/data/warehouse/search?query="geo_circ(-0.587,-90.5713,170)"&result=sequence_release&display=text&download=gzip
        pass

    def get_taxon(self, taxon):
        """Deprecated: only prints a deprecation message and returns None

        :param taxon: ignored
        """
        print("deprecated since v.7.8 due to ENA update")
Пример #30
0
class ArrayExpress():
    """Interface to the `ArrayExpress <http://www.ebi.ac.uk/arrayexpress>`_ service

    ArrayExpress allows to retrieve data sets used in various experiments.

    **QuickStart** Given an experiment name (e.g., E-MEXP-31), type::

        s = ArrayExpress()
        s.getAE('E-MEXP-31')

    You can also quickly retrieve experiments matching some search queries as
    follows::

        s.queryAE(keywords="pneumonia", species='homo+sapiens')

    Now let us look at other methods. If you know the file and experiment
    name, you can retrieve a specific file as follows::

        >>> from bioservices import ArrayExpress
        >>> s = ArrayExpress()
        >>> # retrieve a specific file from a experiment
        >>> res = s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")

    The main issue is that you may not know the experiment you are looking for.
    You can query experiments by keyword::

        >>> # Search for experiments
        >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

    keywords used in queries follows these rules:

    * Accession number and keyword searches are case insensitive
    * More than one keyword can be searched for using the + sign (e.g. keywords="cancer+breast")
    * Use an asterisk as a multiple character wild card (e.g. keywords="colo*")
    * use a question mark ? as a single character wild card (e.g. keywords="te?t")

    More complex queries can be constructed using the operators AND, OR or NOT.
    AND is the default if no operator is specified. Either experiments or
    files can be searched for. Examples are::

        keywords="prostate+AND+breast"
        keywords="prostate+breast"      # same as above
        keywords="prostate+OR+breast"
        keywords="prostate+NOT+breast "

    The returned objects are XML parsed with beautifulSoup. You can get all
    experiments using the getChildren method:

    .. doctest::
        :options: +SKIP

        >>> res = s.queryExperiments(keywords="breast+cancer")
        >>> len(res.getchildren())
        1487


    If you know what you are looking for, you can give the experiment name::

        >>> res = s.retrieveExperiment("E-MEXP-31")
        >>> exp = res.getchildren()[0]   # it contains only one experiment
        >>> [x.text for x in exp.getchildren() if x.tag == "name"]
        ['Transcription profiling of mammalian male germ cells undergoing mitotic
        growth, meiosis and gametogenesis in highly enriched cell populations']

    Using the same example, you can retrieve the names of the files related to
    the experiment::

        >>> files = [x.getchildren() for x in exp.getchildren() if x.tag == "files"]
        >>> [x.get("name") for x in files[0]]
        ['E-MEXP-31.raw.1.zip',
         'E-MEXP-31.processed.1.zip',
         'E-MEXP-31.idf.txt',
         'E-MEXP-31.sdrf.txt']

    New in version 1.3.7: you can use the method :meth:`getAE`.

    Then, you may want to download a particular file::

        >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")


    .. seealso:: :meth:`queryFiles` for more details about the parameters to be
        used in queries.

    .. warning:: supports only new style (v2). You can still use the old style by
        setting the request manually using the :meth:`version`.

    .. warning:: some syntax requires the + character, which is a special character
        for http requests. It is replaced internally by spaces if found.

    .. warning:: filtering is not implemented (e.g., the assaycount:[x TO y] syntax).
    """
    def __init__(self, verbose=False, cache=False):
        """.. rubric:: Constructor

        Creates the REST helper stored in ``self.services``.

        :param bool verbose: prints informative messages
        :param bool cache: forwarded unchanged to the REST helper

        """
        rest_settings = {
            "name": "ArrayExpress",
            "url": "http://www.ebi.ac.uk/arrayexpress",
            "cache": cache,
            "verbose": verbose,
        }
        self.services = REST(**rest_settings)

        # Version segment inserted into the search path built by _search.
        self.version = "v2"

    def _search(self, mode, **kargs):
        """common function to search for files or experiments"""
        assert mode in ["experiments", "files"]
        url = "{0}/{1}/{2}".format("json", self.version, mode)

        defaults = {
            "accession":
            None,  #ex: E-MEXP-31
            "keywords":
            None,
            "species":
            None,
            "wholewords":
            "on",
            "expdesign":
            None,
            "exptype":
            None,
            "gxa":
            "true",
            "pmid":
            None,
            "sa":
            None,
            "ef":
            None,  # e.g., CellType
            "efv":
            None,  # e.g., HeLa
            "array":
            None,  # ex: A-AFFY-33
            "expandfo":
            "on",
            "directsub":
            "true",
            "sortby": [
                "accession", "name", "assays", "species", "releasedate",
                "fgem", "raw", "atlas"
            ],
            "sortorder": ["ascending", "descending"],
        }

        for k in kargs.keys():
            if k not in defaults.keys():
                raise ValueError(
                    "Incorrect value provided ({}). Correct values are {}".
                    format(k, sorted(defaults.keys())))

        #if len(kargs.keys()):
        #    url += "?"
        params = {}

        for k, v in kargs.items():
            if k in ["expandfo", "wholewords"]:
                if v in ["on", True, "true", "TRUE", "True"]:
                    #params.append(k + "=on")
                    params[k] = "on"
            elif k in ["gxa", "directsub"]:
                if v in ["on", True, "true", "TRUE", "True"]:
                    #params.append(k + "=true")
                    params[k] = "true"
                elif v in [False, "false", "False"]:
                    #params.append(k + "=false")
                    params[k] = "false"
                else:
                    raise ValueError("directsub must be true or false")
            else:
                if k in ["sortby", "sortorder"]:
                    self.services.devtools.check_param_in_list(v, defaults[k])
                #params.append(k + "=" + v)
                params[k] = v

        # NOTE: + is a special character that is replaced by %2B
        # The + character is the proper encoding for a space when quoting
        # GET or POST data. Thus, a literal + character needs to be escaped
        # as well, lest it be decoded to a space on the other end
        for k, v in params.items():
            params[k] = v.replace("+", " ")

        self.services.logging.info(url)
        res = self.services.http_get(url, frmt="json", params=params)
        return res

    def queryFiles(self, **kargs):
        """Retrieve a list of files associated with a set of experiments

        The following parameters are used to search for experiments/files:

        :param str accession: experiment primary or secondary accession e.g. E-MEXP-31
        :param str array: array design accession or name e.g., A-AFFY-33
        :param str ef: Experimental factor, the name of the main variables in an
            experiment. (e.g., CellType)
        :param str efv:  Experimental factor value. Has EFO expansion. (e.g.,
            HeLa)
        :param str expdesign: Experiment design type  (e.g., "dose+response")
        :param str exptype:  Experiment type. Has EFO expansion. (e.g.,
            "RNA-seq")
        :param str gxa: Presence in the Gene Expression Atlas. Only value is gxa=true.
        :param str keywords: e.g. "cancer+breast"
        :param str pmid: PubMed identifier (e.g., 16553887)
        :param str sa: Sample attribute values. Has EFO expansion. fibroblast
        :param str species: Species of the samples.Has EFO expansion. (e.g., "h**o+sapiens")
        :param bool wholewords:

        The following parameters can filter the experiments:

        :param str directsub: only experiments directly submitted to
            ArrayExpress (true) or only imported from GEO databae (false)


        The following parameters can sort the results:

        :param str sortby: sorting by grouping (can be accession, name, assays,
            species, releasedata, fgem, raw, atlas)
        :param str sortorder: sorting by orderering. Can be either ascending or
            descending (default)

        .. doctest::
            :options: +SKIP

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress()
            >>> res = s.queryFiles(keywords="cancer+breast", wholewords=True)
            >>> res = s.queryExperiments(array="A-AFFY-33", species="H**o Sapiens")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="H**o Sapiens",
            ...                          sortorder="releasedate")
            >>> res = s.queryExperiments(array="A-AFFY-33", species="H**o+Sapiens",
            ...     expdesign="dose response", sortby="releasedate", sortorder="ascending")
            >>> dates = [x.findall("releasedate")[0].text for x in res.getchildren()]

        """
        res = self._search("files", **kargs)
        return res

    def queryExperiments(self, **kargs):
        """Retrieve experiments

        .. seealso:: :meth:`~bioservices.arrayexpress.ArrayExpress.queryFiles` for
            all possible keywords

        .. doctest::
            :options: +SKIP

            >>> res = s.queryExperiments(keywords="cancer+breast", wholewords=True)

        """
        res = self._search("experiments", **kargs)
        return res

    def retrieveExperiment(self, experiment):
        """alias to queryExperiments if you know the experiment name

        ::

            >>> s.retrieveExperiment("E-MEXP-31")
            >>> # equivalent to
            >>> s.queryExperiments(accession="E-MEXP-31")

        """
        res = self.queryExperiments(keywords=experiment)
        return res

    def retrieveFile(self, experiment, filename, save=False):
        """Retrieve a specific file from an experiment

        :param str filename:

        ::

            >>> s.retrieveFile("E-MEXP-31", "E-MEXP-31.idf.txt")
        """
        files = self.retrieveFilesFromExperiment(experiment)

        assert filename in files, """Error. Provided filename does not seem to be correct.
            Files available for %s experiment are %s """ % (experiment, files)

        url = "files/" + experiment + "/" + filename

        if save:
            res = self.services.http_get(url, frmt="txt")
            f = open(filename, "w")
            f.write(res)
            f.close()
        else:
            res = self.services.http_get(url, frmt="txt")
            return res

    def retrieveFilesFromExperiment(self, experiment):
        """Given an experiment, returns the list of files found in its description


        :param str experiment: a valid experiment name
        :return: the experiment files

        .. doctest::

            >>> from bioservices import ArrayExpress
            >>> s = ArrayExpress(verbose=False)
            >>> s.retrieveFilesFromExperiment("E-MEXP-31")
            ['E-MEXP-31.raw.1.zip', 'E-MEXP-31.processed.1.zip', 'E-MEXP-31.idf.txt', 'E-MEXP-31.sdrf.txt']


        """
        res = self.queryExperiments(keywords=experiment)
        exp = res['experiments']['experiment']
        files = exp['files']
        output = [v['name'] for k, v in files.items() if k]
        return output

    def queryAE(self, **kargs):
        """Returns list of experiments

        See :meth:`queryExperiments` for parameters and usage

        This is a wrapper around :meth:`queryExperiments` that returns only
        the accession values.

        ::

            a.queryAE(keywords="pneumonia", species='h**o+sapiens')
        """
        sets = self.queryExperiments(**kargs)
        return [x['accession'] for x in sets['experiments']['experiment']]

    def getAE(self, accession, type='full'):
        """retrieve all files from an experiments and save them locally"""
        filenames = self.retrieveFilesFromExperiment(accession)
        self.services.logging.info("Found %s files" % len(filenames))
        for i, filename in enumerate(filenames):
            res = self.retrieveFile(accession, filename)
            if filename.endswith('.zip'):
                with open(filename, 'wb') as fout:
                    self.services.logging.info("Downloading %s" % filename)
                    fout.write(res)
            else:
                with open(filename, 'w') as fout:
                    self.services.logging.info("Downloading %s" % filename)
                    fout.write(res)