Пример #1
0
    def Term(self, goid, frmt="oboxml"):
        """
        Retrieve information about a single GO term.

        :param goid: GO identifier to retrieve; it must start with ``GO:``.
        :param str frmt: the output format requested from the service. One of:

            * mini:   Mini HTML, suitable for dynamically embedding in popup boxes.
            * obo:    OBO format snippet.
            * oboxml: OBO XML format snippet.

        :return: the value returned by ``self.http_get`` for the ``GTerm``
            request.
        :raises ValueError: if ``goid`` does not start with ``GO:``.

        Example::

            s.Term("GO:0003824")

        """
        check_param_in_list(frmt, ["mini", "obo", "oboxml"])
        if not goid.startswith("GO:"):
            raise ValueError("GO id must start with 'GO:'")

        # http_get is always asked for "xml" here; the user-selected format
        # only travels to the service through the 'format' query parameter.
        return self.http_get("GTerm", frmt="xml",
                             params={'id': goid, 'format': frmt})
Пример #2
0
 def smiles_query(self, query, search_type='exact', similarity=None):
     """
     Run a SMILES structure query against the ``smilesQuery`` endpoint.

     :param query: SMILES string, sent as the ``smiles`` parameter.
     :param search_type: one of 'exact', 'substructure', 'superstructure'
         or 'similarity'.
     :param similarity: optional similarity threshold; validated with
         ``check_range(similarity, 0, 1)`` and only sent when not None.
     :return: the response wrapped by ``self.easyXML``.
     """
     valid_search_types = [
         'exact', 'substructure', 'superstructure', 'similarity'
     ]
     check_param_in_list(search_type, valid_search_types)

     # The key used to be misspelled 'seach_type', so the chosen search type
     # was never transmitted under the name the service looks for.
     params = {'smiles': query, 'search_type': search_type}
     if similarity is not None:
         check_range(similarity, 0, 1)
         params['similarity'] = similarity

     res = self.http_get(query='smilesQuery', frmt='xml', params=params)
     return self.easyXML(res)
Пример #3
0
    def retrieve(self, uniprot_id, frmt="xml"):
        """
        Fetch one or several UniProtKB entries by identifier.

        :param uniprot_id: a single identifier or a list of identifiers
            (normalised with ``tolist``).
        :param frmt: one of 'txt', 'xml', 'rdf', 'gff', 'fasta'.
        :return: one result per identifier; when the result list holds
            exactly one item, that item is returned instead of the list.
            With ``frmt="xml"`` every item is wrapped by ``self.easyXML``.

        Example::

            u = UniProt()
            res = u.retrieve("P09958", frmt="xml")
            fasta = u.retrieve([u'P29317', u'Q5BKX8', u'Q8TCD6'], frmt='fasta')
            print(fasta[0])
        """
        check_param_in_list(frmt, ['txt', 'xml', 'rdf', 'gff', 'fasta'])

        # One relative URL per requested entry, e.g. "uniprot/P09958.xml".
        urls = []
        for entry in tolist(uniprot_id):
            urls.append("uniprot/" + entry + '.' + frmt)

        results = self.http_get(urls, frmt="txt", headers=self.__headers)
        if frmt == "xml":
            results = [self.easyXML(item) for item in results]

        # Unwrap single-element lists for convenience.
        if isinstance(results, list) and len(results) == 1:
            return results[0]
        return results
Пример #4
0
    def EFetch(self, db, id, retmode='text', **kwargs):
        """
        Return formatted data records for a list of input ids.

        :param db: database from which to retrieve records; must be a valid
            Entrez database.
        :param id: UID list, limited to 200.
        :param retmode: retrieval mode, also used as the ``frmt`` passed to
            ``self.http_get`` (text; xml not recommended).
        :param kwargs: optional EFetch parameters. Only ``query_key``,
            ``WebEnv``, ``retmode``, ``rettype``, ``retstart``, ``retmax``,
            ``strand``, ``seq_start``, ``seq_stop`` and ``complexity`` are
            forwarded; any other keyword is silently ignored.
            ``strand`` is validated against [1, 2] and ``complexity``
            against [0, 1, 2, 3, 4] with ``check_param_in_list``.
        :return: the value returned by ``self.http_get``.
        """
        _valid_opt_param = [
            'query_key', 'WebEnv', 'retmode', 'rettype', 'retstart', 'retmax',
            'strand', 'seq_start', 'seq_stop', 'complexity'
        ]
        # Allowed values for the options that take a closed set of values.
        _restricted_values = {
            'strand': [1, 2],
            'complexity': [0, 1, 2, 3, 4],
        }
        params = {
            'db': db,
            'id': id,
            'retmode': retmode,
            'tool': self.tool,
            'email': self.email
        }

        # The previous implementation raised ValueError for every valid
        # option (each ``if key is ...`` had an unconditional ``else: raise``)
        # and compared strings by identity. Options are now validated only
        # when they have a restricted value set, then forwarded.
        for key, value in kwargs.items():
            if key not in _valid_opt_param:
                continue
            if key in _restricted_values:
                check_param_in_list(value, _restricted_values[key])
            params[key] = value

        return self.http_get('efetch.fcgi', frmt=retmode, params=params)
Пример #5
0
    def Annotation(self, goid=None, protein=None, frmt="tsv", limit=10000, gz=False, col=None, db=None, aspect=None,
                   termUse=None, evidence=None, source=None, ref=None, tax=9606, qualifier=None):
        """
        Call the QuickGO Annotation service.

        Exactly one identifier is put in the URL, chosen in this order of
        precedence: ``protein``, then ``goid``, then ``tax``.

        :param goid: GO identifiers either directly or indirectly
            (descendant GO identifiers) applied in annotations.
        :param protein: one or more sequence identifiers or accessions from
            available database(s) (see ``db``).
        :param frmt: one of "gaf", "gpa", "gene2go", "proteinList", "fasta"
            or "tsv" (the default).
        :param limit: download limit (number of lines). The default of 10,000
            rows may not be sufficient for the data set being downloaded; to
            return the entire data set, specify a limit of -1. Must be an int.
        :param gz: when ``True``, appends ``&gz`` to the URL so that the
            downloaded file is gzipped.
        :param col: only applicable to the tsv format; a comma-separated list
            of column names to include in the returned data set. Each name is
            validated against ``self._valid_col``. When omitted with
            ``frmt="tsv"``, a default set of columns is requested.
        :param db: protein database (identifier type). Can be UniProtKB,
            UniGene, Ensembl.
        :param aspect: limits the annotations to a specific ontology
            (Molecular Function, Biological Process or Cellular Component).
            The valid characters are F, P, C.
        :param termUse: if set to ``slim``, QuickGO uses the supplied set of GO
            identifiers as a slim and maps the annotations up to these terms.
            See http://www.ebi.ac.uk/QuickGO/GMultiTerm
        :param evidence: annotation evidence code category (Ev), e.g. IDA, IC,
            ISS, IEA, IPI, ND, IMP, ISO, IGI. Either a string with comma
            separated values (e.g., IEA,IDA) or a list of strings
            (e.g. ["IEA","IDA"]).
        :param source: annotation provider, e.g. 'InterPro', 'UniPathway',
            'MGI', 'FlyBase', 'GOC', 'Source', 'UniProtKB', 'RGD', 'ENSEMBL',
            'ZFIN', 'IntAct'. String or list of strings.
        :param ref: PubMed or GO reference supporting annotation. Can refer to
            a specific reference identifier or category (for category level,
            use `*` after ref type), e.g. 'PUBMED:`*`', 'GO_REF:0000002'.
            String or list of strings.
        :param tax: NCBI taxonomic identifier of annotated protein; only sent
            when neither ``protein`` nor ``goid`` is given.
        :param qualifier: tags that modify the interpretation of an
            annotation, e.g. NOT, colocalizes_with, contributes_to. A list is
            joined with commas; any other value is sent as given.
        :return: the value returned by ``self.http_get``.
        :raises TypeError: if ``limit`` is not an int.
        :raises ValueError: if no identifier is given, if ``evidence``,
            ``source`` or ``ref`` is neither a list nor a string, or if ``col``
            is supplied with a format other than tsv.

        Notes:

            * Any number of fields can be specified; they will be AND'ed together.
            * Any number of values can be specified for each field; they will be OR'ed together.
            * Values should be URI encoded.

        Examples::

            print(s.Annotation(protein='P12345', frmt='tsv', col="ref,evidence",
                               ref='PMID:*'))
            print(s.Annotation(protein='P12345,Q4VCS5', frmt='tsv',
                               col="ref,evidence", ref='PMID:,Reactome:'))

        """
        def _comma_joined(value, error_message=None):
            # Lists are stripped and comma-joined, strings pass through.
            # Anything else raises when an error message is supplied and is
            # returned untouched otherwise.
            if isinstance(value, list):
                return ",".join(item.strip() for item in value)
            if isinstance(value, str) or error_message is None:
                return value
            raise ValueError(error_message)

        _valid_formats = ["gaf", "gpa", "gene2go", "proteinList", "fasta", "tsv"]
        _valid_db = ['UniProtKB', 'UniGene', 'Ensembl']
        _valid_aspect = ['P', 'F', 'C']

        check_param_in_list(frmt, _valid_formats)

        if not isinstance(limit, int):
            raise TypeError("limit parameter must be an integer greater than zero")

        if goid is None and protein is None and tax is None:
            raise ValueError("you must provide at least one of the following parameter: goid, protein")

        # Parameters that always have a value.
        params = {'format': frmt, 'limit': limit}

        # The identifier goes straight into the URL; only one is used.
        url = "GAnnotation?"
        if protein is not None:
            url += "protein=" + protein
        elif goid is not None:
            url += "goid=" + goid
        else:
            url += "tax=" + str(tax)

        if aspect is not None:
            check_param_in_list(aspect, _valid_aspect)
            params['aspect'] = aspect

        if db is not None:
            check_param_in_list(db, _valid_db)
            params['db'] = db

        if termUse is not None:
            check_param_in_list(termUse, ["slim"])
            params['termUse'] = termUse

        if evidence:
            params['evidence'] = _comma_joined(
                evidence,
                "Invalid parameter: evidence parameters must be a list of strings ['IDA','IEA'] or a "
                "string (e.g., 'IDA', 'IDA,IEA')")

        if source:
            params['source'] = _comma_joined(
                source,
                "Invalid parameter: source parameters must be a list of strings ['UniProtKB'] or a "
                "string (e.g., 'UniProtKB')")

        if ref:
            # This message used to name "source" instead of "ref".
            params['ref'] = _comma_joined(
                ref,
                "Invalid parameter: ref parameters must be a list of strings ['PUBMED'] or a string "
                "(e.g., 'PUBMED:*') ")

        if qualifier:
            # NOT, colocalizes_with, contributes_to
            params['qualifier'] = _comma_joined(qualifier)

        if frmt == "tsv":
            if col is None:
                col = ('proteinDB,proteinID,proteinSymbol,qualifier,'
                       'goID,goName,aspect,evidence,ref,with,proteinTaxon,'
                       'date,from,splice,proteinName,proteinSynonym,proteinType,'
                       'proteinTaxonName,originalTermID,originalGOName')
            else:
                col = ",".join(name.strip() for name in col.split(","))

            for name in col.split(','):
                check_param_in_list(name, self._valid_col)
            params["col"] = col

        # col is provided but the format cannot use it.
        if frmt not in ["tsv", "dict"] and col is not None:
            raise ValueError("You provided the 'col' parameter but the format is not correct. You should use the "
                             "frmt='tsv' or frmt='dict' ")

        # gz takes no value, so it is appended to the URL rather than to params.
        if gz is True:
            url += '&gz'

        return self.http_get(url, frmt="txt", params=params)
Пример #6
0
    def retrieve(self,
                 service,
                 query,
                 methods='query',
                 output="tab25",
                 firstresult=None,
                 maxresults=None,
                 compressed=True):
        """
        Send a query to a specific PSICQUIC database.

        :param str service: a registered service; must be in
            ``self.activeDBs``.
        :param str query: a valid query. Can be `*` or a protein name.
        :param methods: search method, one of ``self._retrieve_methods``
            (interaction, interactor or query). It selects the path segment
            placed before the query in the request URL.
        :param str output: a valid format (see ``self.formats``), or None to
            send no format parameter.
        :param firstresult: position of the first result.
        :param maxresults: maximum number of results.
        :param compressed: when true, ``compressed=y`` is added to the
            request parameters.
        :return: for outputs containing "xml", the value returned by
            ``self.http_get``; otherwise the response split into lines, and,
            for outputs starting with "tab", converted by
            ``self._convert_tab2dict``. ``None`` responses are returned as is.
        :raises ValueError: if ``methods`` or ``service`` is not recognised.

        Examples::

            s.retrieve("intact", "brca2", output="tab27")
            s.retrieve("intact", "zap70", output="xml25")
            s.retrieve("matrixdb", "*", output="xml25")

        This is the programmatic approach to this website:
        http://www.ebi.ac.uk/Tools/webservices/psicquic/view/main.xhtml

        Another example consists in accessing the *string* database for
        fetching protein-protein interaction data of a particular model
        organism. Here we restrict the query to 100 results::

            s.retrieve("string", "species:10090", firstresult=0,
                       maxresults=100, output="tab25")

        Queries may contain spaces::

            s.retrieve("biogrid", "ZAP70 AND species:9606")

        .. warning:: AND must be in capital letters. Some databases are more
            permissive than others (e.g., intact accepts "and"). species must
            be a valid ID number. Again, some databases are more permissive
            and may accept the name (e.g., human).

        To obtain the number of interactions in intact for the human
        species::

            len(s.retrieve("intact", "species:9606"))

        """
        if methods not in self._retrieve_methods:
            raise ValueError(
                "Retrieve methods {} don't exists".format(methods))

        if service not in self.activeDBs:
            raise ValueError("database %s not in active databases" % service)

        params = {}
        if output is None:
            # Placeholder so the string tests below keep working.
            output = "none"
        else:
            check_param_in_list(output, self.formats)
            params['format'] = output

        resturl = self.__get_rest_url(service)

        if firstresult is not None:
            params['firstResult'] = firstresult
        if maxresults is not None:
            params['maxResults'] = maxresults
        if compressed:
            params['compressed'] = 'y'

        # ``methods`` used to be validated and then ignored (the URL always
        # used 'query/'). The default value yields the same URL as before.
        url = resturl + methods + '/' + query

        if "xml" in output:
            return self.http_get(url, frmt="xml", params=params)

        res = self.http_get(url, frmt="txt", params=params)
        if res is None:
            return res

        res = res.strip().split("\n")
        if output.startswith("tab"):
            res = self._convert_tab2dict(res)
        return res
Пример #7
0
    def _search(self, mode, **kargs):
        """
        Common function to search for files or experiments.

        :param mode: either "experiments" or "files"; used as the last
            segment of the request URL.
        :param kargs: search filters. Accepted names are accession (ex:
            E-MEXP-31), keywords, species, wholewords, expdesign, exptype,
            gxa, pmid, sa, ef (e.g., CellType), efv (e.g., HeLa), array
            (ex: A-AFFY-33), expandfo, directsub, sortby and sortorder.

            * ``expandfo`` / ``wholewords`` are sent as "on" when given a
              true-like value and omitted otherwise.
            * ``gxa`` / ``directsub`` are sent as "true" or "false".
            * ``sortby`` / ``sortorder`` are validated against their allowed
              values.
        :return: the value returned by ``self.http_get``, wrapped by
            ``self.easyXML`` when ``self.format`` is "xml".
        :raises ValueError: if ``gxa`` or ``directsub`` is neither true-like
            nor false-like.
        """
        # Internal sanity check on a value supplied by sibling methods.
        assert mode in ["experiments", "files"]
        url = "{0}/{1}/{2}".format(self.format, self.version, mode)

        valid_keys = [
            "accession", "keywords", "species", "wholewords", "expdesign",
            "exptype", "gxa", "pmid", "sa", "ef", "efv", "array", "expandfo",
            "directsub", "sortby", "sortorder"
        ]
        sort_choices = {
            "sortby": [
                "accession", "name", "assays", "species", "releasedate",
                "fgem", "raw", "atlas"
            ],
            "sortorder": ["ascending", "descending"],
        }
        true_like = ["on", True, "true", "TRUE", "True"]
        false_like = [False, "false", "False"]

        # Reject unknown filter names before building anything.
        for k in kargs:
            check_param_in_list(k, valid_keys)

        params = {}
        for k, v in kargs.items():
            if k in ["expandfo", "wholewords"]:
                if v in true_like:
                    params[k] = "on"
            elif k in ["gxa", "directsub"]:
                if v in true_like:
                    params[k] = "true"
                elif v in false_like:
                    params[k] = "false"
                else:
                    # Name the offending option; it used to always say
                    # "directsub", even for gxa.
                    raise ValueError("%s must be true or false" % k)
            else:
                if k in sort_choices:
                    check_param_in_list(v, sort_choices[k])
                # NOTE: callers may write "+" where they mean a space. A
                # literal "+" would be percent-escaped as %2B in the query
                # string, so it is turned into a real space here. Non-string
                # values (e.g. an integer pmid) are forwarded untouched
                # instead of failing on ``.replace``.
                params[k] = v.replace("+", " ") if isinstance(v, str) else v

        res = self.http_get(url, frmt=self.format, params=params)
        if self.format == "xml":
            res = self.easyXML(res)
        return res
Пример #8
0
 def _set_format(self, f):
     """Validate ``f`` against the supported formats and store it in ``self._format``."""
     supported = ["json", "xml"]
     check_param_in_list(f, supported)
     self._format = f
Пример #9
0
    def search(self,
               query,
               frmt="tab",
               columns=None,
               include=False,
               sort="score",
               compress=False,
               limit=None,
               offset=None):
        """
        Provide an interface to the UniProt search service.

        :param str query: a valid UniProt query. See
            http://www.uniprot.org/help/text-search and
            http://www.uniprot.org/help/query-fields, and the examples below.
            ``+`` characters are replaced by spaces before sending.
        :param str frmt: a valid format amongst html, tab, xls, fasta, gff,
            txt, xml, rdf, list, rss (default is tab). With tab or xls, the
            ``columns`` argument may also be provided. ``None`` sends no
            format parameter.
        :param str columns: comma-separated list of values. Works only if
            the format is tab or xls. For UniProtKB, some possible columns
            are: id, entry name, length, organism. Some column names must be
            followed by a database name (e.g., "database(PDB)"); such names
            are not validated. Other names are checked against
            ``self._valid_columns``.
        :param bool include: include isoform sequences when ``frmt`` is
            fasta; include description when ``frmt`` is rdf.
        :param str sort: "score" by default. Set to None to bypass sorting.
        :param bool compress: gzip the results.
        :param int limit: maximum number of results to retrieve. Ignored
            unless it is an int.
        :param int offset: offset of the first result, typically used
            together with ``limit``. Ignored unless it is an int.
        :return: the value returned by ``self.http_get``.

        To obtain the list of UniProt IDs returned by a search for zap70::

            u.search('zap70+AND+organism:9606', frmt='list')
            u.search("zap70+and+taxonomy:9606", frmt="tab", limit=3,
                     columns="entry name,length,id, genes")

            Entry name Length Entry Gene names
            CBLB_HUMAN 982 Q13191 CBLB RNF56 Nbla00127
            CBL_HUMAN 906 P22681 CBL CBL2 RNF55
            CD3Z_HUMAN 164 P20963 CD247 CD3Z T3Z TCRZ

        Other examples::

            u.search("ZAP70+AND+organism:9606", limit=3,
                     columns="id,database(PDB)")

        You can also search on several keywords. This is especially useful
        if you have a list of known entry names::

            u.search("ZAP70_HUMAN+or+CBL_HUMAN", frmt="tab", limit=3,
                     columns="entry name,length,id, genes")

        .. warning:: some columns, although valid, may not return anything,
            not even in the header: 'score', 'taxonomy', 'tools'. This is a
            UniProt feature.
        """
        params = {}

        if frmt is not None:
            # 'rdf' was missing (and 'rss' listed twice) although both the
            # documentation and the ``include`` handling below rely on it.
            _valid_formats = [
                'tab', 'xls', 'fasta', 'gff', 'txt', 'xml', 'rss', 'list',
                'rdf', 'html'
            ]
            check_param_in_list(frmt, _valid_formats)
            params['format'] = frmt

        if columns is not None:
            check_param_in_list(frmt, ["tab", "xls"])

            # Strip every name before validating it, including a single
            # column given without any comma.
            names = [name.strip() for name in columns.split(",")]
            for name in names:
                # "database(XXX)" columns are open-ended and not validated.
                if not name.startswith("database("):
                    check_param_in_list(name, self._valid_columns)

            params['columns'] = ",".join(names)

        if include is True and frmt in ["fasta", "rdf"]:
            params['include'] = 'yes'

        if compress is True:
            params['compress'] = 'yes'

        if sort:
            check_param_in_list(sort, ["score"])
            params['sort'] = sort

        # Non-integer offset/limit values are silently ignored.
        if offset is not None and isinstance(offset, int):
            params['offset'] = offset

        if limit is not None and isinstance(limit, int):
            params['limit'] = limit

        # "+" is accepted as a word separator; turn it into a real space so
        # that it is not sent as a literal plus sign.
        params['query'] = query.replace("+", " ")

        return self.http_get("uniprot/",
                             frmt="txt",
                             params=params,
                             headers=self.__headers)
Пример #10
0
    def search(self,
               query,
               frmt='json',
               facets=None,
               number=None,
               filters=None,
               first=None):
        """
        Search for a complex inside intact complex.

        :param query: the query (e.g., ndc80).
        :param frmt: 'json' (default) or 'pandas'. The service is always
            queried in json; with 'pandas' the ``elements`` entry of the
            result is returned as a ``pandas.DataFrame``.
        :param facets: list of facets as a string (separated by commas).
        :param number: how many results to return (pagination).
        :param filters: list of filters.
        :param first: offset of the first result (pagination). Added as the
            last parameter so existing positional calls keep working.
        :return: the decoded json result, or a DataFrame when
            ``frmt='pandas'``.

        Examples::

            s = Intact()
            # Search for ndc80
            s.search('ndc80')

            # Search for ndc80 and facet with the species field:
            s.search('ndc80', facets='species_f')

            # Search for ndc80 and facet with the species and biological
            # role fields:
            s.search('ndc80', facets='species_f,pbiorole_f')

            # Search for ndc80, facet with the species and biological role
            # fields and filter the species using human:
            s.search('Ndc80', first=0, number=10,
                filters='species_f:("Homo sapiens")',
                facets='species_f,ptype_f,pbiorole_f')

            # Search for ndc80, facet with the species and biological role
            # fields and filter the species using human or mouse:
            s.search('Ndc80', first=0, number=10,
                filters='species_f:("Homo sapiens" "Mus musculus")',
                facets='species_f,ptype_f,pbiorole_f')

            # Search with a wildcard to retrieve all the information:
            s.search('*')

            # Search with a wildcard to retrieve all the information and
            # facet with the species, biological role and interactor type
            # fields:
            s.search('*', facets='species_f,pbiorole_f,ptype_f')

            # Same, filtering the interactor type using small molecule:
            s.search('*', facets='species_f,pbiorole_f,ptype_f',
                filters='ptype_f:("small molecule")')

            # Same, filtering the interactor type using small molecule and
            # the species using human:
            s.search('*', facets='species_f,pbiorole_f,ptype_f',
                filters='ptype_f:("small molecule"),species_f:("Homo sapiens")')

            # Search for GO:0016491 and paginate (first is the offset and
            # number is how many results you want):
            s.search('GO:0016491', first=10, number=10)
        """
        check_param_in_list(frmt, ['pandas', 'json'])

        # The service is always asked for json; 'pandas' is only a local
        # post-processing of that json answer.
        params = {
            'format': 'json',
            'facets': facets,
            'first': first,  # was hard-coded to None, disabling pagination
            'number': number,
            'filters': filters
        }

        result = self.http_get('search/' + query, frmt="json", params=params)

        if frmt == 'pandas':
            import pandas as pd
            return pd.DataFrame(result['elements'])
        return result