Exemplo n.º 1
0
def search_count(db, query):
    """TogoWS search count (returns an integer).

    db - database (string), see http://togows.dbcls.jp/search
    query - search term (string)

    Returns the number of matches as an integer. Raises ValueError if the
    text returned by the server cannot be parsed as an integer. If db is
    not among the cached search database names, a warning is issued (not
    an error) and the request is attempted anyway.

    You could then use the count to download a large set of search results in
    batches using the offset and limit options to Bio.TogoWS.search(). In
    general however the Bio.TogoWS.search_iter() function is simpler to use.
    """
    global _search_db_names
    if _search_db_names is None:
        # First use: fetch the database names and cache them module-wide.
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        #TODO - Make this a ValueError? Right now despite the HTML website
        #claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn("TogoWS search does not officially support database '%s'. "
                      "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s/count" % (db, _quote(query))
    handle = _open(url)
    try:
        data = handle.read()
    finally:
        # Always release the handle, even if the read fails part way.
        handle.close()
    try:
        count = int(data.strip())
    except ValueError:
        raise ValueError("Expected an integer from URL %s, got: %r" % (url, data))
    return count
Exemplo n.º 2
0
def search_count(db, query):
    """TogoWS search count (returns an integer).

    db - database (string), see http://togows.dbcls.jp/search
    query - search term (string)

    Returns the number of matches as an integer. Raises ValueError if the
    text returned by the server cannot be parsed as an integer. If db is
    not among the cached search database names, a warning is issued (not
    an error) and the request is attempted anyway.

    You could then use the count to download a large set of search results in
    batches using the offset and limit options to Bio.TogoWS.search(). In
    general however the Bio.TogoWS.search_iter() function is simpler to use.
    """
    global _search_db_names
    if _search_db_names is None:
        # First use: fetch the database names and cache them module-wide.
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        #TODO - Make this a ValueError? Right now despite the HTML website
        #claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn(
            "TogoWS search does not officially support database '%s'. "
            "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s/count" % (db, _quote(query))
    handle = _open(url)
    try:
        data = handle.read()
    finally:
        # Always release the handle, even if the read fails part way.
        handle.close()
    try:
        count = int(data.strip())
    except ValueError:
        raise ValueError("Expected an integer from URL %s, got: %r" %
                         (url, data))
    return count
Exemplo n.º 3
0
def entry(db, id, format=None, field=None):
    """TogoWS fetch entry (returns a handle).

    db - database (string), see list below.
    id - identifier (string) or a list of identifiers (either as a list of
         strings or a single string with comma separators).
    format - return data file format (string), options depend on the database
             e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
    field - specific field from within the database record (string)
            e.g. "au" or "authors" for pubmed.

    Raises ValueError if db is not one of the known entry databases, or if
    the requested field or format is not listed for that database.

    At the time of writing, this includes the following:

    KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
          module, pathway
    DDBJ: ddbj, dad, pdb
    NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, omim,
          homologue, snp, mesh, pubmed
    EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50

    For the current list, please see http://togows.dbcls.jp/entry/

    This function is essentially equivalent to the NCBI Entrez service
    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
    does not offer field extraction.
    """
    global _entry_db_names, _entry_db_fields, _entry_db_formats
    if _entry_db_names is None:
        # First use: fetch the list of entry databases and cache it.
        _entry_db_names = _get_entry_dbs()
    if db not in _entry_db_names:
        raise ValueError("TogoWS entry fetch does not officially support "
                         "database '%s'." % db)
    if field:
        # Per-database field lists are looked up lazily and cached.
        try:
            fields = _entry_db_fields[db]
        except KeyError:
            fields = _get_entry_fields(db)
            _entry_db_fields[db] = fields
        if db == "pubmed" and field == "ti" and "title" in fields:
            #Backwards compatibility fix for TogoWS change Nov/Dec 2013
            field = "title"
            import warnings
            warnings.warn("TogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead.")
        if field not in fields:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "field '%s' for database '%s'. Only: %s"
                             % (field, db, ", ".join(sorted(fields))))
    if format:
        # Per-database format lists are looked up lazily and cached.
        try:
            formats = _entry_db_formats[db]
        except KeyError:
            formats = _get_entry_formats(db)
            _entry_db_formats[db] = formats
        if format not in formats:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "format '%s' for database '%s'. Only: %s"
                             % (format, db, ", ".join(sorted(formats))))

    if isinstance(id, list):
        # Multiple identifiers are sent as one comma separated string.
        id = ",".join(id)
    url = _BASE_URL + "/entry/%s/%s" % (db, _quote(id))
    if field:
        url += "/" + field
    if format:
        url += "." + format
    return _open(url)
Exemplo n.º 4
0
def search(db, query, offset=None, limit=None, format=None):
    """TogoWS search (returns a handle).

    This is a low level wrapper for the TogoWS search function, which
    can return results in several formats. In general, the search_iter
    function is more suitable for end users.

    db - database (string), see http://togows.dbcls.jp/search/
    query - search term (string)
    offset, limit - optional integers specifying which result to start from
            (1 based) and the number of results to return.
    format - return data file format (string), e.g. "json", "ttl" (RDF)
             By default plain text is returned, one result per line.

    Raises ValueError if only one of offset and limit is given, or if
    either cannot be converted to an integer of at least one. If db is not
    among the cached search database names, a warning is issued (not an
    error) and the request is attempted anyway.

    At the time of writing, TogoWS applies a default count limit of 100
    search results, and this is an upper bound. To access more results,
    use the offset argument or the search_iter(...) function.

    TogoWS supports a long list of databases, including many from the NCBI
    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
    "ncbi-taxonomy"), EBI (e.g. "ebi-embl" or "embl", "ebi-uniprot" or
    "uniprot", "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
    For the current list, see http://togows.dbcls.jp/search/

    The NCBI provide the Entrez Search service (ESearch) which is similar,
    available in Biopython as the Bio.Entrez.esearch() function.

    See also the function Bio.TogoWS.search_count() which returns the number
    of matches found, and the Bio.TogoWS.search_iter() function which allows
    you to iterate over the search results (taking care of batching for you).
    """
    global _search_db_names
    if _search_db_names is None:
        # First use: fetch the database names and cache them module-wide.
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        #TODO - Make this a ValueError? Right now despite the HTML website
        #claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn("TogoWS search does not explicitly support database '%s'. "
                      "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s" % (db, _quote(query))
    if offset is not None and limit is not None:
        # Only conversion failures are translated into ValueError; things
        # like KeyboardInterrupt must be allowed to propagate.
        try:
            offset = int(offset)
        except (TypeError, ValueError, OverflowError):
            raise ValueError("Offset should be an integer (at least one), not %r" % offset)
        try:
            limit = int(limit)
        except (TypeError, ValueError, OverflowError):
            raise ValueError("Limit should be an integer (at least one), not %r" % limit)
        if offset <= 0:
            raise ValueError("Offset should be at least one, not %i" % offset)
        if limit <= 0:
            raise ValueError("Limit should be at least one, not %i" % limit)
        url += "/%i,%i" % (offset, limit)
    elif offset is not None or limit is not None:
        raise ValueError("Expect BOTH offset AND limit to be provided (or neither)")
    if format:
        url += "." + format
    return _open(url)
Exemplo n.º 5
0
def entry(db, id, format=None, field=None):
    """TogoWS fetch entry (returns a handle).

    db - database (string), see list below.
    id - identifier (string) or a list of identifiers (either as a list of
         strings or a single string with comma separators).
    format - return data file format (string), options depend on the database
             e.g. "xml", "json", "gff", "fasta", "ttl" (RDF Turtle)
    field - specific field from within the database record (string)
            e.g. "au" or "authors" for pubmed.

    Raises ValueError if db is not one of the known entry databases, or if
    the requested field or format is not listed for that database.

    At the time of writing, this includes the following:

    KEGG: compound, drug, enzyme, genes, glycan, orthology, reaction,
          module, pathway
    DDBJ: ddbj, dad, pdb
    NCBI: nuccore, nucest, nucgss, nucleotide, protein, gene, omim,
          homologue, snp, mesh, pubmed
    EBI:  embl, uniprot, uniparc, uniref100, uniref90, uniref50

    For the current list, please see http://togows.dbcls.jp/entry/

    This function is essentially equivalent to the NCBI Entrez service
    EFetch, available in Biopython as Bio.Entrez.efetch(...), but that
    does not offer field extraction.
    """
    global _entry_db_names, _entry_db_fields, _entry_db_formats
    if _entry_db_names is None:
        # First use: fetch the list of entry databases and cache it.
        _entry_db_names = _get_entry_dbs()
    if db not in _entry_db_names:
        raise ValueError("TogoWS entry fetch does not officially support "
                         "database '%s'." % db)
    if field:
        # Per-database field lists are looked up lazily and cached.
        try:
            fields = _entry_db_fields[db]
        except KeyError:
            fields = _get_entry_fields(db)
            _entry_db_fields[db] = fields
        if db == "pubmed" and field == "ti" and "title" in fields:
            #Backwards compatibility fix for TogoWS change Nov/Dec 2013
            field = "title"
            import warnings
            warnings.warn(
                "TogoWS dropped 'pubmed' field alias 'ti', please use 'title' instead."
            )
        if field not in fields:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "field '%s' for database '%s'. Only: %s" %
                             (field, db, ", ".join(sorted(fields))))
    if format:
        # Per-database format lists are looked up lazily and cached.
        try:
            formats = _entry_db_formats[db]
        except KeyError:
            formats = _get_entry_formats(db)
            _entry_db_formats[db] = formats
        if format not in formats:
            raise ValueError("TogoWS entry fetch does not explicitly support "
                             "format '%s' for database '%s'. Only: %s" %
                             (format, db, ", ".join(sorted(formats))))

    if isinstance(id, list):
        # Multiple identifiers are sent as one comma separated string.
        id = ",".join(id)
    url = _BASE_URL + "/entry/%s/%s" % (db, _quote(id))
    if field:
        url += "/" + field
    if format:
        url += "." + format
    return _open(url)
Exemplo n.º 6
0
def search(db, query, offset=None, limit=None, format=None):
    """TogoWS search (returns a handle).

    This is a low level wrapper for the TogoWS search function, which
    can return results in several formats. In general, the search_iter
    function is more suitable for end users.

    db - database (string), see http://togows.dbcls.jp/search/
    query - search term (string)
    offset, limit - optional integers specifying which result to start from
            (1 based) and the number of results to return.
    format - return data file format (string), e.g. "json", "ttl" (RDF)
             By default plain text is returned, one result per line.

    Raises ValueError if only one of offset and limit is given, or if
    either cannot be converted to an integer of at least one. If db is not
    among the cached search database names, a warning is issued (not an
    error) and the request is attempted anyway.

    At the time of writing, TogoWS applies a default count limit of 100
    search results, and this is an upper bound. To access more results,
    use the offset argument or the search_iter(...) function.

    TogoWS supports a long list of databases, including many from the NCBI
    (e.g. "ncbi-pubmed" or "pubmed", "ncbi-genbank" or "genbank", and
    "ncbi-taxonomy"), EBI (e.g. "ebi-embl" or "embl", "ebi-uniprot" or
    "uniprot", "ebi-go"), and KEGG (e.g. "kegg-compound" or "compound").
    For the current list, see http://togows.dbcls.jp/search/

    The NCBI provide the Entrez Search service (ESearch) which is similar,
    available in Biopython as the Bio.Entrez.esearch() function.

    See also the function Bio.TogoWS.search_count() which returns the number
    of matches found, and the Bio.TogoWS.search_iter() function which allows
    you to iterate over the search results (taking care of batching for you).
    """
    global _search_db_names
    if _search_db_names is None:
        # First use: fetch the database names and cache them module-wide.
        _search_db_names = _get_fields(_BASE_URL + "/search")
    if db not in _search_db_names:
        #TODO - Make this a ValueError? Right now despite the HTML website
        #claiming to, the "gene" or "ncbi-gene" don't work and are not listed.
        import warnings
        warnings.warn(
            "TogoWS search does not explicitly support database '%s'. "
            "See %s/search/ for options." % (db, _BASE_URL))
    url = _BASE_URL + "/search/%s/%s" % (db, _quote(query))
    if offset is not None and limit is not None:
        # Only conversion failures are translated into ValueError; things
        # like KeyboardInterrupt must be allowed to propagate.
        try:
            offset = int(offset)
        except (TypeError, ValueError, OverflowError):
            raise ValueError(
                "Offset should be an integer (at least one), not %r" % offset)
        try:
            limit = int(limit)
        except (TypeError, ValueError, OverflowError):
            raise ValueError(
                "Limit should be an integer (at least one), not %r" % limit)
        if offset <= 0:
            raise ValueError("Offset should be at least one, not %i" % offset)
        if limit <= 0:
            raise ValueError("Limit should be at least one, not %i" % limit)
        url += "/%i,%i" % (offset, limit)
    elif offset is not None or limit is not None:
        raise ValueError(
            "Expect BOTH offset AND limit to be provided (or neither)")
    if format:
        url += "." + format
    return _open(url)