Exemplo n.º 1
0
    def get_response(self,
                     path="",
                     q=None,
                     format=None,
                     http_method='get',
                     **params):
        """Issue an HTTP request against this client's endpoint.

        :param path: appended to ``self.url_prefix``
        :param q: optional query term appended as an extra path segment
        :param format: optional response format appended as a suffix (e.g. "json")
        :param http_method: 'get' or 'post'; anything else raises RequestException
        :param params: sent as query parameters (GET) or form data (POST)
        :return: the ``requests`` response object
        """
        url = self.url_prefix + path
        if q is not None:
            url += "/" + q
        if format is not None:
            url = url + "." + format
        # Build the User-Agent header once instead of duplicating the
        # identical dict literal in both method branches.
        headers = {
            'User-Agent': get_user_agent(modules=[requests],
                                         caller_name=__name__)
        }
        if http_method == 'get':
            request = requests.get(url, params=params, headers=headers)
        elif http_method == 'post':
            request = requests.post(url, data=params, headers=headers)
        else:
            raise RequestException

        return request
Exemplo n.º 2
0
 def _ensure_file(self, file):
     logging.info("Ensure file: {}".format(file))
     if isinstance(file,str):
         # TODO Let's fix this if/elseif chain.
         if file.startswith("ftp"):
             f = tempfile.NamedTemporaryFile()
             fn = f.name
             cmd = ['wget',file,'-O',fn]
             subprocess.run(cmd, check=True)
             return open(fn,"r")
         elif file.startswith("http"):
             url = file
             with closing(requests.get(url, stream=False, headers={'User-Agent': get_user_agent(modules=[requests], caller_name=__name__)})) as resp:
                 logging.info("URL: {} STATUS: {} ".format(url, resp.status_code))
                 ok = resp.status_code == 200
                 if ok:
                     logging.debug("HEADER: {}".format(resp.headers))
                     if file.endswith(".gz"):
                         return io.StringIO(str(gzip.decompress(resp.content),'utf-8'))
                     else:
                         out = io.StringIO(resp.content)
                         return out
                 else:
                     return None
         else:
             logging.info("Testing suffix of {}".format(file))
             if file.endswith(".gz"):
                 return gzip.open(file, "rt")
             else:
                 return open(file, "r")
     else:
         return file
Exemplo n.º 3
0
def download_source_gafs(group_metadata, target_dir, exclusions=None):
    """Download the source GAF for every "gaf"-typed dataset in a group.

    :param group_metadata: dict with an "id" and a "datasets" list of dicts
        (each carrying "dataset", "source", and "type" keys)
    :param target_dir: base directory; files land under
        <target_dir>/groups/<group id>/<dataset>-src.gaf.gz
    :param exclusions: optional collection of dataset names to skip
    :return: dict mapping dataset name to its downloaded file path
    """
    # Avoid a mutable default argument; None means "exclude nothing".
    if exclusions is None:
        exclusions = []
    gaf_urls = { data["dataset"]: data["source"] for data in group_metadata["datasets"] if data["type"] == "gaf" and data["dataset"] not in exclusions }

    click.echo("Found {}".format(", ".join(gaf_urls.keys())))
    downloaded_paths = {}
    for dataset, gaf_url in gaf_urls.items():
        path = os.path.join(target_dir, "groups", group_metadata["id"], "{}-src.gaf.gz".format(dataset))
        os.makedirs(os.path.split(path)[0], exist_ok=True)

        click.echo("Downloading source gaf to {}".format(path))
        if urllib.parse.urlparse(gaf_url)[0] in ["ftp", "file"]:
            # requests does not speak ftp/file; fall back to urlretrieve.
            urllib.request.urlretrieve(gaf_url, path)
        else:
            response = requests.get(gaf_url, stream=True, headers={'User-Agent': get_user_agent(modules=[requests], caller_name=__name__)})
            # BUG FIX: int(None) raised TypeError whenever the server omitted
            # Content-Length; leave length as None so the progressbar simply
            # cannot show a percentage instead of crashing.
            raw_length = response.headers.get("Content-Length")
            content_length = int(raw_length) if raw_length is not None else None

            with open(path, "wb") as downloaded:
                with click.progressbar(iterable=response.iter_content(chunk_size=512 * 1024), length=content_length, show_percent=True) as chunks:
                    for chunk in chunks:
                        if chunk:
                            downloaded.write(chunk)

        downloaded_paths[dataset] = path

    return downloaded_paths
Exemplo n.º 4
0
 def get_response(self, path="", q=None, format=None, **params):
     """GET the requested resource and return the raw requests response."""
     target = self.url_prefix + path
     if q is not None:
         target = target + "/" + q
     if format is not None:
         target += "." + format
     ua = get_user_agent(name=NAME, version=VERSION, modules=[requests],
                         caller_name=__name__)
     return requests.get(target, params=params, headers={'User-Agent': ua})
Exemplo n.º 5
0
 def create_from_remote_file(self, group, snapshot=True, **args):
     """Build an instance by skimming the remote snapshot GAF for *group*."""
     import requests
     gaf_url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
     ua = get_user_agent(modules=[requests], caller_name=__name__)
     resp = requests.get(gaf_url, stream=True, headers={'User-Agent': ua})
     parser = GafParser()
     tuples = parser.skim(resp.raw)
     return self.create_from_tuples(tuples, **args)
Exemplo n.º 6
0
def get_ecomap_str(url):
    """Fetch the ecomap document at *url*; return its text, or None on non-200."""
    logger.info("Fetching ecomap from {}".format(url))
    headers = {'User-Agent': get_user_agent(modules=[requests],
                                            caller_name=__name__)}
    with closing(requests.get(url, stream=False, headers=headers)) as resp:
        # TODO: redirects
        if resp.status_code == 200:
            return resp.text
Exemplo n.º 7
0
 def _get_response(self, path="", q=None, format=None, **params):
     """Compose a URL from self.url and the given parts, GET it, return the response."""
     base = self.url if self.url.endswith("/") else self.url + "/"
     target = base + path
     if q is not None:
         target = target + "/" + q
     if format is not None:
         target += "." + format
     headers = {'User-Agent': get_user_agent(modules=[requests],
                                             caller_name=__name__)}
     return requests.get(target, params=params, headers=headers)
Exemplo n.º 8
0
def search_associations_go(
        subject_category=None,
        object_category=None,
        relation=None,
        subject=None,
        **kwargs):
    """
    Perform association search using Monarch golr
    """
    go_golr_url = "http://golr.geneontology.org/solr/"
    solr_client = pysolr.Solr(go_golr_url, timeout=5)
    ua = get_user_agent(caller_name=__name__)
    solr_client.get_session().headers['User-Agent'] = ua
    return search_associations(subject_category, object_category, relation,
                               subject, solr=solr_client,
                               field_mapping=goassoc_fieldmap(), **kwargs)
Exemplo n.º 9
0
 - mapping to and from CURIEs used in Monarch
 - providing simple-to-call methods for common queries

TODO:

Return objects following the biolink/OBAN association model

"""
import SPARQLWrapper, logging

from biolink import NAME, VERSION
from ontobio.util.user_agent import get_user_agent

# Identify this client to the endpoint; built once at import time and
# shared by every query issued through this module.
USER_AGENT = get_user_agent(name=NAME,
                            version=VERSION,
                            modules=[SPARQLWrapper],
                            caller_name=__name__)
# Module-level wikidata SPARQL endpoint (side effect of importing this module).
# NOTE(review): endpoint URL is http, not https — presumably relies on a
# redirect; confirm.
sparql = SPARQLWrapper.SPARQLWrapper("http://query.wikidata.org/sparql",
                                     agent=USER_AGENT)


class PrefixMap:
    """
    Common SPARQL prefixes used by wikidata.

    Note we use the "trick" whereby an entire property URI can be encoded as a prefix.
    """
    def prefixes(self):
        return [
            attr for attr in dir(self)
            if not callable(getattr(self, attr)) and not attr.startswith("__")
Exemplo n.º 10
0
import git
from ontobio.util.user_agent import get_user_agent

NAME = "biolink-api"
# Version is the short (7-char) hex hash of the current git HEAD.
# NOTE(review): git.Repo raises at import time if the code is not running
# from inside a git checkout (e.g. an installed package) — confirm intended.
VERSION = git.Repo(search_parent_directories=True).head.object.hexsha[:7]
USER_AGENT = get_user_agent(name=NAME, version=VERSION)