Пример #1
0
def phewas():
  """
  Return association results for a single variant across datasets (PheWAS).

  Reads the following query parameters from the current Flask request:
    build: one or more genome builds (repeatable); at least one is required
    filter: filter string, which must contain a 'variant' field
    format: either 'table' (default when missing or empty) or 'objects'

  The lookup itself is performed by the rest.phewas_query database procedure,
  called with the variant and the list of builds.

  :return: JSON response with "data", "meta" (the requested builds) and "lastPage"
  :raises FlaskException: status 400 when a required parameter is missing or
    the format is not recognised
  """
  builds = request.args.getlist("build")
  if not builds:
    raise FlaskException("Must provide build parameter",400)

  filter_str = request.args.get("filter")
  if filter_str is None:
    raise FlaskException("No filter string specified",400)

  fparser = FilterParser()
  stmts = fparser.statements(filter_str)
  # stmts.get() yields None when the field is absent; getattr then falls back to None.
  variant = getattr(stmts.get("variant"),"value",None)
  if variant is None:
    # Message first, status code second, matching every other FlaskException call here.
    raise FlaskException("Must provide a filter string with field 'variant' specified",400)

  return_fmt = request.args.get("format")
  if return_fmt is None or return_fmt == "":
    return_fmt = "table"

  if return_fmt not in ("table","objects"):
    raise FlaskException("format must be either 'table' or 'objects'",400)

  db_cols = ["id","description","study","trait","trait_label","trait_group","tech","build","pmid",
             "variant","chromosome","position","ref_allele",
             "ref_allele_freq","log_pvalue","beta","se","score_test_stat"]

  # NOTE(review): an SQL string joining rest.assoc_master, rest.assoc_results and
  # rest.traits used to be assigned here but was never executed; the query is run
  # by the rest.phewas_query procedure below. Presumably the procedure returns the
  # columns listed in db_cols - confirm against the database definition.
  cur = g.db.connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
  cur.callproc("rest.phewas_query",[variant,builds])
  data = reshape_data(cur,db_cols,None,return_fmt)
  return jsonify({
    "meta": {
      "build": builds
    },
    "data": data,
    "lastPage": None
  })
Пример #2
0
def invalid_request_check():
  """
  Reject requests whose 'filter' query parameter contains a forbidden token.

  The filter string is lower-cased before the substring check, so the match is
  case-insensitive. Requests without a filter parameter pass through untouched.

  :raises FlaskException: status 400 naming the first forbidden token found
  """
  raw_filter = request.args.get("filter")
  if raw_filter is None:
    return

  lowered = raw_filter.lower()
  # Checked in this order; the first hit is the one reported.
  for w in ("null", "undefined", "nan", "none", "\n", "\\n", ";", "="):
    if w in lowered:
      raise FlaskException(f"Invalid string {w} found in filter string", 400)
Пример #3
0
def single_results():
  """
  Return rows from rest.assoc_results through the shared std_response helper.

  Reads the optional 'limit' query parameter from the current Flask request and
  forwards it, converted to int, to std_response (None when it was not given).

  :return: whatever std_response returns for this table
  :raises FlaskException: status 400 when 'limit' is not an integer
  """
  db_table = "rest.assoc_results"
  db_cols = "id variant_name chrom pos ref_allele ref_freq log_pvalue beta se score_stat".split()

  # API field name -> database column name
  field_to_col = dict(
    analysis = "id",
    variant = "variant_name",
    chromosome = "chrom",
    position = "pos",
    score_test_stat = "score_stat",
    ref_allele_freq = "ref_freq"
  )

  limit = request.args.get("limit")
  if limit is not None:
    try:
      limit = int(limit)
    except (TypeError, ValueError):
      # Only conversion failures are a client error; anything else should propagate.
      raise FlaskException("Invalid limit parameter, must be integer",400)

  return std_response(db_table,db_cols,field_to_col,limit=limit)
Пример #4
0
def get_metadata(dbid, table, schema="rest", rename=None):
  """
  Get metadata about a dataset ID (or list of IDs) from a given master table
  :param dbid: int or list of int dataset IDs; a single ID given as a string is
    treated as one ID rather than being iterated character by character
  :param table: master table name, e.g. 'gwascat_master' or 'gene_master'
  :param schema: schema that master table belongs to
  :param rename: dictionary of old field -> new field name to return in metadata
  :return: List of dictionaries with information for each ID, or None when no
    IDs were given or no rows matched
  :raises FlaskException: status 500 when an ID cannot be converted to int
  """
  if rename is None:
    rename = {}

  try:
    if isinstance(dbid, (int, str)):
      ids = (int(dbid),)
    else:
      ids = tuple(int(x) for x in dbid)
  except (TypeError, ValueError):
    raise FlaskException("Invalid ID type when retrieving metadata", 500)

  if not ids:
    # An empty tuple would be rendered as "IN ()", which is not valid SQL.
    return None

  # Identifiers are quoted by psycopg2; the IDs are passed as a bound parameter.
  query = psycopg2.sql.SQL("SELECT * FROM {}.{} WHERE id in %s").format(
    psycopg2.sql.Identifier(schema),
    psycopg2.sql.Identifier(table)
  )
  cur = g.db.connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
  cur.execute(query, (ids,))
  results = cur.fetchall()
  if not results:
    return None

  # One plain dict per row, with column names mapped through `rename`.
  return [
    {rename.get(k, k): v for k, v in row.items()}
    for row in results
  ]
Пример #5
0
def genes():
  """
  Return genes matching the request filter, optionally with transcripts and exons.

  Reads the following query parameters from the current Flask request:
    filter: required filter string; may contain a 'source' field (gene dataset ID)
    build: genome build; required when the filter has no 'source', and checked
      against each source's build when both are given
    transcripts: 'f', 'false' or 'no' (case-insensitive) skips the transcript/exon lookup

  :return: JSON response with "data" (list of gene dicts), "meta" (dataset
    metadata from get_metadata) and "lastPage"
  :raises FlaskException: when the filter is missing, the build is missing or
    invalid, or no recommended gene source exists for the build
  """
  db_table = "rest.gene_data"
  db_cols = "id feature_type gene_id gene_name chrom start end strand transcript_id exon_id annotation".split()

  field_to_col = {
    "source": "id"
  }

  orig_filter = request.args.get("filter")
  if orig_filter is None:
    raise FlaskException("Filter is a required parameter for this endpoint")

  fp = FilterParser()
  orig_stmts = fp.statements(orig_filter)
  build = request.args.get("build")
  if 'source' not in orig_stmts:
    if build is None:
      raise FlaskException("If no gene source ID is specified via filter parameter, the best recommended gene information source will "
                           "automatically be selected, but you *must* specify the build (genome build) query parameter at a minimum")

    allowed_builds = fetch_distinct_builds('gene_master')
    if build not in allowed_builds:
      raise FlaskException(f"Invalid build {build}, must be one of: {allowed_builds}")

    dataset_id = fetch_recommended_id(build, 'gene_master')
    if not dataset_id:
      raise FlaskException(f"No best recommended gene source is available for build {build}, try querying the metadata endpoint to see all available gene sources")

    # Pin the query to the recommended dataset.
    orig_filter += f' and source eq {dataset_id}'
    sources = [dataset_id]
  else:
    sources = orig_stmts["source"].value
    if build is not None:
      for v in sources:
        allowed_build = fetch_build_for_id(v, "gene_master", "genome_build", "rest")
        if allowed_build != build:
          raise FlaskException(f"Invalid build {build} given for source ID {v}")

  sql_compiler = SQLCompiler()

  cols = "id gene_id gene_name chrom start end strand annotation".split()
  sql_stmt, sql_params = sql_compiler.to_sql(orig_filter,db_table,db_cols,cols,None,field_to_col)
  # NOTE(review): assumes the compiled statement already ends in a WHERE clause - confirm.
  sql_stmt += " AND feature_type = 'gene'"

  cur = g.db.execute(text(sql_stmt),sql_params)
  dgenes = {}
  genes_arr = []
  for row in cur:
    gene_data = dict(zip(cols,row))
    annotation = gene_data.pop("annotation")
    gene_data["gene_type"] = annotation["gene_type"]
    gene = Gene(**gene_data)
    dgenes[gene_data["gene_id"]] = gene
    genes_arr.append(gene)

  # Now retrieve transcripts/exons
  # Only retrieve if there were genes found in the region
  skip_transcripts = request.args.get("transcripts","").lower() in ("f","false","no")
  if not skip_transcripts and genes_arr:
    cols = "id feature_type gene_id chrom start end strand transcript_id exon_id".split()
    trans_keys = "transcript_id chrom start end strand".split()
    exon_keys = "exon_id chrom start end strand".split()
    dtranscripts = {}

    # Transcripts are ordered before exons so every exon can find its transcript.
    sql_stmt = (
      "SELECT {} from {} "
      "WHERE id in :p1 AND gene_id IN :p2 AND feature_type IN ('transcript','exon') "
      "ORDER BY CASE feature_type WHEN 'transcript' THEN 1 WHEN 'exon' THEN 2 ELSE 3 END "
    ).format(",".join(map(sql_compiler.quote_keywords,cols)),db_table)

    sql_params = {
      "p1": tuple(sources),
      "p2": tuple(dgenes.keys()),
    }

    cur = g.db.execute(text(sql_stmt),sql_params)
    for row in cur:
      # Always read through the dict keyed by `cols`, never by string key on the raw row.
      rowd = dict(zip(cols,row))
      feature_type = rowd["feature_type"]

      if feature_type == "transcript":
        tx_id = rowd["transcript_id"]
        if dtranscripts.get(tx_id) is None:
          transcript = Transcript(**{k: rowd[k] for k in trans_keys})
          dtranscripts[tx_id] = transcript
          dgenes[rowd["gene_id"]].add_transcript(transcript)

      elif feature_type == "exon":
        exon = Exon(**{k: rowd[k] for k in exon_keys})

        # The same Exon object is attached to both its gene and its transcript.
        gene = dgenes.get(rowd["gene_id"])
        if gene is not None:
          gene.add_exon(exon)

        transcript = dtranscripts.get(rowd["transcript_id"])
        if transcript is not None:
          transcript.add_exon(exon)

  json_genes = [gene.to_dict() for gene in genes_arr]

  metadata = get_metadata(sources, "gene_master", "rest")

  outer = {
    "data": json_genes,
    "meta": {
      "datasets": metadata
    },
    "lastPage": None
  }

  return jsonify(outer)
Пример #6
0
def ld_results():
  """
  Return LD (r-square) between a reference variant and variants in a region.

  The 'filter' query parameter is translated by LDAPITranslator into a request
  for the external LD server. Results are served from a Redis interval cache
  when the cache holds the requested interval, and are otherwise fetched from
  the LD server and then stored in the cache. Cache failures are printed and
  otherwise ignored.

  :return: JSON response with "data" (parallel lists chromosome2, position2,
    rsquare, variant2) and "lastPage"
  :raises FlaskException: status 400 for a missing or malformed filter, 413
    when the window exceeds the LD_MAX_SIZE config value, 500 when the LD
    server request fails
  """
  # GET request parameters
  filter_str = request.args.get("filter")

  if filter_str is None:
    raise FlaskException("No filter string specified",400)

  # Figure out the LD API URL to send the request through
  base_url = "http://portaldev.sph.umich.edu/api_ld/ld?"
  trans = LDAPITranslator()
  param_str, param_dict = trans.to_refsnp_url(filter_str)
  final_url = base_url + param_str

  # Cache
  ld_cache = RedisIntervalCache(g.redis_client)

  # Cache key for this particular request.
  # Note that in Daniel's API, for now, "reference" is implicitly
  # attached to build, reference panel, and population all at the same time.
  # In the future, it will hopefully expand to accepting parameters for all 3, and
  # then we can include this in the cache key.
  if "variant1" not in param_dict:
    raise FlaskException("Must provide variant1 in filter",400)

  if "reference" not in param_dict:
    raise FlaskException("Must provide reference in filter",400)

  refvariant = param_dict["variant1"]["eq"][0]
  reference = param_dict["reference"]["eq"][0]

  # The position is not needed below, but parsing it rejects malformed variants
  # with a client error instead of an unhandled exception.
  try:
    int(refvariant.split("_")[0].split(":")[1])
  except (AttributeError, IndexError, ValueError):
    raise FlaskException("Could not parse position from variant1",400)

  cache_key = f"{reference}__{refvariant}"

  try:
    start = int(param_dict["position2"]["ge"][0])
    end = int(param_dict["position2"]["le"][0])
  except (KeyError, IndexError, TypeError, ValueError):
    raise FlaskException("position2 compared to non-integer",400)

  chromosome = param_dict["chromosome2"]["eq"][0]
  rlength = abs(end - start)

  # Is the region larger than we're willing to calculate?
  max_ld_size = current_app.config["LD_MAX_SIZE"]
  if rlength > max_ld_size:
    raise FlaskException("Requested LD window is too large, exceeded maximum of {}".format(max_ld_size),413)

  data = {
    "chromosome2": [],
    "position2": [],
    "rsquare": [],
    "variant2": []
  }

  outer = {
    "data": data,
    "lastPage": None
  }

  # Do we need to compute, or is the cache sufficient?
  cache_data = None
  try:
    cache_data = ld_cache.retrieve(cache_key,start,end)
  except redis.ConnectionError:
    print("Warning: cache retrieval failed (redis was unable to connect)")
  except Exception:
    # Best effort: a broken cache must not fail the request.
    print("Error: redis connected, but retrieving data failed")
    traceback.print_exc()

  if cache_data is None:
    # Need to compute. Either the range given is larger than we've previously computed,
    # or redis is down.
    print("Cache miss for {reference}__{refvariant} in {start}-{end} ({rlength:,.2f}kb), recalculating".format(
      reference=reference,
      refvariant=refvariant,
      start=start,
      end=end,
      rlength=rlength/1000
    ))

    # Fire off the request to the LD server.
    # NOTE(review): no timeout is passed, so a stalled LD server blocks this request.
    try:
      resp = requests.get(final_url)
    except Exception as e:
      # Python 3 exceptions have no .message attribute; format the exception itself.
      raise FlaskException("Failed retrieving data from LD server, error was {}".format(e),500)

    # Did it come back OK?
    if not resp.ok:
      raise FlaskException("Failed retrieving data from LD server, error was {}".format(resp.reason),500)

    # Get JSON
    ld_json = resp.json()
    pairs = ld_json["pairs"]

    # Store in format needed for API response
    for obj in pairs:
      data["chromosome2"].append(ld_json["chromosome"])
      data["position2"].append(obj["position2"])
      data["rsquare"].append(JSONFloat(obj["rsquare"]))
      data["variant2"].append(obj["name2"])

    # Store data to cache, keyed by position; only name2 and rsquare are kept.
    keep = ("name2","rsquare")
    for_cache = {
      pair["position2"]: {field: pair[field] for field in keep}
      for pair in pairs
    }

    if for_cache:
      # If we actually received LD data for this variant, store it in the cache.
      try:
        ld_cache.store(cache_key,start,end,for_cache)
      except redis.ConnectionError:
        print("Warning: cache storage failed (redis was unable to connect)")
      except Exception:
        print("Error: storing data in cache failed, traceback was: ")
        traceback.print_exc()

  else:
    print("Cache *match* for {reference}__{refvariant} in {start}-{end} ({rlength:,.2f}kb), using cached data".format(
      reference=reference,
      refvariant=refvariant,
      start=start,
      end=end,
      rlength=rlength/1000
    ))

    # We can just use the cache's data directly.
    for position, ld_pair in iteritems(cache_data):
      data["chromosome2"].append(chromosome)
      data["position2"].append(position)
      data["rsquare"].append(JSONFloat(ld_pair["rsquare"]))
      data["variant2"].append(ld_pair["name2"])

  return jsonify(outer)
Пример #7
0
def gwascat_results():
  """
  Return GWAS catalog entries from rest.gwascat_data.

  Reads the following query parameters from the current Flask request:
    filter: required filter string; may contain an 'id' field (catalog dataset ID)
    build: genome build; required when the filter has no 'id', and checked
      against each ID's build when both are given
    decompose: when present, entries with several alt alleles are split into
      one entry per allele
    variant_format: 'colons' replaces ':', '_' and '/' in variant names with ':'

  :return: JSON response with "data", "meta" (dataset metadata from
    get_metadata) and "lastPage"
  :raises FlaskException: for a missing filter, a missing or invalid build, or
    an unexpected result type from std_response
  """
  db_table = "rest.gwascat_data"
  db_cols = "id variant rsid chrom pos ref alt trait trait_group risk_allele risk_frq log_pvalue or_beta genes pmid pubdate first_author study".split()

  filter_str = request.args.get("filter")
  if filter_str is None:
    raise FlaskException("No filter string specified",400)

  fp = FilterParser()
  filter_stmts = fp.statements(filter_str)
  build = request.args.get("build")
  if 'id' not in filter_stmts:
    if build is None:
      raise FlaskException("If no GWAS catalog ID is specified via filter parameter, the best recommended catalog will "
                           "automatically be selected, but you *must* specify the build (genome build) parameter at a minimum")

    allowed_builds = fetch_distinct_builds('gwascat_master')
    if build not in allowed_builds:
      raise FlaskException(f"Invalid build {build}, must be one of: {allowed_builds}")

    dataset_id = fetch_recommended_id(build, 'gwascat_master')
    if not dataset_id:
      raise FlaskException(f"No best recommended GWAS catalog is available for build {build}, try querying the metadata endpoint to see all available catalogs")

    # NOTE(review): the extended filter_str is not passed to std_response below;
    # verify that std_response actually restricts results to this dataset ID.
    filter_str += f' and id eq {dataset_id}'
  else:
    dataset_id = filter_stmts["id"].value
    if build is not None:
      for dbid in dataset_id:
        allowed_build = fetch_build_for_id(dbid, "gwascat_master", "genome_build", "rest")
        if allowed_build != build:
          raise FlaskException(f"Invalid build {build} given for GWAS catalog ID {dbid}")

  # Named `results` rather than `json` to avoid shadowing the stdlib module name.
  results = std_response(db_table,db_cols,return_json=False)

  if 'decompose' in request.args:
    if isinstance(results, list):
      results = _gwascat_decompose_rows(results)
    elif isinstance(results, dict):
      results = _gwascat_decompose_columns(results)
    else:
      raise FlaskException("Server error, resulting json object was not dict or list", 500)

  variant_format = request.args.get("variant_format")
  if variant_format == "colons":
    if isinstance(results, list):
      for entry in results:
        entry["variant"] = re.sub("[:_/]",":",entry["variant"])

    elif isinstance(results, dict):
      variants = results["variant"]
      for i, v in enumerate(variants):
        variants[i] = re.sub("[:_/]",":",v)

    else:
      raise FlaskException("Server error, resulting json object was not dict or list", 500)

  metadata = get_metadata(dataset_id, "gwascat_master", "rest", {"catalog_version": "version"})

  return jsonify({
    "data": results,
    "meta": {
      "datasets": metadata
    },
    "lastPage": None
  })


def _gwascat_decompose_rows(rows):
  """Split "array of objects" entries into one entry per alt allele."""
  decomposed = []
  for entry in rows:
    if len(entry["alt"]) == 1:
      # Single-character alt: keep the entry as it is.
      decomposed.append(entry)
      continue

    for alt in entry["alt"].split(","):
      alt_entry = deepcopy(entry)
      alt_entry["alt"] = alt
      alt_entry["variant"] = "{}:{}_{}/{}".format(entry["chrom"], entry["pos"], entry["ref"], alt)
      decomposed.append(alt_entry)

  return decomposed


def _gwascat_decompose_columns(columns):
  """Split "dictionary of arrays" results into one row per alt allele."""
  decomposed = OrderedDict((k, []) for k in columns)

  # Every column except the two that are rewritten per allele.
  other_keys = columns.keys() - ["variant", "alt"]
  for i, alts in enumerate(columns["alt"]):
    if len(alts) == 1:
      for k in columns:
        decomposed[k].append(columns[k][i])
      continue

    for alt in alts.split(","):
      new_variant = "{}:{}_{}/{}".format(
        columns["chrom"][i],
        columns["pos"][i],
        columns["ref"][i],
        alt
      )

      decomposed["variant"].append(new_variant)
      decomposed["alt"].append(alt)
      for k in other_keys:
        decomposed[k].append(columns[k][i])

  return decomposed
Пример #8
0
def recomb_results():
  """
  Return recombination rates for a region from rest.recomb_results.

  Reads the following query parameters from the current Flask request:
    filter: required filter string; may contain an 'id' field (dataset ID)
    build: genome build; required when the filter has no 'id', and checked
      against each ID's build when both are given

  The rows inside the requested range are returned together with two extra
  rows at the range boundaries, whose recombination rate is linearly
  interpolated from the nearest row on either side.

  :return: JSON response with "data", "meta" (dataset metadata from
    get_metadata) and "lastPage"
  :raises FlaskException: for a missing filter, a missing or invalid build, or
    when no recommended dataset exists for the build
  """
  # Database columns and table
  db_table = "rest.recomb_results"
  db_cols = ["id","chromosome","position","recomb_rate","pos_cm"]

  filter_str = request.args.get("filter")
  if filter_str is None:
    # Same check as the other endpoints; a None filter would otherwise fail later.
    raise FlaskException("No filter string specified",400)

  fp = FilterParser()

  filter_stmts = fp.statements(filter_str)
  build = request.args.get("build")
  if 'id' not in filter_stmts:
    if build is None:
      raise FlaskException("If no ID is specified via filter parameter, the best recommended recombination rate "
                           "dataset will automatically be selected, but you *must* specify the build (genome build) "
                           "parameter at a minimum")

    allowed_builds = fetch_distinct_builds('recomb', 'build')
    if build not in allowed_builds:
      raise FlaskException(f"Invalid build {build}, must be one of: {allowed_builds}")

    dataset_id = fetch_recommended_id(build, 'recomb')
    if not dataset_id:
      raise FlaskException(f"No best recommended recombination rate dataset is available for build {build}, "
                           f"try querying the metadata endpoint to see all available datasets")

    filter_str += f' and id eq {dataset_id}'
  else:
    dataset_id = filter_stmts["id"].value
    if build is not None:
      for dbid in dataset_id:
        allowed_build = fetch_build_for_id(dbid, "recomb", "build", "rest")
        if allowed_build != build:
          raise FlaskException(f"Invalid build {build} given for recombination rate dataset ID {dbid}")

  matches = fp.parse(filter_str)
  lrm = fp.left_middle_right(matches)

  sql_compiler = SQLCompiler()
  def fetch(terms,limit=None):
    """Run the compiled query for `terms` and return all rows."""
    sql, params = sql_compiler.to_sql_parsed(terms,db_table,db_cols,limit=limit)
    rows = g.db.execute(text(sql),params).fetchall()
    return rows

  def interp(at,left,right):
    """Copy `left`, move it to position `at` and linearly interpolate its rate."""
    if left is None or right is None:
      return None
    left_at = left["position"]
    right_at = right["position"]
    left_r = left["recomb_rate"]
    right_r = right["recomb_rate"]
    result = dict(left)
    if right_at == left_at:
      # Both anchors share a position: nothing to interpolate, avoid dividing by zero.
      result["recomb_rate"] = left_r
    else:
      result["recomb_rate"] = left_r + (at-left_at)/(right_at-left_at) * (right_r-left_r)
    result["position"] = at
    return result

  left = fetch(lrm["left"], 1)
  right = fetch(lrm["right"], 1)
  middle = fetch(lrm["middle"])

  # No row beyond a boundary: substitute a zero-rate placeholder at the boundary itself.
  if len(left) < 1:
    left = [{"id": "chrleft", "position": lrm["range"]["left"],
        "chromosome": lrm["range"]["chrom"], "recomb_rate": 0, "pos_cm": 0}]
  if len(right) < 1:
    right = [{"id": "chrright", "position": lrm["range"]["right"],
        "chromosome": lrm["range"]["chrom"], "recomb_rate": 0, "pos_cm": 0}]

  if len(middle) > 0:
    left_end = interp(lrm["range"]["left"],left[0],middle[0])
    right_end = interp(lrm["range"]["right"],middle[-1],right[0])
  else:
    left_end = interp(lrm["range"]["left"],left[0],right[0])
    right_end = interp(lrm["range"]["right"],left[0],right[0])

  data = reshape_data([left_end] + middle + [right_end],db_cols)

  metadata = get_metadata(dataset_id, "recomb", "rest", {"build": "genome_build"})

  return jsonify({
    "data": data,
    "meta": {
      "datasets": metadata
    },
    "lastPage": None
  })