Exemplo n.º 1
0
def association_stats(request, sources=None):
    ''' Get association statistics for a chromosome, optionally restricted
    to a position range, and return them as JSON.

    The C{chr}, C{idx_type}, C{start} and C{end} GET parameters are read from
    the request. The position range is only applied when both C{start} and
    C{end} are present.

    @param request: Request object exposing the C{GET} parameters above.
    @type  sources: array of result fields
    @keyword sources: The _source filtering passed to the query (default: None).
    @return: JsonResponse of the form {"variants": [...]}, one entry per hit
             with the keys CHROM, POS, PVALUE and DBSNP_ID.
    '''
    params = request.GET
    seqid = params.get('chr').replace('chr', '')
    idx_type = params.get('idx_type').upper()
    start = params.get('start')
    end = params.get('end')
    data = []

    def get_stats(resp_json):
        ''' Scan and scroll callback: collect one variant per hit. '''
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            variant = {
                "CHROM": doc.seqid,
                "POS": doc.position,
                "PVALUE": doc.p_value,
                "DBSNP_ID": doc.marker
            }
            data.append(variant)

    seqid_query = Query.query_string(seqid, fields=["seqid"])
    if start is None or end is None:
        # No complete range given: match on the chromosome only.
        query = ElasticQuery(seqid_query, sources=sources)
    else:
        position_range = RangeQuery("position", gte=start, lte=end)
        query = ElasticQuery(BoolQuery(must_arr=[seqid_query, position_range]),
                             sources=sources)
    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type), call_fun=get_stats, query=query)

    return JsonResponse({"variants": data})
Exemplo n.º 2
0
    def post(self, request, *args, **kwargs):
        ''' Get study hits for an Ensembl gene ID, a marker or a list of markers.

        The first non-empty POST parameter out of C{ens_id}, C{marker} and
        C{markers[]} selects the filter. In each hit the gene IDs are replaced
        by an {ensembl ID: symbol} mapping (the ID itself is used when no gene
        document is found) and the pmid by a dictionary that also carries the
        last word of the first author's name and the journal, when the
        publication document is available.

        @return: JsonResponse of the augmented study hits, or a JsonResponse
                 with an C{error} key and status 400 when none of the search
                 parameters was posted.
        '''
        ens_id = self.request.POST.get('ens_id')
        marker = self.request.POST.get('marker')
        markers = self.request.POST.getlist('markers[]')

        if ens_id:
            filter_query = Query.query_string(ens_id, fields=["genes"])
        elif marker:
            filter_query = Query.query_string(marker, fields=["marker"])
        elif markers:
            filter_query = Query.query_string(' '.join(markers), fields=["marker"])
        else:
            # Without this branch the filter was never bound and the view
            # failed with an UnboundLocalError.
            return JsonResponse({'error': 'No ens_id, marker or markers[] provided.'}, status=400)
        sfilter = Filter(filter_query.query_wrap())

        query = ElasticQuery.filtered(Query.match_all(), sfilter)
        elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
        study_hits = elastic.get_json_response()['hits']

        # First pass: gather every gene and publication ID so that the
        # documents can be fetched with one lookup each.
        ens_ids = []
        pmids = []
        for hit in study_hits['hits']:
            source = hit['_source']
            if 'pmid' in source:
                pmids.append(source['pmid'])
            if 'genes' in source:
                ens_ids.extend(source['genes'])
        docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
        pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

        # Second pass: replace the raw IDs in each hit with the looked up details.
        for hit in study_hits['hits']:
            source = hit['_source']
            genes = {}
            for gene_id in source.get('genes', []):
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Fall back to the ID for this gene only; the symbols
                    # already resolved for the hit are kept.
                    genes[gene_id] = gene_id
            source['genes'] = genes
            if 'pmid' in source:
                pmid = source['pmid']
                try:
                    authors = getattr(pub_docs[pmid], 'authors')
                    journal = getattr(pub_docs[pmid], 'journal')
                    source['pmid'] = \
                        {'pmid': pmid,
                         'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                         'journal': journal}
                except KeyError:
                    source['pmid'] = {'pmid': pmid}

        return JsonResponse(study_hits)
Exemplo n.º 3
0
    def query_string(cls, query_term, sources=None, highlight=None, query_filter=None, **string_opts):
        ''' Factory method building an elastic Query String Query, wrapped in
        a filtered query when a filter is supplied.

        @type  query_term: string
        @param query_term: The string to use in the query.
        @type  sources: array of result fields
        @keyword sources: The _source filtering to be used (default: None).
        @type  highlight: Highlight
        @keyword highlight: Define the highlighting of results (default: None).
        @type query_filter: Filter
        @keyword query_filter: Optional filter for query.
        @keyword string_opts: Extra options passed on to L{Query.query_string}.
        @return: L{ElasticQuery}
        '''
        query = Query.query_string(query_term, **string_opts)
        if query_filter is not None:
            query = FilteredQuery(query, query_filter)
        return cls(query, sources, highlight)
 def test_bool_filtered_query2(self):
     ''' Build a filtered boolean query (two should clauses and one must
     clause) and check that it retrieves exactly one marker. '''
     bool_filter = BoolQuery()
     bool_filter.should(RangeQuery("start", lte=20000)) \
                .should(Query.term("seqid", 2)) \
                .must(Query.term("seqid", 1))
     marker_query = Query.query_string("rs768019142", fields=["id", "seqid"])
     search = Search(ElasticQuery.filtered_bool(marker_query, bool_filter, sources=["id", "seqid", "start"]),
                     idx=ElasticSettings.idx('DEFAULT'))
     hits_total = search.search().hits_total
     self.assertTrue(hits_total == 1, "Elastic filtered query retrieved marker (rs768019142)")
 def test_or_filtered_query(self):
     ''' Build a term query filtered by an OR filter (range, boolean and
     query string parts) and check that at least one marker is retrieved. '''
     highlight = Highlight(["id", "seqid"])
     start_range = RangeQuery("start", lte=1)
     end_range = RangeQuery("end", gte=100000)
     query_bool = BoolQuery(must_arr=[start_range, end_range])
     or_filter = OrFilter(RangeQuery("start", gte=1, lte=100000))
     or_filter.extend(query_bool) \
              .extend(Query.query_string("rs*", fields=["id", "seqid"]).query_wrap())
     search = Search(ElasticQuery.filtered(Query.term("seqid", 1), or_filter, highlight=highlight),
                     idx=ElasticSettings.idx('DEFAULT'))
     hits_total = search.search().hits_total
     self.assertTrue(hits_total >= 1, "Elastic filtered query retrieved marker(s)")
 def test_function_score_query(self):
     ''' Run a function score query that replaces the score with the
     reciprocal of the start position and check that the returned documents
     come back with strictly increasing start positions. '''
     reciprocal_start = ScoreFunction.create_score_function('field_value_factor', field='start',
                                                            modifier='reciprocal')
     marker_query = Query.query_string("rs*", fields=["id", "seqid"])
     score_query = FunctionScoreQuery(marker_query, [reciprocal_start], boost_mode='replace')
     search = Search(ElasticQuery(score_query), idx=ElasticSettings.idx('DEFAULT'))
     docs = search.search().docs
     self.assertGreater(len(docs), 1, str(len(docs)))
     previous = 0
     for doc in docs:
         current = getattr(doc, 'start')
         self.assertLess(previous, current)
         previous = current
Exemplo n.º 7
0
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request feature locations.

        Searches the marker, region and gene indices for the C{feature} GET
        parameter (default 'PTPN22') and returns one L{ElasticObject} per
        document with its location on the requested C{build}. Only GET
        parameters listed in the view's C{filter_fields} are considered.

        @return: list of L{ElasticObject}; a single object carrying an
                 C{error} entry when the feature name is empty.
        @raise Http404: on TypeError, ValueError, IndexError or ConnectionError.
        '''
        try:
            allowed = getattr(view, 'filter_fields', [])
            filters = {key: value for key, value in request.GET.items() if key in allowed}
            query_str = filters.get('feature', 'PTPN22')
            build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
            if query_str in (None, ''):
                return [ElasticObject(initial={'error': 'No feature name provided.'})]

            search_fields = ['id', 'symbol', 'dbxrefs.ensembl', 'region_name']
            sources = ['start', 'stop', 'seqid', 'chromosome', 'disease_loci']
            idx_keys = ElasticSettings.getattr('IDX')

            # Pick the marker index whose build label matches the requested
            # build; when several keys match, the last one iterated is used.
            marker_idx_key = ''
            if build == ElasticSettings.get_label('MARKER', label='build'):
                marker_idx_key = 'MARKER'
            else:
                for key in idx_keys:
                    if 'MARKER' in key and build == ElasticSettings.get_label(key, label='build'):
                        marker_idx_key = key

            (idx_m, idx_type_m) = ElasticSettings.idx_names(marker_idx_key, 'MARKER')
            (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
            (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
            idx = idx_m + ',' + idx_r + ',' + idx_g
            idx_type = idx_type_m + ',' + idx_type_r + ',' + idx_type_g

            equery = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
            elastic = Search(search_query=ElasticQuery(equery, sources), size=10, idx=idx, idx_type=idx_type)
            locs = []
            for doc in elastic.search().docs:
                if isinstance(doc, RegionDocument):
                    doc = Region.pad_region_doc(doc)

                # The position is split on ':' into the chromosome and a
                # 'start-end' (or single position) part.
                loc = doc.get_position(build=build).split(':')
                pos = loc[1].replace(',', '').split('-')
                start = int(pos[0])
                end = int(pos[1]) if len(pos) > 1 else int(pos[0])
                locs.append(ElasticObject(
                    {'feature': query_str,
                     'chr': loc[0],
                     'start': start,
                     'end': end,
                     'locusString': query_str+" ("+str(loc[1])+")"}))
            return locs
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
Exemplo n.º 8
0
def _gene_lookup(search_term):
    ''' Look for any gene symbols (e.g. PTPN22) and get the corresponding
    Ensembl ID and append to query string.

    The lookup is skipped when the term contains any character that is
    neither a word character nor whitespace.

    @type  search_term: string
    @param search_term: The search term entered.
    @return: The IDs of the matching gene documents followed by the original
             search term (space separated), or the unchanged search term when
             the lookup is skipped or nothing matches.
    '''
    if re.search(r'[^\w\s]', search_term):
        logger.debug('skip gene lookup as contains non-word pattern %s', search_term)
        return search_term
    # Only word characters and whitespace remain here, so this turns any
    # whitespace (tabs, newlines) into plain spaces. A raw string is used as
    # "\w" in a normal string literal is an invalid escape sequence.
    words = re.sub(r"\W", " ", search_term)
    equery = BoolQuery(b_filter=Filter(Query.query_string(words, fields=['symbol'])))
    search_query = ElasticQuery(equery, sources=['symbol'])
    (idx, idx_type) = ElasticSettings.idx('GENE', 'GENE').split('/')
    result = Search(search_query=search_query, size=10, idx=idx, idx_type=idx_type).search()
    if result.hits_total > 0:
        return ' '.join(doc.doc_id() for doc in result.docs) + ' ' + search_term
    return search_term
Exemplo n.º 9
0
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure for the given genes on a chromosome.

    @param chrom: Chromosome the exons have to be on (matched on seqid).
    @param segmin: Segment start; exon coordinates are made relative to it.
    @param segmax: Segment end.
    @param genes: Gene dictionaries, each with a "gene_id" entry.
    @return: Dictionary of gene_id to its exons sorted by start.
    '''
    gene_exons = dict()
    chrom_filter = BoolQuery()
    chrom_filter.must([Query.term("seqid", chrom)])
    if len(genes) == 0:
        return gene_exons

    for gene in genes:
        gene_id = gene["gene_id"]
        query = ElasticQuery.filtered_bool(Query.query_string(gene_id, fields=["name"]),
                                           chrom_filter, sources=utils.snpFields)
        exon_idx = getattr(chicp_settings, 'CP_GENE_IDX')+'/exons/'
        result = Search(query, idx=exon_idx, search_from=0, size=2000).get_result()
        exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], result['data'])
        gene_exons[gene["gene_id"]] = sorted(exons, key=operator.itemgetter("start"))
    return gene_exons
Exemplo n.º 10
0
def marker_page(request):
    ''' Renders a marker page.

    The marker is taken from the C{m} GET parameter. Names starting with
    "rs" are searched in the id and rscurrent fields, anything else in the
    name field. Hits are grouped by document type into the marker document,
    the immunochip documents and the rs merge history documents.

    @return: The rendered 'marker/marker.html' page.
    @raise Http404: if no marker is given or the marker is not found.
    '''
    query_dict = request.GET
    marker = query_dict.get("m")
    if marker is None:
        # This is the marker page, so report a missing marker (not gene) name.
        messages.error(request, 'No marker name given.')
        raise Http404()

    fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
    sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()
    if res.hits_total < 1:
        # Always raise here so that the view never falls through and
        # returns None.
        messages.error(request, 'Marker '+marker+' not found.')
        raise Http404()

    types = getattr(res.aggs['types'], 'buckets')
    marker_doc = None
    ic_docs = []
    history_docs = []
    for doc_type in types:
        hits = doc_type['top_hits']['hits']['hits']
        for hit in hits:
            doc = Document(hit)
            if 'marker' == doc_type['key']:
                marker_doc = doc
            elif 'immunochip' == doc_type['key']:
                ic_docs.append(doc)
            elif 'rs_merge' == doc_type['key']:
                history_docs.append(doc)

    criteria = {}
    if marker_doc is not None:
        # Criteria are only looked up when a criteria index is configured.
        if ElasticSettings.idx('CRITERIA') is not None:
            criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
        marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

    context = {
        'marker': marker_doc,
        'old_dbsnp_docs': _get_old_dbsnps(marker),
        'ic': ic_docs,
        'history': history_docs,
        'criteria': criteria
    }
    return render(request, 'marker/marker.html', context,
                  content_type='text/html')
Exemplo n.º 11
0
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP. Assumes the index key is
    prefixed by 'MARKER_'.

    The indices are searched in reverse sorted name order and the first
    document found in each one is returned, tagged with its marker build.

    @param marker: Marker name searched in the id and rscurrent fields.
    @return: List of documents, at most one per old marker index.
    '''
    idx_keys = ElasticSettings.getattr('IDX').keys()
    old_idx_names = [ElasticSettings.idx(key) for key in idx_keys if 'MARKER_' in key]
    old_idx_names.sort(reverse=True)

    found = []
    if not old_idx_names:
        return found

    search_query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
    for idx_name in old_idx_names:
        docs = Search(search_query=search_query, idx=idx_name, idx_type='marker').search().docs
        if len(docs) == 0:
            continue
        first_doc = docs[0]
        first_doc.marker_build = _get_marker_build(idx_name)
        found.append(first_doc)
    return found
 def test_function_score_query2(self):
     ''' Run a function score query combining two score functions on the
     start position (summed, replacing the query score) and check that the
     returned documents come back with strictly decreasing start positions. '''
     first_function = ScoreFunction.create_score_function('field_value_factor', field='start')
     second_function = ScoreFunction.create_score_function('field_value_factor', field='start')
     marker_query = Query.query_string("rs*", fields=["id"])
     score_query = FunctionScoreQuery(marker_query, [first_function, second_function],
                                      score_mode='sum', boost_mode='replace', min_score=1.,
                                      max_boost=100000000.)
     search = Search(ElasticQuery(score_query, sources=['start']), idx=ElasticSettings.idx('DEFAULT'))
     docs = search.search().docs
     self.assertGreater(len(docs), 1, str(len(docs)))
     previous = sys.maxsize
     for doc in docs:
         current = getattr(doc, 'start')
         self.assertGreater(previous, current)
         previous = current
Exemplo n.º 13
0
    def get_marker(cls, request, marker, context):
        ''' Search for a marker and add its details to the page context.

        Names starting with "rs" are searched in the id and rscurrent fields,
        anything else in the name field. Hits are grouped by document type
        into the marker document, the immunochip documents and the rs merge
        history documents.

        @param request: Request used for messages and the criteria lookup.
        @param marker: Marker name to search for.
        @param context: Context to populate.
        @return: The populated context.
        @raise Http404: if no marker is given or the marker is not found.
        '''
        if marker is None:
            messages.error(request, 'No marker name given.')
            raise Http404()

        if marker.startswith("rs"):
            fields = ['id', 'rscurrent']
        else:
            fields = ['name']
        top_hits = Agg('top_hits', 'top_hits', {"size": 15})
        type_aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=top_hits))
        search = Search(search_query=ElasticQuery(Query.query_string(marker, fields=fields)),
                        idx=ElasticSettings.idx('MARKER'), aggs=type_aggs, size=0)
        res = search.search()
        title = ''
        if res.hits_total >= 1:
            marker_doc = None
            ic_docs = []
            history_docs = []
            for bucket in getattr(res.aggs['types'], 'buckets'):
                for hit in bucket['top_hits']['hits']['hits']:
                    doc = PydginDocument.factory(hit)
                    # The title ends up being the name of the last named document.
                    if doc.get_name() is not None:
                        title = doc.get_name()

                    bucket_key = bucket['key']
                    if bucket_key == 'marker':
                        marker_doc = doc
                    elif bucket_key == 'immunochip':
                        ic_docs.append(doc)
                    elif bucket_key == 'rs_merge':
                        history_docs.append(doc)

            if marker_doc is not None:
                marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

            context['criteria'] = MarkerView.criteria_disease_tags(request, [marker])
            context['features'] = [marker_doc]
            context['old_dbsnp_docs'] = _get_old_dbsnps(marker)
            context['ic'] = ic_docs
            context['history'] = history_docs
            context['title'] = title
            context['jbrowse_tracks'] = "PydginRegions%2Cdbsnp146%2CEnsemblGenes"
            return context
        elif res.hits_total == 0:
            messages.error(request, 'Marker '+marker+' not found.')
            raise Http404()
Exemplo n.º 14
0
def studies_details(request):
    """ Get studies for a given ensembl ID.

    The ID is taken from the C{ens_id} POST parameter. In each hit the gene
    IDs are replaced by an {ensembl ID: symbol} mapping (the ID itself is used
    when no gene document is found) and the pmid by a dictionary that also
    carries the last word of the first author's name and the journal, when
    the publication document is available.

    @return: JsonResponse of the augmented study hits.
    """
    ens_id = request.POST.get("ens_id")
    sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx("REGION", "STUDY_HITS"), size=500)
    study_hits = elastic.get_json_response()["hits"]

    # First pass: gather every gene and publication ID so that the documents
    # can be fetched with one lookup each.
    ens_ids = []
    pmids = []
    for hit in study_hits["hits"]:
        source = hit["_source"]
        if "pmid" in source:
            pmids.append(source["pmid"])
        # Hits without a genes entry are tolerated rather than raising KeyError.
        ens_ids.extend(source.get("genes", []))
    docs = _get_gene_docs_by_ensembl_id(ens_ids, ["symbol"])
    pub_docs = _get_pub_docs_by_pmid(pmids, sources=["authors.name", "journal"])

    # Second pass: replace the raw IDs in each hit with the looked up details.
    for hit in study_hits["hits"]:
        source = hit["_source"]
        genes = {}
        for gene_id in source.get("genes", []):
            try:
                genes[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                # Fall back to the ID for this gene only; the symbols already
                # resolved for the hit are kept.
                genes[gene_id] = gene_id
        source["genes"] = genes
        if "pmid" in source:
            pmid = source["pmid"]
            try:
                authors = getattr(pub_docs[pmid], "authors")
                journal = getattr(pub_docs[pmid], "journal")
                source["pmid"] = {
                    "pmid": pmid,
                    # An empty author list gives an empty author name
                    # instead of an uncaught IndexError.
                    "author": authors[0]["name"].rsplit(None, 1)[-1] if authors else "",
                    "journal": journal,
                }
            except KeyError:
                source["pmid"] = {"pmid": pmid}

    return JsonResponse(study_hits)
Exemplo n.º 15
0
    def _build_filters(self, filters=None):
        ''' Build filters using L{AndFilter}. '''
        if filters is None:
            filters = {}

        and_filter = None
        for filter_expr, value in filters.items():
            filter_bits = filter_expr.split('__')
            field_name = filter_bits.pop(0)
            filter_type = 'exact'

            if len(filter_bits):
                filter_type = filter_bits.pop()
            if filter_type != 'exact':
                field_name = field_name + "." + filter_type

            q = Query.query_string(value, fields=[field_name]).query_wrap()
            if and_filter is None:
                and_filter = AndFilter(q)
            else:
                and_filter.extend(q)
        return and_filter
Exemplo n.º 16
0
def genesets_details(request):
    """ Get pathway gene sets for a given ensembl ID.

    The ID is taken from the C{ens_id} POST parameter. In each hit the list
    of gene IDs is replaced by an {ensembl ID: symbol} mapping, using the ID
    itself when no gene document is found.

    @return: JsonResponse of the augmented pathway hits.
    """
    ens_id = request.POST.get("ens_id")
    wrapped_query = Query.query_string(ens_id, fields=["gene_sets"]).query_wrap()
    query = ElasticQuery.filtered(Query.match_all(), Filter(wrapped_query))
    search = Search(query, idx=ElasticSettings.idx("GENE", "PATHWAY"), size=500)
    genesets_hits = search.get_json_response()["hits"]

    # Gather every gene ID first so the symbols are fetched in one lookup.
    ens_ids = [gene_id for hit in genesets_hits["hits"] for gene_id in hit["_source"]["gene_sets"]]
    docs = _get_gene_docs_by_ensembl_id(ens_ids, ["symbol"])

    for hit in genesets_hits["hits"]:
        source = hit["_source"]
        symbols = {}
        for gene_id in source["gene_sets"]:
            try:
                symbols[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                symbols[gene_id] = gene_id
        source["gene_sets"] = symbols
    return JsonResponse(genesets_hits)
Exemplo n.º 17
0
def genesets_details(request):
    ''' Get pathway gene sets for a given ensembl ID.

    The ID is taken from the C{ens_id} POST parameter. In each hit the list
    of gene IDs is replaced by an {ensembl ID: symbol} mapping, using the ID
    itself when no gene document is found.

    @return: JsonResponse of the augmented pathway hits.
    '''
    posted_id = request.POST.get('ens_id')
    geneset_filter = Filter(Query.query_string(posted_id, fields=["gene_sets"]).query_wrap())
    filtered_query = ElasticQuery.filtered(Query.match_all(), geneset_filter)
    pathway_idx = ElasticSettings.idx('GENE', 'PATHWAY')
    genesets_hits = Search(filtered_query, idx=pathway_idx, size=500).get_json_response()['hits']

    # Gather every gene ID first so the symbols are fetched in one lookup.
    all_ids = []
    for hit in genesets_hits['hits']:
        all_ids.extend(member for member in hit['_source']['gene_sets'])
    docs = utils.get_gene_docs_by_ensembl_id(all_ids, ['symbol'])

    for hit in genesets_hits['hits']:
        members = hit['_source']['gene_sets']
        genesets = {}
        for member in members:
            try:
                genesets[member] = getattr(docs[member], 'symbol')
            except KeyError:
                genesets[member] = member
        hit['_source']['gene_sets'] = genesets
    return JsonResponse(genesets_hits)
Exemplo n.º 18
0
def chicpeaSearch(request, url):
    ''' Search the interaction data by gene name, marker (rs ID) or region
    and return the interactions with the supporting tracks as JSON.

    GET parameters read: C{targetIdx}, C{searchTerm}, C{snp_track} and
    C{region}. The search type is 'region' when a region parameter is given
    or the search term looks like "chr:start-end", 'snp' when the search
    term starts with an rs ID, and 'gene' otherwise.

    @param request: Request carrying the GET parameters and the user.
    @param url: Not used in this function.
    @return: JsonResponse with the keys hic, frags, meta, snps, snp_meta,
             genes, region, blueprint and extra; or a JsonResponse with an
             C{error} key (and, for ambiguous gene names, the candidate
             results) when the search cannot be completed.
    '''
    queryDict = request.GET
    user = request.user
    targetIdx = queryDict.get("targetIdx")
    blueprint = {}
    hic = []
    addList = []
    searchType = 'gene'
    searchTerm = queryDict.get("searchTerm").upper()
    searchTerm = searchTerm.replace(",", "")
    searchTerm = searchTerm.replace("..", "-")
    snpTrack = queryDict.get("snp_track")

    (idx_keys_auth, idx_type_keys_auth) = get_authenticated_idx_and_idx_types(
                                            user=user, idx_keys=None, idx_type_keys=None)

    if snpTrack:
        mo = re.match(r"(.*)-(.*)", snpTrack)
        if mo is None:
            # A track name without a '-' cannot be matched to a group, so it
            # is dropped like an unauthorised track instead of failing with
            # an AttributeError.
            snpTrack = None
        else:
            (group, track) = mo.group(1, 2)  # @UnusedVariable
            if group != 'ud' and 'CP_STATS_'+group.upper()+'.'+snpTrack.upper() not in idx_type_keys_auth:
                snpTrack = None

    # Populate the tissue lists for the targets the user may access.
    if targetIdx not in utils.tissues:
        for target in getattr(chicp_settings, 'CP_TARGET'):
            if 'CP_TARGET_'+target not in idx_keys_auth:
                if targetIdx == target:
                    retJSON = {'error': 'Sorry, you do not have permission to view this dataset.'}
                    return JsonResponse(retJSON)
                continue
            elasticJSON = Search(idx=ElasticSettings.idx('CP_TARGET_'+target)).get_mapping(mapping_type="gene_target")
            tissueList = list(elasticJSON[ElasticSettings.idx('CP_TARGET_'+target)]
                              ['mappings']['gene_target']['_meta']['tissue_type'].keys())
            utils.tissues['CP_TARGET_'+target] = tissueList

    if queryDict.get("region") or re.match(r"(.*):(\d+)-(\d+)", searchTerm):
        searchType = 'region'
        region = searchTerm
        if queryDict.get("region"):
            region = queryDict.get("region")
        else:
            searchTerm = ""
        # NOTE(review): a region parameter that does not match this pattern
        # leaves mo as None and the next line fails - confirm how it should
        # be reported.
        mo = re.match(r"(.*):(\d+)-(\d+)", region)
        (chrom, segmin, segmax) = mo.group(1, 2, 3)
        chrom = chrom.replace('chr', "")
        chrom = chrom.replace('CHR', "")
    if re.search("^rs[0-9]+", searchTerm.lower()):
        searchTerm = searchTerm.lower()
        addList.append(_find_snp_position(snpTrack, searchTerm))
        if addList[0].get("error"):
            return JsonResponse({'error': addList[0]['error']})
        position = addList[0]['end']
        if searchType != 'region':
            searchType = 'snp'

    logger.warning("### %s - %s ###", searchType, searchTerm)

    if searchType == 'region':
        query_bool = BoolQuery()
        filter_bool = BoolQuery()
        if searchTerm and len(addList) == 0 and re.match(r"(.*):(\d+)-(\d+)",
                                                         queryDict.get("searchTerm").replace(",", "")) is None:
            query_bool.must([Query.query_string(searchTerm, fields=["name", "ensg"]),
                             Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
        else:
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])

        query_bool = _add_tissue_filter(query_bool, targetIdx)

        if len(addList) > 0:
            # A marker was found: keep interactions whose bait or other end
            # spans the marker position.
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])
        else:
            # Otherwise keep interactions whose bait or other end lies
            # within the segment.
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", gte=segmin, lte=segmax),
                                                    RangeQuery("baitEnd", gte=segmin, lte=segmax)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", gte=segmin, lte=segmax),
                                                    RangeQuery("oeEnd", gte=segmin, lte=segmax)])])

        query = ElasticQuery.filtered_bool(query_bool, filter_bool,
                                           sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])
        (hic, v1, v2) = _build_hic_query(query, targetIdx, segmin, segmax)  # @UnusedVariable

        if "error" in hic:
            return JsonResponse(hic)
        if len(hic) == 0:
            retJSON = {'error': queryDict.get("searchTerm")+' does not overlap any bait/target regions in this dataset.'}
            return JsonResponse(retJSON)

    elif searchType == 'snp':
        if len(addList) > 0:
            chrom = addList[0]['chr']

            query_bool = BoolQuery()
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
            query_bool = _add_tissue_filter(query_bool, targetIdx)

            filter_bool = BoolQuery()
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])

            query = ElasticQuery.filtered_bool(query_bool, filter_bool,
                                               sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])
            hic, segmin, segmax = _build_hic_query(query, targetIdx)

            if "error" in hic:
                return JsonResponse(hic)
            if len(hic) == 0:
                retJSON = {'error': 'Marker '+searchTerm+' does not overlap any bait/target regions in this dataset.'}
                return JsonResponse(retJSON)
    else:
        # geneQuery = ElasticQuery.query_string(searchTerm, fields=["gene_name"])
        geneQuery = ElasticQuery.filtered(Query.match_all(), Filter(Query.match("gene_name", searchTerm).query_wrap()))
        # A first search with size=0 is only used for the number of hits.
        resultObj = Search(idx=getattr(chicp_settings, 'CP_GENE_IDX') + '/genes/',
                           search_query=geneQuery, size=0, qsort=Sort('seqid:asc,start')).search()
        if resultObj.hits_total > 1:
            geneResults = []
            resultObj2 = Search(idx=getattr(chicp_settings, 'CP_GENE_IDX') + '/genes/', search_query=geneQuery,
                                size=(resultObj.hits_total+1), qsort=Sort('seqid:asc,start')).search()

            docs = resultObj2.docs
            # The first and last character of the stored gene_id are cut off
            # here; the replace calls below indicate they are double quotes.
            gene_ids = [getattr(doc, 'attr')['gene_id'][1:-1] for doc in docs]

            query = ElasticQuery.filtered(Query.match_all(), TermsFilter.get_terms_filter('ensg', gene_ids))
            agg = Agg('ensg_agg', "terms", {"field": "ensg", "size": 0})
            res = Search(idx=ElasticSettings.idx('CP_TARGET_'+targetIdx), search_query=query, aggs=Aggs(agg),
                         size=0).search()

            # Keep only the genes that occur in the target dataset.
            ensg_count = res.aggs['ensg_agg'].get_buckets()
            gene_ids = [g['key'] for g in ensg_count]

            for d in resultObj2.docs:
                if getattr(d, "attr")["gene_id"].replace('\"', '') in gene_ids:
                    geneResults.append({
                        'gene_name': getattr(d, "attr")["gene_name"].replace('\"', ''),
                        'gene_id': getattr(d, "attr")["gene_id"].replace('\"', ''),
                        'location': "chr" + getattr(d, "seqid") + ":" +
                        locale.format_string("%d", getattr(d, "start"), grouping=True) + ".." +
                        locale.format_string("%d", getattr(d, "end"), grouping=True),
                    })

            if len(geneResults) == 0:
                retJSON = {'error': 'Gene name '+searchTerm+' not found in this dataset.'}
                return JsonResponse(retJSON)
            elif len(geneResults) > 1:
                retJSON = {
                    'error': 'Gene name <strong>'+searchTerm+'</strong> returns too many hits, please select your prefered result from the list below.',
                    'results': geneResults,
                    'cols': ['HGNC Symbol', 'Ensembl Gene ID', 'Location']
                }
                return JsonResponse(retJSON)

        query_bool = BoolQuery()
        query_bool.must([RangeQuery("dist", gte=-2e6, lte=2e6)])
        query_bool = _add_tissue_filter(query_bool, targetIdx)
        query = ElasticQuery.filtered_bool(Query.query_string(searchTerm, fields=["name", "ensg", "oeName"]),
                                           query_bool, sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])

        (hic, segmin, segmax) = _build_hic_query(query, targetIdx)

        if "error" in hic:
            return JsonResponse(hic)
        if len(hic) == 0:
            retJSON = {'error': 'Gene name '+searchTerm+' not found in this dataset.'}
            return JsonResponse(retJSON)
        chrom = hic[0]['baitChr']

    # chrom is only bound on some of the paths above; an unbound local
    # raises UnboundLocalError, which is a subclass of NameError.
    try:
        chrom
    except NameError:
        retJSON = {'error': 'No chromosome defined for search'}
        return JsonResponse(retJSON)

    # get genes based on this segment
    genes = _build_gene_query(chrom, segmin, segmax)
    (snps, snpMeta) = _build_snp_query(snpTrack, chrom, segmin, segmax)
    frags = _build_frags_query(getattr(chicp_settings, 'DEFAULT_FRAG'), chrom, segmin, segmax)

    addList = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], addList)

    retJSON = {"hic": hic,
               "frags": frags,
               "meta": {"ostart": int(segmin),
                        "oend": int(segmax),
                        "rstart": 1,
                        "rend": int(segmax) - int(segmin),
                        "rchr": str(chrom),
                        "tissues": utils.tissues['CP_TARGET_'+targetIdx]},
               "snps": snps,
               "snp_meta": snpMeta,
               "genes": genes,
               "region": str(chrom) + ":" + str(segmin) + "-" + str(segmax),
               "blueprint": blueprint,
               "extra": addList
               }

    response = JsonResponse(retJSON)
    return response
Exemplo n.º 19
0
 def _get_ens_gene(self, gene_list):
     ''' Search the gene index for the genes in a '__' separated string and
     return the IDs of the matching documents. '''
     search_terms = re.sub("__", " ", gene_list)
     gene_query = ElasticQuery(Query.query_string(search_terms))
     gene_idx = ElasticSettings.idx('GENE', 'GENE')
     docs = Search(gene_query, idx=gene_idx).search().docs
     return [doc.doc_id() for doc in docs]
Exemplo n.º 20
0
    def do_identifier_search(cls, identifiers, user=None):
        ''' Search the region, gene, study and marker indices for a list of identifiers.

        Every hit is filed under its highlighted term, i.e. the text inside the
        first <em>..</em> pair of the document highlight, and grouped by the
        index type of the document.

        identifiers -- list of identifier strings; they are joined with spaces
                       to form the query string.
        user        -- not used by this method.

        Returns a dict with the keys 'gene', 'marker', 'region' and 'study', each
        a dict mapping a highlighted term to the list of unique feature IDs found
        for it, and 'missing', the lower-cased identifiers that do not appear as
        a highlighted term in any of the results.
        '''
        source_filter = [
                        'symbol', 'synonyms', "dbxrefs.*",                            # gene
                        'id', 'rscurrent', 'rshigh',                                  # marker
                        'study_id', 'study_name',                                     # study
                        'region_name', 'marker', "region_id"]                         # regions

        highlight = Highlight(["symbol", "dbxrefs.*", "region", "region_name", "region_id",
                               "study_id", "study_name", "id", "rscurrent", "rshigh", "marker"])

        search_query = ElasticQuery(Query.query_string(" ".join(identifiers), fields=source_filter),
                                    highlight=highlight, sources=source_filter)

        search_idx_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']
        search_idx_type_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']

        idx_all = [ElasticSettings.idx_names(idx, idx_type=idx_type)
                   for idx, idx_type in zip(search_idx_keys, search_idx_type_keys)]
        idx_dict = dict(idx_all)

        search_idx = ','.join(idx_dict.keys())
        search_idx_types = ','.join(idx_dict.values())

        elastic = Search(search_query=search_query, idx=search_idx, idx_type=search_idx_types)

        gene_dict = {}
        region_dict = {}
        marker_dict = {}
        study_dict = {}

        # index type -> (result dict, function returning the feature ID of a document);
        # the lookups are wrapped so an attribute is only read for its own index type
        feature_lookup = {
            "studies": (study_dict, lambda d: getattr(d, "study_id")),
            "gene": (gene_dict, lambda d: d.doc_id()),
            "marker": (marker_dict, lambda d: getattr(d, "id")),
            "region": (region_dict, lambda d: getattr(d, "region_id")),
        }
        em_pattern = re.compile(".*?<em>(.*?)</em>.*")

        for doc in elastic.search().docs:
            doc_highlight = doc.highlight()
            if doc_highlight is None:
                continue
            result = em_pattern.match(str(doc_highlight))
            if result is None:
                continue

            idx_type = getattr(doc, '_meta')['_type']
            if idx_type not in feature_lookup:
                continue

            highlight_hit = result.group(1)
            result_dict, get_feature_id = feature_lookup[idx_type]
            feature_id = get_feature_id(doc)

            # accumulate every distinct feature found for the same highlighted term;
            # previously a second feature replaced the first one and a repeated
            # feature was stored twice
            feature_list = result_dict.setdefault(highlight_hit, [])
            if feature_id not in feature_list:
                feature_list.append(feature_id)

        all_result_dict = {}
        all_result_dict['gene'] = gene_dict
        all_result_dict['marker'] = marker_dict
        all_result_dict['region'] = region_dict
        all_result_dict['study'] = study_dict

        # identifiers are compared case-insensitively with the highlighted terms
        original_list = [_id.lower() for _id in identifiers]
        result_list = [hit.lower()
                       for hit_dict in (study_dict, gene_dict, marker_dict, region_dict)
                       for hit in hit_dict]

        all_result_dict['missing'] = list(set(original_list) - set(result_list))
        return all_result_dict
Exemplo n.º 21
0
def chicpeaSearch(request, url):
    ''' Search a CHiCP target dataset by region, marker or gene name.

    Reads the GET parameters "targetIdx", "searchTerm", "snp_track" and,
    optionally, "region". A region ("<chr>:<start>-<end>", given either as the
    "region" parameter or as the search term) takes precedence, then a marker
    ("rs" followed by digits); anything else is searched as a gene name.

    request -- request providing the GET parameters and the user.
    url     -- not used in this function.

    Returns a JsonResponse: either {'error': ...} (with 'results' and 'cols'
    when a gene name matches several genes) or the interactions ("hic"),
    fragments, SNPs, genes and metadata of the segment that was found.
    '''
    queryDict = request.GET
    user = request.user
    targetIdx = queryDict.get("targetIdx")
    blueprint = {}
    hic = []
    addList = []
    searchType = 'gene'
    regionPattern = r"(.*):(\d+)-(\d+)"
    chrom = None
    # a missing searchTerm is treated as an empty one instead of failing on None
    rawSearchTerm = queryDict.get("searchTerm") or ""
    searchTerm = rawSearchTerm.upper()
    searchTerm = searchTerm.replace(",", "")
    searchTerm = searchTerm.replace("..", "-")
    searchTerm = searchTerm.replace(" ", "")  # Chris suggestion to prevent issue with spaces in queries
    snpTrack = queryDict.get("snp_track")

    (idx_keys_auth, idx_type_keys_auth) = get_authenticated_idx_and_idx_types(
                                            user=user, idx_keys=None, idx_type_keys=None)

    if snpTrack:
        mo = re.match(r"(.*)-(.*)", snpTrack)
        if mo is None:
            # without a "<group>-<track>" name the track cannot be checked against
            # the index types the user may access, so it is dropped
            snpTrack = None
        else:
            group = mo.group(1)
            if group != 'ud' and 'CP_STATS_'+group.upper()+'.'+snpTrack.upper() not in idx_type_keys_auth:
                snpTrack = None

    # NOTE(review): the entries stored below are keyed by 'CP_TARGET_'+target, so
    # this test looks true for any plain targetIdx. The loop also carries the
    # permission check, so it is deliberately left unchanged - confirm before
    # turning it into a real cache lookup.
    if targetIdx not in utils.tissues:
        for target in getattr(chicp_settings, 'CP_TARGET'):
            if 'CP_TARGET_'+target not in idx_keys_auth:
                if targetIdx == target:
                    retJSON = {'error': 'Sorry, you do not have permission to view this dataset.'}
                    return JsonResponse(retJSON)
                continue
            elasticJSON = Search(idx=ElasticSettings.idx('CP_TARGET_'+target)).get_mapping(mapping_type="gene_target")
            tissueList = list(elasticJSON[ElasticSettings.idx('CP_TARGET_'+target)]
                              ['mappings']['gene_target']['_meta']['tissue_type'].keys())
            utils.tissues['CP_TARGET_'+target] = tissueList

    if queryDict.get("region") or re.match(regionPattern, searchTerm):
        searchType = 'region'
        region = searchTerm
        if queryDict.get("region"):
            region = queryDict.get("region")
        else:
            searchTerm = ""
        mo = re.match(regionPattern, region)
        if mo is None:
            # only reachable with a malformed "region" parameter
            return JsonResponse({'error': 'Invalid region, expected the format chr:start-end.'})
        (chrom, segmin, segmax) = mo.group(1, 2, 3)
        chrom = chrom.replace('chr', "")
        chrom = chrom.replace('CHR', "")
    if re.search("^rs[0-9]+", searchTerm.lower()):
        searchTerm = searchTerm.lower()
        addList.append(_find_snp_position(snpTrack, searchTerm))
        if addList[0].get("error"):
            return JsonResponse({'error': addList[0]['error']})
        position = addList[0]['end']
        if searchType != 'region':
            searchType = 'snp'

    logger.warning("### "+searchType+" - "+searchTerm+' ###')

    if searchType == 'region':
        query_bool = BoolQuery()
        filter_bool = BoolQuery()
        # a gene term is only added when a "region" parameter came with a search
        # term that is neither a marker nor itself a region
        if searchTerm and len(addList) == 0 and re.match(regionPattern,
                                                         rawSearchTerm.replace(",", "")) is None:
            query_bool.must([Query.query_string(searchTerm, fields=["name", "ensg"]),
                             Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
        else:
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])

        query_bool = _add_tissue_filter(query_bool, targetIdx)

        if len(addList) > 0:
            # marker within a region: bait or other end must span the marker position
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])
        else:
            # bait or other end must lie completely inside the region
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", gte=segmin, lte=segmax),
                                                    RangeQuery("baitEnd", gte=segmin, lte=segmax)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", gte=segmin, lte=segmax),
                                                    RangeQuery("oeEnd", gte=segmin, lte=segmax)])])

        query = ElasticQuery.filtered_bool(query_bool, filter_bool,
                                           sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])
        (hic, v1, v2) = _build_hic_query(query, targetIdx, segmin, segmax)  # @UnusedVariable

        if "error" in hic:
            return JsonResponse(hic)
        if len(hic) == 0:
            retJSON = {'error': rawSearchTerm+' does not overlap any bait/target regions in this dataset.'}
            return JsonResponse(retJSON)

    elif searchType == 'snp':
        if len(addList) > 0:
            chrom = addList[0]['chr']

            query_bool = BoolQuery()
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
            query_bool = _add_tissue_filter(query_bool, targetIdx)

            filter_bool = BoolQuery()
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])

            query = ElasticQuery.filtered_bool(query_bool, filter_bool,
                                               sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])
            hic, segmin, segmax = _build_hic_query(query, targetIdx)

            if "error" in hic:
                return JsonResponse(hic)
            if len(hic) == 0:
                retJSON = {'error': 'Marker '+searchTerm+' does not overlap any bait/target regions in this dataset.'}
                return JsonResponse(retJSON)
    else:
        geneQuery = ElasticQuery.filtered(Query.match_all(), Filter(Query.match("gene_name", searchTerm).query_wrap()))
        # first request (size=0) is only used for the number of matching genes
        resultObj = Search(idx=getattr(chicp_settings, 'CP_GENE_IDX') + '/genes/',
                           search_query=geneQuery, size=0, qsort=Sort('seqid:asc,start')).search()
        if resultObj.hits_total > 1:
            geneResults = []
            resultObj2 = Search(idx=getattr(chicp_settings, 'CP_GENE_IDX') + '/genes/', search_query=geneQuery,
                                size=(resultObj.hits_total+1), qsort=Sort('seqid:asc,start')).search()

            docs = resultObj2.docs
            # [1:-1] drops the first and last character of the stored gene_id
            # (the quotes that are removed with replace() further down)
            gene_ids = [getattr(doc, 'attr')['gene_id'][1:-1] for doc in docs]

            # keep only the genes that are present in the target dataset
            query = ElasticQuery.filtered(Query.match_all(), TermsFilter.get_terms_filter('ensg', gene_ids))
            agg = Agg('ensg_agg', "terms", {"field": "ensg", "size": 0})
            res = Search(idx=ElasticSettings.idx('CP_TARGET_'+targetIdx), search_query=query, aggs=Aggs(agg),
                         size=0).search()

            ensg_count = res.aggs['ensg_agg'].get_buckets()
            gene_ids = [g['key'] for g in ensg_count]

            for d in resultObj2.docs:
                if getattr(d, "attr")["gene_id"].replace('\"', '') in gene_ids:
                    geneResults.append({
                        'gene_name': getattr(d, "attr")["gene_name"].replace('\"', ''),
                        'gene_id': getattr(d, "attr")["gene_id"].replace('\"', ''),
                        'location': "chr" + getattr(d, "seqid") + ":" +
                        locale.format_string("%d", getattr(d, "start"), grouping=True) + ".." +
                        locale.format_string("%d", getattr(d, "end"), grouping=True),
                    })

            if len(geneResults) == 0:
                retJSON = {'error': 'Gene name '+searchTerm+' not found in this dataset.'}
                return JsonResponse(retJSON)
            elif len(geneResults) > 1:
                retJSON = {
                    'error': 'Gene name <strong>'+searchTerm+'</strong> returns too many hits, please select your prefered result from the list below.',
                    'results': geneResults,
                    'cols': ['HGNC Symbol', 'Ensembl Gene ID', 'Location']
                }
                return JsonResponse(retJSON)

        query_bool = BoolQuery()
        query_bool.must([RangeQuery("dist", gte=-2e6, lte=2e6)])
        query_bool = _add_tissue_filter(query_bool, targetIdx)
        query = ElasticQuery.filtered_bool(Query.query_string(searchTerm, fields=["name", "ensg", "oeName"]),
                                           query_bool, sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])

        (hic, segmin, segmax) = _build_hic_query(query, targetIdx)

        if "error" in hic:
            return JsonResponse(hic)
        if len(hic) == 0:
            retJSON = {'error': 'Gene name '+searchTerm+' not found in this dataset.'}
            return JsonResponse(retJSON)
        chrom = hic[0]['baitChr']

    if chrom is None:
        retJSON = {'error': 'No chromosome defined for search'}
        return JsonResponse(retJSON)

    # get genes based on this segment
    genes = _build_gene_query(chrom, segmin, segmax)
    (snps, snpMeta) = _build_snp_query(snpTrack, chrom, segmin, segmax)
    frags = _build_frags_query(getattr(chicp_settings, 'DEFAULT_FRAG'), chrom, segmin, segmax)

    addList = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], addList)

    retJSON = {"hic": hic,
               "frags": frags,
               "meta": {"ostart": int(segmin),
                        "oend": int(segmax),
                        "rstart": 1,
                        "rend": int(segmax) - int(segmin),
                        "rchr": str(chrom),
                        "tissues": utils.tissues['CP_TARGET_'+targetIdx]},
               "snps": snps,
               "snp_meta": snpMeta,
               "genes": genes,
               "region": str(chrom) + ":" + str(segmin) + "-" + str(segmax),
               "blueprint": blueprint,
               "extra": addList
               }

    response = JsonResponse(retJSON)
    return response
Exemplo n.º 22
0
def _search_engine(query_dict, user_filters, user):
    ''' Run the search for the "query" value of query_dict and return a dict
    with the top hits, aggregations, mappings and the search settings used. '''
    user_query = query_dict.get("query")
    query = _gene_lookup(user_query)

    gene_sources = ['symbol', 'synonyms', "dbxrefs.*", 'biotype', 'description']
    marker_sources = ['id', 'rscurrent', 'rshigh']
    publication_sources = ['journal', 'title', 'tags.disease']
    disease_sources = ['name', 'code']
    study_sources = ['study_id', 'study_name']
    region_sources = ['region_name', 'marker']
    source_filter = (gene_sources + marker_sources + publication_sources +
                     disease_sources + study_sources + region_sources)

    # publication - a query of digits and spaces only is a possible PMID (or several)
    if re.findall(r'^[0-9 ]+$', query):
        source_filter.append('pmid')

    maxsize = int(user_filters.get("maxsize")) if user_filters.getlist("maxsize") else 20

    # build search_fields from user input filter fields
    search_fields = []
    for entry in user_filters.items():
        if len(entry) != 2 or entry[0] == 'query':
            continue
        parts = entry[1].split(":")
        n_parts = len(parts)
        if n_parts == 3:
            search_fields.append(".".join(parts[1:]))
        elif n_parts == 2:
            search_fields.append(parts[1])

    if not search_fields:
        # no field filters given: search the returned fields plus some extra ones
        search_fields = source_filter + ['abstract', 'authors.name',   # publication
                                         'authors', 'pmids',           # study
                                         'markers', 'genes']           # study/region
    source_filter += ['date', 'pmid', 'build_id', 'ref', 'alt', 'chr_band',
                      'disease_locus', 'disease_loci', 'region_id']

    idx_name = query_dict.get("idx")
    idx_dict = ElasticSettings.search_props(idx_name, user)
    query_filters = _get_query_filters(user_filters, user)

    highlight = Highlight(search_fields, pre_tags="<strong>", post_tags="</strong>", number_of_fragments=0)
    top_hits_props = {"size": maxsize, "_source": source_filter,
                      "highlight": highlight.highlight['highlight']}
    top_hits_agg = Agg('idx_top_hits', 'top_hits', top_hits_props)
    idx_agg = Agg("idxs", "terms", {"field": "_index"}, sub_agg=top_hits_agg)
    biotype_agg = Agg("biotypes", "terms", {"field": "biotype", "size": 0})
    category_agg = Agg("categories", "terms", {"field": "_type", "size": 0})
    aggs = Aggs([idx_agg, biotype_agg, category_agg])

    # create score functions
    score_fns = _build_score_functions(idx_dict)
    equery = BoolQuery(must_arr=Query.query_string(query, fields=search_fields),
                       should_arr=_auth_arr(user),
                       b_filter=query_filters,
                       minimum_should_match=1)
    scored_query = FunctionScoreQuery(equery, score_fns, boost_mode='replace')

    elastic = Search(search_query=ElasticQuery(scored_query), aggs=aggs, size=0,
                     idx=idx_dict['idx'], idx_type=idx_dict['idx_type'])
    result = elastic.search()

    mappings = elastic.get_mapping()
    _update_mapping_filters(mappings, result.aggs)
    _update_biotypes(user_filters, result)

    top_hits = _top_hits(result)
    return {
        'data': top_hits,
        'aggs': result.aggs,
        'query': user_query,
        'idx_name': idx_name,
        'fields': search_fields,
        'mappings': mappings,
        'hits_total': result.hits_total,
        'maxsize': maxsize,
        'took': result.took,
    }
Exemplo n.º 23
0
def chicpeaSearch(request, url):
    ''' Search a CHiCP target index by region, marker or gene name.

    Reads the GET parameters "targetIdx", "searchTerm", "snp_track" and,
    optionally, "region". A region ("<chr>:<start>-<end>", given either as the
    "region" parameter or as the search term) takes precedence, then a marker
    ("rs" followed by digits); anything else is searched as a gene name.

    request -- request providing the GET parameters.
    url     -- not used in this function.

    Returns a JsonResponse: either {'error': ...} or the interactions ("hic"),
    fragments, SNPs, genes and metadata of the segment that was found.
    '''
    queryDict = request.GET
    targetIdx = queryDict.get("targetIdx")
    blueprint = {}
    hic = []
    addList = []
    searchType = 'gene'
    regionPattern = r"(.*):(\d+)-(\d+)"
    chrom = None
    # a missing searchTerm is treated as an empty one instead of failing on None
    rawSearchTerm = queryDict.get("searchTerm") or ""
    searchTerm = rawSearchTerm.upper()

    if targetIdx not in utils.tissues:
        # store the tissue names found in the gene_target mapping of each target index
        for idx in getattr(chicp_settings, 'TARGET_IDXS'):
            elasticJSON = Search(idx=idx).get_mapping(mapping_type="gene_target")
            tissueList = list(elasticJSON[idx]['mappings']['gene_target']['_meta']['tissue_type'].keys())
            utils.tissues[idx] = tissueList

    if queryDict.get("region") or re.match(regionPattern, rawSearchTerm):
        searchType = 'region'
        region = rawSearchTerm
        if queryDict.get("region"):
            region = queryDict.get("region")
        else:
            searchTerm = ""
        mo = re.match(regionPattern, region)
        if mo is None:
            # only reachable with a malformed "region" parameter
            return JsonResponse({'error': 'Invalid region, expected the format chr:start-end.'})
        (chrom, segmin, segmax) = mo.group(1, 2, 3)
        chrom = chrom.replace('chr', "")
    if re.search("^rs[0-9]+", rawSearchTerm.lower()):
        searchTerm = rawSearchTerm.lower()
        addList.append(_find_snp_position(queryDict.get("snp_track"), searchTerm))
        if addList[0].get("error"):
            return JsonResponse({'error': addList[0]['error']})
        position = addList[0]['end']
        if searchType != 'region':
            searchType = 'snp'

    logger.warning("### "+searchType+" - "+searchTerm+' ###')

    if searchType == 'region':
        query_bool = BoolQuery()
        filter_bool = BoolQuery()
        # a gene term is only added when a "region" parameter came with a search
        # term that is neither a marker nor itself a region
        if searchTerm and len(addList) == 0 and re.match(regionPattern, rawSearchTerm) is None:
            query_bool.must([Query.query_string(searchTerm, fields=["name", "ensg"]),
                             Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
        else:
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])

        query_bool = _add_tissue_filter(query_bool, targetIdx)

        if len(addList) > 0:
            # marker within a region: bait or other end must span the marker position
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])
        else:
            # bait or other end must lie completely inside the region
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", gte=segmin, lte=segmax),
                                                    RangeQuery("baitEnd", gte=segmin, lte=segmax)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", gte=segmin, lte=segmax),
                                                    RangeQuery("oeEnd", gte=segmin, lte=segmax)])])

        query = ElasticQuery.filtered_bool(query_bool, filter_bool, sources=utils.hicFields + utils.tissues[targetIdx])
        (hic, v1, v2) = _build_hic_query(query, targetIdx, segmin, segmax)

        if len(hic) == 0:
            retJSON = {'error': rawSearchTerm+' does not overlap any bait/target regions in this dataset.'}
            return JsonResponse(retJSON)

    elif searchType == 'snp':
        if len(addList) > 0:
            chrom = addList[0]['chr']

            query_bool = BoolQuery()
            query_bool.must([Query.term("baitChr", chrom),
                             Query.term("oeChr", chrom),
                             RangeQuery("dist", gte=-2e6, lte=2e6)])
            query_bool = _add_tissue_filter(query_bool, targetIdx)

            filter_bool = BoolQuery()
            filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
                                                    RangeQuery("baitEnd", gte=position)]),
                                BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
                                                    RangeQuery("oeEnd", gte=position)])])

            query = ElasticQuery.filtered_bool(query_bool, filter_bool,
                                               sources=utils.hicFields + utils.tissues[targetIdx])
            hic, segmin, segmax = _build_hic_query(query, targetIdx)

            if len(hic) == 0:
                retJSON = {'error': 'Marker '+searchTerm+' does not overlap any bait/target regions in this dataset.'}
                return JsonResponse(retJSON)
    else:
        query_bool = BoolQuery()
        query_bool.must([RangeQuery("dist", gte=-2e6, lte=2e6)])
        query_bool = _add_tissue_filter(query_bool, targetIdx)
        query = ElasticQuery.filtered_bool(Query.query_string(searchTerm, fields=["name", "ensg", "oeName"]),
                                           query_bool, sources=utils.hicFields + utils.tissues[targetIdx])

        hic, segmin, segmax = _build_hic_query(query, targetIdx)

        if len(hic) == 0:
            retJSON = {'error': 'Gene name '+searchTerm+' not found in this dataset.'}
            return JsonResponse(retJSON)
        chrom = hic[0]['baitChr']

    if chrom is None:
        retJSON = {'error': 'No chromosome defined for search'}
        return JsonResponse(retJSON)

    # get genes based on this segment
    genes = _build_gene_query(chrom, segmin, segmax)
    (snps, snpMeta) = _build_snp_query(queryDict.get("snp_track"), chrom, segmin, segmax)
    frags = _build_frags_query(getattr(chicp_settings, 'DEFAULT_FRAG'), chrom, segmin, segmax)

    addList = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], addList)

    retJSON = {"hic": hic,
               "frags": frags,
               "meta": {"ostart": int(segmin),
                        "oend": int(segmax),
                        "rstart": 1,
                        "rend": int(segmax) - int(segmin),
                        "rchr": str(chrom),
                        "tissues": utils.tissues[targetIdx]},
               "snps": snps,
               "snp_meta": snpMeta,
               "genes": genes,
               "region": str(chrom) + ":" + str(segmin) + "-" + str(segmax),
               "blueprint": blueprint,
               "extra": addList
               }

    response = JsonResponse(retJSON)
    return response