Example #1
    def test_bulk(self):
        ''' Test the Bulk.load(). '''
        self.set_up()
        idx = IDX['MARKER']['indexName']
        elastic = Search(ElasticQuery(Query.match_all()), idx=idx)
        hits_total1 = elastic.get_count()['count']

        json_data = '{"index": {"_index": "%s", "_type": "%s"}}\n' % \
                    (idx, 'marker')
        json_data += json.dumps({"alt": "G", "start": 946, "seqid": "1", "filter": ".",
                                 "ref": "A", "id": "rsXXXXX", "qual": ".", "info": "RS=XXXXX"})
        resp = Bulk.load(idx, '', json_data)
        self.assertNotEquals(resp.status_code, 200)

        # note: the bulk body needs a trailing newline to work
        Bulk.load(idx, '', json_data + '\n')
        Search.index_refresh(idx)
        hits_total2 = elastic.get_count()['count']
        self.assertEquals(hits_total2, hits_total1+1, "contains documents")

        # produce errors by updating a doc id that doesn't exist
        json_data += '{"delete": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' % \
                     (idx, 'marker', 'XYZ')
        json_data += '{"update": {"_index": "%s", "_type": "%s", "_id": "%s"}}\n' % \
                     (idx, 'marker', 'XYZ')
        json_data += '{"doc": {"start": 100, "end": 200}}\n'
        resp = Bulk.load(idx, '', json_data)
        self.assertTrue('errors' in resp.json() and resp.json()['errors'])
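For reference, the bulk endpoint consumes newline-delimited JSON: an action line followed (for index actions) by a source line, and the whole payload must end with a newline, which is why the first Bulk.load() call above is expected to fail. A minimal sketch of a payload builder; the helper name is illustrative, not part of the library:

import json

def build_bulk_body(idx, idx_type, sources):
    ''' Build an NDJSON bulk body with the required trailing newline. '''
    lines = []
    for src in sources:
        lines.append(json.dumps({"index": {"_index": idx, "_type": idx_type}}))
        lines.append(json.dumps(src))
    return '\n'.join(lines) + '\n'  # trailing newline is mandatory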
Example #2
def setUpModule():
    ''' Change ini config (MY_INI_FILE) to use the test suffix when
    creating pipeline indices. '''
    ini_file = os.path.join(os.path.dirname(__file__), 'test_download.ini')
    if os.path.isfile(MY_INI_FILE):
        return

    with open(MY_INI_FILE, 'w') as new_file:
        with open(ini_file) as old_file:
            for line in old_file:
                new_file.write(line.replace('auto_tests', IDX_SUFFIX))

    '''load ensembl GTF and GENE_HISTORY'''
    INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
    idx = INI_CONFIG['ENSEMBL_GENE_GTF']['index']

    call_command('pipeline', '--steps', 'load', sections='GENE_HISTORY',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)
    call_command('pipeline', '--steps', 'stage', 'load', sections='ENSEMBL_GENE_GTF',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)
    Search.index_refresh(idx)
    call_command('pipeline', '--steps', 'load', sections='GENE2ENSEMBL',
                 dir=TEST_DATA_DIR, ini=MY_INI_FILE)

    Search.index_refresh(idx)
Example #4
 def test_pub_ini_file2(self):
     ''' Test publication pipeline with a list of PMIDs. '''
     out = StringIO()
     call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                  sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=out)
     INI_CONFIG = IniParser().read_ini(MY_PUB_INI_FILE)
     idx = INI_CONFIG['DISEASE']['index']
     Search.index_refresh(idx)
     query = ElasticQuery.query_string("test", fields=["tags.disease"])
     elastic = Search(query, idx=idx)
     docs = elastic.search().docs
     self.assertGreater(len(docs), 1)
Example #5
 def test_update_doc(self):
     ''' Update with a partial document. '''
     idx = IDX['MARKER']['indexName']
     docs = Search(ElasticQuery(Query.term("id", "rs2476601"), sources=['id']), idx=idx).search().docs
     self.assertEquals(len(docs), 1, "rs2476601 document")
     update_field = {"doc": {"start": 100, "end": 200}}
     Update.update_doc(docs[0], update_field)
     Search.index_refresh(IDX['MARKER']['indexName'])
     docs = Search(ElasticQuery(Query.term("id", "rs2476601")), idx=idx).search().docs
     self.assertEquals(len(docs), 1, "rs2476601 document")
     self.assertEquals(getattr(docs[0], 'start'), 100, "rs2476601 start")
     self.assertEquals(getattr(docs[0], 'end'), 200, "rs2476601 end")
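Update.update_doc() presumably wraps Elasticsearch's _update endpoint, which merges a {"doc": ...} partial document into the stored source. A hedged sketch of the raw equivalent (1.x/2.x URL layout with document types; host, index, type and id are placeholders, in the test above they come from the fetched document):

import json
import requests

# hypothetical index/type/id values
url = 'http://localhost:9200/test__marker/marker/rs2476601/_update'
resp = requests.post(url, data=json.dumps({"doc": {"start": 100, "end": 200}}))
assert resp.status_code == 200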
Example #6
    def test_region_idx_loader(self):
        ''' Test loader has created and populated indices.  '''

        key = 'PRIVATE_REGIONS_GFF'

        if key in IDX.keys():
            idx = IDX[key]['indexName']
            Search.index_refresh(idx)
            self.assertTrue(Search.index_exists(idx=idx),
                            'Index exists: ' + idx)
            ndocs = Search(idx=idx).get_count()['count']
            self.assertTrue(
                ndocs > 0,
                "Elastic count documents in " + idx + ": " + str(ndocs))
Example #7
    def test_gene_history_loader(self):
        """ Test the gene history loading. """
        call_command("pipeline", "--steps", "load", sections="GENE_HISTORY", dir=TEST_DATA_DIR, ini=MY_INI_FILE)

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["GENE_HISTORY"]["index"]
        idx_type = INI_CONFIG["GENE_HISTORY"]["index_type"]
        elastic = Search(idx=idx, idx_type=idx_type)
        Search.index_refresh(idx)

        self.assertTrue(elastic.get_count()["count"] > 1, "Count documents in the index")
        map1_props = Gene.gene_history_mapping(idx, idx_type, test_mode=True).mapping_properties
        map2_props = elastic.get_mapping()
        if idx not in map2_props:
            logger.error("MAPPING ERROR: " + json.dumps(map2_props))
        self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)
Example #8
    def test_marker_pipeline(self):
        """ Test marker pipeline. """
        call_command("pipeline", "--steps", "load", sections="DBSNP", dir=TEST_DATA_DIR, ini=MY_INI_FILE)

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["DBSNP"]["index"]
        idx_type = INI_CONFIG["DBSNP"]["index_type"]
        elastic = Search(idx=idx, idx_type=idx_type)
        Search.index_refresh(idx)
        self.assertGreater(elastic.get_count()["count"], 0)

        call_command("pipeline", "--steps", "load", sections="RSMERGEARCH", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        idx = INI_CONFIG["RSMERGEARCH"]["index"]
        idx_type = INI_CONFIG["RSMERGEARCH"]["index_type"]
        elastic = Search(idx=idx, idx_type=idx_type)
        Search.index_refresh(idx)
        self.assertGreater(elastic.get_count()["count"], 0)
Example #9
def setUpModule():
    ''' Run the index loading script to create test indices and
    create test repository '''
    for idx_kwargs in IDX.values():
        call_command('index_search', **idx_kwargs)

    # wait for the elastic load to finish
    for key in IDX:
        Search.index_refresh(IDX[key]['indexName'])

    for idx_kwargs in IDX_UPDATE.values():
        call_command('index_search', **idx_kwargs)

    # create test repository
    call_command('repository',
                 SnapshotTest.TEST_REPO,
                 dir=SnapshotTest.TEST_REPO_DIR)
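The repository command presumably registers a snapshot repository with Elasticsearch. For a shared-filesystem repository the raw equivalent (repo name and path are placeholders) would be:

import json
import requests

repo_body = {"type": "fs", "settings": {"location": "/tmp/test_snapshot_repo", "compress": True}}
resp = requests.put('http://localhost:9200/_snapshot/test_repo', data=json.dumps(repo_body))
# on ES 1.6+ the location must also be listed in the node's path.repo setting
assert resp.status_code == 200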
Example #10
    def test_delete_docs_by_query(self):
        ''' Test deleting docs using a query. '''
        self.set_up()
        idx = IDX['MARKER']['indexName']
        elastic = Search(ElasticQuery(Query.match_all()), idx=idx)
        hits_total1 = elastic.get_count()['count']
        self.assertGreater(hits_total1, 0, "contains documents")

        # delete single doc
        Delete.docs_by_query(idx, query=Query.term("id", "rs2476601"))
        Search.index_refresh(idx)
        hits_total2 = elastic.get_count()['count']
        self.assertEquals(hits_total2, hits_total1-1, "contains documents")

        # delete remaining docs
        Delete.docs_by_query(idx, 'marker')
        Search.index_refresh(idx)
        self.assertEquals(elastic.get_count()['count'], 0, "contains no documents")
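Delete.docs_by_query() maps onto what Elasticsearch 1.x exposed as the delete-by-query API: an HTTP DELETE on /{index}/_query with a query body (removed from core in 2.0). A sketch with placeholder host and index names:

import json
import requests

body = {"query": {"term": {"id": "rs2476601"}}}
requests.delete('http://localhost:9200/test__marker/_query', data=json.dumps(body))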
Example #11
 def setupIdx(cls, idx_name_arr):
     ''' Setup indices in the given array of key names (e.g. ['GENE', 'DISEASE', ...]). '''
     idx_settings = {
         "settings": {
             "analysis": {
                 "analyzer": {
                     "full_name": {"filter": ["standard", "lowercase"], "tokenizer": "keyword"}}
             },
             "number_of_shards": 1
         }
     }
     IDX = PydginTestSettings.IDX
     for name in idx_name_arr:
         requests.put(ElasticSettings.url() + '/' + IDX[name]['indexName'], data=json.dumps(idx_settings))
         call_command('index_search', **IDX[name])
     for name in idx_name_arr:
         # wait for the elastic load to finish
         Search.index_refresh(IDX[name]['indexName'])
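With the index created this way, the custom full_name analyzer (keyword tokenizer plus lowercase filter) can be inspected through the _analyze endpoint, which should return a single lower-cased token for a multi-word name. A sketch using 1.x-style query parameters and a placeholder index name:

import requests

resp = requests.get('http://localhost:9200/test__gene/_analyze',
                    params={'analyzer': 'full_name', 'text': 'Inflammatory Bowel Disease'})
print(resp.json()['tokens'])  # expect one token: 'inflammatory bowel disease'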
Example #12
    def test_doc_auth(self):
        ''' Test private documents are not returned in the search. '''
        idx = PydginTestSettings.IDX['MARKER']['indexName']
        docs = Search(ElasticQuery(Query.match_all(), sources=['id']), idx=idx, size=1).search().docs
        self.assertEquals(len(docs), 1, "MARKER document")
        marker_id = getattr(docs[0], 'id')

        url = reverse('search_page')
        resp = self.client.post(url+'?idx=ALL&query='+marker_id)
        nhits1 = resp.context['hits_total']
        self.assertGreater(nhits1, 0, 'search hits > 0')
        # update document to be in DIL
        update_field = {"doc": {"group_name": "DIL"}}
        Update.update_doc(docs[0], update_field)
        Search.index_refresh(PydginTestSettings.IDX['MARKER']['indexName'])

        resp = self.client.post(url+'?idx=ALL&query='+marker_id)
        nhits2 = resp.context['hits_total']
        self.assertEqual(nhits1-1, nhits2, 'private document hidden')
Example #13
    def test_doc_auth(self):
        idx = PydginTestSettings.IDX["STUDY_HITS"]["indexName"]
        docs = Search(ElasticQuery(Query.match_all(), sources=["chr_band", "marker"]), idx=idx, size=1).search().docs
        self.assertEquals(len(docs), 1, "STUDY_HITS document")
        marker_id = getattr(docs[0], "marker")

        url = reverse("search_page")
        resp = self.client.post(url + "?idx=ALL&query=" + marker_id)
        nhits1 = resp.context["hits_total"]
        self.assertGreater(nhits1, 0, "search hits > 0")
        # update document to be in DIL
        update_field = {"doc": {"group_name": "DIL"}}
        Update.update_doc(docs[0], update_field)
        Search.index_refresh(PydginTestSettings.IDX["STUDY_HITS"]["indexName"])

        url = reverse("search_page")
        resp = self.client.post(url + "?idx=ALL&query=" + marker_id)
        nhits2 = resp.context["hits_total"]
        self.assertEqual(nhits1 - 1, nhits2, "private document hidden")
Example #14
    def test_mapping_parent_child(self):
        ''' Test creating mapping with parent child relationship. '''
        gene_mapping = MappingProperties("gene")
        gene_mapping.add_property("symbol", "string", analyzer="full_name")
        inta_mapping = MappingProperties("publication", "gene")
        load = Loader()
        idx = "test__mapping__"+SEARCH_SUFFIX
        options = {"indexName": idx, "shards": 1}
        requests.delete(ElasticSettings.url() + '/' + idx)

        # add child mappings first
        status = load.mapping(inta_mapping, "publication", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping inteactions")
        status = load.mapping(gene_mapping, "gene", analyzer=Loader.KEYWORD_ANALYZER, **options)
        self.assertTrue(status, "mapping genes")

        ''' Load docs and test has_parent query. '''
        json_data = '{"index": {"_index": "%s", "_type": "gene", "_id" : "1"}}\n' % idx
        json_data += json.dumps({"symbol": "PAX1"}) + '\n'
        json_data += '{"index": {"_index": "%s", "_type": "publication", "_id" : "2", "parent": "1"}}\n' % idx
        json_data += json.dumps({"pubmed": 1234}) + '\n'
        Bulk.load(idx, '', json_data)
        Search.index_refresh(idx)
        query = ElasticQuery.has_parent('gene', Query.match('symbol', 'PAX1'))
        elastic = Search(query, idx=idx, idx_type='publication', size=500)
        docs = elastic.search().docs
        self.assertEquals(len(docs), 1)
        self.assertEquals(getattr(docs[0], 'pubmed'), 1234)
        self.assertEquals(docs[0].parent(), '1')
        self.assertRaises(QueryError, ElasticQuery.has_parent, 'gene', 'xxxxx')

        ''' Test has_child query. '''
        query = ElasticQuery.has_child('publication', Query.match('pubmed', 1234))
        elastic = Search(query, idx=idx, idx_type='gene', size=500)
        docs = elastic.search().docs
        self.assertEquals(len(docs), 1)
        self.assertEquals(getattr(docs[0], 'symbol'), 'PAX1')
        self.assertEquals(docs[0].parent(), None)
        requests.delete(ElasticSettings.url() + '/' + idx)
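For comparison, the body that ElasticQuery.has_parent() is expected to serialise to in 1.x/2.x looks like the following; has_child is symmetric, taking "type" with the child type name instead of "parent_type":

has_parent_query = {
    "query": {
        "has_parent": {
            "parent_type": "gene",
            "query": {"match": {"symbol": "PAX1"}}
        }
    }
}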
Example #15
    def test_create_restore_delete_snapshot(self):
        self.wait_for_running_snapshot()
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        repo = SnapshotTest.TEST_REPO

        # create a snapshot
        call_command('snapshot',
                     snapshot,
                     indices=IDX['MARKER']['indexName'],
                     repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot),
                        "Created snapshot " + snapshot)
        # snapshot already exists, so create_snapshot returns False
        self.assertFalse(
            Snapshot.create_snapshot(repo, snapshot,
                                     IDX['MARKER']['indexName']))

        # delete index
        requests.delete(ElasticSettings.url() + '/' +
                        IDX['MARKER']['indexName'])
        self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']),
                         "Removed index")
        # restore from snapshot
        call_command('restore_snapshot',
                     snapshot,
                     repo=repo,
                     indices=IDX['MARKER']['indexName'])
        Search.index_refresh(IDX['MARKER']['indexName'])
        self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']),
                        "Restored index exists")

        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot),
                         "Deleted snapshot " + snapshot)
Example #16
def setUpModule():
    ''' Load test indices (marker, gff, nested json) '''
    call_command('index_search', **IDX['MARKER'])
    call_command('index_search', **IDX['GFF_GENERIC'])
    call_command('index_search', **IDX['JSON_NESTED'])

    # wait for the elastic load to finish
    Search.index_refresh(IDX['MARKER']['indexName'])
    Search.index_refresh(IDX['GFF_GENERIC']['indexName'])
    Search.index_refresh(IDX['JSON_NESTED']['indexName'])
Example #17
    def test_elastic_group_name(self):
        '''
        Testing the workflow defined in: https://killin.cimr.cam.ac.uk/nextgensite/2015/08/05/region-authorization/
        Testing various elastic queries

        idx doc:
         "_source":{"attr": {"region_id": "803", "group_name": "[\"DIL\"]", "Name": "4q27"},
         "seqid": "chr4", "source": "immunobase", "type": "region",
         "score": ".", "strand": ".", "phase": ".", "start": 122061159, "end": 122684373}
        idx_query:
        Private(in given group) OR Public
        -d '{"query":{"filtered":{"filter":{"bool": {
                                            "should": [
                                                        {"terms": {"group_name":["dil"]}},
                                                        { "missing": { "field": "group_name"   }}
                                                      ]
                                                    }}}}}'
        Private(in given group):
        -d '{"query":{"filtered":{"filter":{"terms":{"group_name":["dil"]}}}}}'
        Public:
        -d '{"query": {"filtered": {"filter": {"missing": {"field": "group_name"}},
                                    "query": {"term": {"match_all": "{}"}}}}}'
        '''
        # get the groups for the given user
        response = self.client.post('/accounts/login/', {
            'username': '******',
            'password': '******'
        })
        self.assertEqual(response.status_code, 200)

        logged_in_user = User.objects.get(
            id=self.client.session['_auth_user_id'])
        if logged_in_user and logged_in_user.is_authenticated():
            user_groups = get_user_groups(logged_in_user)
            self.assertTrue('READ' in user_groups,
                            "user present in READ group")
            # make sure the user is not yet in DIL group
            self.assertFalse('DIL' in user_groups,
                             "user not present in DIL group")

        group_names = get_user_groups(logged_in_user)
        if 'READ' in group_names: group_names.remove('READ')  # @IgnorePep8
        group_names = [x.lower() for x in group_names]
        self.assertTrue(len(group_names) == 0, "No group present")

        # Match all query, as there is no group we do a match all
        query = ElasticQuery(Query.match_all())
        expected_query_string = {"query": {"match_all": {}}}
        self.assertJSONEqual(json.dumps(query.query),
                             json.dumps(expected_query_string),
                             "Query string matched")

        Search.index_refresh(self.index_name)
        elastic = Search(query, idx=self.index_name)
        docs = elastic.search().docs
        self.assertTrue(
            len(docs) == 12,
            "Elastic string query retrieved all public regions")

        # Filtered query for group names, add the user to DIL group and get the query string
        self.dil_group = Group.objects.create(name='DIL')
        logged_in_user.groups.add(self.dil_group)
        group_names = get_user_groups(logged_in_user)
        if 'READ' in group_names: group_names.remove('READ')  # @IgnorePep8
        group_names = [x.lower() for x in group_names]
        self.assertTrue(len(group_names) > 0, "At least one group present")
        self.assertTrue("dil" in group_names, "DIL group present")

        # retrieves all docs with missing field group_name - 11 docs
        terms_filter = TermsFilter.get_missing_terms_filter(
            "field", "attr.group_name")
        query = ElasticQuery.filtered(Query.match_all(), terms_filter)
        elastic = Search(query, idx=self.index_name)
        docs = elastic.search().docs
        self.assertTrue(
            len(docs) == 11,
            "Elastic string query retrieved all public regions")

        # build filtered boolean query to bring all public docs + private docs 11+1 = 12 docs
        query_bool = BoolQuery()
        query_bool.should(Query.missing_terms("field", "group_name")) \
                  .should(Query.terms("group_name", group_names).query_wrap())

        query = ElasticQuery.filtered_bool(Query.match_all(), query_bool)
        elastic = Search(query, idx=self.index_name)
        docs = elastic.search().docs
        self.assertTrue(
            len(docs) == 12,
            "Elastic string query retrieved both public + private regions")

        terms_filter = TermsFilter.get_terms_filter("attr.group_name",
                                                    group_names)
        query = ElasticQuery.filtered(Query.match_all(), terms_filter)
        elastic = Search(query, idx=self.index_name)
        docs = elastic.search().docs
        self.assertTrue(
            len(docs) == 1,
            "Elastic string query retrieved one private region")
        self.assertEqual(docs[0].attr['Name'], "4q27", "type matched region")
        self.assertEqual(docs[0].attr['region_id'], "803",
                         "type matched region")
        self.assertEqual(docs[0].attr['group_name'], "[\"DIL\"]",
                         "type matched region")
Example #18
 def set_up(self):
     ''' Load test indices (marker) '''
     call_command('index_search', **IDX['MARKER'])
     # wait for the elastic load to finish
     Search.index_refresh(IDX['MARKER']['indexName'])
Example #19
def setUpModule():
    ''' Load test indices (marker) '''
    call_command('index_search', **IDX['MARKER'])
    Search.index_refresh(IDX['MARKER']['indexName'])
Example #20
    def test_gene_pipeline(self):
        """ Test gene pipeline. """

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["ENSEMBL_GENE_GTF"]["index"]
        idx_type = INI_CONFIG["ENSEMBL_GENE_GTF"]["index_type"]

        """ 1. Test ensembl GTF loading. """
        call_command(
            "pipeline", "--steps", "stage", "load", sections="ENSEMBL_GENE_GTF", dir=TEST_DATA_DIR, ini=MY_INI_FILE
        )
        Search.index_refresh(idx)

        elastic = Search(idx=idx, idx_type=idx_type)
        self.assertGreaterEqual(elastic.get_count()["count"], 1, "Count documents in the index")
        map1_props = Gene.gene_mapping(idx, idx_type, test_mode=True).mapping_properties
        map2_props = elastic.get_mapping()
        if idx not in map2_props:
            logger.error("MAPPING ERROR: " + json.dumps(map2_props))
        self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)

        """ 2. Test adding entrez ID to documents """
        call_command("pipeline", "--steps", "load", sections="GENE2ENSEMBL", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        query = ElasticQuery.query_string("PTPN22", fields=["symbol"])
        elastic = Search(query, idx=idx)
        docs = elastic.search().docs
        self.assertEqual(len(docs), 1)
        self.assertTrue("entrez" in getattr(docs[0], "dbxrefs"))
        self.assertEqual(getattr(docs[0], "dbxrefs")["entrez"], "26191")

        """ 3. Add uniprot and fill in missing entrez fields. """
        call_command(
            "pipeline", "--steps", "download", "load", sections="ENSMART_GENE", dir=TEST_DATA_DIR, ini=MY_INI_FILE
        )
        Search.index_refresh(idx)
        query = ElasticQuery.query_string("DNMT3L", fields=["symbol"])
        elastic = Search(query, idx=idx)
        docs = elastic.search().docs
        self.assertTrue("entrez" in getattr(docs[0], "dbxrefs"))
        self.assertTrue("swissprot" in getattr(docs[0], "dbxrefs"))

        """ 4. Add gene synonyms and dbxrefs. """
        call_command("pipeline", "--steps", "load", sections="GENE_INFO", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        query = ElasticQuery.query_string("PTPN22", fields=["symbol"])
        elastic = Search(query, idx=idx)
        docs = elastic.search().docs
        self.assertTrue("PTPN8" in getattr(docs[0], "synonyms"))

        """ 5. Add PMIDs to gene docs. """
        call_command("pipeline", "--steps", "load", sections="GENE_PUBS", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        query = ElasticQuery.query_string("PTPN22", fields=["symbol"])
        elastic = Search(query, idx=idx)
        docs = elastic.search().docs
        self.assertGreater(len(getattr(docs[0], "pmids")), 0)

        """ 6. Add ortholog data. """
        call_command("pipeline", "--steps", "load", sections="ENSMART_HOMOLOG", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        query = ElasticQuery.query_string("PTPN22", fields=["symbol"])
        elastic = Search(query, idx=idx)
        docs = elastic.search().docs
        dbxrefs = getattr(docs[0], "dbxrefs")
        self.assertTrue("orthologs" in dbxrefs, dbxrefs)
        self.assertTrue("mmusculus" in dbxrefs["orthologs"], dbxrefs)
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])

        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", ["ENSMUSG00000027843"]),
        )
        docs = Search(query, idx=idx, size=1).search().docs
        self.assertEqual(len(docs), 1)

        """ 7. Add mouse ortholog link to MGI """
        call_command("pipeline", "--steps", "load", sections="ENSEMBL2MGI", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = Search(query, idx=idx, size=1).search().docs
        dbxrefs = getattr(docs[0], "dbxrefs")
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])
        self.assertEqual("107170", dbxrefs["orthologs"]["mmusculus"]["MGI"])
        "tier": tier,
        "species": species,
        "tags": {"weight": weight},
        "locus_id": locus_id,
        "seqid": seqid,
        "hits": doc_ids
    }
    #    "suggest": {"input": [disease+" "+regionName, regionName], "weight": weight}
    resp = requests.put(ElasticSettings.url()+'/' + idx+'/disease_locus/'+locus_id, data=json.dumps(data))
    if resp.status_code != 201:
        print(str(resp.content))
        print("Problem loading "+getattr(doc, "disease")+" "+regionName)
    else:
        print("Loaded "+locus_id+" - "+regionName)

Search.index_refresh(idx)
diseases_by_seqid = Agg('diseases_by_seqid', 'terms', {"size": 0, "field": "disease"})
disease_hits = Agg('disease_hits', 'reverse_nested', {}, sub_agg=diseases_by_seqid)
seq_hits = Agg('seq_hits', 'terms', {'field': 'build_info.seqid', 'size': 0}, sub_agg=disease_hits)
build_info = Agg('build_info', 'nested', {"path": 'build_info'}, sub_agg=[seq_hits])

qnested = ElasticQuery(Query.nested('build_info', Query.term("build_info.build", build)))
elastic = Search(qnested, idx=idx, aggs=Aggs(build_info), search_type='count')
resultObj = elastic.search()
resultAggs = resultObj.aggs
seq_hits = getattr(resultAggs['build_info'], 'seq_hits')['buckets']

for chr_bucket in seq_hits:
    seqid = chr_bucket['key'].upper()

    for disease_bucket in chr_bucket['disease_hits']['diseases_by_seqid']['buckets']:
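The Agg chain above should serialise to a nested aggregation of roughly this shape (on 1.x, "size": 0 on a terms aggregation meant "return all buckets"):

aggs_body = {
    "aggs": {
        "build_info": {
            "nested": {"path": "build_info"},
            "aggs": {
                "seq_hits": {
                    "terms": {"field": "build_info.seqid", "size": 0},
                    "aggs": {
                        "disease_hits": {
                            "reverse_nested": {},
                            "aggs": {
                                "diseases_by_seqid": {"terms": {"field": "disease", "size": 0}}
                            }
                        }
                    }
                }
            }
        }
    }
}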
Example #23
def setUpModule():
    ''' Load test indices (marker) '''
    call_command('index_search', **IDX['MARKER'])
    call_command('index_search', **IDX['MARKER_RS_HISTORY'])
    call_command('index_search', **IDX['JSON_MARKER_IC'])
    Search.index_refresh(IDX['MARKER']['indexName'])