def test_string_query(self):
    """Test building and running a string query.

    Searches the ``id`` field of the index returned by
    ``ElasticSettings.idx('DEFAULT')`` for ``rs2476601`` and expects exactly
    one document. Also checks that ``ElasticQuery.query_string`` raises
    ``QueryError`` when given an unrecognised keyword argument.
    """
    query = ElasticQuery.query_string("rs2476601", fields=["id"])
    elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'))
    docs = elastic.search()
    # assertEqual reports the actual document count when the check fails.
    self.assertEqual(len(docs.docs), 1, "Elastic string query retrieved marker (rs2476601)")
    # 'fieldssss' is deliberately misspelt to trigger the QueryError.
    self.assertRaises(QueryError, ElasticQuery.query_string, "rs2476601", fieldssss=["id"])
    def test_scan_and_scroll(self):
        """Test the scan and scroll interface, first without and then with a query."""
        def check_hits(resp_json):
            """Assert that a response has a 'hits' entry holding at least one hit.

            Passed to ``ScanAndScroll.scan_and_scroll`` as ``call_fun``;
            presumably invoked with each decoded response -- confirm against
            the ScanAndScroll implementation.
            """
            # assertIn shows the response content if the key is missing.
            self.assertIn('hits', resp_json, 'scan and scroll hits')
            self.assertGreaterEqual(len(resp_json['hits']['hits']), 1)

        ScanAndScroll.scan_and_scroll(ElasticSettings.idx('DEFAULT'), call_fun=check_hits)
        ScanAndScroll.scan_and_scroll(ElasticSettings.idx('DEFAULT'), call_fun=check_hits,
                                      query=ElasticQuery.query_string("rs2476601", fields=["id"]))
# Example no. 3
# 0
 def test_pub_ini_file2(self):
     """Test the publication pipeline with a list of PMIDs.

     Runs the 'load' step of the 'publications' command for the
     DISEASE::TEST section, refreshes the index named in the ini file and
     expects more than one document whose ``tags.disease`` matches "test".
     """
     captured = StringIO()  # receives the command's stdout
     call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                  sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=captured)

     ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
     disease_idx = ini_config['DISEASE']['index']
     Search.index_refresh(disease_idx)

     disease_query = ElasticQuery.query_string("test", fields=["tags.disease"])
     hits = Search(disease_query, idx=disease_idx).search().docs
     self.assertGreater(len(hits), 1)
    def test_gene_pipeline(self):
        """Test the gene pipeline, one ini section at a time.

        Each step runs the 'pipeline' management command for one section of
        MY_INI_FILE against TEST_DATA_DIR, refreshes the gene index and then
        checks the resulting documents. The steps run in order against the
        same index.
        """
        ini_config = IniParser().read_ini(MY_INI_FILE)
        idx = ini_config["ENSEMBL_GENE_GTF"]["index"]
        idx_type = ini_config["ENSEMBL_GENE_GTF"]["index_type"]

        def run_pipeline(section, *steps):
            """Run the given pipeline steps for one ini section, then refresh the index."""
            call_command("pipeline", "--steps", *steps, sections=section, dir=TEST_DATA_DIR, ini=MY_INI_FILE)
            Search.index_refresh(idx)

        def docs_for_symbol(symbol):
            """Return the documents whose 'symbol' field matches the given string query."""
            symbol_query = ElasticQuery.query_string(symbol, fields=["symbol"])
            return Search(symbol_query, idx=idx).search().docs

        # 1. Test ensembl GTF loading.
        run_pipeline("ENSEMBL_GENE_GTF", "stage", "load")

        elastic = Search(idx=idx, idx_type=idx_type)
        self.assertGreaterEqual(elastic.get_count()["count"], 1, "Count documents in the index")
        expected_props = Gene.gene_mapping(idx, idx_type, test_mode=True).mapping_properties
        actual_mapping = elastic.get_mapping()
        if idx not in actual_mapping:
            # Log the unexpected mapping for diagnosis; the lookup below then raises KeyError.
            logger.error("MAPPING ERROR: %s", json.dumps(actual_mapping))
        self._cmpMappings(actual_mapping[idx]["mappings"], expected_props, idx_type)

        # 2. Test adding entrez ID to documents.
        run_pipeline("GENE2ENSEMBL", "load")
        docs = docs_for_symbol("PTPN22")
        self.assertEqual(len(docs), 1)
        self.assertIn("entrez", docs[0].dbxrefs)
        self.assertEqual(docs[0].dbxrefs["entrez"], "26191")

        # 3. Add uniprot and fill in missing entrez fields.
        run_pipeline("ENSMART_GENE", "download", "load")
        docs = docs_for_symbol("DNMT3L")
        self.assertIn("entrez", docs[0].dbxrefs)
        self.assertIn("swissprot", docs[0].dbxrefs)

        # 4. Add gene synonyms and dbxrefs.
        run_pipeline("GENE_INFO", "load")
        docs = docs_for_symbol("PTPN22")
        self.assertIn("PTPN8", docs[0].synonyms)

        # 5. Add PMIDs to gene docs.
        run_pipeline("GENE_PUBS", "load")
        docs = docs_for_symbol("PTPN22")
        self.assertGreater(len(docs[0].pmids), 0)

        # 6. Add ortholog data.
        run_pipeline("ENSMART_HOMOLOG", "load")
        docs = docs_for_symbol("PTPN22")
        dbxrefs = docs[0].dbxrefs
        self.assertIn("orthologs", dbxrefs)
        self.assertIn("mmusculus", dbxrefs["orthologs"])
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])

        # The same gene must also be retrievable through a terms filter on the ortholog ID.
        ortholog_query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", ["ENSMUSG00000027843"]),
        )
        docs = Search(ortholog_query, idx=idx, size=1).search().docs
        self.assertEqual(len(docs), 1)

        # 7. Add mouse ortholog link to MGI (re-uses the ortholog filter query from step 6).
        run_pipeline("ENSEMBL2MGI", "load")
        docs = Search(ortholog_query, idx=idx, size=1).search().docs
        dbxrefs = docs[0].dbxrefs
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])
        self.assertEqual("107170", dbxrefs["orthologs"]["mmusculus"]["MGI"])
 def test_string_query_with_wildcard_and_highlight(self):
     """Test a wildcard string query with highlighting on the ``id`` field.

     Requests up to five documents matching ``rs*`` and expects more than
     one to be returned.
     """
     highlight = Highlight("id", pre_tags="<strong>", post_tags="</strong>")
     query = ElasticQuery.query_string("rs*", fields=["id"], highlight=highlight)
     search = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
     # assertGreater reports the actual document count when the check fails.
     self.assertGreater(len(search.search().docs), 1, "Elastic string query retrieved marker (rs*)")
 def test_string_query_with_wildcard(self):
     """Test a wildcard string query on the ``id`` field.

     Runs ``rs*`` with a result size of five and expects the reported total
     number of hits to be greater than one.
     """
     query = ElasticQuery.query_string("rs*", fields=["id"])
     elastic = Search(query, idx=ElasticSettings.idx('DEFAULT'), size=5)
     result = elastic.search()
     # assertGreater reports the actual hit total when the check fails.
     self.assertGreater(result.hits_total, 1, "Elastic string query retrieved marker (rs*)")