def get(self, term): """ Returns list of matching concepts or entities using lexical search """ args = simple_parser.parse_args() q = GolrSearchQuery(term, user_agent=USER_AGENT, **args) results = q.search() return results
def get(self, term): """ Returns list of matching concepts or entities using lexical search """ args = simple_parser.parse_args() q = GolrSearchQuery(term, **args) results = q.autocomplete() return results
def get(self, term): """ Returns list of matching concepts or entities using lexical search """ args = simple_parser.parse_args() args['fq_string'] = copy.copy(args['fq']) args['fq'] = {} q = GolrSearchQuery(term, user_agent=USER_AGENT, **args) results = q.autocomplete() return results
def test_search_go_ontol():
    q = GolrSearchQuery("transport*", category='ontology_class', is_go=True)
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
def test_search():
    q = GolrSearchQuery("abnormal")
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
def test_search_go_all():
    q = GolrSearchQuery("transport*", is_go=True)
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
    print(str(results['facet_counts']))
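# The three tests above repeat the same build/inspect/exec pattern; a
# hypothetical helper like this (not part of the original suite) would
# factor it out:
def run_search(term, **kwargs):
    q = GolrSearchQuery(term, **kwargs)
    results = q.exec()
    assert len(results['docs']) > 0
    return results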
def clean_feature_ids(id: str) -> str:
    """
    MME queries often need to be sanitized before going into owlsim,
    for example:
        MIM:610536 -> OMIM:610536
        SHH -> HGNC:10848
    """
    if ':' in id:
        # maxsplit=1 so identifiers containing extra colons still unpack
        prefix, reference = id.split(':', 1)
        if prefix == 'MIM':
            id = 'OMIM:' + reference
    else:
        # Assume it's a label and look it up;
        # assume it's human, and make sure it's an exact match
        query = GolrSearchQuery(id, taxon=['NCBITaxon:9606'], min_match="100%")
        results = query.search()
        if results.docs:
            id = results.docs[0]['id']
    return id
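# Illustrative behaviour of clean_feature_ids; the HGNC mapping assumes
# the Monarch index resolves the label, per the docstring example:
assert clean_feature_ids('MIM:610536') == 'OMIM:610536'
assert clean_feature_ids('HP:0003202') == 'HP:0003202'  # already a CURIE, unchanged
# clean_feature_ids('SHH') -> 'HGNC:10848' (exact-match label lookup, needs a live index)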
def get_concepts():
    keywords = request.args.get('keywords', None)
    semanticGroups = request.args.get('semanticGroups', None)
    pageSize = int(request.args.get('pageSize', 1))
    pageNumber = int(request.args.get('pageNumber', 1))

    validatePagination(pageSize, pageNumber)
    validateKeywords(keywords)

    q = GolrSearchQuery(term=keywords,
                        category=build_categories(semanticGroups),
                        rows=pageSize,
                        start=getStartIndex(pageNumber, pageSize))
    results = q.exec()

    concepts = []
    for d in results['docs']:
        concept = parse_concept(d)
        concepts.append(concept)
    return jsonify(concepts)
def get_concept(conceptId):
    if conceptId.startswith("biolink"):
        conceptId = objectId(conceptId)
    results = GolrSearchQuery(term=conceptId, fq={'id': conceptId},
                              rows=1, hl=False).exec()
    c = None
    for d in results['docs']:
        c = parse_concept(d)
        break
    return c
def find_exactmatches(conceptId):
    """
    Returns a list of concept IDs that are exact matches for the given conceptId
    """
    results = GolrSearchQuery(term=conceptId, fq={'id': conceptId},
                              rows=1, hl=False).exec()
    docs = results['docs']
    for d in docs:
        if get_concept_property(d, 'id') == conceptId:
            exactmatches = get_concept_property(d, 'equivalent_curie')
            if exactmatches is None:
                exactmatches = []  # just in case this property is empty
            exactmatches.append(conceptId)
            return exactmatches
    return []
def get_concept_details(conceptId):
    if conceptId.startswith("biolink"):
        conceptId = objectId(conceptId)
    results = GolrSearchQuery(term=conceptId, fq={'id': conceptId},
                              rows=1, hl=False).exec()
    entries = []
    for d in results['docs']:
        c = parse_concept(d)
        details = {}
        details['iri'] = get_concept_property(d, 'iri')
        details['taxon'] = get_concept_property(d, 'taxon')
        details['taxon_label'] = get_concept_property(d, 'taxon_label')
        details['taxon_label_synonym'] = get_concept_property(d, 'taxon_label_synonym')
        if details['taxon_label_synonym'] is not None:
            details['taxon_label_synonym'] = ', '.join(details['taxon_label_synonym'])
        c['details'] = [{'tag': k, 'value': v}
                        for k, v in details.items() if v is not None]
        entries += [c]
    return jsonify(entries)
def test_cursor(): """ Tests rows and start parameters. First fetch 100 docs, then same query but iterate with cursor in increments of ten. The two sets of IDs returned should be identicial """ q = GolrSearchQuery("abnormal", rows=100) results = q.exec() docs = results['docs'] ids = set([d['id'] for d in docs]) print('Init ids={}'.format(ids)) assert len(ids) == 100 matches = set() for i in range(0,10): q = GolrSearchQuery("abnormal", start=i*10, rows=10) docs = q.exec()['docs'] next_ids = [d['id'] for d in docs] assert len(next_ids) == 10 print('Next ids (from {}) = {}'.format(i*10, next_ids)) matches.update(next_ids) assert len(matches) == 100 assert len(matches.intersection(ids)) == 100
def main(): """ Wrapper for OGR """ parser = argparse.ArgumentParser( description='Command line interface to python-ontobio.golr library' """ Provides command line interface onto the ontobio.golr python library, a high level abstraction layer over Monarch and GO solr indices. """, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('-A', '--associations', dest='associations', action='store_true', default=False, help='Path to output file') parser.add_argument('-s', '--settings', type=str, help='Path to config file') parser.add_argument('-o', '--outfile', type=str, required=False, help='Path to output file') parser.add_argument('-f', '--facets', type=str, required=False, help='Facet fields: comma-delimited') parser.add_argument('-q', '--fq', type=json.loads, default={}, required=False, help='Facet query (solr fq) - should be json') parser.add_argument( '-Q', '--qargs', type=json.loads, default={}, required=False, help='Query to be passed directly to python golr_associations query') parser.add_argument('-l', '--legacy_solr', dest='legacy_solr', action='store_true', default=False, help='Set for legacy solr schema (solr3 golr)') parser.add_argument('-u', '--url', type=str, required=False, help='Solr URL. E.g. http://localhost:8983/solr/golr') parser.add_argument('-v', '--verbosity', default=0, action='count', help='Increase output verbosity') parser.add_argument('search', type=str, help='Search terms') args = parser.parse_args() if args.verbosity >= 2: logging.basicConfig(level=logging.DEBUG) elif args.verbosity == 1: logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.WARNING) logging.info("Welcome!") facets = [] if args.facets is not None: facets = args.facets.split(",") config = None if args.settings is not None: from ontobio.config import load_config config = load_config(args.settings) results = None if args.associations: q = None if args.search != '%': q = args.search q = GolrAssociationQuery(q=q, is_go=args.legacy_solr, fq=args.fq, facet_fields=facets, url=args.url) results = q.exec() #print("RESULTS={}".format(results)) docs = results['associations'] print("RESULTS: {}".format(len(docs))) for r in docs: print(str(r)) else: logging.info("FQ={}".format(args.fq)) q = GolrSearchQuery(args.search, is_go=args.legacy_solr, fq=args.fq, facet_fields=facets, url=args.url) results = q.exec() #print("RESULTS={}".format(results)) docs = results['docs'] print("RESULTS: {}".format(len(docs))) for r in docs: print(" {} '{}' {} // {}".format(r['id'], r['label'], r['score'], r['category'])) if len(facets) > 0: #from collections import OrderedDict fcs = results['facet_counts'] for f in facets: d = fcs[f] print(str(d)) print("## FACET: {}".format(f)) for k, v in sorted(d.items(), key=lambda t: -t[1]): print(" {:5d}: {}".format(v, k))
def test_solr_404():
    q = GolrSearchQuery("abnormal")
    q.update_solr_url("https://httpbin.org/status/404")
    pytest.raises(pysolr.SolrError, q.search)
class TestGolrSearchQuery():

    @classmethod
    def setup_class(self):
        self.manager = GolrSearchQuery()
        # Mock the PySolr search function to return our test docs
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/solr-docs.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)

    @classmethod
    def teardown_class(self):
        self.manager = None

    def test_longest_hl(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting"
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_longest_hl_ambiguous(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting",
            "<em>Muscle</em> <em>atrophy</em>, not generalized",
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_hl_to_string(self):
        test_data = "Foo <em>Muscle</em> bar <em>atrophy</em>, generalized"
        expected = "Foo Muscle bar atrophy, generalized"
        results = self.manager._hl_as_string(test_data)
        assert expected == results

    def test_invalid_xml(self):
        test_data = "Foo<Foo> <em>Muscle</em> bar <em>atrophy</em>, generalized"
        pytest.raises(ET.ParseError, self.manager._hl_as_string, test_data)

    def test_autocomplete_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object,
        test that _process_autocomplete_results returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager._process_autocomplete_results(self.test_results)
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_search_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object,
        test that _process_search_results returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager._process_search_results(self.test_results)
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_search(self):
        """
        Given a mock PySolr search method,
        test that search() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.search()
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_autocomplete(self):
        """
        Given a mock PySolr search method,
        test that autocomplete() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.autocomplete()
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)

    def test_autocomplete_no_category(self):
        """
        Test for document without a category
        """
        # Provide a new mock file
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/autocomplete-nocat.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)

        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-nocat-expect.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.autocomplete()
        print(json.dumps(output_docs,
                         default=lambda obj: getattr(obj, '__dict__', str(obj)),
                         sort_keys=True))
        assert json.dumps(processed_docs, sort_keys=True) == \
            json.dumps(output_docs,
                       default=lambda obj: getattr(obj, '__dict__', str(obj)),
                       sort_keys=True)
def search(term, args):
    # args is expected to be a dict of GolrSearchQuery keyword arguments;
    # passing it positionally (as the original did) would bind it to the
    # wrong parameter, so unpack it instead
    q = GolrSearchQuery(term, **args)
    return q.search()
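# Minimal usage sketch for the wrapper above; the keys in args are
# illustrative GolrSearchQuery keyword arguments:
hits = search('muscle weakness', {'rows': 10, 'taxon': ['NCBITaxon:9606']})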