Пример #1
0
 def get(self, term):
     """
     Run a lexical search for ``term`` and return the matching concepts or entities.

     Request arguments parsed by ``simple_parser`` are forwarded to
     ``GolrSearchQuery`` as keyword arguments, together with the user agent.
     """
     request_args = simple_parser.parse_args()
     query = GolrSearchQuery(term, user_agent=USER_AGENT, **request_args)
     return query.search()
Пример #2
0
def test_search_go_ontol():
    """
    Search for ``transport*`` restricted to the ``ontology_class`` category
    with ``is_go`` enabled, and expect at least one document back.
    """
    query = GolrSearchQuery("transport*", category='ontology_class', is_go=True)
    print("Q={}".format(query))

    # Dump the generated Solr parameters before running the search, for debugging
    solr_params = query.solr_params()
    print("PARAMS={}".format(solr_params))

    search_results = query.search()
    print("RESULTS={}".format(search_results))

    hits = search_results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0
Пример #3
0
def test_search():
    """
    Search for the term ``abnormal`` with default settings and expect
    at least one document back.
    """
    query = GolrSearchQuery("abnormal")
    print("Q={}".format(query))

    # Dump the generated Solr parameters before running the search, for debugging
    solr_params = query.solr_params()
    print("PARAMS={}".format(solr_params))

    search_results = query.search()
    print("RESULTS={}".format(search_results))

    hits = search_results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0
Пример #4
0
def test_search_go_all():
    """
    Search for ``transport*`` with ``is_go`` enabled and no category filter,
    expect at least one document back, then print the facet counts.
    """
    query = GolrSearchQuery("transport*", is_go=True)
    print("Q={}".format(query))

    # Dump the generated Solr parameters before running the search, for debugging
    solr_params = query.solr_params()
    print("PARAMS={}".format(solr_params))

    search_results = query.search()
    print("RESULTS={}".format(search_results))

    hits = search_results.docs
    for hit in hits:
        print(str(hit))
    assert len(hits) > 0

    facet_counts = search_results.facet_counts
    print(str(facet_counts))
Пример #5
0
def test_cursor():
    """
    Check that the ``rows`` and ``start`` parameters page consistently.

    One query fetches 100 documents in a single request; the same term is
    then fetched as ten consecutive pages of ten documents each. Both
    approaches must yield the same set of 100 document IDs.
    """
    full_page = GolrSearchQuery("abnormal", rows=100).search()
    ids = {doc['id'] for doc in full_page.docs}
    print('Init ids={}'.format(ids))
    assert len(ids) == 100

    matches = set()
    # Offsets 0, 10, ..., 90 cover the same 100 rows as the single request
    for offset in range(0, 100, 10):
        page_query = GolrSearchQuery("abnormal", start=offset, rows=10)
        page_docs = page_query.search().docs
        next_ids = [doc['id'] for doc in page_docs]
        assert len(next_ids) == 10
        print('Next ids (from {}) = {}'.format(offset, next_ids))
        matches.update(next_ids)

    assert len(matches) == 100
    assert len(matches & ids) == 100
Пример #6
0
def clean_feature_ids(id: str) -> str:
    """
    Sanitize an MME feature identifier before it goes into owlsim.

    Examples:
        MIM:610536 -> OMIM:610536
        SHH -> HGNC:10848

    :param id: a prefixed identifier (``PREFIX:reference``) or a bare label
    :return: the identifier with a ``MIM`` prefix rewritten to ``OMIM``; for a
        bare label, the ID of the first search hit, or the input unchanged
        when the search returns no documents
    """
    # Split on the FIRST colon only: identifiers whose reference part itself
    # contains a colon (e.g. IRIs) must not raise on tuple unpacking.
    prefix, separator, reference = id.partition(':')

    if separator:
        if prefix == 'MIM':
            return 'OMIM:' + reference
        # Any other prefixed identifier is passed through untouched
        return id

    # No colon: assume it's a label and look it up.
    # Assume it's human, and make sure it's an exact match.
    query = GolrSearchQuery(id, taxon=['NCBITaxon:9606'], min_match="100%")
    results = query.search()
    if results.docs:
        return results.docs[0]['id']

    return id
Пример #7
0
class TestGolrSearchQuery():
    """
    Tests for GolrSearchQuery that run against a mocked PySolr search call,
    using JSON fixtures stored under ``resources/solr`` next to this module.
    """

    @staticmethod
    def _load_fixture(relative_path):
        """
        Parse and return the JSON fixture at ``relative_path`` (relative to
        this module). The file handle is closed as soon as parsing finishes.
        """
        fixture_path = os.path.join(os.path.dirname(__file__), relative_path)
        with open(fixture_path) as handle:
            return json.load(handle)

    @staticmethod
    def _as_json(obj):
        """
        Serialize ``obj`` to JSON with sorted keys; objects that are not JSON
        serializable fall back to their ``__dict__`` (or ``str(obj)``).
        """
        return json.dumps(obj,
                          default=lambda o: getattr(o, '__dict__', str(o)),
                          sort_keys=True)

    @classmethod
    def _assert_same_json(cls, expected, actual):
        """
        Assert that ``expected`` and ``actual`` serialize to the same JSON.
        """
        assert json.dumps(expected, sort_keys=True) == cls._as_json(actual)

    @classmethod
    def setup_class(cls):
        cls.manager = GolrSearchQuery()

        # Mock the PySolr search function to
        # return our test docs
        input_docs = cls._load_fixture('resources/solr/solr-docs.json')
        cls.test_results = pysolr.Results(input_docs)
        cls.manager.solr.search = MagicMock(return_value=cls.test_results)

    @classmethod
    def teardown_class(cls):
        cls.manager = None

    def test_longest_hl(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting"
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_longest_hl_ambiguous(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting",
            "<em>Muscle</em> <em>atrophy</em>, not generalized",
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_hl_to_string(self):
        test_data = "Foo <em>Muscle</em> bar <em>atrophy</em>, generalized"
        expected = "Foo Muscle bar atrophy, generalized"
        results = self.manager._hl_as_string(test_data)
        assert expected == results

    def test_invalid_xml(self):
        test_data = "Foo<Foo> <em>Muscle</em> bar <em>atrophy</em>, generalized"
        pytest.raises(ET.ParseError, self.manager._hl_as_string, test_data)

    def test_autocomplete_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_autocomplete_results returns the
        expected object
        """
        expected_docs = self._load_fixture('resources/solr/autocomplete-expected.json')
        output_docs = self.manager._process_autocomplete_results(self.test_results)
        self._assert_same_json(expected_docs, output_docs)

    def test_search_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_search_results returns the
        expected object
        """
        expected_docs = self._load_fixture('resources/solr/search-expected.json')
        output_docs = self.manager._process_search_results(self.test_results)
        self._assert_same_json(expected_docs, output_docs)

    def test_search(self):
        """
        Given a mock PySolr.search method test that
        search() returns the expected object
        """
        expected_docs = self._load_fixture('resources/solr/search-expected.json')
        output_docs = self.manager.search()
        self._assert_same_json(expected_docs, output_docs)

    def test_autocomplete(self):
        """
        Given a mock PySolr.search method test that
        autocomplete() returns the expected object
        """
        expected_docs = self._load_fixture('resources/solr/autocomplete-expected.json')
        output_docs = self.manager.autocomplete()
        self._assert_same_json(expected_docs, output_docs)

    def test_autocomplete_no_category(self):
        """
        Test for document without a category
        """
        # Provide a new mock file
        nocat_docs = self._load_fixture('resources/solr/autocomplete-nocat.json')
        nocat_results = pysolr.Results(nocat_docs)
        expected_docs = self._load_fixture('resources/solr/autocomplete-nocat-expect.json')

        # The manager is shared by every test in the class, so the original
        # mock is put back afterwards to keep the tests order-independent.
        shared_search = self.manager.solr.search
        self.manager.solr.search = MagicMock(return_value=nocat_results)
        try:
            output_docs = self.manager.autocomplete()
        finally:
            self.manager.solr.search = shared_search

        print(self._as_json(output_docs))
        self._assert_same_json(expected_docs, output_docs)
Пример #8
0
def search(term, args):
    """
    Build a GolrSearchQuery from ``term`` and ``args`` and return its search results.
    """
    # NOTE(review): ``args`` is handed over as the second positional argument,
    # not unpacked as keyword arguments -- confirm this matches the
    # GolrSearchQuery constructor signature.
    return GolrSearchQuery(term, args).search()