示例#1
0
class EntitySearch:

    """This class is used for concept based entity search in DBpedia"""

    # Number of concept URIs handed to the query graph in a single request.
    _BATCH_SIZE = 5

    def __init__(self):
        """Create the name linker, text extractor, YAGO mapper and query graph."""
        self._linker = NameSPARQL()
        self._extracter = Extraction()
        self._yago = YagoTypeSimilarity()
        self._query_graph = QueryGraph()

    def query_process(self, query):
        """
        Process query into concept (common noun) and entity (proper noun). Link them
        to Knowledge Graph uri links respectively.
        :param query: short text query
        :return: tuple ``(concept_uris, entity_uris)``; each is a list without
            duplicates, in no particular order.
        """
        concepts = self._extracter.extract_words_sent(query)
        entities = self._extracter.extract_chunks_sent(query)
        # Each word / chunk may map to several URIs; flatten and de-duplicate.
        concept_uris = set(itertools.chain.from_iterable(
            self._yago.word2yago(concept) for concept in concepts))
        entity_uris = set(itertools.chain.from_iterable(
            self._linker.name2entities(entity) for entity in entities))
        return list(concept_uris), list(entity_uris)

    def search(self, query):
        """
        Run a type/entity query for every entity found in ``query``.

        Concept URIs are sent to the query graph in slices of ``_BATCH_SIZE``.
        :param query: short text query
        :return: list of distinct results returned by the query graph
            (results must be hashable), in no particular order.
        """
        concepts, entities = self.query_process(query)
        step = self._BATCH_SIZE
        results = set()
        for entity in entities:
            # ``range`` (not ``xrange``) so the loop runs on Python 2 and 3.
            for start in range(0, len(concepts), step):
                batch = concepts[start:start + step]
                results.update(self._query_graph.type_entity_query(batch, entity))
        return list(results)
示例#2
0
 def __init__(self, result_limit=5000, expansion=True, show_query=False):
     """Store the option flags and build the helper components.

     :param result_limit: forwarded to the QueryGraph constructor
     :param expansion: kept as ``self._expansion``
     :param show_query: kept as ``self._show_query``
     """
     # Plain option flags first, then the helper objects.
     self._expansion, self._show_query = expansion, show_query
     self._linker = NameSPARQL()
     self._extracter = Extraction()
     self._yago = YagoTypeSimilarity()
     self._query_graph = QueryGraph(result_limit)
示例#3
0
def test_extraction():
    """Check chunk and word extraction on the 'abstract' feature of Yin_and_yang."""
    from sematch.nlp import Extraction
    from sematch.sparql import EntityFeatures
    resource = 'http://dbpedia.org/resource/Yin_and_yang'
    features = EntityFeatures().features(resource)
    assert features is not None
    extractor = Extraction()
    # Chunks extracted from the abstract should include 'Chinese'.
    chunks = extractor.extract_chunks_doc(features['abstract'])
    assert 'Chinese' in chunks
    # Words extracted from the abstract should include 'philosophy'.
    words = extractor.extract_words_doc(features['abstract'])
    assert 'philosophy' in words
示例#4
0
def test_extraction():
    """Verify that 'Chinese' and 'philosophy' are extracted from the Yin_and_yang abstract."""
    from sematch.nlp import Extraction
    from sematch.sparql import EntityFeatures
    feature_source = EntityFeatures()
    entity = feature_source.features('http://dbpedia.org/resource/Yin_and_yang')
    assert entity is not None
    extractor = Extraction()
    found_chunks = extractor.extract_chunks_doc(entity['abstract'])
    assert 'Chinese' in found_chunks
    found_words = extractor.extract_words_doc(entity['abstract'])
    assert 'philosophy' in found_words
    def __init__(self, result_limit=5000, expansion=False, show_query=False):
        """Set up semantic search of entities and concepts.

        :param result_limit: maximum number of retrieved entities
        :param expansion: whether to conduct concept expansion
        :param show_query: whether the SPARQL query is shown
        """
        # Option flags are stored as-is; helpers are built afterwards.
        self._expansion, self._show_query = expansion, show_query
        self._linker = NameSPARQL()
        self._extracter = Extraction()
        self._yago = YagoTypeSimilarity()
        self._query_graph = QueryGraph(result_limit)
示例#6
0
 def __init__(self, result_limit=5000, expansion=True, show_query=False):
     """Remember the option flags and instantiate the helper objects.

     :param result_limit: passed through to the QueryGraph constructor
     :param expansion: saved on the instance as ``_expansion``
     :param show_query: saved on the instance as ``_show_query``
     """
     self._expansion, self._show_query = expansion, show_query
     # Helper objects are created in the same order as before.
     self._linker = NameSPARQL()
     self._extracter = Extraction()
     self._yago = YagoTypeSimilarity()
     self._query_graph = QueryGraph(result_limit)
示例#7
0
def test_extraction():
    """Smoke-test the Extraction helpers on Technical_University_of_Madrid features."""
    from sematch.nlp import Extraction
    from sematch.semantic.sparql import EntityFeatures
    uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    features = EntityFeatures().features(uri)
    extractor = Extraction()
    # Each extractor only has to return something other than None.
    nouns = extractor.extract_nouns(features['abstract'])
    assert nouns is not None
    verbs = extractor.extract_verbs(features['abstract'])
    assert verbs is not None
    chunks = extractor.extract_chunks_doc(features['abstract'])
    assert chunks is not None
    categories = extractor.category_features(features['category'])
    category_words = extractor.category2words(categories)
    assert category_words is not None
示例#8
0
def test_sim_graph():
    """Rank lemmatized words of the Tom_Cruise abstract with SimGraph.page_rank."""
    from sematch.semantic.graph import SimGraph
    from sematch.semantic.similarity import WordNetSimilarity
    from sematch.nlp import Extraction, lemmatization
    from sematch.sparql import EntityFeatures
    from collections import Counter
    features = EntityFeatures().features('http://dbpedia.org/resource/Tom_Cruise')
    tokens = Extraction().extract_words_sent(features['abstract'])
    # De-duplicate after lemmatization.
    vocabulary = list(set(lemmatization(tokens)))
    similarity = WordNetSimilarity()
    graph = SimGraph(vocabulary, similarity.word_similarity)
    ranking = Counter(graph.page_rank())
    # Split the ten highest-scoring (word, score) pairs into two tuples.
    top_words, top_scores = zip(*ranking.most_common(10))
    assert top_words is not None
示例#9
0
 def __init__(self):
     """Create the helper objects used by this instance.

     Instantiates NameSPARQL, Extraction, YagoTypeSimilarity and QueryGraph
     with their default arguments and stores them on private attributes.
     """
     self._linker = NameSPARQL()
     self._extracter = Extraction()
     self._yago = YagoTypeSimilarity()
     self._query_graph = QueryGraph()
class Matcher:

    """This class is used for concept based entity match in DBpedia"""

    # Number of concept URIs handed to the query graph in a single request.
    _BATCH_SIZE = 5

    def __init__(self, result_limit=5000, expansion=False, show_query=False):
        """ semantic search of entities and concepts

        :param result_limit: maximum number of retrieved entities
        :param expansion: if conduct concept expansion
        :param show_query: if SPARQL query is shown
        """
        self._expansion = expansion
        self._show_query = show_query
        self._linker = NameSPARQL()
        self._extracter = Extraction()
        self._yago = YagoTypeSimilarity()
        self._query_graph = QueryGraph(result_limit)

    @staticmethod
    def _unique_by_uri(results):
        """Keep the first result seen for each ``'uri'`` value, in first-seen order."""
        seen = set()
        unique = []
        for res in results:
            uri = res['uri']
            if uri not in seen:
                seen.add(uri)
                unique.append(res)
        return unique

    def type_links(self, word, lang='eng'):
        """
        Map a word to its synsets and their linked-data concept URIs.

        :param word: word to look up
        :param lang: language code passed to ``multilingual2synset``
        :return: list of dicts with keys ``'name'``, ``'gloss'``, ``'lemma'``
            and ``'lod'`` (list of YAGO / DBpedia links); synsets without any
            link are omitted.
        """
        synsets = self._yago.multilingual2synset(word, lang=lang)
        if self._expansion:
            synsets = list(set(itertools.chain.from_iterable(
                self._yago.synset_expand(s) for s in synsets)))
        links = []
        for s in synsets:
            yago_link = self._yago.synset2yago(s)
            dbpedia_link = self._yago.synset2dbpedia(s)
            # Keep only the links that exist, YAGO first.
            concept_link = [link for link in (yago_link, dbpedia_link) if link]
            if not concept_link:
                continue
            links.append({
                'name': s.name(),
                'gloss': s._definition,
                'lemma': ' '.join(s._lemma_names),
                'lod': concept_link,
            })
        return links

    def query_process(self, query):
        """
        Process query into concept (common noun) and entity (proper noun). Link them
        to Knowledge Graph uri links respectively.
        :param query: short text query
        :return: tuple ``(concept_uris, entity_uris)``; each is a list without
            duplicates, in no particular order.
        """
        entities = self._extracter.extract_chunks_sent(query)
        # Lower-cased words of the entity chunks; nouns equal to one of these
        # are not treated as concepts.
        entity_filter = set(itertools.chain.from_iterable(
            e.lower().split() for e in entities))
        concepts = [c for c in set(self._extracter.extract_nouns(query))
                    if c not in entity_filter]
        concept_uris = set()
        for c in concepts:
            for link in self.type_links(c):
                concept_uris.update(link['lod'])
        entity_uris = set(itertools.chain.from_iterable(
            self._linker.name2entities(e) for e in entities))
        return list(concept_uris), list(entity_uris)

    def match_concepts(self, concepts, lang='en'):
        """
        Query entities by concept URIs, ``_BATCH_SIZE`` concepts per request.

        :param concepts: list of concept URIs
        :param lang: language code passed to ``type_query``
        :return: list of result dicts, one per distinct ``'uri'``.
        """
        results = []
        step = self._BATCH_SIZE
        # ``range`` (not ``xrange``) so the loop runs on Python 2 and 3.
        for start in range(0, len(concepts), step):
            results.extend(self._query_graph.type_query(
                concepts[start:start + step], lang, self._show_query))
        return self._unique_by_uri(results)

    def match_type(self, query, lang='eng'):
        """
        Match entities by the concepts linked to each whitespace-separated word.

        :param query: text whose words are looked up with ``type_links``
        :param lang: one of ``'eng'``, ``'spa'``, ``'cmn'``
        :return: list of result dicts, one per distinct ``'uri'``.
        :raises KeyError: if ``lang`` is not one of the supported codes.
        """
        lang_map = {'eng': 'en', 'spa': 'es', 'cmn': 'zh'}
        result_lang = lang_map[lang]
        concept_uris = set()
        for w in query.split():
            for link in self.type_links(w, lang):
                concept_uris.update(link['lod'])
        return self.match_concepts(list(concept_uris), result_lang)

    def match_entity_type(self, query):
        """
        Run a type/entity query for every entity found in ``query``.

        :param query: short text query
        :return: list of result dicts, one per distinct ``'uri'``.
        """
        results = []
        concepts, entities = self.query_process(query)
        step = self._BATCH_SIZE
        for e in entities:
            for start in range(0, len(concepts), step):
                results.extend(self._query_graph.type_entity_query(
                    concepts[start:start + step], e, self._show_query))
        return self._unique_by_uri(results)