Пример #1
0
 def fuzzy_keyword_match(self, keywords, max_hits=15):
     """
     Runs a fuzzy ``match`` query for the keywords on the ``text`` field of
     the ``text`` index and lazily yields one Hit per matching document.
     :param keywords: the keywords to match, used as the query text
     :param max_hits: maximum number of returned objects
     :return: a generator of Hit objects built from each document's id,
         dbName, sourceName, columnName and score; it yields nothing when
         the response carries no hits
     """
     filter_path = [
         'hits.hits._source.id', 'hits.hits._score', 'hits.total',
         'hits.hits._source.dbName', 'hits.hits._source.sourceName',
         'hits.hits._source.columnName'
     ]
     query_body = {
         "from": 0,
         "size": max_hits,
         "query": {
             "match": {
                 "text": {
                     "query": keywords,
                     "fuzziness": "AUTO"
                 }
             }
         }
     }
     res = client.search(index="text",
                         body=query_body,
                         filter_path=filter_path)
     # Do not rely on ``hits.total == 0`` to detect an empty result: newer
     # Elasticsearch versions report the total as an object instead of an
     # int, and response filtering can leave ``hits.hits`` out entirely.
     # Treat a missing hit list as "no results" instead of raising KeyError.
     try:
         hits = res['hits']['hits']
     except KeyError:
         return
     for el in hits:
         source = el['_source']
         yield Hit(str(source['id']), source['dbName'],
                   source['sourceName'], source['columnName'],
                   el['_score'])
Пример #2
0
 def drs_from_raw_field(self, field: (str, str, str)) -> DRS:
     """
     Builds the DRS that represents a single field identified by its names.
     The field is first wrapped in a Hit (id computed with id_from, score 0)
     and then handed to drs_from_hit.
     :param field: a (db_name, source_name, field_name) tuple
     :return: the DRS produced by drs_from_hit for that field
     """
     db_name, source_name, field_name = field
     origin = Hit(id_from(db_name, source_name, field_name),
                  db_name, source_name, field_name, 0)
     return self.drs_from_hit(origin)
Пример #3
0
 def _node_to_hit(self, node: (str, str, str)) -> Hit:
     """
     Wraps a node, given by its names, in a Hit with score 0.
     The node id is computed with id_from from the three names.
     :param node: a (db_name, source_name, field_name) tuple
     :return: Hit
     """
     db_name, source_name, field_name = node
     return Hit(id_from(db_name, source_name, field_name),
                db_name, source_name, field_name, 0)
Пример #4
0
 def _nid_to_hit(self, nid: int) -> Hit:
     """
     Looks a node id up in the network and wraps the first returned info
     record in a Hit with score 0.0.
     :param nid: int or string; it is converted to a string before lookup
     :return: Hit
     """
     info = self._network.get_info_for([str(nid)])
     node_id, db_name, source_name, field_name = info[0]
     return Hit(node_id, db_name, source_name, field_name, 0.0)
Пример #5
0
 def add_fields(self, list_of_fields):
     """
     Builds one Hit node per field and adds all of them to the graph.
     :param list_of_fields: iterable of (nid, source_name, field_name) tuples
     :return: the list of newly created Hit nodes, in input order
     """
     nodes = [Hit(nid, sn, fn, -1) for nid, sn, fn in list_of_fields]
     self.__G.add_nodes_from(nodes)
     return nodes
Пример #6
0
 def schema_neighbors(self, field: (str, str, str)) -> DRS:
     """
     Collects the hits the network returns for the table of the provided
     field and wraps them in a DRS.
     :param field: a (db_name, source_name, field_name) tuple
     :return: a DRS holding the hits returned by get_hits_from_table for
         source_name, with an OP.TABLE operation whose parameter is the
         Hit of the provided field
     """
     db_name, source_name, field_name = field
     table_hits = self.__network.get_hits_from_table(source_name)
     origin_nid = id_from(db_name, source_name, field_name)
     origin_hit = Hit(origin_nid, db_name, source_name, field_name, 0)
     members = list(table_hits)
     provenance = Operation(OP.TABLE, params=[origin_hit])
     return DRS(members, provenance)
Пример #7
0
 def enumerate_relation(self, relation, as_str=True):
     """
     Lazily walks every node id and yields its neighbors under a relation.
     A pair is skipped when its reverse (neighbor, node) was already
     recorded, so an edge visited from both ends is reported once.
     :param relation: the relation passed on to neighbors_id
     :param as_str: when true, yield "<hit> - <neighbor>" strings;
         otherwise yield (hit, neighbor) tuples
     :return: a generator of strings or of (Hit, neighbor) tuples
     """
     visited = set()
     for nid in self.iterate_ids():
         db_name, source_name, field_name, _ = self.__id_names[nid]
         origin = Hit(nid, db_name, source_name, field_name, 0)
         for neighbor in self.neighbors_id(origin, relation):
             if (neighbor.nid, nid) in visited:
                 continue
             visited.add((nid, neighbor.nid))
             if as_str:
                 yield " - ".join((str(origin), str(neighbor)))
             else:
                 yield origin, neighbor
Пример #8
0
 def md_neighbors_id(self, hit: Hit, md_neighbors: MRS, relation: Relation) -> DRS:
     """
     Builds a DRS with the nodes linked to ``hit`` by the given metadata
     neighbors.
     For every metadata neighbor the "other end" is its target, unless the
     target is ``hit`` itself, in which case it is its source.
     :param hit: the origin, either a Hit or a node id
     :param md_neighbors: iterable of items exposing ``source`` and ``target``
     :param relation: relation used to pick the provenance operation
     :return: a DRS of Hits (score 1.0) whose operation has the origin
         ``hit`` as its parameter
     """
     # Accept a Hit or a bare node id; any other id type is stringified
     # instead of leaving ``nid`` unbound.
     nid = str(hit.nid if isinstance(hit, Hit) else hit)
     score = 1.0  # TODO: return more meaningful score results
     data = []
     # The loop variable must not be called ``hit``: that would overwrite
     # the origin and record the last neighbor as the operation parameter.
     for neighbor in md_neighbors:
         k = neighbor.target if neighbor.target != nid else neighbor.source
         db_name, source_name, field_name, _ = self.__id_names[k]
         data.append(Hit(k, db_name, source_name, field_name, score))
     op = self.get_op_from_relation(relation)
     return DRS(data, Operation(op, params=[hit]))
Пример #9
0
 def neighbors_id(self, hit: Hit, relation: Relation) -> DRS:
     """
     Builds a DRS with the graph neighbors of ``hit`` that are connected to
     it through ``relation``.
     :param hit: the origin, either a Hit or a node id
     :param relation: only edges carrying this relation are considered; the
         edge's 'score' entry becomes the score of the resulting Hit
     :return: a DRS of neighbor Hits whose operation has the origin ``hit``
         as its parameter
     """
     # Accept a Hit or a bare node id; any other id type is stringified
     # instead of leaving ``nid`` unbound.
     nid = str(hit.nid if isinstance(hit, Hit) else hit)
     data = []
     for k, edge in self.__G[nid].items():
         if relation not in edge:
             continue
         score = edge[relation]['score']
         db_name, source_name, field_name, _ = self.__id_names[k]
         data.append(Hit(k, db_name, source_name, field_name, score))
     op = self.get_op_from_relation(relation)
     return DRS(data, Operation(op, params=[hit]))
Пример #10
0
 def search_keywords(self, keywords, elasticfieldname, max_hits=15):
     """
     Runs a ``match`` query for the keywords on the store field selected by
     elasticfieldname and lazily yields one Hit per matching document.
     :param keywords: the keywords to match, used as the query text
     :param elasticfieldname: a KWType value selecting where to search:
         KW_CONTENT -> index "text", field "text";
         KW_SCHEMA -> index "profile", field "columnName";
         KW_ENTITIES -> index "profile", field "entities";
         KW_TABLE -> index "profile", field "sourceName"
     :param max_hits: maximum number of returned objects
     :return: a generator of Hit objects built from each document's id,
         dbName, sourceName, columnName and score; it yields nothing when
         the response carries no hits
     """
     filter_path = [
         'hits.hits._source.id', 'hits.hits._score', 'hits.total',
         'hits.hits._source.dbName', 'hits.hits._source.sourceName',
         'hits.hits._source.columnName'
     ]
     # (keyword type, index, document field); checked in order with ``==``
     targets = (
         (KWType.KW_CONTENT, "text", "text"),
         (KWType.KW_SCHEMA, "profile", "columnName"),
         (KWType.KW_ENTITIES, "profile", "entities"),
         (KWType.KW_TABLE, "profile", "sourceName"),
     )
     # NOTE(review): an unrecognised elasticfieldname leaves both values as
     # None, so the search is issued with no index and no body - confirm
     # whether that should raise instead.
     index = None
     query_body = None
     for kw_type, target_index, target_field in targets:
         if elasticfieldname == kw_type:
             index = target_index
             query_body = {
                 "from": 0,
                 "size": max_hits,
                 "query": {
                     "match": {
                         target_field: keywords
                     }
                 }
             }
             break
     res = client.search(index=index,
                         body=query_body,
                         filter_path=filter_path)
     # Do not rely on ``hits.total == 0`` to detect an empty result: newer
     # Elasticsearch versions report the total as an object instead of an
     # int, and response filtering can leave ``hits.hits`` out entirely.
     # Treat a missing hit list as "no results" instead of raising KeyError.
     try:
         hits = res['hits']['hits']
     except KeyError:
         return
     for el in hits:
         source = el['_source']
         yield Hit(str(source['id']), source['dbName'],
                   source['sourceName'], source['columnName'],
                   el['_score'])
Пример #11
0
 def exact_search_keywords(self, keywords, elasticfieldname, max_hits=15):
     """
     Like search_keywords, but issues a ``term`` query so only exact results
     are returned; lazily yields one Hit per matching document.
     :param keywords: the term to look for
     :param elasticfieldname: a KWType value selecting where to search:
         KW_CONTENT -> index "text", field "text";
         KW_SCHEMA -> index "profile", field "columnNameNA";
         KW_ENTITIES -> index "profile", field "entities";
         KW_TABLE -> index "profile", field "sourceNameNA"
     :param max_hits: maximum number of returned objects
     :return: a generator of Hit objects built from each document's id,
         dbName, sourceName, columnName and score; it yields nothing when
         the response carries no hits
     """
     filter_path = [
         'hits.hits._source.id', 'hits.hits._score', 'hits.total',
         'hits.hits._source.dbName', 'hits.hits._source.sourceName',
         'hits.hits._source.columnName'
     ]
     # (keyword type, index, document field); checked in order with ``==``
     targets = (
         (KWType.KW_CONTENT, "text", "text"),
         (KWType.KW_SCHEMA, "profile", "columnNameNA"),
         (KWType.KW_ENTITIES, "profile", "entities"),
         (KWType.KW_TABLE, "profile", "sourceNameNA"),
     )
     # NOTE(review): an unrecognised elasticfieldname leaves both values as
     # None, so the search is issued with no index and no body - confirm
     # whether that should raise instead.
     index = None
     query_body = None
     for kw_type, target_index, target_field in targets:
         if elasticfieldname == kw_type:
             index = target_index
             query_body = {
                 "from": 0,
                 "size": max_hits,
                 "query": {
                     "term": {
                         target_field: keywords
                     }
                 }
             }
             break
     res = client.search(index=index,
                         body=query_body,
                         filter_path=filter_path)
     # Do not rely on ``hits.total == 0`` to detect an empty result: newer
     # Elasticsearch versions report the total as an object instead of an
     # int, and response filtering can leave ``hits.hits`` out entirely.
     # Treat a missing hit list as "no results" instead of raising KeyError.
     try:
         hits = res['hits']['hits']
     except KeyError:
         return
     for el in hits:
         source = el['_source']
         yield Hit(str(source['id']), source['dbName'],
                   source['sourceName'], source['columnName'],
                   el['_score'])
Пример #12
0
 def get_hits_from_table(self, table) -> [Hit]:
     """
     Returns one Hit (score 0) per field of the given table.
     The field ids come from get_fields_of_source and their
     (nid, db_name, source_name, field_name) records from get_info_for.
     :param table: the source/table whose fields are looked up
     :return: list of Hit
     """
     field_ids = self.get_fields_of_source(table)
     hits = []
     for nid, db_name, s_name, f_name in self.get_info_for(field_ids):
         hits.append(Hit(nid, db_name, s_name, f_name, 0))
     return hits
Пример #13
0
 def get_hits_from_info(self, info):
     """
     Converts info records into Hits with score 0.
     :param info: iterable of (nid, db_name, source_name, field_name) records
     :return: list of Hit, in the order of the input records
     """
     hits = []
     for nid, db_name, s_name, f_name in info:
         hits.append(Hit(nid, db_name, s_name, f_name, 0))
     return hits
Пример #14
0
def build_hit(sn, fn):
    """
    Build a Hit for a field from its two names.

    The id comes from compute_field_id(sn, fn); -1 is passed as the last
    constructor argument (presumably a placeholder score - confirm).
    """
    return Hit(compute_field_id(sn, fn), sn, fn, -1)