Exemplo n.º 1
0
 def init_analyzers(self, index_config):
     """Recreate the index derived from ``index_config`` from scratch.

     Prints some debug output, deletes the index when it already exists
     and then creates it again without any custom settings.
     """
     print("init_analyzers")
     print(index_config)
     index_name = get_index_name(index_config)
     print(index_name)
     indices = self.es.indices
     already_there = indices.exists(index_name)
     if already_there:
         indices.delete(index=index_name)
     indices.create(index=index_name)
 def search_columns_data(self, index_config, source_names):
     """Return all ``service`` documents, optionally limited by source.

     Args:
         index_config: Configuration handed to ``get_index_name`` to
             resolve the index to scan.
         source_names: Values matched against the ``source`` field with a
             ``terms`` filter. When empty or ``None`` every document of
             the doc type is returned.

     Returns:
         list: The hits yielded by ``scan``, fully materialized.
     """
     if source_names:
         # Wrapped in constant_score because only filtering is wanted.
         query = {
             "constant_score": {
                 "filter": {
                     "terms": {
                         "source": source_names
                     }
                 }
             }
         }
     else:
         # match_all takes an object as its body; an empty string is not
         # a valid query body.
         query = {"match_all": {}}
     return list(
         scan(self.es,
              index=get_index_name(index_config),
              doc_type='service',
              query={"query": query}))
Exemplo n.º 3
0
 def index_column(self, column, source_name, index_config):
     """Index one column document, using the source name as its doc type.

     The indexed body is ``column.to_json()`` with an additional
     ``source`` entry set to ``source_name``.
     """
     message = "Indexing column " + str(column)
     logging.info(message)
     document = column.to_json()
     document['source'] = source_name
     target_index = get_index_name(index_config)
     self.es.index(index=target_index, doc_type=source_name, body=document)
Exemplo n.º 4
0
    def search_columns_data(self, index_config, source_names):
        """Return every document stored under the given source doc types.

        Args:
            index_config: Configuration handed to ``get_index_name`` to
                resolve the index to scan.
            source_names: Iterable of doc type names; they are joined with
                commas into a single ``doc_type`` argument. ``None`` is
                treated like an empty collection.

        Returns:
            list: The hits yielded by ``scan``, fully materialized.
        """
        # ``','.join(None)`` raises TypeError, so fall back to an empty
        # tuple, which yields the same empty doc type string as an empty
        # list does.
        doc_types = ','.join(source_names or ())
        return list(
            scan(self.es,
                 index=get_index_name(index_config),
                 doc_type=doc_types,
                 query={"query": {
                     "match_all": {}
                 }}))
Exemplo n.º 5
0
 def index_column(self, column, source_name, index_config):
     """Index a column document under the ``service`` doc type.

     The indexed body is ``column.to_json()`` with an additional
     ``source`` entry set to ``source_name``.

     Returns:
         bool: ``True`` when the index call succeeded, ``False`` when it
         raised ``RequestError`` (in which case "Error" is printed).
     """
     document = column.to_json()
     document['source'] = source_name
     succeeded = True
     try:
         self.es.index(index=get_index_name(index_config),
                       doc_type="service",
                       body=document)
     except RequestError:
         print("Error")
         succeeded = False
     return succeeded
Exemplo n.º 6
0
    def search_similar_text_data(self, index_config, value_text, source_names):
        """Search ``service`` documents whose ``textual`` field matches a text.

        Args:
            index_config: Configuration handed to ``get_index_name``.
            value_text: Text matched against the ``textual`` field.
            source_names: When truthy, results are additionally filtered
                to documents whose ``source`` is one of these values.

        Returns:
            The search response limited to 10 hits, or an empty hits
            structure when any exception occurs (the exception is logged).
        """
        try:
            text_match = {"match": {"textual": value_text}}
            if source_names:
                query = {
                    "bool": {
                        "must": text_match,
                        "filter": {"terms": {"source": source_names}},
                    }
                }
            else:
                query = text_match
            return self.es.search(index=get_index_name(index_config),
                                  doc_type='service',
                                  body={"query": query},
                                  size=10)
        except Exception:
            empty_response = {"hits": {"hits": []}}
            logging.exception("Exception when querying elasticsearch")
            return empty_response
    def search_similar_text_data(self, index_config, value_text, source_names):
        """Return up to 10 ``service`` hits matching ``value_text``.

        The ``textual`` field is matched against ``value_text``. If
        ``source_names`` is truthy the match is combined with a ``terms``
        filter on ``source``. Any exception is logged and replaced by an
        empty hits structure.
        """
        try:
            query = {"match": {"textual": value_text}}
            if source_names:
                # Keep the text match as the scoring clause and restrict
                # the candidates with the source filter.
                source_filter = {"terms": {"source": source_names}}
                query = {"bool": {"must": query, "filter": source_filter}}
            search_args = {
                "index": get_index_name(index_config),
                "doc_type": 'service',
                "body": {"query": query},
                "size": 10,
            }
            result = self.es.search(**search_args)
        except Exception:
            result = {"hits": {"hits": []}}
            logging.exception("Exception when querying elasticsearch")

        return result
 def delete_column(self, attr_name, source_name, index_config):
     """Delete all ``service`` documents whose ``name`` matches ``attr_name``.

     The matching hits are collected with ``scan`` (without their source)
     and then removed with a single ``bulk`` call.

     NOTE(review): ``source_name`` is accepted but not used here, so the
     deletion is not restricted to one source - confirm this is intended.
     """
     name_query = {"query": {"match": {"name": attr_name}}}
     hits = scan(self.es,
                 query=name_query,
                 index=get_index_name(index_config),
                 doc_type="service",
                 _source=False,
                 track_scores=False,
                 scroll='5m')
     # Turn every hit into a bulk delete action.
     delete_actions = [dict(hit, _op_type='delete') for hit in hits]
     bulk(self.es, delete_actions)
 def search_columns_data(self, index_config, source_names):
     """Scan the index for ``service`` documents, optionally by source.

     Args:
         index_config: Configuration handed to ``get_index_name`` to
             resolve the index to scan.
         source_names: Values for a ``terms`` filter on the ``source``
             field. A falsy value (empty list, ``None``) selects all
             documents instead.

     Returns:
         list: All hits produced by ``scan``.
     """
     if source_names:
         inner_query = {
             "constant_score": {
                 "filter": {
                     "terms": {
                         "source": source_names
                     }
                 }
             }
         }
     else:
         # The body of match_all has to be an object, not an empty string.
         inner_query = {"match_all": {}}
     hits = scan(
         self.es,
         index=get_index_name(index_config),
         doc_type='service',
         query={"query": inner_query})
     return list(hits)
Exemplo n.º 10
0
 def delete_column(self, attr_name, source_name, index_config):
     """Remove the ``service`` documents whose ``name`` matches ``attr_name``.

     Hits are fetched with ``scan`` (source omitted), tagged as delete
     operations and submitted together through ``bulk``.

     NOTE(review): ``source_name`` is not referenced in this method.
     """
     scan_options = {
         "query": {"query": {"match": {"name": attr_name}}},
         "index": get_index_name(index_config),
         "doc_type": "service",
         "_source": False,
         "track_scores": False,
         "scroll": '5m',
     }
     pending = []
     for hit in scan(self.es, **scan_options):
         hit.update(_op_type='delete')
         pending.append(hit)
     bulk(self.es, pending)
Exemplo n.º 11
0
 def search_similar_text_data(self, index_config, value_text, source_names):
     """Search the given source doc types for text similar to ``value_text``.

     Args:
         index_config: Configuration handed to ``get_index_name``.
         value_text: Text matched against the ``textual`` field.
         source_names: Iterable of doc type names, joined with commas
             into the ``doc_type`` argument of the search.

     Returns:
         The search response limited to 10 hits, or an empty hits
         structure when the search could not be performed.
     """
     try:
         return self.es.search(
             index=get_index_name(index_config),
             doc_type=','.join(source_names),
             body={"query": {
                 "match": {
                     "textual": value_text,
                 }
             }},
             size=10)
     except Exception:
         # Deliberately best-effort: callers get an empty result, but the
         # traceback is attached so the failure cause is not lost.
         logging.warning("Search similar text data not possible",
                         exc_info=True)
         return {"hits": {"hits": []}}
Exemplo n.º 12
0
    def index_source(self, source, index_config):
        """Index the usable columns of ``source`` into Elasticsearch.

        First registers a mapping for the ``service`` doc type that keeps
        the ``source`` field un-analyzed. Then every column in
        ``source.column_map`` that has a semantic type is indexed via
        ``index_column``; such columns with an empty value list are
        skipped with a warning.
        """
        service_mapping = {
            "service": {
                "properties": {
                    "source": {
                        "type": "string",
                        "index": "not_analyzed"
                    }
                }
            }
        }
        self.es.indices.put_mapping(index=get_index_name(index_config),
                                    doc_type="service",
                                    body=service_mapping)

        for column in source.column_map.values():
            if not column.semantic_type:
                # Columns without a semantic type are not indexed.
                continue
            if len(column.value_list) > 0:
                self.index_column(column, source.index_name, index_config)
                continue
            logging.warning(
                "Indexer: IGNORE COLUMN `%s` in source `%s` because of empty values",
                column.name, source.name)
Exemplo n.º 13
0
 def init_analyzers(self, index_config):
     """Create the index with three custom analyzers.

     The analyzers ``textual``, ``number_text`` and ``whitespace_text``
     are registered in the index settings at creation time.
     """

     def custom_analyzer(tokenizer, filters):
         # Build one custom analyzer definition.
         return {"filter": filters, "type": "custom", "tokenizer": tokenizer}

     logging.info("Initializing analyzers")
     analyzers = {
         "textual": custom_analyzer(
             "standard", ["standard", "lowercase", "stop"]),
         "number_text": custom_analyzer(
             "standard", ["lowercase", "word_delimiter", "stop"]),
         "whitespace_text": custom_analyzer(
             "whitespace", ["lowercase", "stop", "kstem"]),
     }
     index_settings = {"settings": {"analysis": {"analyzer": analyzers}}}
     self.es.indices.create(index=get_index_name(index_config),
                            body=index_settings)
     logging.debug("Done: Initializing analyzers")
Exemplo n.º 14
0
 def index_column(self, column, source_name, index_config):
     """Index a column document under the ``service`` doc type.

     The indexed body is ``column.to_json()`` with an additional
     ``source`` entry set to ``source_name``.
     """
     document = column.to_json()
     document['source'] = source_name
     request = {
         "index": get_index_name(index_config),
         "doc_type": "service",
         "body": document,
     }
     self.es.index(**request)
Exemplo n.º 15
0
 def init_analyzers(self, index_config):
     """Create the index for ``index_config`` without custom settings."""
     index_name = get_index_name(index_config)
     self.es.indices.create(index=index_name)
Exemplo n.º 16
0
 def delete_column(self, index_config):
     """Delete the whole index resolved from ``index_config``.

     Args:
         index_config: Configuration handed to ``get_index_name``.

     Returns:
         bool: ``True`` when the index existed and its deletion was
         requested, ``False`` when there was no such index.
     """
     logging.info("Deleting index for column")
     index_name = get_index_name(index_config)
     if not self.es.indices.exists(index_name):
         return False
     # The client's top-level ``delete`` removes a single document and
     # needs a doc type and id; dropping an index goes through the
     # indices client.
     self.es.indices.delete(index=index_name)
     return True
Exemplo n.º 17
0
 def init_analyzers(self, index_config):
     """Create the Elasticsearch index named by ``index_config``."""
     indices_client = self.es.indices
     indices_client.create(index=get_index_name(index_config))
Exemplo n.º 18
0
 def index_column(self, column, source_name, index_config):
     """Store a column as a ``service`` document tagged with its source.

     ``column.to_json()`` supplies the document body; its ``source`` entry
     is set to ``source_name`` before indexing.
     """
     payload = column.to_json()
     payload['source'] = source_name
     index_name = get_index_name(index_config)
     self.es.index(index=index_name, doc_type="service", body=payload)