def __init__(self):
    """
    Build a DataStore instance from the configuration held in environment
    variables, then open the underlying connection.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the engine name
            or the engine's connection settings are missing from the config.
    """
    self._engine = CHATBOT_NER_DATASTORE.get(ENGINE)
    if self._engine is None:
        raise DataStoreSettingsImproperlyConfiguredException()
    self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine)
    if self._connection_settings is None:
        raise DataStoreSettingsImproperlyConfiguredException()
    # Name of the backing store: index name for Elasticsearch,
    # table name for a SQL engine, and so on. Filled in by _connect().
    self._store_name = None
    # Engine-specific client/connection object, created by _connect().
    self._client_or_connection = None
    self._connect()
def _check_doc_type_for_elasticsearch(self):
    """
    Validate that a document type is configured for Elasticsearch.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the doc_type
            key is absent from the connection settings.
    """
    # Guard clause: nothing to do when doc_type is configured.
    if ELASTICSEARCH_DOC_TYPE in self._connection_settings:
        return
    raise DataStoreSettingsImproperlyConfiguredException(
        'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment')
def _check_doc_type_for_crf_data_elasticsearch(self):
    """
    Validate that a document type is configured for the CRF training data
    index in Elasticsearch.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the CRF data
            doc_type key is absent from the connection settings.
    """
    # TODO: move this validation to init/boot time
    if ELASTICSEARCH_CRF_DATA_DOC_TYPE in self._connection_settings:
        return
    raise DataStoreSettingsImproperlyConfiguredException(
        'Elasticsearch training data needs doc_type. Please configure '
        'ES_TRAINING_DATA_DOC_TYPE in your environment')
def _configure_store(self):
    """
    Load the connection settings for the configured engine and cache the
    index alias and doc type on the instance.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when no settings
            exist for the engine, or doc_type is not configured.
    """
    self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine_name)
    if self._connection_settings is None:
        raise DataStoreSettingsImproperlyConfiguredException()
    # doc_type must be present before the keys below are read.
    self._check_doc_type_for_elasticsearch()
    settings = self._connection_settings
    self._index_name = settings[constants.ELASTICSEARCH_ALIAS]
    self._doc_type = settings[constants.ELASTICSEARCH_DOC_TYPE]
def _check_doc_type_for_elasticsearch(self):
    """
    Ensure the Elasticsearch document type is present in the connection
    settings, logging and raising when it is not.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the doc_type
            key is absent from the connection settings.
    """
    # TODO: perform this validation at init or boot time instead
    if constants.ELASTICSEARCH_DOC_TYPE in self._connection_settings:
        return
    ner_logger.debug("No doc type is present")
    raise DataStoreSettingsImproperlyConfiguredException(
        'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment'
    )
def _check_doc_type_for_elasticsearch(self):
    """
    Ensure the Elasticsearch document type is present in the connection
    settings, logging and raising when it is not.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when the doc_type
            key is absent from the connection settings.
    """
    # Guard clause: settings already carry a doc_type, nothing to check.
    if constants.ELASTICSEARCH_DOC_TYPE in self._connection_settings:
        return
    ner_logger.debug(
        "No doc type is present in chatbot_ner.config.CHATBOT_NER_DATASTORE"
    )
    raise DataStoreSettingsImproperlyConfiguredException(
        'Elasticsearch needs doc_type. Please configure ES_DOC_TYPE in your environment'
    )
def _configure_store(self, **kwargs):
    """
    Load the engine's connection settings, open a connection, and register
    it in the connection registry under the alias `default`.

    Args:
        **kwargs: forwarded to `self.connect` in addition to the
            connection settings read from configuration.

    Raises:
        DataStoreSettingsImproperlyConfiguredException: when no connection
            settings exist for the configured engine.
    """
    self._connection_settings = CHATBOT_NER_DATASTORE.get(self._engine_name)
    if self._connection_settings is None:
        raise DataStoreSettingsImproperlyConfiguredException()
    self._index_name = self._connection_settings[constants.ELASTICSEARCH_ALIAS]
    self._connection = self.connect(**self._connection_settings)
    # Make this connection reachable as the default one in the registry.
    self._conns['default'] = self._connection
def transfer_entities_elastic_search(self, entity_list):
    """
    Transfer the given entities from one environment to another.
    Supported for the Elasticsearch engine only.

    Args:
        entity_list (list): names of the entities to transfer

    Raises:
        NonESEngineTransferException: when the configured engine is not
            Elasticsearch.
        DataStoreSettingsImproperlyConfiguredException: when no source
            Elasticsearch URL can be resolved from settings or helpers.
    """
    if self._engine != ELASTICSEARCH:
        raise NonESEngineTransferException
    # Look up the engine settings once and reuse them for both URLs.
    settings = CHATBOT_NER_DATASTORE.get(self._engine)
    source_url = settings.get('connection_url')
    if source_url is None:
        # Fall back to the URL assembled by the connect helper.
        source_url = elastic_search.connect.get_es_url()
    if source_url is None:
        raise DataStoreSettingsImproperlyConfiguredException()
    destination_url = settings.get('destination_url')
    transporter = elastic_search.transfer.ESTransfer(source=source_url,
                                                     destination=destination_url)
    transporter.transfer_specific_entities(list_of_entities=entity_list)
def create(self, err_if_exists=True, **kwargs):
    """
    Creates the schema/structure for the datastore depending on the engine
    configured in the environment.

    Args:
        err_if_exists (bool): if to throw error when index already exists, default True
        kwargs:
            For Elasticsearch:
                master_timeout: Specify timeout for connection to master
                timeout: Explicit operation timeout
                update_all_types: Whether to update the mapping for all fields with the same name
                    across all types or not
                wait_for_active_shards: Set the number of active shards to wait for before the
                    operation returns.
                doc_type: The name of the document type
                allow_no_indices: Whether to ignore if a wildcard indices expression resolves into
                    no concrete indices. (This includes _all string or when no indices have been
                    specified)
                expand_wildcards: Whether to expand wildcard expression to concrete indices that
                    are open, closed or both., default 'open', valid choices are: 'open', 'closed',
                    'none', 'all'
                ignore_unavailable: Whether specified concrete indices should be ignored when
                    unavailable (missing or closed)

            Refer--
            https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.create
            https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.IndicesClient.put_mapping

    Raises:
        DataStoreSettingsImproperlyConfiguredException if connection settings are invalid or
            missing, or if a required index name key is not configured
        All other exceptions raised by elasticsearch-py library
    """
    if self._client_or_connection is None:
        self._connect()
    if self._engine == ELASTICSEARCH:
        # Each entry: (required, settings key for the index name, settings key for the
        # doc type, alias to point at the index (or None), doc_type validator, creator fn)
        create_map = [  # TODO: use namedtuples
            (True, ELASTICSEARCH_INDEX_1, ELASTICSEARCH_DOC_TYPE, self._store_name,
             self._check_doc_type_for_elasticsearch, elastic_search.create.create_entity_index),
            (False, ELASTICSEARCH_INDEX_2, ELASTICSEARCH_DOC_TYPE, self._store_name,
             self._check_doc_type_for_elasticsearch, elastic_search.create.create_entity_index),
            (False, ELASTICSEARCH_CRF_DATA_INDEX_NAME, ELASTICSEARCH_CRF_DATA_DOC_TYPE, None,
             self._check_doc_type_for_crf_data_elasticsearch, elastic_search.create.create_crf_index),
        ]
        for (required, index_name_key, doc_type_key, alias_name,
             doc_type_checker, create_fn) in create_map:
            index_name = self._connection_settings.get(index_name_key)
            doc_type = self._connection_settings.get(doc_type_key)
            if not index_name:
                if required:
                    # Fix: the key name was never substituted into the message,
                    # so the error used to read with a literal '{}' placeholder.
                    raise DataStoreSettingsImproperlyConfiguredException(
                        '{} key is required in datastore settings '
                        'for elastic_search'.format(index_name_key))
                continue
            doc_type_checker()
            create_fn(
                connection=self._client_or_connection,
                index_name=index_name,
                doc_type=doc_type,
                logger=ner_logger,
                err_if_exists=err_if_exists,
                **kwargs
            )
            if alias_name:
                elastic_search.create.create_alias(connection=self._client_or_connection,
                                                   index_list=[index_name],
                                                   alias_name=alias_name,
                                                   logger=ner_logger)