def _create_es_client():
    if es_config:
        es_client = ElasticSearchService(es_config)
        if not es_client.client_ok():
            log.error("Unable to create Elasticsearch client instance")
            return False
        return es_client
    return False
def load_test_data_into_es(dataset_amt):
    log.info("Loading test data into Elasticsearch..")
    es_client = ElasticSearchService.get_elasticsearch_service(es_config)
    if es_client is None:
        log.error("Unable to initialize Elasticsearch client")
        return False

    metax_api = MetaxAPIService.get_metax_api_service(metax_api_config)
    if metax_api is None:
        log.error("Unable to initialize Metax API client")
        return False

    if not es_client.ensure_index_existence():
        return False

    cr_identifiers = metax_api.get_latest_catalog_record_identifiers()
    if cr_identifiers:
        identifiers_to_load = cr_identifiers[0:min(len(cr_identifiers), dataset_amt)]
        identifiers_to_delete = []
        es_data_models = convert_identifiers_to_es_data_models(metax_api, identifiers_to_load, identifiers_to_delete)
        es_client.do_bulk_request_for_datasets(es_data_models, identifiers_to_delete)
        log.info("Test data loaded into Elasticsearch")
        return True

    log.error("No catalog record identifiers to load")
    return False
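# Usage sketch (illustrative; the CLI wiring below is an assumption, only
# load_test_data_into_es(dataset_amt) above comes from this module): load at most the
# requested number of catalog records and use the boolean return value as the exit status.
if __name__ == '__main__':
    import sys
    dataset_amt = int(sys.argv[1]) if len(sys.argv) > 1 else 100
    sys.exit(0 if load_test_data_into_es(dataset_amt) else 1)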
def delete_search_index():
    es_client = ElasticSearchService.get_elasticsearch_service(es_config)
    if es_client is None:
        log.error("Unable to initialize Elasticsearch client")
        return

    es_client.delete_index()
def __init__(self):
    self.log = get_logger(__name__)
    self.event_processing_completed = True
    self.init_ok = False

    # Get configs. If these raise errors, let consumer init fail
    self.rabbit_settings = get_metax_rabbit_mq_config()
    es_settings = get_elasticsearch_config()
    self.is_local_dev = True if os.path.isdir("/etsin/ansible") else False

    if not self.rabbit_settings or not es_settings:
        self.log.error("Unable to load RabbitMQ configuration or Elasticsearch configuration")
        return

    self.credentials = pika.PlainCredentials(self.rabbit_settings['USER'], self.rabbit_settings['PASSWORD'])
    self.exchange = self.rabbit_settings['EXCHANGE']
    self._set_queue_names(self.is_local_dev)

    self.es_client = ElasticSearchService.get_elasticsearch_service(es_settings)
    if self.es_client is None:
        return

    if not self.es_client.ensure_index_existence():
        return

    self.init_ok = True
def create_search_index_and_doc_type_mapping_if_not_exist():
    es_client = ElasticSearchService.get_elasticsearch_service(es_config)
    if es_client is None:
        log.error("Unable to initialize Elasticsearch client")
        return False

    if not es_client.ensure_index_existence():
        return False

    return True
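# Illustrative call site (an assumption, not part of the original code): ensure the index and
# type mapping exist before anything writes to Elasticsearch, e.g. at application startup:
#
#     if not create_search_index_and_doc_type_mapping_if_not_exist():
#         log.error("Search index setup failed, aborting")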
def load_test_data_into_es(dataset_amt):
    log.info("Loading test data into Elasticsearch..")
    es_client = ElasticSearchService(es_config)
    metax_api = MetaxAPIService(metax_api_config)
    if not es_client or not metax_api:
        log.error("Loading test data into Elasticsearch failed")
        return False

    if not es_client.index_exists():
        log.info("Index does not exist, trying to create")
        if not es_client.create_index_and_mapping():
            log.error("Unable to create index")
            return False

    all_metax_urn_identifiers = metax_api.get_all_catalog_record_urn_identifiers()
    if all_metax_urn_identifiers:
        urn_ids_to_load = all_metax_urn_identifiers[0:min(len(all_metax_urn_identifiers), dataset_amt)]
        identifiers_to_delete = []
        es_data_models = convert_identifiers_to_es_data_models(metax_api, urn_ids_to_load, identifiers_to_delete)
        es_client.do_bulk_request_for_datasets(es_data_models, identifiers_to_delete)
        log.info("Test data loaded into Elasticsearch")
        return True

    return False
class MetaxConsumer():

    def __init__(self):
        self.log = get_logger(__name__)
        self.event_processing_completed = True
        self.init_ok = False

        # Get configs. If these raise errors, let consumer init fail
        rabbit_settings = get_metax_rabbit_mq_config()
        es_settings = get_elasticsearch_config()
        is_local_dev = True if os.path.isdir("/etsin/ansible") else False

        if not rabbit_settings or not es_settings:
            self.log.error("Unable to load RabbitMQ configuration or Elasticsearch configuration")
            return

        # Set up RabbitMQ connection, channel, exchange and queues
        credentials = pika.PlainCredentials(rabbit_settings['USER'], rabbit_settings['PASSWORD'])

        # Try connecting once a minute, up to 30 times
        try:
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(
                    rabbit_settings['HOST'],
                    rabbit_settings['PORT'],
                    rabbit_settings['VHOST'],
                    credentials,
                    connection_attempts=30,
                    retry_delay=60))
        except Exception as e:
            self.log.error(e)
            self.log.error("Unable to open RabbitMQ connection")
            return

        # Set up Elasticsearch client. If a connection cannot be established, retry every 2 seconds for up to 30 seconds
        es_conn_ok = False
        i = 0
        while not es_conn_ok and i < 15:
            self.es_client = ElasticSearchService(es_settings)
            if self.es_client.client_ok():
                es_conn_ok = True
            else:
                time.sleep(2)
                i += 1

        if not es_conn_ok or not self._ensure_index_existence():
            return

        self.channel = connection.channel()
        self.exchange = rabbit_settings['EXCHANGE']

        self._set_queue_names(is_local_dev)
        self._create_and_bind_queues(is_local_dev)

        def callback_create(ch, method, properties, body):
            if not self._init_event_callback_ok("create", ch, method):
                return
            body_as_json = self._get_event_json_body(ch, method, body)
            if not body_as_json:
                return
            self._convert_to_es_doc_and_reindex(ch, method, body_as_json)

        def callback_update(ch, method, properties, body):
            if not self._init_event_callback_ok("update", ch, method):
                return
            body_as_json = self._get_event_json_body(ch, method, body)
            if not body_as_json:
                return
            self._convert_to_es_doc_and_reindex(ch, method, body_as_json)

        def callback_delete(ch, method, properties, body):
            if not self._init_event_callback_ok("delete", ch, method):
                return
            body_as_json = self._get_event_json_body(ch, method, body)
            if not body_as_json:
                return
            self._delete_from_index(ch, method, body_as_json)

        # Set up consumers so that acks are required
        self.create_consumer_tag = self.channel.basic_consume(callback_create, queue=self.create_queue, no_ack=False)
        self.update_consumer_tag = self.channel.basic_consume(callback_update, queue=self.update_queue, no_ack=False)
        self.delete_consumer_tag = self.channel.basic_consume(callback_delete, queue=self.delete_queue, no_ack=False)

        self.init_ok = True

    def run(self):
        self.log.info('RabbitMQ client starting to consume messages..')
        print('[*] RabbitMQ is running. To exit press CTRL+C. See logs for indexing details.')
        self.channel.start_consuming()

    def before_stop(self):
        self._cancel_consumers()

    def _delete_from_index(self, ch, method, body_as_json):
        try:
            identifier_to_delete = body_as_json.get('urn_identifier', None)
            if not identifier_to_delete:
                ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
            else:
                delete_success = self.es_client.delete_dataset(identifier_to_delete)
                if delete_success:
                    ch.basic_ack(delivery_tag=method.delivery_tag)
                else:
                    self.log.error('Failed to delete %s', body_as_json.get('urn_identifier'))
                    ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
        except RequestError:
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
        finally:
            self.event_processing_completed = True

    def _convert_to_es_doc_and_reindex(self, ch, method, body_as_json):
        if catalog_record_is_deprecated(body_as_json):
            self.log.debug("Received identifier {0} for a catalog record that is deprecated. "
                           "Trying to delete from index if it exists.."
                           .format(body_as_json['research_dataset'].get('urn_identifier', '')))
            self._delete_from_index(ch, method, body_as_json)
            self.event_processing_completed = True
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        if catalog_record_has_next_version_identifier(body_as_json):
            self.log.debug("Received identifier {0} for a catalog record that has a next version {1}. "
                           "Skipping reindexing.."
                           .format(body_as_json['research_dataset'].get('urn_identifier', ''),
                                   body_as_json['next_version'].get('urn_identifier')))
            self.event_processing_completed = True
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        prev_version_id = get_catalog_record_previous_version_identifier(body_as_json)
        converter = CRConverter()
        es_data_model = converter.convert_metax_cr_json_to_es_data_model(body_as_json)
        if not es_data_model:
            self.log.error("Unable to convert Metax catalog record to ES data model, not requeuing message")
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
            self.event_processing_completed = True
            return

        try:
            es_reindex_success = \
                self.es_client.reindex_dataset(es_data_model) and \
                (prev_version_id is None or self.es_client.delete_dataset(prev_version_id))
            if es_reindex_success:
                ch.basic_ack(delivery_tag=method.delivery_tag)
            else:
                self.log.error('Failed to reindex %s', body_as_json.get('research_dataset').get('urn_identifier'))
                ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
        except Exception:
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
        finally:
            self.event_processing_completed = True

    def _init_event_callback_ok(self, callback_type, ch, method):
        self.event_processing_completed = False
        self.log.debug("Received {0} message from Metax RabbitMQ".format(callback_type))
        if not self._ensure_index_existence():
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
            self.event_processing_completed = True
            return False
        return True

    def _get_event_json_body(self, ch, method, body):
        body_as_json = self._get_message_body_as_json(body)
        if not body_as_json:
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
            self.event_processing_completed = True
            return None
        return body_as_json

    def _get_message_body_as_json(self, body):
        try:
            return json.loads(body)
        except ValueError:
            self.log.error("RabbitMQ message cannot be interpreted as json")
            return None

    def _cancel_consumers(self):
        self.channel.basic_cancel(consumer_tag=self.create_consumer_tag)
        self.channel.basic_cancel(consumer_tag=self.update_consumer_tag)
        self.channel.basic_cancel(consumer_tag=self.delete_consumer_tag)

    def _set_queue_names(self, is_local_dev):
        self.create_queue = 'etsin-create'
        self.update_queue = 'etsin-update'
        self.delete_queue = 'etsin-delete'

        if is_local_dev:
            # The point of this section is to give unique names to the queues created in a local dev env.
            # This is done to prevent the target rabbitmq server from having multiple consumers consuming the same
            # queue (which would result in round-robin delivery of messages).
            #
            # Below, a time-to-live is also set for local dev queues so that they are automatically deleted from
            # the target rabbitmq server after a set period of time and do not fill up the rabbitmq virtual host.
            # Cf. http://www.rabbitmq.com/ttl.html#queue-ttl

            import json
            if os.path.isfile('/srv/etsin/rabbitmq_queues.json'):
                with open('/srv/etsin/rabbitmq_queues.json') as json_data:
                    queues = json.load(json_data)
                    self.create_queue = queues.get('create', None)
                    self.update_queue = queues.get('update', None)
                    self.delete_queue = queues.get('delete', None)
            else:
                import time
                timestamp = str(time.time())
                self.create_queue += '-' + timestamp
                self.update_queue += '-' + timestamp
                self.delete_queue += '-' + timestamp
                queues = {'create': self.create_queue, 'update': self.update_queue, 'delete': self.delete_queue}
                with open('/srv/etsin/rabbitmq_queues.json', 'w') as outfile:
                    json.dump(queues, outfile)

    def _create_and_bind_queues(self, is_local_dev):
        args = {}
        if is_local_dev:
            # Expire local dev queues after 8 hours of inactivity
            args['x-expires'] = 28800000

        self.channel.queue_declare(self.create_queue, durable=True, arguments=args)
        self.channel.queue_declare(self.update_queue, durable=True, arguments=args)
        self.channel.queue_declare(self.delete_queue, durable=True, arguments=args)

        self.channel.queue_bind(exchange=self.exchange, queue=self.create_queue, routing_key='create')
        self.channel.queue_bind(exchange=self.exchange, queue=self.update_queue, routing_key='update')
        self.channel.queue_bind(exchange=self.exchange, queue=self.delete_queue, routing_key='delete')

    def _ensure_index_existence(self):
        if not self.es_client.index_exists():
            if not self.es_client.create_index_and_mapping():
                # If there's no ES index, don't create the consumer
                self.log.error("Unable to create Elasticsearch index and type mapping")
                return False
        return True
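# Illustrative runner (an assumption; the surrounding process wiring is not part of this class):
# construct the consumer, bail out if initialization failed, and consume until interrupted.
if __name__ == '__main__':
    import sys
    consumer = MetaxConsumer()
    if not consumer.init_ok:
        sys.exit(1)
    try:
        consumer.run()
    except KeyboardInterrupt:
        consumer.before_stop()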
def __init__(self):
    self.metax_api = MetaxAPIService.get_metax_api_service(metax_api_config)
    self.es_client = ElasticSearchService.get_elasticsearch_service(es_config)