def publish_message(self):
    try:
        return self.__publish_message()
    except Exception as e:
        logwarn(LOGGER, 'Error in feeder.publish_message(): %s: %s',
                e.__class__.__name__, str(e))
        raise e

def __publish_message_to_channel(self):

    # Find a message to publish.
    # If no messages are left, well, there is nothing to publish!
    try:
        message = self.__get_message_from_stack()
    except queue.Empty:
        logtrace(LOGGER, 'Queue empty. No more messages to be published.')
        return

    # Now try to publish it.
    # If anything goes wrong, it must be put back onto
    # the stack of unpublished messages!
    try:
        success = self.__try_publishing_otherwise_put_back_to_stack(message)
        if success:
            self.__postparations_after_successful_feeding(message)

    # Treat various errors that may occur during publishing:
    except pika.exceptions.ChannelClosed as e:
        logwarn(LOGGER, 'Cannot publish message %i to RabbitMQ because the channel is closed (%s).',
                self.__delivery_number+1, repr(e))

    except AttributeError as e:
        if self.thread._channel is None:
            logwarn(LOGGER, 'Cannot publish message %i to RabbitMQ because there is no channel.',
                    self.__delivery_number+1)
        else:
            logwarn(LOGGER, 'Cannot publish message %i to RabbitMQ (unexpected error %s: %s).',
                    self.__delivery_number+1, e.__class__.__name__, repr(e))

    except AssertionError as e:
        logwarn(LOGGER, 'Cannot publish message %i to RabbitMQ because of an AssertionError: "%s".',
                self.__delivery_number+1, e)
        if 'A non-string value was supplied for self.exchange' in repr(e):
            exch = self.thread.get_exchange_name()
            logwarn(LOGGER, 'Exchange was "%s" (type %s).', exch, type(exch))

def finish_gently(self):
    try:
        return self.__finish_gently()
    except Exception as e:
        logwarn(LOGGER, 'Error in shutter.finish_gently(): %s: %s',
                e.__class__.__name__, str(e))
        raise e

def set_next_host(self):
    if len(self.__trusted_nodes) > 0:
        self.__current_node = self.__get_highest_priority_node(self.__trusted_nodes)
        logdebug(LOGGER, 'Selected a trusted node: %s', self.__current_node['host'])

    elif len(self.__open_nodes) > 0:
        self.__current_node = self.__get_highest_priority_node(self.__open_nodes)
        logdebug(LOGGER, 'Selected an open node: %s', self.__current_node['host'])

    else:
        if self.__current_node is None:
            logwarn(LOGGER, 'Unexpected: No RabbitMQ node left to try, and there is no current one.')
            raise esgfpid.exceptions.ArgumentError('No RabbitMQ nodes were passed at all.')
        logwarn(LOGGER, 'No RabbitMQ node left to try! Keeping the last one: %s',
                self.__current_node['host'])

    self.__exchange_name = self.__current_node['exchange_name']

def __send_a_message(self, message):
    if self.__statemachine.is_WAITING_TO_BE_AVAILABLE():
        self.__log_receival_one_message(message)
        self.__put_one_message_into_queue_of_unsent_messages(message)

    elif self.__statemachine.is_AVAILABLE():
        self.__log_receival_one_message(message)
        self.__put_one_message_into_queue_of_unsent_messages(message)
        self.__trigger_one_publish_action()

    elif self.__statemachine.is_AVAILABLE_BUT_WANTS_TO_STOP() or self.__statemachine.is_PERMANENTLY_UNAVAILABLE():
        errormsg = 'Accepting no more messages'
        logdebug(LOGGER, errormsg + ' (dropping %s).', message)
        logwarn(LOGGER, 'RabbitMQ module was closed and does not accept any more messages. Dropping message. Reason: %s',
                self.__statemachine.get_reason_shutdown())
        # Note: This may happen if the connection failed. We must not stop
        # the publisher in this case, so we do not raise an exception.
        # We only raise an exception if the closing was requested by the publisher!
        if self.__statemachine.get_detail_closed_by_publisher():
            raise OperationNotAllowed(errormsg)

    elif self.__statemachine.is_NOT_STARTED_YET():
        errormsg = 'Cannot send a message, the messaging thread was not started yet!'
        logwarn(LOGGER, errormsg + ' (dropping %s).', message)
        raise OperationNotAllowed(errormsg)

def __try_publishing_otherwise_put_back_to_stack(self, message):
    try:
        # Getting message info:
        properties = self.nodemanager.get_properties_for_message_publications()
        routing_key, msg_string = rabbitutils.get_routing_key_and_string_message_from_message_if_possible(message)
        routing_key = self.nodemanager.adapt_routing_key_for_untrusted(routing_key)

        # Logging:
        logtrace(LOGGER, 'Publishing message %i (key %s) (body %s)...',
                 self.__delivery_number+1, routing_key, msg_string)  # +1 because it will be incremented after the publish.
        log_every_x_times(LOGGER, self.__logcounter_trigger, self.__LOGFREQUENCY,
                          'Trying actual publish... (trigger no. %i).', self.__logcounter_trigger)
        logtrace(LOGGER, '(Publish to channel no. %i).', self.thread._channel.channel_number)

        # Actual publish to exchange (with mandatory delivery, the broker
        # returns unroutable messages instead of silently dropping them):
        self.thread._channel.basic_publish(
            exchange=self.thread.get_exchange_name(),
            routing_key=routing_key,
            body=msg_string,
            properties=properties,
            mandatory=defaults.RABBIT_MANDATORY_DELIVERY
        )
        return True

    # If anything went wrong, put the message back into the stack of
    # unpublished messages before re-raising the exception
    # for further handling:
    except Exception as e:
        logwarn(LOGGER, 'Message was not published. Putting back to queue. Reason: %s: "%s"',
                e.__class__.__name__, repr(e))
        self.thread.put_one_message_into_queue_of_unsent_messages(message)
        logtrace(LOGGER, 'Now (after putting back) left in queue to be published: %i messages.',
                 self.thread.get_num_unpublished())
        raise e

def force_finish(self):
    try:
        return self.__force_finish('Forced finish from outside the thread.')
    except Exception as e:
        logwarn(LOGGER, 'Error in shutter.force_finish(): %s: %s',
                e.__class__.__name__, str(e))
        raise e

def __react_on_nack(self, deliv_tag, multiple):
    if multiple:
        logwarn(LOGGER, 'Received "NACK" for delivery tag: %i and below.', deliv_tag)
        self.__nack_delivery_tag_and_message_several(deliv_tag)
    else:
        logwarn(LOGGER, 'Received "NACK" for delivery tag: %i.', deliv_tag)
        self.__nack_delivery_tag_and_message_single(deliv_tag)

def __check_if_adding_files_allowed_right_now(self):
    dataset_added = self.__machine_state == self.__machine_states['dataset_added']
    files_added = self.__machine_state == self.__machine_states['files_added']

    if not (dataset_added or files_added):
        msg = 'Too late to add files!'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.OperationUnsupportedException(msg)

def __inform_about_state_at_shutdown(self):
    unsent = self.thread.get_num_unpublished()
    unconfirmed = self.thread.get_num_unconfirmed()
    if unsent + unconfirmed > 0:
        logwarn(LOGGER, 'At close down: %i pending messages (%i unpublished messages, %i unconfirmed messages).',
                (unsent + unconfirmed), unsent, unconfirmed)
    else:
        loginfo(LOGGER, 'After close down: All messages were published and confirmed.')

def __rescue_unconfirmed_messages(self):
    if self.__thread.is_alive():
        logwarn(LOGGER, 'Cannot retrieve unconfirmed messages while thread still alive.')
    else:
        self.__leftovers_unconfirmed = self.__get_unconfirmed_messages_as_list()
        num = len(self.__leftovers_unconfirmed)
        if num > 0:
            logdebug(LOGGER, 'Rescued %i unconfirmed messages.', num)
        else:
            logdebug(LOGGER, 'No unconfirmed messages to rescue.')

def __rescue_nacked_messages(self):
    if self.__thread.is_alive():
        logwarn(LOGGER, 'Cannot retrieve rejected (NACKed) messages while thread still alive.')
    else:
        self.__leftovers_nacked = self.__get_nacked_messages_as_list()
        num = len(self.__leftovers_nacked)
        if num > 0:
            logdebug(LOGGER, 'Rescued %i rejected (NACKed) messages.', num)
        else:
            logdebug(LOGGER, 'No rejected (NACKed) messages to rescue.')

def create_publication_assistant(self, **args):
    '''
    Create an assistant for a dataset that allows making PID requests
    for the dataset and all of its files.

    :param drs_id: Mandatory. The dataset id of the dataset to be published.
    :param version_number: Mandatory. The version number of the dataset to be published.
    :param is_replica: Mandatory. Flag to indicate whether the dataset is a replica.

    .. note:: If the replica flag is set to False, the publication may still be
        considered a replica by the consuming servlet, namely if the dataset had
        already been published at a different host. For this, please refer to the
        consumer documentation.

    :return: A publication assistant which provides all necessary methods to
        publish a dataset and its files.
    '''

    # Check args:
    logdebug(LOGGER, 'Creating publication assistant..')
    mandatory_args = ['drs_id', 'version_number', 'is_replica']
    esgfpid.utils.check_presence_of_mandatory_args(args, mandatory_args)

    # Check if the thredds service path is given:
    if self.__thredds_service_path is None:
        msg = 'No thredds_service_path given (but it is mandatory for publication)'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.ArgumentError(msg)

    # Check if the data node is given:
    if self.__data_node is None:
        msg = 'No data_node given (but it is mandatory for publication)'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.ArgumentError(msg)

    # Check solr access:
    if self.__coupler.is_solr_switched_off():
        pass  # solr access not mandatory anymore

    # Create the publication assistant:
    assistant = esgfpid.assistant.publish.DatasetPublicationAssistant(
        drs_id=args['drs_id'],
        version_number=args['version_number'],
        thredds_service_path=self.__thredds_service_path,
        data_node=self.__data_node,
        prefix=self.prefix,
        coupler=self.__coupler,
        is_replica=args['is_replica'],
        consumer_solr_url=self.__consumer_solr_url  # may be None
    )
    logdebug(LOGGER, 'Creating publication assistant.. done')
    return assistant

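# Usage sketch (illustrative only): `connector` stands for an already initialized
# instance of this library's client class, which is an assumption here, and the
# argument values are made up. The returned assistant then provides the methods
# to add the dataset's files and to finish the publication.
#
#   assistant = connector.create_publication_assistant(
#       drs_id='myproject.mymodel.myexperiment',
#       version_number=20160101,
#       is_replica=False
#   )
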
def __check_if_dataset_publication_allowed_right_now(self):
    if self.__machine_state != self.__machine_states['files_added']:
        msg = None
        if self.__machine_state == self.__machine_states['dataset_added']:
            msg = 'No file added yet.'
        else:
            msg = 'Publication was already done.'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.OperationUnsupportedException(msg)

def force_finish_rabbit_thread(self):
    logdebug(LOGGER, 'Force finishing...')

    # Make sure no more messages are accepted from the publisher
    # while confirms are still accepted:
    self.__statemachine.detail_asked_to_force_close_by_publisher = True
    self.__statemachine.set_to_wanting_to_stop()

    # Asking the thread to finish:
    logwarn(LOGGER, 'Forced close down of message sending module. Will not wait for pending messages, if any.')
    self.__thread.add_event_force_finish()
    logdebug(LOGGER, 'Force finishing... done')

    self.__join_and_rescue()

def unpublish_one_version(self, **args):
    '''
    Sends a PID update request for the unpublication of one version of a
    dataset currently published at the given data node.

    Either the handle or the pair of drs_id and version_number has to be
    provided; otherwise an exception is raised.

    The consumer will of course compare the PID request message's timestamp
    with the timestamp of the last publication, so that republications in
    the meantime are not unpublished.

    The unpublication of the files is included in this method.

    :param handle: Optional. The handle of the dataset to be unpublished.
    :param drs_id: Optional. The dataset id of the dataset to be unpublished.
    :param version_number: Optional. The version number of the dataset to be unpublished.
    :raises: ArgumentError: If not enough arguments are passed to identify the dataset,
        or if no data node was specified during library init.
    '''

    # Check args:
    optional_args = ['handle', 'drs_id', 'version_number']
    esgfpid.utils.add_missing_optional_args_with_value_none(args, optional_args)

    # Check if the data node is given:
    if self.__data_node is None:
        msg = 'No data_node given (but it is mandatory for unpublication)'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.ArgumentError(msg)

    # Unpublish:
    assistant = esgfpid.assistant.unpublish.AssistantOneVersion(
        drs_id=args['drs_id'],
        data_node=self.__data_node,
        prefix=self.prefix,
        coupler=self.__coupler,
        message_timestamp=esgfpid.utils.get_now_utc_as_formatted_string()
    )
    assistant.unpublish_one_dataset_version(
        handle=args['handle'],
        version_number=args['version_number']
    )

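# Usage sketch (illustrative only): `connector` stands for an already initialized
# instance of this library's client class (an assumption here); the handle, drs_id
# and version_number below are made-up example values. Either the handle or the
# drs_id/version_number pair identifies the dataset version.
#
#   connector.unpublish_one_version(handle='hdl:21.14100/some-example-handle')
#   # ...or equivalently:
#   connector.unpublish_one_version(drs_id='myproject.mymodel.myexperiment',
#                                   version_number=20160101)
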
def set_next_host(self):
    if len(self.__trusted_nodes) > 0:
        self.__current_node = self.__get_highest_priority_node(self.__trusted_nodes)
        logdebug(LOGGER, 'Selected a trusted node: %s', self.__current_node['host'])

    elif len(self.__open_nodes) > 0:
        self.__current_node = self.__get_highest_priority_node(self.__open_nodes)
        logdebug(LOGGER, 'Selected an open node: %s', self.__current_node['host'])

    else:
        logwarn(LOGGER, 'No RabbitMQ node left to try! Keeping the last one: %s',
                self.__current_node['host'])

    self.__exchange_name = self.__current_node['exchange_name']

def on_delivery_confirmation(self, method_frame):
    deliv_tag, confirmation_type, multiple = self.__get_confirm_info(method_frame)
    log_every_x_times(LOGGER, self.__logcounter, self.__LOGFREQUENCY,
                      'Received a confirm (%s)', confirmation_type)
    self.__logcounter += 1

    if confirmation_type == 'ack':
        logtrace(LOGGER, 'Received "ACK" from messaging service.')
        self.__react_on_ack(deliv_tag, multiple)
    elif confirmation_type == 'nack':
        logtrace(LOGGER, 'Received "NACK" from messaging service.')
        self.__react_on_nack(deliv_tag, multiple)
    else:
        msg = 'Received asynchronous response of unknown type from messaging service.'
        logwarn(LOGGER, msg)
        raise UnknownServerResponse(msg + ':' + str(method_frame))

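# Note on the surrounding setup (an assumption, not shown in this module): in pika,
# a confirmation callback like on_delivery_confirmation() is typically registered on
# the channel after enabling publisher confirms, e.g. via
# channel.confirm_delivery(callback=self.on_delivery_confirmation) in older pika
# versions, or confirm_delivery(ack_nack_callback=...) in pika >= 1.0. The
# method_frame then carries either a Basic.Ack or a Basic.Nack from the broker.
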
def __log_why_cannot_feed_the_rabbit_now(self):
    log_every_x_times(LOGGER, self.__logcounter_trigger, self.__LOGFREQUENCY,
                      'Cannot publish message to RabbitMQ (trigger no. %i).', self.__logcounter_trigger)

    if self.statemachine.is_WAITING_TO_BE_AVAILABLE():
        logdebug(LOGGER, 'Cannot publish message to RabbitMQ yet, as the connection is not ready.')

    elif self.statemachine.is_NOT_STARTED_YET():
        logerror(LOGGER, 'Cannot publish message to RabbitMQ, as the thread is not running yet.')

    elif self.statemachine.is_PERMANENTLY_UNAVAILABLE() or self.statemachine.is_FORCE_FINISHED():

        if self.statemachine.detail_could_not_connect:
            logtrace(LOGGER, 'Could not publish message to RabbitMQ, as the connection failed.')
            if self.__have_not_warned_about_connection_fail_yet:
                logwarn(LOGGER, 'Could not publish message(s) to RabbitMQ. The connection failed definitively.')
                self.__have_not_warned_about_connection_fail_yet = False

        elif self.statemachine.get_detail_closed_by_publisher():
            logtrace(LOGGER, 'Cannot publish message to RabbitMQ, as the connection was closed by the user.')
            if self.__have_not_warned_about_force_close_yet:
                logwarn(LOGGER, 'Could not publish message(s) to RabbitMQ. The sender was closed by the user.')
                self.__have_not_warned_about_force_close_yet = False

    else:
        if self.thread._channel is None:
            logerror(LOGGER, 'Very unexpected. Could not publish message(s) to RabbitMQ. There is no channel.')

def __is_this_node_in_last_prio_already(self, where_to_look):
    try:
        list_candidates = where_to_look[LAST_PRIO]
    except KeyError:
        errmsg = 'No node of last prio (%s) exists.' % LAST_PRIO
        logwarn(LOGGER, errmsg)
        return False

    for candidate in list_candidates:
        if self.__compare_nodes(candidate, self.__current_node):
            logtrace(LOGGER, 'Found current node in archive (in list of last-prio nodes).')
            return True
    return False

def unpublish_all_versions(self, **args):
    '''
    Sends a PID update request for the unpublication of all versions of a
    dataset currently published at the given data node.

    If the library has solr access, it will try to find all the dataset
    versions and their handles from solr and send individual messages for
    each version. Otherwise, one message is sent, and the queue consuming
    servlet has to identify the relevant versions, also making sure not to
    unpublish any versions that may have been republished in the meantime.

    :param drs_id: Dataset id of the dataset to be unpublished.
    :raises: ArgumentError: If the data node was not provided at library initialization.
    '''

    # Check args:
    mandatory_args = ['drs_id']
    esgfpid.utils.check_presence_of_mandatory_args(args, mandatory_args)

    # Check if the data node is given:
    if self.__data_node is None:
        msg = 'No data_node given (but it is mandatory for unpublication)'
        logwarn(LOGGER, msg)
        raise esgfpid.exceptions.ArgumentError(msg)

    # Check solr access:
    if self.__coupler.is_solr_switched_off():
        msg = 'Unpublication of all versions: Without solr access, we cannot identify the versions, so the consumer will have to take care of this.'
        logdebug(LOGGER, msg)
        #raise esgfpid.exceptions.ArgumentError('No solr access. Solr access is needed for publication. Please provide access to a solr index when initializing the library')

    # Unpublish:
    assistant = esgfpid.assistant.unpublish.AssistantAllVersions(
        drs_id=args['drs_id'],
        data_node=self.__data_node,
        prefix=self.prefix,
        coupler=self.__coupler,
        message_timestamp=esgfpid.utils.get_now_utc_as_formatted_string(),
        consumer_solr_url=self.__consumer_solr_url  # may be None
    )
    assistant.unpublish_all_dataset_versions()

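# Usage sketch (illustrative only; `connector` stands for an already initialized
# instance of this library's client class, an assumption here, and the drs_id is
# a made-up example value):
#
#   connector.unpublish_all_versions(drs_id='myproject.mymodel.myexperiment')
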
def __check_data_consistency(self, ignore_exception):
    checker = esgfpid.assistant.consistency.Checker(
        coupler=self.__coupler,
        drs_id=self.__drs_id,
        version_number=self.__version_number,
        data_node=self.__data_node
    )
    check_possible = checker.can_run_check()
    if check_possible:
        check_passed = checker.data_consistency_check(self.__list_of_file_handles)
        if check_passed:
            loginfo(LOGGER, 'Data consistency check passed for dataset %s.', self.__dataset_handle)
        else:
            msg = 'Dataset consistency check failed'
            logwarn(LOGGER, msg)
            if not ignore_exception:
                raise esgfpid.exceptions.InconsistentFilesetException(msg)
    else:
        logdebug(LOGGER, 'No consistency check was carried out.')

def __send_many_messages(self, messages):
    if self.__statemachine.is_WAITING_TO_BE_AVAILABLE():
        self.__log_receival_many_messages(messages)
        self.__put_all_messages_into_queue_of_unsent_messages(messages)

    elif self.__statemachine.is_AVAILABLE():
        self.__log_receival_many_messages(messages)
        self.__put_all_messages_into_queue_of_unsent_messages(messages)
        self.__trigger_n_publish_actions(len(messages))

    elif (self.__statemachine.is_AVAILABLE_BUT_WANTS_TO_STOP() or
          self.__statemachine.is_PERMANENTLY_UNAVAILABLE() or
          self.__statemachine.is_FORCE_FINISHED()):
        errormsg = 'Accepting no more messages'
        logwarn(LOGGER, errormsg + ' (dropping %i messages).', len(messages))
        if self.__statemachine.get_detail_closed_by_publisher():
            raise OperationNotAllowed(errormsg)

    elif self.__statemachine.is_NOT_STARTED_YET():
        errormsg = 'Cannot send any messages, the messaging thread was not started yet!'
        logwarn(LOGGER, errormsg + ' (dropping %i messages).', len(messages))
        raise OperationNotAllowed(errormsg)

def set_priority_low_for_current(self):
    # We do not change the priority stored as attribute in the
    # dicts, BUT we change the priority under which it is stored in
    # the list of nodes to be used.

    # Deal with open or trusted node:
    if self.__current_node['is_open']:
        where_to_look = self.__open_nodes_archive
    else:
        where_to_look = self.__trusted_nodes_archive

    # Go over all nodes of the current prio to find the
    # current one, then move it to a different prio:
    moved = False
    current_prio = self.__current_node['priority']
    try:
        moved = self.__move_to_last_prio(current_prio, where_to_look)
        if moved:
            return  # changed successfully!
    except KeyError:
        logsafe = make_logsafe(where_to_look)
        errmsg = 'No node of prio %s found. Nodes: %s.' % (current_prio, logsafe)
        logwarn(LOGGER, errmsg)

    # Had the node already been added to the last-prio nodes?
    last_already = self.__is_this_node_in_last_prio_already(where_to_look)
    if last_already:
        logdebug(LOGGER, 'Node already had lowest priority.')
        return  # nothing to change!

    # This is extremely unlikely - in fact I don't see how it could occur:
    if (not moved) and (not last_already):
        errmsg = ('Could not find this node\'s priority (%s), nor the last-priority (%s). '
                  'Somehow this node\'s priority was changed weirdly.' % (current_prio, LAST_PRIO))
        logwarn(LOGGER, errmsg)
        logsafe = make_logsafe(where_to_look)
        logwarn(LOGGER, 'All nodes: %s' % logsafe)

        # No matter where the node is stored, move it to "last" prio:
        for prio, nodes in where_to_look.items():
            logtrace(LOGGER, 'Looking in prio "%s"...' % prio)
            moved = self.__move_to_last_prio(prio, where_to_look)
            if moved:
                return  # changed successfully!

        errmsg = 'Node definitely not found, cannot change prio.'
        logwarn(LOGGER, errmsg)
        raise ValueError(errmsg)

def __close_because_no_point_in_waiting(self):

    # Logging, depending on why we closed...
    logdebug(LOGGER, 'Gentle finish (iteration %i): Closing, as there is no point in waiting any longer.',
             self.__close_decision_iterations)
    if self.statemachine.get_detail_closed_by_publisher():
        logwarn(LOGGER, 'Not waiting for pending messages: No connection to server (previously closed by user).')
    elif self.statemachine.detail_could_not_connect:
        logwarn(LOGGER, 'Not waiting for pending messages: No connection to server (unable to connect).')
    else:
        logwarn(LOGGER, 'Not waiting for pending messages: No connection to server (unsure why).')

    # Actual close:
    self.__force_finish('Force finish as we are not sending the messages anyway.')
    self.__tell_publisher_to_stop_waiting_for_gentle_finish()

def __logwarn(self, msg):
    if self.__print_errors_to_console:
        print(msg)
    utils.logwarn(LOGGER, msg)

def open_rabbit_connection(self):
    continue_connecting = True
    while continue_connecting:
        success = self.__try_connecting_to_next()

        if success:
            continue_connecting = False

        else:
            # Log the failure:
            oldhost = self.__nodemanager.get_connection_parameters().host
            time_passed = datetime.datetime.now() - self.__start_connect_time
            loginfo(LOGGER, 'Failed connection to RabbitMQ at %s after %s seconds.',
                    oldhost, time_passed.total_seconds())

            # If there are alternative URLs, try one of them:
            if self.__nodemanager.has_more_urls():
                logdebug(LOGGER, 'Connection failure: %s fallback URLs left to try.',
                         self.__nodemanager.get_num_left_urls())
                self.__nodemanager.set_next_host()
                newhost = self.__nodemanager.get_connection_parameters().host
                loginfo(LOGGER, 'Next connection attempt (now) to %s.', newhost)

            # If there are no URLs left, reset the node manager to
            # start at the first nodes again...
            else:
                self.__reconnect_counter += 1
                if self.__reconnect_counter <= self.__max_reconnection_tries:
                    reopen_seconds = self.__wait_seconds_before_reconnect
                    logdebug(LOGGER, 'Connection failure: Failed connecting to all hosts. Waiting %s seconds and starting over.',
                             reopen_seconds)
                    self.__nodemanager.reset_nodes()
                    newhost = self.__nodemanager.get_connection_parameters().host
                    loginfo(LOGGER, 'Next connection attempt (in %s seconds) to %s.',
                            reopen_seconds, newhost)
                    time.sleep(reopen_seconds)

                # Give up after so many tries...
                else:
                    continue_connecting = False
                    errormsg = ('Permanently failed to connect to RabbitMQ. Tried all hosts %s %s times. '
                                'Giving up. No PID requests will be sent.' %
                                (list(self.__all_hosts_that_were_tried), self.__max_reconnection_tries))
                    logerror(LOGGER, errormsg)
                    collected_errors = ' - '.join(self.__error_messages_during_init)
                    logwarn(LOGGER, 'No connection possible. Errors: %s' % collected_errors)
                    raise PIDServerException(errormsg)
