def __map_to_downstream(self, mapped_step_name, handler_name,
                        mapped_steps_gen, workflow, invocation,
                        message_parameters):
    """A mapping step has completed and has mapped into one or more
    downstream steps. Queue the downstream steps to be handled and
    tracked.
    """

    assert invocation.mapped_count is None
    assert invocation.mapped_waiting is None

    mapped_step = mr.models.kv.step.get(workflow, mapped_step_name)

    # This has to be an integer just in case one of the downstream steps
    # completes before we finish our accounting, here.

    invocation.mapped_waiting = 0
    invocation.save()

    i = 0
    for (k, v) in mapped_steps_gen:
        _logger.debug("Queueing mapping (%d) from invocation [%s].",
                      i, invocation)

        self.__queue_map_step(mapped_step, (k, v), message_parameters)

        i += 1

    step_count = i

    # Now, update the number of mapped steps into the invocation.
    #
    # Because we either decrement or add the maximum positive value, and
    # the mapped_waiting value is never read before being decremented,
    # there is no chance of a completed step seeing mapped_waiting equal
    # zero more than once (which is our trigger for a reduction): zero is
    # only ever seen by the very last manipulation of the counter.

    _logger.debug("Invocation [%s] has mapped (%d) steps.",
                  invocation, step_count)

    # TODO(dustin): We might need to check for whether a reduction is
    #               necessary here. By the time we get here, we could've
    #               potentially finished all steps, with nothing else
    #               checking for (0) waiting-steps.

    invocation = self.__add_mapped_steps(workflow, invocation, step_count)

    _logger.debug("Invocation [%s] has had its counts updated: MC=(%d) "
                  "MW=(%d)",
                  invocation, invocation.mapped_count,
                  invocation.mapped_waiting)
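# Illustrative sketch, not part of this module: the accounting in
# __map_to_downstream() relies on the final bulk add of `step_count` and the
# per-completion decrements summing to zero, with zero only ever observed by
# the very last operation. The simulation below only demonstrates that
# counter invariant; it does not use the real __add_mapped_steps(), which is
# assumed to apply its update atomically.

import random

def _simulate_mapped_waiting(step_count):
    # Interleave the one bulk add with the per-step decrements in a random
    # order and record the value each operation would observe afterward.
    ops = [-1] * step_count + [step_count]
    random.shuffle(ops)

    value = 0
    observed = []

    for op in ops:
        value += op
        observed.append(value)

    # Zero shows up exactly once, and only for the final operation, so the
    # "mapped_waiting == 0" reduction trigger can never fire twice.
    assert observed.count(0) == 1
    assert observed[-1] == 0

for _ in range(1000):
    _simulate_mapped_waiting(random.randint(1, 20))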
def package_request(self, workflow, job, step, handler, arguments,
                    context, is_blocking=False):
    """Prepare an incoming request to be processed."""

    invocation = mr.models.kv.invocation.Invocation(
                    invocation_id=None,
                    workflow_name=workflow.workflow_name,
                    step_name=step.step_name,
                    direction=mr.constants.D_MAP)

    invocation.save()

    _flow_logger.debug("+ Writing ARGUMENTS dataset for root invocation: "
                       "[%s]", invocation)

    dq = mr.models.kv.queues.dataset.DatasetQueue(
            workflow,
            invocation,
            mr.models.kv.queues.dataset.DT_ARGUMENTS)

    for (k, v) in arguments:
        data = {
            'p': (k, v),
        }

        dq.add(data)

    request = mr.models.kv.request.Request(
                request_id=None,
                workflow_name=workflow.workflow_name,
                job_name=job.job_name,
                invocation_id=invocation.invocation_id,
                context=context,
                is_blocking=is_blocking)

    request.save()

    _logger.debug("Received request: [%s]", request)

    message_parameters = mr.shared_types.QUEUE_MESSAGE_PARAMETERS_CLS(
                            workflow=workflow,
                            invocation=invocation,
                            request=request,
                            job=job,
                            step=step,
                            handler=handler)

    return message_parameters
def package_request(self, workflow, job, step, handler, arguments,
                    context):
    """Prepare an incoming request to be processed."""

    invocation = mr.models.kv.invocation.Invocation(
                    invocation_id=None,
                    workflow_name=workflow.workflow_name,
                    step_name=step.step_name,
                    direction=mr.constants.D_MAP)

    invocation.save()

    _flow_logger.debug("+ Writing ARGUMENTS dataset for root invocation: "
                       "[%s]", invocation)

    dq = mr.models.kv.queues.dataset.DatasetQueue(
            workflow,
            invocation,
            mr.models.kv.queues.dataset.DT_ARGUMENTS)

    for (k, v) in arguments:
        data = {
            'p': (k, v),
        }

        dq.add(data)

    request = mr.models.kv.request.Request(
                request_id=None,
                workflow_name=workflow.workflow_name,
                job_name=job.job_name,
                invocation_id=invocation.invocation_id,
                context=context)

    request.save()

    _logger.debug("Received request: [%s]", request)

    message_parameters = mr.shared_types.QUEUE_MESSAGE_PARAMETERS_CLS(
                            workflow=workflow,
                            invocation=invocation,
                            request=request,
                            job=job,
                            step=step,
                            handler=handler)

    return message_parameters
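# Minimal sketch, based on an assumption rather than the real definition in
# mr.shared_types: package_request() constructs QUEUE_MESSAGE_PARAMETERS_CLS
# with exactly the keyword arguments above, so a namedtuple with matching
# fields is enough to stand in for it when reading this code in isolation.

import collections

_QUEUE_MESSAGE_PARAMETERS_SKETCH = collections.namedtuple(
                                    'QueueMessageParameters',
                                    ['workflow', 'invocation', 'request',
                                     'job', 'step', 'handler'])

# The packaged parameters travel with the queued message, so every field set
# in package_request() is available again when the map job is dequeued.
_example = _QUEUE_MESSAGE_PARAMETERS_SKETCH(
                workflow=None, invocation=None, request=None, job=None,
                step=None, handler=None)

assert _example.handler is None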
def handle_map(self, message_parameters):
    """Handle one dequeued map job."""

    request = message_parameters.request
    step = message_parameters.step
    invocation = message_parameters.invocation
    workflow = message_parameters.workflow

    _logger.debug("Processing MAP: [%s] [%s]",
                  invocation, invocation.created_timestamp)

    try:
        ## Call the handler.

        _flow_logger.debug(" Reading ARGUMENTS dataset for (and from) "
                           "mapper: [%s]", invocation)

        dq = mr.models.kv.queues.dataset.DatasetQueue(
                workflow,
                invocation,
                mr.models.kv.queues.dataset.DT_ARGUMENTS)

        # Enumerate the 'p' member of every record.
        arguments = (d['p'] for d in dq.list_data())

        if mr.config.IS_DEBUG is True:
            arguments = list(arguments)

            _logger.debug("Sending arguments to mapper:\n%s",
                          pprint.pformat(arguments))

        wrapped_arguments = {
            'arguments': arguments,
        }

        construction_context = mr.handlers.general.HANDLER_CONTEXT_CLS(
                                request=request,
                                invocation=invocation)

        handler_result_gen = self.__call_handler(
                                construction_context,
                                workflow,
                                step.map_handler_name,
                                wrapped_arguments)

        path_type = next(handler_result_gen)

        _logger.debug("Mapper [%s] path-type: [%s]",
                      invocation, path_type.__class__.__name__)

        assert issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigure) is True

        # Manage downstream steps that were mapped to (the handler was a
        # generator).

        if issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigureToMap) is True:
            self.__map_to_downstream(
                path_type.next_step_name,
                step.map_handler_name,
                handler_result_gen,
                workflow,
                invocation,
                message_parameters)
        elif issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigureToReturn) is True:
            self.__map_collect_result(
                step.map_handler_name,
                handler_result_gen,
                workflow,
                invocation,
                message_parameters)
    except Exception as e:
        _logger.exception("Exception while processing MAP under request: "
                          "%s", request)

        if issubclass(e.__class__, mr.handlers.general.HandlerException):
            # TODO(dustin): Finish debugging this.
            print("MAP ERROR STDOUT >>>>>>>>>>>>>")
            print(e.stdout)
            print("MAP ERROR STDERR >>>>>>>>>>>>>")
            print(e.stderr)
            print("MAP ERROR <<<<<<<<<<<<<<<<<<<<")

        invocation.error = traceback.format_exc()
        invocation.save()

        # Formally mark the request as failed but finished. In the event
        # that request-cleanup is disabled, forensics will be intact.

        request.failed_invocation_id = invocation.invocation_id
        request.is_done = True
        request.save()

        # Send notification.

        notify = mr.log.get_notify()

        notify.exception("Mapper invocation [%s] under request [%s] "
                         "failed. HANDLER=[%s]",
                         invocation.invocation_id, request.request_id,
                         step.map_handler_name)

        # Schedule the request for destruction.

        wm = mr.workflow_manager.get_wm()
        managed_workflow = wm.get(workflow.workflow_name)
        managed_workflow.cleanup_queue.add_request(request)

        raise
def handle_map(self, message_parameters):
    """Handle one dequeued map job."""

    request = message_parameters.request
    step = message_parameters.step
    invocation = message_parameters.invocation
    workflow = message_parameters.workflow

    _logger.debug("Processing MAP: [%s]", invocation)

    try:
        ## Call the handler.

        _flow_logger.debug(" Reading ARGUMENTS dataset for (and from) "
                           "mapper: [%s]", invocation)

        dq = mr.models.kv.queues.dataset.DatasetQueue(
                workflow,
                invocation,
                mr.models.kv.queues.dataset.DT_ARGUMENTS)

        # Enumerate the 'p' member of every record.
        arguments = (d['p'] for d in dq.list_data())

        if mr.config.IS_DEBUG is True:
            arguments = list(arguments)

            _logger.debug("Sending arguments to mapper:\n%s",
                          pprint.pformat(arguments))

        handler_ctx = self.__get_handler_context(workflow, invocation)

        wrapped_arguments = {
            'arguments': arguments,
            'ctx': handler_ctx,
        }

        handler_result_gen = self.__call_handler(
                                workflow,
                                step.map_handler_name,
                                wrapped_arguments)

        path_type = next(handler_result_gen)

        _logger.debug("Mapper [%s] path-type: [%s]",
                      invocation, path_type.__class__.__name__)

        assert issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigure) is True

        # Manage downstream steps that were mapped to (the handler was a
        # generator).

        if issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigureToMap) is True:
            self.__map_to_downstream(
                path_type.next_step_name,
                step.map_handler_name,
                handler_result_gen,
                workflow,
                invocation,
                message_parameters)
        elif issubclass(
                path_type.__class__,
                mr.handlers.scope.MrConfigureToReturn) is True:
            self.__map_collect_result(
                step.map_handler_name,
                handler_result_gen,
                workflow,
                invocation,
                message_parameters)
    except:
        _logger.exception("Exception while processing MAP under request: "
                          "%s", request)

        # TODO(dustin): We might have to remove the chain of invocations,
        #               on error.

        invocation.error = traceback.format_exc()
        invocation.save()

        request.failed_invocation_id = invocation.invocation_id
        request.done = True
        request.save()

        raise
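# Hypothetical handler sketch, not part of this module: handle_map() treats
# the handler's result as a generator whose first yield is a
# mr.handlers.scope.MrConfigure-derived routing object and whose remaining
# yields are (k, v) pairs. The handler names, the constructor signatures of
# MrConfigureToMap/MrConfigureToReturn, and the 'downstream_step' name below
# are assumptions for illustration only.

import mr.handlers.scope

def example_fanout_mapper(arguments, ctx=None):
    # Route every (k, v) pair that follows to a downstream mapping step.
    yield mr.handlers.scope.MrConfigureToMap('downstream_step')

    for (k, v) in arguments:
        # Each yielded pair becomes one queued downstream mapping.
        yield (k, v)

def example_terminal_mapper(arguments, ctx=None):
    # Alternatively, signal that the pairs that follow are results to be
    # collected rather than mapped further.
    yield mr.handlers.scope.MrConfigureToReturn()

    for (k, v) in arguments:
        yield (k, v)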