def postprocess_job(klass, job, state):
    """Forget the job's cached footprint and, when the job failed, publish an
    IndexTransactionFailed notification to the platefile index exchange.

    Returns the job unchanged.
    """
    # Equivalent to the membership-test-then-delete idiom: remove if present.
    klass.current_footprints.pop(job.uuid, None)
    if state == 'failed':
        transaction_id, platefile_id = klass.get_plate_info(job.output)
        if transaction_id and platefile_id:
            failure_bytes = protocols.pack(
                protobuf.IndexTransactionFailed,
                {'platefile_id': platefile_id, 'transaction_id': transaction_id})
            wrapper_bytes = protocols.pack(
                protobuf.BroxtonRequestWrapper,
                {'sequence_number': 0,
                 'requestor': '',
                 'method': 'IndexTransactionFailed',
                 'payload': failure_bytes})
            msg = Message(wrapper_bytes)
            klass.messagebus.basic_publish(msg,
                                           exchange='ngt.platefile.index',
                                           routing_key='index')
            logger.debug("Message published: " + str(msg))
    return job
def postprocess(self):
    """Drop this job's cached footprint; if the job failed, notify the
    platefile index that the transaction failed.

    Returns self.job.
    """
    self.current_footprints.pop(self.job.uuid, None)  # no-op when absent
    if self.job.status == 'failed':
        transaction_id, platefile_id = self._get_plate_info(self.job.output)
        if transaction_id and platefile_id:
            failure_payload = protocols.pack(
                protobuf.IndexTransactionFailed,
                {'platefile_id': platefile_id, 'transaction_id': transaction_id})
            request_bytes = protocols.pack(
                protobuf.BroxtonRequestWrapper,
                {'sequence_number': 0,
                 'requestor': '',
                 'method': 'TransactionFailed',
                 'payload': failure_payload})
            self.messagebus.basic_publish(Message(request_bytes),
                                          exchange='ngt.platefile.index_0',
                                          routing_key='index')
    return self.job
def register_reaper(msgbytes):
    """Queue a reaper registration on the database thread and ack at once.

    The actual DB write is deferred to _register_reaper via thread_database.
    """
    # TODO: Handle the corner case where a reaper has been expired or
    # soft-deleted, and tries to register itself again.  Currently this
    # would result in a ProgrammerError from psycopg.
    reg_request = protocols.unpack(protobuf.ReaperRegistrationRequest, msgbytes)
    logger.info("Got registration request from reaper %s" % reg_request.reaper_uuid)
    thread_database.enqueue(_register_reaper, reg_request)
    ack = {'ack': protobuf.AckResponse.ACK}
    return protocols.pack(protobuf.AckResponse, ack)
def job_ended(msgbytes):
    '''Update job record with properties defined at job end time ().

    The DB update is deferred to _job_ended on the database thread; this
    handler only logs and acknowledges.
    '''
    end_request = protocols.unpack(protobuf.ReaperJobEndRequest, msgbytes)
    short_id = end_request.job_id[:8]
    logger.info("Job %s ended: %s" % (short_id, end_request.state))
    d_logger.debug("Request end: %s (%s)" % (short_id, end_request.state))
    thread_database.enqueue(_job_ended, end_request)
    return protocols.pack(protobuf.AckResponse, {'ack': protobuf.AckResponse.ACK})
def postprocess(self):
    """Clean up per-job footprint state; on failure, publish a
    TransactionFailed request to the platefile index exchange.

    Returns self.job.
    """
    footprints = self.current_footprints
    if self.job.uuid in footprints:
        del footprints[self.job.uuid]
    if self.job.status != 'failed':
        return self.job
    transaction_id, platefile_id = self._get_plate_info(self.job.output)
    if transaction_id and platefile_id:
        wrapped = protocols.pack(protobuf.BroxtonRequestWrapper, {
            'sequence_number': 0,
            'requestor': '',
            'method': 'TransactionFailed',
            'payload': protocols.pack(
                protobuf.IndexTransactionFailed,
                {'platefile_id': platefile_id,
                 'transaction_id': transaction_id}),
        })
        self.messagebus.basic_publish(Message(wrapped),
                                      exchange='ngt.platefile.index_0',
                                      routing_key='index')
    return self.job
def job_started(msgbytes):
    '''Update the Job record to with properties defined at job start
    (pid, start_time,...).  The write is queued for the DB thread.'''
    start_request = protocols.unpack(protobuf.ReaperJobStartRequest, msgbytes)
    logger.debug("Received job start message: %s" % str(start_request))
    d_logger.debug("Request start: %s" % start_request.job_id[:8])
    # add request to the database queue
    thread_database.enqueue(_job_started, start_request)
    ack_response = {'ack': protobuf.AckResponse.ACK}
    logger.debug("Response to send: " + str(ack_response))
    return protocols.pack(protobuf.AckResponse, ack_response)
def postprocess_job(klass, job, state):
    """Remove the job's footprint from the class-level cache; when the job
    state is 'failed', tell the platefile index the transaction failed.

    Returns the job unchanged.
    """
    if job.uuid in klass.current_footprints:
        del klass.current_footprints[job.uuid]
    if state != 'failed':
        return job
    transaction_id, platefile_id = klass.get_plate_info(job.output)
    if transaction_id and platefile_id:
        request_bytes = protocols.pack(protobuf.BroxtonRequestWrapper, {
            'sequence_number': 0,
            'requestor': '',
            'method': 'IndexTransactionFailed',
            'payload': protocols.pack(
                protobuf.IndexTransactionFailed,
                {'platefile_id': platefile_id,
                 'transaction_id': transaction_id}),
        })
        msg = Message(request_bytes)
        klass.messagebus.basic_publish(msg,
                                       exchange='ngt.platefile.index',
                                       routing_key='index')
        logger.debug("Message published: " + str(msg))
    return job
def get_next_job(msgbytes):
    """Dispatch handler: pop the next buffered job and assign it to the
    requesting reaper.

    Returns a packed ReaperJobResponse.  When the buffer is empty the
    response carries job_available=False; otherwise it carries the job's
    uuid/command/args and the job is marked 'enqueued' (the DB save is
    deferred to the database worker thread).
    """
    global enqueued_job_count
    t0 = time.time()
    logger.debug("Looking for the next job.")
    request = protocols.unpack(protobuf.ReaperJobRequest, msgbytes)
    try:
        job = job_buffer.next()
    except Queue.Empty:
        # No work right now -- tell the reaper there is nothing available.
        job = None
        logger.info("Job buffer empty.")
        return protocols.pack(protobuf.ReaperJobResponse, {'job_available': False})
    # NOTE(review): assumes job_buffer.next() never returns a falsy job --
    # the assert would abort dispatch if it did; confirm against job_buffer.
    assert job
    t1 = time.time()
    logger.debug("Fetched a job in %s sec." % str(t1 - t0))
    job = preprocess_job(job)
    response = {
        'job_available': True,
        'uuid': job.uuid,
        'command': job.command,
        'args': job.arguments or [],  # protobuf repeated field wants a list, never None
    }
    logger.info("Sending job %s to reaper %s (%s)" % (job.uuid[:8], request.reaper_uuid[:8], str(time.time() - t0)))
    job.status = "enqueued"
    job.processor = request.reaper_uuid
    #dblock.acquire()
    #job.save()
    # Persist asynchronously on the DB thread instead of saving inline.
    thread_database.enqueue(_save_job, job)
    job = None  # drop our reference; the DB thread owns the record now
    #dblock.release()
    if options.show_queries:
        # print the slowest queries
        from django.db import connection
        from pprint import pprint
        pprint([q for q in connection.queries if float(q['time']) > 0.001])
    enqueued_job_count += 1
    d_logger.debug("Dispatching %s" % response['uuid'][:8])
    return protocols.pack(protobuf.ReaperJobResponse, response)
def _job_ended(request):
    """DB-thread worker: persist end-of-job state, credit the reaper that ran
    it, and spawn the output asset for completed jobs.

    Raises Job.DoesNotExist (after calling job_does_not_exist) when no Job
    record matches request.job_id.

    BUGFIX: dblock was acquired without a try/finally, so any exception
    (including the deliberate re-raise on a missing Job) left the lock held
    forever and deadlocked the DB thread.  The lock is now released on every
    exit path.
    """
    logger.info("Setting job %s to %s" % (request.job_id[:8], request.state))
    d_logger.debug("Commit end: %s (%s)" % (request.job_id[:8], request.state))
    dblock.acquire()
    try:
        try:
            job = Job.objects.get(uuid=request.job_id)
        except Job.DoesNotExist:
            job_does_not_exist(request.job_id)
            raise
            #return protocols.pack(protobuf.AckResponse, {'ack': protobuf.AckResponse.NOACK})
        # assert job.status == 'processing'
        job.status = request.state
        # django DateTimeField should be able to parse it this way.
        # (pyiso8601 would be the alternative).
        job.time_ended = request.end_time.replace('T', ' ')
        job.output = request.output
        # NOTE(review): postprocess_job is called with one argument here, but
        # the module-level postprocess_job takes (klass, job, state) -- verify
        # which postprocess_job is actually in scope for this call.
        job = postprocess_job(job)
        job.save()
        try:
            reaper = Reaper.objects.filter(deleted=False, expired=False).get(uuid=job.processor)
            reaper.jobcount += 1
            reaper.current_job_id = None
            reaper.save()
        except Reaper.DoesNotExist:
            # <shrug> Log a warning. Re-register.
            # NOTE(review): request is a ReaperJobEndRequest; confirm it
            # actually carries a reaper_id field, otherwise this log line
            # itself raises.
            logger.warning("A job ended that was assigned to an unregistered reaper %s. Probably not good. Reaper will be reregistered." % request.reaper_id)
            register_reaper(protocols.pack(protobuf.ReaperRegistrationRequest, {'reaper_uuid': job.processor}))
        if request.state == 'complete' and job.creates_new_asset:
            try:
                job.spawn_output_asset()
            except Exception:
                # Narrowed from a bare except: still best-effort (log, mark
                # the job, continue) but no longer swallows SystemExit /
                # KeyboardInterrupt.
                logger.error("ASSET CREATION FAILED FOR JOB %s" % job.uuid)
                sys.excepthook(*sys.exc_info())
                job.status = "asset_creation_fail"
                job.save()
    finally:
        dblock.release()
def command_handler(msg):
    """ Unpack a message and process commands
    Speaks the RpcRequest protocol.
    """
    global command_request_count
    command_request_count += 1
    # Ack immediately, before handling: a crash mid-command will NOT cause
    # the broker to redeliver this message.
    mb.basic_ack(msg.delivery_tag)
    #cmd = protocols.unpack(protocols.Command, msg.body)
    request = protocols.unpack(protobuf.RpcRequestWrapper, msg.body)
    logger.debug("command_handler got a command: %s" % str(request.method))
    response = dotdict()
    # Echo the sequence number so the caller's sync check can match
    # responses to requests.
    response.sequence_number = request.sequence_number
    if request.method in command_map:
        t0 = datetime.now()
        try:
            # command_map maps method name -> handler function name; the
            # handler is looked up in this module's globals.
            response.payload = globals()[command_map[request.method]](request.payload)
            response.error = False
        except Exception as e:
            logger.error("Error in command '%s': %s %s" % (request.method, type(e), str(e.args)))
            # TODO: send a stack trace.
            traceback.print_tb(sys.exc_info()[2])  # traceback
            response.payload = ''
            response.error = True
            response.error_string = str(e) or ''
        t1 = datetime.now()
        logger.info("COMMAND %s finished in %s." % (request.method, str(t1 - t0)))
    else:
        # Unknown method: report the error back to the caller.
        logger.error("Invalid Command: %s" % request.method)
        response.payload = ''
        response.error = True
        response.error_string = "Invalid Command: %s" % request.method
    #mb.basic_ack(msg.delivery_tag)
    response_bytes = protocols.pack(protobuf.RpcResponseWrapper, response)
    # Route the response back using the requestor's own key.
    mb.basic_publish(Message(response_bytes), routing_key=request.requestor)
def reaper_does_not_exist(reaper_uuid):
    """Log a warning about a status message from an unregistered reaper and
    create a registration record for it.

    BUGFIX: the warning previously formatted with request['reaper_id'], but
    no 'request' name exists in this function's scope -- every call raised
    NameError.  It now uses the reaper_uuid parameter.
    """
    # <shrug> Log a warning, register the reaper
    logger.warning("Dispatch received a status message from unregistered reaper %s. Reaper record will be created." % reaper_uuid)
    register_reaper(protocols.pack(protobuf.ReaperRegistrationRequest,
                                   {'reaper_uuid': reaper_uuid,
                                    'reaper_type': 'generic'}))
def unregister_reaper(msgbytes):
    """Queue removal of a reaper record on the DB thread and ack at once."""
    unreg_request = protocols.unpack(protobuf.ReaperUnregistrationRequest, msgbytes)
    thread_database.enqueue(_unregister_reaper, unreg_request)
    ack_payload = {'ack': protobuf.AckResponse.ACK}
    return protocols.pack(protobuf.AckResponse, ack_payload)
def CallMethod(self, method_descriptor, rpc_controller, request, response_class, done):
    """Calls the method identified by the descriptor.

    Call the given method of the remote service.  The signature of this
    procedure looks the same as Service.CallMethod(), but the requirements
    are less strict in one important way: the request object doesn't have to
    be of any specific class as long as its descriptor is method.input_type.

    Structure: an outer retry loop publishes the wrapped request and then a
    sync loop polls the response queue until a response with the expected
    sequence number arrives, the timeout fires, or the queue is found to be
    ahead of us (in which case it is purged and the request retried).
    """
    rpc_controller.Reset()
    self.sync_sequence_number += 1  # every call gets a fresh sequence number for matching
    wrapped_request_bytes = protocols.pack(protocols.RpcRequestWrapper, {
        'requestor': self.response_queue,   # tells the server where to send the reply
        'method': method_descriptor.name,
        'payload': request.SerializeToString(),
        'sequence_number': self.sync_sequence_number
    })
    #print ' '.join([hex(ord(c))[2:] for c in request.SerializeToString()])
    #print ' '.join([hex(ord(c))[2:] for c in request_wrapper])
    retries = 0
    while True:  # begin retry loop
        # max_retries < 0 means retry forever.
        if self.max_retries > -1 and retries > self.max_retries:
            rpc_controller.SetFailed("Too many retries. (Max was %d)" % self.max_retries)
            #if done:
            #    done(None)
            # raise RPCFailure("Too many retries")
            return None  # Still not too sure about this whole return None on failure business
        if retries > 0:
            logger.info("Retrying (%d)." % retries)
        logger.debug("About to publish to exchange '%s' with key '%s'" % (self.exchange, self.request_routing_key))
        self.messagebus.basic_publish(amqp.Message(wrapped_request_bytes), exchange=self.exchange, routing_key=self.request_routing_key)
        # Wait for a response
        logger.debug("Waiting for a response on queue '%s'" % self.response_queue)
        timeout_flag = False
        sync_ok = False
        t0 = time.time()
        # begin sync loop
        while not sync_ok:
            # begin response loop: poll the queue until a message arrives
            # or the (millisecond) timeout elapses.
            response = None
            while not response:
                delta_t = time.time() - t0
                if self.timeout_ms >= 0 and delta_t * 1000.0 > self.timeout_ms:
                    timeout_flag = True
                    break
                response = self.messagebus.basic_get(self.response_queue, no_ack=True)  # returns a message or None
                if not response:
                    time.sleep(self.polling_interval)  # polling interval
            # end response loop
            #self.messagebus.basic_ack(response.delivery_tag)
            if timeout_flag:
                logger.warning("RPC method '%s' timed out," % method_descriptor.name)
                retries += 1
                break  # from the sync loop out to retry loop. resets timer
            logger.info("Got a response in %s secs" % str(time.time() - t0))
            response_wrapper = protocols.unpack(protocols.RpcResponseWrapper, response.body)
            if response_wrapper.sequence_number == self.sync_sequence_number:
                logger.debug("Sync OK!")
                sync_ok = True
                break  # from the sync loop
            else:
                # Sequence mismatch: positive delta means stale responses are
                # still queued ahead of ours; negative means the queue holds
                # responses newer than our request.
                sync_delta = self.sync_sequence_number - response_wrapper.sequence_number
                logger.warning("Message sync error. Sync delta: %d" % sync_delta)
                logger.debug("Expected %d but got %d" % (self.sync_sequence_number, response_wrapper.sequence_number))
                if sync_delta > 0:
                    logger.warning("Trying to catch up.")
                    t0 = time.time()  # reset the timeout clock
                    continue  # to "while not sync_ok"
                elif sync_delta < 0:
                    logger.error("The message queue is ahead of us! Purging.")
                    purged = self.messagebus.queue_purge(queue=self.response_queue)  # clear the response queue and try again
                    logger.error("Purged %d messages from %s" % (purged, self.response_queue))
                    time.sleep(0.1)
                    retries += 1
                    break
        #end sync loop
        if timeout_flag:
            continue  # jump to the top of the retry loop
        if sync_ok:
            break  # from the retry loop
    if response_wrapper.error:
        logger.error("RPC response error: %s" % response_wrapper.error_string)
        rpc_controller.SetFailed(response_wrapper.error)
        #if done:
        #    done(None)
        raise RPCFailure("RPC response error: %s" % response_wrapper.error_string)
    response = protocols.unpack(response_class, response_wrapper.payload)
    logger.debug("Response is: %s" % str(response))
    if done:
        done(response)
    return response