def connect_socket(self):
    """Probe each configured controller and keep the first one that answers.

    For every address in ``self.controllers`` a fresh zmq.REQ socket is
    created with a short (2s) receive timeout and a ``ping`` RPC is sent.
    The first controller that replies becomes ``self.controller`` /
    ``self.address``; its receive timeout is then raised to the caller's
    requested ``self.timeout``.

    Raises:
        Exception: if no controller responds to the ping.
    """
    reply = None
    tmp_sock = None
    for c in self.controllers:
        self.logger.debug('Establishing socket connection to %s', c)
        tmp_sock = self.context.socket(zmq.REQ)
        # Short timeout just for the ping; the real timeout is set below.
        tmp_sock.setsockopt(zmq.RCVTIMEO, 2000)
        # LINGER 0 so closing a dead connection does not block.
        tmp_sock.setsockopt(zmq.LINGER, 0)
        tmp_sock.identity = self.identity
        tmp_sock.connect(c)
        # First ping the controller to see if it responds at all.
        msg = RPCMessage({'payload': 'ping'})
        tmp_sock.send_json(msg)
        try:
            reply = msg_factory(tmp_sock.recv_json())
            self.address = c
            break
        except Exception:
            # BUG FIX: was a bare `except:` (also swallowed KeyboardInterrupt)
            # and logged via the root `logging` module instead of self.logger.
            self.logger.exception("Unable to connect to %s", c)
            # BUG FIX: close the failed socket instead of leaking it.
            tmp_sock.close()
            continue
    if reply:
        # Now set the timeout to the actual requested.
        self.logger.debug(
            "Connection OK, setting network timeout to %s milliseconds",
            self.timeout * 1000)
        self.controller = tmp_sock
        self.controller.setsockopt(zmq.RCVTIMEO, self.timeout * 1000)
    else:
        raise Exception('No controller connection')
def __getattr__(self, name): def _rpc(*args, **kwargs): self.logger.debug('Call %s on %s' % (name, self.address)) start_time = time.time() params = {} if args: params['args'] = args if kwargs: params['kwargs'] = kwargs # We do not want string args to be converted into unicode by the JSON machinery # bquery ctable does not like col names to be unicode for example msg = RPCMessage({'payload': name}) msg.add_as_binary('params', params) rep = None for x in range(self.retries): try: self.controller.send_json(msg) rep = self.controller.recv() break except Exception, e: self.controller.close() self.logger.critical(e) if x == self.retries: raise e else: self.logger.debug("Error, retrying %s" % (x + 1)) self.connect_socket() pass if not rep: raise RPCError("No response from DQE, retries %s exceeded" % self.retries) try: # The results returned from controller is a tarfile with all the results, convert it to a Dataframe if name == 'groupby': _, groupby_col_list, agg_list, where_terms_list = args[ 0], args[1], args[2], args[3] result = self.uncompress_groupby_to_df( rep, groupby_col_list, agg_list, where_terms_list, aggregate=kwargs.get('aggregate', False)) else: rep = msg_factory(json.loads(rep)) result = rep.get_from_binary('result') except (ValueError, TypeError): self.logger.exception('Could not use RPC method: {}/{}'.format( name, rep)) result = rep if isinstance(rep, ErrorMessage): raise RPCError(rep.get('payload')) stop_time = time.time() self.last_call_duration = stop_time - start_time return result
def send(self, addr, msg_buf, is_rpc=False):
    """Send ``msg_buf`` to peer ``addr`` over the ROUTER socket.

    A message addressed to ourselves is short-circuited straight into
    ``handle_peer`` without touching the network. RPC replies destined
    for a zmq.REQ socket need an empty delimiter frame in the envelope.

    Args:
        addr: identity frame of the destination peer.
        msg_buf: serialized message payload.
        is_rpc: True when replying to a zmq.REQ client (adds '' frame).
    """
    try:
        if addr == self.address:
            self.handle_peer(addr, msg_factory(msg_buf))
            return
        if is_rpc:
            # REQ sockets expect [identity, empty delimiter, payload].
            frames = [addr, '', msg_buf]
        else:
            frames = [addr, msg_buf]
        self.socket.send_multipart(frames)
    except zmq.ZMQError as ze:
        # FIX: py3-compatible `as` syntax (was `except zmq.ZMQError, ze`)
        # and lazy %-args instead of eager string interpolation.
        self.logger.critical("Problem with %s: %s", addr, ze)
def handle_in(self):
    """Receive one multipart message and dispatch it by envelope shape.

    Three frames with an empty middle frame is an RPC call from a
    zmq.REQ socket; three frames otherwise carry a binary attachment;
    two frames is an internode call from another zmq.ROUTER
    (a Controller or Worker). Anything else is logged and dropped.
    """
    self.msg_count_in += 1
    data = self.socket.recv_multipart()
    binary, sender = None, None  # initialise outside for edge cases
    if len(data) == 3:
        if data[1] == '':
            # This is a RPC call from a zmq.REQ socket.
            sender, _blank, msg_buf = data
            self.handle_rpc(sender, msg_factory(msg_buf))
            return
        sender, msg_buf, binary = data
    elif len(data) == 2:
        # An internode call from another zmq.ROUTER, a Controller or Worker.
        sender, msg_buf = data
    else:
        # BUG FIX: previously an unexpected frame count fell through with
        # msg_buf undefined and crashed with a NameError below.
        self.logger.critical(
            'Received message with unexpected frame count %s', len(data))
        return
    msg = msg_factory(msg_buf)
    if binary:
        msg['data'] = binary
    if sender in self.others:
        self.handle_peer(sender, msg)
    else:
        self.handle_worker(sender, msg)
def handle_calc_message(self, msg):
    # Fan a calc RPC out to the workers, one sub-message per filename.
    # Expects exactly 4 positional args:
    #   path_list, groupby_col_list, measure_col_list, where_terms_list
    # On validation failure an error *string* is returned (not raised).
    # Each per-file message gets a fresh random token; the mapping from
    # filename -> token is recorded in self.rpc_segments under the
    # original ('parent') token so replies can be reassembled later.
    args, kwargs = msg.get_args_kwargs()
    # Optional worker-affinity hint; None routes to the default queue.
    affinity = kwargs.get('affinity')
    if len(args) != 4:
        return 'Error, No correct args given, expecting: ' + \
            'path_list, groupby_col_list, measure_col_list, where_terms_list'
    filenames = args[0]
    if not filenames:
        return 'Error, no filenames given'
    # Make sure that all filenames are available before any messages are sent
    for filename in filenames:
        if filename and filename not in self.files_map:
            return 'Sorry, filename %s was not found' % filename
    parent_token = msg['token']
    # Bookkeeping record for reassembling the per-file replies.
    rpc_segment = {'msg': msg_factory(msg.copy()),
                   'results': {},
                   'filenames': dict([(x, None) for x in filenames])}
    # NOTE(review): `params` is deliberately reused across iterations; both
    # keys are overwritten each pass, and add_as_binary presumably snapshots
    # the value — confirm before restructuring.
    params = {}
    for filename in filenames:
        # Mutate `msg` in place, then append a copy per file below.
        msg['filename'] = filename
        params['args'] = list(args)
        params['args'][0] = filename  # each worker sees only its own file
        params['kwargs'] = kwargs
        msg.add_as_binary('params', params)
        # Make up a new token for the message sent to the workers, and collect
        # the responses using that id
        msg['parent_token'] = parent_token
        new_token = binascii.hexlify(os.urandom(8))
        msg['token'] = new_token
        rpc_segment['filenames'][filename] = new_token
        self.worker_out_messages.setdefault(affinity, []).append(msg.copy())
    self.rpc_segments[parent_token] = rpc_segment
def handle_in(self):
    """Pull one message off the socket, handle it, and reply to the sender.

    Flags this node as busy to every controller before handling and as
    done afterwards; unknown senders and malformed envelopes are logged
    and dropped. Handler exceptions are converted into an ErrorMessage
    reply carrying the traceback.
    """
    try:
        frames = self.socket.recv_multipart()
    except zmq.Again:
        # Nothing waiting on the socket right now.
        return
    if len(frames) != 2:
        self.logger.critical(
            'Received a msg with len != 2, something seriously wrong. ')
        return
    sender, msg_buf = frames
    self.logger.info("Received message from sender %s", sender)
    msg = msg_factory(msg_buf)
    data = self.controllers.get(sender)
    if not data:
        self.logger.critical(
            'Received a msg from %s - this is an unknown sender' % sender)
        return
    data['last_seen'] = time.time()
    self.logger.debug('Received from %s', sender)
    # TODO Notify Controllers that we are busy, no more messages to be sent
    # The above busy notification is not perfect as other messages might be on
    # their way already, but for long-running queries it will at least ensure
    # other controllers don't try and overuse this node by filling up a queue.
    self.send_to_all(BusyMessage())
    try:
        reply = self.handle(msg)
    except Exception:
        reply = ErrorMessage(msg)
        reply['payload'] = traceback.format_exc()
        self.logger.exception("Unable to handle message [%s]", msg)
    if reply:
        self.send(sender, reply)
    # Send a DoneMessage to all controllers, this flags you as 'Done'. Duh
    self.send_to_all(DoneMessage())