def throttler():
    """Gate the work queue against downstream xstream results.

    Pulls items from the module-level queue ``q`` until a ``None``
    sentinel arrives; for each item, blocks on the ``outchan``
    subscription so the producer cannot run ahead of the consumer.
    """
    xsResult = xstream.Subscribe(outchan)
    while True:
        msg = q.get()
        # FIX: compare to the None sentinel with identity, not equality
        if msg is None:
            break
        xsResult.get()
def xstream2server():
    """Pump messages from the special "__server__" xstream channel into
    this server's consumers.

    Other processes publish JSON messages here, e.g. speedodata that is
    broadcast to websocket clients. Runs until get_msg() returns None,
    then cancels outstanding async tasks.
    """
    # subscribe to special "__server__" channel for
    # other processes to send messages to this server
    # e.g. speedodata -> websockets
    xs = xstream.Subscribe("__server__")
    while True:
        msg_str = xs.get_msg()
        if msg_str is None:
            break
        try:
            msg = json.loads(msg_str)
            if msg['topic'] == 'speedodata':
                WebSocketHandler.broadcast(msg['topic'], msg['message'])
            elif msg['topic'] == 'callback' and 'callback_id' in msg:
                WebSocketHandler.send_to_client(
                    msg['callback_id'], msg['topic'], msg['message'])
            elif msg['topic'] == 'xs_throughput':
                report = json.loads(msg['message'])
                for name, throughput in report.items():
                    # names look like "<service>.<edge>"; split on the
                    # first '.' so edge names may themselves contain dots
                    serviceName = name.split('.')[0]
                    edgeName = name[name.find('.') + 1:]
                    ServiceManager().update_throughput_stats(
                        serviceName, edgeName, throughput)
        except Exception:
            # FIX: was a bare `except: pass`. Keep the pump best-effort
            # (a malformed message must not kill the loop) but log the
            # failure instead of swallowing it silently.
            logging.exception("dropping malformed __server__ message")
    cancel_async_tasks()
def run(rundir, chanIdx, q, args):
    """FPGA classification worker loop.

    Pulls batches from an xstream channel, runs inference via Runner,
    computes FC + softmax on the CPU, prints classifications, and acks
    each payload back to its sender.

    Args:
        rundir: directory holding the compiled model (weights.h5 etc.).
        chanIdx: index of the xstream channel this worker subscribes to.
        q: queue used to signal readiness; a 1 is put once initialized.
        args: options dict; 'labels' and 'outsz' are used here.
    """
    xspub = xstream.Publisher()
    xssub = xstream.Subscribe(chanIdx2Str(chanIdx))
    runner = Runner(rundir)
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    q.put(1)  # ready for work

    fpgaBlobs = None
    fcOutput = None
    labels = xdnn_io.get_labels(args['labels'])
    xdnnCPUOp = xdnn.XDNNCPUOp("%s/weights.h5" % rundir)
    while True:
        try:
            payload = xssub.get()
            if not payload:
                break
            (meta, buf) = payload

            # FIX: identity comparison for None (was `== None`)
            if fpgaBlobs is None:
                # allocate buffers lazily, once the batch size is known
                # from the first incoming payload's metadata
                fpgaBlobs = []
                batchsz = meta['shape'][0]  # inTensors[0].dims[0]
                for io in [inTensors, outTensors]:
                    blobs = []
                    for t in io:
                        shape = (batchsz,) + tuple(
                            [t.dims[i] for i in range(t.ndims)][1:])
                        blobs.append(
                            np.empty((shape), dtype=np.float32, order='C'))
                    fpgaBlobs.append(blobs)
                fcOutput = np.empty((batchsz, args['outsz'],),
                                    dtype=np.float32, order='C')

            fpgaInput = fpgaBlobs[0][0]
            assert tuple(meta['shape']) == fpgaInput.shape
            # interpret the shared buffer as float32 and stage it into
            # the FPGA input blob
            data = np.frombuffer(buf, dtype=np.float32).reshape(
                fpgaInput.shape)
            np.copyto(fpgaInput, data)

            jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
            runner.wait(jid)

            xdnnCPUOp.computeFC(fpgaBlobs[1][0], fcOutput)
            softmaxOut = xdnnCPUOp.computeSoftmax(fcOutput)
            xdnn_io.printClassification(softmaxOut, meta['images'], labels)
            sys.stdout.flush()

            if meta['id'] % 1000 == 0:
                print("Recvd query %d" % meta['id'])
                sys.stdout.flush()

            # drop references to the shared buffer before acking so the
            # sender may reclaim it
            del data
            del buf
            del payload
            xspub.send(meta['from'], "success")
        except Exception as e:
            logging.error("Worker exception " + str(e))
def _init(nWorkers, inshape):
    # Lazily set up per-process xstream plumbing for the Dispatcher:
    # one Publisher/Subscriber pair and one float32 input staging blob
    # per process token. Repeated calls from the same process are no-ops.
    token = pid2TokenStr()
    if token not in Dispatcher.xspub:
        Dispatcher.xspub[token] = xstream.Publisher()
        Dispatcher.xstoken[token] = xstream.Subscribe(token)
        # NOTE(review): inshape/nWorkers are class-wide (not keyed by
        # token) — the last initializer wins; presumably all processes
        # share one input shape. Confirm against callers.
        Dispatcher.inshape = inshape
        Dispatcher.nWorkers = nWorkers
        Dispatcher.inBlob[token] = np.zeros(tuple(inshape),
                                            dtype=np.float32, order='C')
def run(rundir, chanIdx, q, args):
    """FPGA YOLO detection worker loop.

    Pulls batches from an xstream channel, runs inference via Runner,
    post-processes detection boxes, optionally saves/visualizes them,
    and acks each payload back to its sender.

    Args:
        rundir: directory holding the compiled model.
        chanIdx: index of the xstream channel this worker subscribes to.
        q: queue used to signal readiness; a 1 is put once initialized.
        args: options dict; uses 'labels', 'yolo_version', 'visualize',
            'profile', and 'results_dir' here.
    """
    xspub = xstream.Publisher()
    xssub = xstream.Subscribe(chanIdx2Str(chanIdx))
    runner = Runner(rundir)
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    q.put(1)  # ready for work

    fpgaBlobs = None
    labels = xdnn_io.get_labels(args['labels'])

    # select the post-processing routine for the requested YOLO version
    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        assert args['yolo_version'] in (
            'v2', 'v3'), "--yolo_version should be <v2|v3>"

    biases = bias_selector(args)
    if (args['visualize']):
        colors = generate_colors(len(labels))

    while True:
        try:
            payload = xssub.get()
            if not payload:
                break
            (meta, buf) = payload

            # FIX: identity comparison for None (was `== None`)
            if fpgaBlobs is None:
                # allocate buffers lazily, once the batch size is known
                # from the first incoming payload's metadata.
                # FIX: dropped the unused fcOutput allocation that was
                # copied from the classification worker — nothing in
                # this detection path ever reads it.
                fpgaBlobs = []
                batchsz = meta['shape'][0]  # inTensors[0].dims[0]
                for io in [inTensors, outTensors]:
                    blobs = []
                    for t in io:
                        shape = (batchsz,) + tuple(
                            [t.dims[i] for i in range(t.ndims)][1:])
                        blobs.append(
                            np.empty((shape), dtype=np.float32, order='C'))
                    fpgaBlobs.append(blobs)

            fpgaInput = fpgaBlobs[0][0]
            assert tuple(meta['shape']) == fpgaInput.shape
            data = np.frombuffer(buf, dtype=np.float32).reshape(
                fpgaInput.shape)
            np.copyto(fpgaInput, data)

            jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
            runner.wait(jid)

            boxes = yolo_postproc(fpgaBlobs[1], args, meta['image_shapes'],
                                  biases=biases)

            if (not args['profile']):
                for i in range(min(batchsz, len(meta['image_shapes']))):
                    print("Detected {} boxes in {}".format(
                        len(boxes[i]), meta['images'][i]), flush=True)

            # Save the result
            if (args['results_dir']):
                for i in range(min(batchsz, len(meta['image_shapes']))):
                    fname = meta['images'][i]
                    filename = os.path.splitext(os.path.basename(fname))[0]
                    out_file_txt = os.path.join(args['results_dir'],
                                                filename + '.txt')
                    print("Saving {} boxes to {}".format(
                        len(boxes[i]), out_file_txt))
                    sys.stdout.flush()
                    saveDetectionDarknetStyle(out_file_txt, boxes[i],
                                              meta['image_shapes'][i])
                    if (args['visualize']):
                        out_file_png = os.path.join(
                            args['results_dir'], filename + '.png')
                        print("Saving result to {}".format(out_file_png))
                        sys.stdout.flush()
                        draw_boxes(fname, boxes[i], labels, colors,
                                   out_file_png)

            if meta['id'] % 1000 == 0:
                print("Recvd query %d" % meta['id'])
                sys.stdout.flush()

            # drop references to the shared buffer before acking so the
            # sender may reclaim it
            del data
            del buf
            del payload
            xspub.send(meta['from'], "success")
        except Exception as e:
            logging.error("Worker exception " + str(e))
def heartbeat(stop):
    """Monitor per-service heartbeats on "__heartbeat__" and restart
    services that stop reporting.

    Args:
        stop: zero-argument callable; when it returns truthy the loop
            exits (it is invoked as ``stop()`` below).
    """
    xs = xstream.Subscribe("__heartbeat__", timeout=5000)
    service_manager = ServiceManager()
    node_status = {}

    def check_services(node_status):
        # Sweep cached service state: drop stale entries and restart
        # services whose last valid heartbeat is too old.
        # FIX: was `if stop:` — always truthy for a callable, which made
        # this entire function a no-op. Call it instead.
        if stop():
            return
        invalid_services = []
        for service, status in node_status.items():
            last_valid = status['last_valid']
            service_state = service_manager._services[service]['state']
            is_starting = service_state == service_manager.STARTING
            is_started = service_state == service_manager.STARTED
            # if the service has been stopped, clear it
            if service_state == service_manager.STOPPED:
                invalid_services.append(service)
            # if there's a discrepancy in what the service_manager says
            # and what we have cached, clear it
            elif is_starting and node_status[service]['is_started']:
                invalid_services.append(service)
            # if it's started and hasn't been valid in the last n secs,
            # restart it
            elif is_started and now - last_valid > 5:
                logger.warning("Service %s is dead, restarting" % service)
                service_manager.stop(service)
                service_manager.start(service)
                node_status[service]['is_started'] = False
        for service in invalid_services:
            del node_status[service]

    logger = logging.getLogger(__name__)
    while True:
        if stop():
            break
        # when enabling coverage, this line will raise an exception for some
        # reason. For now, just catching it
        try:
            msg_str = xs.get_msg()
            now = time.time()
        except Exception:
            logger.exception("Shouldn't happen")
            # FIX: without this continue, msg_str/now would be unbound on
            # a first-iteration failure (NameError) or stale afterwards
            continue

        # the get_msg timed out, i.e. no heartbeats received
        if msg_str == (None, None):
            check_services(node_status)
            continue

        msg = json.loads(msg_str)
        service = msg['service']
        channel = msg['channel']

        # if this is the first time we've seen this service
        if service not in node_status:
            _first_edge, last_edge = service_manager._get_graph_io(service)
            node_status[service] = {
                'last_valid': 0,  # saves the last time this service was valid
                'is_started': False,  # our check that services haven't stopped
                'last_edge': last_edge[0],  # saves the last edge of the service
                'channels': {}  # save heartbeat times for each channel
            }
        node_status[service]['channels'][channel] = now

        service_state = service_manager._services[service]['state']
        # a heartbeat on the service's last edge means the whole
        # pipeline is alive
        if node_status[service]['last_edge'] == channel:
            if service_state == service_manager.STARTING:
                if not node_status[service]['is_started']:
                    service_manager._services[service][
                        'state'] = service_manager.STARTED
                    node_status[service]['is_started'] = True
            else:
                # there's a discrepancy. For example, the service may
                # have been stopped and something else started with
                # the same name. In this case, clear the cache
                del node_status[service]
                continue
            node_status[service]['last_valid'] = now
        check_services(node_status)
    cancel_async_tasks()