def issue_queries(self, query_samples):
    for query_sample in query_samples:
        waveform = self.qsl[query_sample.index]
        assert waveform.ndim == 1
        waveform_length = np.array(waveform.shape[0], dtype=np.int64)
        waveform = np.expand_dims(waveform, 0)
        waveform_length = np.expand_dims(waveform_length, 0)
        with torch.no_grad():
            waveform = torch.from_numpy(waveform)
            waveform_length = torch.from_numpy(waveform_length)
            feature, feature_length = self.audio_preprocessor.forward(
                (waveform, waveform_length))
            assert feature.ndim == 3
            assert feature_length.ndim == 1
            feature = feature.permute(2, 0, 1)
            _, _, transcript = self.greedy_decoder.forward(
                feature, feature_length)
        assert len(transcript) == 1
        response_array = array.array('q', transcript[0])
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_sample.id, bi[0],
                                          bi[1] * response_array.itemsize)
        lg.QuerySamplesComplete([response])
def run_one_item(self, qitem):
    # run the prediction
    processed_results = []
    try:
        results = self.model.predict(qitem.batch_dense_X, qitem.batch_lS_o,
                                     qitem.batch_lS_i)
        processed_results = self.post_process(results, qitem.batch_T,
                                              self.result_dict)
        if self.take_accuracy:
            self.post_process.add_results(processed_results)
            self.result_timing.append(time.time() - qitem.start)
    except Exception as ex:  # pylint: disable=broad-except
        log.error("thread: failed, %s", ex)
        # since post_process will not run, fake empty responses
        processed_results = [[]] * len(qitem.query_id)
    finally:
        response_array_refs = []
        response = []
        for idx, query_id in enumerate(qitem.query_id):
            # NOTE: processed_results returned by DlrmPostProcess store both
            # result = processed_results[idx][0] and target = processed_results[idx][1].
            # Also, each idx might be a query of several samples rather than a
            # single sample, depending on the --samples-to-aggregate* arguments.
            s_idx = qitem.idx_offsets[idx]
            e_idx = qitem.idx_offsets[idx + 1]
            # debug prints
            # print("s,e:", s_idx, e_idx, len(processed_results))
            response_array = array.array(
                "B",
                np.array(processed_results[s_idx:e_idx], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
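# For context: a small, self-contained illustration (made-up numbers, not from
# the original harness) of how the idx_offsets bookkeeping above maps
# aggregated samples back to their LoadGen query ids when
# --samples-to-aggregate*-style batching is in effect.
query_id = [101, 102, 103]  # three LoadGen query ids
idx_offsets = [0, 2, 3, 6]  # query 0 -> samples [0,2), 1 -> [2,3), 2 -> [3,6)
processed_results = [0.1, 0.9, 0.4, 0.2, 0.7, 0.3]  # one score per sample

for idx, qid in enumerate(query_id):
    s_idx, e_idx = idx_offsets[idx], idx_offsets[idx + 1]
    per_query = processed_results[s_idx:e_idx]
    print(qid, per_query)  # the slice that becomes this query's response buffer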
def process_query_async(query_samples, i_slice):
    time.sleep(LOOPBACK_LATENCY_S * (i_slice + 1))
    responses = []
    samples_to_complete = query_samples[i_slice:len(query_samples):NUM_AGENTS]
    for s in samples_to_complete:
        responses.append(mlperf_loadgen.QuerySampleResponse(s.id, 0, 0))
    mlperf_loadgen.QuerySamplesComplete(responses)
def run_one_item(self, qitem):
    # run the prediction
    processed_results = []
    try:
        results = self.model.predict({self.model.inputs[0]: qitem.img})
        processed_results = self.post_process(results, qitem.content_id,
                                              qitem.label, self.result_dict)
        if self.take_accuracy:
            self.post_process.add_results(processed_results)
            self.result_timing.append(time.time() - qitem.start)
    except Exception as ex:  # pylint: disable=broad-except
        src = [self.ds.get_item_loc(i) for i in qitem.content_id]
        log.error("thread: failed on contentid=%s, %s", src, ex)
        # since post_process will not run, fake empty responses
        processed_results = [[]] * len(qitem.query_id)
    finally:
        response_array_refs = []
        response = []
        for idx, query_id in enumerate(qitem.query_id):
            response_array = array.array(
                "B", np.array(processed_results[idx], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def run_one_item(self, qitem, cloud_tpu_id):
    # run the prediction
    try:
        data, _ = self.ds.get_indices(qitem.content_id)
        results = self.models[cloud_tpu_id].predict(data)
    except Exception as ex:  # pylint: disable=broad-except
        src = [self.ds.get_item_loc(i) for i in qitem.content_id]
        log.error("thread: failed on contentid=%s, %s", src, ex)
        # since post_process will not run, fake empty responses
        results = [[]] * len(qitem.query_id)
    finally:
        response = []
        response_arrays = []
        # Ignore padded samples.
        results = np.array(results).reshape((self.max_batchsize, -1)).tolist()
        for idx, query_id in enumerate(qitem.query_id):
            if query_id == -1:
                continue
            response_array = array.array(
                "B", np.array(results[idx], np.float32).tobytes())
            response_arrays.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def handle_tasks(self, tpu_id):
    while True:
        # Block until an item becomes available
        qitem = self.tasks.get(block=True)

        # When a "None" item was added, it is a signal from the parent
        # to indicate we should stop working (see finish)
        if qitem is None:
            break

        results = self.process(qitem, tpu_id)
        response = []
        gc_hack = []
        for res, q_id in zip(results, qitem.query_id_list):
            result_arr = array.array("B", res)
            gc_hack.append(result_arr)
            r_info = result_arr.buffer_info()
            response.append(
                mlperf_loadgen.QuerySampleResponse(q_id, r_info[0],
                                                   r_info[1]))

        # Tell loadgen that we're ready with this query
        mlperf_loadgen.QuerySamplesComplete(response)
        self.tasks.task_done()
def issue_queries(self, query_samples):
    input_ids = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    input_mask = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    segment_ids = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    for sample_idx in range(len(query_samples)):
        eval_features = self.qsl.get_features(query_samples[sample_idx].index)
        input_ids[sample_idx, ...] = np.array(eval_features.input_ids)
        input_mask[sample_idx, ...] = np.array(eval_features.input_mask)
        segment_ids[sample_idx, ...] = np.array(eval_features.segment_ids)

    def input_fn():
        inputs = {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids
        }
        return tf.data.Dataset.from_tensor_slices(inputs)

    for i, result in enumerate(self.estimator.predict(input_fn)):
        logits = [float(x) for x in result["logits"].flat]
        response_array = array.array(
            "B", np.array(logits).astype(np.float32).tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global BATCH_SIZE

    if VERBOSITY_LEVEL:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = [preprocessed_image_buffer[qs.index] for qs in batch]
        batch_predicted_labels = predict_labels_for_batch(batch_data)
        tick('p', len(batch))
        if VERBOSITY_LEVEL:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
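# A note on the response_array_refs pattern above: lg.QuerySampleResponse only
# records the integer (pointer, size) pair from buffer_info(), so Python must
# keep the underlying array.array alive at least until lg.QuerySamplesComplete
# returns. A minimal sketch of that pattern in isolation follows; the helper
# name is hypothetical, and it assumes loadgen is done with the buffers once
# QuerySamplesComplete returns.
import array

import numpy as np
import mlperf_loadgen as lg

def complete_with_float_results(query_ids, per_query_results):
    """Hypothetical helper: send one float32 buffer per query id to loadgen."""
    responses = []
    keep_alive = []  # prevents the byte buffers from being garbage-collected early
    for qid, result in zip(query_ids, per_query_results):
        buf = array.array("B", np.asarray(result, np.float32).tobytes())
        keep_alive.append(buf)
        ptr, size = buf.buffer_info()  # for "B" arrays, size is already in bytes
        responses.append(lg.QuerySampleResponse(qid, ptr, size))
    lg.QuerySamplesComplete(responses)
    # keep_alive goes out of scope only after loadgen has consumed the buffers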
def response_loadgen(out_queue):
    global out_queue_cnt

    while True:
        next_task = out_queue.get()
        if next_task is None:
            # None means shutdown
            log.info('Exiting response thread')
            break

        query_id_list = next_task.query_id_list
        result = next_task.result
        batch_size = len(query_id_list)
        # reshape returns a new array rather than modifying in place
        result = result.reshape(batch_size, -1, 2)
        out_list = np.split(result, batch_size, axis=0)
        for i, o in enumerate(out_list):
            response_array = array.array(
                "B", np.array(o).astype(np.float32).tobytes())
            bi = response_array.buffer_info()
            responses = [lg.QuerySampleResponse(query_id_list[i], bi[0], bi[1])]
            out_queue_cnt += 1
            lg.QuerySamplesComplete(responses)
def handle_tasks(self):
    while True:
        # Block until an item becomes available
        qitem = self.tasks.get(block=True)

        # When a "None" item was added, it is a signal from the parent
        # to indicate we should stop working (see finish)
        if qitem is None:
            break

        result = self.process(qitem)
        response = []
        # TBD: do something when we are running accuracy mode. We need to
        # properly store the result, perhaps through QuerySampleResponse,
        # otherwise internally in this instance of Runner.
        # QuerySampleResponse contains an ID, a size field and a data pointer field.
        for query_id in qitem.query_id:
            response.append(mlperf_loadgen.QuerySampleResponse(query_id, 0, 0))

        # Tell loadgen that we're ready with this query
        mlperf_loadgen.QuerySamplesComplete(response)
        self.tasks.task_done()
def handle_tasks(self, tasks_queue):
    """Worker thread."""
    while True:
        qitem = tasks_queue.get()
        if qitem is None:
            # None in the queue indicates the parent wants us to exit
            tasks_queue.task_done()
            break

        try:
            # run the prediction
            results = self.model.predict({self.model.inputs[0]: qitem.img})
            if self.take_accuracy:
                response = self.post_process(results, qitem.content_id,
                                             qitem.label, self.result_dict)
        except Exception as ex:  # pylint: disable=broad-except
            src = [self.ds.get_item_loc(i) for i in qitem.content_id]
            log.error("thread: failed on contentid=%s, %s", src, ex)
        finally:
            response = []
            for query_id in qitem.query_id:
                # FIXME: unclear what to return here
                response.append(lg.QuerySampleResponse(query_id, 0, 0))
            lg.QuerySamplesComplete(response)
        tasks_queue.task_done()
def issue_queries(query_samples):
    printable_query = [(qs.index, qs.id) for qs in query_samples]
    print("LG: issue_queries( {} )".format(printable_query))

    predicted_results = {}
    for qs in query_samples:
        query_index = qs.index
        x_vector = dataset[query_index]
        predicted_label = predict_label(x_vector)
        predicted_results[query_index] = predicted_label
    print("LG: predicted_results = {}".format(predicted_results))

    response = []
    # Keep references to the response buffers so they are not
    # garbage-collected before loadgen has read them.
    response_array_refs = []
    for qs in query_samples:
        query_index, query_id = qs.index, qs.id
        response_array = array.array(
            "B", np.array(predicted_results[query_index], np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)
def process_query_async(query_samples, i_slice):
    time.sleep(.001 * (i_slice + 1))
    responses = []
    samples_to_complete = query_samples[i_slice:len(query_samples):3]
    for s in samples_to_complete:
        responses.append(mlperf_loadgen.QuerySampleResponse(s.id, 0, 0))
    mlperf_loadgen.QuerySamplesComplete(responses)
def handle_tasks(self, tasks_queue):
    """Worker thread."""
    while True:
        qitem = tasks_queue.get()
        if qitem is None:
            # None in the queue indicates the parent wants us to exit
            tasks_queue.task_done()
            break

        try:
            # run the prediction
            results = self.model.predict({self.model.inputs[0]: qitem.img})
            # and keep track of how long it took
            took = time.time() - qitem.start

            response = []
            for idx, result in enumerate(results[0]):
                result = self.post_process(result)
                if qitem.label[idx] == result:
                    self.result_dict["good"] += 1
                self.result_dict["total"] += 1
                # FIXME: unclear what to return here
                response.append(lg.QuerySampleResponse(qitem.id[idx], 0, 0))
            self.result_list.append(took)
            lg.QuerySamplesComplete(response)
        except Exception as ex:  # pylint: disable=broad-except
            log.error("execute_parallel thread: %s", ex)
        tasks_queue.task_done()
def post_process(self, query_ids, results):
    response = []
    for res, q_id in zip(results, query_ids):
        response.append(mlperf_loadgen.QuerySampleResponse(q_id, 0, 0))

    # Tell loadgen that we're ready with this query
    mlperf_loadgen.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    with torch.no_grad():
        for i in range(len(query_samples)):
            data = self.qsl.get_features(query_samples[i].index)
            print("Processing sample id {:d} with shape = {:}".format(
                query_samples[i].index, data.shape))

            image = torch.from_numpy(
                data[np.newaxis, ...]).float().to(self.device)
            prediction = self.trainer.network(image)
            softmax = F.softmax(
                prediction[0], dim=1).cpu().numpy().astype(np.float16)

            transpose_forward = self.trainer.plans.get("transpose_forward")
            transpose_backward = self.trainer.plans.get("transpose_backward")
            assert transpose_forward == [0, 1, 2], \
                "Unexpected transpose_forward {:}".format(transpose_forward)
            assert transpose_backward == [0, 1, 2], \
                "Unexpected transpose_backward {:}".format(transpose_backward)

            response_array = array.array("B", softmax.tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_samples[i].id, bi[0],
                                              bi[1])
            lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global BATCH_SIZE
    global model

    if VERBOSITY_LEVEL > 2:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = preprocessed_image_buffer[preprocessed_image_map[[
            qs.index for qs in batch
        ]]]
        torch_batch = torch.from_numpy(batch_data)

        begin_time = time.time()

        # move the input to GPU for speed if available
        if USE_CUDA:
            torch_batch = torch_batch.to('cuda')

        with torch.no_grad():
            trimmed_batch_results = model(torch_batch)

        inference_time_s = time.time() - begin_time

        actual_batch_size = len(trimmed_batch_results)
        if VERBOSITY_LEVEL > 1:
            print("[batch of {}] inference={:.2f} ms".format(
                actual_batch_size, inference_time_s * 1000))

        batch_predicted_labels = torch.argmax(trimmed_batch_results,
                                              dim=1).tolist()
        tick('p', len(batch))
        if VERBOSITY_LEVEL > 2:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def process_query_async(query_samples):
    time.sleep(.001)
    responses = []
    response_data = array.array(
        'B', [0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 254, 255])
    rdbi = response_data.buffer_info()
    for s in query_samples:
        responses.append(
            mlperf_loadgen.QuerySampleResponse(s.id, rdbi[0], rdbi[1]))
    mlperf_loadgen.QuerySamplesComplete(responses)
def issue_queries(query_samples):
    global BATCH_SIZE
    global model_output_volume
    global num_classes

    if VERBOSITY_LEVEL > 2:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = preprocessed_image_buffer[preprocessed_image_map[[
            qs.index for qs in batch
        ]]]

        trimmed_batch_results, inference_time_s = inference_for_given_batch(
            batch_data)
        actual_batch_size = len(trimmed_batch_results)
        if VERBOSITY_LEVEL > 1:
            print("[batch of {}] inference={:.2f} ms".format(
                actual_batch_size, inference_time_s * 1000))

        if model_output_volume == 1:
            batch_predicted_labels = trimmed_batch_results
        else:
            batch_predicted_labels = [
                np.argmax(trimmed_batch_results[k][-num_classes:])
                for k in range(actual_batch_size)
            ]

        tick('p', len(batch))
        if VERBOSITY_LEVEL > 2:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def issue_queries(query_samples):
    global BATCH_SIZE

    if VERBOSITY_LEVEL:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = [preprocessed_image_buffer[qs.index] for qs in batch]
        predictions_for_a_batch = predict_labels_for_batch(batch_data)
        tick('p', len(batch))
        if VERBOSITY_LEVEL:
            print("predicted_batch_results = {}".format(predictions_for_a_batch))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, all_boxes_for_this_sample in zip(batch,
                                                 predictions_for_a_batch):
            num_active_boxes_for_this_sample = all_boxes_for_this_sample[
                MODEL_MAX_PREDICTIONS * 7].view('int32')
            global_image_index = qs.index
            width_orig, height_orig = original_w_h[global_image_index]

            reformed_active_boxes_for_this_sample = []
            for i in range(num_active_boxes_for_this_sample):
                (image_id, ymin, xmin, ymax, xmax, confidence_score,
                 class_number) = all_boxes_for_this_sample[i * 7:(i + 1) * 7]

                if class_map:
                    class_number = float(class_map[int(class_number)])

                reformed_active_boxes_for_this_sample += [
                    float(global_image_index), ymin, xmin, ymax, xmax,
                    confidence_score, class_number
                ]

            response_array = array.array(
                "B",
                np.array(reformed_active_boxes_for_this_sample,
                         np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def worker_code():
    while True:
        ## Grab a new batch:
        #
        deadline_ts = None
        batch_jobs = []
        batch_inputs = []
        # may not run to the end due to the cumulative timeout
        for grabbed_count in range(BATCH_CAPACITY):
            try:
                # no waiting limit on the first job
                grab_timeout = None if deadline_ts is None else max(
                    0, deadline_ts - time.time())
                job = task_queue.get(timeout=grab_timeout)
                batch_jobs.append(job)
                batch_inputs.append(job['inputs'])
                if grabbed_count == 0:
                    deadline_ts = job['ts_submitted'] + TOPUP_TIME_S
            except queue.Empty:
                break  # we ran out of TOPUP_TIME_S

        print(f"LG: worker grabbed and submitted {len(batch_jobs)} jobs")

        ## Predict the whole batch:
        #
        predicted_labels = predict_labels(batch_inputs)  # takes LATENCY_S of time
        ts_predicted = time.time()

        ## Report batch results:
        #
        for index_in_batch, job in enumerate(batch_jobs):
            predicted_label = predicted_labels[index_in_batch]
            one_input = job['inputs']
            query_sample = job['query_sample']
            ts_submitted = job['ts_submitted']
            print(f"LG: worker predicted: for input={one_input} "
                  f"label={predicted_label} in "
                  f"{(ts_predicted - ts_submitted) * 1000} ms")

            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_sample.id, bi[0], bi[1])
            lg.QuerySamplesComplete([response])
            task_queue.task_done()
def process_query_async(query_samples):
    time.sleep(.001)
    responses = []
    response_array = array.array(
        'f', [0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 254, 255])
    response_info = response_array.buffer_info()
    response_data = response_info[0]
    response_size = response_info[1] * response_array.itemsize
    for s in query_samples:
        responses.append(
            mlperf_loadgen.QuerySampleResponse(s.id, response_data,
                                               response_size))
    mlperf_loadgen.QuerySamplesComplete(responses)
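# For completeness, a hedged sketch of how a process_query_async-style callback
# gets wired into a LoadGen run. The sample counts and no-op QSL callbacks are
# placeholders, NUM_AGENTS is assumed to match the stride in the slice-based
# variant above, and ConstructSUT's signature has varied across loadgen
# versions (older releases also expected a process_latencies callback).
import threading

import mlperf_loadgen

NUM_AGENTS = 3  # placeholder: must match the slice stride

def issue_query(query_samples):
    # Fan each incoming query out to NUM_AGENTS async workers, one per slice.
    for i_slice in range(NUM_AGENTS):
        threading.Thread(target=process_query_async,
                         args=(query_samples, i_slice)).start()

def flush_queries():
    pass  # nothing is buffered in this toy SUT

def load_samples(sample_indices):
    pass  # a real QSL would stage preprocessed samples in memory here

def unload_samples(sample_indices):
    pass

settings = mlperf_loadgen.TestSettings()
settings.scenario = mlperf_loadgen.TestScenario.Offline
settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples, unload_samples)
mlperf_loadgen.StartTest(sut, qsl, settings)
mlperf_loadgen.DestroyQSL(qsl)
mlperf_loadgen.DestroySUT(sut)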
def issue_queries(self, query_samples):
    with torch.no_grad():
        for i in range(len(query_samples)):
            eval_features = self.qsl.get_features(query_samples[i].index)
            start_scores, end_scores = self.model.forward(
                input_ids=torch.LongTensor(
                    eval_features.input_ids).unsqueeze(0).cuda(),
                attention_mask=torch.LongTensor(
                    eval_features.input_mask).unsqueeze(0).cuda(),
                token_type_ids=torch.LongTensor(
                    eval_features.segment_ids).unsqueeze(0).cuda())
            output = torch.stack([start_scores, end_scores],
                                 dim=-1).squeeze(0).cpu().numpy()

            response_array = array.array("B", output.tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_samples[i].id, bi[0],
                                              bi[1])
            lg.QuerySamplesComplete([response])
def run_one_item(self, qitem):
    # run the prediction
    query_id, content_id, feed = qitem
    results = self.session.run(self.onnx_output_names, feed)
    # The outputs are deliberately discarded here and empty responses are
    # returned, so this path measures inference time only.
    processed_results = [[]] * len(query_id)

    response_array_refs = []
    response = []
    for idx, qid in enumerate(query_id):
        response_array = array.array(
            "B", np.array(processed_results[idx], np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        output = self.sess.run(
            self.output,
            feed_dict={self.input: data[np.newaxis, ...]})[0].astype(np.float16)

        response_array = array.array("B", output.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        # Follow the PyTorch implementation. The ONNX file has five outputs,
        # but we only care about the one named "output".
        output = self.sess.run(
            ["output"],
            {"input": data[np.newaxis, ...]})[0].squeeze(0).astype(np.float16)

        response_array = array.array("B", output.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global so
    global last_timeing
    global result_timeing

    idx = np.array([q.index for q in query_samples]).astype(np.int32)
    query_id = [q.id for q in query_samples]
    if args.dataset == 'brats2019':
        start = time.time()
        response_array_refs = []
        response = []
        for i, qid in enumerate(query_id):
            processed_results = so.IssueQuery(1, idx[i][np.newaxis])
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array = array.array(
                "B", np.array(processed_results[0], np.float16).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
        result_timeing.append(time.time() - start)
        lg.QuerySamplesComplete(response)
    else:
        start = time.time()
        processed_results = so.IssueQuery(len(idx), idx)
        result_timeing.append(time.time() - start)
        processed_results = json.loads(processed_results.decode('utf-8'))
        response_array_refs = []
        response = []
        for i, qid in enumerate(query_id):
            response_array = array.array(
                "B", np.array(processed_results[i], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        before_softmax = self.exec_net.infer(
            inputs={self.input_name: data[np.newaxis, ...]})[self.output_name]
        after_softmax = softmax(before_softmax, axis=1).astype(np.float16)

        response_array = array.array("B", after_softmax.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def postprocess(self, qitem):
    qitem.readyEvent.synchronize()
    with torch.cuda.stream(self.ppStream):
        qitem.results = qitem.results.cpu()
        qitem.results = qitem.results.numpy()
    processed_results = self.post_proc(qitem.results, qitem.label,
                                       self.result_dict)

    response_array_refs = []
    response = []
    for result, query_id in zip(processed_results, qitem.query_id):
        response_array = array.array(
            "B", np.array(result, np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)

    if qitem.index == self.batches - 1:
        self.finishTime = time.time()
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        eval_features = self.qsl.get_features(query_samples[i].index)
        input_ids = np.array([eval_features.input_ids])
        input_mask = np.array([eval_features.input_mask])
        segment_ids = np.array([eval_features.segment_ids])
        feeds = {
            'input_ids:0': input_ids,
            'input_mask:0': input_mask,
            'segment_ids:0': segment_ids
        }
        result = self.sess.run(["logits:0"], feed_dict=feeds)

        logits = [float(x) for x in result[0].flat]
        response_array = array.array(
            "B", np.array(logits).astype(np.float32).tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])