def issue_queries(self, query_samples):
    for query_sample in query_samples:
        waveform = self.qsl[query_sample.index]
        assert waveform.ndim == 1
        waveform_length = np.array(waveform.shape[0], dtype=np.int64)
        waveform = np.expand_dims(waveform, 0)
        waveform_length = np.expand_dims(waveform_length, 0)
        with torch.no_grad():
            waveform = torch.from_numpy(waveform)
            waveform_length = torch.from_numpy(waveform_length)
            feature, feature_length = self.audio_preprocessor.forward(
                (waveform, waveform_length))
            assert feature.ndim == 3
            assert feature_length.ndim == 1
            feature = feature.permute(2, 0, 1)
            _, _, transcript = self.greedy_decoder.forward(
                feature, feature_length)
        assert len(transcript) == 1
        response_array = array.array('q', transcript[0])
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_sample.id, bi[0],
                                          bi[1] * response_array.itemsize)
        lg.QuerySamplesComplete([response])
def run_one_item(self, qitem):
    # run the prediction
    processed_results = []
    try:
        results = self.model.predict(qitem.batch_dense_X, qitem.batch_lS_o,
                                     qitem.batch_lS_i)
        processed_results = self.post_process(results, qitem.batch_T,
                                              self.result_dict)
        if self.take_accuracy:
            self.post_process.add_results(processed_results)
            self.result_timing.append(time.time() - qitem.start)
    except Exception as ex:  # pylint: disable=broad-except
        log.error("thread: failed, %s", ex)
        # since post_process will not run, fake empty responses
        processed_results = [[]] * len(qitem.query_id)
    finally:
        response_array_refs = []
        response = []
        for idx, query_id in enumerate(qitem.query_id):
            # NOTE: processed_results returned by DlrmPostProcess store both
            # result = processed_results[idx][0] and target = processed_results[idx][1].
            # Also, each idx might be a query of several samples rather than a
            # single sample, depending on the --samples-to-aggregate* arguments.
            s_idx = qitem.idx_offsets[idx]
            e_idx = qitem.idx_offsets[idx + 1]
            # debug prints
            # print("s,e:", s_idx, e_idx, len(processed_results))
            response_array = array.array(
                "B",
                np.array(processed_results[s_idx:e_idx], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
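# For context: a small, self-contained illustration (made-up numbers, not from
# the original harness) of how the idx_offsets bookkeeping above maps
# aggregated samples back to their LoadGen query ids when
# --samples-to-aggregate*-style batching is in effect.
query_id = [101, 102, 103]  # three LoadGen query ids
idx_offsets = [0, 2, 3, 6]  # query 0 -> samples [0,2), 1 -> [2,3), 2 -> [3,6)
processed_results = [0.1, 0.9, 0.4, 0.2, 0.7, 0.3]  # one score per sample

for idx, qid in enumerate(query_id):
    s_idx, e_idx = idx_offsets[idx], idx_offsets[idx + 1]
    per_query = processed_results[s_idx:e_idx]
    print(qid, per_query)  # the slice that becomes this query's response buffer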
def process_query_async(query_samples, i_slice):
    time.sleep(LOOPBACK_LATENCY_S * (i_slice + 1))
    responses = []
    samples_to_complete = query_samples[i_slice:len(query_samples):NUM_AGENTS]
    for s in samples_to_complete:
        responses.append(mlperf_loadgen.QuerySampleResponse(s.id, 0, 0))
    mlperf_loadgen.QuerySamplesComplete(responses)
def run_one_item(self, qitem):
    # run the prediction
    processed_results = []
    try:
        results = self.model.predict({self.model.inputs[0]: qitem.img})
        processed_results = self.post_process(results, qitem.content_id,
                                              qitem.label, self.result_dict)
        if self.take_accuracy:
            self.post_process.add_results(processed_results)
            self.result_timing.append(time.time() - qitem.start)
    except Exception as ex:  # pylint: disable=broad-except
        src = [self.ds.get_item_loc(i) for i in qitem.content_id]
        log.error("thread: failed on contentid=%s, %s", src, ex)
        # since post_process will not run, fake empty responses
        processed_results = [[]] * len(qitem.query_id)
    finally:
        response_array_refs = []
        response = []
        for idx, query_id in enumerate(qitem.query_id):
            response_array = array.array(
                "B", np.array(processed_results[idx], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def run_one_item(self, qitem, cloud_tpu_id):
    # run the prediction
    try:
        data, _ = self.ds.get_indices(qitem.content_id)
        results = self.models[cloud_tpu_id].predict(data)
    except Exception as ex:  # pylint: disable=broad-except
        src = [self.ds.get_item_loc(i) for i in qitem.content_id]
        log.error("thread: failed on contentid=%s, %s", src, ex)
        # since post_process will not run, fake empty responses
        results = [[]] * len(qitem.query_id)
    finally:
        response = []
        response_arrays = []
        # Ignore padded samples.
        results = np.array(results).reshape((self.max_batchsize, -1)).tolist()
        for idx, query_id in enumerate(qitem.query_id):
            if query_id == -1:
                continue
            response_array = array.array(
                "B", np.array(results[idx], np.float32).tobytes())
            response_arrays.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def handle_tasks(self, tpu_id):
    while True:
        # Block until an item becomes available
        qitem = self.tasks.get(block=True)

        # When a "None" item was added, it is a signal from the parent
        # to indicate we should stop working (see finish)
        if qitem is None:
            break

        results = self.process(qitem, tpu_id)
        response = []
        gc_hack = []
        for res, q_id in zip(results, qitem.query_id_list):
            result_arr = array.array("B", res)
            gc_hack.append(result_arr)
            r_info = result_arr.buffer_info()
            response.append(
                mlperf_loadgen.QuerySampleResponse(q_id, r_info[0],
                                                   r_info[1]))

        # Tell loadgen that we're ready with this query
        mlperf_loadgen.QuerySamplesComplete(response)
        self.tasks.task_done()
def issue_queries(self, query_samples):
    input_ids = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    input_mask = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    segment_ids = np.zeros((len(query_samples), 1, 384), dtype=np.int32)
    for sample_idx in range(len(query_samples)):
        eval_features = self.qsl.get_features(query_samples[sample_idx].index)
        input_ids[sample_idx, ...] = np.array(eval_features.input_ids)
        input_mask[sample_idx, ...] = np.array(eval_features.input_mask)
        segment_ids[sample_idx, ...] = np.array(eval_features.segment_ids)

    def input_fn():
        inputs = {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids
        }
        return tf.data.Dataset.from_tensor_slices(inputs)

    for i, result in enumerate(self.estimator.predict(input_fn)):
        logits = [float(x) for x in result["logits"].flat]
        response_array = array.array(
            "B", np.array(logits).astype(np.float32).tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global BATCH_SIZE

    if VERBOSITY_LEVEL:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = [preprocessed_image_buffer[qs.index] for qs in batch]
        batch_predicted_labels = predict_labels_for_batch(batch_data)
        tick('p', len(batch))
        if VERBOSITY_LEVEL:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
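# A note on the response_array_refs pattern above: lg.QuerySampleResponse only
# records the integer (pointer, size) pair from buffer_info(), so Python must
# keep the underlying array.array alive at least until lg.QuerySamplesComplete
# returns. A minimal sketch of that pattern in isolation follows; the helper
# name is hypothetical, and it assumes loadgen is done with the buffers once
# QuerySamplesComplete returns.
import array

import numpy as np
import mlperf_loadgen as lg

def complete_with_float_results(query_ids, per_query_results):
    """Hypothetical helper: send one float32 buffer per query id to loadgen."""
    responses = []
    keep_alive = []  # prevents the byte buffers from being garbage-collected early
    for qid, result in zip(query_ids, per_query_results):
        buf = array.array("B", np.asarray(result, np.float32).tobytes())
        keep_alive.append(buf)
        ptr, size = buf.buffer_info()  # for "B" arrays, size is already in bytes
        responses.append(lg.QuerySampleResponse(qid, ptr, size))
    lg.QuerySamplesComplete(responses)
    # keep_alive goes out of scope only after loadgen has consumed the buffers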
def response_loadgen(out_queue):
    global out_queue_cnt

    while True:
        next_task = out_queue.get()
        if next_task is None:
            # None means shutdown
            log.info('Exiting response thread')
            break

        query_id_list = next_task.query_id_list
        result = next_task.result
        batch_size = len(query_id_list)
        # reshape returns a new array rather than modifying in place
        result = result.reshape(batch_size, -1, 2)
        out_list = np.split(result, batch_size, axis=0)
        for i, o in enumerate(out_list):
            response_array = array.array(
                "B", np.array(o).astype(np.float32).tobytes())
            bi = response_array.buffer_info()
            responses = [lg.QuerySampleResponse(query_id_list[i], bi[0], bi[1])]
            out_queue_cnt += 1
            lg.QuerySamplesComplete(responses)
def handle_tasks(self):
    while True:
        # Block until an item becomes available
        qitem = self.tasks.get(block=True)

        # When a "None" item was added, it is a signal from the parent
        # to indicate we should stop working (see finish)
        if qitem is None:
            break

        result = self.process(qitem)
        response = []
        # TBD: do something when we are running accuracy mode. We need to
        # properly store the result, perhaps through QuerySampleResponse,
        # otherwise internally in this instance of Runner.
        # QuerySampleResponse contains an ID, a size field and a data pointer field.
        for query_id in qitem.query_id:
            response.append(mlperf_loadgen.QuerySampleResponse(query_id, 0, 0))

        # Tell loadgen that we're ready with this query
        mlperf_loadgen.QuerySamplesComplete(response)
        self.tasks.task_done()
def handle_tasks(self, tasks_queue):
    """Worker thread."""
    while True:
        qitem = tasks_queue.get()
        if qitem is None:
            # None in the queue indicates the parent wants us to exit
            tasks_queue.task_done()
            break

        try:
            # run the prediction
            results = self.model.predict({self.model.inputs[0]: qitem.img})
            if self.take_accuracy:
                response = self.post_process(results, qitem.content_id,
                                             qitem.label, self.result_dict)
        except Exception as ex:  # pylint: disable=broad-except
            src = [self.ds.get_item_loc(i) for i in qitem.content_id]
            log.error("thread: failed on contentid=%s, %s", src, ex)
        finally:
            response = []
            for query_id in qitem.query_id:
                # FIXME: unclear what to return here
                response.append(lg.QuerySampleResponse(query_id, 0, 0))
            lg.QuerySamplesComplete(response)
        tasks_queue.task_done()
def issue_queries(query_samples):
    printable_query = [(qs.index, qs.id) for qs in query_samples]
    print("LG: issue_queries( {} )".format(printable_query))

    predicted_results = {}
    for qs in query_samples:
        query_index = qs.index
        x_vector = dataset[query_index]
        predicted_label = predict_label(x_vector)
        predicted_results[query_index] = predicted_label
    print("LG: predicted_results = {}".format(predicted_results))

    response = []
    # Keep references to the response buffers so they are not
    # garbage-collected before loadgen has read them.
    response_array_refs = []
    for qs in query_samples:
        query_index, query_id = qs.index, qs.id
        response_array = array.array(
            "B", np.array(predicted_results[query_index], np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)
def process_query_async(query_samples, i_slice):
    time.sleep(.001 * (i_slice + 1))
    responses = []
    samples_to_complete = query_samples[i_slice:len(query_samples):3]
    for s in samples_to_complete:
        responses.append(mlperf_loadgen.QuerySampleResponse(s.id, 0, 0))
    mlperf_loadgen.QuerySamplesComplete(responses)
def handle_tasks(self, tasks_queue):
    """Worker thread."""
    while True:
        qitem = tasks_queue.get()
        if qitem is None:
            # None in the queue indicates the parent wants us to exit
            tasks_queue.task_done()
            break

        try:
            # run the prediction
            results = self.model.predict({self.model.inputs[0]: qitem.img})
            # and keep track of how long it took
            took = time.time() - qitem.start

            response = []
            for idx, result in enumerate(results[0]):
                result = self.post_process(result)
                if qitem.label[idx] == result:
                    self.result_dict["good"] += 1
                self.result_dict["total"] += 1
                # FIXME: unclear what to return here
                response.append(lg.QuerySampleResponse(qitem.id[idx], 0, 0))
            self.result_list.append(took)
            lg.QuerySamplesComplete(response)
        except Exception as ex:  # pylint: disable=broad-except
            log.error("execute_parallel thread: %s", ex)
        tasks_queue.task_done()
def post_process(self, query_ids, results):
    response = []
    for res, q_id in zip(results, query_ids):
        response.append(mlperf_loadgen.QuerySampleResponse(q_id, 0, 0))

    # Tell loadgen that we're ready with this query
    mlperf_loadgen.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    with torch.no_grad():
        for i in range(len(query_samples)):
            data = self.qsl.get_features(query_samples[i].index)
            print("Processing sample id {:d} with shape = {:}".format(
                query_samples[i].index, data.shape))

            image = torch.from_numpy(
                data[np.newaxis, ...]).float().to(self.device)
            prediction = self.trainer.network(image)
            softmax = F.softmax(
                prediction[0], dim=1).cpu().numpy().astype(np.float16)

            transpose_forward = self.trainer.plans.get("transpose_forward")
            transpose_backward = self.trainer.plans.get("transpose_backward")
            assert transpose_forward == [0, 1, 2], \
                "Unexpected transpose_forward {:}".format(transpose_forward)
            assert transpose_backward == [0, 1, 2], \
                "Unexpected transpose_backward {:}".format(transpose_backward)

            response_array = array.array("B", softmax.tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_samples[i].id, bi[0],
                                              bi[1])
            lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global BATCH_SIZE
    global model

    if VERBOSITY_LEVEL > 2:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = preprocessed_image_buffer[preprocessed_image_map[[
            qs.index for qs in batch
        ]]]
        torch_batch = torch.from_numpy(batch_data)

        begin_time = time.time()

        # move the input to GPU for speed if available
        if USE_CUDA:
            torch_batch = torch_batch.to('cuda')

        with torch.no_grad():
            trimmed_batch_results = model(torch_batch)

        inference_time_s = time.time() - begin_time

        actual_batch_size = len(trimmed_batch_results)
        if VERBOSITY_LEVEL > 1:
            print("[batch of {}] inference={:.2f} ms".format(
                actual_batch_size, inference_time_s * 1000))

        batch_predicted_labels = torch.argmax(trimmed_batch_results,
                                              dim=1).tolist()
        tick('p', len(batch))
        if VERBOSITY_LEVEL > 2:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def process_query_async(query_samples):
    time.sleep(.001)
    responses = []
    response_data = array.array(
        'B', [0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 254, 255])
    rdbi = response_data.buffer_info()
    for s in query_samples:
        responses.append(
            mlperf_loadgen.QuerySampleResponse(s.id, rdbi[0], rdbi[1]))
    mlperf_loadgen.QuerySamplesComplete(responses)
def issue_queries(query_samples):
    global BATCH_SIZE
    global model_output_volume
    global num_classes

    if VERBOSITY_LEVEL > 2:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = preprocessed_image_buffer[preprocessed_image_map[[
            qs.index for qs in batch
        ]]]

        trimmed_batch_results, inference_time_s = inference_for_given_batch(
            batch_data)
        actual_batch_size = len(trimmed_batch_results)
        if VERBOSITY_LEVEL > 1:
            print("[batch of {}] inference={:.2f} ms".format(
                actual_batch_size, inference_time_s * 1000))

        if model_output_volume == 1:
            batch_predicted_labels = trimmed_batch_results
        else:
            batch_predicted_labels = [
                np.argmax(trimmed_batch_results[k][-num_classes:])
                for k in range(actual_batch_size)
            ]

        tick('p', len(batch))
        if VERBOSITY_LEVEL > 2:
            print("predicted_batch_results = {}".format(batch_predicted_labels))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, predicted_label in zip(batch, batch_predicted_labels):
            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def issue_queries(query_samples):
    global BATCH_SIZE

    if VERBOSITY_LEVEL:
        printable_query = [(qs.index, qs.id) for qs in query_samples]
        print("issue_queries( {} )".format(printable_query))
    tick('Q', len(query_samples))

    for j in range(0, len(query_samples), BATCH_SIZE):
        # NB: the last batch may be shorter than BATCH_SIZE
        batch = query_samples[j:j + BATCH_SIZE]
        batch_data = [preprocessed_image_buffer[qs.index] for qs in batch]
        predictions_for_a_batch = predict_labels_for_batch(batch_data)
        tick('p', len(batch))
        if VERBOSITY_LEVEL:
            print("predicted_batch_results = {}".format(predictions_for_a_batch))

        response = []
        # Keep references to the response buffers so they are not
        # garbage-collected before loadgen has read them.
        response_array_refs = []
        for qs, all_boxes_for_this_sample in zip(batch,
                                                 predictions_for_a_batch):
            num_active_boxes_for_this_sample = all_boxes_for_this_sample[
                MODEL_MAX_PREDICTIONS * 7].view('int32')
            global_image_index = qs.index
            width_orig, height_orig = original_w_h[global_image_index]

            reformed_active_boxes_for_this_sample = []
            for i in range(num_active_boxes_for_this_sample):
                (image_id, ymin, xmin, ymax, xmax, confidence_score,
                 class_number) = all_boxes_for_this_sample[i * 7:(i + 1) * 7]

                if class_map:
                    class_number = float(class_map[int(class_number)])

                reformed_active_boxes_for_this_sample += [
                    float(global_image_index), ymin, xmin, ymax, xmax,
                    confidence_score, class_number
                ]

            response_array = array.array(
                "B",
                np.array(reformed_active_boxes_for_this_sample,
                         np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
        #tick('R', len(response))
    sys.stdout.flush()
def worker_code():
    while True:
        ## Grab a new batch:
        #
        deadline_ts = None
        batch_jobs = []
        batch_inputs = []
        # may not run to the end due to the cumulative timeout
        for grabbed_count in range(BATCH_CAPACITY):
            try:
                # no waiting limit on the first job
                grab_timeout = None if deadline_ts is None else max(
                    0, deadline_ts - time.time())
                job = task_queue.get(timeout=grab_timeout)
                batch_jobs.append(job)
                batch_inputs.append(job['inputs'])
                if grabbed_count == 0:
                    deadline_ts = job['ts_submitted'] + TOPUP_TIME_S
            except queue.Empty:
                break  # we ran out of TOPUP_TIME_S

        print(f"LG: worker grabbed and submitted {len(batch_jobs)} jobs")

        ## Predict the whole batch:
        #
        predicted_labels = predict_labels(batch_inputs)  # takes LATENCY_S of time
        ts_predicted = time.time()

        ## Report batch results:
        #
        for index_in_batch, job in enumerate(batch_jobs):
            predicted_label = predicted_labels[index_in_batch]
            one_input = job['inputs']
            query_sample = job['query_sample']
            ts_submitted = job['ts_submitted']
            print(f"LG: worker predicted: for input={one_input} "
                  f"label={predicted_label} in "
                  f"{(ts_predicted - ts_submitted) * 1000} ms")

            response_array = array.array(
                "B", np.array(predicted_label, np.float32).tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_sample.id, bi[0], bi[1])
            lg.QuerySamplesComplete([response])
            task_queue.task_done()
def process_query_async(query_samples):
    time.sleep(.001)
    responses = []
    response_array = array.array(
        'f', [0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 254, 255])
    response_info = response_array.buffer_info()
    response_data = response_info[0]
    response_size = response_info[1] * response_array.itemsize
    for s in query_samples:
        responses.append(
            mlperf_loadgen.QuerySampleResponse(s.id, response_data,
                                               response_size))
    mlperf_loadgen.QuerySamplesComplete(responses)
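# For completeness, a hedged sketch of how a process_query_async-style callback
# gets wired into a LoadGen run. The sample counts and no-op QSL callbacks are
# placeholders, NUM_AGENTS is assumed to match the stride in the slice-based
# variant above, and ConstructSUT's signature has varied across loadgen
# versions (older releases also expected a process_latencies callback).
import threading

import mlperf_loadgen

NUM_AGENTS = 3  # placeholder: must match the slice stride

def issue_query(query_samples):
    # Fan each incoming query out to NUM_AGENTS async workers, one per slice.
    for i_slice in range(NUM_AGENTS):
        threading.Thread(target=process_query_async,
                         args=(query_samples, i_slice)).start()

def flush_queries():
    pass  # nothing is buffered in this toy SUT

def load_samples(sample_indices):
    pass  # a real QSL would stage preprocessed samples in memory here

def unload_samples(sample_indices):
    pass

settings = mlperf_loadgen.TestSettings()
settings.scenario = mlperf_loadgen.TestScenario.Offline
settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples, unload_samples)
mlperf_loadgen.StartTest(sut, qsl, settings)
mlperf_loadgen.DestroyQSL(qsl)
mlperf_loadgen.DestroySUT(sut)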
def issue_queries(self, query_samples):
    with torch.no_grad():
        for i in range(len(query_samples)):
            eval_features = self.qsl.get_features(query_samples[i].index)
            start_scores, end_scores = self.model.forward(
                input_ids=torch.LongTensor(
                    eval_features.input_ids).unsqueeze(0).cuda(),
                attention_mask=torch.LongTensor(
                    eval_features.input_mask).unsqueeze(0).cuda(),
                token_type_ids=torch.LongTensor(
                    eval_features.segment_ids).unsqueeze(0).cuda())
            output = torch.stack([start_scores, end_scores],
                                 dim=-1).squeeze(0).cpu().numpy()

            response_array = array.array("B", output.tobytes())
            bi = response_array.buffer_info()
            response = lg.QuerySampleResponse(query_samples[i].id, bi[0],
                                              bi[1])
            lg.QuerySamplesComplete([response])
def run_one_item(self, qitem):
    # run the prediction
    query_id, content_id, feed = qitem
    results = self.session.run(self.onnx_output_names, feed)
    # The outputs are deliberately discarded here and empty responses are
    # returned, so this path measures inference time only.
    processed_results = [[]] * len(query_id)

    response_array_refs = []
    response = []
    for idx, qid in enumerate(query_id):
        response_array = array.array(
            "B", np.array(processed_results[idx], np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        output = self.sess.run(
            self.output,
            feed_dict={self.input: data[np.newaxis, ...]})[0].astype(np.float16)

        response_array = array.array("B", output.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        # Follow the PyTorch implementation. The ONNX file has five outputs,
        # but we only care about the one named "output".
        output = self.sess.run(
            ["output"],
            {"input": data[np.newaxis, ...]})[0].squeeze(0).astype(np.float16)

        response_array = array.array("B", output.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def issue_queries(query_samples):
    global so
    global last_timeing
    global result_timeing

    idx = np.array([q.index for q in query_samples]).astype(np.int32)
    query_id = [q.id for q in query_samples]
    if args.dataset == 'brats2019':
        start = time.time()
        response_array_refs = []
        response = []
        for i, qid in enumerate(query_id):
            processed_results = so.IssueQuery(1, idx[i][np.newaxis])
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array = array.array(
                "B", np.array(processed_results[0], np.float16).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
        result_timeing.append(time.time() - start)
        lg.QuerySamplesComplete(response)
    else:
        start = time.time()
        processed_results = so.IssueQuery(len(idx), idx)
        result_timeing.append(time.time() - start)
        processed_results = json.loads(processed_results.decode('utf-8'))
        response_array_refs = []
        response = []
        for i, qid in enumerate(query_id):
            response_array = array.array(
                "B", np.array(processed_results[i], np.float32).tobytes())
            response_array_refs.append(response_array)
            bi = response_array.buffer_info()
            response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
        lg.QuerySamplesComplete(response)
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        data = self.qsl.get_features(query_samples[i].index)
        print("Processing sample id {:d} with shape = {:}".format(
            query_samples[i].index, data.shape))

        before_softmax = self.exec_net.infer(
            inputs={self.input_name: data[np.newaxis, ...]})[self.output_name]
        after_softmax = softmax(before_softmax, axis=1).astype(np.float16)

        response_array = array.array("B", after_softmax.tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])
def postprocess(self, qitem):
    qitem.readyEvent.synchronize()
    with torch.cuda.stream(self.ppStream):
        qitem.results = qitem.results.cpu()
        qitem.results = qitem.results.numpy()
    processed_results = self.post_proc(qitem.results, qitem.label,
                                       self.result_dict)

    response_array_refs = []
    response = []
    for result, query_id in zip(processed_results, qitem.query_id):
        response_array = array.array(
            "B", np.array(result, np.float32).tobytes())
        response_array_refs.append(response_array)
        bi = response_array.buffer_info()
        response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
    lg.QuerySamplesComplete(response)

    if qitem.index == self.batches - 1:
        self.finishTime = time.time()
def issue_queries(self, query_samples):
    for i in range(len(query_samples)):
        eval_features = self.qsl.get_features(query_samples[i].index)
        input_ids = np.array([eval_features.input_ids])
        input_mask = np.array([eval_features.input_mask])
        segment_ids = np.array([eval_features.segment_ids])
        feeds = {
            'input_ids:0': input_ids,
            'input_mask:0': input_mask,
            'segment_ids:0': segment_ids
        }
        result = self.sess.run(["logits:0"], feed_dict=feeds)

        logits = [float(x) for x in result[0].flat]
        response_array = array.array(
            "B", np.array(logits).astype(np.float32).tobytes())
        bi = response_array.buffer_info()
        response = lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])
        lg.QuerySamplesComplete([response])