Example #1
    def test_length1_sequence(self):
        # Send a length-1 sequence and check for correct accumulator
        # result. The result should be returned immediately.
        for trial in _trials:
            # Run on different protocols.
            for idx, protocol in enumerate(_protocols):
                self.clear_deferred_exceptions()
                try:
                    dtype = self.get_datatype(trial)
                    model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                    self.check_setup(model_name)
                    self.assertFalse("TRTSERVER_DELAY_SCHEDULER" in os.environ)
                    self.assertFalse("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                    corrid = 99
                    self.check_sequence(trial, model_name, dtype, corrid,
                                        (4000, None),
                                        # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                                        (("start,end", 42, None, None),),
                                        self.get_expected_result(42 + corrid, corrid, 42,
                                                                 trial, "start,end"),
                                        protocol, sequence_name="{}_{}".format(
                                            self._testMethodName, protocol))

                    self.check_deferred_exception()
                    self.check_status(model_name, (1,), (idx + 1), (idx + 1))
                except InferenceServerException as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))

    def test_simple_sequence(self):
        # Send one sequence and check for correct accumulator
        # result. The result should be returned immediately.
        for trial in _trials:
            # Run on different protocols.
            for idx, protocol in enumerate(_protocols):
                self.clear_deferred_exceptions()
                try:
                    dtype = self.get_datatype(trial)
                    model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                    self.check_setup(model_name)
                    self.assertFalse(
                        "TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                    self.assertFalse(
                        "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                    if "string" in trial:
                        corrid = '52'
                    else:
                        corrid = 52

                    expected_result = self.get_expected_result(
                        45 + int(corrid), corrid, 9, trial, "end"
                    ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                        45, corrid, 9, trial, "end")

                    self.check_sequence(
                        trial,
                        model_name,
                        dtype,
                        corrid,
                        (4000, None),
                        # (flag_str, value, (ls_ms, gt_ms), (pre_delay, post_delay))
                        (("start", 1, None, None), (None, 2, None, None),
                         (None, 3, None, None), (None, 4, None, None),
                         (None, 5, None, None), (None, 6, None, None),
                         (None, 7, None, None), (None, 8, None, None),
                         ("end", 9, None, None)),
                        expected_result,
                        protocol,
                        sequence_name="{}_{}".format(self._testMethodName,
                                                     protocol))

                    self.check_deferred_exception()
                    self.check_status(model_name, {1: 9 * (idx + 1)},
                                      9 * (idx + 1), 9 * (idx + 1))
                except Exception as ex:
                    self.assertTrue(False, "unexpected error {}".format(ex))
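
A note on the expected values used above: across these tests the accumulator result follows a simple pattern that can be inferred from the literals passed to get_expected_result. The sketch below is illustrative only; sketch_expected_result is a hypothetical stand-in, not the real helper from the shared sequence test utilities (which is not shown in this excerpt). The dyna-sequence models accumulate the input values over the sequence, and the non-implicit-state variants also fold the correlation ID into the final result.

# Illustrative sketch of the expected-result arithmetic (inferred from the
# literals above; not the actual helper used by the tests).
def sketch_expected_result(values, corrid, implicit_state=False):
    total = sum(values)
    return total if implicit_state else total + int(corrid)

assert sketch_expected_result((42,), 99) == 42 + 99          # test_length1_sequence
assert sketch_expected_result(range(1, 10), 52) == 45 + 52   # test_simple_sequence
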
Example #3
    def test_backlog_sequence_timeout(self):
        # Send 4 sequences in parallel and make sure they get
        # completely batched into batch-size 4 inferences. One of the
        # sequences has a long delay that causes it to time out and
        # that allows a 5th sequence to come out of the backlog and
        # finish. The timed-out sequence will then send the delayed
        # inference but it will appear as a new sequence and so fail
        # because it doesn't have the START flag.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions((1,3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions((11,12,12,13), dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions((111,112,112,113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions((1111,1112,1112,1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions((11111,11113), dtype, 4)
            try:
                protocol = "streaming"
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertFalse("TRTSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                corrids = [ 1001, 1002, 1003, 1004, 1005 ]
                threads = []
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[0],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1, None),
                           (None, 3, _max_sequence_idle_ms + 1000)),
                          self.get_expected_result(4 + corrids[0], corrids[0], 3, trial, None),
                          protocol, precreated_shm0_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[1],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11, None),
                           (None, 12, _max_sequence_idle_ms / 2),
                           (None, 12, _max_sequence_idle_ms / 2),
                           ("end", 13, _max_sequence_idle_ms / 2)),
                          self.get_expected_result(48 + corrids[1], corrids[1], 13, trial, None),
                          protocol, precreated_shm1_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[2],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 111, None),
                           (None, 112, _max_sequence_idle_ms / 2),
                           (None, 112, _max_sequence_idle_ms / 2),
                           ("end", 113, _max_sequence_idle_ms / 2)),
                          self.get_expected_result(448 + corrids[2], corrids[2], 113, trial, None),
                          protocol, precreated_shm2_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[3],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1111, None),
                           (None, 1112, _max_sequence_idle_ms / 2),
                           (None, 1112, _max_sequence_idle_ms / 2),
                           ("end", 1113, _max_sequence_idle_ms / 2)),
                          self.get_expected_result(4448 + corrids[3], corrids[3], 1113, trial, None),
                          protocol, precreated_shm3_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[4],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11111, None),
                           ("end", 11113, None)),
                          self.get_expected_result(22224 + corrids[4], corrids[4], 11113, trial, "end"),
                          protocol, precreated_shm4_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))

                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                for t in threads:
                    t.join()

                self.check_deferred_exception()
                self.assertTrue(False, "expected error")
            except InferenceServerException as ex:
                self.assertEqual("inference:0", ex.server_id())
                self.assertTrue(
                    ex.message().startswith(
                        str("inference request for sequence 1001 to " +
                            "model '{}' must specify the START flag on the first " +
                            "request of the sequence").format(model_name)))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
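
The error asserted above hinges on the scheduler's idle timeout. The sketch below is illustrative only; _max_sequence_idle_ms is given an assumed value here so the snippet is runnable, while in the test module it is defined elsewhere.

# Why corrid 1001 triggers the "must specify the START flag" error:
_max_sequence_idle_ms = 5000                 # assumed value for illustration

idle_gap_ms = _max_sequence_idle_ms + 1000   # delay before 1001's second request
assert idle_gap_ms > _max_sequence_idle_ms
# Because the gap exceeds the scheduler's idle timeout, sequence 1001 is reaped
# and its slot is handed to the backlogged sequence 1005. When 1001's delayed
# request finally arrives, it looks like the first request of a new sequence,
# and a first request without the START flag is rejected with the message
# asserted in the except-block above.
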
Example #4
    def test_backlog_fill_no_end(self):
        # Send 4 sequences in parallel, two of which are shorter. Send
        # 2 additional sequences that should go into backlog but
        # should immediately fill into the short sequences. One of
        # those sequences is filled before it gets its end request.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions((1,2,3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions((11,13), dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions((111,113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions((1111,1112,1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions((11111,), dtype, 4)
            precreated_shm5_handles = self.precreate_register_regions((22222,22223,22224), dtype, 5)
            try:
                protocol = "streaming"
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertFalse("TRTSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                corrids = [ 1001, 1002, 1003, 1004, 1005, 1006 ]
                threads = []
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[0],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1, None),
                           (None, 2, None),
                           ("end", 3, None)),
                          self.get_expected_result(6 + corrids[0], corrids[0], 3, trial, "end"),
                          protocol, precreated_shm0_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[1],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11, None),
                           ("end", 13, None)),
                          self.get_expected_result(24 + corrids[1], corrids[1], 13, trial, "end"),
                          protocol, precreated_shm1_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[2],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 111, None),
                           ("end", 113, None)),
                          self.get_expected_result(224 + corrids[2], corrids[2], 113, trial, "end"),
                          protocol, precreated_shm2_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[3],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1111, None),
                           (None, 1112, 3000),
                           ("end", 1113, None)),
                          self.get_expected_result(3336 + corrids[3], corrids[3], 1113, trial, "end"),
                          protocol, precreated_shm3_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[4],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start,end", 11111, None),),
                          self.get_expected_result(11111 + corrids[4], corrids[4], 11111,
                                                   trial, "start,end"),
                          protocol, precreated_shm4_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[5],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 22222, None),
                           (None, 22223, None),
                           ("end", 22224, 2000),),
                          self.get_expected_result(66669 + corrids[5], corrids[5], 22224, trial, "end"),
                          protocol, precreated_shm5_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))

                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                threads[5].start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(model_name, (1,), 5, 14)
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
                    self.cleanup_shm_regions(precreated_shm5_handles)
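
For reference, the counts passed to check_status above can be reconstructed from the sequences sent: the 14 is simply the total number of requests, while the 5 executions follow from how the backlog sequences fill into the slots freed by the two short sequences, as the comment at the top of the test describes.

# Illustrative arithmetic for check_status(model_name, (1,), 5, 14):
sequence_lengths = [3, 2, 2, 3, 1, 3]   # corrids 1001..1006, in order
assert sum(sequence_lengths) == 14
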
Example #5
    def test_backlog(self):
        # The test model instances together are configured with a total
        # max batch size of 4. Send 5 equal-length sequences in parallel
        # and make sure the first 4 get completely batched into
        # batch-size-4 inferences while the 5th goes into the backlog
        # and is handled once there is a free slot.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions((1,2,3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions((11,12,13), dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions((111,112,113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions((1111,1112,1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions((11111,11112,11113), dtype, 4)
            try:
                protocol = "streaming"
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertFalse("TRTSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                corrids = [ 1001, 1002, 1003, 1004, 1005 ]
                threads = []
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[0],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1, None),
                           (None, 2, None),
                           ("end", 3, None)),
                          self.get_expected_result(6 + corrids[0], corrids[0], 3, trial, "end"),
                          protocol, precreated_shm0_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[1],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11, None),
                           (None, 12, None),
                           ("end", 13, None)),
                          self.get_expected_result(36 + corrids[1], corrids[1], 13, trial, "end"),
                          protocol, precreated_shm1_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[2],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 111, None),
                           (None, 112, None),
                           ("end", 113, None)),
                          self.get_expected_result(336 + corrids[2], corrids[2], 113, trial, "end"),
                          protocol, precreated_shm2_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[3],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1111, None),
                           (None, 1112, None),
                           ("end", 1113, None)),
                          self.get_expected_result(3336 + corrids[3], corrids[3], 1113, trial, "end"),
                          protocol, precreated_shm3_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[4],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11111, None),
                           (None, 11112, None),
                           ("end", 11113, None)),
                          self.get_expected_result(33336 + corrids[4], corrids[4], 11113, trial, "end"),
                          protocol, precreated_shm4_handles),
                    kwargs={'sequence_name' : "{}_{}".format(self._testMethodName, protocol)}))

                for t in threads:
                    t.start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(model_name, (1,), 6, 15)
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
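
For reference, the counts passed to check_status above: five sequences of three requests each give 15 inferences, and the 6 expected executions split into the executions that serve the first four sequences in the four batch slots plus those that serve the backlogged fifth sequence once a slot frees. The breakdown below is an illustrative reading of that expectation, not something asserted by the test itself.

# Illustrative arithmetic for check_status(model_name, (1,), 6, 15):
num_sequences, requests_per_sequence = 5, 3
assert num_sequences * requests_per_sequence == 15
batched_execs = 3   # the four in-slot sequences advance together, one step per execution
backlog_execs = 3   # the fifth sequence replays its three requests after a slot frees
assert batched_execs + backlog_execs == 6
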
Example #6
    def _multi_sequence_impl(self, trials, expected_exec_cnt, sleep_secs, tensor_shapes):
        for trial in trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions((1,2,3), dtype, 0,
                                                                      tensor_shape=(tensor_shapes[0],))
            precreated_shm1_handles = self.precreate_register_regions((11,12,13), dtype, 1,
                                                                      tensor_shape=(tensor_shapes[1],))
            precreated_shm2_handles = self.precreate_register_regions((111,112,113), dtype, 2,
                                                                      tensor_shape=(tensor_shapes[2],))
            precreated_shm3_handles = self.precreate_register_regions((1111,1112,1113), dtype, 3,
                                                                      tensor_shape=(tensor_shapes[3],))
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)
                protocol = "streaming"

                self.check_setup(model_name)
                self.assertFalse("TRTSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                corrids = [ 1001, 1002, 1003, 1004 ]
                threads = []
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[0],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1, None),
                           ("end", 3, None)),
                          self.get_expected_result(4*tensor_shapes[0] + corrids[0],
                                                   corrids[0], 3, trial, "end"),
                          protocol, precreated_shm0_handles),
                    kwargs={'sequence_name' : "{}_{}_{}".format(
                        self._testMethodName, protocol, corrids[0]),
                            'tensor_shape' : (tensor_shapes[0],) }))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[1],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 11, None),
                           (None, 12, None),
                           ("end", 13, None)),
                          self.get_expected_result(36*tensor_shapes[1] + corrids[1],
                                                   corrids[1], 13, trial, "end"),
                          protocol, precreated_shm1_handles),
                    kwargs={'sequence_name' : "{}_{}_{}".format(
                        self._testMethodName, protocol, corrids[1]),
                            'tensor_shape' : (tensor_shapes[1],) }))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[2],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 111, None),
                           (None, 112, None),
                           ("end", 113, None)),
                          self.get_expected_result(336*tensor_shapes[2] + corrids[2],
                                                   corrids[2], 113, trial, "end"),
                          protocol, precreated_shm2_handles),
                    kwargs={'sequence_name' : "{}_{}_{}".format(
                        self._testMethodName, protocol, corrids[2]),
                            'tensor_shape' : (tensor_shapes[2],) }))
                threads.append(threading.Thread(
                    target=self.check_sequence_async,
                    args=(trial, model_name, dtype, corrids[3],
                          (None, None),
                          # (flag_str, value, pre_delay_ms)
                          (("start", 1111, None),
                           (None, 1112, None),
                           ("end", 1113, None)),
                          self.get_expected_result(3336*tensor_shapes[3] + corrids[3],
                                                   corrids[3], 1113, trial, "end"),
                          protocol, precreated_shm3_handles),
                    kwargs={'sequence_name' : "{}_{}_{}".format(
                        self._testMethodName, protocol, corrids[3]),
                            'tensor_shape' : (tensor_shapes[3],) }))

                for t in threads:
                    t.start()
                    if sleep_secs > 0:
                        time.sleep(sleep_secs)
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(model_name, (1,), expected_exec_cnt, 11)
            except InferenceServerException as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
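
A note on the expected values in the helper above: expressions such as 36*tensor_shapes[1] + corrids[1] suggest that for a shape-(N,) input the accumulated result scales with the number of elements. The sketch below is illustrative only; sketch_expected_for_shape is a hypothetical function that simply restates that inferred pattern.

# Inferred pattern for the per-sequence expected result with a shape-(N,) input:
def sketch_expected_for_shape(values, n_elements, corrid):
    return sum(values) * n_elements + corrid

assert sketch_expected_for_shape((11, 12, 13), 1, 1002) == 36 * 1 + 1002
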
Example #7
    def _multi_sequence_identical_shape_impl(self, sleep_secs):
        self.clear_deferred_exceptions()
        dtype = np.float32

        precreated_shm0_handles = self.precreate_register_dynaseq_shape_tensor_regions(
            ((2, 1), (4, 2), (8, 3)), dtype, 0)
        precreated_shm1_handles = self.precreate_register_dynaseq_shape_tensor_regions(
            ((2, 11), (4, 12), (8, 13)), dtype, 1)
        precreated_shm2_handles = self.precreate_register_dynaseq_shape_tensor_regions(
            ((2, 111), (4, 112), (8, 113)), dtype, 2)
        precreated_shm3_handles = self.precreate_register_dynaseq_shape_tensor_regions(
            ((2, 1111), (4, 1112), (8, 1113)), dtype, 3)

        try:
            model_name = tu.get_dyna_sequence_model_name("plan", dtype)

            self.check_setup(model_name)
            self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
            self.assertFalse(
                "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

            corrids = [1001, 1002, 1003, 1004]
            threads = []
            threads.append(
                threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(
                        model_name,
                        dtype,
                        corrids[0],
                        (None, None),
                        # (flag_str, shape_value, value, pre_delay_ms)
                        (("start", 2, 1, None), (None, 4, 2, None), ("end", 8,
                                                                     3, None)),
                        self.get_expected_result(4 + corrids[0], corrids[0], 3,
                                                 "end"),
                        precreated_shm0_handles),
                    kwargs={
                        'sequence_name':
                        "{}_{}".format(self._testMethodName, corrids[0]),
                        'using_dynamic_batcher':
                        True
                    }))
            threads.append(
                threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(
                        model_name,
                        dtype,
                        corrids[1],
                        (None, None),
                        # (flag_str, shape_value, value, pre_delay_ms)
                        (("start", 2, 11, None), (None, 4, 12, None),
                         ("end", 8, 13, None)),
                        self.get_expected_result(36 + corrids[1], corrids[1],
                                                 13, "end"),
                        precreated_shm1_handles),
                    kwargs={
                        'sequence_name':
                        "{}_{}".format(self._testMethodName, corrids[1]),
                        'using_dynamic_batcher':
                        True
                    }))
            threads.append(
                threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(
                        model_name,
                        dtype,
                        corrids[2],
                        (None, None),
                        # (flag_str, shape_value, value, pre_delay_ms)
                        (("start", 2, 111, None), (None, 4, 112, None),
                         ("end", 8, 113, None)),
                        self.get_expected_result(336 + corrids[2], corrids[2],
                                                 113, "end"),
                        precreated_shm2_handles),
                    kwargs={
                        'sequence_name':
                        "{}_{}".format(self._testMethodName, corrids[2]),
                        'using_dynamic_batcher':
                        True
                    }))
            threads.append(
                threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(
                        model_name,
                        dtype,
                        corrids[3],
                        (None, None),
                        # (flag_str, shape_value, value, pre_delay_ms)
                        (("start", 2, 1111, None), (None, 4, 1112, None),
                         ("end", 8, 1113, None)),
                        self.get_expected_result(3336 + corrids[3], corrids[3],
                                                 1113, "end"),
                        precreated_shm3_handles),
                    kwargs={
                        'sequence_name':
                        "{}_{}".format(self._testMethodName, corrids[3]),
                        'using_dynamic_batcher':
                        True
                    }))

            for t in threads:
                t.start()
                if sleep_secs > 0:
                    time.sleep(sleep_secs)
            for t in threads:
                t.join()
            self.check_deferred_exception()
            self.check_status(model_name, {4: 3}, 3, 12)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            if TEST_SYSTEM_SHARED_MEMORY:
                self.cleanup_shm_regions(precreated_shm0_handles)
                self.cleanup_shm_regions(precreated_shm1_handles)
                self.cleanup_shm_regions(precreated_shm2_handles)
                self.cleanup_shm_regions(precreated_shm3_handles)
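
    # Note on the helper above (illustrative, inferred from the values used):
    # each request carries a shape value (2, 4, 8) alongside its data value
    # because the "plan" model under test uses shape tensors. All four sequences
    # send the identical shape progression, so the dynamic batcher can batch
    # them together, which is what check_status(model_name, {4: 3}, 3, 12)
    # asserts: three executions at batch size 4 covering the 12 requests.
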
    def test_skip_batch(self):
        # The test model instances together are configured with a total
        # batch size of 4. Send four sequences in parallel, two of them
        # shorter, so that padding must be applied correctly for the
        # longer sequences.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions((1, 3),
                                                                      dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions(
                (11, 12, 13, 14), dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113, 1114), dtype, 3)
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)

                # Need the scheduler to wait for the queue to contain all
                # inferences for all four sequences.
                self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ)
                self.assertEqual(
                    int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12)
                self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER",
                              os.environ)
                self.assertEqual(
                    int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0)

                corrids = [1001, 1002, 1003, 1004]
                threads = []
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), ("end", 3, None)),
                            self.get_expected_result(4 + corrids[0], corrids[0],
                                                     3, trial, "end"),
                            precreated_shm0_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), (None, 12, None),
                             (None, 13, None), ("end", 14, None)),
                            self.get_expected_result(50 + corrids[1],
                                                     corrids[1], 14, trial,
                                                     "end"),
                            precreated_shm1_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 111, None), ("end", 113, None)),
                            self.get_expected_result(224 + corrids[2],
                                                     corrids[2], 113, trial,
                                                     "end"),
                            precreated_shm2_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1111, None), (None, 1112, None),
                             (None, 1113, None), ("end", 1114, None)),
                            self.get_expected_result(4450 + corrids[3],
                                                     corrids[3], 1114, trial,
                                                     "end"),
                            precreated_shm3_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                threads[1].start()
                threads[3].start()
                time.sleep(1)
                threads[0].start()
                threads[2].start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                if _model_instances == 1:
                    self.check_status(model_name, {4: 4}, 12, 12)
                elif _model_instances == 2:
                    self.check_status(model_name, {2: 8}, 12, 12)
                elif _model_instances == 4:
                    self.check_status(model_name, {1: 12}, 12, 12)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
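
    # Note on test_skip_batch above: unlike the other tests shown here, it
    # expects TRITONSERVER_DELAY_SCHEDULER / TRITONSERVER_BACKLOG_DELAY_SCHEDULER
    # to be set (to 12 and 0). The value 12 matches the total number of requests
    # queued across the four sequences (2 + 4 + 2 + 4), so the scheduler holds
    # everything until all requests have arrived; the batch statistics checked
    # afterwards then depend only on how those 12 requests are split across
    # _model_instances.
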
    def test_backlog_fill(self):
        # The test model instances together are configured with a total
        # max batch size of 4. Send 4 sequences in parallel, two of
        # which are shorter. Send 2 additional sequences that should
        # go into backlog but should immediately fill into the short
        # sequences.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions(
                (1, 2, 3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions((11, 13),
                                                                      dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions(
                (11111, ), dtype, 4)
            precreated_shm5_handles = self.precreate_register_regions(
                (22222, ), dtype, 5)
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse(
                    "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                corrids = [1001, 1002, 1003, 1004, 1005, 1006]
                threads = []
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), (None, 2, None), ("end", 3,
                                                                   None)),
                            self.get_expected_result(6 + corrids[0],
                                                     corrids[0], 3, trial,
                                                     "end"),
                            precreated_shm0_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), ("end", 13, None)),
                            self.get_expected_result(24 + corrids[1],
                                                     corrids[1], 13, trial,
                                                     "end"),
                            precreated_shm1_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 111, None), ("end", 113, None)),
                            self.get_expected_result(224 + corrids[2],
                                                     corrids[2], 113, trial,
                                                     "end"),
                            precreated_shm2_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1111, None), (None, 1112, 3000),
                             ("end", 1113, None)),
                            self.get_expected_result(3336 + corrids[3],
                                                     corrids[3], 1113, trial,
                                                     "end"),
                            precreated_shm3_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[4],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start,end", 11111, None), ),
                            self.get_expected_result(11111 + corrids[4],
                                                     corrids[4], 11111, trial,
                                                     "start,end"),
                            precreated_shm4_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[5],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start,end", 22222, None), ),
                            self.get_expected_result(22222 + corrids[5],
                                                     corrids[5], 22222, trial,
                                                     "start,end"),
                            precreated_shm5_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                threads[5].start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                self.check_status(model_name, {4: 3}, 12, 12)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
                    self.cleanup_shm_regions(precreated_shm5_handles)
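
For reference, the counts in test_backlog_fill's check_status call: the six sequences contribute 3 + 2 + 2 + 3 + 1 + 1 = 12 requests, and because the two single-request backlog sequences drop into the slots freed when the two short sequences end, every execution is expected to run at the full batch size of 4.

# Illustrative arithmetic for check_status(model_name, {4: 3}, 12, 12):
sequence_lengths = [3, 2, 2, 3, 1, 1]   # corrids 1001..1006, in order
assert sum(sequence_lengths) == 12
assert sum(sequence_lengths) == 4 * 3   # three full batch-of-4 executions
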
Example #10
    def test_backlog_fill_no_end(self):
        # Send 4 sequences in parallel, two of which are shorter. Send
        # 2 additional sequences that should go into backlog but
        # should immediately fill into the short sequences. One of
        # those sequences is filled before it gets its end request.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
            precreated_shm0_handles = self.precreate_register_regions(
                (1, 2, 3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions((11, 13),
                                                                      dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions(
                (11111, ), dtype, 4)
            precreated_shm5_handles = self.precreate_register_regions(
                (22222, 22223, 22224), dtype, 5)
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse(
                    "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                if "string" in trial:
                    corrids = ['1001', '1002', '1003', '1004', '1005', '1006']
                else:
                    corrids = [1001, 1002, 1003, 1004, 1005, 1006]
                threads = []
                expected_result = self.get_expected_result(
                    6 + int(corrids[0]), corrids[0], 3, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    6, corrids[0], 3, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), (None, 2, None), ("end", 3,
                                                                   None)),
                            expected_result,
                            precreated_shm0_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                expected_result = self.get_expected_result(
                    24 + int(corrids[1]), corrids[1], 13, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    24, corrids[1], 13, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), ("end", 13, None)),
                            expected_result,
                            precreated_shm1_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                expected_result = self.get_expected_result(
                    224 + int(corrids[2]), corrids[2], 113, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    224, corrids[2], 113, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 111, None), ("end", 113, None)),
                            expected_result,
                            precreated_shm2_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                expected_result = self.get_expected_result(
                    3336 + int(corrids[3]), corrids[3], 1113, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    3336, corrids[3], 1113, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1111, None), (None, 1112, 3000),
                             ("end", 1113, None)),
                            expected_result,
                            precreated_shm3_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                expected_result = self.get_expected_result(
                    11111 +
                    int(corrids[4]), corrids[4], 11111, trial, "start,end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    11111, corrids[4], 11111, trial, "start,end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[4],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start,end", 11111, None), ),
                            expected_result,
                            precreated_shm4_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))
                expected_result = self.get_expected_result(
                    66669 + int(corrids[5]), corrids[5], 22224, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    66669, corrids[5], 22224, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[5],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (
                                ("start", 22222, None),
                                (None, 22223, None),
                                ("end", 22224, 2000),
                            ),
                            expected_result,
                            precreated_shm5_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                threads[0].start()
                threads[1].start()
                threads[2].start()
                threads[3].start()
                time.sleep(2)
                threads[4].start()
                threads[5].start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
                # Expecting the requests of the same sequence to be in the same
                # slot, so the execution for the last long sequence will be
                # padded to a batch.
                self.check_status(model_name, {4: 3, 1: 2}, 5, 14)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
                    self.cleanup_shm_regions(precreated_shm5_handles)
Example #11
    def test_backlog(self):
        # Send 5 equal-length sequences in parallel. The first four should
        # be completely batched into batch-size-4 inferences, while the 5th
        # goes into the backlog and is handled once a slot frees up.
        for trial in _trials:
            self.clear_deferred_exceptions()
            dtype = self.get_datatype(trial)
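            # Pre-create and register one set of shared-memory regions per
            # sequence; the handles are handed to check_sequence_async below
            # and released in the finally block when system or CUDA
            # shared-memory testing is enabled.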
            precreated_shm0_handles = self.precreate_register_regions(
                (1, 2, 3), dtype, 0)
            precreated_shm1_handles = self.precreate_register_regions(
                (11, 12, 13), dtype, 1)
            precreated_shm2_handles = self.precreate_register_regions(
                (111, 112, 113), dtype, 2)
            precreated_shm3_handles = self.precreate_register_regions(
                (1111, 1112, 1113), dtype, 3)
            precreated_shm4_handles = self.precreate_register_regions(
                (11111, 11112, 11113), dtype, 4)
            try:
                model_name = tu.get_dyna_sequence_model_name(trial, dtype)

                self.check_setup(model_name)
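                # The following environment variables appear to be used
                # elsewhere in the suite to artificially delay the scheduler
                # and its backlog handling; this test assumes they are unset
                # (interpretation based on the variable names).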
                self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
                self.assertFalse(
                    "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)

                if "string" in trial:
                    corrids = ['1001', '1002', '1003', '1004', '1005']
                else:
                    corrids = [1001, 1002, 1003, 1004, 1005]

                expected_result = self.get_expected_result(
                    6 + int(corrids[0]), corrids[0], 3, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    6, corrids[0], 3, trial, "end")
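                # The expected accumulator value is the sum of the sequence's
                # inputs (1 + 2 + 3 = 6); for the non-implicit-state models
                # the correlation ID is folded in as well, hence the
                # int(corrids[0]) term above. The same pattern holds for the
                # other four sequences below.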

                threads = []
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[0],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1, None), (None, 2, None), ("end", 3,
                                                                   None)),
                            expected_result,
                            precreated_shm0_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                expected_result = self.get_expected_result(
                    36 + int(corrids[1]), corrids[1], 13, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    36, corrids[1], 13, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[1],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11, None), (None, 12, None), ("end", 13,
                                                                     None)),
                            expected_result,
                            precreated_shm1_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                expected_result = self.get_expected_result(
                    336 + int(corrids[2]), corrids[2], 113, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    336, corrids[2], 113, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[2],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 111, None), (None, 112, None),
                             ("end", 113, None)),
                            expected_result,
                            precreated_shm2_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                expected_result = self.get_expected_result(
                    3336 + int(corrids[3]), corrids[3], 1113, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    3336, corrids[3], 1113, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[3],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 1111, None), (None, 1112, None),
                             ("end", 1113, None)),
                            expected_result,
                            precreated_shm3_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                expected_result = self.get_expected_result(
                    33336 + int(corrids[4]), corrids[4], 11113, trial, "end"
                ) if not IMPLICIT_STATE else self.get_expected_result_implicit(
                    33336, corrids[4], 11113, trial, "end")
                threads.append(
                    threading.Thread(
                        target=self.check_sequence_async,
                        args=(
                            trial,
                            model_name,
                            dtype,
                            corrids[4],
                            (None, None),
                            # (flag_str, value, pre_delay_ms)
                            (("start", 11111, None), (None, 11112, None),
                             ("end", 11113, None)),
                            expected_result,
                            precreated_shm4_handles),
                        kwargs={
                            'sequence_name': "{}".format(self._testMethodName)
                        }))

                for t in threads:
                    t.start()
                for t in threads:
                    t.join()
                self.check_deferred_exception()
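                # Five 3-request sequences: the first four share three batch-4
                # executions while the backlogged fifth runs as three batch-1
                # executions, giving 6 executions and 3*4 + 3*1 = 15 inferences
                # (again reading the check_status arguments from the values
                # used rather than from its definition).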
                self.check_status(model_name, {4: 3, 1: 3}, 6, 15)
            except Exception as ex:
                self.assertTrue(False, "unexpected error {}".format(ex))
            finally:
                if _test_system_shared_memory or _test_cuda_shared_memory:
                    self.cleanup_shm_regions(precreated_shm0_handles)
                    self.cleanup_shm_regions(precreated_shm1_handles)
                    self.cleanup_shm_regions(precreated_shm2_handles)
                    self.cleanup_shm_regions(precreated_shm3_handles)
                    self.cleanup_shm_regions(precreated_shm4_handles)
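
For intuition about the slot/backlog behavior that test_backlog checks, here is a
minimal, self-contained sketch (not Triton code; the class and names are made up)
of a scheduler with a fixed number of sequence slots that parks overflow sequences
in a FIFO backlog and promotes them as slots free up:

from collections import deque

class SlotBacklogScheduler:
    """Toy model of a sequence scheduler with N slots and a FIFO backlog."""

    def __init__(self, num_slots=4):
        self.free_slots = deque(range(num_slots))
        self.backlog = deque()   # sequences waiting for a free slot
        self.assignment = {}     # correlation id -> slot index

    def start_sequence(self, corrid):
        # Assign a free slot if one exists, otherwise queue in the backlog.
        if self.free_slots:
            self.assignment[corrid] = self.free_slots.popleft()
        else:
            self.backlog.append(corrid)

    def end_sequence(self, corrid):
        # Release the slot and hand it to the oldest backlogged sequence.
        slot = self.assignment.pop(corrid)
        if self.backlog:
            self.assignment[self.backlog.popleft()] = slot
        else:
            self.free_slots.append(slot)

# Five sequences, four slots: the fifth waits in the backlog until one of the
# first four ends, which is why its requests are expected to execute as
# batch-1 inferences in the check_status call above.
sched = SlotBacklogScheduler(num_slots=4)
for corrid in (1001, 1002, 1003, 1004, 1005):
    sched.start_sequence(corrid)
assert list(sched.backlog) == [1005]
sched.end_sequence(1001)
assert 1005 in sched.assignment   # the backlogged sequence got the freed slot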