def create_sequence_ensemble_modelconfig(base_model, models_dir, max_batch,
                                         model_version, shape, dtype):
    """Write a config.pbtxt for each basic ensemble wrapping a sequence model.

    For every entry in BASIC_ENSEMBLE_TYPES a directory
    ``<models_dir>/<model_name>`` is created (if missing) and a
    ``config.pbtxt`` consisting of the general ensemble model config
    followed by the ensemble schedule is written into it.

    Args:
        base_model: Name of the base model the ensemble wraps.
        models_dir: Directory under which the model directory is created.
        max_batch: Maximum batch size; 0 selects the "_nobatch" model names.
        model_version: Not used by this function.
        shape: Shape used for both the input and the output of the config.
        dtype: Datatype used for both the input and the output of the config.

    Raises:
        OSError: If the model directory cannot be created, or the path
            exists but is not a directory.
    """

    # No validation as long as the base model supports the type and shape

    model_dtype = np_to_model_dtype(dtype)

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""

    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_model_name = "{}_{}{}".format(ensemble_type, base_model,
                                               nobatch_suffix)
        model_name = tu.get_sequence_model_name(ensemble_model_name, dtype)
        base_model_name = tu.get_sequence_model_name(
            "{}{}".format(base_model, nobatch_suffix), dtype)

        ensemble_schedule = SequenceEnsembleSchedule(
            ensemble_type).get_schedule(base_model_name, shape, model_dtype)

        config_dir = models_dir + "/" + model_name
        config = create_general_modelconfig(model_name, "ensemble", max_batch,
                                            [dtype], [shape], [None], [dtype],
                                            [shape], [None], [None])
        config += ensemble_schedule

        # Only an already-existing directory is tolerated; any other failure
        # (e.g. permissions) is reported instead of being silently ignored.
        os.makedirs(config_dir, exist_ok=True)

        with open(config_dir + "/config.pbtxt", "w") as cfile:
            cfile.write(config)
Пример #2
0
    def run(self,
            client_metadata,
            len_mean=SEQUENCE_LENGTH_MEAN,
            len_stddev=SEQUENCE_LENGTH_STDEV):
        """Send two complete sequences back-to-back on one correlation ID.

        Each sequence length is drawn from a normal distribution
        (len_mean, len_stddev), truncated to int and clamped to at least 1.
        Every step carries the expected result computed by
        get_expected_result for the running sum of that sequence.

        Args:
            client_metadata: Indexable; element 1 is used as the sequence id
                and the whole object is forwarded to check_sequence_async.
            len_mean: Mean of the sequence-length distribution.
            len_stddev: Standard deviation of the sequence-length
                distribution.

        Returns:
            None if check_constraints rejects this model / sequence id,
            otherwise the value returned by check_sequence_async.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        if not self.check_constraints(model_name, client_metadata[1]):
            return None

        # Record this sequence id for the model with a False marker
        # (presumably "not left as a no-end sequence" -- confirm against
        # check_constraints).
        if model_name not in self.sequence_constraints_:
            self.sequence_constraints_[model_name] = {}
        self.sequence_constraints_[model_name][client_metadata[1]] = False

        # Create two variable length sequences with "start" and "end"
        # flags, where both sequences use the same correlation ID and are
        # sent back-to-back.
        seqlen = [
            max(1, int(self.rng_.normal(len_mean, len_stddev))),
            max(1, int(self.rng_.normal(len_mean, len_stddev)))
        ]
        print("{} {}: valid-valid seqlen[0] = {}, seqlen[1] = {}".format(
            self.name_, client_metadata[1], seqlen[0], seqlen[1]),
              file=self.out_stream_)

        values = [
            self.rng_.randint(0, 1024 * 1024, size=seqlen[0]).astype(dtype),
            self.rng_.randint(0, 1024 * 1024, size=seqlen[1]).astype(dtype)
        ]

        # Accumulate the steps of BOTH sequences. Previously the list was
        # reset for every sequence while the request was issued only once
        # after the loop, so the first sequence was never sent.
        # NOTE(review): assumes check_sequence_async handles a step list
        # holding more than one start/end pair -- confirm.
        steps = []
        for p in [0, 1]:
            # The running sum restarts with each sequence.
            expected_result = 0
            last_idx = seqlen[p] - 1

            for idx, val in enumerate(values[p]):
                flags = ""
                if idx == 0:
                    flags += ",start"
                if idx == last_idx:
                    flags += ",end"

                delay_ms = None
                expected_result += val
                expected_result = self.get_expected_result(
                    expected_result, val, trial, flags)

                # (flag_str, value, expected_result, delay_ms)
                steps.append((flags, val, expected_result, delay_ms), )

        return self.check_sequence_async(client_metadata,
                                         trial,
                                         model_name,
                                         dtype,
                                         steps,
                                         sequence_name=self.name_)
Пример #3
0
    def run(self,
            client_metadata,
            len_mean=SEQUENCE_LENGTH_MEAN,
            len_stddev=SEQUENCE_LENGTH_STDEV):
        """Send one sequence that is started but never flagged as ended.

        The sequence length is drawn from a normal distribution
        (len_mean, len_stddev), truncated to int and clamped to at least 1.
        Only the first step carries the "start" flag; no step carries "end".

        Args:
            client_metadata: Indexable; element 1 is used as the sequence id
                and the whole object is forwarded to check_sequence_async.
            len_mean: Mean of the sequence-length distribution.
            len_stddev: Standard deviation of the sequence-length
                distribution.

        Returns:
            None if check_constraints rejects this model / sequence id,
            otherwise the value returned by check_sequence_async.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        sequence_id = client_metadata[1]
        if not self.check_constraints(model_name, sequence_id):
            return None

        # Remember that this sequence id of the model now carries a
        # no-end sequence.
        if model_name not in self.sequence_constraints_:
            self.sequence_constraints_[model_name] = {}
        self.sequence_constraints_[model_name][sequence_id] = True

        # Build a variable length sequence that has a "start" flag but is
        # never ended. Per the scenario's intent the server should abort
        # it and reuse its slot for another sequence.
        drawn_length = int(self.rng_.normal(len_mean, len_stddev))
        seqlen = max(1, drawn_length)
        print("{} {}: no-end seqlen = {}".format(self.name_, sequence_id,
                                                 seqlen),
              file=self.out_stream_)

        values = self.rng_.randint(0, 1024 * 1024, size=seqlen).astype(dtype)

        steps = []
        running_result = 0
        for position, val in enumerate(values):
            flags = "start" if position == 0 else ""

            running_result += val
            running_result = self.get_expected_result(running_result, val,
                                                      trial, flags)

            # (flag_str, value, expected_result, delay_ms)
            steps.append((flags, val, running_result, None))

        return self.check_sequence_async(client_metadata,
                                         trial,
                                         model_name,
                                         dtype,
                                         steps,
                                         sequence_name=self.name_)
def create_sequence_ensemble_modelfile(base_model, models_dir, max_batch,
                                       model_version, shape, dtype):
    """Create the (empty) version directory for each basic ensemble model.

    An ensemble has no model file of its own, so only the directory
    ``<models_dir>/<model_name>/<model_version>`` is created for every
    entry in BASIC_ENSEMBLE_TYPES.

    Args:
        base_model: Name of the base model the ensemble wraps.
        models_dir: Directory under which the model directories are created.
        max_batch: Maximum batch size; 0 selects the "_nobatch" model names.
        model_version: Version used as the name of the version directory.
        shape: Not used by this function.
        dtype: Datatype passed to tu.get_sequence_model_name.

    Raises:
        OSError: If a version directory cannot be created, or the path
            exists but is not a directory.
    """

    # No actual model file in ensemble model

    # Use a different model name for the non-batching variant
    nobatch_suffix = "_nobatch" if max_batch == 0 else ""

    for ensemble_type in BASIC_ENSEMBLE_TYPES:
        ensemble_model_name = "{}_{}{}".format(ensemble_type, base_model,
                                               nobatch_suffix)
        model_name = tu.get_sequence_model_name(ensemble_model_name, dtype)
        model_version_dir = models_dir + "/" + model_name + "/" + str(
            model_version)

        # Only an already-existing directory is tolerated; any other failure
        # (e.g. permissions) is reported instead of being silently ignored.
        os.makedirs(model_version_dir, exist_ok=True)
Пример #5
0
    def run(self, client_metadata):
        """Send a one-step sequence without a START flag and expect an error.

        Args:
            client_metadata: Indexable; element 1 is used as the sequence id
                and the whole object is forwarded to check_sequence_async.

        Returns:
            None if check_constraints rejects this model / sequence id,
            otherwise the number of steps sent (1) once the expected
            "must specify the START flag" error has been observed.

        Raises:
            AssertionError: If the inference unexpectedly succeeds.
            Exception: Any error from check_sequence_async whose message
                does not mention the missing START flag is re-raised.
        """
        trial = self.get_trial()
        dtype = self.get_datatype(trial)
        model_name = tu.get_sequence_model_name(trial, dtype)
        if not self.check_constraints(model_name, client_metadata[1]):
            return None

        # Record this sequence id for the model with a False marker
        # (presumably "not left as a no-end sequence" -- confirm against
        # check_constraints).
        if model_name not in self.sequence_constraints_:
            self.sequence_constraints_[model_name] = {}
        self.sequence_constraints_[model_name][client_metadata[1]] = False

        # Create a sequence without a "start" flag. Sequence should get an
        # error from the server.
        seqlen = 1
        print("{} {}: no-start seqlen = {}".format(self.name_,
                                                   client_metadata[1], seqlen),
              file=self.out_stream_)

        values = self.rng_.randint(0, 1024 * 1024, size=seqlen).astype(dtype)

        # (flag_str, value, expected_result, delay_ms)
        steps = [(None, val, None, None) for val in values]

        try:
            self.check_sequence_async(client_metadata, trial, model_name,
                                      dtype, steps)
        except Exception as ex:
            # Not every exception type offers a message() method; fall back
            # to str() so an unrelated error is re-raised as itself rather
            # than being masked by an AttributeError.
            message_attr = getattr(ex, "message", None)
            message = str(message_attr()) if callable(message_attr) else str(
                ex)
            if "must specify the START flag" not in message:
                raise
            # Expect no START error as success case
            return seqlen

        # Hit this point if sending no-start sequence to sequence id that
        # was used for no-end sequence and that means the constraints check
        # is inaccurate. Raised outside the try block (and without `assert`,
        # which is stripped under -O) so the failure is never swallowed.
        raise AssertionError(
            "expected inference failure from missing START flag")
    # Initialize the random seed. For reproducibility each thread
    # maintains its own RNG which is initialized based on this seed.
    randseed = 0
    if FLAGS.random_seed != None:
        randseed = FLAGS.random_seed
    else:
        randseed = int(time.time())
    np.random.seed(randseed)

    print("random seed = {}".format(randseed))
    print("concurrency = {}".format(FLAGS.concurrency))
    print("iterations = {}".format(FLAGS.iterations))

    trial = "custom"
    dtype = get_datatype(trial)
    model_name = tu.get_sequence_model_name(trial, dtype)

    threads = []
    for idx, thd in enumerate(range(FLAGS.concurrency)):
        thread_name = "thread_{}".format(idx)

        # Create the seed for the thread. Since these are created in
        # reproducible order off of the initial seed we will get
        # reproducible results when given the same seed.
        seed = np.random.randint(2**32)

        # Each thread is reserved a block of correlation IDs of size
        # CORRELATION_ID_BLOCK_SIZE
        correlation_id_base = 1 + (idx * CORRELATION_ID_BLOCK_SIZE)

        threads.append(
Пример #7
0
    def test_sequence_different_shape_values(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four equal-length sequences with
        # different shape values in 2 sequences and 2 sequences that
        # share the same shape value. Make sure that the 2 sequences
        # with same shapes batch together but other two sequences do
        # not.
        self.clear_deferred_exceptions()
        dtype = np.float32

        # One set of pre-created regions per sequence, created in index
        # order 0..3.
        region_values = (
            ((1, 1), (1, 2), (1, 3)),
            ((32, 11), (32, 12), (32, 13)),
            ((16, 111), (16, 112), (16, 113)),
            ((1, 1111), (1, 1112), (1, 1113)),
        )
        shm_handles = [
            self.precreate_register_shape_tensor_regions(pairs, dtype, index)
            for index, pairs in enumerate(region_values)
        ]
        try:
            model_name = tu.get_sequence_model_name("plan", dtype)
            self.check_setup(model_name)

            # Need scheduler to wait for queue to contain all
            # inferences for both sequences.
            self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ)
            self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]),
                             12)
            self.assertTrue(
                "TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)
            self.assertEqual(
                int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0)

            # Per sequence: correlation id, the steps as
            # (flag_str, shape_value, value, pre_delay_ms), and the two
            # leading arguments handed to get_expected_result.
            sequences = (
                (1001, (("start", 1, 1, None), (None, 1, 2, None),
                        ("end", 1, 3, None)), (6, 3)),
                (1002, (("start", 32, 11, None), (None, 32, 12, None),
                        ("end", 32, 13, None)), (36, 13)),
                (1003, (("start", 16, 111, None), (None, 16, 112, None),
                        ("end", 16, 113, None)), (336, 113)),
                (1004, (("start", 1, 1111, None), (None, 1, 1112, None),
                        ("end", 1, 1113, None)), (3336, 1113)),
            )

            threads = []
            for (corr_id, steps, expected_args), handles in zip(
                    sequences, shm_handles):
                expected = self.get_expected_result(expected_args[0],
                                                    expected_args[1], "end")
                worker = threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(model_name, dtype, corr_id, (None, None), steps,
                          expected, handles),
                    kwargs={
                        'sequence_name': "{}".format(self._testMethodName)
                    })
                threads.append(worker)

            # Stagger the starts by one second each, then wait for all.
            for worker in threads:
                worker.start()
                time.sleep(1)
            for worker in threads:
                worker.join()

            self.check_deferred_exception()
            self.check_status(model_name, {4: 3, 3: 6}, 9, 12)
        except Exception as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            if TEST_SYSTEM_SHARED_MEMORY:
                for handles in shm_handles:
                    self.cleanup_shm_regions(handles)
Пример #8
0
    def __init__(self,
                 name,
                 rng,
                 sequence_trials,
                 identity_trials,
                 queue_latency_range_us=(10000, 100000),
                 sequence_id_range=None,
                 verbose=False,
                 out_stream=sys.stdout):
        """Build the list of model options this scenario can choose from.

        Options are appended in this order: the resnet model, the sequence
        models without validation data (object-dtype trials are skipped),
        the identity models without validation data, then the sequence and
        identity models again, each with a generated validation data file
        under "validation_data".

        Args:
            name: Forwarded to the base class initializer.
            rng: Random number generator stored on the instance.
            sequence_trials: Iterable of sequence model trials.
            identity_trials: Iterable of identity model trials.
            queue_latency_range_us: Passed to every ModelOption.
            sequence_id_range: Stored on the instance as given.
            verbose: Forwarded to the base class initializer.
            out_stream: Forwarded to the base class initializer.
        """
        super().__init__(name, [], verbose, out_stream)
        self.rng_ = rng
        self.sequence_id_range_ = sequence_id_range
        # List of tuples
        # (model_name, max_concurrency, batch_size, list(more PA options),
        #  real_data_file),
        self.options_ = []

        new_option = PerfAnalyzerScenario.ModelOption
        register = self.options_.append

        # --- Models without output validation ---
        register(
            new_option("resnet_v1_50_graphdef_def", 32, (1, 4, 1),
                       queue_latency_range_us))

        for trial in sequence_trials:
            dtype = self.get_datatype(trial)
            # The string sequence model is left out for now, as it is hard
            # for PA to generate valid input for it.
            is_string_model = dtype == np.dtype(object)
            if is_string_model:
                continue
            model_name = tu.get_sequence_model_name(trial, dtype)
            register(
                new_option(model_name, 1, (1, 4, 1), queue_latency_range_us))

        for trial in identity_trials:
            dtype = np.float32
            model_name = tu.get_zero_model_name(trial, 1, dtype)
            input_name = "INPUT__0" if "libtorch" in trial else "INPUT0"
            input_shapes = [(input_name, "16")]
            register(
                new_option(model_name, 1, (1, 4, 1), queue_latency_range_us,
                           input_shapes))

        # --- Output validation versions of the models ---
        # resnet is left out because its output data varies, which makes
        # exact matching hard.
        for trial in sequence_trials:
            dtype = self.get_datatype(trial)
            model_name = tu.get_sequence_model_name(trial, dtype)
            data_file = os.path.join("validation_data",
                                     "{}.json".format(model_name))
            self.generate_sequence_data(trial, dtype, data_file)
            register(
                new_option(model_name,
                           1, (1, 4, 1),
                           queue_latency_range_us,
                           input_file=data_file))

        for trial in identity_trials:
            dtype = np.float32
            model_name = tu.get_zero_model_name(trial, 1, dtype)
            data_file = os.path.join("validation_data",
                                     "{}.json".format(model_name))
            self.generate_identity_data(trial, dtype, data_file)
            register(
                new_option(model_name,
                           1, (1, 4, 1),
                           queue_latency_range_us,
                           input_file=data_file))
    def test_sequence_different_shape_values(self):
        # Test model instances together are configured with
        # total-batch-size 4. Send four equal-length sequences
        # with different shape values in parallel. As the
        # sequence batcher currently doesn't support ragged batch
        # the requests will still get batched together but the
        # result will be incorrect.
        self.clear_deferred_exceptions()
        dtype = np.float32

        # One set of pre-created regions per sequence, created in index
        # order 0..3.
        region_values = (
            ((1, 1), (1, 2), (1, 3)),
            ((32, 11), (32, 12), (32, 13)),
            ((16, 111), (16, 112), (16, 113)),
            ((1, 1111), (1, 1112), (1, 1113)),
        )
        shm_handles = [
            self.precreate_register_shape_tensor_regions(pairs, dtype, index)
            for index, pairs in enumerate(region_values)
        ]
        try:
            model_name = tu.get_sequence_model_name("plan", dtype)
            protocol = "streaming"

            self.check_setup(model_name)

            # Need scheduler to wait for queue to contain all
            # inferences for both sequences.
            self.assertTrue("TRTSERVER_DELAY_SCHEDULER" in os.environ)
            self.assertEqual(int(os.environ["TRTSERVER_DELAY_SCHEDULER"]), 12)
            self.assertTrue("TRTSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ)
            self.assertEqual(
                int(os.environ["TRTSERVER_BACKLOG_DELAY_SCHEDULER"]), 0)

            # Per sequence: correlation id, the steps as
            # (flag_str, shape_value, value, pre_delay_ms), and the two
            # leading arguments handed to get_expected_result.
            sequences = (
                (1001, (("start", 1, 1, None), (None, 1, 2, None),
                        ("end", 1, 3, None)), (6, 3)),
                (1002, (("start", 32, 11, None), (None, 32, 12, None),
                        ("end", 32, 13, None)), (36, 13)),
                (1003, (("start", 16, 111, None), (None, 16, 112, None),
                        ("end", 16, 113, None)), (336, 113)),
                (1004, (("start", 1, 1111, None), (None, 1, 1112, None),
                        ("end", 1, 1113, None)), (3336, 1113)),
            )

            threads = []
            for (corr_id, steps, expected_args), handles in zip(
                    sequences, shm_handles):
                expected = self.get_expected_result(expected_args[0],
                                                    expected_args[1], "end")
                worker = threading.Thread(
                    target=self.check_sequence_shape_tensor_io,
                    args=(model_name, dtype, corr_id, (None, None), steps,
                          expected, protocol, handles),
                    kwargs={
                        'sequence_name':
                        "{}_{}".format(self._testMethodName, protocol)
                    })
                threads.append(worker)

            # Launch everything at once, then wait for all workers.
            for worker in threads:
                worker.start()
            for worker in threads:
                worker.join()

            self.check_failure()
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        finally:
            if TEST_SYSTEM_SHARED_MEMORY or TEST_CUDA_SHARED_MEMORY:
                for handles in shm_handles:
                    self.cleanup_shm_regions(handles)