def initialize(self, args):
    """Parse the model configuration and cache the output dtypes.

    `initialize` is called only once, when the model is being loaded.
    Implementing it is optional; it gives the model a place to set up any
    state associated with it.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    # model_config is delivered as a JSON string and must be decoded here.
    config = json.loads(args['model_config'])
    self.model_config = config

    # Look up the configuration entries of both outputs.
    out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
    out1_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT1")

    # Translate the Triton type strings into numpy types.
    to_numpy = pb_utils.triton_string_to_numpy
    self.output0_dtype = to_numpy(out0_cfg['data_type'])
    self.output1_dtype = to_numpy(out1_cfg['data_type'])
def initialize(self, args):
    """Parse the model configuration and cache the anomaly output dtypes.

    Stores the decoded configuration on ``self.model_config`` and the numpy
    types of the ``ANOMALY_SCORE0`` and ``ANOMALY0`` outputs on
    ``self.output0_dtype`` and ``self.output1_dtype`` respectively.

    Parameters
    ----------
    args : dict
      Must contain ``model_config``, a JSON string with the model
      configuration.
    """
    config = json.loads(args['model_config'])
    self.model_config = config

    score_cfg = pb_utils.get_output_config_by_name(config, "ANOMALY_SCORE0")
    flag_cfg = pb_utils.get_output_config_by_name(config, "ANOMALY0")

    # Translate the Triton type strings into numpy types.
    to_numpy = pb_utils.triton_string_to_numpy
    self.output0_dtype = to_numpy(score_cfg['data_type'])
    self.output1_dtype = to_numpy(flag_cfg['data_type'])
def initialize(self, args):
    """Parse the model configuration and load the class-name list.

    `initialize` is called only once when the model is being loaded.
    Implementing `initialize` function is optional. This function allows
    the model to initialize any state associated with this model.

    Besides caching the numpy type of the ``DETECTED_OBJECTS_JSON`` output,
    this reads ``coco.names`` from the directory containing this file and
    stores the stripped lines in ``self.class_names``.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    # You must parse model_config. JSON string is not parsed here
    self.model_config = model_config = json.loads(args['model_config'])

    # Get yolov4_parser output configuration
    detected_objects_config = pb_utils.get_output_config_by_name(
        model_config, "DETECTED_OBJECTS_JSON")

    # Convert Triton types to numpy types
    self.detected_objects_dtype = pb_utils.triton_string_to_numpy(
        detected_objects_config['data_type'])
    logger.info(f"detected_objects_dtype={self.detected_objects_dtype}")

    dir_path = os.path.dirname(os.path.realpath(__file__))
    logger.info(f"dir_path={dir_path}")

    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(os.path.join(dir_path, 'coco.names')) as names_file:
        self.class_names = [line.strip() for line in names_file]
    logger.info(self.class_names)
def initialize(self, args):
    """Verify the decoupled policy is enabled and cache the output dtypes.

    Parameters
    ----------
    args : dict
      Must contain ``model_config`` (a JSON string with the model
      configuration) and ``model_name`` (used in the error message).

    Raises
    ------
    pb_utils.TritonModelException
      If the model configuration does not use the decoupled transaction
      policy.
    """
    config = json.loads(args['model_config'])
    self.model_config = config

    # Guard clause: this model cannot be served without decoupled mode.
    if not pb_utils.using_decoupled_model_transaction_policy(config):
        message = (
            "the model `{}` can generate any number of responses per request, "
            "enable decoupled transaction policy in model configuration to "
            "serve this model"
        ).format(args['model_name'])
        raise pb_utils.TritonModelException(message)

    out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
    out1_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT1")

    # Translate the Triton type strings into numpy types.
    to_numpy = pb_utils.triton_string_to_numpy
    self.output0_dtype = to_numpy(out0_cfg['data_type'])
    self.output1_dtype = to_numpy(out1_cfg['data_type'])
def initialize(self, args):
    """Load the saved workflow and cache the dtype of each of its columns.

    The workflow is loaded from ``<model_repository>/<model_version>/workflow``.
    For every name in the workflow's ``column_group.input_column_names`` the
    matching output entry of the model configuration is looked up and its
    Triton type converted to a numpy type, stored in ``self.output_dtypes``.

    Parameters
    ----------
    args : dict
      Must contain ``model_repository``, ``model_version`` and
      ``model_config`` (a JSON string with the model configuration).
    """
    version_dir = str(args["model_version"])
    self.workflow = nvtabular.Workflow.load(
        os.path.join(args["model_repository"], version_dir, "workflow")
    )

    self.model_config = json.loads(args["model_config"])

    self.output_dtypes = {}
    for column in self.workflow.column_group.input_column_names:
        column_cfg = get_output_config_by_name(self.model_config, column)
        self.output_dtypes[column] = triton_string_to_numpy(
            column_cfg["data_type"]
        )
def initialize(self, args):
    """Cache the output dtype and build the mean/std tensors on the GPU.

    ``self.mean`` and ``self.std`` are three-element tensors viewed as
    ``(1, 3, 1, 1)`` and moved to CUDA with ``.cuda()``.

    Parameters
    ----------
    args : dict
      Must contain ``model_config``, a JSON string with the model
      configuration.
    """
    config = json.loads(args['model_config'])
    self.model_config = config

    out_cfg = pb_utils.get_output_config_by_name(config, "PYTHON_OUTPUT_0")
    self.output_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])

    with torch.no_grad():
        # Build both tensors first, then move them to the GPU.
        mean_cpu = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std_cpu = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        self.mean = mean_cpu.cuda()
        self.std = std_cpu.cuda()
def initialize(self, args):
    """Cache the output dtypes and a numpy-to-torch dtype lookup table.

    Parameters
    ----------
    args : dict
      Must contain ``model_config``, a JSON string with the model
      configuration.
    """
    config = json.loads(args['model_config'])
    self.model_config = config

    out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
    out1_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT1")

    # Translate the Triton type strings into numpy types.
    to_numpy = pb_utils.triton_string_to_numpy
    self.output0_dtype = to_numpy(out0_cfg['data_type'])
    self.output1_dtype = to_numpy(out1_cfg['data_type'])

    # Maps a numpy scalar type to the torch dtype used in its place.
    dtype_pairs = (
        (np.bool_, torch.bool),
        (np.uint8, torch.uint8),
        (np.int8, torch.int8),
        (np.int16, torch.int16),
        (np.int32, torch.int32),
        (np.int64, torch.int64),
        (np.float16, torch.float16),
        (np.float32, torch.float32),
        (np.float64, torch.float64),
    )
    self.numpy_to_pytorch_dtype = dict(dtype_pairs)
def initialize(self, args):
    """Read the I/O configuration and set up CTC rescoring.

    `initialize` is called only once, when the model is being loaded.
    Implementing it is optional; it gives the model a place to set up any
    state associated with it.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    config = json.loads(args['model_config'])
    self.model_config = config
    # A configured max_batch_size below 1 is treated as 1.
    self.max_batch_size = max(config["max_batch_size"], 1)

    # OUTPUT0: keep its Triton type as a numpy type.
    out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
    self.out0_dtype = pb_utils.triton_string_to_numpy(out0_cfg['data_type'])

    # Inputs: the last dimension of batch_log_probs is used as the beam size.
    log_probs_cfg = pb_utils.get_input_config_by_name(
        config, "batch_log_probs")
    self.beam_size = log_probs_cfg['dims'][-1]

    # encoder_out supplies the data type and the feature size (last dim).
    encoder_cfg = pb_utils.get_input_config_by_name(config, "encoder_out")
    self.data_type = pb_utils.triton_string_to_numpy(encoder_cfg['data_type'])
    self.feature_size = encoder_cfg['dims'][-1]

    self.lm = None
    self.init_ctc_rescore(self.model_config['parameters'])
    print('Initialized Rescoring!')
def initialize(self, args):
    """Verify the decoupled policy and set up in-flight thread tracking.

    Parameters
    ----------
    args : dict
      Must contain ``model_config`` (a JSON string with the model
      configuration) and ``model_name`` (used in the error message).

    Raises
    ------
    pb_utils.TritonModelException
      If the model configuration does not use the decoupled transaction
      policy.
    """
    # model_config is delivered as a JSON string and must be decoded here.
    config = json.loads(args['model_config'])
    self.model_config = config

    # Guard clause: this model cannot be served without decoupled mode.
    if not pb_utils.using_decoupled_model_transaction_policy(config):
        message = (
            "the model `{}` can generate any number of responses per request, "
            "enable decoupled transaction policy in model configuration to "
            "serve this model"
        ).format(args['model_name'])
        raise pb_utils.TritonModelException(message)

    # Keep the OUT tensor's Triton type as a numpy type.
    out_cfg = pb_utils.get_output_config_by_name(config, "OUT")
    self.out_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])

    # Counter starts at zero; the lock is created alongside it.
    self.inflight_thread_count = 0
    self.inflight_thread_count_lck = threading.Lock()
def initialize(self, args):
    """Pick the device, build the WenetModel and cache the output dtype.

    `initialize` is called only once, when the model is being loaded.
    Implementing it is optional; it gives the model a place to set up any
    state associated with it.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    # model_config is delivered as a JSON string and must be decoded here.
    config = json.loads(args['model_config'])
    self.model_config = config

    # A "GPU" instance kind selects CUDA; anything else runs on the CPU.
    self.device = 'cuda' if args["model_instance_kind"] == "GPU" else 'cpu'

    # The model is constructed from the configuration's parameters section.
    self.model = WenetModel(config["parameters"], self.device)

    # Keep OUTPUT0's Triton type as a numpy type.
    out0_cfg = pb_utils.get_output_config_by_name(config, "OUTPUT0")
    self.output0_dtype = pb_utils.triton_string_to_numpy(out0_cfg['data_type'])

    # Used to record the state of every sequence.
    self.seq_states = {}
    print("Finish Init")
def initialize(self, args):
    """Cache the output dtype, then load the tokenizer and BART model.

    The model is moved to ``self.device``, which is ``'cuda'`` when
    ``torch.cuda.is_available()`` is true and ``'cpu'`` otherwise.
    Previously the model was always moved to ``'cuda'``, ignoring the
    device that had just been selected.

    Parameters
    ----------
    args : dict
      Must contain ``model_config``, a JSON string with the model
      configuration.
    """
    # You must parse model_config. JSON string is not parsed here
    self.model_config = model_config = json.loads(args['model_config'])

    # Get OUTPUT0 configuration
    output0_config = pb_utils.get_output_config_by_name(
        model_config, "OUTPUT0")

    # Convert Triton types to numpy types
    self.output0_dtype = pb_utils.triton_string_to_numpy(
        output0_config['data_type'])

    max_seq_len = 64
    self.download_cddd_models()

    tokenizer_path = '/models/molbart/mol_opt_tokeniser.pickle'
    model_chk_path = '/models/molbart/az_molbart_pretrain.ckpt'

    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    self.tokenizer = self.load_tokenizer(tokenizer_path)
    self.bart_model = self.load_model(
        model_chk_path, self.tokenizer, max_seq_len)
    # Honour the device chosen above rather than a hard-coded 'cuda'.
    self.bart_model.to(self.device)
def initialize(self, args):
    """Cache the output dtype and build the filterbank feature processor.

    ``self.feat_proc`` is a ``features.FilterbankFeatures`` instance created
    with the fixed settings listed below.

    Parameters
    ----------
    args : dict
      Must contain ``model_config``, a JSON string with the model
      configuration.
    """
    config = json.loads(args['model_config'])
    self.model_config = config

    out_cfg = pb_utils.get_output_config_by_name(config, "PYTHON_OUTPUT_0")
    self.output_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])

    # Fixed feature-extraction settings, passed through as keyword arguments.
    filterbank_settings = {
        "spec_augment": None,
        "cutout_augment": None,
        "sample_rate": 16000,
        "window_size": 0.02,
        "window_stride": 0.01,
        "window": "hann",
        "normalize": "per_feature",
        "n_fft": 512,
        "preemph": 0.97,
        "n_filt": 64,
        "lowfreq": 0,
        "highfreq": None,
        "log": True,
        "dither": 1e-5,
        "pad_align": 16,
        "pad_to_max_duration": False,
        "max_duration": float('inf'),
        "frame_splicing": 1,
    }
    self.feat_proc = features.FilterbankFeatures(**filterbank_settings)
def _set_output_dtype(self, name):
    """Record the numpy type of output `name` in ``self.output_dtypes``.

    The output's entry is looked up in ``self.model_config`` and its Triton
    ``data_type`` string is converted with ``triton_string_to_numpy``.
    """
    output_cfg = get_output_config_by_name(self.model_config, name)
    numpy_dtype = triton_string_to_numpy(output_cfg["data_type"])
    self.output_dtypes[name] = numpy_dtype
def initialize(self, args):
    """Configure the Fbank extractor and the chunking parameters.

    `initialize` is called only once, when the model is being loaded.
    Implementing it is optional; it gives the model a place to set up any
    state associated with it.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    config = json.loads(args['model_config'])
    self.model_config = config
    # A configured max_batch_size below 1 is treated as 1.
    self.max_batch_size = max(config["max_batch_size"], 1)

    # The device follows the kind of the first configured instance group.
    instance_kind = config["instance_group"][0]["kind"]
    self.device = "cuda" if "GPU" in instance_kind else "cpu"

    # "speech" output: numpy type, matching torch dtype, and its dimensions.
    speech_cfg = pb_utils.get_output_config_by_name(config, "speech")
    self.output0_dtype = pb_utils.triton_string_to_numpy(
        speech_cfg['data_type'])
    # Anything other than float32 falls back to float16.
    self.dtype = (
        torch.float32 if self.output0_dtype == np.float32 else torch.float16
    )
    self.feature_size = speech_cfg['dims'][-1]
    self.decoding_window = speech_cfg['dims'][-2]

    # "speech_lengths" output: numpy type only.
    lengths_cfg = pb_utils.get_output_config_by_name(
        config, "speech_lengths")
    self.output1_dtype = pb_utils.triton_string_to_numpy(
        lengths_cfg['data_type'])

    feat_opt = self.parse_model_params(config["parameters"])

    # Fbank options: dithering off, mel bin count taken from feature_size.
    opts = kaldifeat.FbankOptions()
    opts.frame_opts.dither = 0
    opts.mel_opts.num_bins = self.feature_size
    frame_length_ms = feat_opt["frame_length_ms"]
    frame_shift_ms = feat_opt["frame_shift_ms"]
    opts.frame_opts.frame_length_ms = frame_length_ms
    opts.frame_opts.frame_shift_ms = frame_shift_ms
    opts.frame_opts.samp_freq = feat_opt["sample_rate"]
    opts.device = torch.device(self.device)
    self.opts = opts
    self.feature_extractor = Fbank(self.opts)
    self.seq_feat = {}

    chunk_size_s = feat_opt["chunk_size_s"]
    sample_rate = feat_opt["sample_rate"]
    self.chunk_size = int(chunk_size_s * sample_rate)
    self.frame_stride = (chunk_size_s * 1000) // frame_shift_ms

    # Grow the first chunk one frame shift at a time until it yields at
    # least `decoding_window` frames.
    first_chunk_size = int(self.chunk_size)
    while (_kaldifeat.num_frames(first_chunk_size, opts.frame_opts)
           < self.decoding_window):
        first_chunk_size += frame_shift_ms * sample_rate // 1000
    # self.pad_silence = first_chunk_size - self.chunk_size
    self.first_chunk_size = first_chunk_size

    self.offset_ms = self.get_offset(frame_length_ms, frame_shift_ms)
    self.sample_rate = sample_rate
    self.min_seg = frame_length_ms * sample_rate // 1000
    print("MIN SEG IS", self.min_seg)
def initialize(self, args):
    """Configure the Fbank extractor from the model parameters.

    `initialize` is called only once, when the model is being loaded.
    Implementing it is optional; it gives the model a place to set up any
    state associated with it.

    Parameters
    ----------
    args : dict
      Both keys and values are strings. The dictionary keys and values are:
      * model_config: A JSON string containing the model configuration
      * model_instance_kind: A string containing model instance kind
      * model_instance_device_id: A string containing model instance device ID
      * model_repository: Model repository path
      * model_version: Model version
      * model_name: Model name
    """
    config = json.loads(args['model_config'])
    self.model_config = config
    # A configured max_batch_size below 1 is treated as 1.
    self.max_batch_size = max(config["max_batch_size"], 1)

    # The device follows the kind of the first configured instance group.
    instance_kind = config["instance_group"][0]["kind"]
    self.device = "cuda" if "GPU" in instance_kind else "cpu"

    # "speech" output: stored as a torch dtype; anything other than float32
    # falls back to float16.
    speech_cfg = pb_utils.get_output_config_by_name(config, "speech")
    speech_np_dtype = pb_utils.triton_string_to_numpy(speech_cfg['data_type'])
    self.output0_dtype = (
        torch.float32 if speech_np_dtype == np.float32 else torch.float16
    )

    # "speech_lengths" output: stored as a numpy type.
    lengths_cfg = pb_utils.get_output_config_by_name(
        config, "speech_lengths")
    self.output1_dtype = pb_utils.triton_string_to_numpy(
        lengths_cfg['data_type'])

    # Fbank options: dithering off, remaining fields filled from the
    # recognised entries of the parameters section.
    opts = kaldifeat.FbankOptions()
    opts.frame_opts.dither = 0
    for key, entry in self.model_config['parameters'].items():
        raw = entry["string_value"]
        if key == "num_mel_bins":
            opts.mel_opts.num_bins = int(raw)
        elif key == "frame_shift_in_ms":
            opts.frame_opts.frame_shift_ms = float(raw)
        elif key == "frame_length_in_ms":
            opts.frame_opts.frame_length_ms = float(raw)
        elif key == "sample_rate":
            opts.frame_opts.samp_freq = int(raw)
    opts.device = torch.device(self.device)

    self.opts = opts
    self.feature_extractor = Fbank(self.opts)
    self.feature_size = opts.mel_opts.num_bins