def add_noncategorical_slots(self, state_update, system_span_boundaries, user_span_boundaries):
    """Add features for non-categorical slots."""
    noncategorical_slots = self.service_schema.non_categorical_slots
    self.num_noncategorical_slots = len(noncategorical_slots)
    for slot_idx, slot in enumerate(noncategorical_slots):
        values = state_update.get(slot, [])
        self.noncat_slot_status_mask[slot_idx] = 1
        if not values:
            self.noncategorical_slot_status[slot_idx] = STATUS_OFF
        elif values[0] == STR_DONTCARE:
            self.noncategorical_slot_status[slot_idx] = STATUS_DONTCARE
        else:
            self.noncategorical_slot_status[slot_idx] = STATUS_ACTIVE
            # Add indices of the start and end tokens for the first encountered
            # value. Spans in the user utterance are prioritized over the system
            # utterance. If a span is not found, the slot value is ignored.
            if slot in user_span_boundaries:
                start, end = user_span_boundaries[slot]
            elif slot in system_span_boundaries:
                start, end = system_span_boundaries[slot]
            else:
                # A span may not be found because the value was cropped out or because
                # the value was mentioned earlier in the dialogue. Since this model
                # only makes use of the last two utterances to predict state updates,
                # it will fail in such cases.
                logging.debug(
                    f'Slot values {str(values)} not found in user or system utterance in example with id - {self.example_id}.'
                )
                continue
            self.noncategorical_slot_value_start[slot_idx] = start
            self.noncategorical_slot_value_end[slot_idx] = end
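# A minimal sketch (with made-up slot names and indices) of the span-boundary
# dicts consumed by add_noncategorical_slots above: each maps a slot name to
# (start_token, end_token) indices into the tokenized utterance. Only the
# lookup priority (user before system) is taken from the code above.
user_span_boundaries = {"restaurant_name": (7, 9)}
system_span_boundaries = {"city": (3, 3)}
slot = "restaurant_name"
if slot in user_span_boundaries:  # spans in the user utterance win
    start, end = user_span_boundaries[slot]
elif slot in system_span_boundaries:
    start, end = system_span_boundaries[slot]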
def __init__(
    self,
    manifest_filepath,
    featurizer,
    labels=None,
    max_duration=None,
    min_duration=None,
    trim=False,
    load_audio=True,
):
    self.collection = collections.ASRSpeechLabel(
        manifests_files=manifest_filepath.split(','),
        min_duration=min_duration,
        max_duration=max_duration,
    )
    self.featurizer = featurizer
    self.trim = trim
    self.load_audio = load_audio

    self.labels = labels if labels else self.collection.uniq_labels
    self.num_commands = len(self.labels)

    self.label2id, self.id2label = {}, {}
    for label_id, label in enumerate(self.labels):
        self.label2id[label] = label_id
        self.id2label[label_id] = label

    for idx in range(len(self.labels[:5])):
        logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx]))
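# Quick, self-contained illustration of the label <-> id mappings built in the
# constructor above, using made-up labels.
labels = ["go", "stop", "yes"]
label2id = {label: label_id for label_id, label in enumerate(labels)}
id2label = {label_id: label for label_id, label in enumerate(labels)}
assert label2id["stop"] == 1 and id2label[1] == "stop"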
def perturb(self, data):
    impulse_record = self._rng.sample(self._manifest.data, 1)[0]
    impulse = AudioSegment.from_file(impulse_record['audio_filepath'], target_sr=data.sample_rate)
    logging.debug("impulse: %s", impulse_record['audio_filepath'])
    data._samples = signal.fftconvolve(data.samples, impulse.samples, "full")
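# Standalone sketch of the convolution step above on synthetic data. With
# mode="full", scipy.signal.fftconvolve returns len(a) + len(b) - 1 samples,
# so the perturbed audio grows by the impulse-response length minus one.
import numpy as np
from scipy import signal

samples = np.random.randn(16000).astype(np.float32)  # ~1 s of audio at 16 kHz
impulse = np.array([1.0, 0.0, 0.0, 0.5], dtype=np.float32)  # toy impulse response
convolved = signal.fftconvolve(samples, impulse, "full")
assert convolved.shape[0] == samples.shape[0] + impulse.shape[0] - 1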
def infer(self, feed_dict, output):
    for name in self.engine:
        if name in feed_dict:
            in_out = [feed_dict[name]]
        elif isinstance(output, tuple):
            in_out = [output[i].detach().cpu().numpy() for i in range(len(output))]
        else:
            in_out = [output.detach().cpu().numpy()]
        binding = self.engine[name]
        # Only set shapes if required.
        for i in range(len(in_out)):
            shape = in_out[i].shape
            if self.engine.is_shape_binding(binding) and is_shape_dynamic(self.context.get_shape(binding)):
                logging.debug("Setting shape binding: {:} (index: {:}) to: {:}".format(name, binding, in_out[i]))
                self.context.set_shape_input(binding, in_out[i])
            elif is_shape_dynamic(self.context.get_binding_shape(binding)):
                logging.debug("Setting binding: {:} (index: {:}) to shape: {:}".format(name, binding, shape))
                self.context.set_binding_shape(binding, shape)

    # Check that all binding shapes and shape inputs have been specified.
    if not self.context.all_binding_shapes_specified:
        logging.critical(
            "Some input shapes were not specified.\nNote: Inputs are: {:}".format(self.get_input_metadata())
        )
    if not self.context.all_shape_inputs_specified:
        logging.critical(
            "Some shape inputs were not specified.\nNote: Inputs are: {:}".format(self.get_input_metadata())
        )

    bindings_per_profile = self.engine.num_bindings // self.engine.num_optimization_profiles
    start_binding = self.context.active_optimization_profile * bindings_per_profile
    end_binding = start_binding + bindings_per_profile

    # Resize buffers so they are the appropriate size.
    for binding in range(start_binding, end_binding):
        shape = tuple(self.context.get_binding_shape(binding))
        self.buffers.resize(self.engine[binding], shape)

    bindings = self.buffers.get_bindings()

    start = time.perf_counter()
    self.buffers.copy_inputs(feed_dict, self.stream)
    self.context.execute_async_v2(bindings=bindings, stream_handle=self.stream.handle)
    self.buffers.copy_outputs(self.stream)
    self.stream.synchronize()
    end = time.perf_counter()

    self.inference_time = end - start
    return self.buffers.get_outputs()
def __call__(self):
    class DummyContextManager(object):
        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            return None

    network_parser = self.network_loader()
    try:
        network, parser = network_parser
        assert isinstance(network, trt.INetworkDefinition)
    except (ValueError, AssertionError):
        network = network_parser
        parser = DummyContextManager()

    with trt.Builder(TRT_LOGGER) as builder, network, parser:
        if self.preprocess_network:
            logging.debug("Applying network preprocessing: {:}".format(self.preprocess_network))
            self.preprocess_network(network)

        if self.layerwise:
            TensorRTRunnerV2.mark_layerwise(network)

        if logging.getEffectiveLevel() <= logging.DEBUG:
            TensorRTRunnerV2.log_network(network)

        config = builder.create_builder_config()
        profile = TensorRTRunnerV2.build_profile(builder, network, self.profile_shapes)
        config.add_optimization_profile(profile)
        config.max_workspace_size = int(self.max_workspace_size)

        if self.fp16_mode:
            config.flags = 1 << int(trt.BuilderFlag.FP16)
        if self.int8_mode:
            config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8)
            if not network.has_explicit_precision:
                if not self.calibrator:
                    logging.critical(
                        "Network does not have explicit precision. A calibrator must be provided in order to use int8 mode."
                    )
                self.calibrator.set_input_metadata(get_input_metadata_from_profile(profile, network))
                config.int8_calibrator = self.calibrator

        logging.debug("Using builder configuration flags: {:}".format(config.flags))
        logging.info(
            "Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}".format(
                self.max_workspace_size, self.fp16_mode, self.int8_mode, self.layerwise
            )
        )
        engine = builder.build_engine(network, config)
        self.written_engine_path = write_timestamped(
            contents=lambda: engine.serialize(), dir=self.write_engine, name="tensorrt_runner_v2.engine"
        )
        return engine
def check(self, model):
    try:
        onnx.checker.check_model(model)
        logging.debug("ONNX Checker Passed")
    except onnx.checker.ValidationError as err:
        logging.warning("ONNX Checker exited with an error: {:}".format(err))
    return model
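# Hedged usage sketch for the checker above, using the public onnx API
# directly; "model.onnx" is a placeholder path.
import onnx

model = onnx.load("model.onnx")
try:
    onnx.checker.check_model(model)
except onnx.checker.ValidationError as err:
    print("Validation failed:", err)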
def receive_on_queue(queue, timeout=None):
    logging.info("Waiting for data to become available on queue")
    obj = queue.get(block=True, timeout=timeout)
    if is_compressed(obj):
        logging.debug("Decompressing output")
        obj = decompress(obj)
    logging.info("Received {:} on queue".format(obj))
    return obj
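# Minimal end-to-end sketch of receive_on_queue with a multiprocessing queue.
# is_compressed/decompress are assumed to be the module-level helpers used above.
from multiprocessing import Queue

q = Queue()
q.put({"status": "ok"})
obj = receive_on_queue(q, timeout=5)  # returns {"status": "ok"}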
def forward(self, x, o=None):
    lst = []
    if o is None:
        logging.debug("O is None")
    else:
        logging.debug("O is not None")
    for pw in range(self._dim):
        lst.append(x ** pw)
    nx = t.cat(lst, dim=-1)
    return self.fc1(nx)
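# What the loop above computes, shown on a concrete tensor: for _dim == 3, the
# input x is expanded into [x**0, x**1, x**2] along the last dimension before
# being fed to the linear layer.
import torch as t

x = t.tensor([[2.0]])  # shape (1, 1)
nx = t.cat([x ** pw for pw in range(3)], dim=-1)
# nx is tensor([[1., 2., 4.]]) -> input to self.fc1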
def write_predictions_to_file(predictions, input_json_files, output_dir, schemas, state_tracker, eval_debug, in_domain_services):
    """Write the predicted dialogues as json files.

    Args:
        predictions: An iterator containing model predictions. This is the output of
            the predict method in the estimator.
        input_json_files: A list of json paths containing the dialogues to run
            inference on.
        output_dir: The directory where output json files will be created.
        schemas: Schemas to all services in the dst dataset (train, dev and test splits).
        state_tracker: The state tracker to use, either 'baseline' or 'nemotracker'.
        eval_debug: Whether to run in DEBUG mode and generate error-analysis output.
        in_domain_services: The services seen during training.
    """
    logging.info(f"Writing predictions to {output_dir} started.")

    # Index all predictions.
    all_predictions = {}
    for idx, prediction in enumerate(predictions):
        if not prediction["is_real_example"]:
            continue
        eval_dataset, dialog_id, turn_id, service_name = prediction['example_id'].split('-')
        all_predictions[(dialog_id, turn_id, service_name)] = prediction
    logging.info(f'Predictions for {idx} examples in {eval_dataset} dataset are getting processed.')

    # Read each input file and write its predictions.
    for input_file_path in input_json_files:
        with open(input_file_path) as f:
            dialogs = json.load(f)
            logging.debug(f'{input_file_path} file is loaded')
        pred_dialogs = []
        for d in dialogs:
            if state_tracker == 'baseline':
                pred_dialog = get_predicted_dialog_baseline(d, all_predictions, schemas)
            elif state_tracker == 'nemotracker':
                pred_dialog = get_predicted_dialog_nemotracker(d, all_predictions, schemas, eval_debug, in_domain_services)
            else:
                raise ValueError(f"tracker_mode {state_tracker} is not defined.")
            pred_dialogs.append(pred_dialog)
        input_file_name = os.path.basename(input_file_path)
        output_file_path = os.path.join(output_dir, input_file_name)
        with open(output_file_path, "w") as f:
            json.dump(pred_dialogs, f, indent=2, separators=(",", ": "), sort_keys=True)
def __init__(self, input_file, max_seq_length, tokenizer, num_samples=-1, shuffle=True):
    with open(input_file, "r") as f:
        sent_labels, all_sent_subtokens = [], []
        sent_lengths = []
        too_long_count = 0

        lines = f.readlines()[1:]
        logging.info(f'{input_file}: {len(lines)}')

        if shuffle or num_samples > -1:
            random.seed(0)
            random.shuffle(lines)
        if num_samples > 0:
            lines = lines[:num_samples]

        for index, line in enumerate(lines):
            if index % 20000 == 0:
                logging.debug(f"Processing line {index}/{len(lines)}")
            sent_label = int(line.split()[-1])
            sent_labels.append(sent_label)
            sent_words = line.strip().split()[:-1]
            sent_subtokens = ['[CLS]']
            for word in sent_words:
                word_tokens = tokenizer.tokenize(word)
                sent_subtokens.extend(word_tokens)
            sent_subtokens.append('[SEP]')
            all_sent_subtokens.append(sent_subtokens)
            sent_lengths.append(len(sent_subtokens))

    get_stats(sent_lengths)
    self.max_seq_length = min(max_seq_length, max(sent_lengths))

    for i in range(len(all_sent_subtokens)):
        if len(all_sent_subtokens[i]) > self.max_seq_length:
            shorten_sent = all_sent_subtokens[i][-self.max_seq_length + 1:]
            all_sent_subtokens[i] = ['[CLS]'] + shorten_sent
            too_long_count += 1

    logging.info(f'{too_long_count} out of {len(sent_lengths)} sentences have more than {max_seq_length} subtokens.')

    self.convert_sequences_to_features(all_sent_subtokens, sent_labels, tokenizer, self.max_seq_length)
    self.tokenizer = tokenizer
    self.vocab_size = self.tokenizer.vocab_size
def resize(self, name, shape):
    found = False
    for buf_dict in [self.device_buffers, self.host_outputs]:
        if name in buf_dict:
            found = True
            buf_dict[name].resize(shape)
    if not found:
        logging.warning("Buffer: {:} was not found, could not resize".format(name))
    else:
        logging.debug("Resizing {:} buffer to {:}".format(name, shape))
def perturb(self, data):
    shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms)
    if abs(shift_ms) / 1000 > data.duration:
        # TODO: do something smarter than just ignore this condition
        return
    shift_samples = int(shift_ms * data.sample_rate // 1000)
    logging.debug("shift: %s", shift_samples)
    if shift_samples < 0:
        data._samples[-shift_samples:] = data._samples[:shift_samples]
        data._samples[:-shift_samples] = 0
    elif shift_samples > 0:
        data._samples[:-shift_samples] = data._samples[shift_samples:]
        data._samples[-shift_samples:] = 0
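# Numeric sketch of the shift arithmetic above: a positive shift moves the
# signal left by shift_samples and zero-fills the tail, mirroring the
# shift_samples > 0 branch.
import numpy as np

sample_rate = 16000
shift_ms = 25.0
shift_samples = int(shift_ms * sample_rate // 1000)  # 400 samples

samples = np.arange(10, dtype=np.float32)
shift = 3
samples[:-shift] = samples[shift:]
samples[-shift:] = 0  # [3, 4, 5, 6, 7, 8, 9, 0, 0, 0]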
def add_categorical_slots(self, state_update, agg_sys_state):
    """Add features for categorical slots."""
    categorical_slots = self.service_schema.categorical_slots
    self.num_categorical_slots = len(categorical_slots)
    for slot_idx, slot in enumerate(categorical_slots):
        values = state_update.get(slot, [])

        # Add categorical slot value features.
        slot_values = self.service_schema.get_categorical_slot_values(slot)
        self.num_categorical_slot_values[slot_idx] = len(slot_values)
        # Set the slot mask to 1, i.e. the slot exists in the service.
        self.cat_slot_status_mask[slot_idx] = 1
        # Set the number of active slot values for this slot in the service.
        for slot_value_idx in range(len(self.service_schema._categorical_slot_values[slot])):
            self.cat_slot_values_mask[slot_idx][slot_value_idx] = 1

        if not values:
            self.categorical_slot_status[slot_idx] = STATUS_OFF
        elif values[0] == STR_DONTCARE:
            self.categorical_slot_status[slot_idx] = STATUS_DONTCARE
        else:
            value_id = self.service_schema.get_categorical_slot_value_id(slot, values[0])
            if value_id < 0:
                logging.warning(
                    f"Categorical value not found: EXAMPLE_ID:{self.example_id}, EXAMPLE_ID_NUM:{self.example_id_num}"
                )
                logging.warning(f"SYSTEM: {self.system_utterance} || USER: {self.user_utterance}")
            else:
                if values[0] not in agg_sys_state.get(slot, []):
                    self.categorical_slot_status[slot_idx] = STATUS_ACTIVE
                    self.categorical_slot_values[slot_idx] = value_id
                else:
                    if self._add_carry_status:
                        self.categorical_slot_status[slot_idx] = STATUS_CARRY
                    else:
                        self.categorical_slot_status[slot_idx] = STATUS_ACTIVE
                    if self._add_carry_value:
                        self.categorical_slot_values[slot_idx] = self.service_schema.get_categorical_slot_value_id(
                            slot, "#CARRYVALUE#"
                        )
                        logging.debug(
                            f"Found slot:{slot}, value:{values[0]}, slot_id:{self.categorical_slot_values[slot_idx]} in prev states: {agg_sys_state}"
                        )
                    else:
                        self.categorical_slot_values[slot_idx] = value_id
def __init__(self, schema_json_paths, add_carry_value, add_carry_status):
    """
    Args:
        schema_json_paths: list of paths to .json schema files, or a single str
            with the path to one json file.
    """
    # Load the schema from the json file(s).
    self._add_carry_value = add_carry_value
    self._add_carry_status = add_carry_status

    if isinstance(schema_json_paths, str):
        with open(schema_json_paths, "r") as f:
            all_schemas = json.load(f)
    else:
        # Load multiple schemas from the list of json files.
        all_schemas = []
        completed_services = []
        for schema_json_path in schema_json_paths:
            with open(schema_json_path, "r") as f:
                schemas = json.load(f)
            logging.debug("Num of services in %s: %s", schema_json_path, len(schemas))
            for service in schemas:
                if service['service_name'] not in completed_services:
                    completed_services.append(service['service_name'])
                    all_schemas.append(service)

    self._services = sorted(schema["service_name"] for schema in all_schemas)
    self._services_vocab = {v: k for k, v in enumerate(self._services)}
    self._services_id_to_vocab = {v: k for k, v in self._services_vocab.items()}

    service_schemas = {}
    for schema in all_schemas:
        service = schema["service_name"]
        service_schemas[service] = ServiceSchema(
            schema, service_id=self.get_service_id(service), add_carry_value=self._add_carry_value
        )
    self._service_schemas = service_schemas
    self._schemas = all_schemas
    self._slots_relation_list = {}
def _populate_schema_embeddings(self, schema_embeddings, hidden_states, mode):
    """
    Populate all schema embeddings with BERT embeddings.
    """
    completed_services = set()
    batch_size, seq_len, hidden_size = hidden_states[0].shape

    for idx in range(len(self)):
        service_id = self.features['service_id'][idx]
        service = self.schemas.get_service_from_id(service_id)
        if service not in completed_services:
            logging.debug(f"Generating embeddings for service {service}.")
            completed_services.add(service)
        tensor_name = self.features["embedding_tensor_name"][idx]
        emb_mat = schema_embeddings[service_id][tensor_name]

        if mode == 'random':
            # Randomly initialize the schema embedding.
            random_token = random.randint(0, seq_len - 1)
            embedding = [round(float(x), 6) for x in hidden_states[0][idx, random_token, :].flat]
        elif mode == 'last_layer_average':
            # Average the last layer's encodings over all tokens.
            embedding = [round(float(x), 6) for x in np.mean(hidden_states[0][idx, :], 0).flat]
        elif mode == 'baseline':
            # Obtain the encoding of the [CLS] token.
            embedding = [round(float(x), 6) for x in hidden_states[0][idx, 0, :].flat]
        else:
            raise ValueError(f'Mode {mode} for generating schema embeddings is not supported.')

        intent_or_slot_id = self.features['intent_or_slot_id'][idx]
        value_id = self.features['value_id'][idx]

        if tensor_name == "cat_slot_value_emb":
            emb_mat[intent_or_slot_id, value_id] = embedding
        else:
            emb_mat[intent_or_slot_id] = embedding
def __init__(self, model_loader=None, plugins=None, name=None):
    """
    Creates a runner that manages a single TensorRT engine.

    Args:
        model_loader (Callable() -> trt.ICudaEngine): A callable that can supply a TensorRT engine.

    Optional Args:
        plugins (List[str]): A list of paths to plugin libraries to load before inference.
        name (str): The human-readable name to use for this runner.
    """
    set_trt_logging_level(logging.getEffectiveLevel())

    def load_plugins():
        import ctypes

        for plugin in plugins:
            path = os.path.abspath(plugin)
            logging.info("Loading plugin library: {:}".format(path))
            ctypes.CDLL(path)

    # Load any user-supplied plugin libraries. This must happen before everything
    # else, including engine deserialization.
    if plugins:
        load_plugins()

    # Choose a unique name for this runner.
    super().__init__(default_value(name, "trt-v2-runner-{:}".format(TensorRTRunnerV2.total_runners)))
    TensorRTRunnerV2.total_runners += 1
    logging.debug("Creating {:}".format(self.name))

    self.model_loader = model_loader

    self.engine = self.model_loader()
    if not self.engine:
        logging.critical("Invalid Engine. Please ensure the engine was built correctly.")

    self.buffers = Buffers.from_engine(self.engine)
    self.stream = cuda.Stream()
    self.context = self.engine.create_execution_context()
def perturb(self, data):
    snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db)
    noise_record = self._rng.sample(self._manifest.data, 1)[0]
    noise = AudioSegment.from_file(noise_record['audio_filepath'], target_sr=data.sample_rate)
    noise_gain_db = min(data.rms_db - noise.rms_db - snr_db, self._max_gain_db)
    logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record['audio_filepath'])

    # Calculate the noise segment to use.
    start_time = self._rng.uniform(0.0, noise.duration - data.duration)
    noise.subsegment(start_time=start_time, end_time=start_time + data.duration)

    # Adjust the gain for SNR purposes and superimpose the noise.
    noise.gain_db(noise_gain_db)
    data._samples = data._samples + noise.samples
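# Worked example of the gain computation above: to hit a target SNR, the noise
# is attenuated by (signal RMS dB - noise RMS dB - SNR dB), capped at
# _max_gain_db. The numbers below are made up.
data_rms_db, noise_rms_db = -20.0, -10.0
snr_db, max_gain_db = 15.0, 300.0
noise_gain_db = min(data_rms_db - noise_rms_db - snr_db, max_gain_db)  # -25.0 dB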
def mark_layerwise(network):
    # Layers within loops cannot be marked as network outputs.
    # TODO: FIXME: This assumes that the network is topologically sorted.
    LOOP_START_LAYERS = [trt.LayerType.TRIP_LIMIT, trt.LayerType.ITERATOR]
    LOOP_END_LAYERS = [trt.LayerType.LOOP_OUTPUT]
    num_layers_marked = 0
    in_loop = False
    for layer in network:
        if layer.type in LOOP_START_LAYERS:
            in_loop = True
        elif layer.type in LOOP_END_LAYERS:
            in_loop = False
        for index in range(layer.num_outputs):
            out = layer.get_output(index)
            if not out.is_network_output and not in_loop:
                logging.debug("Marking {:} as an output".format(out.name))
                network.mark_output(out)
                num_layers_marked += 1
    logging.debug("Running in layerwise mode. Marking {:} layers as outputs".format(num_layers_marked))
def get_input_metadata(self):
    inputs = OrderedDict()
    active_profile = self.context.active_optimization_profile
    bindings_per_profile = len(self.engine) // self.engine.num_optimization_profiles
    logging.debug(
        "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}".format(
            self.engine.num_optimization_profiles, bindings_per_profile, active_profile
        )
    )

    start_binding = bindings_per_profile * active_profile
    end_binding = start_binding + bindings_per_profile
    logging.info("Start Binding: {:}, End Binding: {:}".format(start_binding, end_binding))

    for binding in range(start_binding, end_binding):
        if self.engine.binding_is_input(binding):
            inputs[self.engine[binding]] = (
                trt.nptype(self.engine.get_binding_dtype(binding)),
                list(self.engine.get_binding_shape(binding)),
            )
    return inputs
def perturb(self, data):
    gain = self._rng.uniform(self._min_gain_dbfs, self._max_gain_dbfs)
    logging.debug("gain: %f", gain)
    data._samples = data._samples * (10.0 ** (gain / 20.0))
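# The dBFS-to-linear conversion used above: a gain of -6 dB scales sample
# amplitudes by roughly one half.
gain_db = -6.0
linear_scale = 10.0 ** (gain_db / 20.0)  # ~0.501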
def perturb(self, data):
    speed_rate = self._rng.uniform(self._min_rate, self._max_rate)
    if speed_rate <= 0:
        raise ValueError("speed_rate should be greater than zero.")
    logging.debug("speed: %f", speed_rate)
    data._samples = librosa.effects.time_stretch(data._samples, speed_rate)
def load(self, iteration, input_metadata, input_example=None):
    """
    Load the specified iteration from the cache if present, or generate using the data loader.

    Args:
        iteration (int): The iteration whose data to retrieve.
        input_metadata (OrderedDict[str, Tuple[np.dtype, Tuple[int]]]): Input Metadata, including shape
            and type information. The loader may attempt to match input_metadata when data in the
            cache does not exactly match a new set of input_metadata.
        input_example: An optional concrete example input; when provided, the data loader may use
            it instead of generating random buffers.
    """
    if iteration not in self.cache:
        logging.debug("Iteration {:} not found in cache, generating new buffers for all inputs".format(iteration))
        self.cache[iteration] = self.data_loader(iteration, input_metadata, input_example)
        if self.cache[iteration] is None:
            logging.critical(
                "Received no data from data_loader(iteration, input_metadata) for input_metadata: {:}".format(
                    input_metadata
                )
            )
    else:
        logging.info("Found iteration {:} in cache".format(iteration))

    feed_dict = OrderedDict()
    for index, (name, (dtype, shape)) in enumerate(input_metadata.items()):
        cached_name = find_in_dict(name, self.cache[iteration], index)
        if cached_name is None:
            logging.warning("Could not find input: {:} in cache, regenerating buffers".format(name))
            self.cache[iteration] = self.data_loader(iteration, input_metadata, input_example)
            cached_name = name

        buffer = self.cache[iteration][cached_name]

        if dtype != buffer.dtype:
            logging.warning(
                "Cached buffer data type does not match data type for input: {:}. Note: Cached type: {:}, input type: {:}. Attempting to cast".format(
                    name, buffer.dtype, dtype
                )
            )
            buffer = buffer.astype(dtype)

        if not is_valid_shape_override(buffer.shape, shape):
            logging.warning(
                "Cached buffer shape does not match shape for input. Note: Cached shape: {:}, input shape: {:}.".format(
                    buffer.shape, shape
                )
            )
            # Try to permute the shape to match.
            try:
                perm = FormatManager.permutation(
                    FormatManager.deduce_format(buffer.shape), FormatManager.deduce_format(shape)
                )
                new_shape = FormatManager.convert(tuple(buffer.shape), FormatManager.deduce_format(shape))
                logging.warning(
                    "Attempting to permute shape: {:} using permutation {:}. New shape: {:}".format(
                        buffer.shape, perm, new_shape
                    )
                )
                buffer = np.transpose(buffer, perm)
            except NotImplementedError as err:
                # If the FormatManager does not recognize the format, skip permutation.
                logging.info("Skipping permutation due to {:}".format(err))
            except KeyError as err:
                # If the FormatManager cannot generate the permutation for the format combination, skip permutation.
                logging.info("Skipping permutation due to {:}".format(err))

        feed_dict[name] = buffer
    return feed_dict
def __test_export_route(self, module, out_name, mode, input_example=None):
    # Select the correct extension based on the output format.
    ext = {DF.ONNX: ".onnx", DF.TRTONNX: ".trt.onnx", DF.PYTORCH: ".pt", DF.TORCHSCRIPT: ".ts"}.get(mode, ".onnx")
    out = Path(f"{out_name}{ext}")
    out_name = str(out)
    if out.exists():
        os.remove(out)

    module.eval()
    outputs_fwd = (
        module.forward(*tuple(input_example.values()))
        if isinstance(input_example, OrderedDict)
        else (
            module.forward(*input_example)
            if isinstance(input_example, tuple)
            else module.forward(input_example)
            if input_example is not None
            else None
        )
    )

    deploy_input_example = tuple(input_example.values()) if isinstance(input_example, OrderedDict) else input_example
    self.nf.deployment_export(
        module=module,
        output=out_name,
        input_example=deploy_input_example,
        d_format=mode,
        output_example=outputs_fwd,
    )

    tol = 5.0e-3
    assert out.exists()

    if mode == DF.TRTONNX:
        data_loader = DefaultDataLoader()
        loader_cache = DataLoaderCache(data_loader)
        profile_shapes = OrderedDict()
        names = list(module.input_ports) + list(module.output_ports)
        names = list(
            filter(
                lambda x: x
                not in (module._disabled_deployment_input_ports | module._disabled_deployment_output_ports),
                names,
            )
        )
        if isinstance(input_example, tuple):
            si = [tuple(input_example[i].shape) for i in range(len(input_example))]
        elif isinstance(input_example, OrderedDict):
            si = [tuple(input_example.values())[i].shape for i in range(len(input_example))]
        else:
            si = [tuple(input_example.shape)]
        if isinstance(outputs_fwd, tuple):
            fi = [tuple(outputs_fwd[i].shape) for i in range(len(outputs_fwd))]
        else:
            fi = [tuple(outputs_fwd.shape)]
        si = si + fi
        i = 0
        for name in names:
            profile_shapes[name] = [si[i]] * 3
            i = i + 1

        onnx_loader = OnnxFileLoader(out_name)
        network_loader = OnnxNetworkLoader(onnx_loader, explicit_precision=False)
        model_loader = BuildEngineLoader(
            network_loader,
            max_workspace_size=1 << 30,
            fp16_mode=False,
            int8_mode=False,
            profile_shapes=profile_shapes,
            write_engine=None,
            calibrator=None,
            layerwise=False,
        )

        with TensorRTRunnerV2(model_loader=model_loader) as active_runner:
            input_metadata = active_runner.get_input_metadata()
            if input_metadata is None:
                logging.critical("For {:}, get_input_metadata() returned None!".format(active_runner.name))
            logging.debug("Runner Inputs: {:}".format(input_metadata))
            feed_dict = loader_cache.load(iteration=0, input_metadata=input_metadata, input_example=input_example)
            inputs = dict()
            input_names = list(input_metadata.keys())
            for i in range(len(input_names)):
                input_name = input_names[i]
                if input_name in module._disabled_deployment_input_ports:
                    continue
                inputs[input_name] = (
                    input_example[input_name].cpu().numpy()
                    if isinstance(input_example, OrderedDict)
                    else (
                        input_example[i].cpu().numpy()
                        if isinstance(input_example, tuple)
                        else input_example.cpu().numpy()
                    )
                )
            out_dict = active_runner.infer(feed_dict=feed_dict, output=outputs_fwd)
            for ov in out_dict.values():
                outputs_scr = torch.from_numpy(ov).cuda()
                break
            outputs = []
            outputs.append(copy.deepcopy(out_dict))
            logging.debug(
                "Received outputs: {:}".format(
                    ["{:}: {:}".format(name, out.shape) for name, out in out_dict.items()]
                )
            )
            logging.info("Output Buffers: {:}".format(outputs))

            inpex = []
            for ie in feed_dict.values():  # loader_cache.cache[0].values():
                if ie.dtype.type is np.int32:
                    inpex.append(torch.from_numpy(ie).long().cuda())
                else:
                    inpex.append(torch.from_numpy(ie).cuda())
                if len(inpex) == len(input_example):
                    break
            inpex = tuple(inpex)
            outputs_fwd = module.forward(*inpex)

    elif mode == DF.ONNX:
        # Must recompute because *module* might be different now.
        outputs_fwd = (
            module.forward(*tuple(input_example.values()))
            if isinstance(input_example, OrderedDict)
            else (module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example))
        )
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
        ort_session = ort.InferenceSession(out_name, sess_options, ['CUDAExecutionProvider'])
        print('Execution Providers: ', ort_session.get_providers())
        inputs = dict()
        input_names = list(module.input_ports)
        ort_inputs = ort_session.get_inputs()
        for i in range(len(input_names)):
            input_name = input_names[i]
            if input_name in module._disabled_deployment_input_ports:
                continue
            inputs[input_name] = (
                input_example[input_name].cpu().numpy()
                if isinstance(input_example, OrderedDict)
                else (
                    input_example[i].cpu().numpy()
                    if isinstance(input_example, tuple)
                    else input_example.cpu().numpy()
                )
            )
        outputs_scr = ort_session.run(None, inputs)
        outputs_scr = torch.from_numpy(outputs_scr[0]).cuda()
    elif mode == DF.TORCHSCRIPT:
        scr = torch.jit.load(out_name)
        if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet):
            input_example = torch.randn(4, 1).cuda()
            outputs_fwd = module.forward(input_example)
        # Run the loaded TorchScript module so the comparison below is against the export.
        outputs_scr = (
            scr.forward(*tuple(input_example.values()))
            if isinstance(input_example, OrderedDict)
            else (scr.forward(*input_example) if isinstance(input_example, tuple) else scr.forward(input_example))
        )
    elif mode == DF.PYTORCH:
        module.load_state_dict(torch.load(out_name))
        module.eval()
        outputs_scr = (
            module.forward(*tuple(input_example.values()))
            if isinstance(input_example, OrderedDict)
            else (module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example))
        )

    outputs_scr = outputs_scr[0] if isinstance(outputs_scr, (tuple, list)) else outputs_scr
    outputs_fwd = outputs_fwd[0] if isinstance(outputs_fwd, (tuple, list)) else outputs_fwd

    assert (outputs_scr - outputs_fwd).norm(p=2) < tol

    if out.exists():
        os.remove(out)
def add_utterance_features(self, system_tokens, system_inv_alignments, user_tokens, user_inv_alignments,
                           system_utterance, user_utterance):
    """Add utterance related features input to BERT.

    Note: this method modifies the system tokens and user_tokens in place to
    make their total length <= the maximum input length for the BERT model.

    Args:
        system_tokens: a list of strings which represents the system utterance.
        system_inv_alignments: a list of tuples which denotes the start and end
            character of the token that a bert token originates from in the
            original system utterance.
        user_tokens: a list of strings which represents the user utterance.
        user_inv_alignments: a list of tuples which denotes the start and end
            character of the token that a bert token originates from in the
            original user utterance.
    """
    # Make user-system utterance input (in BERT format).
    # Input sequence length for utterance BERT encoder.
    max_utt_len = self._max_seq_length

    # Modify lengths of sys & usr utterance so that the total length
    # (including cls_token, sep_token, sep_token) is no more than max_utt_len.
    is_too_long = truncate_seq_pair(system_tokens, user_tokens, max_utt_len - 3)
    if is_too_long:
        logging.debug(f'Utterance sequence truncated in example id - {self.example_id}.')

    # Construct the tokens, segment mask and valid token mask which will be
    # input to BERT, using the tokens for system utterance (sequence A) and
    # user utterance (sequence B).
    utt_subword = []
    utt_seg = []
    utt_mask = []
    start_char_idx = []
    end_char_idx = []

    utt_subword.append(self._tokenizer.cls_token)
    utt_seg.append(0)
    utt_mask.append(1)
    start_char_idx.append(0)
    end_char_idx.append(0)

    for subword_idx, subword in enumerate(system_tokens):
        utt_subword.append(subword)
        utt_seg.append(0)
        utt_mask.append(1)
        st, en = system_inv_alignments[subword_idx]
        start_char_idx.append(-(st + 1))
        end_char_idx.append(-(en + 1))

    utt_subword.append(self._tokenizer.sep_token)
    utt_seg.append(0)
    utt_mask.append(1)
    start_char_idx.append(0)
    end_char_idx.append(0)

    for subword_idx, subword in enumerate(user_tokens):
        utt_subword.append(subword)
        utt_seg.append(1)
        utt_mask.append(1)
        st, en = user_inv_alignments[subword_idx]
        start_char_idx.append(st + 1)
        end_char_idx.append(en + 1)

    utt_subword.append(self._tokenizer.sep_token)
    utt_seg.append(1)
    utt_mask.append(1)
    start_char_idx.append(0)
    end_char_idx.append(0)

    utterance_ids = self._tokenizer.tokens_to_ids(utt_subword)

    # Zero-pad up to the BERT input sequence length.
    while len(utterance_ids) < max_utt_len:
        utterance_ids.append(0)
        utt_seg.append(0)
        utt_mask.append(0)
        start_char_idx.append(0)
        end_char_idx.append(0)

    self.utterance_ids = utterance_ids
    self.utterance_segment = utt_seg
    self.utterance_mask = utt_mask
    self.start_char_idx = start_char_idx
    self.end_char_idx = end_char_idx

    self.user_utterances = user_utterance
    self.system_utterance = system_utterance
def __call__(self, index, input_metadata, input_example=None):
    logging.debug("Updating seed to: {:}".format(self.seed + index))
    rng = np.random.RandomState(self.seed + index)

    buffers = OrderedDict()
    i = 0
    for name, (dtype, shape) in input_metadata.items():
        if input_example is not None and (not isinstance(input_example, tuple) or i < len(input_example)):
            if isinstance(input_example, tuple):
                static_shape = input_example[i].shape
            elif isinstance(input_example, OrderedDict):
                static_shape = tuple(input_example.values())[i].shape
            else:
                static_shape = tuple(input_example.shape)
        elif is_shape_dynamic(shape):
            if name in self.default_shapes:
                static_shape = self.default_shapes[name]
            else:
                static_shape = [self.default_shape_value if is_dimension_dynamic(elem) else elem for elem in shape]
            if static_shape != shape:
                if not is_valid_shape_override(static_shape, shape):
                    logging.critical(
                        "Cannot override original shape: {:}, for input: {:} to {:}".format(shape, name, static_shape)
                    )
                logging.warning(
                    "Input: {:}: Adjusted dynamic shape: {:} to: {:}".format(name, shape, static_shape),
                    mode=logging_mode.ONCE,
                )
        else:
            if name in self.default_shapes:
                logging.warning(
                    "Will not override static shape: {:}, for input: {:}".format(shape, name),
                    mode=logging_mode.ONCE,
                )
            static_shape = shape

        if input_example is not None and (not isinstance(input_example, tuple) or i < len(input_example)):
            if isinstance(input_example, OrderedDict):
                buffers[name] = list(input_example.values())[i].cpu()
            else:
                buffers[name] = input_example[i].cpu() if isinstance(input_example, tuple) else input_example.cpu()
        elif np.issubdtype(dtype, np.integer):
            buffers[name] = rng.randint(low=self.int_min, high=self.int_max, size=static_shape, dtype=dtype)
        elif np.issubdtype(dtype, np.bool_):
            buffers[name] = rng.randint(low=0, high=2, size=static_shape).astype(dtype)
        else:
            buffers[name] = (
                rng.random_sample(size=static_shape) * (self.float_max - self.float_min) + self.float_min
            ).astype(dtype)

        # To handle scalars: the functions above return a float if shape is ().
        buffers[name] = np.array(buffers[name])

        # If the shape is 1D, and has a length equal to the rank of the provided default shape, it is
        # likely to be a TRT shape tensor, and so should be overridden such that its value (not shape)
        # is the default shape.
        is_shape_tensor = (
            (not is_shape_dynamic(shape))
            and (name in self.default_shapes)
            and (len(shape) == 1)
            and (shape[0] == len(self.default_shapes[name]))
        )
        if is_shape_tensor:
            buffers[name] = np.array(self.default_shapes[name], dtype=dtype)
            logging.warning(
                "Assuming {:} is a shape tensor. Setting to: {:}".format(name, buffers[name]),
                mode=logging_mode.ONCE,
            )
        i = i + 1

    return buffers
def get_predicted_dialog_nemotracker(dialog, all_predictions, schemas, eval_debug, in_domain_services):
    """This is the NeMo tracker, enabled by passing "--state_tracker=nemotracker".
    It improves performance significantly by employing carry-over mechanisms for in-service and cross-service slots.

    * **In-service carry-over mechanism**: There are cases where the value for some slots is not mentioned in the
    last user utterance, but in previous system utterances or actions. Therefore, whenever the status of a
    non-categorical slot is active but no value can be found in the user utterance, we search the list of slots and
    their values mentioned in previous system actions to find a value for this slot. The most recent value is used
    as the value for the slot. It is called in-domain carry-over as it happens inside a service.

    * **Cross-service carry-over mechanism**: In multi-domain dialogues, switching between two services can happen.
    In such cases, some values can be transferred to the new service automatically. For instance, when a user is
    reserving flight tickets for two persons, it can be assumed that the number of people for the hotel reservation
    should also be two. To handle such cases, when we process the dialogues, we also record the list of these
    carry-overs between two services from the training data. A candidate list for each (service, slot) is produced
    which shows the possible carry-overs for that slot. These lists are stored in a file along with the processed
    dialogues, and are read and used in the state tracker to carry values when a switch happens from one service to
    another. Whenever we find a switch and have an active non-categorical slot without any value, we try to use that
    candidate list to retrieve a value for that slot from other slots in other services in previous turns. The latest
    value is used if multiple values are found.

    Args:
        dialog: A json object containing the dialogue whose labels are to be updated.
        all_predictions: A dict mapping prediction name to the predicted value.
        schemas: A Schema object wrapping all the schemas for the dataset.
        eval_debug: Specifies if it is running in DEBUG mode, so as to generate the error analysis outputs.
        in_domain_services: List of the seen services.

    Returns:
        A json object containing the dialogue with labels predicted by the model.
    """
    dialog_id = dialog["dialogue_id"]
    # The slot values tracked for each service.
    all_slot_values = defaultdict(OrderedDict)
    sys_slots_agg = defaultdict(OrderedDict)
    sys_slots_last = defaultdict(OrderedDict)
    sys_rets = OrderedDict()
    true_state_prev = OrderedDict()
    true_state = OrderedDict()
    frame_service_prev = ""
    slots_relation_list = schemas._slots_relation_list

    for turn_idx, turn in enumerate(dialog["turns"]):
        if turn["speaker"] == "SYSTEM":
            sys_slots_last = defaultdict(OrderedDict)
            for frame in turn["frames"]:
                if frame["service"] not in sys_slots_agg:
                    sys_slots_agg[frame["service"]] = OrderedDict()
                if frame["service"] not in sys_slots_last:
                    sys_slots_last[frame["service"]] = OrderedDict()
                for action in frame["actions"]:
                    if action["slot"] and len(action["values"]) > 0:
                        sys_slots_agg[frame["service"]][action["slot"]] = action["values"][0]
                        sys_slots_last[frame["service"]][action["slot"]] = action["values"][0]
        elif turn["speaker"] == "USER":
            user_utterance = turn["utterance"]
            system_utterance = dialog["turns"][turn_idx - 1]["utterance"] if turn_idx else ""
            turn_id = "{:02d}".format(turn_idx)
            for frame in turn["frames"]:
                cat_slot_status_acc = 0
                cat_slot_status_num = 0
                noncat_slot_status_num = 0
                noncat_slot_status_acc = 0
                cat_slot_value_acc = 0
                cat_slot_value_num = 0
                noncat_slot_value_acc = 0
                noncat_slot_value_num = 0

                predictions = all_predictions[(dialog_id, turn_id, frame["service"])]
                slot_values = all_slot_values[frame["service"]]
                service_schema = schemas.get_service_schema(frame["service"])

                predictions["cat_slot_status_p"] = predictions["cat_slot_status_p"].cpu().numpy()
                predictions["cat_slot_status"] = predictions["cat_slot_status"].cpu().numpy()
                predictions["cat_slot_value"] = predictions["cat_slot_value"].cpu().numpy()
                predictions["cat_slot_value_p"] = predictions["cat_slot_value_p"].cpu().numpy()
                predictions["noncat_slot_status_p"] = predictions["noncat_slot_status_p"].cpu().numpy()
                predictions["noncat_slot_status"] = predictions["noncat_slot_status"].cpu().numpy()
                predictions["noncat_slot_p"] = predictions["noncat_slot_p"].cpu().numpy()
                predictions["noncat_alignment_start"] = predictions["noncat_alignment_start"].cpu().numpy()
                predictions["noncat_alignment_end"] = predictions["noncat_alignment_end"].cpu().numpy()
                predictions["cat_slot_status_GT"] = predictions["cat_slot_status_GT"].cpu().numpy()
                predictions["noncat_slot_status_GT"] = predictions["noncat_slot_status_GT"].cpu().numpy()

                # Remove the slot spans and state if present.
                true_state_prev = [] if len(true_state) == 0 else true_state["slot_values"]
                true_slots = frame.pop("slots", None)
                true_state = frame.pop("state", None)

                # The baseline model doesn't predict slot spans. Only state predictions are added.
                state = OrderedDict()

                # Add prediction for active intent. Offset is subtracted to account for the NONE intent.
                active_intent_id = predictions["intent_status"]
                state["active_intent"] = (
                    service_schema.get_intent_from_id(active_intent_id - 1) if active_intent_id else "NONE"
                )

                # Add prediction for requested slots.
                requested_slots = []
                for slot_idx, slot in enumerate(service_schema.slots):
                    if predictions["req_slot_status"][slot_idx] > REQ_SLOT_THRESHOLD:
                        requested_slots.append(slot)
                state["requested_slots"] = requested_slots

                # Add prediction for user goal (slot values).
                # Categorical slots.
                categorical_slots_dict = OrderedDict()
                non_categorical_slots_dict = OrderedDict()

                for slot_idx, slot in enumerate(service_schema.categorical_slots):
                    cat_slot_status_num += 1

                    slot_status = predictions["cat_slot_status"][slot_idx]
                    extracted_value = None
                    if slot_status == STATUS_DONTCARE:
                        extracted_value = STR_DONTCARE
                    elif slot_status == STATUS_ACTIVE:
                        if (
                            service_schema.get_categorical_slot_values(slot)[predictions["cat_slot_value"][slot_idx]]
                            != "#CARRYVALUE#"
                        ):
                            value_idx = predictions["cat_slot_value"][slot_idx]
                            extracted_value = service_schema.get_categorical_slot_values(slot)[value_idx]
                        else:
                            carryover_value = get_carryover_value(
                                slot,
                                frame,
                                all_slot_values,
                                slots_relation_list,
                                frame_service_prev,
                                sys_slots_last,
                                sys_slots_agg,
                                sys_rets,
                            )
                            if carryover_value is not None:
                                extracted_value = carryover_value
                                print(f'slot:{slot} with value:{carryover_value} extracted with CARRYVALUE')
                    elif slot_status == STATUS_OFF:
                        extracted_value = None

                    if extracted_value is not None:
                        slot_values[slot] = extracted_value

                    # Debugging info processing.
                    if predictions["cat_slot_status_GT"][slot_idx] != predictions["cat_slot_status"][slot_idx] or (
                        predictions["cat_slot_status_GT"][slot_idx] == predictions["cat_slot_status"][slot_idx]
                        and predictions["cat_slot_status_GT"][slot_idx] != STATUS_OFF
                        and extracted_value not in true_state['slot_values'][slot]
                    ):
                        categorical_slots_dict[slot] = (
                            predictions["cat_slot_status_GT"][slot_idx],
                            predictions["cat_slot_status"][slot_idx],
                            predictions["cat_slot_status_p"][slot_idx],
                            service_schema.get_categorical_slot_values(slot)[predictions["cat_slot_value"][slot_idx]],
                            service_schema.get_categorical_slot_values(slot)[predictions["cat_slot_value_GT"][slot_idx]],
                            extracted_value,
                            predictions["cat_slot_value_p"][slot_idx],
                        )

                    if predictions["cat_slot_status_GT"][slot_idx] == predictions["cat_slot_status"][slot_idx]:
                        cat_slot_status_acc += 1
                    if predictions["cat_slot_status_GT"][slot_idx] != STATUS_OFF:
                        cat_slot_value_num += 1
                        if extracted_value in true_state['slot_values'][slot]:
                            cat_slot_value_acc += 1
                    # Debugging info processing ended.

                for slot_idx, slot in enumerate(service_schema.non_categorical_slots):
                    noncat_slot_status_num += 1

                    tok_start_idx = predictions["noncat_slot_start"][slot_idx]
                    tok_end_idx = predictions["noncat_slot_end"][slot_idx]
                    ch_start_idx = predictions["noncat_alignment_start"][tok_start_idx]
                    ch_end_idx = predictions["noncat_alignment_end"][tok_end_idx]

                    extracted_value = None
                    if ch_start_idx > 0 and ch_end_idx > 0:
                        # Add the span from the user utterance.
                        extracted_value = user_utterance[ch_start_idx - 1 : ch_end_idx]
                    else:
                        extracted_value = get_carryover_value(
                            slot,
                            frame,
                            all_slot_values,
                            slots_relation_list,
                            frame_service_prev,
                            sys_slots_last,
                            sys_slots_agg,
                            sys_rets,
                        )

                    slot_status = predictions["noncat_slot_status"][slot_idx]
                    if slot_status == STATUS_DONTCARE:
                        slot_values[slot] = STR_DONTCARE
                    elif slot_status == STATUS_ACTIVE:
                        if extracted_value is not None:
                            slot_values[slot] = extracted_value

                    # Debugging info processing.
                    if predictions["noncat_slot_status_GT"][slot_idx] != predictions["noncat_slot_status"][slot_idx] or (
                        predictions["noncat_slot_status_GT"][slot_idx] == predictions["noncat_slot_status"][slot_idx]
                        and predictions["noncat_slot_status_GT"][slot_idx] != STATUS_OFF
                        and extracted_value not in true_state['slot_values'][slot]
                    ):
                        non_categorical_slots_dict[slot] = (
                            predictions["noncat_slot_status_GT"][slot_idx],
                            predictions["noncat_slot_status"][slot_idx],
                            predictions["noncat_slot_status_p"][slot_idx],
                            (ch_start_idx, ch_end_idx),
                            user_utterance[ch_start_idx - 1 : ch_end_idx]
                            if (ch_start_idx > 0 and ch_end_idx > 0)
                            else system_utterance[-ch_start_idx - 1 : -ch_end_idx],
                            extracted_value,
                            predictions["noncat_slot_p"][slot_idx],
                        )

                    if predictions["noncat_slot_status_GT"][slot_idx] != STATUS_OFF:
                        noncat_slot_value_num += 1
                        if extracted_value is not None and extracted_value in true_state['slot_values'][slot]:
                            noncat_slot_value_acc += 1
                    if predictions["noncat_slot_status_GT"][slot_idx] == predictions["noncat_slot_status"][slot_idx]:
                        noncat_slot_status_acc += 1
                    # Debugging info processing ended.

                carry_over_slots(
                    frame,
                    all_slot_values,
                    slots_relation_list,
                    frame_service_prev,
                    sys_slots_last,
                    sys_slots_agg,
                    slot_values,
                )

                # In debug mode, the following outputs are generated and can be used for error analysis.
                # It prints out information about the frames in the evaluation set which contain errors,
                # where those errors in the predicted state do not originate from previous frames or turns.
                # Therefore, these frames are the origin of errors in the evaluation dialogues.
                # It prints only the frames for seen services, as our model is designed mostly for seen
                # services and does not work as well on unseen ones.
                if eval_debug and frame["service"] in in_domain_services:
                    equal_state = True
                    for s, v in true_state['slot_values'].items():
                        if s not in slot_values or slot_values[s] not in v:
                            equal_state = False
                            break
                    for s, v in slot_values.items():
                        if s not in true_state['slot_values'] or v not in true_state['slot_values'][s]:
                            equal_state = False
                            break
                    if not equal_state:
                        cat_slot_status_acc = (
                            "NAN" if cat_slot_status_num == 0 else cat_slot_status_acc / cat_slot_status_num
                        )
                        noncat_slot_status_acc = (
                            "NAN" if noncat_slot_status_num == 0 else noncat_slot_status_acc / noncat_slot_status_num
                        )
                        cat_slot_value_acc = (
                            "NAN" if cat_slot_value_num == 0 else cat_slot_value_acc / cat_slot_value_num
                        )
                        noncat_slot_value_acc = (
                            "NAN" if noncat_slot_value_num == 0 else noncat_slot_value_acc / noncat_slot_value_num
                        )

                        found_err = False
                        if cat_slot_status_acc != "NAN" and cat_slot_status_acc < 1.0:
                            found_err = True
                        if noncat_slot_status_acc != "NAN" and noncat_slot_status_acc < 1.0:
                            found_err = True
                        if cat_slot_value_acc != "NAN" and cat_slot_value_acc != 1.0:
                            found_err = True
                        if noncat_slot_value_acc != "NAN" and noncat_slot_value_acc != 1.0:
                            found_err = True

                        if found_err:
                            logging.debug("-----------------------------------New Frame------------------------------")
                            logging.debug(f'DIALOGUE ID : {dialog_id}, TURN ID: {turn_id}, SERVICE: {frame["service"]}')
                            logging.debug(f'SYS : {system_utterance}')
                            logging.debug(f'USER: {user_utterance}')
                            logging.debug("\n")
                            logging.debug(f"PRED CAT: {categorical_slots_dict}")
                            logging.debug(f"PRED NON-CAT: {non_categorical_slots_dict}")
                            logging.debug("\n")
                            logging.debug(f"STATE - LABEL: {sorted(true_state['slot_values'].items())}")
                            logging.debug(f"STATE - PRED : {sorted(slot_values.items())}")
                            logging.debug(f"STATE - PREV: {true_state_prev}")
                            logging.debug("\n")
                            logging.debug(f"SLOTS - LABEL: {true_slots}")
                            logging.debug(f"SYS SLOT AGG: {sys_slots_agg}")
                            logging.debug(f"SYS RETS: {sys_rets}")
                            logging.debug("\n")
                            logging.debug(f"CAT STATUS ACC: {cat_slot_status_acc}")
                            logging.debug(f"NONCAT STATUS ACC: {noncat_slot_status_acc}")
                            logging.debug(
                                f"CAT VALUES ACC: {cat_slot_value_acc} ,NONCAT VALUES ACC: {noncat_slot_value_acc}"
                            )

                            found_err = False
                            if cat_slot_status_acc != "NAN" and cat_slot_status_acc < 1.0:
                                logging.debug("CAT_STATUS_ERR")
                                found_err = True
                            if noncat_slot_status_acc != "NAN" and noncat_slot_status_acc < 1.0:
                                logging.debug("NONCAT_STATUS_ERR")
                                found_err = True
                            if (
                                noncat_slot_status_acc != "NAN"
                                and noncat_slot_status_acc < 1.0
                                and cat_slot_status_acc != "NAN"
                                and cat_slot_status_acc < 1.0
                            ):
                                logging.debug("BOTH_STATUS_ERR")
                                found_err = True
                            if cat_slot_value_acc != "NAN" and cat_slot_value_acc < 1.0:
                                logging.debug("CAT_VALUE_ERR")
                                found_err = True
                            if noncat_slot_value_acc != "NAN" and noncat_slot_value_acc < 1.0:
                                logging.debug("NONCAT_VALUE_ERR")
                                found_err = True
                            if (
                                noncat_slot_value_acc != "NAN"
                                and noncat_slot_value_acc != 1.0
                                and cat_slot_value_acc != "NAN"
                                and cat_slot_value_acc != 1.0
                            ):
                                logging.debug("BOTH_VALUE_ERR")
                                found_err = True
                            if not found_err:
                                logging.debug("CLEAN_FRAME")

                # Create a new dict to avoid overwriting the state in previous turns
                # because of the use of the same objects.
                state["slot_values"] = {s: [v] for s, v in slot_values.items()}
                frame["state"] = state
                frame_service_prev = frame["service"]

    return dialog
def get_features(
    queries,
    max_seq_length,
    tokenizer,
    label_ids=None,
    pad_label='O',
    raw_labels=None,
    ignore_extra_tokens=False,
    ignore_start_end=False,
):
    """
    Args:
        queries (list of str): text sequences
        max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP]
        tokenizer (Tokenizer): such as NemoBertTokenizer
        label_ids (dict): dict to map labels to label ids.
            Starts with pad_label->0 and then increases in alphabetical order.
            Required for training and evaluation, not needed for inference.
        pad_label (str): pad value to use for labels. By default, it's the neutral label.
        raw_labels (list of str): list of labels for every word in a sequence
        ignore_extra_tokens (bool): whether to ignore extra tokens in the loss_mask
        ignore_start_end (bool): whether to ignore bos and eos tokens in the loss_mask
    """
    all_subtokens = []
    all_loss_mask = []
    all_subtokens_mask = []
    all_segment_ids = []
    all_input_ids = []
    all_input_mask = []
    sent_lengths = []
    all_labels = []
    with_label = False

    if raw_labels is not None:
        with_label = True

    for i, query in enumerate(queries):
        words = query.strip().split()

        # add bos token
        subtokens = ['[CLS]']
        loss_mask = [1 - ignore_start_end]
        subtokens_mask = [0]
        if with_label:
            pad_id = label_ids[pad_label]
            labels = [pad_id]
            query_labels = [label_ids[lab] for lab in raw_labels[i]]

        for j, word in enumerate(words):
            word_tokens = tokenizer.text_to_tokens(word)
            subtokens.extend(word_tokens)

            loss_mask.append(1)
            loss_mask.extend([int(not ignore_extra_tokens)] * (len(word_tokens) - 1))

            subtokens_mask.append(1)
            subtokens_mask.extend([0] * (len(word_tokens) - 1))

            if with_label:
                labels.extend([query_labels[j]] * len(word_tokens))

        # add eos token
        subtokens.append('[SEP]')
        loss_mask.append(1 - ignore_start_end)
        subtokens_mask.append(0)
        sent_lengths.append(len(subtokens))
        all_subtokens.append(subtokens)
        all_loss_mask.append(loss_mask)
        all_subtokens_mask.append(subtokens_mask)
        all_input_mask.append([1] * len(subtokens))
        if with_label:
            labels.append(pad_id)
            all_labels.append(labels)

    max_seq_length = min(max_seq_length, max(sent_lengths))
    logging.info(f'Max length: {max_seq_length}')
    datasets_utils.get_stats(sent_lengths)
    too_long_count = 0

    for i, subtokens in enumerate(all_subtokens):
        if len(subtokens) > max_seq_length:
            subtokens = ['[CLS]'] + subtokens[-max_seq_length + 1 :]
            all_input_mask[i] = [1] + all_input_mask[i][-max_seq_length + 1 :]
            all_loss_mask[i] = [int(not ignore_start_end)] + all_loss_mask[i][-max_seq_length + 1 :]
            all_subtokens_mask[i] = [0] + all_subtokens_mask[i][-max_seq_length + 1 :]

            if with_label:
                all_labels[i] = [pad_id] + all_labels[i][-max_seq_length + 1 :]
            too_long_count += 1

        all_input_ids.append([tokenizer.tokens_to_ids(t) for t in subtokens])

        if len(subtokens) < max_seq_length:
            extra = max_seq_length - len(subtokens)
            all_input_ids[i] = all_input_ids[i] + [0] * extra
            all_loss_mask[i] = all_loss_mask[i] + [0] * extra
            all_subtokens_mask[i] = all_subtokens_mask[i] + [0] * extra
            all_input_mask[i] = all_input_mask[i] + [0] * extra

            if with_label:
                all_labels[i] = all_labels[i] + [pad_id] * extra

        all_segment_ids.append([0] * max_seq_length)

    logging.warning(f'{too_long_count} sentences are longer than {max_seq_length}')

    for i in range(min(len(all_input_ids), 5)):
        logging.debug("*** Example ***")
        logging.debug("i: %s", i)
        logging.debug("subtokens: %s", " ".join(list(map(str, all_subtokens[i]))))
        logging.debug("loss_mask: %s", " ".join(list(map(str, all_loss_mask[i]))))
        logging.debug("input_mask: %s", " ".join(list(map(str, all_input_mask[i]))))
        logging.debug("subtokens_mask: %s", " ".join(list(map(str, all_subtokens_mask[i]))))
        if with_label:
            logging.debug("labels: %s", " ".join(list(map(str, all_labels[i]))))

    return (all_input_ids, all_segment_ids, all_input_mask, all_loss_mask, all_subtokens_mask, all_labels)
import zlib
from collections import OrderedDict

import numpy as np

# Only initialize the GPU after this runner is activated. This import causes pycuda
# to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

from nemo import logging, logging_mode

logging.info("Using TensorRT {:}".format(trt.__version__))
logging.debug("Note: Using tensorrt from {:}".format(trt.__path__))

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


def set_trt_logging_level(sev):
    global TRT_LOGGER
    if sev == logging.DEBUG:
        TRT_LOGGER.min_severity = trt.Logger.INFO
    elif sev == logging.WARNING:
        TRT_LOGGER.min_severity = trt.Logger.WARNING
    elif sev == logging.ERROR:
        TRT_LOGGER.min_severity = trt.Logger.ERROR
    elif sev == logging.CRITICAL:
        TRT_LOGGER.min_severity = trt.Logger.INTERNAL_ERROR
def forward(self, belief_state, request_state):
    """
    Generate a System Act and add it to the belief state.

    Args:
        belief_state (dict): dialogue state with slot-slot_values pairs for all domains
        request_state (dict): requested slots dict

    Returns:
        belief_state (dict): updated belief state
        system_acts (list): DA (Dialog Act), in the form of
            {act_type1: [[slot_name_1, value_1], [slot_name_2, value_2], ...], ...}
    """
    if self.recommend_flag != -1:
        self.recommend_flag += 1

    self.kb_result = {}
    DA = {}

    user_action = self.check_diff(belief_state, request_state)

    self.last_request_state = deepcopy(request_state)
    self.last_belief_state = deepcopy(belief_state)

    for user_act in user_action:
        domain, _ = user_act.split('-')

        # Respond to general greetings
        if domain == 'general':
            self._update_greeting(user_act, DA)
        # Book taxi for user
        elif domain == 'Taxi':
            self._book_taxi(belief_state, DA)
        elif domain == 'Booking':
            pass
        # User's talking about another domain
        elif domain != "Train":
            self._update_DA(user_act, user_action, belief_state, DA)
        # Info about train
        else:
            self._update_train(user_act, user_action, belief_state, DA)

        # Judge if the user wants to book
        self._judge_booking(user_act, user_action, DA)

        if 'Booking-Book' in DA:
            if random.random() < 0.5:
                DA['general-reqmore'] = []
            user_acts = []
            for user_act_ in DA:
                if user_act_ != 'Booking-Book':
                    user_acts.append(user_act_)
            for user_act_ in user_acts:
                del DA[user_act_]

    if DA == {}:
        DA = {'general-greet': [['none', 'none']]}

    system_acts = []
    for domain_intent, svs in DA.items():
        domain, intent = domain_intent.split('-')
        if not svs and domain == 'general':
            system_acts.append([intent, domain, 'none', 'none'])
        else:
            for slot, value in svs:
                system_acts.append([intent, domain, slot, value])

    logging.debug("DPM output: %s", system_acts)
    logging.debug("Belief State after DPM: %s", belief_state)
    logging.debug("Request State after DPM: %s", request_state)

    return belief_state, system_acts
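# Pure-Python illustration of the DA -> system_acts flattening at the end of
# forward(), using a made-up dialog act dict.
DA = {"Hotel-Inform": [["Area", "east"], ["Price", "cheap"]], "general-greet": []}
system_acts = []
for domain_intent, svs in DA.items():
    domain, intent = domain_intent.split('-')
    if not svs and domain == 'general':
        system_acts.append([intent, domain, 'none', 'none'])
    else:
        for slot, value in svs:
            system_acts.append([intent, domain, slot, value])
# [['Inform', 'Hotel', 'Area', 'east'], ['Inform', 'Hotel', 'Price', 'cheap'],
#  ['greet', 'general', 'none', 'none']]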
def log_network(network):
    LAYER_TYPE_CLASS_MAPPING = {
        trt.LayerType.CONVOLUTION: trt.IConvolutionLayer,
        trt.LayerType.FULLY_CONNECTED: trt.IFullyConnectedLayer,
        trt.LayerType.ACTIVATION: trt.IActivationLayer,
        trt.LayerType.POOLING: trt.IPoolingLayer,
        trt.LayerType.LRN: trt.ILRNLayer,
        trt.LayerType.SCALE: trt.IScaleLayer,
        trt.LayerType.SOFTMAX: trt.ISoftMaxLayer,
        trt.LayerType.DECONVOLUTION: trt.IDeconvolutionLayer,
        trt.LayerType.CONCATENATION: trt.IConcatenationLayer,
        trt.LayerType.ELEMENTWISE: trt.IElementWiseLayer,
        trt.LayerType.PLUGIN: trt.IPluginLayer,
        trt.LayerType.RNN: trt.IRNNLayer,
        trt.LayerType.UNARY: trt.IUnaryLayer,
        trt.LayerType.PADDING: trt.IPaddingLayer,
        trt.LayerType.SHUFFLE: trt.IShuffleLayer,
        trt.LayerType.REDUCE: trt.IReduceLayer,
        trt.LayerType.TOPK: trt.ITopKLayer,
        trt.LayerType.GATHER: trt.IGatherLayer,
        trt.LayerType.MATRIX_MULTIPLY: trt.IMatrixMultiplyLayer,
        trt.LayerType.RAGGED_SOFTMAX: trt.IRaggedSoftMaxLayer,
        trt.LayerType.CONSTANT: trt.IConstantLayer,
        trt.LayerType.RNN_V2: trt.IRNNv2Layer,
        trt.LayerType.IDENTITY: trt.IIdentityLayer,
        trt.LayerType.PLUGIN_V2: trt.IPluginV2Layer,
        trt.LayerType.SLICE: trt.ISliceLayer,
        trt.LayerType.SHAPE: trt.IShapeLayer,
        trt.LayerType.PARAMETRIC_RELU: trt.IParametricReLULayer,
        trt.LayerType.RESIZE: trt.IResizeLayer,
    }

    def is_special_attribute(attr):
        return attr.startswith("__") and attr.endswith("__")

    def is_valid_attribute(attr, layer):
        if (
            type(layer) == trt.IPoolingLayer
            or type(layer) == trt.IConvolutionLayer
            or type(layer) == trt.IDeconvolutionLayer
        ):
            if len(layer.get_input(0).shape) > 4:
                # 3D pooling uses padding_nd
                return attr not in ["padding", "stride", "window_size"]
        if type(layer) == trt.IResizeLayer:
            if layer.num_inputs > 1:
                return attr not in ["scales"]
        if type(layer) == trt.ISliceLayer:
            if layer.num_inputs > 1:
                return attr not in ["shape", "start", "stride"]
        return True

    logging.debug("Network Inputs: {:}".format(TensorRTRunnerV2.get_network_inputs(network)))
    for layer in network:
        if layer.type in LAYER_TYPE_CLASS_MAPPING:
            layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type]

        input_info = [
            "{:}: {:} ({:})".format(layer.get_input(i).name, layer.get_input(i).shape, layer.get_input(i).dtype)
            for i in range(layer.num_inputs)
            if layer.get_input(i)
        ]
        output_info = [
            "{:}: {:} ({:})".format(layer.get_output(i).name, layer.get_output(i).shape, layer.get_output(i).dtype)
            for i in range(layer.num_outputs)
            if layer.get_output(i)
        ]
        logging.info("{:} [Op: {:}]".format(layer.name, layer.type))
        logging.info("\t{:} -> {:}".format(input_info, output_info))
        attrs = dir(layer)
        for attr in attrs:
            # Exclude special attributes, as well as any attributes of the base layer class
            # (those can be displayed above).
            if not is_special_attribute(attr) and not hasattr(trt.ILayer, attr) and is_valid_attribute(attr, layer):
                logging.info("\t{:}.{:} = {:}".format(layer.name, attr, getattr(layer, attr)))

    network_outputs = {network.get_output(i).name: network.get_output(i).shape for i in range(network.num_outputs)}
    logging.debug("Network Outputs: {:}".format(network_outputs))