# Imports assumed by the snippets below; `args` (parsed command-line flags) and
# `convert_example` (text-to-feature conversion) are defined elsewhere in the
# original scripts.
import numpy as np
import paddle
import paddle.nn.functional as F
from paddle_serving_client import Client
from paddlenlp.data import Pad, Tuple
from paddlenlp.transformers import ErnieTinyTokenizer
from scipy.special import softmax


def predict(data, label_map, batch_size):
    """
    Predicts the label of each text via a running Paddle Serving endpoint.

    Args:
        data (list[str]): Each element is a raw text to classify.
        label_map (dict): The label id (key) to label str (value) map.
        batch_size (int): The batch size.

    Returns:
        results (list[str]): The predicted label of each input text.
    """
    # TODO: Tokenizing on the serving end rather than the client end may be better.
    tokenizer = ErnieTinyTokenizer.from_pretrained("ernie-tiny")
    examples = []
    for text in data:
        example = {"text": text}
        input_ids, token_type_ids = convert_example(
            example,
            tokenizer,
            max_seq_length=args.max_seq_length,
            is_test=True)
        examples.append((input_ids, token_type_ids))

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64'),  # input ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int64'),  # token type ids
    ): fn(samples)

    # Split the data into batches.
    batches = [
        examples[idx:idx + batch_size]
        for idx in range(0, len(examples), batch_size)
    ]

    # Initialize the serving client.
    client = Client()
    client.load_client_config(args.client_config_file)
    client.connect([args.server_ip_port])

    results = []
    for batch in batches:
        input_ids, token_type_ids = batchify_fn(batch)
        fetch_map = client.predict(
            feed={
                "input_ids": input_ids,
                "token_type_ids": token_type_ids
            },
            fetch=["save_infer_model/scale_0.tmp_1"],
            batch=True)
        output_data = np.array(fetch_map["save_infer_model/scale_0.tmp_1"])
        probs = softmax(output_data, axis=1)
        idx = np.argmax(probs, axis=1)
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]
        results.extend(labels)
    return results
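# Usage sketch for the serving client above (illustrative, not from the original
# script): assumes a Paddle Serving endpoint is already running and that
# `args.client_config_file` / `args.server_ip_port` point at it. Texts and label
# map are made-up placeholders (Chinese, since ernie-tiny is a Chinese model).
label_map = {0: "negative", 1: "positive"}
data = ["这家餐厅的菜品味道不错", "物流太慢了，体验很差"]
results = predict(data, label_map, batch_size=2)
for text, label in zip(data, results):
    print(text, "->", label)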
# Method of an inference `Predictor` class: `self.predictor`,
# `self.input_handles`, `self.output_handle`, `self.max_seq_length`, and the
# optional `self.autolog` benchmark logger are created in its constructor.
def predict(self, data, tokenizer, label_map):
    """
    Predicts the data labels.

    Args:
        data (obj:`List(str)`): The batch data whose each element is a raw text.
        tokenizer (obj:`PretrainedTokenizer`): This tokenizer inherits from
            :class:`~paddlenlp.transformers.PretrainedTokenizer` which contains
            most of the methods. Users should refer to the superclass for more
            information regarding methods.
        label_map (obj:`dict`): The label id (key) to label str (value) map.

    Returns:
        labels (obj:`List(str)`): The predicted label of each input text.
    """
    if args.benchmark:
        self.autolog.times.start()

    examples = []
    for text in data:
        example = {"text": text}
        input_ids, segment_ids = convert_example(
            example,
            tokenizer,
            max_seq_length=self.max_seq_length,
            is_test=True)
        examples.append((input_ids, segment_ids))

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment ids
    ): fn(samples)

    if args.benchmark:
        self.autolog.times.stamp()

    input_ids, segment_ids = batchify_fn(examples)
    self.input_handles[0].copy_from_cpu(input_ids)
    self.input_handles[1].copy_from_cpu(segment_ids)
    self.predictor.run()
    logits = self.output_handle.copy_to_cpu()

    if args.benchmark:
        self.autolog.times.stamp()

    probs = softmax(logits, axis=1)
    idx = np.argmax(probs, axis=1)
    idx = idx.tolist()
    labels = [label_map[i] for i in idx]

    if args.benchmark:
        self.autolog.times.end(stamp=True)
    return labels
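# A minimal, self-contained illustration of what `batchify_fn` does (values are
# made up): `Tuple` routes the i-th field of every example to the i-th batchify
# function, and `Pad` pads each field to the longest sequence in the batch.
demo_fn = Tuple(Pad(axis=0, pad_val=0), Pad(axis=0, pad_val=0))
demo_batch = [([1, 2, 3], [0, 0, 0]), ([4, 5], [0, 0])]
ids, segments = demo_fn(demo_batch)
# ids      -> array([[1, 2, 3], [4, 5, 0]])
# segments -> array([[0, 0, 0], [0, 0, 0]])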
def predict(model, data, tokenizer, label_map, batch_size=1):
    """
    Predicts the data labels.

    Args:
        model (obj:`paddle.nn.Layer`): A model to classify texts.
        data (obj:`List(str)`): The batch data whose each element is a raw text.
        tokenizer (obj:`PretrainedTokenizer`): This tokenizer inherits from
            :class:`~paddlenlp.transformers.PretrainedTokenizer` which contains
            most of the methods. Users should refer to the superclass for more
            information regarding methods.
        label_map (obj:`dict`): The label id (key) to label str (value) map.
        batch_size (obj:`int`, defaults to 1): The batch size.

    Returns:
        results (obj:`List(str)`): The predicted label of each input text.
    """
    examples = []
    for text in data:
        example = {"text": text}
        input_ids, token_type_ids = convert_example(
            example,
            tokenizer,
            max_seq_length=args.max_seq_length,
            is_test=True)
        examples.append((input_ids, token_type_ids))

    # Split the data into batches.
    batches = [
        examples[idx:idx + batch_size]
        for idx in range(0, len(examples), batch_size)
    ]
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment ids
    ): fn(samples)

    results = []
    model.eval()
    for batch in batches:
        input_ids, token_type_ids = batchify_fn(batch)
        input_ids = paddle.to_tensor(input_ids)
        token_type_ids = paddle.to_tensor(token_type_ids)
        logits = model(input_ids, token_type_ids)
        probs = F.softmax(logits, axis=1)
        idx = paddle.argmax(probs, axis=1).numpy()
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]
        results.extend(labels)
    return results
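# Usage sketch for the dygraph version above (illustrative): the model name,
# class count, and labels are placeholders; any fine-tuned `paddle.nn.Layer`
# returning logits of shape [batch_size, num_classes] works here. Loading
# "ernie-tiny" this way fetches pretrained (not fine-tuned) weights, so real
# predictions require your own checkpoint.
from paddlenlp.transformers import ErnieForSequenceClassification

model = ErnieForSequenceClassification.from_pretrained("ernie-tiny", num_classes=2)
tokenizer = ErnieTinyTokenizer.from_pretrained("ernie-tiny")
label_map = {0: "negative", 1: "positive"}
results = predict(
    model, ["这家餐厅的菜品味道不错"], tokenizer, label_map, batch_size=1)
print(results)  # e.g. ['positive'], depending on the fine-tuned weights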