Example #1
from typing import Optional

import torch
from transformers import PreTrainedModel, PreTrainedTokenizer

# DEFAULT_CONFIG: module-level dict of generate() keyword arguments, defined elsewhere in the source module.


def produce_summary_huggingface(source_text: str,
                                model: PreTrainedModel,
                                tokenizer: PreTrainedTokenizer,
                                config: Optional[dict] = None) -> str:
    """
    Generates a summary using a model (huggingface's transformers library implementation).
    :param source_text: Source text/paragraph to produce summary on
    :param model: Pretrained transformer model
    :param tokenizer: corresponding tokenizer used for the model
    :param config: Configuration for generation/decoding.
    :return: Produced summary by the model
    """

    if not config:
        config = DEFAULT_CONFIG

    input_ids = torch.tensor(
        tokenizer.encode(source_text, add_special_tokens=True)).unsqueeze(0)
    input_ids = input_ids.to('cuda')
    generated = model.generate(input_ids, **config)
    gen_text = tokenizer.batch_decode(generated,
                                      skip_special_tokens=True,
                                      clean_up_tokenization_spaces=True)[0]

    gen_text = gen_text.strip()
    return gen_text
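
A minimal usage sketch for produce_summary_huggingface, assuming a seq2seq summarization checkpoint and a CUDA device (the function moves its inputs to 'cuda' unconditionally). The model name and the DEFAULT_CONFIG values below are illustrative, not taken from the original module.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Illustrative generation settings; the original module defines its own DEFAULT_CONFIG.
DEFAULT_CONFIG = {"num_beams": 4, "max_length": 142, "early_stopping": True}

model_name = "facebook/bart-large-cnn"  # assumed checkpoint; any seq2seq summarizer works
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to("cuda")

print(produce_summary_huggingface("Long article text ...", model, tokenizer))
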
Example #2
def generate_samples(
    model: PreTrainedModel,
    tokenizer: BertTokenizer,
    prompt_text: str,
    max_length=args['max_length'],
    temperature=args['temperature'],
    top_k=args['k'],
    top_p=args['p'],
    repetition_penalty=args['repetition_penalty'],
    num_return_sequences=args['num_return_sequences'],
    stop_token=args['stop']
    ):

    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=True, return_tensors='pt')
    encoded_prompt = encoded_prompt.to(model.device)
    input_ids = encoded_prompt if encoded_prompt.shape[-1] > 0 else None

    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        num_return_sequences=num_return_sequences,
    )

    if len(output_sequences.shape) > 2:
        output_sequences.squeeze_()

    generated_sequences = []

    for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
        print("=== GENERATED SEQUENCE {} ===".format(generated_sequence_idx + 1))
        generated_sequence = generated_sequence.tolist()

        # Decode text
        text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

        # Remove all text after the stop token (if one was configured)
        text = text[: text.find(stop_token) if stop_token else None]

        # Add the prompt at the beginning of the sequence. Remove the excess text that was used for pre-processing
        total_sequence = (
            prompt_text + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)) :]
        )

        generated_sequences.append(total_sequence)
        print(total_sequence)

    return generated_sequences
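
The defaults in the signature above are read from a module-level args dictionary when the function is defined, so that dictionary has to exist before the def runs. A rough sketch of the surrounding wiring, assuming a GPT-2 checkpoint; the annotation says BertTokenizer, but any tokenizer with encode/decode works here, and every value in args below is illustrative.

from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Illustrative settings; the original script builds this dict from its command-line arguments.
args = {
    'max_length': 100,
    'temperature': 1.0,
    'k': 50,
    'p': 0.95,
    'repetition_penalty': 1.0,
    'num_return_sequences': 3,
    'stop': None,  # no stop token: keep the full generated text
}

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

samples = generate_samples(model, tokenizer, "Once upon a time")
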
Example #3
def predict(args, p_model: PreTrainedModel, p_tokenizer: PreTrainedTokenizer,
            questions: list):
    # padding='max_length' + truncation=True replaces the deprecated pad_to_max_length=True
    questions_encoded = p_tokenizer.batch_encode_plus(
        questions,
        padding='max_length',
        truncation=True,
        max_length=args.max_length,
        return_tensors='pt')
    input_ids, attention_mask = questions_encoded[
        "input_ids"], questions_encoded["attention_mask"]
    outputs = p_model.generate(input_ids=input_ids,
                               attention_mask=attention_mask,
                               num_beams=4,
                               max_length=20,
                               early_stopping=True)
    # Decode each output and drop the leading character (typically a stray space)
    predictions = [
        p_tokenizer.decode(o, skip_special_tokens=True)[1:] for o in outputs
    ]
    return predictions
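
A sketch of calling predict, assuming a T5-style seq2seq checkpoint and an argparse-style args object carrying max_length; both the checkpoint and the values are assumptions for illustration.

from argparse import Namespace

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

args = Namespace(max_length=64)  # hypothetical; the original script parses this from the CLI
tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

answers = predict(args, model, tokenizer, ["question: Where is the Eiffel Tower?"])
print(answers)
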
Example #4
    def predict_task_split(self,
                           model: transformers.PreTrainedModel,
                           inputs: tf.data.Dataset,
                           task: Task,
                           max_length: int = 140,
                           min_length: int = 55) -> typing.Sequence[typing.Sequence[int]]:

        try:
            outputs = []
            model.to(self.device)
            for batch_inputs in tqdm.tqdm(inputs.as_numpy_iterator(),
                                          desc="Predicting %s" % task,
                                          unit="batch", leave=False):
                with torch.no_grad():
                    model.eval()
                    forward_params = self.prepare_forward_inputs(model, batch_inputs)
                    batch_outputs = model.generate(forward_params['input_ids'],
                                                   attention_mask=forward_params['attention_mask'],
                                                   do_sample=False,
                                                   max_length=GENERATION_MAX_LENGTHS.get(task.dataset, max_length) + 2,
                                                   min_length=GENERATION_MIN_LENGTHS.get(task.dataset, min_length) + 1,
                                                   num_beams=4,
                                                   length_penalty=2.,
                                                   no_repeat_ngram_size=3,
                                                   early_stopping=True)

                    batch_outputs = batch_outputs.detach().cpu().numpy()
                    outputs.extend(batch_outputs)
            return outputs
        # We can't simply `except tf.errors.UnknownError` here, because TensorFlow raises it through a
        # proxy instance that a plain except clause does not reliably match, so we catch broadly and check.
        except Exception as e:
            if isinstance(e, tf.errors.UnknownError):
                logging.warning('Encountered error: %s on %s: %s', type(e), task, e)
                # Unfortunately, we don't get a more helpful error type, but this usually means
                # that the dataset has no labels for a given split (e.g., test evaluation occurs on a server)
                return []
            else:
                # We got a different exception type so let python freak out accordingly
                logging.error('Encountered error: %s on %s: %s', type(e), task, e)
                raise e
Example #5
def produce_summary(model: PreTrainedModel, tokenizer: PreTrainedTokenizer,
                    source_text: str, cuda: bool):
    """
    Use a summarization model to generate output
    :param model: Pretrained Summarization model
    :param tokenizer: Tokenizer for the summarization model
    :param source_text:
    :param cuda:
    :return:
    """
    input_ids = torch.tensor(
        tokenizer.encode(source_text, truncation=True,
                         add_special_tokens=True)).unsqueeze(0)

    if cuda:
        input_ids = input_ids.to('cuda')

    generated = model.generate(input_ids)
    gen_text = tokenizer.batch_decode(generated,
                                      skip_special_tokens=True,
                                      clean_up_tokenization_spaces=True)[0]

    gen_text = gen_text.strip()
    return gen_text
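
A sketch of calling produce_summary, assuming a distilled BART summarization checkpoint from the Hugging Face hub (any seq2seq summarizer would do); the cuda flag simply follows whether a GPU is available.

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "sshleifer/distilbart-cnn-12-6"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.to("cuda")

print(produce_summary(model, tokenizer, "Long article text ...", cuda=use_cuda))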