Example #1
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('question', required=True)
        args = parser.parse_args()

        logger = logging.getLogger(__name__)

        # Model types with an LM head, enumerated from the transformers
        # mapping (computed here for reference; not used further below).
        MODEL_CONFIG_CLASSES = list(MODEL_WITH_LM_HEAD_MAPPING.keys())
        MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)

        # Encode the user's question, terminated by the EOS token the
        # dialogue model uses as a turn separator.
        new_user_input_ids = tokenizer.encode(
            (f">> User:{args.question}") + tokenizer.eos_token,
            return_tensors='pt')
        # No chat history is kept between requests; each call starts fresh.
        bot_input_ids = new_user_input_ids

        # top_p/top_k only influence decoding when sampling is enabled,
        # so do_sample=True is required for them to take effect.
        chat_history_ids = model.generate(bot_input_ids,
                                          max_length=1000,
                                          pad_token_id=tokenizer.eos_token_id,
                                          do_sample=True,
                                          top_p=0.92,
                                          top_k=50)
        answer = "Jesus: {}".format(
            tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0],
                             skip_special_tokens=True))
        return {'answer': answer}, 200
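
The handler above relies on module-level `tokenizer` and `model` objects and on
being registered as a Flask-RESTful resource. A minimal sketch of that setup,
assuming a DialoGPT-style conversational checkpoint (the checkpoint name, the
`Chat` class, and the `/chat` route are placeholders, not from the original
source):

import logging

from flask import Flask
from flask_restful import Api, Resource, reqparse
from transformers import (MODEL_WITH_LM_HEAD_MAPPING, AutoModelForCausalLM,
                          AutoTokenizer)

# Checkpoint is an assumption; any conversational causal-LM checkpoint works.
tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-medium')
model = AutoModelForCausalLM.from_pretrained('microsoft/DialoGPT-medium')

app = Flask(__name__)
api = Api(app)


class Chat(Resource):
    # the post() method from Example #1 goes here
    pass


api.add_resource(Chat, '/chat')

if __name__ == '__main__':
    app.run()
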
Example #2
import logging
from dataclasses import dataclass, field
from typing import Optional

from transformers import (
    MODEL_WITH_LM_HEAD_MAPPING,
    DataCollatorForLanguageModeling,
    DataCollatorForPermutationLanguageModeling,
    HfArgumentParser,
    LineByLineTextDataset,
    PreTrainedTokenizer,
    TextDataset,
    Trainer,
    TrainingArguments,
    set_seed,
)


logger = logging.getLogger(__name__)


MODEL_CONFIG_CLASSES = list(MODEL_WITH_LM_HEAD_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch."
        },
    )
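
HfArgumentParser maps each dataclass field to a command-line flag, using the
field's type, default, and `help` metadata. A minimal sketch of how such a
dataclass is typically parsed together with TrainingArguments (this pairing
follows the transformers example scripts; it is not shown in the snippet
above):

parser = HfArgumentParser((ModelArguments, TrainingArguments))
model_args, training_args = parser.parse_args_into_dataclasses()

if model_args.model_name_or_path is None:
    logger.info('No checkpoint given; a model will be trained from scratch.')
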
Example #3
from dataclasses import dataclass, field
from typing import Optional

from transformers import (
    MODEL_WITH_LM_HEAD_MAPPING,
    AutoTokenizer,
    # preprocess batches of tensors for MLM
    DataCollatorForLanguageModeling,
    HfArgumentParser,
    PreTrainedTokenizer,
    Trainer,
    TrainingArguments,
    set_seed,
)
from loguru import logger
from paccmann_proteomics.data.datasets.language_modeling import (
    LineByLineTextDatasetCached, LineByLineTextDatasetChunksCached,
    LineByLineTextDataset)

MODEL_CONFIG_CLASSES = list(
    MODEL_WITH_LM_HEAD_MAPPING.keys())  # [ModelConfig, RobertaConfig, ...]
MODEL_TYPES = tuple(
    conf.model_type
    for conf in MODEL_CONFIG_CLASSES)  # ('roberta', 'bert', ...)


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            'help':
            'The model checkpoint for weights initialization. '
            'Leave None if you want to train a model from scratch.'
        },
    )
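
Together these imports cover a standard masked-language-modeling training
loop. A minimal sketch of how they are typically wired, using the
transformers-style LineByLineTextDataset signature and assuming a
RoBERTa-style checkpoint and a plain-text corpus at ./corpus.txt (the
checkpoint name, file path, and AutoModelForMaskedLM class are assumptions,
not taken from the snippet):

from transformers import AutoModelForMaskedLM

set_seed(42)

tokenizer = AutoTokenizer.from_pretrained('roberta-base')
model = AutoModelForMaskedLM.from_pretrained('roberta-base')

# One tokenized example per non-empty input line, truncated to block_size.
dataset = LineByLineTextDataset(
    tokenizer=tokenizer, file_path='./corpus.txt', block_size=128)

# Randomly masks 15% of tokens in each batch for the MLM objective.
collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15)

training_args = TrainingArguments(output_dir='./mlm-out', num_train_epochs=1)

Trainer(model=model, args=training_args,
        data_collator=collator, train_dataset=dataset).train()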