Example #1
def chat(model_params, sent='0'):
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    kogptqa.load_parameters(model_params, ctx=ctx)
    sent_tokens = tok(sent)
    while True:
        q = input('user > ').strip()
        if q == 'quit':
            break
        q_tok = tok(q)
        a = ''
        a_tok = []
        # greedy decoding: extend the answer one token at a time until EOS
        while True:
            input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                    vocab[EOS, SENT] + vocab[sent_tokens] +
                                    vocab[EOS, S_TKN] +
                                    vocab[a_tok]).expand_dims(axis=0)
            pred = kogptqa(input_ids.as_in_context(ctx))
            gen = vocab.to_tokens(
                mx.nd.argmax(
                    pred,
                    axis=-1).squeeze().astype('int').asnumpy().tolist())[-1]
            if gen == EOS:
                break
            a += gen.replace('▁', ' ')  # '▁' marks a word boundary in SentencePiece
            a_tok = tok(a)
        print("Simsimi > {}".format(a.strip()))
Example #2
def Load_Model():
    # load the tokenizer, vocab and model once and cache them in globals
    global vocab_global
    global sent_tokens_global
    global kogptqa_global
    global tok_global
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    kogptqa.load_parameters("KoGPT2-chatbot\kogpt2_chat.params", ctx=ctx)
    sent_tokens = tok("0")
    vocab_global = vocab
    sent_tokens_global = sent_tokens
    kogptqa_global = kogptqa
    tok_global = tok
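Once Load_Model() has run, the cached globals can answer queries without reloading the model. A minimal sketch that reuses the same greedy-decoding loop as Example #1; the answer() helper itself is hypothetical, not part of the source:

def answer(text):
    # hypothetical helper: greedily decodes one reply using the cached globals
    q_tok = tok_global(text.strip())
    a = ''
    a_tok = []
    while True:
        input_ids = mx.nd.array(
            [vocab_global[U_TKN]] + vocab_global[q_tok] +
            vocab_global[EOS, SENT] + vocab_global[sent_tokens_global] +
            vocab_global[EOS, S_TKN] + vocab_global[a_tok]).expand_dims(axis=0)
        pred = kogptqa_global(input_ids.as_in_context(ctx))
        gen = vocab_global.to_tokens(
            mx.nd.argmax(pred, axis=-1)
            .squeeze().astype('int').asnumpy().tolist())[-1]
        if gen == EOS:
            break
        a += gen.replace('▁', ' ')
        a_tok = tok_global(a)
    return a.strip()

Load_Model()
print(answer("안녕"))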
Example #3
def train():
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    # tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)

    data = pd.read_csv('Chatbot_data/ChatbotData.csv')

    max_len = opt.max_seq_len
    train_set = chat_data(data, tok_path, vocab, max_len=max_len)
    batch_size = opt.batch_size

    train_dataloader = mx.gluon.data.DataLoader(train_set,
                                                batch_size=batch_size,
                                                num_workers=5,
                                                shuffle=True)
    kogptqa = KoGPT2Chat(model)
    kogptqa.hybridize()

    # softmax cross entropy loss for classification
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    loss_function.hybridize()

    num_epochs = opt.num_epoch
    lr = 5e-5
    trainer = gluon.Trainer(kogptqa.collect_params(), 'bertadam', {
        'learning_rate': lr,
        'epsilon': 1e-8,
        'wd': 0.01
    })
    # do not apply weight decay to LayerNorm and bias parameters
    for _, v in kogptqa.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [
        p for p in kogptqa.collect_params().values() if p.grad_req != 'null'
    ]
    # learning rate warmup
    accumulate = opt.accumulate
    step_size = batch_size * accumulate if accumulate else batch_size
    num_train_examples = len(train_set)
    num_train_steps = int(num_train_examples / step_size * num_epochs)
    warmup_ratio = 0.1
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    step_num = 0
    all_model_params = kogptqa.collect_params()

    log_interval = 50
    neg = -1e18  # large negative fill value for masked-out logits
    # Set grad_req if gradient accumulation is required
    if accumulate and accumulate > 1:
        for p in params:
            p.grad_req = 'add'

    for epoch_id in range(num_epochs):
        step_loss = 0
        for batch_id, (token_ids, mask, label) in enumerate(train_dataloader):
            if step_num < num_warmup_steps:
                new_lr = lr * step_num / num_warmup_steps
            else:
                non_warmup_steps = step_num - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)
            with mx.autograd.record():
                # move the batch to the target device (GPU or CPU)
                token_ids = token_ids.as_in_context(ctx)
                mask = mask.as_in_context(ctx)
                label = label.as_in_context(ctx)
                # forward computation
                out = kogptqa(token_ids)
                masked_out = nd.where(
                    mask.expand_dims(axis=2).repeat(repeats=out.shape[2],
                                                    axis=2), out,
                    neg * nd.ones_like(out))
                # loss over response tokens only, excluding MASK and PAD
                ls = loss_function(masked_out, label).sum() / mask.sum()
            # backward computation
            ls.backward()
            if not accumulate or (batch_id + 1) % accumulate == 0:
                trainer.allreduce_grads()
                nlp.utils.clip_grad_global_norm(params, 1)
                trainer.update(accumulate if accumulate else 1)
                step_num += 1
                if accumulate and accumulate > 1:
                    # set grad to zero for gradient accumulation
                    all_model_params.zero_grad()
            step_loss += ls.asscalar()
            if step_num % log_interval == 0 and step_num > 0:
                print(
                    '[Epoch {} Batch {}/{}] loss={:.4f}, lr={:.10f}, train ppl={:.3f}'
                    .format(epoch_id + 1, batch_id + 1, len(train_dataloader),
                            step_loss / log_interval, trainer.learning_rate,
                            math.exp(step_loss / log_interval)))
                step_loss = 0
    logging.info('saving model file to {}'.format(opt.model_params))
    kogptqa.save_parameters(opt.model_params)
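train() reads its hyperparameters from a module-level opt object. A minimal argparse sketch that would satisfy it; the flag names and defaults here are assumptions for illustration, not the source's actual values:

import argparse

parser = argparse.ArgumentParser(description='KoGPT2 chatbot training')
# every flag and default below is an assumption for illustration
parser.add_argument('--max_seq_len', type=int, default=32)
parser.add_argument('--batch_size', type=int, default=96)
parser.add_argument('--num_epoch', type=int, default=1)
parser.add_argument('--accumulate', type=int, default=1)
parser.add_argument('--model_params', type=str, default='kogpt2_chat.params')
opt = parser.parse_args()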
Example #4
class KoGPT2Chat(HybridBlock):
    def __init__(self, kogpt2, prefix=None, params=None):
        super(KoGPT2Chat, self).__init__(prefix=prefix, params=params)
        self.kogpt2 = kogpt2

    def hybrid_forward(self, F, inputs):
        # (batch, seq_len, hiddens)
        output, _ = self.kogpt2(inputs)
        return output


if mx.context.num_gpus() > 0:
    ctx = mx.gpu()
else:
    ctx = mx.cpu()
    
tok_path = get_tokenizer()
model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
kogptqa = KoGPT2Chat(model)
# kogptqa.load_parameters("Kogpt2_chat.params", ctx=ctx)
def chat(text="", sent='0'):
    sent_tokens = tok(sent)
    cnt = 0
    q = text.strip()
    q_tok = tok(q)
    a = ''
    a_tok = []
    # greedy decoding, capped at 25 generated tokens
    # (the original condition `while cnt>25` never runs; the loop body below
    # is completed to match the identical decoding loop in Example #1)
    while cnt < 25:
        cnt += 1
        input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                vocab[EOS, SENT] + vocab[sent_tokens] +
                                vocab[EOS, S_TKN] +
                                vocab[a_tok]).expand_dims(axis=0)
        pred = kogptqa(input_ids.as_in_context(ctx))
        gen = vocab.to_tokens(
            mx.nd.argmax(
                pred,
                axis=-1).squeeze().astype('int').asnumpy().tolist())[-1]
        if gen == EOS:
            break
        a += gen.replace('▁', ' ')
        a_tok = tok(a)
    return a.strip()
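With trained parameters loaded (uncomment the load_parameters call above, or point it at your own file), chat() returns a single reply rather than running an interactive loop:

kogptqa.load_parameters("kogpt2_chat.params", ctx=ctx)  # illustrative path
print(chat("안녕하세요"))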