Example #1
    def predict(self):
        f = open("model/data_map.pkl", "rb")
        maps = cPickle.load(f)
        f.close()
        self.batch_size = 1
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(FLAGS.spm)
        self.train_length = 10

        self.tag_map = maps.get("tag_map", {})
        self.nums_tags = len(self.tag_map.values())
        self.__creat_model()
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("[->] restore model")
                self.saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("[->] no model, initializing")
                sess.run(tf.global_variables_initializer())

            trans = self.trans.eval()
            dataset = []

            with open('data/test_raw_big.txt', 'r', encoding="utf-8") as f:
                for data in f.readlines():
                    ent, raw_con, info = data.replace('\n', '').split('\t')
                    dataset.append([ent, raw_con, info])
            for ele in dataset:
                info = ele[2]
                text = info

                feed = self.prepare_xlnet_pred_data(text)

                paths, length = sess.run([self.pred_ids, self.length],
                                         feed_dict=feed)

                print(format_tags(paths[0], self.tag_map))
                org = get_tags(paths[0], "", self.tag_map)
                org_entity = format_result(org, text, "")
                per = get_tags(paths[0], "", self.tag_map)
                per_entity = format_result(per, text, "")
                loc = get_tags(paths[0], "", self.tag_map)
                loc_entity = format_result(loc, text, "")

                resp = org_entity["entities"] + per_entity[
                    "entities"] + loc_entity["entities"]
                ele.append(str(resp))

            with open('data/test_result.txt', 'w', encoding="utf-8") as f1:
                for ele in dataset:
                    f1.write(ele[2])
                    f1.write('\t')
                    f1.write(ele[1])
                    f1.write('\t')
                    f1.write(ele[3])
                    f1.write('\n')
Example #2
File: model.py Project: lisakk/ChineseNER
    def predict(self):
        f = open("data/data_map.pkl", "rb")
        maps = cPickle.load(f)
        f.close()
        self.vocab = maps.get("vocab", {})
        self.tag_map = maps.get("tag_map", {})
        self.nums_tags = len(self.tag_map.values())
        self.input_size = maps.get("input_size", 10000) + 1
        self.__creat_model()
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("[->] restore model")
                self.saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("[->] no model, initializing")
                sess.run(tf.global_variables_initializer())

            trans = self.trans.eval()
            while True:
                text = input(" > ")
                feed = self.prepare_pred_data(text)

                logits, length = sess.run([self.logits, self.length],
                                          feed_dict=feed)
                paths = self.decode(logits, length, trans)
                org = get_tags(paths[0], "ORG", self.tag_map)
                org_entity = format_result(org, text, "ORG")
                per = get_tags(paths[0], "PER", self.tag_map)
                per_entity = format_result(per, text, "PER")

                resp = org_entity["entities"] + per_entity["entities"]
                print(json.dumps(resp, indent=2, ensure_ascii=False))
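
Note: the get_tags / format_result helpers that most of these NER examples call are not shown on this page. Below is a minimal sketch of compatible implementations, assuming a BIO/BIOES-style tag scheme; the tag prefixes and return shape are inferred from the call sites above, not taken from any of the projects (some examples, e.g. #4 and #11, instead expect format_result to return a plain list of entity dicts).

def get_tags(path, tag, tag_map):
    """Collect [begin, end] index pairs for one entity type from a tag-id path."""
    begin_id = tag_map.get("B-" + tag)
    end_id = tag_map.get("E-" + tag)
    spans, begin = [], -1
    for i, tag_id in enumerate(path):
        if tag_id == begin_id:
            begin = i
        elif tag_id == end_id and begin >= 0:
            spans.append([begin, i])
            begin = -1
    return spans

def format_result(spans, text, tag):
    """Turn index pairs into entity dicts over the original text."""
    entities = [{"start": b, "stop": e + 1, "word": text[b:e + 1], "type": tag}
                for b, e in spans]
    return {"entities": entities}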
Example #3
 def act(self, action):
     assert (self.context.strip() + action.strip())
     assert (settings.getint('top-keks') is not None)
     self.actions.append(format_result(action))
     result = self.generator.generate(
         self.get_story() + action,
         self.context + ' '.join(self.memory),
         temperature=settings.getfloat('temp'),
         top_p=settings.getfloat('top-p'),
         top_k=settings.getint('top-keks'),
         repetition_penalty=settings.getfloat('rep-pen'))
     self.results.append(format_result(result))
     return self.results[-1]
Example #4
File: main.py Project: whongzhong/CHI_NER
    def predict(self, input_str="", input_path=None):
        if input_path is not None:
            tests = pd.read_csv(input_path)
            with open('output.txt', 'w', encoding='utf-8') as o:
                #o.write('id,aspect,opinion\n')
                for ids in range(1, 2235):
                    input_str = self.get_string(
                        str(tests.loc[ids - 1:ids - 1, ['Review']]))
                    index = int(
                        self.get_string(str(tests.loc[ids - 1:ids - 1,
                                                      ['id']])))
                    input_vec = [self.vocab.get(i, 0) for i in input_str]
                    # convert to tensor
                    if self.use_gpu:  # GPU acceleration
                        sentences = torch.tensor(input_vec).view(1, -1).cuda()
                    else:
                        sentences = torch.tensor(input_vec).view(1, -1)
                    _, paths = self.model(sentences)

                    entities = []
                    for tag in self.tags:
                        tags = get_tags(paths[0], tag, self.tag_map)
                        entities += format_result(tags, input_str, tag)
                    entities = sorted(entities, key=lambda x: x['start'])
                    #print(str(index) + "  " + input_str + " " +str(len(entities)))
                    for entity in entities:
                        #print(entity)
                        o.write(
                            str(index) + ',' + entity['type'] + ',' +
                            entity['word'] + '\n')
        else:
            if not input_str:
                input_str = input("请输入文本: ")
            input_vec = [self.vocab.get(i, 0) for i in input_str]
            # convert to tensor
            if self.use_gpu:  # GPU acceleration
                sentences = torch.tensor(input_vec).view(1, -1).cuda()
            else:
                sentences = torch.tensor(input_vec).view(1, -1)
            _, paths = self.model(sentences)

            entities = []
            for tag in self.tags:
                tags = get_tags(paths[0], tag, self.tag_map)
                entities += format_result(tags, input_str, tag)
            return entities
Example #5
def decode(s, **kwargs):
    if s.startswith('0b'):
        s = int(s[2:], 2)
    else:
        s = int(s, 2)
    result = long_to_bytes(s)
    return format_result(s, hex(s),
                         str(result, 'utf-8', errors='backslashreplace'))
Example #6
    def test(self):
        with torch.no_grad():
            id2vocab = {self.vocab[i]: i for i in self.vocab}
            print(len(id2vocab))
            f = open('./result/test_tag.json', 'w')
            total_matrix = np.zeros(
                [len(self.tags), 3]
            )  # rows: component, disease&symptom, people; columns: recall, precision, f1
            count = 0
            for batch in self.dev_manager.get_batch():
                count += 1
                print(count)
                #                 print(type(items))
                sentences, labels, length = zip(*batch)
                #             sentences, labels, length = zip(*self.dev_batch.__next__())
                #                 print('I am in')
                strs = [[id2vocab[w] for w in s] for s in sentences]
                #                 print(strs)
                #                 print(len(sentences),len(sentences[0]),len(sentences[5]))
                _, paths = self.model(sentences)
                #                 print("\teval")
                #                 print('path',len(paths),len(paths[0]),len(paths[1]))
                for i in range(len(self.tags)):
                    recall, precision, f1 = f1_score(labels, paths,
                                                     self.tags[i],
                                                     self.model.tag_map)
                    total_matrix[i][0] += recall
                    total_matrix[i][1] += precision
                    total_matrix[i][2] += f1
                entities = []
                for i in range(len(paths)):
                    tmp = []

                    for tag in self.tags:
                        tags = get_tags(paths[i], tag, self.tag_map)
                        tmp += format_result(tags, strs[i], tag)
                    entities.append(tmp)

                # print(entities)
                for i in range(len(entities)):
                    dic = {
                        'sentense': ''.join(strs[i]),
                        'entities': entities[i]
                    }
                    json.dump(dic, f, ensure_ascii=False)


                    # f.write(''.join(strs[i]) + '##### entities found #####' + '&'.join(entities[i]) + '\n')
            total_matrix /= count
            #             print(total_matrix)
            for i in range(len(self.tags)):
                print(
                    "{}\tcount\t{}\trecall {:.2f}\tprecision {:.2f}\tf1 {:.2f}"
                    .format(self.tags[i], count, total_matrix[i][0],
                            total_matrix[i][1], total_matrix[i][2]))
            f.close()
Example #7
def from_query(request):
    """
    Serve an index page that knows a user's location from their browser,
    with details passed in via query parameters.
    """
    lat = float(request.GET.get("lat", 0.0))
    lon = float(request.GET.get("lon", 0.0))
    conditions = weather.current_conditions(lat, lon)
    raining = weather.is_it_raining_at(lat, lon, conditions)
    return utils.format_result(raining, (lat, lon), conditions, "Location determined from your browser")
Example #8
    def predict(self):
        f = open("model/data_map.pkl", "rb")
        maps = cPickle.load(f)
        f.close()
        self.batch_size = 1
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(FLAGS.spm)
        self.train_length = 10

        self.tag_map = maps.get("tag_map", {})
        self.nums_tags = len(self.tag_map.values())
        self.__creat_model()
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("[->] restore model")
                self.saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("[->] no model, initializing")
                sess.run(tf.global_variables_initializer())

            trans = self.trans.eval()
            while True:
                text = input(" > ")

                feed = self.prepare_xlnet_pred_data(text)

                paths, length = sess.run([self.pred_ids, self.length],
                                         feed_dict=feed)

                print(format_tags(paths[0], self.tag_map))
                org = get_tags(paths[0], "ORG", self.tag_map)
                org_entity = format_result(org, text, "ORG")
                per = get_tags(paths[0], "PER", self.tag_map)
                per_entity = format_result(per, text, "PER")
                loc = get_tags(paths[0], "LOC", self.tag_map)
                loc_entity = format_result(loc, text, "LOC")

                resp = org_entity["entities"] + per_entity[
                    "entities"] + loc_entity["entities"]
                print(json.dumps(resp, indent=2, ensure_ascii=False))
Example #9
File: main.py Project: 513415184/nlp
    def predict(self, tag, input_str=""):
        model.load_state_dict(torch.load("./model/params.pkl"))
        if not input_str:
            input_str = input("请输入文本: ")
        input_vec = [word2id.get(i, 0) for i in input_str]
        # convert to tensor
        sentences = torch.tensor(input_vec).view(1, -1)
        paths = model(sentences)

        entities = []
        tags = get_tags(paths[0], tag, tag2id)
        entities += format_result(tags, input_str, tag)
        print(entities)
Example #10
 def print_story(self, wrap=True, color=True):
     first_result = format_result(self.actions[0] + ' ' + self.results[0])
     col1 = 'user-text' if color else None
     col2 = 'ai-text' if color else None
     output(self.context, col1, first_result, col2, wrap=wrap)
     maxactions = len(self.actions)
     maxresults = len(self.results)
     for i in range(1, max(maxactions, maxresults)):
         if i < maxactions and self.actions[i].strip() != "":
             caret = "> " if re.match("^[Yy]ou +", self.actions[i]) else ""
             output(caret + self.actions[i], col1, wrap=wrap)
         if i < maxresults and self.results[i].strip() != "":
             output(self.results[i], col2, wrap=wrap)
Example #11
    def predict(self, input_str=""):
        if not input_str:
            input_str = input("请输入文本: ")
        input_vec = [self.vocab.get(i, 0) for i in input_str]
        # convert to tensor
        sentences = torch.tensor(input_vec).view(1, -1)
        _, paths = self.model(sentences)

        entities = []
        for tag in self.tags:
            tags = get_tags(paths[0], tag, self.tag_map)
            entities += format_result(tags, input_str, tag)
        return entities
Example #12
 def print_action_result(self, i, wrap=True, color=True):
     col1 = 'user-text' if color else None
     col2 = 'ai-text' if color else None
     if i == 0 or len(self.actions) == 1:
         start = format_result(self.context + ' ' + self.actions[0])
         result = format_result(self.results[0])
         is_start_end = re.match(r"[.!?]\s*$",
                                 start)  # if start ends logically
         is_result_continue = re.match(
             r"^\s*[a-z.!?,\"]", result)  # if result is a continuation
         sep = ' ' if not is_start_end and is_result_continue else '\n'
         if not self.actions[0]:
             output(self.context, col1, self.results[0], col2, sep=sep)
         else:
             output(self.context, col1)
             output(self.actions[0], col1, self.results[0], col2, sep=sep)
     else:
         if i < len(self.actions) and self.actions[i].strip() != "":
             caret = "> " if re.match(
                 r"^ *you +", self.actions[i], flags=re.I) else ""
             output(format_result(caret + self.actions[i]), col1, wrap=wrap)
         if i < len(self.results) and self.results[i].strip() != "":
             output(format_result(self.results[i]), col2, wrap=wrap)
Example #13
def encode(s, **kwargs):
    if not kwargs['key']:
        print("Key not provided")
        return None
    if s.startswith('0x'):
        s = int(s[2:], 16)
    key = kwargs['key']
    if key.startswith('0x'):
        key = int(key[2:], 16)
    if isint(s) and isint(key):
        result = xor_int(s, key)
    else:
        result = xor_strings(s, key)
    return format_result(result, hex(result),
                         long_to_bytes(result).decode('latin-1'))
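
For instance, assuming isint checks for integers, xor_int XORs two integers, and long_to_bytes comes from PyCryptodome (hypothetical call, not from the project):

# encode('0x41', key='0x1') -> format_result(0x40, '0x40', '@')
# since 0x41 ^ 0x01 == 0x40 and long_to_bytes(0x40) == b'@'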
Example #14
def predict_model(model, word2id, tag2id):
    input_str = input("请输入文本: ")
    # map each character of the input to its id
    input2id = [word2id.get(i, 0) for i in input_str]
    # convert to a tensor with shape [1, -1]
    sentences = torch.tensor(input2id).view(1, -1)
    # convert back to a plain list
    sentences = sentences.tolist()

    _, paths = model(sentences)

    entities = []
    for tag in ["ORG", "PER"]:
        positions = get_entity_position(paths[0], tag, tag2id)
        entities += format_result(positions, input_str, tag)

    # print the result
    print(entities)
Example #15
    def perform_result(self, items):
        """
        Фильтрация и сортировка результатов поиска
        :param items: максимальное число элементов в выдаче
        :return:
        """
        result = [i for i in self.counts.items()
                  if len(i[1]) > 0]  # убираем пустые результаты

        def sort_order(dict_pair):
            return (
                len(dict_pair[1]),  # number of distinct matched words
                sum(map(lambda x: x[1],
                        dict_pair[1].items()))  # total number of occurrences
            )

        result.sort(key=sort_order, reverse=True)  # best results first
        return map(lambda x: format_result(x[0], x[1]), result[:items])
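
A standalone illustration of the ranking with hypothetical data (not project code): results are ordered first by how many distinct query words matched, then by total occurrences.

counts = {"doc1": {"cat": 2}, "doc2": {"cat": 1, "dog": 3}, "doc3": {}}
result = [i for i in counts.items() if len(i[1]) > 0]  # "doc3" is dropped
result.sort(key=lambda p: (len(p[1]), sum(p[1].values())), reverse=True)
# "doc2" ranks first: two distinct words matched, four occurrences in total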
Example #16
    def predict(self, input_str=""):
        if not input_str:
            input_str = input("请输入文本: ")
        # look up each character's index in the vocab
        input_vec = [self.vocab.get(i, 0) for i in input_str]
        # convert to tensor
        sentences = torch.tensor(input_vec, dtype=torch.long).view(1, -1)
        sentences = sentences.cuda()
        # paths: predicted tag indices, shape [1, 1]
        _, paths = self.model(sentences)

        entities = []
        # "tags": ["ORG", "PER"]
        for tag in self.tags:
            tags = get_tags(paths[0], tag, self.tag_map)
            entities += format_result(tags, input_str, tag)
        print(entities)
        print(json.dumps(entities, indent=4, ensure_ascii=False))
        return entities
Example #17
    def predict(self, path):  #, input_str=""):
        #         if not input_str:
        #             input_str = input("请输入文本: ")
        sentences = []
        with open('./data/' + path + '.txt', 'r', encoding='utf-8') as f:
            for i in f:
                sentences += i.strip().split('。')
        f = open('./result/tag_' + path + '.json', 'w')
        for input_str in sentences:
            input_vec = [self.vocab.get(i, 0) for i in input_str]
            # convert to tensor (use a new name: `sentences` is the list being iterated)
            sentence_tensor = torch.tensor(input_vec).view(1, -1)
            _, paths = self.model(sentence_tensor)

            entities = []
            for tag in self.tags:
                tags = get_tags(paths[0], tag, self.tag_map)
                entities += format_result(tags, input_str, tag)
            dic = {'sentense': input_str, 'entities': entities}
            json.dump(dic, f, ensure_ascii=False)
        f.close()
Example #18
def predict(text, config, params, is_export=False):
    """模型预测。"""
    # 读取词典
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])

    # build the model
    model = BiLSTMCRF(hidden_num=params["hidden_num"],
                      vocab_size=len(vocab2id),
                      label_size=len(tag2id),
                      embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])

    # preprocess the input
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]],
        padding='post',
        maxlen=params["maxlen"])

    # run prediction
    result = model.predict(dataset)[0]
    result = np.argmax(result, axis=-1)
    result = [id2tag[i] for i in result]
    print(result)
    # post-process the result
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))

    if is_export:
        # export the model
        tf.keras.models.save_model(model,
                                   config["export_dir"],
                                   overwrite=True,
                                   include_optimizer=True,
                                   save_format=None,
                                   options=None)
Example #19
def predict(text, config, params):
    """模型预测。"""
    # 读取词典
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])

    # build the model
    model = BiLSTMCRF(
        hidden_num=params["hidden_num"], vocab_size=len(vocab2id),
        label_size=len(tag2id), embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])

    # preprocess the input
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]], padding='post')

    # run prediction
    result = model.predict(dataset)[0]
    result = np.argmax(result, axis=-1)
    result = [id2tag[i] for i in result]
    print(result)
    # post-process the result
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
Example #20
import json


vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, label_sequences = tokenize(args.test_path, vocab2id, tag2id)



optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id),
                 label_size=len(tag2id), embedding_size=args.embedding_size)
# restore model
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))


while True:
    text = input("input:")
    dataset = tf.keras.preprocessing.sequence.pad_sequences([[vocab2id.get(char, 0) for char in text]], padding='post')
    print(dataset)
    logits, text_lens = model.predict(dataset)
    paths = []
    for logit, text_len in zip(logits, text_lens):
        viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)
    print(paths[0])
    print([id2tag[id] for id in paths[0]])

    entities_result = format_result(list(text), [id2tag[id] for id in paths[0]])
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
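
Here tf_ad is presumably tensorflow_addons: tfa.text.viterbi_decode takes one sequence's emission scores plus the CRF transition matrix and returns the best-scoring tag path with its score, which is why the snippet decodes per sequence instead of over the whole padded batch.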
Example #21
def sample_sequence(
        model,
        length,
        context,
        temperature=1,
        top_k=0,
        top_p=0.9,
        repetition_penalty=1.0,
        device="cpu",
        stop_tokens=None,
        tokenizer=None
):
    """Actually generate the tokens"""
    logger.debug(
        'temp: {}    top_k: {}    top_p: {}    rep-pen: {}'.format(temperature, top_k, top_p, repetition_penalty))
    context_tokens = context
    context = torch.tensor(context, dtype=torch.long, device=device)
    # context = context.repeat(num_samples, 1)
    generated = context
    USE_PAST = True
    next_token = context
    pasts = None
    clines = 0
    with torch.no_grad():
        for j in range(length):
            # why would we ever not use past?
            # is generated and next_token always same thing?
            if not USE_PAST:
                input_ids_next = generated
                pasts = None
            else:
                input_ids_next = next_token

            # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
            logits, pasts = model(input_ids=input_ids_next, past=pasts)
            logits = logits[-1, :].float()

            # TODO: rewrite this logic
            if settings.getboolean('sparse-gen'):
                probs = entmax_bisect(logits, dim=-1, alpha=settings.getfloat('sparse-level'))
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                # Originally the order was Temperature, Repetition Penalty, then top-k/p
                if settings.getboolean('top-p-first'):
                    logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

                logits = logits / (temperature if temperature > 0 else 1.0)

                # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
                for k in set(generated.tolist()):
                    logits[k] /= repetition_penalty

                if not settings.getboolean('top-p-first'):
                    logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

                if temperature == 0:  # greedy sampling:
                    next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
                else:
                    next_token = torch.multinomial(
                        F.softmax(logits, dim=-1), num_samples=1
                    )
            generated = torch.cat((generated, next_token), dim=-1)
            # Decode into plain text
            o = generated[len(context_tokens):].tolist()
            generated.text = tokenizer.decode(
                o, clean_up_tokenization_spaces=False, skip_special_tokens=True
            )
            if use_ptoolkit():
                clear_lines(clines)
                generated.text = format_result(generated.text)
                clines = output(generated.text, "ai-text")
            if (
                    (stop_tokens is not None)
                    and (j > 4)
                    and (next_token[0] in stop_tokens)
            ):
                # Why require a minimum number of tokens (j > 4)? The model sometimes starts with whitespace, which gets stripped anyway; generating a few tokens first means we don't stop merely because of "\n " or similar.
                logger.debug(
                    "Stopping generation as we found stop tokens. One of `%s`, in '%s'. token generated `%s`",
                    stop_tokens,
                    next_token,
                    j,
                )
                break
    clear_lines(clines)
    return generated
Example #22
def decode(s, **kwargs):
    bs = b64decode(s)
    long_bs = bytes_to_long(bs)
    return format_result(long_bs, hex(long_bs), str(bs))
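
For instance (hypothetical input; b64decode is from the standard library's base64, bytes_to_long from PyCryptodome):

from base64 import b64decode
from Crypto.Util.number import bytes_to_long

bs = b64decode("aGVsbG8=")   # b'hello'
long_bs = bytes_to_long(bs)  # 448378203247
hex(long_bs)                 # '0x68656c6c6f'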
Example #23
def sample_sequence(model,
                    length,
                    context,
                    temperature=1,
                    top_k=0,
                    top_p=0.9,
                    repetition_penalty=1.0,
                    repetition_penalty_range=512,
                    repetition_penalty_slope=3.33,
                    device="cpu",
                    stop_tokens=None,
                    tokenizer=None):
    """Actually generate the tokens"""
    logger.debug(
        'temp: {}    top_k: {}    top_p: {}    rep-pen: {}    rep-pen-range: {}    rep-pen-slope: {}'
        .format(temperature, top_k, top_p, repetition_penalty,
                repetition_penalty_range, repetition_penalty_slope))
    context_tokens = context
    context = torch.tensor(context, dtype=torch.long, device=device)
    # context = context.repeat(num_samples, 1)
    generated = context
    USE_PAST = True
    next_token = context
    pasts = None
    clines = 0

    penalty = None
    if repetition_penalty_range is not None and repetition_penalty_slope is not None and repetition_penalty_range > 0:
        # linear ramp over [-1, 1] across the penalty window
        penalty = (torch.arange(repetition_penalty_range) /
                   (repetition_penalty_range - 1)) * 2. - 1
        # reshape the ramp with the slope (steeper slope -> sharper transition)
        penalty = (repetition_penalty_slope *
                   penalty) / (1 + torch.abs(penalty) *
                               (repetition_penalty_slope - 1))
        # map into [1, repetition_penalty]: the newest tokens get the full penalty
        penalty = 1 + ((penalty + 1) / 2) * (repetition_penalty - 1)

    with torch.no_grad():
        for j in range(length):
            # why would we ever not use past?
            # is generated and next_token always same thing?
            if not USE_PAST:
                input_ids_next = generated
                pasts = None
            else:
                input_ids_next = next_token

            # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
            model_kwargs = {"past": pasts, "use_cache": True}
            model_inputs = model.prepare_inputs_for_generation(
                generated.unsqueeze(0), **model_kwargs)
            model_outputs = model(**model_inputs, return_dict=True)
            logits, pasts = model_outputs.logits, model_outputs.past_key_values
            logits = logits[0, -1, :].float()

            # Originally the order was Temperature, Repetition Penalty, then top-k/p
            if settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            logits = logits / (temperature if temperature > 0 else 1.0)

            # repetition penalty from CTRL (https://arxiv.org/abs/1909.05858) plus range limit
            if repetition_penalty != 1.0:
                if penalty is not None:
                    penalty_len = min(generated.shape[0],
                                      repetition_penalty_range)
                    penalty_context = generated[-repetition_penalty_range:]
                    score = torch.gather(logits, 0, penalty_context)
                    penalty = penalty.type(score.dtype).to(score.device)
                    penalty_window = penalty[-penalty_len:]
                    score = torch.where(score < 0, score * penalty_window,
                                        score / penalty_window)
                    logits.scatter_(0, penalty_context, score)
                else:
                    score = torch.gather(logits, 0, generated)
                    score = torch.where(score < 0, score * repetition_penalty,
                                        score / repetition_penalty)
                    logits.scatter_(0, generated, score)

            if not settings.getboolean('top-p-first'):
                logits = top_k_top_p_filtering(logits,
                                               top_k=top_k,
                                               top_p=top_p)

            if temperature == 0:  # greedy sampling:
                next_token = torch.argmax(logits, dim=-1).unsqueeze(-1)
            else:
                next_token = torch.multinomial(F.softmax(logits, dim=-1),
                                               num_samples=1)
            generated = torch.cat((generated, next_token), dim=-1)
            # Decode into plain text
            o = generated[len(context_tokens):].tolist()
            generated.text = tokenizer.decode(
                o,
                clean_up_tokenization_spaces=False,
                skip_special_tokens=True)
            if use_ptoolkit():
                clear_lines(clines)
                generated.text = format_result(generated.text)
                clines = output(generated.text, "ai-text")
            if ((stop_tokens is not None) and (j > 4)
                    and (next_token[0] in stop_tokens)):
                # Why require a minimum number of tokens (j > 4)? The model sometimes starts with whitespace, which gets stripped anyway; generating a few tokens first means we don't stop merely because of "\n " or similar.
                logger.debug(
                    "Stopping generation as we found stop tokens. One of `%s`, in '%s'. token generated `%s`",
                    stop_tokens,
                    next_token,
                    j,
                )
                break
    clear_lines(clines)
    return generated
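
A standalone sketch of the ramp above (same arithmetic, not project code) shows how the range-limited variant scales the CTRL penalty from about 1.0 for the oldest token in the window up to the full value for the newest:

import torch

rng, slope, rep_pen = 512, 3.33, 1.2
penalty = (torch.arange(rng) / (rng - 1)) * 2. - 1   # linear ramp over [-1, 1]
penalty = (slope * penalty) / (1 + torch.abs(penalty) * (slope - 1))
penalty = 1 + ((penalty + 1) / 2) * (rep_pen - 1)    # mapped into [1.0, rep_pen]
print(penalty[0].item(), penalty[-1].item())         # ~1.0 ... 1.2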