import numpy
from spacy.tokens import Span
from sense2vec import Sense2VecComponent


def test_component_similarity(doc):
    s2v = Sense2VecComponent(doc.vocab, shape=(4, 4))
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("hello|INTJ", vector)
    s2v.s2v.add("world|NOUN", vector)
    doc = s2v(doc)
    assert doc[0]._.s2v_similarity(doc[1]) == 1.0
    assert doc[1:3]._.s2v_similarity(doc[1:3]) == 1.0
def test_component_to_from_bytes(doc):
    s2v = Sense2VecComponent(doc.vocab, shape=(1, 4))
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("world|NOUN", vector)
    assert "world|NOUN" in s2v.s2v
    assert "world|GPE" not in s2v.s2v
    doc = s2v(doc)
    assert doc[0]._.in_s2v is False
    assert doc[1]._.in_s2v is True
    s2v_bytes = s2v.to_bytes()
    new_s2v = Sense2VecComponent(doc.vocab).from_bytes(s2v_bytes)
    new_s2v.first_run = False
    assert "world|NOUN" in new_s2v.s2v
    assert numpy.array_equal(new_s2v.s2v["world|NOUN"], vector)
    assert "world|GPE" not in new_s2v.s2v
    new_s2v.s2v.vectors.resize((2, 4))
    new_s2v.s2v.add("hello|INTJ", vector)
    assert doc[0]._.in_s2v is False
    new_doc = new_s2v(doc)
    assert new_doc[0]._.in_s2v is True
def test_component_lemmatize(doc):
    lookups = doc.vocab.lookups.add_table("lemma_lookup")
    lookups["world"] = "wrld"
    s2v = Sense2VecComponent(doc.vocab, shape=(4, 4), lemmatize=True)
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("hello|INTJ", vector)
    s2v.s2v.add("world|NOUN", vector)
    s2v.s2v.add("wrld|NOUN", vector)
    doc = s2v(doc)
    assert doc[0]._.s2v_key == "hello|INTJ"
    assert doc[1].lemma_ == "wrld"
    assert doc[1]._.s2v_key == "wrld|NOUN"
    lookups["hello"] = "hll"
    assert doc[0].lemma_ == "hll"
    assert doc[0]._.s2v_key == "hello|INTJ"
    s2v.s2v.add("hll|INTJ", vector)
    assert doc[0]._.s2v_key == "hll|INTJ"
    new_s2v = Sense2VecComponent().from_bytes(s2v.to_bytes())
    assert new_s2v.s2v.cfg["lemmatize"] is True
    doc.vocab.lookups.remove_table("lemma_lookup")
def test_component_attributes(doc):
    s2v = Sense2VecComponent(doc.vocab, shape=(10, 4))
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("world|NOUN", vector, 123)
    doc = s2v(doc)
    assert doc[0]._.s2v_key == "hello|INTJ"
    assert doc[1]._.s2v_key == "world|NOUN"
    assert doc[0]._.in_s2v is False
    assert doc[1]._.in_s2v is True
    assert doc[0]._.s2v_freq is None
    assert doc[1]._.s2v_freq == 123
    assert numpy.array_equal(doc[1]._.s2v_vec, vector)
def test_component_attributes_ents(doc):
    s2v = Sense2VecComponent(doc.vocab, shape=(10, 4))
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("world|NOUN", vector)
    s2v.s2v.add("world|GPE", vector)
    doc = s2v(doc)
    assert len(doc._.s2v_phrases) == 0
    doc.ents = [Span(doc, 1, 2, label="GPE")]
    assert len(doc._.s2v_phrases) == 1
    phrase = doc._.s2v_phrases[0]
    assert phrase._.s2v_key == "world|GPE"
    assert phrase[0]._.s2v_key == "world|NOUN"
    assert phrase._.in_s2v is True
    assert phrase[0]._.in_s2v is True
def test_component_lemmatize(doc):
    def lemmatize(doc, lookups):
        for token in doc:
            token.lemma_ = lookups.get(token.text, token.text)
        return doc

    s2v = Sense2VecComponent(doc.vocab, shape=(4, 4), lemmatize=True)
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("hello|INTJ", vector)
    s2v.s2v.add("world|NOUN", vector)
    s2v.s2v.add("wrld|NOUN", vector)
    doc = lemmatize(doc, {"world": "wrld"})
    doc = s2v(doc)
    assert doc[0]._.s2v_key == "hello|INTJ"
    assert doc[1].lemma_ == "wrld"
    assert doc[1]._.s2v_key == "wrld|NOUN"
    doc = lemmatize(doc, {"hello": "hll"})
    assert doc[0].lemma_ == "hll"
    assert doc[0]._.s2v_key == "hello|INTJ"
    s2v.s2v.add("hll|INTJ", vector)
    assert doc[0]._.s2v_key == "hll|INTJ"
    new_s2v = Sense2VecComponent().from_bytes(s2v.to_bytes())
    assert new_s2v.s2v.cfg["lemmatize"] is True
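# Minimal usage sketch, not part of the tests above: the tests build the
# component with an in-memory table, but in ordinary use the same extensions
# are read after loading pretrained vectors. The model name and vectors path
# below are placeholders, and spaCy v2.x is assumed (component instances can
# be passed directly to nlp.add_pipe).
import spacy
from sense2vec import Sense2VecComponent

nlp = spacy.load("en_core_web_sm")
s2v = Sense2VecComponent(nlp.vocab).from_disk("/path/to/s2v_reddit_2019_lg")
nlp.add_pipe(s2v)
doc = nlp("natural language processing")
for token in doc:
    if token._.in_s2v:
        print(token._.s2v_key, token._.s2v_freq)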
def __init__(self):
    self.nlp = spacy.load("en_core_web_lg")
    s2v = Sense2VecComponent('/path/to/reddit_vectors-1.1.0')
    self.nlp.add_pipe(s2v)
    self.elements = {}
    with open('ai_chatbot/scripts/QuestionDomain/csv/TAG_ELEMENT.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            self.elements[row[0]] = row[1:-1]
    self.ordinal = {}
    with open('ai_chatbot/scripts/QuestionDomain/csv/TAG_ORDINAL.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            self.ordinal[row[0]] = int(row[1])
import spacy
import numpy as np
import sense2vec
from sense2vec import Sense2VecComponent

nlp = spacy.load('en_core_web_lg')
# Note: the component is created without vectors here; in practice a sense2vec
# vectors package has to be supplied (e.g. loaded with .from_disk) before the
# extensions below can return results.
s2v = Sense2VecComponent()
nlp.add_pipe(s2v)

doc = nlp("A sentence about natural language processing.")
most_similar = doc[3:6]._.s2v_most_similar(3)
print(most_similar)
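# For reference: in sense2vec 1.x, s2v_most_similar returns a list of
# ((phrase, sense), score) tuples, so the result above can be unpacked as
# follows (the printed values are illustrative only, not actual output):
for (phrase, sense), score in most_similar:
    print(f"{phrase} ({sense}): {score:.3f}")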
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Doc, Span
from sense2vec import Sense2VecComponent

nlp = spacy.load("en_core_web_lg")
s2v = Sense2VecComponent(nlp.vocab).from_disk("C:/fyp/s2v_reddit_2019_lg")
nlp.add_pipe(s2v)

with open("api/v0/v01/input_list/input_list.txt", "r", encoding="utf-8") as f:
    TEXT = f.read()
doc = nlp(TEXT)


def list_of_files(doc):
    # Find all first letters in the doc and build a list of unique text file
    # names to use.
    text_files = list()
    for token in doc:
        if token.text.isalpha():
            text_files.append("text" + (str(token.text[0])).lower() + ".txt")
    return set(text_files)


def find_all_begining_with(doc):
    # Take the entire vocab and find all words in it that begin with the letter
    # each of the text files (created in list_of_files) is named for, then
    # create each text file and populate it with its own vocab.
    with open("api/v0/v01/vocab/vocab.txt", "r", encoding="utf-8") as f:
        vocab_list = list(f.readlines())
    # vocab_list = ["ant", "apple", "arch", "arm", "army", "baby", "bag", "ball", "band", "basin", "basket", "bath", "bed", "bee", "bell", "berry", "bird", "blade", "board", "boat", "bone", "book", "boot", "bottle", "box", "boy", "brain", "brake", "branch", "brick", "bridge", "brush", "bucket", "bulb", "button", "cake", "camera", "card", "cart", "carriage", "cat", "chain", "cheese", "chest", "chin", "church", "circle", "clock", "cloud", "coat", "collar", "comb", "cord", "cow", "cup", "curtain", "cushion", "dog", "door", "drain", "drawer", "dress", "drop", "ear", "egg", "engine", "eye", "face", "farm", "feather", "finger", "fish", "flag", "floor", "fly", "foot", "fork", "fowl", "frame", "garden", "girl", "glove", "goat", "gun", "hair", "hammer", "hand", "hat", "head", "heart", "hook", "horn", "horse", "hospital", "house", "island", "jewel", "kettle", "key", "knee", "knife", "knot", "leaf", "leg", "library", "line", "lip", "lock", "map", "match", "monkey", "moon", "mouth", "muscle", "nail", "neck", "needle", "nerve", "net", "nose", "nut", "office", "orange", "oven", "parcel", "pen", "pencil", "picture", "pig", "pin", "pipe", "plane", "plate", "plough", "pocket", "pot", "potato", "prison", "pump", "rail", "rat", "receipt", "ring", "rod", "roof", "root", "sail", "school", "scissors", "screw", "seed", "sheep", "shelf", "ship", "shoe", "skin", "snake", "sock", "spade", "sponge", "spoon", "spring", "square", "stamp", "star", "station", "stem", "stick", "stocking", "stomach", "store", "street", "sun", "table", "tail", "thread", "throat", "thumb", "ticket", "toe", "tongue", "tooth", "town", "train", "tray", "tree", "trousers", "umbrella", "wall", "watch", "wheel", "whip", "whistle", "window", "wing", "wire", "worm"]
    text_files = list_of_files(doc)
    for file in text_files:
        with open(("api/v0/v01/textfiles/" + file), "w+",
for line in f:
    values = line.split()
    token = values[0]
    vector = np.asarray(values[1:], "float32")
    embeddings_dict[token] = vector

# Loading Word2Vec Embeddings
model = gensim.models.KeyedVectors.load_word2vec_format(
    '/content/drive/My Drive/GoogleNews-vectors-negative300.bin.gz', binary=True)
norm_model = gensim.models.KeyedVectors.load_word2vec_format(
    '/content/drive/My Drive/GoogleNews-vectors-negative300.bin.gz', binary=True)
norm_model.init_sims(replace=True)

# Loading Sense Embeddings
nlp = en_core_web_lg.load()
s2v = Sense2VecComponent(nlp.vocab).from_disk("/content/drive/My Drive/s2v_reddit_2019_lg")
nlp.add_pipe(s2v)

# Load training data
df = pd.read_csv('train_tsv.tsv', sep='\t',
                 names=["is_duplicate", "question1", "question2", "id"])
df = df.set_index('id')
df = df[(df['question1'].isna() == False) & (df['question2'].isna() == False)]
training_data = list(df['question1']) + list(df['question2'])
training_data = [preprocessing_pipeline(i) for i in training_data]
tfidf_vectorizer = tfidf(training_data)
vectorizer_bow = bag_of_words(training_data, 0)
vectorizer_ngram = bag_of_words(training_data, 1)
vectorizer_3gram = trigram(training_data)
noun_chunks_df.loc[i, 'root'] = chunk.root
noun_chunks_df.loc[i, 'root.text'] = chunk.root.text
noun_chunks_df.loc[i, 'root.dep_'] = chunk.root.dep_
noun_chunks_df.loc[i, 'root.head.text'] = chunk.root.head.text

print(noun_chunks_df[:20])

nlp = spacy.load('en_core_web_sm', disable=["tagger", "ner"])
train_df['parsed'] = train_df.Text[49500:50500].apply(nlp)
corpus = st.CorpusFromParsedDocuments(train_df[49500:50500], category_col='Score',
                                      parsed_col='parsed').build()

from sense2vec.vectors import VectorMap

s2v = Sense2VecComponent('data/reddit_vectors-1.1.0/reddit_vectors-1.1.0')
spacy_tok.add_pipe(s2v)

doc = spacy_tok(u"dessert.")
freq = doc[0]._.s2v_freq
vector = doc[0]._.s2v_vec
most_similar = doc[0]._.s2v_most_similar(5)
print(most_similar, freq)

doc = spacy_tok(u"burger")
most_similar = doc[0]._.s2v_most_similar(4)
print(most_similar)

train_df['tuples'] = train_df.apply(lambda row: (row['Text'], row['Score']), axis=1)
train = train_df['tuples'].tolist()
print(train[:1])
def tokenize(self):
    if self.texts is None:
        if not self.context:
            # Read in text data from text_file path
            self.texts = open(self.text_file).read().split('\n')
            self.texts = [str(t) for t in self.texts]
            print('Made texts')
        else:
            filename, file_ext = os.path.splitext(self.text_file)
            if file_ext == '.json':
                # Read in json data as dataframe
                # noinspection PyUnresolvedReferences
                df = pd.read_json(self.text_file, lines=True)
            else:
                # Read in tabular data as dataframe
                # noinspection PyUnresolvedReferences
                df = pd.read_csv(self.text_file, sep=self.sep, usecols=self.use_cols)
            # Extract the text
            text_col_name = self.use_cols[0]
            self.texts = df[text_col_name].values.astype(str).tolist()
            # Small memory reduction by deleting this
            del df[text_col_name]
            self.context_df = df

    # Get number of documents supplied
    self.n_docs = len(self.texts)
    # Init data as a bunch of zeros - shape [n_docs, max_len]
    self.data = np.zeros((self.n_docs, self.max_len), dtype=np.uint64)
    if not self.tokenizing_new:
        # Add the skip token to the vocab, creating a unique hash for it
        self.nlp.vocab.strings.add(self.skip_token)
        self.skip_token = self.nlp.vocab.strings[self.skip_token]
    self.data[:] = self.skip_token
    # Make array to store row numbers of documents that must be deleted
    self.purged_docs = []
    # This array will hold tokenized text data if it is asked for
    if self.save_tokenized_text_data:
        self.text_data = []
    if self.tokenize_sents:
        self.sentence_tokenize()
        return

    # If we want to merge phrases, we add the s2v component
    # to our pipe and it will do it for us.
    if self.merge:
        s2v = Sense2VecComponent('reddit_vectors-1.1.0')
        self.nlp.add_pipe(s2v)

    for i, doc in enumerate(
            self.nlp.pipe(self.texts, n_threads=self.n_threads, batch_size=10000)):
        # noinspection PyBroadException
        try:
            # Create temp list for holding doc text
            if self.save_tokenized_text_data:
                doc_text = []
            for token in doc:
                # TODO - determine if you want to leave spaces or replace with underscores
                # Replaces spaces between phrases with underscore
                # text = token.text.replace(" ", "_")
                # Get the string token for the given token type
                if self.token_type == 'lower':
                    _token = token.lower_
                elif self.token_type == 'lemma':
                    _token = token.lemma_
                else:
                    _token = token.orth_
                # Add token to spacy string list so we can use oov as known hash tokens
                if token.is_oov:
                    self.nlp.vocab.strings.add(_token)
                if self.save_tokenized_text_data:
                    doc_text.append(_token)
            if self.save_tokenized_text_data:
                self.text_data.append(doc_text)

            # Options for how to tokenize
            if self.token_type == 'lower':
                dat = doc.to_array([LOWER, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])
            elif self.token_type == 'lemma':
                dat = doc.to_array([LEMMA, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])
            else:
                dat = doc.to_array([ORTH, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])

            if len(dat) > 0:
                assert dat.min() >= 0, 'Negative indices reserved for special tokens'
                if self.skip_oov:
                    # Get indices of email, URL and oov tokens
                    idx = (dat[:, 1] > 0) | (dat[:, 2] > 0) | (dat[:, 3] > 0)
                else:
                    # Get indices of email and URL tokens
                    idx = (dat[:, 1] > 0) | (dat[:, 2] > 0)
                # Replace email and URL tokens with skip token
                dat[idx] = self.skip_token
                # Delete punctuation
                if self.delete_punc:
                    delete = np.where(dat[:, 4] == 1)
                    dat = np.delete(dat, delete, 0)
                if self.only_keep_alpha:
                    delete = np.where(dat[:, 5] == 0)
                    dat = np.delete(dat, delete, 0)
                length = min(len(dat), self.max_len)
                self.data[i, :length] = dat[:length, 0].ravel()
        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            filename = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print('\n\n')
            print(exc_type, filename, exc_tb.tb_lineno)
            self.purged_docs.append(i)
            continue

    # If necessary, delete documents that failed to tokenize correctly.
    self.data = np.delete(self.data, self.purged_docs, 0).astype(np.uint64)
    # Unique tokens
    self.uniques = np.unique(self.data)
    # Saved Spacy Vocab
    self.vocab = self.nlp.vocab
    # Making an idx to word mapping for vocab
    self.hash_to_word = {}
    # Insert padding id into the hash
    self.hash_to_word[self.skip_token] = '<SKIP>'
    # If lemma, insert pronoun ID into the hash
    if self.token_type == 'lemma':
        self.hash_to_word[self.nlp.vocab.strings['-PRON-']] = '-PRON-'
    for v in self.uniques:
        if v != self.skip_token:
            # noinspection PyPep8,PyBroadException
            try:
                if self.token_type == 'lower':
                    self.hash_to_word[v] = self.nlp.vocab[v].lower_
                elif self.token_type == 'lemma':
                    self.hash_to_word[v] = self.nlp.vocab[v].lemma_
                else:
                    self.hash_to_word[v] = self.nlp.vocab[v].orth_
            except Exception:
                pass
def tokenize(self):
    # This is here in case we want to tokenize more documents later
    if self.texts is None:
        if not self.context:
            # Read in text data from textfile path
            self.texts = open(self.textfile).read().split('\n')
            self.texts = [str(t) for t in self.texts]
            print("made texts")
        else:
            filename, file_extension = os.path.splitext(self.textfile)
            if file_extension == ".json":
                # Read in json data as dataframe
                df = pd.read_json(self.textfile, lines=True)
            else:
                # Read in data as dataframe
                df = pd.read_csv(self.textfile, sep=self.sep, usecols=self.usecols)
            # Extract the text
            text_col_name = self.usecols[0]
            self.texts = df[text_col_name].values.astype(str).tolist()
            # Small memory reduction by deleting this
            del df[text_col_name]
            self.context_df = df

    # Get number of documents supplied
    self.num_docs = len(self.texts)
    # Init data as a bunch of zeros - shape [num_texts, max_length]
    self.data = np.zeros((len(self.texts), self.max_length), dtype=np.uint64)
    if not self.tokenizing_new:
        # Add the skip token to the vocab, creating a unique hash for it
        self.nlp.vocab.strings.add(self.skip)
        self.skip = self.nlp.vocab.strings[self.skip]
    self.data[:] = self.skip
    # Make array to store row numbers of documents that must be deleted
    self.purged_docs = []
    # This array will hold tokenized text data if it is asked for
    if self.save_tokenized_text_data:
        self.text_data = []
    if self.tokenize_sentences:
        self.sentence_tokenize()
        return

    # If we want to merge phrases, we add the s2v component
    # to our pipe and it will do it for us.
    if self.merge:
        s2v = Sense2VecComponent('reddit_vectors-1.1.0')
        self.nlp.add_pipe(s2v)

    for row, doc in enumerate(
            self.nlp.pipe(self.texts, n_threads=self.num_threads, batch_size=10000)):
        try:
            # Create temp list for holding doc text
            if self.save_tokenized_text_data:
                doc_text = []
            # Loop through tokens in doc
            for token in doc:
                # TODO - determine if you want to leave spaces or replace with underscores
                # Replaces spaces between phrases with underscore
                # text = token.text.replace(" ", "_")
                # Get the string token for the given token type
                if self.token_type == "lower":
                    _token = token.lower_
                elif self.token_type == "lemma":
                    _token = token.lemma_
                else:
                    _token = token.orth_
                # Add token to spacy string list so we can use oov as known hash tokens
                if token.is_oov:
                    self.nlp.vocab.strings.add(_token)
                if self.save_tokenized_text_data:
                    doc_text.append(_token)
            if self.save_tokenized_text_data:
                self.text_data.append(doc_text)

            # Options for how to tokenize
            if self.token_type == "lower":
                dat = doc.to_array([LOWER, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])
            elif self.token_type == "lemma":
                dat = doc.to_array([LEMMA, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])
            else:
                dat = doc.to_array([ORTH, LIKE_EMAIL, LIKE_URL, IS_OOV, IS_PUNCT, IS_ALPHA])

            if len(dat) > 0:
                msg = "Negative indices reserved for special tokens"
                assert dat.min() >= 0, msg
                if self.skip_oov:
                    # Get indexes of email, URL and oov tokens
                    idx = (dat[:, 1] > 0) | (dat[:, 2] > 0) | (dat[:, 3] > 0)
                else:
                    # Get indexes of email and URL tokens
                    idx = (dat[:, 1] > 0) | (dat[:, 2] > 0)
                # Replace email and URL tokens with skip token
                dat[idx] = self.skip
                # Delete punctuation
                if self.delete_punctuation:
                    delete = np.where(dat[:, 4] == 1)
                    dat = np.delete(dat, delete, 0)
                if self.only_keep_alpha:
                    delete = np.where(dat[:, 5] == 0)
                    dat = np.delete(dat, delete, 0)
                length = min(len(dat), self.max_length)
                self.data[row, :length] = dat[:length, 0].ravel()
        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print("\n\n")
            print(exc_type, fname, exc_tb.tb_lineno)
            # print("Warning! Document", row, "broke, likely due to spaCy merge issues.\nMore info at their github, issues #1547 and #1474")
            self.purged_docs.append(row)
            continue

    # If necessary, delete documents that failed to tokenize correctly.
    self.data = np.delete(self.data, self.purged_docs, 0).astype(np.uint64)
    # Unique tokens
    self.uniques = np.unique(self.data)
    # Saved Spacy Vocab
    self.vocab = self.nlp.vocab
    # Making an idx to word mapping for vocab
    self.hash_to_word = {}
    # Manually putting in this hash for the padding ID
    self.hash_to_word[self.skip] = '<SKIP>'
    # If lemma, manually put in hash for the pronoun ID
    if self.token_type == "lemma":
        self.hash_to_word[self.nlp.vocab.strings["-PRON-"]] = "-PRON-"
    for v in self.uniques:
        if v != self.skip:
            try:
                if self.token_type == "lower":
                    self.hash_to_word[v] = self.nlp.vocab[v].lower_
                elif self.token_type == "lemma":
                    self.hash_to_word[v] = self.nlp.vocab[v].lemma_
                else:
                    self.hash_to_word[v] = self.nlp.vocab[v].orth_
            except Exception:
                pass
import nltk, spacy
from nltk.tokenize import sent_tokenize
from sense2vec import Sense2VecComponent

nltk.download('punkt')

# Resources for determining similarity: spaCy, sense2vec
s2v_path = "D:\\Programs\\Python37x64\\nlp_config\\s2v_reddit_2015_md"
spacy_lg_path = 'D:\\Programs\\Python37x64\\nlp_config\\venv\\Lib\\site-packages\\en_core_web_lg\\en_core_web_lg-2.2.5'
nlp = spacy.load(spacy_lg_path)
s2v = Sense2VecComponent(nlp.vocab).from_disk(s2v_path)
nlp.add_pipe(s2v)

seeds = {}
seeds['food'] = "food drink"
seeds['atms'] = "atmosphere place environment"
seeds['serv'] = "server management time"
seeds['prce'] = "money expensive"


def avg(my_list):
    if len(my_list) == 0:
        return 0
    return sum(my_list) / len(my_list)


def get_noun_toks(doc):
    return [tok for tok in doc if tok.tag_.startswith('N')]


def calculate_similarity(noun, seed):
    if not seed.has_vector or not noun.has_vector:
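# A minimal sketch, not the original author's code, of how the truncated
# calculate_similarity helper above might continue: return 0 when either token
# lacks a vector, otherwise average spaCy's similarity with the sense2vec
# similarity when both tokens are known to s2v. The function name is assumed
# for illustration only.
def calculate_similarity_sketch(noun, seed):
    if not seed.has_vector or not noun.has_vector:
        return 0
    scores = [noun.similarity(seed)]
    if noun._.in_s2v and seed._.in_s2v:
        scores.append(noun._.s2v_similarity(seed))
    return avg(scores)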
predargs = True

# use standard python randomizer
import random
# glob needed to find files
import glob
# nltk
import nltk
# sense2vec in combination with spaCy
import spacy
from sense2vec import Sense2VecComponent

spacynlp = spacy.load('en')
s2v = Sense2VecComponent('C:/Python27/ReqAnalyzing/reddit_vectors-1.1.0')
spacynlp.add_pipe(s2v)

# make deepcopy available
import copy
# grammar check
import grammar_check

tool = grammar_check.LanguageTool('en-GB')

'''
Construct requirements starts here

Proposed order.
1. Pick verb from allparts list
2. Check which frames/arguments are needed with verbnet
3. Find a semantically matching argument pair in allparts files
def __init__(self, model_name, sense2vec_path):
    self.nlp = spacy.load(model_name)
    s2v = Sense2VecComponent(self.nlp.vocab).from_disk(sense2vec_path)
    self.nlp.add_pipe(s2v)
import os
from typing import List

import spacy
from dataclasses import dataclass
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, root_validator
from sense2vec import Sense2VecComponent
from spacy.language import Language
from starlette.responses import Response
from starlette.status import HTTP_204_NO_CONTENT

app: FastAPI = FastAPI()
model: str = os.getenv('SPACY_MODEL')
pipeline_error: str = f"The model ({model}) doesn't support " + '{}.'
nlp: Language = spacy.load(model)
if os.getenv('SENSE2VEC') == '1':
    nlp.add_pipe(
        Sense2VecComponent(nlp.vocab).from_disk('src/s2v_old')
    )


def enforce_components(components: List[str], message: str) -> None:
    """Throws the <message> if the model doesn't have the <components>."""
    for component in components:
        if not nlp.has_pipe(component):
            raise HTTPException(
                status_code=400,
                detail=pipeline_error.format(message)
            )


class NERRequest(BaseModel):
    sections: List[str]
from pipelines import pipeline
from text2text.text_generator import TextGenerator
import nltk
from nltk.stem.porter import *
import spacy
from sense2vec import Sense2VecComponent

spacy_nlp = spacy.load("en_core_web_sm")
s2v = Sense2VecComponent(spacy_nlp.vocab).from_disk("./s2v_old")
spacy_nlp.add_pipe(s2v)

t5_generator = pipeline("question-generation")
t2t_generator = TextGenerator(output_type="question")


def generate_from_T5(context, n=5):
    res = t5_generator(context)
    ans = []
    que = []
    for i, r in enumerate(res):
        if i < n:
            ans.append(r['answer'])
            que.append(r['question'])
    return que, ans


def generate_from_t2t(context, n=5):
    res = t2t_generator.predict([context] * n)
    ans = []
    que = []
    for r in res: