def predict(query: str):
    """Run the tagger over ``query`` and return the merged entities.

    Entities from the ``ner-fast`` and ``ner-ontonotes-fast`` tag types are
    grouped by entity text, their labels are passed through
    ``TaggerModel.map_and_merge_labels`` and entities whose resulting label
    list is empty are dropped.

    Returns:
        ``{'success': True, 'data': [...]}`` on success, otherwise
        ``{'success': False, 'message': ...}``.
    """
    if query is None or len(query) == 0:
        return {'success': False, 'message': 'query is required'}

    try:
        sentence = Sentence(query)
        TaggerModel.load_model()
        TaggerModel._model.predict(sentence)

        # Group the labels of both tag types under the entity text.
        labels_by_text = defaultdict(list)
        for tag_type in ('ner-fast', 'ner-ontonotes-fast'):
            for span in sentence.to_dict(tag_type=tag_type).get('entities'):
                labels_by_text[span['text']].extend(span['labels'])

        entities = []
        for text, raw_labels in labels_by_text.items():
            merged = TaggerModel.map_and_merge_labels(
                [label.to_dict() for label in raw_labels],
                ['PERSON', 'MISC', 'DATE'],
            )
            if len(merged) > 0:
                entities.append({'text': text, 'labels': merged})
        return {'success': True, 'data': entities}
    except RuntimeError as e:
        logging.error(e, exc_info=True)
        return {'success': False, 'message': "Runtime Error: {0}".format(e)}
    except Exception as e:
        logging.error(e, exc_info=True)
        return {'success': False, 'message': 'exception occurred'}
def test_sentence_to_dict():
    """Check ``Sentence.to_dict('ner')`` with and without sentence labels."""
    first_text = 'Zalando Research is located in Berlin, the capital of Germany.'
    sentence = Sentence(first_text, labels=['business'], use_tokenizer=True)
    for token_idx, tag in ((0, 'B-ORG'), (1, 'E-ORG'), (5, 'S-LOC'), (10, 'S-LOC')):
        sentence[token_idx].add_tag('ner', tag)

    as_dict = sentence.to_dict('ner')
    assert as_dict['text'] == first_text
    assert as_dict['entities'][0]['text'] == 'Zalando Research'
    assert as_dict['entities'][1]['text'] == 'Berlin'
    assert as_dict['entities'][2]['text'] == 'Germany'
    assert len(as_dict['labels']) == 1

    second_text = 'Facebook, Inc. is a company, and Google is one as well.'
    sentence = Sentence(second_text, use_tokenizer=True)
    for token_idx, tag in ((0, 'B-ORG'), (1, 'I-ORG'), (2, 'E-ORG'), (8, 'S-ORG')):
        sentence[token_idx].add_tag('ner', tag)

    as_dict = sentence.to_dict('ner')
    assert as_dict['text'] == second_text
    assert as_dict['entities'][0]['text'] == 'Facebook, Inc.'
    assert as_dict['entities'][1]['text'] == 'Google'
    assert len(as_dict['labels']) == 0
def predict(sentence):
    """Return a signed sentiment score for ``sentence``.

    The score is the confidence of the first label, rounded to three
    decimals; it is positive when that label's value is ``POSITIVE`` and
    negated otherwise.  An empty string returns ``0`` without running the
    classifier.
    """
    if sentence == "":
        return 0

    text = Sentence(sentence)
    classifier.predict(text)

    first_value = text.labels[0].to_dict()['value']
    confidence = text.to_dict()['labels'][0]['confidence']
    signed = confidence if first_value == 'POSITIVE' else -confidence
    return round(signed, 3)
def entity_recognition(text):
    """Return the label and confidence of the first named entity in ``text``.

    Non-string input is treated as an empty document.  The text is
    title-cased before it is tagged.

    Returns:
        ``(entity, confidence)`` where ``entity`` is the value of the first
        label of the first entity and ``confidence`` is that label's
        confidence rounded to two decimals.  ``('', '')`` is returned when
        the tagger reports no entity.
    """
    doc = text if isinstance(text, str) else ''

    s = Sentence(doc.title())
    model.predict(s)
    entities = s.to_dict(tag_type='ner')['entities']

    # Test the entity list itself.  Indexing [0] on an empty list used to
    # raise IndexError before the "nothing found" fallback could run.
    if not entities or not entities[0]['labels']:
        return '', ''

    label = entities[0]['labels'][0].to_dict()
    return label['value'], round(label['confidence'], 2)
def Predict_textfile(self, textfile, is_path=False, path=""):
    """Tag every line of ``textfile`` and write the entities to a new file.

    The output file is ``textfile`` with its last four characters replaced
    by ``"__NER.txt"``.  Each entity is written as ``text<TAB>type``.

    Args:
        textfile: Path of the input file; each line is tagged on its own.
        is_path: When not equal to ``False``, the tagger is loaded from
            ``path``; otherwise the instance checkpoint is used (and
            downloaded first if ``self.checkpoint_download`` says so).
        path: Tagger location, only used together with ``is_path``.
    """
    # Explicit comparisons are kept so that non-bool flag values (e.g. None)
    # take the same branch as before.
    if is_path == False:  # noqa: E712
        if self.checkpoint_download == False:  # noqa: E712
            path = self.checkpoint_path
            print("Checkpoint File already present")
        else:
            # Create the whole checkpoint directory chain in one call.
            os.makedirs(
                self.download_dir + "/resources/tagger/example-ner",
                exist_ok=True)
            print("Checkpoint File will be downloaded from ....")
            download_file_from_google_drive(self.google_id,
                                            self.checkpoint_path)
            print("Checkpoint Downloaded successfully")
            path = self.checkpoint_path
            self.checkpoint_download = False

    tagger = SequenceTagger.load(path)
    dest_path = textfile[:-4] + "__NER.txt"
    # Both files are context-managed so the output is flushed and closed
    # even when tagging fails part-way through.
    with open(textfile, "r") as f, open(dest_path, "w") as out_f:
        for line in f:
            sentence = Sentence(line)
            tagger.predict(sentence)
            for word in sentence.to_dict(tag_type='ner')["entities"]:
                out_f.write(word['text'] + "\t" + word['type'] + "\n")
            # Blank line after the entities of each input line.
            out_f.write("\n")
def extract_entities_flair(sentences: List[str]):
    """Tag each sentence and return its entities without repeated texts.

    Returns one list per input sentence.  Every entry is a dict with the
    keys ``word``, ``entity``, ``start`` and ``end``; only the first
    occurrence of an entity text within a sentence is kept.
    """
    per_sentence = []
    for raw in sentences:
        tagged = Sentence(raw)
        ner.predict(tagged)

        seen_texts = set()
        unique = []
        for span in tagged.to_dict(tag_type="ner")["entities"]:
            if span["text"] in seen_texts:
                continue
            unique.append({
                "word": span["text"],
                "entity": span["labels"][0].value,
                "start": span["start_pos"],
                "end": span["end_pos"],
            })
            seen_texts.add(span["text"])
        per_sentence.append(unique)
    return per_sentence
def get_flair_entities(input, score_threshold=0.9):
    """Return the confident named entities in ``input``, merging adjacent ones.

    Args:
        input: Raw text to tag.
        score_threshold: Entities whose first label scores below this value
            are skipped.

    Returns:
        A list of entity strings.  An entity that starts exactly one
        character after the previously kept entity ended is joined onto that
        entity with a single space instead of being listed separately.
    """
    sentence = Sentence(input, use_tokenizer=True)
    model.predict(sentence)

    entities = []
    # None until an entity has been kept, so the first entity can never be
    # treated as a continuation (previously a start offset of 1 made the
    # code delete from an empty list).
    prev_end_pos = None
    for entity in sentence.to_dict(tag_type='ner')['entities']:
        if entity['labels'][0]._score < score_threshold:
            continue
        logger.info('flair entity detected: ' + str(entity))

        if prev_end_pos is not None and prev_end_pos + 1 == entity['start_pos']:
            # Extend only the directly preceding entity; earlier,
            # non-adjacent entities must not leak into the merged text.
            entities[-1] = (entities[-1] + ' ' + entity['text']).strip()
        else:
            entities.append(entity['text'].strip())
        prev_end_pos = entity['end_pos']
    return entities
def tag_entities(text):
    """Split ``text`` into sentences and return the NER dict of each one."""
    tagged = []
    for raw in sent_tokenize(text):
        flair_sentence = Sentence(raw)
        tagger.predict(flair_sentence)
        tagged.append(flair_sentence.to_dict(tag_type='ner'))
    return tagged
def Resume(text):
    """Extract contact details and named entities from resume text.

    Tabs become spaces and non-empty lines are joined with spaces before
    any extraction runs.

    Returns:
        A dict with the flattened text under ``content`` and a
        ``{'count', 'source'}`` bucket for each of ``person``, ``location``,
        ``organization``, ``emails`` and ``mobiles``.
    """
    text = ' '.join(
        line.replace('\t', ' ') for line in text.split('\n') if line)

    data = {'content': text}
    for bucket in ('person', 'location', 'organization', 'emails', 'mobiles'):
        data[bucket] = {'count': 0, 'source': []}

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    persons = extract_name(text)
    extracted = (('emails', emails), ('mobiles', mobiles), ('person', persons))
    for bucket, found in extracted:
        for value in found:
            data[bucket]["count"] += 1
            data[bucket]["source"].append(value)

    sentence = Sentence(text)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)

    # Entity type -> (target bucket, confidence that must be exceeded).
    rules = {
        "PER": ("person", 0.70),
        "LOC": ("location", 0.60),
        "ORG": ("organization", 0.95),
    }
    for item in sentence.to_dict(tag_type='ner')['entities']:
        rule = rules.get(item["type"])
        if rule is None:
            continue
        bucket, minimum = rule
        if item['confidence'] > minimum:
            data[bucket]["count"] += 1
            data[bucket]["source"].append(item)
    return data
def do_NER(context):
    """Return the distinct entity texts found in ``context``, capitalized.

    When the tagger reports no entities, ``get_key_words(context)`` supplies
    the candidates instead.  Duplicates are removed after lower-casing.
    """
    tagged = Sentence(context)
    NERmodel.predict(tagged)

    answers = [span['text'] for span in tagged.to_dict(tag_type='ner')['entities']]
    if not answers:
        answers = get_key_words(context)

    unique_lowered = set(answer.lower() for answer in answers)
    return [word.capitalize() for word in unique_lowered]
def get_score(text):
    """Return the confidence of the first label if it is ``__label__O``.

    Any other first-label value yields ``0``.
    """
    sentence = Sentence(text, use_tokenizer=japanese_tokenizer)
    classifier.predict(sentence)

    first_label = sentence.to_dict()["labels"][0]
    if first_label["value"] == "__label__O":
        return first_label["confidence"]
    return 0
def predict(self, sentences):
    """Tag every sentence and collect the entity mentions.

    Each mention dict is annotated with ``sent_idx``, the position of its
    sentence in ``sentences``.

    Returns:
        ``{"sentences": sentences, "mentions": [...]}``.
    """
    mentions = []
    for sent_idx, raw in enumerate(sentences):
        tagged = Sentence(raw, use_tokenizer=True)
        self.model.predict(tagged)
        for mention in tagged.to_dict(tag_type="ner")["entities"]:
            mention["sent_idx"] = sent_idx
            mentions.append(mention)
    return {"sentences": sentences, "mentions": mentions}
def ListParser(text):
    """Extract contact details and named entities from tab-separated text.

    Only lines containing at least one tab are sent to the tagger; their
    cells are joined with spaces and each line is ended with ``".\\n"``.
    E-mails and mobile numbers are searched in the unmodified ``text``.

    Returns:
        A dict with ``text`` under ``content`` and a ``{'count', 'source'}``
        bucket for each of ``person``, ``location``, ``organization``,
        ``emails`` and ``mobiles``.
    """
    data = {'content': text}
    for bucket in ('person', 'location', 'organization', 'emails', 'mobiles'):
        data[bucket] = {'count': 0, 'source': []}

    rows = (line.split('\t') for line in text.split('\n'))
    arra = "".join(" ".join(cells) + ".\n" for cells in rows if len(cells) > 1)

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    for bucket, found in (('emails', emails), ('mobiles', mobiles)):
        for value in found:
            data[bucket]["count"] += 1
            data[bucket]["source"].append(value)

    sentence = Sentence(arra)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)

    # Entity type -> (target bucket, confidence that must be exceeded).
    rules = {
        "PER": ("person", 0.90),
        "LOC": ("location", 0.90),
        "ORG": ("organization", 0.90),
    }
    for item in sentence.to_dict(tag_type='ner')['entities']:
        rule = rules.get(item["type"])
        if rule is None:
            continue
        bucket, minimum = rule
        if item['confidence'] > minimum:
            data[bucket]["count"] += 1
            data[bucket]["source"].append(item)
    return data
def test_sentence_to_dict():
    """Check ``Sentence.to_dict("ner")`` with and without sentence labels."""
    first_text = "Zalando Research is located in Berlin, the capital of Germany."
    sentence = Sentence(
        first_text,
        labels=["business"],
        use_tokenizer=segtok_tokenizer,
    )
    # bioes tags
    for token_idx, tag in ((0, "B-ORG"), (1, "E-ORG"), (5, "S-LOC"), (10, "S-LOC")):
        sentence[token_idx].add_tag("ner", tag)

    as_dict = sentence.to_dict("ner")
    assert as_dict["text"] == first_text
    assert as_dict["entities"][0]["text"] == "Zalando Research"
    assert as_dict["entities"][1]["text"] == "Berlin"
    assert as_dict["entities"][2]["text"] == "Germany"
    assert len(as_dict["labels"]) == 1

    second_text = "Facebook, Inc. is a company, and Google is one as well."
    sentence = Sentence(second_text, use_tokenizer=segtok_tokenizer)
    # bioes tags
    for token_idx, tag in ((0, "B-ORG"), (1, "I-ORG"), (2, "E-ORG"), (8, "S-ORG")):
        sentence[token_idx].add_tag("ner", tag)

    as_dict = sentence.to_dict("ner")
    assert as_dict["text"] == second_text
    assert as_dict["entities"][0]["text"] == "Facebook, Inc."
    assert as_dict["entities"][1]["text"] == "Google"
    assert len(as_dict["labels"]) == 0
def posTagFinder(self):
    """Sample 1500 random items and record the POS tag types seen.

    Each sampled item's ``sentence`` is tagged and any tag type not yet in
    ``self.postags`` is added; the list is de-duplicated after every sample.
    """
    for _ in range(1500):
        record = random.choice(self.data)
        tagged = Sentence(record['sentence'])
        self.tagger.predict(tagged)

        spans = tagged.to_dict(tag_type='pos')['entities']
        unseen = [
            span['type'] for span in spans
            if span['type'] not in self.postags
        ]
        self.postags.extend(unseen)
        # A single sentence can contribute the same new tag several times.
        self.postags = list(set(self.postags))
def predict(model, selected_embeddings, data_file):
    """Tag every line of the bz2 input and write DIS/DRUG matches to CSV.

    Reads the module-level ``f_in`` and writes two files derived from the
    module-level ``f_out`` (suffixes ``_drug.csv`` and ``_dis.csv``), each
    with the columns ``post_ID,matched,score,start_pos,end_pos``.  The
    ``model`` argument is replaced by the tagger loaded from a fixed
    directory.
    NOTE(review): ``data_file`` is not read here - confirm this is intended.
    """
    enabled = [key for key in selected_embeddings if selected_embeddings[key]]
    selected_embeddings_text = '_'.join(enabled)
    print(selected_embeddings_text)

    model_dir = 'resources/taggers/CADECglove_char_flair'
    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    header = "post_ID,matched,score,start_pos,end_pos\n"
    with bz2.open(f_in, 'rt') as f, \
            open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug, \
            open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:
        f_dis.write(header)
        f_drug.write(header)
        # Entity type -> output file; other types are ignored.
        sinks = {'DIS': f_dis, 'DRUG': f_drug}

        for line in tqdm(f, total=get_num_lines(f_in)):
            if len(line) == 0:
                continue
            line_dict = process_txt(line)
            body = line_dict['text']
            tweet_id = line_dict['id']

            sentence = Sentence(str(body))
            # predict tags
            model.predict(sentence)
            for el in sentence.to_dict(tag_type='ner')['entities']:
                sink = sinks.get(el['type'])
                if sink is None:
                    continue
                matched = el['text'].replace('\n', ' ')
                numbers = ','.join(
                    str(el[key]) for key in ('confidence', 'start_pos', 'end_pos'))
                sink.write(tweet_id + ',"' + matched + '",' + numbers + '\n')
def flair12NER(title, text):
    """Tag ``text`` and package the result as a DocRED-style document dict.

    Returns:
        A dict with ``vertexSet`` (from
        ``getDocREDVertexSetFromFlairEntities``), the given ``title`` and
        ``sents``, a list of word-text lists, one per spaCy sentence.
    """
    tagged = Sentence(text)
    flair12class.predict(tagged)
    ner_dict = tagged.to_dict(tag_type="ner")

    sentences = getSpaCySentences(ner_dict["text"])
    vertexSet = getDocREDVertexSetFromFlairEntities(ner_dict["entities"], sentences)

    sents = []
    for sentence in sentences:
        sents.append([word.text for word in sentence])

    return {"vertexSet": vertexSet, "title": title, "sents": sents}
def tag_instance_using_flair(target_tagger, ner_tagger, pos_tagger, instance):
    """Tag the conclusion and claims of one debate instance.

    ``instance`` is a pair whose first item is printed and whose second item
    is a dict with ``_claim``, ``_argument_sentences`` and ``_debate_id``.

    Returns:
        A dict with ``_debate_id``, ``conclusion`` and ``claims``; the
        conclusion and every claim carry ``text``, ``pos``,
        ``named_entities`` and ``targets``.
    """
    print('processing:', instance[0])
    instance = instance[1]
    conclusion = instance['_claim']
    claims = list(instance['_argument_sentences'].values())

    conclusion_sent = Sentence(conclusion)
    claims_sents = [Sentence(claim) for claim in claims]
    # Every tagger gets its own fresh list of all sentences.
    for flair_tagger in (target_tagger, ner_tagger, pos_tagger):
        flair_tagger.predict([conclusion_sent] + claims_sents)

    def describe(text, tagged):
        # Entity lists of one tagged sentence for the three tag types.
        return {
            'text': text,
            'pos': tagged.to_dict(tag_type='pos')['entities'],
            'named_entities': tagged.to_dict(tag_type='ner')['entities'],
            'targets': tagged.to_dict(tag_type='ct')['entities'],
        }

    tagged_claims = [
        describe(text, tagged) for text, tagged in zip(claims, claims_sents)
    ]
    return {
        '_debate_id': instance['_debate_id'],
        'conclusion': describe(conclusion, conclusion_sent),
        'claims': tagged_claims,
    }
def find_tags(user_input,keyword):
    """Return the label of the first entity whose text matches ``keyword``.

    The match is whatever ``findWholeWord(keyword)`` reports for the entity
    text.  An empty string is returned when no entity matches.
    """
    tagged = Sentence(user_input)
    tagger.predict(tagged)

    for span in tagged.to_dict(tag_type='ner')['entities']:
        labels = span['labels']
        if findWholeWord(keyword)(span['text']):
            return labels[0].value
    return ""
def flair_ner(text, tagger):
    """
    Tag ``text`` with a Flair tagger.

    :param text: source text to tag
    :param tagger: Flair tagger initialised with a tagging model
    :return: list of tuples (text, start, end, entity label)
    """
    sentence = Sentence(text, use_tokenizer=True)
    tagger.predict(sentence)

    ents = []
    for span in sentence.to_dict(tag_type="ner")["entities"]:
        ents.append(
            (span["text"], span["start_pos"], span["end_pos"], span["type"]))
    return ents
def text_to_features(tagger, sents):
    """Tag sentences and return annotated tokens plus the tagged dicts.

    ``sents`` is used as given when it is a ``list``; anything else is split
    with ``nltk.sent_tokenize``.

    Returns:
        A pair: all tokens as ``text`` + U+FFE8 + ``ct`` tag value joined by
        spaces, and the list of ``to_dict(tag_type='ct')`` results.
    """
    if type(sents) is not list:
        sents = nltk.sent_tokenize(sents)

    annotated_tokens = []
    tagged_sents = []
    for raw in sents:
        tagged = Sentence(raw)
        tagger.predict(tagged)
        annotated_tokens.extend([
            token.text + u"\uFFE8" + token.tags['ct'].value
            for token in tagged.tokens
        ])
        tagged_sents.append(tagged.to_dict(tag_type='ct'))
    return ' '.join(annotated_tokens), tagged_sents
def sifrank():
    """Tag the ``text`` field of the JSON request and group entities by type.

    An entity goes into the first of GPE, PERSON, ORG, LOC, EVENT, DATE,
    MONEY, NORP (NORP = nationalities) that occurs in the string form of its
    labels, or into ``ADDITIONAL`` when none does.  Each group is
    de-duplicated before it is returned as JSON.
    """
    req_data = request.get_json(force=True)
    sentence = Sentence(req_data['text'])
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']
    print(output)

    # Checked in this order; the first tag found in the label text wins.
    priority = ("GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP")
    found = {tag: [] for tag in priority}
    found["ADDITIONAL"] = []
    for span in output:
        label_text = str(span["labels"])
        bucket = next((tag for tag in priority if tag in label_text), "ADDITIONAL")
        found[bucket].append(span["text"])

    response_order = ('GPE', 'ORG', "PERSON", "EVENT", "DATE", "MONEY",
                      "NORP", "LOC", "ADDITIONAL")
    entities = {tag: list(set(found[tag])) for tag in response_order}
    return jsonify(entities)
def sentence_to_org(sentence):
    """Return the most frequent organisation name tagged in ``sentence``.

    Returns:
        The result of ``most_common`` for the texts of all ``ORG`` entities,
        or ``None`` (after printing a notice) when tagging or the selection
        fails.
    """
    try:
        sentence_tokenized = Sentence(sentence)
        tagger.predict(sentence_tokenized)
        sentence_dict = sentence_tokenized.to_dict(tag_type='ner')
        org_names = [
            entity['text']
            for entity in sentence_dict['entities']
            if entity['type'] == 'ORG'
        ]
        return most_common(org_names)
    except Exception:
        # Best-effort lookup: any failure is reported and mapped to None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Did not found any organisations from the text")
        return None
async def getNamedEntities(body: FLAIR_NER_MODEL):
    """Tag ``body.text`` and group the entity texts by entity type.

    Before tagging, every character other than ASCII letters, digits,
    space, newline, ``.`` and ``,`` is removed.  An entity goes into the
    first of GPE, PERSON, ORG, LOC, EVENT, DATE, MONEY, NORP
    (NORP = nationalities) that occurs in the string form of its labels, or
    into ``ADDITIONAL`` when none does.

    Returns:
        A dict mapping each group name to a de-duplicated list of texts.
    """
    # Raw string: the earlier non-raw literal contained the invalid escape
    # "\.", which newer Python versions warn about.  The dot is already in
    # the class, so this pattern matches exactly the same characters.
    text = re.sub(r'[^.,a-zA-Z0-9 \n]', '', body.text)

    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # Checked in this order; the first tag found in the label text wins.
    priority = ("GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP")
    found = {tag: [] for tag in priority}
    found["ADDITIONAL"] = []
    for span in output:
        label_text = str(span["labels"])
        bucket = next((tag for tag in priority if tag in label_text), "ADDITIONAL")
        found[bucket].append(span["text"])

    response_order = ('GPE', 'ORG', "PERSON", "EVENT", "DATE", "MONEY",
                      "NORP", "LOC", "ADDITIONAL")
    return {tag: list(set(found[tag])) for tag in response_order}
def predict(model, selected_embeddings, data_file):
    """Tag the sentences of each CSV row and write DIS/DRUG matches to CSV.

    Reads the module-level ``f_in`` with pandas and writes two files derived
    from the module-level ``f_out`` (suffixes ``_drug.csv`` and
    ``_dis.csv``), each with the columns
    ``post_ID,matched,score,start_pos,end_pos``.  The tagger is loaded from
    a directory built from ``model`` and the enabled embedding names.
    Processing stops after the row whose index equals 10.
    NOTE(review): ``data_file`` is not read here - confirm this is intended.
    """
    enabled = [key for key in selected_embeddings if selected_embeddings[key]]
    selected_embeddings_text = '_'.join(enabled)
    print(selected_embeddings_text)

    model_dir = 'resources/taggers/' + 'to_resume_' + model + selected_embeddings_text
    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    data = pd.read_csv(f_in)
    # columns: ,year,month,subreddit,body,clean_body,post_index
    print(data.head())

    header = "post_ID,matched,score,start_pos,end_pos\n"
    with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug, \
            open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:
        f_dis.write(header)
        f_drug.write(header)
        # Entity type -> output file; other types are ignored.
        sinks = {'DIS': f_dis, 'DRUG': f_drug}

        for i, row in tqdm.tqdm(data.iterrows(), total=data.shape[0]):
            # NOTE(security): eval() executes whatever the 'body' column
            # contains; only safe when the CSV comes from a trusted source.
            for r in eval(row['body']):
                sentence = Sentence(str(r))
                # predict tags
                model.predict(sentence)
                for el in sentence.to_dict(tag_type='ner')['entities']:
                    sink = sinks.get(el['type'])
                    if sink is None:
                        continue
                    matched = el['text'].replace('\n', ' ')
                    numbers = ','.join(
                        str(el[key])
                        for key in ('confidence', 'start_pos', 'end_pos'))
                    sink.write(
                        row['post_ID'] + ',"' + matched + '",' + numbers + '\n')
            if i == 10:
                break
def predict(model, predict_sentence):
    """Return the positions covered by entities labelled ``1``.

    Every ``tox`` entity whose first label value converts to the integer 1
    contributes the range ``start_pos`` .. ``end_pos - 1``.

    Returns:
        ``[toxic_spans, predict_sentence]``.
    """
    sentence = Sentence(predict_sentence)
    model.predict(sentence)
    print(predict_sentence)

    toxic_spans = []
    for span in sentence.to_dict(tag_type='tox')['entities']:
        if int(span['labels'][0].value) != 1:
            continue
        toxic_spans.extend(range(span['start_pos'], span['end_pos']))
    return [toxic_spans, predict_sentence]
def get_ner_entities(self, text):
    """Return the NER entity dicts of every sentence in ``text``.

    The text is split with ``nltk.sent_tokenize``; if that fails, the whole
    text is tagged as a single sentence.  The sentence list is printed
    before tagging.
    """
    try:
        sentences = nltk.sent_tokenize(text)
    except Exception:
        # Deliberate fallback (e.g. tokenizer data not installed).  Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        sentences = [text]
    print(sentences)

    entities = []
    for sent in sentences:
        sentence = Sentence(sent)
        self.ontoner_tagger.predict(sentence)
        entities.extend(sentence.to_dict(tag_type='ner')["entities"])
    return entities
def createNERFiles(statFilePath, resultTxt, tagger):
    """Tag ``resultTxt`` and write the tagged string to ``statFilePath``.

    A ``RuntimeError`` raised by the tagger is reported on stdout and the
    sentence is then written in whatever state the tagger left it.

    Args:
        statFilePath: Output path; the file is created or truncated.
        resultTxt: Text to tag.
        tagger: Object whose ``predict`` method tags a ``Sentence``.

    Returns:
        ``(taggedStr, details)``: the tagged string and the sentence's
        ``to_dict(tag_type='ner')`` result.
    """
    if verbose:
        print("\tCreating named entity recognized file at: " + statFilePath)

    # The context manager closes the file even if tagging raises something
    # other than RuntimeError (the handle used to leak in that case).
    with open(statFilePath, "w") as statFile:
        # Built outside the try block so `sentence` is always bound when it
        # is used below.
        sentence = Sentence(resultTxt)
        try:
            # predict NER tags
            tagger.predict(sentence)
        except RuntimeError as err:
            print("Runtime error: {0}".format(err))
            print("Failed at: " + statFilePath)

        taggedStr = sentence.to_tagged_string()
        details = sentence.to_dict(tag_type='ner')
        statFile.write(taggedStr)
    return (taggedStr, details)
def test(model, selected_embeddings):
    """Load a fine-tuned tagger and print its NER output for a sample post.

    The tagger directory is built from ``model``, the names of the enabled
    embeddings joined by ``_`` and the suffix ``_fine-tuned7s``.
    """
    enabled = [key for key in selected_embeddings if selected_embeddings[key]]
    selected_embeddings_text = '_'.join(enabled)
    print(selected_embeddings_text)

    model_dir = 'resources/taggers/' + model + selected_embeddings_text + '_fine-tuned7s'
    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    # NOTE: the doubled quotes around 'protecting itself' are adjacent string
    # literals, so the runtime text contains no quote characters there.
    sample_post = "If you've been on a low calorie diet + exercise for a long time, probably you have low free T3 blood levels causing your hypo symptoms. You should ask specifically for freeT3 and freeT4 to be tested. The low conversion of T4 to T3 is your bodies way of ""protecting itself"" from any further calorie deficiet. The rest of this only matters if you do get low T3 confirmed: it is important you do not go on a T4 monotherapy, it would very likely make your situation worse because it's tricking your brain into thinking you have more then enough thyroid hormones, while your T3 deficit worsens. Either get T3 and T4 combination or no medication. Instead make sure you have enough Iodine, Selenium and Zinc in your diet and consider significantly increasing your calorie intake! It seems paradoxical but because this will eventually increase you T3 levels and basal metabolic rate it will not necessarily make you gain weight in the long term. Also dizzy spells could be low blood sugar (even if you don't who the classical symptoms of shaking/sweating.) If it is low blood sugar you need to be careful with that and make sure to get some glucose quick (both for preventing your dizzines causing accidents and also because every hypoglycemic state will stress out your metabolic system, autoamplifying the low T3)"
    sentence = Sentence(sample_post)

    # predict tags and print
    model.predict(sentence)
    print(sentence.to_dict(tag_type='ner'))
def generateTextToNer(text):
    """
    Normalize ``text``, tag it sentence by sentence and collect the entities.

    Returns a dictionary with the following keys: 'ents' contains a list of
    entities, 'text' contains the entire text string, and 'passToRelation'
    contains a list of possible combinations of two entities in a sentence.
    """
    clean_text = normalize_corpus([text], to_lower=False, to_remove_html=False, to_remove_accent=True, to_expand_contractions=True, to_lemmatize=False, to_remove_special=False, to_remove_stopword=False)
    # normalize_corpus was given a one-element list; take its first result.
    clean_text = clean_text[0]
    # Number of times each entity text has been seen so far; becomes its 'id'.
    idTracker = defaultdict(int)
    res = {'ents': [], 'text': '', 'passToRelation': []}
    lst_sentences = nltk.sent_tokenize(clean_text)
    # Running length of the earlier sentence texts (plus one per sentence for
    # the joining space); added to per-sentence offsets further down.
    prevLen = 0
    for s in lst_sentences:
        sentence = Sentence(s, use_tokenizer=True)
        tagger_fast.predict(sentence)
        dict_flair = sentence.to_dict(tag_type='ner')
        # Rewrite each entity dict in place: add 'id', rename the position
        # keys to 'start'/'end' and replace 'labels' by a 'type' string.
        for idx in dict_flair['entities']:
            idx['id'] = idTracker[idx['text']]
            idTracker[idx['text']] += 1
            idx['end'] = idx.pop('end_pos')
            idx['start'] = idx.pop('start_pos')
            full_label = idx.pop('labels')[0]
            full_label = str(full_label)
            # Keep the label string up to (not including) its first space.
            idx['type'] = full_label[:full_label.find(' ')]
        dict_flair['ents'] = dict_flair.pop('entities')
        # combine() receives the entities while their offsets are still
        # relative to this sentence; the shift below happens afterwards.
        # NOTE(review): if combine() keeps references to these dicts, its
        # results will see the shifted offsets too - confirm.
        combination = combine(dict_flair)
        res['passToRelation'].extend(combination)
        # Shift the offsets by the length of the text accumulated so far.
        for idx in dict_flair['ents']:
            idx['end'] = idx['end'] + prevLen
            idx['start'] = idx['start'] + prevLen
        dict_flair.pop('labels')
        res['text'] += ' '+dict_flair['text']
        res['ents'].extend(dict_flair['ents'])
        prevLen += len(dict_flair['text']) + 1
    # Drop the leading space added in front of the first sentence.
    res['text'] = res['text'].strip()
    return res