Example no. 1
    def predict(query: str):

        if not query:
            return {'success': False, 'message': 'query is required'}

        try:
            sentence = Sentence(query)
            TaggerModel.load_model()
            TaggerModel._model.predict(sentence)

            # merge entity labels from both taggers, keyed by entity text
            temp = defaultdict(list)
            for entity in sentence.to_dict(tag_type='ner-fast').get('entities'):
                temp[entity['text']].extend(entity['labels'])
            for entity in sentence.to_dict(tag_type='ner-ontonotes-fast').get('entities'):
                temp[entity['text']].extend(entity['labels'])
            ner_entities = [{'text': text, 'labels': labels}
                            for text, labels in temp.items()]

            # keep only entities that still have labels after mapping/merging
            entities = []
            for item in ner_entities:
                labels = TaggerModel.map_and_merge_labels(
                    [label.to_dict() for label in item['labels']],
                    ['PERSON', 'MISC', 'DATE']
                )
                if labels:
                    entities.append({'text': item['text'], 'labels': labels})
            return {'success': True, 'data': entities}
        except RuntimeError as e:
            logging.error(e, exc_info=True)
            return {'success': False, 'message': "Runtime Error: {0}".format(e)}
        except Exception as e:
            logging.error(e, exc_info=True)
            return {'success': False, 'message': 'exception occurred'}
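
Example no. 1 relies on a TaggerModel helper that is not shown. Below is a minimal sketch of what it might look like, assuming _model lazily caches a Flair MultiTagger covering both tag types used above and map_and_merge_labels keeps only an allow-list of label values; the names and behavior are assumptions, not the original implementation.

from flair.models import MultiTagger

class TaggerModel:
    # hypothetical helper; the real implementation is not part of the example
    _model = None

    @classmethod
    def load_model(cls):
        # lazily load and cache both taggers on first use (assumed behavior)
        if cls._model is None:
            cls._model = MultiTagger.load(['ner-fast', 'ner-ontonotes-fast'])

    @staticmethod
    def map_and_merge_labels(label_dicts, keep_values):
        # keep only labels whose value is in the allow-list (assumed behavior)
        return [d for d in label_dicts if d.get('value') in keep_values]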
Example no. 2
def test_sentence_to_dict():
    sentence = Sentence(
        'Zalando Research is   located in Berlin, the capital of Germany.',
        labels=['business'],
        use_tokenizer=True)
    sentence[0].add_tag('ner', 'B-ORG')
    sentence[1].add_tag('ner', 'E-ORG')
    sentence[5].add_tag('ner', 'S-LOC')
    sentence[10].add_tag('ner', 'S-LOC')
    dict = sentence.to_dict('ner')
    assert ('Zalando Research is   located in Berlin, the capital of Germany.'
            == dict['text'])
    assert ('Zalando Research' == dict['entities'][0]['text'])
    assert ('Berlin' == dict['entities'][1]['text'])
    assert ('Germany' == dict['entities'][2]['text'])
    assert (1 == len(dict['labels']))
    sentence = Sentence(
        'Facebook, Inc. is a company, and Google is one as well.',
        use_tokenizer=True)
    sentence[0].add_tag('ner', 'B-ORG')
    sentence[1].add_tag('ner', 'I-ORG')
    sentence[2].add_tag('ner', 'E-ORG')
    sentence[8].add_tag('ner', 'S-ORG')
    dict = sentence.to_dict('ner')
    assert ('Facebook, Inc. is a company, and Google is one as well.' ==
            dict['text'])
    assert ('Facebook, Inc.' == dict['entities'][0]['text'])
    assert ('Google' == dict['entities'][1]['text'])
    assert (0 == len(dict['labels']))
Example no. 3
def predict(sentence):
    """ Predict the sentiment of a sentence """
    if sentence == "":
        return 0
    text = Sentence(sentence)
    classifier.predict(text)
    label = text.labels[0].to_dict()
    # signed confidence: positive sentiment > 0, negative sentiment < 0
    result = label['confidence'] if label['value'] == 'POSITIVE' else -label['confidence']
    return round(result, 3)
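
The example above assumes a module-level classifier. A plausible setup and call, assuming Flair's pretrained 'en-sentiment' model (the original code does not show which classifier is loaded):

from flair.data import Sentence
from flair.models import TextClassifier

classifier = TextClassifier.load('en-sentiment')  # assumed model

print(predict("I really enjoyed this movie!"))  # e.g. 0.989
print(predict("This was a waste of time."))     # e.g. -0.994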
Example no. 4
def entity_recognition(text):
    doc = text if isinstance(text, str) else ''

    s = Sentence(doc.title())
    model.predict(s)
    result = s.to_dict(tag_type='ner')
    entities = result['entities']

    # guard against sentences with no detected entities before indexing
    if entities:
        first = entities[0]
        entity = first['labels'][0].to_dict()['value']
        confidence = round(first['labels'][0].to_dict()['confidence'], 2)
    else:
        entity = ''
        confidence = ''
    return entity, confidence
Example no. 5
    def Predict_textfile(self, textfile, is_path=False, path=""):

        if not is_path:
            if not self.checkpoint_download:
                path = self.checkpoint_path
                print("Checkpoint file already present")
            else:
                # create resources/tagger/example-ner under the download dir
                os.makedirs(self.download_dir + "/resources/tagger/example-ner",
                            exist_ok=True)
                print("Checkpoint file will be downloaded from ...")
                download_file_from_google_drive(self.google_id,
                                                self.checkpoint_path)
                print("Checkpoint downloaded successfully")
                path = self.checkpoint_path
                self.checkpoint_download = False

        tagger = SequenceTagger.load(path)

        # write one "entity text<TAB>entity type" line per detected entity
        dest_path = textfile[:-4] + "__NER.txt"
        with open(textfile, "r") as f, open(dest_path, "w") as out_f:
            for line in f:
                sentence = Sentence(line)
                tagger.predict(sentence)
                for word in sentence.to_dict(tag_type='ner')["entities"]:
                    out_f.write(word['text'] + "\t" + word['type'] + "\n")
                out_f.write("\n")
Example no. 6
        def extract_entities_flair(sentences: List[str]):
            result = list()

            for sentence in sentences:
                sentence = Sentence(sentence)
                ner.predict(sentence)
                line_result = sentence.to_dict(tag_type="ner")

                cache = dict()
                dedup = list()

                # keep only the first occurrence of each entity text
                for entity in line_result["entities"]:
                    if entity["text"] not in cache:
                        dedup.append({
                            "word": entity["text"],
                            "entity": entity["labels"][0].value,
                            "start": entity["start_pos"],
                            "end": entity["end_pos"],
                        })
                        cache[entity["text"]] = True

                result.append(dedup)

            return result
Example no. 7
def get_flair_entities(text, score_threshold=0.9):

    sentence = Sentence(text, use_tokenizer=True)
    model.predict(sentence)

    # refactor flair output: drop low-confidence entities and merge entities
    # that are directly adjacent in the source text into a single span
    entities = []
    prev_end_pos = None
    prev_entity = ''
    for entity in sentence.to_dict(tag_type='ner')['entities']:

        if entity['labels'][0].score < score_threshold:
            continue

        logger.info('flair entity detected: ' + str(entity))
        if entities and prev_end_pos + 1 == entity['start_pos']:
            # adjacent to the previous entity: replace it with the merged span
            del entities[-1]
            final_entity = prev_entity + ' ' + entity['text']
        else:
            final_entity = entity['text']

        entities.append(final_entity.strip())
        prev_end_pos = entity['end_pos']
        prev_entity = final_entity

    return entities
Example no. 8
def tag_entities(text):
    sentences = sent_tokenize(text)
    output = []
    for s in sentences:
        s = Sentence(s)
        tagger.predict(s)
        output.append(s.to_dict(tag_type='ner'))
    return output
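
A possible setup and call for tag_entities, assuming NLTK's sent_tokenize and a standard pretrained Flair NER model (both are assumptions; the original imports are not shown):

from nltk.tokenize import sent_tokenize  # may require nltk.download('punkt')
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')  # assumed 4-class English NER model

for sent_dict in tag_entities("Angela Merkel visited Paris. She met the president."):
    for entity in sent_dict['entities']:
        print(entity['text'], entity['labels'])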
Example no. 9
def Resume(text):
    text = ' '.join(
        [line.replace('\t', ' ') for line in text.split('\n') if line])
    data = {
        'content': text,
        'person': {
            'count': 0,
            'source': [],
        },
        'location': {
            'count': 0,
            'source': []
        },
        'organization': {
            'count': 0,
            'source': []
        },
        'emails': {
            'count': 0,
            'source': []
        },
        'mobiles': {
            'count': 0,
            'source': []
        }
    }

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    persons = extract_name(text)
    for email in emails:
        data["emails"]["count"] += 1
        data["emails"]["source"].append(email)
    for mobile in mobiles:
        data["mobiles"]["count"] += 1
        data["mobiles"]["source"].append(mobile)
    for person in persons:
        data["person"]["count"] += 1
        data["person"]["source"].append(person)

    sentence = Sentence(text)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)
    d = sentence.to_dict(tag_type='ner')
    for item in d['entities']:
        if item["type"] == "PER" and item['confidence'] > 0.70:
            data["person"]["count"] += 1
            data["person"]["source"].append(item)
        elif item["type"] == "LOC" and item['confidence'] > 0.60:
            data["location"]["count"] += 1
            data["location"]["source"].append(item)
        elif item["type"] == "ORG" and item['confidence'] > 0.95:
            data["organization"]["count"] += 1
            data["organization"]["source"].append(item)
    return data
Example no. 10
def do_NER(context):
    s = Sentence(context)
    NERmodel.predict(s)
    raw = s.to_dict(tag_type='ner')
    answers = []
    for item in raw['entities']:
        answers.append(item['text'])
    if not answers:
        answers = get_key_words(context)
    # deduplicate case-insensitively, then capitalize each answer
    unique_answers = set(answer.lower() for answer in answers)
    return [answer.capitalize() for answer in unique_answers]
Example no. 11
def get_score(text):
    # create example sentence
    sentence = Sentence(text, use_tokenizer=japanese_tokenizer)
    # predict class and print
    classifier.predict(sentence)

    label_dict = sentence.to_dict()["labels"][0]

    # the confidence only counts when the predicted class is "__label__O"
    if label_dict["value"] == "__label__O":
        return label_dict["confidence"]
    return 0
Example no. 12
    def predict(self, sentences):
        mentions = []
        for sent_idx, sent in enumerate(sentences):
            sent = Sentence(sent, use_tokenizer=True)
            self.model.predict(sent)
            sent_mentions = sent.to_dict(tag_type="ner")["entities"]
            # remember which sentence each mention came from
            for mention in sent_mentions:
                mention["sent_idx"] = sent_idx
            mentions.extend(sent_mentions)
        return {"sentences": sentences, "mentions": mentions}
Example no. 13
def ListParser(text):
    data = {
        'content': text,
        'person': {
            'count': 0,
            'source': [],
        },
        'location': {
            'count': 0,
            'source': []
        },
        'organization': {
            'count': 0,
            'source': []
        },
        'emails': {
            'count': 0,
            'source': []
        },
        'mobiles': {
            'count': 0,
            'source': []
        }
    }
    arra = ""

    for line in text.split('\n'):
        line = line.split('\t')
        if len(line) > 1:
            arra += (" ".join(line) + ".\n")

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    for email in emails:
        data["emails"]["count"] += 1
        data["emails"]["source"].append(email)
    for mobile in mobiles:
        data["mobiles"]["count"] += 1
        data["mobiles"]["source"].append(mobile)
    sentence = Sentence(arra)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)
    d = sentence.to_dict(tag_type='ner')
    for item in d['entities']:
        if item["type"] == "PER" and item['confidence'] > 0.90:
            data["person"]["count"] += 1
            data["person"]["source"].append(item)
        elif item["type"] == "LOC" and item['confidence'] > 0.90:
            data["location"]["count"] += 1
            data["location"]["source"].append(item)
        elif item["type"] == "ORG" and item['confidence'] > 0.90:
            data["organization"]["count"] += 1
            data["organization"]["source"].append(item)
    return data
Example no. 14
def test_sentence_to_dict():
    sentence = Sentence(
        "Zalando Research is   located in Berlin, the capital of Germany.",
        labels=["business"],
        use_tokenizer=segtok_tokenizer,
    )

    # bioes tags
    sentence[0].add_tag("ner", "B-ORG")
    sentence[1].add_tag("ner", "E-ORG")
    sentence[5].add_tag("ner", "S-LOC")
    sentence[10].add_tag("ner", "S-LOC")

    dict = sentence.to_dict("ner")

    assert (
        "Zalando Research is   located in Berlin, the capital of Germany."
        == dict["text"]
    )
    assert "Zalando Research" == dict["entities"][0]["text"]
    assert "Berlin" == dict["entities"][1]["text"]
    assert "Germany" == dict["entities"][2]["text"]
    assert 1 == len(dict["labels"])

    sentence = Sentence(
        "Facebook, Inc. is a company, and Google is one as well.",
        use_tokenizer=segtok_tokenizer,
    )

    # bioes tags
    sentence[0].add_tag("ner", "B-ORG")
    sentence[1].add_tag("ner", "I-ORG")
    sentence[2].add_tag("ner", "E-ORG")
    sentence[8].add_tag("ner", "S-ORG")

    dict = sentence.to_dict("ner")

    assert "Facebook, Inc. is a company, and Google is one as well." == dict["text"]
    assert "Facebook, Inc." == dict["entities"][0]["text"]
    assert "Google" == dict["entities"][1]["text"]
    assert 0 == len(dict["labels"])
Example no. 15
    def posTagFinder(self):
        # sample 1500 random sentences and collect the distinct POS tags seen
        for _ in range(1500):
            item = random.choice(self.data)
            temp = Sentence(item['sentence'])
            self.tagger.predict(temp)
            _dict = temp.to_dict(tag_type='pos')
            self.postags.extend([
                sample['type'] for sample in _dict['entities']
                if sample['type'] not in self.postags
            ])
        self.postags = list(set(self.postags))
Example no. 16
def predict(model, selected_embeddings, data_file):
    """
			takes data in a form text, post_id, and saves both those plus 
			prediction results in the out file
	"""

    selected_embeddings_text = [
        key for key in selected_embeddings if selected_embeddings[key]
    ]
    selected_embeddings_text = '_'.join(selected_embeddings_text)

    print(selected_embeddings_text)

    model_dir = 'resources/taggers/CADECglove_char_flair'

    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    line_counts = 0

    with bz2.open(f_in, 'rt') as f:

        with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug:
            with open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:

                header = "post_ID,matched,score,start_pos,end_pos\n"
                f_dis.write(header)
                f_drug.write(header)

                for line in tqdm(f, total=get_num_lines(f_in)):
                    if len(line) > 0:
                        line_dict = process_txt(line)
                        line_counts += 1

                        body = line_dict['text']
                        tweet_id = line_dict['id']

                        sentence = Sentence(str(body))
                        # predict tags and collect entities
                        model.predict(sentence)
                        res = sentence.to_dict(tag_type='ner')

                        for el in res['entities']:
                            csv_row = '{},"{}",{},{},{}\n'.format(
                                tweet_id,
                                el['text'].replace('\n', ' '),
                                el['confidence'],
                                el['start_pos'],
                                el['end_pos'])
                            if el['type'] == 'DIS':
                                f_dis.write(csv_row)
                            elif el['type'] == 'DRUG':
                                f_drug.write(csv_row)
Example no. 17
def flair12NER(title, text):
    s = Sentence(text)
    flair12class.predict(s)
    entities = s.to_dict(tag_type="ner")
    sentences = getSpaCySentences(entities["text"])
    vertexSet = getDocREDVertexSetFromFlairEntities(entities["entities"],
                                                    sentences)
    docREDDocumentObject = {
        "vertexSet": vertexSet,
        "title": title,
        "sents": [[word.text for word in sentence] for sentence in sentences]
    }
    return docREDDocumentObject
Example no. 18
def tag_instance_using_flair(target_tagger, ner_tagger, pos_tagger, instance):
    print('processing:', instance[0])
    instance = instance[1]

    conclusion = instance['_claim']
    claims = list(instance['_argument_sentences'].values())

    #predict targets...
    conclusion_sent = Sentence(conclusion)
    claims_sents = [Sentence(claim) for claim in claims]

    target_tagger.predict([conclusion_sent] + claims_sents)
    ner_tagger.predict([conclusion_sent] + claims_sents)
    pos_tagger.predict([conclusion_sent] + claims_sents)

    tagged_claims = []
    for i, c in enumerate(claims_sents):
        tagged_claims.append({
            'text': claims[i],
            'pos': c.to_dict(tag_type='pos')['entities'],
            'named_entities': c.to_dict(tag_type='ner')['entities'],
            'targets': c.to_dict(tag_type='ct')['entities']
        })

    return {
        '_debate_id': instance['_debate_id'],
        'conclusion': {
            'text': conclusion,
            'pos': conclusion_sent.to_dict(tag_type='pos')['entities'],
            'named_entities': conclusion_sent.to_dict(tag_type='ner')['entities'],
            'targets': conclusion_sent.to_dict(tag_type='ct')['entities']
        },
        'claims': tagged_claims
    }
Example no. 19
def find_tags(user_input, keyword):
    sentence = Sentence(user_input)
    tagger.predict(sentence)
    entities = sentence.to_dict(tag_type='ner')['entities']
    for entity in entities:
        # return the label of the first entity whose text contains the keyword
        if findWholeWord(keyword)(entity['text']):
            return entity['labels'][0].value
    return ""
Example no. 20
def flair_ner(text, tagger):
    """
    Tag with Flair

    :param text: source text to tag
    :param tagger: Flair initialised with tagging model
    :return: list of tuples (text, start, end, entity label)
    """
    sentence = Sentence(text, use_tokenizer=True)
    tagger.predict(sentence)
    s = sentence.to_dict(tag_type="ner")
    ents = [(e["text"], e["start_pos"], e["end_pos"], e["type"])
            for e in s["entities"]]
    return ents
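
A quick usage sketch for flair_ner, assuming the standard pretrained Flair NER model (the concrete model used originally is not shown):

from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')  # assumed model
print(flair_ner("George Washington went to Washington.", tagger))
# e.g. [('George Washington', 0, 17, 'PER'), ('Washington', 26, 36, 'LOC')]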
Example no. 21
def text_to_features(tagger, sents):
    if not isinstance(sents, list):
        sents = nltk.sent_tokenize(sents)

    tokens = []
    tagged_sents = []
    for sent in sents:
        sent = Sentence(sent)
        tagger.predict(sent)
        # join each token with its 'ct' tag, using U+FFE8 as the separator
        tokens += [token.text + u"\uFFE8" + token.tags['ct'].value
                   for token in sent.tokens]
        tagged_sents.append(sent.to_dict(tag_type='ct'))

    return ' '.join(tokens), tagged_sents
Example no. 22
def sifrank():
    req_data = request.get_json(force=True)
    text = req_data['text']
    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # bucket entity texts by label; NORP covers nationalities, and anything
    # that matches no known label goes to ADDITIONAL
    labels = ["GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP"]
    buckets = {label: [] for label in labels + ["ADDITIONAL"]}
    for entity in output:
        label_str = str(entity["labels"])
        for label in labels:
            if label in label_str:
                buckets[label].append(entity["text"])
                break
        else:
            buckets["ADDITIONAL"].append(entity["text"])

    # deduplicate each bucket before returning
    entities = {label: list(set(texts)) for label, texts in buckets.items()}
    return jsonify(entities)
Example no. 23
def sentence_to_org(sentence):
    try:
        sentence_tokenized = Sentence(sentence)
        tagger.predict(sentence_tokenized)
        sentence_dict = sentence_tokenized.to_dict(tag_type='ner')

        org_names = []
        for entity in sentence_dict['entities']:
            if entity['type'] == 'ORG':
                org_names.append(entity['text'])

        predicted_org = most_common(org_names)
        return predicted_org
    except Exception:
        print("Did not find any organisations in the text")
Example no. 24
async def getNamedEntities(body: FLAIR_NER_MODEL):
    text = body.text
    text = re.sub('[^.,a-zA-Z0-9 \n\.]', '', text)
    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # bucket entity texts by label; NORP covers nationalities, and anything
    # that matches no known label goes to ADDITIONAL
    labels = ["GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP"]
    buckets = {label: [] for label in labels + ["ADDITIONAL"]}
    for entity in output:
        label_str = str(entity["labels"])
        for label in labels:
            if label in label_str:
                buckets[label].append(entity["text"])
                break
        else:
            buckets["ADDITIONAL"].append(entity["text"])

    # deduplicate each bucket before returning
    return {label: list(set(texts)) for label, texts in buckets.items()}
Example no. 25
def predict(model, selected_embeddings, data_file):
    """
			takes data in a form text, post_id, and saves both those plus 
			prediction results in the out file
	"""

    selected_embeddings_text = [
        key for key in selected_embeddings if selected_embeddings[key]
    ]
    selected_embeddings_text = '_'.join(selected_embeddings_text)

    print(selected_embeddings_text)

    model_dir = 'resources/taggers/' + 'to_resume_' + model + selected_embeddings_text

    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    data = pd.read_csv(f_in)
    # ,year,month,subreddit,body,clean_body,post_index
    print(data.head())

    with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug:
        with open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:
            header = "post_ID,matched,score,start_pos,end_pos\n"
            f_dis.write(header)
            f_drug.write(header)

            for i, row in tqdm.tqdm(data.iterrows(), total=data.shape[0]):
                # row['body'] holds a string-encoded list of sentences
                for r in eval(row['body']):
                    sentence = Sentence(str(r))
                    # predict tags and collect entities
                    model.predict(sentence)
                    res = sentence.to_dict(tag_type='ner')

                    for el in res['entities']:
                        csv_row = '{},"{}",{},{},{}\n'.format(
                            row['post_ID'],
                            el['text'].replace('\n', ' '),
                            el['confidence'],
                            el['start_pos'],
                            el['end_pos'])
                        if el['type'] == 'DIS':
                            f_dis.write(csv_row)
                        elif el['type'] == 'DRUG':
                            f_drug.write(csv_row)

                if i == 10:
                    break
Example no. 26
def predict(model, predict_sentence):
    sentence = Sentence(predict_sentence)
    model.predict(sentence)
    print(predict_sentence)

    # collect the character offsets of every token labelled toxic ("1")
    dic = sentence.to_dict(tag_type='tox')
    toxic_spans = []
    for token in dic['entities']:
        label = int(token['labels'][0].value)
        if label == 1:
            start_pos = token['start_pos']
            end_pos = token['end_pos']
            for i in range(start_pos, end_pos):
                toxic_spans.append(i)

    return [toxic_spans, predict_sentence]
Example no. 27
    def get_ner_entities(self, text):
        entities = []

        try:
            sentences = nltk.sent_tokenize(text)
        except Exception:
            # fall back to treating the whole text as one sentence
            sentences = [text]

        for sent in sentences:
            sentence = Sentence(sent)
            self.ontoner_tagger.predict(sentence)
            sent_tags = sentence.to_dict(tag_type='ner')
            entities.extend(sent_tags["entities"])

        return entities
Example no. 28
def createNERFiles(statFilePath, resultTxt, tagger):
    if verbose:
        print("\tCreating named entity recognized file at: " + statFilePath)
    try:
        sentence = Sentence(resultTxt)
        # predict NER tags
        tagger.predict(sentence)
    except RuntimeError as err:
        print("Runtime error: {0}".format(err))
        print("Failed at: " + statFilePath)
        return None

    taggedStr = sentence.to_tagged_string()
    details = sentence.to_dict(tag_type='ner')
    with open(statFilePath, "w") as statFile:
        statFile.write(taggedStr)
    return (taggedStr, details)
Example no. 29
def test(model, selected_embeddings):
  selected_embeddings_text = [key  for key in selected_embeddings if selected_embeddings[key]]
  selected_embeddings_text = '_'.join(selected_embeddings_text)

  print (selected_embeddings_text)

  model_dir = 'resources/taggers/' + model + selected_embeddings_text + '_fine-tuned7s'

  # load the model you trained
  model = SequenceTagger.load(model_dir + '/best-model.pt')

  sentence = Sentence("If you've been on a low calorie diet + exercise for a long time, probably you have low free T3 blood levels causing your hypo symptoms. You should ask specifically for freeT3 and freeT4 to be tested. The low conversion of T4 to T3 is your bodies way of ""protecting itself"" from any further calorie deficiet. The rest of this only matters if you do get low T3 confirmed: it is important you do not go on a T4 monotherapy, it would very likely make your situation worse because it's tricking your brain into thinking you have more then enough thyroid hormones, while your T3 deficit worsens. Either get T3 and T4 combination or no medication. Instead make sure you have enough Iodine, Selenium and Zinc in your diet and consider significantly increasing your calorie intake! It seems paradoxical but because this will eventually increase you T3 levels and basal metabolic rate it will not necessarily make you gain weight in the long term. Also dizzy spells could be low blood sugar (even if you don't who the classical symptoms of shaking/sweating.) If it is low blood sugar you need to be careful with that and make sure to get some glucose quick (both for preventing your dizzines causing accidents and also because every hypoglycemic state will stress out your metabolic system, autoamplifying the low T3)")

  # # predict tags and print
  model.predict(sentence)

  print(sentence.to_dict(tag_type='ner'))
Example no. 30
def generateTextToNer(text):
    """
    Returns a dictionary with the following keys:
    'ents' contains a list of entities, 'text' contains the entire text string,
    and 'passToRelation' contains a list of possible combinations of two 
    entities in a sentence.
    """
    clean_text = normalize_corpus([text], to_lower=False, to_remove_html=False,
                                  to_remove_accent=True, to_expand_contractions=True,
                                  to_lemmatize=False, to_remove_special=False,
                                  to_remove_stopword=False)
    clean_text = clean_text[0]
    idTracker = defaultdict(int)
    res = {'ents': [], 'text': '', 'passToRelation': []}

    lst_sentences = nltk.sent_tokenize(clean_text)
    prevLen = 0
    for s in lst_sentences:
        sentence = Sentence(s, use_tokenizer=True)
        tagger_fast.predict(sentence)
        dict_flair = sentence.to_dict(tag_type='ner')
        for idx in dict_flair['entities']:
            idx['id'] = idTracker[idx['text']]
            idTracker[idx['text']] += 1
            idx['end'] = idx.pop('end_pos')
            idx['start'] = idx.pop('start_pos')
            full_label = idx.pop('labels')[0]
            full_label = str(full_label)
            idx['type'] = full_label[:full_label.find(' ')]
        dict_flair['ents'] = dict_flair.pop('entities')
        combination = combine(dict_flair)
        res['passToRelation'].extend(combination)

        for idx in dict_flair['ents']:
            idx['end'] = idx['end'] + prevLen
            idx['start'] = idx['start'] + prevLen

        dict_flair.pop('labels')
        res['text'] += ' '+dict_flair['text']
        res['ents'].extend(dict_flair['ents'])
        prevLen += len(dict_flair['text']) + 1

    res['text'] = res['text'].strip()

    return res
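
The combine helper used above is not shown. Given the docstring ('passToRelation' holds possible combinations of two entities in a sentence), a plausible sketch is plain pairwise combinations (assumed behavior, not the original code):

from itertools import combinations

def combine(dict_flair):
    # every unordered pair of entities found in one sentence (assumed)
    return list(combinations(dict_flair['ents'], 2))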