Python Sentence.to_dict примеры, flair.data.Sentence.to_dict Python примеры использования

Пример #1

0

Показать файл

    def predict(query: str):
        """Tag ``query`` with the NER model and return the merged entities.

        Args:
            query: Text to analyse.

        Returns:
            ``{'success': True, 'data': [...]}`` where every item holds an
            entity ``text`` and its non-empty merged ``labels``; otherwise
            ``{'success': False, 'message': ...}`` when the query is missing
            or an exception is raised while tagging.
        """
        if query is None or not len(query):
            return {'success': False, 'message': 'query is required'}

        try:
            sentence = Sentence(query)
            TaggerModel.load_model()
            TaggerModel._model.predict(sentence)

            # Collect the labels of both tag types under the entity text.
            labels_by_text = defaultdict(list)
            for tag_type in ('ner-fast', 'ner-ontonotes-fast'):
                for span in sentence.to_dict(tag_type=tag_type).get('entities'):
                    labels_by_text[span['text']].extend(span['labels'])

            entities = []
            for text, raw_labels in labels_by_text.items():
                merged = TaggerModel.map_and_merge_labels(
                    [label.to_dict() for label in raw_labels],
                    ['PERSON', 'MISC', 'DATE']
                )
                # Entities left without any label after merging are dropped.
                if len(merged) > 0:
                    entities.append({'text': text, 'labels': merged})
            return {'success': True, 'data': entities}
        except RuntimeError as err:
            logging.error(err, exc_info=True)
            return {'success': False, 'message': "Runtime Error: {0}".format(err)}
        except Exception as err:
            logging.error(err, exc_info=True)
            return {'success': False, 'message': 'exception occurred'}

Пример #2

0

Показать файл

Файл: test_data.py Проект: azawalich/flair

def test_sentence_to_dict():
    """Check the ``text``, ``entities`` and ``labels`` of ``to_dict('ner')``."""
    first_text = 'Zalando Research is   located in Berlin, the capital of Germany.'
    first = Sentence(first_text, labels=['business'], use_tokenizer=True)
    # One two-token ORG span followed by two single-token LOC spans.
    for position, tag in ((0, 'B-ORG'), (1, 'E-ORG'), (5, 'S-LOC'), (10, 'S-LOC')):
        first[position].add_tag('ner', tag)
    first_result = first.to_dict('ner')
    assert first_text == first_result['text']
    assert 'Zalando Research' == first_result['entities'][0]['text']
    assert 'Berlin' == first_result['entities'][1]['text']
    assert 'Germany' == first_result['entities'][2]['text']
    assert 1 == len(first_result['labels'])

    second_text = 'Facebook, Inc. is a company, and Google is one as well.'
    second = Sentence(second_text, use_tokenizer=True)
    # One three-token ORG span and one single-token ORG span.
    for position, tag in ((0, 'B-ORG'), (1, 'I-ORG'), (2, 'E-ORG'), (8, 'S-ORG')):
        second[position].add_tag('ner', tag)
    second_result = second.to_dict('ner')
    assert second_text == second_result['text']
    assert 'Facebook, Inc.' == second_result['entities'][0]['text']
    assert 'Google' == second_result['entities'][1]['text']
    assert 0 == len(second_result['labels'])

Пример #3

0

Показать файл

def predict(sentence):
    """Return a signed sentiment score for ``sentence``.

    The score is the confidence of the first predicted label rounded to
    three decimals: kept positive when that label's value is ``'POSITIVE'``
    and negated for any other value. An empty string yields ``0`` without
    running the classifier.
    """
    if sentence == "":
        return 0
    tagged = Sentence(sentence)
    classifier.predict(tagged)
    is_positive = tagged.labels[0].to_dict()['value'] == 'POSITIVE'
    confidence = tagged.to_dict()['labels'][0]['confidence']
    return round(confidence if is_positive else -confidence, 3)

Пример #4

0

Показать файл

def entity_recognition(text):
    """Return the label and confidence of the first named entity in ``text``.

    Non-string input is treated as an empty document, and the document is
    title-cased before it is tagged.

    Args:
        text: Text to analyse; anything that is not a ``str`` is ignored.

    Returns:
        A ``(entity, confidence)`` tuple holding the value of the first label
        of the first entity and its confidence rounded to two decimals, or
        ``('', '')`` when no labelled entity is found.
    """
    doc = text if isinstance(text, str) else ''

    sentence = Sentence(doc.title())
    model.predict(sentence)
    entities = sentence.to_dict(tag_type='ner')['entities']

    # Guard on the entity list itself: indexing [0] unconditionally raised
    # IndexError whenever nothing was tagged, so the fallback never ran.
    if not entities or not entities[0]['labels']:
        return '', ''

    top_label = entities[0]['labels'][0].to_dict()
    return top_label['value'], round(top_label['confidence'], 2)

Пример #5

0

Показать файл

Файл: HindiNer.py Проект: avinsit123/HindiNLP

    def Predict_textfile(self, textfile, is_path=False, path=""):
        """Tag every line of ``textfile`` and write its entities to a new file.

        The output path is ``textfile`` with its last four characters
        replaced by ``__NER.txt``. For each input line, one
        ``text<TAB>type`` row is written per entity, followed by a blank line.

        Args:
            textfile: Path of the text file to tag.
            is_path: When it equals ``False`` the checkpoint at
                ``self.checkpoint_path`` is used (and downloaded first if
                ``self.checkpoint_download`` is set); otherwise ``path`` is
                loaded.
            path: Checkpoint location, only used when ``is_path`` is set.
        """
        if is_path == False:
            if self.checkpoint_download == False:
                path = self.checkpoint_path
                print("Checkpoint File already present")
            else:
                # Create the whole directory chain in one idempotent call.
                os.makedirs(self.download_dir + "/resources/tagger/example-ner",
                            exist_ok=True)
                print("Checkpoint File will be downloaded from ....")
                download_file_from_google_drive(self.google_id,
                                                self.checkpoint_path)
                print("Checkpoint Downloaded successfully")
                path = self.checkpoint_path
                # Remember that the checkpoint no longer needs downloading.
                self.checkpoint_download = False

        tagger = SequenceTagger.load(path)

        dest_path = textfile[:-4] + "__NER.txt"
        # Both handles are context-managed: the output file used to be left
        # open, so buffered rows could be lost and the descriptor leaked.
        with open(dest_path, "w") as out_f, open(textfile, "r") as f:
            for line in f:
                sentence = Sentence(line)
                tagger.predict(sentence)
                for word in sentence.to_dict(tag_type='ner')["entities"]:
                    out_f.write(word['text'] + "\t" + word['type'] + "\n")
                out_f.write("\n")

Пример #6

0

Показать файл

Файл: module_entity.py Проект: Huffon/factsumm

        def extract_entities_flair(sentences: List[str]):
            """Tag each sentence and return its entities without repeated texts.

            Returns one list per input sentence. Every item holds the entity
            ``word``, the value of its first label as ``entity`` and its
            ``start`` / ``end`` positions. Within a sentence only the first
            entity with a given text is kept.
            """
            per_sentence = []

            for raw in sentences:
                tagged = Sentence(raw)
                ner.predict(tagged)

                seen_texts = set()
                unique = []
                for span in tagged.to_dict(tag_type="ner")["entities"]:
                    if span["text"] in seen_texts:
                        continue
                    unique.append({
                        "word": span["text"],
                        "entity": span["labels"][0].value,
                        "start": span["start_pos"],
                        "end": span["end_pos"],
                    })
                    seen_texts.add(span["text"])

                per_sentence.append(unique)

            return per_sentence

Пример #7

0

Показать файл

def get_flair_entities(input, score_threshold=0.9):
    """Return confident NER entity texts, joining directly adjacent ones.

    Args:
        input: Text to tag.
        score_threshold: Entities whose first label scores below this value
            are skipped.

    Returns:
        A list of entity strings. An entity that starts exactly one
        character after the previously kept entity ended is merged into that
        previous entry, joined by a single space.
    """
    sentence = Sentence(input, use_tokenizer=True)
    model.predict(sentence)

    entities = []
    # None means "no entity kept yet". Starting at 0 made an entity at
    # offset 1 look adjacent and deleted from an empty list.
    prev_end_pos = None
    for entity in sentence.to_dict(tag_type='ner')['entities']:

        if entity['labels'][0].score < score_threshold:
            continue

        logger.info('flair entity detected: ' + str(entity))
        if prev_end_pos is not None and prev_end_pos + 1 == entity['start_pos']:
            # Extend only the entry being replaced. The old accumulator was
            # never reset, so it dragged every earlier entity into the merge.
            final_entity = entities.pop() + ' ' + entity['text']
        else:
            final_entity = entity['text']

        entities.append(final_entity.strip())
        prev_end_pos = entity['end_pos']

    return entities

Пример #8

0

Показать файл

Файл: flair_ner.py Проект: eba-diary/thesis_app

def tag_entities(text):
    """Split ``text`` into sentences and return the NER dict of each one."""

    def _tag(raw_sentence):
        # Build, tag and serialise a single sentence.
        tagged = Sentence(raw_sentence)
        tagger.predict(tagged)
        return tagged.to_dict(tag_type='ner')

    return [_tag(raw_sentence) for raw_sentence in sent_tokenize(text)]

Пример #9

0

Показать файл

Файл: OurResumeParser.py Проект: devilsocket/function_vault

def Resume(text):
    """Extract contact details and named entities from resume text.

    Non-empty lines are joined with spaces (tabs become spaces) before
    processing. The result has a ``content`` entry plus one
    ``{'count', 'source'}`` section each for ``person``, ``location``,
    ``organization``, ``emails`` and ``mobiles``.
    """
    text = ' '.join(
        line.replace('\t', ' ') for line in text.split('\n') if line)

    data = {'content': text}
    for section in ('person', 'location', 'organization', 'emails', 'mobiles'):
        data[section] = {'count': 0, 'source': []}

    def _record(section, value):
        # Append one finding to a section and bump its counter.
        data[section]["count"] += 1
        data[section]["source"].append(value)

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    persons = extract_name(text)
    for email in emails:
        _record("emails", email)
    for mobile in mobiles:
        _record("mobiles", mobile)
    for person in persons:
        _record("person", person)

    sentence = Sentence(text)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)

    # NER type -> (target section, confidence the entity must exceed).
    rules = {
        "PER": ("person", 0.70),
        "LOC": ("location", 0.60),
        "ORG": ("organization", 0.95),
    }
    for item in sentence.to_dict(tag_type='ner')['entities']:
        rule = rules.get(item["type"])
        if rule is not None and item['confidence'] > rule[1]:
            _record(rule[0], item)
    return data

Пример #10

0

Показать файл

Файл: bot.py Проект: datahack-ru/Quiz-guys-bot

def do_NER(context):
    """Return the distinct entity texts found in ``context``, capitalized.

    Falls back to ``get_key_words(context)`` when no entity is tagged. Texts
    are lower-cased before duplicates are removed.
    """
    tagged = Sentence(context)
    NERmodel.predict(tagged)
    answers = [item['text']
               for item in tagged.to_dict(tag_type='ner')['entities']]
    if not answers:
        answers = get_key_words(context)
    distinct = {answer.lower() for answer in answers}
    return [answer.capitalize() for answer in distinct]

Пример #11

0

Показать файл

Файл: server.py Проект: ochiba-leaf/feed_flask

def get_score(text):
    """Return the first label's confidence if it is ``__label__O``, else 0."""
    sentence = Sentence(text, use_tokenizer=japanese_tokenizer)
    classifier.predict(sentence)

    first_label = sentence.to_dict()["labels"][0]
    if first_label["value"] == "__label__O":
        return first_label["confidence"]
    return 0

Пример #12

0

Показать файл

Файл: ner.py Проект: ruanchaves/BLINK-1

 def predict(self, sentences):
     """Tag every sentence and collect the NER mentions of all of them.

     Each mention dict gets a ``sent_idx`` key with the index of the
     sentence it came from. Returns the original ``sentences`` together
     with the flat ``mentions`` list.
     """
     mentions = []
     for position, raw in enumerate(sentences):
         tagged = Sentence(raw, use_tokenizer=True)
         self.model.predict(tagged)
         for mention in tagged.to_dict(tag_type="ner")["entities"]:
             mention["sent_idx"] = position
             mentions.append(mention)
     return {"sentences": sentences, "mentions": mentions}

Пример #13

0

Показать файл

Файл: OurListParser.py Проект: devilsocket/function_vault

def ListParser(text):
    """Extract contact details and named entities from tab-separated text.

    Only lines containing at least one tab are tagged: their cells are
    joined with spaces and terminated with ``".\\n"``. E-mails and mobile
    numbers are searched in the full ``text``. The result has a ``content``
    entry plus one ``{'count', 'source'}`` section each for ``person``,
    ``location``, ``organization``, ``emails`` and ``mobiles``.
    """
    data = {'content': text}
    for section in ('person', 'location', 'organization', 'emails', 'mobiles'):
        data[section] = {'count': 0, 'source': []}

    def _record(section, value):
        # Append one finding to a section and bump its counter.
        data[section]["count"] += 1
        data[section]["source"].append(value)

    rows = (line.split('\t') for line in text.split('\n'))
    arra = "".join(" ".join(cells) + ".\n" for cells in rows if len(cells) > 1)

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    for email in emails:
        _record("emails", email)
    for mobile in mobiles:
        _record("mobiles", mobile)

    sentence = Sentence(arra)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)

    # NER type -> target section; all three share the same 0.90 threshold.
    sections = {"PER": "person", "LOC": "location", "ORG": "organization"}
    for item in sentence.to_dict(tag_type='ner')['entities']:
        section = sections.get(item["type"])
        if section is not None and item['confidence'] > 0.90:
            _record(section, item)
    return data

Пример #14

0

Показать файл

Файл: test_data.py Проект: ziyonghong/flair

def test_sentence_to_dict():
    """Check the ``text``, ``entities`` and ``labels`` of ``to_dict("ner")``."""
    first_text = "Zalando Research is   located in Berlin, the capital of Germany."
    first = Sentence(
        first_text,
        labels=["business"],
        use_tokenizer=segtok_tokenizer,
    )

    # bioes tags
    for position, tag in ((0, "B-ORG"), (1, "E-ORG"), (5, "S-LOC"), (10, "S-LOC")):
        first[position].add_tag("ner", tag)

    first_result = first.to_dict("ner")

    assert first_text == first_result["text"]
    assert "Zalando Research" == first_result["entities"][0]["text"]
    assert "Berlin" == first_result["entities"][1]["text"]
    assert "Germany" == first_result["entities"][2]["text"]
    assert 1 == len(first_result["labels"])

    second_text = "Facebook, Inc. is a company, and Google is one as well."
    second = Sentence(second_text, use_tokenizer=segtok_tokenizer)

    # bioes tags
    for position, tag in ((0, "B-ORG"), (1, "I-ORG"), (2, "E-ORG"), (8, "S-ORG")):
        second[position].add_tag("ner", tag)

    second_result = second.to_dict("ner")

    assert second_text == second_result["text"]
    assert "Facebook, Inc." == second_result["entities"][0]["text"]
    assert "Google" == second_result["entities"][1]["text"]
    assert 0 == len(second_result["labels"])

Пример #15

0

Показать файл

Файл: reader.py Проект: kiminh/Inference-Masked-Loss

 def posTagFinder(self):
     """Collect the POS tag types seen in 1500 randomly drawn samples.

     Each draw tags the ``'sentence'`` of one ``self.data`` item with
     ``self.tagger`` and adds the not-yet-known ``'type'`` values to
     ``self.postags``, which is de-duplicated at the end.
     """
     for _ in range(1500):
         sample = random.choice(self.data)
         tagged = Sentence(sample['sentence'])
         self.tagger.predict(tagged)
         spans = tagged.to_dict(tag_type='pos')['entities']
         unseen = [
             span['type'] for span in spans
             if span['type'] not in self.postags
         ]
         self.postags.extend(unseen)
     self.postags = list(set(self.postags))
Пример #16

0

Показать файл

def predict(model, selected_embeddings, data_file):
    """Tag every line of the bz2 input and write DIS / DRUG matches to CSV.

    One row ``post_ID,matched,score,start_pos,end_pos`` is written per
    matching entity, to the ``_dis.csv`` or ``_drug.csv`` variant of
    ``f_out``.

    NOTE(review): ``data_file`` is never read; the paths come from the
    module-level ``f_in`` / ``f_out``, and the ``model`` argument is ignored
    in favour of a tagger loaded from a fixed directory. Confirm this is
    intended.
    """
    enabled_names = '_'.join(
        name for name in selected_embeddings if selected_embeddings[name])
    print(enabled_names)

    model_dir = 'resources/taggers/CADECglove_char_flair'  #

    # load the model you trained
    tagger = SequenceTagger.load(model_dir + '/best-model.pt')

    processed = 0

    with bz2.open(f_in, 'rt') as f, \
            open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug, \
            open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:

        header = "post_ID,matched,score,start_pos,end_pos\n"
        f_dis.write(header)
        f_drug.write(header)

        # Entity type -> file that receives its rows; other types are skipped.
        sinks = {'DIS': f_dis, 'DRUG': f_drug}

        for line in tqdm(f, total=get_num_lines(f_in)):
            if len(line) == 0:
                continue
            line_dict = process_txt(line)
            processed += 1

            body = line_dict['text']
            tweet_id = line_dict['id']

            sentence = Sentence(str(body))
            tagger.predict(sentence)
            res = sentence.to_dict(tag_type='ner')

            for el in res['entities']:
                sink = sinks.get(el['type'])
                if sink is None:
                    continue
                sink.write(
                    tweet_id + ',"' + el['text'].replace('\n', ' ') + '",'
                    + str(el['confidence']) + ',' + str(el['start_pos'])
                    + ',' + str(el['end_pos']) + '\n')

Пример #17

0

Показать файл

Файл: automated_flair12.py Проект: anderhos/MalKG

def flair12NER(title, text):
    """Tag ``text`` and assemble a document dict from the result.

    The returned dict has the ``vertexSet`` built by
    ``getDocREDVertexSetFromFlairEntities``, the given ``title`` and
    ``sents``, the word texts of each sentence from ``getSpaCySentences``.
    """
    tagged = Sentence(text)
    flair12class.predict(tagged)
    ner_dict = tagged.to_dict(tag_type="ner")
    sentences = getSpaCySentences(ner_dict["text"])
    vertex_set = getDocREDVertexSetFromFlairEntities(ner_dict["entities"],
                                                     sentences)
    words_per_sentence = [[word.text for word in sentence]
                          for sentence in sentences]
    return {
        "vertexSet": vertex_set,
        "title": title,
        "sents": words_per_sentence,
    }

Пример #18

0

Показать файл

Файл: idebate_preprocess.py Проект: webis-de/acl20-target-inference-in-conclusion-generation

def tag_instance_using_flair(target_tagger, ner_tagger, pos_tagger, instance):
    """Tag the conclusion and claims of one instance with three taggers.

    ``instance`` is an ``(identifier, record)`` pair; the identifier is only
    printed. The record's ``_claim`` and every value of its
    ``_argument_sentences`` are tagged, and each is returned with its
    ``pos``, ``named_entities`` (``ner``) and ``targets`` (``ct``) entities.
    """
    print('processing:', instance[0])
    record = instance[1]

    conclusion = record['_claim']
    claims = list(record['_argument_sentences'].values())

    # Predict targets, named entities and POS tags, in that order.
    conclusion_sent = Sentence(conclusion)
    claims_sents = [Sentence(claim) for claim in claims]
    for tagger in (target_tagger, ner_tagger, pos_tagger):
        tagger.predict([conclusion_sent] + claims_sents)

    def _annotations(text, sentence):
        # Bundle the three entity layers of one tagged sentence.
        return {
            'text': text,
            'pos': sentence.to_dict(tag_type='pos')['entities'],
            'named_entities': sentence.to_dict(tag_type='ner')['entities'],
            'targets': sentence.to_dict(tag_type='ct')['entities'],
        }

    tagged_claims = [
        _annotations(claim, claim_sent)
        for claim, claim_sent in zip(claims, claims_sents)
    ]

    return {
        '_debate_id': record['_debate_id'],
        'conclusion': _annotations(conclusion, conclusion_sent),
        'claims': tagged_claims
    }

Пример #19

0

Показать файл

def find_tags(user_input,keyword):
    """Return the label of the first entity whose text matches ``keyword``.

    The match is decided by ``findWholeWord(keyword)`` applied to the entity
    text. Returns the value of that entity's first label, or ``""`` when no
    entity matches.
    """
    sentence = Sentence(user_input)
    tagger.predict(sentence)
    for entity in sentence.to_dict(tag_type='ner')['entities']:
        labels = entity['labels']
        if findWholeWord(keyword)(entity['text']):
            return labels[0].value

    return ""

Пример #20

0

Показать файл

Файл: ner.py Проект: mattmcgrattan/quick_nlp_compare

def flair_ner(text, tagger):
    """
    Run a Flair tagger over the text and flatten its entities

    :param text: source text to tag
    :param tagger: Flair initialised with tagging model
    :return: list of tuples (text, start, end, entity label)
    """
    sentence = Sentence(text, use_tokenizer=True)
    tagger.predict(sentence)
    ents = []
    for span in sentence.to_dict(tag_type="ner")["entities"]:
        ents.append(
            (span["text"], span["start_pos"], span["end_pos"], span["type"]))
    return ents

Пример #21

0

Показать файл

Файл: claim_target_tagger.py Проект: webis-de/acl20-target-inference-in-conclusion-generation

def text_to_features(tagger, sents):
    """Tag sentences and return token features plus the tagged dicts.

    ``sents`` is split with ``nltk.sent_tokenize`` unless it is exactly a
    ``list``. Returns a tuple of (a) all tokens joined by spaces, each
    written as token text, the ``\\uFFE8`` separator and the value of its
    ``ct`` tag, and (b) the ``to_dict(tag_type='ct')`` result per sentence.
    """
    if type(sents) != list:
        sents = nltk.sent_tokenize(sents)

    annotated_tokens = []
    tagged_sents = []
    for raw in sents:
        tagged = Sentence(raw)
        tagger.predict(tagged)
        for token in tagged.tokens:
            annotated_tokens.append(
                token.text + u"\uFFE8" + token.tags['ct'].value)
        tagged_sents.append(tagged.to_dict(tag_type='ct'))

    return ' '.join(annotated_tokens), tagged_sents

Пример #22

0

Показать файл

Файл: flask-api-flairner.py Проект: Aayushpatel007/Doc_Profiler

def sifrank():
    """Tag the ``text`` of the JSON request and return entities by category.

    Each entity goes into the first category whose name occurs in the string
    form of its labels, or into ``ADDITIONAL`` when none does. Every
    category holds distinct entity texts.
    """
    req_data = request.get_json(force=True)
    sentence = Sentence(req_data['text'])
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']
    print(output)

    # Categories in the order they are tested; the first match wins.
    # NORP stands for nationalities.
    priority = ("GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP")
    buckets = {category: [] for category in priority}
    buckets["ADDITIONAL"] = []
    for entity in output:
        label_text = str(entity["labels"])
        category = next(
            (name for name in priority if name in label_text), "ADDITIONAL")
        buckets[category].append(entity["text"])

    # Key order of the response payload.
    response_order = ('GPE', 'ORG', "PERSON", "EVENT", "DATE", "MONEY",
                      "NORP", "LOC", "ADDITIONAL")
    entities = {
        category: list(set(buckets[category])) for category in response_order
    }

    return jsonify(entities)

Пример #23

0

Показать файл

def sentence_to_org(sentence):
    """Return the most frequent ORG entity text found in ``sentence``.

    Args:
        sentence: Text to tag.

    Returns:
        The result of ``most_common`` over the ORG entity texts, or ``None``
        (after printing a notice) when tagging or that lookup fails.
    """
    try:
        tagged = Sentence(sentence)
        tagger.predict(tagged)
        org_names = [
            entity['text']
            for entity in tagged.to_dict(tag_type='ner')['entities']
            if entity['type'] == 'ORG'
        ]
        return most_common(org_names)
    except Exception:
        # Still best effort, but no longer a bare ``except:``, which also
        # intercepted KeyboardInterrupt and SystemExit.
        print("Did not found any organisations from the text")
        return None

Пример #24

0

Показать файл

Файл: fast_api_flair.py Проект: Aayushpatel007/Doc_Profiler

async def getNamedEntities(body: FLAIR_NER_MODEL):
    """Tag ``body.text`` and return its distinct entities grouped by category.

    Characters other than ASCII letters, digits, spaces, newlines, ``.`` and
    ``,`` are removed before tagging. Each entity goes into the first
    category whose name occurs in the string form of its labels, or into
    ``ADDITIONAL`` when none does.

    Args:
        body: Request model; only its ``text`` attribute is read.

    Returns:
        A dict mapping each category name to a list of distinct entity texts.
    """
    # Raw string: the previous non-raw literal contained the invalid escape
    # "\.", which newer Python versions warn about. The character class is
    # the same; only the redundant second "." was dropped.
    text = re.sub(r'[^.,a-zA-Z0-9 \n]', '', body.text)
    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # Categories in the order they are tested; the first match wins.
    # NORP stands for nationalities.
    priority = ("GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP")
    buckets = {category: [] for category in priority}
    buckets["ADDITIONAL"] = []
    for entity in output:
        label_text = str(entity["labels"])
        category = next(
            (name for name in priority if name in label_text), "ADDITIONAL")
        buckets[category].append(entity["text"])

    # Key order of the returned payload.
    response_order = ('GPE', 'ORG', "PERSON", "EVENT", "DATE", "MONEY",
                      "NORP", "LOC", "ADDITIONAL")
    return {
        category: list(set(buckets[category])) for category in response_order
    }

Пример #25

0

Показать файл

Файл: predict_flair_CADEC.py Проект: sanja7s/MedRed

def predict(model, selected_embeddings, data_file):
    """Tag the ``body`` texts of a CSV and write DIS / DRUG matches to CSV.

    One row ``post_ID,matched,score,start_pos,end_pos`` is written per
    matching entity, to the ``_dis.csv`` or ``_drug.csv`` variant of
    ``f_out``. Processing stops after the row whose index equals 10.

    NOTE(review): ``data_file`` is never read; the paths come from the
    module-level ``f_in`` / ``f_out``. Confirm this is intended.
    """
    enabled_names = '_'.join(
        name for name in selected_embeddings if selected_embeddings[name])
    print(enabled_names)

    model_dir = 'resources/taggers/' + 'to_resume_' + model + enabled_names

    # load the model you trained
    tagger = SequenceTagger.load(model_dir + '/best-model.pt')

    data = pd.read_csv(f_in)
    # ,year,month,subreddit,body,clean_body,post_index
    print(data.head())

    with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug, \
            open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:
        header = "post_ID,matched,score,start_pos,end_pos\n"
        f_dis.write(header)
        f_drug.write(header)

        # Entity type -> file that receives its rows; other types are skipped.
        sinks = {'DIS': f_dis, 'DRUG': f_drug}

        for i, row in tqdm.tqdm(data.iterrows(), total=data.shape[0]):
            # NOTE(review): eval() executes whatever the 'body' column
            # contains; this is only safe for trusted input files.
            for r in eval(row['body']):
                sentence = Sentence(str(r))
                tagger.predict(sentence)
                res = sentence.to_dict(tag_type='ner')

                for el in res['entities']:
                    sink = sinks.get(el['type'])
                    if sink is None:
                        continue
                    sink.write(
                        row['post_ID'] + ',"' + el['text'].replace('\n', ' ')
                        + '",' + str(el['confidence']) + ','
                        + str(el['start_pos']) + ',' + str(el['end_pos'])
                        + '\n')

            if i == 10:
                break

Пример #26

0

Показать файл

Файл: classifier_baseline.py Проект: AJSchelhaas/shared_task_project

def predict(model, predict_sentence):
    """Return the character offsets covered by spans labelled ``1``.

    The sentence is tagged with ``model`` and printed. For every ``tox``
    entity whose first label value converts to ``1``, each offset from
    ``start_pos`` up to (not including) ``end_pos`` is collected.

    Returns:
        ``[toxic_spans, predict_sentence]``.
    """
    sentence = Sentence(predict_sentence)
    model.predict(sentence)
    print(predict_sentence)

    toxic_spans = []
    for span in sentence.to_dict(tag_type='tox')['entities']:
        if int(span['labels'][0].value) != 1:
            continue
        toxic_spans.extend(range(span['start_pos'], span['end_pos']))

    return [toxic_spans, predict_sentence]

Пример #27

0

Показать файл

Файл: ner_recognizer.py Проект: selimfirat/nino-server

    def get_ner_entities(self, text):
        """Return the NER entities of every sentence in ``text``.

        Args:
            text: Text to split into sentences and tag.

        Returns:
            A flat list with the ``entities`` of each tagged sentence.
        """
        try:
            sentences = nltk.sent_tokenize(text)
        except Exception:
            # Best effort: if splitting fails, tag the text as one sentence.
            # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            sentences = [text]

        print(sentences)

        entities = []
        for sent in sentences:
            sentence = Sentence(sent)
            self.ontoner_tagger.predict(sentence)
            entities.extend(sentence.to_dict(tag_type='ner')["entities"])

        return entities

Пример #28

0

Показать файл

def createNERFiles(statFilePath, resultTxt, tagger):
    """Tag ``resultTxt`` and write its tagged string to ``statFilePath``.

    Args:
        statFilePath: Path of the file to (over)write.
        resultTxt: Text to tag.
        tagger: Object whose ``predict`` is called on the sentence.

    Returns:
        A ``(taggedStr, details)`` tuple: the sentence's tagged string and
        its ``to_dict(tag_type='ner')`` result.

    Raises:
        RuntimeError: When the sentence itself cannot be built. A
            ``RuntimeError`` from ``tagger.predict`` is only reported, and
            the sentence is written as it stands.
    """
    if verbose:
        print("\tCreating named entity recognized file at: " + statFilePath)

    sentence = None
    # The context manager closes the file even when something below raises;
    # the handle used to leak on any error.
    with open(statFilePath, "w") as statFile:
        try:
            sentence = Sentence(resultTxt)
            # predict NER tags
            tagger.predict(sentence)
        except RuntimeError as err:
            print("Runtime error: {0}".format(err))
            print("Failed at: " + statFilePath)
            if sentence is None:
                # Nothing to serialise: re-raise the real error rather than
                # failing later on an unbound ``sentence``.
                raise

        taggedStr = sentence.to_tagged_string()
        details = sentence.to_dict(tag_type='ner')
        statFile.write(taggedStr)
    return (taggedStr, details)

Пример #29

0

Показать файл

Файл: train7s.py Проект: sanja7s/MedRed

def test(model, selected_embeddings):
    """Load a fine-tuned tagger and print the NER dict of a sample text.

    The checkpoint directory name is built from ``model``, the names of the
    enabled entries of ``selected_embeddings`` joined by ``_`` and the
    ``_fine-tuned7s`` suffix.
    """
    enabled_names = '_'.join(
        name for name in selected_embeddings if selected_embeddings[name])

    print (enabled_names)

    model_dir = 'resources/taggers/' + model + enabled_names + '_fine-tuned7s'

    # load the model you trained
    tagger = SequenceTagger.load(model_dir + '/best-model.pt')

    sample_text = "If you've been on a low calorie diet + exercise for a long time, probably you have low free T3 blood levels causing your hypo symptoms. You should ask specifically for freeT3 and freeT4 to be tested. The low conversion of T4 to T3 is your bodies way of ""protecting itself"" from any further calorie deficiet. The rest of this only matters if you do get low T3 confirmed: it is important you do not go on a T4 monotherapy, it would very likely make your situation worse because it's tricking your brain into thinking you have more then enough thyroid hormones, while your T3 deficit worsens. Either get T3 and T4 combination or no medication. Instead make sure you have enough Iodine, Selenium and Zinc in your diet and consider significantly increasing your calorie intake! It seems paradoxical but because this will eventually increase you T3 levels and basal metabolic rate it will not necessarily make you gain weight in the long term. Also dizzy spells could be low blood sugar (even if you don't who the classical symptoms of shaking/sweating.) If it is low blood sugar you need to be careful with that and make sure to get some glucose quick (both for preventing your dizzines causing accidents and also because every hypoglycemic state will stress out your metabolic system, autoamplifying the low T3)"
    sentence = Sentence(sample_text)

    # predict tags and print
    tagger.predict(sentence)

    print(sentence.to_dict(tag_type='ner'))

Пример #30

0

Показать файл

def generateTextToNer(text):
    """
    Tag the sentences of ``text`` and merge them into one result dict.

    Returns a dictionary with the following keys: 'ents' holds the entities
    of all sentences, 'text' the sentence texts joined by single spaces, and
    'passToRelation' whatever ``combine`` yields for each sentence. Entity
    'start' / 'end' offsets are shifted so they refer to the joined 'text'.
    """
    clean_text = normalize_corpus([text], to_lower=False, to_remove_html=False,
                                  to_remove_accent=True, to_expand_contractions=True,
                                  to_lemmatize=False, to_remove_special=False,
                                  to_remove_stopword=False)[0]
    # Running count per entity text; used as the entity 'id'.
    occurrences = defaultdict(int)
    res = {'ents': [], 'text': '', 'passToRelation': []}

    offset = 0
    for raw in nltk.sent_tokenize(clean_text):
        sentence = Sentence(raw, use_tokenizer=True)
        tagger_fast.predict(sentence)
        dict_flair = sentence.to_dict(tag_type='ner')
        for ent in dict_flair['entities']:
            ent['id'] = occurrences[ent['text']]
            occurrences[ent['text']] += 1
            ent['end'] = ent.pop('end_pos')
            ent['start'] = ent.pop('start_pos')
            label_text = str(ent.pop('labels')[0])
            # Keep what precedes the first space of the label's string form.
            ent['type'] = label_text[:label_text.find(' ')]
        dict_flair['ents'] = dict_flair.pop('entities')
        # combine() receives sentence-relative offsets; they are shifted
        # only afterwards.
        res['passToRelation'].extend(combine(dict_flair))

        for ent in dict_flair['ents']:
            ent['end'] += offset
            ent['start'] += offset

        dict_flair.pop('labels')
        res['text'] += ' ' + dict_flair['text']
        res['ents'].extend(dict_flair['ents'])
        # +1 accounts for the space inserted between sentences.
        offset += len(dict_flair['text']) + 1

    res['text'] = res['text'].strip()

    return res

Python Sentence.to_dict примеры использования