Example no. 1
    def predict(query: str):

        if not query:
            return {'success': False, 'message': 'query is required'}

        try:
            sentence = Sentence(query)
            TaggerModel.load_model()
            TaggerModel._model.predict(sentence)

            # merge entity labels from both taggers, keyed by entity text
            temp = defaultdict(list)
            for entity in sentence.to_dict(tag_type='ner-fast').get('entities'):
                temp[entity['text']].extend(entity['labels'])
            for entity in sentence.to_dict(tag_type='ner-ontonotes-fast').get('entities'):
                temp[entity['text']].extend(entity['labels'])
            ner_entities = [{'text': text, 'labels': labels}
                            for text, labels in temp.items()]

            # keep only entities that still have labels after mapping/merging
            entities = []
            for item in ner_entities:
                labels = TaggerModel.map_and_merge_labels(
                    [label.to_dict() for label in item['labels']],
                    ['PERSON', 'MISC', 'DATE']
                )
                if labels:
                    entities.append({'text': item['text'], 'labels': labels})
            return {'success': True, 'data': entities}
        except RuntimeError as e:
            logging.error(e, exc_info=True)
            return {'success': False, 'message': "Runtime Error: {0}".format(e)}
        except Exception as e:
            logging.error(e, exc_info=True)
            return {'success': False, 'message': 'exception occurred'}
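
Example no. 1 relies on a TaggerModel helper that is not shown. Below is a minimal sketch of what it might look like, assuming _model lazily caches a Flair MultiTagger covering both tag types used above and map_and_merge_labels keeps only an allow-list of label values; the names and behavior are assumptions, not the original implementation.

from flair.models import MultiTagger

class TaggerModel:
    # hypothetical helper; the real implementation is not part of the example
    _model = None

    @classmethod
    def load_model(cls):
        # lazily load and cache both taggers on first use (assumed behavior)
        if cls._model is None:
            cls._model = MultiTagger.load(['ner-fast', 'ner-ontonotes-fast'])

    @staticmethod
    def map_and_merge_labels(label_dicts, keep_values):
        # keep only labels whose value is in the allow-list (assumed behavior)
        return [d for d in label_dicts if d.get('value') in keep_values]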
Example no. 2
def test_sentence_to_dict():
    sentence = Sentence(
        'Zalando Research is   located in Berlin, the capital of Germany.',
        labels=['business'],
        use_tokenizer=True)
    sentence[0].add_tag('ner', 'B-ORG')
    sentence[1].add_tag('ner', 'E-ORG')
    sentence[5].add_tag('ner', 'S-LOC')
    sentence[10].add_tag('ner', 'S-LOC')
    dict = sentence.to_dict('ner')
    assert ('Zalando Research is   located in Berlin, the capital of Germany.'
            == dict['text'])
    assert ('Zalando Research' == dict['entities'][0]['text'])
    assert ('Berlin' == dict['entities'][1]['text'])
    assert ('Germany' == dict['entities'][2]['text'])
    assert (1 == len(dict['labels']))
    sentence = Sentence(
        'Facebook, Inc. is a company, and Google is one as well.',
        use_tokenizer=True)
    sentence[0].add_tag('ner', 'B-ORG')
    sentence[1].add_tag('ner', 'I-ORG')
    sentence[2].add_tag('ner', 'E-ORG')
    sentence[8].add_tag('ner', 'S-ORG')
    dict = sentence.to_dict('ner')
    assert ('Facebook, Inc. is a company, and Google is one as well.' ==
            dict['text'])
    assert ('Facebook, Inc.' == dict['entities'][0]['text'])
    assert ('Google' == dict['entities'][1]['text'])
    assert (0 == len(dict['labels']))
Example no. 3
def predict(sentence):
    """ Predict the sentiment of a sentence """
    if sentence == "":
        return 0
    text = Sentence(sentence)
    classifier.predict(text)
    label = text.labels[0].to_dict()
    # signed confidence: positive sentiment > 0, negative sentiment < 0
    result = label['confidence'] if label['value'] == 'POSITIVE' else -label['confidence']
    return round(result, 3)
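
The example above assumes a module-level classifier. A plausible setup and call, assuming Flair's pretrained 'en-sentiment' model (the original code does not show which classifier is loaded):

from flair.data import Sentence
from flair.models import TextClassifier

classifier = TextClassifier.load('en-sentiment')  # assumed model

print(predict("I really enjoyed this movie!"))  # e.g. 0.989
print(predict("This was a waste of time."))     # e.g. -0.994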
Example no. 4
def entity_recognition(text):
    doc = text if isinstance(text, str) else ''

    s = Sentence(doc.title())
    model.predict(s)
    result = s.to_dict(tag_type='ner')
    entities = result['entities']

    # guard against sentences with no detected entities before indexing
    if entities:
        first = entities[0]
        entity = first['labels'][0].to_dict()['value']
        confidence = round(first['labels'][0].to_dict()['confidence'], 2)
    else:
        entity = ''
        confidence = ''
    return entity, confidence
Example no. 5
    def Predict_textfile(self, textfile, is_path=False, path=""):

        if not is_path:
            if not self.checkpoint_download:
                path = self.checkpoint_path
                print("Checkpoint file already present")
            else:
                # create resources/tagger/example-ner under the download dir
                os.makedirs(self.download_dir + "/resources/tagger/example-ner",
                            exist_ok=True)
                print("Checkpoint file will be downloaded from ...")
                download_file_from_google_drive(self.google_id,
                                                self.checkpoint_path)
                print("Checkpoint downloaded successfully")
                path = self.checkpoint_path
                self.checkpoint_download = False

        tagger = SequenceTagger.load(path)

        # write one "entity text<TAB>entity type" line per detected entity
        dest_path = textfile[:-4] + "__NER.txt"
        with open(textfile, "r") as f, open(dest_path, "w") as out_f:
            for line in f:
                sentence = Sentence(line)
                tagger.predict(sentence)
                for word in sentence.to_dict(tag_type='ner')["entities"]:
                    out_f.write(word['text'] + "\t" + word['type'] + "\n")
                out_f.write("\n")
Example no. 6
        def extract_entities_flair(sentences: List[str]):
            result = list()

            for sentence in sentences:
                sentence = Sentence(sentence)
                ner.predict(sentence)
                line_result = sentence.to_dict(tag_type="ner")

                cache = dict()
                dedup = list()

                # keep only the first occurrence of each entity text
                for entity in line_result["entities"]:
                    if entity["text"] not in cache:
                        dedup.append({
                            "word": entity["text"],
                            "entity": entity["labels"][0].value,
                            "start": entity["start_pos"],
                            "end": entity["end_pos"],
                        })
                        cache[entity["text"]] = True

                result.append(dedup)

            return result
Example no. 7
def get_flair_entities(text, score_threshold=0.9):

    sentence = Sentence(text, use_tokenizer=True)
    model.predict(sentence)

    # refactor flair output: drop low-confidence entities and merge entities
    # that are directly adjacent in the source text into a single span
    entities = []
    prev_end_pos = None
    prev_entity = ''
    for entity in sentence.to_dict(tag_type='ner')['entities']:

        if entity['labels'][0].score < score_threshold:
            continue

        logger.info('flair entity detected: ' + str(entity))
        if entities and prev_end_pos + 1 == entity['start_pos']:
            # adjacent to the previous entity: replace it with the merged span
            del entities[-1]
            final_entity = prev_entity + ' ' + entity['text']
        else:
            final_entity = entity['text']

        entities.append(final_entity.strip())
        prev_end_pos = entity['end_pos']
        prev_entity = final_entity

    return entities
Example no. 8
def tag_entities(text):
    sentences = sent_tokenize(text)
    output = []
    for s in sentences:
        s = Sentence(s)
        tagger.predict(s)
        output.append(s.to_dict(tag_type='ner'))
    return output
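
A possible setup and call for tag_entities, assuming NLTK's sent_tokenize and a standard pretrained Flair NER model (both are assumptions; the original imports are not shown):

from nltk.tokenize import sent_tokenize  # may require nltk.download('punkt')
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')  # assumed 4-class English NER model

for sent_dict in tag_entities("Angela Merkel visited Paris. She met the president."):
    for entity in sent_dict['entities']:
        print(entity['text'], entity['labels'])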
Example no. 9
def Resume(text):
    text = ' '.join(
        [line.replace('\t', ' ') for line in text.split('\n') if line])
    data = {
        'content': text,
        'person': {
            'count': 0,
            'source': [],
        },
        'location': {
            'count': 0,
            'source': []
        },
        'organization': {
            'count': 0,
            'source': []
        },
        'emails': {
            'count': 0,
            'source': []
        },
        'mobiles': {
            'count': 0,
            'source': []
        }
    }

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    persons = extract_name(text)
    for email in emails:
        data["emails"]["count"] += 1
        data["emails"]["source"].append(email)
    for mobile in mobiles:
        data["mobiles"]["count"] += 1
        data["mobiles"]["source"].append(mobile)
    for person in persons:
        data["person"]["count"] += 1
        data["person"]["source"].append(person)

    sentence = Sentence(text)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)
    d = sentence.to_dict(tag_type='ner')
    for item in d['entities']:
        if item["type"] == "PER" and item['confidence'] > 0.70:
            data["person"]["count"] += 1
            data["person"]["source"].append(item)
        elif item["type"] == "LOC" and item['confidence'] > 0.60:
            data["location"]["count"] += 1
            data["location"]["source"].append(item)
        elif item["type"] == "ORG" and item['confidence'] > 0.95:
            data["organization"]["count"] += 1
            data["organization"]["source"].append(item)
    return data
Example no. 10
def do_NER(context):
    s = Sentence(context)
    NERmodel.predict(s)
    raw = s.to_dict(tag_type='ner')
    answers = []
    for item in raw['entities']:
        answers.append(item['text'])
    if not answers:
        answers = get_key_words(context)
    # deduplicate case-insensitively, then capitalize each answer
    unique_answers = set(answer.lower() for answer in answers)
    return [answer.capitalize() for answer in unique_answers]
Example no. 11
def get_score(text):
    # create example sentence
    sentence = Sentence(text, use_tokenizer=japanese_tokenizer)
    # predict class and print
    classifier.predict(sentence)

    label_dict = sentence.to_dict()["labels"][0]

    # the confidence only counts when the predicted class is "__label__O"
    if label_dict["value"] == "__label__O":
        return label_dict["confidence"]
    return 0
Example no. 12
    def predict(self, sentences):
        mentions = []
        for sent_idx, sent in enumerate(sentences):
            sent = Sentence(sent, use_tokenizer=True)
            self.model.predict(sent)
            sent_mentions = sent.to_dict(tag_type="ner")["entities"]
            # remember which sentence each mention came from
            for mention in sent_mentions:
                mention["sent_idx"] = sent_idx
            mentions.extend(sent_mentions)
        return {"sentences": sentences, "mentions": mentions}
Example no. 13
def ListParser(text):
    data = {
        'content': text,
        'person': {
            'count': 0,
            'source': [],
        },
        'location': {
            'count': 0,
            'source': []
        },
        'organization': {
            'count': 0,
            'source': []
        },
        'emails': {
            'count': 0,
            'source': []
        },
        'mobiles': {
            'count': 0,
            'source': []
        }
    }
    arra = ""

    for line in text.split('\n'):
        line = line.split('\t')
        if len(line) > 1:
            arra += (" ".join(line) + ".\n")

    emails = find_emails(text)
    mobiles = mob_num_extractor(text)
    for email in emails:
        data["emails"]["count"] += 1
        data["emails"]["source"].append(email)
    for mobile in mobiles:
        data["mobiles"]["count"] += 1
        data["mobiles"]["source"].append(mobile)
    sentence = Sentence(arra)
    model = SequenceTagger.load(
        '/media/zeus/AREA_51/MY_WORKS/API/mods/eng_cpu.pt')
    model.predict(sentence)
    d = sentence.to_dict(tag_type='ner')
    for item in d['entities']:
        if item["type"] == "PER" and item['confidence'] > 0.90:
            data["person"]["count"] += 1
            data["person"]["source"].append(item)
        elif item["type"] == "LOC" and item['confidence'] > 0.90:
            data["location"]["count"] += 1
            data["location"]["source"].append(item)
        elif item["type"] == "ORG" and item['confidence'] > 0.90:
            data["organization"]["count"] += 1
            data["organization"]["source"].append(item)
    return data
Example no. 14
def test_sentence_to_dict():
    sentence = Sentence(
        "Zalando Research is   located in Berlin, the capital of Germany.",
        labels=["business"],
        use_tokenizer=segtok_tokenizer,
    )

    # bioes tags
    sentence[0].add_tag("ner", "B-ORG")
    sentence[1].add_tag("ner", "E-ORG")
    sentence[5].add_tag("ner", "S-LOC")
    sentence[10].add_tag("ner", "S-LOC")

    dict = sentence.to_dict("ner")

    assert (
        "Zalando Research is   located in Berlin, the capital of Germany."
        == dict["text"]
    )
    assert "Zalando Research" == dict["entities"][0]["text"]
    assert "Berlin" == dict["entities"][1]["text"]
    assert "Germany" == dict["entities"][2]["text"]
    assert 1 == len(dict["labels"])

    sentence = Sentence(
        "Facebook, Inc. is a company, and Google is one as well.",
        use_tokenizer=segtok_tokenizer,
    )

    # bioes tags
    sentence[0].add_tag("ner", "B-ORG")
    sentence[1].add_tag("ner", "I-ORG")
    sentence[2].add_tag("ner", "E-ORG")
    sentence[8].add_tag("ner", "S-ORG")

    dict = sentence.to_dict("ner")

    assert "Facebook, Inc. is a company, and Google is one as well." == dict["text"]
    assert "Facebook, Inc." == dict["entities"][0]["text"]
    assert "Google" == dict["entities"][1]["text"]
    assert 0 == len(dict["labels"])
Example no. 15
    def posTagFinder(self):
        # sample 1500 random sentences and collect the distinct POS tags seen
        for _ in range(1500):
            item = random.choice(self.data)
            temp = Sentence(item['sentence'])
            self.tagger.predict(temp)
            _dict = temp.to_dict(tag_type='pos')
            self.postags.extend([
                sample['type'] for sample in _dict['entities']
                if sample['type'] not in self.postags
            ])
        self.postags = list(set(self.postags))
Example no. 16
def predict(model, selected_embeddings, data_file):
    """
			takes data in a form text, post_id, and saves both those plus 
			prediction results in the out file
	"""

    selected_embeddings_text = [
        key for key in selected_embeddings if selected_embeddings[key]
    ]
    selected_embeddings_text = '_'.join(selected_embeddings_text)

    print(selected_embeddings_text)

    model_dir = 'resources/taggers/CADECglove_char_flair'

    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    line_counts = 0

    with bz2.open(f_in, 'rt') as f:

        with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug:
            with open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:

                header = "post_ID,matched,score,start_pos,end_pos\n"
                f_dis.write(header)
                f_drug.write(header)

                for line in tqdm(f, total=get_num_lines(f_in)):
                    if len(line) > 0:
                        line_dict = process_txt(line)
                        line_counts += 1

                        body = line_dict['text']
                        tweet_id = line_dict['id']

                        sentence = Sentence(str(body))
                        # predict tags and collect entities
                        model.predict(sentence)
                        res = sentence.to_dict(tag_type='ner')

                        for el in res['entities']:
                            csv_row = '{},"{}",{},{},{}\n'.format(
                                tweet_id,
                                el['text'].replace('\n', ' '),
                                el['confidence'],
                                el['start_pos'],
                                el['end_pos'])
                            if el['type'] == 'DIS':
                                f_dis.write(csv_row)
                            elif el['type'] == 'DRUG':
                                f_drug.write(csv_row)
Example no. 17
def flair12NER(title, text):
    s = Sentence(text)
    flair12class.predict(s)
    entities = s.to_dict(tag_type="ner")
    sentences = getSpaCySentences(entities["text"])
    vertexSet = getDocREDVertexSetFromFlairEntities(entities["entities"],
                                                    sentences)
    docREDDocumentObject = {
        "vertexSet": vertexSet,
        "title": title,
        "sents": [[word.text for word in sentence] for sentence in sentences]
    }
    return docREDDocumentObject
Example no. 18
def tag_instance_using_flair(target_tagger, ner_tagger, pos_tagger, instance):
    print('processing:', instance[0])
    instance = instance[1]

    conclusion = instance['_claim']
    claims = list(instance['_argument_sentences'].values())

    #predict targets...
    conclusion_sent = Sentence(conclusion)
    claims_sents = [Sentence(claim) for claim in claims]

    target_tagger.predict([conclusion_sent] + claims_sents)
    ner_tagger.predict([conclusion_sent] + claims_sents)
    pos_tagger.predict([conclusion_sent] + claims_sents)

    tagged_claims = []
    for i, c in enumerate(claims_sents):
        tagged_claims.append({
            'text': claims[i],
            'pos': c.to_dict(tag_type='pos')['entities'],
            'named_entities': c.to_dict(tag_type='ner')['entities'],
            'targets': c.to_dict(tag_type='ct')['entities']
        })

    return {
        '_debate_id': instance['_debate_id'],
        'conclusion': {
            'text': conclusion,
            'pos': conclusion_sent.to_dict(tag_type='pos')['entities'],
            'named_entities': conclusion_sent.to_dict(tag_type='ner')['entities'],
            'targets': conclusion_sent.to_dict(tag_type='ct')['entities']
        },
        'claims': tagged_claims
    }
Example no. 19
def find_tags(user_input, keyword):
    sentence = Sentence(user_input)
    tagger.predict(sentence)
    entities = sentence.to_dict(tag_type='ner')['entities']
    for entity in entities:
        # return the label of the first entity whose text contains the keyword
        if findWholeWord(keyword)(entity['text']):
            return entity['labels'][0].value
    return ""
Example no. 20
def flair_ner(text, tagger):
    """
    Tag with Flair

    :param text: source text to tag
    :param tagger: Flair initialised with tagging model
    :return: list of tuples (text, start, end, entity label)
    """
    sentence = Sentence(text, use_tokenizer=True)
    tagger.predict(sentence)
    s = sentence.to_dict(tag_type="ner")
    ents = [(e["text"], e["start_pos"], e["end_pos"], e["type"])
            for e in s["entities"]]
    return ents
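
A quick usage sketch for flair_ner, assuming the standard pretrained Flair NER model (the concrete model used originally is not shown):

from flair.models import SequenceTagger

tagger = SequenceTagger.load('ner')  # assumed model
print(flair_ner("George Washington went to Washington.", tagger))
# e.g. [('George Washington', 0, 17, 'PER'), ('Washington', 26, 36, 'LOC')]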
Example no. 21
def text_to_features(tagger, sents):
    if not isinstance(sents, list):
        sents = nltk.sent_tokenize(sents)

    tokens = []
    tagged_sents = []
    for sent in sents:
        sent = Sentence(sent)
        tagger.predict(sent)
        # join each token with its 'ct' tag, using U+FFE8 as the separator
        tokens += [token.text + u"\uFFE8" + token.tags['ct'].value
                   for token in sent.tokens]
        tagged_sents.append(sent.to_dict(tag_type='ct'))

    return ' '.join(tokens), tagged_sents
Example no. 22
def sifrank():
    req_data = request.get_json(force=True)
    text = req_data['text']
    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # bucket entity texts by label; NORP covers nationalities, and anything
    # that matches no known label goes to ADDITIONAL
    labels = ["GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP"]
    buckets = {label: [] for label in labels + ["ADDITIONAL"]}
    for entity in output:
        label_str = str(entity["labels"])
        for label in labels:
            if label in label_str:
                buckets[label].append(entity["text"])
                break
        else:
            buckets["ADDITIONAL"].append(entity["text"])

    # deduplicate each bucket before returning
    entities = {label: list(set(texts)) for label, texts in buckets.items()}
    return jsonify(entities)
Example no. 23
def sentence_to_org(sentence):
    try:
        sentence_tokenized = Sentence(sentence)
        tagger.predict(sentence_tokenized)
        sentence_dict = sentence_tokenized.to_dict(tag_type='ner')

        org_names = []
        for entity in sentence_dict['entities']:
            if entity['type'] == 'ORG':
                org_names.append(entity['text'])

        predicted_org = most_common(org_names)
        return predicted_org
    except Exception:
        print("Did not find any organisations in the text")
Example no. 24
async def getNamedEntities(body: FLAIR_NER_MODEL):
    text = body.text
    text = re.sub('[^.,a-zA-Z0-9 \n\.]', '', text)
    sentence = Sentence(text)
    tagger.predict(sentence)
    output = sentence.to_dict(tag_type='ner')['entities']

    # bucket entity texts by label; NORP covers nationalities, and anything
    # that matches no known label goes to ADDITIONAL
    labels = ["GPE", "PERSON", "ORG", "LOC", "EVENT", "DATE", "MONEY", "NORP"]
    buckets = {label: [] for label in labels + ["ADDITIONAL"]}
    for entity in output:
        label_str = str(entity["labels"])
        for label in labels:
            if label in label_str:
                buckets[label].append(entity["text"])
                break
        else:
            buckets["ADDITIONAL"].append(entity["text"])

    # deduplicate each bucket before returning
    return {label: list(set(texts)) for label, texts in buckets.items()}
Example no. 25
def predict(model, selected_embeddings, data_file):
    """
			takes data in a form text, post_id, and saves both those plus 
			prediction results in the out file
	"""

    selected_embeddings_text = [
        key for key in selected_embeddings if selected_embeddings[key]
    ]
    selected_embeddings_text = '_'.join(selected_embeddings_text)

    print(selected_embeddings_text)

    model_dir = 'resources/taggers/' + 'to_resume_' + model + selected_embeddings_text

    # load the model you trained
    model = SequenceTagger.load(model_dir + '/best-model.pt')

    data = pd.read_csv(f_in)
    # ,year,month,subreddit,body,clean_body,post_index
    print(data.head())

    with open(f_out.replace(".csv", "_drug.csv"), 'w') as f_drug:
        with open(f_out.replace(".csv", "_dis.csv"), 'w') as f_dis:
            header = "post_ID,matched,score,start_pos,end_pos\n"
            f_dis.write(header)
            f_drug.write(header)

            for i, row in tqdm.tqdm(data.iterrows(), total=data.shape[0]):
                # row['body'] holds a string-encoded list of sentences
                for r in eval(row['body']):
                    sentence = Sentence(str(r))
                    # predict tags and collect entities
                    model.predict(sentence)
                    res = sentence.to_dict(tag_type='ner')

                    for el in res['entities']:
                        csv_row = '{},"{}",{},{},{}\n'.format(
                            row['post_ID'],
                            el['text'].replace('\n', ' '),
                            el['confidence'],
                            el['start_pos'],
                            el['end_pos'])
                        if el['type'] == 'DIS':
                            f_dis.write(csv_row)
                        elif el['type'] == 'DRUG':
                            f_drug.write(csv_row)

                if i == 10:
                    break
Example no. 26
def predict(model, predict_sentence):
    sentence = Sentence(predict_sentence)
    model.predict(sentence)
    print(predict_sentence)

    # collect the character offsets of every token labelled toxic ("1")
    dic = sentence.to_dict(tag_type='tox')
    toxic_spans = []
    for token in dic['entities']:
        label = int(token['labels'][0].value)
        if label == 1:
            start_pos = token['start_pos']
            end_pos = token['end_pos']
            for i in range(start_pos, end_pos):
                toxic_spans.append(i)

    return [toxic_spans, predict_sentence]
Example no. 27
    def get_ner_entities(self, text):
        entities = []

        try:
            sentences = nltk.sent_tokenize(text)
        except Exception:
            # fall back to treating the whole text as one sentence
            sentences = [text]

        for sent in sentences:
            sentence = Sentence(sent)
            self.ontoner_tagger.predict(sentence)
            sent_tags = sentence.to_dict(tag_type='ner')
            entities.extend(sent_tags["entities"])

        return entities
Example no. 28
def createNERFiles(statFilePath, resultTxt, tagger):
    if verbose:
        print("\tCreating named entity recognized file at: " + statFilePath)
    try:
        sentence = Sentence(resultTxt)
        # predict NER tags
        tagger.predict(sentence)
    except RuntimeError as err:
        print("Runtime error: {0}".format(err))
        print("Failed at: " + statFilePath)
        return None

    taggedStr = sentence.to_tagged_string()
    details = sentence.to_dict(tag_type='ner')
    with open(statFilePath, "w") as statFile:
        statFile.write(taggedStr)
    return (taggedStr, details)
Example no. 29
def test(model, selected_embeddings):
  selected_embeddings_text = [key  for key in selected_embeddings if selected_embeddings[key]]
  selected_embeddings_text = '_'.join(selected_embeddings_text)

  print (selected_embeddings_text)

  model_dir = 'resources/taggers/' + model + selected_embeddings_text + '_fine-tuned7s'

  # load the model you trained
  model = SequenceTagger.load(model_dir + '/best-model.pt')

  sentence = Sentence("If you've been on a low calorie diet + exercise for a long time, probably you have low free T3 blood levels causing your hypo symptoms. You should ask specifically for freeT3 and freeT4 to be tested. The low conversion of T4 to T3 is your bodies way of ""protecting itself"" from any further calorie deficiet. The rest of this only matters if you do get low T3 confirmed: it is important you do not go on a T4 monotherapy, it would very likely make your situation worse because it's tricking your brain into thinking you have more then enough thyroid hormones, while your T3 deficit worsens. Either get T3 and T4 combination or no medication. Instead make sure you have enough Iodine, Selenium and Zinc in your diet and consider significantly increasing your calorie intake! It seems paradoxical but because this will eventually increase you T3 levels and basal metabolic rate it will not necessarily make you gain weight in the long term. Also dizzy spells could be low blood sugar (even if you don't who the classical symptoms of shaking/sweating.) If it is low blood sugar you need to be careful with that and make sure to get some glucose quick (both for preventing your dizzines causing accidents and also because every hypoglycemic state will stress out your metabolic system, autoamplifying the low T3)")

  # # predict tags and print
  model.predict(sentence)

  print(sentence.to_dict(tag_type='ner'))
Example no. 30
def generateTextToNer(text):
    """
    Returns a dictionary with the following keys:
    'ents' contains a list of entities, 'text' contains the entire text string,
    and 'passToRelation' contains a list of possible combinations of two 
    entities in a sentence.
    """
    clean_text = normalize_corpus([text], to_lower=False, to_remove_html=False,
                                  to_remove_accent=True, to_expand_contractions=True,
                                  to_lemmatize=False, to_remove_special=False,
                                  to_remove_stopword=False)
    clean_text = clean_text[0]
    idTracker = defaultdict(int)
    res = {'ents': [], 'text': '', 'passToRelation': []}

    lst_sentences = nltk.sent_tokenize(clean_text)
    prevLen = 0
    for s in lst_sentences:
        sentence = Sentence(s, use_tokenizer=True)
        tagger_fast.predict(sentence)
        dict_flair = sentence.to_dict(tag_type='ner')
        for idx in dict_flair['entities']:
            idx['id'] = idTracker[idx['text']]
            idTracker[idx['text']] += 1
            idx['end'] = idx.pop('end_pos')
            idx['start'] = idx.pop('start_pos')
            full_label = idx.pop('labels')[0]
            full_label = str(full_label)
            idx['type'] = full_label[:full_label.find(' ')]
        dict_flair['ents'] = dict_flair.pop('entities')
        combination = combine(dict_flair)
        res['passToRelation'].extend(combination)

        for idx in dict_flair['ents']:
            idx['end'] = idx['end'] + prevLen
            idx['start'] = idx['start'] + prevLen

        dict_flair.pop('labels')
        res['text'] += ' '+dict_flair['text']
        res['ents'].extend(dict_flair['ents'])
        prevLen += len(dict_flair['text']) + 1

    res['text'] = res['text'].strip()

    return res
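
The combine helper used above is not shown. Given the docstring ('passToRelation' holds possible combinations of two entities in a sentence), a plausible sketch is plain pairwise combinations (assumed behavior, not the original code):

from itertools import combinations

def combine(dict_flair):
    # every unordered pair of entities found in one sentence (assumed)
    return list(combinations(dict_flair['ents'], 2))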