def nonlocal_ner_tag_tokens(self):
    """NER-tag every tokenized line of every document with the Stanford tagger.

    Flattens ``self.tokenized_docs_by_lines`` into one list of lines, runs a
    single batched ``tag_sents`` call (one expensive Java round-trip instead
    of one per line), then regroups the tagged lines per document into
    ``self.nonlocal_ner_doc_tokens``.

    Side effects: sets CLASSPATH/STANFORD_MODELS env vars and reconfigures
    the NLTK Java options; assumes the Stanford NER 2015-12-09 distribution
    lives under the user's home directory.
    """
    home = expanduser("~")
    os.environ['CLASSPATH'] = home + '/stanford-ner-2015-12-09'
    os.environ[
        'STANFORD_MODELS'] = home + '/stanford-ner-2015-12-09/classifiers'

    st = StanfordNERTagger("english.all.3class.distsim.crf.ser.gz",
                           java_options='-mx4000m')

    # Widen the tagger's classpath to every jar shipped with the distribution.
    stanford_dir = st._stanford_jar[0].rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    st._stanford_jar = ':'.join(stanford_jars)

    # Do not tokenise text: tokens are already split, feed them through as-is.
    nltk.internals.config_java(
        options=
        '-tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions "tokenizeNLs=true"'
    )

    # Remember each document's line count so the flat tagged output can be
    # sliced back into per-document groups afterwards.
    length_of_docs = [len(doc) for doc in self.tokenized_docs_by_lines]
    flat_lines = [line
                  for doc in self.tokenized_docs_by_lines
                  for line in doc]

    tagged_lines = st.tag_sents(flat_lines)

    self.nonlocal_ner_doc_tokens = []
    current_idx = 0
    for doc_len in length_of_docs:
        self.nonlocal_ner_doc_tokens.append(
            tagged_lines[current_idx:current_idx + doc_len])
        current_idx += doc_len
    print("NER nonlocal tagged tokens")
# ===== Example #2 (score: 0) =====
def stanford_ne_tagger(tokens):
    """Return the set of lower-cased LOCATION entities found in *tokens*.

    Parameters
    ----------
    tokens : list of str
        Pre-tokenized words to tag.

    Returns
    -------
    set of str
        Each element is a whole multi-word LOCATION chunk, space-joined and
        lower-cased.
    """
    st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz')
    # Widen the tagger's classpath to every jar next to stanford-ner.jar.
    stanford_dir = st._stanford_jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    st._stanford_jar = ':'.join(stanford_jars)

    tags = st.tag(tokens)
    continuous_chunks = get_continuous_chunks(tags)
    named_entities_str_tag = set()
    for ne in continuous_chunks:
        # ne is a chunk of (token, tag) pairs; the first pair's tag labels it.
        if ne[0][1] == u'LOCATION':
            # BUG FIX: `lower(...)` is not a builtin (NameError on Python 3,
            # only worked via Python 2's `from string import lower`);
            # use the str.lower() method instead.
            named_entities_str_tag.add(
                u' '.join(token for token, tag in ne).lower())

    return named_entities_str_tag
# ===== Example #3 (score: 0) =====
 def tagNER(self):
     """Run Stanford NER over the whole vocabulary list.

     Joins ``self.vocabList`` into one space-separated string, re-tokenizes
     it, tags every token, stores the (token, tag) pairs in
     ``self.namedEntitiesList``, and writes the results via
     ``self.writeNERResults()``.
     """
     print("Named Entities are being identified...")
     from nltk.tag import StanfordNERTagger
     from nltk.tokenize import word_tokenize
     os.environ['JAVAHOME'] = "/usr/bin/"
     classpath = "/home/aditya/src/stanfordNER/stanford-ner-2015-12-09"
     st = StanfordNERTagger(GlobalsClass.STANFORD_BABI_NER_CLASSIFIER,
                            GlobalsClass.STANFORD_NER_PATH,
                            encoding=GlobalsClass.ENCODING)
     st._stanford_jar = classpath
     # str.join builds the text in one pass; the original += loop was
     # quadratic in the vocabulary size. word_tokenize ignores the trailing
     # space the old loop produced, so the tokens are identical.
     myText = " ".join(self.vocabList)
     tokenized_text = word_tokenize(myText)
     self.namedEntitiesList = st.tag(tokenized_text)
     #print(self.namedEntitiesList)
     self.writeNERResults()
# ===== Example #4 (score: 0) =====
from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize

# Standalone demo: load the 3-class English model with explicit absolute
# paths to a local Stanford NER 2015-12-09 install (will fail if absent).
st = StanfordNERTagger('/home/ubuntu/stanford-ner-2015-12-09/classifiers/english.all.3class.distsim.crf.ser.gz', path_to_jar='/home/ubuntu/stanford-ner-2015-12-09/stanford-ner.jar')
text = 'While in Frabce'  # NOTE(review): "Frabce" looks like a typo for "France" — confirm intent

tokenized_text = word_tokenize(text)
#print tokenized_text
#classified_text = st.tag(tokenized_text)
#print(classified_text)




# NOTE(review): the bare `print` statements below are Python 2 syntax; this
# snippet will not parse under Python 3. Kept byte-identical.
import nltk
from nltk.tag import StanfordNERTagger
st = StanfordNERTagger('/home/ubuntu/stanford-ner-2015-12-09/classifiers/english.all.3class.distsim.crf.ser.gz', path_to_jar='/home/ubuntu/stanford-ner-2015-12-09/stanford-ner.jar')
print st._stanford_jar
# Widen the classpath to every jar found next to stanford-ner.jar.
stanford_dir = st._stanford_jar.rpartition('/')[0]
from nltk.internals import find_jars_within_path
stanford_jars = find_jars_within_path(stanford_dir)
print ":".join(stanford_jars)
st._stanford_jar = ':'.join(stanford_jars)
print st._stanford_jar
# Tag a whitespace-split sample sentence and show the (token, tag) pairs.
text = st.tag('Rami Eid is studying at Stony Brook University in NY'.split())
print text
# ===== Example #5 (score: 0) =====
import os
import numpy as np
import re
import nltk
import time
from nltk.tag import StanfordNERTagger

# Module-level setup: instantiate the tagger once and widen its classpath to
# every jar shipped alongside stanford-ner.jar.
# NOTE(review): relies on CLASSPATH/STANFORD_MODELS env vars pointing at a
# Stanford NER distribution — TODO confirm they are set before import.
st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz')
stanford_dir = st._stanford_jar.rpartition('/')[0]
from nltk.internals import find_jars_within_path

stanford_jars = find_jars_within_path(stanford_dir)
st._stanford_jar = ':'.join(stanford_jars)

from lexnlp.extract.en import money, citations, conditions, constraints, copyright, courts, definitions, regulations, trademarks, dates, amounts
from lexnlp.nlp.en import tokens

# Hard-coded dataset layout for the LeDAM Task 1 corpus on this machine.
data_dir = '/home/ritam/Desktop/LeDAM/DATA/Task_1'
train_cp_dir = data_dir + '/' + 'Train_catches'
train_docs_dir = data_dir + '/' + 'Train_docs'


class Legal_Doc:
    """Lightweight container for one legal document.

    Stores the document's on-disk location together with mutable lists that
    later processing stages fill in: noun phrases (``npl``) and nouns
    (``nounns``).
    """

    def __init__(self, location):
        # Path (or identifier) of the document on disk.
        self.location = location
        # Both collections start empty and are populated elsewhere.
        self.npl, self.nounns = [], []

    def getnps(self):
        # Accessor for the noun-phrase list.
        return self.npl
def load_stanford_ner_tagger(stanford_ner_path):
    stanford_ner = StanfordNERTagger(os.path.join(stanford_ner_path,"classifiers/english.all.3class.distsim.crf.ser.gz"), 
											os.path.join(stanford_ner_path,"stanford-ner.jar"))
	stanford_ner._stanford_jar = stanford_ner_path+"stanford-ner.jar:"+stanford_ner_path+"lib/*"