Example #1
def preprocessing(language: str):
    from benepar.spacy_plugin import BeneparComponent
    import zh_core_web_trf
    import en_core_web_trf
    global ucb_parser
    if language == 'zh':
        nlp = zh_core_web_trf.load()
        ucb_parser = BeneparComponent('benepar_zh')
    elif language == 'en':
        nlp = en_core_web_trf.load()
        ucb_parser = BeneparComponent('benepar_en2')
    else:
        print(f'language error: unsupported language "{language}"')
        exit(-1)

    nlp.disable_pipes('tagger', 'parser', 'attribute_ruler')
    if language == 'en':
        nlp.disable_pipe('lemmatizer')
    nlp.add_pipe(ucb_parser, name='cp_parser', last=True)  # attach the benepar constituency parser built above
    return nlp
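
A minimal usage sketch for the function above, assuming the spaCy and benepar models it references are installed and the pipeline loads successfully; the sentence is only illustrative, and sentence boundaries are assumed to be set by the benepar component:
nlp = preprocessing('en')
doc = nlp('The quick brown fox jumps over the lazy dog.')
for sent in doc.sents:
    print(sent._.parse_string)  # constituency parse string attached by benepar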
Example #2
# Evaluation
import time  # needed for the timing code below

from rouge_score import rouge_scorer

# SRL
# _jsonnet was slowing the process down considerably;
# installing jsonnetbin instead cleared the error and restored normal speed.
from allennlp.predictors.predictor import Predictor
# Split text in sentences
from nltk import tokenize
# co-reference resolution
# import neuralcoref  # neuralcoref is not compatible with spaCy 3.x

# SPACY
import en_core_web_trf
nlp = en_core_web_trf.load()
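
For reference, a minimal sketch of how the sentence splitter and ROUGE scorer imported above are typically used; the strings are placeholders, and sent_tokenize assumes the NLTK "punkt" data has been downloaded:
sentences = tokenize.sent_tokenize("The model was trained for two days. It then scored well.")
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
scores = scorer.score("the reference summary text", "the generated summary text")
print(sentences, scores['rougeL'].fmeasure)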

print("\nAllenNLP loading predictors...")
start = time.time()
from T2S.src.utils.coref_utils import coref_with_lemma
# SRL
predictor = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz"
)
end = time.time()
print(f"Computation time - {round(end - start, 2)} seconds")

DECIMAL_FIGURES = 3


def verb_sem_eval(hyp_lemma, ref_lemma):
    ...


def precalculate_spacy_english_lemmatizer(cls, datasets):
    # Only the lemmatizer output is needed here, so disable the other components.
    cls._precalculate_spacy_lemmatizer(
        en_core_web_trf.load(disable=['ner', 'parser']), datasets,
        PRECALCULATED_SPACY_ENGLISH_LEMMA_CSV
    )
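
A hedged sketch of the lemmatization step that helper relies on, assuming only token lemmas are needed; the text is a placeholder:
lemma_nlp = en_core_web_trf.load(disable=['ner', 'parser'])
print([token.lemma_ for token in lemma_nlp("The cats were running quickly.")])
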
def recognize_name_entity(summary):
    # Run NER over the summary and return (entity text, entity label) pairs.
    # Note: this reloads the model on every call; the module-level nlp above could be reused instead.
    nlp = en_core_web_trf.load()
    doc = nlp(summary)
    return [(ent.text, ent.label_) for ent in doc.ents]
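
A small usage sketch for the NER helper above; the summary string and the labels shown in the comment are illustrative:
entities = recognize_name_entity("Barack Obama was born in Hawaii in 1961.")
print(entities)  # e.g. [('Barack Obama', 'PERSON'), ('Hawaii', 'GPE'), ('1961', 'DATE')]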