def __init__(self, annotator_class='relation_extractor', language='en', component_type='relation_extractor',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False):
    """Build a relation-extraction component.

    The annotator class is resolved from ``nlp_ref``: references containing
    'redl' select the DL extractor, plain 're_' references the statistical one.
    If ``model`` is given it is used as-is; otherwise the model is loaded from
    the licensed 'clinical/models' bucket.
    """
    if 're_' in nlp_ref:
        annotator_class = 'relation_extractor'
    if 'redl' in nlp_ref:
        annotator_class = 'relation_extractor_dl'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'relation_extractor':
            from nlu.components.relation_extractors.relation_extractor.relation_extractor import RelationExtraction
            if trainable:
                self.model = RelationExtraction.get_default_trainable_model()
            else:
                self.model = RelationExtraction.get_pretrained_model(nlp_ref, language, 'clinical/models')
        elif annotator_class == 'relation_extractor_dl':
            from nlu.components.relation_extractors.relation_extractor_dl.relation_extractor_dl import RelationExtractionDL
            # NOTE(review): no trainable variant is wired up for the DL extractor here,
            # so a pretrained model is always loaded even when trainable=True.
            self.model = RelationExtractionDL.get_pretrained_model(nlp_ref, language, 'clinical/models')
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='default_chunker', language='en', component_type='chunker', get_default=True,
             nlp_ref='', nlu_ref='', model=None, lang='en', loaded_from_pretrained_pipe=False):
    """Build a chunker component (default chunker or n-gram generator).

    There are no pretrained chunkers, so the default model is always used;
    the original code's ``get_default`` branches were byte-identical and have
    been collapsed.
    """
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'default_chunker':
            from nlu import DefaultChunker
            # There are no pretrained chunkers, only the default one.
            self.model = DefaultChunker.get_default_model()
        if annotator_class == 'ngram':
            from nlu import NGram
            # Same here: no pretrained n-gram models exist.
            self.model = NGram.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, lang, loaded_from_pretrained_pipe)
def __init__(self, annotator_class='context_spell', language='en', component_type='spell_checker', get_default=True,
             model=None, nlp_ref='', dataset='', nlu_ref='', is_licensed=False):
    """Build a spell-checker component (context-aware, Norvig, or symmetric-delete).

    The annotator class is first normalized from short names and the dataset,
    then overridden by reference hints; medical/clinical references always map
    to the context-aware checker.
    """
    if annotator_class in ('context', 'norvig', 'symmetric'):
        annotator_class = annotator_class + '_spell'
    if dataset != '':
        annotator_class = dataset + '_spell'
    if 'spellcheck_dl' in nlp_ref:
        annotator_class = 'context_spell'
    # Medical/clinical spell references use the context-aware checker.
    if 'spell.med' in nlu_ref:
        annotator_class = 'context'
    if 'spell.clinical' in nlu_ref:
        annotator_class = 'context'
    if '.med' in nlu_ref:
        annotator_class = 'context'
    if model is not None:
        self.model = model
    else:
        if 'context' in annotator_class:
            from nlu import ContextSpellChecker
            if is_licensed:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = ContextSpellChecker.get_default_model()
            else:
                self.model = ContextSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'norvig' in annotator_class:
            from nlu import NorvigSpellChecker
            if get_default:
                self.model = NorvigSpellChecker.get_default_model()
            else:
                self.model = NorvigSpellChecker.get_pretrained_model(nlp_ref, language)
        elif 'symmetric' in annotator_class:
            from nlu import SymmetricSpellChecker
            if get_default:
                self.model = SymmetricSpellChecker.get_default_model()
            else:
                self.model = SymmetricSpellChecker.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='normalizer', language='en', component_type='normalizer', get_default=True,
             nlp_ref='', nlu_ref='', model=None, is_licensed=False):
    """Build a token- or document-level normalizer component.

    ``nlu_ref`` hints override ``annotator_class`` only when no ``model`` is
    supplied (mirrors the original control flow).
    """
    if model is not None:
        self.model = model
    else:
        if 'norm_document' in nlu_ref:
            annotator_class = 'document_normalizer'
        elif 'norm' in nlu_ref:
            annotator_class = 'normalizer'
        if annotator_class == 'normalizer':
            from nlu import SparkNLPNormalizer
            if get_default:
                self.model = SparkNLPNormalizer.get_default_model()
            else:
                # There is no pretrained API for Normalizer in Spark NLP yet.
                self.model = SparkNLPNormalizer.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'document_normalizer':
            from nlu import SparkNLPDocumentNormalizer
            if get_default:
                self.model = SparkNLPDocumentNormalizer.get_default_model()
            else:
                # Same: no pretrained API for DocumentNormalizer yet.
                self.model = SparkNLPDocumentNormalizer.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='lemmatizer', language='en', component_type='lemmatizer', get_default=False,
             model=None, nlp_ref='', nlu_ref='', is_licensed=False):
    """Build a lemmatizer component, defaulting to a pretrained model unless
    ``get_default`` is True or a ready ``model`` is supplied."""
    if model is not None:
        self.model = model
    else:
        if 'lemma' in annotator_class:
            from nlu import SparkNLPLemmatizer
            if get_default:
                self.model = SparkNLPLemmatizer.get_default_model()
            else:
                self.model = SparkNLPLemmatizer.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='unlabeled_dependency_parser', language='en', component_type='dependency_untyped',
             get_default=True, nlp_ref='', nlu_ref='', model=None):
    """Build an unlabeled (untyped) dependency-parser component.

    The original also tested ``'dep.untyped' in annotator_class``, which is
    subsumed by ``'dep' in annotator_class`` and has been dropped.
    """
    if model is not None:
        self.model = model
    elif 'dep' in annotator_class or annotator_class == 'unlabeled_dependency_parser':
        from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import UnlabeledDependencyParser
        if get_default:
            self.model = UnlabeledDependencyParser.get_default_model()
        else:
            self.model = UnlabeledDependencyParser.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='stemmer', component_type='stemmer', model=None, nlu_ref='', nlp_ref=''):
    """Build a stemmer component; the stemmer is rule based, so only a default
    model exists."""
    if model is not None:
        self.model = model
    elif annotator_class == 'stemmer':
        from nlu import SparkNLPStemmer
        self.model = SparkNLPStemmer.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='assertion_dl', language='en', component_type='assertion', get_default=True,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False):
    """Build an assertion-status component (licensed healthcare annotator)."""
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'assertion_dl':
            from nlu.components.assertions.assertion_dl.assertion_dl import AssertionDL
            if trainable:
                self.model = AssertionDL.get_default_trainable_model()
            elif get_default:
                self.model = AssertionDL.get_default_model()
            else:
                self.model = AssertionDL.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'assertion_log_reg':
            # NOTE(review): not implemented — self.model is left unset on this
            # path, which will raise AttributeError on first use. Confirm this
            # branch is unreachable from callers before relying on it.
            pass
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='sentence_detector', language='en', component_type='sentence_detector',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False,
             lang='en', loaded_from_pretrained_pipe=False):
    """Build a sentence-detector component.

    The deep (DL) detector is the default; a 'pragmatic' hint in ``nlu_ref``
    (or any non-default ``annotator_class``) selects the rule-based detector.
    """
    if annotator_class == 'sentence_detector' and 'pragmatic' not in nlu_ref:
        annotator_class = 'deep_sentence_detector'  # default
    else:
        annotator_class = 'pragmatic_sentence_detector'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'deep_sentence_detector' or 'ner_dl' in nlp_ref:
            from nlu import SentenceDetectorDeep
            if trainable:
                self.model = SentenceDetectorDeep.get_trainable_model()
            elif get_default:
                self.model = SentenceDetectorDeep.get_default_model()
            else:
                self.model = SentenceDetectorDeep.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'pragmatic_sentence_detector':
            from nlu import PragmaticSentenceDetector
            if get_default:
                self.model = PragmaticSentenceDetector.get_default_model()
            # NOTE(review): when get_default is False this path leaves
            # self.model unset — presumably callers always pass get_default=True
            # for the pragmatic detector; confirm.
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, lang, loaded_from_pretrained_pipe)
def __init__(self, annotator_class='sentiment_dl', component_type='classifier', model=None):
    """Wrap an already-constructed model and align its input/output columns
    with the NLU defaults recorded in ``self.info``.

    The original mixed the local name ``model`` with ``self.model`` after the
    assignment; ``self.model`` is used consistently here (same object, same
    behavior — including the crash if ``model`` is None).
    """
    self.model = model
    SparkNLUComponent.__init__(self, annotator_class, component_type)
    # Make sure input/output cols match up with NLU defaults.
    # A single column is passed as a plain string, multiple as a list.
    if len(self.info.spark_input_column_names) == 1:
        self.model.setInputCols(self.info.spark_input_column_names[0])
    else:
        self.model.setInputCols(self.info.spark_input_column_names)
    if len(self.info.spark_output_column_names) == 1:
        self.model.setOutputCol(self.info.spark_output_column_names[0])
    else:
        # NOTE(review): setOutputCol is called with a list here — verify the
        # annotator accepts multiple output columns on this path.
        self.model.setOutputCol(self.info.spark_output_column_names)
def __init__(self, annotator_class='default_tokenizer', language='en', component_type='tokenizer', get_default=True,
             nlp_ref='', nlu_ref='', lang='en', model=None, is_licensed=False, loaded_from_pretrained_pipe=False):
    """Build a tokenizer component, switching to a word segmenter for
    segmentation requests and right-to-left languages with pretrained
    segmenters.

    BUGFIX: the original hardcoded ``loaded_from_pretrained_pipe=True`` in the
    base-class call, silently ignoring the parameter; the parameter is now
    forwarded.
    """
    if 'segment_words' in nlu_ref:
        annotator_class = 'word_segmenter'
    elif 'token' in annotator_class and language in nlu.AllComponentsInfo().all_right_to_left_langs_with_pretrained_tokenizer:
        annotator_class = 'word_segmenter'
    if model is not None:
        self.model = model
    elif annotator_class == 'default_tokenizer':
        from nlu import DefaultTokenizer
        # There are no pretrained default tokenizers, only the default one
        # (the original's get_default branches were identical).
        self.model = DefaultTokenizer.get_default_model()
    elif annotator_class == 'word_segmenter':
        from nlu import WordSegmenter
        if get_default and language == '':
            self.model = WordSegmenter.get_default_model()
        elif get_default and language != '':
            self.model = WordSegmenter.get_default_model_for_lang(language)
        else:
            self.model = WordSegmenter.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref=nlu_ref, nlp_ref=nlp_ref,
                               loaded_from_pretrained_pipe=loaded_from_pretrained_pipe, lang=lang)
def __init__(self, annotator_class='sentence_entity_resolver', language='en', component_type='resolution',
             get_default=True, model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=True):
    """Build an entity-resolution component (sentence- or chunk-level),
    choosing the flavour from the ``nlu_ref`` string."""
    if 'resolve' in nlu_ref and 'resolve_chunk' not in nlu_ref:
        annotator_class = 'sentence_entity_resolver'
    if 'resolve_chunk' in nlu_ref:
        annotator_class = 'chunk_entity_resolver'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'sentence_entity_resolver':
            from nlu.components.resolutions.sentence_entity_resolver.sentence_resolver import SentenceResolver
            if trainable:
                self.model = SentenceResolver.get_default_trainable_model()
            elif get_default:
                self.model = SentenceResolver.get_default_model()
            else:
                self.model = SentenceResolver.get_pretrained_model(nlp_ref, language)
        elif annotator_class == 'chunk_entity_resolver':
            from nlu.components.resolutions.chunk_entity_resolver.chunk_resolver import ChunkResolver
            if trainable:
                self.model = ChunkResolver.get_default_trainable_model()
            elif get_default:
                self.model = ChunkResolver.get_default_model()
            else:
                self.model = ChunkResolver.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='deidentifier', language='en', component_type='deidentifier', get_default=False,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=True):
    """Build a de-identification component (licensed healthcare annotator).

    Fixes: removed a leftover debug ``print('model')`` and the always-true
    ``if annotator_class == 'deidentifier'`` check that followed the forced
    assignment.
    """
    # Only one deidentifier implementation exists, so the class is fixed
    # regardless of what the caller passed.
    annotator_class = 'deidentifier'
    if model is not None:
        self.model = model
    else:
        from nlu.components.deidentifiers.deidentifier.deidentifier import Deidentifier
        if get_default:
            self.model = Deidentifier.get_default_model()
        else:
            self.model = Deidentifier.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='t5', language='en', component_type='seq2seq', get_default=True, model=None,
             nlp_ref='', nlu_ref='', dataset='', configs='', is_licensed=False):
    """Build a sequence-to-sequence component (T5 or Marian translation),
    resolving the annotator class from the reference strings."""
    if 't5' in nlu_ref or 't5' in nlp_ref:
        annotator_class = 't5'
    elif 'marian' in nlu_ref or 'marian' in nlp_ref:
        annotator_class = 'marian'
    elif 'translate_to' in nlu_ref or 'translate_to' in nlp_ref or 'translate_to' in annotator_class:
        # 'translate_to' actions are served by Marian models.
        annotator_class = 'marian'
    if model is not None:
        self.model = model
    elif 't5' in annotator_class:
        from nlu import T5
        if is_licensed:
            self.model = T5.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
        elif get_default:
            self.model = T5.get_default_model()
        elif configs != '':
            # A task configuration string selects a preconfigured T5 variant.
            self.model = T5.get_preconfigured_model(nlp_ref, language, configs)
        else:
            self.model = T5.get_pretrained_model(nlp_ref, language)
    elif 'marian' in annotator_class:
        from nlu import Marian
        if get_default:
            self.model = Marian.get_default_model()
        else:
            self.model = Marian.get_pretrained_model(nlp_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='document_assembler', component_type='util', model=None,
             loaded_from_pretrained_pipe=False, nlu_ref='', nlp_ref='', lang='en', is_licensed=False):
    """Build a utility component (document assembler, sentence detector,
    NER-to-chunk converters, sentence embeddings, feature assembler).

    Only default models exist for these utilities; ``annotator_class`` picks
    which one to construct.
    """
    # 'ner_converter' is a legacy alias.
    if annotator_class == 'ner_converter':
        annotator_class = 'ner_to_chunk_converter'
    if model is not None:
        self.model = model
    else:
        if annotator_class == 'document_assembler':
            from nlu import SparkNlpDocumentAssembler
            self.model = SparkNlpDocumentAssembler.get_default_model()
        elif annotator_class == 'deep_sentence_detector':
            from nlu import SentenceDetectorDeep
            self.model = SentenceDetectorDeep.get_default_model()
        elif annotator_class == 'sentence_detector':
            from nlu import SparkNLPSentenceDetector
            self.model = SparkNLPSentenceDetector.get_default_model()
        elif annotator_class == 'ner_to_chunk_converter':
            from nlu import NerToChunkConverter
            self.model = NerToChunkConverter.get_default_model()
        elif annotator_class == 'sentence_embeddings':
            from nlu import SparkNLPSentenceEmbeddings
            self.model = SparkNLPSentenceEmbeddings.get_default_model()
        elif annotator_class == 'feature_assembler':
            from nlu.components.utils.feature_assembler.feature_assembler import SparkNLPFeatureAssembler
            self.model = SparkNLPFeatureAssembler.get_default_model()
        elif annotator_class == 'ner_to_chunk_converter_licensed':
            from nlu.components.utils.ner_to_chunk_converter_licensed.ner_to_chunk_converter_licensed import NerToChunkConverterLicensed
            self.model = NerToChunkConverterLicensed.get_default_model()
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, lang, loaded_from_pretrained_pipe)
def __init__(self, annotator_class='date_matcher', language='en', component_type='matcher', get_default=False,
             nlp_ref='', model=None, nlu_ref='', dataset='', is_licensed=False):
    """Build a matcher component (date, regex, or text matcher), resolving the
    flavour from the reference strings.

    BUGFIX: with ``get_default=False`` (the parameter's default) the date
    branch previously left ``self.model`` unset; the rule-based DateMatcher
    now always falls back to its default model.
    """
    if 'date' in nlp_ref or 'date' in nlu_ref:
        annotator_class = 'date_matcher'
    elif 'regex' in nlp_ref or 'regex' in nlu_ref:
        annotator_class = 'regex_matcher'
    elif 'text' in nlp_ref or 'text' in nlu_ref:
        annotator_class = 'text_matcher'
    elif '_matcher' not in annotator_class:
        annotator_class = annotator_class + '_matcher'
    if model is not None:
        self.model = model
    else:
        if 'text' in annotator_class:
            from nlu import TextMatcher
            if get_default:
                self.model = TextMatcher.get_default_model()
            else:
                # NOTE: matchers resolve pretrained models by nlu_ref, not nlp_ref.
                self.model = TextMatcher.get_pretrained_model(nlu_ref, language)
        elif 'date' in annotator_class:
            from nlu import DateMatcher
            # DateMatcher has no pretrained variants; always use the default
            # model so self.model is guaranteed to be set.
            self.model = DateMatcher.get_default_model()
        elif 'regex' in annotator_class:
            from nlu import RegexMatcher
            if get_default:
                self.model = RegexMatcher.get_default_model()
            else:
                self.model = RegexMatcher.get_pretrained_model(nlu_ref, language)
    SparkNLUComponent.__init__(self, annotator_class, component_type)
def __init__(self, annotator_class='glove', lang='en', component_type='embedding', get_default=True, model=None,
             nlp_ref='', nlu_ref='', is_licensed=False, resolution_ref='', loaded_from_pretrained_pipe=False,
             do_ref_checks=True):
    """Build an embedding component, resolving the embedding family from the
    NLU/NLP reference strings, then loading a default, licensed, or pretrained
    model.

    Fixes: replaced ``get_default==True`` with a plain truth test, collapsed
    the glove branch whose outer/inner paths were redundant, and merged the
    byte-identical sentence-bert / sentence-electra branches.
    """
    if do_ref_checks:
        # Sentence-level embeddings are matched before token-level ones, so
        # refs like 'sent_bert' do not fall into the token 'bert' branch.
        if 'use' in nlu_ref or 'tfhub_use' in nlp_ref:
            annotator_class = 'use'
        elif 'bert' in nlp_ref and 'albert' not in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'albert' not in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'
        elif 'elmo' in nlp_ref:
            annotator_class = 'elmo'
        elif 'elmo' in nlu_ref:
            annotator_class = 'elmo'
        elif 'electra' in nlp_ref and 'sent' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'electra' in nlu_ref and 'sent' in nlu_ref:
            annotator_class = 'sentence_bert'
        elif 'bert' in nlu_ref and 'albert' not in nlu_ref:
            annotator_class = 'bert'
        elif 'bert' in nlp_ref and 'albert' not in nlp_ref:
            annotator_class = 'bert'
        elif 'electra' in nlu_ref or 'electra' in nlp_ref:
            annotator_class = 'bert'
        elif 'labse' in nlu_ref or 'labse' in nlp_ref:
            annotator_class = 'sentence_bert'
        elif 'tfhub' in nlu_ref or 'tfhub' in nlp_ref:
            annotator_class = 'use'
        elif 'glove' in nlu_ref or 'glove' in nlp_ref:
            annotator_class = 'glove'
        elif 'cc_300d' in nlu_ref or 'cc_300d' in nlp_ref:
            annotator_class = 'glove'
        elif 'albert' in nlu_ref or 'albert' in nlp_ref:
            annotator_class = 'albert'
        elif 'xlnet' in nlu_ref or 'xlnet' in nlp_ref:
            annotator_class = 'xlnet'
        # Default embedding for nlu actions that don't name a particular model.
        elif 'embed_sentence' in nlu_ref:
            annotator_class = 'glove'
        elif 'embed' in nlu_ref:
            annotator_class = 'glove'
    if model is not None:
        self.model = model
    else:
        # Languages without their own embeddings fall back to multi-lang 'xx'.
        multi_lang_embeds = ['th']
        if lang in multi_lang_embeds:
            lang = 'xx'
        if 'albert' in annotator_class:
            from nlu import SparkNLPAlbert
            if get_default:
                self.model = SparkNLPAlbert.get_default_model()
            else:
                self.model = SparkNLPAlbert.get_pretrained_model(nlp_ref, lang)
        elif ('bert' in annotator_class or 'electra' in annotator_class) and 'sent' in annotator_class:
            # The original had two identical branches (sentence bert / electra);
            # both load BertSentence models, so they are merged here.
            from nlu import BertSentence
            if get_default:
                self.model = BertSentence.get_default_model()
            elif is_licensed:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                self.model = BertSentence.get_pretrained_model(nlp_ref, lang)
        elif 'bert' in annotator_class:
            from nlu import SparkNLPBert
            if get_default:
                self.model = SparkNLPBert.get_default_model()
            elif is_licensed:
                self.model = SparkNLPBert.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                self.model = SparkNLPBert.get_pretrained_model(nlp_ref, lang)
        elif 'elmo' in annotator_class:
            from nlu import SparkNLPElmo
            if get_default:
                self.model = SparkNLPElmo.get_default_model()
            else:
                self.model = SparkNLPElmo.get_pretrained_model(nlp_ref, lang)
        elif 'xlnet' in annotator_class:
            from nlu import SparkNLPXlnet
            if get_default:
                self.model = SparkNLPXlnet.get_default_model()
            else:
                self.model = SparkNLPXlnet.get_pretrained_model(nlp_ref, lang)
        elif 'use' in annotator_class:
            from nlu import SparkNLPUse
            if get_default:
                self.model = SparkNLPUse.get_default_model()
            else:
                self.model = SparkNLPUse.get_pretrained_model(nlp_ref, lang)
        elif 'glove' in annotator_class:
            from nlu import Glove
            if get_default:
                self.model = Glove.get_default_model()
            elif is_licensed:
                self.model = Glove.get_pretrained_model(nlp_ref, lang, 'clinical/models')
            else:
                if nlp_ref == 'glove_840B_300' or nlp_ref == 'glove_6B_300':
                    # Any reference to these particular Glove embeddings is
                    # actually a reference to the multilingual ones.
                    lang = 'xx'
                self.model = Glove.get_pretrained_model(nlp_ref, lang)
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang)
def __init__(self, annotator_class='sentiment_dl', language='en', component_type='classifier', get_default=True,
             model=None, nlp_ref='', nlu_ref='', trainable=False, is_licensed=False, do_ref_checks=True,
             lang='en', loaded_from_pretrained_pipe=False):
    """Build a classifier-family component (sentiment, NER, POS, YAKE,
    language detector, multi-label and single-label classifiers), resolving
    the annotator class from the NLU/NLP reference strings.

    BUGFIX: the vivekn check originally read ``'vivekn' in nlp_ref or
    'vivekn' in nlp_ref`` — the same operand twice — so ``nlu_ref`` was never
    inspected; both references are now checked.
    """
    if do_ref_checks:
        if 'e2e' in nlu_ref or 'toxic' in nlu_ref:
            annotator_class = 'multi_classifier'
        elif 'e2e' in nlp_ref or 'toxic' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'multiclassifierdl' in nlp_ref:
            annotator_class = 'multi_classifier'
        elif 'classifierdl' in nlp_ref:
            annotator_class = 'classifier_dl'
        elif 'yake' in nlu_ref:
            annotator_class = 'yake'
        elif 'yake' in nlp_ref:
            annotator_class = 'yake'
        elif 'sentimentdl' in nlp_ref:
            annotator_class = 'sentiment_dl'
        elif 'vivekn' in nlu_ref or 'vivekn' in nlp_ref:
            annotator_class = 'vivekn_sentiment'
        elif 'wiki_' in nlu_ref or 'wiki_' in nlp_ref:
            annotator_class = 'language_detector'
        elif 'pos' in nlu_ref and 'ner' not in nlu_ref:
            annotator_class = 'pos'
        elif 'pos' in nlp_ref and 'ner' not in nlp_ref:
            annotator_class = 'pos'
        elif 'icd' in nlu_ref:
            annotator_class = 'classifier_dl'
        elif 'med_ner' in nlu_ref:
            annotator_class = 'ner_healthcare'
        elif 'ner' in nlu_ref:
            annotator_class = 'ner'
        elif 'ner' in nlp_ref:
            annotator_class = 'ner'
    if model is not None:
        self.model = model
    else:
        if 'sentiment' in annotator_class and 'vivekn' not in annotator_class:
            from nlu import SentimentDl
            if trainable:
                self.model = SentimentDl.get_default_trainable_model()
            elif is_licensed:
                self.model = SentimentDl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = SentimentDl.get_default_model()
            else:
                self.model = SentimentDl.get_pretrained_model(nlp_ref, language)
        elif 'vivekn' in annotator_class:
            from nlu import ViveknSentiment
            if get_default:
                self.model = ViveknSentiment.get_default_model()
            else:
                self.model = ViveknSentiment.get_pretrained_model(nlp_ref, language)
        elif 'ner' in annotator_class and 'ner_healthcare' not in annotator_class:
            from nlu import NERDL
            if trainable:
                self.model = NERDL.get_default_trainable_model()
            elif is_licensed:
                self.model = NERDL.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = NERDL.get_default_model()
            else:
                self.model = NERDL.get_pretrained_model(nlp_ref, language)
        elif 'ner.crf' in annotator_class:
            # NOTE(review): unreachable — any class containing 'ner.crf' also
            # contains 'ner' and is caught by the NERDL branch above. Kept for
            # fidelity; confirm before removing.
            from nlu import NERDLCRF
            if get_default:
                self.model = NERDLCRF.get_default_model()
            else:
                self.model = NERDLCRF.get_pretrained_model(nlp_ref, language)
        elif ('classifier_dl' in annotator_class or annotator_class == 'toxic') and 'multi' not in annotator_class:
            from nlu import ClassifierDl
            if trainable:
                self.model = ClassifierDl.get_trainable_model()
            elif is_licensed:
                self.model = ClassifierDl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            elif get_default:
                self.model = ClassifierDl.get_default_model()
            else:
                self.model = ClassifierDl.get_pretrained_model(nlp_ref, language)
        elif 'language_detector' in annotator_class:
            from nlu import LanguageDetector
            if get_default:
                self.model = LanguageDetector.get_default_model()
            else:
                self.model = LanguageDetector.get_pretrained_model(nlp_ref, language)
        elif 'pos' in annotator_class:
            from nlu import PartOfSpeechJsl
            if trainable:
                self.model = PartOfSpeechJsl.get_default_trainable_model()
            elif get_default:
                self.model = PartOfSpeechJsl.get_default_model()
            elif is_licensed:
                self.model = PartOfSpeechJsl.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
            else:
                self.model = PartOfSpeechJsl.get_pretrained_model(nlp_ref, language)
        elif 'yake' in annotator_class:
            from nlu import Yake
            # YAKE is unsupervised; only a default model exists.
            self.model = Yake.get_default_model()
        elif 'multi_classifier' in annotator_class:
            from nlu import MultiClassifier
            if trainable:
                self.model = MultiClassifier.get_default_trainable_model()
            elif get_default:
                self.model = MultiClassifier.get_default_model()
            else:
                self.model = MultiClassifier.get_pretrained_model(nlp_ref, language)
        elif 'ner_healthcare' in annotator_class:
            from nlu.components.classifiers.ner_healthcare.ner_dl_healthcare import NERDLHealthcare
            if trainable:
                self.model = NERDLHealthcare.get_default_trainable_model()
            else:
                self.model = NERDLHealthcare.get_pretrained_model(nlp_ref, language, bucket='clinical/models')
    SparkNLUComponent.__init__(self, annotator_class, component_type, nlu_ref, nlp_ref, lang,
                               loaded_from_pretrained_pipe, is_licensed)