def annotate(model_class, name, target, target_column=None):
    """Annotate ``target`` with the pretrained pipeline cached on ``model_class``.

    The pipeline ``name`` (English) is downloaded on first use and cached on
    ``model_class.model``.

    :param model_class: class holding the cached pipeline in its ``model`` attribute
    :param name: pretrained pipeline name to download if not yet cached
    :param target: a pyspark DataFrame, a list of strings, or a single string
    :param target_column: DataFrame column to treat as "text" (required for DataFrames)
    :return: a transformed DataFrame, or the LightPipeline annotation result
    :raises Exception: if ``target`` is a DataFrame and ``target_column`` is missing
    :raises TypeError: if ``target`` is not a DataFrame, list or str
    """
    if not model_class.model:
        # Lazily download and cache the pipeline on the class so repeated
        # calls do not re-download it.
        model_class.model = ResourceDownloader().downloadPipeline(name, "en")
    # isinstance (rather than `type(x) is T`) also accepts subclasses.
    if isinstance(target, pyspark.sql.dataframe.DataFrame):
        if not target_column:
            raise Exception("annotate() target_column arg needed when targeting a DataFrame")
        return model_class.model.transform(target.withColumnRenamed(target_column, "text"))
    elif isinstance(target, (list, str)):
        pip = LightPipeline(model_class.model)
        return pip.annotate(target)
    else:
        # Previously an unsupported target fell through and returned None
        # silently; fail loudly instead.
        raise TypeError("annotate() target must be a DataFrame, a list or a str")
def pretrained():
    """Return the cached "pipeline_basic" pipeline, downloading it on first use."""
    if not BasicPipeline.model:
        downloader = ResourceDownloader()
        BasicPipeline.model = downloader.downloadPipeline("pipeline_basic", "en")
    return BasicPipeline.model
def pretrained():
    """Return the cached "pipeline_vivekn" pipeline, downloading it on first use.

    Fix: the cache guard previously tested ``BasicPipeline.model`` instead of
    ``AdvancedPipeline.model``, so this pipeline was skipped whenever the basic
    pipeline happened to be cached, and re-downloaded whenever it was not.
    """
    if not AdvancedPipeline.model:
        AdvancedPipeline.model = ResourceDownloader().downloadPipeline("pipeline_vivekn", "en")
    return AdvancedPipeline.model
def pretrained(name="vivekn_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(ViveknSentimentModel, name, language, remote_loc)
def pretrained(name="spell_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(NorvigSweetingModel, name, language, remote_loc)
def pretrained(name="lemma_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(LemmatizerModel, name, language, remote_loc)
def pretrained(name="pos_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(PerceptronModel, name, language, remote_loc)
def pretrained(name="ner_precise", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(NerDLModel, name, language, remote_loc)
def pretrained(name="context_spell_gen", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(ContextSpellCheckerModel, name, language, remote_loc)
def pretrained(name="as_fast_lg", language="en"): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(AssertionLogRegModel, name, language)
def pretrained(name="spell_sd_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(SymmetricDeleteModel, name, language, remote_loc)
def pretrained(name="ner_fast", language="en"): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(NerCrfModel, name, language)
def runTest(self):
    """Smoke-test the ResourceDownloader listing APIs across their arity variants."""
    listing_calls = [
        (ResourceDownloader.showPublicModels, ()),
        (ResourceDownloader.showPublicModels, ("NerDLModel",)),
        (ResourceDownloader.showPublicModels, ("NerDLModel", "en")),
        (ResourceDownloader.showPublicModels, ("NerDLModel", "en", "2.5.0")),
        (ResourceDownloader.showAvailableAnnotators, ()),
        (ResourceDownloader.showPublicPipelines, ()),
        (ResourceDownloader.showPublicPipelines, ("en",)),
        (ResourceDownloader.showPublicPipelines, ("en", "2.5.0")),
        (ResourceDownloader.showUnCategorizedResources, ()),
    ]
    # Invoke each listing exactly as the original did, in the same order.
    for show, args in listing_calls:
        show(*args)
def pretrained(name="bert_uncased", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(BertEmbeddings, name, language, remote_loc)
def pretrained(name="glove_100d", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(WordEmbeddingsModel, name, language, remote_loc)
def pretrained(name="tdp_fast", language="en", remote_loc=None): from sparknlp.pretrained import ResourceDownloader return ResourceDownloader.downloadModel(TypedDependencyParserModel, name, language, remote_loc)