def Explain(self, text, ID):
    """Run the NLP pipeline on *text* and insert the extracted entities into BigQuery.

    Parameters
    ----------
    text : str
        The text to annotate.
    ID :
        Identifier stored alongside each extracted entity
        (presumably a document/message id — confirm against caller).
    """
    from sparknlp.base import LightPipeline

    print("Explain... " + ID)
    # A LightPipeline is used here for fast single-document annotation.
    # For further information see:
    # https://medium.com/spark-nlp/spark-nlp-101-lightpipeline-a544e93f20f1
    # Setting up the pipeline requires the previously downloaded model.
    lp = LightPipeline(self.Model)
    # Let the pipeline annotate our text.
    r = lp.annotate(text)
    # Pair every extracted entity with the corresponding ID.
    toret = [(i, ID) for i in r["entities"]]
    if toret:
        # Create one row in BigQuery per extracted entity.
        errors = self.client.insert_rows(self.table, toret)  # API request
        # insert_rows returns an empty list on success; print any errors.
        if errors:
            print(errors)
    else:
        print("No entities found")
def annotate(model_class, name, target, target_column=None):
    """Annotate *target* with a pretrained pipeline, downloading it on first use.

    Parameters
    ----------
    model_class :
        Holder object whose ``model`` attribute caches the downloaded pipeline.
    name : str
        Name of the pretrained pipeline to download (English, "en").
    target : pyspark DataFrame, list, or str
        Either a DataFrame (transformed column-wise) or raw text input(s)
        annotated via a LightPipeline.
    target_column : str, optional
        Required when *target* is a DataFrame: the column to treat as "text".

    Raises
    ------
    Exception
        If *target* is a DataFrame and no *target_column* was given.
    TypeError
        If *target* is not a DataFrame, list, or str (previously this case
        silently returned ``None``).
    """
    # Download and cache the pretrained pipeline on first use.
    if not model_class.model:
        model_class.model = ResourceDownloader().downloadPipeline(name, "en")
    # isinstance() is the idiomatic type check (also covers subclasses),
    # unlike the original `type(x) is T` comparisons.
    if isinstance(target, pyspark.sql.dataframe.DataFrame):
        if not target_column:
            raise Exception("annotate() target_column arg needed when targeting a DataFrame")
        return model_class.model.transform(target.withColumnRenamed(target_column, "text"))
    elif isinstance(target, (list, str)):
        pip = LightPipeline(model_class.model)
        return pip.annotate(target)
    # Fail loudly instead of silently returning None for unsupported inputs.
    raise TypeError(
        "annotate() target must be a DataFrame, list or str, got %s" % type(target).__name__
    )
def main():
    """Load the tweets CSV, fit the simple pipeline, and annotate a sample sentence."""
    spark, sc = init_spark()

    # Read the CSV of tweets and keep only the text column.
    raw = spark.read.load(
        "C:\\sparkTmp\\why_i_wear_mask_tweets.csv",
        format="csv",
        sep=",",
        inferSchema="true",
        header="true",
        charset="UTF-8",
    )
    tweets = raw.select("text")

    # Fit the pipeline on the tweet texts.
    fitted = simplePipeline().fit(tweets)

    # Wrap the fitted model in a LightPipeline for fast in-memory annotation
    # and print the annotations for a sample sentence.
    light = LightPipeline(fitted, parse_embeddings=True)
    sample = "How did serfdom develop in and then leave Russia ?"
    print(light.annotate(sample))
# --- Notebook cell: save the fitted NER model, reload it, and inspect its output ---
# NOTE(review): relies on globals defined in earlier cells
# (`spark`, `prediction_model`, `prediction_data`) — confirm they exist.

# Show the input data and the current model's predictions.
prediction_data.show()
prediction_model.transform(prediction_data).show(truncate=False)

# Persist the fitted pipeline model to disk (overwriting any previous save).
prediction_model.write().overwrite().save("ner_dl_model")

# IPython/Colab shell magic: copy the saved model into Google Drive.
# This line is only valid inside a notebook, not plain Python.
!cp -r "ner_dl_model" "gdrive/My Drive/Colab Notebooks/SparkNLP/utils/ner_dl_model_base"

from pyspark.ml import PipelineModel, Pipeline

# Reload the model from disk to verify the round trip.
loaded_prediction_model = PipelineModel.read().load("ner_dl_model")

# Build a small test DataFrame with a "text" column.
prediction_data = spark.createDataFrame([["Maria is a nice place."],["any bbq places open before 5 nearby"]]).toDF("text")
prediction_data.show()
#loaded_prediction_model.transform(prediction_data).show(truncate=False)

# Run the reloaded model and inspect the finished NER output columns.
prediction = loaded_prediction_model.transform(prediction_data)
prediction.select("finished_ner_metadata").show(truncate=False)
prediction.select("finished_ner").show(truncate=False)
prediction.select("finished_ner_converter_metadata").show(truncate=False)
prediction.select("finished_ner_converter").show(truncate=False)
#prediction.select("ner").show(truncate=False)

from sparknlp.base import LightPipeline

# Use a LightPipeline for quick single-sentence annotation with the reloaded model.
lp = LightPipeline(loaded_prediction_model)
result = lp.annotate("Peter is a good person.")

# Print the first 10 (token, NER tag) pairs.
for e in list(zip(result['token'], result['ner']))[:10]:
    print(e)

# Inspect the pipeline stages (and the nested stages of the last stage).
for stage in loaded_prediction_model.stages:
    print(stage)
print(loaded_prediction_model.stages[-1].stages)