def Explain(self, text, ID):
        """Runs NLP on the string and inserts the result into BigQuery"""
        from sparknlp.base import LightPipeline

        print("Explain... " + ID)

        # In this case we use a LightPipeline. For further information see:
        # https://medium.com/spark-nlp/spark-nlp-101-lightpipeline-a544e93f20f1
        # Setting up the pipeline requires the previously downloaded model.

        lp = LightPipeline(self.Model)

        # Now let the pipeline annotate our text
        r = lp.annotate(text)

        # Create an empty result list
        toret = []

        # Iterate over the recognized entities
        for i in r["entities"]:
            # and append each one to the list with the corresponding ID
            toret.append((i, ID))

        # Check that the list is not empty
        if toret:

            # Create a row in BigQuery for each item
            errors = self.client.insert_rows(self.table, toret)  # API request

            # Print possible errors
            if errors:
                print(errors)
        else:
            print("No entities found")
Example No. 2
import pyspark

from sparknlp.base import LightPipeline
from sparknlp.pretrained import ResourceDownloader


def annotate(model_class, name, target, target_column=None):
    # Download the pretrained pipeline once and cache it on the class
    if not model_class.model:
        model_class.model = ResourceDownloader().downloadPipeline(name, "en")
    if isinstance(target, pyspark.sql.DataFrame):
        if not target_column:
            raise Exception("annotate() target_column arg needed when targeting a DataFrame")
        # Spark NLP pipelines expect the input column to be named "text"
        return model_class.model.transform(target.withColumnRenamed(target_column, "text"))
    elif isinstance(target, (list, str)):
        # Plain strings and lists are annotated locally via a LightPipeline
        pip = LightPipeline(model_class.model)
        return pip.annotate(target)
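
A quick usage sketch for annotate(); the holder class and the existing spark session are assumptions for illustration, not part of the original example:

# Hypothetical holder class (assumed for illustration)
class Model:
    model = None

# Annotating a plain string returns a dict of annotator outputs;
# "explain_document_ml" is a standard pretrained Spark NLP pipeline
print(annotate(Model, "explain_document_ml", "Harry Potter is a great movie."))

# Annotating a DataFrame: target_column names the column holding the text
df = spark.createDataFrame([["Harry Potter is a great movie."]]).toDF("review")
annotate(Model, "explain_document_ml", df, target_column="review").show()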
Example No. 3
from sparknlp.base import LightPipeline


def main():
    # init_spark() and simplePipeline() are helpers defined elsewhere in the project
    spark, sc = init_spark()

    # Load the tweets CSV and keep only the text column
    tweetsDf = spark.read \
        .load("C:\\sparkTmp\\why_i_wear_mask_tweets.csv",
              format="csv", sep=",", inferSchema="true",
              header="true", charset="UTF-8") \
        .select("text")

    pipeline = simplePipeline()
    pipelineModel = pipeline.fit(tweetsDf)
    # result = pipelineModel.transform(tweetsDf)
    # result.show()

    # Wrap the fitted model in a LightPipeline for fast in-memory annotation
    lightModel = LightPipeline(pipelineModel, parse_embeddings=True)
    print(
        lightModel.annotate(
            "How did serfdom develop in and then leave Russia ?"))
Example No. 4
# Show the input data and run the fitted prediction model on it
prediction_data.show()
prediction_model.transform(prediction_data).show(truncate=False)

# Persist the fitted pipeline to disk
prediction_model.write().overwrite().save("ner_dl_model")

# Copy the saved model into Google Drive (Colab shell command)
!cp -r "ner_dl_model" "gdrive/My Drive/Colab Notebooks/SparkNLP/utils/ner_dl_model_base"

from pyspark.ml import PipelineModel, Pipeline

# Load the saved pipeline back from disk
loaded_prediction_model = PipelineModel.read().load("ner_dl_model")

prediction_data = spark.createDataFrame([["Maria is a nice place."], ["any bbq places open before 5 nearby"]]).toDF("text")
prediction_data.show()
# loaded_prediction_model.transform(prediction_data).show(truncate=False)
prediction = loaded_prediction_model.transform(prediction_data)

# Inspect the finished (string-converted) NER outputs and their metadata
prediction.select("finished_ner_metadata").show(truncate=False)
prediction.select("finished_ner").show(truncate=False)
prediction.select("finished_ner_converter_metadata").show(truncate=False)
prediction.select("finished_ner_converter").show(truncate=False)
# prediction.select("ner").show(truncate=False)

from sparknlp.base import LightPipeline

# Annotate a single sentence locally and print the first ten (token, tag) pairs
lp = LightPipeline(loaded_prediction_model)
result = lp.annotate("Peter is a good person.")
for e in list(zip(result['token'], result['ner']))[:10]:
    print(e)

# List the stages of the loaded pipeline; the last stage is itself a nested
# pipeline, so print its inner stages too
for stage in loaded_prediction_model.stages:
    print(stage)
print(loaded_prediction_model.stages[-1].stages)
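
Beyond annotate(), LightPipeline's fullAnnotate() returns full Annotation objects with metadata, mirroring the *_metadata columns inspected above. A brief sketch; the "ner_converter" output column name is inferred from the finished_ner_converter column and may differ in the actual pipeline:

# fullAnnotate returns Annotation objects carrying result text and metadata
full = lp.fullAnnotate("Peter is a good person.")[0]
for ann in full["ner_converter"]:  # column name inferred, may differ
    print(ann.result, ann.metadata)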