def get_authenticated_spark_HC(HC_LICENSE, HC_SECRET, AWS_ACCESS_KEY, AWS_SECRET_KEY, gpu):
    """Install/authenticate Spark NLP for Healthcare and start a Spark session.

    Installs the licensed ``sparknlp_jsl`` library if needed, pushes the
    license/AWS credentials into the environment, then starts Spark with the
    builder flag matching the detected PySpark version.

    Args:
        HC_LICENSE: Healthcare license string.
        HC_SECRET: Healthcare library secret (also used by the installer).
        AWS_ACCESS_KEY: AWS access key id for the licensed artifacts.
        AWS_SECRET_KEY: AWS secret access key.
        gpu: Whether to start Spark NLP with GPU jars.

    Returns:
        The ``SparkSession`` created by ``sparknlp_jsl.start``.

    Raises:
        ValueError: If the installed PySpark version is unsupported.
    """
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    authenticate_enviroment_HC(HC_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)

    # Imported lazily: sparknlp_jsl may only exist after the install above.
    import sparknlp
    import sparknlp_jsl

    spark_conf = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M",
    }
    # Keyword arguments shared by every version branch.
    common = dict(gpu=gpu, public=sparknlp.version(), params=spark_conf)

    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(HC_SECRET, spark23=True, **common)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(HC_SECRET, spark24=True, **common)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(HC_SECRET, **common)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
def read_spark():
    """Return (and print) basic info about the current Spark NLP session.

    Relies on module-level globals ``sparknlp``, ``sparknlp_jsl`` and ``spark``
    already being initialized elsewhere in this module.

    Returns:
        dict: Spark NLP version, Spark NLP JSL version, and the app name.
    """
    session = dict()
    session["Spark NLP Version"] = sparknlp.version()
    session["Spark NLP_JSL Version"] = sparknlp_jsl.version()
    # BUG FIX: the original read getConf().getAll()[6][1], assuming the app
    # name is always the 7th config tuple — getAll() gives no ordering
    # guarantee. Look the property up by its key instead.
    session["App Name"] = spark.sparkContext.getConf().get("spark.app.name")
    print(session)
    return session
def get_authenticated_spark_OCR(OCR_LICENSE, OCR_SECRET, AWS_ACCESS_KEY, AWS_SECRET_KEY, gpu):
    """Install/authenticate Spark OCR and start a Spark session.

    Args:
        OCR_LICENSE: Spark OCR license string.
        OCR_SECRET: OCR library secret (also used by the installer).
        AWS_ACCESS_KEY: AWS access key id for the licensed artifacts.
        AWS_SECRET_KEY: AWS secret access key.
        gpu: Accepted for signature parity with the other ``get_*`` helpers;
            not forwarded to ``sparkocr.start`` here.

    Returns:
        The ``SparkSession`` created by ``sparkocr.start``.
    """
    import_or_install_licensed_lib(OCR_SECRET, 'ocr')
    authenticate_enviroment_OCR(OCR_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # Imported lazily: sparkocr may only exist after the install above.
    import sparkocr
    import sparknlp
    # Pin the OCR session to the open-source Spark NLP version in use.
    OS_version = sparknlp.version()
    spark = sparkocr.start(
        secret=OCR_SECRET,
        nlp_version=OS_version,
    )
    spark.sparkContext.setLogLevel('ERROR')
    # BUG FIX: the original fell off the end and returned None; a "get_*"
    # helper must hand the session back. (An unused ``params`` dict that was
    # never passed to sparkocr.start has also been removed.)
    return spark
async def startup_event():
    """Startup hook: load the license, start Spark NLP and preload NER models.

    Records timing checkpoints in the module-level ``event_list`` dict and
    publishes the Spark session via the module-level ``spark`` global.
    """
    event_list['0_start_up'] = datetime.now()
    print(f'startup has been started at {datetime.now()}...', )
    with open('license.json', 'r') as f:
        license_keys = json.load(f)
    # BUG FIX: the original also called locals().update(license_keys) to
    # "define local variables" — that is a documented no-op inside a CPython
    # function (locals() returns a snapshot), so it has been removed.
    # Adding license key-value pairs to environment variables
    os.environ.update(license_keys)
    print("Spark NLP Version :", sparknlp.version())
    print("Spark NLP_JSL Version :", sparknlp_jsl.version())
    global spark
    spark = sparknlp_jsl.start(license_keys['SECRET'])
    print(
        f'****** spark nlp healthcare version fired up {datetime.now()} ******'
    )
    event_list['1_sparknlp_fired'] = datetime.now()
    ner_models_clinical, ner_models_biobert = get_models_list()
    print(
        f'***** NER clinical and biobert models are listed {datetime.now()} .....'
    )
    event_list['2_models_listed'] = datetime.now()
    # load NER clinical and biobert models
    print(f'***** Running with GLoVe Embeddings {datetime.now()} *****')
    model_dict = load_sparknlp_models()
    event_list['3_glove_embeddings'] = datetime.now()
    print(f'***** Running with BioBert Embeddings {datetime.now()} *****')
    # NOTE(review): this rebinding discards the GLoVe dict loaded above —
    # presumably the load_* helpers register models globally as a side effect;
    # confirm, otherwise the first load is wasted work.
    model_dict = load_sparknlp_models_biobert()
    event_list['4_biobert_embeddings'] = datetime.now()
    print(event_list)
def get_authenticated_spark(
    SPARK_NLP_LICENSE,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    JSL_SECRET,
    gpu=False,
):
    """Authenticate the environment and start Spark with the Healthcare jar.

    0. If Spark-NLP-Healthcare is missing, install it via PyPI.
    1. If not yet authenticated, push credentials via authenticate_enviroment_HC().

    Args:
        SPARK_NLP_LICENSE: Healthcare license string.
        AWS_ACCESS_KEY_ID: AWS access key id for the licensed artifacts.
        AWS_SECRET_ACCESS_KEY: AWS secret access key.
        JSL_SECRET: Library secret used both to install and to start sparknlp_jsl.
        gpu: Whether to start Spark NLP with GPU jars.

    Returns:
        The SparkSession created by sparknlp_jsl.start.

    Raises:
        ValueError: If the installed PySpark version is unsupported.
    """
    import sparknlp
    # NOTE(review): here we authenticate before installing, while the sibling
    # get_authenticated_spark_HC installs first — confirm the order is not
    # significant for a fresh environment.
    authenticate_enviroment_HC(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    import_or_install_licensed_lib(JSL_SECRET)
    # Imported lazily: sparknlp_jsl may only exist after the install above.
    import sparknlp_jsl
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    # NOTE(review): the 2.3/2.4 branches omit public=sparknlp.version() unlike
    # the 3.x branch and the sibling helper — confirm this is intentional.
    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(JSL_SECRET, spark23=True, gpu=gpu, params=params)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(JSL_SECRET, spark24=True, gpu=gpu, params=params)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(JSL_SECRET, gpu=gpu, public=sparknlp.version(), params=params)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
def get_authenticated_spark_HC_and_OCR(HC_LICENSE, HC_SECRET, OCR_LICENSE, OCR_SECRET, AWS_ACCESS_KEY, AWS_SECRET_KEY, gpu):
    """Install/authenticate both Healthcare and OCR libs and start Spark.

    Args:
        HC_LICENSE: Healthcare license string.
        HC_SECRET: Healthcare library secret.
        OCR_LICENSE: Spark OCR license string.
        OCR_SECRET: OCR library secret.
        AWS_ACCESS_KEY: AWS access key id for the licensed artifacts.
        AWS_SECRET_KEY: AWS secret access key.
        gpu: Accepted for signature parity with the other ``get_*`` helpers;
            not forwarded to ``sparkocr.start`` here.

    Returns:
        The ``SparkSession`` created by ``sparkocr.start``.
    """
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    import_or_install_licensed_lib(OCR_SECRET, 'ocr')
    authenticate_enviroment_HC_and_OCR(HC_LICENSE, OCR_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    # Imported lazily: the licensed libs may only exist after the installs above.
    import sparkocr
    import sparknlp
    # The healthcare build version is taken from the prefix of its secret.
    HC_version = HC_SECRET.split('-')[0]
    OS_version = sparknlp.version()
    spark = sparkocr.start(secret=OCR_SECRET,
                           nlp_secret=HC_SECRET,
                           nlp_version=OS_version,
                           nlp_internal=HC_version)
    spark.sparkContext.setLogLevel('ERROR')
    # BUG FIX: the original fell off the end and returned None; a "get_*"
    # helper must hand the session back. (An unused ``params`` dict that was
    # never passed to sparkocr.start has also been removed.)
    return spark
# Notebook-style demo script: build a Spark session with the Spark NLP 2.3.4
# package, create a tiny test DataFrame, then read a CoNLL training file.
from pyspark.sql import SparkSession

# Local-mode session with Kryo serialization and the Spark NLP package pulled
# from the JohnSnowLabs repository.
spark = SparkSession.builder \
    .appName("Global DEMO - Spark NLP Enterprise 2.3.4") \
    .master("local[*]") \
    .config("spark.rdd.compress","true") \
    .config("spark.driver.memory","8G") \
    .config("spark.driver.maxResultSize", "2G") \
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
    .config("spark.kryoserializer.buffer.max", "600M") \
    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.3.4") \
    .getOrCreate()
#spark = sparknlp.start()
print("Spark NLP version: ", sparknlp.version())
print("Apache Spark version: ", spark.version)

"""Create some data for testing purposes"""
from pyspark.sql import Row
# One-row DataFrame with columns (sentence, start, end).
R = Row('sentence', 'start', 'end')
test_data = spark.createDataFrame([R('Peter is a good person, and he was working at IBM',0,1)])

"""Create a custom pipeline"""
# NOTE(review): "!ls" is IPython/notebook shell magic — this file only runs
# inside a notebook environment, not as plain Python.
!ls
from sparknlp.training import CoNLL
# Read CoNLL-formatted NER training data from the working directory.
training_data = CoNLL().readDataset(spark, 'con_rest_train.bio')
training_data.show()
# Databricks notebook export: imports, Spark NLP session startup, and shell
# cells that download the sentiment demo corpora into /tmp.
from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import DocumentAssembler, Finisher

# COMMAND ----------

# MAGIC %md #### 2. Load SparkSession if not already there

# COMMAND ----------

import sparknlp

spark = sparknlp.start()

print("Spark NLP version")
sparknlp.version()
print("Apache Spark version")
spark.version

# COMMAND ----------

# NOTE(review): "!" lines are IPython/notebook shell magic — this file only
# runs inside a notebook environment, not as plain Python.
# Remove any stale copies, then fetch the demo datasets (wget -N re-downloads
# only when the remote file is newer).
! rm /tmp/sentiment.parquet.zip
! rm -rf /tmp/sentiment.parquet
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip -P /tmp
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/lemma-corpus-small/lemmas_small.txt -P /tmp
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/default-sentiment-dict.txt -P /tmp

# COMMAND ----------

! unzip /tmp/sentiment.parquet.zip -d /tmp/
entities.append(n.metadata['entity']) st.write('') st.write('Entities') st.dataframe(pd.DataFrame({'chunks': chunks, 'entities': entities})) #st.write(annotated_text['entities']) if 'sentence' in annotated_text.keys(): st.write('') st.write('Sentences') st.write('') st.write(annotated_text['sentence']) #st.dataframe(pd.DataFrame({'sentences':annotated_text['sentence']})) if 'sentiment' in annotated_text.keys(): st.write('') st.write('Sentiment') st.write('') st.dataframe( pd.DataFrame({ 'sentence': annotated_text['sentence'], 'sentiment': annotated_text['sentiment'] })) st.subheader('Model Output') st.write(annotated_text) st.sidebar.markdown("Spark NLP version: {}".format(sparknlp.version())) st.sidebar.markdown("Apache Spark version: {}".format(spark.version))
import zipfile # %% print("Tensorflow: " + tf.__version__) print("Keras: " + tf.keras.__version__) sys.path.append('./tflow/ner/') sys.path.append('./tflow/lib/ner/') # %% from sparknlp.annotator import * from sparknlp.common import * from sparknlp.base import * from sparknlp.embeddings import * import sparknlp print("SparkNLP: " + sparknlp.version()) # %% from embeddings_resolver import BertEmbeddingsResolver from ner_model_saver import NerModelSaver # %% CORPUS_PATH = "/home/rcuesta/TFM/es.rcs.tfm/es.rcs.tfm.corpus/" DATASET_PATH = CORPUS_PATH + "datasets/" BERT_PATH = DATASET_PATH + 'bert/' BIOBERT_PATH = DATASET_PATH + 'biobert/' SPARKNLP_BERT_MODEL_PATH = CORPUS_PATH + "models/bert" # %% spark = sparknlp.start()