示例#1
0
def get_authenticated_spark_HC(HC_LICENSE, HC_SECRET, AWS_ACCESS_KEY,
                               AWS_SECRET_KEY, gpu):
    """Install/authenticate the licensed healthcare library and start Spark.

    Installs the licensed lib if missing, exports the credentials to the
    environment, then launches a Spark session tuned for healthcare models,
    picking the start flags that match the detected PySpark version.

    :param HC_LICENSE: Spark NLP for Healthcare license string.
    :param HC_SECRET: secret used both to install and to start the JSL lib.
    :param AWS_ACCESS_KEY: AWS access key id for model downloads.
    :param AWS_SECRET_KEY: AWS secret access key for model downloads.
    :param gpu: whether to start Spark with GPU support.
    :return: the authenticated SparkSession.
    :raises ValueError: if the installed PySpark version is unsupported.
    """
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    authenticate_enviroment_HC(HC_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    import sparknlp
    import sparknlp_jsl

    # Driver-side Spark settings shared by every version branch.
    spark_conf = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    # Keyword args common to all sparknlp_jsl.start(...) invocations below.
    common_kwargs = dict(gpu=gpu, public=sparknlp.version(), params=spark_conf)

    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(HC_SECRET, spark23=True, **common_kwargs)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(HC_SECRET, spark24=True, **common_kwargs)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(HC_SECRET, **common_kwargs)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
示例#2
0
def get_authenticated_spark(
    SPARK_NLP_LICENSE,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    JSL_SECRET,
    gpu=False,
):
    """
    Authenticates environment if not already done so and returns Spark Context with Healthcare Jar loaded
    0. If no Spark-NLP-Healthcare, install it via PyPi
    1. If not auth, run authenticate_enviroment()

    :raises ValueError: if the installed PySpark version is unsupported.
    """

    authenticate_enviroment(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID,
                            AWS_SECRET_ACCESS_KEY)
    install_and_import_healthcare(JSL_SECRET)

    import sparknlp_jsl
    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(JSL_SECRET, spark23=True, gpu=gpu)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(JSL_SECRET, spark24=True, gpu=gpu)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(JSL_SECRET, gpu=gpu)
    # Previously this printed the message and raised a bare ValueError; the
    # message now travels with the exception so callers/logs actually see it.
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
示例#3
0
def get_authenticated_spark(
    SPARK_NLP_LICENSE,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    JSL_SECRET,
    gpu=False,
):
    """
    Authenticates environment if not already done so and returns Spark Context with Healthcare Jar loaded
    0. If no Spark-NLP-Healthcare, install it via PyPi
    1. If not auth, run authenticate_enviroment()

    """
    import sparknlp
    authenticate_enviroment_HC(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID,
                               AWS_SECRET_ACCESS_KEY)
    import_or_install_licensed_lib(JSL_SECRET)
    import sparknlp_jsl

    # Driver-side Spark tuning applied regardless of version.
    session_params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }

    # Pick the version-specific start flags, then launch once.
    # NOTE(review): only the 3.x branch passes public=sparknlp.version(),
    # mirroring the original code — confirm whether 2.3/2.4 should too.
    if is_env_pyspark_2_3():
        version_kwargs = {"spark23": True}
    elif is_env_pyspark_2_4():
        version_kwargs = {"spark24": True}
    elif is_env_pyspark_3_0() or is_env_pyspark_3_1():
        version_kwargs = {"public": sparknlp.version()}
    else:
        raise ValueError(
            f"Current Spark version {get_pyspark_version()} not supported!")
    return sparknlp_jsl.start(JSL_SECRET,
                              gpu=gpu,
                              params=session_params,
                              **version_kwargs)
示例#4
0
async def startup_event():
    """Warm up the service once at application startup.

    Loads the license file, exports its keys to the environment, starts the
    licensed Spark NLP session (stored in the module-level ``spark``), and
    pre-loads the NER model sets, recording timing milestones in
    ``event_list`` along the way.
    """
    event_list['0_start_up'] = datetime.now()
    print(f'startup has been started at {datetime.now()}...', )

    with open('license.json', 'r') as f:
        license_keys = json.load(f)

    # NOTE: the original also ran locals().update(license_keys) to "define
    # license key-value pairs as local variables" — inside a function that is
    # a documented no-op (locals() returns a snapshot), so it was removed.

    # Adding license key-value pairs to environment variables
    os.environ.update(license_keys)

    print("Spark NLP Version :", sparknlp.version())
    print("Spark NLP_JSL Version :", sparknlp_jsl.version())

    global spark

    spark = sparknlp_jsl.start(license_keys['SECRET'])
    print(
        f'****** spark nlp healthcare version fired up {datetime.now()} ******'
    )
    event_list['1_sparknlp_fired'] = datetime.now()

    ner_models_clinical, ner_models_biobert = get_models_list()
    print(
        f'***** NER clinical and biobert models are listed {datetime.now()} .....'
    )
    event_list['2_models_listed'] = datetime.now()

    # load NER clinical and biobert models
    print(f'***** Running with GLoVe Embeddings  {datetime.now()} *****')
    # NOTE(review): the original bound both loader results to the same
    # ``model_dict`` name, discarding the GLoVe result; distinct names keep
    # both visible. Neither local is used below — presumably the loaders
    # cache their models elsewhere (e.g. via st.cache); confirm.
    model_dict_glove = load_sparknlp_models()
    event_list['3_glove_embeddings'] = datetime.now()

    print(f'***** Running with BioBert Embeddings {datetime.now()} *****')
    model_dict_biobert = load_sparknlp_models_biobert()
    event_list['4_biobert_embeddings'] = datetime.now()

    print(event_list)
示例#5
0
from pyspark.sql import functions as F

from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl
import sparknlp

import warnings

# Silence library deprecation chatter in the demo output.
warnings.filterwarnings('ignore')

# Driver-side Spark settings for the licensed session.
spark_conf = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M"
}

print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

# NOTE(review): SECRET is expected to be defined earlier in the file/session.
spark = sparknlp_jsl.start(SECRET, params=spark_conf)

from sparknlp.pretrained import PretrainedPipeline

# Pretrained pipeline that maps a term to candidate clinical NER models.
ner_pipeline = PretrainedPipeline("ner_model_finder", "en", "clinical/models")

result = ner_pipeline.annotate("medication")

separator = '-' * 100
print(separator)
print(result)
print(separator)
示例#6
0
print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

import json

# Load the license/credentials bundle (Colab path).
with open('/content/sparknlp_keys.json', 'r') as f:
    license_keys = json.load(f)
# Alternative server-side location, kept for reference:
# with open('/home/ubuntu/hasham/jsl_keys.json', 'r') as f:
#     license_keys = json.load(f)

secret = license_keys['SECRET']

# Export the credentials the licensed libraries read from the environment.
for _key in ('SPARK_NLP_LICENSE', 'AWS_ACCESS_KEY_ID',
             'AWS_SECRET_ACCESS_KEY'):
    os.environ[_key] = license_keys[_key]

spark = sparknlp_jsl.start(license_keys['SECRET'])
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def load_sparknlp_models():
    """Load (and Streamlit-cache) pretrained healthcare annotators.

    NOTE(review): this definition appears truncated in this chunk — it builds
    annotators but never returns or registers them, and the next line is
    top-level code; confirm against the complete file.
    """

    print('loading pretrained models')

    # Sentence splitter trained for clinical/healthcare text.
    sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\
        .setInputCols(["document"])\
        .setOutputCol("sentence")

    # Clinical word embeddings consumed by downstream NER models.
    embeddings_clinical = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")\
        .setInputCols(["sentence","token"])\
        .setOutputCol("embeddings")
st.title("Spark NLP Clinical NER Playground")

# json was imported twice in the original; the redundant second import
# was dropped.
import json
import os
from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql import SparkSession

from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl
import sparknlp

# Start the licensed session; SECRET must already be in the environment.
spark = sparknlp_jsl.start(os.environ['SECRET'])

print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def load_sparknlp_models():

    print('loading pretrained models')

    sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\
        .setInputCols(["document"])\
        .setOutputCol("sentence")

    embeddings_clinical = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")\
示例#8
0
from sparknlp_display import RelationExtractionVisualizer

import warnings

# Keep the demo output free of library warnings.
warnings.filterwarnings('ignore')

# Driver-side Spark settings for the licensed session.
session_params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M"
}

# Load the uploaded license keys.
with open('/content/spark_nlp_for_healthcare 4.json') as f:
    license_keys = json.load(f)

spark = sparknlp_jsl.start(license_keys['SECRET'], params=session_params)

import streamlit as st

# Page-level Streamlit configuration (optional).
st.set_page_config(page_title="aemintek app",
                   page_icon=":shark:",
                   layout="centered")

# Three-column layout for header images.
col1, col2, col3 = st.columns(3)

with col1:
    st.image(
        "https://repository-images.githubusercontent.com/104670986/2e728700-ace4-11ea-9cfc-f3e060b25ddf"
    )