Exemplo n.º 1
0
def convert_word_digits(string_value: str):
    """
    Run ``string_value`` through ``text2digits`` and return the converted text.

    A fresh ``Text2Digits`` converter is created on every call and its
    ``convert`` result is returned unchanged.
    """
    return text2digits.Text2Digits().convert(string_value)
def validateDosageValue(result_dict, ehrc_df):
    """Check an extracted dosage against the dosages listed for its molecule.

    Args:
        result_dict: Mapping read with the keys ``'medicine_name'``,
            ``'dosage_value'`` and ``'dosage_suffix'``.
        ehrc_df: Frame with a ``'MOLECULE_NAME'`` column and a ``'DOSAGE'``
            column holding comma-separated dosage strings. It is only read;
            the lower-cased names are kept in a local Series.

    Returns:
        ``True`` / ``False`` when the molecule is present and the normalised
        ``value + suffix`` is / is not among its listed dosages;
        ``"Dosage Value Missing"`` when no dosage value was extracted;
        ``"Molecule Not Found in Master"`` when no molecule name contains the
        drug name; ``"Exception"`` for every other situation, including any
        error raised while validating.
    """
    try:
        drugName = result_dict.get('medicine_name').lower()
        extracted_dosage_value = result_dict.get('dosage_value', "")

        if extracted_dosage_value == "":
            return "Dosage Value Missing"

        # Spelled-out numbers are converted to digits before comparison.
        if not is_number(extracted_dosage_value):
            extracted_dosage_value = text2digits.Text2Digits().convert(
                extracted_dosage_value).strip()

        extracted_dosage_suffix = result_dict.get('dosage_suffix', "")
        dosage_value = (
            extracted_dosage_value.lower().replace(" ", "").strip()
            + extracted_dosage_suffix.lower().replace(" ", "").strip())

        # Work on a lower-cased copy instead of overwriting the caller's
        # column on every call.
        molecule_names = ehrc_df['MOLECULE_NAME'].str.lower()
        # regex=False: the drug name is literal text, so characters such as
        # "+" or "(" must not be interpreted as regex syntax.
        molecule_known = molecule_names.astype("str").str.contains(
            drugName, regex=False).any()

        if not molecule_known:
            print("Molecule Not Found in Master >> " + drugName)
            return "Molecule Not Found in Master"
        if dosage_value == "":
            # Value and suffix normalised to nothing (e.g. whitespace only).
            return "Exception"

        # The presence test above is a substring match while this lookup is
        # an exact match; when only a partial match exists, ``iloc[0]``
        # raises and the handler below reports "Exception".
        listed_dosages = ehrc_df.loc[molecule_names == drugName,
                                     'DOSAGE'].iloc[0].split(',')
        allowed = [item.replace(" ", "").strip() for item in listed_dosages]
        return dosage_value in allowed
    except Exception as e:
        print(e)
        return "Exception"
def guessDosageDurationValue(pres_text,
                             entity_frame,
                             entity_index,
                             guess_type="dosage",
                             replace_text="xxxDosageValuexxx"):
    """Guess a missing dosage/duration value from the text between entities.

    When the slot for the requested entity in ``entity_index`` is ``-1``,
    the text between the preceding placeholder (``xxxDrugNamexxx`` for
    dosage, ``xxxTIMINGxxx`` otherwise) and the next entity that was found
    is taken as the candidate. If the candidate is numeric, directly or
    after a ``text2digits`` conversion, it is replaced in ``pres_text`` by
    the placeholder.

    Args:
        pres_text: Text that already contains entity placeholders.
        entity_frame: Indexable collection; ``entity_frame[i]`` is searched
            for in ``pres_text`` to locate the right-hand neighbour.
        entity_index: Indexable collection in which ``-1`` marks an entity
            that was not found. Slot 1 is checked for dosage, slot 4
            otherwise.
        guess_type: ``"dosage"`` for a dosage value; any other value selects
            the duration settings.
        replace_text: Placeholder inserted for a dosage guess. For other
            guess types it is always overridden by ``xxxDurationValuexxx``.

    Returns:
        ``(text, value_or_message, guessed)``. ``guessed`` is ``True`` only
        when a value was substituted; then ``text`` is the rewritten text and
        the second item is the guessed value. Otherwise ``text`` is the
        unchanged input and the second item is a status message.
    """
    separators = "/,;-"
    try:
        if guess_type == "dosage":
            prev_entity = "xxxDrugNamexxx"
            current_entity_index = 1
            index_cnt = 2
        else:
            prev_entity = "xxxTIMINGxxx"
            current_entity_index = 4
            replace_text = "xxxDurationValuexxx"
            index_cnt = 5

        if entity_index[current_entity_index] != -1:
            return pres_text, "No need to guess, it is already there.", False

        # Left neighbour: end of the previous placeholder, or the start of
        # the text when that placeholder is absent.
        prev_pos = pres_text.find(prev_entity)
        left_start = prev_pos + len(prev_entity) if prev_pos != -1 else 0

        # Right neighbour: walk forward to the first entity that was found.
        # Running off the end raises IndexError, handled below.
        while entity_index[index_cnt] == -1:
            index_cnt += 1
        right_end = pres_text.find(entity_frame[index_cnt])

        # Trim surrounding whitespace, then every leading/trailing separator.
        bound_text = pres_text[left_start:right_end].strip().strip(separators)

        if not bound_text:
            # Nothing left to inspect; the message is kept as-is because
            # callers may compare against it.
            return pres_text, "Some exception occurred", False

        looks_numeric = is_number(bound_text)
        if not looks_numeric:
            # Second chance: the candidate may be a spelled-out number.
            converted = text2digits.Text2Digits().convert(bound_text).strip()
            looks_numeric = is_number(converted)

        if not looks_numeric:
            return (pres_text, "Guessed Value May Not be a Dosage/Duration",
                    False)

        # Search from left_start so an identical token earlier in the text
        # (for example inside the drug name) is not replaced by mistake.
        value_pos = pres_text.find(bound_text, left_start)
        pres_text = (pres_text[:value_pos] + replace_text
                     + pres_text[right_end:])
        return pres_text, bound_text, True
    except Exception as ee:
        print(ee)
        track = traceback.format_exc()
        print(track)

    return pres_text, "Some exception occurred", False
Exemplo n.º 4
0
def getData():
    """Fetch an article, run entity recognition on it and return JSON.

    Steps, in order:
      1. Read the ``url`` query argument and download/parse the article.
      2. Convert spelled-out numbers to digits and cap the text at 5000
         characters (API data limit).
      3. POST the text to the NER endpoint configured in
         ``data/configdata.json``.
      4. Run the spaCy ``en_core_web_sm`` pipeline to collect purely numeric
         ``CARDINAL`` entities as candidate counts.
      5. Match every location returned by the NER service against the
         entries of ``data/sg-citi.json``.

    Returns:
        The ``jsonify`` response for a dict with the keys ``"text"``
        (article text with blank-line separators removed), ``"location"``
        (one entry per matched city, each with a randomly picked count) and
        ``"category"`` (raw NER response body).
    """
    url = request.args.get('url')

    # Article text via the newspaper framework.
    content = Article(url)
    content.download()
    content.parse()
    rawText = content.text

    # Unformatted text shown to the user.
    textDisplay = ''.join(rawText.split("\n\n"))

    # Spelled-out numbers become digits; slicing enforces the API limit.
    numText = text2digits.Text2Digits().convert(rawText)
    text = ''.join(numText.split("\n\n"))[:5000]

    configDataResource = os.path.join(SITE_ROOT, "data", "configdata.json")
    with open(configDataResource) as config_file:
        configData = json.load(config_file)

    # NER API call.
    headers = {
        'x-api-key': configData["X_API_KEY"],
        'Content-type': 'application/json'
    }
    ner_response = requests.post(
        configData["NAMED_ENTITY_RECOGNITION_ENDPOINT"],
        headers=headers,
        data=json.dumps({"text": text}))

    # A response without "LOC" yields no locations instead of crashing;
    # the raw body is still returned under "category".
    ner_payload = json.loads(ner_response.text)
    locations = ner_payload.get("LOC", [])
    print(locations)

    json_url = os.path.join(SITE_ROOT, "data", "sg-citi.json")
    with open(json_url) as cities_file:
        data = json.load(cities_file)

    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    CASE = []
    for ent in doc.ents:
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
        if ent.label_ == "CARDINAL":
            CASE.append(ent.text)

    # Unique, purely numeric cardinals in order of first appearance.
    count = list(dict.fromkeys(c for c in CASE if c.isdigit()))
    print("COUNT: ", count)
    if not count:
        # No usable number in the article: fall back to a fixed pool.
        count = list(range(80, 500, 7))

    returnJson = {
        "text": textDisplay,
        "location": [],
        "category": ner_response.text
    }

    # "location" holds dicts, so duplicates are tracked by name separately.
    seen_names = set()
    for place in locations:
        for citi in data:
            name = citi["name"]
            if place in name and name not in seen_names:
                seen_names.add(name)
                returnJson["location"].append({
                    "name": name,
                    "lat": "no1",
                    "lon": "no2",
                    "count": random.choice(count)
                })
                break
    print(returnJson)
    return jsonify(returnJson)
Exemplo n.º 5
0
def oblicz(sentence):
    """Translate a spoken-style arithmetic sentence, evaluate it and print it.

    The sentence is translated with ``googletrans``, number words are turned
    into digits with ``text2digits``, operator words are mapped to Python
    operators and the resulting expression is evaluated. If that evaluation
    fails, a fallback rewrites ``<number> <<percent>>`` as ``number * 0.01``
    and evaluates again.

    Args:
        sentence: Text describing the calculation.

    Returns:
        ``None``; the result is printed.

    Raises:
        Whatever the second evaluation raises when the fallback expression
        is still not valid.
    """
    from text2digits import text2digits
    from googletrans import Translator
    from infix import shift_infix as infix

    # Translate the given sentence.
    translated = Translator().translate(sentence).text

    # Referenced by name from the evaluated expression ("a <<percent>> b"),
    # so it must stay a local of this function.
    @infix
    def percent(a, b):
        return (a * b) / 100

    # Turn translated number words into digits.
    converted = text2digits.Text2Digits().convert(translated)

    filler_words = {
        "by", "to", "of", "power", "into", "out", "calculate", "do",
        "yourself", "and", "enbis", "nbis",
    }
    operator_words = {
        "times": "*",
        "multiply": "*",
        "minus": "-",
        "add": "+",
        "plus": "+",
        "divided": "/",
        "divide": "/",
        "the": "**",
        "percent": "<<percent>>",
    }

    # Map words such as "plus" to operators such as "+"; filler words
    # become a single space so they can be dropped below.
    def strToOperator(word):
        if word in filler_words:
            return " "
        return operator_words.get(word, word)

    # Build the token list in one pass. Removing items from a list while
    # iterating over it skips elements, so fillers are filtered out here.
    glue = " "
    toBeGlued = [
        token for token in map(strToOperator, str(converted).split())
        if token != " "
    ]

    # A leading binary operator ("add 2 to 3") is moved behind its operand.
    if toBeGlued[0] in ("+", "*", "/"):
        toBeGlued[0], toBeGlued[1] = toBeGlued[1], toBeGlued[0]

    joined = glue.join(toBeGlued)
    # SECURITY: eval() runs text derived from the caller's sentence. Do not
    # pass untrusted input to this function.
    try:
        return print(eval(joined))  # prints the result when all is well
    except Exception:
        # Fallback for expressions that fail because of percentages.
        new = []
        for word in joined.split():
            try:
                new.append(int(word))
            except ValueError:
                new.append(word)
        for n, item in enumerate(new):
            if item == "<<percent>>":
                new[n - 1] = new[n - 1] * 0.01
                new[n] = " "
        joined = glue.join(str(item) for item in new)
        return print(eval(joined))
Exemplo n.º 6
0
def recuperarOrden(fraseGrabada, traductor):
    """Turn a recorded phrase into a command, a page count and a reply.

    Command codes (values come from ``gv``):
        - 0: not understood (default)
        - 1*: stop (handled inside another function)
        - 2*: go one page back (absorbed by "go X pages back")
        - 3: read the book
        - 4: go X pages forward
        - 5: go X pages back
        - 6: shut down

    Args:
        fraseGrabada: Recorded phrase, forwarded to ``escucharOrdenes``.
        traductor: Translator object, forwarded to ``escucharOrdenes``.

    Returns:
        ``(orden, numPaginas, fraseRespuesta)``: the command code, the number
        of pages to move (0 when not applicable, 1 for a single page back)
        and a randomly chosen reply sentence.
    """
    # The phrase is translated and tokenized to keep only meaningful words.
    fraseFinal, fraseTraducida = escucharOrdenes(fraseGrabada, traductor)

    # Keywords to detect plus the reply sentences for each keyword group.
    LEER_LIBRO = ("read", "study", "research", "studying", "scan")
    LEER_LIBRO_OK = [
        "Cusha oreha", "Vamos a leer!", "Empecemos.",
        "Prepárate, que empezamos.", "Escucha bien.",
        "Ponte el cinturón, que llegan curvas"
    ]

    NO_ENTENDIDO = [
        "Lo siento, no te he entendido",
        "No te he entendido bien",
        "Vuelve a repetir, por fa",
        "¿Puedes repetirlo otra vez?",
        "Repítemelo otra vez, por favor",
    ]

    PAGINA_ATRAS = ("back", "come", "return", "behind")
    PAGINA_ATRAS_OK = [
        "Okey jefe, voy  para atrás", "Entendido", "Volvemos atrás", "Okey",
        "Sí", "Estupendo"
    ]

    N_PAGINA_ADELANTE = ("advance", "get", "move", "proceed", "forward",
                         "ahead")
    N_PAGINA_ADELANTE_OK = [
        "Okey, vamos adelante", "Avanzando", "Vamos", "Entendido"
    ]

    N_PAGINAS_ATRAS = ("back", "behind", "return")
    N_PAGINAS_ATRAS_OK = [
        "Entendido",
        "Vamos hacia detrás",
        "Okey, retrocedemos",
    ]

    APAGAR = ("close", "down", "night", "evening", "goodbye", "bye")
    APAGAR_OK = [
        "Nos vemos la próxima vez", "Hasta la vista, beiby", "Adiós", "bai bai"
    ]

    print("")
    print("Frase traducida :", fraseTraducida)
    print("Frase final :", fraseFinal)

    # Convert number words to digits (two -> 2).
    fraseNumerada = t2d.Text2Digits().convert(fraseTraducida)

    numPaginas = 0
    orden = gv.UNKNOWN

    # Whether the phrase contains digits decides which command set applies.
    tieneDigitos = any(map(str.isdigit, fraseNumerada))

    # The first word that matches a keyword group decides the command. The
    # order of the checks matters: some keywords outrank others.
    if not tieneDigitos:
        for palabra in fraseFinal:
            clave = palabra[gv.FIRST].lower()
            if clave in PAGINA_ATRAS:
                fraseRespuesta = random.choice(PAGINA_ATRAS_OK)
                orden = gv.BACK_PAGE
                numPaginas = 1
                break
            elif clave in APAGAR:
                fraseRespuesta = random.choice(APAGAR_OK)
                orden = gv.TURN_DOWN
                break
            elif clave in LEER_LIBRO:
                fraseRespuesta = random.choice(LEER_LIBRO_OK)
                orden = gv.READ_BOOK
                break
    else:
        # A phrase with a digit is assumed to be an order to move forward
        # or back by X pages.
        for palabra in fraseFinal:
            clave = palabra[gv.FIRST].lower()
            if clave in N_PAGINA_ADELANTE:
                fraseRespuesta = random.choice(N_PAGINA_ADELANTE_OK)
                orden = gv.ADVANCE_N_PAGES
                break
            elif clave in N_PAGINAS_ATRAS:
                fraseRespuesta = random.choice(N_PAGINAS_ATRAS_OK)
                orden = gv.BACK_N_PAGES
                break

    if orden == gv.UNKNOWN:
        # No keyword matched: answer with a "not understood" sentence.
        fraseRespuesta = random.choice(NO_ENTENDIDO)
    elif orden >= gv.ADVANCE_N_PAGES:
        # "Move X pages" orders: take the last whole number in the phrase.
        # Digits are grouped into runs so that "12" is read as 12 rather
        # than as its final digit.
        numeros = "".join(
            ch if ch.isdigit() else " " for ch in fraseNumerada).split()
        if numeros:
            numPaginas = int(numeros[-1])

    return orden, numPaginas, fraseRespuesta