Example No. 1
def check_if_keyword(section):
    global removed_keywords
    # Normalize the section and the filter keywords once, so the substring
    # check and the split below operate on the same normalized form.
    section = normalize_arabic(section)
    removed_keywords = [normalize_arabic(word) for word in removed_keywords]
    for key in interactions_key_words:
        key = normalize_arabic(key)
        if key in section:
            text_parts = section.split(key)
            if len(text_parts) >= 2:
                # Everything after the first occurrence of the keyword is the
                # candidate interactions text.
                interactions_text = ' '.join(text_parts[1:]).strip()
                # Split on punctuation, newlines and the Arabic conjunctions
                # "و" (and) / "او" (or).
                possible_interactions = re.split(
                    r"[()+*,.;:،؛\n-]| و | او ", interactions_text)
                possible_interactions = [
                    interaction.strip()
                    for interaction in possible_interactions
                    if len(interaction) > 1
                ]

                interactions = []
                for inter in possible_interactions:
                    # Drop words containing any of the removed keywords, then
                    # keep only Arabic and Latin letters and spaces.
                    inter = ' '.join([
                        inter_word for inter_word in inter.split(" ")
                        if not any(xs in inter_word for xs in removed_keywords)
                    ])
                    inter = re.sub(r'[^ء-يa-zA-Z \n]', '', inter)
                    if check_if_interaction(inter):
                        interactions.append(inter)
                return interactions

    return []
Example No. 2
def read_human_parts_and_stop_words():
    global human_parts
    # Load the human body-part word list shipped with the package.
    path = "Data/utils/human_parts_2.txt"
    path = pkg_resources.resource_filename(__name__, path)
    with open(path, 'r', encoding="utf-8") as file:
        parts = file.read()
    parts = normalize_arabic(parts)
    human_parts = re.split(r'[\n ]', parts)
Example No. 3
def insert_drug_database(drug_name):
    # Insert the drug (or a default placeholder when no name was extracted)
    # and return the generated id together with the name actually stored.
    if drug_name:
        drug_id = insert_drug(drug_name, normalize_arabic(drug_name))
    else:
        drug_id = insert_drug("default", "default")
        drug_name = "default"

    return drug_id, drug_name
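
`insert_drug` itself does not appear in this listing. A minimal sketch of such a helper, assuming a SQLite table named `drugs` (the table name, columns and database file are hypothetical), matching the two-argument call in Example No. 3:

import sqlite3

connection = sqlite3.connect("drugs.db")  # hypothetical database file
connection.execute(
    "CREATE TABLE IF NOT EXISTS drugs "
    "(id INTEGER PRIMARY KEY, name TEXT, normalized_name TEXT)"
)

def insert_drug(name, normalized_name):
    # Hypothetical helper: insert one drug row and return its generated id.
    cursor = connection.execute(
        "INSERT INTO drugs (name, normalized_name) VALUES (?, ?)",
        (name, normalized_name),
    )
    connection.commit()
    return cursor.lastrowid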
Example No. 4
def read_stop_words():

    global patterns
    # Load the stop-word list shipped with the package and pre-compile it
    # into alternation patterns, 15 words per pattern.
    path = "utils/stopwords.txt"
    path = pkg_resources.resource_filename(__name__, path)
    with open(path, 'r', encoding="utf-8") as file:
        stop_words = file.read()
    stop_words = normalize_arabic(stop_words)
    stop_words_ar = re.split(r'[\n ]', stop_words)
    for arr in chunks(stop_words_ar, 15):
        stop_words_arr = ' | '.join(arr)
        stop_words_arr = " " + stop_words_arr + " "
        patterns.append(re.compile(stop_words_arr))
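
`read_stop_words()` relies on a `chunks` helper, and the compiled `patterns` list is presumably what the `remove_stop_words` call in Example No. 5 consumes; neither appears in the listing. A minimal sketch under those assumptions (the `remove_stop_words` body below is hypothetical and depends on the module-level `patterns` list filled by `read_stop_words()`):

def chunks(items, size):
    # Hypothetical helper: yield successive slices of `size` elements.
    for i in range(0, len(items), size):
        yield items[i:i + size]

def remove_stop_words(text):
    # Hypothetical consumer of the pre-compiled stop-word patterns:
    # replace every matched " stopword " alternative with a single space.
    for pattern in patterns:
        text = pattern.sub(" ", text)
    return text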
Example No. 5
def extract_interactions_for_one_file(file_name, drug_id):
    file_path = INPUT_DIR + os.path.sep + file_name
    file_path = pkg_resources.resource_filename(__name__, file_path)
    if os.path.isfile(file_path):
        with open(file_path, 'r', encoding='utf8') as file:
            section = file.read()
    else:
        print("No section found")
        return
    # Normalize the text, strip stop words and extract interaction phrases.
    section = normalize_arabic(section)
    section = remove_stop_words(section)
    results = check_if_keyword(section)
    results = [result.strip() for result in results if len(result) > 1]
    # Build the output path locally instead of mutating the global OUTPUT_DIR
    # on every call, which would corrupt it for subsequent files.
    out_path = OUTPUT_DIR + os.path.sep + file_name
    out_path = pkg_resources.resource_filename(__name__, out_path)
    with open(out_path, 'w', encoding='utf8') as out_file:
        out_file.write('\n'.join(results))
    for interaction in results:
        insert_drug_interaction(interaction, drug_id)
Example No. 6
def insert_drug_database(drug_name):

    if drug_name:
        soundex = Soundex()
        metaphone = Metaphone()
        rs = RefinedSoundex()
        # Reference list of known drug names, one per line.
        file_path = "utils//DRUGS_ALL_EDITTED.csv"
        file_path = pkg_resources.resource_filename(__name__, file_path)
        with open(file_path, "r") as file:
            section = file.read()
        parts = re.split(r'[\n]', section)
        min_dist = 100
        # Normalize confusable characters before matching.
        new_name = re.sub("چ", "غ", drug_name)
        new_name = re.sub("ﻏ", "غ", new_name)
        new_name = normalize_arabic(new_name)
        name_en = translate_drug_name(drug_name)
        equals = []
        min_index = -1
        min_dist_all = 100
        min_index_all = -1

        for part in parts:

            # An exact or near-exact match on the translated name wins immediately.
            if distance_words(name_en, part) in (0, 1):
                print(" Matched To ->", part)
                drug_id = insert_drug(drug_name, normalize_arabic(drug_name), part)
                return drug_id, drug_name

            # Track the overall closest entry by refined-soundex distance
            # as a last-resort fallback.
            dist = rs.distance(name_en, part)
            if dist < min_dist_all:
                min_dist_all = dist
                min_index_all = parts.index(part)

            # Collect phonetically similar candidates.
            if soundex.sounds_like(new_name, part) or soundex.sounds_like(name_en, part):
                if rs.distance(new_name, part) < min_dist:
                    min_dist = rs.distance(new_name, part)
                    min_index = parts.index(part)
                equals.append((part, metaphone.phonetics(part)))

        if min_index != -1:
            # Prefer a candidate whose metaphone code matches the query exactly.
            for equ in equals:
                if equ[1] == metaphone.phonetics(name_en) or equ[1] == metaphone.phonetics(new_name):
                    drug_id = insert_drug(drug_name, normalize_arabic(drug_name), equ[0])
                    return drug_id, drug_name

            # Otherwise fall back to the closest phonetically similar candidate.
            drug_id = insert_drug(drug_name, normalize_arabic(drug_name), parts[min_index])
            return drug_id, drug_name

        # No phonetic candidate at all: use the overall closest entry.
        drug_id = insert_drug(drug_name, normalize_arabic(drug_name), parts[min_index_all])
        return drug_id, drug_name

    else:
        drug_id = insert_drug("----------", "----------", "----------")
        drug_name = "default"
        return drug_id, drug_name
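
Example No. 6 also depends on a `distance_words` helper (alongside the `Soundex`, `Metaphone` and `RefinedSoundex` classes from a phonetics library) that is not shown. Assuming it is a plain edit distance between the two strings, a minimal sketch:

def distance_words(a, b):
    # Hypothetical Levenshtein (edit) distance between two strings.
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        current = [i]
        for j, cb in enumerate(b, start=1):
            cost = 0 if ca == cb else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current
    return previous[-1]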