Example #1
def check_microinfarcts(pNum):
    dict_entry = pf.get_files([pNum])
    filename = dict_entry[pNum]
    #print(f'Filename: {filename}')
    lines = pf.open_file(filename)

    microinfarct_sxn = case_finder.get_microinfarct_section(lines)
    found_lines = [line for line in microinfarct_sxn if 'FOUND' in line]

    distinct_sentences_list = []

    for line in found_lines:
        distinct_sentences = line.split('.')
        for sentence in distinct_sentences:
            sentence = sentence.replace('FINDINGS:', '')
            # The reports contain literal backslash-t sequences, not tabs; strip them too.
            sentence = sentence.replace(r'\t', '')
            sentence = sentence.strip()
            if sentence == '':
                continue
            distinct_sentences_list.append(sentence)
    #print(f'{pNum} sentences: {distinct_sentences_list}')

    values = add_microinfarct_score(distinct_sentences_list)
    microinfarct_values = dict(zip(microinfarct_columns, values))
    #for key, value in microinfarct_values.items():
    #    print(f'{key}: {value}')
    return list(microinfarct_values.values())
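
# --- Hedged usage sketch (not part of the original): microinfarct_columns and
# the pf/case_finder modules must already be importable; 'P0001' is a
# placeholder pNum. ---
values = check_microinfarcts('P0001')
for column, value in zip(microinfarct_columns, values):
    print(f'{column}: {value}')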
Example #2
def examine_CTE():
    file_list = parser_functions.get_files()
    for pNum, filename in file_list.items():
        lines = parser_functions.open_file(filename)
        for line in lines:
            # 'hronic traumatic' catches both 'Chronic traumatic' and 'chronic traumatic'.
            if any(x in line for x in ('hronic traumatic', 'CTE')):
                print(f'{pNum}: {line}')
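
# --- Hedged alternative sketch: the 'hronic traumatic' substring trick can be
# made explicit with a case-insensitive regex. Not the original code; assumes
# the same parser_functions helpers. ---
import re

CTE_PATTERN = re.compile(r'chronic traumatic|CTE', re.IGNORECASE)

def examine_CTE_regex():
    for pNum, filename in parser_functions.get_files().items():
        for line in parser_functions.open_file(filename):
            if CTE_PATTERN.search(line):
                print(f'{pNum}: {line}')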
Example #3
def percent_authorship():
    files = parser_functions.get_files()
    pNums = files.keys()
    author_dict = {}
    for pNum in pNums:
        try:
            author_dict[pNum] = parser_functions.get_author(pNum)
        except IndexError:
            # Skip reports where no author line could be parsed.
            continue
    bill_count = 0
    lea_count = 0
    salvo_count = 0
    total_count = 0
    for author in author_dict.values():
        if 'Seeley' in author:
            bill_count += 1
        if 'Grinberg' in author:
            lea_count += 1
        if "Spina" in author:
            salvo_count += 1
        total_count += 1
    bill_percent = (bill_count / total_count) * 100
    salvo_percent = (salvo_count / total_count) * 100
    lea_percent = (lea_count / total_count) * 100
    print(f'Total cases: {total_count}\n'
          f'Bill {bill_count}, Salvo {salvo_count}, Lea {lea_count}\n'
          f'{bill_percent:.1f}% Bill, {salvo_percent:.1f}% Salvo, {lea_percent:.1f}% Lea\n')
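
# --- Hedged alternative sketch: the three parallel counters collapse into a
# single collections.Counter. Not the original code; assumes the same
# parser_functions helpers. ---
from collections import Counter

def percent_authorship_counter():
    authors = Counter()
    total = 0
    for pNum in parser_functions.get_files():
        try:
            author = parser_functions.get_author(pNum)
        except IndexError:
            continue
        total += 1
        for name in ('Seeley', 'Grinberg', 'Spina'):
            if name in author:
                authors[name] += 1
    for name, count in authors.items():
        print(f'{name}: {count} ({count / total * 100:.1f}%)')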
Example #4
def examine_lbd():
    file_list = parser_functions.get_files()
    lbd_specs = {}

    for pNum, filename in file_list.items():
        lines = parser_functions.get_dx_sxn(filename)
        for line in lines:
            if 'LEWY' in line:
                line = line.split(':')[1].strip()
                if 'DEMENTIA WITH' in line:
                    print(f'{pNum}: {line}')
                # Keep the first pNum seen for each distinct description.
                if line not in lbd_specs:
                    lbd_specs[line] = pNum
    for item in lbd_specs.items():
        print(item)
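
# --- Minimal illustration: the check-then-insert on lbd_specs is what
# dict.setdefault does in one call. The pNum values here are hypothetical. ---
lbd_specs = {}
lbd_specs.setdefault('DEMENTIA WITH LEWY BODIES', 'P0001')
lbd_specs.setdefault('DEMENTIA WITH LEWY BODIES', 'P0002')  # no-op: key already present
print(lbd_specs)  # {'DEMENTIA WITH LEWY BODIES': 'P0001'}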
Example #5
def parser_rows():
    file_dict = pf.get_files(pNums_to_check)

    # working_fields (defined at module scope) holds the column names spanning the entire DDS.

    # Collect one row of parser values per case; the DataFrame is built after the loop.
    data = []

    for pNum in file_dict.keys():
        print(f' --- {pNum} ---')
        filename = file_dict[pNum]
        all_contents = pf.open_file(filename)
        dx_sxn = pf.get_dx_sxn(filename)
        grossObs = pf.get_grossObs(filename)
        site = 'UCSF NDBB'
        author = pf.get_author(pNum)
        ADNC_dict = pf.get_ADNC(dx_sxn)
        Thal_phase = ADNC_dict['Thal Phase']
        AD_Braak = ADNC_dict['Braak Stage']
        AD_CERAD_NP = ADNC_dict['CERAD NP Score']
        AD_CERAD_DP = ADNC_dict['CERAD DP Score']
        NIAReag = ADNC_dict['NIA-Reagan']
        CAA = pf.get_CAA(dx_sxn)
        ADNC_level = ADNC_dict['ADNC level']
        LBD = pf.get_lbd_stage(dx_sxn)
        PD_Braak = pf.get_PDBraak(dx_sxn)
        ATAC = pf.get_ATAC(dx_sxn)
        #CTE = pf.get_CTE(dx_sxn)
        #HS = 'NA'
        #HS_laterality = 'NA'
        Arterio = pf.get_arterio(dx_sxn)
        Athero = pf.get_athero(grossObs)
        #TDP_proteinopathy = 'NA'
        AGD = pf.get_AGD(dx_sxn)
        HD = pf.get_huntington(dx_sxn)
        microinfarcts = check_microinfarcts(pNum)

        parser_values = [
            pNum, site, author, Thal_phase, AD_Braak, AD_CERAD_NP, AD_CERAD_DP,
            NIAReag, CAA, ADNC_level, LBD, PD_Braak, ATAC, Arterio, Athero,
            AGD, HD
        ] + microinfarcts
        data.append(parser_values)

        working_data = dict(zip(working_fields, parser_values))
        print(working_data)
        """
        primDx_list = pf.get_PrimDx(dx_sxn)
        print(f'\nNumber of primary diagnoses: {len(primDx_list)}')
        for counter, dx in enumerate(primDx_list, 1):
            print(f'{counter}: {dx}')

        contributingDx_list = pf.get_ContributingDx(dx_sxn)
        print(f'\nNumber of contributing diagnoses: {len(contributingDx_list)}')
        for counter, dx in enumerate(contributingDx_list, 1):
            print(f'{counter}: {dx}')

        incidentalDx_list = pf.get_IncidentalDx(dx_sxn)
        print(f'\nNumber of incidental diagnoses: {len(incidentalDx_list)}')
        for counter, dx in enumerate(incidentalDx_list, 1):
            print(f'{counter}: {dx}')
        """
    working_df = pd.DataFrame(data, columns=working_fields)
    #print(working_df)
    # Open the existing workbook in append mode (pandas >= 1.3); assigning
    # writer.book directly, as older pandas allowed, is no longer supported.
    with pd.ExcelWriter(error_checking_sheet, engine='openpyxl', mode='a',
                        if_sheet_exists='replace') as writer:
        working_df.to_excel(writer, sheet_name='parser values')
    return working_df
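
# --- Hedged usage sketch: parser_rows() depends on the module-level names
# pNums_to_check, working_fields, and error_checking_sheet being defined, as
# in the original project. ---
working_df = parser_rows()
print(working_df.head())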
Example #6
def examine_vbi_regions():

    all_vbi_terms = []
    distinct_sentences_list = []

    for pNum in examine_vbi():
        dict_entry = parser_functions.get_files([pNum])
        filename = dict_entry[pNum]
        print(f'Filename: {filename}')
        lines = parser_functions.open_file(filename)

        lines = get_microinfarct_section(lines)
        print(lines)

        for line in lines:
            line = line.upper()
            if any(x in line for x in microinfarct_phrases):
                print(line)
                distinct_sentences = line.split('.')
                for sentence in distinct_sentences:
                    distinct_sentences_list.append(sentence)
                    print(f'Sentence: {sentence}')
                    distinct_clauses = sentence.split('AND')
                    for clause in distinct_clauses:
                        print(f'Clause: {clause}')
                        distinct_terms = clause.split(',')
                        for term in distinct_terms:
                            print(f'Term: {term}')
                            all_vbi_terms.append(term)

    clean_vbi_terms = []
    before_vbi_terms = []
    gray_matter_terms = []
    white_matter_terms = []
    before_region_terms = []
    after_region_terms = []

    for term in all_vbi_terms:
        term = term.strip()
        if any(x in term for x in ['GRAY', 'CORTEX OF', '(CORTEX)', '(GRAY)', '(GRAY']):
            gray_matter_terms.append(term)
        if any(x in term for x in ['SUBCORTICAL', '(SUBCORTICAL', '(SUBCORTICAL)', 'WHITE MATTER', 'MATTER)']):
            white_matter_terms.append(term)
        if 'NO MICROINFARCTION' in term:
            continue
        elif r'FINDINGS:\t' in term:
            terms = term.split(r'FINDINGS:\t')
            if 'FOUND IN' in terms[1]:
                terms = terms[1].split('FOUND IN')
                before_region_terms.append(terms[0])
                for article in ['THE ', 'A ']:
                    if article in terms[1]:
                        terms = terms[1].split(article)
                clean_vbi_terms.append(terms[1].strip())
            else:
                clean_vbi_terms.append(terms[1].strip())
        elif 'FOUND IN' in term:
            terms = term.split('FOUND IN')
            before_vbi_terms.append(terms[0])
            has_article = False
            for article in ['THE ', 'A ']:
                if article in terms[1]:
                    has_article = True
                    terms = terms[1].split(article)
                    # Mirror the FINDINGS branch: keep the text after the article.
                    clean_vbi_terms.append(terms[1].strip())
            if not has_article:
                clean_vbi_terms.append(terms[1].strip())
        elif term == '':
            continue
        else:
            clean_vbi_terms.append(term.strip())
    clean_vbi_terms = set(clean_vbi_terms)
    before_vbi_terms = set(before_vbi_terms)
    gray_matter_terms = sorted(set(gray_matter_terms))
    white_matter_terms = sorted(set(white_matter_terms))
    #for term in sorted(clean_vbi_terms):
    #    print(term)
    #print(len(clean_vbi_terms))
    #for term in sorted(before_vbi_terms):
    #    print(term)
    #print(len(before_vbi_terms))
    print('Gray matter terms:')
    for term in gray_matter_terms:
        print(term)
    print('White matter terms:')
    for term in white_matter_terms:
        print(term)
    for sentence in set(distinct_sentences_list):
        print(sentence)
    print(len(set(distinct_sentences_list)))
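
# --- Hedged alternative sketch: the nested '.' / 'AND' / ',' splits above can
# be done in one pass with re.split. Splitting on ' AND ' (with spaces) is a
# small behavioral change from the original split('AND'). ---
import re

def split_vbi_terms(line):
    parts = re.split(r'\.|,| AND ', line.upper())
    return [p.strip() for p in parts if p.strip()]

print(split_vbi_terms('Microinfarcts found in the hippocampus, thalamus and pons.'))
# ['MICROINFARCTS FOUND IN THE HIPPOCAMPUS', 'THALAMUS', 'PONS']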
Example #7
pNum_list = []

section_names = ['']

def pNum_input():
    with open('R:/groups/seeley/Mack/NP report parser/working_pNums.csv', 'rt') as pNum_file:
        pNums = csv.reader(pNum_file)
        for pNum in pNums:
            # csv.reader yields each row as a list, so take the first field.
            pNum_list.append(pNum[0])

    # pNum list test
    print(f'pNum list:\n{pNum_list}\n')

# pNum files test
pNum_files = parser_functions.get_files()
print(f'pNum files:\n{pNum_files}\n')

## Make a filepath -> pNum lookup by inverting the pNum -> filepath dictionary

filepath_dict = {value: key for key, value in pNum_files.items()}

## Print the pNums and filenames (test)

for pNum in pNum_files:
    print(f'{pNum}: {pNum_files[pNum]}')

## Main loop

def get_all_dx():
    # Assumed behavior: print the diagnosis section of every report on file.
    for f in pNum_files.values():
        for line in parser_functions.get_dx_sxn(f):
            print(line)
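
# --- Hedged usage sketch tying Example #7 together (assumes the completion
# of get_all_dx above). ---
if __name__ == '__main__':
    pNum_input()   # load the working pNums from the CSV
    get_all_dx()   # print the diagnosis section of every report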