def check_microinfarcts(pNum):
    """Parse the microinfarct section of one case report and score it.

    Looks up the report file for *pNum*, pulls its microinfarct section,
    cleans the sentences that report what was FOUND, and returns the
    scored values as a list ordered like ``microinfarct_columns``.
    """
    report_path = pf.get_files([pNum])[pNum]
    report_lines = pf.open_file(report_path)
    section = case_finder.get_microinfarct_section(report_lines)

    # Only the lines that state what was FOUND carry scoreable findings.
    found_lines = [ln for ln in section if 'FOUND' in ln]

    sentences = []
    for ln in found_lines:
        for raw in ln.split('.'):
            # Strip the section label and the literal "\t" escape sequences
            # left over from the export, then trim surrounding whitespace.
            cleaned = raw.replace('FINDINGS:', '').replace(r'\t', '').strip()
            if cleaned:
                sentences.append(cleaned)

    scores = add_microinfarct_score(sentences)
    # Pair scores with their column names, then hand back just the values
    # (keeps the original column-keyed pairing semantics intact).
    return list(dict(zip(microinfarct_columns, scores)).values())
def examine_CTE():
    """Print every report line that mentions chronic traumatic encephalopathy.

    Scans all known report files and prints ``pNum: line`` for each line
    containing 'hronic traumatic' (matches either capitalization of the
    first word) or the abbreviation 'CTE'.
    """
    markers = ('hronic traumatic', 'CTE')
    for case_id, path in parser_functions.get_files().items():
        for report_line in parser_functions.open_file(path):
            if any(marker in report_line for marker in markers):
                print(f'{case_id}: {report_line}')
def percent_authorship():
    """Report how many cases each neuropathologist authored.

    Collects the author line for every known case (cases whose author line
    cannot be parsed raise IndexError in ``get_author`` and are skipped),
    then prints raw counts and percentages for Seeley (Bill), Spina
    (Salvo), and Grinberg (Lea).

    Returns None; output goes to stdout.
    """
    author_dict = {}
    for pNum in parser_functions.get_files():
        try:
            author_dict[pNum] = parser_functions.get_author(pNum)
        except IndexError:
            # Report has no parseable author line; skip it.
            continue

    total_count = len(author_dict)
    if total_count == 0:
        # Fix: the original divided by total_count unconditionally and
        # raised ZeroDivisionError when no author could be parsed.
        print('Total cases: 0\nNo authors could be parsed.')
        return

    # An author string may name more than one person, so the three counts
    # are tallied independently (they need not sum to total_count).
    bill_count = sum('Seeley' in author for author in author_dict.values())
    lea_count = sum('Grinberg' in author for author in author_dict.values())
    salvo_count = sum('Spina' in author for author in author_dict.values())

    bill_percent = (bill_count / total_count) * 100
    salvo_percent = (salvo_count / total_count) * 100
    lea_percent = (lea_count / total_count) * 100
    print(f'Total cases: {total_count}\nBill {bill_count}, Salvo {salvo_count}, Lea {lea_count}\n{bill_percent}% Bill, {salvo_percent}% Salvo, {lea_percent}% Lea\n')
def examine_lbd():
    """Survey the distinct Lewy body disease specifications across all cases.

    Scans the diagnosis section of every report for lines mentioning LEWY,
    prints 'DEMENTIA WITH' specs as they are encountered, and finally prints
    each distinct spec paired with the first case it appeared in.

    Returns None; output goes to stdout.
    """
    file_list = parser_functions.get_files()
    lbd_specs = {}  # spec text -> first pNum it was seen in
    for pNum, filename in file_list.items():
        for line in parser_functions.get_dx_sxn(filename):
            if 'LEWY' not in line:
                continue
            # Fix: dropped the unused `lewy_index = lines.index(line)` —
            # it was never read and would mis-report duplicate lines anyway.
            parts = line.split(':')
            if len(parts) < 2:
                # Robustness: a LEWY line without a label/value colon used
                # to raise IndexError; skip it instead.
                continue
            spec = parts[1].strip()
            if 'DEMENTIA WITH' in spec:
                print(f'{pNum}: {spec}')
            # Record only the first case each distinct spec appears in.
            lbd_specs.setdefault(spec, pNum)
    for item in lbd_specs.items():
        print(item)
def parser_rows():
    """Build one parsed-values row per case and export them to Excel.

    For every case in the module-level ``pNums_to_check``, extracts the
    diagnosis fields via the ``pf`` parser helpers plus the microinfarct
    scores from check_microinfarcts(), assembles them into a DataFrame
    whose columns are the module-level ``working_fields``, appends a
    'parser values' sheet to ``error_checking_sheet``, and returns the
    DataFrame.
    """
    file_dict = pf.get_files(pNums_to_check)
    # Set working fields to column names spanning entire DDS
    # Create empty dataframe with parser fields as columns
    data = []
    for pNum in file_dict.keys():
        print(f' --- {pNum} ---')
        filename = file_dict[pNum]
        all_contents = pf.open_file(filename)
        # Diagnosis section and gross-observation section are parsed
        # separately; most fields come from the diagnosis section.
        dx_sxn = pf.get_dx_sxn(filename)
        grossObs = pf.get_grossObs(filename)
        site = 'UCSF NDBB'
        author = pf.get_author(pNum)
        # ADNC fields all come from a single dict returned by get_ADNC.
        ADNC_dict = pf.get_ADNC(dx_sxn)
        Thal_phase = ADNC_dict['Thal Phase']
        AD_Braak = ADNC_dict['Braak Stage']
        AD_CERAD_NP = ADNC_dict['CERAD NP Score']
        AD_CERAD_DP = ADNC_dict['CERAD DP Score']
        NIAReag = ADNC_dict['NIA-Reagan']
        CAA = pf.get_CAA(dx_sxn)
        ADNC_level = ADNC_dict['ADNC level']
        LBD = pf.get_lbd_stage(dx_sxn)
        PD_Braak = pf.get_PDBraak(dx_sxn)
        ATAC = pf.get_ATAC(dx_sxn)
        #CTE = pf.get_CTE(dx_sxn)
        #HS = 'NA'
        #HS_laterality = 'NA'
        Arterio = pf.get_arterio(dx_sxn)
        # Atherosclerosis is graded from the gross observations, not dx.
        Athero = pf.get_athero(grossObs)
        #TDP_proteinopathy = 'NA'
        AGD = pf.get_AGD(dx_sxn)
        HD = pf.get_huntington(dx_sxn)
        microinfarcts = check_microinfarcts(pNum)
        # NOTE(review): the order here must match the module-level
        # working_fields column list — confirm before adding fields.
        parser_values = [pNum, site, author, Thal_phase, AD_Braak,
                         AD_CERAD_NP, AD_CERAD_DP, NIAReag, CAA,
                         ADNC_level, LBD, PD_Braak, ATAC, Arterio,
                         Athero, AGD, HD] + microinfarcts
        data.append(parser_values)
        # Per-case preview keyed by column name (diagnostic print only).
        working_data = dict(zip(working_fields, parser_values))
        print(working_data)
        # Disabled diagnostic dump of primary/contributing/incidental
        # diagnoses, kept as a string block for reference.
        """
        primDx_list = pf.get_PrimDx(dx_sxn)
        print(f'\nNumber of primary diagnoses: {len(primDx_list)}')
        for counter, dx in enumerate(primDx_list, 1):
            print(f'{counter}: {dx}')
        contributingDx_list = pf.get_ContributingDx(dx_sxn)
        print(f'\nNumber of contributing diagnoses: {len(contributingDx_list)}')
        for counter, dx in enumerate(contributingDx_list, 1):
            print(f'{counter}: {dx}')
        incidentalDx_list = pf.get_IncidentalDx(dx_sxn)
        print(f'\nNumber of incidental diagnoses: {len(incidentalDx_list)}')
        for counter, dx in enumerate(incidentalDx_list, 1):
            print(f'{counter}: {dx}')
        """
    working_df = pd.DataFrame(data, columns=working_fields)
    #print(working_df)
    # Load the existing workbook so the new sheet is appended rather than
    # overwriting the file.
    # NOTE(review): assigning writer.book is deprecated/removed in newer
    # pandas; if pandas is upgraded, switch to ExcelWriter(mode='a') —
    # confirm against the installed pandas version.
    book = load_workbook(error_checking_sheet)
    with pd.ExcelWriter(error_checking_sheet, engine='openpyxl') as writer:
        writer.book = book
        working_df.to_excel(writer, 'parser values')
    return working_df
def examine_vbi_regions():
    """Exploratory survey of the anatomical regions named in VBI findings.

    Walks every case flagged by examine_vbi(), tokenizes its microinfarct
    section into sentences ('.'), clauses ('AND'), and comma-separated
    terms, then buckets the terms into gray- vs. white-matter groups and
    extracts the region text following 'FOUND IN'. Diagnostic tool: all
    results are printed to stdout; returns None.
    """
    all_vbi_terms = []
    distinct_sentences_list = []
    for pNum in examine_vbi():
        dict_entry = parser_functions.get_files([pNum])
        filename = dict_entry[pNum]
        print(f'Filename: (unknown)')
        lines = parser_functions.open_file(filename)
        lines = get_microinfarct_section(lines)
        print(lines)
        for line in lines:
            line = line.upper()
            if any(x in line for x in microinfarct_phrases):
                print(line)
                # Tokenize sentence -> clause -> term, recording each level.
                for sentence in line.split('.'):
                    distinct_sentences_list.append(sentence)
                    print(f'Sentence: {sentence}')
                    for clause in sentence.split('AND'):
                        print(f'Clause: {clause}')
                        for term in clause.split(','):
                            print(f'Term: {term}')
                            all_vbi_terms.append(term)

    clean_vbi_terms = []
    before_vbi_terms = []
    gray_matter_terms = []
    white_matter_terms = []
    before_region_terms = []
    for term in all_vbi_terms:
        term = term.strip()
        if any(x in term for x in ('GRAY', 'CORTEX OF', '(CORTEX)', '(GRAY)', '(GRAY')):
            gray_matter_terms.append(term)
        # Fix: the original list was missing a comma between '(SUBCORTICAL)'
        # and 'WHITE MATTER', so Python concatenated them into the single
        # marker '(SUBCORTICAL)WHITE MATTER' and plain 'WHITE MATTER' terms
        # were never classified as white matter.
        if any(x in term for x in ('SUBCORTICAL', '(SUBCORTICAL', '(SUBCORTICAL)', 'WHITE MATTER', 'MATTER)')):
            white_matter_terms.append(term)
        if 'NO MICROINFARCTION' in term:
            continue
        elif r'FINDINGS:\t' in term:
            terms = term.split(r'FINDINGS:\t')
            if 'FOUND IN' in terms[1]:
                # Fix: original split term[1] — a single *character* of the
                # raw term — which silently produced garbage; the text after
                # the FINDINGS label is terms[1].
                terms = terms[1].split('FOUND IN')
                before_region_terms.append(terms[0])
                for article in ['THE ', 'A ']:
                    if article in terms[1]:
                        # Keep only the region text after the article.
                        terms = terms[1].split(article)
                        clean_vbi_terms.append(terms[1].strip())
                    else:
                        clean_vbi_terms.append(terms[1].strip())
        elif 'FOUND IN' in term:
            terms = term.split('FOUND IN')
            before_vbi_terms.append(terms[0])
            has_article = 0
            for article in ['THE ', 'A ']:
                if article in terms[1]:
                    has_article = 1
                    terms = terms[1].split(article)
                    # Fix: original appended the whole raw term here instead
                    # of the post-article region, inconsistent with the
                    # FINDINGS branch above.
                    clean_vbi_terms.append(terms[1].strip())
            if has_article == 0:
                clean_vbi_terms.append(terms[1].strip())
        elif term == '':
            continue
        else:
            clean_vbi_terms.append(term.strip())

    # Deduplicate; gray/white lists are also sorted for readable output.
    clean_vbi_terms = set(clean_vbi_terms)
    before_vbi_terms = set(before_vbi_terms)
    gray_matter_terms = sorted(set(gray_matter_terms))
    white_matter_terms = sorted(set(white_matter_terms))
    #for term in sorted(clean_vbi_terms):
    #    print(term)
    #print(len(clean_vbi_terms))
    #for term in sorted(before_vbi_terms):
    #    print(term)
    #print(len(before_vbi_terms))
    print(f'Gray matter terms:')
    for term in gray_matter_terms:
        print(term)
    print(f'White matter terms:')
    for term in white_matter_terms:
        print(term)
    for sentence in set(distinct_sentences_list):
        print(sentence)
    print(len(set(distinct_sentences_list)))
pNum_list = [] section_names = [''] def pNum_input(): with open('R:/groups/seeley/Mack/NP report parser/working_pNums.csv', 'rt') as pNum_file: pNums = csv.reader(pNum_file) for pNum in pNums: pNum_list.append(pNum[0]) #The [0] is added because pNums is a list of list, the way the code is written # pNum list test print(f'pNum list:\n{pNum_list}\n') # pNum files test pNum_files = parser_functions.get_files() print(f'pNum files:\n{pNum_files}\n') ## Make dictionary with pNum + filepath filepath_dict = {value: key for key, value in pNum_files.items()} ## Print the pNums and filenames (test) for pNum in pNum_files: print(f'{pNum}: {pNum_files[pNum]}') ## Main loop def get_all_dx(): for f in pNum_files.values():