# Cross-reference DrugBank aliases against the lines of the FDA database dump.
#
# NOTE(review): this section uses `FR` and `drugs_file` without defining them
# here (the local `drugs_file` definition below is commented out). Both are
# assumed to be in scope from elsewhere in the file -- confirm before running.

# drugs_file = FR("../PUBMED_DATA/drugbank2606.latest.txt",
#                 sep = "\t", suppress_newlines = True, encoding = "utf-8", skiplines = 0)

# The FDA dump is opened twice: `fda_file` (empty separator, first line
# skipped) supplies the lines that are searched for aliases, while `fda_file2`
# (tab-separated) supplies the dictionary view.
fda_file = FR("../FDA/FDA_DATABASE_2018_07.txt", sep="", suppress_newlines=True,
              encoding="CP1252", skiplines=1)

# Regex matching a leading/trailing double quote or leading/trailing runs of
# spaces; handed to the reader as `strip_chars_pattern` (presumably removed
# from each field -- verify against File_Reader).
strippattern = "^\"|\"$|^ +| +$"
fda_file2 = FR("../FDA/FDA_DATABASE_2018_07.txt", sep="\t", suppress_newlines=True,
               encoding="CP1252", skiplines=0, strip_chars_pattern=strippattern)

# fda_dict = fda_file.as_dict(lines_askeys = True)
fda_lines = fda_file.readlines()
fda_dict = fda_file2.as_dict(lines_askeys=True)
header, drugs_dict = drugs_file.as_dict(ret_header=True)

# Extend the DrugBank header with a presence flag plus one "FDA_"-prefixed
# column per retained FDA field.
fda_cols_retained = ["SubmissionStatusDate", "SubmissionStatus", "SponsorName", "ActiveIngredient"]
app = "FDA_"
header.append("HAS_FDA_ENTRY")
header.extend(app + col for col in fda_cols_retained)

# Lower-case every FDA line once, instead of once per drug and alias.
fda_lines_lower = [line.lower() for line in fda_lines]

for key in drugs_dict.keys():
    # alias = ";".join([drugs_dict[key]["COMMON_DRUGBANK_ALIAS"],
    #                   drugs_dict[key]["MINED_ALIAS"]]) if drugs_dict[key]["MINED_ALIAS"] else drugs_dict[key]["COMMON_DRUGBANK_ALIAS"]
    alias = drugs_dict[key]["COMMON_DRUGBANK_ALIAS"]

    # Aliases are ";"-separated. Empty fragments are dropped because "" is a
    # substring of every string and would otherwise match every FDA line.
    alias_terms = [a.lower() for a in alias.split(";") if a]

    # Positions (in fda_lines) of every line containing at least one alias,
    # compared case-insensitively. enumerate() yields the true position even
    # when identical lines occur more than once, which list.index() would
    # collapse onto the first occurrence (and rescan the list for every hit).
    # NOTE(review): found_indexes is rebuilt per drug and not consumed in the
    # visible part of the script.
    found_indexes = [
        index
        for index, line in enumerate(fda_lines_lower)
        if any(term in line for term in alias_terms)
    ]
from utils import File_Reader as FR

# Load the tab-separated DrugBank alias table and print its keys in sorted
# order, one per line.
drugbank_alias_file = FR(
    "../PUBMED_DATA/drugbank2606.latest.txt",
    sep="\t",
    suppress_newlines=True,
    encoding="utf-8",
    skiplines=0,
)
drugbank_dict = drugbank_alias_file.as_dict()

# sorted() copies the keys into a new list and orders it in one step.
drugbank_names = sorted(drugbank_dict.keys())

for k in drugbank_names:
    print(k)