Exemplo n.º 1
0
                     skiplines=0)

# Both inputs are opened with exactly the same reader options (tab-separated,
# newlines suppressed, UTF-8, no skipped lines); only the path differs, so the
# two readers are built from one expression and unpacked in order.
fda_file, drugbank_alias_file = [
    FR(path,
       sep="\t",
       suppress_newlines=True,
       encoding="utf-8",
       skiplines=0)
    for path in (
        "../FDA/FDA_DRUG_DATABASE_cured_cleaned.latest.txt",
        "../DRUGBANK/drugbank_extracted_identifiers.latest.txt",
    )
]

# Load every row of the annotation table; the first row is taken as the list
# of column names and removed from the data rows.
lines = annotation_file.readlines()
annotation_header = lines.pop(0)

# Index the rows by their first field. Each entry maps column name -> cell
# value for that row, plus an "FDA_APPROUVED" field initialised to 0.
# A row with fewer cells than the header raises IndexError.
drug_dict = {}
for line in lines:
    drug_dict[line[0]] = {
        column: line[position]
        for position, column in enumerate(annotation_header)
    }
    drug_dict[line[0]]["FDA_APPROUVED"] = 0

# Accumulators, created empty here.
problem_list, change_list = [], []

if __name__ == '__main__':

    # Load files and data
    print("reading files")
    pmid = []
    drugs_data = []

    drug_data_file = FR("../DRUG_LISTS/full_drug_list.latest.txt",
                        sep=';',
                        suppress_newlines=True,
                        skiplines=0,
                        encoding="utf-8")

    drugs_data = drug_data_file.readlines()

    # for line in drug_data_file.iter():
    # 	pmid.append(line[0])
    # 	a = line[1]
    # 	if a!="NA" or a!="":
    # 		drugs_data.append(a)

    # Parse xml file.
    print("parsing xml")
    tree = ET.parse('../DRUGBANK/drugbank_db.xml')
    root = tree.getroot()

    sample = [
        "Erlotinib", "Irinotecan", "Cisplatin", "Pembrolizumab", "Bevacizumab",
        "(4R)-limonene", "Obinutuzumab", "Rituximab"
Exemplo n.º 3
0
# The DRUG_LIST and DRUG_MATCH exports share every reader option
# (tab-separated, one skipped line, UTF-16, cells stripped with `charstrip`),
# so both readers are created from the same call and unpacked in order.
dlist_file, dmatch_file = [
    FR(path,
       sep="\t",
       suppress_newlines=True,
       skiplines=1,
       strip_chars_pattern=charstrip,
       encoding="utf-16")
    for path in (
        "../DRUG_LISTS/tbl_Abst_drug_vs_symbols_match - DRUG_LIST 2018 05 17.txt",
        "../DRUG_LISTS/tbl_Abst_drug_vs_symbols_match - DRUG_MATCH 2018 05 27.txt",
    )
]

# Read each table completely into memory.
nosym = nosym_file.readlines()
supp = supp_file.readlines()
dlist = dlist_file.readlines()
dmatch = dmatch_file.readlines()

# Drop every empty-string cell from every row. The rows are edited in place
# (slice assignment), so the tables above see the cleaned rows.
# NOTE(review): assumes each row is a list of cells - confirm against FR.
for f in (supp, nosym, dlist, dmatch):
    for s in f:
        s[:] = [cell for cell in s if cell != ""]

# Result containers, created empty here.
final_ref, pool = [], []

# Extract lines
for f in [supp, nosym, dlist, dmatch]:
from utils import File_Reader as FR
from utils import File_Maker as FM
from utils import Task_Follower as TF
import random
import re
from string import punctuation

# Pattern passed to the PubMed reader as strip_chars_pattern: a double quote
# at the very start or the very end of a value.
strippattern = "^\"|\"$"

pubmed_file = FR(
    "../PUBMED_DATA/pubmed_data_2606.txt",
    sep="\t",
    suppress_newlines=True,
    skiplines=1,
    encoding="CP1252",
    strip_chars_pattern=strippattern,
)
drugs_file = FR(
    "../DRUG_LISTS/drug_list_2606_curated_cleaned.latest.txt",
    sep=";",
    suppress_newlines=True,
    encoding="utf-8",
)

pubmed = pubmed_file.readlines()
drugs = drugs_file.readlines()

# One placeholder entry per article, keyed by the integer value of the
# article's second column. Every entry gets its own fresh empty list.
match = {int(article[1]): ("", "", [], "") for article in pubmed}



# tf = TF(len(drugs))
for names in drugs:
	# tf.step()
	count=+1
	for article in pubmed:
from utils import File_Reader as FR
from utils import File_Maker as FM

annotation_file = FR(
    "../DRUG_LISTS/drug_pivot_0_1.clean.latest.txt",
    sep="\t",
    suppress_newlines=True,
    encoding="utf-8",
)
drugs_file = FR(
    "../DRUG_LISTS/drug_list_2606_curated_cleaned.latest.txt",
    sep="",
    suppress_newlines=True,
    encoding="utf-8",
)

# First row of the annotation table is the header; the rest are data rows.
annotations = annotation_file.readlines()
header = annotations.pop(0)

# Map each row's first field to a {column name: cell} dict. Rows shorter than
# the header are padded with empty strings for the missing columns.
old_drug_dict = {}
for line in annotations:
    old_drug_dict[line[0]] = {
        column: (line[position] if position < len(line) else "")
        for position, column in enumerate(header)
    }

print(header)

# First field of every data row, in file order.
old_drugs = [line[0] for line in annotations]
Exemplo n.º 6
0
	sep = "\t", suppress_newlines = True, encoding = "utf-8", skiplines = 0)

# drugs_file = FR("../PUBMED_DATA/drugbank2606.latest.txt",
# 	sep = "\t", suppress_newlines = True, encoding = "utf-8", skiplines = 0)


# The same FDA file is opened twice: once with an empty separator and the
# first line skipped, and once tab-separated with surrounding quotes/spaces
# stripped from the values.
fda_file = FR(
    "../FDA/FDA_DATABASE_2018_07.txt",
    sep="",
    suppress_newlines=True,
    encoding="CP1252",
    skiplines=1,
)

strippattern = "^\"|\"$|^ +| +$"
fda_file2 = FR(
    "../FDA/FDA_DATABASE_2018_07.txt",
    sep="\t",
    suppress_newlines=True,
    encoding="CP1252",
    skiplines=0,
    strip_chars_pattern=strippattern,
)

fda_lines = fda_file.readlines()
fda_dict = fda_file2.as_dict(lines_askeys=True)
header, drugs_dict = drugs_file.as_dict(ret_header=True)

# Extend the drug table header with a presence flag followed by one
# "FDA_"-prefixed column per retained FDA column, in the order listed.
fda_cols_retained = ["SubmissionStatusDate", "SubmissionStatus", "SponsorName", "ActiveIngredient"]
app = "FDA_"
header.extend(["HAS_FDA_ENTRY"] + [app + col for col in fda_cols_retained])
for key in drugs_dict.keys():
	# alias = ";".join([drugs_dict[key]["COMMON_DRUGBANK_ALIAS"],
	# 	drugs_dict[key]["MINED_ALIAS"]]) if drugs_dict[key]["MINED_ALIAS"] else drugs_dict[key]["COMMON_DRUGBANK_ALIAS"]
	alias = drugs_dict[key]["COMMON_DRUGBANK_ALIAS"]
	found_indexes = []
	for line in fda_lines:
		# if alias.lower() in line.lower():
		if any([a.lower() in line.lower() for a in alias.split(";")]):
Exemplo n.º 7
0
from utils import File_Reader as FR
from utils import File_Maker as FM
from utils import Task_Follower as TF
import cirpy
import pubchempy as pcp
from datetime import datetime

pivot_file = FR(
    "../PUBMED_DATA/drugs2606minedalias_with_found_identifiers.latest.txt",
    encoding="utf-8",
    suppress_newlines=True,
    sep="\t",
)

# Read every row and report the row count. The count is printed before the
# header is removed, so it includes the header row.
lines = pivot_file.readlines()
print(len(lines))

# Detach the first row as the header; `lines` keeps only the data rows.
header = lines.pop(0)


def cirpy_getter(drug):
    """Return the names cirpy resolves for *drug*, joined with ";".

    An empty string is returned when the lookup result is falsy (for
    example ``None`` or an empty list).
    """
    resolved_names = cirpy.resolve(drug, 'names')
    return ";".join(resolved_names) if resolved_names else ""


def pcp_getter(drug):
    p = pcp.get_synonyms(drug, 'name')
    if p:
        return ";".join(p[0]["Synonym"])
Exemplo n.º 8
0
from utils import File_Reader as FR
from utils import File_Maker as FM
from utils import head
import re
from datetime import datetime

annotation_file = FR(
    "../DRUG_LISTS/drug_pivot_2606.latest.txt",
    sep="\t",
    suppress_newlines=True,
    encoding="utf-8",
)

annotations = annotation_file.readlines()

# The first row is the header; widen it with the extra output columns, kept
# in this exact order.
header = annotations.pop(0)
header.extend([
    "DRUGBANK_SYNONYS_AND_PRODUCTS",
    "DRUGBANK_ID",
    "CAS_NUMBER",
    "UNII",
    "ASSOCIATED_PMID",
    "OLDEST_PMID",
    "OLDEST_DATE_OF_PUBLICATION",
])

print(header)

# Output rows, starting with the (extended) header itself as the first row.
pool_data = [header]

drug_dict = {}
for line in annotations:
    drug_dict[line[0]] = {}
    for i in range(len(header)):