encoding="utf-8") counter = {} for line in file.iter(): drug = line[3] drug = drug.split(";")[0] if drug: if drug not in counter: counter[drug] = 1 else: counter[drug] += 1 print(counter) print(len(counter)) s = sorted(counter.items(), key=lambda x: x[1])[::-1] print(s) save_file = FM("../PUBMED_DATA/drug_counter", extension=".txt") fp = save_file.get_filepointer() for i in s: fp.write(("\t".join([i[0], str(i[1])]) + '\n')) fp.close() # with open("../PUBMED_DATA/drug_counter.latest.txt", 'w', encoding = "utf8") as fp: # for i in s: # fp.write(("\t".join([i[0],str(i[1])])+'\n'))
compacted_fda_dict[new_key]["drug"].lower().split(";"), fda_dict[key]["drug"].split(";")) compacted_fda_dict[new_key]["industry"] = setify_lists( compacted_fda_dict[new_key]["industry"].lower().split(";"), fda_dict[key]["industry"].split(";")) compacted_fda_dict[new_key]["date"] = setify_lists( compacted_fda_dict[new_key]["date"].split(";"), fda_dict[key]["date"].split(";")) compacted_fda_dict[new_key]["indication"] = setify_lists( compacted_fda_dict[new_key]["indication"].lower().split(";"), fda_dict[key]["indication"].split(";")) # fda["drug"], fda["COMMON_DRUG_BANK_ALIAS"], fda["industry"], fda["date"], fda["min_date"] pool_file = FM("../FDA/FDA_by_drugbank2606", extension=".txt", format="tsv", olddata_dir="../OLD_DATA/FDA") pool_data = [] pool_data.append( "FDA_APPROUVED_DRUG COMMON_DRUG_BANK_ALIAS INDUSTRY INDICATION DATES_OF_APPROVAL FIRST_DATE_OF_APPROVAL" ) def try_parsing_date(text): for fmt in ("%B %d %Y", "%b. %d %Y", "%b %Y", "%B %Y", "%B %d%Y", "%B %Y", "%B%d %Y", "%B%d%Y"): try: return datetime.strptime(text, fmt) except ValueError:
"""
Cleaning of the FDA database.

Strips extra trailing/leading spaces and keeps only the first five columns,
then re-saves the database through the project File_Maker helper.
"""
from utils import File_Reader as FR
from utils import File_Maker as FM

# Pattern handed to File_Reader: leading and/or trailing runs of spaces.
strip_pattern = "^[ ]+|[ ]+$"

reader = FR("../FDA/FDA_DRUG_DATABASE_cured.txt",
            encoding="utf-16",
            sep="\t",
            strip_chars_pattern=strip_pattern)

# Keep only the first five columns of every record.
cleaned_rows = [record[:5] for record in reader.iter()]
print(cleaned_rows)

writer = FM("../FDA/FDA_DRUG_DATABASE_cured_cleaned",
            data_stream=cleaned_rows,
            extension=".txt")
writer.save()
# NOTE(review): chunk begins mid-conditional — the `if` matching the `else`
# below, and the enclosing loop, are outside this view; indentation is
# reconstructed.
                match[pmid][2].append(names)
                match[pmid] = (year, title, match[pmid][2], description)
            else:
                match[pmid] = (year, title, match[pmid][2], description)
            # tf.step()

# Count PMIDs for which nothing at all was matched.
missing = 0
for k, v in match.items():
    if v == ("", "", [], ""):
        missing += 1

pubmedNdrugs = FM("../PUBMED_DATA/pubmed2606_with_drugs",
                  extension=".txt",
                  olddata_dir="../OLD_DATA/PUBMED_DATA/")
with pubmedNdrugs.get_filepointer() as fp:
    for k, v in match.items():
        if v[2]:
            res = ""
            # Flatten each alias group into a single ";"-joined string.
            for i in range(len(v[2])):
                v[2][i] = ";".join(v[2][i])
            # Drop entries that are substrings of a longer hyphenated entry,
            # unless the short form itself occurs more than once in the title.
            topop = []
            for i in v[2]:
                for j in v[2]:
                    # " " not in j.replace(i, "") and not " "+i+" " in v[1]
                    if i in j and len(i) < len(j) and i not in topop and "-" in j.replace(i, "") and not v[1].count(i) > 1:
                        topop.append(i)
            for to in topop:
                v[2].pop(v[2].index(to))
# NOTE(review): chunk ends here — the rest of the `with` body is outside this
# view.
# NOTE(review): chunk begins mid-block — the loop/conditional enclosing these
# two statements (and the meaning of `index`) is outside this view.
            choices = [new_drugs[c] for c in index]
            print("conflict")

print(len(new_dict))
# Register every drug not yet in the pivot with an empty value for each
# header column.
for new in new_drugs:
    if not new in new_dict:
        new_dict[new] = {}
        for i in range(len(header)):
            new_dict[new][header[i]] = ""
print(len(new_dict))

# Every entry names itself in the DRUG_NAMES column.
for new in new_drugs:
    new_dict[new]["DRUG_NAMES"] = new

pool_file = FM("../DRUG_LISTS/drug_pivot_2606",
               extension=".txt",
               format="tsv",
               olddata_dir="../OLD_DATA/PUBMED_DATA")
pool_data = []
pool_data.append(header)
# NOTE(review): dict_values views are appended as rows — assumes File_Maker
# serializes any iterable; verify against the FM implementation.
for drug, values in sorted(new_dict.items()):
    pool_data.append(values.values())
pool_file.set_datastream(pool_data)
pool_file.save()
pool_file.close()
pool.append(item) # #Build final result from pool while pool: elems = pool.pop() res = [] while elems: searching = elems.pop() if searching not in res: res.append(searching) for p in pool: if searching in p: for i in p: elems.append(i) indices = [i for i, x in enumerate(pool) if x == p] pool = [x for i, x in enumerate(pool) if i not in indices] final_ref.append(sorted(res)) csv = [] for i in final_ref: csv.append(";".join(i)) csv = list(set(csv)) csv.sort() full_drug_list = FM("../DRUG_LISTS/full_drug_list", extension=".txt") with full_drug_list.get_filepointer() as fp: for drug in csv: fp.write(drug + "\n") fp.close()
# Parse xml file.
print("parsing xml")
tree = ET.parse('../DRUGBANK/drugbank_db_schema.xml')
root = tree.getroot()

# One "<tabs><tag>" line per element, collected depth-first.
res = []


def tree_builder(node, depth):
    """Walk *node* depth-first, recording each child's tag indented by depth tabs."""
    for element in node:
        print(element.tag)
        res.append(depth * '\t' + element.tag)
        tree_builder(element, depth + 1)


tree_builder(root, 0)

# Persist the indented tag outline via the project File_Maker helper.
tree_tagging = FM("../DRUGBANK/tree_tagging", extension=".txt")
with tree_tagging.get_filepointer() as fp:
    fp.writelines(line + '\n' for line in res)
fp.close()
# NOTE(review): chunk begins mid-loop — the loop over entries that defines
# `entry`, `app`, `dates`, `sponsors` and `min_index` is outside this view;
# indentation is reconstructed.
    if min_index != -1:
        # Earliest submission found: record its date and sponsor.
        entry["FIRST_FDA_SubmissionStatusDate"] = dates[min_index]
        entry["FIRST_FDA_SponsorName"] = sponsors[min_index]
    else:
        entry["FIRST_FDA_SubmissionStatusDate"] = ""
        entry["FIRST_FDA_SponsorName"] = ""
    # De-duplicate each ";"-separated field (note: set() loses the original
    # ordering of the values).
    entry[app + "SponsorName"] = ";".join(list(set(entry[app + "SponsorName"].split(";"))))
    entry[app + "ActiveIngredient"] = ";".join(list(set(entry[app + "ActiveIngredient"].split(";"))))
    entry[app + "SubmissionStatusDate"] = ";".join(list(set(entry[app + "SubmissionStatusDate"].split(";"))))
    entry[app + "SubmissionStatus"] = ";".join(list(set(entry[app + "SubmissionStatus"].split(";"))))

pool_file = FM("../FDA/FDA2018_by_drugbank2606",
               extension=".txt",
               format="tsv",
               olddata_dir="../OLD_DATA/FDA")
pool_data = []
pool_data.append([head for head in header])
for entry in drugs_dict.values():
    pool_data.append([str(v) for v in entry.values()])
pool_file.set_datastream(pool_data)
pool_file.save()
pool_file.close()
"""Ad-hoc smoke test for the File_Maker utility: create a file and write to it."""
from utils import File_Maker as FM

test_file = FM("GRAPH/test78324264.txt", replace_old=True, version_control=False)

# Exercise the metadata accessors.
print(test_file.get_filename())
print(test_file.get_extension())
print(test_file.get_savedir())
print(test_file.original_dir)

# Exercise the file pointer with a few arbitrary payloads, then close it.
fp = test_file.get_filepointer()
for payload in ("adzaf", "sth", "rthter", "rthet"):
    fp.write(payload)
fp.close()
"%m/%Y")) if line_min[0] is not current_min[0]: pmids = [line_min[0], current_min[0]] dates = [line_min[1], current_min[1]] drugbank_dict[db_alias][ "OLDEST_DATE_OF_PUBLICATION"] = datetime.strftime( dates[dates.index(min(dates))], "%m/%Y") drugbank_dict[db_alias]["OLDEST_PMID"] = pmids[dates.index( min(dates))] print(line[0]) print(line_min) print(drugbank_dict[db_alias]["OLDEST_DATE_OF_PUBLICATION"]) print(drugbank_dict[db_alias]) print("") # print(drugbank_dict[db_alias]) pool_file = FM("../PUBMED_DATA/drugbank2606", extension=".txt", format="tsv", olddata_dir="../OLD_DATA/PUBMED_DATA") pool_data = [] pool_data.append(keys) for drug in drugbank_dict.values(): pool_data.append(drug.values()) print(len([v for v in drugbank_dict.values()])) pool_file.set_datastream(pool_data) pool_file.save() pool_file.close()
# Map PMID -> "month year" publication date string (columns 4 and 3 of the
# pubmed file).
pubmed_dict = {}
for line in pubmed_file.iter():
    pubmed_dict[line[1]] = " ".join([line[4], line[3]])

# For every drug with associated PMIDs, find the earliest publication.
for drug in drug_dict.values():
    if drug["ASSOCIATED_PMID"]:
        pmids = drug["ASSOCIATED_PMID"].split(";")
        date_list = []
        for pmid in pmids:
            # NOTE(review): raises KeyError if a PMID is missing from the
            # pubmed file — presumably guaranteed by the upstream step.
            date_list.append(pubmed_dict[pmid])
        for i in range(len(date_list)):
            date_list[i] = datetime.strptime(date_list[i], "%m %Y")
        min_index = date_list.index(min(date_list))
        drug["OLDEST_PMID"] = pmids[min_index]
        drug["OLDEST_DATE_OF_PUBLICATION"] = datetime.strftime(
            date_list[min_index], "%m/%Y")

# Save stuff
pool_file = FM("../PUBMED_DATA/drugs2606minedalias_with_found_identifiers",
               extension=".txt",
               format="tsv",
               olddata_dir="../OLD_DATA/PUBMED_DATA")
# NOTE(review): `pool_data` is never initialized in this chunk — every sibling
# script does `pool_data = []` (plus a header append) right here. Unless it is
# defined earlier in the file, this raises NameError; confirm and fix.
for drug in drug_dict.values():
    pool_data.append(drug.values())
pool_file.set_datastream(pool_data)
pool_file.save()
pool_file.close()
# NOTE(review): chunk begins mid-loop — the loops defining `d`, `index`,
# `todel` and `maxdel` are outside this view; indentation is reconstructed.
        d.extend(drugs[index].split(";"))
    drugs.append(";".join(sorted(list(set(d)))))

# Remove superseded entries (condition on `maxdel` reconstructed as written;
# its intent cannot be confirmed from this view).
for d in todel:
    if d in drugs and drugs.index(d) < maxdel - len(todel):
        drugs.pop(drugs.index(d))

# Normalize each entry to a sorted ";"-joined alias list, then de-duplicate.
for d in range(len(drugs)):
    drugs[d] = ";".join(sorted(drugs[d].split(";")))
drugs = list(set(drugs))
drugs.sort()

pool_file = FM("../DRUG_LISTS/drug_list_2606_curated_cleaned",
               extension=".txt",
               format="tsv",
               olddata_dir="../OLD_DATA/DRUG_LISTS",
               encoding="utf-8")
pool_file.set_datastream(drugs)
pool_file.save()
pool_file.close()
# annotations = annotation_file.readlines()
# header = annotations.pop(0)
# drug_dict = {}
# for line in annotations:
#     drug_dict[line[0]] = {}
#     for i in range(len(header)):
#         val = ""
# NOTE(review): chunk begins mid-loop — the loop/conditional that sets
# `found`, `pmid`, `year`, `title`, `names` and `description` is outside this
# view; indentation is reconstructed.
            found = True
    if found:
        match[pmid] = (year, title, match[pmid][2] + names, description)
    # tf.step()

print(match[28980060])  # spot-check one known PMID

# Count PMIDs for which nothing at all was matched.
missing = 0
for k, v in match.items():
    if v == ("", "", [], ""):
        missing += 1
print(missing)

# Write one tab-separated row per PMID; column 4 is the first drug group (or
# empty when no drug was matched).
pubmedNdrugs = FM("../PUBMED_DATA/pubmedNdrugs", extension=".txt")
with pubmedNdrugs.get_filepointer() as fp:
    for k, v in match.items():
        if v[2]:
            fp.write("\t".join([str(k), v[0], v[1], v[2][0]]) + "\n")
        else:
            fp.write("\t".join([str(k), v[0], v[1], ""]) + "\n")
    fp.close()
# with open("../PUBMED_DATA/pubmedNdrugs.latest.txt", "w", encoding = "utf-8") as fp:
#     for k,v in match.items():
#         if v[2]:
#             fp.write("\t".join([str(k),v[0], v[1],v[2][0]])+"\n")
#         else:
#             fp.write("\t".join([str(k),v[0], v[1],""])+"\n")

noDrug = FM("../PUBMED_DATA/pubmed_data_2606_noDRUG", extension=".txt")