def get_disease_specific_drugs(parser, selected_drugs, phenotypes):
    """
    Map each phenotype (disease name) to the set of selected drugs whose
    FDA indication text contains every token of the disease name.

    parser: object exposing drug_to_indication, a dict mapping
        drug -> indication text (or None when no indication is known).
    selected_drugs: container of drug identifiers to consider.
    phenotypes: iterable of disease names to match against indications.

    Returns a dict: disease name -> set of matching drug identifiers.
    """
    import text_utilities
    disease_to_drugs = {}
    # indication text -> set of diseases it matched (kept for debugging).
    indication_to_diseases = {}
    for drug, indication in parser.drug_to_indication.iteritems():
        if drug not in selected_drugs:
            continue
        if indication is None:
            continue
        indication = indication.lower()
        for disease in phenotypes:
            indication_to_diseases.setdefault(indication, set())
            # Try the name as-is, then with arabic numerals rewritten as
            # roman ones ("type 2" -> "type II", "type 1" -> "type I"),
            # because indication texts are inconsistent about numbering.
            for variant in (disease,
                            disease.replace("2", "II"),
                            disease.replace("1", "I")):
                words = text_utilities.tokenize_disease_name(variant)
                if all(indication.find(word.strip()) != -1 for word in words):
                    disease_to_drugs.setdefault(disease, set()).add(drug)
                    indication_to_diseases[indication].add(disease)
                    break
    # The original debug report of matched / non-matching indications was
    # dead code (its print statements were placed after `continue`, making
    # them unreachable); it is kept here, disabled, for reference:
    #for indication, diseases in indication_to_diseases.iteritems():
    #    if len(diseases) == 0:
    #        print indication.encode('ascii', 'ignore')
    #    elif indication.find(" not ") != -1 or indication.find(" except ") != -1:
    #        print diseases, indication.encode('ascii', 'ignore')
    return disease_to_drugs
def get_disease_specific_drugs(parser, selected_drugs, phenotypes):
    """
    Associate phenotypes with the selected drugs indicated for them.

    A drug matches a disease when every token of the (tokenized) disease
    name occurs in the drug's lower-cased indication text. Disease names
    are additionally retried with "2" -> "II" and "1" -> "I" substitutions
    to cope with roman-numeral typing in indication texts.

    parser: object exposing drug_to_indication (drug -> indication or None).
    selected_drugs: container restricting which drugs are considered.
    phenotypes: disease names to look for.

    Returns {disease name: set of drugs}.
    """
    import text_utilities
    disease_to_drugs = {}
    indication_to_diseases = {}  # indication -> matched diseases (debug aid)
    for drug, indication in parser.drug_to_indication.iteritems():
        if drug not in selected_drugs or indication is None:
            continue
        indication = indication.lower()
        for disease in phenotypes:
            indication_to_diseases.setdefault(indication, set())
            # Candidate spellings of the disease name, tried in order.
            variants = [disease,
                        disease.replace("2", "II"),
                        disease.replace("1", "I")]
            for variant in variants:
                tokens = text_utilities.tokenize_disease_name(variant)
                found = all(indication.find(t.strip()) != -1 for t in tokens)
                if found:
                    disease_to_drugs.setdefault(disease, set()).add(drug)
                    indication_to_diseases[indication].add(disease)
                    break
    # NOTE(review): the reporting loop that used to follow here never
    # printed anything -- both branches hit `continue` before their print
    # statements -- so the unreachable code has been removed.
    return disease_to_drugs
def convert_fda_name_to_mesh(disease, mesh_name_to_ids):
    """
    Best-effort mapping of an FDA disease name onto a MeSH term.

    disease: FDA disease name string.
    mesh_name_to_ids: mapping whose keys are known MeSH names.

    Returns the best-matching MeSH name (a key of mesh_name_to_ids),
    or None when no candidate clears the match threshold.
    """
    # Strip possessive markers (e.g. "Alzheimer's").
    # NOTE(review): "^s" is replaced literally by str.replace -- it looks
    # like it may have been intended as a regex; confirm the intent.
    cleaned = disease.replace("^s", "").replace("'s", "")
    if cleaned in mesh_name_to_ids:
        return cleaned
    # Tokenize, skipping filler (disease / disorder / syndrome / plural / 's).
    words = text_utilities.tokenize_disease_name(cleaned, exact=False)
    candidates = []
    for mesh_name in mesh_name_to_ids:
        lowered = mesh_name.lower()
        n_hits = sum(lowered.find(w.strip()) != -1 for w in words)
        # Keep candidates containing more than half of the name's tokens.
        if n_hits > len(words) / 2.0:
            # Score favors MeSH names where hits make up a larger share
            # of the name's own word count.
            score = float(n_hits) / len(mesh_name.split())
            candidates.append((score, mesh_name))
    if not candidates:
        return None
    candidates.sort()
    return candidates[-1][1]
def convert_fda_name_to_mesh(disease, mesh_name_to_ids):
    """
    Find the MeSH term that best matches an FDA disease name.

    disease: FDA disease name string.
    mesh_name_to_ids: mapping whose keys are known MeSH names.

    Returns the best-scoring MeSH name, or None if nothing matches
    more than half of the disease name's tokens.
    """
    # Drop possessive suffixes before lookup ("^s" is matched literally
    # by str.replace; presumably a possessive-stripping heuristic --
    # TODO confirm against callers).
    name = disease.replace("^s", "").replace("'s", "")
    # Exact hit: the cleaned name is itself a MeSH term.
    if name in mesh_name_to_ids:
        return name
    # Otherwise score every MeSH name by token overlap, ignoring filler
    # words (disease / disorder / syndrome / plurals).
    tokens = text_utilities.tokenize_disease_name(name, exact=False)
    threshold = len(tokens) / 2.0
    scored = []
    for candidate in mesh_name_to_ids:
        hits = sum([candidate.lower().find(tok.strip()) != -1
                    for tok in tokens])
        if hits > threshold:
            # Normalize by candidate length so short, dense matches win.
            scored.append((float(hits) / len(candidate.split()), candidate))
    if scored:
        # Highest score wins; ties broken lexicographically by name,
        # matching sort()-then-take-last behavior.
        return max(scored)[1]
    return None