def get_driver_df(driver_dict):
    """Collapse per-source driver annotations and convert them to a dataframe.

    For every key of ``driver_dict`` the entries found under that key are
    merged into a single dictionary whose values are "|"-joined strings
    (plus the role chosen by ``assign_driver_role``).  That merged
    dictionary replaces the original value stored under the key, i.e.
    ``driver_dict`` is updated in place.  The updated mapping is then
    passed to ``dict_to_dataframe``.

    Args:
        driver_dict: Mapping whose values are iterables of annotation
            entries.  Presumably each entry is a dict with fields such as
            "driver_role", "source_name" or "tumor_list" coming from one
            annotation source -- confirm against the caller.

    Returns:
        The object produced by ``dict_to_dataframe`` for the collapsed
        mapping.
    """
    for key, annotation in driver_dict.items():
        # Accumulators for this key: sets for the fields that are collected
        # without duplicates, lists for the fields collected per entry.
        role = set()
        var_type = set()
        source = []
        source_pmid = []
        reference = []
        reference_source = set()
        tumor = []
        db_tumor = []

        for driver_info in annotation:
            # NOTE(review): helper.populate_set / helper.populate_list
            # presumably add the entry's value for the named field to the
            # accumulator passed as last argument -- confirm in `helper`.
            helper.populate_set(driver_info, "driver_role", role)
            helper.populate_set(driver_info, "var_type", var_type)
            helper.populate_list(driver_info, "source_name", source)
            helper.populate_list(driver_info, "source_pmid", source_pmid)
            helper.populate_list(driver_info, "reference_id", reference)
            helper.populate_set(driver_info, "reference_source", reference_source)
            helper.populate_list(driver_info, "tumor_list", tumor)
            helper.populate_list(driver_info, "db_tumor_repr", db_tumor)

        # Replace the value stored under this key with one collapsed
        # dictionary.  Only existing keys are reassigned, so the size of the
        # dict never changes while it is being iterated.
        driver_dict[key] = {
            "driver_role": assign_driver_role(role),
            "source_name": "|".join(source),
            "source_pmid": "|".join(source_pmid),
            "reference_id": "|".join(reference),
            # Sets have no stable iteration order across interpreter runs;
            # sorting keeps the joined string reproducible.
            "reference_source": "|".join(sorted(reference_source)),
            # Empty strings are dropped so they do not produce "||" runs.
            "tumor_list": "|".join(t for t in tumor if t != ""),
            "db_tumor_repr": "|".join(t for t in db_tumor if t != ""),
            "var_type": "|".join(sorted(var_type)),
        }

    # Driver information dictionary to dataframe.
    return dict_to_dataframe(driver_dict)
# collapse info from sources into one representation temp = {} for key, annotation in cnv_mechanistic_drug_targets.items(): temp[key] = [] for db_id, info in annotation.items(): if len(info) == 1: temp[key].append(info[0]) elif db_id != "null": approval = set() drug = set() source = [] source_pmid = [] reference = [] reference_source = set() for drug_info in info: helper.populate_set(drug_info, "approval_status", approval) helper.populate_set(drug_info, "drug_name", drug) helper.populate_list(drug_info, "source_name", source) helper.populate_list(drug_info, "source_pmid", source_pmid) helper.populate_list(drug_info, "reference_id", reference) helper.populate_set(drug_info, "reference_source", reference_source) approval_status = mechanistic.assign_approval_status(approval) name = list(drug)[0] agg_source = "|".join(source) agg_pmid = "|".join(source_pmid) agg_ref = "|".join(reference) agg_ref_source = "|".join(list(reference_source)) temp[key].append({