def study_type(study):
    """Summarise the cell types covered by a study as a single string.

    For every experiment of ``study`` the experiment's cell-type terms are
    collected together with the terms returned by ``co.get_ancestors`` for
    each of them. The per-experiment collections are merged (set union when
    the study has more than one experiment), filtered through
    ``co.get_terms_without_children`` and mapped to names with
    ``co.get_term_name``.

    Args:
        study: Study identifier accepted by ``v5.study_to_exps``.

    Returns:
        The selected term names joined by ``'; '``. An empty string is
        returned when the study has no experiments.
    """
    exps = v5.study_to_exps(study)

    ancestor_dict = {}
    for exp in exps:
        celltypes = v5.exp_to_celltypes(exp)
        # Work on a copy: extending the returned list in place would modify
        # whatever object v5 handed back (and the list being iterated, if
        # v5 returns the same object on every call).
        terms = list(celltypes)
        for celltype in celltypes:
            terms += co.get_ancestors(celltype)
        ancestor_dict[exp] = terms

    if not ancestor_dict:
        # No experiments: nothing to summarise. Previously this path raised
        # UnboundLocalError because no term collection was ever bound.
        return ''

    if len(exps) == 1:
        # Single experiment: pass its term list through unchanged.
        ancestors = ancestor_dict[exps[0]]
    else:
        # NOTE(review): this is a union across experiments although the
        # result was historically named "common"; an intersection may have
        # been intended -- confirm before changing.
        ancestors = list(set().union(*ancestor_dict.values()))

    leaf_ids = co.get_terms_without_children(list(ancestors))
    return '; '.join([co.get_term_name(term_id) for term_id in leaf_ids])
def is_type_available(celltype, study):
    """Tell whether ``celltype`` occurs in any experiment outside ``study``.

    Args:
        celltype: Cell-type term to look for.
        study: Study identifier; its experiments (``v5.study_to_exps``) are
            excluded from the search.

    Returns:
        True if at least one experiment from ``v5.get_exps()`` that does not
        belong to ``study`` lists ``celltype`` in ``v5.exp_to_celltypes``,
        otherwise False.
    """
    own_exps = v5.study_to_exps(study)
    return any(
        celltype in v5.exp_to_celltypes(candidate)
        for candidate in v5.get_exps()
        if candidate not in own_exps
    )
def get_exp_proportion(exp):
    """Compute the proportion for one experiment via ``get_proportion``.

    The experiment's study is looked up with ``v5.exp_to_study`` and the
    table ``nonzero_Weight_<study>.tsv`` under ``decon_temp`` is loaded
    (tab-separated, first column used as the index).

    Args:
        exp: Experiment identifier.

    Returns:
        Whatever ``get_proportion`` returns for ``exp``, the loaded table
        and a flag that is True when the study has exactly one experiment.
    """
    parent_study = v5.exp_to_study(exp)
    table_path = decon_temp + 'nonzero_Weight_' + parent_study + '.tsv'
    weights = pd.read_csv(table_path, sep='\t', index_col=0)
    is_single = len(v5.study_to_exps(parent_study)) == 1
    return get_proportion(exp, weights, is_single)
def get_study_proportion(study):
    """Average the per-experiment proportions of a study.

    Loads ``nonzero_Weight_<study>.tsv`` from ``decon_temp`` (tab-separated,
    first column used as the index), calls ``get_proportion`` for every
    experiment of the study and returns the mean of the results.

    Args:
        study: Study identifier.

    Returns:
        ``np.mean`` over the values returned by ``get_proportion``.
    """
    table_path = decon_temp + 'nonzero_Weight_' + study + '.tsv'
    weights = pd.read_csv(table_path, sep='\t', index_col=0)
    members = v5.study_to_exps(study)
    # NOTE(review): the single-experiment flag comes from membership in
    # single_exp_list here, whereas sibling functions derive it from
    # len(exps) == 1 -- confirm the two always agree.
    is_single = study in single_exp_list
    per_exp = [get_proportion(member, weights, is_single) for member in members]
    return np.mean(np.array(per_exp))
def scatterplot(ax, exp, celltype, remove_study = False):
    """Scatter an experiment's query expression against a reference.

    Draws on ``ax`` the values from ``get_reference_expression`` (x) versus
    ``get_query_expression(exp)`` (y), fixes both axes to 0..450000 with
    ticks every 100000, adds a dashed diagonal and labels the axes.

    Args:
        ax: Axes-like object providing ``scatter``, ``set``, ``plot``,
            ``get_xlim``, ``get_ylim``, ``set_xticks`` and ``set_yticks``.
        exp: Experiment identifier; also used as the y-axis label.
        celltype: Cell-type term for the reference expression.
        remove_study: Forwarded to ``get_reference_expression``. When true,
            the plot is skipped (a message is printed) if
            ``is_type_available`` reports the cell type as unavailable for
            the experiment's study.

    Returns:
        None.
    """
    axis_max = 450000
    tick_step = 100000

    study = v5.exp_to_study(exp)
    if remove_study and not is_type_available(celltype, study):
        message = ("Cell type ({}) not provided in reference ".format(celltype)
                   + "matrix for this study.")
        print(message)
        return

    query = get_query_expression(exp)
    study_exps = v5.study_to_exps(study)
    # reference[0] is plotted on the x axis; reference[1] is shown in the
    # x label as the number of experiments.
    reference = get_reference_expression(celltype, study_exps, remove_study)

    ax.scatter(reference[0], query)
    ax.set(xlim=(0, axis_max), ylim=(0, axis_max))

    # Dashed diagonal spanning the current axis limits.
    (_diagonal,) = ax.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")

    ticks = np.arange(0, axis_max, tick_step)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)

    x_label = "{} ({} experiments)".format(co.get_term_name(celltype),
                                           reference[1])
    ax.set(xlabel=x_label, ylabel=exp)
def proportions_all():
    """Collect the ``get_proportion`` result for every experiment.

    Iterates over all studies from ``v5.get_studies()``, loads the
    tab-separated table ``nonzero_Normal_<study>.tsv`` from ``decon_temp``
    and evaluates ``get_proportion`` for each experiment of the study.

    Returns:
        A dictionary with experiment identifiers as keys and the
        corresponding ``get_proportion`` results as values.
    """
    # TODO: the original author flagged this function as needing an update.
    # NOTE(review): sibling functions read 'nonzero_Weight_' tables with
    # index_col=0; here the prefix is 'nonzero_Normal_' and no index column
    # is set -- confirm this difference is intended.
    results = {}
    for study in v5.get_studies():
        table_path = decon_temp + 'nonzero_Normal_' + study + '.tsv'
        table = pd.read_csv(table_path, sep='\t')
        members = v5.study_to_exps(study)
        is_single = len(members) == 1
        for member in members:
            results[member] = get_proportion(member, table, is_single)
    return results