def stEvalInfo(d1, d2, bg):
    """Summarise the overlap between d1 and d2 as one tab-separated string.

    d1, d2 -- {} whose keys are the two sets being compared
    bg     -- {} whose keys form the background; only its size is used

    Return 'len(d1)<TAB>len(match)<TAB>pval', where match is whatever
    utils_graph.intersectLists returns for [d1, d2] and pval is the value
    of utils_stats.prob3 for the four sizes.
    """
    # presumably the keys common to d1 and d2 -- confirm in utils_graph
    shared = utils_graph.intersectLists([d1, d2])
    d1_size = len(d1.keys())
    shared_size = len(shared.keys())
    # argument order: background size, d1 size, d2 size, overlap size
    pval = utils_stats.prob3(len(bg.keys()), d1_size, len(d2.keys()),
                             shared_size)
    fields = [str(d1_size), str(shared_size), str(pval)]
    return '\t'.join(fields)
def getEnrichedClusters(gmeans_clusters, target_sets, background_genes, pval_cut):
    """Find (cluster, target set) pairs whose overlap p-value beats a cutoff.

    gmeans_clusters  -- {} of cluster_id to {} of genes
    target_sets      -- {} of cat to {} of genes
    background_genes -- {} whose keys are the background genes
    pval_cut         -- a pair is kept only when its pval < pval_cut

    Return {} of cluster_id to {} of cat to
    [match_genes, cluster_per, target_per, pval].  cluster_per and
    target_per are the integer (truncated) percentages of the cluster and
    of the target set that match_genes covers.  Clusters without any
    enriched cat are absent from the result.

    Every cluster gene and target-set gene must be a key of
    background_genes.  If one is not, the function raises SystemExit with
    the error message, so the message goes to stderr and the process exit
    status is non-zero.
    """
    # Check to make sure everything is in the background before computing
    # any statistics.  sys.exit(message) is used rather than sys.exit(0) so
    # a failed check is not reported to the caller's shell as success.
    for cluster_genes in gmeans_clusters.values():
        for gene in cluster_genes.keys():
            if gene not in background_genes:
                sys.exit('cluster gene not in background genes')
    for target_genes in target_sets.values():
        for gene in target_genes.keys():
            if gene not in background_genes:
                sys.exit('target set gene not in background genes')

    ret_clusters = {}
    background_len = len(background_genes)
    for a_set, target_genes in target_sets.items():
        target_len = len(target_genes)
        for cluster, cluster_genes in gmeans_clusters.items():
            cluster_len = len(cluster_genes)
            # presumably the genes common to both -- confirm in utils_graph
            match_genes = utils_graph.intersectLists(
                [cluster_genes, target_genes])
            match_len = len(match_genes)
            pval = utils_stats.prob3(background_len, cluster_len,
                                     target_len, match_len)
            if pval < pval_cut:
                # 100.0 forces float division on Python 2 as well as 3;
                # int() truncates, matching the historical output.
                cluster_per = int(100.0 * match_len / cluster_len)
                target_per = int(100.0 * match_len / target_len)
                ret_clusters.setdefault(cluster, {})[a_set] = [
                    match_genes, cluster_per, target_per, pval]
    return ret_clusters
if hhe_vp2hp.has_key(vp): hhe = utils_graph.intersectLists([hhe_vp2hp[vp], all_hps]) hhe_len = len(hhe.keys()) preds = pred2vp2hp[predtype][vp] preds_len = len(preds.keys()) match = utils_graph.intersectLists([hhe, preds]) match_len = len(match.keys()) precision = int(round(float(100) * float(match_len) / float(preds_len))) if hhe_len > 0: recall = int(round(float(100) * float(match_len) / float(hhe_len))) else: recall = "NA" random_precision = int(round(float(100) * float(hhe_len) / float(len(all_hps.keys())))) if match_len != 0: pval = utils_stats.prob3(len(all_hps.keys()), preds_len, hhe_len, match_len) else: pval = "No Matches" fout.write( predtype + "\t" + vp + "\t" + str(hhe_len) + "\t" + str(preds_len) + "\t" + str(match_len) + "\t" + str(precision) + "\t"