def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean, TFset):
    """
    Return the distinct genes found for a trait when at least one of
    them is also present in TFset.

    The trait is pushed through the marker/region helper pipeline
    (marker_logp_list ... find_genes_in_regions).  The distinct genes read
    from the resulting dictionary are intersected with TFset.

    Parameters:
        trait       -- forwarded to marker_logp_list and
                       combine_marker_region_data.
        cutoff      -- forwarded to retrieve_logp_above_cutoff.
        exp         -- forwarded to marker_logp_list.
        gxe_boolean -- forwarded to marker_logp_list.
        TFset       -- must support ``TFset & set(...)``; presumably a set
                       of regulator / transcription factor identifiers
                       (TODO confirm against the caller).

    Returns:
        The list produced by read_distinct_genes when it shares at least
        one element with TFset; otherwise an empty list.  An empty list is
        also returned when find_genes_in_regions yields nothing, so the
        result can always be iterated without a None check.
    """
    # Get genelist from eQTL
    marker_logp = marker_logp_list(trait, gxe_boolean, exp)
    positioned = add_chromosome_and_position(marker_logp)
    above_cutoff = retrieve_logp_above_cutoff(positioned, cutoff)
    checked = check_region(above_cutoff)
    marker_tuples = iterate_marker_tuple(checked)
    regions = set_region_from_adjacent_markers(marker_tuples, positioned)
    trait_regions = combine_marker_region_data(trait, regions)
    gene_dict = find_genes_in_regions(trait_regions)

    # Drop the intermediates before the next allocations: the process is
    # long-lived (Django) and gc.collect() only reclaims unreferenced objects.
    del (marker_logp, positioned, above_cutoff, checked,
         marker_tuples, regions, trait_regions)

    # No QTL found: return an empty list explicitly instead of falling off
    # the end of the function.
    if not gene_dict:
        return []

    gene_list = read_distinct_genes(gene_dict)
    if TFset & set(gene_list):
        # Parenthesised form prints the same text under Python 2 and 3.
        print("winner winner chicken dinner!")
        return gene_list
    return []
def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean):
    """
    Return a label and an entry count for every region found for a trait.

    The trait is pushed through the marker/region helper pipeline
    (marker_logp_list ... find_genes_in_regions).  For each key/value pair
    of the resulting dictionary one ``[region, count]`` list is produced:

        region -- "<key[1]>_<key[2]> - <key[1]>_<key[3]>"
        count  -- len(value)

    Parameters:
        trait       -- forwarded to marker_logp_list and
                       combine_marker_region_data.
        cutoff      -- forwarded to retrieve_logp_above_cutoff.
        exp         -- forwarded to marker_logp_list.
        gxe_boolean -- forwarded to marker_logp_list.

    Returns:
        A list of ``[region, count]`` lists; empty when
        find_genes_in_regions yields nothing.
    """
    # Get genelist from eQTL
    marker_logp = marker_logp_list(trait, gxe_boolean, exp)
    positioned = add_chromosome_and_position(marker_logp)
    above_cutoff = retrieve_logp_above_cutoff(positioned, cutoff)
    checked = check_region(above_cutoff)
    marker_tuples = iterate_marker_tuple(checked)
    regions = set_region_from_adjacent_markers(marker_tuples, positioned)
    trait_regions = combine_marker_region_data(trait, regions)
    gene_dict = find_genes_in_regions(trait_regions)

    # Drop the intermediates as soon as they are useless: the process is
    # long-lived (Django) and gc.collect() only reclaims unreferenced objects.
    del (marker_logp, positioned, above_cutoff, checked,
         marker_tuples, regions, trait_regions)

    # Nothing found for this trait.
    if len(gene_dict) == 0:
        return []

    # NOTE(review): key[1] looks like a chromosome and key[2]/key[3] like the
    # region boundaries -- confirm against find_genes_in_regions.
    return [
        ["%s_%s - %s_%s" % (key[1], key[2], key[1], key[3]), len(members)]
        for key, members in gene_dict.iteritems()
    ]
def give_genelist_for_trait(trait, cutoff, exp, gxe_boolean, fisher_alpha_name, mult_alpha_name, postA_name, postB_name, data_dict):
    """
    Run the region lookup and the GO-term enrichment pipeline for a trait.

    First the trait is pushed through the marker/region helpers
    (marker_logp_list ... find_genes_in_regions).  When that yields at
    least one entry, the distinct genes are annotated from data_dict, GO
    terms are counted inside and outside the regions, contingency tables
    are built and tested (fish_for_python), the p-values are corrected for
    multiple testing, and the result is post-processed in two steps
    (post_process_A, post_process_B).

    Parameters:
        trait             -- forwarded to marker_logp_list and
                             combine_marker_region_data.
        cutoff            -- forwarded to retrieve_logp_above_cutoff.
        exp               -- forwarded to marker_logp_list.
        gxe_boolean       -- forwarded to marker_logp_list.
        fisher_alpha_name -- forwarded to extract_significant_result_fish.
        mult_alpha_name   -- forwarded to extract_significant_result_mult.
        postA_name        -- forwarded to post_process_A.
        postB_name        -- forwarded to post_process_B.
        data_dict         -- forwarded to segregate_gene_list and
                             annotate_from_csv.

    Returns:
        A 7-tuple
        (godict_full, gene_dict, qtls, c_go_in_qtl_dict, c_go_out_qtl_dict,
         tot_in_qtl, tot_out_qtl).
        When nothing is found this is ({}, {}, qtls, {}, {}, 0, 0) with
        qtls equal to 0.
    """
    ##########################################
    ####Run functions towards enrichment######
    ##########################################

    # Get genelist from eQTL
    marker_logp = marker_logp_list(trait, gxe_boolean, exp)
    positioned = add_chromosome_and_position(marker_logp)
    above_cutoff = retrieve_logp_above_cutoff(positioned, cutoff)
    checked = check_region(above_cutoff)
    marker_tuples = iterate_marker_tuple(checked)
    regions = set_region_from_adjacent_markers(marker_tuples, positioned)
    trait_regions = combine_marker_region_data(trait, regions)
    gene_dict = find_genes_in_regions(trait_regions)

    # Count the number of found qtls
    qtls = len(gene_dict)

    # Drop the intermediates as soon as they are useless: the process is
    # long-lived (Django) and gc.collect() only reclaims unreferenced objects.
    del (marker_logp, positioned, above_cutoff, checked,
         marker_tuples, regions, trait_regions)

    # Nothing found: hand back empty placeholders in the usual tuple shape.
    if qtls == 0:
        return {}, {}, qtls, {}, {}, 0, 0

    distinct_genes = read_distinct_genes(gene_dict)
    # Only the "present" half is used below; the "absent" half is discarded.
    genes_absent, genes_present = segregate_gene_list(data_dict, distinct_genes)
    annotated = annotate_from_csv(data_dict, genes_present)
    go_inside, go_outside, tot_in_qtl, tot_out_qtl = annotated
    go_terms_unique = unique_GO_list(go_inside)

    # Prepare for counted genes inside qtl
    go_inside_flat = flatten_array(go_inside)
    go_count_inside = count_all_goterms(go_inside_flat)

    # Prepare for counted genes outside qtl
    go_outside_flat = flatten_array(go_outside)
    go_count_outside = count_all_goterms(go_outside_flat)

    # Create contingency tables for the Fisher's exact test
    # (the second returned value is not used further in this function).
    tables, gene_total = populate_contingency_table(
        go_count_inside, go_count_outside, go_terms_unique,
        tot_in_qtl, tot_out_qtl)

    # Perform the Fisher exact test on all created contingency tables
    fisher_results = fish_for_python(tables)

    # Only results with p values below fisher_alpha pass; per the original
    # note the default is 0.05 unless another value is given in the front end.
    fisher_significant = extract_significant_result_fish(
        fisher_results, fisher_alpha_name)

    # Perform a multiple test correction
    mult_corrected = correct_pvalues_for_multiple_testing(fisher_significant)
    # NOTE(review): this filtered result is never consumed below --
    # post_process_A receives the unfiltered corrected list. Confirm intent.
    mult_significant = extract_significant_result_mult(
        mult_corrected, mult_alpha_name)

    # Post processing
    qtl_go_lookup = make_qtl_go_dict(go_inside, gene_dict)
    # A
    approved_after_a = post_process_A(mult_corrected, qtl_go_lookup, postA_name)
    # B
    godict_full = post_process_B(
        approved_after_a, go_inside, go_outside, postB_name)
    # go_gene_dict_full (layout as noted by the original author):
    # dict[i, go, fu_p_value, adj(fu_p_val, go_frac_scA, go_frac_scB]) = [gene list]

    # Release everything that is not part of the returned tuple.
    del (distinct_genes, genes_absent, genes_present, annotated,
         go_inside, go_outside, go_terms_unique,
         go_inside_flat, go_outside_flat,
         tables, gene_total, fisher_results,
         fisher_significant, mult_significant, mult_corrected,
         qtl_go_lookup, approved_after_a)

    return (godict_full, gene_dict, qtls, go_count_inside, go_count_outside,
            tot_in_qtl, tot_out_qtl)