def dstruc_loadin(self, curs):
    """
    Fill the lookup dictionaries of this instance using the cursor curs.

    Attributes assigned here: go_no2go_id, go_no2go_name, gene_no2gene_id,
    gene_id2gene_no, global_gene_to_go_dict and label_dict. When self.type
    is 3, no_of_datasets is read from self.table as well.
    """
    sys.stderr.write("Loading Data STructure...\n")
    from codense.common import get_go_no2go_id, get_gene_no2gene_id, \
        get_go_no2name, get_gene_id2gene_no, get_gene_no2go_no

    self.go_no2go_id = get_go_no2go_id(curs)
    self.go_no2go_name = get_go_no2name(curs)
    self.gene_no2gene_id = get_gene_no2gene_id(curs)
    self.gene_id2gene_no = get_gene_id2gene_no(curs)
    self.global_gene_to_go_dict = get_gene_no2go_no(curs)

    #04-01-05 the second kind in label_dict: every gene_no labelled by itself
    identity_labels = dict([(gene_no, gene_no) for gene_no in self.gene_no2gene_id])
    self.label_dict = {1: self.gene_no2gene_id, 2: identity_labels}

    # NOTE(review): the result of this query is never fetched in this method;
    # kept as is because it is part of the existing behaviour.
    curs.execute("select gene_no,go_functions from gene")

    if self.type == 3:
        # upper bound of recurrence_array in the first row of self.table
        curs.execute("select array_upper(recurrence_array,1) from %s limit 1" % self.table)
        first_row = curs.fetchall()[0]
        self.no_of_datasets = int(first_row[0])
    sys.stderr.write("Done\n")
def on_button_cluster_info_clicked(self, button_cluster_info, *args):
    """
    Handler invoked when the cluster-info button is clicked.

    When self.curs is None, a reminder is printed and nothing else happens.
    Otherwise the number of datasets is read from self.entry_no_of_datasets,
    the treeview is initialized with it, self.gene_no2gene_id and
    self.gene_no2go_no are loaded through the get_* helpers and both
    cluster-info windows are shown.

    button_cluster_info and *args are accepted but not used here.
    Raises ValueError if the entry text cannot be converted by int().
    """
    # Identity test instead of '== None': the outcome must not depend on
    # how the cursor object implements equality.
    if self.curs is None:
        # single-argument parenthesised form prints the same text under
        # the print statement and the print function
        print("db_connect first")
        return

    self.no_of_datasets = int(self.entry_no_of_datasets.get_text())
    self.treeview_init(self.no_of_datasets)
    self.gene_no2gene_id = get_gene_no2gene_id(self.curs)
    self.gene_no2go_no = get_gene_no2go_no(self.curs)
    self.window_cluster_info1.show()
    self.window_cluster_info2.show()
def run(self):
    """
    MPI driver; every rank of MPI.world executes this method.

    Work is split by rank:
      - rank 0 connects to the database, builds gene2enc_array plus the
        gene_no2id / gene_no2go_no dictionaries, sends them out and then
        feeds the computing nodes through input_node()
      - ranks 1 .. size-2 receive gene2enc_array and run computing_node()
      - rank size-1 receives gene_no2id and gene_no2go_no and writes the
        results through output_node()

    The output file is closed explicitly, also when output_node() raises.
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank
    output_rank = communicator.size - 1
    free_computing_nodes = range(1, output_rank)    #exclude the last node

    # ---- phase 1: distribute the shared data structures ----
    if node_rank == 0:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        schema_instance = form_schema_tables(self.fname, self.acc_cutoff, self.lm_bit)
        gene_id2no = get_gene_id2gene_no(curs)
        gene2enc_array = self.get_gene2enc_array(self.gim_inputfname, gene_id2no)
        gene2enc_array_pickle = cPickle.dumps(gene2enc_array, -1)

        gene_no2id = get_gene_no2gene_id(curs)
        gene_no2go_no = get_gene_no2go_no(curs)
        gene_no2id_pickle = cPickle.dumps(gene_no2id, -1)
        gene_no2go_no_pickle = cPickle.dumps(gene_no2go_no, -1)

        for node in free_computing_nodes:    #send it to the computing_node
            communicator.send(gene2enc_array_pickle, node, 0)

        # the output node receives these two in exactly this order
        communicator.send(gene_no2id_pickle, output_rank, 0)
        communicator.send(gene_no2go_no_pickle, output_rank, 0)
    elif node_rank in free_computing_nodes:
        # receiveString(0, 0): presumably (source rank, tag) -- TODO confirm
        data, source, tag = communicator.receiveString(0, 0)
        gene2enc_array = cPickle.loads(data)    #take the data
    elif node_rank == output_rank:
        schema_instance = form_schema_tables(self.fname, self.acc_cutoff, self.lm_bit)
        data, source, tag = communicator.receiveString(0, 0)
        gene_no2id = cPickle.loads(data)
        data, source, tag = communicator.receiveString(0, 0)
        gene_no2go_no = cPickle.loads(data)

    mpi_synchronize(communicator)

    # ---- phase 2: input / computing / output roles ----
    if node_rank == 0:
        curs.execute("DECLARE crs CURSOR FOR SELECT p.id, p.vertex_set, p.edge_set, p.recurrence_array, "
            "g.go_no_list from %s p, %s g where g.mcl_id=p.id"
            % (schema_instance.pattern_table, schema_instance.good_cluster_table))
        input_node(communicator, curs, free_computing_nodes, self.message_size, self.report)
    elif node_rank in free_computing_nodes:
        parameter_list = [gene2enc_array, self.dataset_signature_set, self.p_value_cut_off]
        computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report)
    elif node_rank == output_rank:
        if not os.path.isdir(self.pic_output_dir):
            os.makedirs(self.pic_output_dir)
        cluster_info_instance = cluster_info()
        ofname = os.path.join(self.pic_output_dir,
            '%s_p%s' % (schema_instance.good_cluster_table, self.p_value_cut_off))
        # keep a handle on the file so that it can be closed deterministically
        out_file = open(ofname, 'w')
        try:
            writer = csv.writer(out_file, delimiter='\t')
            parameter_list = [self.pic_output_dir, cluster_info_instance, gene_no2id, gene_no2go_no, writer]
            output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler, self.report)
        finally:
            out_file.close()
def prepare_gene_no2go_no(self, curs, depth=5):
    """
    04-15-05
        different from get_gene_no2go_no, the value is a set.
    04-27-05
        only depth ==5

    Build a dictionary mapping each gene_no to the Set of its go_no's
    whose depth equals the requested one.

    curs is passed to get_go_no2depth() and get_gene_no2go_no().
    depth is the depth a go_no must have to be kept; the default of 5
    reproduces the former hard-coded behaviour.

    Every gene_no returned by get_gene_no2go_no() gets an entry, possibly
    an empty Set. Raises KeyError if a go_no has no entry in the depth
    dictionary.
    """
    sys.stderr.write("Preparing gene_no2go_no...")
    # get_gene_no2go_no and get_go_no2depth are expected to be available at
    # module level (the function-local import from codense.common is disabled)
    go_no2depth = get_go_no2depth(curs)
    gene_no2go_no = get_gene_no2go_no(curs)

    gene_no2go_no_set = {}
    for gene_no, go_no_list in gene_no2go_no.iteritems():
        gene_no2go_no_set[gene_no] = Set(
            [go_no for go_no in go_no_list if go_no2depth[go_no] == depth])
    sys.stderr.write("Done.\n")
    return gene_no2go_no_set
def run(self):
    """
    10-17-05
        bit control whether that setting has linear model

    Compare the predicted p_gene ids of two settings and write a report.

    Steps:
      1. form the schema tables of both settings and connect to the database
      2. split the p_gene ids into four categories (only in setting 1, in
         both, only in setting 2, in neither) and sample each category with
         self.sample_p_gene_id_set()
      3. write the header and, for every setting whose character in self.bit
         is '1', its linear model results to self.ofname
      4. for every category, create the directory 'cat<N>' if needed and
         call self.output_p_gene_id_list() on the sampled ids

    The output file is closed explicitly, also when a step raises.
    """
    schema_instance1 = form_schema_tables(self.fname1, self.acc_cutoff1, self.lm_bit1)
    schema_instance2 = form_schema_tables(self.fname2, self.acc_cutoff2, self.lm_bit2)
    (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)

    p_gene_id_set1 = p_gene_id_set_from_gene_p_table(curs, schema_instance1.gene_p_table)
    p_gene_id_set2 = p_gene_id_set_from_gene_p_table(curs, schema_instance2.gene_p_table)
    p_gene_id_set_total = p_gene_id_set_from_gene_p_table(curs, schema_instance2.p_gene_table)

    category_sets = [
        p_gene_id_set1 - p_gene_id_set2,    #category I
        p_gene_id_set1 & p_gene_id_set2,    #category II
        p_gene_id_set2 - p_gene_id_set1,    #category III
        p_gene_id_set_total - (p_gene_id_set1 | p_gene_id_set2),    #category IV
    ]
    sample_ls_ls = [self.sample_p_gene_id_set(p_gene_id_set, self.no_of_samples)
        for p_gene_id_set in category_sets]

    # keep a handle on the file so that it can be closed deterministically
    out_file = open(self.ofname, 'w')
    try:
        writer = csv.writer(out_file, delimiter='\t')
        writer.writerow(['linear model coeffs of two settings'])
        writer.writerow([])
        writer.writerow(['No.', 'intercept', 'coeff1', 'coeff2', 'coeff3', 'coeff4', 'coeff5',
            'intercept_p_value', 'coeff1_p_value', 'coeff2_p_value', 'coeff3_p_value',
            'coeff4_p_value', 'coeff5_p_value', 'score_cut_off'])

        #fetch linear model coefficients
        #10-17-05 default is nothing, none of them have linear model
        pga_instance_list = [None, None]
        for index, schema_instance in enumerate([schema_instance1, schema_instance2]):
            if self.bit[index] != '1':
                continue
            pga_instance = p_gene_analysis()
            pga_instance.go_no2lm_results, lm_results_2d_list = \
                pga_instance.get_go_no2lm_results(curs, schema_instance.lm_table)
            pga_instance.general_lm_results = pga_instance.get_general_lm_results(lm_results_2d_list)
            pga_instance_list[index] = pga_instance
            self.output_lm_model(curs, schema_instance, writer)

        #following is for drawing graph in output_p_gene_id_list()
        self.gene_no2gene_id = get_gene_no2gene_id(curs)
        self.gene_no2go_no = get_gene_no2go_no(curs)
        cluster_info_instance = cluster_info()

        for i, sample_ls in enumerate(sample_ls_ls):
            cat_no = i + 1
            sys.stderr.write("Category %s...\n" % cat_no)
            writer.writerow(['Category %s' % cat_no])
            writer.writerow([self.category_no2information[cat_no]])
            cat_dir = 'cat%s' % cat_no
            if not os.path.isdir(cat_dir):
                os.makedirs(cat_dir)
            if i == 0:
                #this is different, prediction only in schema_instance1, so swap it
                self.output_p_gene_id_list(curs, schema_instance2, schema_instance1, sample_ls,
                    writer, cat_dir, pga_instance_list[1], pga_instance_list[0],
                    cluster_info_instance, self.simple)
            else:
                self.output_p_gene_id_list(curs, schema_instance1, schema_instance2, sample_ls,
                    writer, cat_dir, pga_instance_list[0], pga_instance_list[1],
                    cluster_info_instance, self.simple)
            sys.stderr.write("End Category %s.\n" % cat_no)
    finally:
        out_file.close()