def run(self):
    """
    Annotate every row of the input SNP matrix with a population id and write it out.

    Steps:
        1. Load the matrix from self.input_fname into an SNPData object.
        2. Fetch the population-id -> ecotype-id-list mapping for
           self.population_table via OutputPopulation.get_popid2ecotypeid_ls()
           and invert it into ecotype-id -> population-id.
        3. Walk snpData.row_id_ls (each id is converted with int()); rows whose
           id has no population get a None placeholder and their row index is
           collected so that tofile() can leave them out.
        4. Store the row ids as snpData.strain_acc_list, the population ids as
           snpData.category_list, and write everything to self.output_fname.

    Reads from self: dbname, hostname, user, passwd, input_fname,
    population_table, output_fname.
    """
    import MySQLdb
    from OutputPopulation import OutputPopulation

    conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
    try:
        curs = conn.cursor()
        snpData = SNPData(input_fname=self.input_fname, turn_into_array=1, ignore_2nd_column=1)
        popid2ecotypeid_ls = OutputPopulation.get_popid2ecotypeid_ls(curs, self.population_table)
    finally:
        # The database is only needed for the lookup above; release the
        # connection even when loading the matrix or the query fails.
        conn.close()

    # Invert population -> [ecotype, ...] into ecotype -> population.
    ecotypeid2popid = {}
    for popid, ecotypeid_ls in popid2ecotypeid_ls.iteritems():
        for ecotypeid in ecotypeid_ls:
            ecotypeid2popid[ecotypeid] = popid

    pop_id_ls = []
    rows_to_be_tossed_out = Set()
    for row_index, row_id in enumerate(snpData.row_id_ls):
        ecotype_id = int(row_id)
        if ecotype_id in ecotypeid2popid:
            pop_id_ls.append(ecotypeid2popid[ecotype_id])
        else:
            # Population unknown: keep pop_id_ls aligned with row_id_ls by
            # appending a placeholder, and mark the row to be skipped on output.
            rows_to_be_tossed_out.add(row_index)
            pop_id_ls.append(None)

    snpData.strain_acc_list = snpData.row_id_ls
    snpData.category_list = pop_id_ls
    snpData.tofile(self.output_fname, rows_to_be_tossed_out=rows_to_be_tossed_out)
def run(self):
    """
    Clean up the strain x SNP data population by population and optionally store the result.

    Steps:
        1. Use a dbSNP2data instance to load the SNP index, the strain index
           and the data matrix (heterozygous calls included) from
           self.input_table, self.snp_locus_table and self.strain_info_table.
        2. Fetch the population-id -> ecotype-id-list mapping for
           self.population_table.
        3. For every population with at least self.min_no_of_strains_per_pop
           ecotypes, build its sub-matrix with self.create_sub_data_matrix();
           if enough ecotypes remain, run self.cleanup_one_population() and
           remember the selected strain ids and SNP ids when both are non-empty.
        4. If self.commit is true, call self.create_popid2snpid_table(),
           self.mark_strain_id_selected() and self.submit_popid2snpid_list(),
           then commit the transaction.

    Progress messages are written to sys.stderr.

    Reads from self: dbname, hostname, user, passwd, input_table,
    snp_locus_table, strain_info_table, population_table, output_table,
    min_no_of_strains_per_pop, row_cutoff, col_cutoff, min_log_prob, commit.
    """
    import MySQLdb
    from dbSNP2data import dbSNP2data
    from OutputPopulation import OutputPopulation
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    from RemoveBadSNPs import RemoveBadSNPs

    conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
    try:
        curs = conn.cursor()

        dbSNP2data_instance = dbSNP2data(user=self.user, passwd=self.passwd, output_fname='whatever')
        snp_id2index, snp_id_list, _snp_id2info = dbSNP2data_instance.get_snp_id2index_m(
            curs, self.input_table, self.snp_locus_table)
        # 2008-06-02 stuff returned by get_strain_id2index_m is totally changed.
        (strain_id2index, strain_id_list, _nativename2strain_id,
            strain_id2acc, _strain_id2category) = dbSNP2data_instance.get_strain_id2index_m(
            curs, self.input_table, self.strain_info_table)

        # Map each strain's accession (used as the ecotype id here) to its row
        # in the data matrix.
        ecotype_id2row_index = {}
        for strain_id, acc in strain_id2acc.iteritems():
            ecotype_id2row_index[acc] = strain_id2index[strain_id]

        # NOTE(review): the result is not used below; the call is kept because
        # its side effects (if any) are not visible from here -- confirm and drop.
        snp_id2acc = dbSNP2data_instance.get_snp_id_info_m(curs, snp_id_list, self.snp_locus_table)
        data_matrix = dbSNP2data_instance.get_data_matrix_m(
            curs, strain_id2index, snp_id2index, nt2number, self.input_table, need_heterozygous_call=1)

        popid2ecotypeid_ls = OutputPopulation.get_popid2ecotypeid_ls(curs, self.population_table)

        FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
        RemoveBadSNPs_instance = RemoveBadSNPs()

        popid2strain_id_snp_id_ls = {}
        for popid, ecotypeid_ls in popid2ecotypeid_ls.iteritems():
            # Skip populations that are too small to begin with.
            if len(ecotypeid_ls) < self.min_no_of_strains_per_pop:
                continue
            sys.stderr.write("Population %s\n"%popid)
            sub_data_matrix, new_ecotypeid_ls = self.create_sub_data_matrix(
                popid, data_matrix, ecotypeid_ls, ecotype_id2row_index)
            # create_sub_data_matrix() may return fewer ecotypes than it was
            # given, so the size threshold is checked again.
            if len(new_ecotypeid_ls) < self.min_no_of_strains_per_pop:
                continue
            sys.stderr.write("\tPopulation %s has %s strains\n"%(popid, len(new_ecotypeid_ls)))
            strain_id_selected, snp_id_selected = self.cleanup_one_population(
                FilterStrainSNPMatrix_instance, RemoveBadSNPs_instance, sub_data_matrix,
                new_ecotypeid_ls, snp_id_list, self.min_no_of_strains_per_pop,
                self.row_cutoff, self.col_cutoff, self.min_log_prob)
            if strain_id_selected and snp_id_selected:
                popid2strain_id_snp_id_ls[popid] = [strain_id_selected, snp_id_selected]

        if self.commit:
            self.create_popid2snpid_table(curs, self.output_table)
            self.mark_strain_id_selected(curs, popid2strain_id_snp_id_ls, self.population_table)
            self.submit_popid2snpid_list(curs, popid2strain_id_snp_id_ls, self.population_table, self.output_table)
            conn.commit()
    finally:
        # Always release the connection. Per the DB-API, closing without a
        # prior commit discards uncommitted changes, which is the intended
        # outcome both when self.commit is false and when an error occurred.
        conn.close()