def ribosome_cut(species, fna_dic=None, chunk_size=5000,
                 depth_dir="original_data/"):
    """Read ribosome depth files and cut each depth track into string chunks.

    Every row of every ``.fdp`` file is parsed with ``csv.reader``.  The first
    CSV field holds ``<id>\\t<first depth>``; the remaining fields are further
    depth values.  Rows whose first depth is ``'NA'`` are ignored.  The depth
    values are joined with ``';'`` and the resulting string is sliced into
    pieces of ``chunk_size`` characters.

    Args:
        species: ``"hs"`` selects the ten-file list below; any other value
            selects the single file ``SRR3208406.fdp``.
        fna_dic: optional mapping from id to an object supporting ``len()``
            (only the length is used here).  When ``None`` it is loaded via
            ``sqlcommon.as_coor`` / ``sqlcommon.as_fna`` exactly as before.
            NOTE(review): presumably id -> sequence; confirm against sqlcommon.
        chunk_size: number of characters per stored chunk (default 5000).
        depth_dir: directory that contains the ``.fdp`` files.

    Returns:
        dict mapping ``"<id>_<k>"`` (k = chunk index) to a list of chunk
        strings indexed by the file's position in the file list.  Positions of
        earlier files that did not contain the id are filled with ``''``;
        lists are not padded after the last file that contained the id.  When
        the joined string length is an exact multiple of ``chunk_size`` the
        final chunk is an empty string.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
        KeyError: if a non-NA row names an id missing from ``fna_dic``.
        IndexError: if a non-empty row has no tab in its first field.
    """
    # Local import: the file's import block is outside this block's view.
    import os

    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    if fna_dic is None:
        coor_dic = sqlcommon.as_coor(species)
        fna_dic = sqlcommon.as_fna(coor_dic, species)

    if species == "hs":
        file_array = ['SRR970538.fdp', 'SRR970490.fdp', 'SRR970565.fdp',
                      'SRR970561.fdp', 'SRR970587.fdp', 'SRR970588.fdp',
                      'H_Rep1.fdp', 'H_Rep2.fdp', 'N_Rep1.fdp', 'N_Rep2.fdp']
    else:
        file_array = ['SRR3208406.fdp']

    ri_depth_dic = {}
    for f_count, file_name in enumerate(file_array):
        # Ids already stored from this file.  A second row for the same id
        # used to make the padding loop spin forever; now it is skipped.
        stored_ids = set()
        with open(os.path.join(depth_dir, file_name), 'r') as fh:
            for line in csv.reader(fh):
                if not line:
                    # csv.reader yields [] for blank lines.
                    continue
                print(file_name)
                print(line[0])
                temp_array = line[0].split('\t')
                if temp_array[1] == 'NA':
                    continue
                seq_id = temp_array[0]
                if seq_id in stored_ids:
                    print("duplicate %s in %s skipped" % (seq_id, file_name))
                    continue
                stored_ids.add(seq_id)

                expected_len = len(fna_dic[seq_id])
                depth_values = [temp_array[1]] + line[1:expected_len + 1]
                depth_str = ';'.join(depth_values)

                # Floor division keeps the count an int on Python 2 and 3.
                full_chunks = len(depth_str) // chunk_size
                # Index ``full_chunks`` is the (possibly empty) remainder.
                for k in range(full_chunks + 1):
                    piece = depth_str[k * chunk_size:(k + 1) * chunk_size]
                    _store_depth_chunk(ri_depth_dic, seq_id + "_" + str(k),
                                       f_count, piece)

                # Diagnostic: field count differs from the reference length.
                if len(line) != expected_len:
                    print(line)
    return ri_depth_dic


def _store_depth_chunk(depth_dic, key, file_index, chunk):
    """Append ``chunk`` for ``key``, first padding with '' up to ``file_index``."""
    slots = depth_dic.setdefault(key, [])
    # A non-positive multiplier yields [], so this can never loop or over-pad.
    slots.extend([''] * (file_index - len(slots)))
    slots.append(chunk)
if not coor_dic[q][0] in gene_uorf_list: gene_uorf_list.append(coor_dic[q][0]) return uorf_dic # ---------------- ---------------- ---------------- ---------------- # # Main Program # ---------------- ---------------- ---------------- ---------------- # if __name__ == "__main__": #./sql_main.py mouse species = sys.argv[1] gene_list = [] coor_dic = sqlcommon.as_coor(species) nm_list = [] for nm in coor_dic.keys(): if coor_dic[nm][1] == 0: del coor_dic[nm] else: if not coor_dic[nm][0] in gene_list: gene_list.append(coor_dic[nm][0]) if not nm in nm_list: nm_list.append(coor_dic[nm][1]) IREZone_dic = data_IRES_IREZone(species, coor_dic) uorf_dic = data_uorf_sort(coor_dic, species) all_data_dic = {}