# NOTE: these parsers rely on the project's "mdt" module (providing the
# segment class) and, further below, on output() writers defined elsewhere;
# both are assumed to be imported/defined in the enclosing files.

def merge_data_out(FILE, just_params=False):
    G = list()
    with open(FILE) as FH:
        for line in FH:
            if "#" == line[0]:
                # segment header: "#chrom:start-stop,N,annotation_N"
                chrom, info = line[1:].strip("\n").split(":")
                start_stop, N, aN = info.split(",")
                start, stop = start_stop.split("-")
                G.append(mdt.segment(chrom, int(start), int(stop), float(N),
                                     annotation_N=int(aN)))
            elif "~" == line[0]:
                G[-1].insert_model_info(line)
            elif "N:" == line[:2] or "U:" == line[:2]:
                G[-1].insert_component(line)
            elif not just_params:
                # data line: "data_type,peak,x,y:x,y:..."
                line_array = line.strip("\n").split(",")
                data_type, peak, data = line_array[0], line_array[1], ",".join(line_array[2:])
                if data_type != "dbSNP":
                    data = [(float(d.split(",")[0]), float(d.split(",")[1]))
                            for d in data.split(":")]
                else:
                    # dbSNP entries keep the raw field list alongside the position
                    data = [(float(d.split(",")[0]), d.split(","))
                            for d in data.split(":")]
                setattr(G[-1], data_type, data)
                setattr(G[-1], data_type + "_peak", peak == "True")
                if not hasattr(G[-1], "data_types"):
                    setattr(G[-1], "data_types", list())
                G[-1].data_types.append(data_type)
    return G
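
# Hedged usage sketch (not part of the original module): the input layout
# below is inferred from the parser above; the file name, data values, and
# the "~"/"N:" placeholder payloads are illustrative assumptions whose exact
# format is defined by mdt.segment.
def _demo_merge_data_out():
    example = (
        "#chr1:1000-2000,150.0,3\n"            # header: chrom:start-stop,N,annotation_N
        "~<model parameter line>\n"            # consumed by segment.insert_model_info
        "N:<component line>\n"                 # consumed by segment.insert_component
        "DNase,True,1000.5,3.2:1500.0,7.9\n"   # data: data_type,peak,x,y:x,y:...
    )
    with open("example_merged.txt", "w") as FH:
        FH.write(example)
    for seg in merge_data_out("example_merged.txt"):
        # these attributes are set by merge_data_out itself via setattr
        print(seg.data_types, seg.DNase_peak, seg.DNase)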
def run(merged_file, out_file_name, penalty, diff_threshold):
    FHW = open(out_file_name + "_" + str(penalty) + "_" + str(diff_threshold), "w")
    I = None
    with open(merged_file) as FH:
        for line in FH:
            if "#" == line[0]:
                # a new segment header flushes the previous segment
                if I is not None:
                    output(I, FHW, penalty, diff_threshold)
                chrom, info = line[1:].strip("\n").split(":")
                start_stop, N, aN = info.split(",")
                start, stop = start_stop.split("-")
                I = mdt.segment(chrom, int(start), int(stop), float(N),
                                annotation_N=int(aN))
            elif "~" == line[0]:
                I.insert_model_info(line)
            elif "N:" == line[:2] or "U:" == line[:2]:
                I.insert_component(line)
    # flush the final segment, which has no trailing header to trigger it
    if I is not None:
        output(I, FHW, penalty, diff_threshold)
    FHW.close()
def run(FILE, penalty, diff_threshold, out_file_name,
        si_thresh, l_thresh, w_thresh, pi_thresh):
    FHW = open(out_file_name + "_" + str(penalty) + "_" + str(diff_threshold)
               + "_" + str(si_thresh) + "_" + str(l_thresh)
               + "_" + str(w_thresh) + "_" + str(pi_thresh), "w")
    I = None
    with open(FILE) as FH:
        for line in FH:
            if "#" == line[0]:
                # a new segment header flushes the previous segment
                if I is not None:
                    output(I, FHW)
                chrom, info = line[1:].strip("\n").split(":")
                start_stop, N, aN = info.split(",")
                start, stop = start_stop.split("-")
                I = mdt.segment(chrom, int(start), int(stop), float(N),
                                annotation_N=int(aN))
            elif "~" == line[0]:
                I.insert_model_info(line)
            elif "N:" == line[:2] or "U:" == line[:2]:
                I.insert_component(line)
            else:
                # data line: "data_type,peak,x,y:x,y:..."
                line_array = line.strip("\n").split(",")
                data_type, peak, data = line_array[0], line_array[1], ",".join(line_array[2:])
                if data_type not in ("dbSNP", "ClinVar"):
                    data = [(float(d.split(",")[0]), float(d.split(",")[1]))
                            for d in data.split(":")]
                else:
                    # dbSNP/ClinVar entries keep the raw field list alongside the position
                    data = [(float(d.split(",")[0]), d.split(","))
                            for d in data.split(":")]
                setattr(I, data_type, data)
                setattr(I, data_type + "_peak", peak == "True")
                if not hasattr(I, "data_types"):
                    setattr(I, "data_types", list())
                I.data_types.append(data_type)
    # flush the final segment and close the writer
    if I is not None:
        output(I, FHW)
    FHW.close()
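
# Hedged usage sketch (file names and argument values below are illustrative
# assumptions, not from the original source): with this call, run() above
# writes its selections to "models_5_0.05_0.1_0.2_0.3_0.4", relying on the
# project's output() writer being defined alongside it.
#
#   run("merged_segments.txt", 5, 0.05, "models", 0.1, 0.2, 0.3, 0.4)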
def parse_file(FILE, G):
    with open(FILE) as FH:
        header, I = True, None
        for line in FH:
            if header:
                if "#" == line[0]:
                    # note: this header variant carries no annotation_N field
                    if I is not None:
                        output(I, G)
                    chrom, info = line[1:].strip("\n").split(":")
                    start_stop, N = info.split(",")
                    start, stop = start_stop.split("-")
                    I = mdt.segment(chrom, int(start), int(stop), float(N))
                elif "~" == line[0]:
                    I.insert_model_info(line)
                elif "N:" == line[:2] or "U:" == line[:2]:
                    I.insert_component(line)
                else:
                    # first non-header line ends the model-parameter block
                    header = False
    # flush the final segment parsed from the header block
    if I is not None:
        output(I, G)
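
# Hedged sketch of driving parse_file(): G is whatever accumulator the
# caller's output(I, G) expects; a plain list works if output() appends to
# it. Both the file name and that behavior of output() are assumptions.
#
#   G = list()
#   parse_file("merged_segments.txt", G)
#   # G now holds one entry per "#" segment header in the file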