示例#1
0
文件: load.py 项目: dmalmer/EMG
def merge_data_out(FILE,just_params=False):
	"""Parse a merged model/data file into a list of ``mdt.segment`` objects.

	Each line is dispatched on its prefix:

	* ``#chrom:start-stop,N,aN`` starts a new segment, built with
	  ``mdt.segment(chrom, int(start), int(stop), float(N), annotation_N=int(aN))``.
	* ``~...`` is passed to ``insert_model_info`` of the most recent segment.
	* ``N:...`` / ``U:...`` is passed to ``insert_component`` of the most
	  recent segment.
	* any other non-blank line is a data line ``data_type,peak,a,b:a,b:...``.
	  Its points are stored on the most recent segment under the attribute
	  named by ``data_type``, together with ``<data_type>_peak`` (true only
	  when the peak field is exactly ``"True"``), and ``data_type`` is
	  appended to the segment's ``data_types`` list.

	Args:
		FILE: path of the file to read.
		just_params: when true, data lines are ignored and only headers,
			model info and components are loaded.

	Returns:
		The list of segments, in file order.

	Raises:
		IndexError: if a model/component/data line appears before any ``#``
			header, or a data line has fewer than two comma-separated fields.
		ValueError: if a header or a data point cannot be parsed.
	"""
	segments = []
	with open(FILE) as FH:
		for line in FH:
			# Blank lines (typically a trailing newline at the end of the
			# file) carry no record. Without this guard they fall through
			# to the data branch and fail with an IndexError.
			if not line.strip():
				continue
			if line.startswith("#"):
				chrom, info = line[1:].strip("\n").split(":")
				start_stop, N, aN = info.split(",")
				start, stop = start_stop.split("-")
				segments.append(mdt.segment(chrom, int(start), int(stop), float(N), annotation_N=int(aN)))
			elif line.startswith("~"):
				segments[-1].insert_model_info(line)
			elif line.startswith(("N:", "U:")):
				segments[-1].insert_component(line)
			elif not just_params:
				fields = line.strip("\n").split(",")
				data_type, peak = fields[0], fields[1]
				points = []
				# Points are ":"-separated; the fields of one point are
				# ","-separated. Split each point only once.
				for chunk in ",".join(fields[2:]).split(":"):
					parts = chunk.split(",")
					if data_type != "dbSNP":
						points.append((float(parts[0]), float(parts[1])))
					else:
						# dbSNP keeps every field of the point as strings
						# next to the numeric first field.
						points.append((float(parts[0]), parts))
				current = segments[-1]
				setattr(current, data_type, points)
				setattr(current, data_type + "_peak", peak == "True")
				if not hasattr(current, "data_types"):
					current.data_types = []
				current.data_types.append(data_type)
	return segments
示例#2
0
def run(merged_file, out_file_name, penality,diff_threshold):
	"""Stream segments from ``merged_file`` and write each one via ``output``.

	The result file is named
	``<out_file_name>_<penality>_<diff_threshold>``. The input is dispatched
	on the line prefix:

	* ``#chrom:start-stop,N,aN`` closes the segment being built (it is
	  passed to ``output``) and starts a new ``mdt.segment``.
	* ``~...`` is passed to ``insert_model_info`` of the current segment.
	* ``N:...`` / ``U:...`` is passed to ``insert_component`` of the
	  current segment.
	* every other line is ignored.

	Args:
		merged_file: path of the merged input file.
		out_file_name: prefix of the output file path.
		penality: forwarded to ``output`` and embedded in the output name.
		diff_threshold: forwarded to ``output`` and embedded in the output name.
	"""
	out_path = out_file_name+"_" + str(penality) + "_" + str(diff_threshold)
	I = None
	# Both handles are managed by ``with`` so the output file is closed even
	# when parsing raises. The output is opened first, as before.
	with open(out_path, "w") as FHW, open(merged_file) as FH:
		for line in FH:
			if line.startswith("#"):
				if I is not None:
					output(I, FHW, penality, diff_threshold)
				chrom, info = line[1:].strip("\n").split(":")
				start_stop, N, aN = info.split(",")
				start, stop = start_stop.split("-")
				I = mdt.segment(chrom, int(start), int(stop), float(N), annotation_N=int(aN))
			elif line.startswith("~"):
				I.insert_model_info(line)
			elif line.startswith(("N:", "U:")):
				I.insert_component(line)
		# A segment is only written when the NEXT header is seen, so the
		# last one in the file must be flushed explicitly or it is lost.
		if I is not None:
			output(I, FHW, penality, diff_threshold)
示例#3
0
def run(FILE, penality, diff_threshold, out_file_name, si_thresh, l_thresh, w_thresh, pi_thresh):
    """Stream segments (with their data tracks) from ``FILE`` and emit each one.

    The result file is named ``out_file_name`` followed by ``_<value>`` for
    each of ``penality``, ``diff_threshold``, ``si_thresh``, ``l_thresh``,
    ``w_thresh`` and ``pi_thresh``, in that order. Within this function the
    six values are used only to build that name.

    The input is dispatched on the line prefix:

    * ``#chrom:start-stop,N,aN`` emits the segment being built and starts a
      new ``mdt.segment``.
    * ``~...`` is passed to ``insert_model_info`` of the current segment.
    * ``N:...`` / ``U:...`` is passed to ``insert_component`` of the
      current segment.
    * any other non-blank line is a data line ``data_type,peak,a,b:a,b:...``.
      Its points are stored on the current segment under the attribute named
      by ``data_type``, together with ``<data_type>_peak``, and ``data_type``
      is appended to the segment's ``data_types`` list.

    Args:
        FILE: path of the input file.
        penality, diff_threshold, si_thresh, l_thresh, w_thresh, pi_thresh:
            values embedded in the output file name.
        out_file_name: prefix of the output file path.
    """
    out_path = "_".join(
        [out_file_name]
        + [str(v) for v in (penality, diff_threshold, si_thresh, l_thresh, w_thresh, pi_thresh)]
    )
    I = None
    # The output handle used to be opened and never closed; ``with`` closes
    # it on both the normal and the error path.
    with open(out_path, "w") as FHW, open(FILE) as FH:
        for line in FH:
            # Blank lines (e.g. a trailing newline) carry no record and
            # would otherwise fail in the data branch with an IndexError.
            if not line.strip():
                continue
            if line.startswith("#"):
                if I is not None:
                    # NOTE(review): `ouput` is the name used by the original
                    # code and is presumably defined elsewhere in this
                    # module; confirm it is not a typo for `output`.
                    ouput(I, FHW)
                chrom, info = line[1:].strip("\n").split(":")
                start_stop, N, aN = info.split(",")
                start, stop = start_stop.split("-")
                I = mdt.segment(chrom, int(start), int(stop), float(N), annotation_N=int(aN))
            elif line.startswith("~"):
                I.insert_model_info(line)
            elif line.startswith(("N:", "U:")):
                I.insert_component(line)
            else:
                fields = line.strip("\n").split(",")
                data_type, peak = fields[0], fields[1]
                points = []
                # Points are ":"-separated; the fields of one point are
                # ","-separated. Split each point only once.
                for chunk in ",".join(fields[2:]).split(":"):
                    parts = chunk.split(",")
                    if data_type not in ("dbSNP", "ClinVar"):
                        points.append((float(parts[0]), float(parts[1])))
                    else:
                        # These tracks keep every field of the point as
                        # strings next to the numeric first field.
                        points.append((float(parts[0]), parts))
                setattr(I, data_type, points)
                setattr(I, data_type + "_peak", peak == "True")
                if not hasattr(I, "data_types"):
                    I.data_types = []
                I.data_types.append(data_type)
        # A segment is only emitted when the NEXT header is seen, so the
        # last one in the file must be flushed explicitly or it is lost.
        if I is not None:
            ouput(I, FHW)
示例#4
0
def parse_file(FILE, G):
	"""Read segment records from FILE and hand each completed one to ``output``.

	Lines are dispatched on their prefix:

	* ``#chrom:start-stop,N`` passes the segment built so far (if any) to
	  ``output(I, G)`` and starts a new ``mdt.segment(chrom, start, stop, N)``.
	* ``~...`` is passed to ``insert_model_info`` of the current segment.
	* ``N:...`` / ``U:...`` is passed to ``insert_component`` of the
	  current segment.
	* every other line is ignored.

	FILE is the path to open; G is forwarded unchanged to ``output``.
	Nothing is returned by the code visible here.

	NOTE(review): in the code visible here the last segment of the file is
	never passed to ``output`` -- confirm whether a final flush follows or is
	missing.
	"""
	with open(FILE) as FH:

		# `header` starts as True and is only reassigned in the `else` branch
		# below, which cannot run while it is True.
		# NOTE(review): the flag looks like a leftover or an unfinished
		# "stop at first non-header line" feature -- confirm the intent.
		header,I	= True,None
		for line in FH:
			if header:
				if "#" == line[0]:
					# A new header closes the previous segment, if there is one.
					if I is not None:
						output(I, G)
					chrom,info 			= line[1:].strip("\n").split(":")
					start_stop, N  		= info.split(",")
					start,stop 			= start_stop.split("-")
					I 					= mdt.segment(chrom,int(start),int(stop),float(N) )
				elif "~" == line[0]:
					I.insert_model_info(line)
				elif "N:"==line[:2] or "U:"==line[:2]:
					I.insert_component(line)
			else:
				# Unreachable as written: reached only when `header` is already False.
				header=False