def load_subdata(ids, sec, config):
    """Load one sub-plot data file and render its rows as JavaScript text.

    Settings are read from section ``sec`` of ``config`` through the paplot
    ``tools.config_get*`` helpers: ``path``, ``sept``, ``mode``, ``comment``,
    ``title``, ``name_set``, ``header``, ``col_value`` and ``col_ID``.

    ``name_set`` is a comma-separated list of colon-separated entries. The
    first field is the item name, the second its label (the name itself when
    no other field is given), the third and fourth are colour names.

    Parameters
    ----------
    ids : list
        IDs to look up; a row is used only when its ID column is in this
        list, and the row is emitted with the position of that ID in ``ids``.
    sec : str
        Name of the config section describing this sub-plot.
    config
        Configuration object handed to the ``tools.config_get*`` helpers.

    Returns
    -------
    list or None
        ``[data_text, item, colors_n, label, title]``. ``None`` is returned
        when the data file does not exist or the header line lacks one of
        the configured column names.
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default="")
    if not os.path.exists(input_file):
        print("[ERROR] file is not exist. %s" % input_file)
        return None

    # The separator is written escaped in the config file; turn the escape
    # sequences back into the real control characters.
    sept = (tools.config_getstr(config, sec, "sept")
            .replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r"))
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    label = []
    item = []
    colors_n_di = {}
    colors_h_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        fields = convert.text_to_list(name_set, ":")
        for i, text in enumerate(fields):
            if i == 0:
                item.append(text)
                if len(fields) == 1:
                    label.append(text)
            elif i == 1:
                label.append(text)
            elif i == 2:
                colors_n_di[fields[0]] = color.name_to_value(text)
            elif i == 3:
                colors_h_di[fields[0]] = color.name_to_value(text)

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)

    # NOTE(review): this secondary colour table is computed but is not part
    # of the return value; kept so the helper is still exercised as before.
    colors_h_di2 = {}
    for key in colors_n_di:
        if key in colors_h_di:
            continue
        colors_h_di2[key] = color.saturation_down(colors_n_di[key])

    # dict to value (built before "range" mode drops the first item, so in
    # that mode colors_n holds one more entry than item)
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        item.remove(item[0])

    header = []
    value_colname = ""
    if tools.config_getboolean(config, sec, "header"):
        # Positions are resolved from the first data line of the file.
        pos_value = -1
        pos_id = -1
    else:
        # Column numbers in the config are 1-based.
        pos_value = tools.config_getint(config, sec, "col_value") - 1
        pos_id = tools.config_getint(config, sec, "col_ID") - 1
        header = ["", ""]
        # Only used to label error messages about unparsable values.
        value_colname = tools.config_getstr(config, sec, "col_value")

    # IDs that have not been seen in the file yet
    unlookup = list(ids)

    data_text = ""
    values = []
    with open(input_file) as handle:
        for line in handle:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue
            if comment != "" and line.startswith(comment):
                continue

            if len(header) == 0:
                header = convert.text_to_list(line, sept)
                try:
                    value_colname = tools.config_getstr(config, sec, "col_value")
                    pos_value = header.index(value_colname)
                    id_colname = tools.config_getstr(config, sec, "col_ID")
                    pos_id = header.index(id_colname)
                except Exception as e:
                    # Exceptions have no ``message`` attribute on Python 3.
                    print(e)
                    return None
                continue

            cols = convert.text_to_list(line, sept)
            row_id = cols[pos_id]
            if row_id not in ids:
                continue
            # An ID may appear on several rows; it is only pending once.
            if row_id in unlookup:
                unlookup.remove(row_id)

            id_pos = ids.index(row_id)

            if mode == "fix":
                if cols[pos_value] in item:
                    data_text += subdata_data_template.format(id=id_pos, item=item.index(cols[pos_value]))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue
            elif mode == "range" or mode == "gradient":
                try:
                    values.append(float(cols[pos_value]))
                except (TypeError, ValueError) as e:
                    print("%s: data type is invalid.\n%s" % (value_colname, e))
                    continue
                data_text += subdata_data_template.format(id=id_pos, item=cols[pos_value])

    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    if mode == "gradient" and len(values) > 0:
        # The first two items become the observed value range.
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the input file to JavaScript data and write it to output_file.

    The written file consists of the header, the dataset (genome, IDs,
    groups, tooltip format), one entry per usable link, the selection-bar
    data, the contents of ./templates/data_ca.js and the footer.

    Parameters
    ----------
    input_file : str : The absolute path of formatted data file
    output_file: str : The absolute path of JavaScript file
    positions  : dict: A nested dictionary with "must" and "option" as keys
    config     : configparser.RawConfigParser

    Return
    ------
    None when the genome size list is empty, the data file cannot be loaded
    or the data file has no rows.
    On success, return a dictionary: {"id_list": [...], "group_list": [...], "color": [...]}
        id_list   : The sorted, de-duplicated values of the id column
        group_list: The names of groups
        color     : The colors of groups
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import os
    import math

    # genome_size: nested list; elements 1, 2 and 3 of each entry are used
    # below as size, color and label, and the list position as "Chr".
    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # cols_di: dictionary that merges must and option values (key -> column title)
    cols_di = merge.position_to_dict(positions)
    has_group = "group" in cols_di
    group_pos = -1
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # Normalise group names in place: no spaces, no empty names.
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_") if group != "" else "_blank_"
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]  # purple, green

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # id_list: values of the id column, sorted, without duplicates or blanks
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    id_index = dict((iid, n) for n, iid in enumerate(id_list))

    # option_keys: every key of cols_di except id, the must keys and group
    option_keys = tools.dict_keys(cols_di)
    for fixed_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(fixed_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Column positions are the same for every row; resolve them once.
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # node_size_select: size used to split chromosomes for the selection bars
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    node_key = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"

    with open(output_file, "w") as f:
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=node_size_select,
            genome_size=genome,
            IDs=convert.list_to_text(id_list),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # Links
        f.write(js_links_1)
        data_links = []
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # insite_genome returns [index in genome_size, rang]; rang > 0 is
            # reported as an out-of-range break point, rang < 0 is skipped
            # silently.
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            # group_id: index into groups, or -1 when the row's group is not listed
            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            tooltip_items = [row[col] for col in tooltip_cols]

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)

        # Selection bars: per group, map each node key to the ids touching it
        link = [{} for _ in groups]
        for iid, index1, pos1, index2, pos2, group_id in data_links:
            if group_id < 0:
                # A negative index would silently select the last group.
                continue
            bp1 = node_key.format(Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = node_key.format(Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            bucket = link[group_id]
            bucket.setdefault(bp1, []).append(iid)
            if bp1 != bp2:
                bucket.setdefault(bp2, []).append(iid)

        value_chunks = []
        key_chunks = []
        item_chunks = []
        for bucket in link:
            values = []  # number of ids (duplicates counted) per node
            keys = []    # [genome_size index, node position] per node
            items = []   # id_list indexes (duplicates removed) per node
            for bp in sorted(bucket.keys()):
                ids_at_node = bucket[bp]
                values.append(len(ids_at_node))

                parts = bp.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                items.append([id_index[t] for t in sorted(set(ids_at_node))])

            value_chunks.append("[%s]," % (",".join(map(str, values)).replace(" ", "")))
            key_chunks.append("[%s]," % (",".join(map(str, keys)).replace(" ", "")))
            item_chunks.append("[%s]," % (",".join(map(str, items)).replace(" ", "")))

        f.write(js_selection.format(
            value="".join(value_chunks),
            key="".join(key_chunks),
            item="".join(item_chunks)
        ))

        # Rest of the JavaScript file and footer
        template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_ca.js"
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    return {"id_list": id_list, "group_list": groups, "color": colors_n}
def convert_tojs(params, config):
    """Convert a signature JSON file into a JavaScript data file.

    Parameters
    ----------
    params : dict
        Uses ``params["data"]`` (path of the JSON input), ``params["dir"]``
        (output directory) and ``params["ellipsis"]`` (prefix of the output
        file names).
    config
        Configuration object handed to the ``tools.config_get*`` helpers;
        the sections "result_format_signature" and "signature" are read.

    Returns
    -------
    dict or None
        ``None`` when the JSON file cannot be read or the substitution list
        length is not a power of 4, ``{}`` when there is no signature, and
        otherwise ``{"sig_num", "js", "html", "intergral"}``. The spelling
        of the last key is part of the existing interface and is kept.
    """
    import json
    import itertools
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    try:
        with open(params["data"]) as data_file:
            jsonData = json.load(data_file)
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3.
        print("failure open data %s, %s" % (params["data"], e))
        return None

    key_Ids = tools.config_getstr(config, "result_format_signature", "key_id")
    key_signature = tools.config_getstr(config, "result_format_signature", "key_signature")
    key_mutations = tools.config_getstr(config, "result_format_signature", "key_mutation")
    key_mutation_count = tools.config_getstr(config, "result_format_signature", "key_mutation_count")

    signatures = jsonData[key_signature]
    sig_num = len(signatures)
    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, "result_format_signature", "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # axis-y max: a negative configured value means "use the data maximum"
    sig_y_max = tools.config_getint(config, "signature", "signature_y_max")
    if sig_y_max < 0:
        for sig in signatures:
            for sub in sig:
                m = max(sub)
                if sig_y_max < m:
                    sig_y_max = m

    # route list: the list length must be 4^N. Integer arithmetic is used
    # because a floating-point logarithm can be inexact and fails on 0.
    sub_num = len(signatures[0][0])
    depth = 0
    remaining = sub_num
    while remaining > 1 and remaining % 4 == 0:
        remaining //= 4
        depth += 1
    if remaining != 1:
        print("substitution's list length is invalid (%d, not number 4^N)" % sub_num)
        return None

    route_list = list(itertools.product(("A", "C", "G", "T"), repeat=depth))
    route_id = ["".join(p) for p in route_list]

    # substruction
    sub_di = [
        {"name": "C > A", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoA")},
        {"name": "C > G", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoG")},
        {"name": "C > T", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoT")},
        {"name": "T > A", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoA")},
        {"name": "T > C", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoC")},
        {"name": "T > G", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoG")},
    ]

    # Each route is the left half of the bases, the reference base, and the
    # right half, all joined by "p".
    half = depth // 2
    route_sides = [("p".join(r[0:half]), "p".join(r[half:])) for r in route_list]

    substruction = ""
    for sub in sub_di:
        route = [left + "p" + sub["ref"] + "p" + right for left, right in route_sides]
        substruction += js_substruction_template.format(
            name=sub["name"], color=sub["color"], route=convert.list_to_text(route))

    # Id list
    id_txt = ""
    if key_Ids in jsonData:
        id_txt = convert.list_to_text(jsonData[key_Ids])

    # mutations
    mutations_txt = ""
    if key_mutations in jsonData:
        mutations_txt = "".join(
            "[%d,%d,%f]," % (m[0], m[1], m[2]) for m in jsonData[key_mutations])

    # signature
    sig_chunks = []
    for sig in signatures:
        rows = "".join("[" + ",".join(map(str, sub)) + "]," for sub in sig)
        sig_chunks.append("[" + rows + "],")
    dataset_sig = "".join(sig_chunks)

    mutation_count_txt = ""
    if (key_mutation_count != "") and (key_mutation_count in jsonData):
        mutation_count_txt = "".join("%d," % v for v in jsonData[key_mutation_count])

    # output: the background counts as one extra signature in the file names
    sig_num_sift = 1 if use_background else 0

    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))
    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    keys_di = {"sig": "", "route": "", "id": ""}
    content = js_header + js_dataset.format(
        Ids=id_txt,
        signatures=convert.list_to_text(signature_list),
        colors=convert.list_to_text(sig_color_list),
        dataset_sig_max=sig_y_max,
        mutations=mutations_txt,
        dataset_sig=dataset_sig,
        route_id=convert.list_to_text(route_id),
        substruction=substruction,
        signature_title=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_title"),
        signature_partial=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_signature_partial"),
        mutation_title=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_title"),
        mutation_partial=convert.pyformat_to_jstooltip_text(keys_di, config, "signature", "", "tooltip_format_mutation_partial"),
        mutation_count=mutation_count_txt,
    ) + js_function

    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(content)

    # The integral view needs all three optional keys to be configured.
    integral = True
    if key_Ids == "" or key_mutations == "" or key_mutation_count == "":
        integral = False

    return {"sig_num": sig_num,
            "js": js_file,
            "html": html_file,
            "intergral": integral,
            }
def convert_tojs(input_file, output_file, positions, config):
    """Convert the input data file to JavaScript data written to output_file.

    The written file consists of the header, the dataset (genome, IDs,
    groups, tooltip format), one entry per usable link and the shared
    JavaScript functions.

    Parameters
    ----------
    input_file : str
        Path of the data file to load.
    output_file : str
        Path of the JavaScript file to write.
    positions
        Column positions, merged into one dictionary by
        ``merge.position_to_dict``.
    config
        Configuration object handed to the ``tools.config_get*`` helpers.

    Returns
    -------
    dict or None
        ``None`` when the genome size list is empty, the data file cannot be
        loaded or it has no rows; otherwise
        ``{"id_list": [...], "group_list": [...], "color": [...]}``.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    cols_di = merge.position_to_dict(positions)

    # data read
    # NOTE(review): the separator comes from "merge_format_sv" while the
    # comment prefix comes from "result_format_sv" - confirm this is intended.
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_sv", "sept"),
            comment=tools.config_getstr(config, "result_format_sv", "comment")
        )
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    group_pos = -1
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # Normalise group names in place: no spaces, no empty names.
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_") if group != "" else "_blank_"
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "sv", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter Chromosome", "Intra Chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # ID list: sorted, without duplicates or blanks
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # dict.keys() is a view on Python 3 and has no remove(); take a sorted
    # list so the tooltip column order is deterministic.
    option_keys = sorted(cols_di.keys())
    for fixed_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(fixed_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Column positions are the same for every row; resolve them once.
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    with open(output_file, "w") as out:
        out.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=tools.config_getint(config, "sv", "selector_split_size", 5000000),
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "sv", "result_format_sv", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        out.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # insite_genome returns [index in genome_size, rang]; rang > 0 is
            # reported as an out-of-range break point, rang < 0 is skipped
            # silently.
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[col] for col in tooltip_cols]

            # group_id: index into groups, or -1 when the row's group is not listed
            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            out.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        out.write(js_links_2)
        out.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def convert_tojs(input_file, output_file, positions, config):
    """Convert the input data file to JavaScript data written to output_file.

    The written file consists of the header, the dataset (genome, IDs,
    groups, tooltip format), one entry per usable link, the selection-bar
    data and the shared JavaScript functions.

    Parameters
    ----------
    input_file : str
        Path of the data file to load.
    output_file : str
        Path of the JavaScript file to write.
    positions
        Column positions, merged into one dictionary by
        ``merge.position_to_dict``.
    config
        Configuration object handed to the ``tools.config_get*`` helpers.

    Returns
    -------
    dict or None
        ``None`` when the genome size list is empty, the data file cannot be
        loaded or it has no rows; otherwise
        ``{"id_list": [...], "group_list": [...], "color": [...]}``.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import math

    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    group_pos = -1
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # Normalise group names in place: no spaces, no empty names.
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = group.replace(" ", "_") if group != "" else "_blank_"
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # ID list: sorted, without duplicates or blanks
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    id_index = dict((iid, n) for n, iid in enumerate(Ids))

    option_keys = tools.dict_keys(cols_di)
    for fixed_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(fixed_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Column positions are the same for every row; resolve them once.
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # node_size
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    node_key = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"

    with open(output_file, "w") as out:
        out.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=node_size_select,
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        data_links = []
        out.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # insite_genome returns [index in genome_size, rang]; rang > 0 is
            # reported as an out-of-range break point, rang < 0 is skipped
            # silently.
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[col] for col in tooltip_cols]

            # group_id: index into groups, or -1 when the row's group is not listed
            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            out.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        out.write(js_links_2)

        # integral bar item: per group, map each node key to the ids touching it
        link = [{} for _ in groups]
        for iid, index1, pos1, index2, pos2, group_id in data_links:
            if group_id < 0:
                # A negative index would silently select the last group.
                continue
            bp1 = node_key.format(Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = node_key.format(Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            bucket = link[group_id]
            bucket.setdefault(bp1, []).append(iid)
            if bp1 != bp2:
                bucket.setdefault(bp2, []).append(iid)

        item_chunks = []
        value_chunks = []
        key_chunks = []
        for bucket in link:
            items = []
            values = []
            keys = []
            for bp in bucket:
                ids_at_node = bucket[bp]
                values.append(len(ids_at_node))

                # split key to chr and pos
                parts = bp.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                # delete duplication
                items.append([id_index[t] for t in sorted(set(ids_at_node))])

            value_chunks.append("[%s]," % (",".join(map(str, values)).replace(" ", "")))
            key_chunks.append("[%s]," % (",".join(map(str, keys)).replace(" ", "")))
            item_chunks.append("[%s]," % (",".join(map(str, items)).replace(" ", "")))

        out.write(js_selection.format(
            value="".join(value_chunks),
            key="".join(key_chunks),
            item="".join(item_chunks)))

        out.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def load_subdata(ids, sec, config):
    """Load one sub-plot data file and render its rows as JavaScript text.

    Settings are read from section ``sec`` of ``config`` through the paplot
    ``tools.config_get*`` helpers: ``path``, ``sept``, ``mode``, ``comment``,
    ``title``, ``name_set``, ``header``, ``col_value`` and ``col_ID``.

    ``name_set`` is a comma-separated list of colon-separated entries. The
    first field is the item name, the second its label (the name itself when
    no other field is given), the third and fourth are colour names.

    Parameters
    ----------
    ids : list
        IDs to look up; a row is used only when its ID column is in this
        list, and the row is emitted with the position of that ID in ``ids``.
    sec : str
        Name of the config section describing this sub-plot.
    config
        Configuration object handed to the ``tools.config_get*`` helpers.

    Returns
    -------
    list or None
        ``[data_text, item, colors_n, label, title]``. ``None`` is returned
        when the data file does not exist or the header line lacks one of
        the configured column names.
    """
    import os
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    input_file = tools.config_getpath(config, sec, "path", default="../../example/sample_summary.csv")
    if not os.path.exists(input_file):
        print("[ERROR] file is not exist. %s" % input_file)
        return None

    sept = tools.config_getstr(config, sec, "sept")
    mode = tools.config_getstr(config, sec, "mode")
    comment = tools.config_getstr(config, sec, "comment")
    title = tools.config_getstr(config, sec, "title")

    label = []
    item = []
    colors_n_di = {}
    colors_h_di = {}
    for name_set in tools.config_getstr(config, sec, "name_set").split(","):
        fields = convert.text_to_list(name_set, ":")
        for i, text in enumerate(fields):
            if i == 0:
                item.append(text)
                if len(fields) == 1:
                    label.append(text)
            elif i == 1:
                label.append(text)
            elif i == 2:
                colors_n_di[fields[0]] = color.name_to_value(text)
            elif i == 3:
                colors_h_di[fields[0]] = color.name_to_value(text)

    # fill in undefined items
    colors_n_di = color.create_color_dict(item, colors_n_di, color.osaka_subway_colors)

    # NOTE(review): this secondary colour table is computed but is not part
    # of the return value; kept so the helper is still exercised as before.
    colors_h_di2 = {}
    for key in colors_n_di:
        # dict.has_key() was removed in Python 3; ``in`` works on both.
        if key in colors_h_di:
            continue
        colors_h_di2[key] = color.Saturation_down(colors_n_di[key])

    # dict to value (built before "range" mode drops the first item, so in
    # that mode colors_n holds one more entry than item)
    colors_n = [colors_n_di[key] for key in item]

    if mode == "range":
        item.remove(item[0])

    header = []
    value_colname = ""
    if tools.config_getboolean(config, sec, "header"):
        # Positions are resolved from the first data line of the file.
        pos_value = -1
        pos_ID = -1
    else:
        # NOTE(review): the configured numbers are used as list indexes
        # without adjustment, i.e. they are treated as 0-based - confirm.
        pos_value = tools.config_getint(config, sec, "col_value")
        pos_ID = tools.config_getint(config, sec, "col_ID")
        header = ["", ""]
        # Only used to label error messages about unparsable values.
        value_colname = tools.config_getstr(config, sec, "col_value")

    # IDs that have not been seen in the file yet
    unlookup = list(ids)

    data_text = ""
    values = []
    with open(input_file) as handle:
        for line in handle:
            line = line.strip()
            if len(line.replace(sept, "")) == 0:
                continue
            if comment != "" and line.startswith(comment):
                continue

            if len(header) == 0:
                header = convert.text_to_list(line, sept)
                try:
                    value_colname = tools.config_getstr(config, sec, "col_value")
                    pos_value = header.index(value_colname)
                    id_colname = tools.config_getstr(config, sec, "col_ID")
                    pos_ID = header.index(id_colname)
                except Exception as e:
                    # Exceptions have no ``message`` attribute on Python 3.
                    print(e)
                    return None
                continue

            cols = convert.text_to_list(line, sept)
            row_id = cols[pos_ID]
            if row_id not in ids:
                continue
            # An ID may appear on several rows; it is only pending once.
            if row_id in unlookup:
                unlookup.remove(row_id)

            id_pos = ids.index(row_id)

            if mode == "fix":
                if cols[pos_value] in item:
                    data_text += subdata_data_template.format(id=id_pos, item=item.index(cols[pos_value]))
                else:
                    print("[" + sec + "] name_set: data is undefined." + cols[pos_value] + "\n")
                    continue
            elif mode == "range" or mode == "gradient":
                # Both numeric modes validate and emit the value the same way.
                try:
                    values.append(float(cols[pos_value]))
                except (TypeError, ValueError) as e:
                    print("%s: data type is invalid.\n%s" % (value_colname, e))
                    continue
                data_text += subdata_data_template.format(id=id_pos, item=cols[pos_value])

    if len(unlookup) > 0:
        print("[WARNING] can't find IDs subplot data.")
        print(unlookup)

    if mode == "gradient" and len(values) > 0:
        # The first two items become the observed value range.
        item[0] = min(values)
        item[1] = max(values)

    return [data_text, item, colors_n, label, title]