def convert_tojs(params, config):
    """Write the pmsignature JavaScript data file for one signature count.

    Parameters
    ----------
    params : dict
        ``params["data"]``     : path of the input JSON file
        ``params["dir"]``      : directory the JavaScript file is written to
        ``params["ellipsis"]`` : prefix used to build the output file names
    config : configuration object handed to the ``tools.config_get*`` helpers

    Return
    ------
    None : the JSON file could not be opened or parsed
    {}   : the JSON file holds no signature (empty ``key_ref`` list)
    dict : {"sig_num": int, "js": str, "html": str, "intergral": bool}
           The key is spelled "intergral" on purpose: it is part of the
           interface that callers read, so the spelling is preserved.
    """
    import os
    import json
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    # The handle is closed by the context manager, and the exception itself
    # is printed: ``e.message`` does not exist on Python 3 exceptions.
    try:
        with open(params["data"]) as f_data:
            json_data = json.load(f_data)
    except Exception as e:
        print("failure open data %s, %s" % (params["data"], e))
        return None

    section = "result_format_pmsignature"
    key_id_list = tools.config_getstr(config, section, "key_id")
    key_ref = tools.config_getstr(config, section, "key_ref")
    key_alt = tools.config_getstr(config, section, "key_alt")
    key_strand = tools.config_getstr(config, section, "key_strand")
    key_mutations = tools.config_getstr(config, section, "key_mutation")
    key_mutation_count = tools.config_getstr(config, section, "key_mutation_count")

    sig_num = len(json_data[key_ref])
    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, section, "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # Id list
    id_txt = ""
    if key_id_list in json_data:
        id_txt = convert.list_to_text(json_data[key_id_list])

    # mutations
    mutations_txt = ""
    if key_mutations in json_data:
        mutations_txt = "".join(
            "[%d,%d,%f]," % (m[0], m[1], m[2]) for m in json_data[key_mutations])

    def row_text(values):
        # One list rendered as "[v1,v2,...]," (the trailing comma is kept).
        return "[" + ",".join(map(str, values)) + "],"

    def nested_text(signatures):
        # A list of lists of lists rendered as "[[..],[..],],[[..],],".
        return "".join(
            "[" + "".join(row_text(sub) for sub in sig) + "],"
            for sig in signatures)

    # signature
    dataset_ref = nested_text(json_data[key_ref])
    dataset_alt = nested_text(json_data[key_alt])
    dataset_strand = "".join(row_text(sig) for sig in json_data[key_strand])

    # tooltips
    keys_di = {
        "a": "", "c": "", "g": "", "t": "",
        "ca": "", "cg": "", "ct": "", "ta": "", "tc": "", "tg": "",
        "plus": "", "minus": "", "id": "", "sig": "",
    }

    def tooltip_text(option):
        return convert.pyformat_to_jstooltip_text(
            keys_di, config, "pmsignature", "", option)

    # for ref: one tooltip entry per element of the first signature
    tooltip_refs_txt = ""
    for r in range(len(json_data[key_ref][0])):
        tooltip_refs_txt += js_tooltip_ref_template.format(
            index=r, tooltip_format=tooltip_text("tooltip_format_ref"))

    mutation_count_txt = ""
    if key_mutation_count != "" and key_mutation_count in json_data:
        mutation_count_txt = "".join(
            "%d," % v for v in json_data[key_mutation_count])

    # output
    # The background entry counts as one extra signature in the file names.
    sig_num_sift = 1 if use_background else 0
    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))
    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    template_path = (os.path.dirname(os.path.abspath(__file__))
                     + "/templates/data_pmsignature.js")

    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(js_header + js_dataset.format(
            Ids=id_txt,
            color_A=tools.config_getstr(config, "pmsignature", "color_A", "#06B838"),
            color_C=tools.config_getstr(config, "pmsignature", "color_C", "#609CFF"),
            color_G=tools.config_getstr(config, "pmsignature", "color_G", "#B69D02"),
            color_T=tools.config_getstr(config, "pmsignature", "color_T", "#F6766D"),
            color_plus=tools.config_getstr(config, "pmsignature", "color_plus", "#00BEC3"),
            color_minus=tools.config_getstr(config, "pmsignature", "color_minus", "#F263E2"),
            signatures=convert.list_to_text(signature_list),
            colors=convert.list_to_text(sig_color_list),
            mutations=mutations_txt,
            dataset_ref=dataset_ref,
            dataset_alt=dataset_alt,
            dataset_strand=dataset_strand,
            tooltip_ref=tooltip_refs_txt,
            tooltip_alt=tooltip_text("tooltip_format_alt"),
            tooltip_strand=tooltip_text("tooltip_format_strand"),
            mutation_title=tooltip_text("tooltip_format_mutation_title"),
            mutation_partial=tooltip_text("tooltip_format_mutation_partial"),
            mutation_count=mutation_count_txt,
        ))

        # Append the static JavaScript functions shipped with the package.
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    # The result is flagged as not integral when any of these keys is unset.
    integral = not (key_id_list == "" or key_mutations == ""
                    or key_mutation_count == "")

    return {
        "sig_num": sig_num,
        "js": js_file,
        "html": html_file,
        "intergral": integral,
    }
def convert_tojs(input_file, output_file, positions, config):
    """Convert a mutation table into the JavaScript data file of the mutation plot.

    Parameters
    ----------
    input_file : str : path of the formatted data file
    output_file: str : path of the JavaScript file to write
    positions  : passed to ``merge.position_to_dict`` to obtain the
                 key -> column title dictionary (``cols_di``)
    config     : configuration object handed to the ``tools.config_get*`` helpers

    Return
    ------
    None : the data file could not be loaded or holds no rows
    dict : {"subdata": [{"pos": 1 or 2, "label": ..., "color": ..., "title": ...}, ...]}
    """
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_mutation", "sept"),
            comment=tools.config_getstr(config, "result_format_mutation", "comment"),
        )
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions; print ``e`` itself.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Column indexes used for every row (df.data is known to be non-empty here).
    group_pos = df.name_to_index(cols_di["group"])
    id_pos = df.name_to_index(cols_di["id"])

    # func replace: an empty group value is stored as "_blank_"
    for row in df.data:
        if row[group_pos] == "":
            row[group_pos] = "_blank_"

    [funcs, colors_n] = convert.group_list(
        df.column(cols_di["group"]), "mutation", "group", config)

    # ID list: sorted, without duplicates or empty values
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # gene list
    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["group"]),
                       df.column(cols_di["id"]),
                       funcs, id_list, config)

    option_keys = tools.dict_keys(cols_di)
    option_keys.remove("id")
    option_keys.remove("group")
    option_keys.remove("gene")

    sept_group = tools.config_getstr(config, "result_format_mutation", "sept_group")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    def tooltip_text(option):
        return convert.pyformat_to_jstooltip_text(
            cols_di, config, "mutation", "result_format_mutation", option)

    template_path = (os.path.dirname(os.path.abspath(__file__))
                     + "/templates/data_mutation.js")

    dataset = {}
    subdata = []

    with open(output_file, "w") as f:
        # mutation list
        f.write(js_header)
        f.write(js_mutations_1)

        # mutations[id][func][gene] -> number of rows
        # tooltips[id][func][gene]  -> list of tooltip item lists, one per row
        mutations = {}
        tooltips = {}
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            if iid not in mutations:
                mutations[iid] = {}
                tooltips[iid] = {}

            func_split = convert.text_to_list(row[group_pos], sept_group)

            tooltip_items = [
                row[df.name_to_index(cols_di[key])]
                for key in option_keys if cols_di[key] != ""
            ]

            for func in func_split:
                if func not in mutations[iid]:
                    mutations[iid][func] = {}
                    tooltips[iid][func] = {}

                gene_split = convert.text_to_list(
                    row[df.name_to_index(cols_di["gene"])], sept_gene)
                for gene in gene_split:
                    if gene not in mutations[iid][func]:
                        mutations[iid][func][gene] = 1
                        tooltips[iid][func][gene] = []
                    else:
                        mutations[iid][func][gene] += 1
                    tooltips[iid][func][gene].append(tooltip_items)

        # Only combinations whose id, func and gene are all listed are written.
        mutations_sum = 0
        for iid in tools.dict_keys(mutations):
            idx_i = convert.value_to_index(id_list, iid, -1)
            for func in tools.dict_keys(mutations[iid]):
                idx_f = convert.value_to_index(funcs, func, -1)
                for gene in tools.dict_keys(mutations[iid][func]):
                    idx_g = convert.value_to_index(genes, gene, -1)
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tips_text = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene])
                    count = mutations[iid][func][gene]
                    f.write(mu_mutations_template.format(
                        ID=idx_i, func=idx_f, gene=idx_g,
                        num=count, tooltip=tips_text))
                    mutations_sum += count

        f.write(js_mutations_2.format(mutations_sum=mutations_sum))

        # write id, func, gene ... list
        f.write(js_dataset.format(
            Ids=convert.list_to_text(id_list),
            genes=convert.list_to_text(convert.list_prohibition(genes)),
            funcs=convert.list_to_text(convert.list_prohibition(funcs)),
            func_colors_n=convert.list_to_text(colors_n),
            mutation_header=convert.list_to_text(option_keys),
            checker_title=tooltip_text("tooltip_format_checker_title"),
            checker_partial=tooltip_text("tooltip_format_checker_partial"),
            gene_title=tooltip_text("tooltip_format_gene_title"),
            gene_partial=tooltip_text("tooltip_format_gene_partial"),
            id_title=tooltip_text("tooltip_format_id_title"),
            id_partial=tooltip_text("tooltip_format_id_partial"),
        ))

        ##### subdata #####
        f.write(js_subdata_1)
        counter = 0
        for sec in config.sections():
            # The section prefix decides the position recorded for the subplot.
            if sec.startswith("mutation_subplot_type1_"):
                pos = 1
            elif sec.startswith("mutation_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(id_list, sec, config)
            if ret_val is None:
                continue
            [data_text, item, sub_colors, label, title] = ret_val

            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(
                name=name,
                title=title,
                type=tools.config_getstr(config, sec, "mode"),
                item=convert.list_to_text(item),
                label=convert.list_to_text(label),
                colors_n=convert.list_to_text(sub_colors),
                data=data_text))

            subdata.append({"pos": pos, "label": label,
                            "color": sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    dataset["subdata"] = subdata
    return dataset
def convert_tojs(params, config):
    """Write the signature JavaScript data file for one signature count.

    Parameters
    ----------
    params : dict
        ``params["data"]``     : path of the input JSON file
        ``params["dir"]``      : directory the JavaScript file is written to
        ``params["ellipsis"]`` : prefix used to build the output file names
    config : configuration object handed to the ``tools.config_get*`` helpers

    Return
    ------
    None : the JSON file could not be read, or the length of a substitution
           list is not a power of 4
    {}   : the JSON file holds no signature
    dict : {"sig_num": int, "js": str, "html": str, "intergral": bool}
           The key is spelled "intergral" on purpose: it is part of the
           interface that callers read, so the spelling is preserved.
    """
    import json
    import itertools
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    # data read
    # The handle is closed by the context manager, and the exception itself
    # is printed: ``e.message`` does not exist on Python 3 exceptions.
    try:
        with open(params["data"]) as f_data:
            jsonData = json.load(f_data)
    except Exception as e:
        print("failure open data %s, %s" % (params["data"], e))
        return None

    section = "result_format_signature"
    key_Ids = tools.config_getstr(config, section, "key_id")
    key_signature = tools.config_getstr(config, section, "key_signature")
    key_mutations = tools.config_getstr(config, section, "key_mutation")
    key_mutation_count = tools.config_getstr(config, section, "key_mutation_count")

    sig_num = len(jsonData[key_signature])
    if sig_num == 0:
        print("no data %s" % params["data"])
        return {}

    # signature names
    signature_list = ["Signature %d" % (s + 1) for s in range(sig_num)]

    # each signature colors
    sig_color_list = color.create_color_array(sig_num, color.r_set2)

    # use background?
    use_background = tools.config_getboolean(config, section, "background")
    if use_background:
        signature_list.append("Background ")
        sig_color_list.append(color.r_set2_gray)

    # axis-y max: a negative configured value means "use the largest data value"
    sig_y_max = tools.config_getint(config, "signature", "signature_y_max")
    if sig_y_max < 0:
        for sig in jsonData[key_signature]:
            for sub in sig:
                m = max(sub)
                if sig_y_max < m:
                    sig_y_max = m

    # route list
    # The substitution list length must be 4^N. The exponent is found with
    # exact integer arithmetic; a floating-point logarithm is exposed to
    # rounding and raises on a length of 0.
    sub_num = len(jsonData[key_signature][0][0])
    exponent = 0
    remaining = sub_num
    while remaining > 1 and remaining % 4 == 0:
        remaining //= 4
        exponent += 1
    if remaining != 1:
        print("substitution's list length is invalid (%d, not number 4^N)" % sub_num)
        return None

    route_list = list(itertools.product(("A", "C", "G", "T"), repeat=exponent))
    route_id = ["".join(p) for p in route_list]

    # substruction
    sub_di = [
        {"name": "C > A", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoA")},
        {"name": "C > G", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoG")},
        {"name": "C > T", "ref": "C", "color": tools.config_getstr(config, "signature", "alt_color_CtoT")},
        {"name": "T > A", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoA")},
        {"name": "T > C", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoC")},
        {"name": "T > G", "ref": "T", "color": tools.config_getstr(config, "signature", "alt_color_TtoG")},
    ]

    # The reference base is placed between the two halves of each route.
    half = exponent // 2
    substruction = ""
    for sub in sub_di:
        route = [
            "p".join(r[0:half]) + "p" + sub["ref"] + "p" + "p".join(r[half:])
            for r in route_list
        ]
        substruction += js_substruction_template.format(
            name=sub["name"], color=sub["color"],
            route=convert.list_to_text(route))

    # Id list
    id_txt = ""
    if key_Ids in jsonData:
        id_txt = convert.list_to_text(jsonData[key_Ids])

    # mutations
    mutations_txt = ""
    if key_mutations in jsonData:
        mutations_txt = "".join(
            "[%d,%d,%f]," % (m[0], m[1], m[2]) for m in jsonData[key_mutations])

    # signature
    dataset_sig = "".join(
        "[" + "".join("[" + ",".join(map(str, sub)) + "]," for sub in sig) + "],"
        for sig in jsonData[key_signature])

    mutation_count_txt = ""
    if key_mutation_count != "" and key_mutation_count in jsonData:
        mutation_count_txt = "".join(
            "%d," % v for v in jsonData[key_mutation_count])

    # output
    # The background entry counts as one extra signature in the file names.
    sig_num_sift = 1 if use_background else 0
    ellipsis = "%s%d" % (params["ellipsis"], (sig_num + sig_num_sift))
    js_file = "data_%s.js" % ellipsis
    html_file = "graph_%s.html" % ellipsis

    keys_di = {"sig": "", "route": "", "id": ""}

    def tooltip_text(option):
        return convert.pyformat_to_jstooltip_text(
            keys_di, config, "signature", "", option)

    with open(params["dir"] + "/" + js_file, "w") as f:
        f.write(
            js_header
            + js_dataset.format(
                Ids=id_txt,
                signatures=convert.list_to_text(signature_list),
                colors=convert.list_to_text(sig_color_list),
                dataset_sig_max=sig_y_max,
                mutations=mutations_txt,
                dataset_sig=dataset_sig,
                route_id=convert.list_to_text(route_id),
                substruction=substruction,
                signature_title=tooltip_text("tooltip_format_signature_title"),
                signature_partial=tooltip_text("tooltip_format_signature_partial"),
                mutation_title=tooltip_text("tooltip_format_mutation_title"),
                mutation_partial=tooltip_text("tooltip_format_mutation_partial"),
                mutation_count=mutation_count_txt,
            )
            + js_function)

    # The result is flagged as not integral when any of these keys is unset.
    integral = not (key_Ids == "" or key_mutations == ""
                    or key_mutation_count == "")

    return {
        "sig_num": sig_num,
        "js": js_file,
        "html": html_file,
        "intergral": integral,
    }
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the input file to JSON-like data and write it to the JavaScript file.
    Also write the functions and methods that process those data.

    Parameters
    ----------
    input_file : str : The absolute path of formatted data file
    output_file: str : The absolute path of JavaScript file
    positions  : dict: A nested dictionary with "must" and "option" as keys
    config     : configparser.RawConfigParser

    Return
    ------
    None if the genome size list is empty, the data file cannot be loaded,
    or the data file holds no rows.

    On success, return a dictionary: {"id_list": [...], "group_list": [...], "color": [...]}
        id_list   : The values for id column
        group_list: The names of groups
        color     : The colors in groups
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import os
    import math

    # genome_size: a nested list
    # [ [ A chromosome number in lowercase letters,
    #     The size of the 1st element,
    #     The color of the 1st element,
    #     The original name of the 1st element (not necessarily lowercase) or a user-defined name, ], ... ]
    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    # genome: dictionary-style string like this
    # {"chr":"00", "size":249250621, "color":"#BBBBBB", "label":"1",},
    # {"chr":"01", "size":243199373, "color":"#BBBBBB", "label":"2",},
    # ...
    #   chr  : Sequential number
    #   size : Size corresponding to the label
    #   color: Color corresponding to the label
    #   label: Name corresponding to chromosome
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size))

    # Create a data frame that has title and data attributions
    #   title is a list like ['Break1', 'Break2', 'Chr1', 'Chr2', 'Sample']
    #   data is a nested list like [[16019088, 62784483, '14', '12', 'SAMPLE1'], ...]
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions; print ``e`` itself.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Create groups, labels, and colors_n
    # cols_di: a dictionary that merges must and option values
    #        : ex) {'chr1': 'Chr1', 'break1': 'Break1', 'chr2': 'Chr2', 'break2': 'Break2', 'id': 'Sample'}
    cols_di = merge.position_to_dict(positions)

    has_group = "group" in cols_di
    group_pos = None
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])  # index of the group column
        for row in df.data:
            group = row[group_pos]
            # Spaces become underscores; an empty value becomes "_blank_"
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")

        # groups  : list: group names
        # labels  : list: group names
        # colors_n: list: color values for groups
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]  # purple, green

    # Create group_text that is a dictionary-style string with name, label, color
    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups)))

    # id_list: Values for "id" column, sorted without duplicates or empty values
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    # id value -> position in id_list (the values are unique)
    id_index = dict((iid, i) for i, iid in enumerate(id_list))

    # option_keys: the keys of cols_di except id, the must keys, and group
    option_keys = tools.dict_keys(cols_di)
    option_keys.remove("id")      # option key
    option_keys.remove("chr1")    # must key
    option_keys.remove("break1")  # must key
    option_keys.remove("chr2")    # must key
    option_keys.remove("break2")  # must key
    if "group" in option_keys:
        option_keys.remove("group")  # option key

    # node_size: Size to divide chromosomes
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    template_path = os.path.dirname(os.path.abspath(__file__)) + "/templates/data_ca.js"

    with open(output_file, "w") as f:
        # Write header and dataset of JavaScript file
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),  # node size for detailed thumbnails
            node_size_thumb=calc_node_size(genome_size, 250),   # node size for rough thumbnails
            node_size_select=node_size_select,                  # node size for bar graph
            genome_size=genome,                    # string with "chr", "size", "color", and "label" entries
            IDs=convert.list_to_text(id_list),     # text built from the id column values
            group=group_text,                      # string with "name", "label", and "color" entries
            tooltip=convert.pyformat_to_jstooltip_text(
                cols_di, config, "ca", "result_format_ca", "tooltip_format"),  # tooltip format text
            link_header=convert.list_to_text(option_keys),
        ))

        # Write link of JavaScript file
        f.write(js_links_1)  # Write the leading part

        data_links = []
        for row in df.data:
            iid = row[id_pos]  # iid: the value of "id" column

            # Ignore empty string
            if iid == "":
                continue

            chr1 = str(row[df.name_to_index(cols_di["chr1"])])  # chromosome1
            pos1 = row[df.name_to_index(cols_di["break1"])]     # break point1
            chr2 = str(row[df.name_to_index(cols_di["chr2"])])  # chromosome2
            pos2 = row[df.name_to_index(cols_di["break2"])]     # break point2

            # insite_genome returns [index in genome_size, rang];
            # rang > 0 is reported as over range and rang < 0 is skipped silently.
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            # Whether chr1 and chr2 are the same chromosome
            inner_flg = "true" if chr1 == chr2 else "false"

            # group_id: index into groups, or -1 when the row's group is not listed
            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0  # "outer": different chromosomes
            else:
                group_id = 1  # "inner": same chromosome

            # Add an element to data_links
            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            # tooltip_items: values of the option columns (keys mapped to "" are skipped)
            tooltip_items = [
                row[df.name_to_index(cols_di[key])]
                for key in option_keys if cols_di[key] != ""
            ]

            # Write link
            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)  # Write the ending part

        # Write integral bar item
        # link: one dictionary per group, {break point name: [iid, ...]}
        link = [{} for _ in groups]

        bp_format = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"
        for dl_iid, dl_index1, dl_pos1, dl_index2, dl_pos2, group_id in data_links:
            # A link that belongs to no listed group has group_id -1. It must
            # not be indexed as link[-1], which would count it in the last group.
            if group_id < 0:
                continue

            # Chr: The index of genome_size
            # Pos: A break position based on node
            bp1 = bp_format.format(Chr=dl_index1, Pos=int(math.floor(dl_pos1 / node_size_select)))
            bp2 = bp_format.format(Chr=dl_index2, Pos=int(math.floor(dl_pos2 / node_size_select)))

            group_link = link[group_id]
            group_link.setdefault(bp1, []).append(dl_iid)
            # Both break points in the same node count once
            if bp1 != bp2:
                group_link.setdefault(bp2, []).append(dl_iid)

        select_value_text = ""
        select_key_text = ""
        select_item_text = ""
        for group_link in link:
            values = []  # [Number of id, ...]
            keys = []    # [[genome_size index, Break position], ...]
            items = []   # [[id_list index, ...], ...]

            for bp in sorted(group_link.keys()):
                ids_at_bp = group_link[bp]  # duplicates are kept in this list

                # values element
                values.append(len(ids_at_bp))

                # keys element
                parts = bp.split(".")[2].split("_")  # parts: [Chr, Pos]
                keys.append([int(parts[0]), int(parts[1])])

                # items element: id_list indexes, duplicates removed
                items.append([id_index[t] for t in sorted(set(ids_at_bp))])

            select_value_text += "[%s]," % (",".join(map(str, values)).replace(" ", ""))  # += [1,1,...],
            select_key_text += "[%s]," % (",".join(map(str, keys)).replace(" ", ""))      # += [[0,1],[0,25],...],
            select_item_text += "[%s]," % (",".join(map(str, items)).replace(" ", ""))    # += [[9],[8],...],

        f.write(js_selection.format(
            value=select_value_text,
            key=select_key_text,
            item=select_item_text
        ))

        # Write rest of JavaScript file and footer
        with open(template_path) as f_template:  # ./templates/data_ca.js
            f.write(f_template.read())
        f.write(js_footer)

    return {"id_list": id_list, "group_list": groups, "color": colors_n}
def convert_tojs(input_file, output_file, positions, config):
    """Convert a QC table into the JavaScript data file of the QC plot.

    Parameters
    ----------
    input_file : str : path of the formatted data file
    output_file: str : path of the JavaScript file to write
    positions  : passed to ``merge.position_to_dict`` to obtain the
                 key -> column title dictionary (``cols_di``)
    config     : configuration object; every section whose name starts
                 with "qc_chart_" describes one chart

    Return
    ------
    None : the data file could not be loaded or holds no rows
    dict : {"plots": [chart id, ...]} in the order the charts were written
    """
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_qc", "sept"),
            comment=tools.config_getstr(config, "result_format_qc", "comment"),
        )
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions; print ``e`` itself.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    plots_text = ""
    plots_option = []

    # Sections in sorted order, with the brush chart moved to the front.
    config_sections = sorted(config.sections())
    if "qc_chart_brush" in config_sections:
        config_sections.remove("qc_chart_brush")
        config_sections.insert(0, "qc_chart_brush")

    # Iterate the ordered list built above. Iterating config.sections()
    # again would silently discard that ordering.
    for sec in config_sections:
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        stack_id = []
        label = []
        colors_di = {}
        counter = 0
        # Each "name_set" entry is "label" or "label:color".
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(counter))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])
            counter += 1

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        )
        plots_option.append(chart_id)

    # ID list: sorted, without duplicates or empty values
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header
    headers = tools.dict_keys(cols_di)

    template_path = (os.path.dirname(os.path.abspath(__file__))
                     + "/templates/data_qc.js")

    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(id_list),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values: one "[v1,v2,...]," entry per row with a non-empty id
        for row in df.data:
            if row[id_pos] == "":
                continue

            cells = []
            for item in headers:
                val = row[df.name_to_index(cols_di[item])]
                if isinstance(val, str):
                    cells.append("'" + val + "'")   # strings are quoted
                elif isinstance(val, float):
                    cells.append('%.2f' % val)      # floats keep two decimals
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)

        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    return {"plots": plots_option}
def convert_tojs(input_file, output_file, positions, config):
    """Convert a QC table into the JavaScript data file of the QC plot.

    Parameters
    ----------
    input_file : str : path of the formatted data file
    output_file: str : path of the JavaScript file to write
    positions  : passed to ``merge.position_to_dict`` to obtain the
                 key -> column title dictionary (``cols_di``)
    config     : configuration object; every section whose name starts
                 with "qc_chart_" describes one chart

    Return
    ------
    None : the data file could not be loaded or holds no rows
    dict : {"plots": [chart id, ...]} in the order the charts were written
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    # NOTE(review): the separator is read from "merge_format_qc" while the
    # comment marker is read from "result_format_qc" - confirm this is intended.
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_qc", "sept"),
            comment=tools.config_getstr(config, "result_format_qc", "comment"),
        )
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions; print ``e`` itself.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    plots_text = ""
    plots_option = []

    # Sections in sorted order, with the brush chart moved to the front.
    config_sections = sorted(config.sections())
    if "qc_chart_brush" in config_sections:
        config_sections.remove("qc_chart_brush")
        config_sections.insert(0, "qc_chart_brush")

    # Iterate the ordered list built above. Iterating config.sections()
    # again would silently discard that ordering.
    for sec in config_sections:
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        stack_id = []
        label = []
        colors_di = {}
        counter = 0
        # Each "name_set" entry is "label" or "label:color".
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(counter))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])
            counter += 1

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value
        colors_li = [colors_di[key] for key in label]

        plots_text += plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        )
        plots_option.append(chart_id)

    # ID list: sorted, without duplicates or empty values
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header
    # sorted() returns a list on both Python 2 and 3; a Python 3 dict view
    # has no .sort() method.
    headers = sorted(cols_di.keys())

    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(Ids),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values: one "[v1,v2,...]," entry per row with a non-empty id
        for row in df.data:
            if row[id_pos] == "":
                continue

            cells = []
            for item in headers:
                val = row[df.name_to_index(cols_di[item])]
                if isinstance(val, str):
                    cells.append("'" + val + "'")   # strings are quoted
                elif isinstance(val, float):
                    cells.append('%.2f' % val)      # floats keep two decimals
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)
        f.write(js_function)

    return {"plots": plots_option}
def convert_tojs(input_file, output_file, positions, config):
    """Convert an SV table into the JavaScript data file used by the SV report.

    Rows are read with ``data_frame.load_file``; each row with a non-empty ID
    whose two breakpoints fall inside the configured genome becomes one link
    record. Rows on an undefined chromosome are skipped silently, rows whose
    breakpoint is out of range are skipped with a message.

    Args:
        input_file: Path of the table to read.
        output_file: Path of the JavaScript file to write.
        positions: Column-position description handed to
            ``merge.position_to_dict``; the resulting dict must contain
            "id", "chr1", "break1", "chr2" and "break2" ("group" is optional).
        config: Parsed configuration object.

    Returns:
        ``{"id_list": ..., "group_list": ..., "color": ...}`` on success, or
        ``None`` when the genome size table is empty, the input cannot be
        loaded, or the input contains no rows.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_sv", "sept"),
            comment=tools.config_getstr(config, "result_format_sv", "comment"))
    except Exception as e:
        # Format the exception itself: ``e.message`` does not exist on
        # Python 3 and is frequently empty on Python 2.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # normalize group names in place: spaces -> "_", empty -> "_blank_"
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")

        [groups, colors_n] = convert.group_list(
            df.column(cols_di["group"]), "sv", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter Chromosome", "Intra Chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups)))

    # ID list: unique, non-empty, sorted
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # every remaining column is an optional tooltip column
    # (list() is required: Python 3 dict views have no remove())
    option_keys = list(cols_di.keys())
    for required in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(required)
    if "group" in option_keys:
        option_keys.remove("group")

    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_pos = [df.name_to_index(cols_di[key])
                   for key in option_keys if cols_di[key] != ""]

    with open(output_file, "w") as f:
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=tools.config_getint(config, "sv", "selector_split_size", 5000000),
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(
                cols_di, config, "sv", "result_format_sv", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        f.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # rang > 0: position beyond the chromosome; rang < 0: unknown chromosome
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[pos] for pos in tooltip_pos]

            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)
        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def convert_tojs(input_file, output_file, positions, config):
    """Convert a CA table into the JavaScript data file used by the CA report.

    Rows are read with ``data_frame.load_file``; each row with a non-empty ID
    whose two breakpoints fall inside the configured genome becomes one link
    record. After the links, a per-group selection table is written that
    maps each genome bin (of ``selector_split_size`` bases) to the IDs that
    have a breakpoint in it.

    Args:
        input_file: Path of the table to read.
        output_file: Path of the JavaScript file to write.
        positions: Column-position description handed to
            ``merge.position_to_dict``; the resulting dict must contain
            "id", "chr1", "break1", "chr2" and "break2" ("group" is optional).
        config: Parsed configuration object.

    Returns:
        ``{"id_list": ..., "group_list": ..., "color": ...}`` on success, or
        ``None`` when the genome size table is empty, the input cannot be
        loaded, or the input contains no rows.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import math

    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment"))
    except Exception as e:
        # Format the exception itself: ``e.message`` does not exist on
        # Python 3 and is frequently empty on Python 2.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # normalize group names in place: spaces -> "_", empty -> "_blank_"
        for row in df.data:
            group = row[group_pos]
            row[group_pos] = "_blank_" if group == "" else group.replace(" ", "_")

        [groups, colors_n] = convert.group_list(
            df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups)))

    # ID list: unique, non-empty, sorted
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    # Ids is sorted and unique, so this map is equivalent to Ids.index()
    id_index = dict((value, idx) for idx, value in enumerate(Ids))

    # every remaining column is an optional tooltip column
    option_keys = tools.dict_keys(cols_di)
    for required in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(required)
    if "group" in option_keys:
        option_keys.remove("group")

    # node_size
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])
    tooltip_pos = [df.name_to_index(cols_di[key])
                   for key in option_keys if cols_di[key] != ""]

    with open(output_file, "w") as f:
        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=node_size_select,
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(
                cols_di, config, "ca", "result_format_ca", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        data_links = []

        f.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # rang > 0: position beyond the chromosome; rang < 0: unknown chromosome
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[pos] for pos in tooltip_pos]

            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)

        # integral bar item: per group, bin key -> list of IDs with a breakpoint there
        link = [{} for _ in groups]

        bin_format = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}"
        for iid, index1, pos1, index2, pos2, group in data_links:
            bp1 = bin_format.format(
                Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = bin_format.format(
                Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            # NOTE(review): group may be -1 (the default passed to
            # value_to_index above); link[-1] then addresses the last group.
            # Confirm this is intended.
            bucket = link[group]

            # add bp1
            bucket.setdefault(bp1, []).append(iid)

            # add bp2 (only once when both breakpoints share a bin)
            if bp1 != bp2:
                bucket.setdefault(bp2, []).append(iid)

        select_item_text = ""
        select_value_text = ""
        select_key_text = ""
        for g in range(len(groups)):
            items = []
            values = []
            keys = []

            for bin_key in link[g]:
                values.append(len(link[g][bin_key]))

                # split key to chr and pos
                parts = bin_key.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                # delete duplication
                items.append([id_index[t] for t in sorted(set(link[g][bin_key]))])

            select_value_text += "[%s]," % (",".join(map(str, values)).replace(" ", ""))
            select_key_text += "[%s]," % (",".join(map(str, keys)).replace(" ", ""))
            select_item_text += "[%s]," % (",".join(map(str, items)).replace(" ", ""))

        f.write(js_selection.format(value=select_value_text,
                                    key=select_key_text,
                                    item=select_item_text))

        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
def convert_tojs(input_file, output_file, positions, config):
    """Convert a mutation table into the JavaScript data file of the mutation report.

    Rows are read with ``data_frame.load_file`` and counted per
    (ID, func, gene) combination; one mutation record is written for every
    combination whose ID, func and gene are all present in the respective
    lists. Sub-plot data is then written for every config section whose name
    starts with ``mut_subplot_type1_`` or ``mut_subplot_type2_``.

    Args:
        input_file: Path of the table to read.
        output_file: Path of the JavaScript file to write.
        positions: Column-position description handed to
            ``merge.position_to_dict``; the resulting dict must contain
            "id", "func" and "gene".
        config: Parsed configuration object.

    Returns:
        ``{"func": ..., "color": ..., "subdata": [...]}`` on success, or
        ``None`` when the input cannot be loaded or contains no rows.
    """
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_mutation", "sept"),
            comment=tools.config_getstr(config, "result_format_mutation", "comment"))
    except Exception as e:
        # Format the exception itself: ``e.message`` does not exist on
        # Python 3 and is frequently empty on Python 2.
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # func replace (in place): spaces -> "_", empty -> "_blank_"
    func_pos = df.name_to_index(cols_di["func"])
    for row in df.data:
        func = row[func_pos]
        row[func_pos] = "_blank_" if func == "" else func.replace(" ", "_")

    [funcs, colors_n] = convert.group_list(
        df.column(cols_di["func"]), "mut", "func", config)

    # ID list: unique, non-empty, sorted
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["func"]),
                       df.column(cols_di["id"]),
                       funcs, Ids, config)

    # every remaining column is an optional tooltip column
    # (list() is required: Python 3 dict views have no remove())
    option_keys = list(cols_di.keys())
    for required in ("id", "func", "gene"):
        option_keys.remove(required)

    def tooltip_text(option):
        # all tooltip formats live in the same config sections
        return convert.pyformat_to_jstooltip_text(
            cols_di, config, "mut", "result_format_mutation", option)

    gene_pos = df.name_to_index(cols_di["gene"])
    tooltip_pos = [df.name_to_index(cols_di[key])
                   for key in option_keys if cols_di[key] != ""]
    sept_func = tools.config_getstr(config, "result_format_mutation", "sept_func")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    subdata = []

    with open(output_file, "w") as f:
        f.write(js_header + js_dataset.format(
            Ids=convert.list_to_text(Ids),
            genes=convert.list_to_text(genes),
            funcs=convert.list_to_text(funcs),
            func_colors_n=convert.list_to_text(colors_n),
            mutation_header=convert.list_to_text(option_keys),
            checker_title=tooltip_text("tooltip_format_checker_title"),
            checker_partial=tooltip_text("tooltip_format_checker_partial"),
            gene_title=tooltip_text("tooltip_format_gene_title"),
            gene_partial=tooltip_text("tooltip_format_gene_partial"),
            id_title=tooltip_text("tooltip_format_id_title"),
            id_partial=tooltip_text("tooltip_format_id_partial"),
        ))

        # mutation list
        f.write(js_mutations_1)

        # mutations[id][func][gene] -> count
        # tooltips[id][func][gene]  -> list of tooltip rows
        mutations = {}
        tooltips = {}
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            if iid not in mutations:
                mutations[iid] = {}
                tooltips[iid] = {}

            func_split = convert.text_to_list(row[func_pos], sept_func)
            tooltip_items = [row[pos] for pos in tooltip_pos]

            for func in func_split:
                if func not in mutations[iid]:
                    mutations[iid][func] = {}
                    tooltips[iid][func] = {}

                gene_split = convert.text_to_list(row[gene_pos], sept_gene)
                for gene in gene_split:
                    if gene not in mutations[iid][func]:
                        mutations[iid][func][gene] = 1
                        tooltips[iid][func][gene] = []
                    else:
                        mutations[iid][func][gene] += 1

                    tooltips[iid][func][gene].append(tooltip_items)

        mutations_sum = 0
        for iid in mutations:
            for func in mutations[iid]:
                for gene in mutations[iid][func]:
                    idx_i = convert.value_to_index(Ids, iid, -1)
                    idx_f = convert.value_to_index(funcs, func, -1)
                    idx_g = convert.value_to_index(genes, gene, -1)

                    # only combinations known to all three lists are written
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tooltip_items = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene])

                    f.write(mu_mutations_template.format(
                        ID=idx_i,
                        func=idx_f,
                        gene=idx_g,
                        num=mutations[iid][func][gene],
                        tooltip=tooltip_items))

                    mutations_sum += mutations[iid][func][gene]

        f.write(js_mutations_2.format(mutations_sum=mutations_sum))

        ##### subdata #####
        f.write(js_subdata_1)

        counter = 0
        for sec in config.sections():
            if sec.startswith("mut_subplot_type1_"):
                pos = 1
            elif sec.startswith("mut_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(Ids, sec, config)
            if ret_val is None:
                continue

            # kept separate from colors_n, which belongs to the returned dataset
            [data_text, item, sub_colors, label, title] = ret_val
            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(
                name=name,
                title=title,
                type=tools.config_getstr(config, sec, "mode"),
                item=convert.list_to_text(item),
                label=convert.list_to_text(label),
                colors_n=convert.list_to_text(sub_colors),
                data=data_text))

            subdata.append({"pos": pos, "label": label, "color": sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        f.write(js_function)

    return {"func": funcs, "color": colors_n, "subdata": subdata}