Пример #1
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the input files to Json data and write them to the Javascript file
    Also write functions and methods to process those data

    Parameters
    ----------
    input_file : str : The absolute path of formatted data file
    output_file: str : The absolute path of JavaScript file
    positions  : dict: A nested dictionary with "must" and "option" as keys
    config     : configparser.RawConfigParser

    Return
    ------
    On success, return a dictionary: {"id_list": [...] "group_list": [...], "color": [...]}
        id_list   : The values for id column
        group_list: The names of groups
        color     : The colors in groups
    Return None when the genome size list is empty, when the input file
    cannot be loaded, or when the input file contains no data rows.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import os
    import math

    # genome_size: a nested list
    # [ [ A chromosome number in lowercase letters,
    #     The size of the 1st element,
    #     The color of the 1st element,
    #     The original name of the 1st element(that is not necessarily lowercase) or a user-defined name, ], ... ]
    genome_size = load_genome_size(config)
    if len(genome_size) == 0:
        return None

    # genome: dictionary-style string like this
    # {"chr":"00", "size":249250621, "color":"#BBBBBB", "label":"1",},
    # {"chr":"01", "size":243199373, "color":"#BBBBBB", "label":"2",},
    # ...
    # chr  : Sequential number
    # size : Size corresponding to the label
    # color: Color corresponding to the label
    # label: Name corresponding to chromosome
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    # Create a data frame that has title and data attributions
    # title is a list like ['Break1', 'Break2', 'Chr1', 'Chr2', 'Sample']
    # data is a nested list like [[16019088, 62784483, '14', '12', 'SAMPLE1'], ...]
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment")
        )
    except Exception as e:
        # Format the exception itself: the ".message" attribute does not exist on Python 3 exceptions
        print("failure open data %s, %s" % (input_file, e))
        return None
    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Create groups, labels, and colors_n
    # cols_di: a dictionary that merges must and option values
    #        : ex) {'chr1': 'Chr1', 'break1': 'Break1', 'chr2': 'Chr2', 'break2': 'Break2', 'id': 'Sample'}
    cols_di = merge.position_to_dict(positions)
    group_pos = None
    if "group" in cols_di:
        # A title may be stored in cols_di["group"]
        group_pos = df.name_to_index(cols_di["group"])  # Get group(title) index
        for row in df.data:
            group = row[group_pos]
            # Modify group value: empty -> "_blank_", otherwise spaces -> underscores
            if group == "":
                row[group_pos] = "_blank_"
            else:
                row[group_pos] = group.replace(" ", "_")
        # groups  : list: group names
        # labels  : list: group names
        # colors_n: list: color values for groups
        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "ca", "group", config)
        labels = groups
    else:
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]  # purple, green

    # Create group_text that is a dictionary-style string with name, label, color
    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # Column indexes are the same for every row, so look them up once
    id_pos = df.name_to_index(cols_di["id"])
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])

    # id_list: Values for "id" column
    #        : Sorted without duplicates
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))
    id_index = dict((value, i) for i, value in enumerate(id_list))

    # option_keys: Store the option keys of the positions dictionary
    option_keys = tools.dict_keys(cols_di)  # option_keys: list: sorted keys of cols_di
    for key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Columns shown in the tooltip: every remaining key that is mapped to a column title
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # node_size: Size to divide chromosomes
    node_size_select = tools.config_getint(config, "ca", "selector_split_size", 5000000)

    # "with" guarantees the output file is closed even if writing fails midway
    with open(output_file, "w") as f:

        # Write header and dataset of JavaScript file

        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),  # node size for detailed thumbnails
            node_size_thumb=calc_node_size(genome_size, 250),   # node size for rough thumbnails
            node_size_select=node_size_select,                  # node size for bar graph
            genome_size=genome,                 # A dictionary-style string containing keys of "chr", "size", "color", and "label"
            IDs=convert.list_to_text(id_list),  # A comma-separated string of id column values
            group=group_text,                   # A dictionary-style string containing keys of "name", "label", and "color"
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "ca", "result_format_ca", "tooltip_format"),  # Tooltip definition built from the "tooltip_format" option
            link_header=convert.list_to_text(option_keys),
        ))

        # Write link of JavaScript file

        f.write(js_links_1)  # Write the leading part

        data_links = []
        for row in df.data:
            iid = row[id_pos]  # iid: the value of "id" column
            # Ignore empty string
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])  # chromosome1
            pos1 = row[break1_pos]     # break point1
            chr2 = str(row[chr2_pos])  # chromosome2
            pos2 = row[break2_pos]     # break point2

            # Check if chr1 and chr2 is in the genome list
            # Check if pos1 and pos2 is in the chr1 length
            # index1 and index2 are indexes of the genome_size for chr1 and chr2
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                # chr1 is undefined: skip silently
                continue
            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                # chr2 is undefined: skip silently
                continue

            # Whether chr1 and chr2 are the same chromosome
            inner_flg = "true" if chr1 == chr2 else "false"

            # Set group_id: -1, 0, 1, index values of groups
            #             : Sequential numbers identifying groups
            if group_pos is not None:
                # If the value of group column is in group list, then group_id is the index of the list
                # Others, group_id is -1 (Not belong to any groups)
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0  # chr1 and chr2 are in the different group
            else:
                group_id = 1  # chr1 and chr2 are in the same group

            # Add an element to data_links
            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            # tooltip_items: Data for tooltip
            tooltip_items = [row[col] for col in tooltip_cols]

            # Write link
            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)  # Write the ending part

        # Write integral bar item

        # link: [{bp: [iid, ...]}, {...}, ...]
        #     : Separate elements by group_id
        link = [{} for _ in groups]

        for iid, index1, pos1, index2, pos2, group_id in data_links:
            # A negative group_id means "no group". Indexing the list with -1
            # would silently count the link in the LAST group, so skip it here.
            if group_id < 0:
                continue

            # Chr: The index of genome_size
            # Pos: A break position based on node
            bp1 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(Chr=index1, Pos=int(math.floor(pos1 / node_size_select)))
            bp2 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(Chr=index2, Pos=int(math.floor(pos2 / node_size_select)))

            link[group_id].setdefault(bp1, []).append(iid)
            # When both break points fall in the same node, count the id only once
            if bp1 != bp2:
                link[group_id].setdefault(bp2, []).append(iid)

        select_value_parts = []
        select_key_parts = []
        select_item_parts = []
        for group_links in link:
            values = []  # [Number of id, ...]
            keys = []    # [[genome_size index, Break position], ...]
            items = []   # [[id_list index, ...], ...]

            for bp in sorted(group_links.keys()):
                ids_at_bp = group_links[bp]  # id values at this node; duplicates are kept
                values.append(len(ids_at_bp))

                parts = bp.split(".")[2].split("_")  # parts: [Chr, Pos]
                keys.append([int(parts[0]), int(parts[1])])

                # Indexes into id_list, sorted and without duplicates
                items.append([id_index[t] for t in sorted(set(ids_at_bp))])

            select_value_parts.append("[%s]," % (",".join(map(str, values)).replace(" ", "")))  # [1,1,...],
            select_key_parts.append("[%s]," % (",".join(map(str, keys)).replace(" ", "")))      # [[0,1],[0,25],...],
            select_item_parts.append("[%s]," % (",".join(map(str, items)).replace(" ", "")))    # [[9],[8],...],

        f.write(js_selection.format(
            value="".join(select_value_parts),
            key="".join(select_key_parts),
            item="".join(select_item_parts)
        ))

        # Write rest of JavaScript file and footer

        template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates", "data_ca.js")
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    return {"id_list": id_list, "group_list": groups, "color": colors_n}
Пример #2
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert a mutation data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the formatted data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions; merged into one dictionary by merge.position_to_dict
                       (the keys "id", "group" and "gene" are required here)
    config     : Configuration object read through tools.config_getstr and config.sections()

    Return
    ------
    On success, return a dictionary: {"subdata": [...]}
        subdata: One dictionary per loaded sub plot, with the keys "pos", "label", "color" and "title"
    Return None when the input file cannot be loaded or has no data rows.
    '''
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_mutation", "sept"),
            comment=tools.config_getstr(config, "result_format_mutation", "comment")
        )
    except Exception as e:
        # Format the exception itself: the ".message" attribute does not exist on Python 3 exceptions
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # Column indexes are the same for every row, so look them up once
    id_pos = df.name_to_index(cols_di["id"])
    group_pos = df.name_to_index(cols_di["group"])
    gene_pos = df.name_to_index(cols_di["gene"])

    # func replace: an empty group value is stored as "_blank_"
    for row in df.data:
        if row[group_pos] == "":
            row[group_pos] = "_blank_"

    [funcs, colors_n] = convert.group_list(df.column(cols_di["group"]), "mutation", "group", config)

    # ID list: sorted, without duplicates and without empty values
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # gene list
    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["group"]),
                       df.column(cols_di["id"]),
                       funcs, id_list, config)

    # Every key other than id/group/gene is tooltip data
    option_keys = tools.dict_keys(cols_di)
    for key in ("id", "group", "gene"):
        option_keys.remove(key)
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # Separators do not change between rows, so read them from the config once
    sept_group = tools.config_getstr(config, "result_format_mutation", "sept_group")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    # mutations[iid][func][gene]: number of rows for that combination
    # tooltips[iid][func][gene] : list of tooltip rows for that combination
    mutations = {}
    tooltips = {}
    for row in df.data:
        iid = row[id_pos]
        if iid == "":
            continue

        id_mutations = mutations.setdefault(iid, {})
        id_tooltips = tooltips.setdefault(iid, {})

        func_split = convert.text_to_list(row[group_pos], sept_group)
        gene_split = convert.text_to_list(row[gene_pos], sept_gene)
        tooltip_row = [row[col] for col in tooltip_cols]

        for func in func_split:
            func_mutations = id_mutations.setdefault(func, {})
            func_tooltips = id_tooltips.setdefault(func, {})
            for gene in gene_split:
                func_mutations[gene] = func_mutations.get(gene, 0) + 1
                func_tooltips.setdefault(gene, []).append(tooltip_row)

    dataset = {}
    subdata = []

    # "with" guarantees the output file is closed even if writing fails midway
    with open(output_file, "w") as f:
        # mutation list
        f.write(js_header)
        f.write(js_mutations_1)

        mutations_sum = 0
        for iid in tools.dict_keys(mutations):
            idx_i = convert.value_to_index(id_list, iid, -1)
            for func in tools.dict_keys(mutations[iid]):
                idx_f = convert.value_to_index(funcs, func, -1)
                for gene in tools.dict_keys(mutations[iid][func]):
                    idx_g = convert.value_to_index(genes, gene, -1)

                    # Skip combinations whose id, func or gene is not in the output lists
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tooltip_text = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene]
                    )
                    count = mutations[iid][func][gene]

                    f.write(mu_mutations_template.format(
                        ID=idx_i,
                        func=idx_f,
                        gene=idx_g,
                        num=count,
                        tooltip=tooltip_text))

                    mutations_sum += count

        f.write(js_mutations_2.format(mutations_sum=mutations_sum))

        # write id, func, gene ... list
        f.write(js_dataset.format(
            Ids=convert.list_to_text(id_list),
            genes=convert.list_to_text(convert.list_prohibition(genes)),
            funcs=convert.list_to_text(convert.list_prohibition(funcs)),
            func_colors_n=convert.list_to_text(colors_n),
            mutation_header=convert.list_to_text(option_keys),
            checker_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_checker_title"),
            checker_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_checker_partial"),
            gene_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_gene_title"),
            gene_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_gene_partial"),
            id_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_id_title"),
            id_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mutation", "result_format_mutation", "tooltip_format_id_partial"),
        ))

        ##### subdata #####
        f.write(js_subdata_1)
        counter = 0
        for sec in config.sections():
            # The section name prefix decides the position recorded for the sub plot
            if sec.startswith("mutation_subplot_type1_"):
                pos = 1
            elif sec.startswith("mutation_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(id_list, sec, config)
            if ret_val is None:
                continue
            [data_text, item, sub_colors, label, title] = ret_val

            # Only sub plots that loaded successfully consume a sequence number
            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(
                name=name,
                title=title,
                type=tools.config_getstr(config, sec, "mode"),
                item=convert.list_to_text(item),
                label=convert.list_to_text(label),
                colors_n=convert.list_to_text(sub_colors),
                data=data_text))

            subdata.append({"pos": pos, "label": label, "color": sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates", "data_mutation.js")
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    dataset["subdata"] = subdata
    return dataset
Пример #3
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert a QC data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the formatted data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions; merged into one dictionary by merge.position_to_dict
                       (the key "id" is required here)
    config     : Configuration object; every section whose name starts with "qc_chart_" defines one chart

    Return
    ------
    On success, return a dictionary: {"plots": [...]}
        plots: The chart ids ("chart_...") in the order they were written
    Return None when the input file cannot be loaded or has no data rows.
    '''
    import os
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_qc", "sept"),
            comment=tools.config_getstr(config, "result_format_qc", "comment")
        )
    except Exception as e:
        # Format the exception itself: the ".message" attribute does not exist on Python 3 exceptions
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    plot_texts = []
    plots_option = []

    # Charts are processed in sorted section order, with the brush chart first
    config_sections = sorted(config.sections())
    if "qc_chart_brush" in config_sections:
        config_sections.remove("qc_chart_brush")
        config_sections.insert(0, "qc_chart_brush")

    # Iterate the ordered list built above (the ordering was previously
    # computed but never used, because the loop re-read config.sections())
    for sec in config_sections:
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        # "name_set" is a comma-separated list of "label" or "label:color" entries
        stack_id = []
        label = []
        colors_di = {}
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(len(stack_id)))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value, in label order
        colors_li = [colors_di[key] for key in label]

        plot_texts.append(plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        ))
        plots_option.append(chart_id)

    plots_text = "".join(plot_texts)

    # ID list: sorted, without duplicates and without empty values
    id_pos = df.name_to_index(cols_di["id"])
    id_list = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header, and the column index that belongs to each header key
    headers = tools.dict_keys(cols_di)
    header_cols = [df.name_to_index(cols_di[item]) for item in headers]

    # "with" guarantees the output file is closed even if writing fails midway
    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(id_list),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values: one JavaScript array per row, columns in header order
        for row in df.data:
            if row[id_pos] == "":
                continue

            cells = []
            for col in header_cols:
                val = row[col]
                if isinstance(val, str):
                    cells.append("'" + val + "'")   # strings are quoted
                elif isinstance(val, float):
                    cells.append('%.2f' % val)      # floats are written with two decimals
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)

        template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates", "data_qc.js")
        with open(template_path) as f_template:
            f.write(f_template.read())
        f.write(js_footer)

    return {"plots": plots_option}
Пример #4
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert a QC data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the formatted data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions; merged into one dictionary by merge.position_to_dict
                       (the key "id" is required here)
    config     : Configuration object; every section whose name starts with "qc_chart_" defines one chart

    Return
    ------
    On success, return a dictionary: {"plots": [...]}
        plots: The chart ids ("chart_...") in the order they were written
    Return None when the input file cannot be loaded or has no data rows.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert
    import paplot.color as color

    cols_di = merge.position_to_dict(positions)

    # data read
    # NOTE(review): the separator comes from "merge_format_qc" while the comment
    # marker comes from "result_format_qc"; kept as written - confirm it is intended
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_qc", "sept"),
            comment=tools.config_getstr(config, "result_format_qc", "comment")
        )
    except Exception as e:
        # Format the exception itself: the ".message" attribute does not exist on Python 3 exceptions
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # chart list
    plot_texts = []
    plots_option = []

    # Charts are processed in sorted section order, with the brush chart first
    config_sections = sorted(config.sections())
    if "qc_chart_brush" in config_sections:
        config_sections.remove("qc_chart_brush")
        config_sections.insert(0, "qc_chart_brush")

    # Iterate the ordered list built above (the ordering was previously
    # computed but never used, because the loop re-read config.sections())
    for sec in config_sections:
        if not sec.startswith("qc_chart_"):
            continue

        chart_id = sec.replace("qc_chart_", "chart_")

        # "name_set" is a comma-separated list of "label" or "label:color" entries
        stack_id = []
        label = []
        colors_di = {}
        for name_set in tools.config_getstr(config, sec, "name_set").split(","):
            name_set_split = convert.text_to_list(name_set, ":")
            if len(name_set_split) == 0:
                continue

            stack_id.append("stack" + str(len(stack_id)))
            label.append(name_set_split[0])

            if len(name_set_split) > 1:
                colors_di[name_set_split[0]] = color.name_to_value(name_set_split[1])

        # fill in undefined items
        colors_di = color.create_color_dict(label, colors_di, color.metro_colors)

        # dict to value, in label order
        colors_li = [colors_di[key] for key in label]

        plot_texts.append(plot_template.format(
            chart_id=chart_id,
            title=tools.config_getstr(config, sec, "title"),
            title_y=tools.config_getstr(config, sec, "title_y"),
            stack=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "stack"),
            stack_id=convert.list_to_text(stack_id),
            label=convert.list_to_text(label),
            color=convert.list_to_text(colors_li),
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, sec, "result_format_qc", "tooltip_format"),
        ))
        plots_option.append(chart_id)

    plots_text = "".join(plot_texts)

    # ID list: sorted, without duplicates and without empty values
    id_pos = df.name_to_index(cols_di["id"])
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # header: sorted keys. sorted() also works on Python 3, where
    # dict.keys() returns a view object that has no sort() method
    headers = sorted(cols_di.keys())
    header_cols = [df.name_to_index(cols_di[item]) for item in headers]

    # "with" guarantees the output file is closed even if writing fails midway
    with open(output_file, "w") as f:
        f.write(js_header)
        f.write(js_dataset.format(IDs=convert.list_to_text(Ids),
                                  header=convert.list_to_text(headers),
                                  plots=plots_text))
        f.write(js_data1)

        # values: one JavaScript array per row, columns in header order
        for row in df.data:
            if row[id_pos] == "":
                continue

            cells = []
            for col in header_cols:
                val = row[col]
                if isinstance(val, str):
                    cells.append("'" + val + "'")   # strings are quoted
                elif isinstance(val, float):
                    cells.append('%.2f' % val)      # floats are written with two decimals
                else:
                    cells.append(str(val))

            f.write("[" + ",".join(cells) + "],")

        f.write(js_data2)
        f.write(js_function)

    return {"plots": plots_option}
Пример #5
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert an SV data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the formatted data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions; merged into one dictionary by merge.position_to_dict
                       (the keys "id", "chr1", "break1", "chr2" and "break2" are required, "group" is optional)
    config     : Configuration object read through tools.config_getstr / tools.config_getint

    Return
    ------
    On success, return a dictionary: {"id_list": [...], "group_list": [...], "color": [...]}
        id_list   : The sorted, unique, non-empty values of the id column
        group_list: The names of groups
        color     : The colors of groups
    Return None when the genome size list is empty, when the input file
    cannot be loaded, or when the input file contains no data rows.
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    genome_size = load_genome_size(config)

    if len(genome_size) == 0:
        return None

    # One template entry per genome_size element; Chr is the element's index
    genome = ",\n".join(
        genome_size_template.format(Chr=i, size=entry[1], color=entry[2], label=entry[3])
        for i, entry in enumerate(genome_size)
    )

    cols_di = merge.position_to_dict(positions)

    # data read
    # NOTE(review): the separator comes from "merge_format_sv" while the comment
    # marker comes from "result_format_sv"; kept as written - confirm it is intended
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_sv", "sept"),
            comment=tools.config_getstr(config, "result_format_sv", "comment")
        )
    except Exception as e:
        # Format the exception itself: the ".message" attribute does not exist on Python 3 exceptions
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    group_pos = None
    if "group" in cols_di:
        group_pos = df.name_to_index(cols_di["group"])
        for row in df.data:
            group = row[group_pos]
            # empty -> "_blank_", otherwise spaces -> underscores
            if group == "":
                row[group_pos] = "_blank_"
            else:
                row[group_pos] = group.replace(" ", "_")

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]), "sv", "group", config)
        labels = groups

    else:
        # Without a group column, links are grouped by inter/intra chromosome
        groups = ["outer", "inner"]
        labels = ["Inter Chromosome", "Intra Chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=groups[i], label=labels[i], color=colors_n[i])
        for i in range(len(groups))
    )

    # Column indexes are the same for every row, so look them up once
    id_pos = df.name_to_index(cols_di["id"])
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])

    # ID list: sorted, without duplicates and without empty values
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # Copy the keys into a real list: on Python 3, dict.keys() returns a
    # view object that has no remove() method
    option_keys = list(cols_di.keys())
    for key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Columns shown in the tooltip: every remaining key that is mapped to a column title
    tooltip_cols = [df.name_to_index(cols_di[key]) for key in option_keys if cols_di[key] != ""]

    # "with" guarantees the output file is closed even if writing fails midway
    with open(output_file, "w") as f:

        f.write(js_header + js_dataset.format(
            node_size_detail=calc_node_size(genome_size, 500),
            node_size_thumb=calc_node_size(genome_size, 250),
            node_size_select=tools.config_getint(config, "sv", "selector_split_size", 5000000),
            genome_size=genome,
            IDs=convert.list_to_text(Ids),
            group=group_text,
            tooltip=convert.pyformat_to_jstooltip_text(cols_di, config, "sv", "result_format_sv", "tooltip_format"),
            link_header=convert.list_to_text(option_keys),
        ))

        # write links
        f.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # insite_genome returns [index in genome_size, rang];
            # rang > 0 is reported as out of range, rang < 0 is skipped silently
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" % (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" % (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            # Whether both break points are on the same chromosome
            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[col] for col in tooltip_cols]

            if group_pos is not None:
                # Index of the row's group in groups, or -1 when it is not listed
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)
        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
Пример #6
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the formatted data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the formatted data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions, converted with merge.position_to_dict
    config     : Configuration object handed to the tools.config_get* helpers

    Return
    ------
    On success, return a dictionary: {"id_list": [...], "group_list": [...], "color": [...]}
        id_list   : The sorted, unique, non-empty values of the id column
        group_list: The names of groups
        color     : The colors of groups
    Return None if the genome size list is empty, the data file cannot be
    loaded, or the data file has no rows
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    import math

    genome_size = load_genome_size(config)
    if not genome_size:
        return None

    # One template entry per genome element, numbered sequentially
    genome = ",\n".join(
        genome_size_template.format(Chr=i,
                                    size=entry[1],
                                    color=entry[2],
                                    label=entry[3])
        for i, entry in enumerate(genome_size))

    cols_di = merge.position_to_dict(positions)

    # data read
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "result_format_ca", "sept"),
            comment=tools.config_getstr(config, "result_format_ca", "comment"))
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the exception itself
        print("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print("no data %s" % input_file)
        return None

    # group list
    has_group = "group" in cols_di
    group_pos = None
    if has_group:
        group_pos = df.name_to_index(cols_di["group"])
        # Normalize group names in place: spaces become "_", empty becomes "_blank_"
        for row in df.data:
            group = row[group_pos]
            if group == "":
                row[group_pos] = "_blank_"
            else:
                row[group_pos] = group.replace(" ", "_")

        [groups, colors_n] = convert.group_list(df.column(cols_di["group"]),
                                                "ca", "group", config)
        labels = groups

    else:
        # Without a group column, links are grouped by inter/intra chromosome
        groups = ["outer", "inner"]
        labels = ["Inter-chromosome", "Intra-chromosome"]
        colors_n = ["#9E4A98", "#51BF69"]

    group_text = ",".join(
        group_template.format(name=name, label=labels[i], color=colors_n[i])
        for i, name in enumerate(groups))

    # Column indexes are the same for every row, so look them up once
    id_pos = df.name_to_index(cols_di["id"])
    chr1_pos = df.name_to_index(cols_di["chr1"])
    break1_pos = df.name_to_index(cols_di["break1"])
    chr2_pos = df.name_to_index(cols_di["chr2"])
    break2_pos = df.name_to_index(cols_di["break2"])

    # ID list: unique, non-empty, sorted
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    # Every column that is not a mandatory one (or the group) goes to the tooltip
    option_keys = tools.dict_keys(cols_di)
    for must_key in ("id", "chr1", "break1", "chr2", "break2"):
        option_keys.remove(must_key)
    if "group" in option_keys:
        option_keys.remove("group")

    # Options mapped to an empty column name are skipped
    tooltip_cols = [df.name_to_index(cols_di[key])
                    for key in option_keys if cols_di[key] != ""]

    # node_size
    node_size_select = tools.config_getint(config, "ca", "selector_split_size",
                                           5000000)

    # The context manager closes the file even if a write or format call raises
    with open(output_file, "w") as f:
        f.write(js_header
                + js_dataset.format(
                    node_size_detail=calc_node_size(genome_size, 500),
                    node_size_thumb=calc_node_size(genome_size, 250),
                    node_size_select=node_size_select,
                    genome_size=genome,
                    IDs=convert.list_to_text(Ids),
                    group=group_text,
                    tooltip=convert.pyformat_to_jstooltip_text(
                        cols_di, config, "ca", "result_format_ca", "tooltip_format"),
                    link_header=convert.list_to_text(option_keys),
                ))

        # write links
        data_links = []

        f.write(js_links_1)

        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            chr1 = str(row[chr1_pos])
            pos1 = row[break1_pos]
            chr2 = str(row[chr2_pos])
            pos2 = row[break2_pos]

            # rang > 0: position is over the chromosome range; rang < 0: row is skipped silently
            [index1, rang] = insite_genome(genome_size, chr1, pos1)
            if rang > 0:
                print("breakpoint 1 is over range. chr%s: input=%d, range=%d" %
                      (chr1, pos1, rang))
                continue
            if rang < 0:
                continue

            [index2, rang] = insite_genome(genome_size, chr2, pos2)
            if rang > 0:
                print("breakpoint 2 is over range. chr%s: input=%d, range=%d" %
                      (chr2, pos2, rang))
                continue
            if rang < 0:
                continue

            inner_flg = "true" if chr1 == chr2 else "false"

            tooltip_items = [row[col] for col in tooltip_cols]

            if has_group:
                group_id = convert.value_to_index(groups, row[group_pos], -1)
            elif inner_flg == "false":
                group_id = 0
            else:
                group_id = 1

            data_links.append([iid, index1, pos1, index2, pos2, group_id])

            f.write(links_template.format(
                ID=iid,
                Chr1=index1, pos1=pos1, Chr2=index2, pos2=pos2,
                inner_flg=inner_flg,
                group_id=group_id,
                tooltip="[" + convert.list_to_text(tooltip_items) + "],"))

        f.write(js_links_2)

        # integral bar item
        # link[group][bin key] -> list of IDs having a breakpoint in that bin
        link = [{} for _ in groups]

        for l in data_links:
            bp1 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(
                Chr=l[1], Pos=int(math.floor(l[2] / node_size_select)))
            bp2 = "root.{Chr:0>2}.{Chr:0>2}_{Pos:0>3}".format(
                Chr=l[3], Pos=int(math.floor(l[4] / node_size_select)))

            # NOTE(review): a group id of -1 (the default passed to value_to_index)
            # indexes the last group here - confirm that this is intended
            bins = link[l[5]]

            # add bp1
            bins.setdefault(bp1, []).append(l[0])

            # add bp2 (only once when both breakpoints fall into the same bin)
            if bp1 != bp2:
                bins.setdefault(bp2, []).append(l[0])

        # Ids is unique, so a dictionary gives the same index as Ids.index()
        id_index = dict((iid, n) for n, iid in enumerate(Ids))

        select_item_text = ""
        select_value_text = ""
        select_key_text = ""

        for g in range(len(groups)):
            items = []
            values = []
            keys = []

            for bp, linked_ids in link[g].items():
                values.append(len(linked_ids))

                # split key to chr and pos
                parts = bp.split(".")[2].split("_")
                keys.append([int(parts[0]), int(parts[1])])

                # delete duplication
                items.append([id_index[t] for t in sorted(set(linked_ids))])

            select_value_text += "[%s]," % (",".join(map(str, values)).replace(
                " ", ""))
            select_key_text += "[%s]," % (",".join(map(str, keys)).replace(
                " ", ""))
            select_item_text += "[%s]," % (",".join(map(str, items)).replace(
                " ", ""))

        f.write(
            js_selection.format(value=select_value_text,
                                key=select_key_text,
                                item=select_item_text))

        f.write(js_function)

    return {"id_list": Ids, "group_list": groups, "color": colors_n}
Пример #7
0
def convert_tojs(input_file, output_file, positions, config):
    '''
    Convert the mutation data file to JavaScript data and write it to output_file

    Parameters
    ----------
    input_file : str : The path of the data file
    output_file: str : The path of the JavaScript file to write
    positions  : dict: Column positions, converted with merge.position_to_dict
    config     : Configuration object handed to the tools.config_get* helpers

    Return
    ------
    On success, return a dictionary: {"func": [...], "color": [...], "subdata": [...]}
        func   : The names of functions
        color  : The colors of functions
        subdata: One {"pos", "label", "color", "title"} dictionary per loaded sub plot
    Return None if the data file cannot be loaded or has no rows
    '''
    import paplot.subcode.data_frame as data_frame
    import paplot.subcode.merge as merge
    import paplot.subcode.tools as tools
    import paplot.convert as convert

    cols_di = merge.position_to_dict(positions)

    # data read
    # NOTE(review): "sept" is read from merge_format_mutation while "comment" is read
    # from result_format_mutation - confirm that the mixed sections are intended
    try:
        df = data_frame.load_file(
            input_file, header=1,
            sept=tools.config_getstr(config, "merge_format_mutation", "sept"),
            comment=tools.config_getstr(config, "result_format_mutation", "comment"))
    except Exception as e:
        # Exceptions have no ".message" attribute on Python 3; format the exception itself
        print ("failure open data %s, %s" % (input_file, e))
        return None

    if len(df.data) == 0:
        print ("no data %s" % input_file)
        return None

    # Column indexes are the same for every row, so look them up once
    id_pos = df.name_to_index(cols_di["id"])
    func_pos = df.name_to_index(cols_di["func"])
    gene_pos = df.name_to_index(cols_di["gene"])

    # func replace: spaces become "_", empty becomes "_blank_"
    for row in df.data:
        func = row[func_pos]
        if func == "":
            row[func_pos] = "_blank_"
        else:
            row[func_pos] = func.replace(" ", "_")

    [funcs, colors_n] = convert.group_list(df.column(cols_di["func"]), "mut", "func", config)

    # ID list: unique, non-empty, sorted
    Ids = sorted(set(row[id_pos] for row in df.data if row[id_pos] != ""))

    genes = genes_list(df.column(cols_di["gene"]),
                       df.column(cols_di["func"]),
                       df.column(cols_di["id"]),
                       funcs, Ids, config)

    # Copy the keys to a list: dictionary views on Python 3 have no remove()
    option_keys = list(cols_di.keys())
    for must_key in ("id", "func", "gene"):
        option_keys.remove(must_key)

    # Options mapped to an empty column name are skipped
    tooltip_cols = [df.name_to_index(cols_di[key])
                    for key in option_keys if cols_di[key] != ""]

    # The separators do not change between rows, so read them once
    sept_func = tools.config_getstr(config, "result_format_mutation", "sept_func")
    sept_gene = tools.config_getstr(config, "result_format_mutation", "sept_gene")

    # The dataset keeps the function colors
    dataset = {"func": funcs, "color": colors_n}
    subdata = []

    # The context manager closes the file even if a write or format call raises
    with open(output_file, "w") as f:
        f.write(js_header
                + js_dataset.format(
                    Ids=convert.list_to_text(Ids),
                    genes=convert.list_to_text(genes),
                    funcs=convert.list_to_text(funcs),
                    func_colors_n=convert.list_to_text(colors_n),
                    mutation_header=convert.list_to_text(option_keys),
                    checker_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_checker_title"),
                    checker_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_checker_partial"),
                    gene_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_gene_title"),
                    gene_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_gene_partial"),
                    id_title=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_id_title"),
                    id_partial=convert.pyformat_to_jstooltip_text(cols_di, config, "mut", "result_format_mutation", "tooltip_format_id_partial"),
                ))

        # mutation list
        f.write(js_mutations_1)

        # mutations[id][func][gene] -> number of rows
        # tooltips[id][func][gene]  -> tooltip items of those rows
        mutations = {}
        tooltips = {}
        for row in df.data:
            iid = row[id_pos]
            if iid == "":
                continue

            id_mutations = mutations.setdefault(iid, {})
            id_tooltips = tooltips.setdefault(iid, {})

            func_split = convert.text_to_list(row[func_pos], sept_func)
            gene_split = convert.text_to_list(row[gene_pos], sept_gene)

            tooltip_items = [row[col] for col in tooltip_cols]

            # A row counts once for every (func, gene) combination it lists
            for func in func_split:
                func_mutations = id_mutations.setdefault(func, {})
                func_tooltips = id_tooltips.setdefault(func, {})

                for gene in gene_split:
                    func_mutations[gene] = func_mutations.get(gene, 0) + 1
                    func_tooltips.setdefault(gene, []).append(tooltip_items)

        mutations_sum = 0
        for iid in mutations:
            idx_i = convert.value_to_index(Ids, iid, -1)
            for func in mutations[iid]:
                idx_f = convert.value_to_index(funcs, func, -1)
                for gene, num in mutations[iid][func].items():
                    idx_g = convert.value_to_index(genes, gene, -1)

                    # Entries whose id, func or gene is not listed are not written
                    if idx_i < 0 or idx_f < 0 or idx_g < 0:
                        continue

                    tooltip_text = "".join(
                        "[" + convert.list_to_text(tips) + "],"
                        for tips in tooltips[iid][func][gene])

                    f.write(mu_mutations_template.format(ID=idx_i,
                                                         func=idx_f,
                                                         gene=idx_g,
                                                         num=num,
                                                         tooltip=tooltip_text))

                    mutations_sum += num

        f.write(js_mutations_2.format(mutations_sum=mutations_sum))

        ##### subdata #####
        f.write(js_subdata_1)
        counter = 0
        for sec in config.sections():
            if sec.startswith("mut_subplot_type1_"):
                pos = 1
            elif sec.startswith("mut_subplot_type2_"):
                pos = 2
            else:
                continue

            ret_val = load_subdata(Ids, sec, config)
            if ret_val is None:
                continue
            [data_text, item, sub_colors, label, title] = ret_val

            # Only sub plots that were loaded consume a number
            name = "sub%d" % (counter)
            counter += 1

            f.write(subdata_template.format(name=name,
                                            title=title,
                                            type=tools.config_getstr(config, sec, "mode"),
                                            item=convert.list_to_text(item),
                                            label=convert.list_to_text(label),
                                            colors_n=convert.list_to_text(sub_colors),
                                            data=data_text))

            subdata.append({"pos": pos, "label": label, "color": sub_colors, "title": title})

        f.write(js_subdata_2)

        ##### functions #####
        f.write(js_function)

    dataset["subdata"] = subdata
    return dataset