Exemplo n.º 1
0
def run(args):
    """Recommend delta values from permuted data and write them out as JSON.

    Loads the influence matrix, its index and the heat scores named in
    ``args``, computes permutation deltas according to ``args.perm_type``
    and takes the median of the deltas stored under ``MIN_CC_SIZE``. It then
    tries multiples 1 through 10 of that median and stops at the first one
    for which the largest connected component of the real data has size
    <= ``MAX_CC_SIZE`` (multiple 10 is used if none qualifies). That
    multiple and the next four are reported as the recommended deltas.

    The report is written to ``args.output_file`` when it is set, otherwise
    to standard output.

    Raises:
        ValueError: if ``args.perm_type`` is neither "heat" nor "mutations".
    """
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        addtl_genes = (hnio.load_genes(args.permutation_genes_file)
                       if args.permutation_genes_file else None)
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes,
                                     args.num_permutations, args.parallel)
    elif args.perm_type == "mutations":
        deltas = get_deltas_for_mutations(args, infmat, infmat_index, heat_params)
    else:
        raise ValueError("Invalid mutation permutation type: %s" % args.perm_type)

    # find the multiple of the median delta s.t. the size of the largest CC
    # in the real data is <= MAX_CC_SIZE
    median_delta = np.median(deltas[MIN_CC_SIZE])
    M, gene_index = hn.induce_infmat(infmat, infmat_index, sorted(heat.keys()))
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)

    for multiple in range(1, 11):
        G = hn.weighted_graph(sim, gene_index, multiple * median_delta)
        cc_sizes = [len(cc) for cc in hn.connected_components(G)]
        # No components at all trivially satisfies the size bound; checking
        # for it first avoids calling max() on an empty list.
        if not cc_sizes or max(cc_sizes) <= MAX_CC_SIZE:
            break

    # and recommend running HotNet with that multiple and the next 4 multiples
    # (a distinct comprehension variable keeps the loop variable unshadowed)
    recommended_deltas = [k * median_delta
                          for k in range(multiple, multiple + 5)]

    results = {"parameters": vars(args), "heat_parameters": heat_params,
               "recommended_deltas": recommended_deltas}

    # The context manager closes the file even if serialization fails;
    # sys.stdout is deliberately never closed.
    if args.output_file:
        with open(args.output_file, 'w') as output_file:
            json.dump(results, output_file, indent=4)
    else:
        json.dump(results, sys.stdout, indent=4)
Exemplo n.º 2
0
def run(args):
    """Run HotNet for a single delta and write the results to the output directory.

    Loads the influence matrix, its index and the heat scores named in
    ``args``, builds the similarity matrix and the weighted graph for
    ``args.delta``, and extracts connected components using
    ``args.min_cc_size``. Unless ``args.permutation_type`` is "none",
    significance statistics are computed from heat or mutation permutations.

    Writes ``JSON_OUTPUT`` and ``COMPONENTS_TSV`` (and ``SIGNIFICANCE_TSV``
    when statistics were computed) into ``args.output_directory``, creating
    the directory if it does not exist and printing a warning if any of
    those files are already present.

    Raises:
        RuntimeError: if mutation permutation is requested but
            ``heat_params["heat_fn"]`` is not "load_mutation_heat".
        ValueError: if ``args.permutation_type`` is not one of "none",
            "heat" or "mutations".
    """
    # Resolve the output directory once; every output path is built from it.
    output_dir = os.path.abspath(args.output_directory)

    # create output directory if doesn't exist; warn if output files already exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dir_contents = os.listdir(output_dir)
    if JSON_OUTPUT in dir_contents or COMPONENTS_TSV in dir_contents or SIGNIFICANCE_TSV in dir_contents:
        print("WARNING: Output directory already contains HotNet results file(s), which will be "
              "overwritten. (Ctrl-c to cancel).")

    # load data
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix and extract connected components
    M, gene_index = hn.induce_infmat(infmat, infmat_index, sorted(heat.keys()), quiet=False)
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)
    G = hn.weighted_graph(sim, gene_index, args.delta)
    ccs = hn.connected_components(G, args.min_cc_size)

    # calculate significance
    if args.permutation_type != "none":
        if args.permutation_type == "heat":
            sizes2stats = heat_permutation_significance(args, heat, infmat, infmat_index, G)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # Adjacent literals rather than a backslash continuation, so
                # the message carries no embedded run of indentation spaces.
                raise RuntimeError("Heat scores must be based on mutation data to perform "
                                   "significance testing based on mutation data permutation.")
            sizes2stats = mutation_permutation_significance(args, infmat, infmat_index, G, heat_params)
        else:
            raise ValueError("Unrecognized permutation type %s" % (args.permutation_type))

    # sort ccs list such that genes within components are sorted alphanumerically, and components
    # are sorted first by length, then alphanumerically by name of the first gene in the component
    # (two passes work because list.sort is stable)
    ccs = [sorted(cc) for cc in ccs]
    ccs.sort(key=lambda comp: comp[0])
    ccs.sort(key=len, reverse=True)

    # write output
    output_dict = {"parameters": vars(args), "heat_parameters": heat_params,
                   "sizes": hn.component_sizes(ccs), "components": ccs}
    if args.permutation_type != "none":
        output_dict["statistics"] = sizes2stats
        hnio.write_significance_as_tsv(os.path.join(output_dir, SIGNIFICANCE_TSV),
                                       sizes2stats)

    # The context manager closes the file even if serialization fails.
    with open(os.path.join(output_dir, JSON_OUTPUT), 'w') as json_out:
        json.dump(output_dict, json_out, indent=4)

    hnio.write_components_as_tsv(os.path.join(output_dir, COMPONENTS_TSV), ccs)
Exemplo n.º 3
0
def run(args):
    """Pick recommended HotNet deltas from permutation results and emit JSON.

    Steps performed:

    1. Load the influence matrix, its index and the heat scores named in
       ``args``.
    2. Compute permutation deltas with the strategy selected by
       ``args.perm_type`` ("heat" or "mutations").
    3. Take the median of the deltas stored under ``MIN_CC_SIZE`` and try
       multiples 1 through 10 of it, stopping at the first multiple for
       which the largest connected component of the real data has size
       <= ``MAX_CC_SIZE``. If none qualifies, multiple 10 is used.
    4. Report that multiple and the next four as the recommended deltas,
       written to ``args.output_file`` or, when it is not set, to standard
       output.

    Raises:
        ValueError: if ``args.perm_type`` is neither "heat" nor "mutations".
    """
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    if args.perm_type == "heat":
        addtl_genes = None
        if args.permutation_genes_file:
            addtl_genes = hnio.load_genes(args.permutation_genes_file)
        deltas = get_deltas_for_heat(infmat, infmat_index, heat, addtl_genes,
                                     args.num_permutations, args.parallel)
    elif args.perm_type == "mutations":
        deltas = get_deltas_for_mutations(args, infmat, infmat_index,
                                          heat_params)
    else:
        raise ValueError("Invalid mutation permutation type: %s" %
                         args.perm_type)

    # find the multiple of the median delta s.t. the size of the largest CC
    # in the real data is <= MAX_CC_SIZE
    median_delta = np.median(deltas[MIN_CC_SIZE])
    M, gene_index = hn.induce_infmat(infmat, infmat_index, sorted(heat.keys()))
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)

    for multiple in range(1, 11):
        G = hn.weighted_graph(sim, gene_index, multiple * median_delta)
        cc_sizes = [len(cc) for cc in hn.connected_components(G)]
        # An empty component list trivially meets the size bound; testing it
        # first keeps max() from raising on an empty sequence.
        if not cc_sizes or max(cc_sizes) <= MAX_CC_SIZE:
            break

    # and recommend running HotNet with that multiple and the next 4 multiples;
    # the comprehension uses its own variable so it does not shadow the
    # loop variable it starts from
    recommended_deltas = [
        k * median_delta for k in range(multiple, multiple + 5)
    ]

    results = {
        "parameters": vars(args),
        "heat_parameters": heat_params,
        "recommended_deltas": recommended_deltas,
    }

    # Use a context manager for the file so it is closed even when
    # serialization raises; sys.stdout must stay open.
    if args.output_file:
        with open(args.output_file, 'w') as output_file:
            json.dump(results, output_file, indent=4)
    else:
        json.dump(results, sys.stdout, indent=4)
Exemplo n.º 4
0
def run(args):
    """Execute a HotNet run and store its results in ``args.output_directory``.

    The function loads the influence matrix, its index and the heat scores
    named in ``args``, derives the similarity matrix and the weighted graph
    for ``args.delta``, and extracts connected components using
    ``args.min_cc_size``. When ``args.permutation_type`` is "heat" or
    "mutations" it also computes significance statistics.

    Output files ``JSON_OUTPUT`` and ``COMPONENTS_TSV`` are always written;
    ``SIGNIFICANCE_TSV`` is written only when statistics were computed. The
    output directory is created if missing, and a warning is printed when
    any of these files already exist there.

    Raises:
        RuntimeError: if mutation permutation is requested but
            ``heat_params["heat_fn"]`` is not "load_mutation_heat".
        ValueError: if ``args.permutation_type`` is not one of "none",
            "heat" or "mutations".
    """
    # Compute the absolute output directory a single time and derive all
    # output paths from it.
    output_dir = os.path.abspath(args.output_directory)

    # create output directory if doesn't exist; warn if output files already exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dir_contents = os.listdir(output_dir)
    if JSON_OUTPUT in dir_contents or COMPONENTS_TSV in dir_contents or SIGNIFICANCE_TSV in dir_contents:
        print(
            "WARNING: Output directory already contains HotNet results file(s), which will be "
            "overwritten. (Ctrl-c to cancel).")

    # load data
    infmat = scipy.io.loadmat(args.infmat_file)[args.infmat_name]
    infmat_index = hnio.load_index(args.infmat_index_file)
    heat, heat_params = hnio.load_heat_json(args.heat_file)

    # compute similarity matrix and extract connected components
    M, gene_index = hn.induce_infmat(infmat,
                                     infmat_index,
                                     sorted(heat.keys()),
                                     quiet=False)
    h = hn.heat_vec(heat, gene_index)
    sim = hn.similarity_matrix(M, h)
    G = hn.weighted_graph(sim, gene_index, args.delta)
    ccs = hn.connected_components(G, args.min_cc_size)

    # calculate significance
    if args.permutation_type != "none":
        if args.permutation_type == "heat":
            sizes2stats = heat_permutation_significance(
                args, heat, infmat, infmat_index, G)
        elif args.permutation_type == "mutations":
            if heat_params["heat_fn"] != "load_mutation_heat":
                # Implicit literal concatenation keeps the message on one
                # logical line without the indentation whitespace that a
                # backslash continuation inside the string would embed.
                raise RuntimeError(
                    "Heat scores must be based on mutation data to perform "
                    "significance testing based on mutation data permutation."
                )
            sizes2stats = mutation_permutation_significance(
                args, infmat, infmat_index, G, heat_params)
        else:
            raise ValueError("Unrecognized permutation type %s" %
                             (args.permutation_type))

    # sort ccs list such that genes within components are sorted alphanumerically, and components
    # are sorted first by length, then alphanumerically by name of the first gene in the component
    # (the second sort is stable, so it preserves the first sort's order among equal lengths)
    ccs = [sorted(cc) for cc in ccs]
    ccs.sort(key=lambda comp: comp[0])
    ccs.sort(key=len, reverse=True)

    # write output
    output_dict = {
        "parameters": vars(args),
        "heat_parameters": heat_params,
        "sizes": hn.component_sizes(ccs),
        "components": ccs
    }
    if args.permutation_type != "none":
        output_dict["statistics"] = sizes2stats
        hnio.write_significance_as_tsv(
            os.path.join(output_dir, SIGNIFICANCE_TSV), sizes2stats)

    # Closing via the context manager guarantees the handle is released even
    # if json.dump raises part-way through.
    with open(os.path.join(output_dir, JSON_OUTPUT), 'w') as json_out:
        json.dump(output_dict, json_out, indent=4)

    hnio.write_components_as_tsv(os.path.join(output_dir, COMPONENTS_TSV),
                                 ccs)