Пример #1
0
def export_pathways():
    """Export pathway and pathway-list definitions to ``pathways.mat``.

    Gene indices are taken from the gene order of the pickled change
    distributions (``SingleRegion.change_dist_filename``) and pathway
    indices from the order of ``pathway_names``; both are shifted to be
    one based for matlab. The result is written with ``save_matfile`` to
    ``<results_dir()>/export/pathways.mat`` together with a README string
    describing each exported variable.

    Raises:
        KeyError: if a pathway contains a gene that is not in
            ``change_dist.genes``, or a list names a pathway that is not
            returned by ``pathway_lists.read_all_pathways()``.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    # NOTE that matlab is one based
    matlab_g2i = {g: i + 1 for i, g in enumerate(change_dist.genes)}

    pathways = pathway_lists.read_all_pathways()
    # Materialize the names once so the order stays fixed for every
    # structure below (a dict view is not a real sequence on Python 3).
    pathway_names = list(pathways.keys())
    n_pathways = len(pathway_names)
    # Fill pre-allocated 1-D object arrays element by element.
    # np.array([...], dtype=object) would silently build a 2-D array when
    # all pathways happen to have the same number of genes.
    pathway_genes_names = np.empty(n_pathways, dtype=object)
    pathway_genes_idx = np.empty(n_pathways, dtype=object)
    for i, p in enumerate(pathway_names):
        genes = pathways[p]
        pathway_genes_names[i] = list_of_strings_to_matlab_cell_array(genes)
        pathway_genes_idx[i] = np.array([matlab_g2i[g] for g in genes])

    # NOTE matlab indexing is one based
    matlab_p2i = {p: i + 1 for i, p in enumerate(pathway_names)}
    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i, listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(
            pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]
    README = """\
pathway_names:
Cell array of all pathway names. The name in cell number k is the name of the
pathway at position k in "pathway_genes_names" and "pathway_genes_idx".

pathway_genes_names:
Cell array (size <n-pathways>). Each cell contains a cell array of strings which 
are the gene symbols of the genes in that pathway.

pathway_genes_idx:
Same as pathway_genes_names, but each cell in the outer cell array is now an 
array of gene indices corresponding to the gene positions in cube.mat and change-distributions.mat.
Hopefully this should be easier to use in matlab.

list_names:
Names of pathway lists prepared by Noa

list_pathway_names:
Cell array. One item per list. Each item is a cell array of strings which are 
the names of the pathways belonging to that list.

list_pathway_idx:
Same as list_pathway_names, but instead of listing the pathways by name, they 
are given as indices into the previous pathway_xxx structures.
"""
    mdict = dict(
        README_PATHWAYS=README,
        pathway_names=list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names=pathway_genes_names,
        pathway_genes_idx=pathway_genes_idx,
        list_names=list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names=list_pathway_names,
        list_pathway_idx=list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))
Пример #2
0
    def __init__(self, listname='all'):
        """Read the pathways for ``listname`` and cache change-distribution data.

        listname is forwarded to ``pathway_lists.read_all_pathways``. The
        pickled change distribution is loaded from
        ``SingleRegion.change_dist_filename`` and its fields are mirrored
        as attributes of this instance.
        """
        self.listname = listname
        self.pathways = pathway_lists.read_all_pathways(listname)

        dist = load_pickle(
            SingleRegion.change_dist_filename,
            'change distribution for all genes and regions',
        )
        self.change_dist = dist
        self.genes = dist.genes
        self.regions = dist.regions
        # Reverse lookups: gene / region -> zero-based position.
        self.g2i = {gene: pos for pos, gene in enumerate(self.genes)}
        self.r2i = {region: pos for pos, region in enumerate(self.regions)}
        # Copy the remaining distribution fields onto the instance as-is.
        mirrored = ('age_scaler', 'mu', 'std',
                    'bin_edges', 'bin_centers', 'weights')
        for field in mirrored:
            setattr(self, field, getattr(dist, field))
Пример #3
0
    def __init__(self, listname='all'):
        """Set up pathway data and shortcuts into the change distribution.

        listname selects which pathways ``pathway_lists.read_all_pathways``
        returns. The change distribution is unpickled from
        ``SingleRegion.change_dist_filename`` and its fields are exposed
        directly on the instance.
        """
        self.listname = listname
        self.pathways = pathway_lists.read_all_pathways(listname)

        description = 'change distribution for all genes and regions'
        loaded = load_pickle(SingleRegion.change_dist_filename, description)
        self.change_dist = loaded
        self.genes = loaded.genes
        self.regions = loaded.regions
        # Zero-based position of every gene and region.
        self.g2i = {name: k for k, name in enumerate(self.genes)}
        self.r2i = {name: k for k, name in enumerate(self.regions)}
        # Expose the remaining distribution fields unchanged.
        for attr in ('age_scaler', 'mu', 'std',
                     'bin_edges', 'bin_centers', 'weights'):
            setattr(self, attr, getattr(loaded, attr))
Пример #4
0
def export_pathways():
    """Export pathway and pathway-list definitions to ``pathways.mat``.

    Gene indices are taken from the gene order of the pickled change
    distributions (``SingleRegion.change_dist_filename``) and pathway
    indices from the order of ``pathway_names``; both are shifted to be
    one based for matlab. The result is written with ``save_matfile`` to
    ``<results_dir()>/export/pathways.mat`` together with a README string
    describing each exported variable.

    Raises:
        KeyError: if a pathway contains a gene that is not in
            ``change_dist.genes``, or a list names a pathway that is not
            returned by ``pathway_lists.read_all_pathways()``.
    """
    change_dist = load_pickle(SingleRegion.change_dist_filename)
    # NOTE that matlab is one based
    matlab_g2i = {g: i + 1 for i, g in enumerate(change_dist.genes)}

    pathways = pathway_lists.read_all_pathways()
    # Materialize the names once so the order stays fixed for every
    # structure below (a dict view is not a real sequence on Python 3).
    pathway_names = list(pathways.keys())
    n_pathways = len(pathway_names)
    # Fill pre-allocated 1-D object arrays element by element.
    # np.array([...], dtype=object) would silently build a 2-D array when
    # all pathways happen to have the same number of genes.
    pathway_genes_names = np.empty(n_pathways, dtype=object)
    pathway_genes_idx = np.empty(n_pathways, dtype=object)
    for i, p in enumerate(pathway_names):
        genes = pathways[p]
        pathway_genes_names[i] = list_of_strings_to_matlab_cell_array(genes)
        pathway_genes_idx[i] = np.array([matlab_g2i[g] for g in genes])

    # NOTE matlab indexing is one based
    matlab_p2i = {p: i + 1 for i, p in enumerate(pathway_names)}
    list_names = pathway_lists.all_pathway_lists()
    list_pathway_names = np.empty(len(list_names), dtype=object)
    list_pathway_idx = np.empty(len(list_names), dtype=object)
    for i, listname in enumerate(list_names):
        pathways_in_list = pathway_lists.list_to_pathway_names(listname)
        list_pathway_names[i] = list_of_strings_to_matlab_cell_array(
            pathways_in_list)
        list_pathway_idx[i] = [matlab_p2i[p] for p in pathways_in_list]
    README = """\
pathway_names:
Cell array of all pathway names. The name in cell number k is the name of the
pathway at position k in "pathway_genes_names" and "pathway_genes_idx".

pathway_genes_names:
Cell array (size <n-pathways>). Each cell contains a cell array of strings which 
are the gene symbols of the genes in that pathway.

pathway_genes_idx:
Same as pathway_genes_names, but each cell in the outer cell array is now an 
array of gene indices corresponding to the gene positions in cube.mat and change-distributions.mat.
Hopefully this should be easier to use in matlab.

list_names:
Names of pathway lists prepared by Noa

list_pathway_names:
Cell array. One item per list. Each item is a cell array of strings which are 
the names of the pathways belonging to that list.

list_pathway_idx:
Same as list_pathway_names, but instead of listing the pathways by name, they 
are given as indices into the previous pathway_xxx structures.
"""
    mdict = dict(
        README_PATHWAYS=README,
        pathway_names=list_of_strings_to_matlab_cell_array(pathway_names),
        pathway_genes_names=pathway_genes_names,
        pathway_genes_idx=pathway_genes_idx,
        list_names=list_of_strings_to_matlab_cell_array(list_names),
        list_pathway_names=list_pathway_names,
        list_pathway_idx=list_pathway_idx,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'pathways.mat'))