示例#1
0
    '''Test to see if most mutations are due to single gene'''
    counts = hit_matrix.ix[run.gene_sets[p]].sum(1).dropna().order()
    with_top = hit_matrix.ix[run.gene_sets[p]].sum().clip_upper(1).sum()
    without = hit_matrix.ix[run.gene_sets[p] - 
                            {counts.idxmax()}].sum().clip_upper(1).sum()
    return ((with_top - without) / without) > .5

# Script section: filter the candidate feature matrices, attach the surviving
# features to the data object `mut`, save it, and write one plot per
# pathway-level feature.

# Keep only what size_filter selects from meta_matrix (size_filter is defined
# elsewhere; its result is used directly as the [] indexer).
meta_matrix = meta_matrix[size_filter(meta_matrix)] 
# Evaluate is_one_gene for every remaining meta_matrix index label, then keep
# only the rows for which it returned False.
# NOTE(review): assuming pandas objects -- `.ix` was deprecated in pandas 0.20
# and removed in 1.0, so this line needs `.loc` on a modern install.
s = Series({p: is_one_gene(p) for p in meta_matrix.index})
meta_matrix = meta_matrix.ix[s==False]
# Apply the same size_filter selection to hit_matrix.
hit_matrix = hit_matrix[size_filter(hit_matrix)] 

'''Add passing features to the Data Object''' 
# Rows of meta_matrix come first, followed by the rows of hit_matrix.
mut.features = meta_matrix.append(hit_matrix)
# NOTE(review): compress() is immediately undone by uncompress(); confirm
# whether this round trip is intentional (both are project methods not shown).
mut.compress()
mut.uncompress()

'''Save updated Data Object (with additional features field'''
mut.save()
# presumably save() leaves the object compressed, hence the second
# uncompress() before mut.df is read below -- TODO confirm.
mut.uncompress()

'''Draw pathway_plots for pathway level features'''
# Pathway-level features are the feature labels that are also keys of
# run.gene_sets.
meta_features = [f for f in mut.features.index if f in run.gene_sets]
pathway_plot_folder = mut.path + '/Figures/PathwayPlots/'
# Create the output folder (including parents) on first use.
if not os.path.isdir(pathway_plot_folder):
    os.makedirs(pathway_plot_folder)
        
# NOTE(review): the enumerate index `i` is not used in the loop body.
for i,p in enumerate(meta_features):
    # Rows of mut.df selected by the members of gene set p.
    df = mut.df.ix[run.gene_sets[p]]
    pathway_plot(df) 
    # Output path is the folder plus the feature name; no file extension is
    # added here.
    savefig(pathway_plot_folder + p)
示例#2
0
# Script section: filter the candidate feature matrices, merge redundant
# features, store the result on the data object `data`, save it, and prepare
# the inputs for the per-pathway plots.

# Keep only what size_filter selects from meta_matrix (size_filter is defined
# elsewhere; its result is used directly as the [] indexer).
meta_matrix = meta_matrix[size_filter(meta_matrix)]
# Evaluate is_one_gene for every remaining meta_matrix index label, then keep
# only the rows for which it returned False.
# NOTE(review): assuming pandas objects -- `.ix` was deprecated in pandas 0.20
# and removed in 1.0, so this line needs `.loc` on a modern install.
s = Series({p: is_one_gene(p) for p in meta_matrix.index})
meta_matrix = meta_matrix.ix[s==False]
# Apply the same size_filter selection to hit_matrix.
hit_matrix = hit_matrix[size_filter(hit_matrix)]

# Work on a copy of hit_matrix whose index is reduced to level 0 of the
# original index, so hit_matrix itself keeps its full index.
hit_genes = hit_matrix.copy()
hit_genes.index = hit_genes.index.get_level_values(0)
# Stack hit_genes rows first, then meta_matrix rows, and pass the combined
# frame to merge_redundant (project helper, not shown).
non_redundant = merge_redundant(hit_genes.append(meta_matrix))

'''Add passing features to the Data Object'''
data.features = non_redundant

'''Save updated Data Object (with additional features field)'''
# Values 1 and -1 in data.df are overwritten with 0 before saving.
# NOTE(review): this replaces data.df itself, so the change is persisted by
# save() and is also what hit_mat is built from below -- confirm intended.
data.df = data.df.replace([1,-1], 0)
data.save()
# presumably save() leaves the object compressed, hence uncompress() before
# data.df is read again -- TODO confirm.
data.uncompress()


'''Draw pathway_plots for pathway level features'''
# Pathway-level features are the feature labels that are also keys of
# run.gene_sets.
meta_features = [f for f in data.features.index if f in run.gene_sets]
pathway_plot_folder = data.path + '/Figures/PathwayPlots/'
# Create the output folder (including parents) on first use.
if not os.path.isdir(pathway_plot_folder):
    os.makedirs(pathway_plot_folder)
    
# Copy of data.df re-indexed by level 2 of its index, then converted to a
# float indicator: 1.0 where a cell equals data.hit_val, 0.0 elsewhere.
hit_mat = data.df.copy()
hit_mat.index = hit_mat.index.get_level_values(2)
hit_mat = (hit_mat == data.hit_val).astype(float)
        
# NOTE(review): the enumerate index `i` is not used in the visible loop body.
for i,p in enumerate(meta_features):
    # Skip features whose output path already exists as a file.
    if os.path.isfile(pathway_plot_folder + p):
        continue