def main():
    """Command-line entry point: detect and plot differentially expressed genes.

    Parses the command-line options, reads the pickled ``adata`` object named
    by ``-m`` from the current directory, passes it to ``st.detect_de_genes``
    and ``st.plot_de_genes``, and writes the result to
    ``<prefix>_stream_result.pkl`` in the current directory.

    ``-preference`` is an optional comma-separated list; when it is omitted,
    ``None`` is forwarded to ``st.detect_de_genes``.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8,
                        help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8,
                        help="")
    parser.add_argument("-cutoff_zscore", dest="cutoff_zscore", type=float,
                        default=1.5, help="")
    parser.add_argument("-percentile_expr", dest="percentile_expr", type=int,
                        default=95, help="")
    parser.add_argument("-flag_use_precomputed", dest="flag_use_precomputed",
                        action="store_true", help="")
    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", default=None,
                        help="")
    parser.add_argument("-cutoff_logfc", dest="cutoff_logfc", type=float,
                        default=0.25, help="")
    parser.add_argument("-num_genes", dest="num_genes", type=int, default=15,
                        help="")
    parser.add_argument("-n_jobs", dest="n_jobs", type=int, default=8,
                        help="")

    args = parser.parse_args()

    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)

    # -preference defaults to None; only split it when it was supplied,
    # otherwise the unconditional .split() would raise AttributeError.
    if args.preference is not None:
        preference = args.preference.split(',')
    else:
        preference = None

    st.detect_de_genes(adata,
                       cutoff_zscore=args.cutoff_zscore,
                       cutoff_logfc=args.cutoff_logfc,
                       percentile_expr=args.percentile_expr,
                       n_jobs=args.n_jobs,
                       use_precomputed=args.flag_use_precomputed,
                       root=args.root,
                       preference=preference)
    st.plot_de_genes(adata,
                     num_genes=args.num_genes,
                     cutoff_zscore=args.cutoff_zscore,
                     cutoff_logfc=args.cutoff_logfc,
                     save_fig=True,
                     fig_path=None,
                     fig_size=(args.fig_width, args.fig_height))

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Finished computation.')
def main():
    """Command-line entry point: fit the elastic principal graph and plot it.

    Reads the pickled ``adata`` object named by ``-m`` from the current
    directory, calls ``st.elastic_principal_graph`` with the ``-epg_*``
    options, saves the two branch plots under the output prefix, and writes
    ``<prefix>_stream_result.pkl`` to the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")

    # Single-dash options whose dest equals the flag name:
    # (name, type, default). A type of None leaves the value as a string.
    simple_options = (
        ("epg_n_nodes", int, 50),
        ("incr_n_nodes", int, 30),
        ("epg_trimmingradius", None, 'Inf'),
        ("epg_alpha", float, 0.02),
        ("epg_beta", float, 0.0),
        ("epg_n_processes", int, 1),
        ("epg_lambda", float, 0.02),
        ("epg_mu", float, 0.1),
        ("epg_finalenergy", None, 'Penalized'),
        ("comp1", int, 0),
        ("comp2", int, 1),
        ("n_comp", int, 3),
        ("fig_width", int, 8),
        ("fig_height", int, 8),
        ("fig_legend_ncol", int, None),
    )
    for opt_name, opt_type, opt_default in simple_options:
        parser.add_argument("-" + opt_name, dest=opt_name, type=opt_type,
                            default=opt_default, help="")

    opts = parser.parse_args()
    prefix = opts.output_filename_prefix
    figure_size = (opts.fig_width, opts.fig_height)

    adata = st.read(file_name=opts.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir="./")

    st.elastic_principal_graph(
        adata,
        epg_n_nodes=opts.epg_n_nodes,
        incr_n_nodes=opts.incr_n_nodes,
        epg_trimmingradius=opts.epg_trimmingradius,
        epg_alpha=opts.epg_alpha,
        epg_n_processes=opts.epg_n_processes,
        epg_lambda=opts.epg_lambda,
        epg_mu=opts.epg_mu,
        epg_beta=opts.epg_beta,
        epg_finalenergy=opts.epg_finalenergy)

    st.plot_branches(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches.png',
        fig_path=None,
        fig_size=figure_size)
    st.plot_branches_with_cells(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches_with_cells.png',
        fig_path=None,
        fig_size=figure_size,
        fig_legend_ncol=opts.fig_legend_ncol)

    st.write(adata, file_name=prefix + '_stream_result.pkl', file_path='./',
             file_format='pkl')
    print('Finished computation.')
def main():
    """Command-line entry point: seed the elastic principal graph and plot it.

    Reads the pickled ``adata`` object named by ``-m`` from the current
    directory, calls ``st.seed_elastic_principal_graph`` with the clustering
    options, saves the two branch plots under the output prefix, and writes
    ``<prefix>_stream_result.pkl`` to the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-nb_pct", "--percent_neighbor_cells", dest="nb_pct",
                        type=float, default=0.1, help="")

    # Single-dash options whose dest equals the flag name:
    # (name, type, default). A type of None leaves the value as a string.
    simple_options = (
        ("n_clusters", int, 10),
        ("damping", float, 0.75),
        ("pref_perc", int, 50),
        ("max_n_clusters", int, 200),
        ("clustering", None, 'kmeans'),
        ("comp1", int, 0),
        ("comp2", int, 1),
        ("n_comp", int, 3),
        ("fig_width", int, 8),
        ("fig_height", int, 8),
        ("fig_legend_ncol", int, None),
    )
    for opt_name, opt_type, opt_default in simple_options:
        parser.add_argument("-" + opt_name, dest=opt_name, type=opt_type,
                            default=opt_default, help="")

    opts = parser.parse_args()
    print('Starting validation procedure...')

    prefix = opts.output_filename_prefix
    figure_size = (opts.fig_width, opts.fig_height)

    adata = st.read(file_name=opts.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir="./")

    st.seed_elastic_principal_graph(
        adata,
        clustering=opts.clustering,
        n_clusters=opts.n_clusters,
        damping=opts.damping,
        pref_perc=opts.pref_perc,
        max_n_clusters=opts.max_n_clusters,
        nb_pct=opts.nb_pct)

    st.plot_branches(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches.png',
        fig_path=None,
        fig_size=figure_size)
    st.plot_branches_with_cells(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches_with_cells.png',
        fig_path=None,
        fig_size=figure_size,
        fig_legend_ncol=opts.fig_legend_ncol)

    st.write(adata, file_name=prefix + '_stream_result.pkl', file_path='./',
             file_format='pkl')
    print('Finished computation.')
def _flat_tree_node_labels(adata):
    """Return the 'label' attribute of every node in ``adata.uns['flat_tree']``."""
    flat_tree = adata.uns['flat_tree']
    return list(nx.get_node_attributes(flat_tree, 'label').values())


def _top_marker_genes(gene_tables, n_genes):
    """Collect the first ``n_genes`` index entries of each table in ``gene_tables``."""
    collected = []
    for table in gene_tables.values():
        collected = collected + table.index[:n_genes].tolist()
    return collected


def _plot_genes_for_web(adata, gene_list, roots, flag_log_view):
    """Emit the website gene output and a PNG gene stream plot for each root."""
    for ns in roots:
        output_for_website_subwaymap_gene(adata, gene_list)
        st.stream_plot_gene(adata,
                            root=ns,
                            fig_size=(8, 8),
                            genes=gene_list,
                            save_fig=True,
                            flag_log_view=flag_log_view,
                            fig_format='png')


def main():
    """Command-line entry point for the full STREAM pipeline.

    Two modes are selected by the options:

    * Reference mode (``--new`` not given): either loads
      ``stream_result.pkl`` from the work directory (``-p``) or reads the
      input matrix, runs feature selection, dimension reduction and the
      elastic-principal-graph steps, saves the plots and writes the result.
      Optional marker-gene detection (``--TG``, ``--DE``, ``--LG``) and gene
      visualisation (``-g``) follow.
    * Mapping mode (``--new`` given): loads ``stream_result.pkl`` from the
      reference work directory, reads the new data set, maps it with
      ``st.map_new_data`` and writes ``stream_mapping_result.pkl``.

    The work directory is ``-o`` when given, otherwise ``./stream_result``.
    Exits through ``parser.error`` when neither ``-m`` nor ``--new`` is given.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--matrix", dest="input_filename",
                        default=None, help="input file name", metavar="FILE")
    parser.add_argument("-l", "--cell_labels", dest="cell_label_filename",
                        default=None, help="filename of cell labels")
    parser.add_argument("-c", "--cell_labels_colors",
                        dest="cell_label_color_filename", default=None,
                        help="filename of cell label colors")
    parser.add_argument(
        "-s", "--select_features", dest="s_method", default='LOESS',
        help="LOESS,PCA or all: Select variable genes using LOESS or principal components using PCA or all the genes are kept")
    parser.add_argument("--TG", "--detect_TG_genes",
                        dest="flag_gene_TG_detection", action="store_true",
                        help="detect transition genes automatically")
    parser.add_argument("--DE", "--detect_DE_genes",
                        dest="flag_gene_DE_detection", action="store_true",
                        help="detect DE genes automatically")
    parser.add_argument("--LG", "--detect_LG_genes",
                        dest="flag_gene_LG_detection", action="store_true",
                        help="detect leaf genes automatically")
    parser.add_argument(
        "-g", "--genes", dest="genes", default=None,
        help="genes to visualize, it can either be filename which contains all the genes in one column or a set of gene names separated by comma")
    parser.add_argument(
        "-p", "--use_precomputed", dest="use_precomputed",
        action="store_true",
        help="use precomputed data files without re-computing structure learning part")
    parser.add_argument("--new", dest="new_filename", default=None,
                        help="file name of data to be mapped")
    parser.add_argument("--new_l", dest="new_label_filename", default=None,
                        help="filename of new cell labels")
    parser.add_argument("--new_c", dest="new_label_color_filename",
                        default=None,
                        help="filename of new cell label colors")
    parser.add_argument("--log2", dest="flag_log2", action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm", dest="flag_norm", action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("--atac", dest="flag_atac", action="store_true",
                        help="indicate scATAC-seq data")
    parser.add_argument(
        "--n_jobs", dest="n_jobs", type=int, default=1,
        help="Specify the number of processes to use. (default, 1")
    parser.add_argument(
        "--loess_frac", dest="loess_frac", type=float, default=0.1,
        help="The fraction of the data used in LOESS regression")
    parser.add_argument(
        "--loess_cutoff", dest="loess_cutoff", type=int, default=95,
        help="the percentile used in variable gene selection based on LOESS regression")
    parser.add_argument("--pca_first_PC", dest="flag_first_PC",
                        action="store_true", help="keep first PC")
    parser.add_argument("--pca_n_PC", dest="pca_n_PC", type=int, default=15,
                        help="The number of selected PCs,it's 15 by default")
    parser.add_argument(
        "--dr_method", dest="dr_method", default='se',
        help="Method used for dimension reduction. Choose from {{'se','mlle','umap','pca'}}")
    parser.add_argument("--n_neighbors", dest="n_neighbors", type=float,
                        default=50, help="The number of neighbor cells")
    parser.add_argument(
        "--nb_pct", dest="nb_pct", type=float, default=None,
        help="The percentage of neighbor cells (when sepcified, it will overwrite n_neighbors).")
    parser.add_argument("--n_components", dest="n_components", type=int,
                        default=3, help="Number of components to keep.")
    parser.add_argument(
        "--clustering", dest="clustering", default='kmeans',
        help="Clustering method used for seeding the intial structure, choose from 'ap','kmeans','sc'")
    parser.add_argument("--damping", dest="damping", type=float,
                        default=0.75,
                        help="Affinity Propagation: damping factor")
    parser.add_argument(
        "--n_clusters", dest="n_clusters", type=int, default=10,
        help="Number of clusters for spectral clustering or kmeans")
    parser.add_argument("--EPG_n_nodes", dest="EPG_n_nodes", type=int,
                        default=50,
                        help=" Number of nodes for elastic principal graph")
    parser.add_argument(
        "--EPG_lambda", dest="EPG_lambda", type=float, default=0.02,
        help="lambda parameter used to compute the elastic energy")
    parser.add_argument(
        "--EPG_mu", dest="EPG_mu", type=float, default=0.1,
        help="mu parameter used to compute the elastic energy")
    parser.add_argument(
        "--EPG_trimmingradius", dest="EPG_trimmingradius", type=float,
        default=np.inf,
        help="maximal distance of point from a node to affect its embedment")
    parser.add_argument(
        "--EPG_alpha", dest="EPG_alpha", type=float, default=0.02,
        help="positive numeric, the value of the alpha parameter of the penalized elastic energy")
    parser.add_argument("--EPG_collapse", dest="flag_EPG_collapse",
                        action="store_true", help="collapsing small branches")
    parser.add_argument(
        "--EPG_collapse_mode", dest="EPG_collapse_mode",
        default="PointNumber",
        help="the mode used to collapse branches. PointNumber,PointNumber_Extrema, PointNumber_Leaves,EdgesNumber or EdgesLength")
    parser.add_argument(
        "--EPG_collapse_par", dest="EPG_collapse_par", type=float, default=5,
        help="positive numeric, the cotrol paramter used for collapsing small branches")
    parser.add_argument("--disable_EPG_optimize",
                        dest="flag_disable_EPG_optimize",
                        action="store_true",
                        help="disable optimizing branching")
    parser.add_argument("--EPG_shift", dest="flag_EPG_shift",
                        action="store_true", help="shift branching point ")
    parser.add_argument(
        "--EPG_shift_mode", dest="EPG_shift_mode", default='NodeDensity',
        help="the mode to use to shift the branching points NodePoints or NodeDensity")
    parser.add_argument(
        "--EPG_shift_DR", dest="EPG_shift_DR", type=float, default=0.05,
        help="positive numeric, the radius to be used when computing point density if EPG_shift_mode is NodeDensity")
    parser.add_argument(
        "--EPG_shift_maxshift", dest="EPG_shift_maxshift", type=int,
        default=5,
        help="positive integer, the maxium distance (as number of edges) to consider when exploring the branching point neighborhood")
    parser.add_argument("--disable_EPG_ext", dest="flag_disable_EPG_ext",
                        action="store_true",
                        help="disable extending leaves with additional nodes")
    parser.add_argument(
        "--EPG_ext_mode", dest="EPG_ext_mode", default='QuantDists',
        help=" the mode used to extend the graph,QuantDists, QuantCentroid or WeigthedCentroid")
    parser.add_argument(
        "--EPG_ext_par", dest="EPG_ext_par", type=float, default=0.5,
        help="the control parameter used for contribution of the different data points when extending leaves with nodes")
    # The six cutoff options below are declared with type=float so that a
    # value given on the command line is numeric and not a raw string.
    parser.add_argument("--DE_zscore_cutoff", dest="DE_zscore_cutoff",
                        type=float, default=2,
                        help="Differentially Expressed Genes z-score cutoff")
    parser.add_argument(
        "--DE_logfc_cutoff", dest="DE_logfc_cutoff", type=float,
        default=0.25,
        help="Differentially Expressed Genes log fold change cutoff")
    parser.add_argument("--TG_spearman_cutoff", dest="TG_spearman_cutoff",
                        type=float, default=0.4,
                        help="Transition Genes Spearman correlation cutoff")
    parser.add_argument("--TG_logfc_cutoff", dest="TG_logfc_cutoff",
                        type=float, default=0.25,
                        help="Transition Genes log fold change cutoff")
    parser.add_argument("--LG_zscore_cutoff", dest="LG_zscore_cutoff",
                        type=float, default=1.5,
                        help="Leaf Genes z-score cutoff")
    parser.add_argument("--LG_pvalue_cutoff", dest="LG_pvalue_cutoff",
                        type=float, default=1e-2,
                        help="Leaf Genes p value cutoff")
    parser.add_argument(
        "--umap", dest="flag_umap", action="store_true",
        help="whether to use UMAP for visualization (default: No)")
    parser.add_argument("-r", dest="root", default=None,
                        help="root node for subwaymap_plot and stream_plot")
    parser.add_argument("--stream_log_view", dest="flag_stream_log_view",
                        action="store_true",
                        help="use log2 scale for y axis of stream_plot")
    parser.add_argument("-o", "--output_folder", dest="output_folder",
                        default=None, help="Output folder")
    parser.add_argument("--for_web", dest="flag_web", action="store_true",
                        help="Output files for website")
    parser.add_argument(
        "--n_genes", dest="n_genes", type=int, default=5,
        help="Number of top genes selected from each output marker gene file for website gene visualization")

    args = parser.parse_args()
    if (args.input_filename is None) and (args.new_filename is None):
        parser.error("at least one of -m, --new required")

    new_filename = args.new_filename
    new_label_filename = args.new_label_filename
    new_label_color_filename = args.new_label_color_filename
    flag_stream_log_view = args.flag_stream_log_view
    flag_gene_TG_detection = args.flag_gene_TG_detection
    flag_gene_DE_detection = args.flag_gene_DE_detection
    flag_gene_LG_detection = args.flag_gene_LG_detection
    flag_web = args.flag_web
    flag_first_PC = args.flag_first_PC
    flag_umap = args.flag_umap
    genes = args.genes
    DE_zscore_cutoff = args.DE_zscore_cutoff
    DE_logfc_cutoff = args.DE_logfc_cutoff
    TG_spearman_cutoff = args.TG_spearman_cutoff
    TG_logfc_cutoff = args.TG_logfc_cutoff
    LG_zscore_cutoff = args.LG_zscore_cutoff
    LG_pvalue_cutoff = args.LG_pvalue_cutoff
    root = args.root
    input_filename = args.input_filename
    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    s_method = args.s_method
    use_precomputed = args.use_precomputed
    n_jobs = args.n_jobs
    loess_frac = args.loess_frac
    loess_cutoff = args.loess_cutoff
    pca_n_PC = args.pca_n_PC
    flag_log2 = args.flag_log2
    flag_norm = args.flag_norm
    flag_atac = args.flag_atac
    dr_method = args.dr_method
    nb_pct = args.nb_pct  # neighbour percent
    n_neighbors = args.n_neighbors
    n_components = args.n_components  # number of components to keep
    clustering = args.clustering
    damping = args.damping
    n_clusters = args.n_clusters
    EPG_n_nodes = args.EPG_n_nodes
    EPG_lambda = args.EPG_lambda
    EPG_mu = args.EPG_mu
    EPG_trimmingradius = args.EPG_trimmingradius
    EPG_alpha = args.EPG_alpha
    flag_EPG_collapse = args.flag_EPG_collapse
    EPG_collapse_mode = args.EPG_collapse_mode
    EPG_collapse_par = args.EPG_collapse_par
    flag_EPG_shift = args.flag_EPG_shift
    EPG_shift_mode = args.EPG_shift_mode
    EPG_shift_DR = args.EPG_shift_DR
    EPG_shift_maxshift = args.EPG_shift_maxshift
    flag_disable_EPG_optimize = args.flag_disable_EPG_optimize
    flag_disable_EPG_ext = args.flag_disable_EPG_ext
    EPG_ext_mode = args.EPG_ext_mode
    EPG_ext_par = args.EPG_ext_par
    output_folder = args.output_folder  # work directory
    n_genes = args.n_genes

    # In website mode most figures are not saved through save_fig.
    flag_savefig = not flag_web

    # -g is either a path to a one-column gene file or a comma-separated list.
    gene_list = []
    if genes is not None:
        if os.path.exists(genes):
            gene_list = pd.read_csv(
                genes,
                sep='\t',
                header=None,
                index_col=None,
                compression='gzip' if genes.split('.')[-1] == 'gz' else None
            ).iloc[:, 0].tolist()
            gene_list = list(set(gene_list))
        else:
            gene_list = genes.split(',')
        print('Genes to visualize: ')
        print(gene_list)

    if new_filename is None:
        if output_folder is None:
            workdir = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir = output_folder

        if use_precomputed:
            print('Importing the precomputed pkl file...')
            adata = st.read(file_name='stream_result.pkl', file_format='pkl',
                            file_path=workdir, workdir=workdir)
        else:
            if flag_atac:
                print('Reading in atac zscore matrix...')
                adata = st.read(file_name=input_filename, workdir=workdir,
                                experiment='atac-seq')
            else:
                adata = st.read(file_name=input_filename, workdir=workdir)
            print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
                  str(adata.var.shape[0]) + ' genes')
            adata.var_names_make_unique()
            adata.obs_names_make_unique()

            if cell_label_filename is not None:
                st.add_cell_labels(adata, file_name=cell_label_filename)
            else:
                st.add_cell_labels(adata)
            if cell_label_color_filename is not None:
                st.add_cell_colors(adata,
                                   file_name=cell_label_color_filename)
            else:
                st.add_cell_colors(adata)

            if flag_atac:
                print('Selecting top principal components...')
                st.select_top_principal_components(adata,
                                                   n_pc=pca_n_PC,
                                                   first_pc=flag_first_PC,
                                                   save_fig=True)
                st.dimension_reduction(adata,
                                       method=dr_method,
                                       n_components=n_components,
                                       n_neighbors=n_neighbors,
                                       nb_pct=nb_pct,
                                       n_jobs=n_jobs,
                                       feature='top_pcs')
            else:
                if flag_norm:
                    st.normalize_per_cell(adata)
                if flag_log2:
                    st.log_transform(adata)
                if s_method != 'all':
                    print('Filtering genes...')
                    st.filter_genes(adata, min_num_cells=5)
                    print('Removing mitochondrial genes...')
                    st.remove_mt_genes(adata)
                    if s_method == 'LOESS':
                        print('Selecting most variable genes...')
                        st.select_variable_genes(adata,
                                                 loess_frac=loess_frac,
                                                 percentile=loess_cutoff,
                                                 save_fig=True)
                        pd.DataFrame(adata.uns['var_genes']).to_csv(
                            os.path.join(workdir,
                                         'selected_variable_genes.tsv'),
                            sep='\t',
                            index=None,
                            header=False)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='var_genes')
                    if s_method == 'PCA':
                        print('Selecting top principal components...')
                        st.select_top_principal_components(
                            adata,
                            n_pc=pca_n_PC,
                            first_pc=flag_first_PC,
                            save_fig=True)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='top_pcs')
                else:
                    print('Keep all the genes...')
                    st.dimension_reduction(adata,
                                           n_components=n_components,
                                           n_neighbors=n_neighbors,
                                           nb_pct=nb_pct,
                                           n_jobs=n_jobs,
                                           feature='all')

            st.plot_dimension_reduction(adata, save_fig=flag_savefig)

            st.seed_elastic_principal_graph(adata,
                                            clustering=clustering,
                                            damping=damping,
                                            n_clusters=n_clusters)
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph.pdf')

            st.elastic_principal_graph(adata,
                                       epg_n_nodes=EPG_n_nodes,
                                       epg_lambda=EPG_lambda,
                                       epg_mu=EPG_mu,
                                       epg_trimmingradius=EPG_trimmingradius,
                                       epg_alpha=EPG_alpha)
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='elastic_principal_graph.pdf')

            if not flag_disable_EPG_optimize:
                st.optimize_branching(adata,
                                      epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph.pdf')

            if flag_EPG_shift:
                st.shift_branching(adata,
                                   epg_shift_mode=EPG_shift_mode,
                                   epg_shift_radius=EPG_shift_DR,
                                   epg_shift_max=EPG_shift_maxshift,
                                   epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph.pdf')

            if flag_EPG_collapse:
                st.prune_elastic_principal_graph(
                    adata,
                    epg_collapse_mode=EPG_collapse_mode,
                    epg_collapse_par=EPG_collapse_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph.pdf')

            if not flag_disable_EPG_ext:
                st.extend_elastic_principal_graph(
                    adata,
                    epg_ext_mode=EPG_ext_mode,
                    epg_ext_par=EPG_ext_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph.pdf')

            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph.pdf')
            st.plot_flat_tree(adata, save_fig=flag_savefig)

            if flag_umap:
                print('UMAP visualization based on top MLLE components...')
                st.plot_visualization_2D(adata,
                                         save_fig=flag_savefig,
                                         fig_name='umap_cells')
                st.plot_visualization_2D(adata,
                                         color_by='branch',
                                         save_fig=flag_savefig,
                                         fig_name='umap_branches')

            if root is None:
                # No root chosen: plot once from every labelled node.
                print('Visualization of subwaymap and stream plots...')
                list_node_start = _flat_tree_node_labels(adata)
                for ns in list_node_start:
                    st.subwaymap_plot(adata,
                                      percentile_dist=100,
                                      root=ns,
                                      save_fig=flag_savefig)
                    if flag_web:
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=True,
                                       flag_log_view=flag_stream_log_view,
                                       fig_legend=False,
                                       fig_name='stream_plot.png')
                    else:
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot(adata,
                                  percentile_dist=100,
                                  root=root,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               root=root,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)

            output_cell_info(adata)
            if flag_web:
                output_for_website(adata)
            st.write(adata)

        if flag_gene_TG_detection:
            print('Identifying transition genes...')
            st.detect_transistion_genes(adata,
                                        cutoff_spearman=TG_spearman_cutoff,
                                        cutoff_logfc=TG_logfc_cutoff,
                                        n_jobs=n_jobs)
            if flag_web:
                # Plot the top n_genes genes of each table.
                list_node_start = _flat_tree_node_labels(adata)
                gene_list = np.unique(
                    _top_marker_genes(adata.uns['transition_genes'],
                                      n_genes))
                _plot_genes_for_web(adata, gene_list, list_node_start,
                                    flag_stream_log_view)
            else:
                st.plot_transition_genes(adata, save_fig=flag_savefig)

        if flag_gene_DE_detection:
            print('Identifying differentially expressed genes...')
            # cutoff_zscore takes the z-score cutoff; previously the log-FC
            # cutoff was passed for both keywords, ignoring
            # --DE_zscore_cutoff.
            st.detect_de_genes(adata,
                               cutoff_zscore=DE_zscore_cutoff,
                               cutoff_logfc=DE_logfc_cutoff,
                               n_jobs=n_jobs)
            if flag_web:
                list_node_start = _flat_tree_node_labels(adata)
                gene_list = np.unique(
                    _top_marker_genes(adata.uns['de_genes_greater'], n_genes)
                    + _top_marker_genes(adata.uns['de_genes_less'], n_genes))
                _plot_genes_for_web(adata, gene_list, list_node_start,
                                    flag_stream_log_view)
            else:
                st.plot_de_genes(adata, save_fig=flag_savefig)

        if flag_gene_LG_detection:
            print('Identifying leaf genes...')
            st.detect_leaf_genes(adata,
                                 cutoff_zscore=LG_zscore_cutoff,
                                 cutoff_pvalue=LG_pvalue_cutoff,
                                 n_jobs=n_jobs)
            if flag_web:
                # Plot the top n_genes genes of each table.
                list_node_start = _flat_tree_node_labels(adata)
                gene_list = np.unique(
                    _top_marker_genes(adata.uns['leaf_genes'], n_genes))
                _plot_genes_for_web(adata, gene_list, list_node_start,
                                    flag_stream_log_view)

        # NOTE(review): in website mode the detection steps above overwrite
        # gene_list, so the genes plotted here may no longer be the -g list.
        if (genes is not None) and (len(gene_list) > 0):
            print('Visualizing genes...')
            list_node_start = _flat_tree_node_labels(adata)
            roots = list_node_start if root is None else [root]
            if flag_web:
                _plot_genes_for_web(adata, gene_list, roots,
                                    flag_stream_log_view)
            else:
                for ns in roots:
                    st.subwaymap_plot_gene(adata,
                                           percentile_dist=100,
                                           root=ns,
                                           genes=gene_list,
                                           save_fig=flag_savefig)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=flag_savefig,
                                        flag_log_view=flag_stream_log_view)
    else:
        print('Starting mapping procedure...')
        if output_folder is None:
            workdir_ref = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir_ref = output_folder
        adata = st.read(file_name='stream_result.pkl', file_format='pkl',
                        file_path=workdir_ref, workdir=workdir_ref)
        # Mapping output goes to a sibling 'mapping_result' directory.
        workdir = os.path.join(workdir_ref, os.pardir, 'mapping_result')
        adata_new = st.read(file_name=new_filename, workdir=workdir)
        st.add_cell_labels(adata_new, file_name=new_label_filename)
        st.add_cell_colors(adata_new, file_name=new_label_color_filename)

        if s_method == 'LOESS':
            st.map_new_data(adata, adata_new, feature='var_genes')
        if s_method == 'all':
            st.map_new_data(adata, adata_new, feature='all')

        if flag_umap:
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     use_precomputed=False,
                                     save_fig=flag_savefig,
                                     fig_name='umap_new_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     show_all_colors=True,
                                     save_fig=flag_savefig,
                                     fig_name='umap_all_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     color_by='branch',
                                     save_fig=flag_savefig,
                                     fig_name='umap_branches')

        if root is None:
            list_node_start = _flat_tree_node_labels(adata)
            for ns in list_node_start:
                st.subwaymap_plot(adata,
                                  adata_new=adata_new,
                                  percentile_dist=100,
                                  show_all_cells=False,
                                  root=ns,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               adata_new=adata_new,
                               show_all_colors=False,
                               root=ns,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)
        else:
            st.subwaymap_plot(adata,
                              adata_new=adata_new,
                              percentile_dist=100,
                              show_all_cells=False,
                              root=root,
                              save_fig=flag_savefig)
            st.stream_plot(adata,
                           adata_new=adata_new,
                           show_all_colors=False,
                           root=root,
                           fig_size=(8, 8),
                           save_fig=flag_savefig,
                           flag_log_view=flag_stream_log_view)

        if (genes is not None) and (len(gene_list) > 0):
            # list_node_start exists here because the same root-is-None
            # condition populated it just above.
            if root is None:
                for ns in list_node_start:
                    st.subwaymap_plot_gene(adata,
                                           adata_new=adata_new,
                                           percentile_dist=100,
                                           root=ns,
                                           save_fig=flag_savefig,
                                           flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot_gene(adata,
                                       adata_new=adata_new,
                                       percentile_dist=100,
                                       root=root,
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)

        st.write(adata_new, file_name='stream_mapping_result.pkl')

    print('Finished computation.')
def main():
    """Command-line entry point: read, normalise and filter an input matrix.

    Reads the file named by ``-m`` (as a pickle when the name ends in
    ``pkl``), attaches cell labels and colours, optionally normalises,
    log2-transforms and removes mitochondrial genes, applies
    ``st.filter_cells`` and ``st.filter_genes``, and writes
    ``<prefix>_stream_result.pkl`` to the current directory.

    Exits through ``parser.error`` when ``-m`` is not supplied.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--matrix", dest="input_filename",
                        default=None, help="input file name", metavar="FILE")
    parser.add_argument("-l", "--cell_labels", dest="cell_label_filename",
                        default=None, help="filename of cell labels")
    parser.add_argument("-c", "--cell_labels_colors",
                        dest="cell_label_color_filename", default=None,
                        help="filename of cell label colors")
    parser.add_argument("--log2", dest="flag_log2", action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm", dest="flag_norm", action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("-o", "--output_folder", dest="output_folder",
                        default=None, help="Output folder")
    parser.add_argument("-rmt", "--remove_mt_genes",
                        dest="flag_remove_mt_genes", action="store_true",
                        default=False, help="Remove Mitochondrial genes")
    parser.add_argument("-mcg", "--min_count_genes", dest="min_count_genes",
                        type=int, default=None,
                        help="filter cells with less than this many genes")
    parser.add_argument("-mpg", "--min_percent_genes",
                        dest="min_percent_genes", type=float, default=None,
                        help="The minimum percent genes")
    parser.add_argument("-mpc", "--min_percent_cells",
                        dest="min_percent_cells", type=float, default=None,
                        help="The minimum percent cells")
    # NOTE(review): --min_count_cells is accepted but never forwarded to
    # either filter call below; confirm which call should receive it.
    parser.add_argument("-mcc", "--min_count_cells", dest="min_count_cells",
                        type=int, default=None,
                        help="The minimum count cells")
    parser.add_argument("-mnc", "--min_num_cells", dest="min_num_cells",
                        type=int, default=None,
                        help="The minimum number of cells")
    parser.add_argument("-ec", "--expression_cutoff",
                        dest="expression_cutoff", type=float, default=None,
                        help="The expression cutoff")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamOutput",
                        help="output file name prefix")

    args = parser.parse_args()
    print(args)

    input_filename = args.input_filename
    # Without this guard a missing -m surfaces as AttributeError on
    # None.endswith() further down.
    if input_filename is None:
        parser.error("an input file is required (-m/--matrix)")

    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    min_count_genes = args.min_count_genes
    expression_cutoff = args.expression_cutoff
    output_filename_prefix = args.output_filename_prefix

    print('Starting preprocessing procedure...')

    # Input is read from, and output written to, the current directory.
    workdir = "./"
    if input_filename.endswith('pkl'):
        adata = st.read(file_name=input_filename, file_format='pkl',
                        workdir=workdir)
    else:
        adata = st.read(file_name=input_filename, workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    adata.var_names_make_unique()
    adata.obs_names_make_unique()

    if cell_label_filename is not None:
        st.add_cell_labels(adata, file_name=cell_label_filename)
    else:
        st.add_cell_labels(adata)
    if cell_label_color_filename is not None:
        st.add_cell_colors(adata, file_name=cell_label_color_filename)
    else:
        st.add_cell_colors(adata)

    if args.flag_norm:
        st.normalize_per_cell(adata)
    if args.flag_log2:
        st.log_transform(adata, base=2)
    if args.flag_remove_mt_genes:
        st.remove_mt_genes(adata)

    st.filter_cells(adata,
                    min_pct_genes=args.min_percent_genes,
                    min_count=min_count_genes,
                    expr_cutoff=expression_cutoff)
    st.filter_genes(adata,
                    min_num_cells=args.min_num_cells,
                    min_pct_cells=args.min_percent_cells,
                    min_count=min_count_genes,
                    expr_cutoff=expression_cutoff)

    print("Writing " + output_filename_prefix + " " +
          args.output_filename_prefix + "_stream_result.pkl")
    st.write(adata,
             file_name=(output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    print('Finished computation.')
def main():
    """Command-line entry point for the feature selection step.

    Parses the command-line options, loads the pickled input through
    ``st.read``, then optionally runs ``st.select_variable_genes``
    (``--flag_variable``) and/or ``st.select_top_principal_components``
    (``--flag_pca``). The result is always written to
    ``<prefix>_stream_result.pkl`` in the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')

    # (option strings, add_argument keyword arguments), kept in the order
    # in which they should appear in the generated help text.
    option_table = [
        (("-m", "--data-file"),
         dict(dest="input_filename",
              default=None,
              help="input file name, pkl format from Stream preprocessing module",
              metavar="FILE")),
        (("--flag_useprecomputed",),
         dict(dest="flag_useprecomputed", action="store_true",
              help="use precomputed features for PCA")),
        (("--flag_firstpc",),
         dict(dest="flag_firstpc", action="store_true",
              help="Use the first principal component")),
        (("--flag_pca",),
         dict(dest="flag_pca", action="store_true", help="perform PCA")),
        (("--flag_variable",),
         dict(dest="flag_variable", action="store_true",
              help="find variable genes")),
        (("-of", "--of"),
         dict(dest="output_filename_prefix", default="StreamiFSOutput",
              help="output file name prefix")),
        (("-lf", "--loess_fraction"),
         dict(dest="loess_fraction", type=float, default=None,
              help="loess fraction")),
        (("-per",),
         dict(dest="percentile", type=int, default=None,
              help="percent of variable genes to find")),
        (("-n_g",),
         dict(dest="num_genes", type=int, default=None, help="num genes")),
        (("-n_j",),
         dict(dest="num_jobs", type=int, default=None, help="num jobs")),
        (("-feat",),
         dict(dest="feature", default=None, help="feature")),
        (("-n_pc",),
         dict(dest="num_principal_components", type=int, default=None,
              help="num principal components")),
        (("-max_pc",),
         dict(dest="max_principal_components", type=int, default=None,
              help="max principal components")),
        (("-fig_width",),
         dict(dest="fig_width", type=int, default=8, help="")),
        (("-fig_height",),
         dict(dest="fig_height", type=int, default=8, help="")),
        (("--flag",),
         dict(dest="flag", action="store_true", help="debugging flag")),
    ]

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for option_strings, settings in option_table:
        parser.add_argument(*option_strings, **settings)
    opts = parser.parse_args()

    print('Starting feature selection procedure...')
    print(opts)

    adata = st.read(file_name=opts.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir="./")
    print('Input: {} cells, {} genes'.format(adata.obs.shape[0],
                                             adata.var.shape[0]))

    prefix = opts.output_filename_prefix
    figure_size = (opts.fig_width, opts.fig_height)

    if opts.flag_variable:
        st.select_variable_genes(
            adata,
            loess_frac=opts.loess_fraction,
            percentile=opts.percentile,
            n_genes=opts.num_genes,
            n_jobs=opts.num_jobs,
            save_fig=True,
            fig_name=(prefix + '_variable_genes.png'),
            fig_size=figure_size,
            fig_path="./")

    if opts.flag_pca:
        st.select_top_principal_components(
            adata,
            feature=opts.feature,
            n_pc=opts.num_principal_components,
            max_pc=opts.max_principal_components,
            first_pc=opts.flag_firstpc,
            use_precomputed=opts.flag_useprecomputed,
            save_fig=True,
            fig_name=(prefix + '_pca.png'),
            fig_size=figure_size,
            fig_path='./')

    st.write(adata,
             file_name=(prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: {} cells, {} genes'.format(adata.obs.shape[0],
                                              adata.var.shape[0]))
    print('Finished computation.')
def main():
    """Command-line entry point for the dimension reduction step.

    Parses the command-line options, loads the pickled input through
    ``st.read``, runs ``st.dimension_reduction``, saves the plot produced by
    ``st.plot_dimension_reduction`` and writes
    ``<prefix>_stream_result.pkl`` to the current directory.

    The ``-feat`` option is forwarded to ``st.dimension_reduction``; when it
    is omitted, ``'var_genes'`` is used. ``-comp1`` and ``-comp2`` are both
    parsed as integers.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-nb_pct", "--percent_neighbor_cells", dest="nb_pct",
                        type=float, default=None, help="")
    parser.add_argument("-n_comp_k", dest="n_comp_k", type=int, default=None,
                        help="")
    parser.add_argument("-feat", dest="feature", default=None, help="feature")
    parser.add_argument("-method", dest="method", default=None, help="")
    parser.add_argument("-nc_plot", dest="nc_plot", type=int, default=None,
                        help="")
    # comp1 is parsed as an int, like comp2; previously it was left as a
    # string and carried a copy-pasted "feature" help text.
    parser.add_argument("-comp1", dest="comp1", type=int, default=None,
                        help="")
    parser.add_argument("-comp2", dest="comp2", type=int, default=None,
                        help="")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8,
                        help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8,
                        help="")
    parser.add_argument("-n_jobs", dest="n_jobs", type=int, default=2,
                        help="")
    parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int,
                        default=None, help="")
    args = parser.parse_args()

    print(args)
    print('Starting dimension reduction procedure...')

    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)
    print("Feature ", args.feature, type(args.feature))

    # Honour -feat instead of silently ignoring it; keep the historical
    # 'var_genes' behaviour when the option is not supplied.
    feature = args.feature if args.feature is not None else 'var_genes'
    st.dimension_reduction(adata,
                           method=args.method,
                           feature=feature,
                           nb_pct=args.nb_pct,
                           n_components=args.n_comp_k,
                           n_jobs=args.n_jobs,
                           eigen_solver=None)

    fig_size = (args.fig_width, args.fig_height)
    st.plot_dimension_reduction(
        adata,
        n_components=args.nc_plot,
        comp1=args.comp1,
        comp2=args.comp2,
        save_fig=True,
        # File name kept as-is so existing consumers of the output still
        # find the figure under the same name.
        fig_name=(args.output_filename_prefix + '_stddev_dotplot.png'),
        fig_path="./",
        fig_size=fig_size,
        fig_legend_ncol=args.fig_legend_ncol)

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')
    print('Finished computation.')
def main():
    """Command-line entry point for the flat-tree / subway / stream plots.

    Parses the command-line options, loads the pickled input through
    ``st.read`` and then:

    * with ``-flag_cells``: saves the flat tree, the cell subway map and the
      cell stream plot;
    * with ``-flag_genes``: saves the per-gene subway map and stream plots
      for the comma-separated list given in ``-genes``.

    The result is always written to ``<prefix>_stream_result.pkl`` in the
    current directory.

    ``-preference`` is an optional comma-separated list; when omitted,
    ``None`` is passed to the plotting functions. ``-genes`` is required
    whenever ``-flag_genes`` is set; if it is missing the parser reports a
    usage error and exits.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m", "--data-file", dest="input_filename", default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of", "--of", dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-fig_width", dest="fig_width", type=int, default=8,
                        help="")
    parser.add_argument("-fig_height", dest="fig_height", type=int, default=8,
                        help="")
    parser.add_argument("-fig_legend_ncol", dest="fig_legend_ncol", type=int,
                        default=None, help="")
    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", help="")
    parser.add_argument("-subway_factor", dest="subway_factor", type=float,
                        default=2.0, help="")
    parser.add_argument("-color_by", dest="color_by", default='label',
                        help="")
    parser.add_argument("-factor_num_win", dest="factor_num_win", type=int,
                        default=10, help="")
    parser.add_argument("-factor_min_win", dest="factor_min_win", type=float,
                        default=2.0, help="")
    parser.add_argument("-factor_width", dest="factor_width", type=float,
                        default=2.5, help="")
    parser.add_argument("-flag_log_view", dest="flag_log_view",
                        action="store_true", help="")
    parser.add_argument("-factor_zoomin", dest="factor_zoomin", type=float,
                        default=100.0, help="")
    parser.add_argument("-flag_cells", dest="flag_cells", action="store_true",
                        help="")
    parser.add_argument("-flag_genes", dest="flag_genes", action="store_true",
                        help="")
    parser.add_argument("-genes", dest="genes", default=None, help="")
    parser.add_argument("-percentile_dist", dest="percentile_dist",
                        type=float, default=100, help="")
    args = parser.parse_args()

    # Validate before the (potentially slow) read so that a bad invocation
    # fails fast with a usage message instead of an AttributeError later.
    if args.flag_genes and not args.genes:
        parser.error("-genes is required when -flag_genes is set")

    # -preference has no default, so it is None when omitted; the original
    # unconditional .split(',') crashed in that case.
    preference = args.preference.split(',') if args.preference else None

    workdir = "./"
    adata = st.read(file_name=args.input_filename, file_format='pkl',
                    experiment='rna-seq', workdir=workdir)

    fig_size = (args.fig_width, args.fig_height)

    # store_true flags are always booleans, never None: test truthiness so
    # the flags actually select which plot groups are produced.
    if args.flag_cells:
        st.plot_flat_tree(
            adata,
            save_fig=True,
            fig_path="./",
            fig_name=(args.output_filename_prefix + '_flat_tree.png'),
            fig_size=fig_size,
            fig_legend_ncol=args.fig_legend_ncol)
        st.subwaymap_plot(
            adata,
            root=args.root,
            percentile_dist=args.percentile_dist,
            preference=preference,
            factor=args.subway_factor,
            color_by=args.color_by,
            save_fig=True,
            fig_path="./",
            fig_name=(args.output_filename_prefix + '_cell_subway_map.png'),
            fig_size=fig_size,
            fig_legend_ncol=args.fig_legend_ncol)
        st.stream_plot(
            adata,
            root=args.root,
            preference=preference,
            factor_num_win=args.factor_num_win,
            factor_min_win=args.factor_min_win,
            factor_width=args.factor_width,
            flag_log_view=args.flag_log_view,
            factor_zoomin=args.factor_zoomin,
            save_fig=True,
            fig_path="./",
            fig_name=(args.output_filename_prefix + '_cell_stream_plot.png'),
            fig_size=fig_size,
            fig_legend=True,
            fig_legend_ncol=args.fig_legend_ncol,
            tick_fontsize=20,
            label_fontsize=25)

    if args.flag_genes:
        genes = args.genes.split(',')
        # No fig_name is passed to the gene plots, so file naming is left
        # to the plotting functions themselves.
        st.subwaymap_plot_gene(
            adata,
            root=args.root,
            genes=genes,
            preference=preference,
            percentile_dist=args.percentile_dist,
            factor=args.subway_factor,
            save_fig=True,
            fig_path="./",
            fig_format='png',
            fig_size=fig_size)
        st.stream_plot_gene(
            adata,
            root=args.root,
            genes=genes,
            preference=preference,
            factor_min_win=args.factor_min_win,
            factor_num_win=args.factor_num_win,
            factor_width=args.factor_width,
            save_fig=True,
            fig_path="./",
            fig_format='png',
            fig_size=fig_size,
            tick_fontsize=20,
            label_fontsize=25)

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Finished computation.')
def main():
    """Command-line entry point for the 2D visualization step.

    Parses the command-line options, loads the pickled input through
    ``st.read``, saves the figure produced by ``st.plot_visualization_2D``
    as ``<prefix>_2D_plot.png`` and writes ``<prefix>_stream_result.pkl``
    to the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')

    # (option strings, add_argument keyword arguments), kept in the order
    # in which they should appear in the generated help text.
    option_table = [
        (("-m", "--data-file"),
         dict(dest="input_filename",
              default=None,
              help="input file name, pkl format from Stream preprocessing module",
              metavar="FILE")),
        (("-of", "--of"),
         dict(dest="output_filename_prefix", default="StreamiFSOutput",
              help="output file name prefix")),
        # NOTE(review): this help text looks copy-pasted; the flag is
        # forwarded as use_precomputed below, not as a save-figure switch.
        (("--flag_useprecomputed",),
         dict(dest="flag_useprecomputed", action="store_true",
              help="Save the figure")),
        (("-nb_pct", "--percent_neighbor_cells"),
         dict(dest="nb_pct", type=float, default=None, help="")),
        # NOTE(review): n_comp_k is parsed but not used anywhere in this
        # function.
        (("-n_comp_k",),
         dict(dest="n_comp_k", type=int, default=None, help="")),
        (("-perplexity",),
         dict(dest="perplexity", type=float, default=None, help="")),
        (("-method",),
         dict(dest="method", default=None, help="")),
        (("-color_by",),
         dict(dest="color_by", default='label', help="")),
        (("-fig_width",),
         dict(dest="fig_width", type=int, default=8, help="")),
        (("-fig_height",),
         dict(dest="fig_height", type=int, default=8, help="")),
        (("-fig_legend_ncol",),
         dict(dest="fig_legend_ncol", type=int, default=None, help="")),
    ]

    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for option_strings, settings in option_table:
        parser.add_argument(*option_strings, **settings)
    opts = parser.parse_args()

    print('Starting ...')

    adata = st.read(file_name=opts.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir="./")

    prefix = opts.output_filename_prefix
    st.plot_visualization_2D(
        adata,
        method=opts.method,
        nb_pct=opts.nb_pct,
        perplexity=opts.perplexity,
        color_by=opts.color_by,
        use_precomputed=opts.flag_useprecomputed,
        save_fig=True,
        fig_path='./',
        fig_name=(prefix + "_2D_plot.png"),
        fig_size=(opts.fig_width, opts.fig_height),
        fig_legend_ncol=opts.fig_legend_ncol)

    st.write(adata,
             file_name=(prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Finished computation.')