Exemplo n.º 1
0
def main():
    """Command-line entry point: detect and plot differentially expressed genes.

    Parses the command-line options, reads the pickled object named by
    ``-m/--data-file`` (using ``./`` as the working directory), runs
    ``st.detect_de_genes`` followed by ``st.plot_de_genes`` on it, and writes
    the result to ``<output prefix>_stream_result.pkl`` in ``./``.

    ``-preference`` is an optional comma-separated list; when it is omitted,
    ``None`` is forwarded to ``st.detect_de_genes`` instead of a list.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(description='%s Parameters' % __tool_name__ ,formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m", "--data-file", dest="input_filename",default = None, help="input file name, pkl format from Stream preprocessing module", metavar="FILE")
    parser.add_argument("-of","--of",dest="output_filename_prefix", default="StreamiFSOutput",  help="output file name prefix")
    parser.add_argument("-fig_width",dest="fig_width", type=int, default=8, help="")
    parser.add_argument("-fig_height",dest="fig_height", type=int, default=8, help="")
    parser.add_argument("-cutoff_zscore",dest="cutoff_zscore", type=float, default=1.5, help="")
    parser.add_argument("-percentile_expr",dest="percentile_expr", type=int, default=95, help="")
    parser.add_argument("-flag_use_precomputed",dest="flag_use_precomputed", action="store_true", help="")
    parser.add_argument("-root",dest="root", default=None, help="")
    parser.add_argument("-preference",dest="preference", default=None, help="")
    parser.add_argument("-cutoff_logfc",dest="cutoff_logfc", type=float, default=0.25, help="")
    parser.add_argument("-num_genes",dest="num_genes", type=int, default=15, help="")
    parser.add_argument("-n_jobs",dest="n_jobs", type=int, default=8, help="")

    args = parser.parse_args()

    workdir = "./"

    adata = st.read(file_name=args.input_filename, file_format='pkl', experiment='rna-seq', workdir=workdir)

    # -preference defaults to None; calling .split() on it unconditionally
    # raised AttributeError whenever the option was omitted. Only split a
    # value that was actually supplied and pass None through otherwise.
    if args.preference is not None:
        preference = args.preference.split(',')
    else:
        preference = None

    st.detect_de_genes(adata,cutoff_zscore=args.cutoff_zscore,cutoff_logfc=args.cutoff_logfc,percentile_expr=args.percentile_expr,n_jobs=args.n_jobs,
                   use_precomputed=args.flag_use_precomputed, root=args.root, preference=preference)
    st.plot_de_genes(adata, num_genes=args.num_genes,cutoff_zscore=args.cutoff_zscore, cutoff_logfc=args.cutoff_logfc, save_fig=True,fig_path=None,fig_size=(args.fig_width,args.fig_height))

    st.write(adata,file_name=(args.output_filename_prefix + '_stream_result.pkl'),file_path='./',file_format='pkl')

    print('Finished computation.')
def main():
    """Command-line entry point: fit the elastic principal graph and plot it.

    Parses the command-line options, reads the pickled object named by
    ``-m/--data-file`` (using ``./`` as the working directory), runs
    ``st.elastic_principal_graph`` on it, saves the two branch plots under the
    output prefix, and writes the result to
    ``<output prefix>_stream_result.pkl`` in ``./``.
    """
    sns.set_style('white')
    sns.set_context('poster')

    cli = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "-m", "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE",
    )
    cli.add_argument(
        "-of", "--of",
        dest="output_filename_prefix",
        default="StreamiFSOutput",
        help="output file name prefix",
    )

    # (option name, type converter, default). Registration order is kept
    # because it determines the order shown in the --help output. A converter
    # of None leaves the value as the raw command-line string.
    simple_options = (
        ("epg_n_nodes", int, 50),
        ("incr_n_nodes", int, 30),
        ("epg_trimmingradius", None, 'Inf'),
        ("epg_alpha", float, 0.02),
        ("epg_beta", float, 0.0),
        ("epg_n_processes", int, 1),
        ("epg_lambda", float, 0.02),
        ("epg_mu", float, 0.1),
        ("epg_finalenergy", None, 'Penalized'),
        ("comp1", int, 0),
        ("comp2", int, 1),
        ("n_comp", int, 3),
        ("fig_width", int, 8),
        ("fig_height", int, 8),
        ("fig_legend_ncol", int, None),
    )
    for opt_name, converter, fallback in simple_options:
        cli.add_argument("-" + opt_name, dest=opt_name, type=converter,
                         default=fallback, help="")

    opts = cli.parse_args()

    figure_size = (opts.fig_width, opts.fig_height)
    prefix = opts.output_filename_prefix

    adata = st.read(
        file_name=opts.input_filename,
        file_format='pkl',
        experiment='rna-seq',
        workdir="./",
    )

    st.elastic_principal_graph(
        adata,
        epg_n_nodes=opts.epg_n_nodes,
        incr_n_nodes=opts.incr_n_nodes,
        epg_trimmingradius=opts.epg_trimmingradius,
        epg_alpha=opts.epg_alpha,
        epg_n_processes=opts.epg_n_processes,
        epg_lambda=opts.epg_lambda,
        epg_mu=opts.epg_mu,
        epg_beta=opts.epg_beta,
        epg_finalenergy=opts.epg_finalenergy,
    )

    # Both plots share the same component selection and figure size.
    st.plot_branches(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches.png',
        fig_path=None,
        fig_size=figure_size,
    )
    st.plot_branches_with_cells(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches_with_cells.png',
        fig_path=None,
        fig_size=figure_size,
        fig_legend_ncol=opts.fig_legend_ncol,
    )

    st.write(
        adata,
        file_name=prefix + '_stream_result.pkl',
        file_path='./',
        file_format='pkl',
    )

    print('Finished computation.')
def main():
    """Command-line entry point: seed the elastic principal graph and plot it.

    Parses the command-line options, reads the pickled object named by
    ``-m/--data-file`` (using ``./`` as the working directory), runs
    ``st.seed_elastic_principal_graph`` on it, saves the two branch plots
    under the output prefix, and writes the result to
    ``<output prefix>_stream_result.pkl`` in ``./``.
    """
    sns.set_style('white')
    sns.set_context('poster')

    cli = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "-m", "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE",
    )
    cli.add_argument(
        "-of", "--of",
        dest="output_filename_prefix",
        default="StreamiFSOutput",
        help="output file name prefix",
    )
    # The only tuning option with a long alias, so it is registered by hand.
    cli.add_argument(
        "-nb_pct", "--percent_neighbor_cells",
        dest="nb_pct",
        type=float,
        default=0.1,
        help="",
    )

    # (option name, type converter, default). Registration order is kept
    # because it determines the order shown in the --help output. A converter
    # of None leaves the value as the raw command-line string.
    simple_options = (
        ("n_clusters", int, 10),
        ("damping", float, 0.75),
        ("pref_perc", int, 50),
        ("max_n_clusters", int, 200),
        ("clustering", None, 'kmeans'),
        ("comp1", int, 0),
        ("comp2", int, 1),
        ("n_comp", int, 3),
        ("fig_width", int, 8),
        ("fig_height", int, 8),
        ("fig_legend_ncol", int, None),
    )
    for opt_name, converter, fallback in simple_options:
        cli.add_argument("-" + opt_name, dest=opt_name, type=converter,
                         default=fallback, help="")

    opts = cli.parse_args()

    print('Starting validation procedure...')

    figure_size = (opts.fig_width, opts.fig_height)
    prefix = opts.output_filename_prefix

    adata = st.read(
        file_name=opts.input_filename,
        file_format='pkl',
        experiment='rna-seq',
        workdir="./",
    )

    st.seed_elastic_principal_graph(
        adata,
        clustering=opts.clustering,
        n_clusters=opts.n_clusters,
        damping=opts.damping,
        pref_perc=opts.pref_perc,
        max_n_clusters=opts.max_n_clusters,
        nb_pct=opts.nb_pct,
    )

    # Both plots share the same component selection and figure size.
    st.plot_branches(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches.png',
        fig_path=None,
        fig_size=figure_size,
    )
    st.plot_branches_with_cells(
        adata,
        n_components=opts.n_comp,
        comp1=opts.comp1,
        comp2=opts.comp2,
        save_fig=True,
        fig_name=prefix + '_branches_with_cells.png',
        fig_path=None,
        fig_size=figure_size,
        fig_legend_ncol=opts.fig_legend_ncol,
    )

    st.write(
        adata,
        file_name=prefix + '_stream_result.pkl',
        file_path='./',
        file_format='pkl',
    )

    print('Finished computation.')
def main():
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m",
                        "--matrix",
                        dest="input_filename",
                        default=None,
                        help="input file name",
                        metavar="FILE")
    parser.add_argument("-l",
                        "--cell_labels",
                        dest="cell_label_filename",
                        default=None,
                        help="filename of cell labels")
    parser.add_argument("-c",
                        "--cell_labels_colors",
                        dest="cell_label_color_filename",
                        default=None,
                        help="filename of cell label colors")
    parser.add_argument(
        "-s",
        "--select_features",
        dest="s_method",
        default='LOESS',
        help=
        "LOESS,PCA or all: Select variable genes using LOESS or principal components using PCA or all the genes are kept"
    )
    parser.add_argument("--TG",
                        "--detect_TG_genes",
                        dest="flag_gene_TG_detection",
                        action="store_true",
                        help="detect transition genes automatically")
    parser.add_argument("--DE",
                        "--detect_DE_genes",
                        dest="flag_gene_DE_detection",
                        action="store_true",
                        help="detect DE genes automatically")
    parser.add_argument("--LG",
                        "--detect_LG_genes",
                        dest="flag_gene_LG_detection",
                        action="store_true",
                        help="detect leaf genes automatically")
    parser.add_argument(
        "-g",
        "--genes",
        dest="genes",
        default=None,
        help=
        "genes to visualize, it can either be filename which contains all the genes in one column or a set of gene names separated by comma"
    )
    parser.add_argument(
        "-p",
        "--use_precomputed",
        dest="use_precomputed",
        action="store_true",
        help=
        "use precomputed data files without re-computing structure learning part"
    )
    parser.add_argument("--new",
                        dest="new_filename",
                        default=None,
                        help="file name of data to be mapped")
    parser.add_argument("--new_l",
                        dest="new_label_filename",
                        default=None,
                        help="filename of new cell labels")
    parser.add_argument("--new_c",
                        dest="new_label_color_filename",
                        default=None,
                        help="filename of new cell label colors")
    parser.add_argument("--log2",
                        dest="flag_log2",
                        action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm",
                        dest="flag_norm",
                        action="store_true",
                        help="normalize data based on library size")
    parser.add_argument("--atac",
                        dest="flag_atac",
                        action="store_true",
                        help="indicate scATAC-seq data")
    parser.add_argument(
        "--n_jobs",
        dest="n_jobs",
        type=int,
        default=1,
        help="Specify the number of processes to use. (default, 1")
    parser.add_argument(
        "--loess_frac",
        dest="loess_frac",
        type=float,
        default=0.1,
        help="The fraction of the data used in LOESS regression")
    parser.add_argument(
        "--loess_cutoff",
        dest="loess_cutoff",
        type=int,
        default=95,
        help=
        "the percentile used in variable gene selection based on LOESS regression"
    )
    parser.add_argument("--pca_first_PC",
                        dest="flag_first_PC",
                        action="store_true",
                        help="keep first PC")
    parser.add_argument("--pca_n_PC",
                        dest="pca_n_PC",
                        type=int,
                        default=15,
                        help="The number of selected PCs,it's 15 by default")
    parser.add_argument(
        "--dr_method",
        dest="dr_method",
        default='se',
        help=
        "Method used for dimension reduction. Choose from {{'se','mlle','umap','pca'}}"
    )
    parser.add_argument("--n_neighbors",
                        dest="n_neighbors",
                        type=float,
                        default=50,
                        help="The number of neighbor cells")
    parser.add_argument(
        "--nb_pct",
        dest="nb_pct",
        type=float,
        default=None,
        help=
        "The percentage of neighbor cells (when sepcified, it will overwrite n_neighbors)."
    )
    parser.add_argument("--n_components",
                        dest="n_components",
                        type=int,
                        default=3,
                        help="Number of components to keep.")
    parser.add_argument(
        "--clustering",
        dest="clustering",
        default='kmeans',
        help=
        "Clustering method used for seeding the intial structure, choose from 'ap','kmeans','sc'"
    )
    parser.add_argument("--damping",
                        dest="damping",
                        type=float,
                        default=0.75,
                        help="Affinity Propagation: damping factor")
    parser.add_argument(
        "--n_clusters",
        dest="n_clusters",
        type=int,
        default=10,
        help="Number of clusters for spectral clustering or kmeans")
    parser.add_argument("--EPG_n_nodes",
                        dest="EPG_n_nodes",
                        type=int,
                        default=50,
                        help=" Number of nodes for elastic principal graph")
    parser.add_argument(
        "--EPG_lambda",
        dest="EPG_lambda",
        type=float,
        default=0.02,
        help="lambda parameter used to compute the elastic energy")
    parser.add_argument("--EPG_mu",
                        dest="EPG_mu",
                        type=float,
                        default=0.1,
                        help="mu parameter used to compute the elastic energy")
    parser.add_argument(
        "--EPG_trimmingradius",
        dest="EPG_trimmingradius",
        type=float,
        default=np.inf,
        help="maximal distance of point from a node to affect its embedment")
    parser.add_argument(
        "--EPG_alpha",
        dest="EPG_alpha",
        type=float,
        default=0.02,
        help=
        "positive numeric, the value of the alpha parameter of the penalized elastic energy"
    )
    parser.add_argument("--EPG_collapse",
                        dest="flag_EPG_collapse",
                        action="store_true",
                        help="collapsing small branches")
    parser.add_argument(
        "--EPG_collapse_mode",
        dest="EPG_collapse_mode",
        default="PointNumber",
        help=
        "the mode used to collapse branches. PointNumber,PointNumber_Extrema, PointNumber_Leaves,EdgesNumber or EdgesLength"
    )
    parser.add_argument(
        "--EPG_collapse_par",
        dest="EPG_collapse_par",
        type=float,
        default=5,
        help=
        "positive numeric, the cotrol paramter used for collapsing small branches"
    )
    parser.add_argument("--disable_EPG_optimize",
                        dest="flag_disable_EPG_optimize",
                        action="store_true",
                        help="disable optimizing branching")
    parser.add_argument("--EPG_shift",
                        dest="flag_EPG_shift",
                        action="store_true",
                        help="shift branching point ")
    parser.add_argument(
        "--EPG_shift_mode",
        dest="EPG_shift_mode",
        default='NodeDensity',
        help=
        "the mode to use to shift the branching points NodePoints or NodeDensity"
    )
    parser.add_argument(
        "--EPG_shift_DR",
        dest="EPG_shift_DR",
        type=float,
        default=0.05,
        help=
        "positive numeric, the radius to be used when computing point density if EPG_shift_mode is NodeDensity"
    )
    parser.add_argument(
        "--EPG_shift_maxshift",
        dest="EPG_shift_maxshift",
        type=int,
        default=5,
        help=
        "positive integer, the maxium distance (as number of edges) to consider when exploring the branching point neighborhood"
    )
    parser.add_argument("--disable_EPG_ext",
                        dest="flag_disable_EPG_ext",
                        action="store_true",
                        help="disable extending leaves with additional nodes")
    parser.add_argument(
        "--EPG_ext_mode",
        dest="EPG_ext_mode",
        default='QuantDists',
        help=
        " the mode used to extend the graph,QuantDists, QuantCentroid or WeigthedCentroid"
    )
    parser.add_argument(
        "--EPG_ext_par",
        dest="EPG_ext_par",
        type=float,
        default=0.5,
        help=
        "the control parameter used for contribution of the different data points when extending leaves with nodes"
    )
    parser.add_argument("--DE_zscore_cutoff",
                        dest="DE_zscore_cutoff",
                        default=2,
                        help="Differentially Expressed Genes z-score cutoff")
    parser.add_argument(
        "--DE_logfc_cutoff",
        dest="DE_logfc_cutoff",
        default=0.25,
        help="Differentially Expressed Genes log fold change cutoff")
    parser.add_argument("--TG_spearman_cutoff",
                        dest="TG_spearman_cutoff",
                        default=0.4,
                        help="Transition Genes Spearman correlation cutoff")
    parser.add_argument("--TG_logfc_cutoff",
                        dest="TG_logfc_cutoff",
                        default=0.25,
                        help="Transition Genes log fold change cutoff")
    parser.add_argument("--LG_zscore_cutoff",
                        dest="LG_zscore_cutoff",
                        default=1.5,
                        help="Leaf Genes z-score cutoff")
    parser.add_argument("--LG_pvalue_cutoff",
                        dest="LG_pvalue_cutoff",
                        default=1e-2,
                        help="Leaf Genes p value cutoff")
    parser.add_argument(
        "--umap",
        dest="flag_umap",
        action="store_true",
        help="whether to use UMAP for visualization (default: No)")
    parser.add_argument("-r",
                        dest="root",
                        default=None,
                        help="root node for subwaymap_plot and stream_plot")
    parser.add_argument("--stream_log_view",
                        dest="flag_stream_log_view",
                        action="store_true",
                        help="use log2 scale for y axis of stream_plot")
    parser.add_argument("-o",
                        "--output_folder",
                        dest="output_folder",
                        default=None,
                        help="Output folder")
    parser.add_argument("--for_web",
                        dest="flag_web",
                        action="store_true",
                        help="Output files for website")
    parser.add_argument(
        "--n_genes",
        dest="n_genes",
        type=int,
        default=5,
        help=
        "Number of top genes selected from each output marker gene file for website gene visualization"
    )

    args = parser.parse_args()
    if (args.input_filename is None) and (args.new_filename is None):
        parser.error("at least one of -m, --new required")

    new_filename = args.new_filename
    new_label_filename = args.new_label_filename
    new_label_color_filename = args.new_label_color_filename
    flag_stream_log_view = args.flag_stream_log_view
    flag_gene_TG_detection = args.flag_gene_TG_detection
    flag_gene_DE_detection = args.flag_gene_DE_detection
    flag_gene_LG_detection = args.flag_gene_LG_detection
    flag_web = args.flag_web
    flag_first_PC = args.flag_first_PC
    flag_umap = args.flag_umap
    genes = args.genes
    DE_zscore_cutoff = args.DE_zscore_cutoff
    DE_logfc_cutoff = args.DE_logfc_cutoff
    TG_spearman_cutoff = args.TG_spearman_cutoff
    TG_logfc_cutoff = args.TG_logfc_cutoff
    LG_zscore_cutoff = args.LG_zscore_cutoff
    LG_pvalue_cutoff = args.LG_pvalue_cutoff
    root = args.root

    input_filename = args.input_filename
    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    s_method = args.s_method
    use_precomputed = args.use_precomputed
    n_jobs = args.n_jobs
    loess_frac = args.loess_frac
    loess_cutoff = args.loess_cutoff
    pca_n_PC = args.pca_n_PC
    flag_log2 = args.flag_log2
    flag_norm = args.flag_norm
    flag_atac = args.flag_atac
    dr_method = args.dr_method
    nb_pct = args.nb_pct  # neighbour percent
    n_neighbors = args.n_neighbors
    n_components = args.n_components  #number of components to keep
    clustering = args.clustering
    damping = args.damping
    n_clusters = args.n_clusters
    EPG_n_nodes = args.EPG_n_nodes
    EPG_lambda = args.EPG_lambda
    EPG_mu = args.EPG_mu
    EPG_trimmingradius = args.EPG_trimmingradius
    EPG_alpha = args.EPG_alpha
    flag_EPG_collapse = args.flag_EPG_collapse
    EPG_collapse_mode = args.EPG_collapse_mode
    EPG_collapse_par = args.EPG_collapse_par
    flag_EPG_shift = args.flag_EPG_shift
    EPG_shift_mode = args.EPG_shift_mode
    EPG_shift_DR = args.EPG_shift_DR
    EPG_shift_maxshift = args.EPG_shift_maxshift
    flag_disable_EPG_optimize = args.flag_disable_EPG_optimize
    flag_disable_EPG_ext = args.flag_disable_EPG_ext
    EPG_ext_mode = args.EPG_ext_mode
    EPG_ext_par = args.EPG_ext_par
    output_folder = args.output_folder  #work directory
    n_genes = args.n_genes

    if (flag_web):
        flag_savefig = False
    else:
        flag_savefig = True
    gene_list = []
    if (genes != None):
        if (os.path.exists(genes)):
            gene_list = pd.read_csv(genes,
                                    sep='\t',
                                    header=None,
                                    index_col=None,
                                    compression='gzip' if genes.split('.')[-1]
                                    == 'gz' else None).iloc[:, 0].tolist()
            gene_list = list(set(gene_list))
        else:
            gene_list = genes.split(',')
        print('Genes to visualize: ')
        print(gene_list)
    if (new_filename is None):
        if (output_folder == None):
            workdir = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir = output_folder
        if (use_precomputed):
            print('Importing the precomputed pkl file...')
            adata = st.read(file_name='stream_result.pkl',
                            file_format='pkl',
                            file_path=workdir,
                            workdir=workdir)
        else:
            if (flag_atac):
                print('Reading in atac zscore matrix...')
                adata = st.read(file_name=input_filename,
                                workdir=workdir,
                                experiment='atac-seq')
            else:
                adata = st.read(file_name=input_filename, workdir=workdir)
                print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
                      str(adata.var.shape[0]) + ' genes')
            adata.var_names_make_unique()
            adata.obs_names_make_unique()
            if (cell_label_filename != None):
                st.add_cell_labels(adata, file_name=cell_label_filename)
            else:
                st.add_cell_labels(adata)
            if (cell_label_color_filename != None):
                st.add_cell_colors(adata, file_name=cell_label_color_filename)
            else:
                st.add_cell_colors(adata)
            if (flag_atac):
                print('Selecting top principal components...')
                st.select_top_principal_components(adata,
                                                   n_pc=pca_n_PC,
                                                   first_pc=flag_first_PC,
                                                   save_fig=True)
                st.dimension_reduction(adata,
                                       method=dr_method,
                                       n_components=n_components,
                                       n_neighbors=n_neighbors,
                                       nb_pct=nb_pct,
                                       n_jobs=n_jobs,
                                       feature='top_pcs')
            else:
                if (flag_norm):
                    st.normalize_per_cell(adata)
                if (flag_log2):
                    st.log_transform(adata)
                if (s_method != 'all'):
                    print('Filtering genes...')
                    st.filter_genes(adata, min_num_cells=5)
                    print('Removing mitochondrial genes...')
                    st.remove_mt_genes(adata)
                    if (s_method == 'LOESS'):
                        print('Selecting most variable genes...')
                        st.select_variable_genes(adata,
                                                 loess_frac=loess_frac,
                                                 percentile=loess_cutoff,
                                                 save_fig=True)
                        pd.DataFrame(adata.uns['var_genes']).to_csv(
                            os.path.join(workdir,
                                         'selected_variable_genes.tsv'),
                            sep='\t',
                            index=None,
                            header=False)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='var_genes')
                    if (s_method == 'PCA'):
                        print('Selecting top principal components...')
                        st.select_top_principal_components(
                            adata,
                            n_pc=pca_n_PC,
                            first_pc=flag_first_PC,
                            save_fig=True)
                        st.dimension_reduction(adata,
                                               method=dr_method,
                                               n_components=n_components,
                                               n_neighbors=n_neighbors,
                                               nb_pct=nb_pct,
                                               n_jobs=n_jobs,
                                               feature='top_pcs')
                else:
                    print('Keep all the genes...')
                    st.dimension_reduction(adata,
                                           n_components=n_components,
                                           n_neighbors=n_neighbors,
                                           nb_pct=nb_pct,
                                           n_jobs=n_jobs,
                                           feature='all')
            st.plot_dimension_reduction(adata, save_fig=flag_savefig)
            st.seed_elastic_principal_graph(adata,
                                            clustering=clustering,
                                            damping=damping,
                                            n_clusters=n_clusters)
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='seed_elastic_principal_graph.pdf')

            st.elastic_principal_graph(adata,
                                       epg_n_nodes=EPG_n_nodes,
                                       epg_lambda=EPG_lambda,
                                       epg_mu=EPG_mu,
                                       epg_trimmingradius=EPG_trimmingradius,
                                       epg_alpha=EPG_alpha)
            st.plot_branches(adata,
                             save_fig=flag_savefig,
                             fig_name='elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(adata,
                                        save_fig=flag_savefig,
                                        fig_name='elastic_principal_graph.pdf')
            if (not flag_disable_EPG_optimize):
                st.optimize_branching(adata,
                                      epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='optimizing_elastic_principal_graph.pdf')
            if (flag_EPG_shift):
                st.shift_branching(adata,
                                   epg_shift_mode=EPG_shift_mode,
                                   epg_shift_radius=EPG_shift_DR,
                                   epg_shift_max=EPG_shift_maxshift,
                                   epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='shifting_elastic_principal_graph.pdf')
            if (flag_EPG_collapse):
                st.prune_elastic_principal_graph(
                    adata,
                    epg_collapse_mode=EPG_collapse_mode,
                    epg_collapse_par=EPG_collapse_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='pruning_elastic_principal_graph.pdf')
            if (not flag_disable_EPG_ext):
                st.extend_elastic_principal_graph(
                    adata,
                    epg_ext_mode=EPG_ext_mode,
                    epg_ext_par=EPG_ext_par,
                    epg_trimmingradius=EPG_trimmingradius)
                st.plot_branches(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph_skeleton.pdf')
                st.plot_branches_with_cells(
                    adata,
                    save_fig=flag_savefig,
                    fig_name='extending_elastic_principal_graph.pdf')
            st.plot_branches(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph_skeleton.pdf')
            st.plot_branches_with_cells(
                adata,
                save_fig=flag_savefig,
                fig_name='finalized_elastic_principal_graph.pdf')
            st.plot_flat_tree(adata, save_fig=flag_savefig)
            if (flag_umap):
                print('UMAP visualization based on top MLLE components...')
                st.plot_visualization_2D(adata,
                                         save_fig=flag_savefig,
                                         fig_name='umap_cells')
                st.plot_visualization_2D(adata,
                                         color_by='branch',
                                         save_fig=flag_savefig,
                                         fig_name='umap_branches')
            if (root is None):
                print('Visualization of subwaymap and stream plots...')
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                for ns in list_node_start:
                    if (flag_web):
                        st.subwaymap_plot(adata,
                                          percentile_dist=100,
                                          root=ns,
                                          save_fig=flag_savefig)
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=True,
                                       flag_log_view=flag_stream_log_view,
                                       fig_legend=False,
                                       fig_name='stream_plot.png')
                    else:
                        st.subwaymap_plot(adata,
                                          percentile_dist=100,
                                          root=ns,
                                          save_fig=flag_savefig)
                        st.stream_plot(adata,
                                       root=ns,
                                       fig_size=(8, 8),
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot(adata,
                                  percentile_dist=100,
                                  root=root,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               root=root,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)
            output_cell_info(adata)
            if (flag_web):
                output_for_website(adata)
            st.write(adata)

        if (flag_gene_TG_detection):
            print('Identifying transition genes...')
            st.detect_transistion_genes(adata,
                                        cutoff_spearman=TG_spearman_cutoff,
                                        cutoff_logfc=TG_logfc_cutoff,
                                        n_jobs=n_jobs)
            if (flag_web):
                ## Plot top5 genes
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['transition_genes'].keys():
                    gene_list = gene_list + adata.uns['transition_genes'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
            else:
                st.plot_transition_genes(adata, save_fig=flag_savefig)

        if (flag_gene_DE_detection):
            print('Identifying differentially expressed genes...')
            st.detect_de_genes(adata,
                               cutoff_zscore=DE_logfc_cutoff,
                               cutoff_logfc=DE_logfc_cutoff,
                               n_jobs=n_jobs)
            if (flag_web):
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['de_genes_greater'].keys():
                    gene_list = gene_list + adata.uns['de_genes_greater'][
                        x].index[:n_genes].tolist()
                for x in adata.uns['de_genes_less'].keys():
                    gene_list = gene_list + adata.uns['de_genes_less'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
            else:
                st.plot_de_genes(adata, save_fig=flag_savefig)

        if (flag_gene_LG_detection):
            print('Identifying leaf genes...')
            st.detect_leaf_genes(adata,
                                 cutoff_zscore=LG_zscore_cutoff,
                                 cutoff_pvalue=LG_pvalue_cutoff,
                                 n_jobs=n_jobs)
            if (flag_web):
                ## Plot top5 genes
                flat_tree = adata.uns['flat_tree']
                list_node_start = [
                    value for key, value in nx.get_node_attributes(
                        flat_tree, 'label').items()
                ]
                gene_list = []
                for x in adata.uns['leaf_genes'].keys():
                    gene_list = gene_list + adata.uns['leaf_genes'][
                        x].index[:n_genes].tolist()
                gene_list = np.unique(gene_list)
                for ns in list_node_start:
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=ns,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')

        if ((genes != None) and (len(gene_list) > 0)):
            print('Visualizing genes...')
            flat_tree = adata.uns['flat_tree']
            list_node_start = [
                value for key, value in nx.get_node_attributes(
                    flat_tree, 'label').items()
            ]
            if (root is None):
                for ns in list_node_start:
                    if (flag_web):
                        output_for_website_subwaymap_gene(adata, gene_list)
                        st.stream_plot_gene(adata,
                                            root=ns,
                                            fig_size=(8, 8),
                                            genes=gene_list,
                                            save_fig=True,
                                            flag_log_view=flag_stream_log_view,
                                            fig_format='png')
                    else:
                        st.subwaymap_plot_gene(adata,
                                               percentile_dist=100,
                                               root=ns,
                                               genes=gene_list,
                                               save_fig=flag_savefig)
                        st.stream_plot_gene(adata,
                                            root=ns,
                                            fig_size=(8, 8),
                                            genes=gene_list,
                                            save_fig=flag_savefig,
                                            flag_log_view=flag_stream_log_view)
            else:
                if (flag_web):
                    output_for_website_subwaymap_gene(adata, gene_list)
                    st.stream_plot_gene(adata,
                                        root=root,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=True,
                                        flag_log_view=flag_stream_log_view,
                                        fig_format='png')
                else:
                    st.subwaymap_plot_gene(adata,
                                           percentile_dist=100,
                                           root=root,
                                           genes=gene_list,
                                           save_fig=flag_savefig)
                    st.stream_plot_gene(adata,
                                        root=root,
                                        fig_size=(8, 8),
                                        genes=gene_list,
                                        save_fig=flag_savefig,
                                        flag_log_view=flag_stream_log_view)

    else:
        print('Starting mapping procedure...')
        if (output_folder == None):
            workdir_ref = os.path.join(os.getcwd(), 'stream_result')
        else:
            workdir_ref = output_folder
        adata = st.read(file_name='stream_result.pkl',
                        file_format='pkl',
                        file_path=workdir_ref,
                        workdir=workdir_ref)
        workdir = os.path.join(workdir_ref, os.pardir, 'mapping_result')
        adata_new = st.read(file_name=new_filename, workdir=workdir)
        st.add_cell_labels(adata_new, file_name=new_label_filename)
        st.add_cell_colors(adata_new, file_name=new_label_color_filename)
        if (s_method == 'LOESS'):
            st.map_new_data(adata, adata_new, feature='var_genes')
        if (s_method == 'all'):
            st.map_new_data(adata, adata_new, feature='all')
        if (flag_umap):
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     use_precomputed=False,
                                     save_fig=flag_savefig,
                                     fig_name='umap_new_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     show_all_colors=True,
                                     save_fig=flag_savefig,
                                     fig_name='umap_all_cells')
            st.plot_visualization_2D(adata,
                                     adata_new=adata_new,
                                     color_by='branch',
                                     save_fig=flag_savefig,
                                     fig_name='umap_branches')
        if (root is None):
            flat_tree = adata.uns['flat_tree']
            list_node_start = [
                value for key, value in nx.get_node_attributes(
                    flat_tree, 'label').items()
            ]
            for ns in list_node_start:
                st.subwaymap_plot(adata,
                                  adata_new=adata_new,
                                  percentile_dist=100,
                                  show_all_cells=False,
                                  root=ns,
                                  save_fig=flag_savefig)
                st.stream_plot(adata,
                               adata_new=adata_new,
                               show_all_colors=False,
                               root=ns,
                               fig_size=(8, 8),
                               save_fig=flag_savefig,
                               flag_log_view=flag_stream_log_view)
        else:
            st.subwaymap_plot(adata,
                              adata_new=adata_new,
                              percentile_dist=100,
                              show_all_cells=False,
                              root=root,
                              save_fig=flag_savefig)
            st.stream_plot(adata,
                           adata_new=adata_new,
                           show_all_colors=False,
                           root=root,
                           fig_size=(8, 8),
                           save_fig=flag_savefig,
                           flag_log_view=flag_stream_log_view)
        if ((genes != None) and (len(gene_list) > 0)):
            if (root is None):
                for ns in list_node_start:
                    st.subwaymap_plot_gene(adata,
                                           adata_new=adata_new,
                                           percentile_dist=100,
                                           root=ns,
                                           save_fig=flag_savefig,
                                           flag_log_view=flag_stream_log_view)
            else:
                st.subwaymap_plot_gene(adata,
                                       adata_new=adata_new,
                                       percentile_dist=100,
                                       root=root,
                                       save_fig=flag_savefig,
                                       flag_log_view=flag_stream_log_view)
        st.write(adata_new, file_name='stream_mapping_result.pkl')
    print('Finished computation.')
def main():
    """Command-line entry point for the preprocessing step.

    Parses the command line, reads the input (as ``pkl`` when the file name
    ends in ``pkl``, otherwise with ``st.read``'s default format), attaches
    cell labels and colors, applies the optional normalization, log2
    transformation and mitochondrial-gene removal, filters cells and genes,
    and writes ``<prefix>_stream_result.pkl`` to the current directory.

    Exits through ``parser.error`` (status 2) when no input file is given.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-m",
                        "--matrix",
                        dest="input_filename",
                        default=None,
                        help="input file name",
                        metavar="FILE")
    parser.add_argument("-l",
                        "--cell_labels",
                        dest="cell_label_filename",
                        default=None,
                        help="filename of cell labels")
    parser.add_argument("-c",
                        "--cell_labels_colors",
                        dest="cell_label_color_filename",
                        default=None,
                        help="filename of cell label colors")
    parser.add_argument("--log2",
                        dest="flag_log2",
                        action="store_true",
                        help="perform log2 transformation")
    parser.add_argument("--norm",
                        dest="flag_norm",
                        action="store_true",
                        help="normalize data based on library size")
    # Kept so existing command lines still parse; nothing below reads it
    # (all input and output goes through the current directory).
    parser.add_argument("-o",
                        "--output_folder",
                        dest="output_folder",
                        default=None,
                        help="Output folder")
    parser.add_argument("-rmt",
                        "--remove_mt_genes",
                        dest="flag_remove_mt_genes",
                        action="store_true",
                        default=False,
                        help="Remove Mitochondrial genes")
    parser.add_argument("-mcg",
                        "--min_count_genes",
                        dest="min_count_genes",
                        type=int,
                        default=None,
                        help="filter cells with less than this many genes")
    parser.add_argument("-mpg",
                        "--min_percent_genes",
                        dest="min_percent_genes",
                        type=float,
                        default=None,
                        help="The minimum percent genes")
    parser.add_argument("-mpc",
                        "--min_percent_cells",
                        dest="min_percent_cells",
                        type=float,
                        default=None,
                        help="The minimum percent cells")
    parser.add_argument("-mcc",
                        "--min_count_cells",
                        dest="min_count_cells",
                        type=int,
                        default=None,
                        help="The minimum count cells")
    parser.add_argument("-mnc",
                        "--min_num_cells",
                        dest="min_num_cells",
                        type=int,
                        default=None,
                        help="The minimum number of cells")
    parser.add_argument("-ec",
                        "--expression_cutoff",
                        dest="expression_cutoff",
                        type=float,
                        default=None,
                        help="The expression cutoff")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamOutput",
                        help="output file name prefix")

    args = parser.parse_args()

    print(args)

    input_filename = args.input_filename
    if input_filename is None:
        # Without this guard the ``endswith`` check below dies with an
        # AttributeError on None instead of a usage message.
        parser.error('an input file is required (-m/--matrix)')

    cell_label_filename = args.cell_label_filename
    cell_label_color_filename = args.cell_label_color_filename
    flag_norm = args.flag_norm
    flag_log2 = args.flag_log2
    flag_remove_mt_genes = args.flag_remove_mt_genes
    min_count_genes = args.min_count_genes
    min_percent_cells = args.min_percent_cells
    min_percent_genes = args.min_percent_genes

    min_count_cells = args.min_count_cells
    min_num_cells = args.min_num_cells
    expression_cutoff = args.expression_cutoff
    output_filename_prefix = args.output_filename_prefix

    print('Starting preprocessing procedure...')
    workdir = "./"

    if input_filename.endswith('pkl'):
        adata = st.read(file_name=input_filename,
                        file_format='pkl',
                        workdir=workdir)
    else:
        adata = st.read(file_name=input_filename, workdir=workdir)
    print('Input: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    adata.var_names_make_unique()
    adata.obs_names_make_unique()
    # Without a label / color file the st helpers are called with their
    # own defaults.
    if cell_label_filename is not None:
        st.add_cell_labels(adata, file_name=cell_label_filename)
    else:
        st.add_cell_labels(adata)
    if cell_label_color_filename is not None:
        st.add_cell_colors(adata, file_name=cell_label_color_filename)
    else:
        st.add_cell_colors(adata)

    if flag_norm:
        st.normalize_per_cell(adata)

    if flag_log2:
        st.log_transform(adata, base=2)

    if flag_remove_mt_genes:
        st.remove_mt_genes(adata)

    # The *_genes thresholds drive the cell filter and the *_cells thresholds
    # drive the gene filter, mirroring how min_percent_* is routed.
    st.filter_cells(adata,
                    min_pct_genes=min_percent_genes,
                    min_count=min_count_genes,
                    expr_cutoff=expression_cutoff)
    st.filter_genes(adata,
                    min_num_cells=min_num_cells,
                    min_pct_cells=min_percent_cells,
                    min_count=min_count_cells,
                    expr_cutoff=expression_cutoff)

    output_name = output_filename_prefix + '_stream_result.pkl'
    print("Writing " + output_filename_prefix + " " + output_name)
    st.write(adata,
             file_name=output_name,
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    print('Finished computation.')
def main():
    """Command-line entry point for the feature selection step.

    Loads a ``pkl`` file, optionally selects variable genes
    (``--flag_variable``) and/or top principal components (``--flag_pca``),
    saving a figure for each requested step, and writes
    ``<prefix>_stream_result.pkl`` to the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='%s Parameters' % __tool_name__)
    add = parser.add_argument

    add("-m", "--data-file",
        dest="input_filename",
        metavar="FILE",
        default=None,
        help="input file name, pkl format from Stream preprocessing module")

    # Boolean switches; each option string doubles as its destination name.
    for switch, description in (
            ("flag_useprecomputed", "use precomputed features for PCA"),
            ("flag_firstpc", "Use the first principal component"),
            ("flag_pca", "perform PCA"),
            ("flag_variable", "find variable genes")):
        add("--" + switch, dest=switch, action="store_true", help=description)

    add("-of", "--of",
        dest="output_filename_prefix",
        default="StreamiFSOutput",
        help="output file name prefix")

    add("-lf", "--loess_fraction",
        dest="loess_fraction",
        type=float,
        default=None,
        help="loess fraction")
    for option, target, description in (
            ("-per", "percentile", "percent of variable genes to find"),
            ("-n_g", "num_genes", "num genes"),
            ("-n_j", "num_jobs", "num jobs")):
        add(option, dest=target, type=int, default=None, help=description)
    add("-feat", dest="feature", default=None, help="feature")
    for option, target, description in (
            ("-n_pc", "num_principal_components",
             "num principal components"),
            ("-max_pc", "max_principal_components",
             "max principal components")):
        add(option, dest=target, type=int, default=None, help=description)
    for option, target in (("-fig_width", "fig_width"),
                           ("-fig_height", "fig_height")):
        add(option, dest=target, type=int, default=8, help="")

    add("--flag", dest="flag", action="store_true", help="debugging flag")

    opts = parser.parse_args()

    print('Starting feature selection procedure...')
    print(opts)

    here = "./"
    prefix = opts.output_filename_prefix
    figure_size = (opts.fig_width, opts.fig_height)

    adata = st.read(file_name=opts.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=here)

    def report(stage):
        # One-line summary of the current matrix dimensions.
        print('{}: {} cells, {} genes'.format(stage, adata.obs.shape[0],
                                              adata.var.shape[0]))

    report('Input')

    if opts.flag_variable:
        st.select_variable_genes(adata,
                                 loess_frac=opts.loess_fraction,
                                 percentile=opts.percentile,
                                 n_genes=opts.num_genes,
                                 n_jobs=opts.num_jobs,
                                 save_fig=True,
                                 fig_name=prefix + '_variable_genes.png',
                                 fig_size=figure_size,
                                 fig_path="./")

    if opts.flag_pca:
        st.select_top_principal_components(
            adata,
            feature=opts.feature,
            n_pc=opts.num_principal_components,
            max_pc=opts.max_principal_components,
            first_pc=opts.flag_firstpc,
            use_precomputed=opts.flag_useprecomputed,
            save_fig=True,
            fig_name=prefix + '_pca.png',
            fig_size=figure_size,
            fig_path='./')

    st.write(adata,
             file_name=prefix + '_stream_result.pkl',
             file_path='./',
             file_format='pkl')
    report('Output')

    print('Finished computation.')
Exemplo n.º 7
0
def main():
    """Command-line entry point for the dimension reduction step.

    Loads a ``pkl`` file, runs ``st.dimension_reduction`` on the feature set
    chosen with ``-feat`` (``'var_genes'`` when the option is missing or
    empty), saves the plot from ``st.plot_dimension_reduction`` as
    ``<prefix>_stddev_dotplot.png`` and writes
    ``<prefix>_stream_result.pkl``, both in the current directory.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")

    parser.add_argument("-nb_pct",
                        "--percent_neighbor_cells",
                        dest="nb_pct",
                        type=float,
                        default=None,
                        help="")
    parser.add_argument("-n_comp_k",
                        dest="n_comp_k",
                        type=int,
                        default=None,
                        help="")
    parser.add_argument("-feat", dest="feature", default=None, help="feature")

    parser.add_argument("-method", dest="method", default=None, help="")
    parser.add_argument("-nc_plot",
                        dest="nc_plot",
                        type=int,
                        default=None,
                        help="")
    # Parsed as int like -comp2; previously a supplied value reached the
    # plotting call as a string.
    parser.add_argument("-comp1",
                        dest="comp1",
                        type=int,
                        default=None,
                        help="")
    parser.add_argument("-comp2",
                        dest="comp2",
                        type=int,
                        default=None,
                        help="")
    parser.add_argument("-fig_width",
                        dest="fig_width",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_height",
                        dest="fig_height",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-n_jobs", dest="n_jobs", type=int, default=2, help="")

    parser.add_argument("-fig_legend_ncol",
                        dest="fig_legend_ncol",
                        type=int,
                        default=None,
                        help="")

    args = parser.parse_args()
    print(args)

    print('Starting dimension reduction procedure...')

    workdir = "./"

    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)
    print("Feature ", args.feature, type(args.feature))
    # Honor -feat; fall back to 'var_genes' (the value that used to be
    # hard-coded) when the option is missing or empty.
    feature = args.feature or 'var_genes'
    st.dimension_reduction(adata,
                           method=args.method,
                           feature=feature,
                           nb_pct=args.nb_pct,
                           n_components=args.n_comp_k,
                           n_jobs=args.n_jobs,
                           eigen_solver=None)

    fig_size = (args.fig_width, args.fig_height)
    st.plot_dimension_reduction(adata,
                                n_components=args.nc_plot,
                                comp1=args.comp1,
                                comp2=args.comp2,
                                save_fig=True,
                                fig_name=(args.output_filename_prefix +
                                          '_stddev_dotplot.png'),
                                fig_path="./",
                                fig_size=fig_size,
                                fig_legend_ncol=args.fig_legend_ncol)

    st.write(adata,
             file_name=(args.output_filename_prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')
    print('Output: ' + str(adata.obs.shape[0]) + ' cells, ' +
          str(adata.var.shape[0]) + ' genes')

    print('Finished computation.')
Exemplo n.º 8
0
def main():
    """Render STREAM cell-level and gene-level plots from the command line.

    Parses the command-line options, reads the pickled object named by
    ``-m/--data-file`` (with ``./`` as working directory) and then:

    * with ``-flag_cells``: saves the flat tree, the cell subway map and the
      cell stream plot as ``<prefix>_flat_tree.png``,
      ``<prefix>_cell_subway_map.png`` and ``<prefix>_cell_stream_plot.png``;
    * with ``-flag_genes``: saves subway-map and stream plots for the genes
      given as a comma-separated list in ``-genes``.

    Finally the object is written back as ``<prefix>_stream_result.pkl``.

    ``-preference`` is an optional comma-separated list; when it is omitted
    (or empty) ``None`` is forwarded to the plotting functions.

    Exits via ``parser.error`` (status 2) when ``-flag_genes`` is given
    without ``-genes``.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")
    parser.add_argument("-fig_width",
                        dest="fig_width",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_height",
                        dest="fig_height",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_legend_ncol",
                        dest="fig_legend_ncol",
                        type=int,
                        default=None,
                        help="")

    parser.add_argument("-root", dest="root", default=None, help="")
    parser.add_argument("-preference", dest="preference", help="")
    parser.add_argument("-subway_factor",
                        dest="subway_factor",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-color_by", dest="color_by", default='label', help="")
    parser.add_argument("-factor_num_win",
                        dest="factor_num_win",
                        type=int,
                        default=10,
                        help="")
    parser.add_argument("-factor_min_win",
                        dest="factor_min_win",
                        type=float,
                        default=2.0,
                        help="")
    parser.add_argument("-factor_width",
                        dest="factor_width",
                        type=float,
                        default=2.5,
                        help="")
    parser.add_argument("-flag_log_view",
                        dest="flag_log_view",
                        action="store_true",
                        help="")
    parser.add_argument("-factor_zoomin",
                        dest="factor_zoomin",
                        type=float,
                        default=100.0,
                        help="")
    parser.add_argument("-flag_cells",
                        dest="flag_cells",
                        action="store_true",
                        help="")
    parser.add_argument("-flag_genes",
                        dest="flag_genes",
                        action="store_true",
                        help="")

    parser.add_argument("-genes", dest="genes", default=None, help="")
    parser.add_argument("-percentile_dist",
                        dest="percentile_dist",
                        type=float,
                        default=100,
                        help="")

    args = parser.parse_args()

    # Fail early, before any data is loaded, with a readable message instead
    # of an AttributeError from None.split(',') further down.
    if args.flag_genes and not args.genes:
        parser.error("-flag_genes requires -genes (comma-separated gene names)")

    workdir = "./"

    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)

    # -preference has no default, so it is None unless supplied.
    preference = args.preference.split(',') if args.preference else None
    fig_size = (args.fig_width, args.fig_height)
    prefix = args.output_filename_prefix

    # store_true flags are False/True, never None, so test truthiness;
    # a "!= None" comparison would run both sections unconditionally.
    if args.flag_cells:
        st.plot_flat_tree(adata,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(prefix + '_flat_tree.png'),
                          fig_size=fig_size,
                          fig_legend_ncol=args.fig_legend_ncol)

        st.subwaymap_plot(adata,
                          root=args.root,
                          percentile_dist=args.percentile_dist,
                          preference=preference,
                          factor=args.subway_factor,
                          color_by=args.color_by,
                          save_fig=True,
                          fig_path="./",
                          fig_name=(prefix + '_cell_subway_map.png'),
                          fig_size=fig_size,
                          fig_legend_ncol=args.fig_legend_ncol)

        st.stream_plot(adata,
                       root=args.root,
                       preference=preference,
                       factor_num_win=args.factor_num_win,
                       factor_min_win=args.factor_min_win,
                       factor_width=args.factor_width,
                       flag_log_view=args.flag_log_view,
                       factor_zoomin=args.factor_zoomin,
                       save_fig=True,
                       fig_path="./",
                       fig_name=(prefix + '_cell_stream_plot.png'),
                       fig_size=fig_size,
                       fig_legend=True,
                       fig_legend_ncol=args.fig_legend_ncol,
                       tick_fontsize=20,
                       label_fontsize=25)

    if args.flag_genes:
        genes = args.genes.split(',')

        # No fig_name is passed to the gene plots, so the output file names
        # are whatever the library chooses.
        st.subwaymap_plot_gene(adata,
                               root=args.root,
                               genes=genes,
                               preference=preference,
                               percentile_dist=args.percentile_dist,
                               factor=args.subway_factor,
                               save_fig=True,
                               fig_path="./",
                               fig_format='png',
                               fig_size=fig_size)

        st.stream_plot_gene(adata,
                            root=args.root,
                            genes=genes,
                            preference=preference,
                            factor_min_win=args.factor_min_win,
                            factor_num_win=args.factor_num_win,
                            factor_width=args.factor_width,
                            save_fig=True,
                            fig_path="./",
                            fig_format='png',
                            fig_size=fig_size,
                            tick_fontsize=20,
                            label_fontsize=25)

    st.write(adata,
             file_name=(prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')

    print('Finished computation.')
def main():
    """Draw and save a 2D visualization of a STREAM object from the command line.

    Parses the command-line options, reads the pickled object named by
    ``-m/--data-file`` (with ``./`` as working directory), calls
    ``st.plot_visualization_2D`` so that the figure is saved as
    ``<prefix>_2D_plot.png`` in the current directory, and writes the object
    back as ``<prefix>_stream_result.pkl``.
    """
    sns.set_style('white')
    sns.set_context('poster')
    parser = argparse.ArgumentParser(
        description='%s Parameters' % __tool_name__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-m",
        "--data-file",
        dest="input_filename",
        default=None,
        help="input file name, pkl format from Stream preprocessing module",
        metavar="FILE")
    parser.add_argument("-of",
                        "--of",
                        dest="output_filename_prefix",
                        default="StreamiFSOutput",
                        help="output file name prefix")

    # The help text used to read "Save the figure", which does not match what
    # the flag does: it is forwarded as use_precomputed below.
    parser.add_argument("--flag_useprecomputed",
                        dest="flag_useprecomputed",
                        action="store_true",
                        help="forwarded as use_precomputed to the "
                        "2D visualization")

    parser.add_argument("-nb_pct",
                        "--percent_neighbor_cells",
                        dest="nb_pct",
                        type=float,
                        default=None,
                        help="")
    # NOTE: -n_comp_k is accepted but not used anywhere in this function; it
    # is kept so existing command lines that pass it keep working.
    parser.add_argument("-n_comp_k",
                        dest="n_comp_k",
                        type=int,
                        default=None,
                        help="")
    parser.add_argument("-perplexity",
                        dest="perplexity",
                        type=float,
                        default=None,
                        help="")

    parser.add_argument("-method", dest="method", default=None, help="")
    parser.add_argument("-color_by", dest="color_by", default='label', help="")
    parser.add_argument("-fig_width",
                        dest="fig_width",
                        type=int,
                        default=8,
                        help="")
    parser.add_argument("-fig_height",
                        dest="fig_height",
                        type=int,
                        default=8,
                        help="")

    parser.add_argument("-fig_legend_ncol",
                        dest="fig_legend_ncol",
                        type=int,
                        default=None,
                        help="")

    args = parser.parse_args()

    print('Starting ...')
    workdir = "./"

    adata = st.read(file_name=args.input_filename,
                    file_format='pkl',
                    experiment='rna-seq',
                    workdir=workdir)

    prefix = args.output_filename_prefix
    st.plot_visualization_2D(adata,
                             method=args.method,
                             nb_pct=args.nb_pct,
                             perplexity=args.perplexity,
                             color_by=args.color_by,
                             use_precomputed=args.flag_useprecomputed,
                             save_fig=True,
                             fig_path='./',
                             fig_name=(prefix + "_2D_plot.png"),
                             fig_size=(args.fig_width, args.fig_height),
                             fig_legend_ncol=args.fig_legend_ncol)

    st.write(adata,
             file_name=(prefix + '_stream_result.pkl'),
             file_path='./',
             file_format='pkl')

    print('Finished computation.')