def load_signatures(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Read gene signatures from a file, picking the loader from the file name's suffixes.

    Supported file formats are GMT, DAT (pickled), YAML or CSV (enriched motifs).
    The checks are tried in that fixed order: enriched-motif table, YAML, GMT, DAT.

    :param fname: The name of the file that contains the signatures.
    :return: The gene signatures loaded from the file (for DAT files, whatever
        object was pickled into the file).
    :raises ValueError: If no supported format matches the file name.
    """
    suffixes = PurePath(fname).suffixes

    # Table of enriched motifs (csv/tsv): load the table, then turn it into regulons.
    if is_valid_suffix(suffixes, 'ctx'):
        separator = suffixes_to_separator(suffixes)
        motifs = load_motifs(fname, sep=separator)
        return df2regulons(motifs)

    if is_valid_suffix(suffixes, 'ctx_yaml'):
        return load_from_yaml(fname)

    # '.gmt' may appear anywhere in the suffix list, not only as the last suffix.
    if '.gmt' in suffixes:
        # The one guessed separator is used both between fields and between genes.
        delimiter = guess_separator(fname)
        return GeneSignature.from_gmt(
            fname, field_separator=delimiter, gene_separator=delimiter
        )

    if '.dat' in suffixes:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with openfile(fname, 'rb') as handle:
            return pickle.load(handle)

    raise ValueError("Unknown file format \"{}\".".format(fname))
def load_signatures(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Read gene signatures from a file, picking the loader from the file extension.

    Supported file formats are GMT, DAT (pickled), YAML or CSV (enriched motifs).
    Only the last extension of the file name is considered, compared in lower case.

    :param fname: The name of the file that contains the signatures.
    :return: The gene signatures loaded from the file (for DAT files, whatever
        object was pickled into the file).
    :raises ValueError: If no supported format matches the file extension.
    """
    _, raw_extension = os.path.splitext(fname)
    extension = raw_extension.lower()

    # Delimited table of enriched motifs: the extension selects the column separator.
    if extension in FILE_EXTENSION2SEPARATOR:
        separator = FILE_EXTENSION2SEPARATOR[extension]
        motifs = load_motifs(fname, sep=separator)
        return df2regulons(motifs)

    if extension in {'.yaml', '.yml'}:
        return load_from_yaml(fname)

    if extension.endswith('.gmt'):
        # The one guessed separator is used both between fields and between genes.
        delimiter = guess_separator(fname)
        return GeneSignature.from_gmt(
            fname, field_separator=delimiter, gene_separator=delimiter
        )

    if extension == '.dat':
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(fname, 'rb') as handle:
            return pickle.load(handle)

    raise ValueError("Unknown file format \"{}\".".format(fname))
adjacencies, ex_matrix)) # identifies modules from GENIE3 # save GRNBoost2 product so we don't have to repeat again adjacencies.to_csv("grnboost_output.csv") # load product in case something goes wrong adjacencies = pd.read_csv("grnboost_output.csv", index_col=0) # cisTarget process: IDs cis-regulatory footprints from motifs around the TSS with ProgressBar( ): # calculate a list of enriched motifs and the corresponding target genes for all modules df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt") regulons = df2regulons( df) # create regulons from this table of enriched motifs # save the discovered motifs and regulons df.to_csv(motifs_filename) with open(regulons_filename, "wb") as f: pickle.dump(regulons, f) # load the discovered motifs and regulons if saved previously df = load_motifs(motifs_filename) with open(regulons_filename, "rb") as f: regulons = pickle.load(f) # AUCell process: finds enrichment of each discovered regulon auc_matrix = aucell(ex_matrix, regulons, num_workers=4) # export the product back to R for analysis auc_matrix.to_csv("SCENIC_export.csv")
import os

# Plot and logging configuration.
sc.settings.set_figure_params(frameon=True, color_map='Spectral_r')
sc.settings.verbosity = 3
plt.rcParams["axes.grid"] = False

# Run time for demo data: several minutes.
DATA_FOLDER = './Data/'

# Expression matrix and its metadata table; both use the first column as index.
exp_matrix = pd.read_csv(
    os.path.join(DATA_FOLDER, 'DC_exp.csv'),
    index_col=0,
)
exp_meta = pd.read_csv(
    os.path.join(DATA_FOLDER, 'DC_meta.txt'),
    sep='\t',
    index_col=0,
)

## Generate the regulons
# The motifs table is read from '<Motifs_NAME>.motifs.csv' inside DATA_FOLDER.
Motifs_NAME = 'exp_matrix'
motifs = load_motifs(
    os.path.join(DATA_FOLDER, '{}.motifs.csv'.format(Motifs_NAME))
)
regulons = df2regulons(motifs)

## Make a meta matrix for the regulon
# One entry per regulon, collected in a single pass: its transcription factor,
# the list of its target genes (keys of gene2weight), and the number of targets.
reg_num = []
reg_target = []
reg_tf = []
for regulon in regulons:
    targets = list(regulon.gene2weight.keys())
    reg_tf.append(regulon.transcription_factor)
    reg_target.append(targets)
    reg_num.append(len(targets))

# Rows are regulons (indexed by transcription factor); the transpose turns the
# two collected lists into the two columns.
reg_meta = pd.DataFrame([reg_num, reg_target]).T
reg_meta.index = reg_tf
reg_meta.columns = ['n_targets', 'targets']