def gmt2regions(gmt_fname, db_fname, delineation_code, fraction):
    """
    Convert every signature in a GMT file and print the results to stdout.

    For each signature one line is printed: the signature's name followed by
    the `genes` of the converted signature, all separated by commas.

    :param gmt_fname: Path of the GMT file the signatures are read from.
    :param db_fname: Path of the region ranking database. Its base name is
        used as the name of the database.
    :param delineation_code: Key into `CODE2DELINEATION` selecting the
        delineation passed on to `convert`.
    :param fraction: Passed on unchanged to `convert`.
    :raises KeyError: If `delineation_code` is not a key of
        `CODE2DELINEATION` (assuming it is a plain mapping).
    """
    db = RegionRankingDatabase(fname=db_fname, name=os.path.basename(db_fname))
    signatures = GeneSignature.from_gmt(gmt_fname, nomenclature=db.nomenclature)
    delineation = CODE2DELINEATION[delineation_code]
    for signature in signatures:
        converted = convert(signature, db, delineation, fraction)
        # `sys.stdout` is a file object and cannot be called directly; use
        # print() so each signature ends up on its own line of standard output.
        print(signature.name + ',' + ','.join(converted.genes))
def load_modules(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Load modules from a file, choosing the loader from the file's suffixes.

    :param fname: Path of the file to load. All suffixes of the final path
        component are considered, not only the last one.
    :return: Whatever the selected loader returns: `load_from_yaml` for
        suffixes accepted by `is_valid_suffix(..., 'ctx_yaml')`, the unpickled
        object for `.dat`, or `GeneSignature.from_gmt` for `.gmt`.
    :raises ValueError: If none of the supported suffixes is present.
    """
    # NOTE: loading from YAML is extremely slow. Switching to JSON or using a
    # CLoader (https://stackoverflow.com/questions/27743711/can-i-speedup-yaml)
    # would be possible speed-ups; the alternative that was chosen in the end
    # is binary pickling (the `.dat` branch below).
    suffixes = PurePath(fname).suffixes

    if is_valid_suffix(suffixes, 'ctx_yaml'):
        return load_from_yaml(fname)

    if '.dat' in suffixes:
        # NOTE(review): unpickling executes arbitrary code embedded in the
        # file; only use this on files from a trusted source.
        with openfile(fname, 'rb') as handle:
            return pickle.load(handle)

    if '.gmt' in suffixes:
        return GeneSignature.from_gmt(fname)

    raise ValueError("Unknown file format for \"{}\".".format(fname))
def load_modules(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Load modules from a file, choosing the loader from the file name ending.

    :param fname: Path of the file to load. Must end in `.yaml`, `.yml`,
        `.dat` or `.gmt` (the comparison is case-sensitive).
    :return: The result of `load_from_yaml` for YAML files, the unpickled
        object for `.dat` files, or the result of `GeneSignature.from_gmt`
        for `.gmt` files.
    :raises ValueError: If the file name has none of the supported endings.
    """
    # NOTE: loading from YAML is extremely slow. Switching to JSON or using a
    # CLoader (https://stackoverflow.com/questions/27743711/can-i-speedup-yaml)
    # would be possible speed-ups; the alternative that was chosen in the end
    # is binary pickling (the `.dat` branch below).
    if fname.endswith(('.yaml', '.yml')):
        return load_from_yaml(fname)

    if fname.endswith('.dat'):
        # NOTE(review): unpickling executes arbitrary code embedded in the
        # file; only use this on files from a trusted source.
        with open(fname, 'rb') as handle:
            return pickle.load(handle)

    if fname.endswith('.gmt'):
        # The guessed separator is used both between fields and between genes.
        separator = guess_separator(fname)
        return GeneSignature.from_gmt(fname,
                                      field_separator=separator,
                                      gene_separator=separator)

    raise ValueError("Unknown file format for \"{}\".".format(fname))
def aucell_command(args):
    """
    Calculate regulon enrichment (as AUC values) for cells.

    The regulons are obtained in one of three ways, depending on the name of
    `args.regulons_fname`:

    * a name ending in one of the keys of `FILE_EXTENSION2SEPARATOR` is
      turned into regulons via `_df2regulons`;
    * a name ending in `.gmt` is parsed as a tab-separated GMT file;
    * anything else is handed to `_load_modules`.

    :param args: Parsed command line arguments. Besides whatever
        `_load_expression_matrix` needs, the attributes `regulons_fname`
        (an object exposing `.name`), `auc_threshold`, `weights`,
        `num_workers` and `output` are read.
    """
    LOGGER.info("Loading expression matrix.")
    ex_mtx = _load_expression_matrix(args)

    regulons_path = args.regulons_fname.name
    is_known_table = any(regulons_path.endswith(extension)
                         for extension in FILE_EXTENSION2SEPARATOR)
    if is_known_table:
        LOGGER.info("Creating regulons.")
        regulons = _df2regulons(regulons_path)
    else:
        LOGGER.info("Loading regulons.")
        if regulons_path.endswith('.gmt'):
            regulons = GeneSignature.from_gmt(regulons_path,
                                              field_separator='\t',
                                              gene_separator='\t')
        else:
            regulons = _load_modules(regulons_path)

    LOGGER.info("Calculating enrichment.")
    # Weights are only used when explicitly requested with 'yes'.
    ignore_weights = args.weights != 'yes'
    auc_heatmap = aucell(ex_mtx,
                         regulons,
                         auc_threshold=args.auc_threshold,
                         noweights=ignore_weights,
                         num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    auc_heatmap.to_csv(args.output)
def test_add():
    """Adding a gene to the first test signature yields a result containing it."""
    signatures = GeneSignature.from_gmt(
        fname=TEST_SIGNATURE_FNAME,
        field_separator='\t',
        gene_separator='\t',
    )
    first_signature = signatures[0]

    extended = first_signature.add("MEF2")

    assert "MEF2" in extended
def gs():
    """Return the first signature parsed from the tab-separated test GMT file."""
    signatures = GeneSignature.from_gmt(
        TEST_SIGNATURE_FNAME,
        field_separator="\t",
        gene_separator="\t",
    )
    return signatures[0]