def __call__(self, content):
    """Show ``content`` in the ``less`` pager attached to the current stdout.

    Args:
        content: text (``str``, encoded with the default codec before being
            sent) or raw ``bytes`` to page.

    Raises:
        TypeError: if ``content`` is neither ``str`` nor ``bytes``.

    A ``KeyboardInterrupt`` raised while the pager is running is swallowed,
    so interrupting the pager simply returns to the caller.
    """
    from subprocess import Popen, PIPE
    from sys import stdout

    if not isinstance(content, (str, bytes)):
        raise TypeError('Unknown input type for paging.')

    payload = content.encode() if isinstance(content, str) else content

    try:
        # Flags, per less(1): -F quit if the text fits on one screen,
        # -R pass colour escape sequences through, -S chop long lines,
        # -X skip terminal (de)initialisation, -K exit on Ctrl-C.
        pager = Popen(
            ['less', '-F', '-R', '-S', '-X', '-K'],
            stdin=PIPE, stdout=stdout
        )
        # communicate() is the public API for "write input, close stdin,
        # wait for exit"; it also tolerates the pager quitting before all
        # of the input has been consumed.
        pager.communicate(input=payload)
    except KeyboardInterrupt:
        pass
def _r_vector(values) -> str:
    """Render an iterable as an R ``c(...)`` vector literal.

    Each element is formatted with ``repr``, exactly as ``str(tuple(values))``
    would do, but without the trailing comma Python prints for one-element
    tuples (``c('a',)`` is not valid R).
    """
    return 'c(' + ', '.join(repr(value) for value in values) + ')'


def _send_script(process, payload: bytes) -> None:
    """Write ``payload`` to the process' stdin and close it.

    A broken pipe (the child exited before reading everything) is ignored.
    """
    try:
        process.stdin.write(payload)
    except BrokenPipeError:
        pass
    try:
        process.stdin.close()
    except BrokenPipeError:
        pass


def gsva(
    expression: Union[ExpressionWithControls, Profile], gene_sets_path: str, method: str = 'gsva',
    single_sample=False, permutations=1000, mx_diff=True, cores=1, _cache=True,
    limit_to_gene_sets=False, verbose=False
):
    """Run GSVA in an R subprocess and return the result as a DataFrame.

    The expression data is written to a temporary CSV file, an R script which
    sources ``gsva.R`` (located next to this module) and calls
    ``gsva.with_probabilities`` is piped to the R interpreter, and the CSV
    written by that script is read back.

    Excerpt from GSVA documentation:
        An important argument of the gsva() function is the flag mx.diff which is set to TRUE by default.

        Under this default setting, GSVA enrichment scores are calculated using Equation 5, and therefore, are
        more amenable by analysis techniques that assume the data to be normally distributed.
        When setting mx.diff=FALSE , then Equation 4 is employed, calculating enrichment in an analogous way to
        classical GSEA which typically provides a bimodal distribution of GSVA enrichment scores for each gene.

    Args:
        expression: a ``Profile`` (required when ``single_sample`` is set) or
            an object exposing ``joined`` and ``classes``; both must expose
            ``hashable``, which is used in the cache key.
        gene_sets_path: path to an RDS file, loaded in R with ``readRDS``.
        method: passed to R as ``method``.
        single_sample: score a single profile against an all-zero control
            column using the ``gene_permutation`` procedure; otherwise the
            ``bayes`` procedure is used.
        permutations: number of permutations, forwarded to R only for the
            ``gene_permutation`` procedure; a warning is issued if it is set
            while ``single_sample`` is off.
        mx_diff: translated to ``mx.diff=T`` / ``mx.diff=F``.
        cores: passed to R as ``cores``.
        _cache: whether to store the result in ``GSVA_CACHE``.
        limit_to_gene_sets: ``False`` or an iterable passed to R as a vector.
        verbose: stream R's stdout to ``display`` and stderr to ``warn``.

    Returns:
        DataFrame read from the CSV written by R (first column as index), or
        an empty DataFrame if that file turned out to be empty.
    """
    if not single_sample and permutations:
        # warn() returns None, so it must not be raised.
        warn('permutations are not supported when not single_sample')

    # NOTE(review): the key ignores single_sample, mx_diff, permutations and
    # limit_to_gene_sets, and the lookup happens even when _cache is False -
    # confirm whether that is intended before relying on the cache.
    key = (expression.hashable, method, gene_sets_path)

    if key in GSVA_CACHE:
        return GSVA_CACHE[key]

    if single_sample:
        assert isinstance(expression, Profile)
        joined = DataFrame(
            concat([expression.top.up, expression.top.down]),
        )
        joined.columns = ['condition']
        joined['control'] = 0
        joined.index = joined.index.astype(str)
        expression_classes = Series(['case', 'control'])
    else:
        joined = DataFrame(expression.joined)
        joined.index = joined.index.astype(str)

        # Positional mask of the columns containing at least one null.
        nulls = joined.isnull().any(axis=0).reset_index(drop=True)
        if nulls.any():
            print(f'Following columns contain nulls and will be skipped: {list(joined.columns[nulls])}')
        joined = joined[joined.columns[~nulls]]
        expression_classes = expression.classes.loc[~nulls.reset_index(drop=True)]

    expression = joined

    mx_diff_flag = 'T' if mx_diff else 'F'
    procedure = 'gene_permutation' if single_sample else 'bayes'
    cwd = Path(__file__).parent

    gene_sets_filter = (
        _r_vector(limit_to_gene_sets)
        if limit_to_gene_sets is not False
        else 'F'
    )
    # Assumes gsva.with_probabilities accepts `permutations` - TODO confirm
    # against gsva.R. The previous condition compared against a procedure
    # name that is never used, so the argument was silently dropped.
    permutations_argument = (
        ', permutations = ' + str(permutations)
        if procedure == 'gene_permutation' and permutations
        else ''
    )

    with NamedTemporaryFile(mode='w', prefix=gsva_tmp_dir) as f_expression, NamedTemporaryFile(prefix=gsva_tmp_dir) as f_result:
        expression.to_csv(f_expression)
        # R opens the file by name, so buffered rows have to reach the disk.
        f_expression.flush()

        script = f"""
        source("{cwd}/gsva.R")
        expression = read.csv('{f_expression.name}', row.names=1)
        expression_classes = {_r_vector(expression_classes)}
        gene_sets = readRDS('{gene_sets_path}')
        result = gsva.with_probabilities(
            expression, expression_classes, gene_sets, '{procedure}',
            method = '{method}', mx.diff={mx_diff_flag}, include_control=F, cores={cores},
            limit_to_gene_sets={gene_sets_filter},
            progress=F
            {permutations_argument}
        )
        write.csv(result, '{f_result.name}')
        """

        process = Popen(vanilla_R, stdin=PIPE, stdout=PIPE, stderr=PIPE)

        if verbose:
            from helpers.streams import handle_streams
            from signature_scoring.evaluation import display

            _send_script(process, script.encode())
            handlers = {'out': display, 'err': warn}
            handle_streams(process, handlers)
        else:
            # communicate() drains stdout/stderr while waiting; a bare wait()
            # can deadlock once R fills one of the pipe buffers.
            process.communicate(script.encode())

        try:
            result = read_csv(f_result.name, index_col=0)
        except EmptyDataError:
            result = DataFrame()

    if _cache:
        GSVA_CACHE[key] = result

    return result