def run_zscore(gse, indir, outdir, probe_sets): start = time.time() path = os.path.join(indir, gse[0], gse[1] + '*.csv') probe_set_files = [os.path.join(probe_set_file_location, i, gse[0] + '-' + i.lower().replace('-', '_') + '.csv') for i in probe_sets] outpath = os.path.join(outdir, gse[0]) make_sure_path_exists(outpath) gse_file = glob.glob(path) if len(gse_file) == 1: zscores.script_run( gse_file[0], gse[2], gse[3], metadata_feature_rows=gse[4:], probe_set_files=probe_set_files, outdir=outpath) print 'Finished', gse[1], ', took', time.time() - start, 's'
def do_threaded_work(multivariates): for i, col in multivariates.iteritems(): gpl = col.loc['GPL'] gse = col.loc['GSE'].upper() col = col.dropna() path = os.path.join('multivariate-clinical', 'Microarray-datasets', gpl, '*' + gse + '*.csv') fnames = glob.glob(path) print fnames if len(fnames) != 1: print "Failed to find file for: ", gpl, gse sys.exit(1) fname = fnames[0] feature_names = make_feature_names(col) out_path = os.path.join('multivariate-out', gpl) make_sure_path_exists(out_path) if len(glob.glob(os.path.join(out_path, gse + '*.csv'))) >= 1: continue zscores.script_run(fname, col.loc['Time'], col.loc['Censor'], metadata_feature_rows=feature_names, outdir=out_path)