def test_interp(self):
    """Check the scalar interp() helper against np.interp on a sampled CDF.

    Builds a cumulative histogram of 100k normal samples, then verifies
    that interp() agrees with np.interp at 21 evenly spaced query points.
    """
    x = np.random.normal(loc=0.5, scale=0.1, size=100000)
    bins = np.linspace(0.0, 1.0, num=51)
    h = np.histogram(x, bins=bins)[0]
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # np.float64 is the exact equivalent.
    fp = np.zeros(len(bins), dtype=np.float64)
    # fp is the cumulative count with fp[0] == 0 (CDF-like, unnormalized)
    fp[1:] = h.cumsum()
    x = np.linspace(0, 1, 21)
    y = np.interp(x, bins, fp)
    y2 = np.array([interp(v, bins, fp) for v in x])
    self.assertTrue(np.allclose(y, y2, rtol=1e-6, atol=1e-6))
def compute_qvalues(json_iterator, hists_file):
    '''
    computes fdr q values from json Result objects sorted
    by abs(NES) (low to high)

    json_iterator: iterator that yields json objects (one per line)
                   in sorted order
    hists_file: contains histogram data from null distribution

    Yields alternating json strings and os.linesep separators.
    '''
    # load histogram data (npz archive with the four keys below)
    hists = np.load(hists_file)
    # compute cumulative sums for fdr interpolation; cdf[0] == 0 so that
    # cdf has one more entry than the histogram (bin-edge aligned)
    cdfs = {}
    for k in ('null_nes_neg', 'null_nes_pos', 'obs_nes_neg', 'obs_nes_pos'):
        h = hists[k]
        # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # np.float64 is the exact equivalent.
        cdf = np.zeros(h.shape[0] + 1, dtype=np.float64)
        cdf[1:] = h.cumsum()
        cdfs[k] = cdf
    # keep track of minimum FDR and rank for positive
    # and negative NES separately (index by sign)
    NEG = 0
    POS = 1
    null_keys = ['null_nes_neg', 'null_nes_pos']
    obs_keys = ['obs_nes_neg', 'obs_nes_pos']
    # total observed counts per sign; ranks count down from the total
    tot_obs = [cdfs['obs_nes_neg'][-1], cdfs['obs_nes_pos'][-1]]
    cur_ranks = [tot_obs[0], tot_obs[1]]
    min_fdrs = [1.0, 1.0]
    # perform merge of sorted json files
    for line in json_iterator:
        # load json document (one per line)
        res = Result.from_json(line.strip())
        es = res.es
        # clip |NES| into the histogram range and move to log space, the
        # space in which the histogram bins (LOG_NES_BINS) are defined
        log_nes_clip = np.log10(np.clip(abs(res.nes), NES_MIN, NES_MAX))
        if es != 0:
            if es < 0:
                sign_ind = NEG
                sign = -1.0
            else:
                sign_ind = POS
                sign = 1.0
            # For a given NES(S) = NES* >= 0, the FDR is the ratio of the
            # percentage of all permutations NES(S,null) >= 0, whose
            # NES(S,null) >= NES*, divided by the percentage of observed S
            # with NES(S) >= 0, whose NES(S) >= NES*, and similarly for
            # NES(S) = NES* <= 0.
            #
            # to compute a sample set specific FDR q value we look at the
            # aggregated enrichment scores for all tests of that sample set.
            # interpolate the cumulative NES histograms (in log space) to
            # find the fraction with NES(null) >= NES*
            null_nes_cumsum = cdfs[null_keys[sign_ind]]
            null_n = interp(log_nes_clip, LOG_NES_BINS, null_nes_cumsum)
            obs_nes_cumsum = cdfs[obs_keys[sign_ind]]
            obs_n = interp(log_nes_clip, LOG_NES_BINS, obs_nes_cumsum)
            # 1 - CDF gives the upper-tail fractions (numerator from null,
            # denominator from observed)
            n = 1.0 - (null_n / null_nes_cumsum[-1])
            d = 1.0 - (obs_n / obs_nes_cumsum[-1])
            # update json dict; guard against zero/negative tails
            if (n <= 0.0) or (d <= 0.0):
                res.ss_fdr_q_value = 0.0
            else:
                res.ss_fdr_q_value = n / d
            # enforce monotonicity: since input is sorted by |NES| ascending,
            # the q-value may only decrease as |NES| grows
            if res.ss_fdr_q_value < min_fdrs[sign_ind]:
                min_fdrs[sign_ind] = res.ss_fdr_q_value
            else:
                res.ss_fdr_q_value = min_fdrs[sign_ind]
            res.ss_rank = cur_ranks[sign_ind]
            # fraction of observed scores at or beyond this rank, signed
            res.ss_frac = sign * (1.0 - ((res.ss_rank - 1) /
                                         float(tot_obs[sign_ind])))
            cur_ranks[sign_ind] -= 1
        # convert back to json
        # NOTE(review): yields every result, including es == 0 (unmodified)
        # — confirm against original indentation
        yield res.to_json()
        yield os.linesep
    # cleanup
    hists.close()
# NOTE(review): this is a byte-near-identical duplicate of the
# compute_qvalues defined earlier in this file; the later definition
# shadows the earlier one at import time. One of the two should be
# deleted — kept here (with the same dtype fix) to avoid changing
# module-level behavior in this review pass.
def compute_qvalues(json_iterator, hists_file):
    '''
    computes fdr q values from json Result objects sorted
    by abs(NES) (low to high)

    json_iterator: iterator that yields json objects (one per line)
                   in sorted order
    hists_file: contains histogram data from null distribution

    Yields alternating json strings and os.linesep separators.
    '''
    # load histogram data (npz archive with the four keys below)
    hists = np.load(hists_file)
    # compute cumulative sums for fdr interpolation; cdf[0] == 0 so that
    # cdf has one more entry than the histogram (bin-edge aligned)
    cdfs = {}
    for k in ('null_nes_neg', 'null_nes_pos', 'obs_nes_neg', 'obs_nes_pos'):
        h = hists[k]
        # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # np.float64 is the exact equivalent.
        cdf = np.zeros(h.shape[0] + 1, dtype=np.float64)
        cdf[1:] = h.cumsum()
        cdfs[k] = cdf
    # keep track of minimum FDR and rank for positive
    # and negative NES separately (index by sign)
    NEG = 0
    POS = 1
    null_keys = ['null_nes_neg', 'null_nes_pos']
    obs_keys = ['obs_nes_neg', 'obs_nes_pos']
    # total observed counts per sign; ranks count down from the total
    tot_obs = [cdfs['obs_nes_neg'][-1], cdfs['obs_nes_pos'][-1]]
    cur_ranks = [tot_obs[0], tot_obs[1]]
    min_fdrs = [1.0, 1.0]
    # perform merge of sorted json files
    for line in json_iterator:
        # load json document (one per line)
        res = Result.from_json(line.strip())
        es = res.es
        # clip |NES| into the histogram range and move to log space, the
        # space in which the histogram bins (LOG_NES_BINS) are defined
        log_nes_clip = np.log10(np.clip(abs(res.nes), NES_MIN, NES_MAX))
        if es != 0:
            if es < 0:
                sign_ind = NEG
                sign = -1.0
            else:
                sign_ind = POS
                sign = 1.0
            # For a given NES(S) = NES* >= 0, the FDR is the ratio of the
            # percentage of all permutations NES(S,null) >= 0, whose
            # NES(S,null) >= NES*, divided by the percentage of observed S
            # with NES(S) >= 0, whose NES(S) >= NES*, and similarly for
            # NES(S) = NES* <= 0.
            #
            # to compute a sample set specific FDR q value we look at the
            # aggregated enrichment scores for all tests of that sample set.
            # interpolate the cumulative NES histograms (in log space) to
            # find the fraction with NES(null) >= NES*
            null_nes_cumsum = cdfs[null_keys[sign_ind]]
            null_n = interp(log_nes_clip, LOG_NES_BINS, null_nes_cumsum)
            obs_nes_cumsum = cdfs[obs_keys[sign_ind]]
            obs_n = interp(log_nes_clip, LOG_NES_BINS, obs_nes_cumsum)
            # 1 - CDF gives the upper-tail fractions (numerator from null,
            # denominator from observed)
            n = 1.0 - (null_n / null_nes_cumsum[-1])
            d = 1.0 - (obs_n / obs_nes_cumsum[-1])
            # update json dict; guard against zero/negative tails
            if (n <= 0.0) or (d <= 0.0):
                res.ss_fdr_q_value = 0.0
            else:
                res.ss_fdr_q_value = n / d
            # enforce monotonicity: since input is sorted by |NES| ascending,
            # the q-value may only decrease as |NES| grows
            if res.ss_fdr_q_value < min_fdrs[sign_ind]:
                min_fdrs[sign_ind] = res.ss_fdr_q_value
            else:
                res.ss_fdr_q_value = min_fdrs[sign_ind]
            res.ss_rank = cur_ranks[sign_ind]
            # fraction of observed scores at or beyond this rank, signed
            res.ss_frac = sign * (1.0 - ((res.ss_rank - 1) /
                                         float(tot_obs[sign_ind])))
            cur_ranks[sign_ind] -= 1
        # convert back to json
        # NOTE(review): yields every result, including es == 0 (unmodified)
        # — confirm against original indentation
        yield res.to_json()
        yield os.linesep
    # cleanup
    hists.close()