def compute_pval_oneclass(X, null_dist, Y=None, single=False, B=9, c=5):
    """Compute observed TIC_e values, their histogram and empirical p-values.

    The pairing of variables depends on ``Y`` and ``single``:

    * ``Y is None`` -- ``minepy.pstats`` on ``X``; results are labelled with
      every 2-combination of ``X.index``.
    * ``Y`` given, ``single`` true -- ``mictools.utils.sstats`` on ``X`` and
      ``Y``; results are labelled with ``X.index`` paired element-wise with
      ``Y.index``.
    * ``Y`` given, ``single`` false -- ``minepy.cstats`` on ``X`` and ``Y``;
      results are labelled with the product of ``X.index`` and ``Y.index``.

    Args:
        X: Object exposing ``.values`` and ``.index`` (presumably a pandas
            DataFrame -- confirm against callers).
        null_dist: Table with a ``"NullCountCum"`` column holding one value
            per histogram bin.
        Y: Optional second data set with the same interface as ``X``.
        single: Selects element-wise pairing instead of all-vs-all when
            ``Y`` is given.
        B: Forwarded to the statistics functions as ``alpha``.
        c: Forwarded to the statistics functions as ``c``.

    Returns:
        A tuple ``(obs_dist, obs, pval)``: ``obs_dist`` is a DataFrame indexed
        by ``(BinStart, BinEnd)`` with columns ``ObsCount`` and
        ``ObsCountCum``; ``obs`` is a Series of the observed TIC values and
        ``pval`` a Series of p-values, both indexed by ``(Var1, Var2)``.
    """
    mictools.utils.check_data(X, Y=Y)

    # NULL_HIST_RES equal-width bins spanning [0, 1].
    bin_edges = np.linspace(0, 1, NULL_HIST_RES + 1)
    pair_levels = ['Var1', 'Var2']

    # Observed statistics and the pair labels that go with them.
    x_values = X.values
    if Y is None:
        _mic, tic = minepy.pstats(x_values, alpha=B, c=c, est="mic_e")
        pair_index = pd.MultiIndex.from_tuples(
            list(itertools.combinations(X.index, 2)), names=pair_levels)
    elif single:
        y_values = Y.values
        _mic, tic = mictools.utils.sstats(
            x_values, y_values, alpha=B, c=c, est="mic_e")
        pair_index = pd.MultiIndex.from_arrays(
            [X.index, Y.index], names=pair_levels)
    else:
        y_values = Y.values
        _mic, tic = minepy.cstats(
            x_values, y_values, alpha=B, c=c, est="mic_e")
        pair_index = pd.MultiIndex.from_product(
            [X.index, Y.index], names=pair_levels)
        # Flatten so the values line up one-to-one with the product index.
        tic = tic.flatten()

    # Histogram of the observed values and its right-tailed cumulative sum.
    counts, _edges = np.histogram(tic, bin_edges)
    observed_hist = counts.astype(np.int64)
    observed_hist_cum = np.cumsum(observed_hist[::-1])[::-1]

    # P-values: null cumulative count interpolated at each observed value
    # (bin starts are the x-coordinates), with +1 added to both the numerator
    # and the first cumulative count used as denominator.
    null_cum = null_dist["NullCountCum"].values
    null_at_observed = np.interp(tic, bin_edges[:-1], null_cum)
    pval = pd.Series(
        (null_at_observed + 1) / (null_cum[0] + 1), index=pair_index)

    # Observed values, labelled by variable pair.
    obs = pd.Series(tic, index=pair_index)

    # Observed distribution, labelled by bin boundaries.
    bin_index = pd.MultiIndex.from_arrays(
        [bin_edges[:-1], bin_edges[1:]], names=('BinStart', 'BinEnd'))
    obs_dist = pd.DataFrame(
        {
            "ObsCount": observed_hist,
            "ObsCountCum": observed_hist_cum
        },
        index=bin_index,
        columns=["ObsCount", "ObsCountCum"])

    return obs_dist, obs, pval
def main(params, inputs, outputs):
    """Compute MIC and TIC and pickle each result to its output path.

    Args:
        params: Object whose ``type`` attribute selects the comparison:
            ``'X之间'`` runs ``pstats`` on ``inputs.x`` alone, ``'X与Y之间'``
            runs ``cstats`` on ``inputs.x`` against ``inputs.y``.
        inputs: Object exposing the data as ``x`` and ``y``.
        outputs: Object whose ``mic`` and ``tic`` attributes are the file
            paths the pickled results are written to.

    Raises:
        ValueError: If ``params.type`` is not one of the two supported
            values. Raised before any output file is opened.
    """
    # Input data
    X = inputs.x
    Y = inputs.y

    # Input parameter (named `mode` locally to avoid shadowing builtin `type`)
    mode = params.type

    if mode == 'X之间':
        # MIC and TIC between the variables of X
        mic, tic = pstats(X, alpha=9, c=5, est="mic_e")
    elif mode == 'X与Y之间':
        # MIC and TIC between X and Y
        mic, tic = cstats(X, Y, alpha=9, c=5, est="mic_e")
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("unsupported params.type: {!r}".format(mode))

    # Write the results; the context managers close both files even when
    # pickling fails.
    with open(outputs.mic, "wb") as mic_file:
        pickle.dump(mic, mic_file)
    with open(outputs.tic, "wb") as tic_file:
        pickle.dump(tic, tic_file)
import numpy as np
from minepy import pstats, cstats
import time

# Demo: MIC_e / TIC_e statistics on random data with minepy.
# Fixed seed so the random matrices are the same on every run.
np.random.seed(0)

# build the X matrix, 8 variables, 320 samples
X = np.random.rand(8, 320)

# build the Y matrix, 4 variables, 320 samples
Y = np.random.rand(4, 320)

# compute pairwise statistics MIC_e and normalized TIC_e between the
# variables in X, B=9, c=5
mic_p, tic_p = pstats(X, alpha=9, c=5, est="mic_e")

# compute the same statistics between each pair of variables in X and Y
mic_c, tic_c = cstats(X, Y, alpha=9, c=5, est="mic_e")

# Single-argument parenthesised print: a print statement on Python 2 and a
# function call on Python 3, producing the same output on both.
print("normalized TIC_e (X):")
print(tic_p)
print("MIC_e (X vs. Y):")
print(mic_c)
# encoding=utf-8
import time

import numpy as np
from minepy import cstats, pstats

# Demo: MIC_e / TIC_e statistics on random data with minepy.
# Seed the generator so the example matrices are reproducible.
np.random.seed(0)

# X holds 8 variables and Y holds 4 variables, each with 320 samples.
X = np.random.rand(8, 320)
Y = np.random.rand(4, 320)

# Estimator settings shared by both calls: B=9, c=5, MIC_e estimator.
mine_kwargs = {"alpha": 9, "c": 5, "est": "mic_e"}

# MIC_e and normalized TIC_e for every pair of variables within X.
mic_p, tic_p = pstats(X, **mine_kwargs)

# The same statistics for every pairing of an X variable with a Y variable.
mic_c, tic_c = cstats(X, Y, **mine_kwargs)

# Report each heading followed by its result on the next line.
for heading, values in (
    ("normalized TIC_e (X):", tic_p),
    ("MIC_e (X vs. Y):", mic_c),
):
    print(heading)
    print(values)
def score_interaction(bait, prey, bait_monomer_sec_id, prey_monomer_sec_id):
    """Compute interaction features for one bait/prey pair of profiles.

    ``bait`` and ``prey`` are 2-D arrays with one row per peptide and one
    column per data point (presumably SEC fractions, judging by the naming --
    confirm against the caller). NaN entries are treated as missing.

    Scoring only happens when bait and prey share at least three consecutive
    columns in which both have a positive summed signal, and when each still
    has at least one peptide with signal inside the shared columns.

    Unlike earlier revisions, the input arrays are not modified.

    Args:
        bait: Bait peptide matrix.
        prey: Prey peptide matrix with the same number of columns.
        bait_monomer_sec_id: Numeric position associated with the bait
            monomer.
        prey_monomer_sec_id: Numeric position associated with the prey
            monomer.

    Returns:
        ``None`` when the requirements above are not met, otherwise a dict
        with the keys ``var_xcorr_shape``, ``var_xcorr_shift``,
        ``var_abundance_ratio``, ``var_total_abundance_ratio``, ``var_mic``,
        ``var_tic``, ``var_sec_overlap``, ``var_sec_intersection``,
        ``var_delta_monomer`` and ``var_apex_monomer``.
    """

    def longest_run(indices):
        # Length of the longest stretch of consecutive integers in `indices`.
        present = set(indices)
        best = 0
        for start in indices:
            # Only measure from the first element of each run.
            if (start - 1) not in present:
                stop = start
                while stop in present:
                    stop += 1
                best = max(best, stop - start)
        return best

    def normalized_xcorr(a, b):
        # Standardise every row (zero mean, unit standard deviation).
        a = (a - np.mean(a, axis=1, keepdims=True)) / np.std(
            a, axis=1, keepdims=True)
        b = (b - np.mean(b, axis=1, keepdims=True)) / np.std(
            b, axis=1, keepdims=True)

        if np.array_equal(a, b):
            # Same matrix: every row against itself and each later row.
            row_pairs = [(a[i], a[j])
                         for i in range(len(a))
                         for j in range(i, len(a))]
        else:
            # Different matrices: every row of a against every row of b.
            row_pairs = [(row_a, row_b) for row_a in a for row_b in b]

        nxcorr = []  # normalized cross-correlation
        lxcorr = []  # cross-correlation lag
        for first, second in row_pairs:
            # 'valid' mode on equal-length rows yields a single value, which
            # is normalized by the row length.
            nxcorr.append(
                np.correlate(first, second, 'valid')[0] / len(first))
            # Position of the peak of the 'same'-mode cross-correlation.
            lxcorr.append(np.argmax(np.correlate(first, second, 'same')))
        return np.array(nxcorr), np.array(lxcorr)

    def sec_xcorr(bm, pm):
        # Cross-correlation scores: bait vs. bait, prey vs. prey, bait vs. prey.
        _, bait_lags = normalized_xcorr(bm, bm)
        _, prey_lags = normalized_xcorr(pm, pm)
        cross_corr, cross_lags = normalized_xcorr(bm, pm)

        shape = np.mean(cross_corr)
        apex = np.mean(cross_lags)
        shift = max(abs(apex - np.mean(bait_lags)),
                    abs(apex - np.mean(prey_lags)))
        return shape, shift, apex

    def mass_similarity(bm, pm):
        # Mean per-peptide summed intensity of bait and of prey.
        bait_abundance = np.sum(bm, axis=1).mean()
        prey_abundance = np.sum(pm, axis=1).mean()

        # Express the ratio as smaller / larger.
        ratio = bait_abundance / prey_abundance
        if ratio > 1:
            ratio = 1 / ratio
        return ratio

    # Columns in which bait / prey have a positive summed signal.
    bait_detected = np.nansum(bait, axis=0) > 0
    prey_detected = np.nansum(prey, axis=0) > 0

    # Columns covered by either partner, and by both.
    total_overlap = np.count_nonzero(bait_detected | prey_detected)
    intersection = bait_detected & prey_detected
    total_intersection = np.count_nonzero(intersection)
    if total_intersection == 0:
        return None

    # Require at least three consecutive shared columns.
    longest_stretch = longest_run(intersection.nonzero()[0])
    if longest_stretch <= 2:
        return None

    # Full profiles with NaN replaced by 0, taken before any masking.
    total_bait = np.nan_to_num(bait)
    total_prey = np.nan_to_num(prey)

    # Blank out the non-shared columns in new arrays so the caller's data
    # stays untouched.
    bait = np.where(intersection, bait, np.nan)
    prey = np.where(intersection, prey, np.nan)

    # Drop peptides without signal in the shared columns, then zero-fill NaN.
    bait = np.nan_to_num(bait[np.nansum(bait, axis=1) > 0, :])
    prey = np.nan_to_num(prey[np.nansum(prey, axis=1) > 0, :])

    # Require at least one remaining peptide for bait and prey.
    if bait.shape[0] == 0 or prey.shape[0] == 0:
        return None

    # Cross-correlation scores
    xcorr_shape, xcorr_shift, xcorr_apex = sec_xcorr(bait, prey)

    # MIC/TIC over the shared columns, averaged over all peptide pairs.
    mic_stat, tic_stat = cstats(
        bait[:, intersection], prey[:, intersection], est="mic_e")
    mic = mic_stat.mean(axis=0).mean()
    tic = tic_stat.mean(axis=0).mean()

    # Mass similarity on the shared part and on the full profiles.
    abundance_ratio = mass_similarity(bait, prey)
    total_abundance_ratio = mass_similarity(total_bait, total_prey)

    # Fraction of the covered columns that is shared.
    relative_overlap = total_intersection / total_overlap

    # Distance between the two monomer positions.
    delta_monomer = np.abs(bait_monomer_sec_id - prey_monomer_sec_id)

    # Smaller of the two monomer-to-apex offsets. Both values go in ONE list:
    # np.array(x, y) would interpret y as the dtype argument.
    apex_monomer = np.min([
        bait_monomer_sec_id - xcorr_apex,
        prey_monomer_sec_id - xcorr_apex,
    ])

    return {
        'var_xcorr_shape': xcorr_shape,
        'var_xcorr_shift': xcorr_shift,
        'var_abundance_ratio': abundance_ratio,
        'var_total_abundance_ratio': total_abundance_ratio,
        'var_mic': mic,
        'var_tic': tic,
        'var_sec_overlap': relative_overlap,
        'var_sec_intersection': longest_stretch,
        'var_delta_monomer': delta_monomer,
        'var_apex_monomer': apex_monomer
    }