def run(TFIntervaldict, pad, threshold, bins):
    """Summarise the distribution of position offsets for each TF.

    For every key of ``TFIntervaldict`` one offset is collected per position
    whose score (``position[2]``) is not ``np.inf`` and is strictly greater
    than ``threshold``.  Keys that yield no offsets are left out of the
    result.

    Args:
        TFIntervaldict: Mapping of TF -> iterable of arrays -> iterable of
            intervals -> iterable of positions.  Each position is indexed at
            ``[0]``, ``[1]`` and ``[2]``.
        pad: Value subtracted from every computed offset.
        threshold: Positions with ``position[2] <= threshold`` are skipped.
        bins: Forwarded to ``np.histogram`` as its ``bins`` argument.

    Returns:
        dict mapping each TF to ``[w, ks_pvalue, p, m, x]`` where ``w`` is
        the result of ``em.fit`` on the (right bin edge, count) histogram
        table, ``ks_pvalue`` is the two-sample KS p-value of ``x`` against
        an evenly spaced grid over ``[min(x), max(x)]``, ``p`` is
        ``1 - ndtr(mean / (std / sqrt(N)))``, ``m`` is 0 when the mode of
        ``x`` lies strictly inside (-0.25, 0.25) and 1 otherwise, and ``x``
        is the list of offsets.
    """
    distances = {}
    for TF in TFIntervaldict:
        x = []
        for array in TFIntervaldict[TF]:
            for interval in array:
                for position in interval:
                    if position[2] != np.inf and position[2] > threshold:
                        # Evaluates as position[0] + (position[1] / 2) - pad.
                        # NOTE(review): `/` is integer division under
                        # Python 2 for int operands but true division under
                        # Python 3; if a (start + stop) / 2 midpoint was
                        # intended the parentheses are misplaced -- confirm.
                        x.append((position[0] + position[1] / 2) - pad)
        if not x:
            # Nothing passed the filter: no entry is recorded for this TF.
            continue

        counts, edges = np.histogram(x, bins=bins)
        # One row per bin: (right bin edge, count).
        X = np.column_stack((edges[1:], counts))
        w = em.fit(X)

        start = min(x)
        stop = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)

        # NOTE(review): this random sample was always discarded (it was
        # immediately overwritten by the linspace grid below).  The call is
        # retained only so the global NumPy RNG stream is advanced exactly
        # as before; remove it once nothing depends on that sequence.
        np.random.uniform(start, stop, N)
        y = np.linspace(start, stop, N)

        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)

        # scipy.stats.mode returns arrays on older SciPy and scalars for 1-D
        # input on newer releases; atleast_1d makes the indexing valid on both.
        m = np.atleast_1d(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < m < 0.25 else 1

        distances[TF] = [w, k[1], p, m, x]
    return distances
def run(bidirfile, fimodir):
    """Summarise distance distributions for every FIMO output directory.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is
    processed: distances are read with ``Functions.get_distances_pad_v3``
    from that entry's ``fimo.cut.txt``, multiplied by 1500, and summarised.
    Entries that yield no distances are left out of the result.

    Args:
        bidirfile: Passed unchanged as the first argument of
            ``Functions.get_distances_pad_v3``.
        fimodir: Directory whose ``*fimo_out*`` entries are processed.

    Returns:
        dict mapping TF (the part of the entry name before the first
        underscore) to ``[w, w2, ks_pvalue, d, p, m, x]`` where ``w`` and
        ``w2`` are the results of ``em.fit`` and ``ds2.get_w`` on the
        (right bin edge, count) histogram table, ``ks_pvalue`` is the
        two-sample KS p-value of ``x`` against an evenly spaced grid over
        ``[min(x), max(x)]``, ``d`` is the mean over the 1000 collected KS
        results, ``p`` is ``1 - ndtr(mean / (std / sqrt(N)))``, ``m`` is 0
        when the mode of ``x`` lies strictly inside (-0.25, 0.25) and 1
        otherwise, and ``x`` holds the scaled distances.
    """
    # Used both as the last argument of get_distances_pad_v3 and as the
    # scale factor applied to its output.
    # NOTE(review): presumably the helper returns distances normalised by
    # this window -- confirm.
    window = 1500

    distances = {}
    directorylist = [fimodir + '/' + item
                     for item in os.listdir(fimodir) if 'fimo_out' in item]
    for item in directorylist:
        print(item)
        # Take the TF name from the entry itself rather than from a fixed
        # path depth, so the result does not depend on where fimodir lives.
        TF = os.path.basename(item).split('_')[0]

        x = Functions.get_distances_pad_v3(
            bidirfile, item + "/fimo.cut.txt", True, window)
        # Scale in place so the container type returned by the helper is kept.
        for i, value in enumerate(x):
            x[i] = value * window
        # len() rather than truthiness: the container type is not visible here.
        if len(x) == 0:
            continue

        counts, edges = np.histogram(x, bins=200)
        # One row per bin: (right bin edge, count).
        X = np.column_stack((edges[1:], counts))
        w = em.fit(X)
        w2 = ds2.get_w(X)

        ks = []
        for _ in range(1000):
            # NOTE(review): `ds.simulate` is referenced, not called.  If it
            # is a function, the function object itself is handed to
            # ks_2samp -- confirm the intended call and its arguments.
            d = ds.simulate
            ks.append(scipy.stats.ks_2samp(x, d))
        # NOTE(review): without an axis this averages every element of each
        # KS result (statistic and p-value together) -- confirm intent.
        d = np.mean(ks)

        start = min(x)
        stop = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)

        # NOTE(review): this random sample was always discarded (it was
        # immediately overwritten by the linspace grid below).  The call is
        # retained only so the global NumPy RNG stream is advanced exactly
        # as before; remove it once nothing depends on that sequence.
        np.random.uniform(start, stop, N)
        y = np.linspace(start, stop, N)

        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)

        # scipy.stats.mode returns arrays on older SciPy and scalars for 1-D
        # input on newer releases; atleast_1d makes the indexing valid on both.
        m = np.atleast_1d(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < m < 0.25 else 1

        distances[TF] = [w, w2, k[1], d, p, m, x]
    return distances