示例#1
0
def run(TFIntervaldict,pad,threshold,bins):
    """Compute per-TF summary statistics of filtered position values.

    For every key in ``TFIntervaldict`` the nested containers are walked
    (arrays -> intervals -> positions) and one value is collected for each
    position whose third field is greater than ``threshold`` and not equal
    to ``np.inf``.  Keys that yield no values are left out of the result.

    Parameters
    ----------
    TFIntervaldict : dict
        Maps a TF key to an iterable of arrays; each array is an iterable of
        intervals; each interval is an iterable of indexable positions with
        at least three fields.
    pad :
        Value subtracted from every collected position value.
    threshold :
        Lower bound (exclusive) applied to ``position[2]``.
    bins :
        Forwarded to ``np.histogram`` as ``bins``.

    Returns
    -------
    dict
        ``TF -> [w, ks_pvalue, p, m, x]`` where ``w`` is the result of
        ``em.fit`` on the (right bin edge, count) table, ``ks_pvalue`` is
        the second element of ``scipy.stats.ks_2samp`` of ``x`` against an
        evenly spaced grid over ``[min(x), max(x)]``, ``p`` is
        ``1 - ndtr(mean / (std / sqrt(N)))``, ``m`` is 0 when the mode of
        ``x`` lies strictly between -0.25 and 0.25 and 1 otherwise, and
        ``x`` is the list of collected values.
    """
    distances = dict()
    for TF in TFIntervaldict:
        x = list()
        for array in TFIntervaldict[TF]:
            for interval in array:
                for position in interval:
                    if position[2] != np.inf and position[2] > threshold:
                        # NOTE(review): this is position[0] + (position[1] / 2).
                        # That is a midpoint only if position[1] is a length;
                        # if it is an end coordinate the intended value is
                        # probably (position[0] + position[1]) / 2 -- confirm
                        # against whatever builds TFIntervaldict.
                        x.append((position[0] + position[1] / 2) - pad)
        if not x:
            continue

        # Two-column table: right edge of each histogram bin and its count.
        counts, edges = np.histogram(x, bins=bins)
        X = np.column_stack((edges[1:], counts))
        w = em.fit(X)

        start = min(x)
        stop = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)

        # Evenly spaced reference sample spanning the observed range.
        y = np.linspace(start, stop, N)
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)

        # scipy.stats.mode returns a length-1 array on older SciPy releases
        # and a scalar on newer ones; ravel makes the lookup work for both.
        m = np.ravel(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < m < 0.25 else 1

        distances[TF] = [w, k[1], p, m, x]

    return distances
def run(bidirfile, fimodir):
    """Compute per-TF distance statistics for every FIMO output directory.

    Each entry of ``fimodir`` whose name contains ``'fimo_out'`` is
    processed: distances are obtained from
    ``Functions.get_distances_pad_v3(bidirfile, <entry>/fimo.cut.txt, True, 1500)``
    and every returned value is multiplied by 1500 in place (presumably
    undoing a normalisation by the same pad -- confirm against that helper).
    Entries that yield no distances are left out of the result.

    Parameters
    ----------
    bidirfile :
        Forwarded unchanged to ``Functions.get_distances_pad_v3``.
    fimodir : str
        Directory that is listed for ``*fimo_out*`` entries.

    Returns
    -------
    dict
        ``TF -> [w, w2, ks_pvalue, d, p, m, x]`` where ``TF`` is the part of
        the entry name before the first underscore, ``w`` and ``w2`` are the
        results of ``em.fit`` and ``ds2.get_w`` on the (right bin edge,
        count) table of a 200-bin histogram, ``ks_pvalue`` is the second
        element of ``scipy.stats.ks_2samp`` of ``x`` against an evenly
        spaced grid over ``[min(x), max(x)]``, ``d`` is ``np.mean`` of the
        collected simulation comparisons, ``p`` is
        ``1 - ndtr(mean / (std / sqrt(N)))``, ``m`` is 0 when the mode of
        ``x`` lies strictly between -0.25 and 0.25 and 1 otherwise, and
        ``x`` is the scaled distance sequence.
    """
    distances = dict()
    for entry in os.listdir(fimodir):
        if 'fimo_out' not in entry:
            continue
        item = fimodir + '/' + entry
        print(item)
        # Take the TF name from the entry itself rather than from a fixed
        # position in the joined path, so the result does not depend on how
        # many components fimodir happens to have.
        TF = entry.split('_')[0]
        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        # Scale in place so the object returned by the helper keeps its type.
        for i, value in enumerate(x):
            x[i] = value * 1500

        if len(x) == 0:
            continue

        # Two-column table: right edge of each histogram bin and its count.
        counts, edges = np.histogram(x, bins=200)
        X = np.column_stack((edges[1:], counts))
        w = em.fit(X)
        w2 = ds2.get_w(X)

        ks = list()
        for a in range(1000):
            # NOTE(review): ds.simulate is referenced but not called, so
            # ks_2samp receives the attribute itself instead of a simulated
            # sample.  A call with arguments is presumably intended; its
            # signature is not visible here -- confirm and fix.
            d = ds.simulate
            ks.append(scipy.stats.ks_2samp(x,d))
        # NOTE(review): np.mean over the raw ks_2samp results averages every
        # element of each result together; confirm that is intended.
        d = np.mean(ks)

        start = min(x)
        stop = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)

        # Evenly spaced reference sample spanning the observed range.
        y = np.linspace(start, stop, N)
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)

        # scipy.stats.mode returns a length-1 array on older SciPy releases
        # and a scalar on newer ones; ravel makes the lookup work for both.
        m = np.ravel(scipy.stats.mode(x)[0])[0]
        m = 0 if -0.25 < m < 0.25 else 1

        distances[TF] = [w, w2, k[1], d, p, m, x]

    return distances