def run(bidirfile, fimodir):
    """Collect per-TF distance statistics from the FIMO outputs in a directory.

    Every entry of ``fimodir`` whose name contains ``'fimo_out'`` is treated
    as one FIMO result directory.  The text before the first ``'_'`` in the
    entry name is used as the TF label.  For each directory, distances are
    obtained from ``Functions.get_distances_pad_v3`` on its ``fimo.cut.txt``
    file, multiplied by 1500, binned into a 200-bin histogram, and several
    statistics are computed.  Directories that yield no distances are
    skipped.

    Args:
        bidirfile: Forwarded unchanged as the first argument of
            ``Functions.get_distances_pad_v3``.
        fimodir: Directory that is listed for ``*fimo_out*`` entries.

    Returns:
        dict mapping TF label to ``[w, w2, ks_p, d, p, m, x]`` where

        * ``w``    -- result of ``em.fit`` on the histogram matrix,
        * ``w2``   -- result of ``ds2.get_w`` on the histogram matrix,
        * ``ks_p`` -- element ``[1]`` (the p-value) of the two-sample KS test
          of ``x`` against an evenly spaced grid over ``[min(x), max(x)]``,
        * ``d``    -- ``np.mean`` over the 1000 KS results against
          ``ds.simulate`` (see the review note in the body),
        * ``p``    -- ``1 - ndtr(mean / (std / sqrt(N)))``,
        * ``m``    -- 0 if the mode of ``x`` lies strictly inside
          ``(-0.25, 0.25)``, else 1,
        * ``x``    -- the scaled distances themselves.
    """
    distances = dict()
    for name in os.listdir(fimodir):
        if 'fimo_out' not in name:
            continue
        item = fimodir + '/' + name
        print(item)

        # Take the label from the entry name itself.  The previous
        # item.split('/')[6] only worked when fimodir had one specific
        # number of path components.
        TF = name.split('_')[0]

        x = Functions.get_distances_pad_v3(bidirfile, item + "/fimo.cut.txt", True, 1500)
        # Rescale in place so the container type returned by the helper is
        # kept.  Presumably this undoes a normalisation by the 1500 pad
        # passed above -- TODO confirm against the helper.
        for i, value in enumerate(x):
            x[i] = value * 1500
        if len(x) == 0:
            continue

        # Two-column histogram matrix: column 0 holds each bin's right edge
        # (np.histogram returns one more edge than counts), column 1 the
        # bin count.
        counts, edges = np.histogram(x, bins=200)
        X = np.zeros((len(counts), 2))
        X[:, 0] = edges[1:]
        X[:, 1] = counts

        w = em.fit(X)
        w2 = ds2.get_w(X)

        # NOTE(review): ds.simulate is referenced, not called.  If it is a
        # function this probably needs a call with arguments; left unchanged
        # because its signature is not visible here.
        ks = list()
        for _ in range(1000):
            d = ds.simulate
            ks.append(scipy.stats.ks_2samp(x, d))
        # NOTE(review): this averages over the complete ks_2samp results,
        # not only one field of each -- confirm that is intended.
        d = np.mean(ks)

        start = min(x)
        stop = max(x)
        sigma = np.std(x)
        mu = np.mean(x)
        N = len(x)

        # Reference sample for the KS test: an evenly spaced grid over the
        # observed range.  A np.random.uniform draw used to be assigned here
        # and was immediately overwritten by this grid, so it was dropped.
        y = np.linspace(start, stop, N)

        # Upper-tail normal probability of the mean measured in standard
        # errors.
        z = mu / (sigma / math.sqrt(N))
        p = 1 - scipy.special.ndtr(z)
        k = scipy.stats.ks_2samp(x, y)

        # Collapse the mode into a flag: 0 when it is within 0.25 of zero.
        m = scipy.stats.mode(x)[0][0]
        m = 0 if -0.25 < m < 0.25 else 1

        distances[TF] = [w, w2, k[1], d, p, m, x]
    return distances
# Ad-hoc check: read the distances recorded for one TF from a results file,
# histogram them and print what ds.get_w returns for that histogram.
filedir = 'C:/Users/Jonathan/Google Drive/Colorado University/Taatjes-Dowell Lab/BidirectionalTFAnalyzer/Danko2013_E2_25-1_bidirectional_hits_intervals.EM.txt'
TF = 'FOXA1'

with open(filedir) as file1:
    for line in file1:
        if TF not in line:
            continue
        # The sixth whitespace-separated field is a comma-separated list of
        # numbers.  The last piece is discarded (presumably the empty string
        # left by a trailing comma -- TODO confirm against the file format).
        # If several lines mention TF, the last one wins.
        y = line.strip().split()[5].split(',')[:-1]
        x = [float(item) for item in y]

bins = 200

# Disabled plotting / histogram experiment, kept for reference:
#F = plt.figure()
#ax1 = F.add_subplot(1,2,1)
#ax2 = F.add_subplot(1,2,2)
#ax1.hist(x,bins=bins)
#
#counts,edges = np.histogram(x, bins=bins)
#X = np.zeros((bins,2))
#X[:,0] = edges[1:]
#X[:,1] = counts

# Two-column histogram matrix: right bin edges in column 0, counts in
# column 1 (np.histogram returns one more edge than counts).
counts, edges = np.histogram(x, bins=bins)
edges = edges[1:]
X = np.zeros((len(counts), 2))
X[:, 0], X[:, 1] = edges, counts

print(ds.get_w(X))