def main():
    """Call five HMM states on an interaction matrix and write them out.

    Reads the module-level ``args`` namespace: ``args.i`` is the input
    interaction matrix file and ``args.c`` selects column mode.

    In column mode the filtered matrix is transposed before z-scoring, and
    one call per ``xheader`` entry is written to ``even_compartments.tab``.
    Otherwise one call per ``yheader`` entry is written to
    ``odd_compartments.tab``.  Each output line is ``<header>\\t<class>``.
    """
    # print() is always given one parenthesised expression, so the output is
    # the same whether the interpreter treats it as a statement or a call.
    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print("Matrix shape: " + str(np.shape(X)))

    # Step 1: drop rows and columns dominated by undefined or zero entries.
    # NOTE(review): the original comment says "more than 30%" while the
    # threshold passed here is 0.65 -- confirm against mf.remove_NA_zeros.
    print("Removing NAs...")
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, 0.65)
    print("Matrix shape: " + str(np.shape(X_step1)))

    # Step 2: the model labels rows, so column mode works on the transpose.
    if args.c:
        print("Transposing matrix...")
        X_step2 = np.transpose(X_step1)
    else:
        X_step2 = X_step1

    # Step 3: z-score every row independently.
    print("Transforming rows to zscores...")
    X_step3 = stats.zscore(X_step2, axis=1)

    # Step 4: fit a 5-state diagonal-covariance Gaussian HMM and label rows.
    print("Training HMM")
    model = hmm.GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
    model.fit(X_step3)
    print("HMM output")
    classes = model.predict(X_step3)

    # Both modes write the same report; only the labels and target differ.
    if args.c:
        axis_label, header, out_path = "columns", xheader, "even_compartments.tab"
    else:
        axis_label, header, out_path = "rows", yheader, "odd_compartments.tab"

    print("# of " + axis_label + ": " + str(len(header)))
    print("# of class entries: " + str(len(classes)))

    # The context manager closes the file even if a write raises.
    with open(out_path, "w") as out:
        for i, name in enumerate(header):
            out.write(name + "\t" + str(classes[i]) + "\n")
def main():
    """Write the mean contact value for every odd/even class pair.

    Reads the module-level ``args`` namespace: ``args.i`` is the input
    interaction matrix file, ``args.o`` the odd class-call file and
    ``args.e`` the even class-call file.  Each call file is read as
    whitespace-separated lines whose second field is an integer class.

    Odd calls are matched to matrix rows and even calls to matrix columns.
    The result is a 5 x 5 table of means written tab-separated to
    ``contact_enrichment.tab``; a class pair with no entries yields ``nan``.

    Raises:
        SystemExit: with status 1 when the call counts do not match the
            filtered matrix dimensions.
        ValueError: when a class call lies outside ``0..4``.
    """
    n_classes = 5

    # print() is always given one parenthesised expression, so the output is
    # the same whether the interpreter treats it as a statement or a call.
    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1: drop rows and columns dominated by undefined or zero entries.
    # NOTE(review): the original comment says "more than 30%" while the
    # threshold passed here is .65 -- confirm against mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Context managers close the call files even if a line fails to parse.
    with open(args.o, 'r') as odd_fh:
        odd = np.array([int(line.split()[1]) for line in odd_fh], dtype=int)
    with open(args.e, 'r') as even_fh:
        even = np.array([int(line.split()[1]) for line in even_fh], dtype=int)

    # Size check; using the shape keeps this safe for a matrix with no rows.
    matrix = np.asarray(X_step1)
    n_rows, n_cols = matrix.shape
    if len(odd) != n_rows or len(even) != n_cols:
        print('ERROR: size mismatches exist')
        # Exit non-zero so calling scripts can detect the failure.
        raise SystemExit(1)

    # Fail loudly on unexpected classes instead of silently skipping them.
    for calls in (odd, even):
        if calls.size and (calls.min() < 0 or calls.max() >= n_classes):
            raise ValueError(
                'class calls must be between 0 and %d' % (n_classes - 1))

    # Average each (odd class, even class) submatrix directly rather than
    # collecting every matrix entry in Python lists first.
    row_groups = [np.flatnonzero(odd == c) for c in range(n_classes)]
    col_groups = [np.flatnonzero(even == c) for c in range(n_classes)]
    contact_enrich = np.empty((n_classes, n_classes), dtype=float)
    for odd_class, rows in enumerate(row_groups):
        for even_class, cols in enumerate(col_groups):
            block = matrix[np.ix_(rows, cols)]
            contact_enrich[odd_class, even_class] = np.mean(block)

    np.savetxt('contact_enrichment.tab', contact_enrich,
               delimiter='\t', fmt='%1.2f')
def main():
    """Write the mean contact value for every odd/even class pair.

    Command-line options: ``-i`` input interaction matrix file, ``-o`` odd
    chromosome subcompartment calls, ``-e`` even chromosome subcompartment
    calls (all required).  Each call file is read as whitespace-separated
    lines whose second field is an integer class.

    Odd calls are matched to matrix rows and even calls to matrix columns.
    The result is a 5 x 5 table of means written tab-separated to
    ``contact_enrichment.tab``; a class pair with no entries yields ``nan``.

    Raises:
        SystemExit: with status 1 when the call counts do not match the
            filtered matrix dimensions.
        ValueError: when a class call lies outside ``0..4``.
    """
    parser = argparse.ArgumentParser(
        description='Calculate contact enrichment to merge odd/even subcompartment calls')
    parser.add_argument('-i',
                        help='input interaction matrix file',
                        type=str,
                        required=True)
    parser.add_argument(
        '-o',
        help='odd chrom subcompartment calls (ex. odd_compartments.tab)',
        type=str,
        required=True)
    parser.add_argument(
        '-e',
        help='even chrom subcompartment calls (ex. even_compartments.tab)',
        type=str,
        required=True)
    args = parser.parse_args()

    n_classes = 5

    # print() is always given one parenthesised expression, so the output is
    # the same whether the interpreter treats it as a statement or a call.
    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1: drop rows and columns dominated by undefined or zero entries.
    # NOTE(review): the original comment says "more than 30%" while the
    # threshold passed here is .65 -- confirm against mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Context managers close the call files even if a line fails to parse.
    with open(args.o, 'r') as odd_fh:
        odd = np.array([int(line.split()[1]) for line in odd_fh], dtype=int)
    with open(args.e, 'r') as even_fh:
        even = np.array([int(line.split()[1]) for line in even_fh], dtype=int)

    # Size check; using the shape keeps this safe for a matrix with no rows.
    matrix = np.asarray(X_step1)
    n_rows, n_cols = matrix.shape
    if len(odd) != n_rows or len(even) != n_cols:
        print('ERROR: size mismatches exist')
        # Exit non-zero so calling scripts can detect the failure.
        raise SystemExit(1)

    # Fail loudly on unexpected classes instead of silently skipping them.
    for calls in (odd, even):
        if calls.size and (calls.min() < 0 or calls.max() >= n_classes):
            raise ValueError(
                'class calls must be between 0 and %d' % (n_classes - 1))

    # Average each (odd class, even class) submatrix directly rather than
    # collecting every matrix entry in Python lists first.
    row_groups = [np.flatnonzero(odd == c) for c in range(n_classes)]
    col_groups = [np.flatnonzero(even == c) for c in range(n_classes)]
    contact_enrich = np.empty((n_classes, n_classes), dtype=float)
    for odd_class, rows in enumerate(row_groups):
        for even_class, cols in enumerate(col_groups):
            block = matrix[np.ix_(rows, cols)]
            contact_enrich[odd_class, even_class] = np.mean(block)

    np.savetxt('contact_enrichment.tab', contact_enrich,
               delimiter='\t', fmt='%1.2f')
def main():
    """Call five HMM states on an interaction matrix and write them out.

    Command-line options: ``-i`` input interaction matrix file (required)
    and ``-c`` to select column mode.  ``-c`` may be given bare or with a
    value; the values ``0``, ``false``, ``f``, ``no``, ``n``, ``off`` (any
    case) and empty or whitespace-only strings turn it off, and any other
    value turns it on.

    In column mode the filtered matrix is transposed before z-scoring, and
    one call per ``xheader`` entry is written to ``even_compartments.tab``.
    Otherwise one call per ``yheader`` entry is written to
    ``odd_compartments.tab``.  Each output line is ``<header>\\t<class>``.
    """

    def parse_flag(text):
        # type=bool would call bool() on the raw string, which is True for
        # every non-empty value -- including "False" and "0".
        return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser(
        description='Perform Rao et al. (2014) HMM subcompartment analysis')
    parser.add_argument('-i',
                        help='input interaction matrix file',
                        type=str,
                        required=True)
    # nargs='?' with const=True lets a bare "-c" enable column mode while
    # still accepting an explicit value such as "-c True".
    parser.add_argument('-c',
                        help='run HMM on columns/even chromosomes)',
                        type=parse_flag,
                        nargs='?',
                        const=True,
                        default=False)
    args = parser.parse_args()

    # print() is always given one parenthesised expression, so the output is
    # the same whether the interpreter treats it as a statement or a call.
    X, xheader, yheader = mf.dekker_2_numpy_matrix(args.i)
    print('Matrix shape: ' + str(np.shape(X)))

    # Step 1: drop rows and columns dominated by undefined or zero entries.
    # NOTE(review): the original comment says "more than 30%" while the
    # threshold passed here is .65 -- confirm against mf.remove_NA_zeros.
    print('Removing NAs...')
    X_step1, xheader, yheader = mf.remove_NA_zeros(X, xheader, yheader, .65)
    print('Matrix shape: ' + str(np.shape(X_step1)))

    # Step 2: the model labels rows, so column mode works on the transpose.
    if args.c:
        print('Transposing matrix...')
        X_step2 = np.transpose(X_step1)
    else:
        X_step2 = X_step1

    # Step 3: z-score every row independently.
    print('Transforming rows to zscores...')
    X_step3 = stats.zscore(X_step2, axis=1)

    # Step 4: fit a 5-state diagonal-covariance Gaussian HMM and label rows.
    print('Training HMM')
    model = hmm.GaussianHMM(n_components=5,
                            covariance_type='diag',
                            n_iter=1000)
    model.fit(X_step3)
    print('HMM output')
    classes = model.predict(X_step3)

    # Both modes write the same report; only the labels and target differ.
    if args.c:
        axis_label, header, out_path = 'columns', xheader, 'even_compartments.tab'
    else:
        axis_label, header, out_path = 'rows', yheader, 'odd_compartments.tab'

    print('# of ' + axis_label + ': ' + str(len(header)))
    print('# of class entries: ' + str(len(classes)))

    # The context manager closes the file even if a write raises.
    with open(out_path, 'w') as out:
        for i, name in enumerate(header):
            out.write(name + '\t' + str(classes[i]) + '\n')