import MLR_8regions

# Fit the 8-region MLR model for one (partition, lambda) pair and append the
# resulting held-out score to Reg8_lambdas.txt.
#
# Command-line arguments:
#   argv[1]  partition_num  fold label excluded from training
#   argv[2]  reg_lambda     regularization value handed to MLR_8regions.MLR
#   argv[3]  maxfun         forwarded to MLR_8regions.MLR as `maxfun`
partition_num = int(sys.argv[1])
reg_lambda = float(sys.argv[2])
maxfun = int(sys.argv[3])
mer_len = 6

data = sio.loadmat('../results/N7_A5SS_Model_Predictions/padded_6mer_matrix.mat')
A6pos = data['X']
Y = scipy.matrix(data['Y'])
partition = np.genfromtxt('../results/N7_A5SS_Model_Predictions/Ten_fold_partition.txt')

# Two folds are held out of the fit: `partition_num` itself and the fold just
# before it (modulo 10).  Only the latter is scored below.
training_inds = find((partition != partition_num) & (partition != ((partition_num - 1) % 10)))
test_inds = find(partition == ((partition_num - 1) % 10))

# Only the first 1000 training rows are used for the fit.
fit_inds = training_inds[:1000]
w, wfull, w0, f, d = MLR_8regions.MLR(A6pos[fit_inds, :], Y[fit_inds, :],
                                      reg_lambda, mer_len, maxfun=maxfun)
B = MLR_8regions.get_energy(A6pos, wfull, w0)[test_inds, :]

# Calculate kl-divergence between the observed rows (Ytest) and the model
# output B -- this assumes get_energy returns log-probabilities; TODO confirm.
Ytest = Y[test_inds, :]
vv = scipy.sum(np.multiply(B, Ytest), axis=1)
Ypl = log(Ytest)
# log(0) is not finite; zero entries contribute nothing to sum(Y * log(Y)),
# so they are reset to 0 before the multiplication.
Ypl[Ytest == 0] = 0
Ypl = np.multiply(Ypl, Ytest)
vv = vv - scipy.sum(Ypl, axis=1)
# `sum` is whichever name the file's imports put in scope (not visible here);
# the expression is left exactly as it was so the written value is unchanged.
V = -sum(vv) / float(len(Ytest))

# Append one tab-separated record: partition, lambda, score.  The context
# manager closes the file even if the write fails, and the handle gets its own
# name so it no longer overwrites the `f` returned by MLR above.
with open('../results/N7_A5SS_Model_Predictions/Reg8_lambdas.txt', 'a') as out_file:
    out_file.write(str(partition_num) + '\t' + str(reg_lambda) + '\t' + str(V) + '\n')
# Build boolean training / test masks for the requested partition, fit the
# model on the training rows, and collect everything that gets saved.
training = (partition != partition_num)

# With partition_num == -1 the test mask is every row whose label is above -1;
# otherwise it is the rows belonging to the requested partition.
test = (partition > -1) if partition_num == -1 else (partition == partition_num)

training_inds = find(training)
test_inds = find(test)

w, wfull, w0, f, d = MLR_8regions.MLR(
    A6pos[training_inds, :],
    Y[training_inds, :],
    reg_lambda,
    mer_len,
    maxfun=maxfun)

# Predictions are produced for every row of A6pos, not just the test rows.
Ypred = exp(MLR_8regions.get_energy(A6pos, wfull, w0))

MLR_data = {
    'Mer_scores': w,
    'w0': w0,
    'Prediction': Ypred,
    'Training': training,
    'Test': test,
    'Data': Y,
    'Reads': reads,
    'NotNull': not_null,
}

# Make sure the per-partition output directory exists.
partition_dir = '../results/N7_A5SS_Model_Predictions/Partition' + str(partition_num)
if not os.path.exists(partition_dir):
    os.makedirs(partition_dir)
# Tail of a training script (Python 2 `print` statements).  The opening
# `Y = ...` / `else: Y += ...` pair is the body of an if/else whose header lies
# before this view; it sums the dense matrices loaded from each
# `full_filepath` into Y.
#
# Steps that follow, in order:
#   1. Multiply Y elementwise by the dense 'inds' matrix from GT_GC_Positions.mat.
#   2. Load the 6-mer matrix 'X' (A6pos) and the boolean `training` flags
#      (entries of training.csv equal to 1).
#   3. Keep the first 80 columns of Y plus its last column, then divide each
#      row by its row sum (`reads`).
#   4. `not_null` marks rows with reads > 0; `temp_training` is a fresh array
#      (result of `training==True`), so clearing its no-read rows does not
#      modify `training`.
#   5. Fit MLR_8regions.MLR on at most the first 10000 remaining training rows,
#      predict for every row of A6pos, and save the results to
#      output_path + 'Training_data.mat' (creating output_path if missing).
#
# NOTE(review): `Wss` (stored under 'mer_scores') is not assigned anywhere in
#   the visible code -- confirm it is defined earlier, otherwise this raises
#   NameError.
# NOTE(review): `training_inds` is assigned here but not used in the visible
#   code.
# NOTE(review): `Y = Y/reads` runs before the reads > 0 mask is applied, so
#   rows with zero reads are divided by zero; they are excluded from the fit
#   but are still stored under 'Data'.
# NOTE(review): the save path is built by plain string concatenation, so
#   output_path presumably has to end with a path separator -- confirm.
Y = sio.loadmat(full_filepath)[filename].todense() else: Y += sio.loadmat(full_filepath)[filename].todense() GT_inds = sio.loadmat('/net/shendure/vol7/abros/nobackup/Splicing_Project/Library_Files/GT_GC_Positions.mat') Y = np.multiply(Y,GT_inds['inds'].todense()) A6pos = sio.loadmat('../results/N7_A5SS_Model_Predictions/padded_6mer_matrix.mat')['X'] training = np.loadtxt('/net/shendure/vol7/abros/nobackup/Splicing_Project/Library_Files/training.csv')==1 print shape(training) training_inds = find(training==1) Y = scipy.hstack((Y[:,:80],Y[:,-1:])) reads = float64(sum(Y,axis=1)) Y = Y/reads not_null = np.array(reads>0).reshape(len(training)) print shape(not_null) temp_training = (training==True) print shape(temp_training) temp_training[not_null==False] = False temp_training_inds = find(temp_training)[:10000] print 'Starting MLR...' w,wfull,w0,f,d = MLR_8regions.MLR(A6pos[temp_training_inds,:],Y[temp_training_inds,:],reg_lambda,mer_len,maxfun=maxfun) Ypred = exp(MLR_8regions.get_energy(A6pos,wfull,w0)) MLR_data = {'mer_scores':Wss,'w0':w0,'w':w,'Prediction':Ypred,'Training':training,'Data':Y,'Reads':reads,'NotNull':not_null} if not os.path.exists(output_path): os.makedirs(output_path) sio.savemat(output_path + 'Training_data.mat',MLR_data)