def load_image_data(pdb, load_all=False):
    """Build a labelled sample of contact feature vectors for one structure.

    Loads the GREMLIN matrix, the sequence and the native contact map for
    ``pdb``, passes the GREMLIN matrix through ``APC_L2``, and splits the
    index pairs produced by ``generate_matrix_IDX`` into native contacts
    (label 1) and non-contacts (label 0). A one-line summary (identifier,
    number of contacts, number of non-contacts kept, negative/positive
    ratio) is printed for each call.

    Args:
        pdb: Identifier handed to the ``utils`` loaders.
        load_all: When true, keep every non-contact pair. Otherwise keep a
            random subset of at most ``ratio_TP_to_TN * (#contacts)`` pairs.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays: the feature vectors (contacts
        first, then the retained non-contacts) and the matching 1/0 labels.

    Raises:
        AssertionError: If the sequence length disagrees with the first
            dimension of the GREMLIN matrix or of the native contact map.
    """
    g = utils.load_GREMLIN_dataset(pdb)
    _, seq = utils.load_seq(pdb)
    NATIVE = utils.load_contact_map(pdb)

    # Sanity checks: all three inputs must describe the same number of residues.
    assert len(seq) == g.shape[0], "sequence / GREMLIN size mismatch"
    assert len(seq) == NATIVE.shape[0], "sequence / contact map size mismatch"

    # APC corrections and whiten
    g = APC_L2(g)

    N = g.shape[0]
    IDX = generate_matrix_IDX(N, kernel_window)

    # Split the candidate pairs into native contacts and non-contacts.
    idx_true_pos = set()
    idx_true_neg = set()
    for i, j in IDX:
        if NATIVE[i, j]:
            idx_true_pos.add((i, j))
        else:
            idx_true_neg.add((i, j))

    # Shuffle the non-contacts so that truncation below draws a random subset.
    idx_true_neg = list(idx_true_neg)
    np.random.shuffle(idx_true_neg)

    n_pos = len(idx_true_pos)
    n_neg = len(idx_true_neg)

    # If we are only loading a subset of the negatives, truncate here. The
    # count is clamped so the logged number matches what is actually kept.
    if load_all:
        FP_choosen = n_neg
    else:
        FP_choosen = min(int(ratio_TP_to_TN * n_pos), n_neg)

    # A structure without any native contact among the candidate pairs must
    # not abort the run with a ZeroDivisionError; report the ratio as NaN.
    ratio = float(n_neg) / n_pos if n_pos else float("nan")

    status_str = "{} {:5d} {:5d} {:0.4f}"
    # Single-argument call form prints the same text on Python 2 and 3.
    print(status_str.format(pdb, n_pos, FP_choosen, ratio))

    idx_true_neg = idx_true_neg[:FP_choosen]

    X0 = generate_feature_vectors(g, seq, idx_true_pos, kernel_window)
    Y0 = [1] * len(X0)

    X1 = generate_feature_vectors(g, seq, idx_true_neg, kernel_window)
    Y1 = [0] * len(X1)

    # Concatenate the two samples and make them a numpy array
    X = np.array(X0 + X1)
    Y = np.array(Y0 + Y1)

    return X, Y
def compute_predictions(pdb, clf):
    """Return a symmetric matrix of classifier probabilities for ``pdb``.

    The GREMLIN matrix for ``pdb`` is loaded and passed through ``APC_L2``;
    feature vectors are generated for every index pair produced by
    ``generate_matrix_IDX`` and scored with ``clf.predict_proba``.

    Args:
        pdb: Identifier handed to the ``utils`` loaders.
        clf: Fitted classifier exposing ``predict_proba``; column 1 of its
            output is used as the score of each pair.

    Returns:
        A float array with the same shape as the GREMLIN matrix. Entries
        ``[i, j]`` and ``[j, i]`` hold the score of pair ``(i, j)``; pairs
        that were not scored stay at zero.

    Raises:
        AssertionError: If the sequence length disagrees with the first
            dimension of the GREMLIN matrix.
    """
    # Load the GREMLIN data together with the matching sequence.
    coupling = utils.load_GREMLIN_dataset(pdb)
    _, sequence = utils.load_seq(pdb)

    # Sanity check
    assert len(sequence) == coupling.shape[0]

    # APC corrections and whiten
    coupling = APC_L2(coupling)

    n_residues = coupling.shape[0]
    pairs = generate_matrix_IDX(n_residues, kernel_window)

    features = generate_feature_vectors(
        coupling, sequence, pairs, kernel_window
    )
    contact_prob = clf.predict_proba(features)[:, 1]

    # Scatter the per-pair scores into a symmetric matrix.
    predicted = np.zeros(coupling.shape)
    for pair, prob in zip(pairs, contact_prob):
        row, col = pair
        predicted[row, col] = prob
        predicted[col, row] = prob

    return predicted
def build_constraint_text(pdb, f_prediction):
    """Render the index pairs stored in ``f_prediction`` as constraint lines.

    Each row of the prediction file is read as a pair of integer indices
    ``(i, j)`` into the first value returned by ``utils.load_seq(pdb)``. For
    every pair one line is produced from the ``FADE_line`` template, filled
    with both indices shifted by ``index_offset``, the atoms chosen by
    ``choose_atom`` for the two sequence entries, and ``FADE_strength`` as
    the score.

    Args:
        pdb: Identifier handed to ``utils.load_seq``.
        f_prediction: Path (or file object) readable by ``numpy.loadtxt``
            holding one index pair per row.

    Returns:
        The constraint lines joined with newlines; an empty string when the
        file contains no rows.
    """
    # ndmin=2 keeps a file with a single row as a (1, 2) array. Without it
    # loadtxt squeezes the result to 1-D and the pair unpacking below fails.
    IDX = np.loadtxt(f_prediction, ndmin=2).astype(int)
    fasta, _ = utils.load_seq(pdb)

    constraint_lines = []
    for i, j in IDX:
        fade = {
            "res1": i + index_offset,
            "res2": j + index_offset,
            "score": FADE_strength,
            "atom1": choose_atom(fasta[i]),
            "atom2": choose_atom(fasta[j]),
        }
        constraint_lines.append(FADE_line.format(**fade))

    return "\n".join(constraint_lines)