def distmatrix(uni, idxs, chosenselections, co, mindist=False,
               mindist_mode=None, type1char='p', type2char='n'):
    """Compute the matrix of contact persistence percentages between the
    chosen selections along the trajectory of ``uni``.

    Parameters
    ----------
    uni : MDAnalysis Universe whose trajectory is analyzed.
    idxs : list of identifier tuples, parallel to ``chosenselections``;
        when ``mindist`` is True, ``idx[3][-1]`` must be ``type1char`` or
        ``type2char`` and is used to split the selections in two classes.
    chosenselections : list of atom selections (AtomGroups).
    co : distance cut-off handed to ``LoopDistances``.
    mindist : if True, use minimum atom-atom distances between groups;
        otherwise use center-of-mass distances.
    mindist_mode : "diff" (class 1 vs class 2), "same" (within each
        class) or "both" (all vs all); required when ``mindist`` is True.
    type1char, type2char : single-character class labels.

    Returns
    -------
    tuple
        ``(final_percmat, distmats)``: square matrix of percentages
        (0-100, ``len(idxs) x len(idxs)``) and a list of per-frame
        distance matrices (currently always returned empty).

    Raises
    ------
    ValueError
        If a selection belongs to neither class, or ``mindist_mode`` is
        not one of the supported values.
    """
    numframes = uni.trajectory.numframes
    final_percmat = np.zeros((len(chosenselections), len(chosenselections)))
    log.info("Distance matrix will be %dx%d (%d elements)"
             % (len(idxs), len(idxs), len(idxs)**2))
    frame_no = 1
    distmats = []

    if mindist:
        # Partition the selections into the two classes according to the
        # last character of the 4th field of each identifier tuple.
        P, Pidxs, Psizes = [], [], []
        N, Nidxs, Nsizes = [], [], []
        for idx, sel in zip(idxs, chosenselections):
            typechar = idx[3][-1]
            if typechar == type1char:
                P.append(sel)
                Pidxs.append(idx)
                Psizes.append(len(sel))
            elif typechar == type2char:
                N.append(sel)
                Nidxs.append(idx)
                Nsizes.append(len(sel))
            else:
                # was a bare `raise`, which fails with "RuntimeError: No
                # active exception to re-raise" instead of explaining why
                raise ValueError("selection type '%s' is neither '%s' nor '%s'"
                                 % (typechar, type1char, type2char))
        # np.int was removed in NumPy 1.24; the builtin is equivalent.
        Nsizes = np.array(Nsizes, dtype=int)
        Psizes = np.array(Psizes, dtype=int)

        # Select which pairings of the two classes are computed.
        if mindist_mode == "diff":
            sets = [(P, N)]
            sets_idxs = [(Pidxs, Nidxs)]
            sets_sizes = [(Psizes, Nsizes)]
        elif mindist_mode == "same":
            sets = [(P, P), (N, N)]
            sets_idxs = [(Pidxs, Pidxs), (Nidxs, Nidxs)]
            sets_sizes = [(Psizes, Psizes), (Nsizes, Nsizes)]
        elif mindist_mode == "both":
            sets = [(chosenselections, chosenselections)]
            sets_idxs = [(idxs, idxs)]
            sizes = [len(s) for s in chosenselections]
            sets_sizes = [(sizes, sizes)]
        else:
            # was a bare `raise` here as well
            raise ValueError("unknown mindist_mode '%s'; expected 'diff', "
                             "'same' or 'both'" % mindist_mode)

        percmats = []
        coords = [[[], []] for _ in sets]

        # Cache the coordinates of every group for every frame; the
        # distance kernels below then run once on the concatenated arrays.
        for ts in uni.trajectory:
            sys.stdout.write("Caching coordinates: frame %d / %d (%3.1f%%)\r"
                             % (frame_no, numframes,
                                float(frame_no) / float(numframes) * 100.0))
            sys.stdout.flush()
            frame_no += 1
            for si, s in enumerate(sets):
                log.info("Caching coordinates...")
                if s[0] == s[1]:  # triangular case: same group both sides
                    for group in s[0]:
                        coords[si][0].append(group.coordinates())
                        coords[si][1].append(group.coordinates())
                else:  # square case: two different groups
                    for group in s[0]:
                        coords[si][0].append(group.coordinates())
                    for group in s[1]:
                        coords[si][1].append(group.coordinates())

        # Run the distance kernels on the cached coordinates.
        for si, s in enumerate(sets):
            if s[0] == s[1]:  # triangular case
                this_coords = np.array(np.concatenate(coords[si][0]),
                                       dtype=np.float64)
                inner_loop = LoopDistances(this_coords, this_coords, co)
                percmats.append(
                    inner_loop.run_triangular_mindist(sets_sizes[si][0]))
            else:  # square case
                this_coords1 = np.array(np.concatenate(coords[si][0]),
                                        dtype=np.float64)
                this_coords2 = np.array(np.concatenate(coords[si][1]),
                                        dtype=np.float64)
                inner_loop = LoopDistances(this_coords1, this_coords2, co)
                percmats.append(
                    inner_loop.run_square_mindist(sets_sizes[si][0],
                                                  sets_sizes[si][1]))

        # Scatter each per-set matrix back into the full matrix, mapping
        # through the positions of the identifiers in the original `idxs`.
        for si, s in enumerate(sets):
            idxs1 = sets_idxs[si][0]
            idxs2 = sets_idxs[si][1]
            if s[0] == s[1]:  # triangular case
                for j in range(len(s[0])):
                    for k in range(j):
                        jj = idxs.index(idxs1[j])
                        kk = idxs.index(idxs1[k])
                        final_percmat[jj, kk] = percmats[si][j, k]
                        final_percmat[kk, jj] = percmats[si][j, k]
            else:  # square case
                for j in range(len(s[0])):
                    for k in range(len(s[1])):
                        jj = idxs.index(idxs1[j])
                        kk = idxs.index(idxs2[k])
                        final_percmat[jj, kk] = percmats[si][j, k]
                        final_percmat[kk, jj] = percmats[si][j, k]
        # np.float was removed in NumPy 1.24; the builtin is equivalent.
        final_percmat = np.array(final_percmat, dtype=float) / numframes * 100.0
    else:
        # Center-of-mass mode: collect one COM per selection per frame,
        # then run a single triangular distance pass over all of them.
        all_coms = []
        for ts in uni.trajectory:
            sys.stdout.write("now analyzing: frame %d / %d (%3.1f%%)\r"
                             % (frame_no, numframes,
                                float(frame_no) / float(numframes) * 100.0))
            sys.stdout.flush()
            frame_no += 1
            coms = np.zeros((len(chosenselections), 3))
            for j, sel in enumerate(chosenselections):
                coms[j, :] = sel.centerOfMass()
            all_coms.append(coms)
        all_coms = np.concatenate(all_coms)
        inner_loop = LoopDistances(all_coms, all_coms, co)
        # was `coms.shape[0]`, which relied on `coms` leaking out of the
        # frame loop (and raised NameError on an empty trajectory);
        # len(chosenselections) is the same value, always defined
        percmat = inner_loop.run_triangular_distmatrix(len(chosenselections))
        final_percmat = np.array(percmat, dtype=float) / numframes * 100.0
    return (final_percmat, distmats)
def dopotential(kbp_atomlist, residues_list, potential_file, seq_dist_co=0,
                grof=None, xtcf=None, pdbf=None, uni=None, pdb=None,
                dofullmatrix=True, kbT=1.0):
    """Score residue-residue contacts with a statistical (knowledge-based)
    potential averaged over the frames of a trajectory.

    Parameters
    ----------
    kbp_atomlist : per-residue-name mapping to the atom names the potential
        uses (indexed via the sparse entry's p1_1/p1_2/p2_1/p2_2 fields).
    residues_list : currently IGNORED -- see the review note below.
    potential_file : file holding the sparse potential definition,
        parsed by ``parse_sparse()``.
    seq_dist_co : minimum sequence separation for a pair to be scored.
    grof, xtcf, pdbf : file names used by ``loadsys()`` when ``uni`` and
        ``pdb`` are not supplied directly.
    uni, pdb : already-loaded Universe objects (trajectory and reference).
    dofullmatrix : if True, also return the scores as a symmetric
        residue-by-residue matrix.
    kbT : thermal-energy scale forwarded to ``calc_potential()``.

    Returns
    -------
    tuple
        ``(outstr, dm)``: a text table with one line per pair whose
        averaged score exceeds 1e-6 in magnitude, and the full matrix
        (or None when ``dofullmatrix`` is False).

    Raises
    ------
    ValueError
        If neither the loaded objects nor all three file names are given.
    """
    # NOTE(review): the residues_list parameter is unconditionally
    # overwritten here, so whatever the caller passes is ignored.
    # Confirm with callers before honoring the argument instead.
    # These are the residues the potential is defined for: all standard
    # amino acids except glycine.
    residues_list = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU",
                     "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO",
                     "SER", "THR", "TRP", "TYR", "VAL"]

    log.info("Loading potential definition . . .")
    sparses = parse_sparse(potential_file)
    log.info("Loading input files...")
    if not pdb or not uni:
        if not pdbf or not grof or not xtcf:
            # was `raise ValueError` with no message
            raise ValueError("either pdb and uni objects or the pdbf, grof "
                             "and xtcf file names must be provided")
        pdb, uni = loadsys(pdbf, grof, xtcf)

    ok_residues = []          # indices of residues the potential covers
    discarded_residues = set()
    residue_pairs = []        # (i, j) residue-index pairs being scored
    atom_selections = []      # 4-atom AtomGroup per scored pair
    ordered_sparses = []      # sparse potential entry per scored pair
    numframes = len(uni.trajectory)

    for i in range(len(uni.residues)):
        if uni.residues[i].name not in residues_list:
            discarded_residues.add(uni.residues[i])
            continue
        ok_residues.append(i)
        # Pair the newly accepted residue with every previously accepted
        # one (ok_residues[:-1] excludes the index just appended).
        for j in ok_residues[:-1]:
            ii = i  # reset per pair: the swap below may reorder ii and j
            if (abs(i - j) < seq_dist_co
                    or uni.residues[ii].segment.name
                    != uni.residues[j].segment.name):
                continue
            # Order the pair by residue name before the lookup --
            # presumably the sparse table is keyed on name-ordered
            # pairs; TODO confirm against parse_sparse().
            if uni.residues[j].name < uni.residues[ii].name:
                ii, j = j, ii
            this_sparse = sparses[uni.residues[ii].name][uni.residues[j].name]
            this_atoms = (kbp_atomlist[uni.residues[ii].name][this_sparse.p1_1],
                          kbp_atomlist[uni.residues[ii].name][this_sparse.p1_2],
                          kbp_atomlist[uni.residues[j].name][this_sparse.p2_1],
                          kbp_atomlist[uni.residues[j].name][this_sparse.p2_2])
            try:
                selected_atoms = mda.core.AtomGroup.AtomGroup(
                    (uni.residues[ii].atoms[uni.residues[ii].atoms.names().index(this_atoms[0])],
                     uni.residues[ii].atoms[uni.residues[ii].atoms.names().index(this_atoms[1])],
                     uni.residues[j].atoms[uni.residues[j].atoms.names().index(this_atoms[2])],
                     uni.residues[j].atoms[uni.residues[j].atoms.names().index(this_atoms[3])]))
            except Exception:
                # was a bare `except:`; keep the best-effort skip of pairs
                # whose required atoms cannot be found, but no longer
                # swallow KeyboardInterrupt/SystemExit
                log.warning("could not identify essential atoms for the analysis (%s%s, %s%s)"
                            % (uni.residues[ii].name, uni.residues[ii].id,
                               uni.residues[j].name, uni.residues[j].id))
                continue
            residue_pairs.append((ii, j))
            atom_selections.append(selected_atoms)
            ordered_sparses.append(this_sparse)

    scores = np.zeros((len(residue_pairs)), dtype=float)
    frame_no = 0
    for ts in uni.trajectory:
        sys.stdout.write("now analyzing: frame %d / %d (%3.1f%%)\r"
                         % (frame_no, numframes,
                            float(frame_no) / float(numframes) * 100.0))
        sys.stdout.flush()
        frame_no += 1
        # One stacked coordinate array per frame (4 atoms per pair).
        tmp_coords = [sel.coordinates() for sel in atom_selections]
        coords = np.array(np.concatenate(tmp_coords), dtype=np.float64)
        inner_loop = LoopDistances(coords, coords, None)
        distances = inner_loop.run_potential_distances(len(atom_selections), 4, 1)
        scores += calc_potential(distances, ordered_sparses, pdb, uni,
                                 seq_dist_co, kbT=kbT)
    # Average over the trajectory.
    scores /= float(numframes)

    # Report the pairs whose averaged score is non-negligible; build the
    # lines in a list and join once instead of quadratic `+=`.
    out_lines = []
    for i, s in enumerate(scores):
        if abs(s) > 0.000001:
            res1 = pdb.residues[residue_pairs[i][0]]
            res2 = pdb.residues[residue_pairs[i][1]]
            out_lines.append("%s-%s%s:%s-%s%s\t%.3f\n"
                             % (res1.segment.name, res1.name, res1.id,
                                res2.segment.name, res2.name, res2.id, s))
    outstr = "".join(out_lines)

    dm = None
    if dofullmatrix:
        # Symmetric residue-by-residue score matrix.
        dm = np.zeros((len(pdb.residues), len(pdb.residues)))
        for i, (r1, r2) in enumerate(residue_pairs):
            dm[r1, r2] = scores[i]
            dm[r2, r1] = scores[i]
    return (outstr, dm)