def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write the topology and dihedral-restraint files for every string point.

    For each string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for each chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` into the current working directory. The
    restraint rows carry the literal placeholder ``KFAC`` instead of a force
    constant so that it can be substituted later.

    The phi/psi targets come from one of two sources:

    * ``initial_confs`` empty or ``None``: linear interpolation between the
      values read from ``start_xvg`` and ``end_xvg``.
    * otherwise: ``g_rama`` is run on ``initial_confs[i]`` for every point and
      its output is used; ``start_xvg``/``end_xvg`` are then not read.

    Args:
        inp: Object providing ``getOutputDir()``; used only to locate the
            ``g_rama`` output files.
        initial_confs: Per-point structure files, or ``None``/empty.
        start: Passed to ``res_selection.res_select`` with the index atoms.
        end: Unused.
        start_xvg: Dihedral file for the first string point.
        end_xvg: Dihedral file for the last string point.
        tpr: Run input file handed to ``g_rama -s``.
        top: Path of the system topology to rewrite.
        includes: Per-chain itp paths; may be empty.
        n: Number of string points including both end points (``int(n)``).
        ndx_file: Index file read with ``res_selection.read_ndx``.
        Nchains: Number of chains.

    Raises:
        KeyError: If a selected residue lacks a backbone atom or has no
            previous/next residue in the topology.
        IndexError: If ``includes`` is empty and ``top`` lacks the
            '; Include Position restraint file' marker.
    """
    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom per residue in the index) and map them to residues.
    ndx_atoms = res_selection.read_ndx(ndx_file)
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Generate the dihedrals ourselves from the given initial structures.
        # TODO: assert that len(initial_confs) == n otherwise?
        ramaprocs = {}
        stringpts = {}  # 4 levels: stringpoint, residue, chain, phi/psi value
        FNULL = open(os.devnull, 'w')  # dont generate spam from g_rama
        try:
            # Start all g_rama runs first so that they execute in parallel.
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                ramaprocs[i] = Popen(['g_rama', '-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i],
                                     stdout=FNULL, stderr=FNULL)
            for i in range(n):
                # NOTE(review): g_rama writes relative to the working directory
                # but the file is read from inp.getOutputDir(); this assumes
                # both are the same directory - confirm.
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                stringpts[i] = readxvg.readxvg(xvg_i, selection)
        finally:
            FNULL.close()

    # The topology text is the same for every point, so read it only once.
    with open(top) as in_topf:
        top_text = in_topf.read()

    # Rewrite the topology to include the res itp files instead of the original
    # per-chain itps (if any). There will be one topol_x.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        in_top = top_text
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regular
                # expression (its '.' must not match arbitrary characters).
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Guard against a division by zero for a degenerate one-point string.
    steps = max(n - 1, 1)
    backbone = ('N', 'CA', 'C')

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            moltop = None
            if Nchains > 1:
                # The per-chain topology is copied in front of the restraints.
                with open(includes[mol]) as moltop_f:
                    moltop = moltop_f.read()

            if len(includes) > 0:
                protein = molecule(includes[mol])
            else:
                protein = molecule(top)
                # Split before opening the output so that a missing marker
                # does not leave a truncated topol_<k>.top behind.
                in_itp = top_text.split('; Include Position restraint file')
                head, tail = in_itp[0], in_itp[1]
                with open('topol_%d.top' % k, 'w') as out_top:
                    out_top.write(head)
                    out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                    out_top.write(tail)

            # Lookup-table mapping residue number to the atom numbers of its
            # backbone N, CA and C atoms.
            dih_atoms = {}
            for a in protein:
                if a.atomname in backbone:
                    dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if moltop is not None:
                    restraint_itp.write(moltop)
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")

                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        # k runs from 0 to n-1, giving a factor from 0 to 1
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / steps
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / steps
                    else:
                        # Use the values extracted from initial_confs[] above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable placeholder is written and replaced later.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top):
    """Average the swarm drift, reparametrize the string and write restraints.

    For every interior string point ``k`` and every chain a file
    ``topol_<k>_chain_<chain>.top`` is written. It holds the part of
    ``include[k][chain]`` preceding its ``[ dihedral_restraints ]`` section,
    followed by new phi/psi restraint rows.

    NOTE(review): ``Nchains`` and ``include`` are not parameters of this
    function; they are read as module-level names and must be defined there -
    confirm where they are meant to come from.

    Args:
        diheds: ``diheds[point][swarm]`` is the path of a dihedral .xvg file.
        selection: Passed (as a string) to ``res_selection.res_select``.
        start_conf: Passed (as a string) to ``res_selection.res_select``.
        start_xvg: Dihedral file of the fixed first string point.
        end_conf: Unused.
        end_xvg: Dihedral file of the fixed last string point.
        top: Unused.
    """
    def read_phi_psi(path, resnr):
        # Collect [phi, psi] of every line ending in "-<resnr>", in file order.
        pattern = re.compile(r'\-%d\n' % resnr)
        values = []
        with open(path, 'r') as xvg:
            for line in xvg:
                if pattern.search(line):
                    fields = line.split()
                    values += [float(fields[0]), float(fields[1])]
        return values

    def backbone_atoms(protein, resnr, neighbour, neighbour_name):
        # Backbone atoms of resnr plus one named atom of a neighbour residue,
        # in the iteration order of the topology.
        return [a for a in protein
                if (a.resnr == resnr and a.atomname in ('N', 'CA', 'C'))
                or (a.resnr == neighbour and a.atomname == neighbour_name)]

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)
    sys.stderr.write('Residue selection: %s' % rsel)

    # calculate average drift in collective variables space
    newpts = []
    for swarm_files in diheds:
        avg = []
        for r in rsel:
            # driftList has phi,psi values for the residue in every swarm
            driftList = [read_phi_psi(path, r) for path in swarm_files]
            # Keep each point one flat vector over all residues, the same
            # layout as initpt/targetpt below.
            avg.extend(scale((1 / float(Nswarms)), reduce(mapadd, driftList)))
        newpts.append(avg)

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_phi_psi(start_xvg, r)
    targetpt = []
    for r in rsel:
        targetpt += read_phi_psi(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 refinement passes; only the latest is needed.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # write the topology for the next iteration (end points are skipped)
    for k in range(1, len(adjusted) - 1):
        pathpoint = adjusted[k]  # just a flat list of phi/psi angles
        for chain in range(Nchains):
            # Read the input before opening the output so nothing is truncated
            # if the input cannot be read.
            with open(include[k][chain], 'r') as in_itp:
                moltop = in_itp.read().split('[ dihedral_restraints ]')[0]
            protein = molecule('%s' % include[k][chain])

            with open('topol_%d_chain_%d.top' % (k, chain), 'w') as restraint_itp:
                restraint_itp.write('%s' % moltop)
                sys.stderr.write("Writing restraints for interpolant point %d chain %d\n" % (k, chain))
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type label phi dphi kfac power\n")

                # keep track of the position in the path point
                pos = 0
                for r in rsel:
                    phi = backbone_atoms(protein, int(r), int(r) - 1, 'C')
                    psi = backbone_atoms(protein, int(r), int(r) + 1, 'N')
                    # Each residue occupies 2 * Nchains values stored as
                    # (phi, psi) pairs, one pair per chain.
                    phi_val = pathpoint[pos + 2 * chain]
                    psi_val = pathpoint[pos + 2 * chain + 1]
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr, phi[3].atomnr,
                                           1, 1, phi_val, 0, 1, 2))
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr, psi[3].atomnr,
                                           1, 1, psi_val, 0, 1, 2))
                    # go to the next residue
                    pos += 2 * Nchains
def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains, start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top, includes):
    """Average the swarm drift, reparametrize the string and write CV targets.

    The collective variables (CVs) of every swarm are averaged per string
    point, the resulting string is repeatedly passed through ``ext_rep_pts``
    until the reported maximum spread is at most 0.012 (or 150 iterations),
    and the adjusted points are written out for the next iteration:

    * ``use_posres == 1``: ``rep_resconf_<k>.gro``, a copy of
      ``last_resconfs[k]`` with the coordinates of the index atoms replaced.
    * otherwise: ``res_<k>_chain_<chain>.itp`` with dihedral restraints whose
      force constant is the literal placeholder ``KFAC``.

    With ``fix_endpoints == 1`` the first and last point are read from the
    start/end files and no output is written for them.

    Args:
        use_posres: 1 for position-restraint CVs, 0 for dihedral CVs.
        fix_endpoints: 1 to keep the string end points fixed.
        cvs: ``cvs[point][swarm]`` is the path of a CV file (.gro or .xvg).
        ndx_file: Index file read with ``res_selection.read_ndx``.
        Nchains: Number of chains.
        start_conf: Start structure (read as CVs in the posres case, used for
            the residue selection in the dihedral case).
        start_xvg: Start dihedrals (dihedral case, fixed end points).
        end_conf: End structure (posres case, fixed end points).
        end_xvg: End dihedrals (dihedral case, fixed end points).
        last_resconfs: Per-point restraint structures from the previous step.
        top: Topology passed to ``molecule`` when ``Nchains == 1``.
        includes: ``includes[point][chain]`` itp paths.

    Returns:
        None. All results are written to files in the working directory.
    """
    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals the atoms are mapped to residues of a single chain. The
    # position restraints use the atom indices directly.
    # TODO: the index has to be expanded to all chains for the posres case.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select('%s' % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts holds one flat CV list per swarm-processed string point. It may
    # lack the end points, which are then added below (fix_endpoints case).
    newpts = []
    for swarm_files in cvs:
        swarmpts = []
        for cv_file in swarm_files:
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cv_file, ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cv_file, rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # Read in the fixed start and end CV values for the fix_endpoints case.
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: start/end_conf are full systems, so the atom numbering
            # aliases for the ndx_atoms array.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)
        sys.stderr.write('Length of initpt %d, targetpt %d\n' % (len(initpt), len(targetpt)))
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary.
    # TODO: check if this is needed anymore
    paddingpt = [0] * len(newpts[0])
    newpts.append(paddingpt)

    # Do the actual reparameterization.
    # ext_rep_pts returns the maximum spread in [0] and the adjusted points in
    # [1]. Only the most recent result is fed into the next iteration, so the
    # earlier ones are not kept.
    adjusted = ext_rep_pts(newpts)[1]
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write('Rep iter %d: \n' % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write('  maxspread was %f\n' % maxspread)
        adjusted = rep_it[1]
        i = i + 1
    sys.stderr.write('Final maximum spread %f after %d iterations.\n' % (maxspread, i))

    # delete the padding point
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    # Reparametrization can be skipped for testing by uncommenting the next
    # row; the points will then drift along the string.
    #adjusted = newpts

    sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted))

    if use_posres == 1:
        # Set for constant-time membership tests in the per-atom loop below.
        selected_atoms = set(ndx_atoms)

    # Write the CV control data for the next iteration.
    last = len(adjusted) - 1
    for k in range(len(adjusted)):
        # Output for fixed end points is not needed by the caller.
        if fix_endpoints == 1 and (k == 0 or k == last):
            continue

        pathpoint = adjusted[k]  # flat list of CVs for this string point

        if use_posres == 1:
            # Read the previous restraint structure completely before the
            # output is opened, so nothing is truncated on a read failure.
            with open(last_resconfs[k], 'r') as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                sys.stderr.write('adjusted[] entry of wrong length %d\n' % len(pathpoint))

            with open('rep_resconf_%d.gro' % k, 'w') as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])

                # Only positions are copied; velocities are not needed since
                # the file is just a position restraint target.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])
                    if atomnr in selected_atoms:
                        # Update to the reparametrized coordinates
                        x = pathpoint[cvpos]
                        y = pathpoint[cvpos + 1]
                        z = pathpoint[cvpos + 2]
                        cvpos += 3
                    # Write out the row, updated or not
                    rep_resconf.write('%s%s%5d%8.3f%8.3f%8.3f\n' % (resname, atname, atomnr, x, y, z))

                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[len(in_resconf) - 1])
        else:
            for chain in range(Nchains):
                # Read the input itp before the output is opened.
                with open(includes[k][chain], 'r') as in_itpf:
                    moltop = in_itpf.read().split('[ dihedral_restraints ]')[0]

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule('%s' % includes[k][chain])

                # Lookup-table mapping residue number to the atom numbers of
                # its backbone N, CA and C atoms.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('N', 'CA', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                with open('res_%d_chain_%d.itp' % (k, chain), 'w') as restraint_itp:
                    restraint_itp.write('%s' % moltop)
                    sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n")

                    pos = 0
                    for r in rsel:
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C']]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                        # NOTE(review): with Nchains > 1 the psi index of one
                        # chain equals the phi index of the next chain, so the
                        # offset looks wrong unless Nchains == 1. The correct
                        # offset depends on the layout readxvg_flat returns -
                        # confirm before changing.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (phi,psi vals * number of chains apart)
                        pos += 2 * Nchains

                        # The k-factor is written as a searchable placeholder
                        # because different stages need different values.
                        restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                            % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                        restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                            % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top):
    """Average the swarm drift, reparametrize the string and write topologies.

    For every interior string point ``k`` a file ``<k>.top`` is written: the
    text of ``top`` with its '#include dihedral_restraints' marker replaced by
    a ``[ dihedral_restraints ]`` section. The atom numbers of the restraint
    rows are hard-coded (marked in the code as explicit dihedrals for alanine
    dipeptide), and two extra restraints take their values from the fixed
    ``theta_val``/``zeta_val`` tables. As a side effect the per-swarm values
    are dumped to ``dihedrals<point>.dat``.

    Args:
        diheds: ``diheds[point][swarm]`` is the path of a dihedral .xvg file.
        selection: Passed (as a string) to ``res_selection.res_select``.
        start_conf: Structure used for the residue selection and the atom list.
        start_xvg: Dihedral file of the fixed first string point.
        end_conf: Unused.
        end_xvg: Dihedral file of the fixed last string point.
        top: Path of the template topology.

    Raises:
        IndexError: If the string has more interior points than the 20-entry
            ``theta_val``/``zeta_val`` tables cover, or if ``top`` lacks the
            '#include dihedral_restraints' marker.
    """
    def read_phi_psi(path, resnr):
        # Collect [phi, psi] of every line ending in "-<resnr>", in file order.
        pattern = re.compile(r'\-%d\n' % resnr)
        values = []
        with open(path, 'r') as xvg:
            for line in xvg:
                if pattern.search(line):
                    fields = line.split()
                    values += [float(fields[0]), float(fields[1])]
        return values

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)
    sys.stderr.write('Residue selection: %s' % rsel)

    # calculate average drift in collective variables space
    newpts = []
    for interp, swarm_files in enumerate(diheds):
        avg = []
        for r in rsel:
            # driftList has phi,psi values for the residue in every swarm
            driftList = [read_phi_psi(path, r) for path in swarm_files]
            # Debug dump; the file is rewritten for every residue.
            with open('dihedrals%d.dat' % interp, 'w') as driftdat:
                for pt in driftList:
                    driftdat.write('%f %f\n' % (pt[0], pt[1]))
            # Keep each point one flat vector over all residues, the same
            # layout as initpt/targetpt below.
            avg.extend(scale((1 / float(Nswarms)), reduce(mapadd, driftList)))
        newpts.append(avg)

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_phi_psi(start_xvg, r)
    targetpt = []
    for r in rsel:
        targetpt += read_phi_psi(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 refinement passes; only the latest is needed.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # temporary additional restraints for alanine dipeptide, indexed by the
    # string point number
    theta_val = [
        1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999,
        0.7599999999999999, 0.6399999999999999, 0.5199999999999998,
        0.3999999999999999, 0.2799999999999998, 0.1599999999999997,
        0.039999999999999813, -0.0800000000000003, -0.20000000000000018,
        -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8
    ]
    zeta_val = [
        -4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998,
        -1.2999999999999998, -0.7999999999999998, -0.2999999999999998,
        0.20000000000000018, 0.7000000000000002, 1.2000000000000002,
        1.7000000000000002, 2.2, 2.7, 3.2, 3.7, 4.2, 4.7, 5.7
    ]

    with open(top, 'r') as top_file:
        top_parts = top_file.read().split('#include dihedral_restraints')

    # The atom list does not depend on the string point; load it once.
    protein = res_selection.protein('%s' % start_conf)
    row = "%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"

    # write the topology for the next iteration (end points are skipped)
    for k in range(1, len(adjusted) - 1):
        with open('%d.top' % k, 'w') as newtop:
            newtop.write('%s' % top_parts[0])
            sys.stderr.write("Writing restraints for interpolant index %i\n" % k)
            newtop.write("[ dihedral_restraints ]\n")
            newtop.write("; ai aj ak al type label phi dphi kfac power\n")
            # treat the reparam values as a stack
            stack = adjusted[k]
            for r in rsel:
                # there may be multiple residues matching the resnr, e.g., dimers
                phi = [a for a in protein
                       if (a.resnr == int(r) and a.atomname in ('N', 'CA', 'C'))
                       or (a.resnr == int(r) - 1 and a.atomname == 'C')]
                # Four atoms per matching residue; floor division keeps this
                # an int on Python 3 as well.
                numres = len(phi) // 4
                for i in range(numres):
                    # values are stored as consecutive (phi, psi) pairs
                    phi_val = stack[2 * i]
                    psi_val = stack[2 * i + 1]
                    # TODO EXPLICIT DIHEDRALS FOR ALANINE DIPEPTIDE
                    newtop.write(row % (5, 7, 9, 15, 1, 1, phi_val, 0, 1, 2))
                    newtop.write(row % (7, 9, 15, 17, 1, 1, psi_val, 0, 1, 2))
                    newtop.write(row % (1, 5, 7, 9, 1, 1, theta_val[k], 0, 1, 2))
                    newtop.write(row % (9, 15, 17, 19, 1, 1, zeta_val[k], 0, 1, 2))
                # drop the values consumed by this residue (two per match)
                stack = stack[numres * 2:]
            newtop.write('%s' % top_parts[1])
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write topologies and dihedral restraints for the interior string points.

    For every interior point ``k`` in ``range(1, n - 1)`` this writes
    ``topol_<k>.top`` and, for every chain, ``dihre_<k>_chain_<mol>.itp`` into
    the current working directory. The phi/psi targets are linearly
    interpolated between the values found in ``start_xvg`` and ``end_xvg``.

    Args:
        start: Passed to ``res_selection.res_select`` together with ``ndx``.
        end: Unused.
        start_xvg: Dihedral file for the first string point.
        end_xvg: Dihedral file for the last string point.
        top: Path of the system topology to rewrite.
        includes: Per-chain itp paths; may be empty.
        n: Number of string points including both end points (``int(n)``).
        ndx: Passed to ``res_selection.res_select``.
        Nchains: Number of chains.

    Raises:
        IndexError: If a residue has fewer matching xvg lines than chains, if
            backbone atoms are missing, or if ``includes`` is empty and
            ``top`` lacks the '; Include Position restraint file' marker.
    """
    def read_points(lines, residues):
        # Map residue -> list of [phi, psi], one entry per matching line.
        points = {}
        for r in residues:
            pattern = re.compile(r'\-%s$' % r)
            points[r] = []
            for line in lines:
                if pattern.search(line):
                    fields = line.split()
                    points[r].append([float(fields[0]), float(fields[1])])
        return points

    with open(start_xvg, 'r') as xvg_file:
        start_lines = xvg_file.readlines()
    with open(end_xvg, 'r') as xvg_file:
        end_lines = xvg_file.readlines()

    selection = res_selection.res_select(start, ndx)
    n = int(n)

    # create the path
    startpts = read_points(start_lines, selection)
    endpts = read_points(end_lines, selection)

    # The topology text is the same for every point, so read it only once.
    with open(top) as top_file:
        top_text = top_file.read()

    sys.stderr.write('%s' % includes)
    for k in range(1, n - 1):
        in_top = top_text
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regular
                # expression (its '.' must not match arbitrary characters).
                in_top = in_top.replace(includename, 'dihre_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n - 1):
        for mol in range(Nchains):
            moltop = None
            if Nchains > 1:
                # The per-chain topology is copied in front of the restraints.
                with open(includes[mol]) as moltop_file:
                    moltop = moltop_file.read()

            if len(includes) > 0:
                protein = molecule(includes[mol])
            else:
                protein = molecule(top)
                # Split before opening the output so that a missing marker
                # does not leave a truncated topol_<k>.top behind.
                in_itp = top_text.split('; Include Position restraint file')
                head, tail = in_itp[0], in_itp[1]
                with open('topol_%d.top' % k, 'w') as out_top:
                    out_top.write(head)
                    out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                    out_top.write(tail)

            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if moltop is not None:
                    restraint_itp.write(moltop)
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al phi dphi kfac\n")

                for r in selection:
                    # Atoms are taken in the iteration order of the topology.
                    phi = [a for a in protein
                           if (a.resnr == int(r) and a.atomname in ('N', 'CA', 'C'))
                           or (a.resnr == int(r) - 1 and a.atomname == 'C')]
                    psi = [a for a in protein
                           if (a.resnr == int(r) and a.atomname in ('N', 'CA', 'C'))
                           or (a.resnr == int(r) + 1 and a.atomname == 'N')]

                    # Points are numbered 0 .. n-1, so the string has n - 1
                    # intervals; dividing by n would never reach the end point
                    # evenly. n - 1 is non-zero whenever this loop runs.
                    phi_val = startpts[r][mol][0] + (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1) * k
                    psi_val = startpts[r][mol][1] + (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1) * k

                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr, phi[3].atomnr,
                                           phi_val, 0, 1))
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr, psi[3].atomnr,
                                           psi_val, 0, 1))
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top):
    """Average the swarm drift, reparametrize the string and write restraints.

    For every interior string point ``k`` and every chain a file
    ``topol_<k>_chain_<chain>.top`` is written. It holds the part of
    ``include[k][chain]`` preceding its ``[ dihedral_restraints ]`` section,
    followed by new phi/psi restraint rows.

    NOTE(review): ``Nchains`` and ``include`` are not parameters of this
    function; they are read as module-level names and must be defined there -
    confirm where they are meant to come from.

    Args:
        diheds: ``diheds[point][swarm]`` is the path of a dihedral .xvg file.
        selection: Passed (as a string) to ``res_selection.res_select``.
        start_conf: Passed (as a string) to ``res_selection.res_select``.
        start_xvg: Dihedral file of the fixed first string point.
        end_conf: Unused.
        end_xvg: Dihedral file of the fixed last string point.
        top: Unused.
    """
    def read_phi_psi(path, resnr):
        # Collect [phi, psi] of every line ending in "-<resnr>", in file order.
        pattern = re.compile(r'\-%d\n' % resnr)
        values = []
        with open(path, 'r') as xvg:
            for line in xvg:
                if pattern.search(line):
                    fields = line.split()
                    values += [float(fields[0]), float(fields[1])]
        return values

    def backbone_atoms(protein, resnr, neighbour, neighbour_name):
        # Backbone atoms of resnr plus one named atom of a neighbour residue,
        # in the iteration order of the topology.
        return [a for a in protein
                if (a.resnr == resnr and a.atomname in ('N', 'CA', 'C'))
                or (a.resnr == neighbour and a.atomname == neighbour_name)]

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)
    sys.stderr.write('Residue selection: %s' % rsel)

    # calculate average drift in collective variables space
    newpts = []
    for swarm_files in diheds:
        avg = []
        for r in rsel:
            # driftList has phi,psi values for the residue in every swarm
            driftList = [read_phi_psi(path, r) for path in swarm_files]
            # Keep each point one flat vector over all residues, the same
            # layout as initpt/targetpt below.
            avg.extend(scale((1 / float(Nswarms)), reduce(mapadd, driftList)))
        newpts.append(avg)

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_phi_psi(start_xvg, r)
    targetpt = []
    for r in rsel:
        targetpt += read_phi_psi(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 refinement passes; only the latest is needed.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # write the topology for the next iteration (end points are skipped)
    for k in range(1, len(adjusted) - 1):
        pathpoint = adjusted[k]  # just a flat list of phi/psi angles
        for chain in range(Nchains):
            # Read the input before opening the output so nothing is truncated
            # if the input cannot be read.
            with open(include[k][chain], 'r') as in_itp:
                moltop = in_itp.read().split('[ dihedral_restraints ]')[0]
            protein = molecule('%s' % include[k][chain])

            with open('topol_%d_chain_%d.top' % (k, chain), 'w') as restraint_itp:
                restraint_itp.write('%s' % moltop)
                sys.stderr.write("Writing restraints for interpolant point %d chain %d\n" % (k, chain))
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type label phi dphi kfac power\n")

                # keep track of the position in the path point
                pos = 0
                for r in rsel:
                    phi = backbone_atoms(protein, int(r), int(r) - 1, 'C')
                    psi = backbone_atoms(protein, int(r), int(r) + 1, 'N')
                    # Each residue occupies 2 * Nchains values stored as
                    # (phi, psi) pairs, one pair per chain.
                    phi_val = pathpoint[pos + 2 * chain]
                    psi_val = pathpoint[pos + 2 * chain + 1]
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr, phi[3].atomnr,
                                           1, 1, phi_val, 0, 1, 2))
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr, psi[3].atomnr,
                                           1, 1, psi_val, 0, 1, 2))
                    # go to the next residue
                    pos += 2 * Nchains
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top):
    """Average the swarm drift, reparametrize the string and write topologies.

    For every interior string point ``k`` a file ``<k>.top`` is written: the
    text of ``top`` with its '#include dihedral_restraints' marker replaced by
    a ``[ dihedral_restraints ]`` section. The atom numbers of the restraint
    rows are hard-coded (marked in the code as explicit dihedrals for alanine
    dipeptide), and two extra restraints take their values from the fixed
    ``theta_val``/``zeta_val`` tables. As a side effect the per-swarm values
    are dumped to ``dihedrals<point>.dat``.

    Args:
        diheds: ``diheds[point][swarm]`` is the path of a dihedral .xvg file.
        selection: Passed (as a string) to ``res_selection.res_select``.
        start_conf: Structure used for the residue selection and the atom list.
        start_xvg: Dihedral file of the fixed first string point.
        end_conf: Unused.
        end_xvg: Dihedral file of the fixed last string point.
        top: Path of the template topology.

    Raises:
        IndexError: If the string has more interior points than the 20-entry
            ``theta_val``/``zeta_val`` tables cover, or if ``top`` lacks the
            '#include dihedral_restraints' marker.
    """
    def read_phi_psi(path, resnr):
        # Collect [phi, psi] of every line ending in "-<resnr>", in file order.
        pattern = re.compile(r'\-%d\n' % resnr)
        values = []
        with open(path, 'r') as xvg:
            for line in xvg:
                if pattern.search(line):
                    fields = line.split()
                    values += [float(fields[0]), float(fields[1])]
        return values

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)
    sys.stderr.write('Residue selection: %s' % rsel)

    # calculate average drift in collective variables space
    newpts = []
    for interp, swarm_files in enumerate(diheds):
        avg = []
        for r in rsel:
            # driftList has phi,psi values for the residue in every swarm
            driftList = [read_phi_psi(path, r) for path in swarm_files]
            # Debug dump; the file is rewritten for every residue.
            with open('dihedrals%d.dat' % interp, 'w') as driftdat:
                for pt in driftList:
                    driftdat.write('%f %f\n' % (pt[0], pt[1]))
            # Keep each point one flat vector over all residues, the same
            # layout as initpt/targetpt below.
            avg.extend(scale((1 / float(Nswarms)), reduce(mapadd, driftList)))
        newpts.append(avg)

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_phi_psi(start_xvg, r)
    targetpt = []
    for r in rsel:
        targetpt += read_phi_psi(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 refinement passes; only the latest is needed.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # temporary additional restraints for alanine dipeptide, indexed by the
    # string point number
    theta_val = [
        1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999,
        0.7599999999999999, 0.6399999999999999, 0.5199999999999998,
        0.3999999999999999, 0.2799999999999998, 0.1599999999999997,
        0.039999999999999813, -0.0800000000000003, -0.20000000000000018,
        -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8
    ]
    zeta_val = [
        -4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998,
        -1.2999999999999998, -0.7999999999999998, -0.2999999999999998,
        0.20000000000000018, 0.7000000000000002, 1.2000000000000002,
        1.7000000000000002, 2.2, 2.7, 3.2, 3.7, 4.2, 4.7, 5.7
    ]

    with open(top, 'r') as top_file:
        top_parts = top_file.read().split('#include dihedral_restraints')

    # The atom list does not depend on the string point; load it once.
    protein = res_selection.protein('%s' % start_conf)
    row = "%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"

    # write the topology for the next iteration (end points are skipped)
    for k in range(1, len(adjusted) - 1):
        with open('%d.top' % k, 'w') as newtop:
            newtop.write('%s' % top_parts[0])
            sys.stderr.write("Writing restraints for interpolant index %i\n" % k)
            newtop.write("[ dihedral_restraints ]\n")
            newtop.write("; ai aj ak al type label phi dphi kfac power\n")
            # treat the reparam values as a stack
            stack = adjusted[k]
            for r in rsel:
                # there may be multiple residues matching the resnr, e.g., dimers
                phi = [a for a in protein
                       if (a.resnr == int(r) and a.atomname in ('N', 'CA', 'C'))
                       or (a.resnr == int(r) - 1 and a.atomname == 'C')]
                # Four atoms per matching residue; floor division keeps this
                # an int on Python 3 as well.
                numres = len(phi) // 4
                for i in range(numres):
                    # values are stored as consecutive (phi, psi) pairs
                    phi_val = stack[2 * i]
                    psi_val = stack[2 * i + 1]
                    # TODO EXPLICIT DIHEDRALS FOR ALANINE DIPEPTIDE
                    newtop.write(row % (5, 7, 9, 15, 1, 1, phi_val, 0, 1, 2))
                    newtop.write(row % (7, 9, 15, 17, 1, 1, psi_val, 0, 1, 2))
                    newtop.write(row % (1, 5, 7, 9, 1, 1, theta_val[k], 0, 1, 2))
                    newtop.write(row % (9, 15, 17, 19, 1, 1, zeta_val[k], 0, 1, 2))
                # drop the values consumed by this residue (two per match)
                stack = stack[numres * 2:]
            newtop.write('%s' % top_parts[1])
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write the per-stringpoint topologies and dihedral restraint files.

    For every string point k in range(n) a 'topol_<k>.top' is written, and for
    every chain a 'res_<k>_chain_<mol>.itp' containing one phi and one psi
    [ dihedral_restraints ] row per selected residue. The force constant is
    written as the literal placeholder 'KFAC' so it can be substituted later.

    Parameters:
        inp: object whose getOutputDir() gives the directory the rama .xvg
            outputs are read back from.
        initial_confs: sequence of structure files, one per string point. When
            None or empty, the phi/psi values are linearly interpolated between
            the values in start_xvg and end_xvg instead.
        start: structure handed to res_selection.res_select together with the
            index atoms to obtain the residue selection.
        end: not used by this function.
        start_xvg, end_xvg: dihedral .xvg files, only read when interpolating.
        tpr: run-input file passed to the rama command with '-s'.
        top: path of the system topology used as template.
        includes: per-chain .itp paths (may be empty for a single chain).
        n: number of string points, including the start and end points.
        ndx_file: index file read with res_selection.read_ndx.
        Nchains: number of chains to write restraints for.

    Raises:
        IndexError: if initial_confs is given but has fewer than n entries.
    """
    cmdnames = cmds.GromacsCommands()

    # Get the atoms involved with the residues to use for dihedrals (might be more than one
    # atom in the index per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures.
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored
        if len(initial_confs) < n:
            # Fail before any rama process is spawned rather than part-way through
            # launching them.
            raise IndexError('initial_confs has %d entries but n is %d' % (len(initial_confs), n))

        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value
        # NOTE(review): '0%3d.xvg' pads with spaces ('0  1.xvg'); presumably '%03d' was
        # intended. Left unchanged because the writer and the reader use the same pattern.
        # The devnull handle keeps rama from spamming the log and is closed once all
        # sub-processes have been waited for.
        with open(os.devnull, 'w') as FNULL:
            # Run rama (in parallel) on each structure and output to a temporary .xvg
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                cmd = cmdnames.rama.split() + ['-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i]
                ramaprocs[i] = Popen(cmd, stdout=FNULL, stderr=FNULL)

            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain
    # itps (if any). There will be one topol_x.top per string point.
    sys.stderr.write('%s' % includes)
    # The template does not change between string points, so read it once.
    with open(top) as in_topf:
        top_text = in_topf.read()

    for k in range(n):
        in_top = top_text
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted as a regular
                # expression (its '.' would otherwise match any character).
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start each chain's restraint file with a copy of the chain's own itp
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")

                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itps: splice an include of the restraint file into the
                    # system topology just before the position restraint section.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        in_itp = top_text.split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to
                # dihedrally relevant backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification atoms
                # needed for each residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # In the Gromacs 4.6+ format the k-factor lives in this row (before, it
                    # was in the .mdp as dihre_fc). Since different stages need different
                    # force constants, a searchable KFAC placeholder is written here and
                    # replaced later. KFAC is normally a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write topologies and dihedral restraint files for the interior string points.

    For every k in range(1, n-1) a 'topol_<k>.top' is written, and for every
    chain a 'dihre_<k>_chain_<mol>.itp' holding one phi and one psi restraint
    row per selected residue. The angles are interpolated between the values
    found in start_xvg and end_xvg.

    Parameters:
        start: structure handed to res_selection.res_select to get the residues.
        end: not used by this function.
        start_xvg, end_xvg: paths of the dihedral .xvg files for the end points.
        top: path of the system topology used as template.
        includes: per-chain .itp paths (may be empty for a single chain).
        n: number of points; converted with int().
        ndx: index passed to res_selection.res_select.
        Nchains: number of chains to write restraints for.
    """
    with open(start_xvg, 'r') as xvg_f:
        start_lines = xvg_f.readlines()
    with open(end_xvg, 'r') as xvg_f:
        end_lines = xvg_f.readlines()

    selection = res_selection.res_select(start, ndx)
    n = int(n)

    # create the path end points: residue -> list (one entry per chain) of [phi, psi]
    startpts = _collect_dihedrals(start_lines, selection)
    endpts = _collect_dihedrals(end_lines, selection)

    sys.stderr.write('%s' % includes)
    # The template does not change between string points, so read it once.
    with open(top) as top_f:
        top_text = top_f.read()

    for k in range(1, n - 1):
        in_top = top_text
        for mol in range(Nchains):
            if len(includes) > 0:
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted as a regular
                # expression (its '.' would otherwise match any character).
                in_top = in_top.replace(includename, 'dihre_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n - 1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start each chain's restraint file with a copy of the chain's own itp
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                # write the initial part of the topology file
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al phi dphi kfac\n")

                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain itps: splice an include of the restraint file into the
                    # system topology just before the position restraint section.
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        in_itp = top_text.split('; Include Position restraint file')
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    resnr = int(r)
                    # Backbone atoms defining phi (C of previous residue, N, CA, C) and
                    # psi (N, CA, C, N of next residue), in topology order.
                    phi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('CA', 'N', 'C'))
                           or (a.resnr == resnr - 1 and a.atomname == 'C')]
                    psi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('N', 'CA', 'C'))
                           or (a.resnr == resnr + 1 and a.atomname == 'N')]

                    # NOTE(review): the interpolation fraction is k/n, so the spacing does
                    # not land on the end values at k = n-1; confirm whether k/(n-1) was
                    # intended. Left unchanged to keep the generated values identical.
                    phi_val = startpts[r][mol][0] + (endpts[r][mol][0] - startpts[r][mol][0]) / n * k
                    psi_val = startpts[r][mol][1] + (endpts[r][mol][1] - startpts[r][mol][1]) / n * k

                    # write phi, psi angles
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                                           phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                                           psi[3].atomnr, psi_val, 0, 1))


def _collect_dihedrals(xvg_lines, selection):
    """Return {residue: [[phi, psi], ...]} for the xvg lines ending in '-<residue>'."""
    points = {}
    for r in selection:
        # Compile once per residue instead of once per line.
        label = re.compile(r'\-%s$' % r)
        points[r] = []
        for line in xvg_lines:
            if label.search(line):
                fields = line.split()
                points[r].append([float(fields[0]), float(fields[1])])
    return points
import os
import argparse
import res_selection

# Command-line script: reads an initial structure, an index file and a topology
# template, then starts one '<k>.top' per interpolation step from the part of
# the template that precedes the '#include dihedral_restraints' marker.
parser = argparse.ArgumentParser()
parser.add_argument('-i', required=True, help='The initial .gro file')
parser.add_argument('-n', required=True, help='The number of interpolation steps')
parser.add_argument('-x', required=True, help='The desired index file (.ndx)')
parser.add_argument('-p', required=True, help='A topology file for the restraints, must contain the line "#include dihedral_restraints" where the restraints are to be written')
args = vars(parser.parse_args())

conf = args['i']
ndx = args['x']
n = int(args['n'])
with open(args['p'], 'r') as top_file:
    top = top_file.read()

selection = res_selection.res_select(conf, ndx)
protein = res_selection.protein(conf)
# top[0] is the text before the marker, top[1] the text after it
top = top.split('#include dihedral_restraints')
# Parenthesised form prints the same on Python 2 and is valid on Python 3
print(selection)

# Hard-coded additional dihedral values, one entry per interpolation index
theta_val = [1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999, 0.7599999999999999, 0.6399999999999999, 0.5199999999999998, 0.3999999999999999, 0.2799999999999998, 0.1599999999999997, 0.039999999999999813, -0.0800000000000003, -0.20000000000000018, -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8]
zeta_val = [-4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998, -1.2999999999999998, -0.7999999999999998, -0.2999999999999998, 0.20000000000000018, 0.7000000000000002, 1.2000000000000002, 1.7000000000000002, 2.2, 2.7, 3.2, 3.7, 4.2, 4.7, 5.7]

for k in range(1, n):
    with open('%s.top' % k, 'w') as newtop:
        # write the initial part of the topology file
        newtop.write('%s' % top[0])
        with open('%s.xvg' % k, 'r') as xvg_file:
            xvg = xvg_file.readlines()
def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
):
    """Average the swarm drift per string point, reparametrize, and write the result.

    Each string point's swarm CV vectors are summed with mapadd and scaled by
    1/Nswarms (Nswarms = len(cvs[0])). With fix_endpoints == 1 the start and
    end CV vectors are read separately and added at both ends. The point list
    is then passed through ext_rep_pts once and again up to 150 more times,
    stopping early when the reported maximum spread is at most 0.012.

    Output, per string point k (the end points are skipped when
    fix_endpoints == 1):
        use_posres == 1: 'rep_resconf_<k>.gro', a copy of last_resconfs[k] with
            the coordinates of the index atoms replaced.
        otherwise: 'res_<k>_chain_<chain>.itp' per chain, holding the part of
            includes[k][chain] before '[ dihedral_restraints ]' followed by
            new phi/psi rows with a 'KFAC' placeholder.

    Parameters:
        use_posres: 1 for position-restraint CVs read from .gro files, 0 for
            dihedral CVs read from .xvg files. The residue selection is only
            built for 0, so other values are not supported.
        fix_endpoints: 1 to keep the start/end points fixed.
        cvs: per string point, a list of per-swarm CV files.
        ndx_file: index file read with res_selection.read_ndx.
        Nchains: number of chains.
        start_conf, end_conf: start/end structures.
        start_xvg, end_xvg: start/end dihedral files (dihedral case).
        last_resconfs: per string point, the previous restraint .gro (posres case).
        top: topology passed to molecule() when Nchains == 1.
        includes: per string point, the per-chain .itp paths (dihedral case).
    """
    # reduce is a builtin on Python 2 only; functools provides it on both 2 and 3
    from functools import reduce

    Nswarms = len(cvs[0])
    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc.
    # will read the entire file and select the same residues in each chain. The position
    # restraints use the atom indices directly.
    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the
    # atom-selection Nchains times for the posres case.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts is a per-stringpoint list of CV points (each a flat list of CV values).
    # Note: cvs[][] only covers the string points that actually were swarm-processed, so
    # with fix_endpoints the start/end are added to newpts further below.
    newpts = []
    for swarm_files in cvs:
        swarmpts = []
        for cv_file in swarm_files:
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cv_file, ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cv_file, rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        avgdrift = scale((1 / float(Nswarms)), zptsum)
        newpts.append(avgdrift)

    # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the
    # start/end drift like the other points and are already part of newpts)
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full Systems so the atom numbering aliases for
            # the ndx_atoms array; currently handled inside readgro_flat.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary.
    # TODO: check if this is needed anymore
    paddingpt = [0] * len(newpts[0])
    newpts.append(paddingpt)

    # Do the actual reparameterization.
    # ext_rep_pts returns the maximum spread of the CV distances between points in [0] and
    # the adjusted points in [1].
    adjusted = ext_rep_pts(newpts)[1]  # initial iteration; ignore the spread result

    # Keep iterating, feeding the previous result in again. With long CV vectors each
    # iteration is slow, so abort early once the maximum spread is below the threshold.
    # Only the latest point set is kept; earlier iterations are never read again.
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write("Rep iter %d: \n" % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write(" maxspread was %f\n" % maxspread)
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, i))

    # delete the padding point
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))

    # Membership is tested once per atom row below; use a set instead of scanning the list
    ndx_atom_set = set(ndx_atoms) if use_posres == 1 else None

    # Write the CV control data for the next iteration.
    for k in range(len(adjusted)):
        # Not necessary for the start/end-points in the fix_endpoints case, the data is
        # just bypassed in the caller script
        if fix_endpoints == 1 and (k == 0 or k == (len(adjusted) - 1)):
            continue

        if use_posres == 1:
            # Read the previous (input) resconf before the output is opened for writing,
            # so the input is never truncated should both paths coincide.
            with open(last_resconfs[k], "r") as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            pathpoint = adjusted[k]  # the 1-D list of CVs (positions): x,y,z * nbr atoms in index
            if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))

            # The output resconf goes into the next iteration as minimization target
            with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])

                # Go through the atoms row-by-row and update the xyz coordinates for the
                # atoms the reparametrize step moved. Only positions are copied; velocities
                # are dropped.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])

                    if atomnr in ndx_atom_set:
                        # Update to new coords
                        x = pathpoint[cvpos]
                        y = pathpoint[cvpos + 1]
                        z = pathpoint[cvpos + 2]
                        cvpos += 3

                    # Write out the row, updated or not
                    rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))

                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[-1])
        else:
            for chain in range(Nchains):
                # Read the input itp before the output is opened for writing, so the input
                # is never truncated should both paths coincide.
                with open(includes[k][chain], "r") as in_itpf:
                    in_itp = in_itpf.read()
                moltop = in_itp.split("[ dihedral_restraints ]")[0]

                with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
                    restraint_itp.write("%s" % moltop)

                    sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n")

                    pathpoint = adjusted[k]  # just a list of phi/psi angles

                    if Nchains == 1:
                        protein = molecule(top)
                    else:
                        protein = molecule("%s" % includes[k][chain])

                    # Create a lookup-table for the protein topology that maps residue to
                    # dihedrally relevant backbone atom indices for N, CA and C.
                    dih_atoms = {}
                    for a in protein:
                        if a.atomname in ("CA", "N", "C"):
                            dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                    # Use the lookup-table built above and get the dihedral specification
                    # atoms needed for each residue in the selection.
                    pos = 0
                    for r in rsel:
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]["C"], dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"]]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"], dih_atoms[r + 1]["N"]]

                        # get phi and psi values from the reparametrization vector
                        # NOTE(review): for chain > 0 the phi slot (pos + chain) is the
                        # previous chain's psi slot. If readxvg_flat stores [phi, psi] per
                        # chain this should presumably be pos + 2 * chain; confirm against
                        # readxvg_flat before changing.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (phi,psi vals * number of chains apart)
                        pos += 2 * Nchains

                        # In the Gromacs 4.6+ format the k-factor lives in this row (before,
                        # it was in the .mdp as dihre_fc). Since different stages need
                        # different force constants, a searchable KFAC placeholder is
                        # written here and replaced later.
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)
                        )
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0)
                        )