Пример #1
0
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write one topology and one dihedral-restraint .itp per string point and chain.

    For every string point k in range(n) this writes 'topol_<k>.top' and, for
    every chain, 'res_<k>_chain_<mol>.itp' into the current working directory.
    The restraint rows carry the literal placeholder 'KFAC' instead of a force
    constant so that it can be substituted later.

    Parameters:
        inp: object providing getOutputDir(); only used to locate the g_rama
            .xvg output when initial_confs is given.
        initial_confs: optional sequence of structure files, one per string
            point. When None or empty, the phi/psi values are linearly
            interpolated between start_xvg and end_xvg instead.
        start: passed to res_selection.res_select() to map index atoms to
            residues.
        end: not used by this function.
        start_xvg, end_xvg: files read with readxvg.readxvg() for the
            interpolation end points (ignored when initial_confs is given).
        tpr: run input file passed to g_rama via '-s'.
        top: path of the system topology.
        includes: per-chain include (.itp) paths; may be empty.
        n: number of string points, including start and end point. Must be at
            least 2 when interpolating, because the interpolation divides by
            n - 1.
        ndx_file: index file read with res_selection.read_ndx().
        Nchains: number of chains to write restraints for.
    """
    # Get the atoms involved with the residues to use for dihedrals (might be more than one atom in the index
    # per residue, since it's probably generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial structures
        # Note: when we get an initial_confs[] array, we use it for all points and
        # the start/end input parameters are completely ignored
        # TODO: assert that len(initial_confs) == n otherwise?

        ramaprocs = {}
        stringpts = {}  # Will have 4 levels: stringpoint, residue, chain, phi/psi value

        # NOTE(review): '0%3d' pads with spaces, not zeros (e.g. '0  3.xvg'); '%03d' was
        # presumably intended. Left unchanged because the writer and the reader below use
        # the same pattern. The file is written relative to the working directory but read
        # back from inp.getOutputDir() -- confirm that these are the same directory.

        # The devnull handle is closed again once all g_rama processes have finished.
        with open(os.devnull, 'w') as FNULL:  # dont generate spam from g_rama
            # Run g_rama (in parallel) on each structure and output to a temporary .xvg
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                ramaprocs[i] = Popen(['g_rama', '-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i],
                                     stdout=FNULL, stderr=FNULL)

            # Go through the output from the rama sub-processes and read the xvg outputs
            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the original per-chain itps (if any)
    # There will be one topol_x.top per string point

    sys.stderr.write('%s' % includes)

    # The topology text is the same for every string point, so read it only once
    with open(top) as in_topf:
        top_text = in_topf.read()

    for k in range(n):
        out_text = top_text
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted as a regular
                # expression (its '.' would otherwise match any character)
                out_text = out_text.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(out_text)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start the restraint itp with a copy of the chain's own topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the topology file
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain includes: the restraint itp is spliced into the main
                    # topology in place of the position restraint marker line
                    protein = molecule(top)
                    in_itp = top_text.split('; Include Position restraint file')
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Create a lookup-table for the protein topology that maps residue to dihedrally relevant
                # backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                # Use the lookup-table built above and get the dihedral specification atoms needed for each
                # residue in the selection.
                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = (dih_atoms[r - 1]['C'], dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'])

                    # psi is N, CA and C on this residue and N on the next
                    psi = (dih_atoms[r]['N'], dih_atoms[r]['CA'], dih_atoms[r]['C'], dih_atoms[r + 1]['N'])

                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor from 0 to 1
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[] structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Write phi, psi angles and the associated k factor into a row in the restraint file
                    # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as
                    # dihre_fc.
                    # Since we need different force constants in different stages, we need to put
                    # a searchable placeholder in the file here and replace it later. KFAC is normally
                    # a %8.4f number.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top): 
    """Average swarm dihedrals per string point, reparametrize, and write restraint files.

    diheds is indexed as diheds[string point][swarm] and holds paths of .xvg
    files; start_xvg/end_xvg hold the dihedrals of the fixed end points.
    end_conf and top are not used by the code below.

    NOTE(review): the output section at the end of this function references
    names that are neither parameters nor locals (Nchains, include, stack) and
    calls .split() on a file object, so it cannot run as written unless those
    names exist at module level -- confirm before use.
    """
    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s'%start_conf,'%s'%selection)
    
    # calculate average drift in collective variables space
    sys.stderr.write('Residue selection: %s' %rsel)
    newpts = []
    for interp in range(len(diheds)):
            avg = []
            for r in rsel:
                    driftList = []
                    for i in range(len(diheds[interp])):
                            vec=[]
                            # NOTE(review): this file handle is never closed explicitly
                            xvg = open(diheds[interp][i],'r')
                            for line in xvg:
                                    # keep only the lines that end in '-<residue number>'
                                    if re.search(r'\-%d\n'%r,line):
                                            phi_val = float(line.split()[0])
                                            psi_val = float(line.split()[1])
                                            vec+=[phi_val,psi_val]
                            driftList.append(vec)
                    # driftList has phi,psi values for residue in every swarm
                    avg+=[scale((1/float(Nswarms)),reduce(mapadd,driftList))]
            # NOTE(review): this extends newpts with one entry per residue, while initpt and
            # targetpt below are flat over all residues; the shapes only agree for a single
            # selected residue -- confirm
            newpts+=avg

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
            xvg = open(start_xvg,'r')
            for line in xvg:
                    if re.search(r'\-%d\n'%r,line):
                            phi_val = float(line.split()[0])
                            psi_val = float(line.split()[1])
                            initpt+=[phi_val,psi_val]

    targetpt = []
    for r in rsel:
            xvg = open(end_xvg,'r')
            for line in xvg:
                    if re.search(r'\-%d\n'%r,line):
                            phi_val = float(line.split()[0])
                            psi_val = float(line.split()[1])
                            targetpt+=[phi_val,psi_val]
    # something with 1 indexing makes this padding necessary.
    paddingpt=[0]*len(initpt)
    newpts.insert(0,initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' %newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n'%(pt[0],pt[1]))
    adjusted=rep_pts(newpts)
    # TODO implement a dist_treshold=1.0
    # Apply rep_pts 100 more times, each time to the previous result
    iters=[adjusted]
    for i in range(100):
        iters.append(rep_pts(iters[i]))

    adjusted=iters[-1]
    # delete the padding point
    adjusted=adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n'%(pt[0],pt[1]))

    # calculate reparam distance

    # TODO measure the distance between the reparametrized points and the input points

    # write the topology for the next iteration
    # treat the reparam values as a stack
    
    # The first and last entries of adjusted (the fixed end points) are skipped
    for k in range(1,len(adjusted)-1):
        # NOTE(review): Nchains is not defined in this function
        for chain in range(Nchains):
            restraint_itp=open('topol_%d_chain_%d.top'%(k,chain),'w')
            # NOTE(review): 'include' is not defined in this function, and mode 'w' truncates
            # the file that looks like it was meant to be read here
            in_itp=open(include[k][chain], 'w')
            # NOTE(review): restraint_itp is a file object and has no split(); presumably the
            # text of in_itp was meant -- confirm
            moltop=restraint_itp.split('[ dihedral_restraints ]')[0]
            restraint_itp.write('%s'%moltop)
            sys.stderr.write("Writing restraints for interpolant point %d chain %d\n"%(k,chain))
            restraint_itp.write("[ dihedral_restraints ]\n")
            restraint_itp.write("; ai   aj   ak   al  type  label  phi  dphi  kfac  power\n")
            pathpoint=adjusted[k] # just a list of phi/psi angles
            protein=molecule('%s'%include[k][chain])
            # keep track of the position in the path point
            pos=0
            for r in rsel:
                    # there may be multiple residues matching the resnr, e.g., dimers
                    phi = [a for a in protein if (a.resnr == int(r) and
                          (a.atomname == 'CA' or a.atomname == 'N' or a.atomname == 'C')) or
                          (a.resnr == int(r)-1 and a.atomname == 'C')]

                    psi = [a for a in protein if (a.resnr == int(r) and
                          (a.atomname == 'N' or a.atomname == 'CA' or a.atomname == 'C')) or
                          (a.resnr == int(r)+1 and a.atomname == 'N')]

                    # get phi and psi values from the reparametrization vector
                    # NOTE(review): 'stack' is not defined in this function; presumably
                    # pathpoint was meant -- confirm
                    phi_val=stack[pos+chain]
                    psi_val=stack[pos+chain+1]
                    
                    
                    # write phi, psi angles
                    # NOTE(review): phi/psi hold the atom objects selected above, not atom
                    # numbers, yet they are formatted with %5d -- confirm
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        %(phi[0],phi[1],phi[2],phi[3],1,1,phi_val,0,1,2))
                    restraint_itp.write("%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n"
                                        %(psi[0],psi[1],psi[2],psi[3],1,1,psi_val,0,1,2))

                        #restraint_itp.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(phi[i*4].atomnr,phi[i*4+1].atomnr,
                        #                  phi[i*4+2].atomnr,phi[i*4+3].atomnr,1,1,phi_val,0,1,2))
                        #restraint_itp.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(psi[i*4].atomnr,psi[i*4+1].atomnr,
                        #                  psi[i*4+2].atomnr,psi[i*4+3].atomnr,1,1,psi_val,0,1,2))

                    # delete the already added values from the stack
                    pos+=2*Nchains
            restraint_itp.close()
Пример #3
0
def _backbone_dihedral_atoms(protein):
    """Map each residue number to a dict of its N, CA and C atom numbers."""
    table = {}
    for atom in protein:
        if atom.atomname in ('CA', 'N', 'C'):
            table.setdefault(atom.resnr, {})[atom.atomname] = atom.atomnr
    return table


def _write_rep_resconf(out_name, in_name, pathpoint, moved_atoms):
    """Copy the .gro file in_name to out_name with new x,y,z for the atoms in moved_atoms."""
    # Read the whole input before the output is opened, so that a failure here does not
    # leave a truncated output file behind
    with open(in_name, 'r') as in_resconf_f:
        in_resconf = in_resconf_f.readlines()

    if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
        sys.stderr.write('adjusted[] entry of wrong length %d\n' %
                         len(pathpoint))

    # The first 2 rows (title and number of atoms) are copied straight over
    out_rows = [in_resconf[0], in_resconf[1]]

    # Only positions are written. The velocities are not needed as these files only serve
    # as a base for the next iteration's position restraint coordinates.
    cvpos = 0
    for line in in_resconf[2:-1]:
        resname = line[0:8]
        atname = line[8:15]
        atomnr = int(line[15:20])
        if atomnr in moved_atoms:
            # Update to new coords: pathpoint is x,y,z per moved atom, in file order
            x = pathpoint[cvpos]
            y = pathpoint[cvpos + 1]
            z = pathpoint[cvpos + 2]
            cvpos += 3
        else:
            x = float(line[20:28])
            y = float(line[28:36])
            z = float(line[36:44])
        out_rows.append('%s%s%5d%8.3f%8.3f%8.3f\n' %
                        (resname, atname, atomnr, x, y, z))

    # The last row is the cell dimensions
    out_rows.append(in_resconf[-1])

    with open(out_name, 'w') as rep_resconf:
        rep_resconf.writelines(out_rows)


def _dihedral_restraint_rows(dih_atoms, rsel, pathpoint, chain, Nchains):
    """Format the phi and psi restraint rows of one chain for every residue in rsel."""
    rows = []
    pos = 0
    for r in rsel:
        # phi is C on the previous residue, and N, CA, C on this
        phi = (dih_atoms[r - 1]['C'], dih_atoms[r]['N'],
               dih_atoms[r]['CA'], dih_atoms[r]['C'])
        # psi is N, CA and C on this residue and N on the next
        psi = (dih_atoms[r]['N'], dih_atoms[r]['CA'],
               dih_atoms[r]['C'], dih_atoms[r + 1]['N'])

        # NOTE(review): with more than one chain, consecutive chains read overlapping
        # entries here (pos + chain + 1 of one chain is pos + chain of the next). Whether
        # that is right depends on the layout returned by readxvg.readxvg_flat, which is
        # not visible here -- confirm.
        phi_val = pathpoint[pos + chain]
        psi_val = pathpoint[pos + chain + 1]

        # Go to the next residue (phi,psi vals * number of chains apart)
        pos += 2 * Nchains

        # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the
        # .mdp as dihre_fc. Since different force constants are needed in different
        # stages, a searchable placeholder is written and replaced later.
        rows.append("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                    (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
        rows.append("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                    (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
    return rows


def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains,
                  start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top,
                  includes):
    """Average the swarm results, reparametrize the string and write the new restraints.

    Output goes to the current working directory: 'rep_resconf_<k>.gro' per
    string point when position restraints are used, otherwise
    'res_<k>_chain_<chain>.itp' per string point and chain.

    Parameters:
        use_posres: 1 to use atom positions as collective variables (CVs),
            0 to use phi/psi dihedrals.
        fix_endpoints: 1 if the first and last string points are fixed; they
            are then read from the start/end inputs and get no output file.
        cvs: cvs[string point][swarm] paths of the swarm result files (.gro
            for positions, .xvg for dihedrals).
        ndx_file: index file read with res_selection.read_ndx().
        Nchains: number of chains (dihedral case).
        start_conf, end_conf: end point structures (position case; start_conf
            is also used for the residue selection in the dihedral case).
        start_xvg, end_xvg: end point dihedrals (dihedral case).
        last_resconfs: per string point, the previous restraint .gro file that
            the new coordinates are merged into (position case).
        top: topology parsed for the backbone atoms when Nchains == 1.
        includes: includes[string point][chain] .itp paths (dihedral case).

    Raises:
        ValueError: if use_posres is neither 0 nor 1.
    """
    if use_posres not in (0, 1):
        # Previously this surfaced later as a NameError on the residue selection
        raise ValueError('use_posres must be 0 or 1, got %r' % (use_posres,))
    posres = (use_posres == 1)

    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will read the
    # entire file and select the same residues in each chain. The position restraints use the atom
    # indices directly.
    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the atom-selection
    # Nchains times for the posres case. The ndx file is for atoms inside the chain, but the .gro will
    # contain global numbering.
    rsel = None
    if not posres:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select('%s' % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts is a per-string-point list of CV points (each a list of the CV dimension length).
    # Note: cvs[][] only covers the string points that actually were swarm-processed, so with
    # fix_endpoints the start/end are added to newpts further below.
    newpts = []
    for swarm_files in cvs:
        if posres:
            swarmpts = [rwgro.readgro_flat(swarm_file, ndx_atoms)
                        for swarm_file in swarm_files]
        else:
            swarmpts = [readxvg.readxvg_flat(swarm_file, rsel)
                        for swarm_file in swarm_files]
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the start/end
    # are allowed to drift just like the other points, and are already part of newpts)
    if fix_endpoints == 1:
        if posres:
            # TODO: the start/end_conf are full Systems so the atom numbering aliases for the ndx_atoms array :/
            # Currently fixed in readgro_flat temporary, hardcoded for the GLIC Protein number.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write('Length of initpt %d, targetpt %d\n' %
                         (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary. TODO: check if this is needed anymore
    newpts.append([0] * len(newpts[0]))

    # Do the actual reparameterization.
    # ext_rep_pts returns the maximum spread of the CV distances between points in [0] and the
    # adjusted points in [1].
    adjusted = ext_rep_pts(newpts)[1]  # get the points only, ignore the spread result

    # Keep iterating, feeding the previous result into ext_rep_pts again, until the maximum spread
    # drops below the threshold or 150 iterations have been done. Only the latest iterate is kept;
    # with long CV vectors, holding on to every iterate is needlessly expensive.
    i = 0
    maxspread = 100.0
    while i < 150 and maxspread > 0.012:
        sys.stderr.write('Rep iter %d: \n' % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write('  maxspread was %f\n' % maxspread)
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write('Final maximum spread %f after %d iterations.\n' %
                     (maxspread, i))

    # delete the padding point
    adjusted = adjusted[:-1]

    sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted))

    # Write the CV control data for the next iteration
    if posres:
        # Set membership instead of scanning the index list for every atom row
        moved_atoms = set(ndx_atoms)

    last_k = len(adjusted) - 1
    for k in range(len(adjusted)):
        # Not necessary to do this output for the start/end-points in the fix_endpoints case, the data
        # is just bypassed in the caller script
        if fix_endpoints == 1 and (k == 0 or k == last_k):
            continue

        pathpoint = adjusted[k]  # the 1-D list of CVs of this string point

        if posres:
            # The output resconf goes into the next iteration as minimization target
            _write_rep_resconf('rep_resconf_%d.gro' % k, last_resconfs[k],
                               pathpoint, moved_atoms)
            continue

        for chain in range(Nchains):
            # Read and parse all inputs before the output file is opened, so that the output is
            # never truncated while an input still has to be read
            with open(includes[k][chain], 'r') as in_itpf:
                moltop = in_itpf.read().split('[ dihedral_restraints ]')[0]

            if Nchains == 1:
                protein = molecule(top)
            else:
                protein = molecule('%s' % includes[k][chain])

            sys.stderr.write(
                "Writing restraints for stringpoint %d chain %d\n" %
                (k, chain))

            rows = _dihedral_restraint_rows(_backbone_dihedral_atoms(protein),
                                            rsel, pathpoint, chain, Nchains)

            with open('res_%d_chain_%d.itp' % (k, chain), 'w') as restraint_itp:
                restraint_itp.write(moltop)
                # Note: this format is for Gromacs 4.6+
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write(
                    "; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n"
                )
                restraint_itp.writelines(rows)
Пример #4
0
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg, top):
    """Average swarm dihedrals, reparametrize the string and write '<k>.top' files.

    Parameters:
        diheds: diheds[string point][swarm] paths of .xvg files holding the
            phi/psi values after the swarm runs.
        selection: residue selection, passed on to res_selection.res_select().
        start_conf: structure used for the residue selection and for the
            protein atom list.
        start_xvg, end_xvg: .xvg files with the dihedrals of the fixed start
            and end point.
        end_conf: not used by this function.
        top: topology template; it is split at the text
            '#include dihedral_restraints' and the restraints are written in
            between the two parts.

    Side effects: writes 'dihedrals<point>.dat' for every swarm-processed
    point and '<k>.top' for every interior string point into the current
    working directory.

    Note: the restraint rows use hard-coded atom numbers and extra theta/zeta
    restraints for alanine dipeptide, and the theta/zeta tables have 20
    entries.
    """
    def read_phi_psi(xvg_path, resnr):
        # Collect [phi, psi, phi, psi, ...] from every line ending in '-<resnr>'
        wanted = re.compile(r'\-%d\n' % resnr)
        values = []
        with open(xvg_path, 'r') as xvg:
            for line in xvg:
                if wanted.search(line):
                    fields = line.split()
                    values += [float(fields[0]), float(fields[1])]
        return values

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)

    # calculate average drift in collective variables space
    sys.stderr.write('Residue selection: %s' % rsel)
    newpts = []
    for interp, swarm_files in enumerate(diheds):
        avg = []
        for r in rsel:
            # driftList has phi,psi values for residue in every swarm
            driftList = [read_phi_psi(swarm_xvg, r) for swarm_xvg in swarm_files]
            with open('dihedrals%d.dat' % interp, 'w') as driftdat:
                for pt in driftList:
                    driftdat.write('%f %f\n' % (pt[0], pt[1]))
            avg += [scale((1 / float(Nswarms)), reduce(mapadd, driftList))]
        newpts += avg

    # extract initial and target dihedral values
    initpt = []
    targetpt = []
    for r in rsel:
        initpt += read_phi_psi(start_xvg, r)
    for r in rsel:
        targetpt += read_phi_psi(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # Apply rep_pts 101 times in total, each time to the previous result
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # temporary additional restraints for alanine dipeptide
    theta_val=[1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999, 0.7599999999999999, 0.6399999999999999, 0.5199999999999998, 0.3999999999999999, 0.2799999999999998, 0.1599999999999997, 0.039999999999999813, -0.0800000000000003, -0.20000000000000018, -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8]
    zeta_val=[-4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998, -1.2999999999999998, -0.7999999999999998, -0.2999999999999998, 0.20000000000000018, 0.7000000000000002, 1.2000000000000002, 1.7000000000000002, 2.2, 2.7, 3.2, 3.7, 4.2, 4.7, 5.7]

    # write the topology for the next iteration
    # treat the reparam values as a stack
    with open(top, 'r') as top_f:
        top_parts = top_f.read().split('#include dihedral_restraints')

    # The first and last string point are the fixed end points and get no file
    for k in range(1, len(adjusted) - 1):
        with open('%d.top' % k, 'w') as newtop:
            newtop.write('%s' % top_parts[0])
            sys.stderr.write("Writing restraints for interpolant index %i\n" %k)
            newtop.write("[ dihedral_restraints ]\n")
            newtop.write("; ai   aj   ak   al  type  label  phi  dphi  kfac  power\n")
            stack = adjusted[k]
            protein = res_selection.protein('%s' % start_conf)
            for r in rsel:
                # there may be multiple residues matching the resnr, e.g., dimers
                phi = [a for a in protein if (a.resnr == int(r) and
                      (a.atomname == 'CA' or a.atomname == 'N' or a.atomname == 'C')) or
                      (a.resnr == int(r)-1 and a.atomname == 'C')]

                # Four atoms per phi dihedral; floor division keeps this an int for range()
                numres = len(phi) // 4
                for i in range(numres):
                    # The values are stored as [phi, psi, phi, psi, ...], one pair per match
                    phi_val = stack[2 * i]
                    psi_val = stack[2 * i + 1]
                    # write phi, psi angles
                    # TODO EXPLICIT DIHEDRALS FOR ALANINE DIPEPTIDE: use the atom numbers
                    # of the selected phi/psi atoms instead of the hard-coded ones
                    newtop.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(5,7,9,15,1,1,phi_val,0,1,2))
                    newtop.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(7,9,15,17,1,1,psi_val,0,1,2))

                    newtop.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(1,5,7,9,1,1,theta_val[k],0,1,2))
                    newtop.write("%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"%(9,15,17,19,1,1,zeta_val[k],0,1,2))

                # delete the already added values (one phi/psi pair per match) from the stack
                stack = stack[numres * 2:]
            newtop.write('%s' % top_parts[1])
Пример #5
0
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx,
                     Nchains):
    """Write interpolated phi/psi dihedral restraints for interior path points.

    For every index k in ``range(1, n - 1)`` and every chain, a file
    ``dihre_<k>_chain_<chain>.itp`` is written with one phi and one psi
    restraint line per selected residue, and a matching ``topol_<k>.top`` is
    produced that refers to it.  The restrained angles are interpolated
    linearly between the values read from ``start_xvg`` and ``end_xvg``.

    Args:
        start: structure file passed to ``res_selection.res_select``.
        end: not used by this function; kept for interface compatibility.
        start_xvg: path of the xvg file holding the starting phi/psi values.
        end_xvg: path of the xvg file holding the final phi/psi values.
        top: path of the master topology file.
        includes: per-chain topology paths referenced by ``top``; may be
            empty, in which case ``top`` itself is parsed for the atoms.
        n: point count (converted with ``int``); controls the loop range and
            the interpolation step.
        ndx: index selection passed to ``res_selection.res_select``.
        Nchains: number of chains to write restraint files for.

    Raises:
        IndexError: if ``includes`` is empty and ``top`` lacks the
            ``; Include Position restraint file`` marker, or if an xvg file
            has fewer matching lines for a residue than there are chains.
    """
    with open(start_xvg, 'r') as xvg:
        start_lines = xvg.readlines()
    with open(end_xvg, 'r') as xvg:
        end_lines = xvg.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    def collect_points(lines):
        # One [phi, psi] pair per line ending in "-<residue>"; a residue
        # number occurring several times (one line per chain) yields several
        # entries, in file order.
        points = {}
        for r in selection:
            matcher = re.compile(r'\-%s$' % r)
            points[r] = []
            for line in lines:
                if matcher.search(line):
                    fields = line.split()
                    points[r].append([float(fields[0]), float(fields[1])])
        return points

    # create the path end points
    startpts = collect_points(start_lines)
    endpts = collect_points(end_lines)

    sys.stderr.write('%s' % includes)

    # One topology per interior point, with the per-chain includes redirected
    # to the restraint-carrying copies written below.
    for k in range(1, n - 1):
        with open(top) as top_file:
            in_top = top_file.read()
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted
                # as a regular expression ('.' would match any character).
                in_top = in_top.replace(
                    includename, 'dihre_%d_chain_%d.itp' % (k, mol))
        # Closing the file here guarantees the content is flushed before the
        # same path may be rewritten in the loop below.
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n - 1):
        for mol in range(Nchains):
            itp_name = 'dihre_%d_chain_%d.itp' % (k, mol)
            with open(itp_name, 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own topology
                    with open(includes[mol]) as chain_top:
                        restraint_itp.write(chain_top.read())
                # write the initial part of the restraint section
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain includes: splice an #include line for the
                    # restraint file into a copy of the master topology.
                    protein = molecule(top)
                    with open(top, 'r') as top_file:
                        in_itp = top_file.read().split(
                            '; Include Position restraint file')
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(in_itp[0])
                        out_top.write(
                            '#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    resnr = int(r)
                    # phi: C of the previous residue plus N, CA, C of this one
                    phi = [
                        a for a in protein
                        if (a.resnr == resnr
                            and a.atomname in ('CA', 'N', 'C'))
                        or (a.resnr == resnr - 1 and a.atomname == 'C')
                    ]
                    # psi: N, CA, C of this residue plus N of the next one
                    psi = [
                        a for a in protein
                        if (a.resnr == resnr
                            and a.atomname in ('N', 'CA', 'C'))
                        or (a.resnr == resnr + 1 and a.atomname == 'N')
                    ]

                    # NOTE(review): the step is (end - start) / n although k
                    # only runs over 1..n-2; confirm whether n - 1 was meant.
                    phi_val = startpts[r][mol][0] + (
                        endpts[r][mol][0] - startpts[r][mol][0]) / n * k
                    psi_val = startpts[r][mol][1] + (
                        endpts[r][mol][1] - startpts[r][mol][1]) / n * k

                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n" %
                        (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                         phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write(
                        "%5d%5d%5d%5d %8.4f%5d%5d\n" %
                        (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                         psi[3].atomnr, psi_val, 0, 1))
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg,
                  top):
    """Average swarm drifts, reparametrize the path and write new restraints.

    The phi/psi values of the selected residues are read from every swarm xvg
    file in ``diheds``, averaged per path point, framed by the start and end
    points, passed repeatedly through ``rep_pts`` and finally written as
    dihedral restraints to ``topol_<k>_chain_<chain>.top`` for each interior
    point k.

    Args:
        diheds: list (one entry per path point) of lists of swarm xvg paths.
        selection: index selection passed to ``res_selection.res_select``.
        start_conf: structure file passed to ``res_selection.res_select``.
        start_xvg: xvg file with the phi/psi values of the start point.
        end_conf: not used by this function.
        end_xvg: xvg file with the phi/psi values of the end point.
        top: not used by this function.

    NOTE(review): ``Nchains`` and ``include`` are not parameters; they are
    looked up as globals, with ``include[k][chain]`` used as the path of the
    per-chain topology for point k.  Confirm they are defined at module level.
    """

    def read_angles(xvg_path, resnr):
        # Flat [phi, psi, phi, psi, ...] list: one pair per line ending in
        # "-<resnr>", in file order (so one pair per chain).
        matcher = re.compile(r'\-%d\n' % resnr)
        angles = []
        with open(xvg_path, 'r') as xvg:
            for line in xvg:
                if matcher.search(line):
                    fields = line.split()
                    angles += [float(fields[0]), float(fields[1])]
        return angles

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)

    # calculate average drift in collective variables space
    sys.stderr.write('Residue selection: %s' % rsel)
    newpts = []
    for swarm_files in diheds:
        for r in rsel:
            # phi,psi values of this residue in every swarm
            driftList = [read_angles(path, r) for path in swarm_files]
            newpts.append(
                scale((1 / float(Nswarms)), reduce(mapadd, driftList)))

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_angles(start_xvg, r)

    targetpt = []
    for r in rsel:
        targetpt += read_angles(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 further passes; only the last result is used.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # write the topology for the next iteration
    for k in range(1, len(adjusted) - 1):
        pathpoint = adjusted[k]  # just a list of phi/psi angles
        for chain in range(Nchains):
            chain_top = include[k][chain]
            # Read (not truncate) the existing chain topology and keep
            # everything before any previous restraint section.
            with open(chain_top, 'r') as in_itp:
                moltop = in_itp.read().split('[ dihedral_restraints ]')[0]
            protein = molecule('%s' % chain_top)
            out_name = 'topol_%d_chain_%d.top' % (k, chain)
            with open(out_name, 'w') as restraint_itp:
                restraint_itp.write('%s' % moltop)
                sys.stderr.write(
                    "Writing restraints for interpolant point %d chain %d\n" %
                    (k, chain))
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write(
                    "; ai   aj   ak   al  type  label  phi  dphi  kfac  power\n")
                # keep track of the position in the path point
                pos = 0
                for r in rsel:
                    resnr = int(r)
                    # phi: C of the previous residue plus N, CA, C of this one
                    phi = [
                        a for a in protein
                        if (a.resnr == resnr
                            and a.atomname in ('CA', 'N', 'C'))
                        or (a.resnr == resnr - 1 and a.atomname == 'C')
                    ]
                    # psi: N, CA, C of this residue plus N of the next one
                    psi = [
                        a for a in protein
                        if (a.resnr == resnr
                            and a.atomname in ('N', 'CA', 'C'))
                        or (a.resnr == resnr + 1 and a.atomname == 'N')
                    ]

                    # Each residue contributes [phi, psi] per chain, so this
                    # chain's pair starts 2 * chain entries into the
                    # residue's slice of the path point.
                    phi_val = pathpoint[pos + 2 * chain]
                    psi_val = pathpoint[pos + 2 * chain + 1]

                    # write phi, psi angles using the atom numbers
                    restraint_itp.write(
                        "%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n" %
                        (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                         phi[3].atomnr, 1, 1, phi_val, 0, 1, 2))
                    restraint_itp.write(
                        "%5d%5d%5d%5d%5d%5d %8.4f%5d%5d%5d\n" %
                        (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                         psi[3].atomnr, 1, 1, psi_val, 0, 1, 2))

                    # skip past the values of all chains for this residue
                    pos += 2 * Nchains
Пример #7
0
def reparametrize(diheds, selection, start_conf, start_xvg, end_conf, end_xvg,
                  top):
    """Average swarm drifts, reparametrize the path and write ``<k>.top`` files.

    The phi/psi values of the selected residues are read from every swarm xvg
    file in ``diheds`` and averaged per path point; the raw per-swarm values
    are also dumped to ``dihedrals<interp>.dat``.  The averaged points are
    framed by the start and end points, passed repeatedly through
    ``rep_pts`` and written as dihedral restraints into a copy of ``top``
    for every interior point k.

    The restraint lines use atom indices hard-coded for alanine dipeptide,
    plus two extra restraints taken from fixed ``theta_val`` / ``zeta_val``
    tables of 20 entries (indexing them with k raises ``IndexError`` for
    longer paths).

    Args:
        diheds: list (one entry per path point) of lists of swarm xvg paths.
        selection: index selection passed to ``res_selection.res_select``.
        start_conf: structure file used for the residue selection and for
            ``res_selection.protein``.
        start_xvg: xvg file with the phi/psi values of the start point.
        end_conf: not used by this function.
        end_xvg: xvg file with the phi/psi values of the end point.
        top: topology template; must contain the text
            ``#include dihedral_restraints`` where the restraints go.

    Raises:
        IndexError: if ``top`` lacks the ``#include dihedral_restraints``
            marker, or a swarm file has no line for a selected residue.
    """

    def read_angles(xvg_path, resnr):
        # Flat [phi, psi, phi, psi, ...] list: one pair per line ending in
        # "-<resnr>", in file order (so one pair per chain).
        matcher = re.compile(r'\-%d\n' % resnr)
        angles = []
        with open(xvg_path, 'r') as xvg:
            for line in xvg:
                if matcher.search(line):
                    fields = line.split()
                    angles += [float(fields[0]), float(fields[1])]
        return angles

    Nswarms = len(diheds[0])
    rsel = res_selection.res_select('%s' % start_conf, '%s' % selection)

    # calculate average drift in collective variables space
    sys.stderr.write('Residue selection: %s' % rsel)
    newpts = []
    for interp, swarm_files in enumerate(diheds):
        for r in rsel:
            # phi,psi values of this residue in every swarm
            driftList = [read_angles(path, r) for path in swarm_files]
            # The file is reopened in 'w' mode for every residue, so it ends
            # up holding the values of the last residue only.
            with open('dihedrals%d.dat' % interp, 'w') as driftdat:
                for pt in driftList:
                    driftdat.write('%f %f\n' % (pt[0], pt[1]))
            newpts.append(
                scale((1 / float(Nswarms)), reduce(mapadd, driftList)))

    # extract initial and target dihedral values
    initpt = []
    for r in rsel:
        initpt += read_angles(start_xvg, r)

    targetpt = []
    for r in rsel:
        targetpt += read_angles(end_xvg, r)

    # something with 1 indexing makes this padding necessary.
    paddingpt = [0] * len(initpt)
    newpts.insert(0, initpt)
    newpts.append(targetpt)
    newpts.append(paddingpt)
    sys.stderr.write('The new list of points is: %s\n' % newpts)
    for pt in newpts:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # One initial pass plus 100 further passes; only the last result is used.
    # TODO implement a dist_treshold=1.0
    adjusted = rep_pts(newpts)
    for _ in range(100):
        adjusted = rep_pts(adjusted)

    # delete the padding point
    adjusted = adjusted[:-1]
    sys.stderr.write('The adjusted points are:\n')
    for pt in adjusted:
        sys.stderr.write('%s %s\n' % (pt[0], pt[1]))

    # TODO measure the distance between the reparametrized points and the input points

    # temporary additional restraints for alanine dipeptide
    theta_val = [
        1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999,
        0.7599999999999999, 0.6399999999999999, 0.5199999999999998,
        0.3999999999999999, 0.2799999999999998, 0.1599999999999997,
        0.039999999999999813, -0.0800000000000003, -0.20000000000000018,
        -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8
    ]
    zeta_val = [
        -4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998, -1.2999999999999998,
        -0.7999999999999998, -0.2999999999999998, 0.20000000000000018,
        0.7000000000000002, 1.2000000000000002, 1.7000000000000002, 2.2, 2.7,
        3.2, 3.7, 4.2, 4.7, 5.7
    ]

    # write the topology for the next iteration
    with open(top, 'r') as top_file:
        top_parts = top_file.read().split('#include dihedral_restraints')

    line_fmt = "%5d%5d%5d%5d%5d%5d%8.4f%5d%5d%5d\n"
    for k in range(1, len(adjusted) - 1):
        with open('%d.top' % k, 'w') as newtop:
            newtop.write('%s' % top_parts[0])
            sys.stderr.write(
                "Writing restraints for interpolant index %i\n" % k)
            newtop.write("[ dihedral_restraints ]\n")
            newtop.write(
                "; ai   aj   ak   al  type  label  phi  dphi  kfac  power\n")
            stack = adjusted[k]
            protein = res_selection.protein('%s' % start_conf)
            for r in rsel:
                resnr = int(r)
                # there may be multiple residues matching the resnr, e.g.,
                # dimers; four phi atoms are found per matching residue
                phi = [
                    a for a in protein
                    if (a.resnr == resnr and a.atomname in ('CA', 'N', 'C'))
                    or (a.resnr == resnr - 1 and a.atomname == 'C')
                ]

                # Floor division keeps this an int for range() on every
                # Python version.
                numres = len(phi) // 4
                for i in range(numres):
                    # The vector holds [phi, psi] per chain, so chain i
                    # starts at offset 2 * i.
                    phi_val = stack[2 * i]
                    psi_val = stack[2 * i + 1]
                    # TODO EXPLICIT DIHEDRALS FOR ALANINE DIPEPTIDE: replace
                    # the fixed atom indices by the atom numbers of the
                    # selected phi/psi atoms.
                    newtop.write(line_fmt %
                                 (5, 7, 9, 15, 1, 1, phi_val, 0, 1, 2))
                    newtop.write(line_fmt %
                                 (7, 9, 15, 17, 1, 1, psi_val, 0, 1, 2))

                    newtop.write(line_fmt %
                                 (1, 5, 7, 9, 1, 1, theta_val[k], 0, 1, 2))
                    newtop.write(line_fmt %
                                 (9, 15, 17, 19, 1, 1, zeta_val[k], 0, 1, 2))

                # drop the values consumed for this residue (two per chain)
                stack = stack[numres * 2:]
            newtop.write('%s' % top_parts[1])
Пример #8
0
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write per-point dihedral restraint files and matching topologies.

    For every string point k in ``range(n)`` and every chain, a file
    ``res_<k>_chain_<chain>.itp`` is written with one phi and one psi
    restraint line per selected residue, using the literal placeholder
    ``KFAC`` for the force constant.  A ``topol_<k>.top`` referring to those
    files is written as well.

    The restrained angles are either interpolated linearly between the values
    in ``start_xvg`` and ``end_xvg`` (when ``initial_confs`` is ``None`` or
    empty) or computed by running the Gromacs rama command on each structure
    in ``initial_confs``.

    Args:
        inp: object whose ``getOutputDir()`` gives the directory from which
            the generated rama xvg files are read back.
        initial_confs: optional list of n structure files; when given, the
            start/end xvg inputs are ignored.
        start: structure file passed to ``res_selection.res_select``.
        end: not used by this function.
        start_xvg: xvg file with the starting phi/psi values.
        end_xvg: xvg file with the final phi/psi values.
        tpr: run-input file passed to the rama command via ``-s``.
        top: path of the master topology file.
        includes: per-chain topology paths referenced by ``top``; may be
            empty, in which case ``top`` itself is parsed for the atoms.
        n: number of points in the string, including start and end point.
        ndx_file: index file read with ``res_selection.read_ndx``.
        Nchains: number of chains to write restraint files for.

    Raises:
        KeyError: if a selected residue has no preceding or following
            residue with the required backbone atom.
        IndexError: if ``includes`` is empty and ``top`` lacks the
            ``; Include Position restraint file`` marker.
    """
    cmdnames = cmds.GromacsCommands()
    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom in the index per residue, since it's probably
    # generated by make_ndx)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)

    use_interpolation = initial_confs is None or len(initial_confs) == 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Have to generate the dihedrals ourselves from the given initial
        # structures; the start/end input parameters are ignored.
        # TODO: assert that len(initial_confs) == n otherwise?

        # Will have 4 levels: stringpoint, residue, chain, phi/psi value
        stringpts = {}

        # The devnull handle silences the rama output; it is held open until
        # every sub-process has finished and then closed.
        with open(os.devnull, 'w') as FNULL:
            # Start all rama runs first so they execute in parallel
            ramaprocs = {}
            for i in range(n):
                # TODO: check for and use g_rama_mpi.. like everywhere else
                cmd = cmdnames.rama.split() + ['-f', initial_confs[i], '-s', tpr,
                                               '-o', '0%3d.xvg' % i]
                ramaprocs[i] = Popen(cmd, stdout=FNULL, stderr=FNULL)

            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding rama task has ended
                ramaprocs[i].communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). One topol_x.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted
                # as a regular expression ('.' would match any character).
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # write the initial part of the restraint section
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain includes: splice an #include line for the
                    # restraint file into a copy of the master topology.
                    protein = molecule(top)
                    with open(top, 'r') as in_itp_f:
                        in_itp = in_itp_f.read().split(
                            '; Include Position restraint file')
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(in_itp[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                # Lookup table: residue number -> {atom name: atom number}
                # for the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                for r in selection:
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], dih_atoms[r]['N'],
                           dih_atoms[r]['CA'], dih_atoms[r]['C']]

                    # psi is N, CA and C on this residue and N on the next
                    psi = [dih_atoms[r]['N'], dih_atoms[r]['CA'],
                           dih_atoms[r]['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        # k is from 0 to n-1, so map it so we get a factor
                        # from 0 to 1
                        phi_val = startpts[r][mol][0] + k * (endpts[r][mol][0] - startpts[r][mol][0]) / (n - 1)
                        psi_val = startpts[r][mol][1] + k * (endpts[r][mol][1] - startpts[r][mol][1]) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[]
                        # structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable KFAC placeholder is written here and
                    # replaced later (normally by a %8.4f number).
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
Пример #9
0
def write_restraints(start, end, start_xvg, end_xvg, top, includes, n, ndx, Nchains):
    """Write interpolated phi/psi dihedral restraints for interior path points.

    For every index k in ``range(1, n - 1)`` and every chain, a file
    ``dihre_<k>_chain_<chain>.itp`` is written with one phi and one psi
    restraint line per selected residue, and a matching ``topol_<k>.top`` is
    produced that refers to it.  The restrained angles are interpolated
    linearly between the values read from ``start_xvg`` and ``end_xvg``.

    Args:
        start: structure file passed to ``res_selection.res_select``.
        end: not used by this function; kept for interface compatibility.
        start_xvg: path of the xvg file holding the starting phi/psi values.
        end_xvg: path of the xvg file holding the final phi/psi values.
        top: path of the master topology file.
        includes: per-chain topology paths referenced by ``top``; may be
            empty, in which case ``top`` itself is parsed for the atoms.
        n: point count (converted with ``int``); controls the loop range and
            the interpolation step.
        ndx: index selection passed to ``res_selection.res_select``.
        Nchains: number of chains to write restraint files for.

    Raises:
        IndexError: if ``includes`` is empty and ``top`` lacks the
            ``; Include Position restraint file`` marker, or if an xvg file
            has fewer matching lines for a residue than there are chains.
    """
    with open(start_xvg, 'r') as xvg_file:
        start_lines = xvg_file.readlines()
    with open(end_xvg, 'r') as xvg_file:
        end_lines = xvg_file.readlines()
    selection = res_selection.res_select(start, ndx)
    n = int(n)

    def parse_angles(lines):
        # Map each selected residue to its [phi, psi] pairs; every line
        # ending in "-<residue>" adds one pair, so there is one per chain.
        table = {}
        for r in selection:
            suffix = re.compile(r'\-%s$' % r)
            pairs = []
            for line in lines:
                if suffix.search(line):
                    cols = line.split()
                    pairs.append([float(cols[0]), float(cols[1])])
            table[r] = pairs
        return table

    # create the path end points
    startpts = parse_angles(start_lines)
    endpts = parse_angles(end_lines)

    sys.stderr.write('%s' % includes)

    # One topology per interior point, with the per-chain includes redirected
    # to the restraint-carrying copies written below.
    for k in range(1, n - 1):
        with open(top) as top_file:
            in_top = top_file.read()
        if len(includes) > 0:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name must not be interpreted
                # as a regular expression ('.' would match any character).
                in_top = in_top.replace(includename,
                                        'dihre_%d_chain_%d.itp' % (k, mol))
        # Closing the file here guarantees the content is flushed before the
        # same path may be rewritten in the loop below.
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    for k in range(1, n - 1):
        for mol in range(Nchains):
            with open('dihre_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own topology
                    with open(includes[mol]) as chain_top:
                        restraint_itp.write(chain_top.read())
                # write the initial part of the restraint section
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  phi  dphi  kfac\n")
                if len(includes) > 0:
                    protein = molecule(includes[mol])
                else:
                    # No per-chain includes: splice an #include line for the
                    # restraint file into a copy of the master topology.
                    protein = molecule(top)
                    with open(top, 'r') as top_file:
                        in_itp = top_file.read().split('; Include Position restraint file')
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(in_itp[0])
                        out_top.write('#include "dihre_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(in_itp[1])

                for r in selection:
                    resnr = int(r)
                    # phi: C of the previous residue plus N, CA, C of this one
                    phi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('CA', 'N', 'C'))
                           or (a.resnr == resnr - 1 and a.atomname == 'C')]
                    # psi: N, CA, C of this residue plus N of the next one
                    psi = [a for a in protein
                           if (a.resnr == resnr and a.atomname in ('N', 'CA', 'C'))
                           or (a.resnr == resnr + 1 and a.atomname == 'N')]

                    # NOTE(review): the step is (end - start) / n although k
                    # only runs over 1..n-2; confirm whether n - 1 was meant.
                    phi_val = startpts[r][mol][0] + (endpts[r][mol][0] - startpts[r][mol][0]) / n * k
                    psi_val = startpts[r][mol][1] + (endpts[r][mol][1] - startpts[r][mol][1]) / n * k

                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (phi[0].atomnr, phi[1].atomnr, phi[2].atomnr,
                                           phi[3].atomnr, phi_val, 0, 1))
                    restraint_itp.write("%5d%5d%5d%5d %8.4f%5d%5d\n"
                                        % (psi[0].atomnr, psi[1].atomnr, psi[2].atomnr,
                                           psi[3].atomnr, psi_val, 0, 1))
Пример #10
0
import os
import argparse
import res_selection

# Command-line script: for each interpolation step k it starts a topology
# file "<k>.top" from the part of the template that precedes the
# "#include dihedral_restraints" marker and reads the matching "<k>.xvg".
parser = argparse.ArgumentParser()
parser.add_argument('-i',required=True, help='The initial .gro file')
parser.add_argument('-n',required=True, help='The number of interpolation steps')
parser.add_argument('-x',required=True, help='The desired index file (.ndx)')
parser.add_argument('-p',required=True, help='A topology file for the restraints, must contain the line "#include dihedral_restraints" where the restraints are to be written')
args = vars(parser.parse_args())

conf=args['i']
ndx=args['x']
n=int(args['n'])
top = open(args['p'],'r').read()
selection = res_selection.res_select(conf,ndx)
protein = res_selection.protein(conf)

# top[0] is the text before the marker, top[1] the text after it
top=top.split('#include dihedral_restraints')
# Python 2 print statement
print selection

# Fixed tables of 20 values each; not referenced in the visible part of the loop below
theta_val=[1.6, 1.48, 1.36, 1.24, 1.12, 1.0, 0.8799999999999999, 0.7599999999999999, 0.6399999999999999, 0.5199999999999998, 0.3999999999999999, 0.2799999999999998, 0.1599999999999997, 0.039999999999999813, -0.0800000000000003, -0.20000000000000018, -0.3200000000000003, -0.4400000000000004, -0.5600000000000005, -0.8]
zeta_val=[-4.3, -3.8, -3.3, -2.8, -2.3, -1.7999999999999998, -1.2999999999999998, -0.7999999999999998, -0.2999999999999998, 0.20000000000000018, 0.7000000000000002, 1.2000000000000002, 1.7000000000000002, 2.2, 2.7, 3.2, 3.7, 4.2, 4.7, 5.7]


# NOTE(review): the loop body appears to be cut off in this excerpt; newtop
# and xvg are opened here but not closed in the visible lines.
for k in range(1,n):
    newtop=open('%s.top'%k,'w')
    # write the initial part of the topology file
    newtop.write('%s'%top[0])
    xvg = open('%s.xvg'%k,'r').readlines()
    #print "Writing restraints for interpolant number %i" %k
Пример #11
0
def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
):
    """Average the swarm results per string point, reparametrize the string and write new restraints.

    For every string point the CV vectors of its swarm trajectories are summed and scaled by
    1/Nswarms, the resulting points are repeatedly passed through ext_rep_pts() until the reported
    maximum spread drops to 0.012 or below (or 150 iterations have run), and the adjusted points
    are written to restraint files in the current working directory.

    Args:
        use_posres: 1 selects position-restraint CVs (coordinates read from .gro files with
            rwgro.readgro_flat); any other value selects dihedral CVs (read from .xvg files with
            readxvg.readxvg_flat).
        fix_endpoints: 1 means the start/end points are read from the start_*/end_* inputs and
            inserted at both ends of the string; no output is written for those two points.
        cvs: cvs[pathpt][i] is the CV file of swarm i of (swarm-processed) string point pathpt.
        ndx_file: index file, read with res_selection.read_ndx.
        Nchains: number of chains; one .itp is written per string point and chain.
        start_conf: start configuration; used for the residue selection and, for posres with
            fixed endpoints, as the first string point.
        start_xvg: .xvg giving the first string point (dihedrals, fixed endpoints).
        end_conf: end configuration giving the last string point (posres, fixed endpoints).
        end_xvg: .xvg giving the last string point (dihedrals, fixed endpoints).
        last_resconfs: last_resconfs[k] is the previous restraint .gro of string point k
            (posres only).
        top: topology handed to molecule() when Nchains == 1 (dihedrals only).
        includes: includes[k][chain] is the previous .itp of string point k and chain
            (dihedrals only).

    Returns:
        None. Output goes to rep_resconf_<k>.gro (posres) or res_<k>_chain_<chain>.itp
        (dihedrals); progress is reported on stderr.
    """

    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, we map the atoms to residues for a single chain, and the readxvg etc. will read the entire file and
    # select the same residues in each chain. But for the position restraints which use the atom indices directly, we have
    # to first expand the index so it covers all chains.

    # TODO: have to figure out or input atoms per chain in the .gro's so we can repeat the atom-selection Nchains times
    # for the posres case. The ndx file is for atoms inside the chain, but the .gro will contain global numbering.
    # We can detect the chain-repeat in rwgro, by looking for repeating first residue name.

    # Every non-posres branch below reads rsel, so bind it under exactly the complementary
    # condition (the previous "== 0" test left rsel unbound for any value other than 0 or 1).
    if use_posres != 1:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Calculate the average drift in CV space

    # newpts is a per-swarm-point list of CV points (each a list of the CV dimension length)
    newpts = []

    # Note: the cvs[][] array is indexed after the number of stringpoints that actually were swarm-processed,
    # so depending on the fix_endpoints option it may or may not exactly match the path[] which always include
    # all points. If we only read in N-2 points here, the start/end will be added to newpts in the code further below.
    for swarm_files in cvs:
        if use_posres == 1:
            swarmpts = [rwgro.readgro_flat(swarm_file, ndx_atoms) for swarm_file in swarm_files]
        else:
            swarmpts = [readxvg.readxvg_flat(swarm_file, rsel) for swarm_file in swarm_files]
        zptsum = reduce(mapadd, swarmpts)
        avgdrift = scale((1 / float(Nswarms)), zptsum)
        newpts.append(avgdrift)

    # Read in the fixed start and end CV values, for the fix_endpoints case (otherwise the start/end will
    # be allowed to drift just like the other points, and they will already then be a part of the newpts array)
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full Systems so the atom numbering aliases for the ndx_atoms array :/
            # Currently fixed in readgro_flat temporary, hardcoded for the GLIC Protein number.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary. TODO: check if this is needed anymore
    paddingpt = [0] * len(newpts[0])
    newpts.append(paddingpt)

    # Do the actual reparameterization
    # newpts is a 2D list, first level is one per stringpoint, second is the linear list of CVs

    # rep_pts returns the maximum spread of the CV distances between points in [0] and the adjusted
    # points in [1]

    # Initial iteration: keep the points only, ignore the spread result
    adjusted = ext_rep_pts(newpts)[1]

    # Keep iterating, feeding the result of the previous result into rep_pts again
    # Note that with long CV vectors (> 4000 dimensions) iterations takes a long time
    # (at least 45 min for 25 iterations on a single-core 3.5 GHz) when using the python rep_pts.
    # We can abort early when the maximum spread between points in the updated string goes
    # below a threshold.
    # Only the most recent iterate is kept; earlier ones are never read again, so holding on to
    # all of them would only cost memory.
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write("Rep iter %d: \n" % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write("  maxspread was %f\n" % maxspread)
        # Remember the adjusted points
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, i))

    # delete the padding point
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    # Possibility to test skipping reparametrize by uncommenting the next row.
    # The stringpoints will drift along the string and probably end up in the
    # endpoints or a minima along the string.
    # adjusted = newpts

    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))
    # TODO Nchains should depend on the specific residue (?)
    # Given as function argument now.

    # write the CV control data for the next iteration

    # The output file expected for the posres case is rep_resconf_%d.gro for each stringpoint.
    # For dihedrals its res_%d_chain_%d.itp for each stringpoint and chain.

    # Set for constant-time membership tests while scanning the atom rows below
    ndx_atom_set = set(ndx_atoms) if use_posres == 1 else None

    for k, pathpoint in enumerate(adjusted):
        # Not necessary to do this output for the start/end-points in the fix_endpoints case, the data is
        # just bypassed in the caller script
        if fix_endpoints == 1 and (k == 0 or k == (len(adjusted) - 1)):
            continue

        if use_posres == 1:
            # pathpoint is the 1-D list of CVs (positions): x,y,z * nbr atoms in index

            # Read the previous (input) resconf, which has basically tagged along since the last
            # reparametrization step (or was set initially at swarm-start). It is read before the output
            # is opened so an unreadable input cannot leave a truncated output file behind.
            with open(last_resconfs[k], "r") as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            # Open the output resconf which will go into the next iteration as minimization target
            with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
                # TODO: maybe this chunk of code could be done by the rwgro module for us.
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])
                # Go through the atoms row-by-row and update the xyz coordinates for the atoms the reparametrize
                # step moved
                # Note: we are only copying over positions here. The velocities are not needed as the use for these files
                # will only be as a base for the next iterations position restraint coordinates.
                if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                    sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))
                cvpos = 0
                # Everything between the two header rows and the final (cell dimension) row
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])
                    if atomnr in ndx_atom_set:
                        # Update to new coords
                        x = pathpoint[cvpos]
                        y = pathpoint[cvpos + 1]
                        z = pathpoint[cvpos + 2]
                        cvpos += 3
                    # Write out the row, updated or not
                    rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))
                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[-1])
        else:
            # pathpoint is just a list of phi/psi angles
            for chain in range(Nchains):
                # Read the molecule part of the previous include (everything before its restraint section)
                # before the output is opened for writing, so the input is consumed before any file
                # gets truncated.
                with open(includes[k][chain], "r") as in_itpf:
                    moltop = in_itpf.read().split("[ dihedral_restraints ]")[0]

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule("%s" % includes[k][chain])

                # Create a lookup-table for the protein topology that maps residue to dihedrally relevant
                # backbone atom indices for N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ("CA", "N", "C"):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
                    restraint_itp.write("%s" % moltop)

                    sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n")

                    # Use the lookup-table built above and get the dihedral specification atoms needed for each
                    # residue in the selection.

                    pos = 0

                    for r in rsel:
                        # Get the atom numbers to use for the phi and psi dihedrals (4 atoms each)

                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]["C"], dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"]]

                        # psi is N, CA and C on this residue and N on the next
                        psi = [dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"], dih_atoms[r + 1]["N"]]

                        # get phi and psi values from the reparametrization vector
                        # NOTE(review): each residue occupies 2 * Nchains slots, yet the chain offset
                        # advances by 1, so for Nchains > 1 the phi slot of chain c + 1 is the psi slot
                        # of chain c. If readxvg_flat lays values out as residue/chain/(phi, psi) this
                        # should be pos + 2 * chain - confirm against readxvg_flat before changing.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (phi,phi vals * number of chains apart)
                        pos += 2 * Nchains

                        # write phi, psi angles and k-factor
                        # Note: in the Gromacs 4.6+ format, the k-factor is here. Before, it was in the .mdp as
                        # dihre_fc.

                        # Since we need different force constants in different stages, we need to put
                        # a searchable placeholder in the file here and replace it later
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)
                        )
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0)
                        )