def computeEachAtomsUnnormalisedAutocorrelation(trajectories, avgR):
    """
        Computes the autocorrelation with the formula:
            C(k) = 1/[(n-k)] \sum_{t=1}^{n-k} (Xt - mu)(Xt+k - mu)
        To normalise it wihitn [-1:1]
            c(k) = C(k) / var
        When the true mean \mu and variance \sigma^2 are known, this estimate is unbiased.
    """
    rirj = []
    rirjMeasures = []

    for i, traj in enumerate(trajectories):
        #trajectory = prody.parsePDB(traj)
        trajectory = prody.parsePDB(traj, subset='calpha')
        coordinates = trajectory.getCoordsets()

        #superpose
        ensembleTrajectory = prody.PDBEnsemble("Complex")
        ensembleTrajectory.setAtoms(trajectory)
        ensembleTrajectory.addCoordset(coordinates[INITIAL_FRAME:])
        ensembleTrajectory.setCoords(coordinates[0])  #reference
        ensembleTrajectory.superpose()

        #ensembleTrajectory = trajectory

        srirj, sMeasures = computeEachAtomsUnnormalisedAutocorrelationForASingleTrajectory(
            ensembleTrajectory.getCoordsets(), avgR)
        rirj.append(srirj)
        rirjMeasures.append(sMeasures)

    rirj = sumOverTrajectories(rirj)
    rirjMeasures = sumOverTrajectories(rirjMeasures)

    return rirj / rirjMeasures
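# A minimal, self-contained sketch of the docstring's formula for a single 1-D
# series (hypothetical helper, not part of the original module; assumes only
# NumPy): C(k) = 1/(n-k) * sum_t (X_t - mu)(X_{t+k} - mu), then c(k) = C(k)/var.
import numpy

def autocorrelation_1d(x, k, mu=None):
    """Return the normalised autocorrelation c(k) of a 1-D series."""
    x = numpy.asarray(x, dtype=float)
    n = len(x)
    if mu is None:
        mu = x.mean()
    # Unnormalised C(k): average lagged product of deviations from the mean
    c_k = numpy.sum((x[:n - k] - mu) * (x[k:] - mu)) / (n - k)
    # Normalise by the variance so that c(0) == 1
    return c_k / x.var()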
Example #2
def calc_average(trajectory, cutoff):
    output = prody.AtomGroup('Cartesian average coordinates')
    output_coords = trajectory.getCoords()
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.iterpose()

    cutoff = numpy.mean(ensemble.getRMSDs()) * cutoff

    print 'Using cutoff of {}'.format(cutoff)

    input_coords = ensemble.getCoordsets()

    n_atoms = output_coords.shape[0]
    for i in range(n_atoms):
        print 'Computing residue {} of {}:'.format(i + 1, n_atoms)
        average = get_network_average(input_coords[:, i, :], cutoff)
        output_coords[i, :] = average

    output.setCoords(output_coords)
    return output
def computeEachAtomAllTrajectoriesMean(trajectories):
    """
        Computes the mean of each atom's position in all the trajectories
    """
    ri = []
    riMeasures = []

    for i, traj in enumerate(trajectories):
        #trajectory = prody.parsePDB(traj)
        trajectory = prody.parsePDB(traj, subset='calpha')
        coordinates = trajectory.getCoordsets()

        ensembleTrajectory = prody.PDBEnsemble("Complex")
        ensembleTrajectory.setAtoms(trajectory)
        ensembleTrajectory.addCoordset(coordinates[INITIAL_FRAME:])
        ensembleTrajectory.setCoords(coordinates[0])  #reference
        ensembleTrajectory.superpose()
        #ensembleTrajectory = trajectory
        sri, sriMeasures = addTrajectoryCoordinates(
            ensembleTrajectory.getCoordsets(), trajectory.numAtoms())

        #sri, sriMeasures = addTrajectoryCoordinates(trajectory.getCoordsets(), trajectory.numAtoms())
        ri.append(sri)
        riMeasures.append(sriMeasures)

    return average(ri, riMeasures)
Example #4
def calc_average(trajectory):
    output = prody.AtomGroup('Cartesian average coordinates')
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.iterpose()

    coords = ensemble.getCoordsets()
    average_coords = numpy.mean(coords, axis=0)
    output.setCoords(average_coords)
    return output
Example #5
def get_closest_frame(trajectory, average_structure):
    output = prody.AtomGroup('Cartesian average coordinates')
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.setCoords(average_structure)
    ensemble.superpose()
    rmsds = ensemble.getRMSDs()
    min_index = numpy.argmin(rmsds)

    output.setCoords(ensemble.getCoordsets(min_index))
    return output
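# Hedged usage sketch combining calc_average (Example #4) and get_closest_frame
# (Example #5): build the Cartesian average of a CA trajectory, then extract the
# frame closest to it. The input and output file names are hypothetical.
if __name__ == '__main__':
    traj = prody.parsePDB("trajectory.pdb", subset='calpha')   # hypothetical input
    avg = calc_average(traj)
    closest = get_closest_frame(traj, avg.getCoords())
    prody.writePDB("closest_to_average.pdb", closest)          # hypothetical output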
Example #6
    def setSelections(self, ensemble):
        '''
        Apply an optional atom selection to the ensemble and keep a reference
        to the selected structure.
        '''

        custom_selection = False

        if not custom_selection:
            self.selection_ref_structure = self.ref_chain
            return ensemble
        else:
            self.selection2="name CA"
            structure =  ensemble.getAtoms().select(self.selection2)
            sel_string = structure.getSelstr()
            #selec = structure.getSelstr().split()
            selec = structure.getIndices()
            if self.debug:
                print("***DEBUG***", selec)
                print("***DEBUG***", len(selec))
        
            #create new ensemble containing only the selected atoms        
            ensemble2 = prody.PDBEnsemble(ensemble.getTitle())
            ensemble2.setAtoms(structure)
            ensemble2.setCoords(structure.getCoords())
            self.selection_ref_structure = structure
            
            #ensemble2.addCoordset(structure)                
            for coords in ensemble.iterCoordsets():
                new_coords = np.zeros((ensemble2.numSelected(), 3))
                count = 0
                for coord in range(len(coords)):
                    if coord in selec:
                        new_coords[count] = coords[coord]
                        #print(count, new_coords[count]
                        count+=1
                ensemble2.addCoordset(new_coords) 
                
            ensemble2.getConformation(0).setLabel(ensemble2.getTitle())    

            if self.debug:
                print("***DEBUG*** Ensemble before selections: ", repr(ensemble))
                print("***DEBUG*** Ensemble after selections: ", repr(ensemble2))
            
            return ensemble2
Example #7
def get_coords_and_superimpose_with_prody(trajectories,
                                          skip,
                                          max_frames,
                                          iterpose=True):
    all_coordsets = []
    for traj_path in trajectories:
        trajectory = prody.parsePDB(traj_path, subset='calpha')
        coordinates = trajectory.getCoordsets()

        ensembleTrajectory = prody.PDBEnsemble("Complex")
        ensembleTrajectory.setAtoms(trajectory)
        ensembleTrajectory.addCoordset(
            coordinates[skip:min(trajectory.numCoordsets(), max_frames +
                                 skip)])
        ensembleTrajectory.setCoords(coordinates[0])  #reference
        if iterpose:
            print "\t- Using iterposition on trajectory"
            ensembleTrajectory.iterpose()
        else:
            ensembleTrajectory.superpose()
        all_coordsets.append(ensembleTrajectory.getCoordsets())
    return all_coordsets
Example #8
import prody
import numpy
import sys
import os.path

pdb_data = prody.parsePDB(sys.argv[1])
pdb_name = os.path.basename(sys.argv[1])
pdb_trajectory = prody.PDBEnsemble("aligned_CA")

prot = pdb_data.select("name CA not resname CA")
pdb_trajectory.setCoords(pdb_data.getCoordsets()[0])
pdb_trajectory.addCoordset(pdb_data.getCoordsets())
pdb_trajectory.setAtoms(prot)
pdb_trajectory.superpose()

prody.writePDB(pdb_name + ".aligned_CA.pdb", pdb_trajectory)
with file(pdb_name + ".aligned_CA.coords", 'w') as outfile:
    outfile.write("%d %d %d\n" % pdb_trajectory.getCoordsets().shape)
    for coordset in pdb_trajectory.getCoordsets():
        numpy.savetxt(outfile, coordset)

lig = pdb_data.select("resname BEN not name H1 H2 H3 H4 H5 H6 H7 HN1 HN2")
pdb_trajectory.setAtoms(lig)
rmsds = pdb_trajectory.getRMSDs()

prody.writePDB(pdb_name + ".aligned_BEN.pdb", pdb_trajectory)
with file(pdb_name + ".aligned_BEN.coords", 'w') as outfile:
    outfile.write("%d %d %d\n" % pdb_trajectory.getCoordsets().shape)
    for coordset in pdb_trajectory.getCoordsets():
        numpy.savetxt(outfile, coordset)
    def createEnsemble(self, pdbs, logger):
        '''
        Create a prody ensemble based on getPDBs return
        Take into account, that system can be prepared or not
        and take gaps and multiple chains into account
        take single files and trajectory file into account
        
        pdbs: list-array with pdb-filenames
        
        ToDo: 
            - set reference depending on longest chain with no gaps
            - check for duplicate chains and only select this one
        '''
        logger.info("Create Ensemble")
        ref_chids = []
        
        #set the reference structure, if not chosen by user, the first Frame or PDB is taken
        #remove reference Frame/PDB from list
        if self.ref == "none":
            self.ref = pdbs[0]
            pdbs.pop(0)
        else:
            if self.ref in pdbs:
                if self.debug:
                    logger.info(f"***DEBUG*** Found Ref: {self.ref}")
                    logger.info(f"***DEBUG*** Index Ref: {pdbs.index(self.ref)}")
                    logger.info("Removing reference from pdblist")
                pdbs.pop(pdbs.index(self.ref))

        if self.debug:
            logger.info(f"***DEBUG*** Reference is {self.ref}")
            
        #open reference file and check for HID, HIE; if found, replace by HIS
        try:
            pdbinfile = open(self.ref)   
        except IOError as e:
            env.logger.info(self.ref)
            env.logger.info("I/O error({0}): {1}".format(e.errno, e.strerror))
            exit()
        
        pdbfiledata = pdbinfile.read()
        pdbinfile.close()
        for line in pdbfiledata.splitlines():
            if line[17:20] == "HID" or line[17:20] == "HIE":
                env.logger.info("HID or HIE found in pdb-structure. The names will be changed to HIS in the input file")
                break
        
        pdbfiledata = pdbfiledata.replace("HID","HIS")
        pdbfiledata = pdbfiledata.replace("HIE","HIS")
        
        logger.info("Warning!!! Rewriting reference file {}".format(self.ref))
        pdbfile = open(self.ref,'w')
        pdbfile.write(pdbfiledata)
        pdbfile.close()
        
        #set reference and create first structure  
        if self.selection == "all":
            ref_structure = prody.parsePDB(self.ref)
        else:
            ref_structure = prody.parsePDB(self.ref, subset=self.selection)
    

        reference_hierview = ref_structure.getHierView()
        
        logger.info(f"Found {reference_hierview.numChains()} Chain(s) in {reference_hierview._atoms.getTitle()}")
        ensemble_ref_title = reference_hierview._atoms.getTitle()
        logger.info(reference_hierview[0])
        
        '''
        at the moment all chains are taken
        --> check for duplicate chains and only take one of those
        '''
        
        for chain in reference_hierview:
            ref_chids.append(chain.getChid())
        if self.debug:
            env.logger.info(f"***DEBUG*** {ref_chids}")
        
        reference_chains = [reference_hierview[chid] for chid in ref_chids] 
        
        if self.debug:
            env.logger.info(f"***DEBUG*** {reference_chains}")
            
        
        
        ref_chain = reference_chains[0]
        for i in range (1, len(reference_chains), 1):
            ref_chain = ref_chain + reference_chains[i]
        if self.debug:
            env.logger.info("***DEBUG***", ref_chain)
        # save globally
        self.ref_chain = ref_chain
        #Create Ensemble of structures
        ensemble = prody.PDBEnsemble(ensemble_ref_title)
        ensemble.setAtoms(ref_chain)
        ensemble.setCoords(ref_chain)
        #Set ref_structure as first coordinate set
        ensemble.addCoordset(ref_structure)
        
        if self.debug:
            env.logger.info("***DEBUG***", ref_chain.getResnames())
            env.logger.info("***DEBUG***", ref_chain.getResnums())
        
        unmapped = []
        # map remaining structures to reference chain and add to ensemble if mapped
        for pdb in pdbs:
            if self.debug:
                env.logger.info(f"***DEBUG*** Processing {pdb}")
            if self.selection == "all":
                structure = prody.parsePDB(pdb)
            else:
                structure = prody.parsePDB(pdb, subset=self.selection)

            atommaps = []
            for reference_chain in reference_chains:
                # Map the current PDB file onto the reference chain
                mappings = prody.mapOntoChain(structure, reference_chain,
                                              seqid=90,
                                              coverage=50,
                                              subset=self.selection)
                if len(mappings) == 0:
                    env.logger.info(f"Failed to map {pdb}")
                    break
                atommaps.append(mappings[0][0])
            # Make sure all chains are mapped
            if len(atommaps) != len(reference_chains):
                unmapped.append(pdb)
                continue
            atommap = atommaps[0]
            for i in range(1, len(reference_chains)):
                atommap = atommap + atommaps[i]
            # Add the atommap (mapped coordinates) to the ensemble.
            # Note that some structures do not completely map (missing residues),
            # so we pass weights (1 for mapped atoms, 0 for unmapped atoms).
            ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'))
     
        if self.debug:
            env.logger.info("***DEBUG*** Number of conformations:", ensemble.numConfs())
            env.logger.info("***DEBUG*** ", ensemble)
            env.logger.info("***DEBUG*** Unmapped structures:", unmapped)
        ensemble.iterpose()

        #env.logger.info(ensemble.getTitle()
        return ensemble
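# A standalone, hedged distillation of the mapping step performed inside
# createEnsemble above (illustrative helper, not part of the original class):
# one mobile structure is mapped onto a single reference chain and added to the
# ensemble with weights so unmapped atoms are ignored during superposition.
def add_mapped_structure(ensemble, pdb_path, reference_chain, selection='calpha'):
    structure = prody.parsePDB(pdb_path, subset=selection)
    mappings = prody.mapOntoChain(structure, reference_chain,
                                  seqid=90, coverage=50, subset=selection)
    if not mappings:
        return False  # structure could not be mapped onto the reference chain
    atommap = mappings[0][0]
    # Weight mapped atoms with 1 and missing atoms with 0
    ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'))
    return True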
Example #10
import prody
import numpy

pdb_data = prody.parsePDB(
    "../Models/prot_stretching/stretching_trajectory_offset_ligand.pdb")
pdb_trajectory = prody.PDBEnsemble("iterposed_CA")

# Write the initial coordsets
prot = pdb_data.select("name CA")
prody.writePDB("stretching_trajectory_offset_ligand.iterposed_all.pdb", prot)
with file("stretching_trajectory_offset_ligand.initial_CA.coords",
          'w') as outfile:
    outfile.write("%d %d %d\n" % prot.getCoordsets().shape)
    for coordset in prot.getCoordsets():
        numpy.savetxt(outfile, coordset)

# We only want to work with CAs. If we use the 'all coordinates+atom selection" trick
# Prody will still use all coordinates for iterative superposition
pdb_trajectory.setCoords(prot.getCoordsets()[0])
pdb_trajectory.addCoordset(prot.getCoordsets())
pdb_trajectory.setAtoms(prot)
pdb_trajectory.iterpose()

prody.writePDB("stretching_trajectory_offset_ligand.iterposed_CA.pdb",
               pdb_trajectory)
with file("stretching_trajectory_offset_ligand.iterposed_CA.coords",
          'w') as outfile:
    outfile.write("%d %d %d\n" % pdb_trajectory.getCoordsets().shape)
    for coordset in pdb_trajectory.getCoordsets():
        numpy.savetxt(outfile, coordset)
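# Hedged sanity check (not in the original script) for the CA-only trick noted
# above: the ensemble should hold one coordinate row per selected CA atom, so
# iterpose() only ever operated on CA coordinates.
assert pdb_trajectory.getCoordsets().shape[1] == prot.numAtoms()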

import prody
import time
import numpy
from pyRMSD.RMSDCalculator import RMSDCalculator

if __name__ == '__main__':
    TRAJECTORY_FILE = "data/ubi_amber_100.pdb"

    # Coordinates loading
    pdb_data = prody.parsePDB(TRAJECTORY_FILE)
    num_atoms = pdb_data.numAtoms()
    total_frames = pdb_data.numCoordsets()

    trajectory = prody.PDBEnsemble("UBI")
    trajectory.setAtoms(pdb_data)
    trajectory.addCoordset(pdb_data.getCoordsets())
    coordinates = numpy.copy(pdb_data.getCoordsets())

    # RMSD matrix computation
    start_t = time.time()
    prody_rmsd_values = []
    for i in range(0, total_frames):
        trajectory.setCoords(pdb_data.getCoordsets()[i])
        trajectory.superpose()
        prody_rmsd_values.extend(trajectory.getRMSDs()[1:])
        trajectory.delCoordset(0)
    stop_t = time.time()
    print "Prody's computation time:", stop_t - start_t
Example #12
    subset='ca', chain=args.ref_chain_id)
  if reference_structure is None:
    raise Exception('Error loading reference structure')
else:
  raise Exception('Reference structure not found')
reference_hierview = reference_structure.getHierView()
reference_chain = reference_hierview[args.ref_chain_id]

# Generate an ensemble of structures from the pdb files
ens_FN = 'prody.ens.npz'
mappings_FN = 'prody.mappings.pkl'
if os.path.isfile(ens_FN) and os.path.isfile(mappings_FN):
  ensemble = prody.loadEnsemble(ens_FN)
  mappings = pickle.load(open(mappings_FN,'r'))
else:
  ensemble = prody.PDBEnsemble()
  # Set ensemble atoms
  ensemble.setAtoms(reference_chain)
  # Set reference coordinates
  ensemble.setCoords(reference_chain.getCoords())

  mappings = []
  new_pdb_FNs = []
  for (pdb_id,chain_id) in chain_hits:
    pdb_FN = 'chains_original/%s%s.pdb'%(pdb_id,chain_id)
    if pdb_id in exclude:
      continue
    structure = prody.parsePDB(pdb_FN, subset='ca', chain=chain_id)
    if structure is None:
      prody.plog('Failed to parse ' + pdb_FN)
      continue
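  # Hedged sketch, not part of the script above: the counterpart that writes the
  # cache consumed by the os.path.isfile() branch, once the ensemble and mappings
  # have been built (assumes prody.saveEnsemble and pickle.dump):
  #   prody.saveEnsemble(ensemble, ens_FN)
  #   pickle.dump(mappings, open(mappings_FN, 'w'))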
Example #13
import prody
import numpy
import sys


pdb_data = prody.parsePDB(sys.argv[1])
offsets = numpy.array([[[12, 40, 2]]*pdb_data.numAtoms(),
           [[5, -5, 8]]*pdb_data.numAtoms(),
           [[-13, -14, 7]]*pdb_data.numAtoms(),
           [[13, 14, 23]]*pdb_data.numAtoms(),
           #[[-8,-4,-2]]*3239,
           [[6,89,2]]*pdb_data.numAtoms()])
pdb_trajectory = prody.PDBEnsemble(sys.argv[1])
pdb_trajectory.setAtoms(pdb_data)
pdb_trajectory.addCoordset(pdb_data.getCoordsets()+offsets[0:len(pdb_data.getCoordsets())])
pdb_trajectory.setCoords(pdb_data.getCoordsets()[0])
prody.writePDB(sys.argv[2], pdb_trajectory)