Exemplo n.º 1
0
    def __init__(self,
                 Protein,
                 solvent_box,
                 MD_program_path='orac',
                 number_of_cores_per_node=64,
                 number_of_replicas=8):
        """Store the run settings and build the helper objects.

        Protein :: the protein object; it must provide get_ligand_list()

        solvent_box :: stored unchanged as self.solvent_box

        MD_program_path :: name or path of the MD executable (default 'orac'),
        resolved through path.absolute_programpath

        number_of_cores_per_node :: stored unchanged (default 64)

        number_of_replicas :: stored as self.replicas (default 8)
        """

        # plain inputs
        self.Protein = Protein
        self.solvent_box = solvent_box

        # the executable is resolved once, here
        resolved_program = path.absolute_programpath(program=MD_program_path)
        self.MD_program_path = resolved_program

        self.number_of_cores_per_node = number_of_cores_per_node

        # name of the orac input file
        self.orac_in_file = "HREM.in"

        # placeholder value, marked as a dummy in the original code
        self.kind_of_processor = 'skylake'

        # orient.Orient helper built from the protein and its ligands
        ligands = self.Protein.get_ligand_list()
        self.orient = orient.Orient(self.Protein, ligands)

        # computed by a helper of this class; it runs before self.replicas exists
        self.BATTERIES = self._get_BATTERIES()

        self.replicas = number_of_replicas
Exemplo n.º 2
0
    def __init__(self, Protein, MD_program_path='orac'):
        """Keep the protein and the MD program and prepare the helper objects.

        Protein :: the protein object; it must provide get_ligand_list()

        MD_program_path :: stored as given (default 'orac')
        """

        self.Protein = Protein
        self.MD_program_path = MD_program_path

        # orient.Orient helper for the protein and its ligands
        ligands = self.Protein.get_ligand_list()
        self.orient = orient.Orient(Protein=self.Protein, Ligand=ligands)

        # starts empty, will be filled in later on
        self.template = list()
Exemplo n.º 3
0
    def __init__(self, Protein, solvent_pdb=None, MD_program_path='orac'):
        """
        Set up file names, the solvent pdb, the MD executable and the box.

        Protein :: HPC_Drug.structures.protein.Protein instance

        solvent_pdb :: string, the pdb file with the coordinates of one
        solvent molecule, needed when a solvent box has to be added around
        the protein; when None the "water.pdb" shipped in HPC_Drug.lib is used

        MD_program_path :: string, the orac executable; it is resolved with
        path.absolute_programpath (the default looks for an executable called
        orac in the PATH and in the working directory, in this order)
        """

        self.Protein = Protein

        # orac input file, placed in the current working directory
        self.orac_in_file = f"{os.getcwd()}/{self.Protein.protein_id}_orac.in"

        # fall back to the standard water.pdb inside the lib module
        self.solvent_pdb = solvent_pdb
        if self.solvent_pdb is None:
            with importlib_resources.path('HPC_Drug.lib',
                                          'water.pdb') as packaged_water:
                self.solvent_pdb = str(packaged_water.resolve())

        resolved_program = path.absolute_programpath(program=MD_program_path)
        self.MD_program_path = resolved_program

        # output pdb file, placed in the current working directory
        self.output_pdb_file = f"{os.getcwd()}/{self.Protein.protein_id}_orac.pdb"

        # orient.Orient helper for the protein and its ligands
        ligands = self.Protein.get_ligand_list()
        self.orient = orient.Orient(self.Protein, ligands)

        self.template = list()

        # values needed many times are computed once in the constructor:
        # the box sizes (lx, ly, lz), with the structure rotated in the
        # reference system of its tensor of inertia
        self.box = self._create_selfbox()
Exemplo n.º 4
0
    def __init__(self,
                 tpg_file,
                 prm_file,
                 solvent_box=None,
                 MD_program_path="orac",
                 chain="A"):
        """Prepare the orac input template for a solvent-only box.

        tpg_file :: written after READ_TPG_ASCII in the template

        prm_file :: written after READ_PRM_ASCII in the template

        solvent_box :: pdb file of the solvent box; when None the
        'only_water.pdb' shipped in HPC_Drug.lib is copied into the current
        working directory and that copy is used

        MD_program_path :: stored as given (default "orac")

        chain :: chain id passed to the protein.Protein built from the
        solvent box (default "A")
        """

        self.tpg_file = tpg_file

        self.prm_file = prm_file

        self.solvent_box = solvent_box
        if self.solvent_box is None:
            # `_path` (not `path`) so the project's `path` module name is
            # never shadowed inside this constructor
            with importlib_resources.path('HPC_Drug.lib',
                                          'only_water.pdb') as _path:
                shutil.copy(str(_path.resolve()), os.getcwd())

            self.solvent_box = os.getcwd() + "/" + "only_water.pdb"

        self.MD_program_path = MD_program_path

        self.chain = chain

        self.orac_in_file = os.getcwd() + "/only_water_orac.in"

        self.output_pdb_file = "optimized_only_solvent_box.pdb"

        # the solvent box is wrapped in a Protein instance with a fixed id
        self.Protein = protein.Protein(protein_id="slvt",
                                       pdb_file=self.solvent_box,
                                       chain=self.chain,
                                       file_type="pdb")

        # there are no ligands in a solvent-only system
        self.orient = orient.Orient(Protein=self.Protein, Ligand=[])

        self.box = self._create_selfbox()

        # output file name without its extension, shared by the .rst and
        # .dat file names in the template
        output_stem = self.output_pdb_file.rsplit('.', 1)[0].strip()

        # One list element per template entry. In the original a missing
        # comma merged the "!!" and "!" comment lines into a single string.
        self.template = [
            "#&T NTHREADS    8   CACHELINE   16",
            "#&T NT-LEVEL1   2   CACHELINE   16",
            "#&T NT-LEVEL2   4   CACHELINE   16",
            "###############################################################",
            "#  Minimize Crystallographic structure from PDBank",
            "###############################################################",
            "",
            "! this is a comment",
            "!! two exclamation points: system-dependent section",
            "! one exclamation point: system indipendent section (same for all inputs)",
            "#",
            "# Set MD cell and read pdb coordinates",
            "#",
            "&SETUP",
            self._write_box(),
            f"READ_PDB {self.solvent_box}",
            "&END",
            "#",
            "# reads the force fields",
            "#",
            "&PARAMETERS",
            f"   READ_TPG_ASCII {self.tpg_file} ! protein",
            f"   READ_PRM_ASCII {self.prm_file} ! protein",
            "   WRITE_TPGPRM_BIN  only_water.tpgprm",
            "   JOIN SOLVENT   ! solvent",
            "       tip3",
            "   END",
            "&END",
            "&SOLVENT",
            f"ADD_UNITS {self._get_number_of_residues()}",
            "&END",
            "&SIMULATION  ! simulation parameters (same for all)",
            "   MDSIM",
            "   TEMPERATURE   280.0 20.0",
            "   ISOSTRESS PRESS-EXT 0.1 BARO-MASS 30.0",
            "   THERMOS",
            "      solute  10.0",
            "      solvent 10.0",
            "      cofm    10.0",
            "      temp_limit 1000.0",
            "   END",
            "&END",
            "&INTEGRATOR     ! integration parameters (same for all)",
            "   TIMESTEP       9.0",
            "   MTS_RESPA",
            # "step intra 2" appears twice in the original template; kept as is
            "      step intra 2",
            "      step intra 2",
            "      step nonbond 2  5.1",
            "      step nonbond 5  7.8   reciprocal",
            "      step nonbond 1  10.0",
            "      test_times OPEN  G0.tt 20",
            "      very_cold_start 0.1",
            "  END",
            "&END",
            "&POTENTIAL  !! potential parameters",
            self._write_EWALD_PME(),
            self._write_ADD_STR_COM(),
            "   UPDATE      60.0   1.8",
            self._write_LINKED_CELL(),
            "   STRETCHING HEAVY",
            "   QQ-FUDGE  0.83333",
            "   LJ-FUDGE  0.50",
            "&END",
            "&RUN  ! run lenght (same for all)",
            "   CONTROL      0",
            "   PROPERTY     20000.0",
            "   REJECT       20000.0",
            "   TIME         6000.0",
            "   STEER        0.0 30000.0",
            "   PRINT        300.0",
            "&END",
            "",
            "#",
            "# write restart file every 60.0 (approximately)",
            "#",
            "&INOUT ! files I/O",
            "   RESTART",
            f"      write  15000.0  OPEN  {output_stem}.rst",
            "   END",
            f"   ASCII   3000.0 OPEN {self.output_pdb_file}",
            f"   PLOT STEER_ANALYTIC  500.0  OPEN {output_stem}.dat",
            "&END",
        ]
Exemplo n.º 5
0
    def __init__(self,
                Protein,
                MD_program_path = 'gmx',
                kind_of_processor = 'skylake',
                number_of_cores_per_node = 64,
                use_gpu = 'auto',
                gpus_per_node = 1,
                number_of_replicas = 8,
                batteries = None,
                n_steps=None,
                timestep=None,
                constraints='h-bonds'):
        """Store the HREM settings and build the mdp template.

        Protein, MD_program_path :: forwarded to the parent constructor

        kind_of_processor, number_of_cores_per_node, gpus_per_node ::
        stored unchanged

        use_gpu :: one of 'auto', 'cpu', 'gpu' (case and surrounding blanks
        are ignored), any other value raises ValueError

        number_of_replicas :: stored as self.replicas

        batteries :: stored as self.BATTERIES, when None the value comes
        from self._get_BATTERIES()

        n_steps :: converted to int, when None it is computed as
        int(self._get_ns_per_day() * 1.E+3)

        timestep :: when None 0.002 is used

        constraints :: written in the "constraints" mdp line, when None
        'h-bonds' is used
        """

        super().__init__(Protein = Protein,
                        MD_program_path = MD_program_path)

        protein_id = self.Protein.protein_id

        #the input directory that will be created
        self.HREM_dir = f"{protein_id}_HREM"

        #file names derived from the protein id
        self.elaborated_top_file = f"{protein_id}_elaborated_topology.top"
        self.mdp_file = f"{protein_id}_HREM.mdp"

        self.output_tpr_file = "HREM.tpr"

        self.kind_of_processor = kind_of_processor
        self.number_of_cores_per_node = number_of_cores_per_node

        #an instance of orient.Orient class
        ligands = self.Protein.get_ligand_list()
        self.orient = orient.Orient(self.Protein, ligands)

        #gromacs has various options to use gpu
        #auto (default) that will use all the available ones automatically
        #cpu uses no GPU even if available
        #gpu forces the use of GPU (auto would be safer and more robust)
        gpu_option = use_gpu.lower().strip()
        self.use_gpu = gpu_option
        if gpu_option not in ('auto', 'cpu', 'gpu'):
            raise ValueError(f"{gpu_option} is not a valid gpu option, valid options are auto cpu gpu")

        self.gpus_per_node = gpus_per_node

        #the helper is only called when no batteries are given
        self.BATTERIES = self._get_BATTERIES() if batteries is None else batteries

        #the replicas for BATTERY
        self.replicas = number_of_replicas

        self.temperature = 298.15

        self.timestep = 0.002 if timestep is None else timestep

        #the helper is only called when n_steps is not given
        self.n_steps = int(self._get_ns_per_day() * 1.E+3) if n_steps is None else int(n_steps)

        self.constraints = 'h-bonds' if constraints is None else constraints

        self.template = [
            "; VARIOUS PREPROCESSING OPTIONS",
            "; Preprocessor information: use cpp syntax.",
            "; e.g.: -I/home/joe/doe -I/home/mary/roe",
            "include                  =",
            "; e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)",
            "define                   =",
            "",
            "; RUN CONTROL PARAMETERS",
            "integrator               = md",
            "; Start time and timestep in ps",
            self._write_TIME_TIMESTEP_string(),
            "; For exact run continuation or redoing part of a run",
            "init-step                = 0",
            "; Part index is updated automatically on checkpointing (keeps files separate)",
            "simulation-part          = 1",
            "; mode for center of mass motion removal",
            "comm-mode                = Linear",
            "; number of steps for center of mass motion removal",
            "nstcomm                  = 100",
            "; group(s) for center of mass motion removal",
            "comm-grps                =",
            "",
            "; TEST PARTICLE INSERTION OPTIONS",
            "rtpi                     = 0.05",
            "",
            "; OUTPUT CONTROL OPTIONS",
            "; Output frequency for coords (x), velocities (v) and forces (f)",
            "nstxout                  = 100000",
            "nstvout                  = 100000",
            "nstfout                  = 100000",
            "; Output frequency for energies to log file and energy file",
            "nstlog                   = 100000",
            "nstcalcenergy            = 100",
            "nstenergy                = 100000",
            "; Output frequency and precision for .xtc file",
            "nstxtcout                = 80000",
            "xtc-precision            = 1000",
            "; This selects the subset of atoms for the .xtc file. You can",
            "; select multiple groups. By default all atoms will be written.",
            "xtc-grps                 =",
            "; Selection of energy groups",
            "energygrps               = System",
            "",
            "; NEIGHBORSEARCHING PARAMETERS",
            "; cut-off scheme (group: using charge groups, Verlet: particle based cut-offs)",
            "; nblist update frequency",
            "cutoff-scheme            = Verlet",
            "nstlist                  = 20",
            "verlet-buffer-tolerance  = 0.0001",
            "; ns algorithm (simple or grid)",
            "ns_type                  = grid",
            "; Periodic boundary conditions: xyz, no, xy",
            "pbc                      = xyz",
            "periodic-molecules       = no",
            "; Allowed energy drift due to the Verlet buffer in kJ/mol/ps per atom,",
            "; a value of -1 means: use rlist",
            "; nblist cut-off",
            "rlist                    = 1.0",
            "; long-range cut-off for switched potentials",
            "rlistlong                = -1",
            "",
            "; OPTIONS FOR ELECTROSTATICS AND VDW",
            "; Method for doing electrostatics",
            "coulombtype              = PME",
            "rcoulomb-switch          = 0",
            "rcoulomb                 = 1.0",
            "; Relative dielectric constant for the medium and the reaction field",
            "epsilon-r                = 1",
            "epsilon-rf               = 0",
            "; Method for doing Van der Waals",
            "vdw-type                 = Cut-off",
            "; cut-off lengths",
            "rvdw-switch              = 0",
            "rvdw                     = 1.0",
            "; Apply long range dispersion corrections for Energy and Pressure",
            "DispCorr                 = EnerPres",
            "; Extension of the potential lookup tables beyond the cut-off",
            "table-extension          = 1",
            "; Separate tables between energy group pairs",
            "energygrp-table          =",
            "; Spacing for the PME/PPPM FFT grid",
            "fourierspacing           = 0.12",
            "; FFT grid size, when a value is 0 fourierspacing will be used",
            "fourier-nx               = 0",
            "fourier-ny               = 0",
            "fourier-nz               = 0",
            "; EWALD/PME/PPPM parameters",
            "pme-order                = 4",
            "ewald-rtol               = 1e-06",
            "ewald-geometry           = 3d",
            "epsilon-surface          =",
            "optimize-fft             = no",
            "",
            "; IMPLICIT SOLVENT ALGORITHM",
            "implicit-solvent         = No",
            "",
            "; OPTIONS FOR WEAK COUPLING ALGORITHMS",
            "; Temperature coupling",
            "tcoupl                   = v-rescale",
            "nsttcouple               = -1",
            "nh-chain-length          = 1",
            "; Groups to couple separately",
            "tc-grps                  = System",
            "; Time constant (ps) and reference temperature (K)",
            "tau-t                    = 0.2",
            f"ref-t                    = {self.temperature}",
            "; pressure coupling",
            "pcoupl                   = Parrinello-Rahman",
            "pcoupltype               = Isotropic",
            "nstpcouple               = -1",
            "; Time constant (ps), compressibility (1/bar) and reference P (bar)",
            "tau-p                    = 1.0",
            "compressibility          = 4.6e-5",
            "ref-p                    = 1",
            "; Scaling of reference coordinates, No, All or COM",
            "refcoord-scaling         = COM",
            "",
            "; GENERATE VELOCITIES FOR STARTUP RUN",
            "gen-vel                  = no",
            "gen-temp                 = 500",
            "gen-seed                 = 173529",
            "",
            "; OPTIONS FOR BONDS",
            f"constraints              = {self.constraints}",
            "; Type of constraint algorithm",
            "constraint-algorithm     = Lincs",
            "; Do not constrain the start configuration",
            "continuation             = no",
            "; Use successive overrelaxation to reduce the number of shake iterations",
            "Shake-SOR                = no",
            "; Relative tolerance of shake",
            "shake-tol                = 0.00001",
            "; Highest order in the expansion of the constraint coupling matrix",
            "lincs-order              = 5",
            "; Number of iterations in the final step of LINCS. 1 is fine for",
            "; normal simulations, but use 2 to conserve energy in NVE runs.",
            "; For energy minimization with constraints it should be 4 to 8.",
            "lincs-iter               = 2",
            "; Lincs will write a warning to the stderr if in one step a bond",
            "; rotates over more degrees than",
            "lincs-warnangle          = 30",
            "; Convert harmonic bonds to morse potentials",
            "morse                    = no"
        ]
Exemplo n.º 6
0
def get_metal_binding_residues_with_no_header(structure,
                                            cutoff = 3.0,
                                            protein_chain = 'A',
                                            protein_model = 0,
                                            COM_distance = 10.0,
                                            metals = important_lists.metals):

    """
    This function gets called by get_metalbinding_disulf_ligands

    It scans every metal residue of the structure against all the other
    residues and returns the metal binding ones as a substitution dictionary

    {residue_id : [residue_name, binding_atom, binding_metal]}

    residue_id is the residue number (other_residue.id[1]); a later match
    for the same number overwrites the previous entry

    It uses biopython structures

    structure :: a biopython structure of the protein

    cutoff :: double, the maximum distance between a metal atom and the
    nearest atom of a residue for the residue to be considered binding,
    default 3.0 angstrom

    protein_chain :: string default 'A', if None no chain selection will be done;
    a chain that is not found is silently ignored (no chain selection)

    protein_model :: integer default 0, if None no model and no chain selection will be done;
    a model that is not found is silently ignored (no model selection)

    COM_distance :: double default 10.0, the maximum distance between the
    centers of mass of a metal residue and of another residue for the atom
    distances to be calculated at all; it is only a pre-filter and it should
    not be necessary to change it

    metals :: a list (or tuple etc) that contains all the resnames (in capital letters) of metals necessary to look for,
    default HPC_Drug.important_lists.metals (Actually the easiest way to personalize metals is to append your custom values to this list)

    returns the substitution dictionary (empty if nothing binds)

    this function is slow and error prone
    and should only be used if there is no mmCIF with a good header
    """

    orient_object = orient.Orient()

    substitutions_dict = {}

    #a residue can not be a binding partner if it is a metal itself:
    #both the caller's list and the standard list are honoured, otherwise
    #a custom metal would be scanned against itself and always "bind"
    not_binding_resnames = set(metals) | set(important_lists.metals)

    #selects model and chain if required, a failed lookup leaves the
    #selection at the previous level
    chain = structure

    if protein_model is not None:

        try:
            chain = structure[protein_model]
        except KeyError:
            pass

        if protein_chain is not None:

            try:
                chain = chain[protein_chain.strip().upper()]
            except KeyError:
                pass

    _chain = copy.deepcopy(chain)

    #second independent copy for the inner scan, made at most once
    #NOTE(review): the original made a new deep copy for every metal atom;
    #sharing one copy assumes orient.Orient.center_mass_distance does not
    #modify its arguments - TODO confirm
    scan_copy = None

    #get_residues() also works when no chain (or no model) was selected,
    #plain iteration would yield chains or models in that case
    for residue in _chain.get_residues():

        metal_resname = residue.resname.strip().upper()

        if metal_resname not in metals:
            continue

        if scan_copy is None:
            scan_copy = copy.deepcopy(chain)

        for atom in residue:

            for other_residue in scan_copy.get_residues():

                other_resname = other_residue.resname.strip().upper()

                #I avoid scanning the metal against itself and against other metals
                if other_resname in not_binding_resnames:
                    continue

                _, _, distance = orient_object.center_mass_distance(structure_1 = residue, structure_2 = other_residue)

                #cheap pre-filter on the centers of mass
                if not distance <= COM_distance:
                    continue

                #check for the nearest atom of the possibly binding residue
                nearest_distance = 1.E+20
                nearest_name = 'DUMMY'

                for other_atom in other_residue:

                    d = (atom.coord[0] - other_atom.coord[0])**2. + (atom.coord[1] - other_atom.coord[1])**2. + (atom.coord[2] - other_atom.coord[2])**2.
                    d = d ** (0.5)

                    if d < nearest_distance:

                        #falls back on the element when the name can not be used
                        try:
                            nearest_name = other_atom.name.upper()
                        except AttributeError:
                            nearest_name = other_atom.element.upper()

                        nearest_distance = d

                #checking if the nearest atom is near enough to be part of a binding residue
                if nearest_distance <= cutoff:
                    #I add the other residue _id to the dictionary keys and give a value
                    substitutions_dict[other_residue.id[1]] = [other_resname, nearest_name, metal_resname]

    return substitutions_dict
Exemplo n.º 7
0
    def execute(self):
        """
        Pipeline that produces a clean and repaired "protein and ions" PDB
        and a PDB file for every organic ligand that is not trash,
        starting from a PDB, an mmCIF file or a protein id

        returns a Protein instance
        """

        #If requested in input the pdb file is downloaded,
        #if the given local file doesn't exist FileNotFoundError is raised,
        #otherwise self.protein_filename is updated with the given path
        #(all the paths are converted to absolute paths)
        self.get_protein_file()

        #the protein instance that goes through the whole pipeline
        target = protein.Protein(protein_id = self.protein_id,
                                    pdb_file = self.protein_filename,
                                    model = self.model,
                                    chain = self.chain,
                                    file_type = self.protein_filetype,
                                    tpg_file = self.protein_tpg_file,
                                    prm_file = self.protein_prm_file)

        #Fills Protein.substitutions_dict and Protein.sulf_bonds,
        #repairs the Protein.pdb_file and returns the resnames and
        #resnumbers of the organic ligands [[resname, resnum], [...], ...]
        #(None item if there are none)
        repairer = structural_information_and_repair.InfoRepair(Protein = target, repairing_method = self.repairing_method)

        target, ligand_resnames_resnums = repairer.get_info_and_repair()

        #in order: remove the disordered atoms still present (if any),
        #keep only the selected model and chain (Protein.model Protein.chain),
        #convert to PDB if the protein was a mmCIF
        cleaning_steps = (
            remove_disordered_atoms.remove_disordered_atoms,
            select_model_chain.select_model_chain,
            mmcif2pdb.mmcif2pdb,
        )
        for step in cleaning_steps:
            target = step(Protein = target)

        #create the Ligand instances and add them to Protein._ligands
        target = get_ligands.get_ligands(Protein = target, ligand_resnames_resnums = ligand_resnames_resnums)

        target.update_structure(struct_type = "prody")

        #keep only protein and ions,
        #the only HETATM remaining are the metal ions
        selector = prody.ProdySelect(structure = target.structure)
        target.structure = selector.protein_and_ions()

        #write the protein only pdb
        target.write(file_name = f"{target.protein_id}_protein.pdb", struct_type = 'prody')

        #removes the remaining trash ions
        target = remove_trash_metal_ions.remove_trash_metal_ions(Protein = target)

        #quick patch (marked as provisional in the original code):
        #the structure is put in the reference system of the inertia tensor
        orienter = orient.Orient(Protein = target)
        _, _, rotation = orienter.calculate_moment_of_intertia_tensor()
        target.structure = orienter.base_change_structure()
        target.write()

        #the ligands get the same rotation as the protein
        for ligand in target.get_ligand_list():
            ligand.update_structure(struct_type = "biopython")

            ligand.structure = orienter.base_change_structure(structure = ligand.structure, rot_matrix = rotation)

            ligand.write()

        return target
def get_metal_binding_residues_with_no_header(protein_id=None,
                                              pdb_file=None,
                                              mmcif_file=None,
                                              cutoff=3.0,
                                              substitutions_dict=None,
                                              protein_chain='A',
                                              protein_model=0,
                                              COM_distance=10.0):
    """This function scans every metal residue of the chain against all the
    other residues and returns the metal binding ones through a substitution
    dictionary

    {residue_id : [residue_name, binding_atom, binding_metal]}

    residue_id is the residue number as a string

    it can be used both for pdb files and mmcif files (give the path to the
    file as a string to pdb_file or mmcif_file, exactly one of the two)

    protein_id :: string, the id given to the parsed structure, if None
    the first 3 characters of the file path are used
    NOTE(review): PDB ids have 4 characters and the path may start with a
    directory, so this fallback looks unintended - confirm

    cutoff :: double, the maximum distance between a metal atom and the
    nearest atom of a residue for the residue to be considered binding,
    default 3.0 angstrom

    substitutions_dict :: if you already have a substitution dictionary and
    you want to update it give it as input, it is updated in place and
    returned; default None, a new empty dictionary is created at every call

    protein_chain :: string default 'A'

    protein_model :: integer default 0

    COM_distance :: double default 10.0, the maximum distance between the
    centers of mass of a metal residue and of another residue for the atom
    distances to be calculated at all; it is only a pre-filter and it should
    not be necessary to change it

    raises ValueError if both or none of pdb_file and mmcif_file are given

    this function is slow and error prone
    and should only be used if there is no mmCIF with a good header"""

    if pdb_file is None and mmcif_file is None:
        raise ValueError(
            "I need a pdb_file or a mmcif_file filename cannot both be None type"
        )

    if pdb_file is not None and mmcif_file is not None:
        raise ValueError(
            f"You can only pass a pdb_file or a mmcif_file not both\npdb_file = {pdb_file} mmcif_file = {mmcif_file}"
        )

    #a dictionary used as default value would be shared by all the calls
    if substitutions_dict is None:
        substitutions_dict = {}

    #exactly one of the two files is given at this point
    if pdb_file is not None:
        structure_file = pdb_file
        p = Bio.PDB.PDBParser()
    else:
        structure_file = mmcif_file
        p = Bio.PDB.MMCIFParser()

    if protein_id is None:
        protein_id = structure_file[0:3]  # Get from filename

    structure = p.get_structure(protein_id, structure_file)

    orient_object = orient.Orient()

    #select the right model
    model = structure[protein_model]
    # select only the right chain
    chain_id = protein_chain.strip().upper()
    chain = model[chain_id]

    #second independent copy of the chain for the inner scan, parsed at
    #most once and only if there is a metal
    #NOTE(review): the original parsed the file again for every metal atom;
    #sharing one copy assumes orient.Orient.center_mass_distance does not
    #modify its arguments - TODO confirm
    scan_chain = None

    for residue in chain:

        metal_resname = residue.resname.strip().upper()

        if metal_resname not in important_lists.metals:
            continue

        if scan_chain is None:
            scan_chain = p.get_structure(
                protein_id, structure_file)[protein_model][chain_id]

        for atom in residue:

            for other_residue in scan_chain.get_residues():

                other_resname = other_residue.resname.strip().upper()

                #I avoid scanning the metal against itself and against other metals
                if other_resname in important_lists.metals:
                    continue

                _, _, distance = orient_object.center_mass_distance(
                    structure_1=residue, structure_2=other_residue)

                #cheap pre-filter on the centers of mass
                if not distance <= COM_distance:
                    continue

                #check for the nearest atom of the possibly binding residue
                nearest_distance = 1.E+20
                nearest_name = 'DUMMY'

                for other_atom in other_residue:

                    d = (atom.coord[0] - other_atom.coord[0])**2. + (
                        atom.coord[1] - other_atom.coord[1])**2. + (
                            atom.coord[2] - other_atom.coord[2])**2.
                    d = d**(0.5)

                    if d < nearest_distance:

                        #falls back on the element when the name can not be used
                        try:
                            nearest_name = other_atom.name.upper()
                        except AttributeError:
                            nearest_name = other_atom.element.upper()

                        nearest_distance = d

                #checking if the nearest atom is near enough to be part of a binding residue
                if nearest_distance <= cutoff:
                    #I add the other residue _id to the dictionary keys and give a value
                    substitutions_dict[str(other_residue.id[1])] = [
                        other_resname, nearest_name, metal_resname
                    ]

    return substitutions_dict