示例#1
0
    def _edit_top(ligand_top, only_solvent_top):
        """
        private

        Takes the last non-blank line of the only-solvent topology and inserts
        it in the ligand topology right before the last non-blank line of the
        ligand topology, then overwrites ligand_top with the result.

        ligand_top :: string, the topology file to edit (overwritten in place)

        only_solvent_top :: string, the topology file whose last non-blank
        line is copied

        raises RuntimeError if only_solvent_top contains no non-blank line

        returns nothing

        NOTE(review): there is no `self` parameter, presumably this is
        wrapped as a staticmethod outside of this view -- confirm.
        """

        #look for the last non-blank line of the solvent topology
        solvent_line = None

        for candidate in reversed(
                read_file.read_file(file_name=only_solvent_top)):

            if candidate.strip():

                solvent_line = candidate.rstrip()

                break

        #fail early with a clear message instead of hitting an unbound
        #variable later on
        if solvent_line is None:
            raise RuntimeError(
                f'Could not find any non-blank line in {only_solvent_top}')

        ligand_lines = read_file.read_file(file_name=ligand_top)

        #put the solvent line just before the last non-blank ligand line
        for i in range(len(ligand_lines) - 1, -1, -1):

            if ligand_lines[i].strip():

                ligand_lines[i] = solvent_line + '\n' + ligand_lines[i]

                break

        write_on_files.write_file(lines=ligand_lines, file_name=ligand_top)
    def test_wrong_input(self):
        """
        Checks that read_file.read_file raises TypeError for each of the
        wrong file_name values (an int and a float)
        """

        wrong_inputs = (1, 1.5)

        for wrong_input in wrong_inputs:

            #one assertRaises per input: with the loop inside the context
            #manager the first raise would leave the block and the
            #remaining inputs would never be checked
            with self.subTest(file_name=wrong_input):

                with self.assertRaises(TypeError):

                    read_file.read_file(file_name=wrong_input)
    def test_with_file(self):
        """
        Reads the reference text file and compares the lines returned by
        read_file.read_file with the expected ones
        """

        expected_lines = ["aaa\n", "BBB\n", "111\n", "2.2.2\n"]

        obtained_lines = read_file.read_file(
            file_name="tests/files4tests/file_read_file.txt")

        self.assertEqual(obtained_lines, expected_lines)
def add_chain_id(pdb_file, chain="A"):
    """
    This is a patch because orac and primadorac remove the chain id from
    pdb files and this confuses some pdb parsers (works on PDB files only)

    The chain id is written right-aligned in the two characters at string
    indexes 20 and 21 of every ATOM, HETATM and TER line, the file is
    overwritten in place

    pdb_file :: string, the pdb file to edit

    chain :: string, default A, the chain id to add to the pdb_file
    (it is upper-cased and stripped before use)

    returns nothing
    """

    chain = chain.upper().strip()

    chain_field = "{0:>2}".format(chain)

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):

        if line.startswith(('ATOM', 'HETATM', 'TER')):

            #remove the line ending BEFORE slicing and pad short records
            #(like a bare TER line) otherwise the chain id would end up
            #after the newline, on a line of its own
            record = line.rstrip('\n').ljust(22)

            lines[i] = record[:20] + chain_field + record[22:] + '\n'

    write_on_files.write_file(lines=lines, file_name=pdb_file)
示例#5
0
    def _get_ligand_name_from_tpg(self):
        """
        private
        
        Gets the name of the ligand from the tpg file
        for any given ligand

        For every ligand returned by self.Protein.get_ligand_list() the name
        is the second whitespace separated field of the first line of
        ligand.tpg_file that contains 'RESIDUE' and has at least two fields

        returns a string with one
        "      <name> !! ligand name in tpg file" line per ligand, joined by
        newlines, or an empty string if there are no ligands

        raises RuntimeError if no residue name can be found in a tpg file
        """

        ligands = self.Protein.get_ligand_list()

        #both None and an empty list mean that there is nothing to do
        if not ligands:
            return ""

        residue_strings = []

        for ligand in ligands:

            for line in read_file.read_file(file_name=ligand.tpg_file):

                fields = line.split()

                #a RESIDUE line without a name is skipped, so a malformed
                #file ends in the RuntimeError below and not in an IndexError
                if 'RESIDUE' in line and len(fields) > 1:

                    residue_strings.append(
                        f"      {fields[1].strip()} !! ligand name in tpg file"
                    )

                    break

            else:
                raise RuntimeError(
                    f'Could not find the residue name in {ligand.tpg_file}')

        return '\n'.join(residue_strings)
示例#6
0
    def _change_absolute_ligand_itp_include_in_relative(self, top_file):
        """
        private

        Rewrites in the given topology file every #include line that points
        to the itp file of one of the ligands (or that contains 'DUM') so
        that only the file name, without the directories, is included.
        In this way the #include is still meaningful when the self.HREM_dir
        directory is copied on a different computer (the HPC cluster)

        top_file :: string, the topology file to edit (overwritten in place)

        returns nothing
        """

        lines = read_file.read_file(file_name = top_file)

        ligand_itp_files = [
            lgand.itp_file for lgand in self.Protein.get_ligand_list()
        ]

        for index, line in enumerate(lines):

            #only the #include lines are of interest
            if not line.strip().startswith("#include"):
                continue

            #the included file, without the quotes
            included_file = line.split()[1].replace('"', '').strip()

            if included_file in ligand_itp_files or 'DUM' in line:

                #keep only what comes after the last "/"
                lines[index] = '#include "' + included_file.split("/")[-1] + '"\n'

        write_on_files.write_file(lines = lines, file_name = top_file)
示例#7
0
    def _get_ligand_name_from_tpg(self, Ligand):
        """
        private
        
        Gets the name of the ligand from the tpg file

        The name is the second whitespace separated field of the first line
        of Ligand.tpg_file that contains 'RESIDUE' and has at least two fields

        Ligand :: object with a tpg_file attribute, the file to read

        returns the string "      <name> !! ligand name in tpg file"

        raises RuntimeError if no residue name can be found in the tpg file
        """

        for line in read_file.read_file(file_name=Ligand.tpg_file):

            fields = line.split()

            #a RESIDUE line without a name is skipped, so a malformed file
            #ends in the RuntimeError below and not in an IndexError
            if 'RESIDUE' in line and len(fields) > 1:

                return f"      {fields[1].strip()} !! ligand name in tpg file"

        raise RuntimeError(
            f'Could not find the residue name in {Ligand.tpg_file}')
示例#8
0
    def _edit_itp(self, ligand_resname, itp_file):
        """
        private

        primadorac calls any ligand LIG in the itp file, here every 'LIG'
        and every 'name-p' is replaced with ligand_resname

        The first 9 lines of the file are removed (they make gromacs fail)
        and the file is overwritten in place

        ligand_resname :: string, the residue name to write in the file

        itp_file :: string, the itp file to edit

        returns the value of path.absolute_filepath for itp_file
        """

        original_lines = read_file.read_file(file_name=itp_file)

        edited_lines = []

        #skip the first 9 lines and rename the residue in the other ones
        for raw_line in original_lines[9:]:

            renamed = raw_line.replace('LIG', ligand_resname).replace(
                'name-p', ligand_resname)

            #be sure that any line ends with exactly one newline
            edited_lines.append(renamed.strip('\n') + '\n')

        write_on_files.write_file(lines=edited_lines, file_name=itp_file)

        return path.absolute_filepath(path=itp_file)
示例#9
0
    def _edit_top_file(self):
        """
        Private
        
        Adds to the protein top file what is needed to include the ligands:

        a "<resname>              1" line for any ligand at the end of the
        file and the #include of the ligand itp files, that are put after
        the first #include line or, if a [moleculetype] line comes first,
        before that line

        The result is written on self.output_top_file, that becomes the new
        self.Protein.top_file

        returns self.Protein
        """

        ligands = self.Protein.get_ligand_list()

        top = read_file.read_file(file_name=self.Protein.top_file)

        #one #include line for any ligand itp file
        include_block = ''.join(
            f'#include "{lgand.itp_file}"\n' for lgand in ligands)

        #one compound line for any ligand, at the end of the file
        top.extend(f'{lgand.resname}              1 \n' for lgand in ligands)

        for index, line in enumerate(top):

            stripped = line.strip()

            #empty lines and comments are not of interest
            if not stripped or stripped.startswith(";"):
                continue

            if stripped.startswith("#include"):
                top[index] = line + '\n' + include_block + '\n'
                break

            if stripped.replace(" ", "").split(";")[0] == '[moleculetype]':
                top[index] = include_block + '\n' + line
                break

        write_on_files.write_file(lines=top, file_name=self.output_top_file)

        self.Protein.top_file = self.output_top_file

        return self.Protein
示例#10
0
    def execute(self):
        """
        Creates the RESTART directory inside self.HREM_dir and fills it

        Steps, in order:

        moves to self.HREM_dir (if it is not already the working directory)
        and creates RESTART and RESTART/not_used_frames

        reads the "key = value" lines of important_info.dat, that overwrite
        or extend the defaults defined below

        creates and selects the starting configurations

        if self.creation is true adds the water box and works on a copy of
        the topology called ligand_solvent_topology.top

        makes the input files and the scripts to make the TPR files

        copies any .mdp .itp and .top file of the working directory in
        RESTART and writes there the RUN_Q.sh and RUN_VDW.sh suggestions

        raises ValueError if a non-blank line of important_info.dat
        does not contain "="

        returns nothing
        """

        if self.HREM_dir != os.getcwd():

            os.chdir(self.HREM_dir)

        fsdam_dir = "RESTART"

        os.makedirs(fsdam_dir, exist_ok=True)
        os.makedirs(f'{fsdam_dir}/not_used_frames', exist_ok=True)

        #making some defaults
        useful_info = {
            "ligand_resname": 'LIG',
            "top_file": "topol.top",
            "ligand_itp": "LIG.itp",
            "only_solvent_gro": None,
            "only_solvent_top": None
        }

        for raw_line in read_file.read_file(file_name="important_info.dat"):

            entry = raw_line.strip()

            if not entry:
                continue

            #partition splits on the first "=" only, so a value that
            #contains "=" is kept whole
            key, separator, value = entry.partition("=")

            if not separator:
                raise ValueError(
                    f'Malformed line in important_info.dat, expected "key = value": {entry!r}'
                )

            useful_info[key.strip()] = value.strip()

        starting_configurations = self._create_restart_configs(
            fsdam_dir=fsdam_dir)

        starting_configurations = self._select_frames_to_use(
            starting_configurations,
            not_used_dir=f'{fsdam_dir}/not_used_frames')

        if self.creation:

            self._add_water_box(useful_info['only_solvent_gro'],
                                starting_configurations)

            #work on a copy, the original topology is left untouched
            shutil.copy(useful_info["top_file"], 'ligand_solvent_topology.top')

            useful_info["top_file"] = 'ligand_solvent_topology.top'

            self._edit_top(ligand_top=useful_info["top_file"],
                           only_solvent_top=useful_info["only_solvent_top"])

        output_dictionary = self._make_input_files(
            ligand_resname=useful_info["ligand_resname"],
            top_file=useful_info["top_file"],
            starting_structures=starting_configurations,
        )

        #writes the TPR scripts in the FSDAM dir
        self._make_TPR_files_script(
            q_lines=output_dictionary['make_q_tpr'],
            vdw_lines=output_dictionary['make_vdw_tpr'],
            fsdamdir=fsdam_dir)

        #lazily copy all the mdp, itp and top files in the new dir
        for i in os.listdir(os.getcwd()):

            if i[-4:] in ('.mdp', '.itp', '.top'):

                shutil.copy(i, fsdam_dir)

        #write a suggestion of run file
        with open(f'{fsdam_dir}/RUN_Q.sh', 'w') as f:

            number_of_runs = len(output_dictionary['run_q'])
            f.write(f'# there are {number_of_runs} runs to do\n')
            f.write('# I suggest to use one GPU per run\n\n\n')

            f.write('\n'.join(output_dictionary['run_q']))

        with open(f'{fsdam_dir}/RUN_VDW.sh', 'w') as f:

            number_of_runs = len(output_dictionary['run_vdw'])
            f.write(f'# there are {number_of_runs} runs to do\n')
            f.write(
                '# I suggest to use one GPU per run or even better a job array\n\n\n'
            )

            f.write('\n'.join(output_dictionary['run_vdw']))
示例#11
0
    def _edit_top_file(self, top_file = None):
        """
        PRIVATE

        Marks ("heats") atoms in the [atoms] sections of the topology by
        appending "_" to the second field of their line

        An atom line is marked when its fourth field is one of the ligand
        resnames or when its third field is one of the ids of the hot
        residues given by self.orient.get_hot_residues_for_rem
        Lines whose fourth field is SOL are never marked

        top_file :: string, default self.elaborated_top_file, the topology
        to edit (overwritten in place)

        returns nothing
        """

        if top_file is None:
            top_file = self.elaborated_top_file

        #ids of the residues to heat, as strings
        hot_residues = self.orient.get_hot_residues_for_rem(Protein = self.Protein, Ligand = self.Protein.get_ligand_list(), cutoff = 4.5, residue_dist = 10.0)
        hot_ids = [str(residue[1]).strip() for residue in hot_residues]

        ligands_resnames = [lgand.resname for lgand in self.Protein.get_ligand_list()]

        lines = read_file.read_file(file_name = top_file)

        #True while the loop is inside an [atoms] section
        inside_atoms = False

        for index, raw_line in enumerate(lines):

            stripped = raw_line.strip()

            #empty lines and comments are left as they are
            if not stripped or stripped[0] == ";":
                continue

            fields = stripped.split()

            #beginning of an atoms section
            if stripped.replace(" ", "").split(";")[0] == "[atoms]":
                inside_atoms = True
                continue

            #any other section header ends the atoms section
            if fields[0][0] == "[":
                inside_atoms = False
                continue

            if not inside_atoms or len(fields) < 4:
                continue

            #do not heat solvent
            if fields[3] == "SOL":
                continue

            #heat the ligand and the near residues
            if fields[3] in ligands_resnames or fields[2] in hot_ids:

                fields[1] = fields[1] + "_"

                lines[index] = " ".join(fields) + "\n"

        write_on_files.write_file(lines = lines, file_name = top_file)
示例#12
0
def merge_pdb(Protein):
    """
    Will put all the given ligands after the protein and update the ligand resnums
    this function is brutal and memory consuming I should do it better in the future

    The lines of the ligand files are inserted right after the last ATOM,
    HETATM or TER line of the protein file, any ligand gets the residue
    number that follows the previous one (starting from the last residue of
    the protein). Only the ATOM, HETATM and TER lines of the ligand files are
    renumbered, any other line is copied as it is

    both the protein and the ligands should be in PDB files (no check will be done)

    Protein :: HPC_Drug.structures.protein.Protein instance with a valid _ligands value

    raises ValueError if the protein file has no ATOM, HETATM or TER line

    return Protein with updated Protein.pdb_file
    (the file is written as {Protein.protein_id}_joined.pdb and the resnum
    of any ligand is updated too)
    """

    coordinate_records = ('ATOM', 'HETATM', 'TER')

    protein_file = read_file.read_file(file_name=Protein.pdb_file)

    ligands = Protein.get_ligand_list()

    #get the index of the line with the last ATOM HETATM or TER line
    #and get the resnum of this last residue
    residue_number = None
    index_protein_file = None

    for i in range(len(protein_file) - 1, -1, -1):

        if protein_file[i].startswith(coordinate_records):

            #some TER lines are non standard and don't contain the residue number
            #in that case the number is taken from the line before
            try:
                residue_number = int(protein_file[i][22:26].strip())
            except ValueError:
                residue_number = int(protein_file[i - 1][22:26].strip())

            index_protein_file = i + 1

            break

    #without this check the function would fail later with an unbound variable
    if index_protein_file is None:
        raise ValueError(
            f"No ATOM, HETATM or TER line found in {Protein.pdb_file}")

    #create the ligands list of strings
    ligand_file = []
    for ligand in ligands:

        residue_number = residue_number + 1

        #update resnum
        ligand.resnum = residue_number

        resnum_field = "{0:>4}".format(residue_number)

        #update the residue numbers in the file
        for ligand_line in read_file.read_file(file_name=ligand.pdb_file):

            #only the coordinate records have a residue number, writing it
            #on any other line would corrupt that line
            if ligand_line.startswith(coordinate_records):

                #remove the line ending and pad short records before slicing
                #so that the number never ends up after the newline
                record = ligand_line.rstrip('\n').ljust(26)

                ligand_line = record[:22] + resnum_field + record[26:]

            ligand_file.append(ligand_line)

    #insert the ligands in the right place of the protein_file list
    protein_file[index_protein_file:index_protein_file] = ligand_file

    #be sure to get the right formatting
    protein_file = [line.strip('\n') + '\n' for line in protein_file]

    #overwrite Protein.pdb_file
    write_on_files.write_file(lines=protein_file,
                              file_name=f"{Protein.protein_id}_joined.pdb")

    Protein.pdb_file = path.absolute_filepath(
        path=f"{Protein.protein_id}_joined.pdb")

    return Protein
示例#13
0
def remove_trash_metal_ions(Protein, trash = important_lists.trash_ions):
    """This function removes unwanted metal ions
    that are still inside the structure after it went through prody
    selection (updates Protein.pdb_file)

    This is a brutal function I will need to do a better job

    For pdb files only the ATOM, HETATM and TER lines whose residue name is
    not in trash are kept (any other line is dropped) and an "end" line is
    added. For cif files only the ATOM and HETATM lines whose sixth field is
    in trash are dropped, any other line (blank lines too) is kept.
    Any line is stripped before it is written, the file is overwritten in place
    
    Protein :: HPC_Drug.structures.protein.Protein instance

    trash :: container of upper case residue names to remove,
    default important_lists.trash_ions

    Protein.file_type must be pdb or cif otherwise TypeError will be raised

    return Protein
    """

    def determine_pdb(line, trash = trash):

        #if it is a line containing atom coordinates
        if line.startswith(('ATOM', 'HETATM', 'TER')):

            return line[17:20].strip().upper() not in trash

        #if it is not will not be kept anyway
        return False

    def determine_cif(line, trash = trash):

        fields = line.split()

        #if it is a line containing atom coordinates
        #(blank and too short lines have nothing to check, so they fall
        #through and are kept instead of raising an IndexError)
        if len(fields) > 5 and fields[0] in ('ATOM', 'HETATM'):

            return fields[5].upper() not in trash

        #if it is not will be kept anyway
        return True

    file_name = Protein.pdb_file

    lines = read_file.read_file(file_name = file_name)

    #keeping only the "good" lines
    if Protein.file_type == 'pdb':

        lines = [x for x in lines if determine_pdb(x)]

        lines.append("end")

    elif Protein.file_type == 'cif':

        lines = [x for x in lines if determine_cif(x)]

    else:
        raise TypeError(f"Protein.file_type must be pdb or cif not {Protein.file_type}") 

    #be sure that any line ends with exactly one newline
    lines = [x.strip() + '\n' for x in lines]

    write_on_files.write_file(lines = lines, file_name = file_name)

    Protein.pdb_file = file_name

    return Protein