def _edit_top(ligand_top, only_solvent_top):
    """
    private

    Takes the last non-blank line of the only-solvent topology and inserts it
    in the ligand topology right before the last non-blank line of the
    ligand topology. The ligand topology file is overwritten in place

    ligand_top :: string, the ligand topology file (will be overwritten)
    only_solvent_top :: string, the topology file of the solvent only system

    raises ValueError if only_solvent_top contains no non-blank line

    returns nothing
    """

    only_solvent_lines = read_file.read_file(file_name=only_solvent_top)

    # last non-blank line of the solvent topology, without trailing whitespace
    solvent_line = next(
        (line.rstrip() for line in reversed(only_solvent_lines) if line.strip()),
        None)

    # without this check an empty solvent topology would only show up later
    # as an obscure unbound variable error
    if solvent_line is None:
        raise ValueError(
            f'Could not find any non-blank line in {only_solvent_top}')

    del only_solvent_lines

    ligand_lines = read_file.read_file(file_name=ligand_top)

    # the solvent line goes right before the last non-blank ligand line
    for i in range(len(ligand_lines) - 1, -1, -1):
        if ligand_lines[i].strip():
            ligand_lines[i] = solvent_line + '\n' + ligand_lines[i]
            break

    write_on_files.write_file(lines=ligand_lines, file_name=ligand_top)
def test_wrong_input(self):
    """
    read_file.read_file must raise TypeError for each of the wrong
    file_name values (an int and a float)
    """

    wrong_inputs = (1, 1.5)

    # one assertRaises per input: a single context manager wrapped around
    # the whole loop is left at the first exception, so the remaining
    # inputs would never be checked
    for wrong_input in wrong_inputs:
        with self.subTest(file_name=wrong_input):
            with self.assertRaises(TypeError):
                read_file.read_file(file_name=wrong_input)
def test_with_file(self):
    """
    Reads the reference file and checks that read_file.read_file returns
    its lines as a list of strings with the line endings kept
    """

    expected_lines = ["aaa\n", "BBB\n", "111\n", "2.2.2\n"]

    lines = read_file.read_file(
        file_name="tests/files4tests/file_read_file.txt")

    self.assertEqual(lines, expected_lines)
def add_chain_id(pdb_file, chain="A"):
    """
    This is a patch because orac and primadorac remove the chain id from
    pdb files and this confuses some pdb parsers (works on PDB files only)

    The chain id is written, right aligned, in columns 21-22 of every line
    starting with ATOM, HETATM or TER. The file is overwritten in place and
    every line is written back with exactly one trailing newline

    pdb_file :: string, the pdb file to edit
    chain :: string, default A, the chain id to add to the pdb_file
    (it is upper cased and stripped before use)

    returns nothing
    """

    chain = chain.upper().strip()
    chain_field = "{0:>2}".format(chain)

    lines = read_file.read_file(file_name=pdb_file)

    for i, line in enumerate(lines):
        record = line.strip('\n')

        if record.startswith(('ATOM', 'HETATM', 'TER')):
            # short records (for example a bare TER) are padded to 20
            # columns first, otherwise the chain id would be glued right
            # after the record text (or after its newline) instead of
            # landing in the chain id columns
            record = record[:20].ljust(20) + chain_field + record[22:]

        lines[i] = record + '\n'

    write_on_files.write_file(lines=lines, file_name=pdb_file)
def _get_ligand_name_from_tpg(self):
    """
    private

    For every ligand in self.Protein.get_ligand_list() reads the ligand's
    tpg file, takes the second whitespace separated field of the first line
    containing 'RESIDUE' and builds one commented string per ligand

    returns the strings joined by newlines, or an empty string if there
    are no ligands

    raises RuntimeError if a tpg file has no line containing 'RESIDUE'
    """

    ligands = self.Protein.get_ligand_list()

    if ligands is None or ligands == []:
        return ""

    residue_strings = []

    for ligand in ligands:

        tpg_lines = read_file.read_file(file_name=ligand.tpg_file)

        # only the first line mentioning RESIDUE is used
        residue_line = next(
            (tpg_line for tpg_line in tpg_lines if 'RESIDUE' in tpg_line),
            None)

        if residue_line is None:
            raise RuntimeError(
                f'Could not find the residue name in {ligand.tpg_file}')

        residue_name = residue_line.split()[1].strip()
        residue_strings.append(f" {residue_name} !! ligand name in tpg file")

    return '\n'.join(residue_strings)
def _change_absolute_ligand_itp_include_in_relative(self, top_file):
    """
    private

    Rewrites, in the given topology file, the #include lines that point to
    one of the ligand itp files (or that contain 'DUM') so that they only
    keep the file name, without the directories

    This gives a meaningful #include in the self.HREM_dir directory, which
    will be copied on a different computer (the HPC cluster)

    top_file :: string, the topology file to edit (overwritten in place)

    returns nothing
    """

    lines = read_file.read_file(file_name = top_file)

    ligand_itp_files = [
        ligand.itp_file for ligand in self.Protein.get_ligand_list()
    ]

    for index, line in enumerate(lines):

        if not line.strip().startswith("#include"):
            continue

        # the included file, without the surrounding double quotes
        included_file = line.split()[1].replace('"', '').strip()

        if included_file in ligand_itp_files or 'DUM' in line:

            file_name_only = included_file.split("/")[-1]

            lines[index] = f'#include "{file_name_only}"\n'

    write_on_files.write_file(lines = lines, file_name = top_file)
def _get_ligand_name_from_tpg(self, Ligand):
    """
    private

    Reads Ligand.tpg_file and, from the first line containing 'RESIDUE',
    takes the second whitespace separated field as the ligand name

    Ligand :: an object with a tpg_file attribute

    returns a string with the name followed by a '!!' comment

    raises RuntimeError if no line of the tpg file contains 'RESIDUE'
    """

    for tpg_line in read_file.read_file(file_name=Ligand.tpg_file):

        if 'RESIDUE' not in tpg_line:
            continue

        residue_name = tpg_line.split()[1].strip()

        return f" {residue_name} !! ligand name in tpg file"

    # the loop ended without finding any RESIDUE line
    raise RuntimeError(
        f'Could not find the residue name in {Ligand.tpg_file}')
def _edit_itp(self, ligand_resname, itp_file):
    """
    private

    primadorac calls any ligand LIG in the itp file, here it is renamed
    to ligand_resname (the same is done for the 'name-p' placeholder) and
    the first 9 lines of the file are removed (they make gromacs fail)

    ligand_resname :: string, the residue name to write in the itp file
    itp_file :: string, the itp file to edit (overwritten in place)

    returns the absolute path of itp_file
    """

    original_lines = read_file.read_file(file_name=itp_file)

    edited_lines = []

    # the first 9 lines are skipped
    for original_line in original_lines[9:]:

        renamed = original_line.replace('LIG', ligand_resname)
        renamed = renamed.replace('name-p', ligand_resname)

        # exactly one newline at the end of each line
        edited_lines.append(renamed.strip('\n') + '\n')

    write_on_files.write_file(lines=edited_lines, file_name=itp_file)

    return path.absolute_filepath(path=itp_file)
def _edit_top_file(self):
    """
    Private

    Adds to the protein topology what is needed to include the ligands:

    - one '<resname> 1' line per ligand appended at the end of the file
    - one #include per ligand itp file, inserted right after the first
      #include line or right before the first [ moleculetype ] line,
      whichever comes first (blank and ';' comment lines are skipped)

    The result is written in self.output_top_file and
    self.Protein.top_file is updated to point to it

    returns self.Protein
    """

    ligands = self.Protein.get_ligand_list()

    top = read_file.read_file(file_name=self.Protein.top_file)

    include_lines = []
    for ligand in ligands:
        include_lines.append(f'#include "{ligand.itp_file}"\n')

        top.append(f'{ligand.resname} 1 \n')

    itp_insertion_string = ''.join(include_lines)

    for index, raw_line in enumerate(top):

        stripped = raw_line.strip()

        # blank lines and comments cannot be the insertion point
        if not stripped or stripped.startswith(";"):
            continue

        if stripped.startswith("#include"):
            top[index] = raw_line + '\n' + itp_insertion_string + '\n'
            break

        if stripped.replace(" ", "").split(";")[0] == '[moleculetype]':
            top[index] = itp_insertion_string + '\n' + raw_line
            break

    write_on_files.write_file(lines=top, file_name=self.output_top_file)

    self.Protein.top_file = self.output_top_file

    return self.Protein
def execute(self):
    """
    Prepares the RESTART directory with what is needed for the runs

    In order:

    - moves in self.HREM_dir (if it is not already the working directory)
    - creates the RESTART and RESTART/not_used_frames directories
    - reads the 'key = value' lines of important_info.dat, they override
      the defaults (ligand_resname, top_file, ligand_itp,
      only_solvent_gro, only_solvent_top)
    - creates the restart configurations and selects the frames to use
    - if self.creation is true adds the water box to the configurations
      and works on a copy of the topology (ligand_solvent_topology.top)
      edited with the only solvent topology
    - makes the input files and the script that creates the TPR files
    - copies every .mdp, .itp and .top file of the working directory
      in RESTART
    - writes RESTART/RUN_Q.sh and RESTART/RUN_VDW.sh as a suggestion of
      run files

    raises ValueError if a non-blank line of important_info.dat has no '='

    returns nothing
    """

    if self.HREM_dir != os.getcwd():
        os.chdir(self.HREM_dir)

    fsdam_dir = "RESTART"
    not_used_dir = f'{fsdam_dir}/not_used_frames'

    os.makedirs(fsdam_dir, exist_ok=True)
    os.makedirs(not_used_dir, exist_ok=True)

    #making some defaults
    useful_info = {
        "ligand_resname": 'LIG',
        "top_file": "topol.top",
        "ligand_itp": "LIG.itp",
        "only_solvent_gro": None,
        "only_solvent_top": None
    }

    for line in read_file.read_file(file_name="important_info.dat"):

        line = line.strip()
        if not line:
            continue

        # only the first '=' separates key and value, so a value that
        # contains '=' itself is kept whole
        key, separator, value = line.partition("=")

        if not separator:
            raise ValueError(
                f"Malformed line in important_info.dat (expected key = value): {line}")

        useful_info[key.strip()] = value.strip()

    starting_configurations = self._create_restart_configs(
        fsdam_dir=fsdam_dir)

    starting_configurations = self._select_frames_to_use(
        starting_configurations, not_used_dir=not_used_dir)

    if self.creation:

        self._add_water_box(useful_info['only_solvent_gro'],
                            starting_configurations)

        # the original topology is left untouched, the edits are done
        # on a copy
        shutil.copy(useful_info["top_file"], 'ligand_solvent_topology.top')

        useful_info["top_file"] = 'ligand_solvent_topology.top'

        self._edit_top(ligand_top=useful_info["top_file"],
                       only_solvent_top=useful_info["only_solvent_top"])

    output_dictionary = self._make_input_files(
        ligand_resname=useful_info["ligand_resname"],
        top_file=useful_info["top_file"],
        starting_structures=starting_configurations,
    )

    #writes the TPR scripts in the FSDAM dir
    self._make_TPR_files_script(
        q_lines=output_dictionary['make_q_tpr'],
        vdw_lines=output_dictionary['make_vdw_tpr'],
        fsdamdir=fsdam_dir)

    #lazily copy all the mdp, itp and top files in the new dir
    for file_name in os.listdir(os.getcwd()):
        if file_name[-4:] in ('.mdp', '.itp', '.top'):
            shutil.copy(file_name, fsdam_dir)

    #write a suggestion of run file
    with open(f'{fsdam_dir}/RUN_Q.sh', 'w') as f:

        number_of_runs = len(output_dictionary['run_q'])

        f.write(f'# there are {number_of_runs} runs to do\n')
        f.write('# I suggest to use one GPU per run\n\n\n')

        f.write('\n'.join(output_dictionary['run_q']))

    with open(f'{fsdam_dir}/RUN_VDW.sh', 'w') as f:

        number_of_runs = len(output_dictionary['run_vdw'])

        f.write(f'# there are {number_of_runs} runs to do\n')
        f.write(
            '# I suggest to use one GPU per run or even better a job array\n\n\n'
        )

        f.write('\n'.join(output_dictionary['run_vdw']))
def _edit_top_file(self, top_file = None):
    """
    PRIVATE

    Marks the "hot" atoms of the topology by appending an underscore to
    the second field of their line in the [ atoms ] sections
    (presumably the atom type, to be confirmed against the topology format)

    An atom line (at least 4 fields) is marked when its fourth field is
    one of the ligand resnames, or when its third field is one of the ids
    given by self.orient.get_hot_residues_for_rem. Lines whose fourth
    field is SOL are never marked. Marked lines are rewritten with single
    spaces between the fields

    top_file :: string, default self.elaborated_top_file, the topology
    file to edit (overwritten in place)

    returns nothing
    """

    if top_file is None:
        top_file = self.elaborated_top_file

    #get the hot residues
    hot_residues = self.orient.get_hot_residues_for_rem(
        Protein = self.Protein,
        Ligand = self.Protein.get_ligand_list(),
        cutoff = 4.5,
        residue_dist = 10.0)

    hot_ids = [str(residue[1]).strip() for residue in hot_residues]

    #get the ligand resnames
    ligands_resnames = [
        ligand.resname for ligand in self.Protein.get_ligand_list()
    ]

    #read the topology
    lines = read_file.read_file(file_name = top_file)

    #True while the loop is inside an [ atoms ] section
    inside_atoms = False

    for index, raw_line in enumerate(lines):

        stripped = raw_line.strip()

        #empty lines and comments are left as they are
        if not stripped or stripped.startswith(";"):
            continue

        fields = stripped.split()

        #beginning of an atoms section
        if stripped.replace(" ", "").split(";")[0] == "[atoms]":
            inside_atoms = True
            continue

        #any other section header ends the atoms section
        if fields[0].startswith("["):
            inside_atoms = False
            continue

        if not inside_atoms or len(fields) < 4:
            continue

        residue_name = fields[3]

        #do not heat solvent
        if residue_name == "SOL":
            continue

        #heat the ligand and the near residues
        if residue_name in ligands_resnames or fields[2] in hot_ids:
            fields[1] = fields[1] + "_"
            lines[index] = " ".join(fields) + "\n"

    write_on_files.write_file(lines = lines, file_name = top_file)
def merge_pdb(Protein):
    """
    Will put all the given ligands after the protein and update the
    ligand resnums

    The ligands are inserted right after the last ATOM, HETATM or TER line
    of the protein file, each ligand gets the next free residue number
    (its resnum attribute is updated too) and the result is written in
    {Protein.protein_id}_joined.pdb

    this function is brutal and memory consuming
    I should do it better in the future

    both the protein and the ligands should be in PDB files (no check will be done)

    Protein :: HPC_Drug.structures.protein.Protein instance with a valid _ligands value

    raises ValueError if the protein file has no ATOM, HETATM or TER line

    return Protein with updated Protein.pdb_file
    """

    coordinate_records = ('ATOM', 'HETATM', 'TER')

    protein_file = read_file.read_file(file_name=Protein.pdb_file)

    ligands = Protein.get_ligand_list()

    #get the index of the line with the last ATOM HETATM or TER line
    #and get the resnum of this last residue
    residue_number = None
    index_protein_file = None
    for i in range(len(protein_file) - 1, -1, -1):

        if protein_file[i].startswith(coordinate_records):

            #some TER lines are non standard and don't contain the residue number
            #in that case the one of the previous line is used
            try:
                residue_number = int(protein_file[i][22:26].strip())
            except ValueError:
                residue_number = int(protein_file[i - 1][22:26].strip())

            index_protein_file = i + 1

            break

    if index_protein_file is None:
        raise ValueError(
            f"Could not find any ATOM, HETATM or TER line in {Protein.pdb_file}")

    #create the ligands list of strings
    ligand_file = []
    for ligand in ligands:

        residue_number = residue_number + 1

        #update resnum
        ligand.resnum = residue_number

        resnum_field = "{0:>4}".format(residue_number)

        #update the residue numbers in the file
        for line in read_file.read_file(file_name=ligand.pdb_file):

            record = line.strip('\n')

            #only coordinate records have a residue number, any other line
            #(END, REMARK, ...) is copied unchanged, and short records are
            #padded so that the number ends up in the residue number columns
            #instead of after the end of the line
            if record.startswith(coordinate_records):
                record = record[:22].ljust(22) + resnum_field + record[26:]

            ligand_file.append(record + '\n')

    #insert the ligands in the right place of the protein_file list
    protein_file[index_protein_file:index_protein_file] = ligand_file

    #be sure to get the right formatting
    protein_file = [line.strip('\n') + '\n' for line in protein_file]

    joined_pdb = f"{Protein.protein_id}_joined.pdb"

    #the joined structure is written on a new file and Protein.pdb_file is updated
    write_on_files.write_file(lines=protein_file, file_name=joined_pdb)

    Protein.pdb_file = path.absolute_filepath(path=joined_pdb)

    return Protein
def remove_trash_metal_ions(Protein, trash = important_lists.trash_ions):
    """This function removes unwanted metal ions that are still inside the structure
    after it went through prody selection (updates Protein.pdb_file)

    For pdb files only the ATOM, HETATM and TER lines whose residue name is
    not in trash are kept (any other line is dropped) and a final "end" line
    is added. For cif files only the ATOM and HETATM lines whose sixth field
    is in trash are dropped, any other line is kept

    The file is overwritten in place and every line is stripped before
    being written back

    This is a brutal function I will need to do a better job

    Protein :: HPC_Drug.structures.protein.Protein instance

    trash :: a container of upper case residue names to remove,
    default important_lists.trash_ions

    Protein.file_type must be pdb or cif otherwise TypeError will be raised

    return Protein
    """

    def determine_pdb(line, trash = trash):

        #if it is a line containing atom coordinates
        if line.startswith(('ATOM', 'HETATM', 'TER')):
            return line[17:20].strip().upper() not in trash

        #if it is not will not be kept anyway
        return False

    def determine_cif(line, trash = trash):

        fields = line.split()

        #if it is a line containing atom coordinates
        #(blank lines and lines with too few fields can not be checked,
        #they must not make the function crash)
        if len(fields) > 5 and fields[0] in ('ATOM', 'HETATM'):
            return fields[5].upper() not in trash

        #if it is not will be kept anyway
        return True

    file_name = Protein.pdb_file

    lines = read_file.read_file(file_name = file_name)

    #keeping only the "good" lines
    if Protein.file_type == 'pdb':

        lines = [line for line in lines if determine_pdb(line)]

        lines.append("end")

    elif Protein.file_type == 'cif':

        lines = [line for line in lines if determine_cif(line)]

    else:
        raise TypeError(f"Protein.file_type must be pdb or cif not {Protein.file_type}")

    lines = [line.strip() + '\n' for line in lines]

    write_on_files.write_file(lines = lines, file_name = file_name)

    Protein.pdb_file = file_name

    return Protein