def select_waters(self):
    """Copy ``water.pdb`` into the input directory, dropping clashing waters.

    Coordinates are first collected from every ``<name>.pdb`` listed in
    ``self.cofactor`` (records starting with ``self.include`` only).  The
    whole cofactor step is skipped when the first entry is ``None``.

    ``water.pdb`` is then rewritten as
    ``<self.directory>/inputfiles/water.pdb``.  The output starts with a
    ``'<self.radius> SPHERE'`` header line.  Parsed records are grouped by
    field 6 (presumably the residue number -- confirm against
    ``IO.pdb_parse_in``).  A whole group is omitted when any of its atoms
    is flagged by ``f.euclidian_overlap`` against a cofactor atom with a
    cutoff of 1.6.

    Returns:
        None.  The result is the file written to disk.
    """
    src = 'water.pdb'
    tgt = self.directory + '/inputfiles/water.pdb'
    clash_cutoff = 1.6
    cofactor_coordinates = []
    # Only membership is ever tested, so a set avoids duplicate entries and
    # the linear scan a list would need for every water group.
    waters_remove = set()
    waters = {}

    if self.cofactor[0] is not None:
        for cofactor in self.cofactor:
            with open(cofactor + '.pdb') as infile:
                for record in infile:
                    if record.startswith(self.include):
                        atom = IO.pdb_parse_in(record)
                        cofactor_coordinates.append(
                            [atom[8], atom[9], atom[10]])

    with open(src) as infile, open(tgt, 'w') as outfile:
        outfile.write('{} SPHERE\n'.format(self.radius))
        for record in infile:
            atom = IO.pdb_parse_in(record)
            coord_wat = [atom[8], atom[9], atom[10]]
            # Group the atoms per water without relying on a bare except
            waters.setdefault(atom[6], []).append(atom)

            # One clashing cofactor atom is enough to discard the water
            if any(f.euclidian_overlap(coord_wat, coord_co, clash_cutoff) == True
                   for coord_co in cofactor_coordinates):
                waters_remove.add(atom[6])

        for key, atoms in waters.items():
            if key in waters_remove:
                continue
            for water in atoms:
                outfile.write(IO.pdb_parse_out(water) + '\n')
def write_pdb_out(self):
    """Split ``top_p.pdb`` into ``water.pdb`` and ``protein.pdb``.

    Pass 1 scans the records starting with ``self.include`` and looks at
    the first atom name listed for each water residue name (``O`` for
    ``HOH``, ``OW1`` for ``SOL``).  When ``f.euclidian_overlap`` reports
    that atom against ``self.center`` with ``self.radius``, field 6 of the
    record is remembered.

    Pass 2 re-reads the file.  Every record whose field 6 was remembered
    goes to ``water.pdb``; every record whose residue name (field 4,
    compared unstripped) is not a water name goes to ``protein.pdb``.

    NOTE(review): the water selection in pass 2 keys on field 6 alone, so
    a non-water record sharing that value would also land in ``water.pdb``
    -- confirm the numbering makes this impossible.

    Returns:
        None.  The results are the two files written to disk.
    """
    water_atom_names = {
        'HOH': ['O', 'H1', 'H2'],
        'SOL': ['OW1', 'HW1', 'HW2'],
    }
    kept_waters = []

    # Pass 1: decide which waters are kept
    with open('top_p.pdb') as topology:
        for record in topology:
            if not record.startswith(self.include):
                continue
            atom = IO.pdb_parse_in(record)
            resname = atom[4].strip()
            if resname not in water_atom_names:
                continue
            if atom[2].strip() != water_atom_names[resname][0]:
                continue
            sphere_center = self.center
            position = [float(atom[8]), float(atom[9]), float(atom[10])]
            inside = f.euclidian_overlap(sphere_center, position, self.radius)
            if inside == True:
                kept_waters.append(atom[6])

    # Pass 2: route every record to its output file
    with open('top_p.pdb') as topology, \
            open('water.pdb', 'w') as water_file, \
            open('protein.pdb', 'w') as protein_file:
        for record in topology:
            if not record.startswith(self.include):
                continue
            atom = IO.pdb_parse_in(record)
            if atom[6] in kept_waters:
                water_file.write(IO.pdb_parse_out(atom) + '\n')
            if atom[4] not in water_atom_names:
                protein_file.write(IO.pdb_parse_out(atom) + '\n')
def get_CYX(self):
    """Detect S-S bridges and rename the bonded ``CYS`` residues to ``CYX``.

    The ``SG`` atom of every ``CYS``/``CYX`` residue in ``self.PDB`` is
    collected.  Each unordered pair of sulfurs from different residues is
    passed to ``f.euclidian_overlap`` with a 2.2 cutoff.  For every
    reported pair a formatted line (both ``QRESN`` entries, then residue
    id and chain of each partner) is appended to ``self.log['CYX']``.
    Afterwards the residue name (field 4) of every atom in a bridged
    ``CYS`` residue is set to ``'CYX'`` in ``self.PDB``.

    Differences from the previous implementation:

    * ``A or B and C`` parsed as ``A or (B and C)``, so every atom of a
      ``CYS`` residue was treated as a sulfur.  Backbone atoms of two
      consecutive cysteines could therefore be reported as a bridge.
    * Residues are identified by ``(chain, residue id)``.  The log line
      used to take the chain from a stale loop variable, and the rename
      used to hit equally numbered residues in every chain.
    * The bare ``except`` that doubled as the "no cysteines" guard is
      replaced by an explicit check.

    Returns:
        None.  ``self.PDB`` and ``self.log['CYX']`` are updated in place.

    Raises:
        KeyError: if ``self.log['QRESN']`` has no entry for a bridged
            residue (this used to be swallowed silently).
    """
    cys_bond = 2.2

    # Reduce coordinate array: one (chain, residue id, xyz) per sulfur
    sulfurs = []
    for chain in self.PDB:
        for key in self.PDB[chain]:
            at = self.PDB[chain][key]
            if at[4] in ('CYS', 'CYX') and at[2].strip() == 'SG':
                sulfurs.append((chain, at[6], (at[8], at[9], at[10])))

    if not sulfurs:
        return None

    # Test every unordered pair once (upper triangle of the S-S matrix)
    bridged = set()
    for idx, (chain_1, res_1, coord_1) in enumerate(sulfurs):
        for chain_2, res_2, coord_2 in sulfurs[idx + 1:]:
            if (chain_1, res_1) == (chain_2, res_2):
                continue
            if f.euclidian_overlap(coord_1, coord_2, cys_bond) == True:
                bridged.add((chain_1, res_1))
                bridged.add((chain_2, res_2))
                outline = '{:<10}{:<10}{:<10}{:<10}{:<10}{:<10}'.format(
                    self.log['QRESN'][chain_1][res_1],
                    self.log['QRESN'][chain_2][res_2],
                    res_1,
                    chain_1,
                    res_2,
                    chain_2)
                self.log['CYX'].append(outline)

    # Rename every atom of the bridged cysteines
    for chain in self.PDB:
        for key in self.PDB[chain]:
            at = self.PDB[chain][key]
            if at[4] == 'CYS' and (chain, at[6]) in bridged:
                at[4] = 'CYX'

    return None
def get_mutations(self):
    """Derive mutation labels for protein atoms flagged near a ligand.

    ``protein.pdb`` is read first.  Every record starting with
    ``self.include`` is stored in ``self.prot_coord`` under field 1 as
    ``[(x, y, z), field 6, residue name]``.  Records are skipped when
    ``int(field 6)`` is in ``self.CYS`` or the residue name is in
    ``self.exclude``.

    For each name in ``self.lig`` the quoted name is appended to
    ``self.liglist`` and ``<name>.pdb`` is read.  Every ligand atom is
    compared with every stored protein atom through
    ``f.euclidian_overlap`` using ``float(self.dist)``.  A hit produces
    the label ``<resname><self.mapping[field 6]>A``, or ``...G`` when the
    residue is already ``ALA``.  New labels are appended to
    ``self.mutations``.

    Returns:
        None.  ``self.prot_coord``, ``self.liglist`` and
        ``self.mutations`` are updated in place.
    """
    # Index the protein atoms that are eligible for mutation
    with open('protein.pdb') as protein_file:
        for record in protein_file:
            if not record.startswith(self.include):
                continue
            atom = IO.pdb_parse_in(record)
            if int(atom[6]) in self.CYS or atom[4] in self.exclude:
                continue
            position = (atom[8], atom[9], atom[10])
            self.prot_coord[atom[1]] = [position, atom[6], atom[4]]

    # Compare every ligand atom against the indexed protein atoms
    for lig in self.lig:
        self.liglist.append("'{}'".format(lig))
        with open(lig + '.pdb') as ligand_file:
            for record in ligand_file:
                if not record.startswith(self.include):
                    continue
                atom = IO.pdb_parse_in(record)
                ligand_xyz = (atom[8], atom[9], atom[10])
                for entry in self.prot_coord.values():
                    close = f.euclidian_overlap(ligand_xyz, entry[0],
                                                float(self.dist))
                    if close == True:
                        residue = entry[1]
                        # Alanine itself is mutated to glycine instead
                        suffix = 'A' if entry[2] != 'ALA' else 'G'
                        mutation = (entry[2] + str(self.mapping[residue])
                                    + suffix)
                        if mutation not in self.mutations:
                            self.mutations.append(mutation)
def prepwizard_parse(self):
    """Write a hydrogen-free copy of ``self.prot`` and record charge states.

    The output is ``self.prot`` with its last four characters replaced by
    ``'_noH.pdb'``.  Only records starting with ``self.include`` are
    processed.  For every processed record, including the hydrogens that
    are not written, ``IO.charged_res`` is consulted.  When it has an
    entry for the residue name and atom name, that entry is stored in
    ``self.original_charges[field 5][field 6]``.

    ``self.origin == 'gromacs'``:
        * records with ``'H'`` at string index 13 are not written;
        * ``SOL`` is renamed to ``HOH`` and ``OW`` to ``O``.  A water
          oxygen is written only if ``f.euclidian_overlap`` reports it
          against ``self.center`` with ``self.radius + 5``.  All waters
          are dropped when ``self.water`` equals ``False``;
        * ``ILE``/``CD`` is renamed to ``CD1``;
        * ``CL-`` records are dropped.

    ``self.origin == 'maestro'``:
        * records whose atom name starts with ``'H'`` are not written.

    Any other origin is a no-op.

    The previous version repeated the charge bookkeeping in both branches
    and carried an unreachable ``'HIP'`` assignment (a ``not in`` test
    nested under the matching ``in`` test); both are folded into one
    helper here.

    Returns:
        None.  Writes the ``_noH.pdb`` file and updates
        ``self.original_charges`` in place.
    """

    def record_original_charge(atom):
        # Get the charges from the hydrogen connections
        # NOTE: this might be more common and thus less lines of code might
        # be needed, check when implementing MolProbity!!
        if atom[4] not in IO.charged_res:
            return
        states = IO.charged_res[atom[4]]
        if atom[2] not in states:
            return
        if atom[5] not in self.original_charges:
            self.original_charges[atom[5]] = {}
        self.original_charges[atom[5]][atom[6]] = states[atom[2]]

    if self.origin == 'gromacs':
        with open(self.prot) as infile, \
                open(self.prot[:-4] + '_noH.pdb', 'w') as outfile:
            for raw in infile:
                if not raw.startswith(self.include):
                    continue

                atom = IO.pdb_parse_in(raw)
                # Hydrogens are recognised on the raw record, not the parse
                write = raw[13] != 'H'

                # Change residue name of waters
                if atom[4] == 'SOL':
                    atom[4] = 'HOH'
                    if atom[2] == 'OW':
                        atom[2] = 'O'
                        coord1 = self.center
                        coord2 = [float(atom[8]),
                                  float(atom[9]),
                                  float(atom[10])]
                        write = f.euclidian_overlap(coord1, coord2,
                                                    self.radius + 5)
                    # Explicit comparison kept on purpose: only a value
                    # equal to False switches the waters off (None does not)
                    if self.water == False:
                        continue

                elif atom[4] == 'ILE' and atom[2] == 'CD':
                    atom[2] = 'CD1'

                elif atom[4] == 'CL-':
                    continue

                line_out = IO.pdb_parse_out(atom)
                record_original_charge(atom)

                if write == True:
                    outfile.write(line_out + '\n')

    elif self.origin == 'maestro':
        with open(self.prot) as infile, \
                open(self.prot[:-4] + '_noH.pdb', 'w') as outfile:
            for raw in infile:
                if not raw.startswith(self.include):
                    continue

                atom = IO.pdb_parse_in(raw)
                if atom[2][0] != 'H':
                    outfile.write(IO.pdb_parse_out(atom) + '\n')

                record_original_charge(atom)
def decharge(self):
    """Neutralise charged residues flagged by the boundary test and log charges.

    A residue is "charged" when its name is a key of the local table, and
    it is represented by one centre atom (e.g. ``CD`` for ``GLU``).

    1. Every centre atom for which ``f.euclidian_overlap(self.center,
       xyz, self.radius - 3.0)`` returns ``False`` is marked for
       neutralisation and logged in ``self.log['DECHARGE']``.
    2. Every other charged centre atom that ``f.euclidian_overlap``
       reports against a marked centre atom with a 4.0 cutoff (a
       salt-bridge partner) is marked and logged as well.
    3. All atoms of marked residues get the neutral residue name.  Each
       remaining charged centre atom is logged in ``self.log['CHARGE']``
       and its charge is added to ``self.log['TOTAL_CHARGE']``.

    Differences from the previous implementation:

    * step 2 raised ``KeyError`` when the partner belonged to a chain
      without any marked residue yet; such chains now get an entry on
      demand;
    * step 3 skipped every chain without marked residues, so their charged
      residues never reached ``CHARGE``/``TOTAL_CHARGE``.

    Returns:
        None.  ``self.PDB`` and ``self.log`` are updated in place.
    """
    # residue name: [neutral name, centre atom, formal charge]
    charged_res = {
        'GLU': ['GLH', 'CD', -1],
        'ASP': ['ASH', 'CG', -1],
        'ARG': ['ARN', 'CZ', 1],
        'LYS': ['LYN', 'NZ', 1],
        'HIP': ['HID', 'CG', 1],
    }

    def is_charge_center(at):
        return at[4] in charged_res and at[2].strip() == charged_res[at[4]][1]

    def xyz(at):
        return [float(at[8]), float(at[9]), float(at[10])]

    def log_line(chain, at):
        return '{:<10}{:<10}{:<10}{:<10}'.format(
            self.log['QRESN'][chain][at[6]], at[6], chain, at[4])

    sphere_center = self.center
    # chain id -> set of residue ids that will be neutralised
    neutralized = {}
    # Distance for decharging residues in boundary
    rest_bound = float(self.radius) - 3.0

    for chain in self.PDB:
        for key in self.PDB[chain]:
            at = self.PDB[chain][key]
            if not is_charge_center(at):
                continue
            if f.euclidian_overlap(sphere_center, xyz(at),
                                   rest_bound) == False:
                neutralized.setdefault(at[5], set()).add(at[6])
                self.log['DECHARGE'].append(log_line(chain, at))

    # Check if the decharged residue is part of a salt bridge and
    # neutralize this residue as well
    for chain in self.PDB:
        if chain not in neutralized:
            continue
        for key in self.PDB[chain]:
            at = self.PDB[chain][key]
            if at[6] not in neutralized[chain] or not is_charge_center(at):
                continue
            coord1 = xyz(at)
            for chain2 in self.PDB:
                for key2 in self.PDB[chain2]:
                    at_2 = self.PDB[chain2][key2]
                    if not is_charge_center(at_2):
                        continue
                    if at == at_2:
                        continue
                    # NOTE(review): this compares the partner's residue id
                    # with the marked ids of the *first* chain as well;
                    # kept as in the original -- confirm it is intended.
                    if at_2[6] in neutralized[chain]:
                        continue
                    if at_2[6] in neutralized.get(chain2, ()):
                        continue
                    if f.euclidian_overlap(coord1, xyz(at_2), 4.0) == True:
                        neutralized.setdefault(chain2, set()).add(at_2[6])
                        self.log['DECHARGE'].append(log_line(chain2, at_2))

    # Get the charged residues in the sphere and the total charge of these
    # residues in the sphere
    for chain in self.PDB:
        marked = neutralized.get(chain, ())
        for key in self.PDB[chain]:
            at = self.PDB[chain][key]
            if at[6] in marked:
                at[4] = charged_res[at[4]][0]
                continue
            if is_charge_center(at):
                self.log['CHARGE'].append(log_line(chain, at))
                self.log['TOTAL_CHARGE'] += charged_res[at[4]][2]