import numpy as np
from prody import AtomGroup, parsePDB, writePDB
# read_rotations, read_ftresults and apply_ftresults_atom_group are assumed to come
# from the project's FT utilities (e.g. sblu.ft); they are not defined in this snippet.


def main():
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("--ftfile-line", "-n", type=int, default=0,
                        help="0 indexed")
    parser.add_argument("--limit", "-l", type=int, default=None,
                        help="limit for number of ft lines read")
    parser.add_argument("ligand", help="ligand used for docking")
    parser.add_argument("ft_file",
                        help="ftresults file to draw transformations from.")
    parser.add_argument(
        "rot_file",
        help="Rotation file used during PIPER run to make ft_file.")
    # Note: this option is parsed but not used below; --limit controls how many lines are read.
    parser.add_argument("--ft_limit",
                        help="Specify how many ftfile lines to read")
    args = parser.parse_args()

    rotations = read_rotations(args.rot_file)

    # read ft_file
    ftresults = read_ftresults(args.ft_file, args.limit)

    # get center information for the original ligand
    lig_orig = parsePDB(args.ligand)
    lig_center = np.mean(lig_orig.getCoords(), axis=0)
    lig_name = args.ligand
    lig_base = lig_name.rsplit('/', 1)[-1][:-4]

    # apply ft lines to the aligned ligand
    coords_apply = apply_ftresults_atom_group(lig_orig, ftresults, rotations,
                                              center=lig_center)

    # create new atom group for the translated ligand
    lig_new = AtomGroup('%s.%s.pdb' % (lig_base, args.ftfile_line))

    # only choose one coordinate set (0 indexed)
    coords_apply.setACSIndex(args.ftfile_line)

    lig_new.setCoords(coords_apply.getCoords())
    lig_new.setNames(lig_orig.getNames())
    lig_new.setResnames(lig_orig.getResnames())
    lig_new.setResnums(lig_orig.getResnums())

    # write new ligand file
    writePDB("%s.%s.pdb" % (lig_base, args.ftfile_line), lig_new)
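A minimal usage sketch for the entry point above; the script name and input files are illustrative, not taken from the source:

# Hypothetical invocation (names are illustrative):
#   python apply_ft.py lig.pdb ft.000.00 rot70k.mol2.prm --ftfile-line 5
# which writes lig.5.pdb, the ligand moved by FT line 5.
if __name__ == "__main__":
    main()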
def _get_box_from_ag_coe(self, ag: prody.AtomGroup):
    coords = ag.getCoords()
    cmax, cmin = coords.max(0), coords.min(0)
    coe = (cmax + cmin) / 2
    size = self._get_box_size_from_ag_and_center(ag, coe)
    origin = coe - size * self.cell / 2
    return origin.astype(float), size.astype(int) + 1
def get_alpha_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Get indices of alpha carbons of pd AtomGroup object
    """
    return [
        i for i, a in enumerate(protein.iterAtoms()) if a.getName() == "CA"
    ]
@classmethod
def from_prody_atomgroup(
    cls,
    name: ProteinKey,
    protein: pd.AtomGroup,
    split_type: SplitType = SplitType.KMER,
    split_size: int = 16,
    selection: str = "calpha",
    upsample_rate: int = 50,
    moment_types: List[MomentType] = (
        MomentType.O_3,
        MomentType.O_4,
        MomentType.O_5,
        MomentType.F,
    ),
):
    """
    Construct MomentInvariants instance from a ProDy AtomGroup object.
    Selects according to `selection` string (default = alpha carbons).
    `moment_types` determines which moments are calculated.

    Example
    -------
    >>> invariants = MomentInvariants.from_prody_atomgroup(
    ...     "my_protein",
    ...     atom_group,
    ...     split_type=SplitType.RADIUS,
    ...     moment_types=[MomentType.O_3, MomentType.F, MomentType.phi_7, MomentType.phi_12],
    ... )
    """
    protein: pd.AtomGroup = protein.select("protein").select(selection)
    coordinates: np.ndarray = protein.getCoords()
    residue_splits = group_indices(protein.getResindices())
    shape = cls(
        name,
        len(residue_splits),
        coordinates,
        residue_splits,
        protein.getIndices(),
        sequence=protein.getSequence(),
        split_type=split_type,
        split_size=split_size,
        upsample_rate=upsample_rate,
        moment_types=moment_types,
    )
    shape._split(split_type)
    return shape
def get_beta_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Get indices of beta carbons of pd AtomGroup object
    (If beta carbon doesn't exist, alpha carbon index is returned)
    """
    residue_splits = group_indices(protein.getResindices())
    i = 0
    indices = []
    for split in residue_splits:
        ca = None
        cb = None
        for _ in split:
            if protein[i].getName() == "CB":
                cb = protein[i].getIndex()
            if protein[i].getName() == "CA":
                ca = protein[i].getIndex()
            i += 1
        if cb is not None:
            indices.append(cb)
        else:
            assert ca is not None
            indices.append(ca)
    return indices
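A short usage sketch for get_alpha_indices and get_beta_indices, assuming `pd` is ProDy imported as `pd` (as the type hints suggest); the structure identifier is illustrative:

import prody as pd

# Illustrative input: parse a structure and keep only protein atoms.
protein = pd.parsePDB("1ubq").select("protein").copy()

ca_positions = get_alpha_indices(protein)  # one entry per residue that has a CA atom
cb_or_ca = get_beta_indices(protein)       # CB index per residue, CA as fallback (e.g. GLY)
print(len(ca_positions), len(cb_or_ca))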
def _get_com(self, ag: prody.AtomGroup):
    return ag.getCoords().mean(axis=0)
def _get_coe(self, ag: prody.AtomGroup):
    coords = ag.getCoords()
    cmax, cmin = coords.max(0), coords.min(0)
    coe = (cmax + cmin) / 2
    return coe
def _get_box_from_ag_com(self, ag: prody.AtomGroup):
    coords = ag.getCoords()
    com = coords.mean(axis=0)
    size = self._get_box_size_from_ag_and_center(ag, com)
    origin = com - size * self.cell / 2
    return origin.astype(float), size.astype(int) + 1
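The box helpers above differ only in the reference point (center of extent vs. center of mass). A standalone sketch of that construction in plain NumPy, using a simple padding rule in place of the class's `_get_box_size_from_ag_and_center` helper and a hypothetical `cell` spacing:

import numpy as np

def box_sketch(coords, cell=1.0, padding=4.0, use_extent_center=True):
    # Reference point: midpoint of the bounding box (COE) or mean coordinate (COM).
    cmax, cmin = coords.max(0), coords.min(0)
    center = (cmax + cmin) / 2 if use_extent_center else coords.mean(axis=0)
    # Stand-in for _get_box_size_from_ag_and_center: extent plus padding, in grid cells.
    size = np.ceil((cmax - cmin + 2 * padding) / cell)
    origin = center - size * cell / 2
    return origin.astype(float), size.astype(int) + 1

coords = np.array([[0.0, 0.0, 0.0], [10.0, 4.0, 2.0]])
print(box_sketch(coords))                           # COE-anchored box
print(box_sketch(coords, use_extent_center=False))  # COM-anchored box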
def DefineNewAtom(atom_name, element, coordinates, resname, resnum, chain_id):
    """
    This function creates a new AtomGroup instance containing one atom.

    Note: `resname`, `resnum` and `chain_id` are expected as length-1
    sequences (e.g. ['ALA'], [45], ['A']), matching the other setters below.
    """
    new_atom = AtomGroup()
    new_atom.setNames([atom_name])
    new_atom.setElements([element])
    new_atom.setCoords([coordinates])
    new_atom.setResnames(resname)
    new_atom.setResnums(resnum)
    new_atom.setChids(chain_id)
    new_atom.setAltlocs([''])
    new_atom.setBetas([0])
    new_atom.setIcodes([''])
    new_atom.setOccupancies([1])
    new_atom.setSegnames([''])
    new_atom.setSerials([0])
    # new_atom.setAnisous([[0.0, 0.0, 0.0]])
    return new_atom
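A brief usage sketch for DefineNewAtom; the atom, coordinates and residue labels are illustrative, and the length-1 lists follow the note in the docstring above:

import numpy as np

# Illustrative call: a C-terminal OXT oxygen on residue 45 of chain A.
oxt = DefineNewAtom(
    atom_name="OXT",
    element="O",
    coordinates=np.array([12.1, 8.4, -3.2]),  # made-up position
    resname=["ALA"],
    resnum=[45],
    chain_id=["A"],
)
# The returned one-atom AtomGroup can be concatenated onto an existing residue
# with "+", as AddHfromPRO does below.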
def AddAtoms(initial_residue, atoms2add, atomnames_of_2_letters, residue_data,
             zmatrix, verbose=True):
    """
    This function returns a mutated residue with the atoms added.
    It takes a residue and a list of atom names to add for the mutation.
    :rtype : object
    :param initial_residue:
    :param atoms2add:
    :param atomnames_of_2_letters:
    :param residue_data:
    :param zmatrix:
    :param verbose:
    """
    if verbose:
        print('Adding new atoms:')
    new_atoms_elements = []
    for atom in atoms2add:
        if verbose:
            print(" # {}".format(atom))
        if atom[:2] in atomnames_of_2_letters:
            new_atoms_elements.append(atom[:2])
        else:
            new_atoms_elements.append(atom[0])
    new_atoms = AtomGroup('New atoms')
    number_of_new_atoms = len(atoms2add)
    new_atoms.setNames(list(atoms2add))
    new_atoms.setElements(new_atoms_elements)
    new_atoms.setResnames([residue_data['fin_resname']] * number_of_new_atoms)
    new_atoms.setResnums([residue_data['resnum']] * number_of_new_atoms)
    new_atoms.setChids([residue_data['chain']] * number_of_new_atoms)
    new_atoms.setAltlocs([''] * number_of_new_atoms)
    new_atoms.setBetas([0] * number_of_new_atoms)
    new_atoms.setIcodes([''] * number_of_new_atoms)
    new_atoms.setOccupancies([1] * number_of_new_atoms)
    new_atoms.setSegnames([''] * number_of_new_atoms)
    new_atoms.setSerials([0] * number_of_new_atoms)
    temporary_coords = np.ones([len(atoms2add), 3])
    new_atoms.setCoords(temporary_coords)
    incomplete_residue = initial_residue + new_atoms
    for atom in new_atoms.iterAtoms():
        atom_new_coordinates = GenerateCoordinatesFromZmatrix(incomplete_residue,
                                                              [atom.getName()],
                                                              zmatrix)
        atom.setCoords(atom_new_coordinates)
        # Rebuild the combined residue so the next atom is placed using the
        # coordinates generated so far.
        incomplete_residue = initial_residue + new_atoms
    if verbose:
        print('Done')
    return initial_residue + new_atoms
def AddHfromPRO(initial_residue, zmatrix, mutation):
    new_atom = AtomGroup('Hydrogen')
    new_atom.setNames(['H'])
    new_atom.setElements(['H'])
    new_atom.setResnames([zmatrix.Name])
    new_atom.setResnums([mutation['resnum']])
    new_atom.setChids([mutation['chain']])
    new_atom.setAltlocs([''])
    new_atom.setBetas([0])
    new_atom.setIcodes([''])
    new_atom.setOccupancies([1])
    new_atom.setSegnames([''])
    new_atom.setSerials([0])
    new_atom.setCoords(ModifyCoordinatesPRO(initial_residue, zmatrix, 'H', 'CD'))
    return initial_residue + new_atom
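The two mutation helpers above only read a few fields from their inputs; a hedged sketch of the minimal shapes they touch (illustrative values only, and the real zmatrix object certainly carries more than shown):

# Illustrative input shapes (values are made up):
residue_data = {"fin_resname": "TRP", "resnum": 45, "chain": "A"}  # keys read by AddAtoms
atoms2add = ["CD1", "NE1", "CE2"]            # atom names missing from the mutated residue
atomnames_of_2_letters = ["CL", "BR"]        # e.g. two-letter element prefixes recognized by AddAtoms
mutation = {"resnum": 45, "chain": "A"}      # keys read by AddHfromPRO
# The zmatrix object must expose at least a .Name attribute, plus whatever geometry
# GenerateCoordinatesFromZmatrix / ModifyCoordinatesPRO expect.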
def get_alpha_indices(protein: pd.AtomGroup) -> List[int]:
    """
    Get indices of alpha carbons of pd AtomGroup object
    """
    return [a.getIndex() for a in protein.iterAtoms() if a.getName() == "CA"]
def visualizemapApp(inp_file, out_file, sel_type, atom_group: prody.AtomGroup,
                    vmin_fltr, vmax_fltr, dmin_fltr, dmax_fltr, cyl_rad):
    ##########################################################################
    selectedAtoms = atom_group.select('protein and name CA')
    ##########################################################################
    minColorBarLimit = 0.0
    maxColorBarLimit = 1.0
    # Read data file and assign to a numpy array
    if sel_type.lower() == "ndcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms,
                                           Ctype=True, symmetric=True,
                                           writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)

        # Check the data range in the matrix.
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        if minCorrelationValue < 0.0:
            # Assume that it is an nDCC file
            minColorBarLimit = -1.0
        if maxCorrelationValue > 1.00001:
            print("This correlation map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        else:
            maxColorBarLimit = 1.0
    elif sel_type.lower() == "dcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms,
                                           Ctype=True, symmetric=True,
                                           writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)

        # Check the data range in the matrix.
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "absndcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = np.absolute(parseSparseCorrData(inp_file, selectedAtoms,
                                                       Ctype=True, symmetric=True,
                                                       writeAllOutput=False))
        else:
            ccMatrix = np.absolute(np.loadtxt(inp_file, dtype=float))
        minColorBarLimit = 0.0
        maxColorBarLimit = 1.0
    elif sel_type.lower() == "lmi":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms,
                                           Ctype=True, symmetric=True,
                                           writeAllOutput=False)
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)

        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
        # minColorBarLimit = 0.0
    elif sel_type.lower() == "nlmi":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms,
                                           Ctype=True, symmetric=True,
                                           writeAllOutput=False)
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)

        # minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)

        minColorBarLimit = 0.0
        # Ideally, it is supposed to be 1 but I used 1.00001 to avoid
        # rounding problems
        if maxCorrelationValue > 1.00001:
            print("This LMI map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        else:
            maxColorBarLimit = 1.0
    elif sel_type.lower() == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
        minColorBarLimit = 0.0
        maxColorBarLimit = 1.0
    elif sel_type.lower() == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "generic":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms,
                                           Ctype=True, symmetric=True,
                                           writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)

        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms,
                                        writeAllOutput=False)

        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    else:
        print("@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n")
        print("@> coeviz or evcouplings. If you have your data in full \n")
        print("@> matrix format and your data type is none of the options\n")
        print("@> mentioned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # Set vmin_fltr and vmax_fltr
    if (vmin_fltr == None):
        vmin_fltr = minColorBarLimit
    if (vmax_fltr == None):
        vmax_fltr = maxColorBarLimit

    print(f"""@> Min. value filter: {vmin_fltr}""")
    print(f"""@> Max. value filter: {vmax_fltr}""")

    ##########################################################################
    # Call overall correlation calculation
    overallCorrelationMap(ccMatrix, minColorBarLimit, maxColorBarLimit,
                          out_file, " ", selectedAtoms)

    plotDistributions = True
    VMDcylinderRadiusScale = 0.5
    PMLcylinderRadiusScale = 0.3

    if (cyl_rad == None):
        if sel_type.lower() == "evcouplings":
            VMDcylinderRadiusScale = 0.02
            PMLcylinderRadiusScale = 0.02
        else:
            VMDcylinderRadiusScale = 0.5
            PMLcylinderRadiusScale = 0.3
        print(f"""@> VMD Cylinder radius: {VMDcylinderRadiusScale}""")
        print(f"""@> PyMol Cylinder radius: {PMLcylinderRadiusScale}""")
    else:
        VMDcylinderRadiusScale = float(cyl_rad)
        PMLcylinderRadiusScale = float(cyl_rad)
        print(f"""@> Cylinder radius: {cyl_rad}""")

    if plotDistributions:
        if sel_type.lower() == "ndcc":
            distanceDistribution(ccMatrix, out_file, "nDCC", selectedAtoms,
                                 absoluteValues=False, writeAllOutput=True)
        elif sel_type.lower() == "dcc":
            distanceDistribution(ccMatrix, out_file, "DCC", selectedAtoms,
                                 absoluteValues=False, writeAllOutput=True)
        elif sel_type.lower() == "absndcc":
            distanceDistribution(ccMatrix, out_file, "Abs(nDCC)", selectedAtoms,
                                 absoluteValues=True, writeAllOutput=False)
        elif sel_type.lower() == "lmi":
            distanceDistribution(ccMatrix, out_file, "LMI", selectedAtoms,
                                 absoluteValues=True, writeAllOutput=True)
        elif sel_type.lower() == "nlmi":
            distanceDistribution(ccMatrix, out_file, "nLMI", selectedAtoms,
                                 absoluteValues=True, writeAllOutput=True)
        elif sel_type.lower() == "coeviz":
            distanceDistribution(ccMatrix, out_file, "CoeViz", selectedAtoms,
                                 absoluteValues=True, writeAllOutput=True)
        elif sel_type.lower() == "evcouplings":
            distanceDistribution(ccMatrix, out_file, "EVcoupling Score",
                                 selectedAtoms, absoluteValues=False,
                                 writeAllOutput=True)
        elif sel_type.lower() == "generic":
            distanceDistribution(ccMatrix, out_file, "Correlation",
                                 selectedAtoms, absoluteValues=False,
                                 writeAllOutput=True)
        elif sel_type.lower() == "eg":
            distanceDistribution(ccMatrix, out_file, "Force Constants",
                                 selectedAtoms, absoluteValues=False,
                                 writeAllOutput=True)
        else:
            print("Warning: Unknown correlation data.\n")
            print("         Correlations can be dcc, ndcc, absndcc, lmi,\n")
            print("         nlmi, coeviz or evcouplings!\n")

    ##########################################################################
    # Check number of chains. If there are multiple chains, plot inter and
    # intra chain correlations
    chains = Counter(selectedAtoms.getChids()).keys()
    saveMatrix = False
    plotChains = True
    if len(chains) > 1 and plotChains:
        intraChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)
        interChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)

    # Here, we can filter some correlation values closer than a distance.
    # Typically, it is supposed to filter out the correlation within the
    # same secondary structure etc.
    filterByDistance = True
    if filterByDistance:
        disMinValue = float(dmin_fltr)
        disMaxValue = float(dmax_fltr)
        ccMatrix = filterCorrelationMapByDistance(ccMatrix, out_file, " ",
                                                  selectedAtoms,
                                                  disMinValue, disMaxValue,
                                                  absoluteValues=False,
                                                  writeAllOutput=False)

    # Overall projection
    projectCorrelationsOntoProteinVMD(out_file, ccMatrix, out_file,
                                      selectedAtoms,
                                      vminFilter=float(vmin_fltr),
                                      vmaxFilter=float(vmax_fltr),
                                      cylinderRadiusScaler=VMDcylinderRadiusScale,
                                      absoluteValues=True, writeAllOutput=True)

    projectCorrelationsOntoProteinPyMol(out_file, ccMatrix, out_file,
                                        selectedAtoms,
                                        vminFilter=float(vmin_fltr),
                                        vmaxFilter=float(vmax_fltr),
                                        cylinderRadiusScaler=PMLcylinderRadiusScale,
                                        absoluteValues=True, writeAllOutput=True)
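A hedged usage sketch for visualizemapApp; the file names are illustrative, and the matrix file is assumed to be either a full plain-text matrix or the three-column sparse format the branches above detect:

import prody

structure = prody.parsePDB("protein.pdb")   # illustrative structure file
visualizemapApp(
    inp_file="ndcc_matrix.dat",             # full matrix or 3-column sparse data
    out_file="ndcc_out",
    sel_type="ndcc",
    atom_group=structure,
    vmin_fltr=0.75,                         # project only correlations above 0.75
    vmax_fltr=None,                         # None falls back to the color-bar limits
    dmin_fltr=4.5,                          # distance window (Angstrom) for filtering
    dmax_fltr=9999.0,
    cyl_rad=None,                           # None picks the defaults chosen above
)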