def _fillMolecule(name, resname, chain, resid, insertion, coords, segid, element,
                  occupancy, beta, charge, record):
    """Create a Molecule and populate its per-atom fields from parallel sequences.

    The molecule is allocated with ``len(name)`` atoms and each field is stored
    as a NumPy array converted to the dtype registered for that field in
    ``mol._dtypes``.

    Parameters
    ----------
    name, resname, chain, resid, insertion, segid, element, occupancy, beta :
        Per-atom values, one entry per atom (presumably all of the same length
        as ``name`` -- this function does not check it).
    coords :
        Per-atom coordinate rows. They are stacked with ``np.vstack`` and then
        promoted to three dimensions with ``np.atleast_3d`` before storing.
    charge, record :
        Accepted for interface compatibility but currently NOT stored on the
        molecule (the corresponding assignments are disabled).

    Returns
    -------
    Molecule
        The newly created and filled molecule.
    """
    mol = Molecule()
    mol.empty(len(name))

    # Order matters only in that it mirrors the order the fields were
    # historically assigned in; each entry is (field name, raw values).
    per_atom_fields = (
        ("name", name),
        ("resname", resname),
        ("chain", chain),
        ("resid", resid),
        ("insertion", insertion),
        ("coords", coords),
        ("segid", segid),
        ("element", element),
        ("occupancy", occupancy),
        ("beta", beta),
    )
    for field, values in per_atom_fields:
        if field == "coords":
            # Coordinates arrive as a list of rows; stack them and add the
            # trailing axis so the array is 3-dimensional.
            values = np.atleast_3d(np.vstack(values))
        setattr(mol, field, np.array(values, dtype=mol._dtypes[field]))

    # `charge` and `record` are intentionally left at whatever mol.empty() set.
    return mol
def _applyProteinCaps(mol, caps):
    """Prepare the termini of protein segments so tleap can build ACE/NME caps.

    For every segment listed in ``caps`` one terminal atom is turned into (or,
    if none is available, a new atom is created as) the first atom of the cap
    residue; leftover terminal hydrogens/oxygens are then removed. Finally the
    hydrogens of the first and last residue of every protein segment are
    removed, whether or not that segment was capped.

    Parameters
    ----------
    mol : Molecule
        The molecule to cap. It is modified through its ``set``, ``insert`` and
        ``remove`` methods and through ``_reorderMol``; nothing is returned.
    caps : dict
        Maps a segment id to a sequence of cap names, where index 0 is the
        N-terminal cap and index 1 the C-terminal cap. ``None`` or ``'none'``
        skips that terminal silently, ``''`` skips it with a warning, and any
        other value must be ``'ACE'`` or ``'NME'``.

    Raises
    ------
    RuntimeError
        If a segment does not exist in ``mol``, contains no protein atoms, or
        a requested cap name is not supported.
    """
    # NOTE(review): this file contains a second, later definition of
    # `_applyProteinCaps`; when the module is executed the later one replaces
    # this one. Confirm whether this copy is still needed.

    # AMBER capping
    # =============
    # This is the (horrible) way of adding caps in tleap:
    # For now, this is hardwired for ACE and NME
    # 1. Change one of the hydrogens of the N terminal (H[T]?[123]) to the ACE C atom, giving it a new resid
    # 1a. If no hydrogen present, create the ACE C atom.
    # 2. Change one of the oxygens of the C terminal ({O,OT1,OXT}) to the NME N atom, giving it a new resid
    # 2a. If no oxygen present, create the NME N atom.
    # 3. Reorder to put the new atoms first and last
    # 4. Remove the lingering hydrogens of the N terminal and oxygens of the C terminal.

    # Define the atoms to be replaced (0 and 1 corresponds to N- and C-terminal caps)
    # XPLOR names for H[123] and OXT are HT[123] and OT1, respectively.
    terminalatoms = {
        'ACE': ['H1', 'H2', 'H3', 'HT1', 'HT2', 'HT3'],
        'NME': ['OXT', 'OT1', 'O']
    }
    capresname = ['ACE', 'NME']
    capatomtype = ['C', 'N']

    # For each caps definition
    for seg in caps:
        # Can't move this out since we remove atoms in this loop
        prot = mol.atomselect('protein')
        # Get the segment
        segment = np.where(mol.segid == seg)[0]
        # Test segment
        if len(segment) == 0:
            raise RuntimeError(
                'There is no segment {} in the molecule.'.format(seg))
        if not np.any(prot & (mol.segid == seg)):
            raise RuntimeError(
                'Segment {} is not protein. Capping for non-protein segments is not supported.'
                .format(seg))

        # For each cap
        passed = False
        for i, cap in enumerate(caps[seg]):
            if cap is None or (isinstance(cap, str) and cap == 'none'):
                continue

            # Get info on segment and its terminals. Recomputed on every pass
            # because the previous pass may have inserted or reordered atoms.
            segidm = mol.segid == seg  # Mask for segid
            segididx = np.where(segidm)[0]
            resids = mol.resid[segididx]
            terminalids = [segididx[0], segididx[-1]]
            terminalresids = [resids[0], resids[-1]]
            residm = mol.resid == terminalresids[i]  # Mask for resid

            if not passed:
                # Remember the terminal resids as they were before any cap
                # atom was added; the clean-up loop below needs them.
                orig_terminalresids = terminalresids
                passed = True

            if cap == '':  # In case there is no cap defined
                logger.warning(
                    'No cap provided for resid {} on segment {}. Did not apply it.'
                    .format(terminalresids[i], seg))
                continue
            elif cap not in capresname:  # If it is defined, test if supported
                raise RuntimeError(
                    'In segment {}, the {} cap is not supported. Try using {} instead.'
                    .format(seg, cap, capresname))

            # Test if cap is already applied
            testcap = np.where(segidm & residm & (mol.resname == cap))[0]
            if len(testcap) != 0:
                logger.warning(
                    'Cap {} already exists on segment {}. Did not re-apply it.'
                    .format(cap, seg))
                continue

            # Test if the atom to change exists
            termatomsids = np.zeros(residm.shape, dtype=bool)
            for atm in terminalatoms[cap]:
                termatomsids |= mol.name == atm
            termatomsids = np.where(termatomsids & segidm & residm)[0]

            if len(termatomsids) == 0:
                # Create new atom
                termcaid = np.where(segidm & residm & (mol.name == 'CA'))[0]
                termcenterid = np.where(
                    segidm & residm & (mol.name == capatomtype[1 - i]))[0]
                atom = Molecule()
                atom.empty(1)
                atom.record = np.array(['ATOM'], dtype=Molecule._dtypes['record'])
                atom.name = np.array([capatomtype[i]], dtype=Molecule._dtypes['name'])
                # if i=0 => resid-1; i=1 => resid+1
                atom.resid = np.array([terminalresids[i] - 1 + 2 * i],
                                      dtype=Molecule._dtypes['resid'])
                atom.resname = np.array([cap], dtype=Molecule._dtypes['resname'])
                atom.segid = np.array([seg], dtype=Molecule._dtypes['segid'])
                atom.element = np.array([capatomtype[i]], dtype=Molecule._dtypes['element'])
                # TODO: Assumption of single chain in a segment might be wrong
                atom.chain = np.array([np.unique(mol.chain[segidm])],
                                      dtype=Molecule._dtypes['chain'])
                # Place the new atom on the CA -> terminal-atom line, 0.33 of
                # that bond vector beyond the terminal atom.
                atom.coords = mol.coords[termcenterid] + 0.33 * np.subtract(
                    mol.coords[termcenterid], mol.coords[termcaid])
                mol.insert(atom, terminalids[i])
                # logger.info('In segment {}, resid {} had none of these atoms: {}. Capping was performed by creating '
                #             'a new atom for cap construction by tleap.'.format(seg, terminalresids[i],
                #                                                                ' '.join(terminalatoms[cap])))
            else:
                # Select atom to change, do changes to cap, and change resid
                newatom = np.max(termatomsids)
                mol.set('resname', cap, sel=newatom)
                mol.set('name', capatomtype[i], sel=newatom)
                mol.set('element', capatomtype[i], sel=newatom)
                # if i=0 => resid-1; i=1 => resid+1
                mol.set('resid', terminalresids[i] - 1 + 2 * i, sel=newatom)

                # Reorder: swap the converted atom with the segment's
                # first/last atom so the cap atom sits at the terminal end.
                neworder = np.arange(mol.numAtoms)
                neworder[newatom] = terminalids[i]
                neworder[terminalids[i]] = newatom
                _reorderMol(mol, neworder)

        # For each cap
        for i, cap in enumerate(caps[seg]):
            # An empty-string cap was only warned about above and has no entry
            # in `terminalatoms`; skip it here as well, otherwise the lookup
            # below raises KeyError right after the "Did not apply it" warning.
            if cap is None or (isinstance(cap, str) and cap in ('none', '')):
                continue
            # Remove lingering hydrogens or oxygens in terminals
            mol.remove('segid {} and resid "{}" and name {}'.format(
                seg, orig_terminalresids[i], ' '.join(terminalatoms[cap])),
                _logger=False)

    # Remove terminal hydrogens regardless of caps or no caps. tleap confuses itself when it makes residues into terminals.
    # For example HID has an atom H which in NHID should become H[123] and crashes tleap.
    for seg in np.unique(mol.get('segid', 'protein')):
        segidm = mol.segid == seg  # Mask for segid
        segididx = np.where(segidm)[0]
        resids = mol.resid[segididx]
        mol.remove('(resid "{}" "{}") and segid {} and hydrogen'.format(
            resids[0], resids[-1], seg), _logger=False)
def _applyProteinCaps(mol, caps):
    """Prepare the termini of protein segments so tleap can build ACE/NME caps.

    For every segment listed in ``caps`` one terminal atom is turned into (or,
    if none is available, a new atom is created as) the first atom of the cap
    residue; leftover terminal hydrogens/oxygens are then removed. Finally the
    hydrogens of the first and last residue of every protein segment are
    removed, whether or not that segment was capped.

    Parameters
    ----------
    mol : Molecule
        The molecule to cap. It is modified through its ``set``, ``insert`` and
        ``remove`` methods and through ``_reorderMol``; nothing is returned.
    caps : dict
        Maps a segment id to a sequence of cap names, where index 0 is the
        N-terminal cap and index 1 the C-terminal cap. ``None`` or ``"none"``
        skips that terminal silently, ``""`` skips it with a warning, and any
        other value must be ``"ACE"`` or ``"NME"``.

    Raises
    ------
    RuntimeError
        If a segment does not exist in ``mol``, contains no protein atoms, or
        a requested cap name is not supported.
    """
    # NOTE(review): an earlier definition of `_applyProteinCaps` also appears
    # in this file; this later one is the definition that stays bound when the
    # module is executed. Confirm whether the earlier copy can be deleted.

    # AMBER capping
    # =============
    # This is the (horrible) way of adding caps in tleap:
    # For now, this is hardwired for ACE and NME
    # 1. Change one of the hydrogens of the N terminal (H[T]?[123]) to the ACE C atom, giving it a new resid
    # 1a. If no hydrogen present, create the ACE C atom.
    # 2. Change one of the oxygens of the C terminal ({O,OT1,OXT}) to the NME N atom, giving it a new resid
    # 2a. If no oxygen present, create the NME N atom.
    # 3. Reorder to put the new atoms first and last
    # 4. Remove the lingering hydrogens of the N terminal and oxygens of the C terminal.

    # Define the atoms to be replaced (0 and 1 corresponds to N- and C-terminal caps)
    # XPLOR names for H[123] and OXT are HT[123] and OT1, respectively.
    terminalatoms = {
        "ACE": ["H1", "H2", "H3", "HT1", "HT2", "HT3"],
        "NME": ["OXT", "OT1", "O"],
    }
    capresname = ["ACE", "NME"]
    capatomtype = ["C", "N"]

    # For each caps definition
    for seg in caps:
        # Can't move this out since we remove atoms in this loop
        prot = mol.atomselect("protein")
        # Get the segment
        segment = np.where(mol.segid == seg)[0]
        # Test segment
        if len(segment) == 0:
            raise RuntimeError(f"There is no segment {seg} in the molecule.")
        if not np.any(prot & (mol.segid == seg)):
            raise RuntimeError(
                f"Segment {seg} is not protein. Capping for non-protein segments is not supported."
            )

        # For each cap
        passed = False
        for i, cap in enumerate(caps[seg]):
            if cap is None or (isinstance(cap, str) and cap == "none"):
                continue

            # Get info on segment and its terminals. Recomputed on every pass
            # because the previous pass may have inserted or reordered atoms.
            segidm = mol.segid == seg  # Mask for segid
            segididx = np.where(segidm)[0]
            resids = mol.resid[segididx]
            terminalids = [segididx[0], segididx[-1]]
            terminalresids = [resids[0], resids[-1]]
            residm = mol.resid == terminalresids[i]  # Mask for resid

            if not passed:
                # Remember the terminal resids as they were before any cap
                # atom was added; the clean-up loop below needs them.
                orig_terminalresids = terminalresids
                passed = True

            if cap == "":  # In case there is no cap defined
                logger.warning(
                    f"No cap provided for resid {terminalresids[i]} on segment {seg}. Did not apply it."
                )
                continue
            elif cap not in capresname:  # If it is defined, test if supported
                raise RuntimeError(
                    f"In segment {seg}, the {cap} cap is not supported. Try using {capresname} instead."
                )

            # Test if cap is already applied
            testcap = np.where(segidm & residm & (mol.resname == cap))[0]
            if len(testcap) != 0:
                logger.warning(
                    f"Cap {cap} already exists on segment {seg}. Did not re-apply it."
                )
                continue

            # Test if the atom to change exists
            termatomsids = np.zeros(residm.shape, dtype=bool)
            for atm in terminalatoms[cap]:
                termatomsids |= mol.name == atm
            termatomsids = np.where(termatomsids & segidm & residm)[0]

            if len(termatomsids) == 0:
                # Create new atom
                termcaid = np.where(segidm & residm & (mol.name == "CA"))[0]
                termcenterid = np.where(
                    segidm & residm & (mol.name == capatomtype[1 - i])
                )[0]
                atom = Molecule()
                atom.empty(1)
                atom.record = np.array(["ATOM"], dtype=Molecule._dtypes["record"])
                atom.name = np.array([capatomtype[i]], dtype=Molecule._dtypes["name"])
                # if i=0 => resid-1; i=1 => resid+1
                atom.resid = np.array(
                    [terminalresids[i] - 1 + 2 * i], dtype=Molecule._dtypes["resid"]
                )
                atom.resname = np.array([cap], dtype=Molecule._dtypes["resname"])
                atom.segid = np.array([seg], dtype=Molecule._dtypes["segid"])
                atom.element = np.array(
                    [capatomtype[i]], dtype=Molecule._dtypes["element"]
                )
                # TODO: Assumption of single chain in a segment might be wrong
                atom.chain = np.array(
                    [np.unique(mol.chain[segidm])], dtype=Molecule._dtypes["chain"]
                )
                # Place the new atom on the CA -> terminal-atom line, 0.33 of
                # that bond vector beyond the terminal atom.
                atom.coords = mol.coords[termcenterid] + 0.33 * np.subtract(
                    mol.coords[termcenterid], mol.coords[termcaid]
                )
                mol.insert(atom, terminalids[i])
            else:
                # Select atom to change, do changes to cap, and change resid
                newatom = np.max(termatomsids)
                mol.set("resname", cap, sel=newatom)
                mol.set("name", capatomtype[i], sel=newatom)
                mol.set("element", capatomtype[i], sel=newatom)
                # if i=0 => resid-1; i=1 => resid+1
                mol.set("resid", terminalresids[i] - 1 + 2 * i, sel=newatom)

                # Reorder: swap the converted atom with the segment's
                # first/last atom so the cap atom sits at the terminal end.
                neworder = np.arange(mol.numAtoms)
                neworder[newatom] = terminalids[i]
                neworder[terminalids[i]] = newatom
                _reorderMol(mol, neworder)

        # For each cap
        for i, cap in enumerate(caps[seg]):
            # An empty-string cap was only warned about above and has no entry
            # in `terminalatoms`; skip it here as well, otherwise the lookup
            # below raises KeyError right after the "Did not apply it" warning.
            if cap is None or (isinstance(cap, str) and cap in ("none", "")):
                continue
            # Remove lingering hydrogens or oxygens in terminals
            mol.remove(
                'segid {} and resid "{}" and name {}'.format(
                    seg, orig_terminalresids[i], " ".join(terminalatoms[cap])
                ),
                _logger=False,
            )

    # Remove terminal hydrogens regardless of caps or no caps. tleap confuses itself when it makes residues into terminals.
    # For example HID has an atom H which in NHID should become H[123] and crashes tleap.
    for seg in np.unique(mol.get("segid", "protein")):
        segidm = mol.segid == seg  # Mask for segid
        segididx = np.where(segidm)[0]
        resids = mol.resid[segididx]
        mol.remove(
            f'(resid "{resids[0]}" "{resids[-1]}") and segid {seg} and hydrogen',
            _logger=False,
        )