def __init__(self, file):
    """Load a prmtop file.

    Builds ``self.topology`` (one chain, with residues, atoms and bonds) and
    ``self.elements`` (one entry per atom, ``None`` when the element could
    not be determined) from the parsed prmtop data.

    Parameters
    ----------
    file
        the prmtop file to load; passed unchanged to
        amber_file_parser.PrmtopLoader
    """
    ## The Topology read from the prmtop file
    self.topology = top = Topology()
    self.elements = []

    # Load the prmtop file
    prmtop = amber_file_parser.PrmtopLoader(file)
    self._prmtop = prmtop

    # Add atoms to the topology
    PDBFile._loadNameReplacementTables()
    lastResidue = None
    c = top.addChain()
    for index in range(prmtop.getNumAtoms()):
        resNumber = prmtop.getResidueNumber(index)
        if resNumber != lastResidue:
            # A change in residue number marks the start of a new residue.
            lastResidue = resNumber
            resName = prmtop.getResidueLabel(iAtom=index).strip()
            if resName in PDBFile._residueNameReplacements:
                resName = PDBFile._residueNameReplacements[resName]
            r = top.addResidue(resName, c)
            # The atom-name table is looked up with the (possibly replaced)
            # residue name and reused for every atom of this residue.
            if resName in PDBFile._atomNameReplacements:
                atomReplacements = PDBFile._atomNameReplacements[resName]
            else:
                atomReplacements = {}
        atomName = prmtop.getAtomName(index).strip()
        if atomName in atomReplacements:
            atomName = atomReplacements[atomName]

        # Get the element from the prmtop file if available
        if prmtop.has_atomic_number:
            try:
                element = elem.Element.getByAtomicNumber(
                    int(prmtop._raw_data['ATOMIC_NUMBER'][index]))
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name.  Two-letter
            # symbols are tested first so that e.g. 'CL' is not read as carbon.
            upper = atomName.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            elif upper.startswith('ZN'):
                element = elem.zinc
            else:
                try:
                    element = elem.get_by_symbol(atomName[0])
                except (KeyError, IndexError):
                    # KeyError: the first letter is not a known symbol.
                    # IndexError: the atom name is empty after stripping.
                    element = None

        top.addAtom(atomName, element, r)
        self.elements.append(element)

    # Add bonds to the topology
    atoms = list(top.atoms())
    for bond in prmtop.getBondsWithH():
        top.addBond(atoms[bond[0]], atoms[bond[1]])
    for bond in prmtop.getBondsNoH():
        top.addBond(atoms[bond[0]], atoms[bond[1]])

    # Set the periodic box size.
    if prmtop.getIfBox():
        box = prmtop.getBoxBetaAndDimensions()
        # box[1:4] supplies the three lengths; box[0] is repeated three times
        # so the same angle is used for all three angle arguments.
        top.setPeriodicBoxVectors(
            computePeriodicBoxVectors(*(box[1:4] + box[0:1] * 3)))
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Parameters
    ----------
    file : string
        the name of the file to load.  Alternatively you can pass an open
        file object.

    Raises
    ------
    ValueError
        if a row in a later model refers to an atom that the first model did
        not define, or lists atoms in a different order than the first model.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []
    PDBFile._loadNameReplacementTables()

    # Load the file.
    inputFile = file
    ownHandle = False
    if isinstance(file, str):
        inputFile = open(file)
        ownHandle = True
    try:
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
    finally:
        # Close the handle even if parsing fails, but only when it was opened
        # here; a caller-supplied file object is left open.
        if ownHandle:
            inputFile.close()
    block = data[0]

    # Build the topology.
    # For each field the 'auth_' column is preferred and the 'label_' column
    # is the fallback when getAttributeIndex() reports -1.
    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('auth_atom_id')
    if atomNameCol == -1:
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('auth_comp_id')
    if resNameCol == -1:
        resNameCol = atomData.getAttributeIndex('label_comp_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    if resNumCol == -1:
        resNumCol = atomData.getAttributeIndex('label_seq_id')
    resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
    chainIdCol = atomData.getAttributeIndex('auth_asym_id')
    if chainIdCol == -1:
        chainIdCol = atomData.getAttributeIndex('label_asym_id')
        altChainIdCol = -1
    else:
        altChainIdCol = atomData.getAttributeIndex('label_asym_id')
    if altChainIdCol != -1:
        # Figure out which column is best to use for chain IDs: the one with
        # more distinct values becomes the primary chain ID column.
        idSet = {row[chainIdCol] for row in atomData.getRowList()}
        altIdSet = {row[altChainIdCol] for row in atomData.getRowList()}
        if len(altIdSet) > len(idSet):
            chainIdCol, altChainIdCol = (altChainIdCol, chainIdCol)
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastAltChainId = None
    lastResId = None
    lastInsertionCode = ''
    atomTable = {}
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = (row[resNumCol], row[chainIdCol], row[atomNameCol])
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            # First row of a new model: start a new position list for it.
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if (row[altIdCol] != '.' and atomKey in atomTable
                and len(self._positions[modelIndex]) > atomTable[atomKey].index):
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.
            if resInsertionCol == -1:
                insertionCode = ''
            else:
                insertionCode = row[resInsertionCol]
            if insertionCode in ('.', '?'):
                insertionCode = ''
            if lastChainId != row[chainIdCol] or (
                    altChainIdCol != -1 and lastAltChainId != row[altChainIdCol]):
                # The start of a new chain.
                chain = top.addChain(row[chainIdCol])
                lastChainId = row[chainIdCol]
                # Resetting lastResId forces a new residue below.
                lastResId = None
                if altChainIdCol != -1:
                    lastAltChainId = row[altChainIdCol]
            if (lastResId != row[resNumCol]
                    or lastChainId != row[chainIdCol]
                    or lastInsertionCode != insertionCode
                    or (lastResId == '.' and row[atomNameCol] in atomsInResidue)):
                # The start of a new residue.  When the residue number is
                # '.', a repeated atom name is what signals a new residue.
                resId = (None if resNumCol == -1 else row[resNumCol])
                resIC = insertionCode
                resName = row[resNameCol]
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                res = top.addResidue(resName, chain, resId, resIC)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                lastResId = row[resNumCol]
                lastInsertionCode = insertionCode
                atomsInResidue.clear()
            # An unrecognised element symbol leaves the element as None.
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                pass
            atomName = row[atomNameCol]
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]
            atom = top.addAtom(atomName, element, res, row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(atomName)
        else:
            # This row defines coordinates for an existing atom in one of the later models.
            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s'
                    % (row[atomNameCol], row[resNameCol], row[resNumCol], model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 here and tagged as nanometers below
        # (presumably an Angstrom-to-nanometer conversion).
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    for i, modelPositions in enumerate(self._positions):
        self._positions[i] = modelPositions * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.
    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        # Lengths get the same 0.1 scaling as the coordinates.
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        # Angles are converted from degrees to radians.
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.
    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                # NOTE(review): these keys are built from 'label_' ids, while
                # atomTable keys prefer the 'auth_' columns when present;
                # records whose ids differ are silently skipped -- confirm
                # this is intended.
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if connectBonds:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)