Пример #1
0
    def _finish_molecule():
        """
        Finalize the chunk collected for the PDB chain that was just read.

        Works on names taken from the enclosing scope (mol, water, pdbid,
        chainId, assy, mollist):
          - the water chunk is skipped here (per the original note, it is
            added explicitly at the end by the caller)
          - an empty chunk only produces a history warning
          - otherwise the chunk is renamed to <lowercase pdb id><chain id>
          - a chunk without any C-alpha atom is treated as a non-protein:
            every residue becomes its own chunk inside a new "Heteroatoms"
            group, and that group is appended to mollist
          - a chunk with C-alpha atoms gets its bonds inferred, its protein
            chain id / PDB id set, and is appended to mollist
        """
        if mol == water:
            # Water is not finished here.
            return

        if not mol.atoms:
            env.history.message(redmsg("Warning: Pdb file contained no atoms"))
            env.history.h_update()
            return

        mol.name = pdbid.lower() + chainId

        if mol.protein.count_c_alpha_atoms() != 0:
            # Protein chain: bonds are always inferred, regardless of any
            # CONECT records.
            inferBonds(mol)
            mol.protein.set_chain_id(chainId)
            mol.protein.set_pdb_id(pdbid)
            if mol.atoms:
                mollist.append(mol)
            return

        # No C-alpha atoms: consider the chunk a non-protein and split it
        # into individual hetero groups, one chunk per residue.
        residues = mol.protein.get_amino_acids()
        assy.part.ensure_toplevel_group()
        het_group = Group("Heteroatoms", assy, assy.part.topnode)
        for residue in residues:
            chunk_label = "%s[%s]" % (
                residue.get_three_letter_code().replace(" ", ""),
                str(residue.get_id()))
            het_chunk = Chunk(assy, chunk_label)
            for src_atom in residue.get_atom_list():
                # Constructing the Atom with het_chunk as its last argument
                # is what places it in the new chunk; the object itself is
                # not needed afterwards.
                Atom(src_atom.element.symbol, src_atom.posn(), het_chunk)
            # Bonds of the new chunk are inferred as well. This is not
            # strictly correct: connectivity from the PDB CONECT records
            # should be checked first.
            inferBonds(het_chunk)
            het_group.addchild(het_chunk)
        mollist.append(het_group)
Пример #2
0
    def build_struct(self, name, params, position, mol=None, createPrinted=False):
        """
        Build a peptide from a sequence entered through the Property Manager dialog.

        The sequence is taken from self.peptide_cache, a collection of
        (amino acid index, phi, psi) entries.

        @param name: name given to the newly created chunk
        @param params: not used by this method
        @param position: indexable x, y, z position of the N-terminal
                         hydrogen; also seeds the dummy coordinates in
                         self.prev_coords
        @param mol: ignored - a new Chunk is always created
        @param createPrinted: not used by this method

        @return: the new chunk, or None if self.peptide_cache is empty
        """

        if len(self.peptide_cache) == 0:
            return None

        # Create a molecule
        mol = Chunk(self.win.assy, name)

        # Generate dummy atoms positions (reference points for placing the
        # first residue).
        self.prev_coords[0][0] = position[0] - 1.499
        self.prev_coords[0][1] = position[1] + 1.539
        self.prev_coords[0][2] = position[2]

        self.prev_coords[1][0] = position[0] - 1.499
        self.prev_coords[1][1] = position[1]
        self.prev_coords[1][2] = position[2]

        self.prev_coords[2][0] = position[0]
        self.prev_coords[2][1] = position[1]
        self.prev_coords[2][2] = position[2]

        # Add a N-terminal hydrogen. The two flags are temporary markers
        # read by the bond order pass below and deleted before returning.
        atom = Atom("H", position, mol)
        atom._is_aromatic = False
        atom._is_single = False
        self.nterm_hydrogen = atom

        # Generate the peptide chain.
        # NOTE(review): self.length is presumably advanced by
        # _buildResiduum - confirm.
        self.length = 1
        for index, phi, psi in self.peptide_cache:
            # Only symbol, zmatrix and size are needed; the other two fields
            # get their own names so the 'name' parameter is not rebound.
            _aa_name, _aa_short, symbol, zmatrix, size = AMINO_ACIDS[index]
            self._buildResiduum(mol, zmatrix, size, phi, psi, None, symbol)

        # Add a C-terminal OH group ('symbol' is that of the last residue;
        # peptide_cache is known to be non-empty here).
        self._buildResiduum(mol, CTERM_ZMATRIX, 5, 0.0, 0.0, None, symbol)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.
        inferBonds(mol)

        mol._protein_helix = []
        mol._protein_sheet = []

        # Assign proper bond orders. The running counter i is also recorded
        # in the helix / sheet list selected by self.ss_idx.
        i = 1
        for atom in mol.atoms.itervalues():
            if self.ss_idx == 1:
                mol._protein_helix.append(i)
            elif self.ss_idx == 2:
                mol._protein_sheet.append(i)
            if atom.bonds:
                for bond in atom.bonds:
                    a1 = bond.atom1
                    a2 = bond.atom2
                    if a1.getAtomTypeName() == "sp2" and \
                       a2.getAtomTypeName() == "sp2":
                        if a1._is_aromatic and a2._is_aromatic:
                            bond.set_v6(V_AROMATIC)
                        elif ((a1._is_aromatic == False and
                               a2._is_aromatic == False) and
                              not (a1._is_single and a2._is_single)):
                            # Bug fix: the second test used to repeat atom1,
                            # so atom2's aromatic flag was never checked.
                            bond.set_v6(V_DOUBLE)
            i += 1

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        return mol
Пример #3
0
    def make_aligned(self,
                     assy,
                     name,
                     aa_idx,
                     phi, psi,
                     pos1, pos2,
                     secondary = SS_COIL,
                     fake_chain = False,
                     length = None):
        """
        Build and return a chunk that is a homo-peptide aligned to
        a pos2-pos1 vector.

        @param assy: assembly the new chunk is created in

        @param aa_idx: amino acid type (index in AMINO_ACIDS list)
        @type aa_idx: int

        @param name: chunk name
        @type name: string

        @param phi, psi: peptide bond angles
        @type phi, psi: float

        @param pos1, pos2: desired peptide positions (beginning and end)
        @type pos1, pos2: V

        @param secondary: secondary structure class, used for visual representation
        The actual peptide chain conformation is based on phi / psi angles.
        @type secondary: int

        @param fake_chain: if True, create only C-alpha atoms. used for drawing
        peptide trace image during interactive peptide placement (used by
        PeptideLine_GraphicsMode.py)
        @type fake_chain: boolean

        @param length: optional peptide length (number of amino acids), if
        not specified, pos1 and pos2 are used to figure out the length
        @type length: int

        @return: A homo-polypeptide chain, or None if the computed length
                 is 0.
        @rtype:  L{Chunk}
        """

        if not length:
            self.length = self.get_number_of_res(pos1, pos2, phi, psi)
            if self.length == 0:
                return None
        else:
            self.length = length

        # Create a molecule
        mol = Chunk(assy, name)

        if not fake_chain:
            mol.protein = Protein()
            mol.protein.set_chain_id('A')

        # Generate dummy atoms positions
        self.prev_coords[0][0] = pos1[0] - 1.0
        self.prev_coords[0][1] = pos1[1] - 1.0
        self.prev_coords[0][2] = pos1[2]

        self.prev_coords[1][0] = pos1[0] - 1.0
        self.prev_coords[1][1] = pos1[1]
        self.prev_coords[1][2] = pos1[2]

        self.prev_coords[2][0] = pos1[0]
        self.prev_coords[2][1] = pos1[1]
        self.prev_coords[2][2] = pos1[2]

        # Only short_name, zmatrix and size are used; the remaining fields
        # get their own names so the 'name' parameter is not rebound.
        _aa_name, short_name, _aa_symbol, zmatrix, size = AMINO_ACIDS[aa_idx]

        # No N-terminal hydrogen is created (see below).
        self.nterm_hydrogen = None

        # Initially, the Peptide Builder was creating peptide structures
        # saturated at both ends, i.e. with N-terminal hydrogen and C-terminal
        # OH group present. Currently, this code is disabled to allow
        # connecting multiple peptide structures by creating bonds between
        # the C- and N- terminal ends of two individual structures.
        #
        # if not fake_chain:
        #     atom = Atom("H", pos1, mol)
        #     atom._is_aromatic = False
        #     atom._is_single = False
        #     self.nterm_hydrogen = atom
        #     mol.protein.add_pdb_atom(atom, "H", 1, name)
        #     atom.pdb_info = {}
        #     atom.pdb_info['atom_name'] = "H"
        #     atom.pdb_info['residue_name'] = short_name
        #     atom.pdb_info['residue_id'] = "  1 "
        #     atom.pdb_info['standard_atom'] = True

        # NOTE(review): presumably set by _buildResidue to the first C-alpha
        # atom - confirm.
        self.init_ca = None

        # Generate the peptide chain.
        for idx in range(int(self.length)):
            self._buildResidue(mol, zmatrix, size, idx+1, phi, psi, secondary, None, short_name, fake_chain=fake_chain)

        # Disabled for the same reason as the N-terminal hydrogen above:
        #
        # # Add a C-terminal OH group
        # self._buildResidue(mol, CTERM_ZMATRIX, 5, int(self.length), 0.0, 0.0, secondary, None, short_name, fake_chain=fake_chain)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.

        if not fake_chain:
            inferBonds(mol)

        # Assign proper bond orders.
        for atom in mol.atoms.itervalues():
            if atom.bonds:
                for bond in atom.bonds:
                    a1 = bond.atom1
                    a2 = bond.atom2
                    if a1.getAtomTypeName() == "sp2" and \
                       a2.getAtomTypeName() == "sp2":
                        if a1._is_aromatic and a2._is_aromatic:
                            bond.set_v6(V_AROMATIC)
                        elif ((a1._is_aromatic == False and
                               a2._is_aromatic == False) and
                              not (a1._is_single and a2._is_single)):
                            # Bug fix: the second test used to repeat atom1,
                            # so atom2's aromatic flag was never checked.
                            bond.set_v6(V_DOUBLE)

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        # Rotate the chunk so that its axis matches the Z axis, then let
        # _orient() align it using pos2 and pos1.
        ax = V(0.,0.,1.)
        mol.rot(Q(mol.getaxis(),ax))

        self._orient(mol, pos2, pos1)

        if self.init_ca:
            # Translate the chunk so that the init_ca atom ends up at pos1.
            mol.move(pos1 - self.init_ca.posn())

        return mol
Пример #4
0
# Atom-name spellings that do not name an element directly, mapped to the
# element symbol to use instead. These are all guesses -- not found
# documented anywhere [bruce 070410].
#
# "HE" is deliberately not in this table: leaving it out means it is read as
# Helium, but including it might prevent reading an actual Helium if that was
# intended. It is mapped to "H" for ATOM records only (not HETATM), in
# _pdb_element_symbol below.
# (Later: can't we use the case of the 'E' to distinguish it from He?)
_PDB_ATOMNAME_EXCEPTIONS = {
    "HB": "H",
    "HN": "H",
}


def _pdb_strip_digits(name):
    """
    Return name with all decimal digit characters removed.
    """
    for digit in "0123456789":
        name = name.replace(digit, "")
    return name


def _pdb_element_symbol(card, key):
    """
    Guess the element symbol of an ATOM / HETATM record.

    The full atom name is in columns 13-16, i.e. card[12:16]; see
    http://www.wwpdb.org/documentation/format2.3-0108-us.pdf, page 156.
    Several spellings are tried in order and the first one the periodic
    table recognizes is used: the 4-, 3- and 2-character prefixes of the
    name field (spaces and underscores removed), then the same three with
    digits removed as well. [bruce 070410 / 080508]

    @param card: one line of the PDB file.
    @type  card: string

    @param key: the lowercased record name with spaces removed, i.e.
                "atom" or "hetatm".
    @type  key: string

    @return: the recognized element symbol, or None if no spelling was
             recognized.
    """
    name4 = card[12:16].replace(" ", "").replace("_", "")
    name3 = card[12:15].replace(" ", "").replace("_", "")
    name2 = card[12:14].replace(" ", "").replace("_", "")
    candidates = (
        name4, # as seems best according to documentation
        name3,
        name2, # like the oldest code
        _pdb_strip_digits(name4),
        _pdb_strip_digits(name3),
        _pdb_strip_digits(name2), # like code as revised on 070410
    )
    for atomname in candidates:
        atomname = _PDB_ATOMNAME_EXCEPTIONS.get(atomname, atomname)
        if atomname == "HE" and key == "atom":
            atomname = "H" # see comment on _PDB_ATOMNAME_EXCEPTIONS
        sym = capitalize(atomname) # turns either 'he' or 'HE' into 'He'
        try:
            PeriodicTable.getElement(sym)
        except Exception:
            # note: this typically fails with AssertionError
            # (not e.g. KeyError) [bruce 050322]
            continue
        return sym
    return None


def _readpdb(assy, 
             filename, 
             isInsert = False, 
             showProgressDialog = False, 
             chainId = None):
    """
    Read a Protein DataBank-format file into a single new chunk, which is 
    returned unless there are no atoms in the file, in which case a warning
    is printed and None is returned. (The new chunk (if returned) is in assy,
    but is not yet added into any Group or Part in assy -- caller must do that.)
    Unless isInsert = True, set assy.filename to match the file we read,
    even if we return None.

    If the file contains no usable CONECT records, bonds are inferred.
    
    @param assy: The assembly.
    @type  assy: L{assembly}
    
    @param filename: The PDB filename to read.
    @type  filename: string
    
    @param isInsert: If True, the PDB file will be inserted into the current
                     assembly. If False (default), the PDB is opened as the 
                     assembly.
    @type  isInsert: boolean
    
    @param showProgressDialog: if True, display a progress dialog while reading
                               a file.
    @type  showProgressDialog: boolean

    @param chainId: accepted but not used by this function.
    
    @return: A chunk containing the contents of the PDB file.
    @rtype:  L{Chunk}
    
    @see: U{B{PDB File Format}<http://www.wwpdb.org/documentation/format23/v2.3.html>}
    """

    # Close the file even if reading it fails.
    fi = open(filename, "rU")
    try:
        lines = fi.readlines()
    finally:
        fi.close()

    nodename = os.path.basename(filename)
    if not isInsert:
        assy.filename = filename
    ndix = {} # maps PDB atom serial number -> Atom, for CONECT records
    mol = Chunk(assy, nodename)
    numconects = 0

    # Create and display a Progress dialog while reading the PDB file. 
    # One issue with this implem is that QProgressDialog always displays 
    # a "Cancel" button, which is not hooked up. I think this is OK for now,
    # but later we should either hook it up or create our own progress
    # dialog that doesn't include a "Cancel" button. --mark 2007-12-06
    if showProgressDialog:
        _progressValue = 0
        _progressFinishValue = len(lines)
        win = env.mainwindow()
        win.progressDialog.setLabelText("Reading file...")
        win.progressDialog.setRange(0, _progressFinishValue)
        _progressDialogDisplayed = False
        _timerStart = time.time()

    for card in lines:
        key = card[:6].lower().replace(" ", "")
        if key in ("atom", "hetatm"):
            sym = _pdb_element_symbol(card, key)
            if sym is None:
                name4 = card[12:16].replace(" ", "").replace("_", "")
                msg = "Warning: Pdb file: will use Carbon in place of unknown element %s in: %s" \
                    % (name4, card)
                print(msg) #bruce 070410 added this print
                env.history.message( redmsg( msg ))

                # Create a stand-in Carbon atom, so the CONECT records
                # still work. [bruce 080508]
                sym = "C"

                # Better still might be to create a fake element, 
                # so we could write out the pdb file again
                # (albeit missing lots of info). [bruce 070410 comment]

                # Note: an advisor tells us:
                #   PDB files sometimes encode atomtypes,
                #   using C_R instead of C, for example, to represent sp2 
                #   carbons.
                # That particular case won't get here, since underscores
                # are removed and shorter prefixes of the name are tried.
                # It would be better to realize this means sp2 and set the
                # atomtype here (and perhaps then use it when inferring
                # bonds, which we do later if the file doesn't have any
                # bonds). [bruce 060614/070410 comment]

            # Now the element name is in sym.
            xyz = map(float, [card[30:38], card[38:46], card[46:54]] )
            n = int(card[6:11])
            a = Atom(sym, A(xyz), mol)
            ndix[n] = a
        elif key == "conect":
            try:
                a1 = ndix[int(card[6:11])]
            except (ValueError, KeyError):
                # ValueError: the serial number field is not an integer;
                # KeyError: no atom with that serial number was read.
                # We don't try to distinguish these here. [bruce 050322]
                env.history.message( redmsg( "Warning: Pdb file: can't find first atom in CONECT record: %s" % (card,) ))
            else:
                for i in range(11, 70, 5):
                    try:
                        a2 = ndix[int(card[i:i+5])]
                    except ValueError:
                        # bruce 050323 comment:
                        # we assume this is from int('') or int(' ') etc;
                        # this is the usual way of ending this loop.
                        break
                    except KeyError:
                        #bruce 050322-23 added history warning for this,
                        # assuming it comes from ndix[] lookup.
                        env.history.message( redmsg( "Warning: Pdb file: can't find atom %s in: %s" % (card[i:i+5], card) ))
                        continue
                    bond_atoms(a1, a2)
                    numconects += 1

        if showProgressDialog: # Update the progress dialog.
            _progressValue += 1
            if _progressValue >= _progressFinishValue:
                win.progressDialog.setLabelText("Building model...")
            elif _progressDialogDisplayed:
                win.progressDialog.setValue(_progressValue)
            else:
                _timerDuration = time.time() - _timerStart
                if _timerDuration > 0.25: 
                    # Display progress dialog after 0.25 seconds
                    win.progressDialog.setValue(_progressValue)
                    _progressDialogDisplayed = True

    if showProgressDialog: # Make the progress dialog go away.
        win.progressDialog.setValue(_progressFinishValue) 

    #bruce 050322 part of fix for bug 433: don't return an empty chunk
    if not mol.atoms:
        env.history.message( redmsg( "Warning: Pdb file contained no atoms"))
        return None
    if numconects == 0:
        msg = orangemsg("PDB file has no bond info; inferring bonds")
        env.history.message(msg)
        # let user see message right away (bond inference can take significant 
        # time) [bruce 060620]
        env.history.h_update() 
        inferBonds(mol)
    return mol
    def build_struct(self,
                     name,
                     params,
                     position,
                     mol=None,
                     createPrinted=False):
        """
        Build a peptide from a sequence entered through the Property Manager dialog.

        The sequence is taken from self.peptide_cache, a collection of
        (amino acid index, phi, psi) entries.

        @param name: name given to the newly created chunk
        @param params: not used by this method
        @param position: indexable x, y, z position of the N-terminal
                         hydrogen; also seeds the dummy coordinates in
                         self.prev_coords
        @param mol: ignored - a new Chunk is always created
        @param createPrinted: not used by this method

        @return: the new chunk, or None if self.peptide_cache is empty
        """

        if len(self.peptide_cache) == 0:
            return None

        # Create a molecule
        mol = Chunk(self.win.assy, name)

        # Generate dummy atoms positions (reference points for placing the
        # first residue).
        self.prev_coords[0][0] = position[0] - 1.499
        self.prev_coords[0][1] = position[1] + 1.539
        self.prev_coords[0][2] = position[2]

        self.prev_coords[1][0] = position[0] - 1.499
        self.prev_coords[1][1] = position[1]
        self.prev_coords[1][2] = position[2]

        self.prev_coords[2][0] = position[0]
        self.prev_coords[2][1] = position[1]
        self.prev_coords[2][2] = position[2]

        # Add a N-terminal hydrogen. The two flags are temporary markers
        # read by the bond order pass below and deleted before returning.
        atom = Atom("H", position, mol)
        atom._is_aromatic = False
        atom._is_single = False
        self.nterm_hydrogen = atom

        # Generate the peptide chain.
        # NOTE(review): self.length is presumably advanced by
        # _buildResiduum - confirm.
        self.length = 1
        for index, phi, psi in self.peptide_cache:
            # Only symbol, zmatrix and size are needed; the other two fields
            # get their own names so the 'name' parameter is not rebound.
            _aa_name, _aa_short, symbol, zmatrix, size = AMINO_ACIDS[index]
            self._buildResiduum(mol, zmatrix, size, phi, psi, None, symbol)

        # Add a C-terminal OH group ('symbol' is that of the last residue;
        # peptide_cache is known to be non-empty here).
        self._buildResiduum(mol, CTERM_ZMATRIX, 5, 0.0, 0.0, None, symbol)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.
        inferBonds(mol)

        mol._protein_helix = []
        mol._protein_sheet = []

        # Assign proper bond orders. The running counter i is also recorded
        # in the helix / sheet list selected by self.ss_idx.
        i = 1
        for atom in mol.atoms.itervalues():
            if self.ss_idx == 1:
                mol._protein_helix.append(i)
            elif self.ss_idx == 2:
                mol._protein_sheet.append(i)
            if atom.bonds:
                for bond in atom.bonds:
                    a1 = bond.atom1
                    a2 = bond.atom2
                    if a1.getAtomTypeName() == "sp2" and \
                       a2.getAtomTypeName() == "sp2":
                        if a1._is_aromatic and a2._is_aromatic:
                            bond.set_v6(V_AROMATIC)
                        elif ((a1._is_aromatic == False
                               and a2._is_aromatic == False)
                              and not (a1._is_single and a2._is_single)):
                            # Bug fix: the second test used to repeat atom1,
                            # so atom2's aromatic flag was never checked.
                            bond.set_v6(V_DOUBLE)
            i += 1

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        return mol
Пример #6
0
    def make_aligned(self,
                     assy,
                     name,
                     aa_idx,
                     phi,
                     psi,
                     pos1,
                     pos2,
                     secondary=SS_COIL,
                     fake_chain=False,
                     length=None):
        """
        Build and return a chunk that is a homo-peptide aligned to
        a pos2-pos1 vector.

        @param assy: assembly the new chunk is created in

        @param aa_idx: amino acid type (index in AMINO_ACIDS list)
        @type aa_idx: int

        @param name: chunk name
        @type name: string

        @param phi, psi: peptide bond angles
        @type phi, psi: float

        @param pos1, pos2: desired peptide positions (beginning and end)
        @type pos1, pos2: V

        @param secondary: secondary structure class, used for visual representation
        The actual peptide chain conformation is based on phi / psi angles.
        @type secondary: int

        @param fake_chain: if True, create only C-alpha atoms. used for drawing
        peptide trace image during interactive peptide placement (used by
        PeptideLine_GraphicsMode.py)
        @type fake_chain: boolean

        @param length: optional peptide length (number of amino acids), if
        not specified, pos1 and pos2 are used to figure out the length
        @type length: int

        @return: A homo-polypeptide chain, or None if the computed length
                 is 0.
        @rtype:  L{Chunk}
        """

        if not length:
            self.length = self.get_number_of_res(pos1, pos2, phi, psi)
            if self.length == 0:
                return None
        else:
            self.length = length

        # Create a molecule
        mol = Chunk(assy, name)

        if not fake_chain:
            mol.protein = Protein()
            mol.protein.set_chain_id('A')

        # Generate dummy atoms positions
        self.prev_coords[0][0] = pos1[0] - 1.0
        self.prev_coords[0][1] = pos1[1] - 1.0
        self.prev_coords[0][2] = pos1[2]

        self.prev_coords[1][0] = pos1[0] - 1.0
        self.prev_coords[1][1] = pos1[1]
        self.prev_coords[1][2] = pos1[2]

        self.prev_coords[2][0] = pos1[0]
        self.prev_coords[2][1] = pos1[1]
        self.prev_coords[2][2] = pos1[2]

        # Only short_name, zmatrix and size are used; the remaining fields
        # get their own names so the 'name' parameter is not rebound.
        _aa_name, short_name, _aa_symbol, zmatrix, size = AMINO_ACIDS[aa_idx]

        # No N-terminal hydrogen is created (see below).
        self.nterm_hydrogen = None

        # Initially, the Peptide Builder was creating peptide structures
        # saturated at both ends, i.e. with N-terminal hydrogen and C-terminal
        # OH group present. Currently, this code is disabled to allow
        # connecting multiple peptide structures by creating bonds between
        # the C- and N- terminal ends of two individual structures.
        #
        # if not fake_chain:
        #     atom = Atom("H", pos1, mol)
        #     atom._is_aromatic = False
        #     atom._is_single = False
        #     self.nterm_hydrogen = atom
        #     mol.protein.add_pdb_atom(atom, "H", 1, name)
        #     atom.pdb_info = {}
        #     atom.pdb_info['atom_name'] = "H"
        #     atom.pdb_info['residue_name'] = short_name
        #     atom.pdb_info['residue_id'] = "  1 "
        #     atom.pdb_info['standard_atom'] = True

        # NOTE(review): presumably set by _buildResidue to the first C-alpha
        # atom - confirm.
        self.init_ca = None

        # Generate the peptide chain.
        for idx in range(int(self.length)):
            self._buildResidue(mol,
                               zmatrix,
                               size,
                               idx + 1,
                               phi,
                               psi,
                               secondary,
                               None,
                               short_name,
                               fake_chain=fake_chain)

        # Disabled for the same reason as the N-terminal hydrogen above:
        #
        # # Add a C-terminal OH group
        # self._buildResidue(mol, CTERM_ZMATRIX, 5, int(self.length), 0.0, 0.0, secondary, None, short_name, fake_chain=fake_chain)

        # Compute bonds (slow!)
        # This should be replaced by a proper bond assignment.

        if not fake_chain:
            inferBonds(mol)

        # Assign proper bond orders.
        for atom in mol.atoms.itervalues():
            if atom.bonds:
                for bond in atom.bonds:
                    a1 = bond.atom1
                    a2 = bond.atom2
                    if a1.getAtomTypeName() == "sp2" and \
                       a2.getAtomTypeName() == "sp2":
                        if a1._is_aromatic and a2._is_aromatic:
                            bond.set_v6(V_AROMATIC)
                        elif ((a1._is_aromatic == False
                               and a2._is_aromatic == False)
                              and not (a1._is_single and a2._is_single)):
                            # Bug fix: the second test used to repeat atom1,
                            # so atom2's aromatic flag was never checked.
                            bond.set_v6(V_DOUBLE)

        # Remove temporary attributes.
        for atom in mol.atoms.itervalues():
            del atom._is_aromatic
            del atom._is_single

        # Rotate the chunk so that its axis matches the Z axis, then let
        # _orient() align it using pos2 and pos1.
        ax = V(0., 0., 1.)
        mol.rot(Q(mol.getaxis(), ax))

        self._orient(mol, pos2, pos1)

        if self.init_ca:
            # Translate the chunk so that the init_ca atom ends up at pos1.
            mol.move(pos1 - self.init_ca.posn())

        return mol