Exemplo n.º 1
0
def Loadpdb(pdb=None, hetatm= True, verbose=False):
    try:
        assert(pdb != None) #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit("**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb.")
    
    AtomNumber=0 #Keeps track of atom indices (assigned in the order atoms listed in input file)
    mol_data={} #Key: molid; Value: Molecule_Type Object; Keep track of different molecules (different chains or molecule type) in input structure
    first_res =True  #To identify molecule type of every molecule in input structure and accordingly define Molecule object.
    Prev_res=0 # to keep track of residue change 
    Prev_chain='aa' # to keep track of chain change in HETATM section
    atmTohet = True #To determine transition from ATOM to HETATM record
    frame_tag = '' # To keep track of multi-frame entry (multiple entry for same molecule type with same chain id)
    '''Load the PDB structure file'''
    for line in pdb:
        if line[0:4]=="ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)
            
            #Check for unrecognized residue and new molecule
            if not first_res:
                if ResName.lower() not in Mol_types['protein'] + Mol_types['lipid'] + Mol_types['ligand']:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\nAdded %s as Ligand." % ResName
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                if ResNo != Prev_res or (Prev_chain != Chain and mol.molecule_type().lower()=='ligand'):
                    if mol.molecule_type().lower()=='ligand':
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif Prev_chain != Chain or ResName.lower() not in Mol_types[mol.molecule_type().lower()]: #Either Chain is different or New residue doesn't belong to current molecule type
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif frame_tag.lower() in ['endmdl', 'ter', 'end'] and Prev_chain == Chain: #Different molecule (of same molecule type) with same chain id; as in trajectory frames
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                        
                        
            if first_res: #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['lipid']:
                    mol = Lipid()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Molecule object."
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                first_res = False
                frame_tag = ''
            
            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:6]=="HETATM" and hetatm == True:
            if atmTohet:
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True
                atmTohet = False
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)
            
            #Check for new ligand molecule 
            if not first_res and (ResNo != Prev_res or Prev_chain != Chain):
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True
            
            #Initialize mol for new chain or molecule            
            if first_res: 
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Ligand object."
                    sys.exit("In file configstruc.py: Add missing residue name ("+ ResName+ ") to ligand molecule in Mol_types")
                first_res = False
            
            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:3].lower() in ["ter", "end"] or line[0:6].lower() == "endmdl":
            frame_tag = line[0:3]
    #append the last mol object to mol_data
    mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
    
    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key,"Molecule_Type:",mol_data[key].molecule_type()
        mol_data[key].resids = sorted(mol_data[key].residue)
        mol_data[key].nor = len(mol_data[key].resids)
        if mol_data[key].molecule_type().lower() != 'ligand':
            #Check for chain breaks in non-ligand molecule
            resids_diff=numpy.array(mol_data[key].resids[1:]) - numpy.array(mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff)+1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule",key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
                mol_data[key].chain_break = True
    return mol_data 
Exemplo n.º 2
0
def Loadpdb(pdb=None, hetatm=True, verbose=False):
    try:
        assert (pdb != None)  #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit(
            "**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb. "
        )

    AtomNumber = 0  #Keeps track of atom indices (assigned in the order atoms listed i input file)
    mol_data = {
    }  #Key: molid; Keep track of different molecules (different chains or molecule type) in input structure
    check_het = False  #To keep track of new Hetero residue
    first_res = True  #To identify molecule type of every molecule in input structure and accordingly define Molecule object.
    Prev_res = 0  # to keep track of residue change in HETATM section; a new Molecule object is assigned for every residue.
    Prev_chain = 'a'  # to keep track of chain change in HETATM section
    '''Load the PDB structure file'''
    for line in pdb:
        if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            if first_res:  #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name(" +
                        ResName + ") to appropriate molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
        elif line[0:3] == "TER":
            mol_data[Molecule.molid] = deepcopy(
                mol)  #copy mol object into dictionary
            first_res = True  # mol object will be initialized to molecule type of next molecule
        elif line[0:6] == "HETATM" and hetatm == True:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            #Check for new molecule
            if (
                    ResNo != Prev_res or Prev_chain != Chain
            ) and check_het == True:  #For first HETATM check_het is always False
                mol_data[Molecule.molid] = deepcopy(
                    mol)  #copy mol object into dictionary
                first_res = True

            #Initialize mol for new chain or molecule
            if first_res:
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name (" +
                        ResName + ") to ligand molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            if Prev_res == 0:
                check_het = True
            Prev_res = ResNo
            Prev_chain = Chain
    if hetatm:  #If HETATM record was added; append the last hetero residue object to mol_data
        mol_data[Molecule.molid] = deepcopy(
            mol)  #copy mol object into dictionary

    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key, "Molecule_Type:", mol_data[key].molecule_type(
            )
        if mol_data[key].molecule_type().lower() != 'ligand':
            mol_data[key].resids = sorted(mol_data[key].residue)
            mol_data[key].nor = len(mol_data[key].resids)
            #Check for chain breaks in protein
            resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array(
                mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff) + 1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule", key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
    return mol_data