def __init__(self,
             sequence_from_structure=False,
             library_bonds=False,
             distance_bonds=False,
             auto_sort=True,
             **args):
    """Set up the builder and immediately run the fixed build pipeline.

    Arguments:
      sequence_from_structure -- stored as self.calc_sequence
      library_bonds           -- stored as self.library_bonds
      distance_bonds          -- stored as self.distance_bonds
      auto_sort               -- stored as self.auto_sort

    Keyword arguments (**args):
      structure / struct -- an existing Structure object to build into
                            ("structure" takes precedence); when neither
                            is given a new Structure.Structure() is
                            allocated
      structure_id       -- optional; assigned to self.struct.structure_id
      fil                -- required; handed to self.read_start().  A
                            missing "fil" raises KeyError.

    The read_* steps run in a fixed order and every step after
    read_start() is skipped once self.halt has been set.  If self.halt is
    set when the pipeline ends, ConsoleOutput.fatal() is called.
    """
    ## allocate a new Structure object for building if one was not
    ## passed to the StructureBuilder
    if "structure" in args:
        self.struct = args["structure"]
    elif "struct" in args:
        self.struct = args["struct"]
    else:
        self.struct = Structure.Structure()

    ## set structure_id
    if "structure_id" in args:
        self.struct.structure_id = args["structure_id"]

    ## options
    self.calc_sequence = sequence_from_structure
    self.library_bonds = library_bonds
    self.distance_bonds = distance_bonds
    self.auto_sort = auto_sort

    ## caches used while building
    self.cache_chain = None
    self.cache_frag = None

    ## if anything goes wrong, setting self.halt=True will stop the madness
    self.halt = False

    ## build the structure by executing this fixed sequence of methods;
    ## each step is looked up and run only if no earlier step halted
    self.read_start(args["fil"])

    if not self.halt:
        self.read_start_finalize()
    if not self.halt:
        self.read_atoms()
    if not self.halt:
        self.read_atoms_finalize()
    if not self.halt:
        self.read_metadata()
    if not self.halt:
        self.read_metadata_finalize()
    if not self.halt:
        self.read_end()
    if not self.halt:
        self.read_end_finalize()

    ## self.struct is now built and ready for use, unless a step halted;
    ## truthiness is tested so this agrees with the guards above
    if self.halt:
        ConsoleOutput.fatal("self.halt == True")
def __init__(self,
             sequence_from_structure=False,
             library_bonds=False,
             distance_bonds=False,
             auto_sort=True,
             **args):
    """Set up the builder and immediately run the fixed build pipeline.

    Arguments:
      sequence_from_structure -- stored as self.calc_sequence
      library_bonds           -- stored as self.library_bonds
      distance_bonds          -- stored as self.distance_bonds
      auto_sort               -- stored as self.auto_sort

    Keyword arguments (**args):
      structure / struct -- an existing Structure object to build into
                            ("structure" takes precedence); when neither
                            is given a new Structure.Structure() is
                            allocated
      structure_id       -- optional; assigned to self.struct.structure_id
      fil                -- required; handed to self.read_start().  A
                            missing "fil" raises KeyError.

    The read_* steps run in a fixed order and every step after
    read_start() is skipped once self.halt has been set.  If self.halt is
    set when the pipeline ends, ConsoleOutput.fatal() is called.
    """
    ## allocate a new Structure object for building if one was not
    ## passed to the StructureBuilder
    if "structure" in args:
        self.struct = args["structure"]
    elif "struct" in args:
        self.struct = args["struct"]
    else:
        self.struct = Structure.Structure()

    ## set structure_id
    if "structure_id" in args:
        self.struct.structure_id = args["structure_id"]

    ## options
    self.calc_sequence = sequence_from_structure
    self.library_bonds = library_bonds
    self.distance_bonds = distance_bonds
    self.auto_sort = auto_sort

    ## caches used while building
    self.cache_chain = None
    self.cache_frag = None

    ## if anything goes wrong, setting self.halt=True will stop the madness
    self.halt = False

    ## build the structure by executing this fixed sequence of methods;
    ## each step is looked up and run only if no earlier step halted
    self.read_start(args["fil"])

    if not self.halt:
        self.read_start_finalize()
    if not self.halt:
        self.read_atoms()
    if not self.halt:
        self.read_atoms_finalize()
    if not self.halt:
        self.read_metadata()
    if not self.halt:
        self.read_metadata_finalize()
    if not self.halt:
        self.read_end()
    if not self.halt:
        self.read_end_finalize()

    ## self.struct is now built and ready for use, unless a step halted;
    ## truthiness is tested so this agrees with the guards above
    if self.halt:
        ConsoleOutput.fatal("self.halt == True")
def name_service(self):
    """Runs the name service on all atoms needing to be named.  This is a
    complicated function which corrects most commonly found errors and
    omissions from PDB files.

    The atoms in self.name_service_list are handled in two passes:

      1. Atoms whose res_name the Library reports as an amino acid or a
         nucleic acid are grouped into chains per model.  Every chain
         index receives the next unused chain_id and the atoms are added
         to self.struct with their fragment_id left untouched.
      2. All remaining atoms are grouped by (chain_id, res_name).  Each
         group receives a chain_id (the loaded one if it is unused) and
         its fragments are renumbered 1..N before being added to
         self.struct.

    Side effects: rewrites atm.chain_id (and atm.fragment_id in pass 2),
    adds the atoms to self.struct and deletes self.name_service_list.
    When the list is empty the method returns at once and deletes
    nothing.

    Raises StructureBuilderError when no unused chain_id is left.
    """
    if not self.name_service_list:
        return

    ## returns the next available chain_id in self.struct
    ## XXX: it's possible to run out of chain IDs!
    def next_chain_id(suggest_chain_id):
        if suggest_chain_id != "":
            if not self.struct.get_chain(suggest_chain_id):
                return suggest_chain_id

        for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
            if not self.struct.get_chain(chain_id):
                return chain_id

        raise StructureBuilderError("name_service exhausted new chain_ids")

    ## NAME SERVICE FOR POLYMER ATOMS

    ## what if we are given a list of atoms with res_name, frag_id,
    ## and model_id where the frag_id are sequential?  they can be
    ## sequential several ways using insertion codes, but large breaks
    ## often denote chain breaks

    ## I need to handle the special case of a list of polymer residues
    ## which do not have chain_ids.  This requires a first pass over
    ## the atom list using different rules than what I use for sorting
    ## out non-polymers

    current_polymer_type = None
    current_polymer_model_id = None
    current_polymer_chain_id = None
    current_polymer_frag_id = None
    current_polymer_res_name = None
    current_polymer_name_dict = None

    ## polymer_model_dict[model_id] = [chain1, chain2, ...]
    ## chain = [frag1, frag2, ...];  frag = [atm1, atm2, ...]
    polymer_model_dict = {}
    current_frag = None
    current_frag_list = None

    ## atoms which are not part of a polymer; they are collected here in
    ## their original order instead of calling list.remove() once per
    ## polymer atom, which is quadratic in the number of atoms
    non_polymer_atoms = []

    for atm in self.name_service_list:
        ## determine the polymer type of the atom
        if Library.library_is_amino_acid(atm.res_name):
            polymer_type = "protein"
        elif Library.library_is_nucleic_acid(atm.res_name):
            polymer_type = "dna"
        else:
            ## if the atom is not a polymer, we definitely have a break
            ## in this chain
            current_polymer_type = None
            current_polymer_model_id = None
            current_polymer_chain_id = None
            current_polymer_frag_id = None
            current_polymer_res_name = None
            current_polymer_name_dict = None
            current_frag = None
            current_frag_list = None
            non_polymer_atoms.append(atm)
            continue

        fragment_id = Structure.FragmentID(atm.fragment_id)

        ## now we deal with conditions which can terminate the current
        ## polymer chain
        ## NOTE(review): current_polymer_frag_id is only assigned when a
        ## chain is started, so the last test compares against the first
        ## fragment id of the chain, not the previous one -- confirm
        ## this is intended
        if (polymer_type != current_polymer_type or
                atm.model_id != current_polymer_model_id or
                atm.chain_id != current_polymer_chain_id or
                fragment_id < current_polymer_frag_id):

            current_polymer_type = polymer_type
            current_polymer_model_id = atm.model_id
            current_polymer_chain_id = atm.chain_id
            current_polymer_frag_id = fragment_id
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment
            current_frag = [atm]
            current_frag_list = [current_frag]

            ## create new fragment list (chain)
            polymer_model_dict.setdefault(
                atm.model_id, []).append(current_frag_list)
            continue

        ## if we get here, then we know this atom is destined for the
        ## current chain, and the algorithm needs to place the atom
        ## in the current fragment, or create a new fragment for it
        ## to go into; the conditions for it going into the current
        ## fragment are: it has to have the same res_name, and its
        ## atom name cannot conflict with the recorded atom names
        ## NOTE(review): only the first atom name of a fragment is ever
        ## recorded in current_polymer_name_dict
        if (atm.res_name != current_polymer_res_name or
                atm.name in current_polymer_name_dict):
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment and add it to the current fragment list
            current_frag = [atm]
            current_frag_list.append(current_frag)
            continue

        ## okay, put it in the current fragment
        current_frag.append(atm)

    ## every polymer atom has been dealt with; only the non-polymer
    ## atoms stay on the name service list
    self.name_service_list = non_polymer_atoms

    ## now assign chain_ids and add the atoms to the structure
    model_list = [polymer_model_dict[model_id]
                  for model_id in sorted(polymer_model_dict)]

    num_chains = 0
    chains_differ = False
    for chain_list in model_list:
        if num_chains and len(chain_list) != num_chains:
            chains_differ = True
        num_chains = max(num_chains, len(chain_list))

    if chains_differ:
        ConsoleOutput.warning(
            "name_service: model polymer chains not identical")

    for chain_index in range(num_chains):
        ## get next available chain_id
        chain_id = next_chain_id("")

        ## assign the chain_id to all the atoms in the chain
        ## TODO: check fragment_id too
        for model in model_list:
            ## a model may hold fewer polymer chains than the largest
            ## model; match up the chains as much as possible
            if chain_index >= len(model):
                continue

            for frag in model[chain_index]:
                for atm in frag:
                    atm.chain_id = chain_id
                    self.struct.add_atom(atm, True)

    ## free the memory used by the polymer naming service
    del polymer_model_dict
    del model_list

    ## NAME SERVICE FOR NON-POLYMER ATOMS
    ## cr = (chain_id, res_name)
    ##
    ## cr_dict[cr_key] = model_dict
    ##
    ## model_dict[model] = frag_list
    ##
    ## frag_list = [ frag1, frag2, frag3, ...]
    ##
    ## frag = [atm1, atm2, atm3, ...]
    cr_dict = {}
    cr_key_list = []

    frag_id = None
    frag = None
    name_dict = {}

    ## split atoms into fragments
    for atm in self.name_service_list:
        atm_id = (atm.name, atm.alt_loc)
        atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id,
                       atm.res_name)

        ## if the atom fragment id matches the current fragment id
        ## and doesn't conflict with any other atom name in the fragment
        ## then add it to the fragment
        if atm_frag_id == frag_id and atm_id not in name_dict:
            frag.append(atm)
            name_dict[atm_id] = True
            continue

        cr_key = (atm.chain_id, atm.res_name)

        ### debug
        if frag:
            ConsoleOutput.debug(
                "name_service: fragment detected in cr=%s" % (str(cr_key)))
            for a in frag:
                ConsoleOutput.debug(" " + str(a))
        ### /debug

        ## cr_key_list remembers the order in which keys first appear
        if cr_key not in cr_dict:
            cr_dict[cr_key] = {}
            cr_key_list.append(cr_key)
        model_dict = cr_dict[cr_key]

        frag_list = model_dict.setdefault(atm.model_id, [])

        name_dict = {atm_id: True}
        frag_id = atm_frag_id
        frag = [atm]
        frag_list.append(frag)

    ## free self.name_service_list and other vars to save some memory
    del self.name_service_list

    new_chain_id = None
    fragment_id_num = None

    for cr_key in cr_key_list:
        ### debug
        ConsoleOutput.debug("name_service: chain_id / res_name keys")
        ConsoleOutput.debug(" cr_key: chain_id='%s' res_name='%s'" % (
            cr_key[0], cr_key[1]))
        ### /debug

        ## get the next chain ID, use the cfr group's
        ## loaded chain_id if possible
        chain_id = next_chain_id(cr_key[0])

        ## if we are not out of chain IDs, use the new chain ID and
        ## reset the fragment_id
        ## NOTE(review): next_chain_id() raises instead of returning
        ## None, so the elif branch looks unreachable -- confirm
        if chain_id is not None:
            new_chain_id = chain_id
            fragment_id_num = 0
        elif new_chain_id is None or fragment_id_num is None:
            ConsoleOutput.fatal(
                "name_service: unable to assign any chain ids")

        ## get model dictionary
        model_dict = cr_dict[cr_key]

        ## inspect the model dictionary to determine the number
        ## of fragments in each model -- they should be the same
        ## and have a 1:1 correspondence; if not, match up the
        ## fragments as much as possible
        max_frags = -1
        for frag_list in model_dict.values():
            frag_list_len = len(frag_list)

            if max_frags == -1:
                max_frags = frag_list_len
                continue

            if max_frags != frag_list_len:
                strx = "name_service: model fragments not identical"
                ConsoleOutput.debug(strx)
                ConsoleOutput.warning(strx)
                max_frags = max(max_frags, frag_list_len)

        ## now iterate through the fragment lists in parallel and assign
        ## the new chain_id and fragment_id
        for i in range(max_frags):
            fragment_id_num += 1

            for frag_list in model_dict.values():
                try:
                    frag = frag_list[i]
                except IndexError:
                    continue

                ## assign new chain_id and fragment_id, then place the
                ## atom in the structure
                for atm in frag:
                    atm.chain_id = new_chain_id
                    atm.fragment_id = str(fragment_id_num)
                    self.struct.add_atom(atm, True)

        ## logging
        ConsoleOutput.warning(
            "name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
                new_chain_id, cr_key[1], fragment_id_num))
def name_service(self):
    """Runs the name service on all atoms needing to be named.  This is a
    complicated function which corrects most commonly found errors and
    omissions from PDB files.

    The atoms in self.name_service_list are handled in two passes:

      1. Atoms whose res_name the Library reports as an amino acid or a
         nucleic acid are grouped into chains per model.  Every chain
         index receives the next unused chain_id and the atoms are added
         to self.struct with their fragment_id left untouched.
      2. All remaining atoms are grouped by (chain_id, res_name).  Each
         group receives a chain_id (the loaded one if it is unused) and
         its fragments are renumbered 1..N before being added to
         self.struct.

    Side effects: rewrites atm.chain_id (and atm.fragment_id in pass 2),
    adds the atoms to self.struct and deletes self.name_service_list.
    When the list is empty the method returns at once and deletes
    nothing.

    Raises StructureBuilderError when no unused chain_id is left.
    """
    if not self.name_service_list:
        return

    ## returns the next available chain_id in self.struct
    ## XXX: it's possible to run out of chain IDs!
    def next_chain_id(suggest_chain_id):
        if suggest_chain_id != "":
            if not self.struct.get_chain(suggest_chain_id):
                return suggest_chain_id

        ## TODO: Add the following alphanumeric string to Constants.py, 2010-09-21
        for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
            if not self.struct.get_chain(chain_id):
                return chain_id

        raise StructureBuilderError("name_service exhausted new chain_ids")

    ## NAME SERVICE FOR POLYMER ATOMS

    ## What if we are given a list of atoms with res_name, frag_id, and
    ## model_id where the frag_id are sequential? They can be sequential
    ## several ways using insertion codes, but large breaks often denote
    ## chain breaks.

    ## We need to handle the special case of a list of polymer residues
    ## which do not have chain_ids. This requires a first pass over the
    ## atom list using different rules than what we use for sorting out
    ## non-polymers.

    current_polymer_type = None
    current_polymer_model_id = None
    current_polymer_chain_id = None
    current_polymer_frag_id = None
    current_polymer_res_name = None
    current_polymer_name_dict = None

    ## polymer_model_dict[model_id] = [chain1, chain2, ...]
    ## chain = [frag1, frag2, ...];  frag = [atm1, atm2, ...]
    polymer_model_dict = {}
    current_frag = None
    current_frag_list = None

    ## atoms which are not part of a polymer; they are collected here in
    ## their original order instead of calling list.remove() once per
    ## polymer atom, which is quadratic in the number of atoms
    non_polymer_atoms = []

    for atm in self.name_service_list:
        ## determine the polymer type of the atom
        if Library.library_is_amino_acid(atm.res_name):
            polymer_type = "protein"
        elif Library.library_is_nucleic_acid(atm.res_name):
            polymer_type = "dna"
        else:
            ## if the atom is not a polymer, we definitely have a break
            ## in this chain
            current_polymer_type = None
            current_polymer_model_id = None
            current_polymer_chain_id = None
            current_polymer_frag_id = None
            current_polymer_res_name = None
            current_polymer_name_dict = None
            current_frag = None
            current_frag_list = None
            non_polymer_atoms.append(atm)
            continue

        fragment_id = Structure.FragmentID(atm.fragment_id)

        ## now we deal with conditions which can terminate the current
        ## polymer chain
        ## NOTE(review): current_polymer_frag_id is only assigned when a
        ## chain is started, so the last test compares against the first
        ## fragment id of the chain, not the previous one -- confirm
        ## this is intended
        if (polymer_type != current_polymer_type or
                atm.model_id != current_polymer_model_id or
                atm.chain_id != current_polymer_chain_id or
                fragment_id < current_polymer_frag_id):

            current_polymer_type = polymer_type
            current_polymer_model_id = atm.model_id
            current_polymer_chain_id = atm.chain_id
            current_polymer_frag_id = fragment_id
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment
            current_frag = [atm]
            current_frag_list = [current_frag]

            ## create new fragment list (chain)
            polymer_model_dict.setdefault(
                atm.model_id, []).append(current_frag_list)
            continue

        ## if we get here, then we know this atom is destined for the
        ## current chain, and the algorithm needs to place the atom
        ## in the current fragment, or create a new fragment for it
        ## to go into; the conditions for it going into the current
        ## fragment are: it has to have the same res_name, and its
        ## atom name cannot conflict with the recorded atom names
        ## NOTE(review): only the first atom name of a fragment is ever
        ## recorded in current_polymer_name_dict
        if (atm.res_name != current_polymer_res_name or
                atm.name in current_polymer_name_dict):
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment and add it to the current fragment list
            current_frag = [atm]
            current_frag_list.append(current_frag)
            continue

        ## okay, put it in the current fragment
        current_frag.append(atm)

    ## every polymer atom has been dealt with; only the non-polymer
    ## atoms stay on the name service list
    self.name_service_list = non_polymer_atoms

    ## now assign chain_ids and add the atoms to the structure
    model_list = [polymer_model_dict[model_id]
                  for model_id in sorted(polymer_model_dict)]

    num_chains = 0
    chains_differ = False
    for chain_list in model_list:
        if num_chains and len(chain_list) != num_chains:
            chains_differ = True
        num_chains = max(num_chains, len(chain_list))

    if chains_differ:
        ConsoleOutput.warning(
            "name_service: model polymer chains not identical")

    for chain_index in range(num_chains):
        ## get next available chain_id
        chain_id = next_chain_id("")

        ## assign the chain_id to all the atoms in the chain
        ## TODO: check fragment_id too, 2010-09-22
        for model in model_list:
            ## a model may hold fewer polymer chains than the largest
            ## model; match up the chains as much as possible
            if chain_index >= len(model):
                continue

            for frag in model[chain_index]:
                for atm in frag:
                    atm.chain_id = chain_id
                    self.struct.add_atom(atm, True)

    ## free the memory used by the polymer naming service
    del polymer_model_dict
    del model_list

    ## NAME SERVICE FOR NON-POLYMER ATOMS
    ## cr = (chain_id, res_name)
    ##
    ## cr_dict[cr_key] = model_dict
    ##
    ## model_dict[model] = frag_list
    ##
    ## frag_list = [ frag1, frag2, frag3, ...]
    ##
    ## frag = [atm1, atm2, atm3, ...]
    cr_dict = {}
    cr_key_list = []

    frag_id = None
    frag = None
    name_dict = {}

    ## split atoms into fragments
    for atm in self.name_service_list:
        atm_id = (atm.name, atm.alt_loc)
        atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id,
                       atm.res_name)

        ## if the atom fragment id matches the current fragment id
        ## and doesn't conflict with any other atom name in the fragment
        ## then add it to the fragment
        if atm_frag_id == frag_id and atm_id not in name_dict:
            frag.append(atm)
            name_dict[atm_id] = True
            continue

        cr_key = (atm.chain_id, atm.res_name)

        ### debug
        if frag:
            msg = "name_service: fragment detected in cr=%s" % (
                str(cr_key))
            ConsoleOutput.debug(msg)
            for a in frag:
                ConsoleOutput.debug(" " + str(a))
        ### /debug

        ## cr_key_list remembers the order in which keys first appear
        if cr_key not in cr_dict:
            cr_dict[cr_key] = {}
            cr_key_list.append(cr_key)
        model_dict = cr_dict[cr_key]

        frag_list = model_dict.setdefault(atm.model_id, [])

        name_dict = {atm_id: True}
        frag_id = atm_frag_id
        frag = [atm]
        frag_list.append(frag)

    ## free self.name_service_list and other vars to save some memory
    del self.name_service_list

    new_chain_id = None
    fragment_id_num = None

    for cr_key in cr_key_list:
        ### debug
        msg = "name_service: chain_id / res_name keys\n"
        msg += " cr_key: chain_id='%s' res_name='%s'" % (
            cr_key[0], cr_key[1])
        ConsoleOutput.debug(msg)
        ### /debug

        ## get the next chain ID, use the cfr group's
        ## loaded chain_id if possible
        chain_id = next_chain_id(cr_key[0])

        ## if we are not out of chain IDs, use the new chain ID and
        ## reset the fragment_id
        ## NOTE(review): next_chain_id() raises instead of returning
        ## None, so the elif branch looks unreachable -- confirm
        if chain_id is not None:
            new_chain_id = chain_id
            fragment_id_num = 0
        elif new_chain_id is None or fragment_id_num is None:
            ConsoleOutput.fatal("name_service: unable to assign any chain ids")

        ## get model dictionary
        model_dict = cr_dict[cr_key]

        ## inspect the model dictionary to determine the number of
        ## fragments in each model -- they should be the same
        ## and have a 1:1 correspondence; if not, match up the
        ## fragments as much as possible
        max_frags = -1
        for frag_list in model_dict.values():
            frag_list_len = len(frag_list)

            if max_frags == -1:
                max_frags = frag_list_len
                continue

            if max_frags != frag_list_len:
                strx = "name_service: model fragments not identical"
                ConsoleOutput.debug(strx)
                ConsoleOutput.warning(strx)
                max_frags = max(max_frags, frag_list_len)

        ## now iterate through the fragment lists in parallel and assign
        ## the new chain_id and fragment_id
        for i in range(max_frags):
            fragment_id_num += 1

            for frag_list in model_dict.values():
                try:
                    frag = frag_list[i]
                except IndexError:
                    continue

                ## assign new chain_id and fragment_id, then place the
                ## atom in the structure
                for atm in frag:
                    atm.chain_id = new_chain_id
                    atm.fragment_id = str(fragment_id_num)
                    self.struct.add_atom(atm, True)

        ## logging
        ConsoleOutput.warning("name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
            new_chain_id, cr_key[1], fragment_id_num))