def simple_occupancy_groups(hierarchy, include_single_conformer_groups=False, verbose=False):
    """Build occupancy-group selections from the hierarchy's default groupings.

    Every group reported by ``hierarchy.occupancy_groups_simple()`` becomes a
    list with one entry per member selection; each entry is the list of
    ``GenericSelection.to_dict`` results for the atom groups in that selection.

    Groups consisting of one selection that holds one atom are always skipped.
    Groups consisting of one selection are skipped unless
    ``include_single_conformer_groups`` is true.  With ``verbose`` set, the
    skipped groups are reported on stdout.
    """

    def _subset(indices):
        # Sub-hierarchy holding only the atoms with these indices
        return hierarchy.select(flex.size_t(indices))

    collected = []

    for group in hierarchy.occupancy_groups_simple():
        only_one_selection = (len(group) == 1)

        # One selection containing one atom
        if only_one_selection and (len(group[0]) == 1):
            if verbose:
                labels = [Labeller.format(atom) for atom in _subset(group[0]).atoms()]
                print('Not making simple restraints for single-atom groups: ' + ','.join(labels))
            continue

        # One selection (single conformer), optionally excluded
        if only_one_selection and (not include_single_conformer_groups):
            if verbose:
                labels = [Labeller.format(ag) for ag in _subset(group[0]).atom_groups()]
                print('Not making simple restraints for single-conformer groups:\n\t' + '\n\t'.join(labels))
            continue

        collected.append([
            [GenericSelection.to_dict(ag) for ag in _subset(member).atom_groups()]
            for member in group
        ])

    return collected
def expand_alternate_conformations(hierarchy, in_place=False, verbose=False):
    """Give residue groups an atom group for every altloc used in the model.

    The target altlocs are the sorted non-blank altlocs found in the hierarchy
    (or ``['A']`` when there are none).  Residue groups that mix blank and
    non-blank altlocs are first converted with
    ``create_pure_alt_conf_from_proper_alt_conf``.  Missing conformers are
    then populated with ``populate_missing_conformers``, but only for residue
    groups that contain a name in ``protein_amino_acid_set`` or that report
    no conformers at all.

    Works on a deep copy unless ``in_place`` is true; returns the hierarchy
    that was modified.
    """

    if not in_place:
        hierarchy = hierarchy.deep_copy()

    def _altlocs_of(rg):
        # Altlocs of the atom groups in a residue group, in stored order
        return [ag.altloc for ag in rg.atom_groups()]

    # All non-blank altlocs that occur anywhere in the structure
    target_altlocs = sorted(alt for alt in hierarchy.altloc_indices() if alt)

    # Nothing found: everything is expanded to a single conformer "A"
    if not target_altlocs:
        if verbose:
            print('No altlocs in structure: expanding all residues to conformer "A"')
        target_altlocs = ['A']

    if verbose:
        print('Expanding all (appropriate) residues to have altlocs {}'.format(target_altlocs))
        print('------------------>')

    for chain in hierarchy.chains():
        for residue_group in chain.residue_groups():

            # Mixed blank / non-blank altlocs: convert to pure alt-conf
            # (the blank atom group is added to all other atom groups)
            if residue_group.have_conformers() and residue_group.move_blank_altloc_atom_groups_to_front():
                if verbose:
                    print('{} - expanding to pure conformer (altlocs {})'.format(
                        Labeller.format(residue_group), _altlocs_of(residue_group)))
                create_pure_alt_conf_from_proper_alt_conf(residue_group=residue_group, in_place=True)

            # Already has exactly the target conformers
            present = set(_altlocs_of(residue_group))
            if present == set(target_altlocs):
                continue

            # Only expand protein residues (expected in every conformer) or
            # residue groups without conformers (e.g. single-conformer waters);
            # do NOT expand a water in conformer A to A,B,C...
            is_protein = protein_amino_acid_set.intersection(residue_group.unique_resnames())
            if not (is_protein or (not residue_group.have_conformers())):
                continue

            if verbose:
                print('{} - populating missing conformers (current altlocs {}, target set {})'.format(
                    Labeller.format(residue_group), present, target_altlocs))

            # Fill in the missing conformers from the existing ones
            populate_missing_conformers(
                residue_group=residue_group,
                full_altloc_set=target_altlocs,
                in_place=True,
                verbose=verbose,
            )
            assert sorted(_altlocs_of(residue_group)) == target_altlocs

            if verbose:
                print('{} - updated conformer list: (current altlocs {}, target set {})'.format(
                    Labeller.format(residue_group), _altlocs_of(residue_group), target_altlocs))

    if verbose:
        print('------------------>')

    return hierarchy
def set_conformer_occupancy(hierarchy, altlocs, occupancy, in_place=False, verbose=False):
    """Set the occupancy of every atom in the selected conformers.

    ``altlocs`` is either a list of altloc ids or a string, in which case each
    character of the string is treated as one altloc id.  Every atom group
    whose altloc is in ``altlocs`` has all of its atoms set to ``occupancy``.

    Works on a deep copy unless ``in_place`` is true; returns the hierarchy
    that was modified.
    """

    if not isinstance(altlocs, str):
        assert isinstance(altlocs, list), 'altlocs must be either str or list'
    else:
        # A string such as "AB" means conformers "A" and "B"
        altlocs = list(altlocs)

    target = hierarchy if in_place else hierarchy.deep_copy()

    for atom_group in target.atom_groups():
        if atom_group.altloc not in altlocs:
            continue
        if verbose:
            print('{} - setting occupancy to {}'.format(Labeller.format(atom_group), occupancy))
        atoms = atom_group.atoms()
        # One occupancy value per atom in the group
        atoms.set_occ(flex.double(atoms.size(), occupancy))

    return target
def increment_altlocs(hierarchy, offset=1, in_place=False, verbose=False):
    """Shift every altloc in the structure by ``offset`` positions.

    Altlocs are mapped along the sequence returned by
    ``iotbx.pdb.systematic_chain_ids()``: each non-blank altloc present in the
    hierarchy is replaced by the id ``offset`` places further along.

    Works on a deep copy unless ``in_place`` is true; returns the hierarchy
    that was modified.

    Raises:
        IndexError: if a shifted altloc would fall outside the id sequence
            (previously a negative position silently wrapped around to the
            end of the sequence).
        AssertionError: if any atom group has a blank altloc.
    """

    if not in_place:
        hierarchy = hierarchy.deep_copy()

    cur_altlocs = [a for a in hierarchy.altloc_indices() if a]
    all_altlocs = iotbx.pdb.systematic_chain_ids()

    # Old altloc -> new altloc
    new_altlocs = {}
    for a in cur_altlocs:
        new_index = all_altlocs.index(a) + offset
        # Guard both ends explicitly: a negative index would otherwise be
        # accepted by the list and pick an unrelated id from the far end
        if not (0 <= new_index < len(all_altlocs)):
            raise IndexError(
                'Cannot shift altloc "{}" by {}: no altloc id at that position'.format(a, offset))
        new_altlocs[a] = all_altlocs[new_index]

    if verbose:
        print('------------------>')
        print('Updating altlocs:')
        for a in cur_altlocs:
            print('{} -> {}'.format(a, new_altlocs[a]))
        print('------------------>')

    for atom_group in hierarchy.atom_groups():
        # Check before anything looks the altloc up in the mapping, so that a
        # blank altloc fails the same way whether or not verbose is set
        assert atom_group.altloc != ''
        if verbose:
            print('{} - updating altloc: {} -> {}'.format(
                Labeller.format(atom_group), atom_group.altloc, new_altlocs[atom_group.altloc]))
        atom_group.altloc = new_altlocs[atom_group.altloc]

    return hierarchy
def create_levels_tab(parameterisation):
    """Assemble the nested dictionary describing the "ADP Summary" tab.

    The returned dictionary has a ``'tabs'`` list containing, in order:

    * an overview sub-tab with, for each chain, a divider panel followed by
      one panel per TLS level and one panel for the residual level;
    * one sub-tab per level, with one panel per chain followed by one panel
      per group that has non-zero TLS values;
    * a residual sub-tab with one panel per chain holding one image per
      residue group.

    Image paths are resolved from the file-manager templates and embedded via
    ``png2base64src_maybe``.  TLS model values are read from the per-level CSV
    files named by the file manager.
    """

    p = parameterisation
    f = parameterisation.fitter
    fm = parameterisation.file_manager

    def _image(template_key, *fmt_args):
        # Fill in a file-name template and embed the image it points to
        return png2base64src_maybe(fm.get_file(template_key).format(*fmt_args), print_on_missing=DEBUG)

    # Selection of the fitted atoms, and the chains that contain them
    atom_sel = flex.bool(p.atom_mask.tolist())
    chain_ids = [c.id for c in p.blank_master_hierarchy().select(atom_sel, copy_atoms=True).chains()]

    n_levels = len(f.levels)

    tab = {
        'id': 'levels',
        'short_name': 'ADP Summary',
        'long_name': 'Level-by-level TLS parameterisation',
        'description': 'Parameteristaion composed of {} levels.'.format(n_levels),
        'tabs': [],
    }

    # -------------------------------->
    # Create overview sub-tab
    # -------------------------------->
    overview_tab = {
        'id': tab['id'] + 'overview',
        'active': True,
        'short_name': 'Overview',
        'long_name': 'Overview of the parameterised hierarchical ADP model',
        'description': '',
        'panels': [],
    }
    tab['tabs'].append(overview_tab)

    # Split the panels up by chain
    for c_id in chain_ids:
        # Divider panel at the start of each chain
        overview_tab['panels'].append({
            'id': '<h4>Levels for Chain {}</h4>'.format(c_id),
            'width': 12,
            'show': True,
            'table': None,
            'objects': [
                {'width': 6, 'text': 'TLS-level components',
                 'path': _image('png-combined-profile-template', c_id)},
                {'width': 6, 'text': 'Residual component',
                 'path': _image('png-residual-profile-template', c_id)},
            ],
        })
        # One panel for each TLS level
        for level_num, level_lab, _level in f:
            overview_tab['panels'].append({
                'id': 'Level {} of {} ({})'.format(level_num, n_levels, level_lab),
                'width': 4,
                'show': True,
                'table': None,
                'objects': [
                    {'width': 12, 'text': '{} atoms.'.format('X')},
                    {'width': 12, 'path': _image('pml-level-chain-template', level_num, c_id)},
                    {'width': 12, 'path': _image('png-tls-profile-template', level_num, c_id)},
                    {'width': 12, 'path': _image('png-tls-anisotropy-template', level_num, c_id)},
                ],
            })
        # Residual level panel
        overview_tab['panels'].append({
            'id': 'Final Level (residual)',
            'width': 4,
            'show': True,
            'table': None,
            'objects': [
                {'width': 12, 'text': '{} atoms.'.format('X')},
                {'width': 12, 'path': _image('pml-residual-chain-template', c_id)},
                {'width': 12, 'path': _image('png-residual-profile-template', c_id)},
                {'width': 12, 'path': _image('png-residual-anisotropy-template', c_id)},
            ],
        })

    # -------------------------------->
    # Create tab for each level
    # -------------------------------->
    tls_mode_template = (
        'Mode {}:<br>'
        '<samp>\n'
        'T: {T11:>9.3f}, {T22:>9.3f}, {T33:>9.3f}, {T12:>9.3f}, {T13:>9.3f}, {T23:>9.3f},<br>'
        'L: {L11:>9.3f}, {L22:>9.3f}, {L33:>9.3f}, {L12:>9.3f}, {L13:>9.3f}, {L23:>9.3f},<br>'
        'S: {S11:>9.3f}, {S12:>9.3f}, {S13:>9.3f}, {S21:>9.3f}, {S22:>9.3f}, {S23:>9.3f}, {S31:>9.3f}, {S32:>9.3f}, {S33:>9.3f}'
        '\n</samp>'
    )

    for i_level, (level_num, level_lab, level) in enumerate(f):
        level_tab = {
            'id': tab['id'] + 'lvl{}'.format(level_num),
            'short_name': 'Level {}'.format(level_num),
            'long_name': 'Level {} ({})'.format(level_num, level_lab),
            'description': 'Level {} of {}. '.format(level_num, n_levels) +
                           'Composed of {} groups'.format(level.n_groups()),
            'panels': [],
        }
        tab['tabs'].append(level_tab)

        # Overview of each chain at the top of the tab
        for c_id in chain_ids:
            level_tab['panels'].append({
                'id': 'Chain {}'.format(c_id),
                'width': 12,
                'show': True,
                'table': None,
                'objects': [
                    {'width': 12, 'text': '{} atoms.'.format('X')},
                    {'width': 6, 'path': _image('pml-level-partition-template', level_num, c_id)},
                    {'width': 6, 'path': _image('pml-level-chain-template', level_num, c_id)},
                    {'width': 6, 'path': _image('png-tls-profile-template', level_num, c_id)},
                    {'width': 6, 'path': _image('png-tls-anisotropy-template', level_num, c_id)},
                ],
            })

        # Read in the TLS models and amplitudes for this level
        tls_models = pandas.read_csv(fm.get_file('csv-tls-mdl-template').format(level_num)) \
            .set_index(['group', 'model']).drop('Unnamed: 0', axis=1, errors='ignore')
        # The amplitudes are not used below; the read is kept so that a missing
        # or malformed amplitudes file still fails here as it did before
        tls_amplitudes = pandas.read_csv(fm.get_file('csv-tls-amp-template').format(level_num)) \
            .set_index(['group', 'model', 'cpt']).drop('Unnamed: 0', axis=1, errors='ignore')

        # One panel for each group in this level
        for i_group, (group_num, sel, _group_fitter) in enumerate(level):
            # TLS values for every mode of this group
            tls_vals = [tls_models.loc[(group_num, i_mode)]
                        for i_mode in range(p.params.fitting.tls_models_per_tls_group)]
            # Skip groups whose TLS values are all zero
            if numpy.abs(tls_vals).sum() == 0.0:
                continue

            tls_mdl_strs = []
            for i_mode, mode_vals in enumerate(tls_vals):
                if mode_vals.any():
                    # HTML collapses runs of ordinary spaces, which would undo
                    # the fixed-width number padding: make them non-breaking.
                    # NOTE(review): the replacement as displayed in the source
                    # was a no-op (or an invisible non-ASCII character); an
                    # explicit entity is assumed to be the intent -- confirm.
                    mode_str = tls_mode_template.format(i_mode + 1, **mode_vals.round(3)).replace(' ', '&nbsp;')
                else:
                    mode_str = 'Zero-value TLS values for mode {}'.format(i_mode + 1)
                tls_mdl_strs.append(mode_str)

            level_tab['panels'].append({
                'id': 'Group {} - {}'.format(group_num, p.levels[i_level][i_group]),
                'width': 12,
                'table': None,
                'objects': [
                    {'width': 12, 'text': 'Number of atoms: {}'.format(sum(sel))},
                    {'width': 4, 'text': 'Shape of disorder (arbitrary scale)',
                     'path': _image('pml-level-scaled-template', level_num, group_num)},
                    {'width': 4, 'text': 'Average size over all datasets',
                     'path': _image('pml-level-group-template', level_num, group_num)},
                    {'width': 4, 'text': 'Amplitude Distribution',
                     'path': _image('png-tls-amp-dist-template', level_num, group_num)},
                ] + [{'width': 12, 'text': s} for s in tls_mdl_strs],
            })

        # Make the first panel open
        if level_tab['panels']:
            level_tab['panels'][0]['show'] = True

    # -------------------------------->
    # Create tab for residual level
    # -------------------------------->
    residual_tab = {
        'id': 'lvlres',
        'short_name': 'Residual',
        'long_name': 'Final Level (residual)',
        'description': '',
        'panels': [],
    }
    tab['tabs'].append(residual_tab)

    # One panel per chain, one image per residue group
    for c in p.blank_master_hierarchy().select(atom_sel, copy_atoms=True).chains():
        panel = {
            'id': 'Residual components for chain {}'.format(c.id),
            'width': 12,
            'table': None,
            'objects': [],
        }
        residual_tab['panels'].append(panel)
        for rg in c.residue_groups():
            short_label = ShortLabeller.format(rg)
            long_label = Labeller.format(rg)
            panel['objects'].append({
                'width': 4,
                'text': long_label,
                'path': _image('pml-residual-group-template', short_label),
            })

    # Make the first panel open (there are no panels if no chains were selected)
    if residual_tab['panels']:
        residual_tab['panels'][0]['show'] = True

    return tab
def transfer_residue_groups_from_other(acceptor_hierarchy, donor_hierarchy, in_place=False, verbose=False):
    """Copy the contents of ``donor_hierarchy`` into ``acceptor_hierarchy``.

    * A donor chain whose id does not occur in the acceptor is copied whole.
    * A donor residue group with a matching (chain id, resid) residue group in
      the acceptor has copies of its atom groups appended to that residue
      group.
    * A donor residue group with a matching chain id but no matching resid is
      appended to the acceptor chain its donor chain has been linked to (via
      another residue that did match) or, failing that, to the only acceptor
      chain with the same id.

    Works on a deep copy of the acceptor unless ``in_place`` is true; returns
    the hierarchy that was modified.

    Raises:
        Exception: if more than one acceptor residue group shares a chain id
            and resid.
        Failure: if an unmatched residue group cannot be assigned to a unique
            acceptor chain.
    """

    if not in_place:
        acceptor_hierarchy = acceptor_hierarchy.deep_copy()

    # Index acceptor residue groups: chain id -> resid -> [residue_group, ...]
    accept_model = acceptor_hierarchy.only_model()
    accept_dict = {c.id: {} for c in accept_model.chains()}
    for rg in accept_model.residue_groups():
        accept_dict.get(rg.parent().id).setdefault(rg.resid(), []).append(rg)

    # Links donor chains to acceptor chains so that several chains sharing an
    # id can each be linked to their own partner.  Keyed on memory_id() so
    # that every handle to the same donor chain gives the same key (the links
    # were previously stored under chain objects but looked up by chain-id
    # string, so they were never found).
    # NOTE(review): assumes memory_id() is identical for separate handles to
    # one chain -- confirm against the iotbx.pdb.hierarchy documentation.
    link_dict = {}
    # Residue groups with a chain match but no residue match in the acceptor
    tricky_rgs = []

    for donor_ch in donor_hierarchy.only_model().chains():

        accept_rgs_by_resid = accept_dict.get(donor_ch.id, None)

        # Chain id not present in the acceptor: copy the whole chain across
        if accept_rgs_by_resid is None:
            if verbose:
                print('Transferring whole chain: {}'.format(Labeller.format(donor_ch)))
            accept_model.append_chain(donor_ch.detached_copy())
            continue

        donor_key = donor_ch.memory_id()

        # Chain id present: transfer residue group by residue group
        for donor_rg in donor_ch.residue_groups():
            matches = accept_rgs_by_resid.get(donor_rg.resid(), [])

            if len(matches) > 1:
                # Should only be one...
                raise Exception(
                    'More than one residue group in hierarchy with the same residue_id and chain_id')
            elif len(matches) == 1:
                accept_rg = matches[0]
                # Record which acceptor chain this donor chain maps onto
                link_dict.setdefault(donor_key, accept_rg.parent())
                if verbose:
                    print('Transferring atom groups: {} > {}'.format(
                        Labeller.format(donor_rg), Labeller.format(accept_rg)))
                for donor_ag in donor_rg.atom_groups():
                    accept_rg.append_atom_group(donor_ag.detached_copy())
            else:
                # Several acceptor chains may share this id: decide later,
                # once all the links are known
                tricky_rgs.append(donor_rg)

    # Place residue groups that matched a chain id but not a residue
    for donor_rg in tricky_rgs:
        donor_ch = donor_rg.parent()
        accept_ch = link_dict.get(donor_ch.memory_id(), None)

        if accept_ch is None:
            # Not linked: only safe if exactly one acceptor chain has this id
            possible_chains = [c for c in accept_model.chains() if c.id == donor_ch.id]
            if len(possible_chains) == 1:
                accept_ch = possible_chains[0]
            else:
                raise Failure(
                    "Don't know how to transfer {} to the output model".format(
                        Labeller.format(donor_rg)))

        if verbose:
            print('Transferring residue group: {} > {}'.format(
                Labeller.format(donor_rg), Labeller.format(accept_ch)))
        accept_ch.append_residue_group(donor_rg.detached_copy())

    return acceptor_hierarchy
def resolve_residue_id_clashes(fixed_hierarchy, moving_hierarchy, in_place=False, verbose=False):
    """Move clashing residue groups of ``moving_hierarchy`` onto new chains.

    A residue group clashes when ``fixed_hierarchy`` has a residue group with
    the same chain id and resid but a different list of (stripped) residue
    names.  Clashing residue groups are removed from their chain and appended
    to newly created chains, whose ids come from ``find_unused_chain_ids``.
    A fresh chain is started for the first clash, whenever the current new
    chain already contains the resid, and whenever the source chain id
    changes.

    Works on a deep copy of ``moving_hierarchy`` unless ``in_place`` is true;
    returns the hierarchy that was modified.

    Raises:
        Exception: if more than one residue group of ``fixed_hierarchy``
            matches a chain id and resid.
    """

    if not in_place:
        moving_hierarchy = moving_hierarchy.deep_copy()

    # Index the fixed hierarchy: chain id -> resid -> [residue_group, ...]
    fixed_lookup = {}
    for chain in fixed_hierarchy.only_model().chains():
        fixed_lookup[chain.id] = {}
    for rg in fixed_hierarchy.residue_groups():
        fixed_lookup.get(rg.parent().id).setdefault(rg.resid(), []).append(rg)

    def _stripped_names(rg):
        # Residue names with surrounding whitespace removed
        return [str.strip(name) for name in rg.unique_resnames()]

    # Collect residue groups whose id matches but whose residue type differs
    clashing = []
    for rg_mov in moving_hierarchy.only_model().residue_groups():
        candidates = fixed_lookup.get(rg_mov.parent().id, {}).get(rg_mov.resid(), [])
        if len(candidates) == 0:
            continue
        # Fail loudly rather than pick one arbitrarily
        if len(candidates) > 1:
            raise Exception('More than one matching residue group in the output hierarchy?')
        rg_ref = candidates[0]

        # Same residue type in both structures -- nothing to do
        # TODO allow this if the resnames match OR if it's a protein residue? (allows mutations/reactions?)
        if _stripped_names(rg_ref) == _stripped_names(rg_mov):
            continue

        if verbose:
            print('Different residues with same id - changing chains: {} != {}'.format(
                list(rg_ref.unique_resnames()), list(rg_mov.unique_resnames())))
        clashing.append(rg_mov)

    if not clashing:
        return moving_hierarchy

    spare_chain_ids = find_unused_chain_ids(hierarchies=[fixed_hierarchy, moving_hierarchy])
    dest_chain = None
    prev_source_chain = None

    for rg_mov in clashing:
        source_chain = rg_mov.parent()

        # Start a new chain if there is none yet, if this resid is already in
        # the current one, or if we have moved on to a different source chain
        start_new_chain = (
            (not dest_chain)
            or (rg_mov.resid() in dest_chain.get_residue_ids())
            or (prev_source_chain.id != source_chain.id)
        )
        if start_new_chain:
            dest_chain = iotbx.pdb.hierarchy.chain(id=spare_chain_ids.pop(0))
            source_chain.parent().append_chain(dest_chain)

        if verbose:
            print('{} - moving to chain {}'.format(Labeller.format(rg_mov), dest_chain.id))

        # Detach from the old chain, attach to the new one
        source_chain.remove_residue_group(rg_mov)
        dest_chain.append_residue_group(rg_mov)

        prev_source_chain = source_chain

    return moving_hierarchy
def sanitise_occupancies(hierarchy, fixed_conformers=None, min_occ=0.0, max_occ=1.0, in_place=False, verbose=False):
    """Bring residue-group occupancies into the range ``[min_occ, max_occ]``.

    Residue groups whose occupancy (from ``calculate_residue_group_occupancy``)
    already lies in range are left untouched.  For the others, the main-conf
    atom group (if any) is passed to
    ``sanitise_atom_group_occupancies_in_place`` and the alt-conf atom groups
    whose altloc is not in ``fixed_conformers`` are passed to
    ``sanitise_multiple_atom_group_occupancies_in_place``, with limits reduced
    by the total occupancy of the fixed conformers.

    Works on a deep copy unless ``in_place`` is true; returns the hierarchy
    that was modified.

    Raises:
        Exception: if the fixed conformers alone exceed ``max_occ``.
    """

    assert (min_occ >= 0.0) and (max_occ <= 1.0)

    if fixed_conformers is None:
        fixed_conformers = []
    if not in_place:
        hierarchy = hierarchy.deep_copy()

    def _occ(ag):
        # Occupancy of an atom group, taken as the largest atom occupancy
        return max(ag.atoms().extract_occ())

    def _occ_string(ags):
        return ', '.join(str(_occ(ag)) for ag in ags)

    for rg in hierarchy.residue_groups():

        rg_occ = calculate_residue_group_occupancy(residue_group=rg)
        if min_occ <= rg_occ <= max_occ:
            continue

        if verbose:
            print('Occupancy of residue {} is {} -- sanitising'.format(Labeller.format(rg), rg_occ))

        main_ag, alt_ags = split_main_and_alt_conf_atom_groups(rg)

        # ---- main conformer ----
        if main_ag is not None:
            if verbose:
                print('------------------>')
                print('Sanitising main-conf atom group:\n\t{}'.format(Labeller.format(main_ag)))
                print('Current occupancy: {}'.format(_occ(main_ag)))
            sanitise_atom_group_occupancies_in_place(main_ag, min_occ=min_occ, max_occ=max_occ)
            if verbose:
                print('New occupancy: {}'.format(_occ(main_ag)))

        # ---- alternate conformers ----
        if alt_ags is not None:
            # Separate the groups that may change from those held constant
            ag_chnge, ag_const = [], []
            for ag in alt_ags:
                (ag_const if ag.altloc in fixed_conformers else ag_chnge).append(ag)

            occ_chnge = sum(_occ(ag) for ag in ag_chnge)
            occ_const = sum(_occ(ag) for ag in ag_const)

            if occ_const > max_occ:
                raise Exception(
                    'Occupancy of fixed atom groups ({}) is already greater than maximum ({})'
                    .format(occ_const, max_occ))

            if verbose:
                print('------------------>')
                print('Sanitising alt-conf atom groups:')
                print('Fixed conformers:\n\t{}'.format('\n\t'.join(Labeller.format(ag) for ag in ag_const)))
                print('Other conformers:\n\t{}'.format('\n\t'.join(Labeller.format(ag) for ag in ag_chnge)))
                print('Total occupancy (fixed): {}'.format(occ_const))
                print('Individual occupancies: {}'.format(_occ_string(ag_const)))
                print('Total occupancy (other): {}'.format(occ_chnge))
                print('Individual occupancies: {}'.format(_occ_string(ag_chnge)))

            # The changeable groups get whatever range the fixed ones leave
            sanitise_multiple_atom_group_occupancies_in_place(
                atom_groups=ag_chnge,
                min_occ=max(0.0, min_occ - occ_const),
                max_occ=min(1.0, max_occ - occ_const),
            )

            if verbose:
                print('New total occupancy (other): {}'.format(sum(_occ(ag) for ag in ag_chnge)))
                print('Individual occupancies: {}'.format(_occ_string(ag_chnge)))

    return hierarchy
def prune_redundant_alternate_conformations(hierarchy, required_altlocs=None, rmsd_cutoff=0.1, in_place=False, verbose=False):
    """Collapse alternate conformers that are all (nearly) identical.

    A residue group is pruned when it has conformers, its non-blank atom
    groups include every altloc in ``required_altlocs``, and every pair of
    non-blank atom groups has an RMSD (from ``calculate_paired_atom_rmsd``)
    that is not ``None`` and not greater than ``rmsd_cutoff``.  Pruning leaves
    a single atom group with a blank altloc.

    Args:
        hierarchy: structure to prune.
        required_altlocs: iterable of altlocs that must all be present for a
            residue group to be considered; ``None`` (default) means none.
        rmsd_cutoff: largest pairwise RMSD at which conformers still count as
            duplicates.
        in_place: modify ``hierarchy`` itself rather than a deep copy.
        verbose: report RMSDs and pruning decisions on stdout.

    Returns:
        The hierarchy that was modified.
    """

    if not in_place:
        hierarchy = hierarchy.deep_copy()

    # None rather than a mutable [] default in the signature
    required_altlocs = set(required_altlocs) if required_altlocs is not None else set()

    for residue_group in hierarchy.residue_groups():

        # Skip if no conformers
        if not residue_group.have_conformers():
            continue

        # Separate the blank-altloc atom group (if any) from the others
        if residue_group.move_blank_altloc_atom_groups_to_front() != 0:
            main_ag = residue_group.atom_groups()[0]
            alt_ags = residue_group.atom_groups()[1:]
            assert main_ag.altloc == ''
            assert alt_ags != []
        else:
            main_ag = None
            alt_ags = residue_group.atom_groups()

        # Check no misplaced main conf
        assert '' not in [ag.altloc for ag in alt_ags]

        # Only consider residue groups containing all of the required altlocs
        if required_altlocs.difference([ag.altloc for ag in alt_ags]):
            continue

        # Every pair of conformers must be within the cutoff
        redundant = True
        n_alt = len(alt_ags)
        for i in range(n_alt):
            for j in range(i + 1, n_alt):
                d = calculate_paired_atom_rmsd(
                    atoms_1=alt_ags[i].atoms(),
                    atoms_2=alt_ags[j].atoms(),
                    sort=True,
                    truncate_to_common_set=False,
                )
                if verbose:
                    print('Residue {}, alt {} - alt {}: rmsd {}'.format(
                        Labeller.format(residue_group), i, j, d))
                if (d is None) or (d > rmsd_cutoff):
                    redundant = False
                    break
            if not redundant:
                break
        if not redundant:
            continue

        # All rmsds below cutoff - prune!
        if verbose:
            print('Pruning {}: altlocs {} -> [""]'.format(
                Labeller.format(residue_group), [ag.altloc for ag in alt_ags]))

        if main_ag is not None:
            # Merge a copy of one alt group into the existing main atom group,
            # scaled to the main group's occupancy
            new_main_ag = alt_ags[0].detached_copy()
            new_main_ag.altloc = ''
            normalise_occupancies(
                atoms=new_main_ag.atoms(),
                max_occ=max(main_ag.atoms().extract_occ()))
            residue_group.merge_atom_groups(main_ag, new_main_ag)
        else:
            # Promote one alt group to be the main group, carrying the
            # combined occupancy of all the conformers
            new_main_ag = alt_ags.pop(0)
            new_main_ag.altloc = ''
            normalise_occupancies(
                atoms=new_main_ag.atoms(),
                max_occ=sum([max(ag.atoms().extract_occ()) for ag in [new_main_ag] + alt_ags]))

        # Remove all remaining alternate groups
        for ag in alt_ags:
            residue_group.remove_atom_group(ag)
        assert len(residue_group.atom_groups()) == 1

    return hierarchy
def split_conformations(filename, params, log=None):
    """Write subsets of the conformers in a PDB file to separate PDB files.

    How the conformers are divided is set by ``params.options.mode``:

    * ``'by_residue_name'`` -- two sets: the altlocs of atom groups whose
      resname is in ``params.options.by_residue_name.resname`` (a
      comma-separated string), and all remaining altlocs;
    * ``'by_conformer'`` -- one set per altloc;
    * ``'by_conformer_group'`` -- one set per entry of
      ``params.options.by_conformer_group.conformers`` (each a
      comma-separated string of altlocs).

    Every output keeps the blank-altloc atoms plus the atoms of its conformer
    set.  Depending on ``params.options``, duplicated conformers are then
    pruned, altlocs are relabelled and occupancies are rescaled.  Each file
    starts with the header read from the input file.

    Returns the list of output paths, one per conformer set.  Paths of empty
    conformer sets are included in the list although no file is written for
    them.

    Raises ``Exception`` for an unrecognised ``params.options.mode``.
    """

    # Default logger when none is supplied
    if log is None:
        log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    # Result is discarded -- presumably this call is made only so that it
    # fails if the file does not contain exactly one model (TODO confirm)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set (without the blank altloc)
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # List of conformers to output for each structure, and suffixes
        out_confs = map(sorted, [
            all_confs.intersection(sel_confs),
            all_confs.difference(sel_confs)
        ])
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        sel_resnames = None
        sel_confs = None
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        sel_resnames = None
        sel_confs = None
        # One structure for each set of supplied conformer sets
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes: <input root>.<suffix_prefix>.<suffix>.pdb
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        # Nothing to write for an empty conformer set
        if not this_confs:
            continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"
            # (a single remaining conformer is instead mapped to ' ')
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose:
                log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residues groups with occupancies of less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks: the largest residue-group occupancy must lie in [0, 1]
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structue: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents (creates/truncates the output file)
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file, appending the structure after the header
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths