def show_summary(self, log=None):
    if log is None:
        log = Log()
    log.subheading('Available datasets')
    for d in self.datasets:
        log.bar()
        d.show_summary(log=log)
        log.bar()
def make_local_restraints(params, input_hierarchy, log=None):
    """Create local restraints for a hierarchy"""

    if log is None:
        log = Log(verbose=True)

    log.subheading('Generating local structure restraints')

    atom_d_pairs = find_atoms_around_alternate_conformers(
        hierarchy=input_hierarchy.hierarchy,
        altlocs=params.local_restraints.altlocs.split(',') if params.local_restraints.altlocs else None,
        dist_cutoff=params.local_restraints.max_distance)
    # Filter out restraints shorter than the minimum distance
    atom_d_pairs = [(a1, a2, d) for a1, a2, d in atom_d_pairs
                    if d > params.local_restraints.min_distance]

    log('Created {} local restraints for {} conformers with distance cutoff of {}-{}A'.format(
        len(atom_d_pairs),
        params.local_restraints.altlocs if params.local_restraints.altlocs else 'all',
        params.local_restraints.min_distance,
        params.local_restraints.max_distance))
    log('')

    if params.output.refmac:
        restraint_list = [
            RefmacFormatter.make_distance_restraint(
                atm_1=a1, atm_2=a2, value=d, sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = RefmacFormatter.format_distance_restraints(restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac local structural restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [
            PhenixFormatter.make_distance_restraint(
                atm_1=a1, atm_2=a2, value=d, sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = PhenixFormatter.format_distance_restraints(restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
def __init__(self, id, type, zenodo_id, log=None):

    if log is None:
        log = Log()
    self.log = log

    super(ZenodoDataset, self).__init__(id=id, type=type)

    self.zenodo_id = zenodo_id
    self.base_url = "https://zenodo.org/record/{}/".format(zenodo_id)
    self.data_url = self.base_url + "files/data.zip"
    self.data_dir = None
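# The constructor above only builds the record URLs; the download step itself is not part of
# this excerpt. A minimal sketch of how the archive at data_url might be fetched, assuming the
# standard-library urllib/zipfile modules are acceptable here (illustrative helper only):
def _example_fetch_zenodo_archive(data_url, out_dir):
    """Download and unpack a Zenodo 'data.zip' archive (hypothetical helper)."""
    import os
    import urllib
    import zipfile
    zip_path = os.path.join(out_dir, 'data.zip')
    # Retrieve the archive from the URL constructed in __init__ (base_url + 'files/data.zip')
    urllib.urlretrieve(data_url, zip_path)
    # Unpack alongside the downloaded archive
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(out_dir)
    return out_dir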
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        try:
            obj.hierarchy.only_model()
        except Exception:
            raise Sorry('Input structures may only have one model')

        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atom numbering
        final.sort_atoms_in_place()

        # Write output file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
def run(params):

    # Create log object
    log = Log(log_file=os.path.abspath(params.output.log_file), verbose=True)

    # Change paths to absolute paths
    params.input.pandda_dir = os.path.abspath(params.input.pandda_dir)
    params.output.export_dir = os.path.abspath(params.output.export_dir)
    # Must be in the pandda directory (pandda objects use relative paths)
    os.chdir(params.input.pandda_dir)

    # Report modified phil
    log.heading('Processed parameters')
    log(master_phil.format(params).as_str())

    ############################################################################

    log.heading('Identifying folders to export')

    # Find the dataset directories to be exported
    if params.input.select_datasets:
        selected_datasets = []
        for s in params.input.select_datasets:
            selected_datasets.extend(s.split(','))
        export_dirs = sorted([
            os.path.join(params.input.pandda_dir, 'processed_datasets', p)
            for p in selected_datasets
        ])
        # Filter by existence of path
        export_dirs = [p for p in export_dirs if os.path.exists(p)]
    else:
        export_dirs = sorted(
            glob.glob(os.path.join(params.input.pandda_dir, 'processed_datasets', '*')))
    assert export_dirs, 'No Export Directories Found'

    # Report
    log('Exporting:\n\t' + '\n\t'.join(export_dirs))

    # Create output directory
    if not os.path.exists(params.output.export_dir):
        os.mkdir(params.output.export_dir)

    # Merge the fitted structures
    for dir in export_dirs:
        process_and_export_folder(dir=dir, params=params, log=log)

    log.heading('FINISHED')
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    log.heading('Validating input parameters')
    assert params.input.pdb, 'No PDB files given'

    log.heading('Splitting multi-state structures')

    # Iterate through the input structures and extract the conformation
    for pdb in params.input.pdb:
        split_conformations(filename=pdb, params=params, log=log)

    log.heading('FINISHED')
def export_folder(dir, params, log=Log()):
    """Export a subset of a folder's contents"""

    # Extract folder name and report
    dir_name = os.path.basename(dir)
    # Get the file list for this folder
    file_list = get_file_list(dir=dir)

    # Create output dir
    exp_dir = os.path.join(params.output.export_dir, params.output.dir_prefix + dir_name)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    # Report
    log.bar()
    log('Exporting \n\tfrom {!s} \n\t to {!s}'.format(dir, exp_dir))
    log.bar()
    log('Exporting files:')
    for f in file_list:
        log('\t' + os.path.relpath(f, start=dir))
    log.bar()

    # Export files
    for proc_file in file_list:
        # Check that the file exists
        if not os.path.exists(proc_file):
            log('FILE DOES NOT EXIST: {!s}'.format(proc_file))
            continue
        # Exported file path
        export_file = os.path.join(exp_dir, params.output.file_prefix + os.path.basename(proc_file))
        if params.settings.verbose:
            log('Copying {!s}\n to {!s}'.format(proc_file, export_file))
        # Check to see if file already exists and delete if overwrite
        if os.path.exists(export_file):
            if params.settings.overwrite:
                os.remove(export_file)
            else:
                raise Exception('File already exists: {}. Need to set overwrite=True to continue.'.format(export_file))
        shutil.copy(proc_file, export_file)

    return exp_dir
def standardise_multiconformer_model(hierarchy, pruning_rmsd=0.1, in_place=False, verbose=False, log=None):
    """Standardise hierarchies by expanding alternate model conformations, and then trimming alternate conformations where possible"""

    if log is None:
        log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy = hierarchy.deep_copy()

    # Sort the atoms
    hierarchy.sort_atoms_in_place()

    log.heading('Preparing to standardise structure')

    log.subheading('Explicitly expanding model to all conformations of the crystal')
    expand_alternate_conformations(hierarchy=hierarchy, in_place=True, verbose=verbose)

    log.subheading('Pruning unnecessary multi-conformer residues in the expanded structure')
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy,
        required_altlocs=hierarchy.altloc_indices(),
        rmsd_cutoff=pruning_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy
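# A minimal usage sketch for the function above (the file names are hypothetical; iotbx.pdb is
# assumed to be available, as it is used elsewhere in this package):
def _example_standardise_model():
    import iotbx.pdb
    pdb_obj = iotbx.pdb.hierarchy.input('multi_state_model.pdb')
    standardised = standardise_multiconformer_model(
        hierarchy=pdb_obj.hierarchy,
        pruning_rmsd=0.1,    # prune alternate conformers closer than this RMSD
        in_place=True,
        verbose=True)
    # Write the standardised hierarchy back out
    standardised.write_pdb_file(
        file_name='multi_state_model.standardised.pdb',
        crystal_symmetry=pdb_obj.crystal_symmetry())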
def make_link_records(params, input_hierarchy, link_file, log=None):
    """Create link records to make a continuous peptide chain"""

    if log is None:
        log = Log(verbose=True)

    log.subheading('Checking the continuity of the protein backbone')

    links, warnings = generate_set_of_alternate_conformer_peptide_links(
        hierarchy=input_hierarchy.hierarchy)

    if warnings:
        log.bar()
        log('WARNINGS:')
        log.bar()
        for w in warnings:
            log(w)
        log.bar()
        log('')

    if (not links) and (not warnings):
        log('No breaks in the backbone - hooray! (nothing needs to be done here)')
        return
    elif (not links):
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        log("!!! >>> There are breaks in the backbone but I'm not able to do anything to fix them <<< !!!")
        log("!!! >>> You'll need to check them manually to see if these are going to be a problem... <<< !!!")
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        return

    link_block = '\n'.join([
        format_link_record(atom_1=a1, atom_2=a2, chain_id_1=c1, chain_id_2=c2, link_type=lt)
        for a1, a2, c1, c2, lt in links
    ])

    log('Need to apply {} links to make the backbone continuous:'.format(len(links)))
    log('')
    log(link_block)
    log('')

    log('Writing hierarchy with new link records to {}'.format(link_file))
    log('(This file can only be used for refinement with REFMAC)')
    log('')
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    log('!!! ALTHOUGH THE FILE WITH BACKBONE LINKS HAS BEEN OUTPUT, IT SHOULD BE USED WITH CAUTION !!!')
    log('!!! THE CONNECTION OF ALTERNATE CONFORMATIONS OF THE BACKBONE IS GENERALLY "INCORRECT" !!!')
    log('!!! THERE SHOULD BE A VERY GOOD REASON FOR THESE RESTRAINTS TO BE USED !!!')
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')

    input_hierarchy.hierarchy.write_pdb_file(
        file_name=link_file,
        crystal_symmetry=input_hierarchy.crystal_symmetry(),
        link_records=link_block)
class Program(object): """Class meant to provide basic functionality for programs and pipelines""" _NAME = None _TEXT = None _VERSION = None _allowed_statuses = ['running', 'done', 'errored'] log = Log() file_manager = None def write_running_parameters_to_log(self, params): self.log.heading('Processed parameters') self.log(self.master_phil.format(python_object=params).as_str()) self.log.heading('Parameters different to the defaults') self.log( self.master_phil.fetch_diff(source=self.master_phil.format( python_object=params)).as_str()) def check_for_matplotlib(self, backend=None, interactive=False): """Check to see whether we can load matplotlib""" self.log('Checking for matplotlib:') try: import matplotlib matplotlib.interactive(interactive) from matplotlib import pyplot if backend: pyplot.switch_backend(backend) current_backend = pyplot.get_backend() assert current_backend == backend, 'Backend loaded ({}) is not the one requested ({})'.format( current_backend, backend) assert pyplot.isinteractive( ) is interactive, 'Interactive setting is incorrect ({} is not {})'.format( pyplot.isinteractive(), interactive) pyplot.style.use('ggplot') self.log('pyplot loaded successfully. Using backend "{!s}"'.format( current_backend)) return True except: self.log('===================================>>>') self.log( '>> COULD NOT IMPORT MATPLOTLIB. WILL NOT BE ABLE TO GENERATE GRAPHS.' ) self.log('===================================>>>') return False def initialise_file_manager(self, rootdir): self.file_manager = FileManager(rootdir=rootdir) return self.file_manager def update_status(self, status): """Set log files to indicate the status of the program""" assert status in self._allowed_statuses # Delete any that may exist existing_files = [ self.file_manager.get_file('status').format(f) for f in self._allowed_statuses ] [os.remove(f) for f in existing_files if os.path.exists(f)] # Create the new status file with open(self.file_manager.get_file('status').format(status), 'w') as fh: fh.write('') def pickle(self, pickle_file, pickle_object, overwrite=True): """Takes an object and pickles it""" if os.path.exists(pickle_file) and not overwrite: self.log('NOT PICKLING: {!s}'.format( os.path.relpath(pickle_file, start=self.out_dir))) else: self.log('Pickling Object: {!s}'.format( os.path.relpath(pickle_file, start=self.out_dir))) easy_pickle.dump(pickle_file, pickle_object) def unpickle(self, pickle_file): """Takes an object and unpickles it""" self.log('Unpickling File: {!s}'.format( os.path.relpath(pickle_file, start=self.out_dir))) return easy_pickle.load(pickle_file)
def make_occupancy_constraints(params, input_hierarchy, log=None): """Create occupancy groups for a hierarchy""" if log is None: log = Log(verbose=True) log.subheading('Generating occupancy-constrained groups') # Ligand resname identifiers resnames = params.occupancy.resname.split(',') if params.settings.verbose: log('Looking for ligands with resname {!s}'.format( ' or '.join(resnames))) log('') # Make occupancy groups occupancy_groups = overlapping_occupancy_groups( hierarchy=input_hierarchy.hierarchy, resnames=resnames, group_dist=params.occupancy.group_dist, overlap_dist=params.occupancy.overlap_dist, complete_groups=params.occupancy.complete_groups, exclude_altlocs=params.occupancy.exclude_altlocs.split(',') if params.occupancy.exclude_altlocs else [], verbose=params.settings.verbose) # Record whether the occupancy groups are complete (occs sum to 1) if params.occupancy.complete_groups: occupancy_complete = [True] * len(occupancy_groups) else: occupancy_complete = [False] * len(occupancy_groups) if not occupancy_groups: log('No matching residues were found (no occupancy constraints created)' ) return log.bar() log('') log('Created {} occupancy groups for overlapping conformers'.format( len(occupancy_groups))) log('') # Ref-make the default occupancy groups? if params.occupancy.simple_groups: log('simple_groups=={}: Remaking default occupancy restraints for residues' .format(params.occupancy.simple_groups)) if params.settings.verbose: log('') simple_groups = simple_occupancy_groups( hierarchy=input_hierarchy.hierarchy, verbose=params.settings.verbose) num_alts = len( [a for a in input_hierarchy.hierarchy.altloc_indices() if a != '']) occupancy_complete += [ True if len(g) == num_alts else False for g in simple_groups ] occupancy_groups += simple_groups if params.settings.verbose: log('') log('Increased number of occupancy groups to {}'.format( len(occupancy_groups))) log('') if params.output.refmac: restraint_list = RefmacFormatter.make_occupancy_restraints( list_of_lists_of_groups=occupancy_groups, group_completeness=occupancy_complete) rest_block = RefmacFormatter.format_occupancy_restraints( restraint_list=restraint_list) with open(params.output.refmac, 'a') as fh: fh.write(rest_block + '\n') if params.settings.verbose: log.subheading('refmac occupancy restraints') log(rest_block[:1000] + '...' * (len(rest_block) > 1000)) log('') if params.output.phenix: restraint_list = PhenixFormatter.make_occupancy_restraints( list_of_lists_of_groups=occupancy_groups, group_completeness=occupancy_complete) rest_block = PhenixFormatter.format_occupancy_restraints( restraint_list=restraint_list) with open(params.output.phenix, 'a') as fh: fh.write(rest_block + '\n') if params.settings.verbose: log.subheading('phenix occupancy restraints') log(rest_block[:1000] + '...' * (len(rest_block) > 1000)) log('')
def make_b_factor_restraints(params, input_hierarchy, log=None):

    if log is None:
        log = Log(verbose=True)

    # Not implemented yet: placeholder for B-factor restraint generation
    pass
def run(self): """Process the dataset""" dataset, dataset_map, grid, map_analyser, args, verbose = self.data # TODO Hardcoded check - to be removed? TODO assert dataset_map.is_sparse() # ============================================================================> # Prepare output objects # ============================================================================> log_strs = [] log_file = dataset.file_manager.get_file('dataset_log') log = Log(log_file=log_file, verbose=False, silent=True) # ============================================================================> # Build new blob search object # ============================================================================> blob_finder = PanddaZMapAnalyser(params=args.params.z_map_analysis, grid=grid, log=log) print('Writing log for dataset {!s} to ...{}'.format( dataset.tag, log_file[log_file.index('processed'):])) # ============================================================================> # Extract the global mask object from the grid # ============================================================================> dset_total_temp = grid.global_mask().total_mask_binary().copy() # ============================================================================> # Generate symmetry masks for this dataset # ============================================================================> log.bar() log('Masking symetry contacts from Z-map.') # Generate symmetry contacts for this dataset and align to reference frame dataset_sym_copies = dataset.model.crystal_contacts( distance_cutoff=args.params.masks.outer_mask + 5, combine_copies=True) dataset_sym_copies.atoms().set_xyz( dataset.model.alignment.nat2ref( dataset_sym_copies.atoms().extract_xyz())) # Only need to write if writing reference frame maps if args.output.developer.write_reference_frame_maps: dataset_sym_copies.write_pdb_file( dataset.file_manager.get_file('symmetry_copies')) # Extract protein atoms from the symmetry copies dataset_sym_sites_cart = non_water( dataset_sym_copies).atoms().extract_xyz() # Generate symmetry contacts grid mask dataset_mask = GridMask(parent=grid, sites_cart=dataset_sym_sites_cart, max_dist=args.params.masks.outer_mask, min_dist=args.params.masks.inner_mask_symmetry) # Combine with the total mask to generate custom mask for this dataset dset_total_temp.put(dataset_mask.inner_mask_indices(), 0) dset_total_idxs = numpy.where(dset_total_temp)[0] log('After masking with symmetry contacts: {} points for Z-map analysis' .format(len(dset_total_idxs))) # Write map of grid + symmetry mask if args.output.developer.write_reference_frame_grid_masks: grid.write_indices_as_map( indices=dset_total_idxs, f_name=dataset.file_manager.get_file('grid_mask'), origin_shift=True) # ============================================================================> # Generate custom masks for this dataset # ============================================================================> if args.params.z_map_analysis.masks.selection_string is not None: log.bar() log('Applying custom mask to the Z-map: "{}"'.format( args.params.z_map_analysis.masks.selection_string)) cache = dataset.model.hierarchy.atom_selection_cache() custom_mask_selection = cache.selection( args.params.z_map_analysis.masks.selection_string) custom_mask_sites = dataset.model.hierarchy.select( custom_mask_selection).atoms().extract_xyz() log('Masking with {} atoms'.format(len(custom_mask_sites))) # Generate custom grid mask dataset_mask = GridMask( parent=grid, sites_cart=custom_mask_sites, 
max_dist=args.params.z_map_analysis.masks.outer_mask, min_dist=args.params.z_map_analysis.masks.inner_mask) # Combine with the total mask to generate custom mask for this dataset dset_total_temp *= dataset_mask.total_mask_binary() dset_total_idxs = numpy.where(dset_total_temp)[0] log('After masking with custom mask: {} points for Z-map analysis'. format(len(dset_total_idxs))) # Write out mask grid.write_indices_as_map( indices=dset_total_idxs, f_name=dataset.file_manager.get_file('z_map_mask'), origin_shift=True) # ============================================================================> ##### # CALCULATE Z-MAPS AND LOOK FOR LARGE BLOBS ##### # ============================================================================> # Check maps and that all maps are sparse # ============================================================================> assert dataset_map.data is not None, 'Something has gone wrong - this dataset has no loaded map' assert dataset_map.is_sparse( ) is map_analyser.statistical_maps.mean_map.is_sparse() assert dataset_map.is_sparse( ) is map_analyser.statistical_maps.medn_map.is_sparse() assert dataset_map.is_sparse( ) is map_analyser.statistical_maps.stds_map.is_sparse() assert dataset_map.is_sparse( ) is map_analyser.statistical_maps.sadj_map.is_sparse() # ============================================================================> # CALCULATE MEAN-DIFF MAPS # ============================================================================> mean_diff_map = map_analyser.calculate_z_map(map=dataset_map, method='none') # # ============================================================================> # # NAIVE Z-MAP - NOT USING UNCERTAINTY ESTIMATION OR ADJUSTED STDS # # ============================================================================> # z_map_naive = map_analyser.calculate_z_map(map=dataset_map, method='naive') # z_map_naive_normalised = z_map_naive.normalised_copy() # ============================================================================> # UNCERTAINTY Z-MAP - NOT USING ADJUSTED STDS # ============================================================================> z_map_uncty = map_analyser.calculate_z_map( map=dataset_map, uncertainty=dataset_map.meta.map_uncertainty, method='uncertainty') z_map_uncty_normalised = z_map_uncty.normalised_copy() # ============================================================================> # ADJUSTED+UNCERTAINTY Z-MAP # ============================================================================> z_map_compl = map_analyser.calculate_z_map( map=dataset_map, uncertainty=dataset_map.meta.map_uncertainty, method='adjusted+uncertainty') z_map_compl_normalised = z_map_compl.normalised_copy() # ============================================================================> # SELECT WHICH MAP TO DO THE BLOB SEARCHING ON # ============================================================================> # if args.params.statistical_maps.z_map_type == 'naive': # z_map = z_map_naive_normalised # z_map_stats = basic_statistics(flex.double(z_map_naive.data)) if args.params.statistical_maps.z_map_type == 'uncertainty': z_map = z_map_uncty_normalised z_map_stats = basic_statistics(flex.double(z_map_uncty.data)) elif args.params.statistical_maps.z_map_type == 'adjusted+uncertainty': z_map = z_map_compl_normalised z_map_stats = basic_statistics(flex.double(z_map_compl.data)) else: raise Exception('Invalid Z-map type') # ============================================================================> # RECORD Z-MAP FOR STATISTICS # 
============================================================================> # Calculate statistics of z-maps dataset_map.meta.z_mean = z_map_stats.mean dataset_map.meta.z_stdv = z_map_stats.bias_corrected_standard_deviation dataset_map.meta.z_skew = z_map_stats.skew dataset_map.meta.z_kurt = z_map_stats.kurtosis # ============================================================================> z_map.meta.type = 'z-map' # ============================================================================> # ============================================================================> ##### # WRITE ALL MAP DISTRIBUTIONS (THESE DON'T USE MUCH SPACE) ##### # ============================================================================> # Sampled Map analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file('s_map_png'), plot_vals=dataset_map.get_map_data(sparse=True)) # Mean-Difference analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file('d_mean_map_png'), plot_vals=mean_diff_map.get_map_data(sparse=True)) # # Naive Z-Map # analyse_graphs.map_value_distribution(f_name = dataset.file_manager.get_file('z_map_naive_png'), # plot_vals = z_map_naive.get_map_data(sparse=True), # plot_normal = True) # # Normalised Naive Z-Map # analyse_graphs.map_value_distribution(f_name = dataset.file_manager.get_file('z_map_naive_normalised_png'), # plot_vals = z_map_naive_normalised.get_map_data(sparse=True), # plot_normal = True) # Uncertainty Z-Map analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file('z_map_uncertainty_png'), plot_vals=z_map_uncty.get_map_data(sparse=True), plot_normal=True) # Normalised Uncertainty Z-Map analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file( 'z_map_uncertainty_normalised_png'), plot_vals=z_map_uncty_normalised.get_map_data(sparse=True), plot_normal=True) # Corrected Z-Map analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file('z_map_corrected_png'), plot_vals=z_map_compl.get_map_data(sparse=True), plot_normal=True) # Normalised Corrected Z-Map analyse_graphs.map_value_distribution( f_name=dataset.file_manager.get_file( 'z_map_corrected_normalised_png'), plot_vals=z_map_compl_normalised.get_map_data(sparse=True), plot_normal=True) # Plot Q-Q Plot of Corrected Z-Map to see how normal it is analyse_graphs.qq_plot_against_normal( f_name=dataset.file_manager.get_file('z_map_qq_plot_png'), plot_vals=z_map_compl_normalised.get_map_data(sparse=True)) # ============================================================================> ##### # LOOK FOR CLUSTERS OF LARGE Z-SCORES ##### # ============================================================================> # Contour the grid at a particular Z-Value # ============================================================================> num_clusters, z_clusters = blob_finder.cluster_high_z_values( z_map_data=z_map.get_map_data(sparse=False), point_mask_idx=dset_total_idxs) # ============================================================================> # Too many points to cluster -- probably a bad dataset # ============================================================================> if num_clusters == -1: # This dataset is too noisy to analyse - flag! log_strs.append( 'Z-Map too noisy to analyse -- not sure what has gone wrong here...' 
) return dataset, dataset_map.meta, log_strs # ============================================================================> ##### # FILTER/SELECT CLUSTERS OF Z-SCORES ##### # ============================================================================> # Filter the clusters by size and peak height # ============================================================================> if num_clusters > 0: num_clusters, z_clusters = blob_finder.filter_z_clusters_1( z_clusters=z_clusters) blob_finder.validate_clusters(z_clusters) if num_clusters == 0: log_strs.append('===> Minimum cluster peak/size not reached.') # ============================================================================> # Filter the clusters by distance from protein # ============================================================================> if num_clusters > 0: num_clusters, z_clusters = blob_finder.filter_z_clusters_2( z_clusters=z_clusters, dataset=dataset) blob_finder.validate_clusters(z_clusters) if num_clusters == 0: log_strs.append('===> Clusters too far from protein.') # ============================================================================> # Group Nearby Clusters Together # ============================================================================> if num_clusters > 0: num_clusters, z_clusters = blob_finder.group_clusters( z_clusters=z_clusters) blob_finder.validate_clusters(z_clusters) # ============================================================================> # Filter the clusters by symmetry equivalence # ============================================================================> if num_clusters > 0: num_clusters, z_clusters = blob_finder.filter_z_clusters_3( z_clusters=z_clusters, dataset=dataset) blob_finder.validate_clusters(z_clusters) # ============================================================================> ##### # WRITE MAPS ##### # ============================================================================> # write dataset maps in the reference frame # ============================================================================> if args.output.developer.write_reference_frame_maps: dataset_map.to_file( filename=dataset.file_manager.get_file('sampled_map'), space_group=grid.space_group()) mean_diff_map.to_file( filename=dataset.file_manager.get_file('mean_diff_map'), space_group=grid.space_group()) z_map.to_file(filename=dataset.file_manager.get_file('z_map'), space_group=grid.space_group()) # ============================================================================> # Write out mask of the high z-values # ============================================================================> if args.output.developer.write_reference_frame_grid_masks: # Write map of where the blobs are (high-Z mask) highz_points = [] [highz_points.extend(list(x[0])) for x in z_clusters] highz_points = [map(int, v) for v in highz_points] highz_indices = map(grid.indexer(), list(highz_points)) grid.write_indices_as_map( indices=highz_indices, f_name=dataset.file_manager.get_file('high_z_mask'), origin_shift=True) # ============================================================================> # Write different Z-Maps? 
(Probably only needed for testing) # ============================================================================> if args.output.developer.write_reference_frame_all_z_map_types: # z_map_naive.to_file(filename=dataset.file_manager.get_file('z_map_naive'), space_group=grid.space_group()) # z_map_naive_normalised.to_file(filename=dataset.file_manager.get_file('z_map_naive_normalised'), space_group=grid.space_group()) z_map_uncty.to_file( filename=dataset.file_manager.get_file('z_map_uncertainty'), space_group=grid.space_group()) z_map_uncty_normalised.to_file( filename=dataset.file_manager.get_file( 'z_map_uncertainty_normalised'), space_group=grid.space_group()) z_map_compl.to_file( filename=dataset.file_manager.get_file('z_map_corrected'), space_group=grid.space_group()) z_map_compl_normalised.to_file( filename=dataset.file_manager.get_file( 'z_map_corrected_normalised'), space_group=grid.space_group()) # ============================================================================> # Skip to next dataset if no clusters found # ============================================================================> if num_clusters > 0: log_strs.append('===> {!s} Cluster(s) found.'.format(num_clusters)) else: log_strs.append('===> No Clusters found.') return (dataset, dataset_map.meta, log_strs) assert num_clusters > 0, 'NUMBER OF CLUSTERS AFTER FILTERING == 0!' # ============================================================================> # Extract the map data in non-sparse format # ============================================================================> dset_map_data = dataset_map.get_map_data(sparse=False) avrg_map_data = map_analyser.average_map().get_map_data(sparse=False) # ============================================================================> # Process the identified features # ============================================================================> for event_idx, (event_points, event_values) in enumerate(z_clusters): # Number events from 1 event_num = event_idx + 1 # Create a unique identifier for this event event_key = (dataset.tag, event_num) # ============================================================================> # Create a point cluster object # ============================================================================> point_cluster = PointCluster(id=event_key, points=event_points, values=event_values) # ============================================================================> # Estimate the background correction of the detected feature # ============================================================================> # Extract sites for this cluster and estimate the background correction for the event log_strs.append('----------------------------------->>>') log_strs.append( 'Estimating Event {!s} Background Correction'.format( event_num)) # Generate custom grid mask for this dataset event_mask = GridMask(parent=grid, sites_cart=grid.grid2cart( point_cluster.points, origin_shift=True), max_dist=2.0, min_dist=0.0) log_strs.append( '=> Event sites ({!s} points) expanded to {!s} points'.format( len(point_cluster.points), len(event_mask.outer_mask_indices()))) # Select masks to define regions for bdc calculation exp_event_idxs = flex.size_t(event_mask.outer_mask_indices()) reference_idxs = flex.size_t( grid.global_mask().inner_mask_indices()) # ============================================================================> # Generate BDC-estimation curve and estimate BDC # ============================================================================> event_remains, 
event_corrs, global_corrs = calculate_varying_bdc_correlations( ref_map_data=avrg_map_data, query_map_data=dset_map_data, feature_idxs=exp_event_idxs, reference_idxs=reference_idxs, min_remain=1.0 - args.params.background_correction.max_bdc, max_remain=1.0 - args.params.background_correction.min_bdc, bdc_increment=args.params.background_correction.increment, verbose=verbose) event_remain_est = calculate_maximum_series_discrepancy( labels=event_remains, series_1=global_corrs, series_2=event_corrs) analyse_graphs.write_occupancy_graph( f_name=dataset.file_manager.get_file('bdc_est_png').format( event_num), x_values=event_remains, global_values=global_corrs, local_values=event_corrs) log_strs.append( '=> Event Background Correction estimated as {!s}'.format( 1 - event_remain_est)) # Reporting (log is normally silenced) blob_finder.log('Min-Max: {} {}'.format( 1.0 - args.params.background_correction.max_bdc, 1.0 - args.params.background_correction.min_bdc)) blob_finder.log('Event number: {}'.format(event_num)) blob_finder.log('Event Remains: {}'.format(','.join( map(str, event_remains)))) blob_finder.log('Event Corrs: {}'.format(','.join( map(str, event_corrs)))) blob_finder.log('Global Corrs: {}'.format(','.join( map(str, global_corrs)))) # Apply multiplier if provided blob_finder.log('Applying multiplier to output 1-BDC: {}'.format( args.params.background_correction.output_multiplier)) event_remain_est = min( event_remain_est * args.params.background_correction.output_multiplier, 1.0 - args.params.background_correction.min_bdc) # ============================================================================> # Calculate the map correlations at the selected BDC # ============================================================================> event_map_data = calculate_bdc_subtracted_map( ref_map_data=avrg_map_data, query_map_data=dset_map_data, bdc=1.0 - event_remain_est) global_corr = numpy.corrcoef( event_map_data.select(reference_idxs), avrg_map_data.select(reference_idxs))[0, 1] local_corr = numpy.corrcoef( event_map_data.select(exp_event_idxs), avrg_map_data.select(exp_event_idxs))[0, 1] # ============================================================================> # Write out EVENT map (in the reference frame) and grid masks # ============================================================================> if args.output.developer.write_reference_frame_maps: event_map = dataset_map.new_from_template(event_map_data, sparse=False) event_map.to_file( filename=dataset.file_manager.get_file('event_map').format( event_num, event_remain_est), space_group=grid.space_group()) if args.output.developer.write_reference_frame_grid_masks: grid.write_indices_as_map( indices=event_mask.outer_mask_indices(), f_name=dataset.file_manager.get_file('grid_mask').replace( '.ccp4', '') + '-event-mask-{}.ccp4'.format(event_num)) # ============================================================================> # Find the nearest atom to the event # ============================================================================> atm = find_nearest_atoms(atoms=list( protein(dataset.model.hierarchy).atoms_with_labels()), query=dataset.model.alignment.ref2nat( grid.grid2cart(sites_grid=[ map(int, point_cluster.centroid) ], origin_shift=True)))[0] log_strs.append( '=> Nearest Residue to event: Chain {}, Residue {} {}'.format( atm.chain_id, atm.resname, atm.resid())) # ============================================================================> # Create an event object # 
============================================================================> event_obj = Event(id=point_cluster.id, cluster=point_cluster) event_obj.info.estimated_pseudo_occupancy = event_remain_est event_obj.info.estimated_bdc = 1.0 - event_remain_est event_obj.info.global_correlation = global_corr event_obj.info.local_correlation = local_corr # ============================================================================> # Append to dataset handler # ============================================================================> dataset.events.append(event_obj) # ============================================================================> # Write out pymol script to load all of the maps easily # ============================================================================> pml = PythonScript() pml.set_normalise_maps(False) # Load Structures name = pml.load_pdb( f_name=dataset.file_manager.get_file('aligned_model')) pml.repr_as(obj=name, style='sticks') name = pml.load_pdb( f_name=dataset.file_manager.get_file('symmetry_copies')) pml.repr_hide(obj=name) # Load Sampled Map name = pml.load_map( f_name=dataset.file_manager.get_file('sampled_map')) mesh = pml.make_mesh(obj=name, contour_level=1.0, colour='blue') # Load Z-maps name = pml.load_map(f_name=dataset.file_manager.get_file('z_map')) mesh = pml.make_mesh(obj=name, mesh_suffix='.plus', contour_level=3.0, colour='green') mesh = pml.make_mesh(obj=name, mesh_suffix='.mins', contour_level=-3.0, colour='red') # Load Event maps for f in sorted( glob.glob( dataset.file_manager.get_file('event_map').format( '*', '*'))): name = pml.load_map(f_name=f) mesh = pml.make_mesh(obj=name, contour_level=float(f.split('_')[-2]), colour='hotpink') # Load Miscellaneous maps (e.g. masks) for f in sorted( glob.glob( os.path.join(dataset.file_manager.get_dir('root'), '*mask*.ccp4'))): name = pml.load_map(f_name=f) mesh = pml.make_mesh(obj=name, contour_level=0.0, colour='grey') pml.write_script(f_name=dataset.file_manager.get_file('pymol_script'), overwrite=True) return (dataset, dataset_map.meta, log_strs)
def run(params):

    ######################################################################
    # Validate input
    ######################################################################

    assert params.input.pdb, 'No PDB File Provided'

    if params.modes.all:
        params.modes.peptide_bond_links = True
        params.modes.duplicated_atom_restraints = True
        params.modes.local_structure_restraints = True
        params.modes.occupancy_groups = True
        params.modes.b_factor_restraints = True

    if params.modes.peptide_bond_links:
        link_file = os.path.splitext(params.input.pdb)[0] + params.peptide_bonds.suffix
    if params.modes.duplicated_atom_restraints:
        pass
    if params.modes.local_structure_restraints:
        pass
    if params.modes.occupancy_groups:
        pass
    if params.modes.b_factor_restraints:
        pass

    ######################################################################
    # Prepare output and input
    ######################################################################

    if params.output.phenix and os.path.exists(params.output.phenix):
        if params.settings.overwrite:
            os.remove(params.output.phenix)
        else:
            raise Exception('File already exists: {}'.format(params.output.phenix))
    if params.output.refmac and os.path.exists(params.output.refmac):
        if params.settings.overwrite:
            os.remove(params.output.refmac)
        else:
            raise Exception('File already exists: {}'.format(params.output.refmac))

    # Open log file
    if params.output.log:
        log = Log(log_file=params.output.log, verbose=params.settings.verbose)
    else:
        log = Log(verbose=params.settings.verbose)

    # Read input files
    pdb_obj = iotbx.pdb.hierarchy.input(params.input.pdb)
    pdb_obj.hierarchy.sort_atoms_in_place()

    ######################################################################
    # Generate restraints
    ######################################################################

    if params.modes.peptide_bond_links:
        make_link_records(params=params, input_hierarchy=pdb_obj, link_file=link_file, log=log)

    if params.modes.duplicated_atom_restraints:
        make_duplication_restraints(params=params, input_hierarchy=pdb_obj, log=log)

    if params.modes.local_structure_restraints:
        make_local_restraints(params=params, input_hierarchy=pdb_obj, log=log)

    if params.modes.occupancy_groups:
        make_occupancy_constraints(params=params, input_hierarchy=pdb_obj, log=log)

    if params.modes.b_factor_restraints:
        make_b_factor_restraints(params=params, input_hierarchy=pdb_obj, log=log)
class BFactorRefinementFactory(object):

    _refine = refine_phenix

    def __init__(self, pdb_file, mtz_file, out_dir, cif_files=[], tag=None, tls_selections=None, prefix='refined'):

        self.pdb_file = pdb_file
        self.mtz_file = mtz_file
        self.cif_files = cif_files
        self.out_dir = easy_directory(out_dir)
        self.tag = tag
        self.tls_selections = []
        self.tls_matrices = None

        self.initial_pdb = os.path.join(self.out_dir, 'initial.pdb')
        self.out_template = os.path.join(self.out_dir, prefix)

        shutil.copy(self.pdb_file, self.initial_pdb)

        self.log = Log(verbose=True)

        if not tls_selections:
            tls_selections = self.determine_tls_groups(pdb_file=pdb_file)

        # Sanitise the tls selections
        for tls in tls_selections:
            if tls.startswith('"') and tls.endswith('"'):
                tls = tls[1:-1]
            assert '"' not in tls, 'TLS selection cannot include ": {}'.format(tls)
            self.tls_selections.append(tls)

    def determine_tls_groups(self, pdb_file):

        self.log.subheading('Determining TLS groups for: {}'.format(pdb_file))

        tls_selections = phenix_find_tls_groups(pdb_file)

        self.log.subheading('Identified TLS Selections:')
        for s in tls_selections:
            self.log(s)

        return tls_selections

    # def initial_tls_parameters(self):
    #     """Characterise TLS with phenix.tls - legacy function"""
    #
    #     self.log.subheading('Fitting TLS Matrices to selections')
    #     self.log('writing to output file: {}'.format(self.tls_initial_pdb))
    #
    #     cmd = CommandManager('phenix.tls')
    #     cmd.add_command_line_arguments(self.pdb_file)
    #     cmd.add_command_line_arguments(self.cif_files)
    #     cmd.add_command_line_arguments('extract_tls=True')
    #     cmd.add_command_line_arguments([r'selection="{}"'.format(s) for s in self.tls_selections if s is not None])
    #     cmd.add_command_line_arguments('output_file_name={}'.format(self.tls_initial_pdb))
    #
    #     cmd.print_settings()
    #     ret_code = cmd.run()
    #     cmd.write_output(self.tls_initial_pdb.replace('.pdb', '.log'))
    #
    #     if ret_code != 0:
    #         self.log(cmd.output)
    #         self.log(cmd.error)
    #         raise Exception('Failed to determine TLS parameters: {}'.format(' '.join(cmd.program)))
    #
    #     return self.tls_initial_pdb, self.extract_tls_from_pdb(self.tls_initial_pdb)

    def refine_b_factors(self, mode='tls', suffix=None):
        """Refine the model with phenix.refine, including the TLS model"""

        assert mode in ['isotropic', 'tls', 'anisotropic']

        if suffix is None:
            suffix = mode

        strategy = "individual_sites+individual_adp+occupancies"

        if mode == 'isotropic':
            params = [r'convert_to_isotropic=True']
        elif mode == 'tls':
            strategy += '+tls'
            params = [r'refinement.refine.adp.tls="{}"'.format(t) for t in self.tls_selections]
        else:
            params = [r'refinement.refine.adp.individual.anisotropic="{}"'.format(
                ' or '.join(['(' + t + ')' for t in self.tls_selections]))]

        self.log.subheading('Refining B-factor model with {}'.format(self._refine.program))
        obj = self._refine(pdb_file=self.pdb_file,
                           mtz_file=self.mtz_file,
                           cif_file=self.cif_files,
                           out_prefix=self.out_template + '-' + suffix,
                           strategy=strategy,
                           n_cycles=3,
                           manual_args=params)

        return obj.out_pdb_file, obj.out_mtz_file

    @staticmethod
    def extract_tls_from_pdb(pdb_file):
        ih = iotbx.pdb.hierarchy.input(pdb_file)
        tls_params = ih.input.extract_tls_params(ih.hierarchy)
        return tls_params

    def show_tls_params(self, tls_params=None, pdb_file=None):
        if pdb_file:
            tls_params = self.extract_tls_from_pdb(pdb_file=pdb_file)

        o = ""
        for tls in tls_params.tls_params:
            o += '\n'
            o += 'selection: {}\n'.format(tls.selection_string)
            o += 'origin: {}\n'.format(tls.origin)
            o += 'T: ' + str(tls.t) + '\n'
            o += 'L: ' + str(tls.l) + '\n'
            o += 'S: ' + str(tls.s) + '\n'
            o += '\n'
        self.log(o)
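# A minimal usage sketch for the factory above (file and directory names are hypothetical;
# phenix must be available, since TLS groups are found with phenix.find_tls_groups and the
# refinement itself is run through phenix.refine):
def _example_refine_b_factors():
    factory = BFactorRefinementFactory(pdb_file='model.pdb',
                                       mtz_file='data.mtz',
                                       out_dir='b-factor-fitting',
                                       tag='example')
    # Refine a TLS B-factor model (mode may also be 'isotropic' or 'anisotropic')
    tls_pdb, tls_mtz = factory.refine_b_factors(mode='tls')
    # Report the fitted TLS parameters from the refined structure
    factory.show_tls_params(pdb_file=tls_pdb)
    return tls_pdb, tls_mtz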
def run(params): # Identify any existing output directories current_dirs = sorted(glob.glob(params.output.dir_prefix + '*')) if not current_dirs: next_int = 1 else: current_nums = [ s.replace(params.output.dir_prefix, '') for s in current_dirs ] next_int = sorted(map(int, current_nums))[-1] + 1 # Create output directory name from int out_dir = params.output.dir_prefix + '{:04}'.format(next_int) # Create output directory os.mkdir(out_dir) # Create log object log = Log(log_file=os.path.join( out_dir, params.output.out_prefix + '.quick-refine.log'), verbose=params.settings.verbose) # Report if current_dirs: log('Found existing refinement directories: \n\t{}'.format( '\n\t'.join(current_dirs))) log('') log('Creating new output directory: {}'.format(out_dir)) # Validate input parameters log.subheading('Validating input parameters') assert params.input.pdb is not None, 'No PDB given for refinement' assert params.input.mtz is not None, 'No MTZ given for refinement' if os.path.islink(params.input.mtz): log('Converting mtz path to real path:') log('{} -> {}'.format(params.input.mtz, os.path.realpath(params.input.mtz))) params.input.mtz = os.path.realpath(params.input.mtz) # Link input log('Copying/linking files to refinement folder') shutil.copy(params.input.pdb, os.path.abspath(os.path.join(out_dir, 'input.pdb'))) rel_symlink(params.input.mtz, os.path.abspath(os.path.join(out_dir, 'input.mtz'))) # Copy parameter file to output folder if params.input.params: shutil.copy(params.input.params, os.path.abspath(os.path.join(out_dir, 'input.params'))) # Create output prefixes output_prefix = os.path.join(out_dir, params.output.out_prefix) log('Real output file path prefixes: {}'.format(output_prefix)) log('Link output file path prefixes: {}'.format(params.output.link_prefix)) # Create command objects log.subheading('Preparing command line input for refinement program') # PHENIX if params.options.program == 'phenix': cm = CommandManager('phenix.refine') # Command line args cm.add_command_line_arguments([params.input.pdb, params.input.mtz]) cm.add_command_line_arguments( ['output.prefix={}'.format(output_prefix)]) if params.input.cif: cm.add_command_line_arguments(params.input.cif) if params.input.params and os.path.exists(params.input.params): cm.add_command_line_arguments([params.input.params]) # REFMAC elif params.options.program == 'refmac': cm = CommandManager('refmac5') # Command line args cm.add_command_line_arguments( ['xyzin', params.input.pdb, 'hklin', params.input.mtz]) cm.add_command_line_arguments([ 'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz' ]) if params.input.cif: for cif in params.input.cif: cm.add_command_line_arguments(['libin', cif]) # Standard input if params.input.params: cm.add_standard_input(open(params.input.params).read().split('\n')) cm.add_standard_input(['END']) elif params.options.program == "buster": cm = CommandManager('refine') # Command line arguments # inputs cm.add_command_line_arguments( ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir]) if params.input.cif: for cif in params.input.cif: cm.add_command_line_arguments(['-l', cif]) if params.input.params: cm.add_command_line_arguments(['-Gelly', params.input.params]) # Pass additional command line arguments? if params.input.args: cm.add_command_line_arguments(params.input.args) # Report log(str(cm)) log.bar() log('running refinement... 
({})'.format(cm.program[0])) out = cm.run() log.subheading('Refinement output') if not log.verbose: log('output written to log file ({} lines)'.format( cm.output.count('\n'))) log('\n' + cm.output, show=False) if out != 0: log.subheading('Refinement Errors') log(cm.error) log.subheading('Post-processing output files') if params.options.program == "buster": log.subheading('Renaming buster output files') shutil.move(src=os.path.join(out_dir, 'refine.pdb'), dst=output_prefix + '.pdb') shutil.move(src=os.path.join(out_dir, 'refine.mtz'), dst=output_prefix + '.mtz') # Find output files try: real_pdb = glob.glob(output_prefix + '*.pdb')[0] real_mtz = glob.glob(output_prefix + '*.mtz')[0] except: log('Refinement has failed - output files do not exist') log('{}: {}'.format(output_prefix + '*.pdb', glob.glob(output_prefix + '*.pdb'))) log('{}: {}'.format(output_prefix + '*.mtz', glob.glob(output_prefix + '*.mtz'))) raise # List of links to make at the end of the run link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'), (real_mtz, params.output.link_prefix + '.mtz')] # Split conformations if params.options.split_conformations: params.split_conformations.settings.verbose = params.settings.verbose log.subheading('Splitting refined structure conformations') # Running split conformations out_files = split_conformations.split_conformations( filename=real_pdb, params=params.split_conformations, log=log) # Link output files to top for real_file in out_files: link_file = params.output.link_prefix + os.path.basename( real_file.replace(os.path.splitext(real_pdb)[0], '')) link_file_pairs.append([real_file, link_file]) # Link output files log.subheading('linking output files') for real_file, link_file in link_file_pairs: log('Linking {} -> {}'.format(link_file, real_file)) if not os.path.exists(real_file): log('file does not exist: {}'.format(real_file)) continue if os.path.exists(link_file) and os.path.islink(link_file): log('removing existing link: {}'.format(link_file)) os.unlink(link_file) if not os.path.exists(link_file): rel_symlink(real_file, link_file) log.heading('finished - refinement')
def split_conformations(filename, params, log=None):

    if log is None:
        log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [ag.altloc for ag in new_ens.atom_groups() if (ag.resname in sel_resnames)]
        # List of conformers to output for each structure, and suffixes
        out_confs = map(sorted, [all_confs.intersection(sel_confs), all_confs.difference(sel_confs)])
        out_suffs = [params.options.by_residue_name.selected_name,
                     params.options.by_residue_name.unselected_name]
    elif params.options.mode == 'by_conformer':
        sel_resnames = None
        sel_confs = None
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        sel_resnames = None
        sel_confs = None
        # One structure for each set of supplied conformer sets
        out_confs = [s.split(',') for s in params.options.by_conformer_group.conformers]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = ['.'.join([os.path.splitext(filename)[0], params.output.suffix_prefix, suff, 'pdb'])
                 for suff in out_suffs]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        if not this_confs:
            continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(new_ens.atom_selection_cache().selection(sel_string), copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs), this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after the selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose:
                log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag), conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)')
            # Divide through by the smallest occupancy of any complete residue groups with occupancies of less than one
            rg_occs = [calculate_residue_group_occupancy(rg)
                       for rg in residue_groups_with_complete_set_of_conformers(sel_hiery)]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() / div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([calculate_residue_group_occupancy(rg) for rg in sel_hiery.residue_groups()])
            log('Maximum occupancy of output structure: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths
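# As a concrete illustration of the out_paths construction above (file name and parameter
# values are hypothetical): splitting 'model.pdb' in 'by_conformer' mode, with
# output.suffix_prefix='split' and conformers A and B present, gives one file per conformer.
def _example_output_paths():
    import os
    filename = 'model.pdb'
    suffix_prefix = 'split'        # params.output.suffix_prefix
    out_suffs = ['A', 'B']         # one suffix per conformer in 'by_conformer' mode
    out_paths = ['.'.join([os.path.splitext(filename)[0], suffix_prefix, suff, 'pdb'])
                 for suff in out_suffs]
    return out_paths               # ['model.split.A.pdb', 'model.split.B.pdb']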
def run(params):
    log = Log(log_file=params.output.log_file, verbose=True)

    # Process MTZs
    if params.input.mtz:
        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))
        if params.input.file_label == 'filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label == 'foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else:
            raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))
        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(
            crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab)
                      for f, lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()
        for cg in crystal_groups:
            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))
            error = False
            for c in cg.crystals:
                for label in params.check_for.column_label:
                    if label is None:
                        continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. Columns present are {}'.format(
                            label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None:
                        continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. Columns present are {}'.format(
                            label, c.mtz_file, c.column_labels))
                        error = True
            if error is True:
                raise Sorry('There are datasets that do not contain the right columns.')
            log(crystal_statistics('Wavelength', cg.crystals,
                                   value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(),
                                   header=True))
            log(crystal_statistics('Resolution (high)', cg.crystals,
                                   value_func=lambda c: c.high_res, header=False))
            log(crystal_statistics('Resolution (low)', cg.crystals,
                                   value_func=lambda c: c.low_res, header=False))
            log(crystal_statistics('Unit cell - vol', cg.crystals,
                                   value_func=lambda c: c.unit_cell.volume(), header=False))
            log(crystal_statistics('Unit cell - a', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[0], header=False))
            log(crystal_statistics('Unit cell - b', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[1], header=False))
            log(crystal_statistics('Unit cell - c', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[2], header=False))
            log(crystal_statistics('Unit cell - alpha', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[3], header=False))
            log(crystal_statistics('Unit cell - beta', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[4], header=False))
            log(crystal_statistics('Unit cell - gamma', cg.crystals,
                                   value_func=lambda c: c.unit_cell.parameters()[5], header=False, footer=True))
            for label in params.summary.column_label:
                if label is None:
                    continue
                log(crystal_statistics('Column: {}'.format(label), cg.crystals,
                                       value_func=lambda c: c.mtz_object().get_column(label).n_valid_values(),
                                       header=False, footer=True))
            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()
            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))

    # Process PDBs
    if params.input.pdb:
        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))
        if params.input.file_label == 'filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label == 'foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else:
            raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))
        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(
            crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab)
                      for f, lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        for cg in crystal_groups:
            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))
            log(crystal_statistics('R-work', cg.crystals,
                                   value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work,
                                   header=True))
            log(crystal_statistics('R-free', cg.crystals,
                                   value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free,
                                   header=False, footer=True))
            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()
            log(crystal_min_max('R-free', cg.crystals,
                                value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))

    log.heading('finished')
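# The two labelling modes above derive a dataset id from the file path. A minimal sketch of the
# same rule, assuming ordinary filesystem paths (the function name is illustrative only):
def _example_label(path, mode='filename'):
    import os
    if mode == 'filename':
        return os.path.basename(os.path.splitext(path)[0])   # '/data/x001/refine.mtz' -> 'refine'
    elif mode == 'foldername':
        return os.path.basename(os.path.dirname(path))       # '/data/x001/refine.mtz' -> 'x001'
    raise Exception('labelling function not supported: {}'.format(mode))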
def run(params):
    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry('No pdb/mtz files have been provided: specify with input.pdb or input.mtz')
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry('No output directory has been specified: specify with output.out_dir')
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)
    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy

    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(len(params.input.pdb), len(params.input.mtz)))
    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        elif params.input.labels.pdb_regex:
            p_labels = [re.findall(params.input.labels.pdb_regex, f)[0] for f in params.input.pdb]
        else:
            p_labels = ['PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        elif params.input.labels.mtz_regex:
            m_labels = [re.findall(params.input.labels.mtz_regex, f)[0] for f in params.input.mtz]
        else:
            m_labels = ['MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))]
    except:
        print('Error reading file: {}'.format(f))
        raise

    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join(['{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))

    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))

    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(len(params.input.pdb), len(params.input.mtz)))
    if params.input.pdb:
        pdb_summaries = [CrystalSummary.from_pdb(pdb_file=f, id=lab)
                         for f, lab in zip(params.input.pdb, p_labels)]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [CrystalSummary.from_mtz(mtz_file=f, id=lab)
                         for f, lab in zip(params.input.mtz, m_labels)]
    else:
        mtz_summaries = []

    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries + mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))

    log.heading('Analysing variation of unit cells for each space group')
    for cg in crystal_groups:
        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(' ', '_'))
        log.subheading('Space Group {}: {} dataset(s)'.format(cg.space_groups[0], len(cg.crystals)))
        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))
        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup')
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir, '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)
        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))

        for i_cg2, cg2 in enumerate(sg_crystal_groups):
            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)
            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)
            log('Unit Cell Variation:')
            log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))
            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(fname=os.path.join(img_dir, '{}.png'.format(cluster_name)),
                               xlab='Crystal',
                               ylab='Linear Cell Variation',
                               ylim=(0, params.clustering.lcv_cutoff),
                               annotate_y_min=params.clustering.label_nodes_above)
            log('Copying files to output directory')
            # Go through and link the datasets for each of the spacegroups into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir):
                os.mkdir(sub_dir)
            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir):
                    os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir):
                    os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir
            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                # Create subdirectory
                if not os.path.exists(sub_sub_dir):
                    os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # Output the partner file as well if the filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)

    log.heading('finished')
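# A small sketch of the naming used above. The sg_name construction turns a space-group label
# into a directory-safe prefix; the label format shown is only an illustrative assumption:
def _example_spacegroup_dirname(space_group_label):
    """Mirror the sg_name logic above, e.g. 'P 21 21 21 (No. 19)' -> 'sg-P_21_21_21'."""
    return 'sg-{}'.format(space_group_label.split(' (')[0].replace(' ', '_'))

# With hypothetical values (cluster 'sg-P_21_21_21-cluster-1', dataset id 'x001') the export loop
# above then produces, depending on output.split_pdbs_and_mtzs:
#   <out_dir>/sg-P_21_21_21-cluster-1/mtzs/x001/x001.mtz   (split_pdbs_and_mtzs=True)
#   <out_dir>/sg-P_21_21_21-cluster-1/x001/x001.mtz        (split_pdbs_and_mtzs=False)
# and the matching .pdb is copied/linked alongside whenever it shares the same basename.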
def process_and_export_folder(dir, params, log=Log()):
    """Merge structures, transform them and export a subset of a folder's contents"""
    dir_name = os.path.basename(dir)
    log.heading('Processing directory: {}'.format(dir_name), spacer=True)

    # Check to see if this folder should be skipped (export fitted folders only)
    if params.options.required_file_for_export == 'model':
        if not os.path.exists(os.path.join(dir, 'modelled_structures',
                                           PanddaDatasetFilenames.modelled_structure.format(dir_name))):
            log('No modelled structure in modelled_structures folder.')
            log('SKIPPING: {}'.format(dir))
            return

    ############################################################################
    # Export the pandda folder to output directory
    ############################################################################
    log.subheading('Exporting folder: {}'.format(dir))
    exp_dir = export_folder(dir=dir, params=params, log=log)

    ############################################################################
    # Merge input and pandda-modelled structures
    ############################################################################
    # Extract parameters for the merging and set them
    merging_params = merge_conformations.master_phil.extract()
    merging_params.input.major = os.path.join(
        exp_dir, params.output.file_prefix + PanddaDatasetFilenames.input_model.format(dir_name))
    merging_params.input.minor = os.path.join(
        exp_dir, params.output.file_prefix + PanddaDatasetFilenames.modelled_structure.format(dir_name))
    merging_params.output.pdb = os.path.join(
        exp_dir, params.output.file_prefix + PanddaDatasetFilenames.ensemble_structure.format(dir_name))
    merging_params.output.log = os.path.splitext(merging_params.output.pdb)[0] + '.log'
    merging_params.output.make_restraints = True
    # Apply settings
    merging_params.settings.overwrite = params.settings.overwrite
    merging_params.settings.verbose = params.settings.verbose
    # Change the restraints settings
    merging_params.restraints.output.phenix = os.path.splitext(
        os.path.basename(merging_params.output.pdb))[0] + '.restraints-phenix.params'
    merging_params.restraints.output.refmac = os.path.splitext(
        os.path.basename(merging_params.output.pdb))[0] + '.restraints-refmac.params'
    merging_params.restraints.output.log = os.path.splitext(
        os.path.basename(merging_params.output.pdb))[0] + '.restraints.log'
    # Check files exist
    if not os.path.exists(merging_params.input.minor):
        raise Exception('File does not exist: {}'.format(merging_params.input.minor))
    if not os.path.exists(merging_params.input.major):
        raise Exception('File does not exist: {}'.format(merging_params.input.major))
    # Print and run
    log.subheading('Merging event-map model with input model')
    merge_conformations.run(params=merging_params)
def make_duplication_restraints(params, input_hierarchy, log=None):
    """Create coordinate and b-factor restraints for duplicated conformers"""
    if log is None:
        log = Log(verbose=True)

    log.subheading('Generating restraints for duplicated conformers')

    dup_groups = []
    for chn in input_hierarchy.hierarchy.chains():
        if (params.duplicates.make_for == 'protein') and not chn.is_protein():
            continue
        elif (params.duplicates.make_for == 'het') and chn.is_protein():
            continue
        for rg in chn.residue_groups():
            dup_groups += find_duplicated_conformers_and_generate_atom_pairs(
                residue_group=rg, rmsd_cutoff=params.duplicates.rmsd_cutoff)

    if not dup_groups:
        log('No duplicated conformers (no restraints created)')
        return

    # Concatenate atoms into one list
    atom_pairs = []
    for grp in dup_groups:
        atom_pairs.extend(grp)

    log('Found {} duplicated conformers consisting of {} atoms'.format(len(dup_groups), len(atom_pairs)))
    log('')

    if params.output.refmac:
        restraint_list = [RefmacFormatter.make_distance_restraint(atm_1=a1, atm_2=a2, value=0.0,
                                                                  sigma=params.duplicates.sigma_xyz)
                          for a1, a2 in atom_pairs]
        rest_block = RefmacFormatter.format_distance_restraints(restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [PhenixFormatter.make_distance_restraint(atm_1=a1, atm_2=a2, value=0.0,
                                                                  sigma=params.duplicates.sigma_xyz)
                          for a1, a2 in atom_pairs]
        rest_block = PhenixFormatter.format_distance_restraints(restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
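# A minimal usage sketch for the restraint construction above, assuming a1 and a2 are two atom
# objects that are duplicate copies of the same atom in different conformers. The pair is
# restrained to a target distance of 0.0 so the copies stay superposed during refinement; the
# sigma shown is hypothetical (in the function above it comes from params.duplicates.sigma_xyz).
#   pair = RefmacFormatter.make_distance_restraint(atm_1=a1, atm_2=a2, value=0.0, sigma=0.02)
#   text = RefmacFormatter.format_distance_restraints(restraint_list=[pair])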
def run(params):
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [s.replace(params.output.dir_prefix, "") for s in current_dirs]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir, params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format("\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"
    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz, os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb, os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz, os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params, os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes
    output_prefix = out_dir
    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments(["xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))
        cm.add_standard_input(["END"])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))
    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(cm.output.count("\n")))
    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files
    try:
        real_pdb = os.path.join(output_prefix, params.output.out_prefix + ".pdb")
        real_mtz = os.path.join(output_prefix, params.output.out_prefix + ".mtz")
        print(real_pdb, "\n", real_mtz)
    except:
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb", glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz", glob.glob(output_prefix + "*.mtz")))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]
    print(link_file_pairs)

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
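# Illustrative sketch of the output-directory numbering used at the top of run() above, assuming
# a plain string prefix (the name and example prefix are hypothetical):
def _example_next_output_dir(dir_prefix):
    """e.g. with dir_prefix='refine_' and existing 'refine_0001', 'refine_0002',
    return 'refine_0003'; with no existing directories, return 'refine_0001'."""
    import glob
    existing = sorted(glob.glob(dir_prefix + "*"))
    if not existing:
        next_int = 1
    else:
        next_int = max(int(s.replace(dir_prefix, "")) for s in existing) + 1
    return dir_prefix + "{:04}".format(next_int)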
def merge_complementary_hierarchies(hierarchy_1,
                                    hierarchy_2,
                                    prune_duplicates_rmsd=0.1,
                                    in_place=False,
                                    verbose=False,
                                    log=None):
    """Merge hierarchies that are alternate models of the same crystal by expanding alternate
    model conformations, merging, and then trimming alternate conformations where possible"""
    if log is None:
        log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy_1 = hierarchy_1.deep_copy()
        hierarchy_2 = hierarchy_2.deep_copy()

    # Sort the atoms
    hierarchy_1.sort_atoms_in_place()
    hierarchy_2.sort_atoms_in_place()

    log.heading('Preparing to merge structures')

    log.subheading('Explicitly expanding models to all conformations of the crystal')
    log('Expanding alternate conformations in structure 1')
    expand_alternate_conformations(hierarchy=hierarchy_1, in_place=True, verbose=verbose)
    log('Expanding alternate conformations in structure 2')
    expand_alternate_conformations(hierarchy=hierarchy_2, in_place=True, verbose=verbose)

    log.subheading('Applying conformer shift to the second structure before merging')
    log('Identifying the altloc shift required from the number of alternate conformers in structure 1')
    conf_offset = find_next_conformer_idx(hierarchy=hierarchy_1,
                                          all_ids=iotbx.pdb.systematic_chain_ids())
    log('Incrementing all altlocs in structure 2 by {}'.format(conf_offset))
    increment_altlocs(hierarchy=hierarchy_2, offset=conf_offset, in_place=True, verbose=verbose)

    log.subheading('Renaming residues that do not align between structures')
    resolve_residue_id_clashes(fixed_hierarchy=hierarchy_1,
                               moving_hierarchy=hierarchy_2,
                               in_place=True,
                               verbose=verbose)

    log.heading('Merging structures')
    log('Transferring residues from Structure 2 to Structure 1')
    transfer_residue_groups_from_other(acceptor_hierarchy=hierarchy_1,
                                       donor_hierarchy=hierarchy_2,
                                       in_place=True,
                                       verbose=verbose)

    log.heading('Post-processing structure')
    log('Pruning unnecessary multi-conformer residues in the merged structure')
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy_1,
        required_altlocs=hierarchy_1.altloc_indices(),
        rmsd_cutoff=prune_duplicates_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy_1
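# A minimal usage sketch, assuming maj and min are two single-model input objects already read in
# (e.g. via strip_pdb_to_input, as in the run() function below). With in_place=False (the default)
# both hierarchies are deep-copied before merging, and the merged hierarchy is returned.
#   merged = merge_complementary_hierarchies(hierarchy_1=maj.hierarchy,
#                                            hierarchy_2=min.hierarchy,
#                                            prune_duplicates_rmsd=0.1,
#                                            in_place=False)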
def run(params):
    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    if (params.input.major or params.input.minor) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception('Have provided input.major & input.minor, as well as files to input.pdb. '
                        'Specify either input.major & input.minor, or two input.pdb.')
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]

    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(params.input.minor))
    # Just check again...
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb

    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception('Output file already exists: {}. Run with overwrite=True to remove this file'
                            .format(params.output.pdb))

    # Check that the input occupancies are valid
    if (params.options.minor_occupancy > 1.0) or (params.options.major_occupancy > 1.0):
        raise Exception('minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
                        .format(params.options.minor_occupancy, params.options.major_occupancy))

    # Report validated parameters
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints':
            continue
        log(obj.as_str().strip())

    # Read in the major and minor structures
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # Check that the input structures contain only one model
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except:
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() * params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() * params.options.minor_occupancy)

    # Merge the hierarchies
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()

    # Write output file
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:
        # Transfer the other phil objects from the master phil
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'):
                continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)
        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Which alternate conformations to generate restraints for
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite
        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
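# A minimal programmatic usage sketch, mirroring process_and_export_folder above: extract the
# defaults from master_phil, fill in the two input models and the output path, then call run().
# The filenames are hypothetical and any unset options keep their phil defaults.
#   p = master_phil.extract()
#   p.input.major = 'input_model.pdb'
#   p.input.minor = 'modelled_structure.pdb'
#   p.output.pdb = 'ensemble.pdb'
#   run(p)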