Example #1
    def __init__(self,
                 pdb_file,
                 mtz_file,
                 out_dir,
                 cif_files=[],
                 tag=None,
                 tls_selections=None,
                 prefix='refined'):

        self.pdb_file = pdb_file
        self.mtz_file = mtz_file
        self.cif_files = cif_files
        self.out_dir = easy_directory(out_dir)
        self.tag = tag
        self.tls_selections = []
        self.tls_matrices = None

        self.initial_pdb = os.path.join(self.out_dir, 'initial.pdb')
        self.out_template = os.path.join(self.out_dir, prefix)

        shutil.copy(self.pdb_file, self.initial_pdb)

        self.log = Log(verbose=True)

        if not tls_selections:
            tls_selections = self.determine_tls_groups(pdb_file=pdb_file)

        # Sanitise the tls selections
        for tls in tls_selections:
            if tls.startswith('"') and tls.endswith('"'):
                tls = tls[1:-1]
            assert '\"' not in tls, 'TLS selection cannot include \": {}'.format(
                tls)
            self.tls_selections.append(tls)
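Note that the signature above uses a mutable default argument (cif_files=[]): Python evaluates the default once, so every call that omits cif_files shares the same list object. A minimal sketch of the safer None-default idiom, using an illustrative class name rather than one from this codebase, is:

import os


class RefinementJob(object):
    """Illustrative only: defaults the mutable cif_files argument to None."""

    def __init__(self, pdb_file, mtz_file, out_dir, cif_files=None):
        # Build a fresh list per instance instead of sharing one default list
        self.cif_files = list(cif_files) if cif_files is not None else []
        self.pdb_file = pdb_file
        self.mtz_file = mtz_file
        self.out_dir = out_dir
        self.initial_pdb = os.path.join(out_dir, 'initial.pdb')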
Example #2
    def show_summary(self, log=None):
        if log is None: log = Log()
        log.subheading('Available datasets')
        for d in self.datasets:
            log.bar()
            d.show_summary(log=log)
        log.bar()
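All of these examples drive the same small Log interface: the object is callable to emit a line, provides bar, heading and subheading helpers, and can mirror output to a log file, with verbose/silent flags controlling console output. A minimal stand-in written against only the behaviour visible in these snippets (the real class lives in the pandda/giant codebase and may differ) could look like:

import sys


class Log(object):
    """Minimal stand-in for the Log interface used in these examples."""

    def __init__(self, log_file=None, verbose=False, silent=False):
        self.log_file = log_file
        self.verbose = verbose
        self.silent = silent

    def __call__(self, message=''):
        text = str(message)
        if not self.silent:
            sys.stdout.write(text + '\n')
        if self.log_file:
            with open(self.log_file, 'a') as fh:
                fh.write(text + '\n')

    def bar(self):
        self('-' * 60)

    def heading(self, message):
        self.bar()
        self(str(message).upper())
        self.bar()

    def subheading(self, message):
        self('')
        self('>>> {}'.format(message))
        self('')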
Example #3
def make_local_restraints(params, input_hierarchy, log=None):
    """Create local restraints for a hierarchy"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating local structure restraints')

    atom_d_pairs = find_atoms_around_alternate_conformers(
        hierarchy=input_hierarchy.hierarchy,
        altlocs=params.local_restraints.altlocs.split(',')
        if params.local_restraints.altlocs else None,
        dist_cutoff=params.local_restraints.max_distance)
    # Filter out restraints at or below the minimum distance (e.g. 0-distance pairs)
    atom_d_pairs = [(a1, a2, d) for a1, a2, d in atom_d_pairs
                    if d > params.local_restraints.min_distance]

    log('Created {} local restraints for {} conformers with distance cutoff of {}-{}A'
        .format(
            len(atom_d_pairs), params.local_restraints.altlocs
            if params.local_restraints.altlocs else 'all',
            params.local_restraints.min_distance,
            params.local_restraints.max_distance))
    log('')

    if params.output.refmac:
        restraint_list = [
            RefmacFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=d,
                sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = RefmacFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac local structural restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [
            PhenixFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=d,
                sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = PhenixFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
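The rest_block[:1000] + '...' * (len(rest_block) > 1000) idiom used above appends an ellipsis only when the block exceeds 1000 characters, because the boolean comparison multiplies the string by 0 or 1. As a standalone helper (an illustrative name, not part of the original code):

def preview(text, limit=1000):
    """Return at most `limit` characters of text, appending '...' if truncated."""
    return text[:limit] + '...' * (len(text) > limit)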
Example #4
    def __init__(self, id, type, zenodo_id, log=None):
        if log is None: log = Log()
        self.log = log
        super(ZenodoDataset, self).__init__(id=id, type=type)
        self.zenodo_id = zenodo_id
        self.base_url = "https://zenodo.org/record/{}/".format(zenodo_id)
        self.data_url = self.base_url + "files/data.zip"
        self.data_dir = None
Example #5
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check that pdb files have been provided and that they exist
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        try:
            obj.hierarchy.only_model()
        except:
            raise Sorry('Input structures may only have one model')

        # Standardise the multi-conformer hierarchy
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atom numbering
        final.sort_atoms_in_place()

        # Write output file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Example #6
def run(params):

    # Create log object
    log = Log(log_file=os.path.abspath(params.output.log_file), verbose=True)

    # Change paths to absolute paths
    params.input.pandda_dir = os.path.abspath(params.input.pandda_dir)
    params.output.export_dir = os.path.abspath(params.output.export_dir)
    # Must be in the pandda directory (pandda objects use relative paths)
    os.chdir(params.input.pandda_dir)

    # Report the modified phil parameters
    log.heading('Processed parameters')
    log(master_phil.format(params).as_str())

    ############################################################################

    log.heading('Identifying folders to export')

    # Find the dataset directories to be exported
    if params.input.select_datasets:
        selected_datasets = []
        [
            selected_datasets.extend(s.split(','))
            for s in params.input.select_datasets
        ]
        export_dirs = sorted([
            os.path.join(params.input.pandda_dir, 'processed_datasets', p)
            for p in selected_datasets
        ])
        # Filter by existence of path
        export_dirs = [p for p in export_dirs if os.path.exists(p)]
    else:
        export_dirs = sorted(
            glob.glob(
                os.path.join(params.input.pandda_dir, 'processed_datasets',
                             '*')))
    assert export_dirs, 'No Export Directories Found'

    # Report
    log('Exporting:\n\t' + '\n\t'.join(export_dirs))

    # Create output directory
    if not os.path.exists(params.output.export_dir):
        os.mkdir(params.output.export_dir)

    # Merge the fitted structures
    for dir in export_dirs:
        process_and_export_folder(dir=dir, params=params, log=log)

    log.heading('FINISHED')
Example #7
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    log.heading('Validating input parameters')

    assert params.input.pdb, 'No PDB files given'

    log.heading('Splitting multi-state structures')

    # Iterate through the input structures and extract the conformation
    for pdb in params.input.pdb:
        split_conformations(filename=pdb, params=params, log=log)

    log.heading('FINISHED')
Example #8
def export_folder(dir, params, log=Log()):
    """Export a subset of a folders contents"""

    # Extract folder name and report
    dir_name = os.path.basename(dir)
    # Get the file list for this folder
    file_list = get_file_list(dir=dir)
    # Create output dir
    exp_dir = os.path.join(params.output.export_dir,
                           params.output.dir_prefix + dir_name)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
    # Report
    log.bar()
    log('Exporting \n\tfrom {!s} \n\t  to {!s}'.format(dir, exp_dir))
    log.bar()
    log('Exporting files:')
    for f in file_list:
        log('\t' + os.path.relpath(f, start=dir))
    log.bar()
    # Export files
    for proc_file in file_list:
        # Check that the file exists
        if not os.path.exists(proc_file):
            log('FILE DOES NOT EXIST: {!s}'.format(proc_file))
            continue
        # Exported file path
        export_file = os.path.join(
            exp_dir, params.output.file_prefix + os.path.basename(proc_file))
        if params.settings.verbose:
            log('Copying {!s}\n     to {!s}'.format(proc_file, export_file))
        # Check to see if file already exists and delete if overwrite
        if os.path.exists(export_file):
            if params.settings.overwrite:
                os.remove(export_file)
            else:
                raise Exception(
                    'File already exists: {}. Need to set overwrite=True to continue.'
                    .format(export_file))
        shutil.copy(proc_file, export_file)

    return exp_dir
Example #9
def standardise_multiconformer_model(hierarchy,
                                     pruning_rmsd=0.1,
                                     in_place=False,
                                     verbose=False,
                                     log=None):
    """Standardise hierarchies by expanding alternate model conformations, and then trimming alternate conformations where possible"""

    if log is None: log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy = hierarchy.deep_copy()

    # Sort the atoms
    hierarchy.sort_atoms_in_place()

    log.heading('Preparing to standardise structure')

    log.subheading(
        'Explicitly expanding model to all conformations of the crystal')
    expand_alternate_conformations(hierarchy=hierarchy,
                                   in_place=True,
                                   verbose=verbose)

    log.subheading(
        'Pruning unnecessary multi-conformer residues in the expanded structure'
    )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy,
        required_altlocs=hierarchy.altloc_indices(),
        rmsd_cutoff=pruning_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy
Example #10
def make_link_records(params, input_hierarchy, link_file, log=None):
    """Create link records to make a continuous peptide chain"""

    if log is None: log = Log(verbose=True)

    log.subheading('Checking the continuity of the protein backbone')

    links, warnings = generate_set_of_alternate_conformer_peptide_links(
        hierarchy=input_hierarchy.hierarchy)

    if warnings:
        log.bar()
        log('WARNINGS:')
        log.bar()
        for w in warnings:
            log(w)
        log.bar()
        log('')

    if (not links) and (not warnings):
        log('No breaks in the backbone - hooray! (nothing needs to be done here)'
            )
        return
    elif (not links):
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        log("!!! >>> There are breaks in the backbone but I'm not able to do anything to fix them    <<< !!!"
            )
        log("!!! >>> You'll need to check them manually to see if these are going to be a problem... <<< !!!"
            )
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        return

    link_block = '\n'.join([
        format_link_record(atom_1=a1,
                           atom_2=a2,
                           chain_id_1=c1,
                           chain_id_2=c2,
                           link_type=lt) for a1, a2, c1, c2, lt in links
    ])

    log('Need to apply {} links to make the backbone continuous:'.format(
        len(links)))
    log('')
    log(link_block)
    log('')

    log('Writing hierarchy with new link records to {}'.format(link_file))
    log('(This file can only be used for refinement with REFMAC)')
    log('')
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )
    log('!!! ALTHOUGH THE FILE WITH BACKBONE LINKS HAS BEEN OUTPUT, IT SHOULD BE USED WITH CAUTION !!!'
        )
    log('!!!   THE CONNECTION OF ALTERNATE CONFORMATIONS OF THE BACKBONE IS GENERALLY "INCORRECT"  !!!'
        )
    log('!!!          THERE SHOULD BE A VERY GOOD REASON FOR THESE RESTRAINTS TO BE USED           !!!'
        )
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )

    input_hierarchy.hierarchy.write_pdb_file(
        file_name=link_file,
        crystal_symmetry=input_hierarchy.crystal_symmetry(),
        link_records=link_block)
Example #11
class Program(object):
    """Class meant to provide basic functionality for programs and pipelines"""

    _NAME = None
    _TEXT = None
    _VERSION = None

    _allowed_statuses = ['running', 'done', 'errored']

    log = Log()

    file_manager = None

    def write_running_parameters_to_log(self, params):
        self.log.heading('Processed parameters')
        self.log(self.master_phil.format(python_object=params).as_str())
        self.log.heading('Parameters different to the defaults')
        self.log(
            self.master_phil.fetch_diff(source=self.master_phil.format(
                python_object=params)).as_str())

    def check_for_matplotlib(self, backend=None, interactive=False):
        """Check to see whether we can load matplotlib"""
        self.log('Checking for matplotlib:')
        try:
            import matplotlib
            matplotlib.interactive(interactive)
            from matplotlib import pyplot
            if backend:
                pyplot.switch_backend(backend)
            # Query the active backend whether or not one was requested, so the
            # success message below always has a value to report
            current_backend = pyplot.get_backend()
            if backend:
                assert current_backend == backend, 'Backend loaded ({}) is not the one requested ({})'.format(
                    current_backend, backend)
            assert pyplot.isinteractive(
            ) is interactive, 'Interactive setting is incorrect ({} is not {})'.format(
                pyplot.isinteractive(), interactive)
            pyplot.style.use('ggplot')
            self.log('pyplot loaded successfully. Using backend "{!s}"'.format(
                current_backend))
            return True
        except:
            self.log('===================================>>>')
            self.log(
                '>> COULD NOT IMPORT MATPLOTLIB. WILL NOT BE ABLE TO GENERATE GRAPHS.'
            )
            self.log('===================================>>>')
            return False

    def initialise_file_manager(self, rootdir):
        self.file_manager = FileManager(rootdir=rootdir)
        return self.file_manager

    def update_status(self, status):
        """Set log files to indicate the status of the program"""

        assert status in self._allowed_statuses
        # Delete any that may exist
        existing_files = [
            self.file_manager.get_file('status').format(f)
            for f in self._allowed_statuses
        ]
        [os.remove(f) for f in existing_files if os.path.exists(f)]
        # Create the new status file
        with open(self.file_manager.get_file('status').format(status),
                  'w') as fh:
            fh.write('')

    def pickle(self, pickle_file, pickle_object, overwrite=True):
        """Takes an object and pickles it"""
        if os.path.exists(pickle_file) and not overwrite:
            self.log('NOT PICKLING: {!s}'.format(
                os.path.relpath(pickle_file, start=self.out_dir)))
        else:
            self.log('Pickling Object: {!s}'.format(
                os.path.relpath(pickle_file, start=self.out_dir)))
            easy_pickle.dump(pickle_file, pickle_object)

    def unpickle(self, pickle_file):
        """Takes an object and unpickles it"""
        self.log('Unpickling File: {!s}'.format(
            os.path.relpath(pickle_file, start=self.out_dir)))
        return easy_pickle.load(pickle_file)
Example #12
def make_occupancy_constraints(params, input_hierarchy, log=None):
    """Create occupancy groups for a hierarchy"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating occupancy-constrained groups')

    # Ligand resname identifiers
    resnames = params.occupancy.resname.split(',')
    if params.settings.verbose:
        log('Looking for ligands with resname {!s}'.format(
            ' or '.join(resnames)))
        log('')

    # Make occupancy groups
    occupancy_groups = overlapping_occupancy_groups(
        hierarchy=input_hierarchy.hierarchy,
        resnames=resnames,
        group_dist=params.occupancy.group_dist,
        overlap_dist=params.occupancy.overlap_dist,
        complete_groups=params.occupancy.complete_groups,
        exclude_altlocs=params.occupancy.exclude_altlocs.split(',')
        if params.occupancy.exclude_altlocs else [],
        verbose=params.settings.verbose)
    # Record whether the occupancy groups are complete (occs sum to 1)
    if params.occupancy.complete_groups:
        occupancy_complete = [True] * len(occupancy_groups)
    else:
        occupancy_complete = [False] * len(occupancy_groups)

    if not occupancy_groups:
        log('No matching residues were found (no occupancy constraints created)'
            )
        return

    log.bar()
    log('')
    log('Created {} occupancy groups for overlapping conformers'.format(
        len(occupancy_groups)))
    log('')

    # Re-make the default occupancy groups?
    if params.occupancy.simple_groups:
        log('simple_groups=={}: Remaking default occupancy restraints for residues'
            .format(params.occupancy.simple_groups))
        if params.settings.verbose: log('')
        simple_groups = simple_occupancy_groups(
            hierarchy=input_hierarchy.hierarchy,
            verbose=params.settings.verbose)
        num_alts = len(
            [a for a in input_hierarchy.hierarchy.altloc_indices() if a != ''])
        occupancy_complete += [
            True if len(g) == num_alts else False for g in simple_groups
        ]
        occupancy_groups += simple_groups
        if params.settings.verbose: log('')
        log('Increased number of occupancy groups to {}'.format(
            len(occupancy_groups)))
        log('')

    if params.output.refmac:
        restraint_list = RefmacFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = RefmacFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac occupancy restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = PhenixFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = PhenixFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix occupancy restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
Example #13
def make_b_factor_restraints(params, input_hierarchy, log=None):

    if log is None: log = Log(verbose=True)
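    # Placeholder only: no B-factor restraints are generated in this example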

    pass
Example #14
    def run(self):
        """Process the dataset"""

        dataset, dataset_map, grid, map_analyser, args, verbose = self.data

        # TODO Hardcoded check - to be removed? TODO
        assert dataset_map.is_sparse()

        # ============================================================================>
        # Prepare output objects
        # ============================================================================>
        log_strs = []
        log_file = dataset.file_manager.get_file('dataset_log')
        log = Log(log_file=log_file, verbose=False, silent=True)

        # ============================================================================>
        # Build new blob search object
        # ============================================================================>
        blob_finder = PanddaZMapAnalyser(params=args.params.z_map_analysis,
                                         grid=grid,
                                         log=log)
        print('Writing log for dataset {!s} to ...{}'.format(
            dataset.tag, log_file[log_file.index('processed'):]))

        # ============================================================================>
        # Extract the global mask object from the grid
        # ============================================================================>
        dset_total_temp = grid.global_mask().total_mask_binary().copy()

        # ============================================================================>
        # Generate symmetry masks for this dataset
        # ============================================================================>
        log.bar()
        log('Masking symmetry contacts from Z-map.')
        # Generate symmetry contacts for this dataset and align to reference frame
        dataset_sym_copies = dataset.model.crystal_contacts(
            distance_cutoff=args.params.masks.outer_mask + 5,
            combine_copies=True)
        dataset_sym_copies.atoms().set_xyz(
            dataset.model.alignment.nat2ref(
                dataset_sym_copies.atoms().extract_xyz()))
        # Only need to write if writing reference frame maps
        if args.output.developer.write_reference_frame_maps:
            dataset_sym_copies.write_pdb_file(
                dataset.file_manager.get_file('symmetry_copies'))
        # Extract protein atoms from the symmetry copies
        dataset_sym_sites_cart = non_water(
            dataset_sym_copies).atoms().extract_xyz()
        # Generate symmetry contacts grid mask
        dataset_mask = GridMask(parent=grid,
                                sites_cart=dataset_sym_sites_cart,
                                max_dist=args.params.masks.outer_mask,
                                min_dist=args.params.masks.inner_mask_symmetry)
        # Combine with the total mask to generate custom mask for this dataset
        dset_total_temp.put(dataset_mask.inner_mask_indices(), 0)
        dset_total_idxs = numpy.where(dset_total_temp)[0]
        log('After masking with symmetry contacts: {} points for Z-map analysis'
            .format(len(dset_total_idxs)))
        # Write map of grid + symmetry mask
        if args.output.developer.write_reference_frame_grid_masks:
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('grid_mask'),
                origin_shift=True)

        # ============================================================================>
        # Generate custom masks for this dataset
        # ============================================================================>
        if args.params.z_map_analysis.masks.selection_string is not None:
            log.bar()
            log('Applying custom mask to the Z-map: "{}"'.format(
                args.params.z_map_analysis.masks.selection_string))
            cache = dataset.model.hierarchy.atom_selection_cache()
            custom_mask_selection = cache.selection(
                args.params.z_map_analysis.masks.selection_string)
            custom_mask_sites = dataset.model.hierarchy.select(
                custom_mask_selection).atoms().extract_xyz()
            log('Masking with {} atoms'.format(len(custom_mask_sites)))
            # Generate custom grid mask
            dataset_mask = GridMask(
                parent=grid,
                sites_cart=custom_mask_sites,
                max_dist=args.params.z_map_analysis.masks.outer_mask,
                min_dist=args.params.z_map_analysis.masks.inner_mask)
            # Combine with the total mask to generate custom mask for this dataset
            dset_total_temp *= dataset_mask.total_mask_binary()
            dset_total_idxs = numpy.where(dset_total_temp)[0]
            log('After masking with custom mask: {} points for Z-map analysis'.
                format(len(dset_total_idxs)))
            # Write out mask
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('z_map_mask'),
                origin_shift=True)

        # ============================================================================>
        #####
        # CALCULATE Z-MAPS AND LOOK FOR LARGE BLOBS
        #####
        # ============================================================================>
        # Check maps and that all maps are sparse
        # ============================================================================>
        assert dataset_map.data is not None, 'Something has gone wrong - this dataset has no loaded map'
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.mean_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.medn_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.stds_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.sadj_map.is_sparse()
        # ============================================================================>
        # CALCULATE MEAN-DIFF MAPS
        # ============================================================================>
        mean_diff_map = map_analyser.calculate_z_map(map=dataset_map,
                                                     method='none')
        #        # ============================================================================>
        #        # NAIVE Z-MAP - NOT USING UNCERTAINTY ESTIMATION OR ADJUSTED STDS
        #        # ============================================================================>
        #        z_map_naive = map_analyser.calculate_z_map(map=dataset_map, method='naive')
        #        z_map_naive_normalised = z_map_naive.normalised_copy()
        # ============================================================================>
        # UNCERTAINTY Z-MAP - NOT USING ADJUSTED STDS
        # ============================================================================>
        z_map_uncty = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='uncertainty')
        z_map_uncty_normalised = z_map_uncty.normalised_copy()
        # ============================================================================>
        # ADJUSTED+UNCERTAINTY Z-MAP
        # ============================================================================>
        z_map_compl = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='adjusted+uncertainty')
        z_map_compl_normalised = z_map_compl.normalised_copy()

        # ============================================================================>
        # SELECT WHICH MAP TO DO THE BLOB SEARCHING ON
        # ============================================================================>
        #        if args.params.statistical_maps.z_map_type == 'naive':
        #            z_map = z_map_naive_normalised
        #            z_map_stats = basic_statistics(flex.double(z_map_naive.data))
        if args.params.statistical_maps.z_map_type == 'uncertainty':
            z_map = z_map_uncty_normalised
            z_map_stats = basic_statistics(flex.double(z_map_uncty.data))
        elif args.params.statistical_maps.z_map_type == 'adjusted+uncertainty':
            z_map = z_map_compl_normalised
            z_map_stats = basic_statistics(flex.double(z_map_compl.data))
        else:
            raise Exception('Invalid Z-map type')

        # ============================================================================>
        # RECORD Z-MAP FOR STATISTICS
        # ============================================================================>
        # Calculate statistics of z-maps
        dataset_map.meta.z_mean = z_map_stats.mean
        dataset_map.meta.z_stdv = z_map_stats.bias_corrected_standard_deviation
        dataset_map.meta.z_skew = z_map_stats.skew
        dataset_map.meta.z_kurt = z_map_stats.kurtosis
        # ============================================================================>
        z_map.meta.type = 'z-map'
        # ============================================================================>

        # ============================================================================>
        #####
        # WRITE ALL MAP DISTRIBUTIONS (THESE DON'T USE MUCH SPACE)
        #####
        # ============================================================================>
        # Sampled Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('s_map_png'),
            plot_vals=dataset_map.get_map_data(sparse=True))
        # Mean-Difference
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('d_mean_map_png'),
            plot_vals=mean_diff_map.get_map_data(sparse=True))
        #        # Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_png'),
        #                                              plot_vals   = z_map_naive.get_map_data(sparse=True),
        #                                              plot_normal = True)
        #        # Normalised Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_normalised_png'),
        #                                              plot_vals   = z_map_naive_normalised.get_map_data(sparse=True),
        #                                              plot_normal = True)
        # Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_uncertainty_png'),
            plot_vals=z_map_uncty.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_uncertainty_normalised_png'),
            plot_vals=z_map_uncty_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_corrected_png'),
            plot_vals=z_map_compl.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_corrected_normalised_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Plot Q-Q Plot of Corrected Z-Map to see how normal it is
        analyse_graphs.qq_plot_against_normal(
            f_name=dataset.file_manager.get_file('z_map_qq_plot_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True))

        # ============================================================================>
        #####
        # LOOK FOR CLUSTERS OF LARGE Z-SCORES
        #####
        # ============================================================================>
        # Contour the grid at a particular Z-Value
        # ============================================================================>
        num_clusters, z_clusters = blob_finder.cluster_high_z_values(
            z_map_data=z_map.get_map_data(sparse=False),
            point_mask_idx=dset_total_idxs)
        # ============================================================================>
        # Too many points to cluster -- probably a bad dataset
        # ============================================================================>
        if num_clusters == -1:
            # This dataset is too noisy to analyse - flag!
            log_strs.append(
                'Z-Map too noisy to analyse -- not sure what has gone wrong here...'
            )
            return dataset, dataset_map.meta, log_strs

        # ============================================================================>
        #####
        # FILTER/SELECT CLUSTERS OF Z-SCORES
        #####
        # ============================================================================>
        # Filter the clusters by size and peak height
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_1(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Minimum cluster peak/size not reached.')
        # ============================================================================>
        # Filter the clusters by distance from protein
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_2(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Clusters too far from protein.')
        # ============================================================================>
        # Group Nearby Clusters Together
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.group_clusters(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
        # ============================================================================>
        # Filter the clusters by symmetry equivalence
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_3(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)

        # ============================================================================>
        #####
        # WRITE MAPS
        #####
        # ============================================================================>
        # write dataset maps in the reference frame
        # ============================================================================>
        if args.output.developer.write_reference_frame_maps:
            dataset_map.to_file(
                filename=dataset.file_manager.get_file('sampled_map'),
                space_group=grid.space_group())
            mean_diff_map.to_file(
                filename=dataset.file_manager.get_file('mean_diff_map'),
                space_group=grid.space_group())
            z_map.to_file(filename=dataset.file_manager.get_file('z_map'),
                          space_group=grid.space_group())
        # ============================================================================>
        # Write out mask of the high z-values
        # ============================================================================>
        if args.output.developer.write_reference_frame_grid_masks:
            # Write map of where the blobs are (high-Z mask)
            highz_points = []
            [highz_points.extend(list(x[0])) for x in z_clusters]
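            # NOTE: written for Python 2, where map() returns a list; under
            # Python 3 these map() calls would need to be wrapped in list()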
            highz_points = [map(int, v) for v in highz_points]
            highz_indices = map(grid.indexer(), list(highz_points))
            grid.write_indices_as_map(
                indices=highz_indices,
                f_name=dataset.file_manager.get_file('high_z_mask'),
                origin_shift=True)
        # ============================================================================>
        # Write different Z-Maps? (Probably only needed for testing)
        # ============================================================================>
        if args.output.developer.write_reference_frame_all_z_map_types:
            #            z_map_naive.to_file(filename=dataset.file_manager.get_file('z_map_naive'), space_group=grid.space_group())
            #            z_map_naive_normalised.to_file(filename=dataset.file_manager.get_file('z_map_naive_normalised'), space_group=grid.space_group())
            z_map_uncty.to_file(
                filename=dataset.file_manager.get_file('z_map_uncertainty'),
                space_group=grid.space_group())
            z_map_uncty_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_uncertainty_normalised'),
                space_group=grid.space_group())
            z_map_compl.to_file(
                filename=dataset.file_manager.get_file('z_map_corrected'),
                space_group=grid.space_group())
            z_map_compl_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_corrected_normalised'),
                space_group=grid.space_group())

        # ============================================================================>
        # Skip to next dataset if no clusters found
        # ============================================================================>
        if num_clusters > 0:
            log_strs.append('===> {!s} Cluster(s) found.'.format(num_clusters))
        else:
            log_strs.append('===> No Clusters found.')
            return (dataset, dataset_map.meta, log_strs)
        assert num_clusters > 0, 'NUMBER OF CLUSTERS AFTER FILTERING == 0!'

        # ============================================================================>
        # Extract the map data in non-sparse format
        # ============================================================================>
        dset_map_data = dataset_map.get_map_data(sparse=False)
        avrg_map_data = map_analyser.average_map().get_map_data(sparse=False)
        # ============================================================================>
        # Process the identified features
        # ============================================================================>
        for event_idx, (event_points, event_values) in enumerate(z_clusters):
            # Number events from 1
            event_num = event_idx + 1
            # Create a unique identifier for this event
            event_key = (dataset.tag, event_num)
            # ============================================================================>
            # Create a point cluster object
            # ============================================================================>
            point_cluster = PointCluster(id=event_key,
                                         points=event_points,
                                         values=event_values)
            # ============================================================================>
            # Estimate the background correction of the detected feature
            # ============================================================================>
            # Extract sites for this cluster and estimate the background correction for the event
            log_strs.append('----------------------------------->>>')
            log_strs.append(
                'Estimating Event {!s} Background Correction'.format(
                    event_num))
            # Generate custom grid mask for this dataset
            event_mask = GridMask(parent=grid,
                                  sites_cart=grid.grid2cart(
                                      point_cluster.points, origin_shift=True),
                                  max_dist=2.0,
                                  min_dist=0.0)
            log_strs.append(
                '=> Event sites ({!s} points) expanded to {!s} points'.format(
                    len(point_cluster.points),
                    len(event_mask.outer_mask_indices())))
            # Select masks to define regions for bdc calculation
            exp_event_idxs = flex.size_t(event_mask.outer_mask_indices())
            reference_idxs = flex.size_t(
                grid.global_mask().inner_mask_indices())
            # ============================================================================>
            # Generate BDC-estimation curve and estimate BDC
            # ============================================================================>
            event_remains, event_corrs, global_corrs = calculate_varying_bdc_correlations(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                feature_idxs=exp_event_idxs,
                reference_idxs=reference_idxs,
                min_remain=1.0 - args.params.background_correction.max_bdc,
                max_remain=1.0 - args.params.background_correction.min_bdc,
                bdc_increment=args.params.background_correction.increment,
                verbose=verbose)
            event_remain_est = calculate_maximum_series_discrepancy(
                labels=event_remains,
                series_1=global_corrs,
                series_2=event_corrs)
            analyse_graphs.write_occupancy_graph(
                f_name=dataset.file_manager.get_file('bdc_est_png').format(
                    event_num),
                x_values=event_remains,
                global_values=global_corrs,
                local_values=event_corrs)
            log_strs.append(
                '=> Event Background Correction estimated as {!s}'.format(
                    1 - event_remain_est))
            # Reporting (log is normally silenced)
            blob_finder.log('Min-Max: {} {}'.format(
                1.0 - args.params.background_correction.max_bdc,
                1.0 - args.params.background_correction.min_bdc))
            blob_finder.log('Event number: {}'.format(event_num))
            blob_finder.log('Event Remains: {}'.format(','.join(
                map(str, event_remains))))
            blob_finder.log('Event Corrs:  {}'.format(','.join(
                map(str, event_corrs))))
            blob_finder.log('Global Corrs: {}'.format(','.join(
                map(str, global_corrs))))
            # Apply multiplier if provided
            blob_finder.log('Applying multiplier to output 1-BDC: {}'.format(
                args.params.background_correction.output_multiplier))
            event_remain_est = min(
                event_remain_est *
                args.params.background_correction.output_multiplier,
                1.0 - args.params.background_correction.min_bdc)
            # ============================================================================>
            # Calculate the map correlations at the selected BDC
            # ============================================================================>
            event_map_data = calculate_bdc_subtracted_map(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                bdc=1.0 - event_remain_est)
            global_corr = numpy.corrcoef(
                event_map_data.select(reference_idxs),
                avrg_map_data.select(reference_idxs))[0, 1]
            local_corr = numpy.corrcoef(
                event_map_data.select(exp_event_idxs),
                avrg_map_data.select(exp_event_idxs))[0, 1]
            # ============================================================================>
            # Write out EVENT map (in the reference frame) and grid masks
            # ============================================================================>
            if args.output.developer.write_reference_frame_maps:
                event_map = dataset_map.new_from_template(event_map_data,
                                                          sparse=False)
                event_map.to_file(
                    filename=dataset.file_manager.get_file('event_map').format(
                        event_num, event_remain_est),
                    space_group=grid.space_group())
            if args.output.developer.write_reference_frame_grid_masks:
                grid.write_indices_as_map(
                    indices=event_mask.outer_mask_indices(),
                    f_name=dataset.file_manager.get_file('grid_mask').replace(
                        '.ccp4', '') + '-event-mask-{}.ccp4'.format(event_num))

            # ============================================================================>
            # Find the nearest atom to the event
            # ============================================================================>
            atm = find_nearest_atoms(atoms=list(
                protein(dataset.model.hierarchy).atoms_with_labels()),
                                     query=dataset.model.alignment.ref2nat(
                                         grid.grid2cart(sites_grid=[
                                             map(int, point_cluster.centroid)
                                         ],
                                                        origin_shift=True)))[0]
            log_strs.append(
                '=> Nearest Residue to event: Chain {}, Residue {} {}'.format(
                    atm.chain_id, atm.resname, atm.resid()))
            # ============================================================================>
            # Create an event object
            # ============================================================================>
            event_obj = Event(id=point_cluster.id, cluster=point_cluster)
            event_obj.info.estimated_pseudo_occupancy = event_remain_est
            event_obj.info.estimated_bdc = 1.0 - event_remain_est
            event_obj.info.global_correlation = global_corr
            event_obj.info.local_correlation = local_corr
            # ============================================================================>
            # Append to dataset handler
            # ============================================================================>
            dataset.events.append(event_obj)

        # ============================================================================>
        # Write out pymol script to load all of the maps easily
        # ============================================================================>
        pml = PythonScript()
        pml.set_normalise_maps(False)
        # Load Structures
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('aligned_model'))
        pml.repr_as(obj=name, style='sticks')
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('symmetry_copies'))
        pml.repr_hide(obj=name)
        # Load Sampled Map
        name = pml.load_map(
            f_name=dataset.file_manager.get_file('sampled_map'))
        mesh = pml.make_mesh(obj=name, contour_level=1.0, colour='blue')
        # Load Z-maps
        name = pml.load_map(f_name=dataset.file_manager.get_file('z_map'))
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.plus',
                             contour_level=3.0,
                             colour='green')
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.mins',
                             contour_level=-3.0,
                             colour='red')
        # Load Event maps
        for f in sorted(
                glob.glob(
                    dataset.file_manager.get_file('event_map').format(
                        '*', '*'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name,
                                 contour_level=float(f.split('_')[-2]),
                                 colour='hotpink')
        # Load Miscellaneous maps (e.g. masks)
        for f in sorted(
                glob.glob(
                    os.path.join(dataset.file_manager.get_dir('root'),
                                 '*mask*.ccp4'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name, contour_level=0.0, colour='grey')

        pml.write_script(f_name=dataset.file_manager.get_file('pymol_script'),
                         overwrite=True)

        return (dataset, dataset_map.meta, log_strs)
Example #15
def run(params):

    ######################################################################
    # Validate input
    ######################################################################

    assert params.input.pdb, 'No PDB File Provided'

    if params.modes.all:
        params.modes.peptide_bond_links = True
        params.modes.duplicated_atom_restraints = True
        params.modes.local_structure_restraints = True
        params.modes.occupancy_groups = True
        params.modes.b_factor_restraints = True

    if params.modes.peptide_bond_links:
        link_file = os.path.splitext(
            params.input.pdb)[0] + params.peptide_bonds.suffix
    if params.modes.duplicated_atom_restraints:
        pass
    if params.modes.local_structure_restraints:
        pass
    if params.modes.occupancy_groups:
        pass
    if params.modes.b_factor_restraints:
        pass

    ######################################################################
    # Prepare output and input
    ######################################################################
    if params.output.phenix and os.path.exists(params.output.phenix):
        if params.settings.overwrite: os.remove(params.output.phenix)
        else:
            raise Exception('File already exists: {}'.format(
                params.output.phenix))
    if params.output.refmac and os.path.exists(params.output.refmac):
        if params.settings.overwrite: os.remove(params.output.refmac)
        else:
            raise Exception('File already exists: {}'.format(
                params.output.refmac))

    # Open log file
    if params.output.log:
        log = Log(log_file=params.output.log, verbose=params.settings.verbose)
    else:
        log = Log(verbose=params.settings.verbose)

    # Read input files
    pdb_obj = iotbx.pdb.hierarchy.input(params.input.pdb)
    pdb_obj.hierarchy.sort_atoms_in_place()

    ######################################################################
    # Generate restraints
    ######################################################################

    if params.modes.peptide_bond_links:
        make_link_records(params=params,
                          input_hierarchy=pdb_obj,
                          link_file=link_file,
                          log=log)

    if params.modes.duplicated_atom_restraints:
        make_duplication_restraints(params=params,
                                    input_hierarchy=pdb_obj,
                                    log=log)

    if params.modes.local_structure_restraints:
        make_local_restraints(params=params, input_hierarchy=pdb_obj, log=log)

    if params.modes.occupancy_groups:
        make_occupancy_constraints(params=params,
                                   input_hierarchy=pdb_obj,
                                   log=log)

    if params.modes.b_factor_restraints:
        make_b_factor_restraints(params=params,
                                 input_hierarchy=pdb_obj,
                                 log=log)
Example #16
class BFactorRefinementFactory(object):

    _refine = refine_phenix

    def __init__(self,
                 pdb_file,
                 mtz_file,
                 out_dir,
                 cif_files=[],
                 tag=None,
                 tls_selections=None,
                 prefix='refined'):

        self.pdb_file = pdb_file
        self.mtz_file = mtz_file
        self.cif_files = cif_files
        self.out_dir = easy_directory(out_dir)
        self.tag = tag
        self.tls_selections = []
        self.tls_matrices = None

        self.initial_pdb = os.path.join(self.out_dir, 'initial.pdb')
        self.out_template = os.path.join(self.out_dir, prefix)

        shutil.copy(self.pdb_file, self.initial_pdb)

        self.log = Log(verbose=True)

        if not tls_selections:
            tls_selections = self.determine_tls_groups(pdb_file=pdb_file)

        # Sanitise the tls selections
        for tls in tls_selections:
            if tls.startswith('"') and tls.endswith('"'):
                tls = tls[1:-1]
            assert '\"' not in tls, 'TLS selection cannot include \": {}'.format(
                tls)
            self.tls_selections.append(tls)

    def determine_tls_groups(self, pdb_file):

        self.log.subheading('Determining TLS groups for: {}'.format(pdb_file))

        tls_selections = phenix_find_tls_groups(pdb_file)

        self.log.subheading('Identified TLS Selections:')
        for s in tls_selections:
            self.log(s)

        return tls_selections

#    def initial_tls_parameters(self):
#        """Characterise TLS with phenix.tls - legacy function"""
#
#        self.log.subheading('Fitting TLS Matrices to selections')
#        self.log('writing to output file: {}'.format(self.tls_initial_pdb))
#
#        cmd = CommandManager('phenix.tls')
#        cmd.add_command_line_arguments(self.pdb_file)
#        cmd.add_command_line_arguments(self.cif_files)
#        cmd.add_command_line_arguments('extract_tls=True')
#        cmd.add_command_line_arguments([r'selection="{}"'.format(s) for s in self.tls_selections if s is not None])
#        cmd.add_command_line_arguments('output_file_name={}'.format(self.tls_initial_pdb))
#
#        cmd.print_settings()
#        ret_code = cmd.run()
#        cmd.write_output(self.tls_initial_pdb.replace('.pdb', '.log'))
#
#        if ret_code != 0:
#            self.log(cmd.output)
#            self.log(cmd.error)
#            raise Exception('Failed to determine TLS parameters: {}'.format(' '.join(cmd.program)))
#
#        return self.tls_initial_pdb, self.extract_tls_from_pdb(self.tls_initial_pdb)

    def refine_b_factors(self, mode='tls', suffix=None):
        """Refine the model with phenix.refine, including the TLS model"""

        assert mode in ['isotropic', 'tls', 'anisotropic']

        if suffix is None: suffix = mode

        strategy = "individual_sites+individual_adp+occupancies"

        if mode == 'isotropic':
            strategy += ''
            params = [r'convert_to_isotropic=True']
        elif mode == 'tls':
            strategy += '+tls'
            params = [
                r'refinement.refine.adp.tls="{}"'.format(t)
                for t in self.tls_selections
            ]
        else:
            strategy += ''
            params = [
                r'refinement.refine.adp.individual.anisotropic="{}"'.format(
                    ' or '.join(['(' + t + ')' for t in self.tls_selections]))
            ]

        self.log.subheading('Refining B-factor model with {}'.format(
            self._refine.program))
        obj = self._refine(pdb_file=self.pdb_file,
                           mtz_file=self.mtz_file,
                           cif_file=self.cif_files,
                           out_prefix=self.out_template + '-' + suffix,
                           strategy=strategy,
                           n_cycles=3,
                           manual_args=params)

        return obj.out_pdb_file, obj.out_mtz_file

    @staticmethod
    def extract_tls_from_pdb(pdb_file):
        ih = iotbx.pdb.hierarchy.input(pdb_file)
        tls_params = ih.input.extract_tls_params(ih.hierarchy)
        return tls_params

    def show_tls_params(self, tls_params=None, pdb_file=None):
        if pdb_file: tls_params = self.extract_tls_from_pdb(pdb_file=pdb_file)
        T = tls_params.tls_params[0].t
        L = tls_params.tls_params[0].l
        S = tls_params.tls_params[0].s

        o = ""
        for tls in tls_params.tls_params:
            o += '\n'
            o += 'selection: {}\n'.format(tls.selection_string)
            o += 'origin: {}\n'.format(tls.origin)
            o += 'T: ' + str(tls.t) + '\n'
            o += 'L: ' + str(tls.l) + '\n'
            o += 'S: ' + str(tls.s) + '\n'
        o += '\n'
        self.log(o)
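For orientation, a minimal sketch of using the static helper above outside the refinement pipeline; the enclosing class is not named in this excerpt, so BFactorRefinementFactory is a placeholder name, and 'refined-tls.pdb' is a placeholder path.

# Hedged sketch (placeholder class name and path, see note above):
# read the TLS records written into a refined PDB file and report each group.
tls_params = BFactorRefinementFactory.extract_tls_from_pdb('refined-tls.pdb')
for tls in tls_params.tls_params:
    print('selection: {}'.format(tls.selection_string))
    print('origin:    {}'.format(tls.origin))
    print('T: {}  L: {}  S: {}'.format(tls.t, tls.l, tls.s))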
Example No. 17
0
def run(params):

    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)

    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))

    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'

    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))

    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))

    # Create command objects
    log.subheading('Preparing command line input for refinement program')

    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])

        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split('\n'))

        cm.add_standard_input(['END'])

    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])

        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])

        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()

    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))

    log('\n' + cm.output, show=False)

    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)

    log.subheading('Post-processing output files')

    if params.options.program == "buster":
        log.subheading('Renaming buster output files')

        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')

        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')

    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except IndexError:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading('finished - refinement')
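The run() function above pulls everything it needs from a PHIL-style parameter object. As a rough guide only, the scope below is reconstructed from the attribute accesses in the code; it is not the tool's actual master_phil, the defaults are guesses, and the split_conformations sub-scope used near the end of run() is omitted.

# Approximate PHIL scope inferred from run() above - an assumption, not the
# tool's real master_phil definition.
from libtbx import phil

approx_phil = phil.parse("""
input {
    pdb = None
        .type = path
    mtz = None
        .type = path
    cif = None
        .type = path
        .multiple = True
    params = None
        .type = path
    args = None
        .type = str
        .multiple = True
}
output {
    dir_prefix = refine_
        .type = str
    out_prefix = output
        .type = str
    link_prefix = refine
        .type = str
}
options {
    program = *phenix refmac buster
        .type = choice
    split_conformations = False
        .type = bool
}
settings {
    verbose = False
        .type = bool
}
""")

params = approx_phil.extract()
params.input.pdb = 'model.pdb'   # placeholder input paths
params.input.mtz = 'data.mtz'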
Example No. 18
0
def split_conformations(filename, params, log=None):

    if log is None: log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # List of conformers to output for each structure, and suffixes
        out_confs = [
            sorted(all_confs.intersection(sel_confs)),
            sorted(all_confs.difference(sel_confs))
        ]
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        sel_resnames = None
        sel_confs = None
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        sel_resnames = None
        sel_confs = None
        # One structure for each set of supplied conformer sets
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        if not this_confs: continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose: log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residue group with occupancy of less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structure: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths
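A short usage sketch for the function above. It assumes the split_conformations module exposes a master_phil scope in the same way as the sibling jiffies referenced elsewhere in this listing (merge_conformations, make_restraints); the import path and the input filename are placeholders, not confirmed by this listing.

# Hedged usage sketch - module path, master_phil availability and the input
# filename are assumptions.
from giant.jiffies import split_conformations

params = split_conformations.master_phil.extract()
params.options.mode = 'by_conformer'      # one output structure per altloc
params.options.reset_altlocs = True       # relabel altlocs starting from 'A'
params.options.reset_occupancies = True   # renormalise occupancies after splitting
params.settings.verbose = True

out_files = split_conformations.split_conformations(
    filename='ensemble.pdb',              # placeholder multi-conformer model
    params=params,
)
print(out_files)                          # paths of the written structures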
Example No. 19
0
def run(params):

    log = Log(log_file=params.output.log_file, verbose=True)

    # Process MTZs
    if params.input.mtz:

        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))

        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else: raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab) for f,lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()

        for cg in crystal_groups:

            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            error = False
            for c in cg.crystals:
                for label in params.check_for.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. Columns present are {}'.format(label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. Columns present are {}'.format(label, c.mtz_file, c.column_labels))
                        error = True
            if error is True: raise Sorry('There are datasets that do not contain the right columns.')

            log(crystal_statistics('Wavelength',         cg.crystals, value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(), header=True))
            log(crystal_statistics('Resolution (high)',  cg.crystals, value_func=lambda c: c.high_res,                                              header=False))
            log(crystal_statistics('Resolution (low)',   cg.crystals, value_func=lambda c: c.low_res,                                               header=False))
            log(crystal_statistics('Unit cell - vol',    cg.crystals, value_func=lambda c: c.unit_cell.volume(),                                    header=False))
            log(crystal_statistics('Unit cell - a',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[0],                             header=False))
            log(crystal_statistics('Unit cell - b',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[1],                             header=False))
            log(crystal_statistics('Unit cell - c',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[2],                             header=False))
            log(crystal_statistics('Unit cell - alpha',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[3],                             header=False))
            log(crystal_statistics('Unit cell - beta',   cg.crystals, value_func=lambda c: c.unit_cell.parameters()[4],                             header=False))
            log(crystal_statistics('Unit cell - gamma',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[5],                             header=False, footer=True))

            for label in params.summary.column_label:
                if label is None: continue
                log(crystal_statistics('Column: {}'.format(label), cg.crystals, value_func=lambda c: c.mtz_object().get_column(label).n_valid_values(),     header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))

    # Process PDBs
    if params.input.pdb:

        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))

        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else: raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab) for f,lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))

        for cg in crystal_groups:

            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            log(crystal_statistics('R-work', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work, header=True))
            log(crystal_statistics('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free, header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('R-free',     cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))

    log.heading('finished')
Example No. 20
0
def run(params):

    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry(
            'No pdb/mtz files have been provided: specify with input.pdb or input.mtz'
        )
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry(
            'No output directory has been specified: specify with output.out_dir'
        )
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)

    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)

    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy

    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.pdb
            ]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.pdb
            ]
        elif params.input.labels.pdb_regex:
            p_labels = [
                re.findall(params.input.labels.pdb_regex, f)[0]
                for f in params.input.pdb
            ]
        else:
            p_labels = [
                'PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))
            ]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.mtz
            ]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.mtz
            ]
        elif params.input.labels.mtz_regex:
            m_labels = [
                re.findall(params.input.labels.mtz_regex, f)[0]
                for f in params.input.mtz
            ]
        else:
            m_labels = [
                'MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))
            ]
    except Exception:
        print('Error reading file: {}'.format(f))
        raise

    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join([
                '{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1
            ])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))

    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))

    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    if params.input.pdb:
        pdb_summaries = [
            CrystalSummary.from_pdb(pdb_file=f, id=lab)
            for f, lab in zip(params.input.pdb, p_labels)
        ]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [
            CrystalSummary.from_mtz(mtz_file=f, id=lab)
            for f, lab in zip(params.input.mtz, m_labels)
        ]
    else:
        mtz_summaries = []

    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(
        len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries +
                                                 mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))

    log.heading('Analysing variation of unit cells for each space group')

    for cg in crystal_groups:

        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(
            ' ', '_'))

        log.subheading('Space Group {}: {} dataset(s)'.format(
            cg.space_groups[0], len(cg.crystals)))

        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup'
            )
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir,
                                             '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)

        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(
            cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))

        for i_cg2, cg2 in enumerate(sg_crystal_groups):

            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)

            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)

            log('Unit Cell Variation:')
            log(numpy.round(cg2.uc_stats.as_pandas_table().T, 2))

            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(
                    fname=os.path.join(img_dir, '{}.png'.format(cluster_name)),
                    xlab='Crystal',
                    ylab='Linear Cell Variation',
                    ylim=(0, params.clustering.lcv_cutoff),
                    annotate_y_min=params.clustering.label_nodes_above)

            log('Copying files to output directory')

            # Go through and link the datasets for each of the spacegroups into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir): os.mkdir(sub_dir)

            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir): os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir): os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir

            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                # Create subdirectory
                if not os.path.exists(sub_sub_dir): os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # output other as well if filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)

    log.heading('finished')
Example No. 21
0
def process_and_export_folder(dir, params, log=None):
    """Merge structures, transform them and export a subset of a folder's contents"""

    if log is None: log = Log()

    dir_name = os.path.basename(dir)
    log.heading('Processing directory: {}'.format(dir_name), spacer=True)

    # Check to see if this folder should be skipped (export fitted folders only)
    if params.options.required_file_for_export == 'model':
        if not os.path.exists(
                os.path.join(
                    dir, 'modelled_structures',
                    PanddaDatasetFilenames.modelled_structure.format(
                        dir_name))):
            log('No modelled structure in modelled_structures folder.')
            log('SKIPPING: {}'.format(dir))
            return

    ############################################################################
    # Export the pandda folder to output directory
    ############################################################################

    log.subheading('Exporting folder: {}'.format(dir))
    exp_dir = export_folder(dir=dir, params=params, log=log)

    ############################################################################
    # Merge input and pandda-modelled structures
    ############################################################################

    # Extract parameters for the merging and set them
    merging_params = merge_conformations.master_phil.extract()
    merging_params.input.major = os.path.join(
        exp_dir, params.output.file_prefix +
        PanddaDatasetFilenames.input_model.format(dir_name))
    merging_params.input.minor = os.path.join(
        exp_dir, params.output.file_prefix +
        PanddaDatasetFilenames.modelled_structure.format(dir_name))
    merging_params.output.pdb = os.path.join(
        exp_dir, params.output.file_prefix +
        PanddaDatasetFilenames.ensemble_structure.format(dir_name))
    merging_params.output.log = os.path.splitext(
        merging_params.output.pdb)[0] + '.log'
    merging_params.output.make_restraints = True
    # Apply settings
    merging_params.settings.overwrite = params.settings.overwrite
    merging_params.settings.verbose = params.settings.verbose
    # Change the restraints settings
    merging_params.restraints.output.phenix = os.path.splitext(
        os.path.basename(
            merging_params.output.pdb))[0] + '.restraints-phenix.params'
    merging_params.restraints.output.refmac = os.path.splitext(
        os.path.basename(
            merging_params.output.pdb))[0] + '.restraints-refmac.params'
    merging_params.restraints.output.log = os.path.splitext(
        os.path.basename(merging_params.output.pdb))[0] + '.restraints.log'
    # Check files exist
    if not os.path.exists(merging_params.input.minor):
        raise Exception('File does not exist: {}'.format(
            merging_params.input.minor))
    if not os.path.exists(merging_params.input.major):
        raise Exception('File does not exist: {}'.format(
            merging_params.input.major))
    # Print and run
    log.subheading('Merging event-map model with input model')
    merge_conformations.run(params=merging_params)
Example No. 22
0
def make_duplication_restraints(params, input_hierarchy, log=None):
    """Create coordinate and b-factor restraints for duplicated conformers"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating restraints for duplicated conformers')

    dup_groups = []

    for chn in input_hierarchy.hierarchy.chains():

        if (params.duplicates.make_for == 'protein') and not chn.is_protein():
            continue
        elif (params.duplicates.make_for == 'het') and chn.is_protein():
            continue

        for rg in chn.residue_groups():
            dup_groups += find_duplicated_conformers_and_generate_atom_pairs(
                residue_group=rg, rmsd_cutoff=params.duplicates.rmsd_cutoff)

    if not dup_groups:
        log('No duplicated conformers (no restraints created)')
        return

    # Concatenate the atom pairs into one list
    atom_pairs = []
    for group in dup_groups:
        atom_pairs.extend(group)

    log('Found {} duplicated conformers consisting of {} atom pairs'.format(
        len(dup_groups), len(atom_pairs)))
    log('')

    if params.output.refmac:
        restraint_list = [
            RefmacFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=0.0,
                sigma=params.duplicates.sigma_xyz) for a1, a2 in atom_pairs
        ]
        rest_block = RefmacFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [
            PhenixFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=0.0,
                sigma=params.duplicates.sigma_xyz) for a1, a2 in atom_pairs
        ]
        rest_block = PhenixFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
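For context, a rough sketch of invoking the restraint generator above on a merged model. The nested params object here is a hand-built stand-in carrying only the attribute names the function reads (it is not the tool's real PHIL scope), and 'merged.pdb' is a placeholder path.

# Hedged sketch: a minimal stand-in parameter object for the function above.
import iotbx.pdb

class _Scope(object):
    """Trivial attribute container used in place of a PHIL scope."""
    def __init__(self, **kw):
        self.__dict__.update(kw)

params = _Scope(
    duplicates=_Scope(make_for='protein', rmsd_cutoff=0.1, sigma_xyz=0.02),
    output=_Scope(refmac='duplicates-refmac.params', phenix=None),
    settings=_Scope(verbose=True),
)

# The function expects an object with a .hierarchy attribute, as returned by
# iotbx.pdb.hierarchy.input (see extract_tls_from_pdb earlier in this listing).
input_hierarchy = iotbx.pdb.hierarchy.input('merged.pdb')
make_duplication_restraints(params=params,
                            input_hierarchy=input_hierarchy)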
Example No. 23
0
def run(params):
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"

    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes

    output_prefix = out_dir

    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))

        cm.add_standard_input(["END"])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))

    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files
    real_pdb = os.path.join(output_prefix,
                            params.output.out_prefix + ".pdb")
    real_mtz = os.path.join(output_prefix,
                            params.output.out_prefix + ".mtz")

    log("Expected output files:")
    log("\t{}".format(real_pdb))
    log("\t{}".format(real_mtz))

    # The output paths are constructed (not globbed), so check that they exist
    if not (os.path.exists(real_pdb) and os.path.exists(real_mtz)):
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise Exception("Refinement output files do not exist")

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]

    print(link_file_pairs)

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
Example No. 24
0
def merge_complementary_hierarchies(hierarchy_1,
                                    hierarchy_2,
                                    prune_duplicates_rmsd=0.1,
                                    in_place=False,
                                    verbose=False,
                                    log=None):
    """Merge hierarchies that are alternate models of the same crystal by expanding alternate model conformations, merging, and then trimming alternate conformations where possible"""

    if log is None: log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy_1 = hierarchy_1.deep_copy()
        hierarchy_2 = hierarchy_2.deep_copy()

    # Sort the atoms
    hierarchy_1.sort_atoms_in_place()
    hierarchy_2.sort_atoms_in_place()

    log.heading('Preparing to merge structures')

    log.subheading(
        'Explicitly expanding models to all conformations of the crystal')
    log('Expanding alternate conformations in structure 1')
    expand_alternate_conformations(hierarchy=hierarchy_1,
                                   in_place=True,
                                   verbose=verbose)
    log('Expanding alternate conformations in structure 2')
    expand_alternate_conformations(hierarchy=hierarchy_2,
                                   in_place=True,
                                   verbose=verbose)
    log.subheading(
        'Applying conformer shift to the second structure before merging')
    log('Identifying the altloc shift required from the number of alternate conformers in structure 1'
        )
    conf_offset = find_next_conformer_idx(
        hierarchy=hierarchy_1, all_ids=iotbx.pdb.systematic_chain_ids())
    log('Incrementing all altlocs in structure 2 by {}'.format(conf_offset))
    increment_altlocs(hierarchy=hierarchy_2,
                      offset=conf_offset,
                      in_place=True,
                      verbose=verbose)
    log.subheading('Renaming residues that do not align between structures')
    resolve_residue_id_clashes(fixed_hierarchy=hierarchy_1,
                               moving_hierarchy=hierarchy_2,
                               in_place=True,
                               verbose=verbose)

    log.heading('Merging structures')

    log('Transferring residues from Structure 2 to Structure 1')
    transfer_residue_groups_from_other(acceptor_hierarchy=hierarchy_1,
                                       donor_hierarchy=hierarchy_2,
                                       in_place=True,
                                       verbose=verbose)

    log.heading('Post-processing structure')

    log('Pruning unnecessary multi-conformer residues in the merged structure'
        )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy_1,
        required_altlocs=hierarchy_1.altloc_indices(),
        rmsd_cutoff=prune_duplicates_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy_1
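A minimal sketch of calling the merge function above directly on two single-model structures; the input paths are placeholders and the hierarchies are read with iotbx.pdb.hierarchy.input, as in extract_tls_from_pdb() earlier in this listing.

# Hedged sketch: merge a ground-state ("major") model and a ligand-bound
# ("minor") model into one multi-conformer structure. Paths are placeholders.
import iotbx.pdb

major = iotbx.pdb.hierarchy.input('major.pdb')
minor = iotbx.pdb.hierarchy.input('minor.pdb')

merged = merge_complementary_hierarchies(
    hierarchy_1=major.hierarchy,
    hierarchy_2=minor.hierarchy,
    prune_duplicates_rmsd=0.1,
    in_place=True,
    verbose=True,
)

# Renumber atoms and write the merged structure with the original symmetry
merged.sort_atoms_in_place()
merged.atoms_reset_serial()
merged.write_pdb_file(file_name='merged.pdb',
                      crystal_symmetry=major.input.crystal_symmetry())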
Example No. 25
0
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    if (params.input.major or params.input.minor
        ) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(
            params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(
            params.input.minor))
    # Sanity-check that the major/minor inputs and the output path are all set
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))

    # Check that the input occupancies are valid
    if (params.options.minor_occupancy >
            1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy,
                    params.options.major_occupancy))

    # Report validated parameters
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints': continue
        log(obj.as_str().strip())

    # Read in the ligand file and set each residue to the requested conformer
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # Check that each input structure contains only a single model
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except:
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(
        params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() *
                                      params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(
        params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() *
                                      params.options.minor_occupancy)

    # Merge the hierarchies
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(
                ','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:

        # Transfer the other phil objects from the master phil
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'): continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)

        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Which alternate conformations to generate restraints for
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite

        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return