def test_raise_exception_when_excluding_non_existing_dataset(
    experiments_024, reflections_024
):
    """Asking to exclude the identifier "1" is expected to raise ValueError."""
    unknown_identifiers = ["1"]
    with pytest.raises(ValueError):
        _expts, _tables = select_datasets_on_ids(
            experiments_024,
            reflections_024,
            exclude_datasets=unknown_identifiers,
        )
def remove_bad_data(self):
    """Remove any target model/mtz data and any datasets which were removed
    from the scaler during scaling.

    Steps, in order:

    1. If a target model, a target mtz or the ``only_target`` option is set,
       the last experiment and the last reflection table are dropped.
    2. Datasets listed in ``self.scaler.removed_datasets`` are excluded from
       ``self.experiments`` and ``self.reflections``.
    3. In every remaining table, reflections with an
       ``inverse_scale_factor`` below 0.001 are flagged as
       ``excluded_for_scaling``.
    """
    options = self.params.scaling_options

    # first remove target refl/exps
    if options.target_model or options.target_mtz or options.only_target:
        self.experiments = self.experiments[:-1]
        self.reflections = self.reflections[:-1]

    # remove any bad datasets:
    removed_ids = self.scaler.removed_datasets
    if removed_ids:
        logger.info("deleting removed datasets from memory: %s", removed_ids)
        # select_datasets_on_ids matches on experiment identifiers, so hand
        # over the identifiers themselves, not their positions in the list.
        self.experiments, self.reflections = select_datasets_on_ids(
            self.experiments, self.reflections, exclude_datasets=removed_ids
        )

    # also remove negative scales (or scales below 0.001)
    n_excluded = 0
    for table in self.reflections:
        bad_sf = table["inverse_scale_factor"] < 0.001
        n_excluded += bad_sf.count(True)
        table.set_flags(bad_sf, table.flags.excluded_for_scaling)
    if n_excluded > 0:
        logger.info("%s reflections excluded: scale factor < 0.001", n_excluded)
def test_exclude_specific_datasets_using_id(experiments_024, reflections_024):
    """Excluding identifier "0" must leave exactly datasets "2" and "4"."""
    kept_expts, kept_tables = select_datasets_on_ids(
        experiments_024,
        reflections_024,
        exclude_datasets=["0"],
    )
    assert len(kept_tables) == 2
    assert list(kept_expts.identifiers()) == ["2", "4"]
    assert len(kept_expts) == 2
def test_raise_exception_when_not_all_identifiers_set(experiments, reflections_024):
    """Only the first two experiments get an identifier; selection must fail."""
    for position, identifier in enumerate(("0", "2")):
        experiments[position].identifier = identifier
    with pytest.raises(ValueError):
        _expts, _tables = select_datasets_on_ids(
            experiments,
            reflections_024,
            use_datasets=["2"],
        )
def test_select_specific_datasets_using_id(experiments_024, reflections_024):
    """Selecting identifiers "0" and "2" returns just those two datasets."""
    wanted = ["0", "2"]
    chosen_expts, chosen_tables = select_datasets_on_ids(
        experiments_024,
        reflections_024,
        use_datasets=wanted,
    )
    assert len(chosen_expts) == 2
    assert len(chosen_tables) == 2
    assert list(chosen_expts.identifiers()) == ["0", "2"]
def test_raise_exception_when_selecting_and_excluding_datasets_at_same_time(
    experiments_024, reflections_024
):
    """Passing use_datasets together with exclude_datasets must raise."""
    conflicting_options = {
        "use_datasets": ["2", "4"],
        "exclude_datasets": ["0"],
    }
    with pytest.raises(ValueError):
        _expts, _tables = select_datasets_on_ids(
            experiments_024, reflections_024, **conflicting_options
        )
def _filter_min_reflections(self, experiments, reflections):
    """Select the datasets whose table length reaches ``min_reflections``.

    Experiments and reflection tables are paired up in order; the
    identifier of every experiment whose table has a length of at least
    ``self.params.min_reflections`` is collected and handed to
    ``select_datasets_on_ids`` as ``use_datasets``.

    Returns:
        Whatever ``select_datasets_on_ids`` returns for that selection.
    """
    large_enough = [
        expt.identifier
        for expt, table in zip(experiments, reflections)
        if len(table) >= self.params.min_reflections
    ]
    return select_datasets_on_ids(
        experiments, reflections, use_datasets=large_enough
    )
def test_correct_handling_with_multi_dataset_table(experiments_024):
    """Excluding "2" from a single combined table removes that dataset only."""
    combined = flex.reflection_table()
    combined["id"] = flex.int([0, 1, 2])
    for table_id, identifier in enumerate(("0", "2", "4")):
        combined.experiment_identifiers()[table_id] = identifier

    _expts, selected = select_datasets_on_ids(
        experiments_024, [combined], exclude_datasets=["2"]
    )

    remaining = selected[0]
    assert list(remaining.experiment_identifiers().values()) == ["0", "4"]
    assert list(remaining["id"]) == [0, 2]
def __init__(self, experiments, reflections, params=None):
    """Filter and reindex the input datasets, then build the cosym analysis.

    Args:
        experiments: the experiments to analyse.
        reflections: the reflection tables paired with ``experiments``.
        params: analysis parameters; ``phil_scope.extract()`` is used when
            this is None.
    """
    super(cosym, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"]
    )
    params = phil_scope.extract() if params is None else params
    self.params = params

    # drop datasets with too few reflections
    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, reflections
    )

    # map experiments and reflections to primitive setting
    self._experiments, self._reflections = self._map_to_primitive(
        self._experiments, self._reflections
    )

    if len(self._experiments) > 1:
        # perform unit cell clustering
        clustered_ids = self._unit_cell_clustering(self._experiments)
        if len(clustered_ids) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s",
                len(clustered_ids),
                str(clustered_ids),
            )
            self._experiments, self._reflections = select_datasets_on_ids(
                self._experiments,
                self._reflections,
                use_datasets=clustered_ids,
            )

    self._experiments, self._reflections = self._map_to_minimum_cell(
        self._experiments, self._reflections
    )

    # transform models into miller arrays
    miller_datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )

    self.cosym_analysis = CosymAnalysis(miller_datasets, self.params)
def _map_to_primitive(self, experiments, reflections):
    """Reindex every dataset to the primitive setting of its crystal.

    For each experiment/table pair:

    * reflections flagged as systematically absent by the crystal's space
      group are removed from the table,
    * the ``miller_index`` column is transformed with the change-of-basis
      operator to the primitive setting, and
    * ``expt.crystal`` is replaced by the crystal in that new basis.

    Args:
        experiments: the experiments; their ``crystal`` is updated in place.
        reflections: the reflection tables paired with ``experiments``. The
            sequence itself is not modified.

    Returns:
        The result of ``select_datasets_on_ids`` for all processed
        identifiers, called with the updated reflection tables.
    """
    identifiers = []
    # Work on a shallow copy so that a table replaced by select() below is
    # what gets returned, without mutating the caller's sequence.
    tables = list(reflections)
    for index, (expt, refl) in enumerate(zip(experiments, tables)):
        cb_op_to_primitive = (
            expt.crystal.get_crystal_symmetry()
            .change_of_basis_op_to_primitive_setting()
        )
        sel = expt.crystal.get_space_group().is_sys_absent(refl["miller_index"])
        n_absent = sel.count(True)
        if n_absent:
            logger.info(
                "Eliminating %i systematic absences for experiment %s",
                n_absent,
                expt.identifier,
            )
            # select() yields a new table; keep it, otherwise the filtered
            # and reindexed data would be lost while the crystal is still
            # moved to the primitive setting.
            refl = refl.select(~sel)
            tables[index] = refl
        refl["miller_index"] = cb_op_to_primitive.apply(refl["miller_index"])
        expt.crystal = expt.crystal.change_basis(cb_op_to_primitive)
        identifiers.append(expt.identifier)
    return select_datasets_on_ids(experiments, tables, use_datasets=identifiers)
def remove_unwanted_datasets(self):
    """Remove any target model/mtz data and any datasets which were removed
    from the scaler during scaling.

    If a target model, a target mtz or the ``only_target`` option is set,
    the last experiment and the last reflection table are dropped. After
    that, the datasets listed in ``self.scaler.removed_datasets`` are
    excluded from ``self.experiments`` and ``self.reflections``.
    """
    options = self.params.scaling_options

    # first remove target refl/exps
    if options.target_model or options.target_mtz or options.only_target:
        self.experiments = self.experiments[:-1]
        self.reflections = self.reflections[:-1]

    # remove any bad datasets:
    removed_ids = self.scaler.removed_datasets
    if removed_ids:
        logger.info("deleting removed datasets from memory: %s", removed_ids)
        # Exclusion is done by experiment identifier, so no positional
        # lookup of the removed datasets is needed here.
        self.experiments, self.reflections = select_datasets_on_ids(
            self.experiments, self.reflections, exclude_datasets=removed_ids
        )
def remove_image_ranges_below_cutoff(
    experiments,
    reflections,
    ids_to_remove,
    image_group_to_expid_and_range,
    expid_to_image_groups,
    results_summary,
):
    """Remove image ranges from the datasets.

    An image group is only removed while it is the last remaining group of
    its experiment; removals are repeated until a full pass removes nothing.
    Groups that are never at the end are only reported in the log.

    Side effects: removed groups are deleted from ``expid_to_image_groups``
    and ``results_summary["dataset_removal"]`` is updated with what was
    removed.

    Returns:
        A single reflection table built from the remaining datasets.
    """
    n_valid_before = reflections.get_flags(
        reflections.flags.bad_for_scaling, all=False
    ).count(False)

    image_ranges_removed = []  # track for results summary
    exclude_images = []
    pending = ids_to_remove
    while True:
        deferred = []
        removed_any = False
        for group_id in sorted(pending):
            # exp_id is the numerical id
            exp_id, image_range = image_group_to_expid_and_range[group_id]
            identifier = reflections.experiment_identifiers()[exp_id]
            groups = expid_to_image_groups[exp_id]
            if groups[-1] != group_id:
                # not the last group of its experiment (yet)
                deferred.append(group_id)
                continue
            image_ranges_removed.append([image_range, exp_id])
            logger.info(
                "Removing image range %s from experiment %s",
                image_range,
                identifier,
            )
            exclude_images.append(
                [":".join((identifier, str(image_range[0]), str(image_range[1])))]
            )
            del groups[-1]
            removed_any = True
        pending = deferred
        if not removed_any:
            break

    for group_id in pending:
        exp_id, image_range = image_group_to_expid_and_range[group_id]
        identifier = reflections.experiment_identifiers()[exp_id]
        logger.info(
            """Image range %s from experiment %s is below the cutoff, but not at the end of a sweep.""",
            image_range,
            identifier,
        )

    # Now remove individual batches
    if -1 in reflections["id"]:
        reflections = reflections.select(reflections["id"] != -1)
    reflection_list = reflections.split_by_experiment_id()
    reflection_list, experiments = exclude_image_ranges_for_scaling(
        reflection_list, experiments, exclude_images
    )

    # if a whole experiment has been excluded: need to remove it here
    experiments_to_delete = [
        exp.identifier
        for exp in experiments
        if not exp.scan.get_valid_image_ranges(exp.identifier)
    ]
    if experiments_to_delete:
        experiments, reflection_list = select_datasets_on_ids(
            experiments, reflection_list, exclude_datasets=experiments_to_delete
        )
    assert len(reflection_list) == len(experiments)

    output_reflections = flex.reflection_table()
    for table in reflection_list:
        output_reflections.extend(table)

    n_valid_after = output_reflections.get_flags(
        output_reflections.flags.bad_for_scaling, all=False
    ).count(False)
    summary_update = {
        "image_ranges_removed": image_ranges_removed,
        "experiments_fully_removed": experiments_to_delete,
        "n_reflections_removed": n_valid_before - n_valid_after,
    }
    results_summary["dataset_removal"].update(summary_update)
    return output_reflections
def prepare_input(params, experiments, reflections):
    """Perform checks on the data and prepare the data for scaling.

    The steps run in this order:

    1. Optionally select or exclude datasets by identifier.
    2. Parse the reflection tables with ``parse_multiple_datasets`` and
       check that their number matches the number of experiments.
    3. Assign unique experiment identifiers.
    4. Unset the ``bad_for_scaling`` and ``scaled`` flags on every
       reflection and apply the ``exclude_images`` option.
    5. Optionally run dials.cosym to check for consistent indexing.
    6. Check that all experiments share one space group.
    7. For targeted scaling, append the target experiment and reflection
       table to the lists.
    8. Flag reflections outside the d_min/d_max limits or below the
       partiality cutoff as ``user_excluded_in_scaling``.

    Args:
        params: the scaling parameters.
        experiments: the experiments to prepare.
        reflections: the reflection tables to prepare.

    Returns:
        The tuple ``(params, experiments, reflections)``.

    Raises:
        ValueError: if the number of experiments and reflection tables do
            not match, or if the experiments have different space groups.
            Helper functions called here may raise it for other reasons.
    """
    #### First exclude any datasets, before the dataset is split into
    #### individual reflection tables and expids set.
    if (params.dataset_selection.exclude_datasets
            or params.dataset_selection.use_datasets):
        experiments, reflections = select_datasets_on_ids(
            experiments,
            reflections,
            params.dataset_selection.exclude_datasets,
            params.dataset_selection.use_datasets,
        )
        # Collect the ids that are left, for the log message only.
        ids = flex.size_t()
        for r in reflections:
            ids.extend(r.experiment_identifiers().keys())
        logger.info(
            "\nDataset ids for retained datasets are: %s \n",
            ",".join(str(i) for i in ids),
        )

    #### Split the reflections tables into a list of reflection tables,
    #### with one table per experiment.
    logger.info("Checking for the existence of a reflection table \n"
                "containing multiple datasets \n")
    reflections = parse_multiple_datasets(reflections)
    logger.info(
        "Found %s reflection tables & %s experiments in total.",
        len(reflections),
        len(experiments),
    )

    if len(experiments) != len(reflections):
        raise ValueError(
            "Mismatched number of experiments and reflection tables found.")

    #### Assign experiment identifiers.
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections)
    ids = itertools.chain.from_iterable(r.experiment_identifiers().keys()
                                        for r in reflections)
    logger.info("\nDataset ids are: %s \n", ",".join(str(i) for i in ids))

    # Unset the bad_for_scaling and scaled flags on every reflection.
    for r in reflections:
        r.unset_flags(flex.bool(len(r), True), r.flags.bad_for_scaling)
        r.unset_flags(flex.bool(r.size(), True), r.flags.scaled)

    # Apply the exclude_images option; note the helper returns the
    # reflections first and the experiments second.
    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images)

    #### Allow checking of consistent indexing, useful for
    #### targeted / incremental scaling.
    if params.scaling_options.check_consistent_indexing:
        logger.info("Running dials.cosym to check consistent indexing:\n")
        cosym_params = cosym_phil_scope.extract()
        cosym_params.nproc = params.scaling_options.nproc
        cosym_instance = cosym(experiments, reflections, cosym_params)
        cosym_instance.run()
        # Continue with the experiments and reflections held by cosym.
        experiments = cosym_instance.experiments
        reflections = cosym_instance.reflections
        logger.info("Finished running dials.cosym, continuing with scaling.\n")

    #### Make sure all experiments in same space group
    sgs = [
        expt.crystal.get_space_group().type().number() for expt in experiments
    ]
    if len(set(sgs)) > 1:
        raise ValueError("""The experiments have different space groups: space group numbers found: %s Please reanalyse the data so that space groups are consistent, (consider using dials.reindex, dials.symmetry or dials.cosym) or remove incompatible experiments (using the option exclude_datasets=)""" % ", ".join(map(str, set(sgs))))
    logger.info(
        "Space group being used during scaling is %s",
        experiments[0].crystal.get_space_group().info(),
    )

    #### If doing targeted scaling, extract data and append an experiment
    #### and reflection table to the lists
    if params.scaling_options.target_model:
        logger.info("Extracting data from structural model.")
        exp, reflection_table = create_datastructures_for_structural_model(
            reflections, experiments, params.scaling_options.target_model)
        experiments.append(exp)
        reflections.append(reflection_table)

    elif params.scaling_options.target_mtz:
        logger.info("Extracting data from merged mtz.")
        exp, reflection_table = create_datastructures_for_target_mtz(
            experiments, params.scaling_options.target_mtz)
        experiments.append(exp)
        reflections.append(reflection_table)

    #### Perform any non-batch cutting of the datasets, including the target dataset
    # Use the unit cell given in the parameters, otherwise determine one
    # from the experiments.
    best_unit_cell = params.reflection_selection.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiments)
    for reflection in reflections:
        if params.cut_data.d_min or params.cut_data.d_max:
            # d is computed from the miller indices with the best unit cell
            d = best_unit_cell.d(reflection["miller_index"])
            if params.cut_data.d_min:
                sel = d < params.cut_data.d_min
                reflection.set_flags(sel,
                                     reflection.flags.user_excluded_in_scaling)
            if params.cut_data.d_max:
                sel = d > params.cut_data.d_max
                reflection.set_flags(sel,
                                     reflection.flags.user_excluded_in_scaling)
        # The partiality cut only applies to tables with that column.
        if params.cut_data.partiality_cutoff and "partiality" in reflection:
            reflection.set_flags(
                reflection["partiality"] < params.cut_data.partiality_cutoff,
                reflection.flags.user_excluded_in_scaling,
            )
    return params, experiments, reflections
def test_selecting_everything_is_identity_function(
    experiments_024, reflections_024
):
    """Without selection arguments the very same input objects come back."""
    returned_expts, returned_tables = select_datasets_on_ids(
        experiments_024, reflections_024
    )
    assert returned_expts is experiments_024
    assert returned_tables is reflections_024