Example #1
import copy

from dxtbx.model import ExperimentList

from dials.array_family import flex
from dials.util.exclude_images import exclude_image_ranges_for_scaling

# make_scan_experiment is assumed to be a local test helper that builds a
# scan-based Experiment with the given image_range and experiment identifier.


def test_exclude_image_ranges_for_scaling():
    """Test for namesake function."""
    refl1 = flex.reflection_table()
    refl1["xyzobs.px.value"] = flex.vec3_double([(0, 0, 0.5), (0, 0, 1.5),
                                                 (0, 0, 5.5), (0, 0, 9.5),
                                                 (0, 0, 10.5)])
    refl1.set_flags(flex.bool(5, False), refl1.flags.user_excluded_in_scaling)
    refl2 = copy.deepcopy(refl1)
    refl1.experiment_identifiers()[0] = "0"
    refl2.experiment_identifiers()[1] = "1"
    explist = ExperimentList([
        make_scan_experiment(image_range=(2, 20), expid="0"),
        make_scan_experiment(image_range=(2, 20), expid="1"),
    ])
    refls, explist = exclude_image_ranges_for_scaling([refl1, refl2], explist,
                                                      [["1:11:20"]])
    assert list(explist[0].scan.get_valid_image_ranges("0")) == [(2, 20)]
    assert list(explist[1].scan.get_valid_image_ranges("1")) == [(2, 10)]
    assert list(refls[0].get_flags(
        refls[0].flags.user_excluded_in_scaling)) == [
            True,
            False,
            False,
            False,
            False,
        ]
    assert list(refls[1].get_flags(
        refls[1].flags.user_excluded_in_scaling)) == [
            True,
            False,
            False,
            False,
            True,
        ]
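
The "1:11:20" specification passed to exclude_image_ranges_for_scaling follows the dataset:first:last convention: images 11-20 of dataset "1" are marked invalid, which is why that experiment's valid image range shrinks to (2, 10) and the reflection at z-centroid 10.5 (image 11) gains the user_excluded_in_scaling flag. A minimal sketch of how such a string decomposes (parse_exclusion is a hypothetical helper, not part of dials):

def parse_exclusion(spec):
    # Split "dataset:first:last" into a dataset id and an image range.
    dataset, first, last = spec.split(":")
    return dataset, (int(first), int(last))

assert parse_exclusion("1:11:20") == ("1", (11, 20))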
Example #2
    def remove_image_ranges_below_cutoff(
        experiments,
        reflections,
        ids_to_remove,
        image_group_to_expid_and_range,
        expid_to_image_groups,
        results_summary,
    ):
        """Remove image ranges from the datasets."""
        n_valid_reflections = reflections.get_flags(
            reflections.flags.scaled).count(True)
        expid_to_tableid = {
            identifier: table_id
            for table_id, identifier in zip(
                reflections.experiment_identifiers().keys(),
                reflections.experiment_identifiers().values(),
            )
        }

        experiments_to_delete = []
        exclude_images = []
        image_ranges_removed = []  # track for results summary
        n_removed_this_cycle = 1
        while n_removed_this_cycle != 0:  # iterate until no edge group was removed
            other_potential_ids_to_remove = []
            n_removed_this_cycle = 0
            for id_ in sorted(ids_to_remove):
                exp_id, image_range = image_group_to_expid_and_range[
                    id_]  # exp_id is the experiment identifier string
                if (expid_to_image_groups[exp_id][-1] == id_
                        or expid_to_image_groups[exp_id][0]
                        == id_):  # group is at either edge of the scan
                    table_id = expid_to_tableid[exp_id]
                    image_ranges_removed.append([image_range, table_id])
                    logger.info(
                        "Removing image range %s from experiment %s",
                        image_range,
                        table_id,
                    )
                    exclude_images.append(
                        [f"{table_id}:{image_range[0]}:{image_range[1]}"])
                    if expid_to_image_groups[exp_id][-1] == id_:
                        del expid_to_image_groups[exp_id][-1]
                    else:
                        del expid_to_image_groups[exp_id][0]
                    n_removed_this_cycle += 1
                else:
                    other_potential_ids_to_remove.append(id_)
            ids_to_remove = other_potential_ids_to_remove
        for id_ in other_potential_ids_to_remove:  # below cutoff, but not at a scan edge
            exp_id, image_range = image_group_to_expid_and_range[id_]
            table_id = expid_to_tableid[exp_id]
            logger.info(
                "Image range %s from experiment %s is below the cutoff, "
                "but not at the edge of a sweep.",
                image_range,
                table_id,
            )

        # Now remove individual batches
        if -1 in reflections["id"]:
            reflections = reflections.select(reflections["id"] != -1)
        reflection_list = reflections.split_by_experiment_id()
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images)

        # Catch an edge case: an image group made up entirely of outliers is
        # filtered out before the analysis, but its image range should still
        # be marked as not valid.
        exclude_images = []
        for exp in experiments:
            # if any of the image ranges are not in the sets tested, exclude them
            tested = []
            for exp_id, imgrange in image_group_to_expid_and_range.values():
                if exp_id == exp.identifier:
                    tested.extend(list(range(imgrange[0], imgrange[1] + 1)))
            for imgrange in exp.scan.get_valid_image_ranges(exp.identifier):
                if all(j not in tested
                       for j in range(imgrange[0], imgrange[1] + 1)):
                    table_id = expid_to_tableid[exp.identifier]
                    exclude_images.append(
                        [f"{table_id}:{imgrange[0]}:{imgrange[1]}"])
                    logger.info("Removing %s due to scaling outlier group.",
                                exclude_images[-1])
        if exclude_images:
            reflection_list, experiments = exclude_image_ranges_for_scaling(
                reflection_list, experiments, exclude_images)

        # if a whole experiment has been excluded: need to remove it here
        ids_removed = []
        for exp, refl in zip(experiments, reflection_list):
            if not exp.scan.get_valid_image_ranges(
                    exp.identifier):  # if all removed above
                experiments_to_delete.append(exp.identifier)
                ids_removed.append(refl.experiment_identifiers().keys()[0])
        if experiments_to_delete:
            experiments, reflection_list = select_datasets_on_identifiers(
                experiments,
                reflection_list,
                exclude_datasets=experiments_to_delete)
        assert len(reflection_list) == len(experiments)

        output_reflections = flex.reflection_table()
        for r in reflection_list:
            output_reflections.extend(r)

        n_valid_filtered_reflections = output_reflections.get_flags(
            output_reflections.flags.scaled).count(True)
        results_summary["dataset_removal"].update({
            "image_ranges_removed":
            image_ranges_removed,
            "experiments_fully_removed":
            experiments_to_delete,
            "experiment_ids_fully_removed":
            ids_removed,
            "n_reflections_removed":
            n_valid_reflections - n_valid_filtered_reflections,
        })
        return output_reflections
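
remove_image_ranges_below_cutoff assumes its two bookkeeping mappings were built by an earlier analysis step. A minimal sketch of their assumed shape (the literal values are illustrative, not taken from dials):

# Each numeric image-group id maps to its parent experiment identifier and
# image range; each experiment lists its group ids in scan order.
image_group_to_expid_and_range = {
    0: ("exp-a", (1, 10)),
    1: ("exp-a", (11, 20)),
    2: ("exp-a", (21, 30)),
}
expid_to_image_groups = {"exp-a": [0, 1, 2]}
# With ids_to_remove = [1, 2], group 2 can be trimmed immediately (it sits
# at the scan edge), and group 1 becomes trimmable only once group 2 is
# gone, which is why the while loop above iterates until nothing changes.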
Example #3
    def remove_image_ranges_below_cutoff(
        experiments,
        reflections,
        ids_to_remove,
        image_group_to_expid_and_range,
        expid_to_image_groups,
        results_summary,
    ):
        """Remove image ranges from the datasets."""
        n_valid_reflections = reflections.get_flags(
            reflections.flags.bad_for_scaling, all=False).count(False)

        experiments_to_delete = []
        exclude_images = []
        image_ranges_removed = []  # track for results summary
        n_removed_this_cycle = 1
        while n_removed_this_cycle != 0:  # iterate until no trailing group was removed
            other_potential_ids_to_remove = []
            n_removed_this_cycle = 0
            for id_ in sorted(ids_to_remove):
                exp_id, image_range = image_group_to_expid_and_range[
                    id_]  # numerical id
                identifier = reflections.experiment_identifiers()[exp_id]
                if expid_to_image_groups[exp_id][-1] == id_:  # is last group
                    image_ranges_removed.append([image_range, exp_id])
                    logger.info(
                        "Removing image range %s from experiment %s",
                        image_range,
                        identifier,
                    )
                    exclude_images.append(
                        [f"{identifier}:{image_range[0]}:{image_range[1]}"])
                    del expid_to_image_groups[exp_id][-1]
                    n_removed_this_cycle += 1
                else:
                    other_potential_ids_to_remove.append(id_)
            ids_to_remove = other_potential_ids_to_remove
        for id_ in other_potential_ids_to_remove:  # below cutoff, but not at the end of a sweep
            exp_id, image_range = image_group_to_expid_and_range[id_]
            identifier = reflections.experiment_identifiers()[exp_id]
            logger.info(
                "Image range %s from experiment %s is below the cutoff, "
                "but not at the end of a sweep.",
                image_range,
                identifier,
            )

        # Now remove individual batches
        if -1 in reflections["id"]:
            reflections = reflections.select(reflections["id"] != -1)
        reflection_list = reflections.split_by_experiment_id()
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images)
        # if a whole experiment has been excluded: need to remove it here

        for exp in experiments:
            if not exp.scan.get_valid_image_ranges(
                    exp.identifier):  # if all removed above
                experiments_to_delete.append(exp.identifier)
        if experiments_to_delete:
            experiments, reflection_list = select_datasets_on_ids(
                experiments,
                reflection_list,
                exclude_datasets=experiments_to_delete)
        assert len(reflection_list) == len(experiments)

        output_reflections = flex.reflection_table()
        for r in reflection_list:
            output_reflections.extend(r)

        n_valid_filtered_reflections = output_reflections.get_flags(
            output_reflections.flags.bad_for_scaling, all=False).count(False)
        results_summary["dataset_removal"].update({
            "image_ranges_removed":
            image_ranges_removed,
            "experiments_fully_removed":
            experiments_to_delete,
            "n_reflections_removed":
            n_valid_reflections - n_valid_filtered_reflections,
        })
        return output_reflections
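
This variant trims only groups at the end of each scan, whereas Example #2 also trims leading groups. A stand-alone sketch of the fixed-point trimming loop, in plain Python with illustrative values:

ids_to_remove = {1, 2}
expid_to_image_groups = {"exp-a": [0, 1, 2]}
removed = []
changed = True
while changed:
    changed = False
    for id_ in sorted(ids_to_remove - set(removed)):
        groups = expid_to_image_groups["exp-a"]
        if groups and groups[-1] == id_:  # only the last group can go
            removed.append(groups.pop())
            changed = True
assert removed == [2, 1] and expid_to_image_groups["exp-a"] == [0]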
Example #4
def prepare_input(params, experiments, reflections):
    """Perform checks on the data and prepare the data for scaling.

    Raises:
        ValueError: if any of the consistency checks on the input
            experiments and reflection tables fail.

    """

    #### First exclude any datasets, before the input is split into
    #### individual reflection tables and experiment identifiers are set.
    if (params.dataset_selection.exclude_datasets
            or params.dataset_selection.use_datasets):
        experiments, reflections = select_datasets_on_ids(
            experiments,
            reflections,
            params.dataset_selection.exclude_datasets,
            params.dataset_selection.use_datasets,
        )
        ids = flex.size_t()
        for r in reflections:
            ids.extend(r.experiment_identifiers().keys())
        logger.info(
            "\nDataset ids for retained datasets are: %s \n",
            ",".join(str(i) for i in ids),
        )

    #### Split the reflections tables into a list of reflection tables,
    #### with one table per experiment.
    logger.info("Checking for the existence of a reflection table \n"
                "containing multiple datasets \n")
    reflections = parse_multiple_datasets(reflections)
    logger.info(
        "Found %s reflection tables & %s experiments in total.",
        len(reflections),
        len(experiments),
    )

    if len(experiments) != len(reflections):
        raise ValueError(
            "Mismatched number of experiments and reflection tables found.")

    #### Assign experiment identifiers.
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections)
    ids = itertools.chain.from_iterable(r.experiment_identifiers().keys()
                                        for r in reflections)
    logger.info("\nDataset ids are: %s \n", ",".join(str(i) for i in ids))

    for r in reflections:
        r.unset_flags(flex.bool(r.size(), True), r.flags.bad_for_scaling)
        r.unset_flags(flex.bool(r.size(), True), r.flags.scaled)

    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images)

    #### Allow checking of consistent indexing, useful for
    #### targeted / incremental scaling.
    if params.scaling_options.check_consistent_indexing:
        logger.info("Running dials.cosym to check consistent indexing:\n")
        cosym_params = cosym_phil_scope.extract()
        cosym_params.nproc = params.scaling_options.nproc
        cosym_instance = cosym(experiments, reflections, cosym_params)
        cosym_instance.run()
        experiments = cosym_instance.experiments
        reflections = cosym_instance.reflections
        logger.info("Finished running dials.cosym, continuing with scaling.\n")

    #### Make sure all experiments in same space group
    sgs = [
        expt.crystal.get_space_group().type().number() for expt in experiments
    ]
    if len(set(sgs)) > 1:
        raise ValueError(
            "The experiments have different space groups; space group numbers "
            "found: %s. Please reanalyse the data so that the space groups "
            "are consistent (consider using dials.reindex, dials.symmetry or "
            "dials.cosym), or remove incompatible experiments (using the "
            "option exclude_datasets=)" % ", ".join(map(str, set(sgs))))
    logger.info(
        "Space group being used during scaling is %s",
        experiments[0].crystal.get_space_group().info(),
    )

    #### If doing targeted scaling, extract data and append an experiment
    #### and reflection table to the lists
    if params.scaling_options.target_model:
        logger.info("Extracting data from structural model.")
        exp, reflection_table = create_datastructures_for_structural_model(
            reflections, experiments, params.scaling_options.target_model)
        experiments.append(exp)
        reflections.append(reflection_table)

    elif params.scaling_options.target_mtz:
        logger.info("Extracting data from merged mtz.")
        exp, reflection_table = create_datastructures_for_target_mtz(
            experiments, params.scaling_options.target_mtz)
        experiments.append(exp)
        reflections.append(reflection_table)

    #### Perform any non-batch cutting of the datasets, including the target dataset
    best_unit_cell = params.reflection_selection.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiments)
    for reflection in reflections:
        if params.cut_data.d_min or params.cut_data.d_max:
            d = best_unit_cell.d(reflection["miller_index"])
            if params.cut_data.d_min:
                sel = d < params.cut_data.d_min
                reflection.set_flags(sel,
                                     reflection.flags.user_excluded_in_scaling)
            if params.cut_data.d_max:
                sel = d > params.cut_data.d_max
                reflection.set_flags(sel,
                                     reflection.flags.user_excluded_in_scaling)
        if params.cut_data.partiality_cutoff and "partiality" in reflection:
            reflection.set_flags(
                reflection["partiality"] < params.cut_data.partiality_cutoff,
                reflection.flags.user_excluded_in_scaling,
            )
    return params, experiments, reflections
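
The d_min/d_max cuts at the end of prepare_input flag out-of-range reflections rather than deleting them. A minimal sketch of that selection logic in plain Python (the values are illustrative):

d_spacings = [3.2, 1.1, 0.9, 2.5]  # hypothetical d-spacings in angstroms
d_min, d_max = 1.0, 3.0
excluded = [d < d_min or d > d_max for d in d_spacings]
assert excluded == [True, False, True, False]
# In prepare_input the analogous selections set the
# user_excluded_in_scaling flag on each reflection table.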
Example #5
def run(args):
    usage = ("xia2.multiplex [options] [param.phil] "
             "models1.expt models2.expt observations1.refl "
             "observations2.refl...")

    # Create the parser
    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_experiments=True,
        check_format=False,
        epilog=help_message,
    )

    # Parse the command line
    params, options = parser.parse_args(args=args, show_diff_phil=False)

    # Configure the logging
    xia2.Handlers.Streams.setup_logging(logfile=params.output.log,
                                        verbose=options.verbose)

    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != "":
        logger.info("The following parameters have been modified:\n")
        logger.info(diff_phil)

    # Try to load the models and data
    if len(params.input.experiments) == 0:
        logger.info("No Experiments found in the input")
        parser.print_help()
        return
    if len(params.input.reflections) == 0:
        logger.info("No reflection data found in the input")
        parser.print_help()
        return
    if len(params.input.reflections) != len(params.input.experiments):
        sys.exit(
            "The number of input reflections files does not match the "
            "number of input experiments")

    if params.seed is not None:
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)
    if len(experiments) < 2:
        sys.exit("xia2.multiplex requires a minimum of two experiments")
    reflections = parse_multiple_datasets(reflections)
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections)

    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images)

    reflections_all = flex.reflection_table()
    assert len(reflections) == 1 or len(reflections) == len(experiments)
    for refl in reflections:
        reflections_all.extend(refl)
    reflections_all.assert_experiment_identifiers_are_consistent(experiments)

    if params.identifiers is not None:
        identifiers = []
        for identifier in params.identifiers:
            identifiers.extend(identifier.split(","))
        params.identifiers = identifiers

    try:
        ScaleAndMerge.MultiCrystalScale(experiments, reflections_all, params)
    except ValueError as e:
        sys.exit(str(e))
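
The params.identifiers handling above flattens comma-separated entries into a single flat list before scaling and merging. A minimal stand-alone sketch of that normalisation (values are illustrative):

raw_identifiers = ["a,b", "c"]
identifiers = []
for identifier in raw_identifiers:
    identifiers.extend(identifier.split(","))
assert identifiers == ["a", "b", "c"]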