Example #1
def reflection_tables_to_batch_dependent_properties(reflection_tables,
                                                    experiments,
                                                    scaled_array=None):
    """Extract batch dependent properties from a reflection table list."""
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables,
                                                      offsets)
    # filter out bad reflections and those with non-positive inverse scale factors
    batches = flex.int()
    scales = flex.double()
    for r in reflection_tables:
        sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
        sel &= r["inverse_scale_factor"] > 0
        batches.extend(r["batch"].select(sel))
        scales.extend(r["inverse_scale_factor"].select(sel))
    if scaled_array is None:
        scaled_array = scaled_data_as_miller_array(reflection_tables,
                                                   experiments)
    ms = scaled_array.customized_copy()
    batch_array = miller.array(ms, data=batches)

    batch_ranges = get_batch_ranges(experiments, offsets)
    batch_data = [{"id": i, "range": r} for i, r in enumerate(batch_ranges)]

    properties = batch_dependent_properties(batch_array, scaled_array,
                                            miller.array(ms, data=scales))

    return properties + (batch_data, )
Example #2
def test_calculate_batch_offsets():
    """Test offset calculation. Offset is next number ending in 01 bigger than
    previous batch numbers which is not consecutive"""
    scan = Scan(image_range=[1, 200], oscillation=[0.0, 1.0])
    exp1 = Experiment(scan=scan)
    exp2 = Experiment()
    offsets = calculate_batch_offsets([exp1, exp2])
    assert offsets == [0, 301]
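
To make the rule in the docstring concrete, here is a small hypothetical sketch of the offset choice, not the actual calculate_batch_offsets implementation: given the largest batch number already in use, pick the next value ending in 01 that is not simply the next consecutive number.

def _next_offset_ending_in_01(max_batch_so_far):
    # Hypothetical helper illustrating only the rule described above; the real
    # function also handles scanless experiments and pre-existing offsets.
    candidate = (max_batch_so_far // 100 + 1) * 100 + 1
    if candidate == max_batch_so_far + 1:
        # a consecutive value is not allowed, so jump to the next hundred
        candidate += 100
    return candidate


# consistent with the expected offsets [0, 301] above: after batches 1-200,
# 201 would be consecutive, so the next allowed start is 301
assert _next_offset_ending_in_01(200) == 301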
Example #3
    def from_reflections_and_experiments(cls, reflection_tables, experiments,
                                         params):
        """Construct the resolutionizer from native dials datatypes."""
        # add some assertions about data

        # do batch assignment (same functions as in dials.export)
        offsets = calculate_batch_offsets(experiments)
        reflection_tables = assign_batches_to_reflections(
            reflection_tables, offsets)
        batches = flex.int()
        intensities = flex.double()
        indices = flex.miller_index()
        variances = flex.double()
        for table in reflection_tables:
            if "intensity.scale.value" in table:
                table = filter_reflection_table(table, ["scale"],
                                                partiality_threshold=0.4)
                intensities.extend(table["intensity.scale.value"])
                variances.extend(table["intensity.scale.variance"])
            else:
                table = filter_reflection_table(table, ["profile"],
                                                partiality_threshold=0.4)
                intensities.extend(table["intensity.prf.value"])
                variances.extend(table["intensity.prf.variance"])
            indices.extend(table["miller_index"])
            batches.extend(table["batch"])

        crystal_symmetry = miller.crystal.symmetry(
            unit_cell=determine_best_unit_cell(experiments),
            space_group=experiments[0].crystal.get_space_group(),
            assert_is_compatible_unit_cell=False,
        )
        miller_set = miller.set(crystal_symmetry,
                                indices,
                                anomalous_flag=False)
        i_obs = miller.array(miller_set,
                             data=intensities,
                             sigmas=flex.sqrt(variances))
        i_obs.set_observation_type_xray_intensity()
        i_obs.set_info(miller.array_info(source="DIALS", source_type="refl"))

        ms = i_obs.customized_copy()
        batch_array = miller.array(ms, data=batches)

        if params.reference is not None:
            reference, _ = miller_array_from_mtz(params.reference,
                                                 anomalous=params.anomalous,
                                                 labels=params.labels)
        else:
            reference = None

        return cls(i_obs, params, batches=batch_array, reference=reference)
Example #4
    def test_calculate_batch_ranges(self):
        assert self._run_ranges([(1, 1)]) == [(1, 1)]

        # Zero is shifted
        assert all(
            [x > 0 for x in self._run_ranges_to_set([(0, 0)])]
        ), "Should be no zeroth/negative batch"

        assert set(self._run_ranges([(1, 1), (1, 1)])) != {
            (1, 1)
        }, "Overlapping simple ranges"

        data_tests = [
            [(1, 1), (1, 1)],
            # while we decide behaviour, remove input
            #            [(1, 1), (8, 8), (9, 9)],
            [(23, 24), (70, 100), (1, 1), (1, 4), (1, 1)],
            [(0, 98)],
        ]
        for data in data_tests:
            print("Running ", data)
            print("  ", self._run_ranges(data))
            assert all(
                [
                    float(x).is_integer()
                    for x in itertools.chain(*self._run_ranges(data))
                ]
            ), "Fractional epochs"
            assert all(
                isinstance(x, int) for x in itertools.chain(*self._run_ranges(data))
            ), "Not all true integers"
            assert all(
                [x > 0 for x in self._run_ranges_to_set(data)]
            ), "Should be no zeroth/negative batch"
            assert not has_consecutive_ranges(self._run_ranges(data))

        exp1 = TestBatchRangeCalculations.MockExperiment((1, 1), scan=False)
        exp2 = TestBatchRangeCalculations.MockExperiment((1, 1), scan=False)
        offsets = calculate_batch_offsets([exp1, exp2])
        assert all(float(x).is_integer() for x in offsets)
        assert all(isinstance(x, int) for x in offsets)
        assert all(x > 0 for x in offsets)
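
MockExperiment is a fixture defined elsewhere in the test class. A minimal sketch of what it might look like, assuming calculate_batch_offsets only reads scan.get_image_range() and scan.get_batch_offset() (as export_mtz does in Example #5); the names and structure below are guesses, not the actual fixture:

class MockScan:
    """Hypothetical stand-in for a dxtbx Scan, exposing only what the
    batch-offset calculation is assumed to read."""

    def __init__(self, image_range, batch_offset=0):
        self._image_range = image_range
        self._batch_offset = batch_offset

    def get_image_range(self):
        return self._image_range

    def get_batch_offset(self):
        return self._batch_offset


class MockExperiment:
    """Hypothetical minimal experiment: holds a scan, or None for scanless data."""

    def __init__(self, image_range, scan=True):
        self.scan = MockScan(image_range) if scan else None


# e.g. calculate_batch_offsets([MockExperiment((1, 4)), MockExperiment((1, 1), scan=False)])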
Example #5
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout."""

    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value"
                in integrated_data) and ("intensity.scale.variance"
                                         in integrated_data):
            params.mtz.hklout = "scaled.mtz"
            logger.info(
                "Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables)
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(
        experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should match crystals, or else we need multiple crystals/datasets
        if not all(x.crystal == experiment_list[0].crystal
                   for x in experiment_list[1:]):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )

        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join("  Wavlength: %.5f, experiment numbers: %s " %
                          (k, ",".join(map(str, v)))
                          for k, v in wavelengths.items()),
            )
    else:
        wavelengths = OrderedDict(
            {experiment_list[0].beam.get_wavelength(): [0]})

    # the output also only works correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset() for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0
            for e in experiment_list
        ]
        effective_offsets = [
            o + s for o, s in zip(batch_offsets, batch_starts)
        ]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError("Duplicate batch offsets detected: %s" % ", ".join(
            str(item)
            for item, count in Counter(batch_offsets).items() if count > 1))

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(
        experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # ✓ decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        loc = expids_in_list.index(
            expids_in_table[id_])  # use the string identifier to find the position in the list
        experiment = experiment_list[loc]
        if len(wavelengths) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1
        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections],
                                                    [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
        experiment.data["batch_offset"] = flex.int(len(experiment.data["id"]),
                                                   batch_offset)

        # Calculate whether we have a ROT value for this experiment, and set the column
        _, _, z = experiment.data["xyzcal.px"].parts()
        if experiment.scan:
            experiment.data[
                "ROT"] = experiment.scan.get_angle_from_array_index(z)
        else:
            experiment.data["ROT"] = z

    mtz_writer.add_crystal(
        crystal_name=params.mtz.crystal_name,
        project_name=params.mtz.project_name,
        unit_cell=best_unit_cell,
    )
    # Note: add the unit cell here as the basis may have changed since creating the mtz.
    # For multi-wave unmerged mtz, we add an empty dataset for each wavelength,
    # but only write the data into the final dataset (for unmerged the batches
    # link the unmerged data to the individual wavelengths).
    for wavelength in wavelengths:
        mtz_writer.add_empty_dataset(wavelength)

    # Combine all of the experiment data columns before writing
    combined_data = {
        k: v.deep_copy()
        for k, v in experiment_list[0].data.items()
    }
    for experiment in experiment_list[1:]:
        for k, v in experiment.data.items():
            combined_data[k].extend(v)
    # ALL columns must be the same length
    assert len({len(v)
                for v in combined_data.values()
                }) == 1, "Column length mismatch"
    assert len(combined_data["id"]) == len(
        integrated_data["id"]), "Lost rows in split/combine"

    # Write all the data and columns to the mtz file
    mtz_writer.write_columns(combined_data)

    logger.info("Saving {} integrated reflections to {}".format(
        len(combined_data["id"]), params.mtz.hklout))
    mtz_file = mtz_writer.mtz_file
    mtz_file.write(params.mtz.hklout)

    return mtz_file
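
As a usage sketch only: the driver below shows how export_mtz might be called directly. The file names are placeholders, and obtaining params from the dials.export phil scope (and the exact attributes set on it) is an assumption rather than documented API.

from dials.array_family import flex
from dials.command_line.export import phil_scope  # assumed location of the export phil scope
from dxtbx.model.experiment_list import ExperimentListFactory

# placeholder file names for an integrated dataset
experiments = ExperimentListFactory.from_json_file("integrated.expt")
reflections = flex.reflection_table.from_file("integrated.refl")

params = phil_scope.extract()
params.mtz.hklout = "integrated.mtz"  # or leave unset to let export_mtz choose a name

# export_mtz writes params.mtz.hklout and returns the iotbx mtz object
mtz_object = export_mtz(reflections, experiments, params)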
Example #6
    def _run_ranges(self, ranges):
        """Convenience method to run the routine with a minimal experiment,
        and return the result as ranges of batch numbers."""
        input_data = [self.MockExperiment(x) for x in ranges]
        return offset_ranges(calculate_batch_offsets(input_data), ranges)
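
offset_ranges is a helper defined elsewhere in the test module. Given how _run_ranges uses it, a plausible reconstruction (an assumption, not the actual helper) is to shift each (first, last) image range by the corresponding offset:

def offset_ranges(offsets, ranges):
    # Hypothetical reconstruction: apply each experiment's batch offset to its
    # (first, last) image range to obtain the resulting batch range.
    return [
        (first + offset, last + offset)
        for offset, (first, last) in zip(offsets, ranges)
    ]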
Example #7
    def _scale_prepare(self):
        """Perform all of the preparation required to deliver the scaled
        data. This should sort together the reflection files, ensure that
        they are correctly indexed (via dials.symmetry) and generally tidy
        things up."""

        # AIM discover symmetry and reindex with dials.symmetry, and set the correct
        # reflections in si.reflections, si.experiments

        self._helper.set_working_directory(self.get_working_directory())
        self._factory.set_working_directory(self.get_working_directory())

        need_to_return = False

        self._sweep_handler = SweepInformationHandler(self._scalr_integraters)

        p, x = self._sweep_handler.get_project_info()
        self._scalr_pname = p
        self._scalr_xname = x

        self._helper.set_pname_xname(p, x)

        Journal.block(
            "gathering",
            self.get_scaler_xcrystal().get_name(),
            "Dials",
            {"working directory": self.get_working_directory()},
        )

        # First do stuff to work out if excluding any data
        # Note - does this actually work? I couldn't seem to get it to work
        # in either this pipeline or the standard dials pipeline
        for epoch in self._sweep_handler.get_epochs():
            si = self._sweep_handler.get_sweep_information(epoch)
            intgr = si.get_integrater()
            _, xname, dname = si.get_project_info()
            sname = si.get_sweep_name()

            exclude_sweep = False

            for sweep in PhilIndex.params.xia2.settings.sweep:
                if sweep.id == sname and sweep.exclude:
                    exclude_sweep = True
                    break

            if exclude_sweep:
                self._sweep_handler.remove_epoch(epoch)
                Debug.write("Excluding sweep %s" % sname)
            else:
                Journal.entry({"adding data from": "%s/%s/%s" % (xname, dname, sname)})

        # If there are multiple files, we also want to run symmetry to check
        # for consistent indexing

        # try to reproduce what CCP4ScalerA is doing

        # first assign identifiers to avoid dataset-id collisions
        # Idea is that this should be called anytime you get data anew from the
        # integrater, to intercept and assign unique ids, then set in the
        # sweep_information (si) and always use si.set_reflections/
        # si.get_reflections as we process.

        # self._sweep_handler = self._helper.assign_and_return_datasets(
        #    self._sweep_handler
        # ) symmetry now sorts out identifiers.

        need_to_return = False

        if self._scalr_input_pointgroup:
            self._input_pointgroup_scale_prepare()
        elif (
            len(self._sweep_handler.get_epochs()) > 1
            and PhilIndex.params.xia2.settings.multi_sweep_indexing
        ):
            need_to_return = self._multi_sweep_scale_prepare()
        else:
            need_to_return = self._standard_scale_prepare()

        if need_to_return:
            self.set_scaler_done(False)
            self.set_scaler_prepare_done(False)
            return

        ### After this point, point group is good and only need to
        ### reindex to consistent setting. Don't need to call back to the
        ### integrater, just use the data in the sweep info.

        # First work out if we're going to reindex against external reference
        param = PhilIndex.params.xia2.settings.scale
        using_external_references = False
        reference_refl = None
        reference_expt = None
        if param.reference_reflection_file:
            if not param.reference_experiment_file:
                Chatter.write(
                    """
No DIALS reference experiments file provided, reference reflection file will
not be used. Reference mtz files for reindexing not currently supported for
pipeline=dials (supported for pipeline=dials-aimless).
"""
                )
            else:
                reference_refl = param.reference_reflection_file
                reference_expt = param.reference_experiment_file
                using_external_references = True
                Debug.write("Using reference reflections %s" % reference_refl)
                Debug.write("Using reference experiments %s" % reference_expt)

        if len(self._sweep_handler.get_epochs()) > 1:
            if PhilIndex.params.xia2.settings.unify_setting:
                self.unify_setting()

            if PhilIndex.params.xia2.settings.use_brehm_diederichs:
                self.brehm_diederichs_reindexing()
            # If not using Brehm-Diederichs reindexing, set the reference as the
            # first sweep, unless using an external reference.
            elif not using_external_references:
                Debug.write("First sweep will be used as reference for reindexing")
                first = self._sweep_handler.get_epochs()[0]
                si = self._sweep_handler.get_sweep_information(first)
                reference_expt = si.get_experiments()
                reference_refl = si.get_reflections()

        # Now reindex to be consistent with first dataset - run reindex on each
        # dataset with reference (unless did brehm diederichs and didn't supply
        # a reference file)

        if reference_refl and reference_expt:
            exp = load.experiment_list(reference_expt)
            reference_cell = exp[0].crystal.get_unit_cell().parameters()

            # ---------- REINDEX TO CORRECT (REFERENCE) SETTING ----------
            Chatter.write("Reindexing all datasets to common reference")

            if using_external_references:
                epochs = self._sweep_handler.get_epochs()
            else:
                epochs = self._sweep_handler.get_epochs()[1:]
            for epoch in epochs:
                # if we are working with unified UB matrix then this should not
                # be a problem here (note, *if*; *should*)

                # what about e.g. alternative P1 settings?
                # see JIRA MXSW-904
                if PhilIndex.params.xia2.settings.unify_setting:
                    continue

                reindexer = DialsReindex()
                reindexer.set_working_directory(self.get_working_directory())
                auto_logfiler(reindexer)

                si = self._sweep_handler.get_sweep_information(epoch)
                reindexer.set_reference_filename(reference_expt)
                reindexer.set_reference_reflections(reference_refl)
                reindexer.set_indexed_filename(si.get_reflections())
                reindexer.set_experiments_filename(si.get_experiments())
                reindexer.run()

                # At this point, CCP4ScalerA would reset the integrater so that
                # it re-runs reindexing; no need to do that here, as we have
                # access to the files and will never need to reintegrate.

                si.set_reflections(reindexer.get_reindexed_reflections_filename())
                si.set_experiments(reindexer.get_reindexed_experiments_filename())

                # FIXME how to get some indication of the reindexing used?

                exp = load.experiment_list(
                    reindexer.get_reindexed_experiments_filename()
                )
                cell = exp[0].crystal.get_unit_cell().parameters()

                # Note - no lattice check as this will already be caught by reindex
                Debug.write("Cell: %.2f %.2f %.2f %.2f %.2f %.2f" % cell)
                Debug.write("Ref:  %.2f %.2f %.2f %.2f %.2f %.2f" % reference_cell)

                for j in range(6):
                    if (
                        math.fabs((cell[j] - reference_cell[j]) / reference_cell[j])
                        > 0.1
                    ):
                        raise RuntimeError(
                            "unit cell parameters differ in %s and %s"
                            % (reference_expt, si.get_reflections())
                        )

        # Now make sure all batches are ok before finishing preparation.
        # This should be made safer: after dials.scale there is no concept of
        # 'batch'; dials.export uses calculate_batch_offsets to assign batches,
        # giving the same result as below.

        experiments_to_rebatch = []
        for epoch in self._sweep_handler.get_epochs():
            si = self._sweep_handler.get_sweep_information(epoch)
            experiment = si.get_experiments()
            experiments_to_rebatch.append(load.experiment_list(experiment)[0])
        offsets = calculate_batch_offsets(experiments_to_rebatch)

        for i, epoch in enumerate(self._sweep_handler.get_epochs()):
            si = self._sweep_handler.get_sweep_information(epoch)
            r = si.get_batch_range()
            si.set_batch_offset(offsets[i])
            si.set_batches([r[0] + offsets[i], r[1] + offsets[i]])